From 77996879a87c06262be4e555ae0aaf703af8e5ce Mon Sep 17 00:00:00 2001 From: RSDuck Date: Mon, 7 Dec 2020 18:34:42 +0100 Subject: rename GPU2DSoft.cpp to GPU2D_Soft.cpp --- src/GPU2D_Soft.cpp | 2227 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2227 insertions(+) create mode 100644 src/GPU2D_Soft.cpp (limited to 'src/GPU2D_Soft.cpp') diff --git a/src/GPU2D_Soft.cpp b/src/GPU2D_Soft.cpp new file mode 100644 index 0000000..a7ad1d7 --- /dev/null +++ b/src/GPU2D_Soft.cpp @@ -0,0 +1,2227 @@ +#include "GPU2D.h" +#include "GPU.h" + +GPU2DSoft::GPU2DSoft(u32 num) + : GPU2D(num) +{ + // initialize mosaic table + for (int m = 0; m < 16; m++) + { + for (int x = 0; x < 256; x++) + { + int offset = x % (m+1); + MosaicTable[m][x] = offset; + } + } +} + +void GPU2DSoft::SetRenderSettings(bool accel) +{ + Accelerated = accel; +} + +u32 GPU2DSoft::ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb) +{ + u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 4; + u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb)) >> 4) & 0x007F00; + u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb)) >> 4) & 0x7F0000; + + if (r > 0x00003F) r = 0x00003F; + if (g > 0x003F00) g = 0x003F00; + if (b > 0x3F0000) b = 0x3F0000; + + return r | g | b | 0xFF000000; +} + +u32 GPU2DSoft::ColorBlend5(u32 val1, u32 val2) +{ + u32 eva = ((val1 >> 24) & 0x1F) + 1; + u32 evb = 32 - eva; + + if (eva == 32) return val1; + + u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 5; + u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb)) >> 5) & 0x007F00; + u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb)) >> 5) & 0x7F0000; + + if (eva <= 16) + { + r += 0x000001; + g += 0x000100; + b += 0x010000; + } + + if (r > 0x00003F) r = 0x00003F; + if (g > 0x003F00) g = 0x003F00; + if (b > 0x3F0000) b = 0x3F0000; + + return r | g | b | 0xFF000000; +} + +u32 GPU2DSoft::ColorBrightnessUp(u32 val, u32 factor) +{ + u32 rb = val & 0x3F003F; + u32 g = val & 0x003F00; + + rb += ((((0x3F003F - rb) * factor) >> 4) & 0x3F003F); + g += ((((0x003F00 - g) * factor) >> 4) & 0x003F00); + + return rb | g | 0xFF000000; +} + +u32 GPU2DSoft::ColorBrightnessDown(u32 val, u32 factor) +{ + u32 rb = val & 0x3F003F; + u32 g = val & 0x003F00; + + rb -= (((rb * factor) >> 4) & 0x3F003F); + g -= (((g * factor) >> 4) & 0x003F00); + + return rb | g | 0xFF000000; +} + +u32 GPU2DSoft::ColorComposite(int i, u32 val1, u32 val2) +{ + u32 coloreffect = 0; + u32 eva, evb; + + u32 flag1 = val1 >> 24; + u32 flag2 = val2 >> 24; + + u32 target2; + if (flag2 & 0x80) target2 = 0x1000; + else if (flag2 & 0x40) target2 = 0x0100; + else target2 = flag2 << 8; + + if ((flag1 & 0x80) && (BlendCnt & target2)) + { + // sprite blending + + coloreffect = 1; + + if (flag1 & 0x40) + { + eva = flag1 & 0x1F; + evb = 16 - eva; + } + else + { + eva = EVA; + evb = EVB; + } + } + else if ((flag1 & 0x40) && (BlendCnt & target2)) + { + // 3D layer blending + + coloreffect = 4; + } + else + { + if (flag1 & 0x80) flag1 = 0x10; + else if (flag1 & 0x40) flag1 = 0x01; + + if ((BlendCnt & flag1) && (WindowMask[i] & 0x20)) + { + coloreffect = (BlendCnt >> 6) & 0x3; + + if (coloreffect == 1) + { + if (BlendCnt & target2) + { + eva = EVA; + evb = EVB; + } + else + coloreffect = 0; + } + } + } + + switch (coloreffect) + { + case 0: return val1; + case 1: return ColorBlend4(val1, val2, eva, evb); + case 2: return ColorBrightnessUp(val1, EVY); + case 3: return ColorBrightnessDown(val1, EVY); + case 4: return ColorBlend5(val1, val2); + } + + return val1; +} + +void GPU2DSoft::DrawScanline(u32 line) +{ + int stride = Accelerated ? (256*3 + 1) : 256; + u32* dst = &Framebuffer[stride * line]; + + int n3dline = line; + line = GPU::VCount; + + if (Num == 0) + { + auto bgDirty = GPU::VRAMDirty_ABG.DeriveState(GPU::VRAMMap_ABG); + GPU::MakeVRAMFlat_ABGCoherent(bgDirty); + auto bgExtPalDirty = GPU::VRAMDirty_ABGExtPal.DeriveState(GPU::VRAMMap_ABGExtPal); + GPU::MakeVRAMFlat_ABGExtPalCoherent(bgExtPalDirty); + auto objExtPalDirty = GPU::VRAMDirty_AOBJExtPal.DeriveState(&GPU::VRAMMap_AOBJExtPal); + GPU::MakeVRAMFlat_AOBJExtPalCoherent(objExtPalDirty); + } + else + { + auto bgDirty = GPU::VRAMDirty_BBG.DeriveState(GPU::VRAMMap_BBG); + GPU::MakeVRAMFlat_BBGCoherent(bgDirty); + auto bgExtPalDirty = GPU::VRAMDirty_BBGExtPal.DeriveState(GPU::VRAMMap_BBGExtPal); + GPU::MakeVRAMFlat_BBGExtPalCoherent(bgExtPalDirty); + auto objExtPalDirty = GPU::VRAMDirty_BOBJExtPal.DeriveState(&GPU::VRAMMap_BOBJExtPal); + GPU::MakeVRAMFlat_BOBJExtPalCoherent(objExtPalDirty); + } + + bool forceblank = false; + + // scanlines that end up outside of the GPU drawing range + // (as a result of writing to VCount) are filled white + if (line > 192) forceblank = true; + + // GPU B can be completely disabled by POWCNT1 + // oddly that's not the case for GPU A + if (Num && !Enabled) forceblank = true; + + if (forceblank) + { + for (int i = 0; i < 256; i++) + dst[i] = 0xFFFFFFFF; + + if (Accelerated) + { + dst[256*3] = 0; + } + return; + } + + u32 dispmode = DispCnt >> 16; + dispmode &= (Num ? 0x1 : 0x3); + + if (Num == 0) + { + if (!Accelerated) + _3DLine = GPU3D::GetLine(n3dline); + else if ((CaptureCnt & (1<<31)) && (((CaptureCnt >> 29) & 0x3) != 1)) + { + _3DLine = GPU3D::GetLine(n3dline); + //GPU3D::GLRenderer::PrepareCaptureFrame(); + } + } + + // always render regular graphics + DrawScanline_BGOBJ(line); + UpdateMosaicCounters(line); + + switch (dispmode) + { + case 0: // screen off + { + for (int i = 0; i < 256; i++) + dst[i] = 0x003F3F3F; + } + break; + + case 1: // regular display + { + int i = 0; + for (; i < (stride & ~1); i+=2) + *(u64*)&dst[i] = *(u64*)&BGOBJLine[i]; + } + break; + + case 2: // VRAM display + { + u32 vrambank = (DispCnt >> 18) & 0x3; + if (GPU::VRAMMap_LCDC & (1<> 4; + u8 b = (color & 0x7C00) >> 9; + + dst[i] = r | (g << 8) | (b << 16); + } + } + else + { + for (int i = 0; i < 256; i++) + { + dst[i] = 0; + } + } + } + break; + + case 3: // FIFO display + { + for (int i = 0; i < 256; i++) + { + u16 color = DispFIFOBuffer[i]; + u8 r = (color & 0x001F) << 1; + u8 g = (color & 0x03E0) >> 4; + u8 b = (color & 0x7C00) >> 9; + + dst[i] = r | (g << 8) | (b << 16); + } + } + break; + } + + // capture + if ((Num == 0) && (CaptureCnt & (1<<31))) + { + u32 capwidth, capheight; + switch ((CaptureCnt >> 20) & 0x3) + { + case 0: capwidth = 128; capheight = 128; break; + case 1: capwidth = 256; capheight = 64; break; + case 2: capwidth = 256; capheight = 128; break; + case 3: capwidth = 256; capheight = 192; break; + } + + if (line < capheight) + DoCapture(line, capwidth); + } + + if (Accelerated) + { + dst[256*3] = MasterBrightness | (DispCnt & 0x30000); + return; + } + + // master brightness + if (dispmode != 0) + { + if ((MasterBrightness >> 14) == 1) + { + // up + u32 factor = MasterBrightness & 0x1F; + if (factor > 16) factor = 16; + + for (int i = 0; i < 256; i++) + { + dst[i] = ColorBrightnessUp(dst[i], factor); + } + } + else if ((MasterBrightness >> 14) == 2) + { + // down + u32 factor = MasterBrightness & 0x1F; + if (factor > 16) factor = 16; + + for (int i = 0; i < 256; i++) + { + dst[i] = ColorBrightnessDown(dst[i], factor); + } + } + } + + // convert to 32-bit BGRA + // note: 32-bit RGBA would be more straightforward, but + // BGRA seems to be more compatible (Direct2D soft, cairo...) + for (int i = 0; i < 256; i+=2) + { + u64 c = *(u64*)&dst[i]; + + u64 r = (c << 18) & 0xFC000000FC0000; + u64 g = (c << 2) & 0xFC000000FC00; + u64 b = (c >> 14) & 0xFC000000FC; + c = r | g | b; + + *(u64*)&dst[i] = c | ((c & 0x00C0C0C000C0C0C0) >> 6) | 0xFF000000FF000000; + } +} + +void GPU2DSoft::VBlankEnd() +{ + GPU2D::VBlankEnd(); + +#ifdef OGLRENDERER_ENABLED + if (Accelerated) + { + if ((Num == 0) && (CaptureCnt & (1<<31)) && (((CaptureCnt >> 29) & 0x3) != 1)) + { + GPU3D::GLRenderer::PrepareCaptureFrame(); + } + } +#endif +} + +void GPU2DSoft::DoCapture(u32 line, u32 width) +{ + u32 dstvram = (CaptureCnt >> 16) & 0x3; + + // TODO: confirm this + // it should work like VRAM display mode, which requires VRAM to be mapped to LCDC + if (!(GPU::VRAMMap_LCDC & (1<> 18) & 0x3) << 14) + (line * width); + + static_assert(GPU::VRAMDirtyGranularity == 512); + GPU::VRAMDirty[dstvram][(dstaddr & 0x1FFFF) / GPU::VRAMDirtyGranularity] = true; + + // TODO: handle 3D in accelerated mode!! + + u32* srcA; + if (CaptureCnt & (1<<24)) + { + srcA = _3DLine; + } + else + { + srcA = BGOBJLine; + if (Accelerated) + { + // in accelerated mode, compositing is normally done on the GPU + // but when doing display capture, we do need the composited output + // so we do it here + + for (int i = 0; i < 256; i++) + { + u32 val1 = BGOBJLine[i]; + u32 val2 = BGOBJLine[256+i]; + u32 val3 = BGOBJLine[512+i]; + + u32 compmode = (val3 >> 24) & 0xF; + + if (compmode == 4) + { + // 3D on top, blending + + u32 _3dval = _3DLine[val3 & 0xFF]; + if ((_3dval >> 24) > 0) + val1 = ColorBlend5(_3dval, val1); + else + val1 = val2; + } + else if (compmode == 1) + { + // 3D on bottom, blending + + u32 _3dval = _3DLine[val3 & 0xFF]; + if ((_3dval >> 24) > 0) + { + u32 eva = (val3 >> 8) & 0x1F; + u32 evb = (val3 >> 16) & 0x1F; + + val1 = ColorBlend4(val1, _3dval, eva, evb); + } + else + val1 = val2; + } + else if (compmode <= 3) + { + // 3D on top, normal/fade + + u32 _3dval = _3DLine[val3 & 0xFF]; + if ((_3dval >> 24) > 0) + { + u32 evy = (val3 >> 8) & 0x1F; + + val1 = _3dval; + if (compmode == 2) val1 = ColorBrightnessUp(val1, evy); + else if (compmode == 3) val1 = ColorBrightnessDown(val1, evy); + } + else + val1 = val2; + } + + BGOBJLine[i] = val1; + } + } + } + + u16* srcB = NULL; + u32 srcBaddr = line * 256; + + if (CaptureCnt & (1<<25)) + { + srcB = &DispFIFOBuffer[0]; + srcBaddr = 0; + } + else + { + u32 srcvram = (DispCnt >> 18) & 0x3; + if (GPU::VRAMMap_LCDC & (1<> 16) & 0x3) != 2) + srcBaddr += ((CaptureCnt >> 26) & 0x3) << 14; + } + + dstaddr &= 0xFFFF; + srcBaddr &= 0xFFFF; + + switch ((CaptureCnt >> 29) & 0x3) + { + case 0: // source A + { + for (u32 i = 0; i < width; i++) + { + u32 val = srcA[i]; + + // TODO: check what happens when alpha=0 + + u32 r = (val >> 1) & 0x1F; + u32 g = (val >> 9) & 0x1F; + u32 b = (val >> 17) & 0x1F; + u32 a = ((val >> 24) != 0) ? 0x8000 : 0; + + dst[dstaddr] = r | (g << 5) | (b << 10) | a; + dstaddr = (dstaddr + 1) & 0xFFFF; + } + } + break; + + case 1: // source B + { + if (srcB) + { + for (u32 i = 0; i < width; i++) + { + dst[dstaddr] = srcB[srcBaddr]; + srcBaddr = (srcBaddr + 1) & 0xFFFF; + dstaddr = (dstaddr + 1) & 0xFFFF; + } + } + else + { + for (u32 i = 0; i < width; i++) + { + dst[dstaddr] = 0; + dstaddr = (dstaddr + 1) & 0xFFFF; + } + } + } + break; + + case 2: // sources A+B + case 3: + { + u32 eva = CaptureCnt & 0x1F; + u32 evb = (CaptureCnt >> 8) & 0x1F; + + // checkme + if (eva > 16) eva = 16; + if (evb > 16) evb = 16; + + if (srcB) + { + for (u32 i = 0; i < width; i++) + { + u32 val = srcA[i]; + + // TODO: check what happens when alpha=0 + + u32 rA = (val >> 1) & 0x1F; + u32 gA = (val >> 9) & 0x1F; + u32 bA = (val >> 17) & 0x1F; + u32 aA = ((val >> 24) != 0) ? 1 : 0; + + val = srcB[srcBaddr]; + + u32 rB = val & 0x1F; + u32 gB = (val >> 5) & 0x1F; + u32 bB = (val >> 10) & 0x1F; + u32 aB = val >> 15; + + u32 rD = ((rA * aA * eva) + (rB * aB * evb)) >> 4; + u32 gD = ((gA * aA * eva) + (gB * aB * evb)) >> 4; + u32 bD = ((bA * aA * eva) + (bB * aB * evb)) >> 4; + u32 aD = (eva>0 ? aA : 0) | (evb>0 ? aB : 0); + + if (rD > 0x1F) rD = 0x1F; + if (gD > 0x1F) gD = 0x1F; + if (bD > 0x1F) bD = 0x1F; + + dst[dstaddr] = rD | (gD << 5) | (bD << 10) | (aD << 15); + srcBaddr = (srcBaddr + 1) & 0xFFFF; + dstaddr = (dstaddr + 1) & 0xFFFF; + } + } + else + { + for (u32 i = 0; i < width; i++) + { + u32 val = srcA[i]; + + // TODO: check what happens when alpha=0 + + u32 rA = (val >> 1) & 0x1F; + u32 gA = (val >> 9) & 0x1F; + u32 bA = (val >> 17) & 0x1F; + u32 aA = ((val >> 24) != 0) ? 1 : 0; + + u32 rD = (rA * aA * eva) >> 4; + u32 gD = (gA * aA * eva) >> 4; + u32 bD = (bA * aA * eva) >> 4; + u32 aD = (eva>0 ? aA : 0); + + dst[dstaddr] = rD | (gD << 5) | (bD << 10) | (aD << 15); + dstaddr = (dstaddr + 1) & 0xFFFF; + } + } + } + break; + } +} + +#define DoDrawBG(type, line, num) \ + do \ + { \ + if ((BGCnt[num] & 0x0040) && (BGMosaicSize[0] > 0)) \ + { \ + if (Accelerated) DrawBG_##type(line, num); \ + else DrawBG_##type(line, num); \ + } \ + else \ + { \ + if (Accelerated) DrawBG_##type(line, num); \ + else DrawBG_##type(line, num); \ + } \ + } while (false) + +#define DoDrawBG_Large(line) \ + do \ + { \ + if ((BGCnt[2] & 0x0040) && (BGMosaicSize[0] > 0)) \ + { \ + if (Accelerated) DrawBG_Large(line); \ + else DrawBG_Large(line); \ + } \ + else \ + { \ + if (Accelerated) DrawBG_Large(line); \ + else DrawBG_Large(line); \ + } \ + } while (false) + +#define DoInterleaveSprites(prio) \ + if (Accelerated) InterleaveSprites(prio); else InterleaveSprites(prio); + +template +void GPU2DSoft::DrawScanlineBGMode(u32 line) +{ + for (int i = 3; i >= 0; i--) + { + if ((BGCnt[3] & 0x3) == i) + { + if (DispCnt & 0x0800) + { + if (bgmode >= 3) + DoDrawBG(Extended, line, 3); + else if (bgmode >= 1) + DoDrawBG(Affine, line, 3); + else + DoDrawBG(Text, line, 3); + } + } + if ((BGCnt[2] & 0x3) == i) + { + if (DispCnt & 0x0400) + { + if (bgmode == 5) + DoDrawBG(Extended, line, 2); + else if (bgmode == 4 || bgmode == 2) + DoDrawBG(Affine, line, 2); + else + DoDrawBG(Text, line, 2); + } + } + if ((BGCnt[1] & 0x3) == i) + { + if (DispCnt & 0x0200) + { + DoDrawBG(Text, line, 1); + } + } + if ((BGCnt[0] & 0x3) == i) + { + if (DispCnt & 0x0100) + { + if ((!Num) && (DispCnt & 0x8)) + DrawBG_3D(); + else + DoDrawBG(Text, line, 0); + } + } + if ((DispCnt & 0x1000) && NumSprites) + DoInterleaveSprites(0x40000 | (i<<16)); + } +} + +void GPU2DSoft::DrawScanlineBGMode6(u32 line) +{ + for (int i = 3; i >= 0; i--) + { + if ((BGCnt[2] & 0x3) == i) + { + if (DispCnt & 0x0400) + { + DoDrawBG_Large(line); + } + } + if ((BGCnt[0] & 0x3) == i) + { + if (DispCnt & 0x0100) + { + if ((!Num) && (DispCnt & 0x8)) + DrawBG_3D(); + } + } + if ((DispCnt & 0x1000) && NumSprites) + DoInterleaveSprites(0x40000 | (i<<16)) + } +} + +void GPU2DSoft::DrawScanlineBGMode7(u32 line) +{ + // mode 7 only has text-mode BG0 and BG1 + + for (int i = 3; i >= 0; i--) + { + if ((BGCnt[1] & 0x3) == i) + { + if (DispCnt & 0x0200) + { + DoDrawBG(Text, line, 1); + } + } + if ((BGCnt[0] & 0x3) == i) + { + if (DispCnt & 0x0100) + { + if ((!Num) && (DispCnt & 0x8)) + DrawBG_3D(); + else + DoDrawBG(Text, line, 0); + } + } + if ((DispCnt & 0x1000) && NumSprites) + DoInterleaveSprites(0x40000 | (i<<16)) + } +} + +void GPU2DSoft::DrawScanline_BGOBJ(u32 line) +{ + // forced blank disables BG/OBJ compositing + if (DispCnt & (1<<7)) + { + for (int i = 0; i < 256; i++) + BGOBJLine[i] = 0xFF3F3F3F; + + return; + } + + u64 backdrop; + if (Num) backdrop = *(u16*)&GPU::Palette[0x400]; + else backdrop = *(u16*)&GPU::Palette[0]; + + { + u8 r = (backdrop & 0x001F) << 1; + u8 g = (backdrop & 0x03E0) >> 4; + u8 b = (backdrop & 0x7C00) >> 9; + + backdrop = r | (g << 8) | (b << 16) | 0x20000000; + backdrop |= (backdrop << 32); + + for (int i = 0; i < 256; i+=2) + *(u64*)&BGOBJLine[i] = backdrop; + } + + if (DispCnt & 0xE000) + CalculateWindowMask(line); + else + memset(WindowMask, 0xFF, 256); + + ApplySpriteMosaicX(); + + switch (DispCnt & 0x7) + { + case 0: DrawScanlineBGMode<0>(line); break; + case 1: DrawScanlineBGMode<1>(line); break; + case 2: DrawScanlineBGMode<2>(line); break; + case 3: DrawScanlineBGMode<3>(line); break; + case 4: DrawScanlineBGMode<4>(line); break; + case 5: DrawScanlineBGMode<5>(line); break; + case 6: DrawScanlineBGMode6(line); break; + case 7: DrawScanlineBGMode7(line); break; + } + + // color special effects + // can likely be optimized + + if (!Accelerated) + { + for (int i = 0; i < 256; i++) + { + u32 val1 = BGOBJLine[i]; + u32 val2 = BGOBJLine[256+i]; + + BGOBJLine[i] = ColorComposite(i, val1, val2); + } + } + else + { + if (Num == 0) + { + for (int i = 0; i < 256; i++) + { + u32 val1 = BGOBJLine[i]; + u32 val2 = BGOBJLine[256+i]; + u32 val3 = BGOBJLine[512+i]; + + u32 flag1 = val1 >> 24; + u32 flag2 = val2 >> 24; + + u32 bldcnteffect = (BlendCnt >> 6) & 0x3; + + u32 target1; + if (flag1 & 0x80) target1 = 0x0010; + else if (flag1 & 0x40) target1 = 0x0001; + else target1 = flag1; + + u32 target2; + if (flag2 & 0x80) target2 = 0x1000; + else if (flag2 & 0x40) target2 = 0x0100; + else target2 = flag2 << 8; + + if (((flag1 & 0xC0) == 0x40) && (BlendCnt & target2)) + { + // 3D on top, blending + + BGOBJLine[i] = val2; + BGOBJLine[256+i] = ColorComposite(i, val2, val3); + BGOBJLine[512+i] = 0x04000000 | (val1 & 0xFF); + } + else if ((flag1 & 0xC0) == 0x40) + { + // 3D on top, normal/fade + + if (bldcnteffect == 1) bldcnteffect = 0; + if (!(BlendCnt & 0x0001)) bldcnteffect = 0; + if (!(WindowMask[i] & 0x20)) bldcnteffect = 0; + + BGOBJLine[i] = val2; + BGOBJLine[256+i] = ColorComposite(i, val2, val3); + BGOBJLine[512+i] = (bldcnteffect << 24) | (EVY << 8) | (val1 & 0xFF); + } + else if (((flag2 & 0xC0) == 0x40) && ((BlendCnt & 0x01C0) == 0x0140)) + { + // 3D on bottom, blending + + u32 eva, evb; + if ((flag1 & 0xC0) == 0xC0) + { + eva = flag1 & 0x1F; + evb = 16 - eva; + } + else if (((BlendCnt & target1) && (WindowMask[i] & 0x20)) || + ((flag1 & 0xC0) == 0x80)) + { + eva = EVA; + evb = EVB; + } + else + bldcnteffect = 7; + + BGOBJLine[i] = val1; + BGOBJLine[256+i] = ColorComposite(i, val1, val3); + BGOBJLine[512+i] = (bldcnteffect << 24) | (EVB << 16) | (EVA << 8) | (val2 & 0xFF); + } + else + { + // no potential 3D pixel involved + + BGOBJLine[i] = ColorComposite(i, val1, val2); + BGOBJLine[256+i] = 0; + BGOBJLine[512+i] = 0x07000000; + } + } + } + else + { + for (int i = 0; i < 256; i++) + { + u32 val1 = BGOBJLine[i]; + u32 val2 = BGOBJLine[256+i]; + + BGOBJLine[i] = ColorComposite(i, val1, val2); + BGOBJLine[256+i] = 0; + BGOBJLine[512+i] = 0x07000000; + } + } + } + + if (BGMosaicY >= BGMosaicYMax) + { + BGMosaicY = 0; + BGMosaicYMax = BGMosaicSize[1]; + } + else + BGMosaicY++; + + /*if (OBJMosaicY >= OBJMosaicYMax) + { + OBJMosaicY = 0; + OBJMosaicYMax = OBJMosaicSize[1]; + } + else + OBJMosaicY++;*/ +} + + +void GPU2DSoft::DrawPixel_Normal(u32* dst, u16 color, u32 flag) +{ + u8 r = (color & 0x001F) << 1; + u8 g = (color & 0x03E0) >> 4; + u8 b = (color & 0x7C00) >> 9; + //g |= ((color & 0x8000) >> 15); + + *(dst+256) = *dst; + *dst = r | (g << 8) | (b << 16) | flag; +} + +void GPU2DSoft::DrawPixel_Accel(u32* dst, u16 color, u32 flag) +{ + u8 r = (color & 0x001F) << 1; + u8 g = (color & 0x03E0) >> 4; + u8 b = (color & 0x7C00) >> 9; + + *(dst+512) = *(dst+256); + *(dst+256) = *dst; + *dst = r | (g << 8) | (b << 16) | flag; +} + +void GPU2DSoft::DrawBG_3D() +{ + u16 xoff = BGXPos[0]; + int i = 0; + int iend = 256; + + if (xoff & 0x100) + { + i = (0x100 - (xoff & 0xFF)); + xoff += i; + } + if ((xoff - i + iend - 1) & 0x100) + { + iend -= (xoff & 0xFF); + } + + if (Accelerated) + { + for (; i < iend; i++) + { + int pos = xoff++; + + if (!(WindowMask[i] & 0x01)) continue; + + BGOBJLine[i+512] = BGOBJLine[i+256]; + BGOBJLine[i+256] = BGOBJLine[i]; + BGOBJLine[i] = 0x40000000 | pos; // 3D-layer placeholder + } + } + else + { + for (; i < iend; i++) + { + u32 c = _3DLine[xoff]; + xoff++; + + if ((c >> 24) == 0) continue; + if (!(WindowMask[i] & 0x01)) continue; + + BGOBJLine[i+256] = BGOBJLine[i]; + BGOBJLine[i] = c | 0x40000000; + } + } +} + +template +void GPU2DSoft::DrawBG_Text(u32 line, u32 bgnum) +{ + u16 bgcnt = BGCnt[bgnum]; + + u32 tilesetaddr, tilemapaddr; + u16* pal; + u32 extpal, extpalslot; + + u16 xoff = BGXPos[bgnum]; + u16 yoff = BGYPos[bgnum] + line; + + if (bgcnt & 0x0040) + { + // vertical mosaic + yoff -= BGMosaicY; + } + + u32 widexmask = (bgcnt & 0x4000) ? 0x100 : 0; + + extpal = (DispCnt & 0x40000000); + if (extpal) extpalslot = ((bgnum<2) && (bgcnt&0x2000)) ? (2+bgnum) : bgnum; + + u8* bgvram; + u32 bgvrammask; + GetBGVRAM(bgvram, bgvrammask); + if (Num) + { + tilesetaddr = ((bgcnt & 0x003C) << 12); + tilemapaddr = ((bgcnt & 0x1F00) << 3); + + pal = (u16*)&GPU::Palette[0x400]; + } + else + { + tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + + pal = (u16*)&GPU::Palette[0]; + } + + // adjust Y position in tilemap + if (bgcnt & 0x8000) + { + tilemapaddr += ((yoff & 0x1F8) << 3); + if (bgcnt & 0x4000) + tilemapaddr += ((yoff & 0x100) << 3); + } + else + tilemapaddr += ((yoff & 0xF8) << 3); + + u16 curtile; + u16* curpal; + u32 pixelsaddr; + u8 color; + u32 lastxpos; + + if (bgcnt & 0x0080) + { + // 256-color + + // preload shit as needed + if ((xoff & 0x7) || mosaic) + { + curtile = *(u16*)&bgvram[(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)) & bgvrammask]; + + if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12); + else curpal = pal; + + pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 6) + + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 3); + } + + if (mosaic) lastxpos = xoff; + + for (int i = 0; i < 256; i++) + { + u32 xpos; + if (mosaic) xpos = xoff - CurBGXMosaicTable[i]; + else xpos = xoff; + + if ((!mosaic && (!(xpos & 0x7))) || + (mosaic && ((xpos >> 3) != (lastxpos >> 3)))) + { + // load a new tile + curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask]; + + if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12); + else curpal = pal; + + pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 6) + + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 3); + + if (mosaic) lastxpos = xpos; + } + + // draw pixel + if (WindowMask[i] & (1<> 2) + ((xoff & widexmask) << 3))) & bgvrammask]; + curpal = pal + ((curtile & 0xF000) >> 8); + pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5) + + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); + } + + if (mosaic) lastxpos = xoff; + + for (int i = 0; i < 256; i++) + { + u32 xpos; + if (mosaic) xpos = xoff - CurBGXMosaicTable[i]; + else xpos = xoff; + + if ((!mosaic && (!(xpos & 0x7))) || + (mosaic && ((xpos >> 3) != (lastxpos >> 3)))) + { + // load a new tile + curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask]; + curpal = pal + ((curtile & 0xF000) >> 8); + pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5) + + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); + + if (mosaic) lastxpos = xpos; + } + + // draw pixel + if (WindowMask[i] & (1<> 1)) & bgvrammask] >> 4; + } + else + { + color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] & 0x0F; + } + + if (color) + drawPixel(&BGOBJLine[i], curpal[color], 0x01000000< +void GPU2DSoft::DrawBG_Affine(u32 line, u32 bgnum) +{ + u16 bgcnt = BGCnt[bgnum]; + + u32 tilesetaddr, tilemapaddr; + u16* pal; + + u32 coordmask; + u32 yshift; + switch (bgcnt & 0xC000) + { + case 0x0000: coordmask = 0x07800; yshift = 7; break; + case 0x4000: coordmask = 0x0F800; yshift = 8; break; + case 0x8000: coordmask = 0x1F800; yshift = 9; break; + case 0xC000: coordmask = 0x3F800; yshift = 10; break; + } + + u32 overflowmask; + if (bgcnt & 0x2000) overflowmask = 0; + else overflowmask = ~(coordmask | 0x7FF); + + s16 rotA = BGRotA[bgnum-2]; + s16 rotB = BGRotB[bgnum-2]; + s16 rotC = BGRotC[bgnum-2]; + s16 rotD = BGRotD[bgnum-2]; + + s32 rotX = BGXRefInternal[bgnum-2]; + s32 rotY = BGYRefInternal[bgnum-2]; + + if (bgcnt & 0x0040) + { + // vertical mosaic + rotX -= (BGMosaicY * rotB); + rotY -= (BGMosaicY * rotD); + } + + u8* bgvram; + u32 bgvrammask; + GetBGVRAM(bgvram, bgvrammask); + + if (Num) + { + tilesetaddr = ((bgcnt & 0x003C) << 12); + tilemapaddr = ((bgcnt & 0x1F00) << 3); + + pal = (u16*)&GPU::Palette[0x400]; + } + else + { + tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + + pal = (u16*)&GPU::Palette[0]; + } + + u16 curtile; + u8 color; + + yshift -= 3; + + for (int i = 0; i < 256; i++) + { + if (WindowMask[i] & (1<> 11) << yshift) + ((finalX & coordmask) >> 11))) & bgvrammask]; + + // draw pixel + u32 tilexoff = (finalX >> 8) & 0x7; + u32 tileyoff = (finalY >> 8) & 0x7; + + color = bgvram[(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff) & bgvrammask]; + + if (color) + drawPixel(&BGOBJLine[i], pal[color], 0x01000000< +void GPU2DSoft::DrawBG_Extended(u32 line, u32 bgnum) +{ + u16 bgcnt = BGCnt[bgnum]; + + u32 tilesetaddr, tilemapaddr; + u16* pal; + u32 extpal; + + u8* bgvram; + u32 bgvrammask; + GetBGVRAM(bgvram, bgvrammask); + + extpal = (DispCnt & 0x40000000); + + s16 rotA = BGRotA[bgnum-2]; + s16 rotB = BGRotB[bgnum-2]; + s16 rotC = BGRotC[bgnum-2]; + s16 rotD = BGRotD[bgnum-2]; + + s32 rotX = BGXRefInternal[bgnum-2]; + s32 rotY = BGYRefInternal[bgnum-2]; + + if (bgcnt & 0x0040) + { + // vertical mosaic + rotX -= (BGMosaicY * rotB); + rotY -= (BGMosaicY * rotD); + } + + if (bgcnt & 0x0080) + { + // bitmap modes + + u32 xmask, ymask; + u32 yshift; + switch (bgcnt & 0xC000) + { + case 0x0000: xmask = 0x07FFF; ymask = 0x07FFF; yshift = 7; break; + case 0x4000: xmask = 0x0FFFF; ymask = 0x0FFFF; yshift = 8; break; + case 0x8000: xmask = 0x1FFFF; ymask = 0x0FFFF; yshift = 9; break; + case 0xC000: xmask = 0x1FFFF; ymask = 0x1FFFF; yshift = 9; break; + } + + u32 ofxmask, ofymask; + if (bgcnt & 0x2000) + { + ofxmask = 0; + ofymask = 0; + } + else + { + ofxmask = ~xmask; + ofymask = ~ymask; + } + + if (Num) tilemapaddr = ((bgcnt & 0x1F00) << 6); + else tilemapaddr = ((bgcnt & 0x1F00) << 6); + + if (bgcnt & 0x0004) + { + // direct color bitmap + + u16 color; + + for (int i = 0; i < 256; i++) + { + if (WindowMask[i] & (1<> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)) & bgvrammask]; + + if (color & 0x8000) + drawPixel(&BGOBJLine[i], color, 0x01000000<> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask]; + + if (color) + drawPixel(&BGOBJLine[i], pal[color], 0x01000000<> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + + pal = (u16*)&GPU::Palette[0]; + } + + u16 curtile; + u16* curpal; + u8 color; + + yshift -= 3; + + for (int i = 0; i < 256; i++) + { + if (WindowMask[i] & (1<> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1)) & bgvrammask]; + + if (extpal) curpal = GetBGExtPal(bgnum, curtile>>12); + else curpal = pal; + + // draw pixel + u32 tilexoff = (finalX >> 8) & 0x7; + u32 tileyoff = (finalY >> 8) & 0x7; + + if (curtile & 0x0400) tilexoff = 7-tilexoff; + if (curtile & 0x0800) tileyoff = 7-tileyoff; + + color = bgvram[(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff) & bgvrammask]; + + if (color) + drawPixel(&BGOBJLine[i], curpal[color], 0x01000000< +void GPU2DSoft::DrawBG_Large(u32 line) // BG is always BG2 +{ + u16 bgcnt = BGCnt[2]; + + u32 tilesetaddr, tilemapaddr; + u16* pal; + + // large BG sizes: + // 0: 512x1024 + // 1: 1024x512 + // 2: 512x256 + // 3: 512x512 + u32 xmask, ymask; + u32 yshift; + switch (bgcnt & 0xC000) + { + case 0x0000: xmask = 0x1FFFF; ymask = 0x3FFFF; yshift = 9; break; + case 0x4000: xmask = 0x3FFFF; ymask = 0x1FFFF; yshift = 10; break; + case 0x8000: xmask = 0x1FFFF; ymask = 0x0FFFF; yshift = 9; break; + case 0xC000: xmask = 0x1FFFF; ymask = 0x1FFFF; yshift = 9; break; + } + + u32 ofxmask, ofymask; + if (bgcnt & 0x2000) + { + ofxmask = 0; + ofymask = 0; + } + else + { + ofxmask = ~xmask; + ofymask = ~ymask; + } + + s16 rotA = BGRotA[0]; + s16 rotB = BGRotB[0]; + s16 rotC = BGRotC[0]; + s16 rotD = BGRotD[0]; + + s32 rotX = BGXRefInternal[0]; + s32 rotY = BGYRefInternal[0]; + + if (bgcnt & 0x0040) + { + // vertical mosaic + rotX -= (BGMosaicY * rotB); + rotY -= (BGMosaicY * rotD); + } + + u8* bgvram; + u32 bgvrammask; + GetBGVRAM(bgvram, bgvrammask); + + // 256-color bitmap + + if (Num) pal = (u16*)&GPU::Palette[0x400]; + else pal = (u16*)&GPU::Palette[0]; + + u8 color; + + for (int i = 0; i < 256; i++) + { + if (WindowMask[i] & (1<<2)) + { + s32 finalX, finalY; + if (mosaic) + { + int im = CurBGXMosaicTable[i]; + finalX = rotX - (im * rotA); + finalY = rotY - (im * rotC); + } + else + { + finalX = rotX; + finalY = rotY; + } + + if (!(finalX & ofxmask) && !(finalY & ofymask)) + { + color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask]; + + if (color) + drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2); + } + } + + rotX += rotA; + rotY += rotC; + } + + BGXRefInternal[0] += rotB; + BGYRefInternal[0] += rotD; +} + +// OBJ line buffer: +// * bit0-15: color (bit15=1: direct color, bit15=0: palette index, bit12=0 to indicate extpal) +// * bit16-17: BG-relative priority +// * bit18: non-transparent sprite pixel exists here +// * bit19: X mosaic should be applied here +// * bit24-31: compositor flags + +void GPU2DSoft::ApplySpriteMosaicX() +{ + // apply X mosaic if needed + // X mosaic for sprites is applied after all sprites are rendered + + if (OBJMosaicSize[0] == 0) return; + + u32 lastcolor = OBJLine[0]; + + for (u32 i = 1; i < 256; i++) + { + if (!(OBJLine[i] & 0x100000)) + { + // not a mosaic'd sprite pixel + continue; + } + + if ((OBJIndex[i] != OBJIndex[i-1]) || (CurOBJXMosaicTable[i] == 0)) + lastcolor = OBJLine[i]; + else + OBJLine[i] = lastcolor; + } +} + +template +void GPU2DSoft::InterleaveSprites(u32 prio) +{ + u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; + + if (DispCnt & 0x80000000) + { + u16* extpal = GetOBJExtPal(); + + for (u32 i = 0; i < 256; i++) + { + if ((OBJLine[i] & 0x70000) != prio) continue; + if (!(WindowMask[i] & 0x10)) continue; + + u16 color; + u32 pixel = OBJLine[i]; + + if (pixel & 0x8000) + color = pixel & 0x7FFF; + else if (pixel & 0x1000) + color = pal[pixel & 0xFF]; + else + color = extpal[pixel & 0xFFF]; + + drawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); + } + } + else + { + // optimized no-extpal version + + for (u32 i = 0; i < 256; i++) + { + if ((OBJLine[i] & 0x70000) != prio) continue; + if (!(WindowMask[i] & 0x10)) continue; + + u16 color; + u32 pixel = OBJLine[i]; + + if (pixel & 0x8000) + color = pixel & 0x7FFF; + else + color = pal[pixel & 0xFF]; + + drawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); + } + } +} + +#define DoDrawSprite(type, ...) \ + if (iswin) \ + { \ + DrawSprite_##type(__VA_ARGS__); \ + } \ + else \ + { \ + DrawSprite_##type(__VA_ARGS__); \ + } + +void GPU2DSoft::DrawSprites(u32 line) +{ + if (line == 0) + { + // reset those counters here + // TODO: find out when those are supposed to be reset + // it would make sense to reset them at the end of VBlank + // however, sprites are rendered one scanline in advance + // so they need to be reset a bit earlier + + OBJMosaicY = 0; + OBJMosaicYCount = 0; + } + + if (Num == 0) + { + auto objDirty = GPU::VRAMDirty_AOBJ.DeriveState(GPU::VRAMMap_AOBJ); + GPU::MakeVRAMFlat_AOBJCoherent(objDirty); + } + else + { + auto objDirty = GPU::VRAMDirty_BOBJ.DeriveState(GPU::VRAMMap_BOBJ); + GPU::MakeVRAMFlat_BOBJCoherent(objDirty); + } + + NumSprites = 0; + memset(OBJLine, 0, 256*4); + memset(OBJWindow, 0, 256); + if (!(DispCnt & 0x1000)) return; + + memset(OBJIndex, 0xFF, 256); + + u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0]; + + const s32 spritewidth[16] = + { + 8, 16, 8, 8, + 16, 32, 8, 8, + 32, 32, 16, 8, + 64, 64, 32, 8 + }; + const s32 spriteheight[16] = + { + 8, 8, 16, 8, + 16, 8, 32, 8, + 32, 16, 32, 8, + 64, 32, 64, 8 + }; + + for (int bgnum = 0x0C00; bgnum >= 0x0000; bgnum -= 0x0400) + { + for (int sprnum = 127; sprnum >= 0; sprnum--) + { + u16* attrib = &oam[sprnum*4]; + + if ((attrib[2] & 0x0C00) != bgnum) + continue; + + bool iswin = (((attrib[0] >> 10) & 0x3) == 2); + + u32 sprline; + if ((attrib[0] & 0x1000) && !iswin) + { + // apply Y mosaic + sprline = OBJMosaicY; + } + else + sprline = line; + + if (attrib[0] & 0x0100) + { + u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); + s32 width = spritewidth[sizeparam]; + s32 height = spriteheight[sizeparam]; + s32 boundwidth = width; + s32 boundheight = height; + + if (attrib[0] & 0x0200) + { + boundwidth <<= 1; + boundheight <<= 1; + } + + u32 ypos = attrib[0] & 0xFF; + ypos = (sprline - ypos) & 0xFF; + if (ypos >= (u32)boundheight) + continue; + + s32 xpos = (s32)(attrib[1] << 23) >> 23; + if (xpos <= -boundwidth) + continue; + + u32 rotparamgroup = (attrib[1] >> 9) & 0x1F; + + DoDrawSprite(Rotscale, sprnum, boundwidth, boundheight, width, height, xpos, ypos); + + NumSprites++; + } + else + { + if (attrib[0] & 0x0200) + continue; + + u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); + s32 width = spritewidth[sizeparam]; + s32 height = spriteheight[sizeparam]; + + u32 ypos = attrib[0] & 0xFF; + ypos = (sprline - ypos) & 0xFF; + if (ypos >= (u32)height) + continue; + + s32 xpos = (s32)(attrib[1] << 23) >> 23; + if (xpos <= -width) + continue; + + DoDrawSprite(Normal, sprnum, width, height, xpos, ypos); + + NumSprites++; + } + } + } +} + +template +void GPU2DSoft::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos) +{ + u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0]; + u16* attrib = &oam[num * 4]; + u16* rotparams = &oam[(((attrib[1] >> 9) & 0x1F) * 16) + 3]; + + u32 pixelattr = ((attrib[2] & 0x0C00) << 6) | 0xC0000; + u32 tilenum = attrib[2] & 0x03FF; + u32 spritemode = window ? 0 : ((attrib[0] >> 10) & 0x3); + + u32 ytilefactor; + + u8* objvram; + u32 objvrammask; + GetOBJVRAM(objvram, objvrammask); + + s32 centerX = boundwidth >> 1; + s32 centerY = boundheight >> 1; + + if ((attrib[0] & 0x1000) && !window) + { + // apply Y mosaic + pixelattr |= 0x100000; + } + + u32 xoff; + if (xpos >= 0) + { + xoff = 0; + if ((xpos+boundwidth) > 256) + boundwidth = 256-xpos; + } + else + { + xoff = -xpos; + xpos = 0; + } + + s16 rotA = (s16)rotparams[0]; + s16 rotB = (s16)rotparams[4]; + s16 rotC = (s16)rotparams[8]; + s16 rotD = (s16)rotparams[12]; + + s32 rotX = ((xoff-centerX) * rotA) + ((ypos-centerY) * rotB) + (width << 7); + s32 rotY = ((xoff-centerX) * rotC) + ((ypos-centerY) * rotD) + (height << 7); + + width <<= 8; + height <<= 8; + + u16 color = 0; // transparent in all cases + + if (spritemode == 3) + { + u32 alpha = attrib[2] >> 12; + if (!alpha) return; + alpha++; + + pixelattr |= (0xC0000000 | (alpha << 24)); + + u32 pixelsaddr; + if (DispCnt & 0x40) + { + if (DispCnt & 0x20) + { + // 'reserved' + // draws nothing + + return; + } + else + { + pixelsaddr = tilenum << (7 + ((DispCnt >> 22) & 0x1)); + ytilefactor = ((width >> 8) * 2); + } + } + else + { + if (DispCnt & 0x20) + { + pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); + ytilefactor = (256 * 2); + } + else + { + pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); + ytilefactor = (128 * 2); + } + } + + for (; xoff < boundwidth;) + { + if ((u32)rotX < width && (u32)rotY < height) + { + color = *(u16*)&objvram[(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)) & objvrammask]; + + if (color & 0x8000) + { + if (window) OBJWindow[xpos] = 1; + else { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; } + } + else if (!window) + { + if (OBJLine[xpos] == 0) + { + OBJLine[xpos] = pixelattr & 0x180000; + OBJIndex[xpos] = num; + } + } + } + + rotX += rotA; + rotY += rotC; + xoff++; + xpos++; + } + } + else + { + u32 pixelsaddr = tilenum; + if (DispCnt & 0x10) + { + pixelsaddr <<= ((DispCnt >> 20) & 0x3); + ytilefactor = (width >> 11) << ((attrib[0] & 0x2000) ? 1:0); + } + else + { + ytilefactor = 0x20; + } + + if (spritemode == 1) pixelattr |= 0x80000000; + else pixelattr |= 0x10000000; + + ytilefactor <<= 5; + pixelsaddr <<= 5; + + if (attrib[0] & 0x2000) + { + // 256-color + + if (!window) + { + if (!(DispCnt & 0x80000000)) + pixelattr |= 0x1000; + else + pixelattr |= ((attrib[2] & 0xF000) >> 4); + } + + for (; xoff < boundwidth;) + { + if ((u32)rotX < width && (u32)rotY < height) + { + color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)) & objvrammask]; + + if (color) + { + if (window) OBJWindow[xpos] = 1; + else { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; } + } + else if (!window) + { + if (OBJLine[xpos] == 0) + { + OBJLine[xpos] = pixelattr & 0x180000; + OBJIndex[xpos] = num; + } + } + } + + rotX += rotA; + rotY += rotC; + xoff++; + xpos++; + } + } + else + { + // 16-color + if (!window) + { + pixelattr |= 0x1000; + pixelattr |= ((attrib[2] & 0xF000) >> 8); + } + + for (; xoff < boundwidth;) + { + if ((u32)rotX < width && (u32)rotY < height) + { + color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)) & objvrammask]; + if (rotX & 0x100) + color >>= 4; + else + color &= 0x0F; + + if (color) + { + if (window) OBJWindow[xpos] = 1; + else { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; } + } + else if (!window) + { + if (OBJLine[xpos] == 0) + { + OBJLine[xpos] = pixelattr & 0x180000; + OBJIndex[xpos] = num; + } + } + } + + rotX += rotA; + rotY += rotC; + xoff++; + xpos++; + } + } + } +} + +template +void GPU2DSoft::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos) +{ + u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0]; + u16* attrib = &oam[num * 4]; + + u32 pixelattr = ((attrib[2] & 0x0C00) << 6) | 0xC0000; + u32 tilenum = attrib[2] & 0x03FF; + u32 spritemode = window ? 0 : ((attrib[0] >> 10) & 0x3); + + u32 wmask = width - 8; // really ((width - 1) & ~0x7) + + if ((attrib[0] & 0x1000) && !window) + { + // apply Y mosaic + pixelattr |= 0x100000; + } + + u8* objvram; + u32 objvrammask; + GetOBJVRAM(objvram, objvrammask); + + // yflip + if (attrib[1] & 0x2000) + ypos = height-1 - ypos; + + u32 xoff; + u32 xend = width; + if (xpos >= 0) + { + xoff = 0; + if ((xpos+xend) > 256) + xend = 256-xpos; + } + else + { + xoff = -xpos; + xpos = 0; + } + + u16 color = 0; // transparent in all cases + + if (spritemode == 3) + { + // bitmap sprite + + u32 alpha = attrib[2] >> 12; + if (!alpha) return; + alpha++; + + pixelattr |= (0xC0000000 | (alpha << 24)); + + u32 pixelsaddr = tilenum; + if (DispCnt & 0x40) + { + if (DispCnt & 0x20) + { + // 'reserved' + // draws nothing + + return; + } + else + { + pixelsaddr <<= (7 + ((DispCnt >> 22) & 0x1)); + pixelsaddr += (ypos * width * 2); + } + } + else + { + if (DispCnt & 0x20) + { + pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); + pixelsaddr += (ypos * 256 * 2); + } + else + { + pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); + pixelsaddr += (ypos * 128 * 2); + } + } + + s32 pixelstride; + + if (attrib[1] & 0x1000) // xflip + { + pixelsaddr += (width-1 << 1); + pixelsaddr -= (xoff << 1); + pixelstride = -2; + } + else + { + pixelsaddr += (xoff << 1); + pixelstride = 2; + } + + for (; xoff < xend;) + { + color = *(u16*)&objvram[pixelsaddr & objvrammask]; + + pixelsaddr += pixelstride; + + if (color & 0x8000) + { + if (window) OBJWindow[xpos] = 1; + else { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; } + } + else if (!window) + { + if (OBJLine[xpos] == 0) + { + OBJLine[xpos] = pixelattr & 0x180000; + OBJIndex[xpos] = num; + } + } + + xoff++; + xpos++; + } + } + else + { + u32 pixelsaddr = tilenum; + if (DispCnt & 0x10) + { + pixelsaddr <<= ((DispCnt >> 20) & 0x3); + pixelsaddr += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0); + } + else + { + pixelsaddr += ((ypos >> 3) * 0x20); + } + + if (spritemode == 1) pixelattr |= 0x80000000; + else pixelattr |= 0x10000000; + + if (attrib[0] & 0x2000) + { + // 256-color + pixelsaddr <<= 5; + pixelsaddr += ((ypos & 0x7) << 3); + s32 pixelstride; + + if (!window) + { + if (!(DispCnt & 0x80000000)) + pixelattr |= 0x1000; + else + pixelattr |= ((attrib[2] & 0xF000) >> 4); + } + + if (attrib[1] & 0x1000) // xflip + { + pixelsaddr += (((width-1) & wmask) << 3); + pixelsaddr += ((width-1) & 0x7); + pixelsaddr -= ((xoff & wmask) << 3); + pixelsaddr -= (xoff & 0x7); + pixelstride = -1; + } + else + { + pixelsaddr += ((xoff & wmask) << 3); + pixelsaddr += (xoff & 0x7); + pixelstride = 1; + } + + for (; xoff < xend;) + { + color = objvram[pixelsaddr]; + + pixelsaddr += pixelstride; + + if (color) + { + if (window) OBJWindow[xpos] = 1; + else { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; } + } + else if (!window) + { + if (OBJLine[xpos] == 0) + { + OBJLine[xpos] = pixelattr & 0x180000; + OBJIndex[xpos] = num; + } + } + + xoff++; + xpos++; + if (!(xoff & 0x7)) pixelsaddr += (56 * pixelstride); + } + } + else + { + // 16-color + pixelsaddr <<= 5; + pixelsaddr += ((ypos & 0x7) << 2); + s32 pixelstride; + + if (!window) + { + pixelattr |= 0x1000; + pixelattr |= ((attrib[2] & 0xF000) >> 8); + } + + // TODO: optimize VRAM access!! + // TODO: do xflip better? the 'two pixels per byte' thing makes it a bit shitty + + if (attrib[1] & 0x1000) // xflip + { + pixelsaddr += (((width-1) & wmask) << 2); + pixelsaddr += (((width-1) & 0x7) >> 1); + pixelsaddr -= ((xoff & wmask) << 2); + pixelsaddr -= ((xoff & 0x7) >> 1); + pixelstride = -1; + } + else + { + pixelsaddr += ((xoff & wmask) << 2); + pixelsaddr += ((xoff & 0x7) >> 1); + pixelstride = 1; + } + + for (; xoff < xend;) + { + if (attrib[1] & 0x1000) + { + if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] & 0x0F; pixelsaddr--; } + else color = objvram[pixelsaddr & objvrammask] >> 4; + } + else + { + if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] >> 4; pixelsaddr++; } + else color = objvram[pixelsaddr & objvrammask] & 0x0F; + } + + if (color) + { + if (window) OBJWindow[xpos] = 1; + else { OBJLine[xpos] = color | pixelattr; OBJIndex[xpos] = num; } + } + else if (!window) + { + if (OBJLine[xpos] == 0) + { + OBJLine[xpos] = pixelattr & 0x180000; + OBJIndex[xpos] = num; + } + } + + xoff++; + xpos++; + if (!(xoff & 0x7)) pixelsaddr += ((attrib[1] & 0x1000) ? -28 : 28); + } + } + } +} + +void GPU2DSoft::MosaicXSizeChanged() +{ + CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]]; + CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[1]]; +} \ No newline at end of file -- cgit v1.2.3 From d2cfd71c32b9b8d7b52249270ea124235d02de79 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Mon, 7 Dec 2020 18:45:50 +0100 Subject: rename the class as well this is getting emberassing --- src/GPU.cpp | 4 ++-- src/GPU2D.h | 6 ++--- src/GPU2D_Soft.cpp | 64 +++++++++++++++++++++++++++--------------------------- 3 files changed, 37 insertions(+), 37 deletions(-) (limited to 'src/GPU2D_Soft.cpp') diff --git a/src/GPU.cpp b/src/GPU.cpp index 3ae1d4d..a3b7934 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -144,8 +144,8 @@ u8 VRAMFlat_TexPal[128*1024]; bool Init() { - GPU2D_A = new GPU2DSoft(0); - GPU2D_B = new GPU2DSoft(1); + GPU2D_A = new GPU2D_Soft(0); + GPU2D_B = new GPU2D_Soft(1); if (!GPU3D::Init()) return false; FrontBuffer = 0; diff --git a/src/GPU2D.h b/src/GPU2D.h index 04dcd10..132a1bc 100644 --- a/src/GPU2D.h +++ b/src/GPU2D.h @@ -123,11 +123,11 @@ protected: virtual void MosaicXSizeChanged() = 0; }; -class GPU2DSoft : public GPU2D +class GPU2D_Soft : public GPU2D { public: - GPU2DSoft(u32 num); - ~GPU2DSoft() override {} + GPU2D_Soft(u32 num); + ~GPU2D_Soft() override {} void SetRenderSettings(bool accel) override; diff --git a/src/GPU2D_Soft.cpp b/src/GPU2D_Soft.cpp index a7ad1d7..ecc5016 100644 --- a/src/GPU2D_Soft.cpp +++ b/src/GPU2D_Soft.cpp @@ -1,7 +1,7 @@ #include "GPU2D.h" #include "GPU.h" -GPU2DSoft::GPU2DSoft(u32 num) +GPU2D_Soft::GPU2D_Soft(u32 num) : GPU2D(num) { // initialize mosaic table @@ -15,12 +15,12 @@ GPU2DSoft::GPU2DSoft(u32 num) } } -void GPU2DSoft::SetRenderSettings(bool accel) +void GPU2D_Soft::SetRenderSettings(bool accel) { Accelerated = accel; } -u32 GPU2DSoft::ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb) +u32 GPU2D_Soft::ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb) { u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 4; u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb)) >> 4) & 0x007F00; @@ -33,7 +33,7 @@ u32 GPU2DSoft::ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb) return r | g | b | 0xFF000000; } -u32 GPU2DSoft::ColorBlend5(u32 val1, u32 val2) +u32 GPU2D_Soft::ColorBlend5(u32 val1, u32 val2) { u32 eva = ((val1 >> 24) & 0x1F) + 1; u32 evb = 32 - eva; @@ -58,7 +58,7 @@ u32 GPU2DSoft::ColorBlend5(u32 val1, u32 val2) return r | g | b | 0xFF000000; } -u32 GPU2DSoft::ColorBrightnessUp(u32 val, u32 factor) +u32 GPU2D_Soft::ColorBrightnessUp(u32 val, u32 factor) { u32 rb = val & 0x3F003F; u32 g = val & 0x003F00; @@ -69,7 +69,7 @@ u32 GPU2DSoft::ColorBrightnessUp(u32 val, u32 factor) return rb | g | 0xFF000000; } -u32 GPU2DSoft::ColorBrightnessDown(u32 val, u32 factor) +u32 GPU2D_Soft::ColorBrightnessDown(u32 val, u32 factor) { u32 rb = val & 0x3F003F; u32 g = val & 0x003F00; @@ -80,7 +80,7 @@ u32 GPU2DSoft::ColorBrightnessDown(u32 val, u32 factor) return rb | g | 0xFF000000; } -u32 GPU2DSoft::ColorComposite(int i, u32 val1, u32 val2) +u32 GPU2D_Soft::ColorComposite(int i, u32 val1, u32 val2) { u32 coloreffect = 0; u32 eva, evb; @@ -150,7 +150,7 @@ u32 GPU2DSoft::ColorComposite(int i, u32 val1, u32 val2) return val1; } -void GPU2DSoft::DrawScanline(u32 line) +void GPU2D_Soft::DrawScanline(u32 line) { int stride = Accelerated ? (256*3 + 1) : 256; u32* dst = &Framebuffer[stride * line]; @@ -342,7 +342,7 @@ void GPU2DSoft::DrawScanline(u32 line) } } -void GPU2DSoft::VBlankEnd() +void GPU2D_Soft::VBlankEnd() { GPU2D::VBlankEnd(); @@ -357,7 +357,7 @@ void GPU2DSoft::VBlankEnd() #endif } -void GPU2DSoft::DoCapture(u32 line, u32 width) +void GPU2D_Soft::DoCapture(u32 line, u32 width) { u32 dstvram = (CaptureCnt >> 16) & 0x3; @@ -612,7 +612,7 @@ void GPU2DSoft::DoCapture(u32 line, u32 width) if (Accelerated) InterleaveSprites(prio); else InterleaveSprites(prio); template -void GPU2DSoft::DrawScanlineBGMode(u32 line) +void GPU2D_Soft::DrawScanlineBGMode(u32 line) { for (int i = 3; i >= 0; i--) { @@ -662,7 +662,7 @@ void GPU2DSoft::DrawScanlineBGMode(u32 line) } } -void GPU2DSoft::DrawScanlineBGMode6(u32 line) +void GPU2D_Soft::DrawScanlineBGMode6(u32 line) { for (int i = 3; i >= 0; i--) { @@ -686,7 +686,7 @@ void GPU2DSoft::DrawScanlineBGMode6(u32 line) } } -void GPU2DSoft::DrawScanlineBGMode7(u32 line) +void GPU2D_Soft::DrawScanlineBGMode7(u32 line) { // mode 7 only has text-mode BG0 and BG1 @@ -714,7 +714,7 @@ void GPU2DSoft::DrawScanlineBGMode7(u32 line) } } -void GPU2DSoft::DrawScanline_BGOBJ(u32 line) +void GPU2D_Soft::DrawScanline_BGOBJ(u32 line) { // forced blank disables BG/OBJ compositing if (DispCnt & (1<<7)) @@ -883,7 +883,7 @@ void GPU2DSoft::DrawScanline_BGOBJ(u32 line) } -void GPU2DSoft::DrawPixel_Normal(u32* dst, u16 color, u32 flag) +void GPU2D_Soft::DrawPixel_Normal(u32* dst, u16 color, u32 flag) { u8 r = (color & 0x001F) << 1; u8 g = (color & 0x03E0) >> 4; @@ -894,7 +894,7 @@ void GPU2DSoft::DrawPixel_Normal(u32* dst, u16 color, u32 flag) *dst = r | (g << 8) | (b << 16) | flag; } -void GPU2DSoft::DrawPixel_Accel(u32* dst, u16 color, u32 flag) +void GPU2D_Soft::DrawPixel_Accel(u32* dst, u16 color, u32 flag) { u8 r = (color & 0x001F) << 1; u8 g = (color & 0x03E0) >> 4; @@ -905,7 +905,7 @@ void GPU2DSoft::DrawPixel_Accel(u32* dst, u16 color, u32 flag) *dst = r | (g << 8) | (b << 16) | flag; } -void GPU2DSoft::DrawBG_3D() +void GPU2D_Soft::DrawBG_3D() { u16 xoff = BGXPos[0]; int i = 0; @@ -950,8 +950,8 @@ void GPU2DSoft::DrawBG_3D() } } -template -void GPU2DSoft::DrawBG_Text(u32 line, u32 bgnum) +template +void GPU2D_Soft::DrawBG_Text(u32 line, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -1114,8 +1114,8 @@ void GPU2DSoft::DrawBG_Text(u32 line, u32 bgnum) } } -template -void GPU2DSoft::DrawBG_Affine(u32 line, u32 bgnum) +template +void GPU2D_Soft::DrawBG_Affine(u32 line, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -1215,8 +1215,8 @@ void GPU2DSoft::DrawBG_Affine(u32 line, u32 bgnum) BGYRefInternal[bgnum-2] += rotD; } -template -void GPU2DSoft::DrawBG_Extended(u32 line, u32 bgnum) +template +void GPU2D_Soft::DrawBG_Extended(u32 line, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -1436,8 +1436,8 @@ void GPU2DSoft::DrawBG_Extended(u32 line, u32 bgnum) BGYRefInternal[bgnum-2] += rotD; } -template -void GPU2DSoft::DrawBG_Large(u32 line) // BG is always BG2 +template +void GPU2D_Soft::DrawBG_Large(u32 line) // BG is always BG2 { u16 bgcnt = BGCnt[2]; @@ -1538,7 +1538,7 @@ void GPU2DSoft::DrawBG_Large(u32 line) // BG is always BG2 // * bit19: X mosaic should be applied here // * bit24-31: compositor flags -void GPU2DSoft::ApplySpriteMosaicX() +void GPU2D_Soft::ApplySpriteMosaicX() { // apply X mosaic if needed // X mosaic for sprites is applied after all sprites are rendered @@ -1562,8 +1562,8 @@ void GPU2DSoft::ApplySpriteMosaicX() } } -template -void GPU2DSoft::InterleaveSprites(u32 prio) +template +void GPU2D_Soft::InterleaveSprites(u32 prio) { u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; @@ -1621,7 +1621,7 @@ void GPU2DSoft::InterleaveSprites(u32 prio) DrawSprite_##type(__VA_ARGS__); \ } -void GPU2DSoft::DrawSprites(u32 line) +void GPU2D_Soft::DrawSprites(u32 line) { if (line == 0) { @@ -1746,7 +1746,7 @@ void GPU2DSoft::DrawSprites(u32 line) } template -void GPU2DSoft::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos) +void GPU2D_Soft::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos) { u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0]; u16* attrib = &oam[num * 4]; @@ -1964,7 +1964,7 @@ void GPU2DSoft::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u3 } template -void GPU2DSoft::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos) +void GPU2D_Soft::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos) { u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0]; u16* attrib = &oam[num * 4]; @@ -2220,7 +2220,7 @@ void GPU2DSoft::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 } } -void GPU2DSoft::MosaicXSizeChanged() +void GPU2D_Soft::MosaicXSizeChanged() { CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]]; CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[1]]; -- cgit v1.2.3 From e34ce013dfc10ccf80c34ef5b1f5f3c69059c1e2 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Wed, 9 Dec 2020 22:45:16 +0100 Subject: only start display capture on first line fixes Spearpillar in Pokemon D/P/Pt also fixes #782 and #474 --- src/GPU2D.cpp | 6 +++++- src/GPU2D.h | 1 + src/GPU2D_Soft.cpp | 5 ++++- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'src/GPU2D_Soft.cpp') diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp index 030936e..d2a8b34 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -581,7 +581,11 @@ void GPU2D::UpdateMosaicCounters(u32 line) void GPU2D::VBlank() { - CaptureCnt &= ~(1<<31); + if (CaptureLatch) + { + CaptureCnt &= ~(1<<31); + CaptureLatch = false; + } DispFIFOReadPtr = 0; DispFIFOWritePtr = 0; diff --git a/src/GPU2D.h b/src/GPU2D.h index 132a1bc..0f59ae3 100644 --- a/src/GPU2D.h +++ b/src/GPU2D.h @@ -110,6 +110,7 @@ protected: u8 EVA, EVB; u8 EVY; + bool CaptureLatch; u32 CaptureCnt; u16 MasterBrightness; diff --git a/src/GPU2D_Soft.cpp b/src/GPU2D_Soft.cpp index ecc5016..7345af9 100644 --- a/src/GPU2D_Soft.cpp +++ b/src/GPU2D_Soft.cpp @@ -213,6 +213,9 @@ void GPU2D_Soft::DrawScanline(u32 line) } } + if (line == 0 && CaptureCnt & (1 << 31)) + CaptureLatch = true; + // always render regular graphics DrawScanline_BGOBJ(line); UpdateMosaicCounters(line); @@ -278,7 +281,7 @@ void GPU2D_Soft::DrawScanline(u32 line) } // capture - if ((Num == 0) && (CaptureCnt & (1<<31))) + if ((Num == 0) && CaptureLatch) { u32 capwidth, capheight; switch ((CaptureCnt >> 20) & 0x3) -- cgit v1.2.3 From 66cec85a9a1e95c8fa76ac6ebf9f718cfafdf5bf Mon Sep 17 00:00:00 2001 From: Arisotura Date: Thu, 10 Dec 2020 19:12:08 +0100 Subject: GPU: forward BG0HOFS to internal rendering engine register for 3D layer scroll (only when the rendering engine is enabled). fixes #840 thank you RSDuck and Hydr8gon for your insight into this. --- src/GPU2D.cpp | 17 +++++++++++++---- src/GPU2D_Soft.cpp | 37 +++++++++++-------------------------- src/GPU3D.cpp | 46 ++++++++++++++++++++++++++++++++++++++++++---- src/GPU3D.h | 4 ++++ src/GPU_OpenGL.cpp | 5 +++++ src/GPU_OpenGL_shaders.h | 9 ++++++--- 6 files changed, 81 insertions(+), 37 deletions(-) (limited to 'src/GPU2D_Soft.cpp') diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp index d2a8b34..eb160d8 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -303,8 +303,14 @@ void GPU2D::Write8(u32 addr, u8 val) case 0x00E: BGCnt[3] = (BGCnt[3] & 0xFF00) | val; return; case 0x00F: BGCnt[3] = (BGCnt[3] & 0x00FF) | (val << 8); return; - case 0x010: BGXPos[0] = (BGXPos[0] & 0xFF00) | val; return; - case 0x011: BGXPos[0] = (BGXPos[0] & 0x00FF) | (val << 8); return; + case 0x010: + BGXPos[0] = (BGXPos[0] & 0xFF00) | val; + if (Num == 0) GPU3D::SetRenderXPos(BGXPos[0]); + return; + case 0x011: + BGXPos[0] = (BGXPos[0] & 0x00FF) | (val << 8); + if (Num == 0) GPU3D::SetRenderXPos(BGXPos[0]); + return; case 0x012: BGYPos[0] = (BGYPos[0] & 0xFF00) | val; return; case 0x013: BGYPos[0] = (BGYPos[0] & 0x00FF) | (val << 8); return; case 0x014: BGXPos[1] = (BGXPos[1] & 0xFF00) | val; return; @@ -401,7 +407,10 @@ void GPU2D::Write16(u32 addr, u16 val) case 0x00C: BGCnt[2] = val; return; case 0x00E: BGCnt[3] = val; return; - case 0x010: BGXPos[0] = val; return; + case 0x010: + BGXPos[0] = val; + if (Num == 0) GPU3D::SetRenderXPos(BGXPos[0]); + return; case 0x012: BGYPos[0] = val; return; case 0x014: BGXPos[1] = val; return; case 0x016: BGYPos[1] = val; return; @@ -716,4 +725,4 @@ void GPU2D::GetOBJVRAM(u8*& data, u32& mask) data = GPU::VRAMFlat_BOBJ; mask = 0x1FFFF; } -} \ No newline at end of file +} diff --git a/src/GPU2D_Soft.cpp b/src/GPU2D_Soft.cpp index 7345af9..c686bad 100644 --- a/src/GPU2D_Soft.cpp +++ b/src/GPU2D_Soft.cpp @@ -403,7 +403,7 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width) { // 3D on top, blending - u32 _3dval = _3DLine[val3 & 0xFF]; + u32 _3dval = _3DLine[i]; if ((_3dval >> 24) > 0) val1 = ColorBlend5(_3dval, val1); else @@ -413,7 +413,7 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width) { // 3D on bottom, blending - u32 _3dval = _3DLine[val3 & 0xFF]; + u32 _3dval = _3DLine[i]; if ((_3dval >> 24) > 0) { u32 eva = (val3 >> 8) & 0x1F; @@ -428,7 +428,7 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width) { // 3D on top, normal/fade - u32 _3dval = _3DLine[val3 & 0xFF]; + u32 _3dval = _3DLine[i]; if ((_3dval >> 24) > 0) { u32 evy = (val3 >> 8) & 0x1F; @@ -807,7 +807,7 @@ void GPU2D_Soft::DrawScanline_BGOBJ(u32 line) BGOBJLine[i] = val2; BGOBJLine[256+i] = ColorComposite(i, val2, val3); - BGOBJLine[512+i] = 0x04000000 | (val1 & 0xFF); + BGOBJLine[512+i] = 0x04000000; } else if ((flag1 & 0xC0) == 0x40) { @@ -819,7 +819,7 @@ void GPU2D_Soft::DrawScanline_BGOBJ(u32 line) BGOBJLine[i] = val2; BGOBJLine[256+i] = ColorComposite(i, val2, val3); - BGOBJLine[512+i] = (bldcnteffect << 24) | (EVY << 8) | (val1 & 0xFF); + BGOBJLine[512+i] = (bldcnteffect << 24) | (EVY << 8); } else if (((flag2 & 0xC0) == 0x40) && ((BlendCnt & 0x01C0) == 0x0140)) { @@ -842,7 +842,7 @@ void GPU2D_Soft::DrawScanline_BGOBJ(u32 line) BGOBJLine[i] = val1; BGOBJLine[256+i] = ColorComposite(i, val1, val3); - BGOBJLine[512+i] = (bldcnteffect << 24) | (EVB << 16) | (EVA << 8) | (val2 & 0xFF); + BGOBJLine[512+i] = (bldcnteffect << 24) | (EVB << 16) | (EVA << 8); } else { @@ -910,39 +910,24 @@ void GPU2D_Soft::DrawPixel_Accel(u32* dst, u16 color, u32 flag) void GPU2D_Soft::DrawBG_3D() { - u16 xoff = BGXPos[0]; int i = 0; - int iend = 256; - - if (xoff & 0x100) - { - i = (0x100 - (xoff & 0xFF)); - xoff += i; - } - if ((xoff - i + iend - 1) & 0x100) - { - iend -= (xoff & 0xFF); - } if (Accelerated) { - for (; i < iend; i++) + for (i = 0; i < 256; i++) { - int pos = xoff++; - if (!(WindowMask[i] & 0x01)) continue; BGOBJLine[i+512] = BGOBJLine[i+256]; BGOBJLine[i+256] = BGOBJLine[i]; - BGOBJLine[i] = 0x40000000 | pos; // 3D-layer placeholder + BGOBJLine[i] = 0x40000000; // 3D-layer placeholder } } else { - for (; i < iend; i++) + for (i = 0; i < 256; i++) { - u32 c = _3DLine[xoff]; - xoff++; + u32 c = _3DLine[i]; if ((c >> 24) == 0) continue; if (!(WindowMask[i] & 0x01)) continue; @@ -2227,4 +2212,4 @@ void GPU2D_Soft::MosaicXSizeChanged() { CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]]; CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[1]]; -} \ No newline at end of file +} diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index fd8d320..a02e286 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -181,6 +181,8 @@ u32 RenderClearAttr1, RenderClearAttr2; bool RenderFrameIdentical; +u16 RenderXPos; + u32 ZeroDotWLimit; u32 GXStat; @@ -385,6 +387,8 @@ void Reset() FlushAttributes = 0; ResetRenderingState(); + + RenderXPos = 0; } void DoSavestate(Savestate* file) @@ -430,6 +434,8 @@ void DoSavestate(Savestate* file) file->Var32(&RenderClearAttr1); file->Var32(&RenderClearAttr2); + file->Var16(&RenderXPos); + file->Var32(&ZeroDotWLimit); file->Var32(&GXStat); @@ -587,8 +593,6 @@ void DoSavestate(Savestate* file) } } - // probably not worth storing the vblank-latched Renderxxxxxx variables - CmdStallQueue->DoSavestate(file); file->Var32((u32*)&VertexPipeline); file->Var32((u32*)&NormalPipeline); @@ -2564,14 +2568,48 @@ void VCount215() #endif } +void SetRenderXPos(u16 xpos) +{ + if (!RenderingEnabled) return; + + RenderXPos = xpos & 0x01FF; +} + +u32 ScrolledLine[256]; + u32* GetLine(int line) { - if (GPU::Renderer == 0) return SoftRenderer::GetLine(line); + u32* rawline; + + if (GPU::Renderer == 0) rawline = SoftRenderer::GetLine(line); #ifdef OGLRENDERER_ENABLED - else return GLRenderer::GetLine(line); + else rawline = GLRenderer::GetLine(line); #else return NULL; #endif + + if (RenderXPos == 0) return rawline; + + // apply X scroll + + if (RenderXPos & 0x100) + { + int i = 0, j = RenderXPos; + for (; j < 512; i++, j++) + ScrolledLine[i] = 0; + for (j = 0; i < 256; i++, j++) + ScrolledLine[i] = rawline[j]; + } + else + { + int i = 0, j = RenderXPos; + for (; j < 256; i++, j++) + ScrolledLine[i] = rawline[j]; + for (; i < 256; i++) + ScrolledLine[i] = 0; + } + + return ScrolledLine; } diff --git a/src/GPU3D.h b/src/GPU3D.h index 0477c4f..69b67fa 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -89,6 +89,8 @@ extern u32 RenderClearAttr1, RenderClearAttr2; extern bool RenderFrameIdentical; +extern u16 RenderXPos; + extern std::array RenderPolygonRAM; extern u32 RenderNumPolygons; @@ -114,6 +116,8 @@ void CheckFIFODMA(); void VCount144(); void VBlank(); void VCount215(); + +void SetRenderXPos(u16 xpos); u32* GetLine(int line); void WriteToGXFIFO(u32 val); diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp index 359e9cd..0c6cf00 100644 --- a/src/GPU_OpenGL.cpp +++ b/src/GPU_OpenGL.cpp @@ -36,6 +36,7 @@ int ScreenH, ScreenW; GLuint CompShader[1][3]; GLuint CompScaleLoc[1]; +GLuint Comp3DXPosLoc[1]; GLuint CompVertexBufferID; GLuint CompVertexArrayID; @@ -64,6 +65,7 @@ bool Init() return false; CompScaleLoc[i] = glGetUniformLocation(CompShader[i][2], "u3DScale"); + Comp3DXPosLoc[i] = glGetUniformLocation(CompShader[i][2], "u3DXPos"); glUseProgram(CompShader[i][2]); uni_id = glGetUniformLocation(CompShader[i][2], "ScreenTex"); @@ -180,6 +182,9 @@ void RenderFrame() OpenGL::UseShaderProgram(CompShader[0]); glUniform1ui(CompScaleLoc[0], Scale); + // TODO: support setting this midframe, if ever needed + glUniform1i(Comp3DXPosLoc[0], ((int)GPU3D::RenderXPos << 23) >> 23); + int frontbuf = GPU::FrontBuffer; glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, CompScreenInputTex); diff --git a/src/GPU_OpenGL_shaders.h b/src/GPU_OpenGL_shaders.h index 20ac767..03ddb7a 100644 --- a/src/GPU_OpenGL_shaders.h +++ b/src/GPU_OpenGL_shaders.h @@ -40,6 +40,7 @@ void main() const char* kCompositorFS_Nearest = R"(#version 140 uniform uint u3DScale; +uniform int u3DXPos; uniform usampler2D ScreenTex; uniform sampler2D _3DTex; @@ -52,6 +53,8 @@ void main() { ivec4 pixel = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord), 0)); + float _3dxpos = float(u3DXPos); + ivec4 mbright = ivec4(texelFetch(ScreenTex, ivec2(256*3, int(fTexcoord.y)), 0)); int dispmode = mbright.b & 0x3; @@ -68,7 +71,7 @@ void main() { // 3D on top, blending - float xpos = val3.r + fract(fTexcoord.x); + float xpos = fTexcoord.x + _3dxpos; float ypos = mod(fTexcoord.y, 192); ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra * vec4(63,63,63,31)); @@ -89,7 +92,7 @@ void main() { // 3D on bottom, blending - float xpos = val3.r + fract(fTexcoord.x); + float xpos = fTexcoord.x + _3dxpos; float ypos = mod(fTexcoord.y, 192); ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra * vec4(63,63,63,31)); @@ -109,7 +112,7 @@ void main() { // 3D on top, normal/fade - float xpos = val3.r + fract(fTexcoord.x); + float xpos = fTexcoord.x + _3dxpos; float ypos = mod(fTexcoord.y, 192); ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra * vec4(63,63,63,31)); -- cgit v1.2.3