aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/GPU.cpp338
-rw-r--r--src/GPU.h87
-rw-r--r--src/GPU2D.cpp294
-rw-r--r--src/GPU2D.h6
-rw-r--r--src/GPU3D.cpp15
-rw-r--r--src/GPU3D.h2
-rw-r--r--src/GPU3D_Soft.cpp83
-rw-r--r--src/NonStupidBitfield.h149
-rw-r--r--src/Platform.h2
-rw-r--r--src/frontend/qt_sdl/Platform.cpp4
10 files changed, 765 insertions, 215 deletions
diff --git a/src/GPU.cpp b/src/GPU.cpp
index 7989750..e6b24e0 100644
--- a/src/GPU.cpp
+++ b/src/GPU.cpp
@@ -49,8 +49,8 @@ u8 VRAM_F[ 16*1024];
u8 VRAM_G[ 16*1024];
u8 VRAM_H[ 32*1024];
u8 VRAM_I[ 16*1024];
-u8* VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I};
-u32 VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF};
+u8* const VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I};
+u32 const VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF};
u8 VRAMCNT[9];
u8 VRAMSTAT;
@@ -85,6 +85,62 @@ bool Accelerated;
GPU2D* GPU2D_A;
GPU2D* GPU2D_B;
+/*
+ VRAM invalidation tracking
+
+ - we want to know when a VRAM region used for graphics changed
+ - for some regions unmapping is mandatory to modify them (Texture, TexPal and ExtPal) and
+ we don't want to completely invalidate them every time they're unmapped and remapped
+
+ For this reason we don't track the dirtiness per mapping region, but instead per VRAM bank
+ with VRAMDirty. Writes to LCDC go directly into VRAMDirty, while writes via other mapping regions
+ like BG or OBJ are first tracked in VRAMWritten_* and need to be flushed using SyncDirtyFlags.
+
+ This is more or less a description of VRAMTrackingSet::DeriveState
+ Each time before the memory is read two things could have happened
+ to each 16kb piece (16kb is the smallest unit in which mappings can
+ be made thus also the size VRAMMap_* use):
+ - this piece was remapped compared to last time we checked,
+ which means this location in memory is invalid.
+ - this piece wasn't remapped, which means we need to check whether
+ it was changed. This can be achieved by checking VRAMDirty.
+ VRAMDirty needs to be reset for the respective VRAM bank.
+*/
+
+VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG; // mapping trackers; template arguments are <region size, mapping granularity> in bytes
+VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ;
+VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG;
+VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ;
+
+VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal; // extended palette regions are tracked in 8KB mapping units
+VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal;
+VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal;
+VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal;
+
+VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture; // texture region is tracked in 128KB mapping units
+VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal;
+
+
+NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG; // one bit per VRAMDirtyGranularity bytes written through a mapped region; flushed into VRAMDirty by SyncDirtyFlags
+NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ;
+NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG;
+NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ;
+NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7;
+
+NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9]; // per-bank dirty bits, indexed by bank number; each sized for a 128KB bank
+
+u8 VRAMFlat_ABG[512*1024]; // linear copies of the mapped regions, filled by the MakeVRAMFlat_*Coherent functions
+u8 VRAMFlat_BBG[128*1024];
+u8 VRAMFlat_AOBJ[256*1024];
+u8 VRAMFlat_BOBJ[128*1024];
+
+u8 VRAMFlat_ABGExtPal[32*1024];
+u8 VRAMFlat_BBGExtPal[32*1024];
+u8 VRAMFlat_AOBJExtPal[8*1024];
+u8 VRAMFlat_BOBJExtPal[8*1024];
+
+u8 VRAMFlat_Texture[512*1024];
+u8 VRAMFlat_TexPal[128*1024];
bool Init()
{
@@ -113,6 +169,30 @@ void DeInit()
if (Framebuffer[1][1]) delete[] Framebuffer[1][1];
}
+// Drops all cached VRAM state so that every flat copy gets rebuilt on its
+// next use: clears the per-bank dirty bits, forgets the last-seen mapping of
+// every tracking set and zeroes the flat buffers.
+// Called at the end of Reset() and DoSavestate().
+void ResetVRAMCache()
+{
+    for (int i = 0; i < 9; i++)
+        VRAMDirty[i] = NonStupidBitField<128*1024/VRAMDirtyGranularity>();
+
+    VRAMDirty_ABG.Reset();
+    VRAMDirty_BBG.Reset();
+    VRAMDirty_AOBJ.Reset();
+    VRAMDirty_BOBJ.Reset();
+    VRAMDirty_ABGExtPal.Reset();
+    VRAMDirty_BBGExtPal.Reset();
+    VRAMDirty_AOBJExtPal.Reset();
+    VRAMDirty_BOBJExtPal.Reset();
+    // the texture and texture palette trackers have to be invalidated too:
+    // if their remembered mappings survived, an unchanged mapping combined
+    // with the freshly cleared VRAMDirty bits would report "nothing changed"
+    // and the flat copies would keep their stale contents
+    VRAMDirty_Texture.Reset();
+    VRAMDirty_TexPal.Reset();
+
+    // unmapped slots are never copied, so the flat buffers must start out as
+    // zeroes to match what an unmapped read returns
+    memset(VRAMFlat_ABG, 0, sizeof(VRAMFlat_ABG));
+    memset(VRAMFlat_BBG, 0, sizeof(VRAMFlat_BBG));
+    memset(VRAMFlat_AOBJ, 0, sizeof(VRAMFlat_AOBJ));
+    memset(VRAMFlat_BOBJ, 0, sizeof(VRAMFlat_BOBJ));
+    memset(VRAMFlat_ABGExtPal, 0, sizeof(VRAMFlat_ABGExtPal));
+    memset(VRAMFlat_BBGExtPal, 0, sizeof(VRAMFlat_BBGExtPal));
+    memset(VRAMFlat_AOBJExtPal, 0, sizeof(VRAMFlat_AOBJExtPal));
+    memset(VRAMFlat_BOBJExtPal, 0, sizeof(VRAMFlat_BOBJExtPal));
+    memset(VRAMFlat_Texture, 0, sizeof(VRAMFlat_Texture));
+    memset(VRAMFlat_TexPal, 0, sizeof(VRAMFlat_TexPal));
+}
+
void Reset()
{
VCount = 0;
@@ -186,6 +266,8 @@ void Reset()
GPU2D_B->SetFramebuffer(Framebuffer[backbuf][0]);
ResetRenderer();
+
+ ResetVRAMCache();
}
void Stop()
@@ -261,6 +343,8 @@ void DoSavestate(Savestate* file)
GPU2D_A->DoSavestate(file);
GPU2D_B->DoSavestate(file);
GPU3D::DoSavestate(file);
+
+ ResetVRAMCache();
}
void AssignFramebuffers()
@@ -411,18 +495,8 @@ void SetRenderSettings(int renderer, RenderSettings& settings)
u8* GetUniqueBankPtr(u32 mask, u32 offset)
{
- if (!mask) return NULL;
-
- int num = 0;
- if (!(mask & 0xFF)) { mask >>= 8; num += 8; }
- else
- {
- if (!(mask & 0xF)) { mask >>= 4; num += 4; }
- if (!(mask & 0x3)) { mask >>= 2; num += 2; }
- if (!(mask & 0x1)) { mask >>= 1; num += 1; }
- }
- if (mask != 1) return NULL;
-
+ if (!mask || (mask & (mask - 1)) != 0) return NULL; // NULL unless exactly one bit (one bank) is set in mask
+ int num = __builtin_ctz(mask); // position of the single set bit is the bank number
return &VRAM[num][offset & VRAMMask[num]];
}
@@ -606,8 +680,6 @@ void MapVRAM_E(u32 bank, u8 cnt)
case 4: // ABG ext palette
UNMAP_RANGE(ABGExtPal, 0, 4);
- GPU2D_A->BGExtPalDirty(0);
- GPU2D_A->BGExtPalDirty(2);
break;
}
}
@@ -634,8 +706,6 @@ void MapVRAM_E(u32 bank, u8 cnt)
case 4: // ABG ext palette
MAP_RANGE(ABGExtPal, 0, 4);
- GPU2D_A->BGExtPalDirty(0);
- GPU2D_A->BGExtPalDirty(2);
break;
}
}
@@ -687,12 +757,10 @@ void MapVRAM_FG(u32 bank, u8 cnt)
case 4: // ABG ext palette
VRAMMap_ABGExtPal[((oldofs & 0x1) << 1)] &= ~bankmask;
VRAMMap_ABGExtPal[((oldofs & 0x1) << 1) + 1] &= ~bankmask;
- GPU2D_A->BGExtPalDirty((oldofs & 0x1) << 1);
break;
case 5: // AOBJ ext palette
VRAMMap_AOBJExtPal &= ~bankmask;
- GPU2D_A->OBJExtPalDirty();
break;
}
}
@@ -732,12 +800,10 @@ void MapVRAM_FG(u32 bank, u8 cnt)
case 4: // ABG ext palette
VRAMMap_ABGExtPal[((ofs & 0x1) << 1)] |= bankmask;
VRAMMap_ABGExtPal[((ofs & 0x1) << 1) + 1] |= bankmask;
- GPU2D_A->BGExtPalDirty((ofs & 0x1) << 1);
break;
case 5: // AOBJ ext palette
VRAMMap_AOBJExtPal |= bankmask;
- GPU2D_A->OBJExtPalDirty();
break;
}
}
@@ -773,8 +839,6 @@ void MapVRAM_H(u32 bank, u8 cnt)
case 2: // BBG ext palette
UNMAP_RANGE(BBGExtPal, 0, 4);
- GPU2D_B->BGExtPalDirty(0);
- GPU2D_B->BGExtPalDirty(2);
break;
}
}
@@ -800,8 +864,6 @@ void MapVRAM_H(u32 bank, u8 cnt)
case 2: // BBG ext palette
MAP_RANGE(BBGExtPal, 0, 4);
- GPU2D_B->BGExtPalDirty(0);
- GPU2D_B->BGExtPalDirty(2);
break;
}
}
@@ -841,7 +903,6 @@ void MapVRAM_I(u32 bank, u8 cnt)
case 3: // BOBJ ext palette
VRAMMap_BOBJExtPal &= ~bankmask;
- GPU2D_B->OBJExtPalDirty();
break;
}
}
@@ -871,7 +932,6 @@ void MapVRAM_I(u32 bank, u8 cnt)
case 3: // BOBJ ext palette
VRAMMap_BOBJExtPal |= bankmask;
- GPU2D_B->OBJExtPalDirty();
break;
}
}
@@ -937,6 +997,8 @@ void StartHBlank(u32 line)
DispStat[0] |= (1<<1);
DispStat[1] |= (1<<1);
+ SyncDirtyFlags();
+
if (VCount < 192)
{
// draw
@@ -1096,4 +1158,224 @@ void SetVCount(u16 val)
NextVCount = val;
}
+template <u32 Size, u32 MappingGranularity>
+NonStupidBitField<Size/VRAMDirtyGranularity> VRAMTrackingSet<Size, MappingGranularity>::DeriveState(u32* currentMappings) // returns the dirty bits of the whole region and updates Mapping[] to currentMappings
+{
+ NonStupidBitField<Size/VRAMDirtyGranularity> result;
+ u16 banksToBeZeroed = 0; // bitmask of the banks whose VRAMDirty bits are cleared before returning
+ for (u32 i = 0; i < Size / MappingGranularity; i++)
+ {
+ if (currentMappings[i] != Mapping[i]) // this slot was remapped since the previous call
+ {
+ result |= NonStupidBitField<Size/VRAMDirtyGranularity>(i*VRAMBitsPerMapping, VRAMBitsPerMapping); // the whole slot counts as dirty
+ banksToBeZeroed |= currentMappings[i];
+ Mapping[i] = currentMappings[i];
+ }
+ else // mapping unchanged: merge the dirty bits of every bank mapped to this slot
+ {
+ u32 mapping = Mapping[i];
+
+ banksToBeZeroed |= mapping;
+
+ while (mapping != 0)
+ {
+ u32 num = __builtin_ctz(mapping); // lowest bank still to be processed
+ mapping &= ~(1 << num);
+
+ // hack for **speed**
+ // this could probably be done less ugly but then we would rely
+ // on the compiler for vectorisation
+ static_assert(VRAMDirtyGranularity == 512);
+ if (MappingGranularity == 16*1024)
+ {
+ u32 dirty = ((u32*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 14)]; // a 16KB slot is 32 dirty bits, i.e. one u32; the slot index wraps within the bank
+ ((u32*)result.Data)[i] |= dirty;
+ }
+ else if (MappingGranularity == 8*1024)
+ {
+ u16 dirty = ((u16*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 13)]; // an 8KB slot is 16 dirty bits, i.e. one u16
+ ((u16*)result.Data)[i] |= dirty;
+ }
+ else if (MappingGranularity == 128*1024)
+ {
+ ((u64*)result.Data)[i * 4 + 0] |= ((u64*)VRAMDirty[num].Data)[0]; // a 128KB slot is 256 dirty bits, i.e. four u64
+ ((u64*)result.Data)[i * 4 + 1] |= ((u64*)VRAMDirty[num].Data)[1];
+ ((u64*)result.Data)[i * 4 + 2] |= ((u64*)VRAMDirty[num].Data)[2];
+ ((u64*)result.Data)[i * 4 + 3] |= ((u64*)VRAMDirty[num].Data)[3];
+ }
+ else
+ {
+ // welp
+ abort();
+ }
+ }
+ }
+ }
+
+ while (banksToBeZeroed != 0)
+ {
+ u32 num = __builtin_ctz(banksToBeZeroed);
+ banksToBeZeroed &= ~(1 << num);
+ memset(VRAMDirty[num].Data, 0, sizeof(VRAMDirty[num].Data)); // the dirty bits of this bank have been consumed
+ }
+
+ return result;
+}
+
+template NonStupidBitField<32*1024/VRAMDirtyGranularity> VRAMTrackingSet<32*1024, 8*1024>::DeriveState(u32*); // explicit instantiations, one per <Size, MappingGranularity> combination
+template NonStupidBitField<8*1024/VRAMDirtyGranularity> VRAMTrackingSet<8*1024, 8*1024>::DeriveState(u32*);
+template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 128*1024>::DeriveState(u32*);
+template NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMTrackingSet<128*1024, 16*1024>::DeriveState(u32*);
+template NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMTrackingSet<256*1024, 16*1024>::DeriveState(u32*);
+template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 16*1024>::DeriveState(u32*);
+
+// Transfers the "written" bits of one mapped region into the per-bank
+// VRAMDirty bit fields and then clears the written bits.
+//  mappings     - bank bitmask for every 16KB slot of the region
+//  writtenFlags - one bit per VRAMDirtyGranularity bytes written to the region
+template <u32 Size>
+void SyncDirtyFlags(u32* mappings, NonStupidBitField<Size>& writtenFlags)
+{
+    const u32 bitsPerSlot = 16*1024/VRAMDirtyGranularity;
+
+    typename NonStupidBitField<Size>::Iterator cur = writtenFlags.Begin();
+    while (cur != writtenFlags.End())
+    {
+        // walk over every bank mapped to the slot this bit belongs to;
+        // "banks & (banks - 1)" strips the lowest set bit on each round
+        for (u32 banks = mappings[*cur / bitsPerSlot]; banks != 0; banks &= banks - 1)
+        {
+            u32 bank = __builtin_ctz(banks);
+
+            // the bit index wraps within the size of the bank
+            VRAMDirty[bank][*cur & (VRAMMask[bank] / VRAMDirtyGranularity)] = true;
+        }
+        cur++;
+    }
+
+    memset(writtenFlags.Data, 0, sizeof(writtenFlags.Data));
+}
+
+void SyncDirtyFlags() // flushes the writes recorded in every VRAMWritten_* set into the per-bank VRAMDirty bits
+{
+ SyncDirtyFlags(VRAMMap_ABG, VRAMWritten_ABG);
+ SyncDirtyFlags(VRAMMap_AOBJ, VRAMWritten_AOBJ);
+ SyncDirtyFlags(VRAMMap_BBG, VRAMWritten_BBG);
+ SyncDirtyFlags(VRAMMap_BOBJ, VRAMWritten_BOBJ);
+ SyncDirtyFlags(VRAMMap_ARM7, VRAMWritten_ARM7); // NOTE(review): the helper looks mappings up in 16KB steps while WriteVRAM_ARM7 indexes VRAMMap_ARM7 by (addr >> 17) - confirm this lookup stays within VRAMMap_ARM7
+}
+
+// Refreshes the flat copy of a mapped VRAM region.
+//  flat       - destination buffer covering the whole region
+//  mappings   - bank bitmask for every MappingGranularity-sized slot
+//  dirty      - one bit per VRAMDirtyGranularity-sized chunk that must be copied
+//  slowAccess - reads 8 bytes at a region offset; used when the slot is not
+//               backed by exactly one bank
+// Returns true if at least one chunk was copied.
+template <u32 MappingGranularity, u32 Size>
+inline bool CopyLinearVRAM(u8* flat, u32* mappings, NonStupidBitField<Size>& dirty, u64 (*slowAccess)(u32 addr))
+{
+    const u32 chunksPerSlot = MappingGranularity / VRAMDirtyGranularity;
+
+    bool anyCopied = false;
+
+    for (typename NonStupidBitField<Size>::Iterator chunk = dirty.Begin(); chunk != dirty.End(); chunk++)
+    {
+        u32 byteOffset = *chunk * VRAMDirtyGranularity;
+        u8* target = flat + byteOffset;
+        u8* singleBank = GetUniqueBankPtr(mappings[*chunk / chunksPerSlot], byteOffset);
+
+        if (!singleBank)
+        {
+            // zero or several banks mapped here: assemble the chunk 8 bytes at a time
+            for (u32 pos = 0; pos < VRAMDirtyGranularity; pos += 8)
+                *(u64*)&target[pos] = slowAccess(byteOffset + pos);
+        }
+        else
+        {
+            // exactly one bank mapped here: copy straight out of it
+            memcpy(target, singleBank, VRAMDirtyGranularity);
+        }
+
+        anyCopied = true;
+    }
+
+    return anyCopied;
+}
+
+bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty) // copies every chunk flagged in dirty into VRAMFlat_Texture; true if anything was copied
+{
+ return CopyLinearVRAM<128*1024>(VRAMFlat_Texture, VRAMMap_Texture, dirty, ReadVRAM_Texture<u64>);
+}
+bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) // same for VRAMFlat_TexPal
+{
+ return CopyLinearVRAM<16*1024>(VRAMFlat_TexPal, VRAMMap_TexPal, dirty, ReadVRAM_TexPal<u64>);
+}
+
+bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty) // same for VRAMFlat_ABG
+{
+ return CopyLinearVRAM<16*1024>(VRAMFlat_ABG, VRAMMap_ABG, dirty, ReadVRAM_ABG<u64>);
+}
+bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) // same for VRAMFlat_BBG
+{
+ return CopyLinearVRAM<16*1024>(VRAMFlat_BBG, VRAMMap_BBG, dirty, ReadVRAM_BBG<u64>);
+}
+
+bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty) // same for VRAMFlat_AOBJ
+{
+ return CopyLinearVRAM<16*1024>(VRAMFlat_AOBJ, VRAMMap_AOBJ, dirty, ReadVRAM_AOBJ<u64>);
+}
+bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) // same for VRAMFlat_BOBJ
+{
+ return CopyLinearVRAM<16*1024>(VRAMFlat_BOBJ, VRAMMap_BOBJ, dirty, ReadVRAM_BOBJ<u64>);
}
+
+// Slow-path read from the ABG extended palette region.
+// The 8KB slot containing addr selects the bank bitmask; the values found at
+// addr in every bank mapped there (E, F and/or G) are OR'd together.
+// Returns 0 if none of these banks is mapped to the slot.
+template<typename T>
+T ReadVRAM_ABGExtPal(u32 addr)
+{
+    const u32 banks = VRAMMap_ABGExtPal[(addr >> 13) & 0x3];
+    T value = 0;
+
+    if (banks & (1<<4))
+        value |= *(T*)&VRAM_E[addr & 0x7FFF];
+    if (banks & (1<<5))
+        value |= *(T*)&VRAM_F[addr & 0x3FFF];
+    if (banks & (1<<6))
+        value |= *(T*)&VRAM_G[addr & 0x3FFF];
+
+    return value;
+}
+
+// Slow-path read from the BBG extended palette region.
+// Returns the value at addr in bank H if H is mapped to the 8KB slot
+// containing addr, 0 otherwise.
+template<typename T>
+T ReadVRAM_BBGExtPal(u32 addr)
+{
+    const u32 banks = VRAMMap_BBGExtPal[(addr >> 13) & 0x3];
+    T value = 0;
+
+    if (banks & (1<<7))
+        value |= *(T*)&VRAM_H[addr & 0x7FFF];
+
+    return value;
+}
+
+// Slow-path read from the AOBJ extended palette region (8KB).
+// ORs together the values found at addr in every bank mapped to the region.
+// Only banks F and G can be mapped here; they are banks 5 and 6, so those
+// are the bits to test in the mapping mask (MapVRAM_FG sets 1<<bank).
+// Returns 0 if neither bank is mapped.
+template<typename T>
+T ReadVRAM_AOBJExtPal(u32 addr)
+{
+    u32 mask = VRAMMap_AOBJExtPal;
+
+    T ret = 0;
+    if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x1FFF];
+    if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x1FFF];
+
+    return ret;
+}
+
+// Slow-path read from the BOBJ extended palette region (8KB).
+// Returns the value at addr in bank I if I is mapped to the region,
+// 0 otherwise.
+template<typename T>
+T ReadVRAM_BOBJExtPal(u32 addr)
+{
+    const u32 banks = VRAMMap_BOBJExtPal;
+    T value = 0;
+
+    if (banks & (1<<8))
+        value |= *(T*)&VRAM_I[addr & 0x1FFF];
+
+    return value;
+}
+
+bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty) // copies every chunk flagged in dirty into VRAMFlat_ABGExtPal; true if anything was copied
+{
+ return CopyLinearVRAM<8*1024>(VRAMFlat_ABGExtPal, VRAMMap_ABGExtPal, dirty, ReadVRAM_ABGExtPal<u64>);
+}
+bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty) // same for VRAMFlat_BBGExtPal
+{
+ return CopyLinearVRAM<8*1024>(VRAMFlat_BBGExtPal, VRAMMap_BBGExtPal, dirty, ReadVRAM_BBGExtPal<u64>);
+}
+
+bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty) // same for VRAMFlat_AOBJExtPal
+{
+ return CopyLinearVRAM<8*1024>(VRAMFlat_AOBJExtPal, &VRAMMap_AOBJExtPal, dirty, ReadVRAM_AOBJExtPal<u64>); // the region is a single slot, so the lone mapping word is passed by address
+}
+bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty) // same for VRAMFlat_BOBJExtPal
+{
+ return CopyLinearVRAM<8*1024>(VRAMFlat_BOBJExtPal, &VRAMMap_BOBJExtPal, dirty, ReadVRAM_BOBJExtPal<u64>);
+}
+
+} \ No newline at end of file
diff --git a/src/GPU.h b/src/GPU.h
index 1564ef7..2f71da6 100644
--- a/src/GPU.h
+++ b/src/GPU.h
@@ -20,6 +20,7 @@
#define GPU_H
#include "GPU2D.h"
+#include "NonStupidBitfield.h"
namespace GPU
{
@@ -45,7 +46,7 @@ extern u8 VRAM_G[ 16*1024];
extern u8 VRAM_H[ 32*1024];
extern u8 VRAM_I[ 16*1024];
-extern u8* VRAM[9];
+extern u8* const VRAM[9];
extern u32 VRAMMap_LCDC;
extern u32 VRAMMap_ABG[0x20];
@@ -73,6 +74,73 @@ extern GPU2D* GPU2D_B;
extern int Renderer;
+const u32 VRAMDirtyGranularity = 512;
+
+extern NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG;
+extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ;
+extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG;
+extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ;
+extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7;
+
+extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9];
+
+// Remembers, for every MappingGranularity-sized slot of a Size-byte VRAM
+// region, which banks were mapped there the last time DeriveState() ran.
+template <u32 Size, u32 MappingGranularity>
+struct VRAMTrackingSet
+{
+    // bank bitmask last seen for each slot (bit n set = bank n mapped)
+    u16 Mapping[Size / MappingGranularity];
+
+    // number of dirty bits that cover one mapping slot; a class-level
+    // constant so that it takes no per-object storage and does not make the
+    // struct non-assignable
+    static constexpr u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity;
+
+    // Forgets all remembered mappings, so that every slot which currently has
+    // a bank mapped is reported as changed by the next DeriveState() call.
+    void Reset()
+    {
+        memset(Mapping, 0, sizeof(Mapping));
+    }
+
+    // Compares currentMappings against the remembered mappings and returns
+    // the dirty bits of the whole region; defined in GPU.cpp.
+    NonStupidBitField<Size/VRAMDirtyGranularity> DeriveState(u32* currentMappings);
+};
+
+extern VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG;
+extern VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ;
+extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG;
+extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ;
+
+extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal;
+extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal;
+extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal;
+extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal;
+
+extern VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture;
+extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal;
+
+extern u8 VRAMFlat_ABG[512*1024];
+extern u8 VRAMFlat_BBG[128*1024];
+extern u8 VRAMFlat_AOBJ[256*1024];
+extern u8 VRAMFlat_BOBJ[128*1024];
+
+extern u8 VRAMFlat_ABGExtPal[32*1024];
+extern u8 VRAMFlat_BBGExtPal[32*1024];
+
+extern u8 VRAMFlat_AOBJExtPal[8*1024];
+extern u8 VRAMFlat_BOBJExtPal[8*1024];
+
+extern u8 VRAMFlat_Texture[512*1024];
+extern u8 VRAMFlat_TexPal[128*1024];
+
+bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty);
+bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
+
+bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty);
+bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
+
+bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty);
+bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty);
+
+bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty);
+bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty);
+
+bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty);
+bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
+
+void SyncDirtyFlags();
typedef struct
{
@@ -233,7 +301,11 @@ void WriteVRAM_LCDC(u32 addr, T val)
default: return;
}
- if (VRAMMap_LCDC & (1<<bank)) *(T*)&VRAM[bank][addr] = val;
+ if (VRAMMap_LCDC & (1<<bank))
+ {
+ *(T*)&VRAM[bank][addr] = val;
+ VRAMDirty[bank][addr / VRAMDirtyGranularity] = true;
+ }
}
@@ -262,6 +334,8 @@ void WriteVRAM_ABG(u32 addr, T val)
{
u32 mask = VRAMMap_ABG[(addr >> 14) & 0x1F];
+ VRAMWritten_ABG[(addr & 0x7FFFF) / VRAMDirtyGranularity] = true;
+
if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val;
if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val;
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
@@ -295,6 +369,8 @@ void WriteVRAM_AOBJ(u32 addr, T val)
{
u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF];
+ VRAMWritten_AOBJ[(addr & 0x3FFFF) / VRAMDirtyGranularity] = true;
+
if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val;
if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val;
if (mask & (1<<4)) *(T*)&VRAM_E[addr & 0xFFFF] = val;
@@ -324,6 +400,8 @@ void WriteVRAM_BBG(u32 addr, T val)
{
u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7];
+ VRAMWritten_BBG[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
+
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
if (mask & (1<<7)) *(T*)&VRAM_H[addr & 0x7FFF] = val;
if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val;
@@ -350,11 +428,12 @@ void WriteVRAM_BOBJ(u32 addr, T val)
{
u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7];
+ VRAMWritten_BOBJ[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
+
if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val;
if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val;
}
-
template<typename T>
T ReadVRAM_ARM7(u32 addr)
{
@@ -372,6 +451,8 @@ void WriteVRAM_ARM7(u32 addr, T val)
{
u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1];
+ VRAMWritten_ARM7[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
+
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val;
}
diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp
index 27aa608..07b5b21 100644
--- a/src/GPU2D.cpp
+++ b/src/GPU2D.cpp
@@ -148,12 +148,6 @@ void GPU2D::Reset()
CaptureCnt = 0;
MasterBrightness = 0;
-
- BGExtPalStatus[0] = 0;
- BGExtPalStatus[1] = 0;
- BGExtPalStatus[2] = 0;
- BGExtPalStatus[3] = 0;
- OBJExtPalStatus = 0;
}
void GPU2D::DoSavestate(Savestate* file)
@@ -208,13 +202,6 @@ void GPU2D::DoSavestate(Savestate* file)
if (!file->Saving)
{
- // refresh those
- BGExtPalStatus[0] = 0;
- BGExtPalStatus[1] = 0;
- BGExtPalStatus[2] = 0;
- BGExtPalStatus[3] = 0;
- OBJExtPalStatus = 0;
-
CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]];
CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[0]];
}
@@ -758,6 +745,25 @@ void GPU2D::DrawScanline(u32 line)
int n3dline = line;
line = GPU::VCount;
+ if (Num == 0)
+ {
+ auto bgDirty = GPU::VRAMDirty_ABG.DeriveState(GPU::VRAMMap_ABG);
+ GPU::MakeVRAMFlat_ABGCoherent(bgDirty);
+ auto bgExtPalDirty = GPU::VRAMDirty_ABGExtPal.DeriveState(GPU::VRAMMap_ABGExtPal);
+ GPU::MakeVRAMFlat_ABGExtPalCoherent(bgExtPalDirty);
+ auto objExtPalDirty = GPU::VRAMDirty_AOBJExtPal.DeriveState(&GPU::VRAMMap_AOBJExtPal);
+ GPU::MakeVRAMFlat_AOBJExtPalCoherent(objExtPalDirty);
+ }
+ else
+ {
+ auto bgDirty = GPU::VRAMDirty_BBG.DeriveState(GPU::VRAMMap_BBG);
+ GPU::MakeVRAMFlat_BBGCoherent(bgDirty);
+ auto bgExtPalDirty = GPU::VRAMDirty_BBGExtPal.DeriveState(GPU::VRAMMap_BBGExtPal);
+ GPU::MakeVRAMFlat_BBGExtPalCoherent(bgExtPalDirty);
+ auto objExtPalDirty = GPU::VRAMDirty_BOBJExtPal.DeriveState(&GPU::VRAMMap_BOBJExtPal);
+ GPU::MakeVRAMFlat_BOBJExtPalCoherent(objExtPalDirty);
+ }
+
bool forceblank = false;
// scanlines that end up outside of the GPU drawing range
@@ -970,6 +976,9 @@ void GPU2D::DoCapture(u32 line, u32 width)
u16* dst = (u16*)GPU::VRAM[dstvram];
u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width);
+ static_assert(GPU::VRAMDirtyGranularity == 512);
+ GPU::VRAMDirty[dstvram][(dstaddr & 0x1FFFF) / GPU::VRAMDirtyGranularity] = true;
+
// TODO: handle 3D in accelerated mode!!
u32* srcA;
@@ -1188,85 +1197,20 @@ void GPU2D::SampleFIFO(u32 offset, u32 num)
}
}
-
-void GPU2D::BGExtPalDirty(u32 base)
-{
- BGExtPalStatus[base] = 0;
- BGExtPalStatus[base+1] = 0;
-}
-
-void GPU2D::OBJExtPalDirty()
-{
- OBJExtPalStatus = 0;
-}
-
-
u16* GPU2D::GetBGExtPal(u32 slot, u32 pal)
{
- u16* dst = &BGExtPalCache[slot][pal << 8];
-
- if (!(BGExtPalStatus[slot] & (1<<pal)))
- {
- if (Num)
- {
- if (GPU::VRAMMap_BBGExtPal[slot] & (1<<7))
- memcpy(dst, &GPU::VRAM_H[(slot << 13) + (pal << 9)], 256*2);
- else
- memset(dst, 0, 256*2);
- }
- else
- {
- memset(dst, 0, 256*2);
-
- if (GPU::VRAMMap_ABGExtPal[slot] & (1<<4))
- for (int i = 0; i < 256; i+=2)
- *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_E[(slot << 13) + (pal << 9) + (i << 1)];
-
- if (GPU::VRAMMap_ABGExtPal[slot] & (1<<5))
- for (int i = 0; i < 256; i+=2)
- *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_F[((slot&1) << 13) + (pal << 9) + (i << 1)];
-
- if (GPU::VRAMMap_ABGExtPal[slot] & (1<<6))
- for (int i = 0; i < 256; i+=2)
- *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_G[((slot&1) << 13) + (pal << 9) + (i << 1)];
- }
-
- BGExtPalStatus[slot] |= (1<<pal);
- }
-
- return dst;
+ const u32 PaletteSize = 256 * 2; // bytes per palette: 256 entries of 2 bytes
+ const u32 SlotSize = PaletteSize * 16; // bytes per slot: 16 palettes, i.e. 8KB
+ return (u16*)&(Num == 0 // points straight into the flat ext palette copy of this engine
+ ? GPU::VRAMFlat_ABGExtPal
+ : GPU::VRAMFlat_BBGExtPal)[slot * SlotSize + pal * PaletteSize];
}
u16* GPU2D::GetOBJExtPal()
{
- u16* dst = OBJExtPalCache;
-
- if (!OBJExtPalStatus)
- {
- if (Num)
- {
- if (GPU::VRAMMap_BOBJExtPal & (1<<8))
- memcpy(dst, &GPU::VRAM_I[0], 16*256*2);
- else
- memset(dst, 0, 16*256*2);
- }
- else
- {
- memset(dst, 0, 16*256*2);
-
- if (GPU::VRAMMap_AOBJExtPal & (1<<5))
- for (int i = 0; i < 16*256; i+=2)
- *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_F[i << 1];
-
- if (GPU::VRAMMap_AOBJExtPal & (1<<6))
- for (int i = 0; i < 16*256; i+=2)
- *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_G[i << 1];
- }
-
- OBJExtPalStatus = 1;
- }
-
- return dst;
+ return Num == 0 // returns the flat OBJ ext palette copy of this engine
+ ? (u16*)GPU::VRAMFlat_AOBJExtPal
+ : (u16*)GPU::VRAMFlat_BOBJExtPal;
}
@@ -1697,6 +1641,20 @@ void GPU2D::DrawBG_3D()
}
}
+// Selects the flat BG VRAM copy for a 2D engine.
+//  num  - engine number; 0 selects the ABG copy, anything else the BBG copy
+//  data - receives the start of the flat buffer
+//  mask - receives the offset mask matching the size of that buffer
+void GetBGVRAM(u32 num, u8*& data, u32& mask)
+{
+    const bool engineA = (num == 0);
+
+    data = engineA ? GPU::VRAMFlat_ABG : GPU::VRAMFlat_BBG;
+    mask = engineA ? 0x7FFFF : 0x1FFFF;
+}
+
template<bool mosaic, GPU2D::DrawPixel drawPixel>
void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
{
@@ -1720,17 +1678,20 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
extpal = (DispCnt & 0x40000000);
if (extpal) extpalslot = ((bgnum<2) && (bgcnt&0x2000)) ? (2+bgnum) : bgnum;
+ u8* bgvram;
+ u32 bgvrammask;
+ GetBGVRAM(Num, bgvram, bgvrammask);
if (Num)
{
- tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3);
+ tilesetaddr = ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0x400];
}
else
{
- tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
+ tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0];
}
@@ -1758,7 +1719,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
// preload shit as needed
if ((xoff & 0x7) || mosaic)
{
- curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3));
+ curtile = *(u16*)&bgvram[(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)) & bgvrammask];
if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12);
else curpal = pal;
@@ -1779,7 +1740,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
(mosaic && ((xpos >> 3) != (lastxpos >> 3))))
{
// load a new tile
- curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3));
+ curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask];
if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12);
else curpal = pal;
@@ -1794,7 +1755,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
if (WindowMask[i] & (1<<bgnum))
{
u32 tilexoff = (curtile & 0x0400) ? (7-(xpos&0x7)) : (xpos&0x7);
- color = GPU::ReadVRAM_BG<u8>(pixelsaddr + tilexoff);
+ color = bgvram[(pixelsaddr + tilexoff) & bgvrammask];
if (color)
drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum);
@@ -1810,7 +1771,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
// preload shit as needed
if ((xoff & 0x7) || mosaic)
{
- curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3));
+ curtile = *(u16*)&bgvram[((tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3))) & bgvrammask];
curpal = pal + ((curtile & 0xF000) >> 8);
pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5)
+ (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2);
@@ -1828,7 +1789,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
(mosaic && ((xpos >> 3) != (lastxpos >> 3))))
{
// load a new tile
- curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3));
+ curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask];
curpal = pal + ((curtile & 0xF000) >> 8);
pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5)
+ (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2);
@@ -1842,11 +1803,11 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
u32 tilexoff = (curtile & 0x0400) ? (7-(xpos&0x7)) : (xpos&0x7);
if (tilexoff & 0x1)
{
- color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) >> 4;
+ color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] >> 4;
}
else
{
- color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) & 0x0F;
+ color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] & 0x0F;
}
if (color)
@@ -1895,17 +1856,20 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum)
rotY -= (BGMosaicY * rotD);
}
+ u8* bgvram;
+ u32 bgvrammask;
+ // fetch the flat BG VRAM copy of this engine; without this call the tile
+ // map and tile data reads further down go through an uninitialized
+ // pointer and mask
+ GetBGVRAM(Num, bgvram, bgvrammask);
+
if (Num)
{
- tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3);
+ tilesetaddr = ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0x400];
}
else
{
- tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
+ tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0];
}
@@ -1934,13 +1898,13 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum)
if ((!((finalX|finalY) & overflowmask)))
{
- curtile = GPU::ReadVRAM_BG<u8>(tilemapaddr + ((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)));
+ curtile = bgvram[(tilemapaddr + ((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11))) & bgvrammask];
// draw pixel
u32 tilexoff = (finalX >> 8) & 0x7;
u32 tileyoff = (finalY >> 8) & 0x7;
- color = GPU::ReadVRAM_BG<u8>(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff);
+ color = bgvram[(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff) & bgvrammask];
if (color)
drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum);
@@ -1964,6 +1928,10 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
u16* pal;
u32 extpal;
+ u8* bgvram;
+ u32 bgvrammask;
+ GetBGVRAM(Num, bgvram, bgvrammask);
+
extpal = (DispCnt & 0x40000000);
s16 rotA = BGRotA[bgnum-2];
@@ -2007,8 +1975,8 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
ofymask = ~ymask;
}
- if (Num) tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 6);
- else tilemapaddr = 0x06000000 + ((bgcnt & 0x1F00) << 6);
+ if (Num) tilemapaddr = ((bgcnt & 0x1F00) << 6);
+ else tilemapaddr = ((bgcnt & 0x1F00) << 6);
if (bgcnt & 0x0004)
{
@@ -2035,7 +2003,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
if (!(finalX & ofxmask) && !(finalY & ofymask))
{
- color = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1));
+ color = *(u16*)&bgvram[(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)) & bgvrammask];
if (color & 0x8000)
drawPixel(&BGOBJLine[i], color, 0x01000000<<bgnum);
@@ -2074,7 +2042,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
if (!(finalX & ofxmask) && !(finalY & ofymask))
{
- color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8));
+ color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask];
if (color)
drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum);
@@ -2106,15 +2074,15 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
if (Num)
{
- tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3);
+ tilesetaddr = ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0x400];
}
else
{
- tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
+ tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0];
}
@@ -2144,7 +2112,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
if ((!((finalX|finalY) & overflowmask)))
{
- curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1));
+ curtile = *(u16*)&bgvram[(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1)) & bgvrammask];
if (extpal) curpal = GetBGExtPal(bgnum, curtile>>12);
else curpal = pal;
@@ -2156,7 +2124,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
if (curtile & 0x0400) tilexoff = 7-tilexoff;
if (curtile & 0x0800) tileyoff = 7-tileyoff;
- color = GPU::ReadVRAM_BG<u8>(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff);
+ color = bgvram[(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff) & bgvrammask];
if (color)
drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum);
@@ -2222,8 +2190,9 @@ void GPU2D::DrawBG_Large(u32 line) // BG is always BG2
rotY -= (BGMosaicY * rotD);
}
- if (Num) tilemapaddr = 0x06200000;
- else tilemapaddr = 0x06000000;
+ u8* bgvram;
+ u32 bgvrammask;
+ GetBGVRAM(Num, bgvram, bgvrammask);
// 256-color bitmap
@@ -2251,7 +2220,7 @@ void GPU2D::DrawBG_Large(u32 line) // BG is always BG2
if (!(finalX & ofxmask) && !(finalY & ofymask))
{
- color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8));
+ color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask];
if (color)
drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2);
@@ -2346,6 +2315,20 @@ void GPU2D::InterleaveSprites(u32 prio)
}
}
+// Selects the flat OBJ VRAM buffer and its wrap-around mask for a 2D unit:
+// num == 0 uses VRAMFlat_AOBJ (mask 0x3FFFF), anything else VRAMFlat_BOBJ
+// (mask 0x1FFFF). Both results are returned through the reference parameters.
+void GetOBJVRAM(u32 num, u8*& data, u32& mask)
+{
+    if (num != 0)
+    {
+        mask = 0x1FFFF;
+        data = GPU::VRAMFlat_BOBJ;
+        return;
+    }
+
+    mask = 0x3FFFF;
+    data = GPU::VRAMFlat_AOBJ;
+}
+
#define DoDrawSprite(type, ...) \
if (iswin) \
{ \
@@ -2370,6 +2353,17 @@ void GPU2D::DrawSprites(u32 line)
OBJMosaicYCount = 0;
}
+ if (Num == 0)
+ {
+ auto objDirty = GPU::VRAMDirty_AOBJ.DeriveState(GPU::VRAMMap_AOBJ);
+ GPU::MakeVRAMFlat_AOBJCoherent(objDirty);
+ }
+ else
+ {
+ auto objDirty = GPU::VRAMDirty_BOBJ.DeriveState(GPU::VRAMMap_BOBJ);
+ GPU::MakeVRAMFlat_BOBJCoherent(objDirty);
+ }
+
NumSprites = 0;
memset(OBJLine, 0, 256*4);
memset(OBJWindow, 0, 256);
@@ -2482,6 +2476,10 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
u32 ytilefactor;
+ u8* objvram;
+ u32 objvrammask;
+ GetOBJVRAM(Num, objvram, objvrammask);
+
s32 centerX = boundwidth >> 1;
s32 centerY = boundheight >> 1;
@@ -2525,6 +2523,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
pixelattr |= (0xC0000000 | (alpha << 24));
+ u32 pixelsaddr;
if (DispCnt & 0x40)
{
if (DispCnt & 0x20)
@@ -2536,7 +2535,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
}
else
{
- tilenum <<= (7 + ((DispCnt >> 22) & 0x1));
+ pixelsaddr = tilenum << (7 + ((DispCnt >> 22) & 0x1));
ytilefactor = ((width >> 8) * 2);
}
}
@@ -2544,23 +2543,21 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
{
if (DispCnt & 0x20)
{
- tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
+ pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
ytilefactor = (256 * 2);
}
else
{
- tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
+ pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
ytilefactor = (128 * 2);
}
}
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
-
for (; xoff < boundwidth;)
{
if ((u32)rotX < width && (u32)rotY < height)
{
- color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1));
+ color = *(u16*)&objvram[(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)) & objvrammask];
if (color & 0x8000)
{
@@ -2585,9 +2582,10 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
}
else
{
+ u32 pixelsaddr = tilenum;
if (DispCnt & 0x10)
{
- tilenum <<= ((DispCnt >> 20) & 0x3);
+ pixelsaddr <<= ((DispCnt >> 20) & 0x3);
ytilefactor = (width >> 11) << ((attrib[0] & 0x2000) ? 1:0);
}
else
@@ -2601,9 +2599,8 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
if (attrib[0] & 0x2000)
{
// 256-color
- tilenum <<= 5;
ytilefactor <<= 5;
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
+ pixelsaddr <<= 5;
if (!window)
{
@@ -2617,7 +2614,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
{
if ((u32)rotX < width && (u32)rotY < height)
{
- color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8));
+ color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)) & objvrammask];
if (color)
{
@@ -2657,7 +2654,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
{
if ((u32)rotX < width && (u32)rotY < height)
{
- color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9));
+ color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)) & objvrammask];
if (rotX & 0x100)
color >>= 4;
else
@@ -2705,6 +2702,10 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
pixelattr |= 0x100000;
}
+ u8* objvram;
+ u32 objvrammask;
+ GetOBJVRAM(Num, objvram, objvrammask);
+
// yflip
if (attrib[1] & 0x2000)
ypos = height-1 - ypos;
@@ -2735,6 +2736,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
pixelattr |= (0xC0000000 | (alpha << 24));
+ u32 pixelsaddr = tilenum;
if (DispCnt & 0x40)
{
if (DispCnt & 0x20)
@@ -2746,25 +2748,24 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
}
else
{
- tilenum <<= (7 + ((DispCnt >> 22) & 0x1));
- tilenum += (ypos * width * 2);
+ pixelsaddr <<= (7 + ((DispCnt >> 22) & 0x1));
+ pixelsaddr += (ypos * width * 2);
}
}
else
{
if (DispCnt & 0x20)
{
- tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
- tilenum += (ypos * 256 * 2);
+ pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
+ pixelsaddr += (ypos * 256 * 2);
}
else
{
- tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
- tilenum += (ypos * 128 * 2);
+ pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
+ pixelsaddr += (ypos * 128 * 2);
}
}
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
s32 pixelstride;
if (attrib[1] & 0x1000) // xflip
@@ -2781,7 +2782,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
for (; xoff < xend;)
{
- color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr);
+ color = *(u16*)&objvram[pixelsaddr & objvrammask];
pixelsaddr += pixelstride;
@@ -2805,14 +2806,15 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
}
else
{
+ u32 pixelsaddr = tilenum;
if (DispCnt & 0x10)
{
- tilenum <<= ((DispCnt >> 20) & 0x3);
- tilenum += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0);
+ pixelsaddr <<= ((DispCnt >> 20) & 0x3);
+ pixelsaddr += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0);
}
else
{
- tilenum += ((ypos >> 3) * 0x20);
+ pixelsaddr += ((ypos >> 3) * 0x20);
}
if (spritemode == 1) pixelattr |= 0x80000000;
@@ -2821,8 +2823,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
if (attrib[0] & 0x2000)
{
// 256-color
- tilenum <<= 5;
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
+ pixelsaddr <<= 5;
pixelsaddr += ((ypos & 0x7) << 3);
s32 pixelstride;
@@ -2851,7 +2852,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
for (; xoff < xend;)
{
- color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr);
+ // Wrap the offset with objvrammask, as the other OBJ reads in this function do, so it cannot index past the flat OBJ buffer.
+ color = objvram[pixelsaddr & objvrammask];
pixelsaddr += pixelstride;
@@ -2877,8 +2878,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
else
{
// 16-color
- tilenum <<= 5;
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
+ pixelsaddr <<= 5;
pixelsaddr += ((ypos & 0x7) << 2);
s32 pixelstride;
@@ -2910,13 +2910,13 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
{
if (attrib[1] & 0x1000)
{
- if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) & 0x0F; pixelsaddr--; }
- else color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) >> 4;
+ if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] & 0x0F; pixelsaddr--; }
+ else color = objvram[pixelsaddr & objvrammask] >> 4;
}
else
{
- if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) >> 4; pixelsaddr++; }
- else color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) & 0x0F;
+ if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] >> 4; pixelsaddr++; }
+ else color = objvram[pixelsaddr & objvrammask] & 0x0F;
}
if (color)
diff --git a/src/GPU2D.h b/src/GPU2D.h
index 469d6a2..db15adc 100644
--- a/src/GPU2D.h
+++ b/src/GPU2D.h
@@ -59,9 +59,6 @@ public:
void CheckWindows(u32 line);
- void BGExtPalDirty(u32 base);
- void OBJExtPalDirty();
-
u16* GetBGExtPal(u32 slot, u32 pal);
u16* GetOBJExtPal();
@@ -128,9 +125,6 @@ private:
u16 MasterBrightness;
u16 BGExtPalCache[4][16*256];
- u16 OBJExtPalCache[16*256];
- u32 BGExtPalStatus[4];
- u32 OBJExtPalStatus;
u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb);
u32 ColorBlend5(u32 val1, u32 val2);
diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp
index 74debfe..4e6ac42 100644
--- a/src/GPU3D.cpp
+++ b/src/GPU3D.cpp
@@ -179,6 +179,8 @@ u8 RenderFogDensityTable[34];
u32 RenderClearAttr1, RenderClearAttr2;
+bool RenderFrameIdentical;
+
u32 ZeroDotWLimit;
u32 GXStat;
@@ -2491,6 +2493,19 @@ void VBlank()
}
RenderNumPolygons = NumPolygons;
+ RenderFrameIdentical = false;
+ }
+ else
+ {
+ RenderFrameIdentical = RenderDispCnt == DispCnt
+ && RenderAlphaRef == AlphaRef
+ && RenderClearAttr1 == ClearAttr1
+ && RenderClearAttr2 == ClearAttr2
+ && RenderFogColor == FogColor
+ && RenderFogOffset == FogOffset * 0x200
+ && memcmp(RenderEdgeTable, EdgeTable, 8*2) == 0
+ && memcmp(RenderFogDensityTable + 1, FogDensityTable, 32) == 0
+ && memcmp(RenderToonTable, ToonTable, 32*2) == 0;
}
RenderDispCnt = DispCnt;
diff --git a/src/GPU3D.h b/src/GPU3D.h
index c69adde..0477c4f 100644
--- a/src/GPU3D.h
+++ b/src/GPU3D.h
@@ -87,6 +87,8 @@ extern u8 RenderFogDensityTable[34];
extern u32 RenderClearAttr1, RenderClearAttr2;
+extern bool RenderFrameIdentical;
+
extern std::array<Polygon*,2048> RenderPolygonRAM;
extern u32 RenderNumPolygons;
diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index 7ee9e5d..d66eb76 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -58,6 +58,8 @@ bool PrevIsShadowMask;
bool Enabled;
+bool FrameIdentical;
+
// threading
bool Threaded;
@@ -550,6 +552,16 @@ typedef struct
RendererPolygon PolygonList[2048];
+// Reads a T from the flat texture VRAM copy. The address is wrapped with
+// mask 0x7FFFF before it is used as a byte offset.
+template <typename T>
+inline T ReadVRAM_Texture(u32 addr)
+{
+    const u32 offset = addr & 0x7FFFF;
+    auto* src = &GPU::VRAMFlat_Texture[offset];
+    return *(T*)src;
+}
+// Reads a T from the flat texture palette VRAM copy. The address is wrapped
+// with mask 0x1FFFF before it is used as a byte offset.
+template <typename T>
+inline T ReadVRAM_TexPal(u32 addr)
+{
+    const u32 offset = addr & 0x1FFFF;
+    auto* src = &GPU::VRAMFlat_TexPal[offset];
+    return *(T*)src;
+}
void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
{
@@ -606,10 +618,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 1: // A3I5
{
vramaddr += ((t * width) + s);
- u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr);
+ u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
texpal <<= 4;
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1));
+ *color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1));
*alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6);
}
break;
@@ -617,12 +629,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 2: // 4-color
{
vramaddr += (((t * width) + s) >> 2);
- u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr);
+ u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
pixel >>= ((s & 0x3) << 1);
pixel &= 0x3;
texpal <<= 3;
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
+ *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31;
}
break;
@@ -630,12 +642,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 3: // 16-color
{
vramaddr += (((t * width) + s) >> 1);
- u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr);
+ u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
if (s & 0x1) pixel >>= 4;
else pixel &= 0xF;
texpal <<= 4;
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
+ *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31;
}
break;
@@ -643,10 +655,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 4: // 256-color
{
vramaddr += ((t * width) + s);
- u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr);
+ u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
texpal <<= 4;
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
+ *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31;
}
break;
@@ -660,30 +672,30 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
if (vramaddr >= 0x40000)
slot1addr += 0x10000;
- u8 val = GPU::ReadVRAM_Texture<u8>(vramaddr);
+ u8 val = ReadVRAM_Texture<u8>(vramaddr);
val >>= (2 * (s & 0x3));
- u16 palinfo = GPU::ReadVRAM_Texture<u16>(slot1addr);
+ u16 palinfo = ReadVRAM_Texture<u16>(slot1addr);
u32 paloffset = (palinfo & 0x3FFF) << 2;
texpal <<= 4;
switch (val & 0x3)
{
case 0:
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset);
+ *color = ReadVRAM_TexPal<u16>(texpal + paloffset);
*alpha = 31;
break;
case 1:
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
+ *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
*alpha = 31;
break;
case 2:
if ((palinfo >> 14) == 1)
{
- u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset);
- u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
+ u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset);
+ u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0;
@@ -700,8 +712,8 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
}
else if ((palinfo >> 14) == 3)
{
- u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset);
- u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
+ u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset);
+ u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0;
@@ -717,20 +729,20 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
*color = r | g | b;
}
else
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 4);
+ *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 4);
*alpha = 31;
break;
case 3:
if ((palinfo >> 14) == 2)
{
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 6);
+ *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 6);
*alpha = 31;
}
else if ((palinfo >> 14) == 3)
{
- u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset);
- u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
+ u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset);
+ u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0;
@@ -759,10 +771,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 6: // A5I3
{
vramaddr += ((t * width) + s);
- u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr);
+ u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
texpal <<= 4;
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1));
+ *color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1));
*alpha = (pixel >> 3);
}
break;
@@ -770,7 +782,7 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 7: // direct color
{
vramaddr += (((t * width) + s) << 1);
- *color = GPU::ReadVRAM_Texture<u16>(vramaddr);
+ *color = ReadVRAM_Texture<u16>(vramaddr);
*alpha = (*color & 0x8000) ? 31 : 0;
}
break;
@@ -2007,8 +2019,8 @@ void ClearBuffers()
{
for (int x = 0; x < 256; x++)
{
- u16 val2 = GPU::ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1));
- u16 val3 = GPU::ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1));
+ u16 val2 = ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1));
+ u16 val3 = ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1));
// TODO: confirm color conversion
u32 r = (val2 << 1) & 0x3E; if (r) r++;
@@ -2088,11 +2100,19 @@ void VCount144()
void RenderFrame()
{
+ auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture);
+ auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal);
+
+ bool textureChanged = GPU::MakeVRAMFlat_TextureCoherent(textureDirty);
+ bool texPalChanged = GPU::MakeVRAMFlat_TexPalCoherent(texPalDirty);
+
+ FrameIdentical = !(textureChanged || texPalChanged) && RenderFrameIdentical;
+
if (RenderThreadRunning)
{
Platform::Semaphore_Post(Sema_RenderStart);
}
- else
+ else if (!FrameIdentical)
{
ClearBuffers();
RenderPolygons(false, &RenderPolygonRAM[0], RenderNumPolygons);
@@ -2107,8 +2127,15 @@ void RenderThreadFunc()
if (!RenderThreadRunning) return;
RenderThreadRendering = true;
- ClearBuffers();
- RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons);
+ if (FrameIdentical)
+ {
+ Platform::Semaphore_Post(Sema_ScanlineCount, 192);
+ }
+ else
+ {
+ ClearBuffers();
+ RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons);
+ }
Platform::Semaphore_Post(Sema_RenderDone);
RenderThreadRendering = false;
diff --git a/src/NonStupidBitfield.h b/src/NonStupidBitfield.h
new file mode 100644
index 0000000..124ba76
--- /dev/null
+++ b/src/NonStupidBitfield.h
@@ -0,0 +1,149 @@
+#ifndef NONSTUPIDBITFIELD_H
+#define NONSTUPIDBITFIELD_H
+
+#include "types.h"
+
+#include <memory.h>
+
+#include <initializer_list>
+#include <algorithm>
+
+// like std::bitset but less stupid and optimised for
+// our use case (keeping track of memory invalidations)
+
+// Fixed-size bit set stored in a plain byte array: bit i lives in
+// Data[i >> 3] at bit position (i & 7). Size is the number of bits.
+template <u32 Size>
+struct NonStupidBitField
+{
+    static_assert((Size % 8) == 0, "bitfield size must be a multiple of 8");
+    static const u32 DataLength = Size / 8;
+    u8 Data[DataLength];
+
+    // Proxy returned by operator[]; converts to the bit's value and
+    // assigns through to the underlying byte.
+    struct Ref
+    {
+        NonStupidBitField<Size>& BitField;
+        u32 Idx;
+
+        operator bool() const
+        {
+            return (BitField.Data[Idx >> 3] & (1 << (Idx & 0x7))) != 0;
+        }
+
+        Ref& operator=(bool set)
+        {
+            BitField.Data[Idx >> 3] &= ~(1 << (Idx & 0x7));
+            BitField.Data[Idx >> 3] |= ((u8)set << (Idx & 0x7));
+            return *this;
+        }
+    };
+
+    // Walks the indices of all set bits in ascending order. Data is scanned
+    // one word at a time; RemainingBits holds the not yet visited bits of the
+    // word starting at byte DataIdx. Bit numbering inside a word assumes a
+    // little-endian host, exactly as the multi-byte reads always did.
+    struct Iterator
+    {
+        NonStupidBitField<Size>& BitField;
+        u32 DataIdx;
+        u32 BitIdx;
+        u64 RemainingBits;
+
+        // Index of the bit the iterator currently points at.
+        u32 operator*() const { return DataIdx * 8 + BitIdx; }
+
+        bool operator==(const Iterator& other) const
+        {
+            return other.DataIdx == DataIdx && other.BitIdx == BitIdx;
+        }
+        bool operator!=(const Iterator& other) const { return !(*this == other); }
+
+        // Advances to the next set bit, scanning sizeof(T) bytes per step.
+        // When no set bit is left the iterator becomes equal to End().
+        template <typename T>
+        void Next()
+        {
+            while (RemainingBits == 0 && DataIdx < DataLength)
+            {
+                DataIdx += sizeof(T);
+                // never load a word that starts at or past the end of Data
+                if (DataIdx < DataLength)
+                    RemainingBits = BitField.template LoadWord<T>(DataIdx);
+            }
+
+            if (RemainingBits == 0)
+            {
+                // exhausted: normalise to the End() state instead of
+                // calling __builtin_ctzll(0), whose result is undefined
+                DataIdx = DataLength;
+                BitIdx = 0;
+                return;
+            }
+
+            BitIdx = __builtin_ctzll(RemainingBits);
+            RemainingBits &= ~(1ULL << BitIdx);
+        }
+
+        Iterator operator++(int)
+        {
+            Iterator prev(*this);
+            ++*this;
+            return prev;
+        }
+
+        // Uses the widest word size that evenly divides DataLength.
+        Iterator& operator++()
+        {
+            if ((DataLength % 8) == 0)
+                Next<u64>();
+            else if ((DataLength % 4) == 0)
+                Next<u32>();
+            else if ((DataLength % 2) == 0)
+                Next<u16>();
+            else
+                Next<u8>();
+
+            return *this;
+        }
+    };
+
+    // Creates a bitfield in which the bits [start, start + size) are set and
+    // all others are clear. The range is clipped to the size of the bitfield.
+    NonStupidBitField(u32 start, u32 size)
+    {
+        memset(Data, 0, sizeof(Data));
+
+        if (size == 0 || start >= Size)
+            return;
+
+        // one past the last bit to set, clipped without overflowing
+        u32 endBit = (size > Size - start) ? Size : start + size;
+
+        u32 firstByte = start >> 3;
+        u32 lastByte = (endBit - 1) >> 3;
+        // bits (start & 7)..7 of the first byte
+        u8 headMask = (u8)(0xFF << (start & 0x7));
+        // bits 0..((endBit - 1) & 7) of the last byte
+        u8 tailMask = (u8)(0xFF >> (7 - ((endBit - 1) & 0x7)));
+
+        if (firstByte == lastByte)
+        {
+            // range starts and ends inside the same byte
+            Data[firstByte] = headMask & tailMask;
+            return;
+        }
+
+        Data[firstByte] = headMask;
+        memset(Data + firstByte + 1, 0xFF, lastByte - firstByte - 1);
+        Data[lastByte] = tailMask;
+    }
+
+    NonStupidBitField()
+    {
+        memset(Data, 0, sizeof(Data));
+    }
+
+    // Reads sizeof(T) bytes starting at Data[byteIdx] as one word. memcpy is
+    // used so the read does not depend on the alignment of Data.
+    template <typename T>
+    u64 LoadWord(u32 byteIdx) const
+    {
+        T word;
+        memcpy(&word, &Data[byteIdx], sizeof(T));
+        return word;
+    }
+
+    // Iterator state reached once all set bits have been visited.
+    Iterator End()
+    {
+        return Iterator{*this, DataLength, 0, 0};
+    }
+    // Iterator at the lowest set bit, or equal to End() if no bit is set.
+    Iterator Begin()
+    {
+        u64 firstWord;
+        if ((DataLength % 8) == 0)
+            firstWord = LoadWord<u64>(0);
+        else if ((DataLength % 4) == 0)
+            firstWord = LoadWord<u32>(0);
+        else if ((DataLength % 2) == 0)
+            firstWord = LoadWord<u16>(0);
+        else
+            firstWord = LoadWord<u8>(0);
+
+        Iterator it{*this, 0, 0, firstWord};
+        ++it;
+        return it;
+    }
+
+    Ref operator[](u32 idx)
+    {
+        return Ref{*this, idx};
+    }
+
+    NonStupidBitField& operator|=(const NonStupidBitField<Size>& other)
+    {
+        for (u32 i = 0; i < DataLength; i++)
+        {
+            Data[i] |= other.Data[i];
+        }
+        return *this;
+    }
+    NonStupidBitField& operator&=(const NonStupidBitField<Size>& other)
+    {
+        for (u32 i = 0; i < DataLength; i++)
+        {
+            Data[i] &= other.Data[i];
+        }
+        return *this;
+    }
+};
+
+
+#endif \ No newline at end of file
diff --git a/src/Platform.h b/src/Platform.h
index deb3785..b4dda9e 100644
--- a/src/Platform.h
+++ b/src/Platform.h
@@ -77,7 +77,7 @@ Semaphore* Semaphore_Create();
void Semaphore_Free(Semaphore* sema);
void Semaphore_Reset(Semaphore* sema);
void Semaphore_Wait(Semaphore* sema);
-void Semaphore_Post(Semaphore* sema);
+void Semaphore_Post(Semaphore* sema, int count = 1);
struct Mutex;
Mutex* Mutex_Create();
diff --git a/src/frontend/qt_sdl/Platform.cpp b/src/frontend/qt_sdl/Platform.cpp
index a51a985..d3480e4 100644
--- a/src/frontend/qt_sdl/Platform.cpp
+++ b/src/frontend/qt_sdl/Platform.cpp
@@ -230,9 +230,9 @@ void Semaphore_Wait(Semaphore* sema)
((QSemaphore*) sema)->acquire();
}
-void Semaphore_Post(Semaphore* sema)
+void Semaphore_Post(Semaphore* sema, int count)
{
- ((QSemaphore*) sema)->release();
+ ((QSemaphore*) sema)->release(count);
}
Mutex* Mutex_Create()