aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Arisotura <thetotalworm@gmail.com> 2020-12-04 18:28:15 +0100
committer Arisotura <thetotalworm@gmail.com> 2020-12-04 18:28:15 +0100
commit 129018a6626cbec915ef73484c51c9d07af8e8b9 (patch)
tree 8e5712bd241319731f7b6ade3363cefe845e3a5e /src
parent 6aad429383015a0ac135b081931ae9c5876a7ad0 (diff)
parent 42e083960e52cce31589714dcc7fab8e173efb81 (diff)
Merge remote-tracking branch 'remotes/origin/master' into dsi_camera
Diffstat (limited to 'src')
-rw-r--r-- src/ARMJIT.cpp 17
-rw-r--r-- src/ARMJIT_A64/ARMJIT_Compiler.cpp 9
-rw-r--r-- src/ARMJIT_A64/ARMJIT_Compiler.h 5
-rw-r--r-- src/ARMJIT_A64/ARMJIT_Linkage.S (renamed from src/ARMJIT_A64/ARMJIT_Linkage.s) 0
-rw-r--r-- src/ARMJIT_A64/ARMJIT_LoadStore.cpp 17
-rw-r--r-- src/ARMJIT_Internal.h 4
-rw-r--r-- src/ARMJIT_Memory.cpp 273
-rw-r--r-- src/ARMJIT_x64/ARMJIT_Branch.cpp 16
-rw-r--r-- src/ARMJIT_x64/ARMJIT_Compiler.cpp 24
-rw-r--r-- src/ARMJIT_x64/ARMJIT_Compiler.h 4
-rw-r--r-- src/ARMJIT_x64/ARMJIT_Linkage.S (renamed from src/ARMJIT_x64/ARMJIT_Linkage.s) 10
-rw-r--r-- src/ARMJIT_x64/ARMJIT_LoadStore.cpp 23
-rw-r--r-- src/CMakeLists.txt 15
-rw-r--r-- src/Config.cpp 6
-rw-r--r-- src/DMA.cpp 55
-rw-r--r-- src/DMA.h 8
-rw-r--r-- src/DSi.cpp 24
-rw-r--r-- src/GPU.cpp 338
-rw-r--r-- src/GPU.h 87
-rw-r--r-- src/GPU2D.cpp 369
-rw-r--r-- src/GPU2D.h 18
-rw-r--r-- src/GPU3D.cpp 15
-rw-r--r-- src/GPU3D.h 2
-rw-r--r-- src/GPU3D_OpenGL.cpp 66
-rw-r--r-- src/GPU3D_Soft.cpp 91
-rw-r--r-- src/NDS.cpp 51
-rw-r--r-- src/NDSCart.cpp 27
-rw-r--r-- src/NDSCart.h 3
-rw-r--r-- src/NonStupidBitfield.h 149
-rw-r--r-- src/OpenGLSupport.h 15
-rw-r--r-- src/Platform.h 27
-rw-r--r-- src/SPU.cpp 296
-rw-r--r-- src/SPU.h 25
-rw-r--r-- src/frontend/SharedConfig.h 13
-rw-r--r-- src/frontend/Util_ROM.cpp 2
-rw-r--r-- src/frontend/qt_sdl/CMakeLists.txt 15
-rw-r--r-- src/frontend/qt_sdl/EmuSettingsDialog.cpp 7
-rw-r--r-- src/frontend/qt_sdl/InputConfigDialog.cpp 1
-rw-r--r-- src/frontend/qt_sdl/LAN_PCap.cpp 21
-rw-r--r-- src/frontend/qt_sdl/Platform.cpp 47
-rw-r--r-- src/frontend/qt_sdl/PlatformConfig.cpp 2
-rw-r--r-- src/frontend/qt_sdl/WifiSettingsDialog.cpp 18
-rw-r--r-- src/frontend/qt_sdl/WifiSettingsDialog.h 3
-rw-r--r-- src/frontend/qt_sdl/WifiSettingsDialog.ui 121
-rw-r--r-- src/frontend/qt_sdl/main.cpp 68
45 files changed, 1658 insertions, 749 deletions
diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp
index c9d2b62..1921f13 100644
--- a/src/ARMJIT.cpp
+++ b/src/ARMJIT.cpp
@@ -176,7 +176,7 @@ T SlowRead9(u32 addr, ARMv5* cpu)
}
template <typename T, int ConsoleType>
-void SlowWrite9(u32 addr, ARMv5* cpu, T val)
+void SlowWrite9(u32 addr, ARMv5* cpu, u32 val)
{
addr &= ~(sizeof(T) - 1);
@@ -224,7 +224,7 @@ T SlowRead7(u32 addr)
}
template <typename T, int ConsoleType>
-void SlowWrite7(u32 addr, T val)
+void SlowWrite7(u32 addr, u32 val)
{
addr &= ~(sizeof(T) - 1);
@@ -266,16 +266,16 @@ void SlowBlockTransfer7(u32 addr, u64* data, u32 num)
#define INSTANTIATE_SLOWMEM(consoleType) \
template void SlowWrite9<u32, consoleType>(u32, ARMv5*, u32); \
- template void SlowWrite9<u16, consoleType>(u32, ARMv5*, u16); \
- template void SlowWrite9<u8, consoleType>(u32, ARMv5*, u8); \
+ template void SlowWrite9<u16, consoleType>(u32, ARMv5*, u32); \
+ template void SlowWrite9<u8, consoleType>(u32, ARMv5*, u32); \
\
template u32 SlowRead9<u32, consoleType>(u32, ARMv5*); \
template u16 SlowRead9<u16, consoleType>(u32, ARMv5*); \
template u8 SlowRead9<u8, consoleType>(u32, ARMv5*); \
\
template void SlowWrite7<u32, consoleType>(u32, u32); \
- template void SlowWrite7<u16, consoleType>(u32, u16); \
- template void SlowWrite7<u8, consoleType>(u32, u8); \
+ template void SlowWrite7<u16, consoleType>(u32, u32); \
+ template void SlowWrite7<u8, consoleType>(u32, u32); \
\
template u32 SlowRead7<u32, consoleType>(u32); \
template u16 SlowRead7<u16, consoleType>(u32); \
@@ -298,6 +298,7 @@ void Init()
void DeInit()
{
+ ResetBlockCache();
ARMJIT_Memory::DeInit();
delete JITCompiler;
@@ -594,7 +595,8 @@ void CompileBlock(ARM* cpu)
u32 r15 = cpu->R[15];
u32 addressRanges[Config::JIT_MaxBlockSize];
- u32 addressMasks[Config::JIT_MaxBlockSize] = {0};
+ u32 addressMasks[Config::JIT_MaxBlockSize];
+ memset(addressMasks, 0, Config::JIT_MaxBlockSize * sizeof(u32));
u32 numAddressRanges = 0;
u32 numLiterals = 0;
@@ -1116,6 +1118,7 @@ void ResetBlockCache()
range->Blocks.Clear();
range->Code = 0;
}
+ delete block;
}
JitBlocks9.clear();
JitBlocks7.clear();
diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
index 80c7f04..93563b9 100644
--- a/src/ARMJIT_A64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
@@ -68,6 +68,11 @@ void Compiler::A_Comp_MRS()
MOV(rd, RCPSR);
}
+void UpdateModeTrampoline(ARM* arm, u32 oldmode, u32 newmode)
+{
+ arm->UpdateMode(oldmode, newmode);
+}
+
void Compiler::A_Comp_MSR()
{
Comp_AddCycles_C();
@@ -139,7 +144,7 @@ void Compiler::A_Comp_MSR()
PushRegs(true);
- QuickCallFunction(X3, (void*)&ARM::UpdateMode);
+ QuickCallFunction(X3, (void*)&UpdateModeTrampoline);
PopRegs(true);
}
@@ -915,4 +920,4 @@ void Compiler::Comp_AddCycles_CD()
ConstantCycles += cycles;
}
-} \ No newline at end of file
+}
diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.h b/src/ARMJIT_A64/ARMJIT_Compiler.h
index af7497a..a79e9da 100644
--- a/src/ARMJIT_A64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_A64/ARMJIT_Compiler.h
@@ -187,6 +187,7 @@ public:
void Comp_RegShiftReg(int op, bool S, Op2& op2, Arm64Gen::ARM64Reg rs);
bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
+
enum
{
memop_Writeback = 1 << 0,
@@ -213,8 +214,8 @@ public:
return (u8*)entry - GetRXBase();
}
- bool IsJITFault(u64 pc);
- s64 RewriteMemAccess(u64 pc);
+ bool IsJITFault(u8* pc);
+ u8* RewriteMemAccess(u8* pc);
void SwapCodeRegion()
{
diff --git a/src/ARMJIT_A64/ARMJIT_Linkage.s b/src/ARMJIT_A64/ARMJIT_Linkage.S
index 7886315..7886315 100644
--- a/src/ARMJIT_A64/ARMJIT_Linkage.s
+++ b/src/ARMJIT_A64/ARMJIT_Linkage.S
diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
index 86e257a..2c14dc6 100644
--- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
@@ -9,37 +9,34 @@ using namespace Arm64Gen;
namespace ARMJIT
{
-bool Compiler::IsJITFault(u64 pc)
+bool Compiler::IsJITFault(u8* pc)
{
- return pc >= (u64)GetRXBase() && pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize);
+ return (u64)pc >= (u64)GetRXBase() && (u64)pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize);
}
-s64 Compiler::RewriteMemAccess(u64 pc)
+u8* Compiler::RewriteMemAccess(u8* pc)
{
- ptrdiff_t pcOffset = pc - (u64)GetRXBase();
+ ptrdiff_t pcOffset = pc - GetRXBase();
auto it = LoadStorePatches.find(pcOffset);
if (it != LoadStorePatches.end())
{
LoadStorePatch patch = it->second;
+ LoadStorePatches.erase(it);
ptrdiff_t curCodeOffset = GetCodeOffset();
SetCodePtrUnsafe(pcOffset + patch.PatchOffset);
BL(patch.PatchFunc);
-
for (int i = 0; i < patch.PatchSize / 4 - 1; i++)
HINT(HINT_NOP);
-
FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr());
SetCodePtrUnsafe(curCodeOffset);
- LoadStorePatches.erase(it);
-
- return patch.PatchOffset;
+ return pc + (ptrdiff_t)patch.PatchOffset;
}
printf("this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc));
abort();
@@ -192,7 +189,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
else
{
LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7);
- if (size == 32)
+ if (size == 32 && !addrIsStatic)
{
UBFIZ(W0, W0, 3, 2);
RORV(rdMapped, rdMapped, W0);
diff --git a/src/ARMJIT_Internal.h b/src/ARMJIT_Internal.h
index 4244470..b1e35f5 100644
--- a/src/ARMJIT_Internal.h
+++ b/src/ARMJIT_Internal.h
@@ -216,9 +216,9 @@ template <u32 Num>
void LinkBlock(ARM* cpu, u32 codeOffset);
template <typename T, int ConsoleType> T SlowRead9(u32 addr, ARMv5* cpu);
-template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, T val);
+template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, u32 val);
template <typename T, int ConsoleType> T SlowRead7(u32 addr);
-template <typename T, int ConsoleType> void SlowWrite7(u32 addr, T val);
+template <typename T, int ConsoleType> void SlowWrite7(u32 addr, u32 val);
template <bool Write, int ConsoleType> void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu);
template <bool Write, int ConsoleType> void SlowBlockTransfer7(u32 addr, u64* data, u32 num);
diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp
index d321d2f..f9f82aa 100644
--- a/src/ARMJIT_Memory.cpp
+++ b/src/ARMJIT_Memory.cpp
@@ -10,6 +10,12 @@
#include <signal.h>
#endif
+#if defined(__ANDROID__)
+#include <dlfcn.h>
+#include <linux/ashmem.h>
+#include <sys/ioctl.h>
+#endif
+
#include "ARMJIT_Memory.h"
#include "ARMJIT_Internal.h"
@@ -22,7 +28,9 @@
#include "NDSCart.h"
#include "SPU.h"
+#ifndef __APPLE__
#include <malloc.h>
+#endif
/*
We're handling fastmem here.
@@ -40,7 +48,8 @@
We handle this by only mapping those regions which are actually
used and by praying the games don't go wild.
- Beware, this file is full of platform specific code.
+ Beware, this file is full of platform specific code and copied
+ from Dolphin, so enjoy the copied comments!
*/
@@ -49,12 +58,16 @@ namespace ARMJIT_Memory
struct FaultDescription
{
u32 EmulatedFaultAddr;
- u64 FaultPC;
+ u8* FaultPC;
};
-bool FaultHandler(FaultDescription* faultDesc, s32& offset);
+bool FaultHandler(FaultDescription& faultDesc);
}
+#if defined(__ANDROID__)
+#define ASHMEM_DEVICE "/dev/ashmem"
+#endif
+
#if defined(__SWITCH__)
// with LTO the symbols seem to be not properly overriden
// if they're somewhere else
@@ -75,7 +88,7 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx)
ARMJIT_Memory::FaultDescription desc;
u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
desc.EmulatedFaultAddr = (u8*)ctx->far.x - curArea;
- desc.FaultPC = ctx->pc.x;
+ desc.FaultPC = (u8*)ctx->pc.x;
u64 integerRegisters[33];
memcpy(integerRegisters, &ctx->cpu_gprs[0].x, 8*29);
@@ -84,10 +97,9 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx)
integerRegisters[31] = ctx->sp.x;
integerRegisters[32] = ctx->pc.x;
- s32 offset;
- if (ARMJIT_Memory::FaultHandler(&desc, offset))
+ if (ARMJIT_Memory::FaultHandler(desc, offset))
{
- integerRegisters[32] += offset;
+ integerRegisters[32] = (u64)desc.FaultPC;
ARM_RestoreContext(integerRegisters);
}
@@ -117,12 +129,11 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo)
ARMJIT_Memory::FaultDescription desc;
u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
desc.EmulatedFaultAddr = (u8*)exceptionInfo->ExceptionRecord->ExceptionInformation[1] - curArea;
- desc.FaultPC = exceptionInfo->ContextRecord->Rip;
+ desc.FaultPC = (u8*)exceptionInfo->ContextRecord->Rip;
- s32 offset = 0;
- if (ARMJIT_Memory::FaultHandler(&desc, offset))
+ if (ARMJIT_Memory::FaultHandler(desc))
{
- exceptionInfo->ContextRecord->Rip += offset;
+ exceptionInfo->ContextRecord->Rip = (u64)desc.FaultPC;
return EXCEPTION_CONTINUE_EXECUTION;
}
@@ -131,50 +142,75 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo)
#else
-struct sigaction NewSa;
-struct sigaction OldSa;
+static struct sigaction OldSaSegv;
+static struct sigaction OldSaBus;
static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext)
{
+ if (sig != SIGSEGV && sig != SIGBUS)
+ {
+ // We are not interested in other signals - handle it as usual.
+ return;
+ }
+ if (info->si_code != SEGV_MAPERR && info->si_code != SEGV_ACCERR)
+ {
+ // Huh? Return.
+ return;
+ }
+
ucontext_t* context = (ucontext_t*)rawContext;
-
+
ARMJIT_Memory::FaultDescription desc;
u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
#ifdef __x86_64__
desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea;
- desc.FaultPC = context->uc_mcontext.gregs[REG_RIP];
+ #ifdef __APPLE__
+ desc.FaultPC = (u8*)context->uc_mcontext->__ss.__rip;
+ #else
+ desc.FaultPC = (u8*)context->uc_mcontext.gregs[REG_RIP];
+ #endif
+
#else
desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea;
- desc.FaultPC = context->uc_mcontext.pc;
+ desc.FaultPC = (u8*)context->uc_mcontext.pc;
#endif
- s32 offset = 0;
- if (ARMJIT_Memory::FaultHandler(&desc, offset))
+ if (ARMJIT_Memory::FaultHandler(desc))
{
#ifdef __x86_64__
- context->uc_mcontext.gregs[REG_RIP] += offset;
+ #ifdef __APPLE__
+ context->uc_mcontext->__ss.__rip = (u64)desc.FaultPC;
+ #else
+ context->uc_mcontext.gregs[REG_RIP] = (u64)desc.FaultPC;
+ #endif
#else
- context->uc_mcontext.pc += offset;
+ context->uc_mcontext.pc = (u64)desc.FaultPC;
#endif
return;
}
- if (OldSa.sa_flags & SA_SIGINFO)
+ struct sigaction* oldSa;
+ if (sig == SIGSEGV)
+ oldSa = &OldSaSegv;
+ else
+ oldSa = &OldSaBus;
+
+ if (oldSa->sa_flags & SA_SIGINFO)
{
- OldSa.sa_sigaction(sig, info, rawContext);
+ oldSa->sa_sigaction(sig, info, rawContext);
return;
}
- if (OldSa.sa_handler == SIG_DFL)
+ if (oldSa->sa_handler == SIG_DFL)
{
signal(sig, SIG_DFL);
return;
}
- if (OldSa.sa_handler == SIG_IGN)
+ if (oldSa->sa_handler == SIG_IGN)
{
// Ignore signal
return;
}
- OldSa.sa_handler(sig);
+ oldSa->sa_handler(sig);
}
#endif
@@ -231,7 +267,7 @@ enum
{
memstate_Unmapped,
memstate_MappedRW,
- // on switch this is unmapped as well
+ // on Switch this is unmapped as well
memstate_MappedProtected,
};
@@ -314,14 +350,16 @@ struct Mapping
void Unmap(int region)
{
+ u32 dtcmStart = NDS::ARM9->DTCMBase;
+ u32 dtcmSize = NDS::ARM9->DTCMSize;
bool skipDTCM = Num == 0 && region != memregion_DTCM;
u8* statuses = Num == 0 ? MappingStatus9 : MappingStatus7;
u32 offset = 0;
while (offset < Size)
{
- if (skipDTCM && Addr + offset == NDS::ARM9->DTCMBase)
+ if (skipDTCM && Addr + offset == dtcmStart)
{
- offset += NDS::ARM9->DTCMSize;
+ offset += dtcmSize;
}
else
{
@@ -329,7 +367,7 @@ struct Mapping
u8 status = statuses[(Addr + offset) >> 12];
while (statuses[(Addr + offset) >> 12] == status
&& offset < Size
- && (!skipDTCM || Addr + offset != NDS::ARM9->DTCMBase))
+ && (!skipDTCM || Addr + offset != dtcmStart))
{
assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped);
statuses[(Addr + offset) >> 12] = memstate_Unmapped;
@@ -347,9 +385,33 @@ struct Mapping
#endif
}
}
+
#ifndef __SWITCH__
- bool succeded = UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size);
- assert(succeded);
+#ifndef _WIN32
+ u32 dtcmEnd = dtcmStart + dtcmSize;
+ if (Num == 0
+ && dtcmEnd >= Addr
+ && dtcmStart < Addr + Size)
+ {
+ bool success;
+ if (dtcmStart > Addr)
+ {
+ success = UnmapFromRange(Addr, 0, OffsetsPerRegion[region] + LocalOffset, dtcmStart - Addr);
+ assert(success);
+ }
+ if (dtcmEnd < Addr + Size)
+ {
+ u32 offset = dtcmStart - Addr + dtcmSize;
+ success = UnmapFromRange(dtcmEnd, 0, OffsetsPerRegion[region] + LocalOffset + offset, Size - offset);
+ assert(success);
+ }
+ }
+ else
+#endif
+ {
+ bool succeded = UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size);
+ assert(succeded);
+ }
#endif
}
};
@@ -418,10 +480,10 @@ void RemapDTCM(u32 newBase, u32 newSize)
printf("unmapping %d %x %x %x %x\n", region, mapping.Addr, mapping.Size, mapping.Num, mapping.LocalOffset);
- bool oldOverlap = NDS::ARM9->DTCMSize > 0 && !(oldDTCMBase >= end || oldDTCBEnd <= start);
- bool newOverlap = newSize > 0 && !(newBase >= end || newEnd <= start);
+ bool overlap = (NDS::ARM9->DTCMSize > 0 && oldDTCMBase < end && oldDTCBEnd > start)
+ || (newSize > 0 && newBase < end && newEnd > start);
- if (mapping.Num == 0 && (oldOverlap || newOverlap))
+ if (mapping.Num == 0 && overlap)
{
mapping.Unmap(region);
Mappings[region].Remove(i);
@@ -445,8 +507,8 @@ void RemapNWRAM(int num)
for (int i = 0; i < Mappings[memregion_SharedWRAM].Length;)
{
Mapping& mapping = Mappings[memregion_SharedWRAM][i];
- if (!(DSi::NWRAMStart[mapping.Num][num] >= mapping.Addr + mapping.Size
- || DSi::NWRAMEnd[mapping.Num][num] < mapping.Addr))
+ if (DSi::NWRAMStart[mapping.Num][num] < mapping.Addr + mapping.Size
+ && DSi::NWRAMEnd[mapping.Num][num] > mapping.Addr)
{
mapping.Unmap(memregion_SharedWRAM);
Mappings[memregion_SharedWRAM].Remove(i);
@@ -469,7 +531,7 @@ void RemapSWRAM()
for (int i = 0; i < Mappings[memregion_WRAM7].Length;)
{
Mapping& mapping = Mappings[memregion_WRAM7][i];
- if (mapping.Addr + mapping.Size < 0x03800000)
+ if (mapping.Addr + mapping.Size <= 0x03800000)
{
mapping.Unmap(memregion_WRAM7);
Mappings[memregion_WRAM7].Remove(i);
@@ -501,26 +563,53 @@ bool MapAtAddress(u32 addr)
return false;
u8* states = num == 0 ? MappingStatus9 : MappingStatus7;
- printf("trying to create mapping %x, %x %x %d %d\n", mirrorStart, mirrorSize, memoryOffset, region, num);
+ printf("mapping mirror %x, %x %x %d %d\n", mirrorStart, mirrorSize, memoryOffset, region, num);
bool isExecutable = ARMJIT::CodeMemRegions[region];
+ u32 dtcmStart = NDS::ARM9->DTCMBase;
+ u32 dtcmSize = NDS::ARM9->DTCMSize;
+ u32 dtcmEnd = dtcmStart + dtcmSize;
#ifndef __SWITCH__
- bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize);
- assert(succeded);
+#ifndef _WIN32
+ if (num == 0
+ && dtcmEnd >= mirrorStart
+ && dtcmStart < mirrorStart + mirrorSize)
+ {
+ bool success;
+ if (dtcmStart > mirrorStart)
+ {
+ success = MapIntoRange(mirrorStart, 0, OffsetsPerRegion[region] + memoryOffset, dtcmStart - mirrorStart);
+ assert(success);
+ }
+ if (dtcmEnd < mirrorStart + mirrorSize)
+ {
+ u32 offset = dtcmStart - mirrorStart + dtcmSize;
+ success = MapIntoRange(dtcmEnd, 0, OffsetsPerRegion[region] + memoryOffset + offset, mirrorSize - offset);
+ assert(success);
+ }
+ }
+ else
+#endif
+ {
+ bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize);
+ assert(succeded);
+ }
#endif
ARMJIT::AddressRange* range = ARMJIT::CodeMemRegions[region] + memoryOffset / 512;
// this overcomplicated piece of code basically just finds whole pieces of code memory
- // which can be mapped
+ // which can be mapped/protected
u32 offset = 0;
bool skipDTCM = num == 0 && region != memregion_DTCM;
while (offset < mirrorSize)
{
- if (skipDTCM && mirrorStart + offset == NDS::ARM9->DTCMBase)
+ if (skipDTCM && mirrorStart + offset == dtcmStart)
{
- SetCodeProtectionRange(NDS::ARM9->DTCMBase, NDS::ARM9->DTCMSize, 0, 0);
- offset += NDS::ARM9->DTCMSize;
+#ifdef _WIN32
+ SetCodeProtectionRange(dtcmStart, dtcmSize, 0, 0);
+#endif
+ offset += dtcmSize;
}
else
{
@@ -557,26 +646,25 @@ bool MapAtAddress(u32 addr)
Mapping mapping{mirrorStart, mirrorSize, memoryOffset, num};
Mappings[region].Add(mapping);
- printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + mirrorSize - 1);
+ //printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + mirrorSize - 1);
return true;
}
-bool FaultHandler(FaultDescription* faultDesc, s32& offset)
+bool FaultHandler(FaultDescription& faultDesc)
{
- if (ARMJIT::JITCompiler->IsJITFault(faultDesc->FaultPC))
+ if (ARMJIT::JITCompiler->IsJITFault(faultDesc.FaultPC))
{
bool rewriteToSlowPath = true;
- u32 addr = faultDesc->EmulatedFaultAddr;
+ u8* memStatus = NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7;
- if ((NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7)[addr >> 12] == memstate_Unmapped)
- rewriteToSlowPath = !MapAtAddress(faultDesc->EmulatedFaultAddr);
+ if (memStatus[faultDesc.EmulatedFaultAddr >> 12] == memstate_Unmapped)
+ rewriteToSlowPath = !MapAtAddress(faultDesc.EmulatedFaultAddr);
if (rewriteToSlowPath)
- {
- offset = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc->FaultPC);
- }
+ faultDesc.FaultPC = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc.FaultPC);
+
return true;
}
return false;
@@ -624,22 +712,52 @@ void Init()
u8* basePtr = MemoryBase;
#else
- FastMem9Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
- FastMem7Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
-
- MemoryBase = (u8*)mmap(NULL, MemoryTotalSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
-
+ // this used to be allocated with three different mmaps
+ // The idea was to give the OS more freedom where to position the buffers,
+ // but something was bad about this so instead we take this vmem eating monster
+ // which seems to work better.
+ MemoryBase = (u8*)mmap(NULL, AddrSpaceSize*4, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
+ munmap(MemoryBase, AddrSpaceSize*4);
+ FastMem9Start = MemoryBase;
+ FastMem7Start = MemoryBase + AddrSpaceSize;
+ MemoryBase = MemoryBase + AddrSpaceSize*2;
+
+#if defined(__ANDROID__)
+ static void* libandroid = dlopen("libandroid.so", RTLD_LAZY | RTLD_LOCAL);
+ using type_ASharedMemory_create = int(*)(const char* name, size_t size);
+ static void* symbol = dlsym(libandroid, "ASharedMemory_create");
+ static auto shared_memory_create = reinterpret_cast<type_ASharedMemory_create>(symbol);
+
+ if (shared_memory_create)
+ {
+ MemoryFile = shared_memory_create("melondsfastmem", MemoryTotalSize);
+ }
+ else
+ {
+ int fd = open(ASHMEM_DEVICE, O_RDWR);
+ ioctl(fd, ASHMEM_SET_NAME, "melondsfastmem");
+ ioctl(fd, ASHMEM_SET_SIZE, MemoryTotalSize);
+ MemoryFile = fd;
+ }
+#elif defined(__APPLE__)
+ char* fastmemPidName = new char[snprintf(NULL, 0, "melondsfastmem%d", getpid()) + 1];
+ sprintf(fastmemPidName, "melondsfastmem%d", getpid());
+ MemoryFile = shm_open(fastmemPidName, O_RDWR|O_CREAT, 0600);
+ delete[] fastmemPidName;
+#else
MemoryFile = memfd_create("melondsfastmem", 0);
+#endif
ftruncate(MemoryFile, MemoryTotalSize);
- NewSa.sa_flags = SA_SIGINFO;
- sigemptyset(&NewSa.sa_mask);
- NewSa.sa_sigaction = SigsegvHandler;
- sigaction(SIGSEGV, &NewSa, &OldSa);
-
- munmap(MemoryBase, MemoryTotalSize);
- munmap(FastMem9Start, AddrSpaceSize);
- munmap(FastMem7Start, AddrSpaceSize);
+ struct sigaction sa;
+ sa.sa_handler = nullptr;
+ sa.sa_sigaction = &SigsegvHandler;
+ sa.sa_flags = SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ sigaction(SIGSEGV, &sa, &OldSaSegv);
+#ifdef __APPLE__
+ sigaction(SIGBUS, &sa, &OldSaBus);
+#endif
mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0);
@@ -657,17 +775,30 @@ void Init()
void DeInit()
{
#if defined(__SWITCH__)
- virtmemFree(FastMem9Start, 0x100000000);
- virtmemFree(FastMem7Start, 0x100000000);
+ virtmemFree(FastMem9Start, AddrSpaceSize);
+ virtmemFree(FastMem7Start, AddrSpaceSize);
svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize);
virtmemFree(MemoryBaseCodeMem, MemoryTotalSize);
free(MemoryBase);
+#elif defined(__APPLE__)
+ char* fastmemPidName = new char[snprintf(NULL, 0, "melondsfastmem%d", getpid()) + 1];
+ sprintf(fastmemPidName, "melondsfastmem%d", getpid());
+ shm_unlink(fastmemPidName);
+ delete[] fastmemPidName;
#elif defined(_WIN32)
assert(UnmapViewOfFile(MemoryBase));
CloseHandle(MemoryFile);
RemoveVectoredExceptionHandler(ExceptionHandlerHandle);
+#else
+ sigaction(SIGSEGV, &OldSaSegv, nullptr);
+#ifdef __APPLE__
+ sigaction(SIGBUS, &OldSaBus, nullptr);
+#endif
+
+ munmap(MemoryBase, MemoryTotalSize);
+ close(MemoryFile);
#endif
}
@@ -997,9 +1128,11 @@ int ClassifyAddress7(u32 addr)
case 0x06000000:
case 0x06800000:
return memregion_VWRAM;
+
+ default:
+ return memregion_Other;
}
}
- return memregion_Other;
}
void WifiWrite32(u32 addr, u32 val)
@@ -1176,4 +1309,4 @@ void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size)
return NULL;
}
-} \ No newline at end of file
+}
diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp
index 819fe3c..70ec781 100644
--- a/src/ARMJIT_x64/ARMJIT_Branch.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp
@@ -130,6 +130,16 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
}
+void ARMv4JumpToTrampoline(ARMv4* arm, u32 addr, bool restorecpsr)
+{
+ arm->JumpTo(addr, restorecpsr);
+}
+
+void ARMv5JumpToTrampoline(ARMv5* arm, u32 addr, bool restorecpsr)
+{
+ arm->JumpTo(addr, restorecpsr);
+}
+
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
{
IrregularCycles = true;
@@ -146,9 +156,9 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
else
MOV(32, R(ABI_PARAM3), Imm32(true)); // what a waste
if (Num == 0)
- CALL((void*)&ARMv5::JumpTo);
+ CALL((void*)&ARMv5JumpToTrampoline);
else
- CALL((void*)&ARMv4::JumpTo);
+ CALL((void*)&ARMv4JumpToTrampoline);
PopRegs(restoreCPSR);
@@ -269,4 +279,4 @@ void Compiler::T_Comp_BL_Merged()
Comp_JumpTo(target);
}
-} \ No newline at end of file
+}
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
index c6419c9..cc4ad80 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
@@ -101,6 +101,11 @@ void Compiler::A_Comp_MRS()
MOV(32, rd, R(RCPSR));
}
+void UpdateModeTrampoline(ARM* arm, u32 oldmode, u32 newmode)
+{
+ arm->UpdateMode(oldmode, newmode);
+}
+
void Compiler::A_Comp_MSR()
{
Comp_AddCycles_C();
@@ -185,7 +190,7 @@ void Compiler::A_Comp_MSR()
MOV(32, R(ABI_PARAM3), R(RCPSR));
MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
MOV(64, R(ABI_PARAM1), R(RCPU));
- CALL((void*)&ARM::UpdateMode);
+ CALL((void*)&UpdateModeTrampoline);
PopRegs(true);
}
@@ -216,6 +221,8 @@ Compiler::Compiler()
#ifdef _WIN32
DWORD dummy;
VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy);
+ #elif defined(__APPLE__)
+ pageAligned = (u8*)mmap(NULL, 1024*1024*32, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS ,-1, 0);
#else
mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
#endif
@@ -340,7 +347,7 @@ Compiler::Compiler()
ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
if (consoleType == 0)
{
- switch ((8 << size) | num)
+ switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowWrite9<u32, 0>); break;
case 33: ABI_CallFunction(SlowWrite7<u32, 0>); break;
@@ -352,7 +359,7 @@ Compiler::Compiler()
}
else
{
- switch ((8 << size) | num)
+ switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowWrite9<u32, 1>); break;
case 33: ABI_CallFunction(SlowWrite7<u32, 1>); break;
@@ -375,7 +382,7 @@ Compiler::Compiler()
ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
if (consoleType == 0)
{
- switch ((8 << size) | num)
+ switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowRead9<u32, 0>); break;
case 33: ABI_CallFunction(SlowRead7<u32, 0>); break;
@@ -387,7 +394,7 @@ Compiler::Compiler()
}
else
{
- switch ((8 << size) | num)
+ switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowRead9<u32, 1>); break;
case 33: ABI_CallFunction(SlowRead7<u32, 1>); break;
@@ -612,9 +619,9 @@ void Compiler::Reset()
LoadStorePatches.clear();
}
-bool Compiler::IsJITFault(u64 addr)
+bool Compiler::IsJITFault(u8* addr)
{
- return addr >= (u64)CodeMemory && addr < (u64)CodeMemory + sizeof(CodeMemory);
+ return (u64)addr >= (u64)ResetStart && (u64)addr < (u64)ResetStart + CodeMemSize;
}
void Compiler::Comp_SpecialBranchBehaviour(bool taken)
@@ -896,5 +903,4 @@ void Compiler::Comp_AddCycles_CD()
else
ConstantCycles += cycles;
}
-
-} \ No newline at end of file
+}
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h
index 3e900c3..57aab7b 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.h
@@ -208,9 +208,9 @@ public:
SetCodePtr(FarCode);
}
- bool IsJITFault(u64 addr);
+ bool IsJITFault(u8* addr);
- s32 RewriteMemAccess(u64 pc);
+ u8* RewriteMemAccess(u8* pc);
u8* FarCode;
u8* NearCode;
diff --git a/src/ARMJIT_x64/ARMJIT_Linkage.s b/src/ARMJIT_x64/ARMJIT_Linkage.S
index 0a84df0..8cc0b5f 100644
--- a/src/ARMJIT_x64/ARMJIT_Linkage.s
+++ b/src/ARMJIT_x64/ARMJIT_Linkage.S
@@ -29,8 +29,13 @@
.p2align 4,,15
+#ifdef __APPLE__
+.global _ARM_Dispatch
+_ARM_Dispatch:
+#else
.global ARM_Dispatch
ARM_Dispatch:
+#endif
#ifdef WIN64
push rdi
push rsi
@@ -54,8 +59,13 @@ ARM_Dispatch:
.p2align 4,,15
+#ifdef __APPLE__
+.global _ARM_Ret
+_ARM_Ret:
+#else
.global ARM_Ret
ARM_Ret:
+#endif
mov [RCPU + ARM_CPSR_offset], RCPSR
#ifdef WIN64
diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
index 8b4e8fe..d80b25b 100644
--- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
@@ -15,28 +15,24 @@ int squeezePointer(T* ptr)
return truncated;
}
-s32 Compiler::RewriteMemAccess(u64 pc)
+u8* Compiler::RewriteMemAccess(u8* pc)
{
- auto it = LoadStorePatches.find((u8*)pc);
+ auto it = LoadStorePatches.find(pc);
if (it != LoadStorePatches.end())
{
LoadStorePatch patch = it->second;
LoadStorePatches.erase(it);
- u8* curCodePtr = GetWritableCodePtr();
- u8* rewritePtr = (u8*)pc + (ptrdiff_t)patch.Offset;
- SetCodePtr(rewritePtr);
+ //printf("rewriting memory access %p %d %d\n", (u8*)pc-ResetStart, patch.Offset, patch.Size);
- CALL(patch.PatchFunc);
- u32 remainingSize = patch.Size - (GetWritableCodePtr() - rewritePtr);
+ XEmitter emitter(pc + (ptrdiff_t)patch.Offset);
+ emitter.CALL(patch.PatchFunc);
+ ptrdiff_t remainingSize = (ptrdiff_t)patch.Size - 5;
+ assert(remainingSize >= 0);
if (remainingSize > 0)
- NOP(remainingSize);
+ emitter.NOP(remainingSize);
- //printf("rewriting memory access %p %d %d\n", patch.PatchFunc, patch.Offset, patch.Size);
-
- SetCodePtr(curCodePtr);
-
- return patch.Offset;
+ return pc + (ptrdiff_t)patch.Offset;
}
printf("this is a JIT bug %llx\n", pc);
@@ -192,6 +188,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
u8* memopStart = GetWritableCodePtr();
LoadStorePatch patch;
+ assert(rdMapped.GetSimpleReg() >= 0 && rdMapped.GetSimpleReg() < 16);
patch.PatchFunc = flags & memop_Store
? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped.GetSimpleReg()]
: PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped.GetSimpleReg()];
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d6c3897..c16da9f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -80,9 +80,8 @@ if (ENABLE_JIT)
ARMJIT_x64/ARMJIT_LoadStore.cpp
ARMJIT_x64/ARMJIT_Branch.cpp
- ARMJIT_x64/ARMJIT_Linkage.s
+ ARMJIT_x64/ARMJIT_Linkage.S
)
- set_source_files_properties(ARMJIT_x64/ARMJIT_Linkage.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
endif()
if (ARCHITECTURE STREQUAL ARM64)
target_sources(core PRIVATE
@@ -94,16 +93,22 @@ if (ENABLE_JIT)
ARMJIT_A64/ARMJIT_LoadStore.cpp
ARMJIT_A64/ARMJIT_Branch.cpp
- ARMJIT_A64/ARMJIT_Linkage.s
+ ARMJIT_A64/ARMJIT_Linkage.S
)
- set_source_files_properties(ARMJIT_A64/ARMJIT_Linkage.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
endif()
endif()
+if (APPLE)
+ target_include_directories(core PUBLIC /usr/local/include)
+ target_link_directories(core PUBLIC /usr/local/lib)
+endif()
+
if (ENABLE_OGLRENDERER)
if (WIN32)
target_link_libraries(core ole32 comctl32 ws2_32 opengl32)
- else()
+ elseif (APPLE)
+ target_link_libraries(core "-framework OpenGL")
+ else()
target_link_libraries(core GL EGL)
endif()
else()
diff --git a/src/Config.cpp b/src/Config.cpp
index 341b14c..f7db252 100644
--- a/src/Config.cpp
+++ b/src/Config.cpp
@@ -73,7 +73,11 @@ ConfigEntry ConfigFile[] =
{"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 32, NULL, 0},
{"JIT_BranchOptimisations", 0, &JIT_BranchOptimisations, 1, NULL, 0},
{"JIT_LiteralOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0},
- {"JIT_FastMemory", 0, &JIT_FastMemory, 1, NULL, 0},
+ #ifdef __APPLE__
+ {"JIT_FastMemory", 0, &JIT_FastMemory, 0, NULL, 0},
+ #else
+ {"JIT_FastMemory", 0, &JIT_FastMemory, 1, NULL, 0},
+ #endif
#endif
{"", -1, NULL, 0, NULL, 0}
diff --git a/src/DMA.cpp b/src/DMA.cpp
index 18b8a2f..8ad3918 100644
--- a/src/DMA.cpp
+++ b/src/DMA.cpp
@@ -77,21 +77,6 @@ void DMA::Reset()
Running = false;
InProgress = false;
-
- if (NDS::ConsoleType == 1)
- {
- BusRead16 = (CPU==0) ? DSi::ARM9Read16 : DSi::ARM7Read16;
- BusRead32 = (CPU==0) ? DSi::ARM9Read32 : DSi::ARM7Read32;
- BusWrite16 = (CPU==0) ? DSi::ARM9Write16 : DSi::ARM7Write16;
- BusWrite32 = (CPU==0) ? DSi::ARM9Write32 : DSi::ARM7Write32;
- }
- else
- {
- BusRead16 = (CPU==0) ? NDS::ARM9Read16 : NDS::ARM7Read16;
- BusRead32 = (CPU==0) ? NDS::ARM9Read32 : NDS::ARM7Read32;
- BusWrite16 = (CPU==0) ? NDS::ARM9Write16 : NDS::ARM7Write16;
- BusWrite32 = (CPU==0) ? NDS::ARM9Write32 : NDS::ARM7Write32;
- }
}
void DMA::DoSavestate(Savestate* file)
@@ -198,13 +183,7 @@ void DMA::Start()
NDS::StopCPU(CPU, 1<<Num);
}
-void DMA::Run()
-{
- if (!Running) return;
- if (CPU == 0) return Run9();
- else return Run7();
-}
-
+template <int ConsoleType>
void DMA::Run9()
{
if (NDS::ARM9Timestamp >= NDS::ARM9Target) return;
@@ -242,7 +221,10 @@ void DMA::Run9()
{
NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift);
- BusWrite16(CurDstAddr, BusRead16(CurSrcAddr));
+ if (ConsoleType == 1)
+ DSi::ARM9Write16(CurDstAddr, DSi::ARM9Read16(CurSrcAddr));
+ else
+ NDS::ARM9Write16(CurDstAddr, NDS::ARM9Read16(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<1;
CurDstAddr += DstAddrInc<<1;
@@ -278,7 +260,10 @@ void DMA::Run9()
{
NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift);
- BusWrite32(CurDstAddr, BusRead32(CurSrcAddr));
+ if (ConsoleType == 1)
+ DSi::ARM9Write32(CurDstAddr, DSi::ARM9Read32(CurSrcAddr));
+ else
+ NDS::ARM9Write32(CurDstAddr, NDS::ARM9Read32(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<2;
CurDstAddr += DstAddrInc<<2;
@@ -317,6 +302,7 @@ void DMA::Run9()
NDS::ResumeCPU(0, 1<<Num);
}
+template <int ConsoleType>
void DMA::Run7()
{
if (NDS::ARM7Timestamp >= NDS::ARM7Target) return;
@@ -354,7 +340,10 @@ void DMA::Run7()
{
NDS::ARM7Timestamp += unitcycles;
- BusWrite16(CurDstAddr, BusRead16(CurSrcAddr));
+ if (ConsoleType == 1)
+ DSi::ARM7Write16(CurDstAddr, DSi::ARM7Read16(CurSrcAddr));
+ else
+ NDS::ARM7Write16(CurDstAddr, NDS::ARM7Read16(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<1;
CurDstAddr += DstAddrInc<<1;
@@ -390,7 +379,10 @@ void DMA::Run7()
{
NDS::ARM7Timestamp += unitcycles;
- BusWrite32(CurDstAddr, BusRead32(CurSrcAddr));
+ if (ConsoleType == 1)
+ DSi::ARM7Write32(CurDstAddr, DSi::ARM7Read32(CurSrcAddr));
+ else
+ NDS::ARM7Write32(CurDstAddr, NDS::ARM7Read32(CurSrcAddr));
CurSrcAddr += SrcAddrInc<<2;
CurDstAddr += DstAddrInc<<2;
@@ -425,3 +417,14 @@ void DMA::Run7()
InProgress = false;
NDS::ResumeCPU(1, 1<<Num);
}
+
+// Advances this DMA channel when it is running, dispatching to the ARM9 path
+// (CPU == 0) or the ARM7 path (any other CPU value). ConsoleType is forwarded
+// unchanged to Run9/Run7.
+template <int ConsoleType>
+void DMA::Run()
+{
+    if (!Running)
+        return;
+
+    if (CPU != 0)
+        Run7<ConsoleType>();
+    else
+        Run9<ConsoleType>();
+}
+
+template void DMA::Run<0>();
+template void DMA::Run<1>(); \ No newline at end of file
diff --git a/src/DMA.h b/src/DMA.h
index 0344fba..b0b4ab2 100644
--- a/src/DMA.h
+++ b/src/DMA.h
@@ -34,9 +34,12 @@ public:
void WriteCnt(u32 val);
void Start();
+ template <int ConsoleType>
void Run();
+ template <int ConsoleType>
void Run9();
+ template <int ConsoleType>
void Run7();
bool IsInMode(u32 mode)
@@ -86,11 +89,6 @@ private:
bool Stall;
bool IsGXFIFODMA;
-
- u16 (*BusRead16)(u32 addr);
- u32 (*BusRead32)(u32 addr);
- void (*BusWrite16)(u32 addr, u16 val);
- void (*BusWrite32)(u32 addr, u32 val);
};
#endif
diff --git a/src/DSi.cpp b/src/DSi.cpp
index 0e62f5b..bcc1f92 100644
--- a/src/DSi.cpp
+++ b/src/DSi.cpp
@@ -543,15 +543,15 @@ void MapNWRAM_A(u32 num, u8 val)
return;
}
-#ifdef JIT_ENABLED
- ARMJIT_Memory::RemapNWRAM(0);
-#endif
-
int mbkn = 0, mbks = 8*num;
u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF;
if (oldval == val) return;
+#ifdef JIT_ENABLED
+ ARMJIT_Memory::RemapNWRAM(0);
+#endif
+
MBK[0][mbkn] &= ~(0xFF << mbks);
MBK[0][mbkn] |= (val << mbks);
MBK[1][mbkn] = MBK[0][mbkn];
@@ -578,15 +578,15 @@ void MapNWRAM_B(u32 num, u8 val)
return;
}
-#ifdef JIT_ENABLED
- ARMJIT_Memory::RemapNWRAM(1);
-#endif
-
int mbkn = 1+(num>>2), mbks = 8*(num&3);
u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF;
if (oldval == val) return;
+#ifdef JIT_ENABLED
+ ARMJIT_Memory::RemapNWRAM(1);
+#endif
+
MBK[0][mbkn] &= ~(0xFF << mbks);
MBK[0][mbkn] |= (val << mbks);
MBK[1][mbkn] = MBK[0][mbkn];
@@ -617,15 +617,15 @@ void MapNWRAM_C(u32 num, u8 val)
return;
}
-#ifdef JIT_ENABLED
- ARMJIT_Memory::RemapNWRAM(2);
-#endif
-
int mbkn = 3+(num>>2), mbks = 8*(num&3);
u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF;
if (oldval == val) return;
+#ifdef JIT_ENABLED
+ ARMJIT_Memory::RemapNWRAM(2);
+#endif
+
MBK[0][mbkn] &= ~(0xFF << mbks);
MBK[0][mbkn] |= (val << mbks);
MBK[1][mbkn] = MBK[0][mbkn];
diff --git a/src/GPU.cpp b/src/GPU.cpp
index 7989750..e6b24e0 100644
--- a/src/GPU.cpp
+++ b/src/GPU.cpp
@@ -49,8 +49,8 @@ u8 VRAM_F[ 16*1024];
u8 VRAM_G[ 16*1024];
u8 VRAM_H[ 32*1024];
u8 VRAM_I[ 16*1024];
-u8* VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I};
-u32 VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF};
+u8* const VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I};
+u32 const VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF};
u8 VRAMCNT[9];
u8 VRAMSTAT;
@@ -85,6 +85,62 @@ bool Accelerated;
GPU2D* GPU2D_A;
GPU2D* GPU2D_B;
+/*
+ VRAM invalidation tracking
+
+ - we want to know when a VRAM region used for graphics changed
+ - for some regions unmapping is mandatory to modify them (Texture, TexPal and ExtPal) and
+ we don't want to completely invalidate them every time they're unmapped and remapped
+
+ For this reason we don't track the dirtiness per mapping region, but instead per VRAM bank
+ with VRAMDirty. Writes to LCDC go directly into VRAMDirty, while writes via other mapping regions
+ like BG or OBJ are first tracked in VRAMWritten_* and need to be flushed using SyncDirtyFlags.
+
+ This is more or less a description of VRAMTrackingSet::DeriveState
+ Each time before the memory is read two things could have happened
+ to each 16kb piece (16kb is the smallest unit in which mappings can
+ be made thus also the size VRAMMap_* use):
+ - this piece was remapped compared to last time we checked,
+ which means this location in memory is invalid.
+ - this piece wasn't remapped, which means we need to check whether
+ it was changed. This can be achieved by checking VRAMDirty.
+ VRAMDirty needs to be reset for the respective VRAM bank.
+*/
+
+VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG;
+VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ;
+VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG;
+VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ;
+
+VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal;
+VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal;
+VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal;
+VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal;
+
+VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture;
+VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal;
+
+
+NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG;
+NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ;
+NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG;
+NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ;
+NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7;
+
+NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9];
+
+u8 VRAMFlat_ABG[512*1024];
+u8 VRAMFlat_BBG[128*1024];
+u8 VRAMFlat_AOBJ[256*1024];
+u8 VRAMFlat_BOBJ[128*1024];
+
+u8 VRAMFlat_ABGExtPal[32*1024];
+u8 VRAMFlat_BBGExtPal[32*1024];
+u8 VRAMFlat_AOBJExtPal[8*1024];
+u8 VRAMFlat_BOBJExtPal[8*1024];
+
+u8 VRAMFlat_Texture[512*1024];
+u8 VRAMFlat_TexPal[128*1024];
bool Init()
{
@@ -113,6 +169,30 @@ void DeInit()
if (Framebuffer[1][1]) delete[] Framebuffer[1][1];
}
+// Discards all cached VRAM state: replaces each per-bank VRAMDirty bitfield with a
+// default-constructed one, resets the remembered mappings of every VRAMTrackingSet
+// and zero-fills every VRAMFlat_* copy.
+// The Texture and TexPal trackers and flat copies are reset along with the 2D ones so
+// that no flattened region keeps data or mappings from before the reset.
+void ResetVRAMCache()
+{
+    for (int i = 0; i < 9; i++)
+        VRAMDirty[i] = NonStupidBitField<128*1024/VRAMDirtyGranularity>();
+
+    VRAMDirty_ABG.Reset();
+    VRAMDirty_BBG.Reset();
+    VRAMDirty_AOBJ.Reset();
+    VRAMDirty_BOBJ.Reset();
+    VRAMDirty_ABGExtPal.Reset();
+    VRAMDirty_BBGExtPal.Reset();
+    VRAMDirty_AOBJExtPal.Reset();
+    VRAMDirty_BOBJExtPal.Reset();
+    VRAMDirty_Texture.Reset();
+    VRAMDirty_TexPal.Reset();
+
+    memset(VRAMFlat_ABG, 0, sizeof(VRAMFlat_ABG));
+    memset(VRAMFlat_BBG, 0, sizeof(VRAMFlat_BBG));
+    memset(VRAMFlat_AOBJ, 0, sizeof(VRAMFlat_AOBJ));
+    memset(VRAMFlat_BOBJ, 0, sizeof(VRAMFlat_BOBJ));
+    memset(VRAMFlat_ABGExtPal, 0, sizeof(VRAMFlat_ABGExtPal));
+    memset(VRAMFlat_BBGExtPal, 0, sizeof(VRAMFlat_BBGExtPal));
+    memset(VRAMFlat_AOBJExtPal, 0, sizeof(VRAMFlat_AOBJExtPal));
+    memset(VRAMFlat_BOBJExtPal, 0, sizeof(VRAMFlat_BOBJExtPal));
+    memset(VRAMFlat_Texture, 0, sizeof(VRAMFlat_Texture));
+    memset(VRAMFlat_TexPal, 0, sizeof(VRAMFlat_TexPal));
+}
+
void Reset()
{
VCount = 0;
@@ -186,6 +266,8 @@ void Reset()
GPU2D_B->SetFramebuffer(Framebuffer[backbuf][0]);
ResetRenderer();
+
+ ResetVRAMCache();
}
void Stop()
@@ -261,6 +343,8 @@ void DoSavestate(Savestate* file)
GPU2D_A->DoSavestate(file);
GPU2D_B->DoSavestate(file);
GPU3D::DoSavestate(file);
+
+ ResetVRAMCache();
}
void AssignFramebuffers()
@@ -411,18 +495,8 @@ void SetRenderSettings(int renderer, RenderSettings& settings)
u8* GetUniqueBankPtr(u32 mask, u32 offset)
{
- if (!mask) return NULL;
-
- int num = 0;
- if (!(mask & 0xFF)) { mask >>= 8; num += 8; }
- else
- {
- if (!(mask & 0xF)) { mask >>= 4; num += 4; }
- if (!(mask & 0x3)) { mask >>= 2; num += 2; }
- if (!(mask & 0x1)) { mask >>= 1; num += 1; }
- }
- if (mask != 1) return NULL;
-
+ if (!mask || (mask & (mask - 1)) != 0) return NULL;
+ int num = __builtin_ctz(mask);
return &VRAM[num][offset & VRAMMask[num]];
}
@@ -606,8 +680,6 @@ void MapVRAM_E(u32 bank, u8 cnt)
case 4: // ABG ext palette
UNMAP_RANGE(ABGExtPal, 0, 4);
- GPU2D_A->BGExtPalDirty(0);
- GPU2D_A->BGExtPalDirty(2);
break;
}
}
@@ -634,8 +706,6 @@ void MapVRAM_E(u32 bank, u8 cnt)
case 4: // ABG ext palette
MAP_RANGE(ABGExtPal, 0, 4);
- GPU2D_A->BGExtPalDirty(0);
- GPU2D_A->BGExtPalDirty(2);
break;
}
}
@@ -687,12 +757,10 @@ void MapVRAM_FG(u32 bank, u8 cnt)
case 4: // ABG ext palette
VRAMMap_ABGExtPal[((oldofs & 0x1) << 1)] &= ~bankmask;
VRAMMap_ABGExtPal[((oldofs & 0x1) << 1) + 1] &= ~bankmask;
- GPU2D_A->BGExtPalDirty((oldofs & 0x1) << 1);
break;
case 5: // AOBJ ext palette
VRAMMap_AOBJExtPal &= ~bankmask;
- GPU2D_A->OBJExtPalDirty();
break;
}
}
@@ -732,12 +800,10 @@ void MapVRAM_FG(u32 bank, u8 cnt)
case 4: // ABG ext palette
VRAMMap_ABGExtPal[((ofs & 0x1) << 1)] |= bankmask;
VRAMMap_ABGExtPal[((ofs & 0x1) << 1) + 1] |= bankmask;
- GPU2D_A->BGExtPalDirty((ofs & 0x1) << 1);
break;
case 5: // AOBJ ext palette
VRAMMap_AOBJExtPal |= bankmask;
- GPU2D_A->OBJExtPalDirty();
break;
}
}
@@ -773,8 +839,6 @@ void MapVRAM_H(u32 bank, u8 cnt)
case 2: // BBG ext palette
UNMAP_RANGE(BBGExtPal, 0, 4);
- GPU2D_B->BGExtPalDirty(0);
- GPU2D_B->BGExtPalDirty(2);
break;
}
}
@@ -800,8 +864,6 @@ void MapVRAM_H(u32 bank, u8 cnt)
case 2: // BBG ext palette
MAP_RANGE(BBGExtPal, 0, 4);
- GPU2D_B->BGExtPalDirty(0);
- GPU2D_B->BGExtPalDirty(2);
break;
}
}
@@ -841,7 +903,6 @@ void MapVRAM_I(u32 bank, u8 cnt)
case 3: // BOBJ ext palette
VRAMMap_BOBJExtPal &= ~bankmask;
- GPU2D_B->OBJExtPalDirty();
break;
}
}
@@ -871,7 +932,6 @@ void MapVRAM_I(u32 bank, u8 cnt)
case 3: // BOBJ ext palette
VRAMMap_BOBJExtPal |= bankmask;
- GPU2D_B->OBJExtPalDirty();
break;
}
}
@@ -937,6 +997,8 @@ void StartHBlank(u32 line)
DispStat[0] |= (1<<1);
DispStat[1] |= (1<<1);
+ SyncDirtyFlags();
+
if (VCount < 192)
{
// draw
@@ -1096,4 +1158,224 @@ void SetVCount(u16 val)
NextVCount = val;
}
+// Computes which VRAMDirtyGranularity-sized chunks of this tracked region have to be
+// refreshed, given the region's current bank mappings (one entry per
+// MappingGranularity-sized slot).
+//  - A slot whose current mapping differs from the remembered Mapping[] entry is
+//    reported as entirely dirty, and the new mapping is remembered.
+//  - An unchanged slot reports the OR of the VRAMDirty bits of every bank mapped there.
+// Before returning, the VRAMDirty bitfield of every bank encountered is cleared.
+template <u32 Size, u32 MappingGranularity>
+NonStupidBitField<Size/VRAMDirtyGranularity> VRAMTrackingSet<Size, MappingGranularity>::DeriveState(u32* currentMappings)
+{
+ NonStupidBitField<Size/VRAMDirtyGranularity> result;
+ // bitmask of banks whose VRAMDirty bits get cleared at the end
+ u16 banksToBeZeroed = 0;
+ for (u32 i = 0; i < Size / MappingGranularity; i++)
+ {
+ if (currentMappings[i] != Mapping[i])
+ {
+ // slot was remapped: mark every dirty bit it covers
+ result |= NonStupidBitField<Size/VRAMDirtyGranularity>(i*VRAMBitsPerMapping, VRAMBitsPerMapping);
+ banksToBeZeroed |= currentMappings[i];
+ Mapping[i] = currentMappings[i];
+ }
+ else
+ {
+ u32 mapping = Mapping[i];
+
+ banksToBeZeroed |= mapping;
+
+ // visit every bank bit set in the mapping, lowest bit first
+ while (mapping != 0)
+ {
+ u32 num = __builtin_ctz(mapping);
+ mapping &= ~(1 << num);
+
+ // hack for **speed**
+ // this could probably be done less ugly but then we would rely
+ // on the compiler for vectorisation
+ static_assert(VRAMDirtyGranularity == 512);
+ if (MappingGranularity == 16*1024)
+ {
+ // 16KB / 512 bytes = 32 dirty bits per slot, i.e. one u32;
+ // the slot index is wrapped to the bank's size in 16KB units
+ u32 dirty = ((u32*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 14)];
+ ((u32*)result.Data)[i] |= dirty;
+ }
+ else if (MappingGranularity == 8*1024)
+ {
+ // 8KB / 512 bytes = 16 dirty bits per slot, i.e. one u16
+ u16 dirty = ((u16*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 13)];
+ ((u16*)result.Data)[i] |= dirty;
+ }
+ else if (MappingGranularity == 128*1024)
+ {
+ // 128KB / 512 bytes = 256 dirty bits per slot, i.e. four u64
+ ((u64*)result.Data)[i * 4 + 0] |= ((u64*)VRAMDirty[num].Data)[0];
+ ((u64*)result.Data)[i * 4 + 1] |= ((u64*)VRAMDirty[num].Data)[1];
+ ((u64*)result.Data)[i * 4 + 2] |= ((u64*)VRAMDirty[num].Data)[2];
+ ((u64*)result.Data)[i * 4 + 3] |= ((u64*)VRAMDirty[num].Data)[3];
+ }
+ else
+ {
+ // welp
+ abort();
+ }
+ }
+ }
+ }
+
+ // the dirty bits have been consumed: clear them for every bank seen above
+ while (banksToBeZeroed != 0)
+ {
+ u32 num = __builtin_ctz(banksToBeZeroed);
+ banksToBeZeroed &= ~(1 << num);
+ memset(VRAMDirty[num].Data, 0, sizeof(VRAMDirty[num].Data));
+ }
+
+ return result;
+}
+
+template NonStupidBitField<32*1024/VRAMDirtyGranularity> VRAMTrackingSet<32*1024, 8*1024>::DeriveState(u32*);
+template NonStupidBitField<8*1024/VRAMDirtyGranularity> VRAMTrackingSet<8*1024, 8*1024>::DeriveState(u32*);
+template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 128*1024>::DeriveState(u32*);
+template NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMTrackingSet<128*1024, 16*1024>::DeriveState(u32*);
+template NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMTrackingSet<256*1024, 16*1024>::DeriveState(u32*);
+template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 16*1024>::DeriveState(u32*);
+
+// Transfers the "written" bits of one mapped region into the per-bank VRAMDirty
+// bitfields. For every set bit, the mapping entry of the 16KB slot containing it is
+// looked up and the matching bit (wrapped to the bank's size) is set in each bank
+// mapped there. All written flags are cleared afterwards.
+template <u32 Size>
+void SyncDirtyFlags(u32* mappings, NonStupidBitField<Size>& writtenFlags)
+{
+    const u32 bitsPer16KB = 16*1024/VRAMDirtyGranularity;
+
+    typename NonStupidBitField<Size>::Iterator cur = writtenFlags.Begin();
+    for (; cur != writtenFlags.End(); cur++)
+    {
+        for (u32 banks = mappings[*cur / bitsPer16KB]; banks != 0; )
+        {
+            const u32 bank = __builtin_ctz(banks);
+
+            VRAMDirty[bank][*cur & (VRAMMask[bank] / VRAMDirtyGranularity)] = true;
+
+            banks &= ~(1 << bank);
+        }
+    }
+
+    memset(writtenFlags.Data, 0, sizeof(writtenFlags.Data));
+}
+
+// Flushes the written flags of every indirectly mapped region (ABG, AOBJ, BBG, BOBJ and
+// the ARM7 mapping) into the per-bank VRAMDirty bitfields.
+// NOTE(review): the templated helper indexes the mapping table in 16KB units, whereas
+// WriteVRAM_ARM7 indexes VRAMMap_ARM7 with (addr >> 17) & 0x1, i.e. in 128KB units --
+// confirm the ARM7 call stays within the bounds of VRAMMap_ARM7.
+void SyncDirtyFlags()
+{
+ SyncDirtyFlags(VRAMMap_ABG, VRAMWritten_ABG);
+ SyncDirtyFlags(VRAMMap_AOBJ, VRAMWritten_AOBJ);
+ SyncDirtyFlags(VRAMMap_BBG, VRAMWritten_BBG);
+ SyncDirtyFlags(VRAMMap_BOBJ, VRAMWritten_BOBJ);
+ SyncDirtyFlags(VRAMMap_ARM7, VRAMWritten_ARM7);
+}
+
+// Refreshes every dirty VRAMDirtyGranularity-sized chunk of a flattened VRAM copy.
+//   flat       - destination buffer holding the linear copy
+//   mappings   - bank mapping table, one entry per MappingGranularity bytes
+//   dirty      - set of chunk indices to refresh
+//   slowAccess - reader used 8 bytes at a time when GetUniqueBankPtr returns NULL
+//                for the chunk's mapping entry
+// Returns true if at least one chunk was copied.
+template <u32 MappingGranularity, u32 Size>
+inline bool CopyLinearVRAM(u8* flat, u32* mappings, NonStupidBitField<Size>& dirty, u64 (*slowAccess)(u32 addr))
+{
+    const u32 bitsPerMapping = MappingGranularity / VRAMDirtyGranularity;
+
+    bool copiedAnything = false;
+
+    for (typename NonStupidBitField<Size>::Iterator cur = dirty.Begin(); cur != dirty.End(); cur++)
+    {
+        u32 byteOffset = *cur * VRAMDirtyGranularity;
+        u8* out = flat + byteOffset;
+        u8* direct = GetUniqueBankPtr(mappings[*cur / bitsPerMapping], byteOffset);
+
+        if (!direct)
+        {
+            // no unique bank pointer available: go through the slow reader
+            for (u32 i = 0; i < VRAMDirtyGranularity; i += 8)
+                *(u64*)&out[i] = slowAccess(byteOffset + i);
+        }
+        else
+        {
+            memcpy(out, direct, VRAMDirtyGranularity);
+        }
+
+        copiedAnything = true;
+    }
+
+    return copiedAnything;
+}
+
+// The MakeVRAMFlat_*Coherent wrappers below each refresh one flattened VRAM copy: they
+// forward the matching VRAMFlat_* buffer, VRAMMap_* table, the caller's dirty set and the
+// region's u64 reader to CopyLinearVRAM, and return its result. The template argument is
+// the mapping granularity in bytes (128KB for Texture, 16KB for the others here).
+bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty)
+{
+ return CopyLinearVRAM<128*1024>(VRAMFlat_Texture, VRAMMap_Texture, dirty, ReadVRAM_Texture<u64>);
+}
+bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty)
+{
+ return CopyLinearVRAM<16*1024>(VRAMFlat_TexPal, VRAMMap_TexPal, dirty, ReadVRAM_TexPal<u64>);
+}
+
+bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty)
+{
+ return CopyLinearVRAM<16*1024>(VRAMFlat_ABG, VRAMMap_ABG, dirty, ReadVRAM_ABG<u64>);
+}
+bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty)
+{
+ return CopyLinearVRAM<16*1024>(VRAMFlat_BBG, VRAMMap_BBG, dirty, ReadVRAM_BBG<u64>);
+}
+
+bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty)
+{
+ return CopyLinearVRAM<16*1024>(VRAMFlat_AOBJ, VRAMMap_AOBJ, dirty, ReadVRAM_AOBJ<u64>);
+}
+bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty)
+{
+ return CopyLinearVRAM<16*1024>(VRAMFlat_BOBJ, VRAMMap_BOBJ, dirty, ReadVRAM_BOBJ<u64>);
}
+
+// Reads a T from the ABG extended palette space. Address bits 13-14 select the 8KB
+// slot whose mapping entry says which of banks E, F and G are mapped there; the values
+// read from all mapped banks are OR-ed together. Returns 0 when no bank is mapped.
+template<typename T>
+T ReadVRAM_ABGExtPal(u32 addr)
+{
+    const u32 slot = (addr >> 13) & 0x3;
+    const u32 banks = VRAMMap_ABGExtPal[slot];
+
+    T value = 0;
+    if (banks & (1<<4))
+        value |= *(T*)&VRAM_E[addr & 0x7FFF];
+    if (banks & (1<<5))
+        value |= *(T*)&VRAM_F[addr & 0x3FFF];
+    if (banks & (1<<6))
+        value |= *(T*)&VRAM_G[addr & 0x3FFF];
+
+    return value;
+}
+
+// Reads a T from the BBG extended palette space. Address bits 13-14 select the 8KB
+// slot; the value comes from bank H when that slot's mapping has bit 7 set, otherwise
+// 0 is returned.
+template<typename T>
+T ReadVRAM_BBGExtPal(u32 addr)
+{
+    const u32 slot = (addr >> 13) & 0x3;
+    const u32 banks = VRAMMap_BBGExtPal[slot];
+
+    T value = 0;
+    if (banks & (1<<7))
+        value |= *(T*)&VRAM_H[addr & 0x7FFF];
+
+    return value;
+}
+
+// Reads a T from the AOBJ extended palette space (8KB, address wrapped with 0x1FFF).
+// The values of every bank mapped in VRAMMap_AOBJExtPal are OR-ed together; 0 is
+// returned when none is mapped.
+// Bank bits follow the VRAM[] order used by the mapping masks: F is bit 5 and G is
+// bit 6 (bit 4 is bank E, which is not read here).
+template<typename T>
+T ReadVRAM_AOBJExtPal(u32 addr)
+{
+    u32 mask = VRAMMap_AOBJExtPal;
+
+    T ret = 0;
+    if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x1FFF];
+    if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x1FFF];
+
+    return ret;
+}
+
+// Reads a T from the BOBJ extended palette space (address wrapped with 0x1FFF). The
+// value comes from bank I when bit 8 of VRAMMap_BOBJExtPal is set, otherwise 0 is
+// returned.
+template<typename T>
+T ReadVRAM_BOBJExtPal(u32 addr)
+{
+    const u32 banks = VRAMMap_BOBJExtPal;
+
+    T value = 0;
+    if (banks & (1<<8))
+        value |= *(T*)&VRAM_I[addr & 0x1FFF];
+
+    return value;
+}
+
+// Extended palette counterparts of the MakeVRAMFlat_*Coherent wrappers: each forwards
+// its VRAMFlat_* buffer, mapping table, the caller's dirty set and the region's u64
+// reader to CopyLinearVRAM with an 8KB mapping granularity, and returns its result.
+// The OBJ variants pass the address of a single mapping word instead of a table.
+bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty)
+{
+ return CopyLinearVRAM<8*1024>(VRAMFlat_ABGExtPal, VRAMMap_ABGExtPal, dirty, ReadVRAM_ABGExtPal<u64>);
+}
+bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty)
+{
+ return CopyLinearVRAM<8*1024>(VRAMFlat_BBGExtPal, VRAMMap_BBGExtPal, dirty, ReadVRAM_BBGExtPal<u64>);
+}
+
+bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty)
+{
+ return CopyLinearVRAM<8*1024>(VRAMFlat_AOBJExtPal, &VRAMMap_AOBJExtPal, dirty, ReadVRAM_AOBJExtPal<u64>);
+}
+bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty)
+{
+ return CopyLinearVRAM<8*1024>(VRAMFlat_BOBJExtPal, &VRAMMap_BOBJExtPal, dirty, ReadVRAM_BOBJExtPal<u64>);
+}
+
+} \ No newline at end of file
diff --git a/src/GPU.h b/src/GPU.h
index 1564ef7..2f71da6 100644
--- a/src/GPU.h
+++ b/src/GPU.h
@@ -20,6 +20,7 @@
#define GPU_H
#include "GPU2D.h"
+#include "NonStupidBitfield.h"
namespace GPU
{
@@ -45,7 +46,7 @@ extern u8 VRAM_G[ 16*1024];
extern u8 VRAM_H[ 32*1024];
extern u8 VRAM_I[ 16*1024];
-extern u8* VRAM[9];
+extern u8* const VRAM[9];
extern u32 VRAMMap_LCDC;
extern u32 VRAMMap_ABG[0x20];
@@ -73,6 +74,73 @@ extern GPU2D* GPU2D_B;
extern int Renderer;
+const u32 VRAMDirtyGranularity = 512;
+
+extern NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG;
+extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ;
+extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG;
+extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ;
+extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7;
+
+extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9];
+
+// Remembers, for one mapped VRAM region of Size bytes, which bank mask was mapped into
+// each MappingGranularity-sized slot the last time DeriveState ran.
+template <u32 Size, u32 MappingGranularity>
+struct VRAMTrackingSet
+{
+ // bank mask last seen for each slot
+ u16 Mapping[Size / MappingGranularity];
+
+ // number of dirty bits (VRAMDirtyGranularity-sized chunks) covered by one slot
+ const u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity;
+
+ // Forgets all remembered mappings by setting every slot to 0.
+ void Reset()
+ {
+ memset(Mapping, 0, sizeof(Mapping));
+ }
+ // Returns the dirty chunks for the given current mappings; defined in GPU.cpp.
+ NonStupidBitField<Size/VRAMDirtyGranularity> DeriveState(u32* currentMappings);
+};
+
+extern VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG;
+extern VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ;
+extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG;
+extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ;
+
+extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal;
+extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal;
+extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal;
+extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal;
+
+extern VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture;
+extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal;
+
+extern u8 VRAMFlat_ABG[512*1024];
+extern u8 VRAMFlat_BBG[128*1024];
+extern u8 VRAMFlat_AOBJ[256*1024];
+extern u8 VRAMFlat_BOBJ[128*1024];
+
+extern u8 VRAMFlat_ABGExtPal[32*1024];
+extern u8 VRAMFlat_BBGExtPal[32*1024];
+
+extern u8 VRAMFlat_AOBJExtPal[8*1024];
+extern u8 VRAMFlat_BOBJExtPal[8*1024];
+
+extern u8 VRAMFlat_Texture[512*1024];
+extern u8 VRAMFlat_TexPal[128*1024];
+
+bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty);
+bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
+
+bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty);
+bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
+
+bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty);
+bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty);
+
+bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty);
+bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty);
+
+bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty);
+bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty);
+
+void SyncDirtyFlags();
typedef struct
{
@@ -233,7 +301,11 @@ void WriteVRAM_LCDC(u32 addr, T val)
default: return;
}
- if (VRAMMap_LCDC & (1<<bank)) *(T*)&VRAM[bank][addr] = val;
+ if (VRAMMap_LCDC & (1<<bank))
+ {
+ *(T*)&VRAM[bank][addr] = val;
+ VRAMDirty[bank][addr / VRAMDirtyGranularity] = true;
+ }
}
@@ -262,6 +334,8 @@ void WriteVRAM_ABG(u32 addr, T val)
{
u32 mask = VRAMMap_ABG[(addr >> 14) & 0x1F];
+ VRAMWritten_ABG[(addr & 0x7FFFF) / VRAMDirtyGranularity] = true;
+
if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val;
if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val;
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
@@ -295,6 +369,8 @@ void WriteVRAM_AOBJ(u32 addr, T val)
{
u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF];
+ VRAMWritten_AOBJ[(addr & 0x3FFFF) / VRAMDirtyGranularity] = true;
+
if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val;
if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val;
if (mask & (1<<4)) *(T*)&VRAM_E[addr & 0xFFFF] = val;
@@ -324,6 +400,8 @@ void WriteVRAM_BBG(u32 addr, T val)
{
u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7];
+ VRAMWritten_BBG[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
+
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
if (mask & (1<<7)) *(T*)&VRAM_H[addr & 0x7FFF] = val;
if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val;
@@ -350,11 +428,12 @@ void WriteVRAM_BOBJ(u32 addr, T val)
{
u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7];
+ VRAMWritten_BOBJ[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
+
if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val;
if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val;
}
-
template<typename T>
T ReadVRAM_ARM7(u32 addr)
{
@@ -372,6 +451,8 @@ void WriteVRAM_ARM7(u32 addr, T val)
{
u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1];
+ VRAMWritten_ARM7[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true;
+
if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val;
}
diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp
index 7774c65..c1a2d47 100644
--- a/src/GPU2D.cpp
+++ b/src/GPU2D.cpp
@@ -148,12 +148,6 @@ void GPU2D::Reset()
CaptureCnt = 0;
MasterBrightness = 0;
-
- BGExtPalStatus[0] = 0;
- BGExtPalStatus[1] = 0;
- BGExtPalStatus[2] = 0;
- BGExtPalStatus[3] = 0;
- OBJExtPalStatus = 0;
}
void GPU2D::DoSavestate(Savestate* file)
@@ -208,13 +202,6 @@ void GPU2D::DoSavestate(Savestate* file)
if (!file->Saving)
{
- // refresh those
- BGExtPalStatus[0] = 0;
- BGExtPalStatus[1] = 0;
- BGExtPalStatus[2] = 0;
- BGExtPalStatus[3] = 0;
- OBJExtPalStatus = 0;
-
CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]];
CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[0]];
}
@@ -228,9 +215,6 @@ void GPU2D::SetFramebuffer(u32* buf)
void GPU2D::SetRenderSettings(bool accel)
{
Accelerated = accel;
-
- if (Accelerated) DrawPixel = DrawPixel_Accel;
- else DrawPixel = DrawPixel_Normal;
}
@@ -761,6 +745,25 @@ void GPU2D::DrawScanline(u32 line)
int n3dline = line;
line = GPU::VCount;
+ if (Num == 0)
+ {
+ auto bgDirty = GPU::VRAMDirty_ABG.DeriveState(GPU::VRAMMap_ABG);
+ GPU::MakeVRAMFlat_ABGCoherent(bgDirty);
+ auto bgExtPalDirty = GPU::VRAMDirty_ABGExtPal.DeriveState(GPU::VRAMMap_ABGExtPal);
+ GPU::MakeVRAMFlat_ABGExtPalCoherent(bgExtPalDirty);
+ auto objExtPalDirty = GPU::VRAMDirty_AOBJExtPal.DeriveState(&GPU::VRAMMap_AOBJExtPal);
+ GPU::MakeVRAMFlat_AOBJExtPalCoherent(objExtPalDirty);
+ }
+ else
+ {
+ auto bgDirty = GPU::VRAMDirty_BBG.DeriveState(GPU::VRAMMap_BBG);
+ GPU::MakeVRAMFlat_BBGCoherent(bgDirty);
+ auto bgExtPalDirty = GPU::VRAMDirty_BBGExtPal.DeriveState(GPU::VRAMMap_BBGExtPal);
+ GPU::MakeVRAMFlat_BBGExtPalCoherent(bgExtPalDirty);
+ auto objExtPalDirty = GPU::VRAMDirty_BOBJExtPal.DeriveState(&GPU::VRAMMap_BOBJExtPal);
+ GPU::MakeVRAMFlat_BOBJExtPalCoherent(objExtPalDirty);
+ }
+
bool forceblank = false;
// scanlines that end up outside of the GPU drawing range
@@ -973,6 +976,9 @@ void GPU2D::DoCapture(u32 line, u32 width)
u16* dst = (u16*)GPU::VRAM[dstvram];
u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width);
+ static_assert(GPU::VRAMDirtyGranularity == 512);
+ GPU::VRAMDirty[dstvram][(dstaddr & 0x1FFFF) / GPU::VRAMDirtyGranularity] = true;
+
// TODO: handle 3D in accelerated mode!!
u32* srcA;
@@ -1191,85 +1197,20 @@ void GPU2D::SampleFIFO(u32 offset, u32 num)
}
}
-
-void GPU2D::BGExtPalDirty(u32 base)
-{
- BGExtPalStatus[base] = 0;
- BGExtPalStatus[base+1] = 0;
-}
-
-void GPU2D::OBJExtPalDirty()
-{
- OBJExtPalStatus = 0;
-}
-
-
u16* GPU2D::GetBGExtPal(u32 slot, u32 pal)
{
- u16* dst = &BGExtPalCache[slot][pal << 8];
-
- if (!(BGExtPalStatus[slot] & (1<<pal)))
- {
- if (Num)
- {
- if (GPU::VRAMMap_BBGExtPal[slot] & (1<<7))
- memcpy(dst, &GPU::VRAM_H[(slot << 13) + (pal << 9)], 256*2);
- else
- memset(dst, 0, 256*2);
- }
- else
- {
- memset(dst, 0, 256*2);
-
- if (GPU::VRAMMap_ABGExtPal[slot] & (1<<4))
- for (int i = 0; i < 256; i+=2)
- *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_E[(slot << 13) + (pal << 9) + (i << 1)];
-
- if (GPU::VRAMMap_ABGExtPal[slot] & (1<<5))
- for (int i = 0; i < 256; i+=2)
- *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_F[((slot&1) << 13) + (pal << 9) + (i << 1)];
-
- if (GPU::VRAMMap_ABGExtPal[slot] & (1<<6))
- for (int i = 0; i < 256; i+=2)
- *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_G[((slot&1) << 13) + (pal << 9) + (i << 1)];
- }
-
- BGExtPalStatus[slot] |= (1<<pal);
- }
-
- return dst;
+ const u32 PaletteSize = 256 * 2;
+ const u32 SlotSize = PaletteSize * 16;
+ return (u16*)&(Num == 0
+ ? GPU::VRAMFlat_ABGExtPal
+ : GPU::VRAMFlat_BBGExtPal)[slot * SlotSize + pal * PaletteSize];
}
u16* GPU2D::GetOBJExtPal()
{
- u16* dst = OBJExtPalCache;
-
- if (!OBJExtPalStatus)
- {
- if (Num)
- {
- if (GPU::VRAMMap_BOBJExtPal & (1<<8))
- memcpy(dst, &GPU::VRAM_I[0], 16*256*2);
- else
- memset(dst, 0, 16*256*2);
- }
- else
- {
- memset(dst, 0, 16*256*2);
-
- if (GPU::VRAMMap_AOBJExtPal & (1<<5))
- for (int i = 0; i < 16*256; i+=2)
- *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_F[i << 1];
-
- if (GPU::VRAMMap_AOBJExtPal & (1<<6))
- for (int i = 0; i < 16*256; i+=2)
- *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_G[i << 1];
- }
-
- OBJExtPalStatus = 1;
- }
-
- return dst;
+ return Num == 0
+ ? (u16*)GPU::VRAMFlat_AOBJExtPal
+ : (u16*)GPU::VRAMFlat_BOBJExtPal;
}
@@ -1330,10 +1271,36 @@ void GPU2D::CalculateWindowMask(u32 line)
#define DoDrawBG(type, line, num) \
- { if ((BGCnt[num] & 0x0040) && (BGMosaicSize[0] > 0)) DrawBG_##type<true>(line, num); else DrawBG_##type<false>(line, num); }
+ { \
+ if ((BGCnt[num] & 0x0040) && (BGMosaicSize[0] > 0)) \
+ { \
+ if (Accelerated) DrawBG_##type<true, DrawPixel_Accel>(line, num); \
+ else DrawBG_##type<true, DrawPixel_Normal>(line, num); \
+ } \
+ else \
+ { \
+ if (Accelerated) DrawBG_##type<false, DrawPixel_Accel>(line, num); \
+ else DrawBG_##type<false, DrawPixel_Normal>(line, num); \
+ } \
+ }
#define DoDrawBG_Large(line) \
- { if ((BGCnt[2] & 0x0040) && (BGMosaicSize[0] > 0)) DrawBG_Large<true>(line); else DrawBG_Large<false>(line); }
+ do \
+ { \
+ if ((BGCnt[2] & 0x0040) && (BGMosaicSize[0] > 0)) \
+ { \
+ if (Accelerated) DrawBG_Large<true, DrawPixel_Accel>(line); \
+ else DrawBG_Large<true, DrawPixel_Normal>(line); \
+ } \
+ else \
+ { \
+ if (Accelerated) DrawBG_Large<false, DrawPixel_Accel>(line); \
+ else DrawBG_Large<false, DrawPixel_Normal>(line); \
+ } \
+ } while (false)
+
+#define DoInterleaveSprites(prio) \
+ if (Accelerated) InterleaveSprites<DrawPixel_Accel>(prio); else InterleaveSprites<DrawPixel_Normal>(prio);
template<u32 bgmode>
void GPU2D::DrawScanlineBGMode(u32 line)
@@ -1382,7 +1349,7 @@ void GPU2D::DrawScanlineBGMode(u32 line)
}
}
if ((DispCnt & 0x1000) && NumSprites)
- InterleaveSprites(0x40000 | (i<<16));
+ DoInterleaveSprites(0x40000 | (i<<16));
}
}
@@ -1394,7 +1361,7 @@ void GPU2D::DrawScanlineBGMode6(u32 line)
{
if (DispCnt & 0x0400)
{
- DoDrawBG_Large(line)
+ DoDrawBG_Large(line);
}
}
if ((BGCnt[0] & 0x3) == i)
@@ -1406,7 +1373,7 @@ void GPU2D::DrawScanlineBGMode6(u32 line)
}
}
if ((DispCnt & 0x1000) && NumSprites)
- InterleaveSprites(0x40000 | (i<<16));
+ DoInterleaveSprites(0x40000 | (i<<16))
}
}
@@ -1434,7 +1401,7 @@ void GPU2D::DrawScanlineBGMode7(u32 line)
}
}
if ((DispCnt & 0x1000) && NumSprites)
- InterleaveSprites(0x40000 | (i<<16));
+ DoInterleaveSprites(0x40000 | (i<<16))
}
}
@@ -1674,7 +1641,21 @@ void GPU2D::DrawBG_3D()
}
}
-template<bool mosaic>
+// Selects the flattened BG VRAM copy and its address mask for a 2D engine:
+// num == 0 yields VRAMFlat_ABG with mask 0x7FFFF, any other value yields
+// VRAMFlat_BBG with mask 0x1FFFF. Results are written to data and mask.
+void GetBGVRAM(u32 num, u8*& data, u32& mask)
+{
+    if (num != 0)
+    {
+        data = GPU::VRAMFlat_BBG;
+        mask = 0x1FFFF;
+        return;
+    }
+
+    data = GPU::VRAMFlat_ABG;
+    mask = 0x7FFFF;
+}
+
+template<bool mosaic, GPU2D::DrawPixel drawPixel>
void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
{
u16 bgcnt = BGCnt[bgnum];
@@ -1697,17 +1678,20 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
extpal = (DispCnt & 0x40000000);
if (extpal) extpalslot = ((bgnum<2) && (bgcnt&0x2000)) ? (2+bgnum) : bgnum;
+ u8* bgvram;
+ u32 bgvrammask;
+ GetBGVRAM(Num, bgvram, bgvrammask);
if (Num)
{
- tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3);
+ tilesetaddr = ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0x400];
}
else
{
- tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
+ tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0];
}
@@ -1735,7 +1719,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
// preload shit as needed
if ((xoff & 0x7) || mosaic)
{
- curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3));
+ curtile = *(u16*)&bgvram[(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)) & bgvrammask];
if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12);
else curpal = pal;
@@ -1756,7 +1740,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
(mosaic && ((xpos >> 3) != (lastxpos >> 3))))
{
// load a new tile
- curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3));
+ curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask];
if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12);
else curpal = pal;
@@ -1771,10 +1755,10 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
if (WindowMask[i] & (1<<bgnum))
{
u32 tilexoff = (curtile & 0x0400) ? (7-(xpos&0x7)) : (xpos&0x7);
- color = GPU::ReadVRAM_BG<u8>(pixelsaddr + tilexoff);
+ color = bgvram[(pixelsaddr + tilexoff) & bgvrammask];
if (color)
- DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum);
+ drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum);
}
xoff++;
@@ -1787,7 +1771,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
// preload shit as needed
if ((xoff & 0x7) || mosaic)
{
- curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3));
+ curtile = *(u16*)&bgvram[((tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3))) & bgvrammask];
curpal = pal + ((curtile & 0xF000) >> 8);
pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5)
+ (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2);
@@ -1805,7 +1789,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
(mosaic && ((xpos >> 3) != (lastxpos >> 3))))
{
// load a new tile
- curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3));
+ curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask];
curpal = pal + ((curtile & 0xF000) >> 8);
pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5)
+ (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2);
@@ -1819,15 +1803,15 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
u32 tilexoff = (curtile & 0x0400) ? (7-(xpos&0x7)) : (xpos&0x7);
if (tilexoff & 0x1)
{
- color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) >> 4;
+ color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] >> 4;
}
else
{
- color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) & 0x0F;
+ color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] & 0x0F;
}
if (color)
- DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum);
+ drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum);
}
xoff++;
@@ -1835,7 +1819,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum)
}
}
-template<bool mosaic>
+template<bool mosaic, GPU2D::DrawPixel drawPixel>
void GPU2D::DrawBG_Affine(u32 line, u32 bgnum)
{
u16 bgcnt = BGCnt[bgnum];
@@ -1872,17 +1856,20 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum)
rotY -= (BGMosaicY * rotD);
}
+ u8* bgvram;
+ u32 bgvrammask;
+ GetBGVRAM(Num, bgvram, bgvrammask); // must be filled before the bgvram[...] reads below
if (Num)
{
- tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3);
+ tilesetaddr = ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0x400];
}
else
{
- tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
+ tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0];
}
@@ -1911,16 +1898,16 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum)
if ((!((finalX|finalY) & overflowmask)))
{
- curtile = GPU::ReadVRAM_BG<u8>(tilemapaddr + ((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)));
+ curtile = bgvram[(tilemapaddr + ((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11))) & bgvrammask];
// draw pixel
u32 tilexoff = (finalX >> 8) & 0x7;
u32 tileyoff = (finalY >> 8) & 0x7;
- color = GPU::ReadVRAM_BG<u8>(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff);
+ color = bgvram[(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff) & bgvrammask];
if (color)
- DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum);
+ drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum);
}
}
@@ -1932,7 +1919,7 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum)
BGYRefInternal[bgnum-2] += rotD;
}
-template<bool mosaic>
+template<bool mosaic, GPU2D::DrawPixel drawPixel>
void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
{
u16 bgcnt = BGCnt[bgnum];
@@ -1941,6 +1928,10 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
u16* pal;
u32 extpal;
+ u8* bgvram;
+ u32 bgvrammask;
+ GetBGVRAM(Num, bgvram, bgvrammask);
+
extpal = (DispCnt & 0x40000000);
s16 rotA = BGRotA[bgnum-2];
@@ -1984,8 +1975,8 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
ofymask = ~ymask;
}
- if (Num) tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 6);
- else tilemapaddr = 0x06000000 + ((bgcnt & 0x1F00) << 6);
+ // Same offset whether Num is set or not: the Num-specific base now comes from GetBGVRAM.
+ tilemapaddr = ((bgcnt & 0x1F00) << 6);
if (bgcnt & 0x0004)
{
@@ -2012,10 +2003,10 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
if (!(finalX & ofxmask) && !(finalY & ofymask))
{
- color = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1));
+ color = *(u16*)&bgvram[(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)) & bgvrammask];
if (color & 0x8000)
- DrawPixel(&BGOBJLine[i], color, 0x01000000<<bgnum);
+ drawPixel(&BGOBJLine[i], color, 0x01000000<<bgnum);
}
}
@@ -2051,10 +2042,10 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
if (!(finalX & ofxmask) && !(finalY & ofymask))
{
- color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8));
+ color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask];
if (color)
- DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum);
+ drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum);
}
}
@@ -2083,15 +2074,15 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
if (Num)
{
- tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3);
+ tilesetaddr = ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0x400];
}
else
{
- tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
- tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
+ tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12);
+ tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3);
pal = (u16*)&GPU::Palette[0];
}
@@ -2121,7 +2112,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
if ((!((finalX|finalY) & overflowmask)))
{
- curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1));
+ curtile = *(u16*)&bgvram[(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1)) & bgvrammask];
if (extpal) curpal = GetBGExtPal(bgnum, curtile>>12);
else curpal = pal;
@@ -2133,10 +2124,10 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
if (curtile & 0x0400) tilexoff = 7-tilexoff;
if (curtile & 0x0800) tileyoff = 7-tileyoff;
- color = GPU::ReadVRAM_BG<u8>(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff);
+ color = bgvram[(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff) & bgvrammask];
if (color)
- DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum);
+ drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum);
}
}
@@ -2149,7 +2140,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum)
BGYRefInternal[bgnum-2] += rotD;
}
-template<bool mosaic>
+template<bool mosaic, GPU2D::DrawPixel drawPixel>
void GPU2D::DrawBG_Large(u32 line) // BG is always BG2
{
u16 bgcnt = BGCnt[2];
@@ -2199,8 +2190,9 @@ void GPU2D::DrawBG_Large(u32 line) // BG is always BG2
rotY -= (BGMosaicY * rotD);
}
- if (Num) tilemapaddr = 0x06200000;
- else tilemapaddr = 0x06000000;
+ u8* bgvram;
+ u32 bgvrammask;
+ GetBGVRAM(Num, bgvram, bgvrammask);
// 256-color bitmap
@@ -2228,10 +2220,10 @@ void GPU2D::DrawBG_Large(u32 line) // BG is always BG2
if (!(finalX & ofxmask) && !(finalY & ofymask))
{
- color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8));
+ color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask];
if (color)
- DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2);
+ drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2);
}
}
@@ -2274,6 +2266,7 @@ void GPU2D::ApplySpriteMosaicX()
}
}
+template <GPU2D::DrawPixel drawPixel>
void GPU2D::InterleaveSprites(u32 prio)
{
u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
@@ -2297,7 +2290,7 @@ void GPU2D::InterleaveSprites(u32 prio)
else
color = extpal[pixel & 0xFFF];
- DrawPixel(&BGOBJLine[i], color, pixel & 0xFF000000);
+ drawPixel(&BGOBJLine[i], color, pixel & 0xFF000000);
}
}
else
@@ -2317,11 +2310,25 @@ void GPU2D::InterleaveSprites(u32 prio)
else
color = pal[pixel & 0xFF];
- DrawPixel(&BGOBJLine[i], color, pixel & 0xFF000000);
+ drawPixel(&BGOBJLine[i], color, pixel & 0xFF000000);
}
}
}
+void GetOBJVRAM(u32 num, u8*& data, u32& mask)
+{
+    // num == 0 selects the AOBJ flat buffer (mask 0x3FFFF);
+    // any other value selects the BOBJ flat buffer (mask 0x1FFFF).
+    if (num != 0)
+    {
+        data = GPU::VRAMFlat_BOBJ;
+        mask = 0x1FFFF;
+        return;
+    }
+    data = GPU::VRAMFlat_AOBJ;
+    mask = 0x3FFFF;
+}
+
#define DoDrawSprite(type, ...) \
if (iswin) \
{ \
@@ -2346,6 +2353,17 @@ void GPU2D::DrawSprites(u32 line)
OBJMosaicYCount = 0;
}
+ if (Num == 0)
+ {
+ auto objDirty = GPU::VRAMDirty_AOBJ.DeriveState(GPU::VRAMMap_AOBJ);
+ GPU::MakeVRAMFlat_AOBJCoherent(objDirty);
+ }
+ else
+ {
+ auto objDirty = GPU::VRAMDirty_BOBJ.DeriveState(GPU::VRAMMap_BOBJ);
+ GPU::MakeVRAMFlat_BOBJCoherent(objDirty);
+ }
+
NumSprites = 0;
memset(OBJLine, 0, 256*4);
memset(OBJWindow, 0, 256);
@@ -2458,6 +2476,10 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
u32 ytilefactor;
+ u8* objvram;
+ u32 objvrammask;
+ GetOBJVRAM(Num, objvram, objvrammask);
+
s32 centerX = boundwidth >> 1;
s32 centerY = boundheight >> 1;
@@ -2501,6 +2523,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
pixelattr |= (0xC0000000 | (alpha << 24));
+ u32 pixelsaddr;
if (DispCnt & 0x40)
{
if (DispCnt & 0x20)
@@ -2512,7 +2535,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
}
else
{
- tilenum <<= (7 + ((DispCnt >> 22) & 0x1));
+ pixelsaddr = tilenum << (7 + ((DispCnt >> 22) & 0x1));
ytilefactor = ((width >> 8) * 2);
}
}
@@ -2520,23 +2543,21 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
{
if (DispCnt & 0x20)
{
- tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
+ pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
ytilefactor = (256 * 2);
}
else
{
- tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
+ pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
ytilefactor = (128 * 2);
}
}
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
-
for (; xoff < boundwidth;)
{
if ((u32)rotX < width && (u32)rotY < height)
{
- color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1));
+ color = *(u16*)&objvram[(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)) & objvrammask];
if (color & 0x8000)
{
@@ -2561,9 +2582,10 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
}
else
{
+ u32 pixelsaddr = tilenum;
if (DispCnt & 0x10)
{
- tilenum <<= ((DispCnt >> 20) & 0x3);
+ pixelsaddr <<= ((DispCnt >> 20) & 0x3);
ytilefactor = (width >> 11) << ((attrib[0] & 0x2000) ? 1:0);
}
else
@@ -2574,12 +2596,12 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
if (spritemode == 1) pixelattr |= 0x80000000;
else pixelattr |= 0x10000000;
+ ytilefactor <<= 5;
+ pixelsaddr <<= 5;
+
if (attrib[0] & 0x2000)
{
// 256-color
- tilenum <<= 5;
- ytilefactor <<= 5;
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
if (!window)
{
@@ -2593,7 +2615,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
{
if ((u32)rotX < width && (u32)rotY < height)
{
- color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8));
+ color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)) & objvrammask];
if (color)
{
@@ -2619,10 +2641,6 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
else
{
// 16-color
- tilenum <<= 5;
- ytilefactor <<= 5;
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
-
if (!window)
{
pixelattr |= 0x1000;
@@ -2633,7 +2651,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi
{
if ((u32)rotX < width && (u32)rotY < height)
{
- color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9));
+ color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)) & objvrammask];
if (rotX & 0x100)
color >>= 4;
else
@@ -2681,6 +2699,10 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
pixelattr |= 0x100000;
}
+ u8* objvram;
+ u32 objvrammask;
+ GetOBJVRAM(Num, objvram, objvrammask);
+
// yflip
if (attrib[1] & 0x2000)
ypos = height-1 - ypos;
@@ -2711,6 +2733,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
pixelattr |= (0xC0000000 | (alpha << 24));
+ u32 pixelsaddr = tilenum;
if (DispCnt & 0x40)
{
if (DispCnt & 0x20)
@@ -2722,25 +2745,24 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
}
else
{
- tilenum <<= (7 + ((DispCnt >> 22) & 0x1));
- tilenum += (ypos * width * 2);
+ pixelsaddr <<= (7 + ((DispCnt >> 22) & 0x1));
+ pixelsaddr += (ypos * width * 2);
}
}
else
{
if (DispCnt & 0x20)
{
- tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
- tilenum += (ypos * 256 * 2);
+ pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7);
+ pixelsaddr += (ypos * 256 * 2);
}
else
{
- tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
- tilenum += (ypos * 128 * 2);
+ pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7);
+ pixelsaddr += (ypos * 128 * 2);
}
}
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
s32 pixelstride;
if (attrib[1] & 0x1000) // xflip
@@ -2757,7 +2779,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
for (; xoff < xend;)
{
- color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr);
+ color = *(u16*)&objvram[pixelsaddr & objvrammask];
pixelsaddr += pixelstride;
@@ -2781,14 +2803,15 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
}
else
{
+ u32 pixelsaddr = tilenum;
if (DispCnt & 0x10)
{
- tilenum <<= ((DispCnt >> 20) & 0x3);
- tilenum += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0);
+ pixelsaddr <<= ((DispCnt >> 20) & 0x3);
+ pixelsaddr += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0);
}
else
{
- tilenum += ((ypos >> 3) * 0x20);
+ pixelsaddr += ((ypos >> 3) * 0x20);
}
if (spritemode == 1) pixelattr |= 0x80000000;
@@ -2797,8 +2820,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
if (attrib[0] & 0x2000)
{
// 256-color
- tilenum <<= 5;
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
+ pixelsaddr <<= 5;
pixelsaddr += ((ypos & 0x7) << 3);
s32 pixelstride;
@@ -2827,7 +2849,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
for (; xoff < xend;)
{
- color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr);
+ color = objvram[pixelsaddr & objvrammask]; // mask the offset like the bitmap and 16-color paths
pixelsaddr += pixelstride;
@@ -2853,8 +2875,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
else
{
// 16-color
- tilenum <<= 5;
- u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
+ pixelsaddr <<= 5;
pixelsaddr += ((ypos & 0x7) << 2);
s32 pixelstride;
@@ -2886,13 +2907,13 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos
{
if (attrib[1] & 0x1000)
{
- if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) & 0x0F; pixelsaddr--; }
- else color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) >> 4;
+ if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] & 0x0F; pixelsaddr--; }
+ else color = objvram[pixelsaddr & objvrammask] >> 4;
}
else
{
- if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) >> 4; pixelsaddr++; }
- else color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) & 0x0F;
+ if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] >> 4; pixelsaddr++; }
+ else color = objvram[pixelsaddr & objvrammask] & 0x0F;
}
if (color)
diff --git a/src/GPU2D.h b/src/GPU2D.h
index 521adf0..db15adc 100644
--- a/src/GPU2D.h
+++ b/src/GPU2D.h
@@ -59,9 +59,6 @@ public:
void CheckWindows(u32 line);
- void BGExtPalDirty(u32 base);
- void OBJExtPalDirty();
-
u16* GetBGExtPal(u32 slot, u32 pal);
u16* GetOBJExtPal();
@@ -128,9 +125,6 @@ private:
u16 MasterBrightness;
u16 BGExtPalCache[4][16*256];
- u16 OBJExtPalCache[16*256];
- u32 BGExtPalStatus[4];
- u32 OBJExtPalStatus;
u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb);
u32 ColorBlend5(u32 val1, u32 val2);
@@ -147,15 +141,17 @@ private:
static void DrawPixel_Normal(u32* dst, u16 color, u32 flag);
static void DrawPixel_Accel(u32* dst, u16 color, u32 flag);
- void (*DrawPixel)(u32* dst, u16 color, u32 flag);
+
+ typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag);
void DrawBG_3D();
- template<bool mosaic> void DrawBG_Text(u32 line, u32 bgnum);
- template<bool mosaic> void DrawBG_Affine(u32 line, u32 bgnum);
- template<bool mosaic> void DrawBG_Extended(u32 line, u32 bgnum);
- template<bool mosaic> void DrawBG_Large(u32 line);
+ template<bool mosaic, DrawPixel drawPixel> void DrawBG_Text(u32 line, u32 bgnum);
+ template<bool mosaic, DrawPixel drawPixel> void DrawBG_Affine(u32 line, u32 bgnum);
+ template<bool mosaic, DrawPixel drawPixel> void DrawBG_Extended(u32 line, u32 bgnum);
+ template<bool mosaic, DrawPixel drawPixel> void DrawBG_Large(u32 line);
void ApplySpriteMosaicX();
+ template<DrawPixel drawPixel>
void InterleaveSprites(u32 prio);
template<bool window> void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos);
template<bool window> void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos);
diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp
index 74debfe..4e6ac42 100644
--- a/src/GPU3D.cpp
+++ b/src/GPU3D.cpp
@@ -179,6 +179,8 @@ u8 RenderFogDensityTable[34];
u32 RenderClearAttr1, RenderClearAttr2;
+bool RenderFrameIdentical;
+
u32 ZeroDotWLimit;
u32 GXStat;
@@ -2491,6 +2493,19 @@ void VBlank()
}
RenderNumPolygons = NumPolygons;
+ RenderFrameIdentical = false;
+ }
+ else
+ {
+ RenderFrameIdentical = RenderDispCnt == DispCnt
+ && RenderAlphaRef == AlphaRef
+ && RenderClearAttr1 == ClearAttr1
+ && RenderClearAttr2 == ClearAttr2
+ && RenderFogColor == FogColor
+ && RenderFogOffset == FogOffset * 0x200
+ && memcmp(RenderEdgeTable, EdgeTable, 8*2) == 0
+ && memcmp(RenderFogDensityTable + 1, FogDensityTable, 32) == 0
+ && memcmp(RenderToonTable, ToonTable, 32*2) == 0;
}
RenderDispCnt = DispCnt;
diff --git a/src/GPU3D.h b/src/GPU3D.h
index c69adde..0477c4f 100644
--- a/src/GPU3D.h
+++ b/src/GPU3D.h
@@ -87,6 +87,8 @@ extern u8 RenderFogDensityTable[34];
extern u32 RenderClearAttr1, RenderClearAttr2;
+extern bool RenderFrameIdentical;
+
extern std::array<Polygon*,2048> RenderPolygonRAM;
extern u32 RenderNumPolygons;
diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp
index 658b261..ba9548e 100644
--- a/src/GPU3D_OpenGL.cpp
+++ b/src/GPU3D_OpenGL.cpp
@@ -74,11 +74,11 @@ typedef struct
Polygon* PolyData;
u32 NumIndices;
- u16* Indices;
+ u32 IndicesOffset;
GLuint PrimType;
u32 NumEdgeIndices;
- u16* EdgeIndices;
+ u32 EdgeIndicesOffset;
u32 RenderKey;
@@ -107,7 +107,11 @@ u32 VertexBuffer[10240 * 7];
u32 NumVertices;
GLuint VertexArrayID;
+GLuint IndexBufferID;
u16 IndexBuffer[2048 * 40];
+u32 NumIndices, NumEdgeIndices;
+
+const u32 EdgeIndicesOffset = 2048 * 30;
GLuint TexMemID;
GLuint TexPalMemID;
@@ -320,6 +324,9 @@ bool Init()
glEnableVertexAttribArray(3); // attrib
glVertexAttribIPointer(3, 3, GL_UNSIGNED_INT, 7*4, (void*)(4*4));
+ glGenBuffers(1, &IndexBufferID);
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, IndexBufferID);
+ glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(IndexBuffer), NULL, GL_DYNAMIC_DRAW);
glGenFramebuffers(4, &FramebufferID[0]);
glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[0]);
@@ -563,15 +570,15 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
u32* vptr = &VertexBuffer[0];
u32 vidx = 0;
- u16* iptr = &IndexBuffer[0];
- u16* eiptr = &IndexBuffer[2048*30];
+ u32 iidx = 0;
+ u32 eidx = EdgeIndicesOffset;
for (int i = 0; i < npolys; i++)
{
RendererPolygon* rp = &polygons[i];
Polygon* poly = rp->PolyData;
- rp->Indices = iptr;
+ rp->IndicesOffset = iidx;
rp->NumIndices = 0;
u32 vidx_first = vidx;
@@ -606,7 +613,7 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
vptr = SetupVertex(poly, j, vtx, vtxattr, vptr);
- *iptr++ = vidx;
+ IndexBuffer[iidx++] = vidx;
rp->NumIndices++;
vidx++;
@@ -627,9 +634,9 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
}
// build a triangle
- *iptr++ = vidx_first;
- *iptr++ = vidx - 2;
- *iptr++ = vidx - 1;
+ IndexBuffer[iidx++] = vidx_first;
+ IndexBuffer[iidx++] = vidx - 2;
+ IndexBuffer[iidx++] = vidx - 1;
rp->NumIndices += 3;
}
else // quad, pentagon, etc
@@ -649,9 +656,9 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
if (j >= 2)
{
// build a triangle
- *iptr++ = vidx_first;
- *iptr++ = vidx - 1;
- *iptr++ = vidx;
+ IndexBuffer[iidx++] = vidx_first;
+ IndexBuffer[iidx++] = vidx - 1;
+ IndexBuffer[iidx++] = vidx;
rp->NumIndices += 3;
}
@@ -743,46 +750,48 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
if (j >= 1)
{
// build a triangle
- *iptr++ = vidx_first;
- *iptr++ = vidx - 1;
- *iptr++ = vidx;
+ IndexBuffer[iidx++] = vidx_first;
+ IndexBuffer[iidx++] = vidx - 1;
+ IndexBuffer[iidx++] = vidx;
rp->NumIndices += 3;
}
vidx++;
}
- *iptr++ = vidx_first;
- *iptr++ = vidx - 1;
- *iptr++ = vidx_first + 1;
+ IndexBuffer[iidx++] = vidx_first;
+ IndexBuffer[iidx++] = vidx - 1;
+ IndexBuffer[iidx++] = vidx_first + 1;
rp->NumIndices += 3;
}
}
- rp->EdgeIndices = eiptr;
+ rp->EdgeIndicesOffset = eidx;
rp->NumEdgeIndices = 0;
u32 vidx_cur = vidx_first;
for (int j = 1; j < poly->NumVertices; j++)
{
- *eiptr++ = vidx_cur;
- *eiptr++ = vidx_cur + 1;
+ IndexBuffer[eidx++] = vidx_cur;
+ IndexBuffer[eidx++] = vidx_cur + 1;
vidx_cur++;
rp->NumEdgeIndices += 2;
}
- *eiptr++ = vidx_cur;
- *eiptr++ = vidx_first;
+ IndexBuffer[eidx++] = vidx_cur;
+ IndexBuffer[eidx++] = vidx_first;
rp->NumEdgeIndices += 2;
}
NumVertices = vidx;
+ NumIndices = iidx;
+ NumEdgeIndices = eidx - EdgeIndicesOffset;
}
void RenderSinglePolygon(int i)
{
RendererPolygon* rp = &PolygonList[i];
- glDrawElements(rp->PrimType, rp->NumIndices, GL_UNSIGNED_SHORT, rp->Indices);
+ glDrawElements(rp->PrimType, rp->NumIndices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->IndicesOffset * 2));
}
int RenderPolygonBatch(int i)
@@ -803,7 +812,7 @@ int RenderPolygonBatch(int i)
numindices += cur_rp->NumIndices;
}
- glDrawElements(primtype, numindices, GL_UNSIGNED_SHORT, rp->Indices);
+ glDrawElements(primtype, numindices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->IndicesOffset * 2));
return numpolys;
}
@@ -823,7 +832,7 @@ int RenderPolygonEdgeBatch(int i)
numindices += cur_rp->NumEdgeIndices;
}
- glDrawElements(GL_LINES, numindices, GL_UNSIGNED_SHORT, rp->EdgeIndices);
+ glDrawElements(GL_LINES, numindices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->EdgeIndicesOffset * 2));
return numpolys;
}
@@ -1320,6 +1329,11 @@ void RenderFrame()
glBindBuffer(GL_ARRAY_BUFFER, VertexBufferID);
glBufferSubData(GL_ARRAY_BUFFER, 0, NumVertices*7*4, VertexBuffer);
+ // bind to access the index buffer
+ glBindVertexArray(VertexArrayID);
+ glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, NumIndices * 2, IndexBuffer);
+ glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, EdgeIndicesOffset * 2, NumEdgeIndices * 2, IndexBuffer + EdgeIndicesOffset);
+
RenderSceneChunk(0, 192);
}
diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index e9d8e75..d66eb76 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -58,15 +58,17 @@ bool PrevIsShadowMask;
bool Enabled;
+bool FrameIdentical;
+
// threading
bool Threaded;
-void* RenderThread;
+Platform::Thread* RenderThread;
bool RenderThreadRunning;
bool RenderThreadRendering;
-void* Sema_RenderStart;
-void* Sema_RenderDone;
-void* Sema_ScanlineCount;
+Platform::Semaphore* Sema_RenderStart;
+Platform::Semaphore* Sema_RenderDone;
+Platform::Semaphore* Sema_ScanlineCount;
void RenderThreadFunc();
@@ -550,6 +552,16 @@ typedef struct
RendererPolygon PolygonList[2048];
+template <typename T> inline T ReadVRAM_Texture(u32 addr)
+{
+    const u32 offset = addr & 0x7FFFF; // wrap the offset with the texture mask
+    return *(T*)&GPU::VRAMFlat_Texture[offset];
+}
+template <typename T> inline T ReadVRAM_TexPal(u32 addr)
+{
+    const u32 offset = addr & 0x1FFFF; // wrap the offset with the texture-palette mask
+    return *(T*)&GPU::VRAMFlat_TexPal[offset];
+}
void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
{
@@ -606,10 +618,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 1: // A3I5
{
vramaddr += ((t * width) + s);
- u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr);
+ u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
texpal <<= 4;
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1));
+ *color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1));
*alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6);
}
break;
@@ -617,12 +629,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 2: // 4-color
{
vramaddr += (((t * width) + s) >> 2);
- u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr);
+ u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
pixel >>= ((s & 0x3) << 1);
pixel &= 0x3;
texpal <<= 3;
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
+ *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31;
}
break;
@@ -630,12 +642,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 3: // 16-color
{
vramaddr += (((t * width) + s) >> 1);
- u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr);
+ u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
if (s & 0x1) pixel >>= 4;
else pixel &= 0xF;
texpal <<= 4;
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
+ *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31;
}
break;
@@ -643,10 +655,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 4: // 256-color
{
vramaddr += ((t * width) + s);
- u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr);
+ u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
texpal <<= 4;
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
+ *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31;
}
break;
@@ -660,30 +672,30 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
if (vramaddr >= 0x40000)
slot1addr += 0x10000;
- u8 val = GPU::ReadVRAM_Texture<u8>(vramaddr);
+ u8 val = ReadVRAM_Texture<u8>(vramaddr);
val >>= (2 * (s & 0x3));
- u16 palinfo = GPU::ReadVRAM_Texture<u16>(slot1addr);
+ u16 palinfo = ReadVRAM_Texture<u16>(slot1addr);
u32 paloffset = (palinfo & 0x3FFF) << 2;
texpal <<= 4;
switch (val & 0x3)
{
case 0:
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset);
+ *color = ReadVRAM_TexPal<u16>(texpal + paloffset);
*alpha = 31;
break;
case 1:
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
+ *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
*alpha = 31;
break;
case 2:
if ((palinfo >> 14) == 1)
{
- u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset);
- u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
+ u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset);
+ u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0;
@@ -700,8 +712,8 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
}
else if ((palinfo >> 14) == 3)
{
- u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset);
- u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
+ u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset);
+ u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0;
@@ -717,20 +729,20 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
*color = r | g | b;
}
else
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 4);
+ *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 4);
*alpha = 31;
break;
case 3:
if ((palinfo >> 14) == 2)
{
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 6);
+ *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 6);
*alpha = 31;
}
else if ((palinfo >> 14) == 3)
{
- u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset);
- u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
+ u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset);
+ u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2);
u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0;
@@ -759,10 +771,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 6: // A5I3
{
vramaddr += ((t * width) + s);
- u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr);
+ u8 pixel = ReadVRAM_Texture<u8>(vramaddr);
texpal <<= 4;
- *color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1));
+ *color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1));
*alpha = (pixel >> 3);
}
break;
@@ -770,7 +782,7 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
case 7: // direct color
{
vramaddr += (((t * width) + s) << 1);
- *color = GPU::ReadVRAM_Texture<u16>(vramaddr);
+ *color = ReadVRAM_Texture<u16>(vramaddr);
*alpha = (*color & 0x8000) ? 31 : 0;
}
break;
@@ -2007,8 +2019,8 @@ void ClearBuffers()
{
for (int x = 0; x < 256; x++)
{
- u16 val2 = GPU::ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1));
- u16 val3 = GPU::ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1));
+ u16 val2 = ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1));
+ u16 val3 = ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1));
// TODO: confirm color conversion
u32 r = (val2 << 1) & 0x3E; if (r) r++;
@@ -2088,11 +2100,19 @@ void VCount144()
void RenderFrame()
{
+ auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture);
+ auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal);
+
+ bool textureChanged = GPU::MakeVRAMFlat_TextureCoherent(textureDirty);
+ bool texPalChanged = GPU::MakeVRAMFlat_TexPalCoherent(texPalDirty);
+
+ FrameIdentical = !(textureChanged || texPalChanged) && RenderFrameIdentical;
+
if (RenderThreadRunning)
{
Platform::Semaphore_Post(Sema_RenderStart);
}
- else
+ else if (!FrameIdentical)
{
ClearBuffers();
RenderPolygons(false, &RenderPolygonRAM[0], RenderNumPolygons);
@@ -2107,8 +2127,15 @@ void RenderThreadFunc()
if (!RenderThreadRunning) return;
RenderThreadRendering = true;
- ClearBuffers();
- RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons);
+ if (FrameIdentical)
+ {
+ Platform::Semaphore_Post(Sema_ScanlineCount, 192);
+ }
+ else
+ {
+ ClearBuffers();
+ RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons);
+ }
Platform::Semaphore_Post(Sema_RenderDone);
RenderThreadRendering = false;
diff --git a/src/NDS.cpp b/src/NDS.cpp
index 1781dd5..b313db0 100644
--- a/src/NDS.cpp
+++ b/src/NDS.cpp
@@ -210,13 +210,13 @@ bool Init()
void DeInit()
{
- delete ARM9;
- delete ARM7;
-
#ifdef JIT_ENABLED
ARMJIT::DeInit();
#endif
+ delete ARM9;
+ delete ARM7;
+
for (int i = 0; i < 8; i++)
delete DMAs[i];
@@ -908,7 +908,7 @@ void RunSystem(u64 timestamp)
}
}
-template <bool EnableJIT>
+template <bool EnableJIT, int ConsoleType>
u32 RunFrame()
{
FrameStartTimestamp = SysTimestamp;
@@ -934,10 +934,10 @@ u32 RunFrame()
}
else if (CPUStop & 0x0FFF)
{
- DMAs[0]->Run();
- if (!(CPUStop & 0x80000000)) DMAs[1]->Run();
- if (!(CPUStop & 0x80000000)) DMAs[2]->Run();
- if (!(CPUStop & 0x80000000)) DMAs[3]->Run();
+ DMAs[0]->Run<ConsoleType>();
+ if (!(CPUStop & 0x80000000)) DMAs[1]->Run<ConsoleType>();
+ if (!(CPUStop & 0x80000000)) DMAs[2]->Run<ConsoleType>();
+ if (!(CPUStop & 0x80000000)) DMAs[3]->Run<ConsoleType>();
if (ConsoleType == 1) DSi::RunNDMAs(0);
}
else
@@ -962,10 +962,10 @@ u32 RunFrame()
if (CPUStop & 0x0FFF0000)
{
- DMAs[4]->Run();
- DMAs[5]->Run();
- DMAs[6]->Run();
- DMAs[7]->Run();
+ DMAs[4]->Run<ConsoleType>();
+ DMAs[5]->Run<ConsoleType>();
+ DMAs[6]->Run<ConsoleType>();
+ DMAs[7]->Run<ConsoleType>();
if (ConsoleType == 1) DSi::RunNDMAs(1);
}
else
@@ -999,6 +999,9 @@ u32 RunFrame()
ARM7Timestamp-SysTimestamp,
GPU3D::Timestamp-SysTimestamp);
#endif
+ SPU::TransferOutput();
+
+ NDSCart::FlushSRAMFile();
NumFrames++;
@@ -1009,10 +1012,14 @@ u32 RunFrame()
{
#ifdef JIT_ENABLED
if (Config::JIT_Enable)
- return RunFrame<true>();
+ return NDS::ConsoleType == 1
+ ? RunFrame<true, 1>()
+ : RunFrame<true, 0>();
else
#endif
- return RunFrame<false>();
+ return NDS::ConsoleType == 1
+ ? RunFrame<false, 1>()
+ : RunFrame<false, 0>();
}
void Reschedule(u64 target)
@@ -3130,6 +3137,10 @@ void ARM9IOWrite8(u32 addr, u8 val)
NDSCart::WriteSPIData(val);
return;
+ case 0x04000188:
+ ARM9IOWrite32(addr, val | (val << 8) | (val << 16) | (val << 24));
+ return;
+
case 0x040001A8: NDSCart::ROMCommand[0] = val; return;
case 0x040001A9: NDSCart::ROMCommand[1] = val; return;
case 0x040001AA: NDSCart::ROMCommand[2] = val; return;
@@ -3246,6 +3257,10 @@ void ARM9IOWrite16(u32 addr, u16 val)
IPCFIFOCnt9 = val & 0x8404;
return;
+ case 0x04000188:
+ ARM9IOWrite32(addr, val | (val << 16));
+ return;
+
case 0x040001A0:
if (!(ExMemCnt[0] & (1<<11))) NDSCart::WriteSPICnt(val);
return;
@@ -3733,6 +3748,10 @@ void ARM7IOWrite8(u32 addr, u8 val)
case 0x04000138: RTC::Write(val, true); return;
+ case 0x04000188:
+ ARM7IOWrite32(addr, val | (val << 8) | (val << 16) | (val << 24));
+ return;
+
case 0x040001A0:
if (ExMemCnt[0] & (1<<11))
{
@@ -3841,6 +3860,10 @@ void ARM7IOWrite16(u32 addr, u16 val)
IPCFIFOCnt7 = val & 0x8404;
return;
+ case 0x04000188:
+ ARM7IOWrite32(addr, val | (val << 16));
+ return;
+
case 0x040001A0:
if (ExMemCnt[0] & (1<<11))
NDSCart::WriteSPICnt(val);
diff --git a/src/NDSCart.cpp b/src/NDSCart.cpp
index 077bf48..2d8396a 100644
--- a/src/NDSCart.cpp
+++ b/src/NDSCart.cpp
@@ -37,6 +37,7 @@ u8* SRAM;
u32 SRAMLength;
char SRAMPath[1024];
+bool SRAMFileDirty;
void (*WriteFunc)(u8 val, bool islast);
@@ -445,14 +446,21 @@ void Write(u8 val, u32 hold)
break;
}
- if (islast && (CurCmd == 0x02 || CurCmd == 0x0A) && (SRAMLength > 0))
+ SRAMFileDirty |= islast && (CurCmd == 0x02 || CurCmd == 0x0A) && (SRAMLength > 0);
+}
+
+void FlushSRAMFile()
+{
+ if (!SRAMFileDirty)
+ return;
+
+ SRAMFileDirty = false;
+
+ FILE* f = Platform::OpenFile(SRAMPath, "wb");
+ if (f)
{
- FILE* f = Platform::OpenFile(SRAMPath, "wb");
- if (f)
- {
- fwrite(SRAM, SRAMLength, 1, f);
- fclose(f);
- }
+ fwrite(SRAM, SRAMLength, 1, f);
+ fclose(f);
}
}
@@ -1034,6 +1042,11 @@ void RelocateSave(const char* path, bool write)
NDSCart_SRAM::RelocateSave(path, write);
}
+// Public NDSCart entry point: simply forwards to NDSCart_SRAM::FlushSRAMFile().
+void FlushSRAMFile()
+{
+ NDSCart_SRAM::FlushSRAMFile();
+}
+
int ImportSRAM(const u8* data, u32 length)
{
memcpy(NDSCart_SRAM::SRAM, data, std::min(length, NDSCart_SRAM::SRAMLength));
diff --git a/src/NDSCart.h b/src/NDSCart.h
index 9fe916d..7d3f4a1 100644
--- a/src/NDSCart.h
+++ b/src/NDSCart.h
@@ -46,6 +46,9 @@ void DoSavestate(Savestate* file);
void DecryptSecureArea(u8* out);
bool LoadROM(const char* path, const char* sram, bool direct);
+
+void FlushSRAMFile();
+
void RelocateSave(const char* path, bool write);
int ImportSRAM(const u8* data, u32 length);
diff --git a/src/NonStupidBitfield.h b/src/NonStupidBitfield.h
new file mode 100644
index 0000000..124ba76
--- /dev/null
+++ b/src/NonStupidBitfield.h
@@ -0,0 +1,149 @@
+#ifndef NONSTUPIDBITFIELD_H
+#define NONSTUPIDBITFIELD_H
+
+#include "types.h"
+
+#include <memory.h>
+
+#include <initializer_list>
+#include <algorithm>
+
+// like std::bitset but less stupid and optimised for
+// our use case (keeping track of memory invalidations)
+
+// Fixed-size set of Size bits stored LSB-first in a byte array:
+// bit i lives in Data[i >> 3] at bit position (i & 7).
+template <u32 Size>
+struct NonStupidBitField
+{
+    static_assert((Size % 8) == 0, "bitfield size must be a multiple of 8");
+    static const u32 DataLength = Size / 8;
+    // widest word (in bytes) that evenly divides the storage; the iterator
+    // scans the storage in steps of this many bytes
+    static const u32 WordLength =
+        (DataLength % 8) == 0 ? 8 :
+        (DataLength % 4) == 0 ? 4 :
+        (DataLength % 2) == 0 ? 2 : 1;
+    u8 Data[DataLength];
+
+    // Proxy for a single bit, returned by operator[].
+    struct Ref
+    {
+        NonStupidBitField<Size>& BitField;
+        u32 Idx;
+
+        // reads the referenced bit
+        operator bool()
+        {
+            return BitField.Data[Idx >> 3] & (1 << (Idx & 0x7));
+        }
+
+        // clears the referenced bit, then sets it again if 'set' is true
+        Ref& operator=(bool set)
+        {
+            BitField.Data[Idx >> 3] &= ~(1 << (Idx & 0x7));
+            BitField.Data[Idx >> 3] |= ((u8)set << (Idx & 0x7));
+            return *this;
+        }
+    };
+
+    // Forward iterator over the indices of the set bits, in ascending order.
+    // DataIdx is the byte offset of the word currently being scanned, BitIdx
+    // the position of the current bit inside that word and RemainingBits the
+    // bits of that word which have not been visited yet.
+    // The end state is always {DataLength, 0, 0}, so it compares equal to End().
+    struct Iterator
+    {
+        NonStupidBitField<Size>& BitField;
+        u32 DataIdx;
+        u32 BitIdx;
+        u64 RemainingBits;
+
+        // index of the bit the iterator currently points at
+        u32 operator*() { return DataIdx * 8 + BitIdx; }
+
+        // both the word and the bit inside it have to match; comparing the
+        // word alone would make two different bits of one word look equal
+        bool operator==(const Iterator& other) { return other.DataIdx == DataIdx && other.BitIdx == BitIdx; }
+        bool operator!=(const Iterator& other) { return !(*this == other); }
+
+        // Advances to the next set bit, scanning the storage in words of
+        // sizeof(T) bytes. Stops in the end state once the storage is
+        // exhausted; advancing an end iterator leaves it unchanged.
+        template <typename T>
+        void Next()
+        {
+            while (RemainingBits == 0)
+            {
+                if (DataIdx < DataLength)
+                    DataIdx += sizeof(T);
+
+                // never load past the end of Data, and never hand a zero
+                // word to __builtin_ctzll (its result is undefined for 0)
+                if (DataIdx >= DataLength)
+                {
+                    DataIdx = DataLength;
+                    BitIdx = 0;
+                    return;
+                }
+
+                RemainingBits = BitField.LoadWord(DataIdx, sizeof(T));
+            }
+
+            BitIdx = __builtin_ctzll(RemainingBits);
+            RemainingBits &= ~(1ULL << BitIdx);
+        }
+
+        Iterator operator++(int)
+        {
+            Iterator prev(*this);
+            ++*this;
+            return prev;
+        }
+
+        Iterator& operator++()
+        {
+            if ((DataLength % 8) == 0)
+                Next<u64>();
+            else if ((DataLength % 4) == 0)
+                Next<u32>();
+            else if ((DataLength % 2) == 0)
+                Next<u16>();
+            else
+                Next<u8>();
+
+            return *this;
+        }
+    };
+
+    // Creates a bitfield in which exactly the bits [start, start + size) are set.
+    // The range is not checked against Size; it has to lie inside the bitfield.
+    NonStupidBitField(u32 start, u32 size)
+    {
+        memset(Data, 0, sizeof(Data));
+
+        if (size == 0)
+            return;
+
+        u32 lastBit = start + size - 1;
+        u32 firstByte = start >> 3;
+        u32 lastByte = lastBit >> 3;
+        // bits (start & 7)..7 of the first byte
+        u8 headMask = (u8)(0xFF << (start & 0x7));
+        // bits 0..(lastBit & 7) of the last byte
+        u8 tailMask = (u8)(0xFF >> (7 - (lastBit & 0x7)));
+
+        if (firstByte == lastByte)
+        {
+            // the whole range sits inside one byte: both limits apply to it
+            Data[firstByte] = headMask & tailMask;
+            return;
+        }
+
+        Data[firstByte] = headMask;
+        Data[lastByte] = tailMask;
+        if (lastByte - firstByte > 1)
+            memset(Data + firstByte + 1, 0xFF, lastByte - firstByte - 1);
+    }
+
+    // Creates a bitfield with all bits cleared.
+    NonStupidBitField()
+    {
+        memset(Data, 0, sizeof(Data));
+    }
+
+    // Assembles up to numBytes (at most 8) bytes starting at Data[byteIdx]
+    // into a word, lowest byte first, so that bit n of the result is bit
+    // (byteIdx * 8 + n) of the bitfield regardless of host endianness and
+    // of the alignment of Data. Bytes past the end of Data read as zero.
+    u64 LoadWord(u32 byteIdx, u32 numBytes) const
+    {
+        u64 word = 0;
+        for (u32 i = 0; i < numBytes && (byteIdx + i) < DataLength; i++)
+            word |= (u64)Data[byteIdx + i] << (i * 8);
+        return word;
+    }
+
+    // iterator one past the last set bit
+    Iterator End()
+    {
+        return Iterator{*this, DataLength, 0, 0};
+    }
+    // iterator at the lowest set bit, or End() if no bit is set
+    Iterator Begin()
+    {
+        Iterator it{*this, 0, 0, LoadWord(0, WordLength)};
+        ++it;
+        return it;
+    }
+
+    Ref operator[](u32 idx)
+    {
+        return Ref{*this, idx};
+    }
+
+    // bitwise OR with another bitfield of the same size
+    NonStupidBitField& operator|=(const NonStupidBitField<Size>& other)
+    {
+        for (u32 i = 0; i < DataLength; i++)
+        {
+            Data[i] |= other.Data[i];
+        }
+        return *this;
+    }
+    // bitwise AND with another bitfield of the same size
+    NonStupidBitField& operator&=(const NonStupidBitField<Size>& other)
+    {
+        for (u32 i = 0; i < DataLength; i++)
+        {
+            Data[i] &= other.Data[i];
+        }
+        return *this;
+    }
+};
+
+
+#endif \ No newline at end of file
diff --git a/src/OpenGLSupport.h b/src/OpenGLSupport.h
index 925c0ad..44c511f 100644
--- a/src/OpenGLSupport.h
+++ b/src/OpenGLSupport.h
@@ -23,8 +23,13 @@
#include <string.h>
// TODO: different includes for each platform
-#include <GL/gl.h>
-#include <GL/glext.h>
+#ifdef __APPLE__
+ #include <OpenGL/gl3.h>
+ #include <OpenGL/gl3ext.h>
+#else
+ #include <GL/gl.h>
+ #include <GL/glext.h>
+#endif
#include "Platform.h"
@@ -61,6 +66,11 @@
#endif
+#ifdef __APPLE__
+
+#define DO_PROCLIST(func)
+
+#else
#define DO_PROCLIST(func) \
DO_PROCLIST_1_3(func) \
@@ -128,6 +138,7 @@
\
func(GLGETSTRINGI, glGetStringi); \
+#endif
namespace OpenGL
{
diff --git a/src/Platform.h b/src/Platform.h
index fea98dd..b4dda9e 100644
--- a/src/Platform.h
+++ b/src/Platform.h
@@ -67,15 +67,24 @@ inline bool LocalFileExists(const char* name)
return true;
}
-void* Thread_Create(void (*func)());
-void Thread_Free(void* thread);
-void Thread_Wait(void* thread);
-
-void* Semaphore_Create();
-void Semaphore_Free(void* sema);
-void Semaphore_Reset(void* sema);
-void Semaphore_Wait(void* sema);
-void Semaphore_Post(void* sema);
+struct Thread;
+Thread* Thread_Create(void (*func)());
+void Thread_Free(Thread* thread);
+void Thread_Wait(Thread* thread);
+
+struct Semaphore;
+Semaphore* Semaphore_Create();
+void Semaphore_Free(Semaphore* sema);
+void Semaphore_Reset(Semaphore* sema);
+void Semaphore_Wait(Semaphore* sema);
+void Semaphore_Post(Semaphore* sema, int count = 1);
+
+struct Mutex;
+Mutex* Mutex_Create();
+void Mutex_Free(Mutex* mutex);
+void Mutex_Lock(Mutex* mutex);
+void Mutex_Unlock(Mutex* mutex);
+bool Mutex_TryLock(Mutex* mutex);
void* GL_GetProcAddress(const char* proc);
diff --git a/src/SPU.cpp b/src/SPU.cpp
index 5b74bda..fe798c7 100644
--- a/src/SPU.cpp
+++ b/src/SPU.cpp
@@ -18,6 +18,7 @@
#include <stdio.h>
#include <string.h>
+#include "Platform.h"
#include "NDS.h"
#include "DSi.h"
#include "SPU.h"
@@ -61,13 +62,15 @@ const s16 PSGTable[8][8] =
{-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF}
};
-const u32 kSamplesPerRun = 1;
+const u32 OutputBufferSize = 2*2048;
+s16 OutputBackbuffer[2 * OutputBufferSize];
+u32 OutputBackbufferWritePosition;
-const u32 OutputBufferSize = 2*1024;
-s16 OutputBuffer[2 * OutputBufferSize];
-volatile u32 OutputReadOffset;
-volatile u32 OutputWriteOffset;
+s16 OutputFrontBuffer[2 * OutputBufferSize];
+u32 OutputFrontBufferWritePosition;
+u32 OutputFrontBufferReadPosition;
+Platform::Mutex* AudioLock;
u16 Cnt;
u8 MasterVolume;
@@ -85,6 +88,8 @@ bool Init()
Capture[0] = new CaptureUnit(0);
Capture[1] = new CaptureUnit(1);
+ AudioLock = Platform::Mutex_Create();
+
return true;
}
@@ -95,6 +100,8 @@ void DeInit()
delete Capture[0];
delete Capture[1];
+
+ Platform::Mutex_Free(AudioLock);
}
void Reset()
@@ -111,15 +118,18 @@ void Reset()
Capture[0]->Reset();
Capture[1]->Reset();
- NDS::ScheduleEvent(NDS::Event_SPU, true, 1024*kSamplesPerRun, Mix, kSamplesPerRun);
+ NDS::ScheduleEvent(NDS::Event_SPU, true, 1024, Mix, 0);
}
void Stop()
{
- memset(OutputBuffer, 0, 2*OutputBufferSize*2);
+ Platform::Mutex_Lock(AudioLock);
+ memset(OutputFrontBuffer, 0, 2*OutputBufferSize*2);
- OutputReadOffset = 0;
- OutputWriteOffset = 0;
+ OutputBackbufferWritePosition = 0;
+ OutputFrontBufferReadPosition = 0;
+ OutputFrontBufferWritePosition = 0;
+ Platform::Mutex_Unlock(AudioLock);
}
void DoSavestate(Savestate* file)
@@ -416,11 +426,11 @@ void Channel::NextSample_Noise()
}
template<u32 type>
-void Channel::Run(s32* buf, u32 samples)
+s32 Channel::Run()
{
- if (!(Cnt & (1<<31))) return;
+ if (!(Cnt & (1<<31))) return 0;
- if ((type < 3) && ((Length+LoopPos) < 16)) return;
+ if ((type < 3) && ((Length+LoopPos) < 16)) return 0;
if (KeyOn)
{
@@ -428,45 +438,32 @@ void Channel::Run(s32* buf, u32 samples)
KeyOn = false;
}
- for (u32 s = 0; s < samples; s++)
+ Timer += 512; // 1 sample = 512 cycles at 16MHz
+
+ while (Timer >> 16)
{
- Timer += 512; // 1 sample = 512 cycles at 16MHz
+ Timer = TimerReload + (Timer - 0x10000);
- while (Timer >> 16)
+ switch (type)
{
- Timer = TimerReload + (Timer - 0x10000);
-
- switch (type)
- {
- case 0: NextSample_PCM8(); break;
- case 1: NextSample_PCM16(); break;
- case 2: NextSample_ADPCM(); break;
- case 3: NextSample_PSG(); break;
- case 4: NextSample_Noise(); break;
- }
+ case 0: NextSample_PCM8(); break;
+ case 1: NextSample_PCM16(); break;
+ case 2: NextSample_ADPCM(); break;
+ case 3: NextSample_PSG(); break;
+ case 4: NextSample_Noise(); break;
}
-
- s32 val = (s32)CurSample;
- val <<= VolumeShift;
- val *= Volume;
- buf[s] = val;
-
- if (!(Cnt & (1<<31))) break;
}
+
+ s32 val = (s32)CurSample;
+ val <<= VolumeShift;
+ val *= Volume;
+ return val;
}
-void Channel::PanOutput(s32* inbuf, u32 samples, s32* leftbuf, s32* rightbuf)
+void Channel::PanOutput(s32 in, s32& left, s32& right)
{
- for (u32 s = 0; s < samples; s++)
- {
- s32 val = (s32)inbuf[s];
-
- s32 l = ((s64)val * (128-Pan)) >> 10;
- s32 r = ((s64)val * Pan) >> 10;
-
- leftbuf[s] += l;
- rightbuf[s] += r;
- }
+ left += ((s64)in * (128-Pan)) >> 10;
+ right += ((s64)in * Pan) >> 10;
}
@@ -602,39 +599,31 @@ void CaptureUnit::Run(s32 sample)
}
-void Mix(u32 samples)
+void Mix(u32 dummy)
{
- s32 channelbuf[32];
- s32 leftbuf[32], rightbuf[32];
- s32 ch0buf[32], ch1buf[32], ch2buf[32], ch3buf[32];
- s32 leftoutput[32], rightoutput[32];
-
- for (u32 s = 0; s < samples; s++)
- {
- leftbuf[s] = 0; rightbuf[s] = 0;
- leftoutput[s] = 0; rightoutput[s] = 0;
- }
+ s32 left = 0, right = 0;
+ s32 leftoutput = 0, rightoutput = 0;
if (Cnt & (1<<15))
{
- Channels[0]->DoRun(ch0buf, samples);
- Channels[1]->DoRun(ch1buf, samples);
- Channels[2]->DoRun(ch2buf, samples);
- Channels[3]->DoRun(ch3buf, samples);
+ s32 ch0 = Channels[0]->DoRun();
+ s32 ch1 = Channels[1]->DoRun();
+ s32 ch2 = Channels[2]->DoRun();
+ s32 ch3 = Channels[3]->DoRun();
// TODO: addition from capture registers
- Channels[0]->PanOutput(ch0buf, samples, leftbuf, rightbuf);
- Channels[2]->PanOutput(ch2buf, samples, leftbuf, rightbuf);
+ Channels[0]->PanOutput(ch0, left, right);
+ Channels[2]->PanOutput(ch2, left, right);
- if (!(Cnt & (1<<12))) Channels[1]->PanOutput(ch1buf, samples, leftbuf, rightbuf);
- if (!(Cnt & (1<<13))) Channels[3]->PanOutput(ch3buf, samples, leftbuf, rightbuf);
+ if (!(Cnt & (1<<12))) Channels[1]->PanOutput(ch1, left, right);
+ if (!(Cnt & (1<<13))) Channels[3]->PanOutput(ch3, left, right);
for (int i = 4; i < 16; i++)
{
Channel* chan = Channels[i];
- chan->DoRun(channelbuf, samples);
- chan->PanOutput(channelbuf, samples, leftbuf, rightbuf);
+ s32 channel = chan->DoRun();
+ chan->PanOutput(channel, left, right);
}
// sound capture
@@ -642,32 +631,24 @@ void Mix(u32 samples)
if (Capture[0]->Cnt & (1<<7))
{
- for (u32 s = 0; s < samples; s++)
- {
- s32 val = leftbuf[s];
+ s32 val = left;
- val >>= 8;
- if (val < -0x8000) val = -0x8000;
- else if (val > 0x7FFF) val = 0x7FFF;
+ val >>= 8;
+ if (val < -0x8000) val = -0x8000;
+ else if (val > 0x7FFF) val = 0x7FFF;
- Capture[0]->Run(val);
- if (!(Capture[0]->Cnt & (1<<7))) break;
- }
+ Capture[0]->Run(val);
}
if (Capture[1]->Cnt & (1<<7))
{
- for (u32 s = 0; s < samples; s++)
- {
- s32 val = rightbuf[s];
+ s32 val = right;
- val >>= 8;
- if (val < -0x8000) val = -0x8000;
- else if (val > 0x7FFF) val = 0x7FFF;
+ val >>= 8;
+ if (val < -0x8000) val = -0x8000;
+ else if (val > 0x7FFF) val = 0x7FFF;
- Capture[1]->Run(val);
- if (!(Capture[1]->Cnt & (1<<7))) break;
- }
+ Capture[1]->Run(val);
}
// final output
@@ -675,31 +656,25 @@ void Mix(u32 samples)
switch (Cnt & 0x0300)
{
case 0x0000: // left mixer
- {
- for (u32 s = 0; s < samples; s++)
- leftoutput[s] = leftbuf[s];
- }
+ leftoutput = left;
break;
case 0x0100: // channel 1
{
s32 pan = 128 - Channels[1]->Pan;
- for (u32 s = 0; s < samples; s++)
- leftoutput[s] = ((s64)ch1buf[s] * pan) >> 10;
+ leftoutput = ((s64)ch1 * pan) >> 10;
}
break;
case 0x0200: // channel 3
{
s32 pan = 128 - Channels[3]->Pan;
- for (u32 s = 0; s < samples; s++)
- leftoutput[s] = ((s64)ch3buf[s] * pan) >> 10;
+ leftoutput = ((s64)ch3 * pan) >> 10;
}
break;
case 0x0300: // channel 1+3
{
s32 pan1 = 128 - Channels[1]->Pan;
s32 pan3 = 128 - Channels[3]->Pan;
- for (u32 s = 0; s < samples; s++)
- leftoutput[s] = (((s64)ch1buf[s] * pan1) >> 10) + (((s64)ch3buf[s] * pan3) >> 10);
+ leftoutput = (((s64)ch1 * pan1) >> 10) + (((s64)ch3 * pan3) >> 10);
}
break;
}
@@ -707,105 +682,122 @@ void Mix(u32 samples)
switch (Cnt & 0x0C00)
{
case 0x0000: // right mixer
- {
- for (u32 s = 0; s < samples; s++)
- rightoutput[s] = rightbuf[s];
- }
+ rightoutput = right;
break;
case 0x0400: // channel 1
{
s32 pan = Channels[1]->Pan;
- for (u32 s = 0; s < samples; s++)
- rightoutput[s] = ((s64)ch1buf[s] * pan) >> 10;
+ rightoutput = ((s64)ch1 * pan) >> 10;
}
break;
case 0x0800: // channel 3
{
s32 pan = Channels[3]->Pan;
- for (u32 s = 0; s < samples; s++)
- rightoutput[s] = ((s64)ch3buf[s] * pan) >> 10;
+ rightoutput = ((s64)ch3 * pan) >> 10;
}
break;
case 0x0C00: // channel 1+3
{
s32 pan1 = Channels[1]->Pan;
s32 pan3 = Channels[3]->Pan;
- for (u32 s = 0; s < samples; s++)
- rightoutput[s] = (((s64)ch1buf[s] * pan1) >> 10) + (((s64)ch3buf[s] * pan3) >> 10);
+ rightoutput = (((s64)ch1 * pan1) >> 10) + (((s64)ch3 * pan3) >> 10);
}
break;
}
}
- for (u32 s = 0; s < samples; s++)
+ leftoutput = ((s64)leftoutput * MasterVolume) >> 7;
+ rightoutput = ((s64)rightoutput * MasterVolume) >> 7;
+
+ leftoutput >>= 8;
+ if (leftoutput < -0x8000) leftoutput = -0x8000;
+ else if (leftoutput > 0x7FFF) leftoutput = 0x7FFF;
+ rightoutput >>= 8;
+ if (rightoutput < -0x8000) rightoutput = -0x8000;
+ else if (rightoutput > 0x7FFF) rightoutput = 0x7FFF;
+
+ // OutputBackbuffer can never get full because it's
+ // transferred to OutputFrontBuffer at the end of the frame
+ OutputBackbuffer[OutputBackbufferWritePosition ] = leftoutput >> 1;
+ OutputBackbuffer[OutputBackbufferWritePosition + 1] = rightoutput >> 1;
+ OutputBackbufferWritePosition += 2;
+
+ NDS::ScheduleEvent(NDS::Event_SPU, true, 1024, Mix, 0);
+}
+
+void TransferOutput()
+{
+ Platform::Mutex_Lock(AudioLock);
+ for (u32 i = 0; i < OutputBackbufferWritePosition; i += 2)
{
- s32 l = leftoutput[s];
- s32 r = rightoutput[s];
-
- l = ((s64)l * MasterVolume) >> 7;
- r = ((s64)r * MasterVolume) >> 7;
-
- l >>= 8;
- if (l < -0x8000) l = -0x8000;
- else if (l > 0x7FFF) l = 0x7FFF;
- r >>= 8;
- if (r < -0x8000) r = -0x8000;
- else if (r > 0x7FFF) r = 0x7FFF;
-
- OutputBuffer[OutputWriteOffset ] = l >> 1;
- OutputBuffer[OutputWriteOffset + 1] = r >> 1;
- OutputWriteOffset += 2;
- OutputWriteOffset &= ((2*OutputBufferSize)-1);
- if (OutputWriteOffset == OutputReadOffset)
+ OutputFrontBuffer[OutputFrontBufferWritePosition ] = OutputBackbuffer[i ];
+ OutputFrontBuffer[OutputFrontBufferWritePosition + 1] = OutputBackbuffer[i + 1];
+
+ OutputFrontBufferWritePosition += 2;
+ OutputFrontBufferWritePosition &= OutputBufferSize*2-1;
+ if (OutputFrontBufferWritePosition == OutputFrontBufferReadPosition)
{
- //printf("!! SOUND FIFO OVERFLOW %d\n", OutputWriteOffset>>1);
// advance the read position too, to avoid losing the entire FIFO
- OutputReadOffset += 2;
- OutputReadOffset &= ((2*OutputBufferSize)-1);
+ OutputFrontBufferReadPosition += 2;
+ OutputFrontBufferReadPosition &= OutputBufferSize*2-1;
}
}
-
- NDS::ScheduleEvent(NDS::Event_SPU, true, 1024*kSamplesPerRun, Mix, kSamplesPerRun);
+ OutputBackbufferWritePosition = 0;
+ Platform::Mutex_Unlock(AudioLock);
}
-
void TrimOutput()
{
+ Platform::Mutex_Lock(AudioLock);
const int halflimit = (OutputBufferSize / 2);
- int readpos = OutputWriteOffset - (halflimit*2);
+ int readpos = OutputFrontBufferWritePosition - (halflimit*2);
if (readpos < 0) readpos += (OutputBufferSize*2);
- OutputReadOffset = readpos;
+ OutputFrontBufferReadPosition = readpos;
+ Platform::Mutex_Unlock(AudioLock);
}
void DrainOutput()
{
- OutputReadOffset = 0;
- OutputWriteOffset = 0;
+ Platform::Mutex_Lock(AudioLock);
+ OutputFrontBufferWritePosition = 0;
+ OutputFrontBufferReadPosition = 0;
+ Platform::Mutex_Unlock(AudioLock);
}
void InitOutput()
{
- memset(OutputBuffer, 0, 2*OutputBufferSize*2);
- OutputReadOffset = 0;
- OutputWriteOffset = OutputBufferSize;
+ Platform::Mutex_Lock(AudioLock);
+ memset(OutputBackbuffer, 0, 2*OutputBufferSize*2);
+ memset(OutputFrontBuffer, 0, 2*OutputBufferSize*2);
+ OutputFrontBufferReadPosition = 0;
+ OutputFrontBufferWritePosition = 0;
+ Platform::Mutex_Unlock(AudioLock);
}
int GetOutputSize()
{
+ Platform::Mutex_Lock(AudioLock);
+
int ret;
- if (OutputWriteOffset >= OutputReadOffset)
- ret = OutputWriteOffset - OutputReadOffset;
+ if (OutputFrontBufferWritePosition >= OutputFrontBufferReadPosition)
+ ret = OutputFrontBufferWritePosition - OutputFrontBufferReadPosition;
else
- ret = (OutputBufferSize*2) - OutputReadOffset + OutputWriteOffset;
+ ret = (OutputBufferSize*2) - OutputFrontBufferReadPosition + OutputFrontBufferWritePosition;
ret >>= 1;
+
+ Platform::Mutex_Unlock(AudioLock);
return ret;
}
void Sync(bool wait)
{
+ // this function is currently not used anywhere
+ // depending on the usage context the thread safety measures could be made
+ // a lot faster
+
// sync to audio output in case the core is running too fast
// * wait=true: wait until enough audio data has been played
// * wait=false: merely skip some audio data to avoid a FIFO overflow
@@ -819,32 +811,42 @@ void Sync(bool wait)
}
else if (GetOutputSize() > halflimit)
{
- int readpos = OutputWriteOffset - (halflimit*2);
+ Platform::Mutex_Lock(AudioLock);
+
+ int readpos = OutputFrontBufferWritePosition - (halflimit*2);
if (readpos < 0) readpos += (OutputBufferSize*2);
- OutputReadOffset = readpos;
+ OutputFrontBufferReadPosition = readpos;
+
+ Platform::Mutex_Unlock(AudioLock);
}
}
int ReadOutput(s16* data, int samples)
{
- if (OutputReadOffset == OutputWriteOffset)
+ Platform::Mutex_Lock(AudioLock);
+ if (OutputFrontBufferReadPosition == OutputFrontBufferWritePosition)
+ {
+ Platform::Mutex_Unlock(AudioLock);
return 0;
+ }
for (int i = 0; i < samples; i++)
{
- *data++ = OutputBuffer[OutputReadOffset];
- *data++ = OutputBuffer[OutputReadOffset + 1];
+ *data++ = OutputFrontBuffer[OutputFrontBufferReadPosition];
+ *data++ = OutputFrontBuffer[OutputFrontBufferReadPosition + 1];
+
+ OutputFrontBufferReadPosition += 2;
+ OutputFrontBufferReadPosition &= ((2*OutputBufferSize)-1);
- //if (OutputReadOffset != OutputWriteOffset)
+ if (OutputFrontBufferWritePosition == OutputFrontBufferReadPosition)
{
- OutputReadOffset += 2;
- OutputReadOffset &= ((2*OutputBufferSize)-1);
- }
- if (OutputReadOffset == OutputWriteOffset)
+ Platform::Mutex_Unlock(AudioLock);
return i+1;
+ }
}
+ Platform::Mutex_Unlock(AudioLock);
return samples;
}
diff --git a/src/SPU.h b/src/SPU.h
index 964841d..c6b1c7f 100644
--- a/src/SPU.h
+++ b/src/SPU.h
@@ -33,7 +33,7 @@ void DoSavestate(Savestate* file);
void SetBias(u16 bias);
-void Mix(u32 samples);
+void Mix(u32 dummy);
void TrimOutput();
void DrainOutput();
@@ -41,6 +41,7 @@ void InitOutput();
int GetOutputSize();
void Sync(bool wait);
int ReadOutput(s16* data, int samples);
+void TransferOutput();
u8 Read8(u32 addr);
u16 Read16(u32 addr);
@@ -123,26 +124,24 @@ public:
void NextSample_PSG();
void NextSample_Noise();
- template<u32 type> void Run(s32* buf, u32 samples);
+ template<u32 type> s32 Run();
- void DoRun(s32* buf, u32 samples)
+ s32 DoRun()
{
- for (u32 s = 0; s < samples; s++)
- buf[s] = 0;
-
switch ((Cnt >> 29) & 0x3)
{
- case 0: Run<0>(buf, samples); break;
- case 1: Run<1>(buf, samples); break;
- case 2: Run<2>(buf, samples); break;
+ case 0: return Run<0>(); break;
+ case 1: return Run<1>(); break;
+ case 2: return Run<2>(); break;
case 3:
- if (Num >= 14) Run<4>(buf, samples);
- else if (Num >= 8) Run<3>(buf, samples);
- break;
+ if (Num >= 14) return Run<4>();
+ else if (Num >= 8) return Run<3>();
+ default:
+ return 0;
}
}
- void PanOutput(s32* inbuf, u32 samples, s32* leftbuf, s32* rightbuf);
+ void PanOutput(s32 in, s32& left, s32& right);
private:
u32 (*BusRead32)(u32 addr);
diff --git a/src/frontend/SharedConfig.h b/src/frontend/SharedConfig.h
new file mode 100644
index 0000000..b4b18c5
--- /dev/null
+++ b/src/frontend/SharedConfig.h
@@ -0,0 +1,13 @@
+#ifndef SHAREDCONFIG_H
+#define SHAREDCONFIG_H
+
+// Declarations only: the variables themselves are defined elsewhere.
+// NOTE(review): presumably the subset of the frontend configuration that
+// platform-independent frontend code (e.g. Util_ROM.cpp, which includes this
+// header) needs -- confirm against the platform config implementation.
+namespace Config
+{
+
+extern int ConsoleType;
+extern int DirectBoot;
+extern int SavestateRelocSRAM;
+
+}
+
+#endif \ No newline at end of file
diff --git a/src/frontend/Util_ROM.cpp b/src/frontend/Util_ROM.cpp
index f61c3e3..9f22f5f 100644
--- a/src/frontend/Util_ROM.cpp
+++ b/src/frontend/Util_ROM.cpp
@@ -21,7 +21,7 @@
#include "FrontendUtil.h"
#include "Config.h"
-#include "qt_sdl/PlatformConfig.h" // FIXME!!!
+#include "SharedConfig.h"
#include "Platform.h"
#include "NDS.h"
diff --git a/src/frontend/qt_sdl/CMakeLists.txt b/src/frontend/qt_sdl/CMakeLists.txt
index 9a0a025..0d695d6 100644
--- a/src/frontend/qt_sdl/CMakeLists.txt
+++ b/src/frontend/qt_sdl/CMakeLists.txt
@@ -95,6 +95,19 @@ if (PORTABLE)
add_definitions(-DPORTABLE)
endif()
+if (APPLE)
+ set_target_properties(melonDS PROPERTIES
+ MACOSX_BUNDLE true
+ MACOSX_BUNDLE_INFO_PLIST ${CMAKE_SOURCE_DIR}/melonDS.plist
+ OUTPUT_NAME melonDS
+ )
+
+ # Copy icon into the bundle
+ target_sources(melonDS PRIVATE "${CMAKE_SOURCE_DIR}/melonDS.icns")
+ set_source_files_properties("${CMAKE_SOURCE_DIR}/melonDS.icns" PROPERTIES MACOSX_PACKAGE_LOCATION Resources)
+
+endif()
+
install(FILES ../../../net.kuribo64.melonDS.desktop DESTINATION ${CMAKE_INSTALL_PREFIX}/share/applications)
install(FILES ../../../icon/melon_16x16.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/16x16/apps RENAME net.kuribo64.melonDS.png)
install(FILES ../../../icon/melon_32x32.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/32x32/apps RENAME net.kuribo64.melonDS.png)
@@ -102,4 +115,4 @@ install(FILES ../../../icon/melon_48x48.png DESTINATION ${CMAKE_INSTALL_PREFIX}/
install(FILES ../../../icon/melon_64x64.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/64x64/apps RENAME net.kuribo64.melonDS.png)
install(FILES ../../../icon/melon_128x128.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/128x128/apps RENAME net.kuribo64.melonDS.png)
install(FILES ../../../icon/melon_256x256.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/256x256/apps RENAME net.kuribo64.melonDS.png)
-install(TARGETS melonDS RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)
+install(TARGETS melonDS BUNDLE DESTINATION ${CMAKE_BINARY_DIR} RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)
diff --git a/src/frontend/qt_sdl/EmuSettingsDialog.cpp b/src/frontend/qt_sdl/EmuSettingsDialog.cpp
index 79ce5ed..3183182 100644
--- a/src/frontend/qt_sdl/EmuSettingsDialog.cpp
+++ b/src/frontend/qt_sdl/EmuSettingsDialog.cpp
@@ -65,6 +65,9 @@ EmuSettingsDialog::EmuSettingsDialog(QWidget* parent) : QDialog(parent), ui(new
ui->chkJITBranchOptimisations->setChecked(Config::JIT_BranchOptimisations != 0);
ui->chkJITLiteralOptimisations->setChecked(Config::JIT_LiteralOptimisations != 0);
ui->chkJITFastMemory->setChecked(Config::JIT_FastMemory != 0);
+ #ifdef __APPLE__
+ ui->chkJITFastMemory->setDisabled(true);
+ #endif
ui->spnJITMaximumBlockSize->setValue(Config::JIT_MaxBlockSize);
#else
ui->chkEnableJIT->setDisabled(true);
@@ -329,6 +332,8 @@ void EmuSettingsDialog::on_chkEnableJIT_toggled()
bool disabled = !ui->chkEnableJIT->isChecked();
ui->chkJITBranchOptimisations->setDisabled(disabled);
ui->chkJITLiteralOptimisations->setDisabled(disabled);
- ui->chkJITFastMemory->setDisabled(disabled);
+ #ifndef __APPLE__
+ ui->chkJITFastMemory->setDisabled(disabled);
+ #endif
ui->spnJITMaximumBlockSize->setDisabled(disabled);
}
diff --git a/src/frontend/qt_sdl/InputConfigDialog.cpp b/src/frontend/qt_sdl/InputConfigDialog.cpp
index 9f08731..eaf1e9b 100644
--- a/src/frontend/qt_sdl/InputConfigDialog.cpp
+++ b/src/frontend/qt_sdl/InputConfigDialog.cpp
@@ -216,6 +216,7 @@ KeyMapButton::KeyMapButton(int* mapping, bool hotkey) : QPushButton()
setCheckable(true);
setText(mappingText());
+ setFocusPolicy(Qt::StrongFocus); //Fixes binding keys in macOS
connect(this, &KeyMapButton::clicked, this, &KeyMapButton::onClick);
}
diff --git a/src/frontend/qt_sdl/LAN_PCap.cpp b/src/frontend/qt_sdl/LAN_PCap.cpp
index ce278bc..3381e80 100644
--- a/src/frontend/qt_sdl/LAN_PCap.cpp
+++ b/src/frontend/qt_sdl/LAN_PCap.cpp
@@ -33,7 +33,11 @@
#include <sys/types.h>
#include <ifaddrs.h>
#include <netinet/in.h>
- #include <linux/if_packet.h>
+ #ifdef __linux__
+ #include <linux/if_packet.h>
+ #else
+ #include <net/if_dl.h>
+ #endif
#endif
@@ -66,6 +70,9 @@ const char* PCapLibNames[] =
#ifdef __WIN32__
// TODO: name for npcap in non-WinPCap mode
"wpcap.dll",
+#elif defined(__APPLE__)
+ "libpcap.A.dylib",
+ "libpcap.dylib",
#else
// Linux lib names
"libpcap.so.1",
@@ -276,6 +283,7 @@ bool Init(bool open_adapter)
struct sockaddr_in* sa = (sockaddr_in*)curaddr->ifa_addr;
memcpy(adata->IP_v4, &sa->sin_addr, 4);
}
+ #ifdef __linux__
else if (af == AF_PACKET)
{
struct sockaddr_ll* sa = (sockaddr_ll*)curaddr->ifa_addr;
@@ -284,7 +292,16 @@ bool Init(bool open_adapter)
else
memcpy(adata->MAC, sa->sll_addr, 6);
}
-
+ #else
+ else if (af == AF_LINK)
+ {
+ struct sockaddr_dl* sa = (sockaddr_dl*)curaddr->ifa_addr;
+ if (sa->sdl_alen != 6)
+ printf("weird MAC length %d for %s\n", sa->sdl_alen, curaddr->ifa_name);
+ else
+ memcpy(adata->MAC, LLADDR(sa), 6);
+ }
+ #endif
curaddr = curaddr->ifa_next;
}
}
diff --git a/src/frontend/qt_sdl/Platform.cpp b/src/frontend/qt_sdl/Platform.cpp
index a716feb..d3480e4 100644
--- a/src/frontend/qt_sdl/Platform.cpp
+++ b/src/frontend/qt_sdl/Platform.cpp
@@ -23,6 +23,7 @@
#include <QDir>
#include <QThread>
#include <QSemaphore>
+#include <QMutex>
#include <QOpenGLContext>
#include "Platform.h"
@@ -187,53 +188,77 @@ FILE* OpenLocalFile(const char* path, const char* mode)
return OpenFile(fullpath.toUtf8(), mode, mode[0] != 'w');
}
-void* Thread_Create(void (* func)())
+Thread* Thread_Create(void (* func)())
{
QThread* t = QThread::create(func);
t->start();
- return (void*) t;
+ return (Thread*) t;
}
-void Thread_Free(void* thread)
+void Thread_Free(Thread* thread)
{
QThread* t = (QThread*) thread;
t->terminate();
delete t;
}
-void Thread_Wait(void* thread)
+void Thread_Wait(Thread* thread)
{
((QThread*) thread)->wait();
}
-void* Semaphore_Create()
+Semaphore* Semaphore_Create()
{
- return new QSemaphore();
+ return (Semaphore*)new QSemaphore();
}
-void Semaphore_Free(void* sema)
+void Semaphore_Free(Semaphore* sema)
{
delete (QSemaphore*) sema;
}
-void Semaphore_Reset(void* sema)
+void Semaphore_Reset(Semaphore* sema)
{
QSemaphore* s = (QSemaphore*) sema;
s->acquire(s->available());
}
-void Semaphore_Wait(void* sema)
+void Semaphore_Wait(Semaphore* sema)
{
((QSemaphore*) sema)->acquire();
}
-void Semaphore_Post(void* sema)
+void Semaphore_Post(Semaphore* sema, int count)
{
- ((QSemaphore*) sema)->release();
+ ((QSemaphore*) sema)->release(count);
}
+// Allocates a new QMutex and hands it out as an opaque Mutex handle.
+Mutex* Mutex_Create()
+{
+    QMutex* qmutex = new QMutex();
+    return reinterpret_cast<Mutex*>(qmutex);
+}
+
+// Destroys the QMutex behind an opaque Mutex handle.
+void Mutex_Free(Mutex* mutex)
+{
+    QMutex* qmutex = reinterpret_cast<QMutex*>(mutex);
+    delete qmutex;
+}
+
+// Locks the QMutex behind an opaque Mutex handle.
+void Mutex_Lock(Mutex* mutex)
+{
+    QMutex* qmutex = reinterpret_cast<QMutex*>(mutex);
+    qmutex->lock();
+}
+
+// Unlocks the QMutex behind an opaque Mutex handle.
+void Mutex_Unlock(Mutex* mutex)
+{
+    QMutex* qmutex = reinterpret_cast<QMutex*>(mutex);
+    qmutex->unlock();
+}
+
+// Calls try_lock() on the QMutex behind an opaque Mutex handle and returns
+// its result.
+bool Mutex_TryLock(Mutex* mutex)
+{
+    QMutex* qmutex = reinterpret_cast<QMutex*>(mutex);
+    return qmutex->try_lock();
+}
void* GL_GetProcAddress(const char* proc)
{
diff --git a/src/frontend/qt_sdl/PlatformConfig.cpp b/src/frontend/qt_sdl/PlatformConfig.cpp
index c2d40c4..9861662 100644
--- a/src/frontend/qt_sdl/PlatformConfig.cpp
+++ b/src/frontend/qt_sdl/PlatformConfig.cpp
@@ -120,7 +120,7 @@ ConfigEntry PlatformConfigFile[] =
{"HKJoy_Reset", 0, &HKJoyMapping[HK_Reset], -1, NULL, 0},
{"HKJoy_FastForward", 0, &HKJoyMapping[HK_FastForward], -1, NULL, 0},
{"HKJoy_FastForwardToggle", 0, &HKJoyMapping[HK_FastForwardToggle], -1, NULL, 0},
- {"HKJoy_FastForwardToggle", 0, &HKJoyMapping[HK_FullscreenToggle], -1, NULL, 0},
+ {"HKJoy_FullscreenToggle", 0, &HKJoyMapping[HK_FullscreenToggle], -1, NULL, 0},
{"HKJoy_SolarSensorDecrease", 0, &HKJoyMapping[HK_SolarSensorDecrease], -1, NULL, 0},
{"HKJoy_SolarSensorIncrease", 0, &HKJoyMapping[HK_SolarSensorIncrease], -1, NULL, 0},
diff --git a/src/frontend/qt_sdl/WifiSettingsDialog.cpp b/src/frontend/qt_sdl/WifiSettingsDialog.cpp
index 67297ad..24b339d 100644
--- a/src/frontend/qt_sdl/WifiSettingsDialog.cpp
+++ b/src/frontend/qt_sdl/WifiSettingsDialog.cpp
@@ -54,7 +54,7 @@ WifiSettingsDialog::WifiSettingsDialog(QWidget* parent) : QDialog(parent), ui(ne
LAN_Socket::Init();
haspcap = LAN_PCap::Init(false);
- ui->cbDirectMode->setText("Direct mode (requires " PCAP_NAME " and ethernet connection)");
+ ui->rbDirectMode->setText("Direct mode (requires " PCAP_NAME " and ethernet connection)");
ui->cbBindAnyAddr->setChecked(Config::SocketBindAnyAddr != 0);
ui->cbRandomizeMAC->setChecked(Config::RandomizeMAC != 0);
@@ -71,8 +71,9 @@ WifiSettingsDialog::WifiSettingsDialog(QWidget* parent) : QDialog(parent), ui(ne
}
ui->cbxDirectAdapter->setCurrentIndex(sel);
- ui->cbDirectMode->setChecked(Config::DirectLAN != 0);
- if (!haspcap) ui->cbDirectMode->setEnabled(false);
+ ui->rbDirectMode->setChecked(Config::DirectLAN != 0);
+ ui->rbIndirectMode->setChecked(Config::DirectLAN == 0);
+ if (!haspcap) ui->rbDirectMode->setEnabled(false);
updateAdapterControls();
}
@@ -101,7 +102,7 @@ void WifiSettingsDialog::done(int r)
Config::SocketBindAnyAddr = ui->cbBindAnyAddr->isChecked() ? 1:0;
Config::RandomizeMAC = randommac;
- Config::DirectLAN = ui->cbDirectMode->isChecked() ? 1:0;
+ Config::DirectLAN = ui->rbDirectMode->isChecked() ? 1:0;
int sel = ui->cbxDirectAdapter->currentIndex();
if (sel < 0 || sel >= LAN_PCap::NumAdapters) sel = 0;
@@ -125,11 +126,14 @@ void WifiSettingsDialog::done(int r)
closeDlg();
}
-void WifiSettingsDialog::on_cbDirectMode_stateChanged(int state)
+void WifiSettingsDialog::on_rbDirectMode_clicked()
+{
+ updateAdapterControls();
+}
+void WifiSettingsDialog::on_rbIndirectMode_clicked()
{
updateAdapterControls();
}
-
void WifiSettingsDialog::on_cbxDirectAdapter_currentIndexChanged(int sel)
{
if (!haspcap) return;
@@ -153,7 +157,7 @@ void WifiSettingsDialog::on_cbxDirectAdapter_currentIndexChanged(int sel)
void WifiSettingsDialog::updateAdapterControls()
{
- bool enable = haspcap && ui->cbDirectMode->isChecked();
+ bool enable = haspcap && ui->rbDirectMode->isChecked();
ui->cbxDirectAdapter->setEnabled(enable);
ui->lblAdapterMAC->setEnabled(enable);
diff --git a/src/frontend/qt_sdl/WifiSettingsDialog.h b/src/frontend/qt_sdl/WifiSettingsDialog.h
index 6c1f863..600941f 100644
--- a/src/frontend/qt_sdl/WifiSettingsDialog.h
+++ b/src/frontend/qt_sdl/WifiSettingsDialog.h
@@ -55,7 +55,8 @@ public:
private slots:
void done(int r);
- void on_cbDirectMode_stateChanged(int state);
+ void on_rbDirectMode_clicked();
+ void on_rbIndirectMode_clicked();
void on_cbxDirectAdapter_currentIndexChanged(int sel);
private:
diff --git a/src/frontend/qt_sdl/WifiSettingsDialog.ui b/src/frontend/qt_sdl/WifiSettingsDialog.ui
index 6668d88..174a3dc 100644
--- a/src/frontend/qt_sdl/WifiSettingsDialog.ui
+++ b/src/frontend/qt_sdl/WifiSettingsDialog.ui
@@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
- <width>479</width>
- <height>240</height>
+ <width>572</width>
+ <height>296</height>
</rect>
</property>
<property name="sizePolicy">
@@ -58,67 +58,86 @@
<string>Online</string>
</property>
<layout class="QGridLayout" name="gridLayout_2">
- <item row="2" column="0">
- <widget class="QLabel" name="label_2">
- <property name="text">
- <string>MAC address:</string>
+ <item row="3" column="0" rowspan="3" colspan="2">
+ <widget class="QGroupBox" name="groupBox_3">
+ <property name="title">
+ <string>Direct Mode Settings</string>
</property>
+ <layout class="QGridLayout" name="gridLayout_3">
+ <item row="0" column="0">
+ <widget class="QLabel" name="label">
+ <property name="text">
+ <string>Network adapter:</string>
+ </property>
+ </widget>
+ </item>
+ <item row="0" column="1">
+ <widget class="QComboBox" name="cbxDirectAdapter">
+ <property name="sizePolicy">
+ <sizepolicy hsizetype="Expanding" vsizetype="Fixed">
+ <horstretch>0</horstretch>
+ <verstretch>0</verstretch>
+ </sizepolicy>
+ </property>
+ <property name="minimumSize">
+ <size>
+ <width>300</width>
+ <height>0</height>
+ </size>
+ </property>
+ <property name="whatsThis">
+ <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Selects the network adapter through which to route network traffic under direct mode.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
+ </property>
+ </widget>
+ </item>
+ <item row="1" column="0">
+ <widget class="QLabel" name="label_2">
+ <property name="text">
+ <string>MAC address:</string>
+ </property>
+ </widget>
+ </item>
+ <item row="1" column="1">
+ <widget class="QLabel" name="lblAdapterMAC">
+ <property name="text">
+ <string>[PLACEHOLDER]</string>
+ </property>
+ </widget>
+ </item>
+ <item row="2" column="0">
+ <widget class="QLabel" name="label_3">
+ <property name="text">
+ <string>IP address:</string>
+ </property>
+ </widget>
+ </item>
+ <item row="2" column="1">
+ <widget class="QLabel" name="lblAdapterIP">
+ <property name="text">
+ <string>[PLACEHOLDER]</string>
+ </property>
+ </widget>
+ </item>
+ </layout>
</widget>
</item>
- <item row="0" column="0" colspan="2">
- <widget class="QCheckBox" name="cbDirectMode">
+ <item row="1" column="0">
+ <widget class="QRadioButton" name="rbIndirectMode">
<property name="whatsThis">
- <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Direct mode directly routes network traffic to the host network. It is the most reliable, but requires an ethernet connection.&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Non-direct mode uses a layer of emulation to get around this, but is more prone to problems.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
+ <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Indirect mode uses libslirp. It requires no extra setup and is easy to use.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
- <string>Direct mode [TEXT PLACEHOLDER]</string>
+ <string>Indirect Mode (uses libslirp, recommended)</string>
</property>
</widget>
</item>
- <item row="1" column="1">
- <widget class="QComboBox" name="cbxDirectAdapter">
- <property name="sizePolicy">
- <sizepolicy hsizetype="Expanding" vsizetype="Fixed">
- <horstretch>0</horstretch>
- <verstretch>0</verstretch>
- </sizepolicy>
- </property>
- <property name="minimumSize">
- <size>
- <width>350</width>
- <height>0</height>
- </size>
- </property>
+ <item row="2" column="0">
+ <widget class="QRadioButton" name="rbDirectMode">
<property name="whatsThis">
- <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Selects the network adapter through which to route network traffic under direct mode.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
- </property>
- </widget>
- </item>
- <item row="1" column="0">
- <widget class="QLabel" name="label">
- <property name="text">
- <string>Network adapter:</string>
- </property>
- </widget>
- </item>
- <item row="3" column="0">
- <widget class="QLabel" name="label_3">
- <property name="text">
- <string>IP address:</string>
- </property>
- </widget>
- </item>
- <item row="2" column="1">
- <widget class="QLabel" name="lblAdapterMAC">
- <property name="text">
- <string>[PLACEHOLDER]</string>
+ <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Direct mode directly routes network traffic to the host network. It is the most reliable, but requires an ethernet connection.&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Non-direct mode uses a layer of emulation to get around this, but is more prone to problems.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
- </widget>
- </item>
- <item row="3" column="1">
- <widget class="QLabel" name="lblAdapterIP">
<property name="text">
- <string>[PLACEHOLDER]</string>
+ <string>Direct mode [TEXT PLACEHOLDER]</string>
</property>
</widget>
</item>
diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp
index 3a735fb..2d3749d 100644
--- a/src/frontend/qt_sdl/main.cpp
+++ b/src/frontend/qt_sdl/main.cpp
@@ -355,10 +355,10 @@ void EmuThread::run()
Input::Init();
u32 nframes = 0;
- u32 starttick = SDL_GetTicks();
- u32 lasttick = starttick;
- u32 lastmeasuretick = lasttick;
- u32 fpslimitcount = 0;
+ double perfCountsSec = 1.0 / SDL_GetPerformanceFrequency();
+ double lastTime = SDL_GetPerformanceCounter() * perfCountsSec;
+ double frameLimitError = 0.0;
+ double lastMeasureTime = lastTime;
char melontitle[100];
@@ -492,49 +492,43 @@ void EmuThread::run()
SDL_UnlockMutex(audioSyncLock);
}
- float framerate = (1000.0f * nlines) / (60.0f * 263.0f);
+ double frametimeStep = nlines / (60.0 * 263.0);
{
- u32 curtick = SDL_GetTicks();
- u32 delay = curtick - lasttick;
-
bool limitfps = Config::LimitFPS && !fastforward;
- if (limitfps)
- {
- float wantedtickF = starttick + (framerate * (fpslimitcount+1));
- u32 wantedtick = (u32)ceil(wantedtickF);
- if (curtick < wantedtick) SDL_Delay(wantedtick - curtick);
- lasttick = SDL_GetTicks();
- fpslimitcount++;
- if ((abs(wantedtickF - (float)wantedtick) < 0.001312) || (fpslimitcount > 60))
- {
- fpslimitcount = 0;
- starttick = lasttick;
- }
- }
- else
+ double practicalFramelimit = limitfps ? frametimeStep : 1.0 / 1000.0;
+
+ double curtime = SDL_GetPerformanceCounter() * perfCountsSec;
+
+ frameLimitError += practicalFramelimit - (curtime - lastTime);
+ if (frameLimitError < -practicalFramelimit)
+ frameLimitError = -practicalFramelimit;
+ if (frameLimitError > practicalFramelimit)
+ frameLimitError = practicalFramelimit;
+
+ if (round(frameLimitError * 1000.0) > 0.0)
{
- if (delay < 1) SDL_Delay(1);
- lasttick = SDL_GetTicks();
+ SDL_Delay(round(frameLimitError * 1000.0));
+ double timeBeforeSleep = curtime;
+ curtime = SDL_GetPerformanceCounter() * perfCountsSec;
+ frameLimitError -= curtime - timeBeforeSleep;
}
+
+ lastTime = curtime;
}
nframes++;
if (nframes >= 30)
{
- u32 tick = SDL_GetTicks();
- u32 diff = tick - lastmeasuretick;
- lastmeasuretick = tick;
+ double time = SDL_GetPerformanceCounter() * perfCountsSec;
+ double dt = time - lastMeasureTime;
+ lastMeasureTime = time;
- u32 fps;
- if (diff < 1) fps = 77777;
- else fps = (nframes * 1000) / diff;
+ u32 fps = round(nframes / dt);
nframes = 0;
- float fpstarget;
- if (framerate < 1) fpstarget = 999;
- else fpstarget = 1000.0f/framerate;
+ float fpstarget = 1.0/frametimeStep;
sprintf(melontitle, "[%d/%.0f] melonDS " MELONDS_VERSION, fps, fpstarget);
changeWindowTitle(melontitle);
@@ -544,10 +538,8 @@ void EmuThread::run()
{
// paused
nframes = 0;
- lasttick = SDL_GetTicks();
- starttick = lasttick;
- lastmeasuretick = lasttick;
- fpslimitcount = 0;
+ lastTime = SDL_GetPerformanceCounter() * perfCountsSec;
+ lastMeasureTime = lastTime;
emit windowUpdate();
@@ -1354,7 +1346,7 @@ void MainWindow::dragEnterEvent(QDragEnterEvent* event)
QString filename = urls.at(0).toLocalFile();
QString ext = filename.right(3);
- if (ext == "nds" || ext == "srl" || ext == "dsi" || (ext == "gba" && RunningSomething))
+ if (ext == "nds" || ext == "srl" || ext == "dsi" || ext == "gba")
event->acceptProposedAction();
}