diff options
author | Arisotura <thetotalworm@gmail.com> | 2020-12-04 18:28:15 +0100 |
---|---|---|
committer | Arisotura <thetotalworm@gmail.com> | 2020-12-04 18:28:15 +0100 |
commit | 129018a6626cbec915ef73484c51c9d07af8e8b9 (patch) | |
tree | 8e5712bd241319731f7b6ade3363cefe845e3a5e /src | |
parent | 6aad429383015a0ac135b081931ae9c5876a7ad0 (diff) | |
parent | 42e083960e52cce31589714dcc7fab8e173efb81 (diff) |
Merge remote-tracking branch 'remotes/origin/master' into dsi_camera
Diffstat (limited to 'src')
45 files changed, 1658 insertions, 749 deletions
diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index c9d2b62..1921f13 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -176,7 +176,7 @@ T SlowRead9(u32 addr, ARMv5* cpu) } template <typename T, int ConsoleType> -void SlowWrite9(u32 addr, ARMv5* cpu, T val) +void SlowWrite9(u32 addr, ARMv5* cpu, u32 val) { addr &= ~(sizeof(T) - 1); @@ -224,7 +224,7 @@ T SlowRead7(u32 addr) } template <typename T, int ConsoleType> -void SlowWrite7(u32 addr, T val) +void SlowWrite7(u32 addr, u32 val) { addr &= ~(sizeof(T) - 1); @@ -266,16 +266,16 @@ void SlowBlockTransfer7(u32 addr, u64* data, u32 num) #define INSTANTIATE_SLOWMEM(consoleType) \ template void SlowWrite9<u32, consoleType>(u32, ARMv5*, u32); \ - template void SlowWrite9<u16, consoleType>(u32, ARMv5*, u16); \ - template void SlowWrite9<u8, consoleType>(u32, ARMv5*, u8); \ + template void SlowWrite9<u16, consoleType>(u32, ARMv5*, u32); \ + template void SlowWrite9<u8, consoleType>(u32, ARMv5*, u32); \ \ template u32 SlowRead9<u32, consoleType>(u32, ARMv5*); \ template u16 SlowRead9<u16, consoleType>(u32, ARMv5*); \ template u8 SlowRead9<u8, consoleType>(u32, ARMv5*); \ \ template void SlowWrite7<u32, consoleType>(u32, u32); \ - template void SlowWrite7<u16, consoleType>(u32, u16); \ - template void SlowWrite7<u8, consoleType>(u32, u8); \ + template void SlowWrite7<u16, consoleType>(u32, u32); \ + template void SlowWrite7<u8, consoleType>(u32, u32); \ \ template u32 SlowRead7<u32, consoleType>(u32); \ template u16 SlowRead7<u16, consoleType>(u32); \ @@ -298,6 +298,7 @@ void Init() void DeInit() { + ResetBlockCache(); ARMJIT_Memory::DeInit(); delete JITCompiler; @@ -594,7 +595,8 @@ void CompileBlock(ARM* cpu) u32 r15 = cpu->R[15]; u32 addressRanges[Config::JIT_MaxBlockSize]; - u32 addressMasks[Config::JIT_MaxBlockSize] = {0}; + u32 addressMasks[Config::JIT_MaxBlockSize]; + memset(addressMasks, 0, Config::JIT_MaxBlockSize * sizeof(u32)); u32 numAddressRanges = 0; u32 numLiterals = 0; @@ -1116,6 +1118,7 @@ void 
ResetBlockCache() range->Blocks.Clear(); range->Code = 0; } + delete block; } JitBlocks9.clear(); JitBlocks7.clear(); diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp index 80c7f04..93563b9 100644 --- a/src/ARMJIT_A64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp @@ -68,6 +68,11 @@ void Compiler::A_Comp_MRS() MOV(rd, RCPSR); } +void UpdateModeTrampoline(ARM* arm, u32 oldmode, u32 newmode) +{ + arm->UpdateMode(oldmode, newmode); +} + void Compiler::A_Comp_MSR() { Comp_AddCycles_C(); @@ -139,7 +144,7 @@ void Compiler::A_Comp_MSR() PushRegs(true); - QuickCallFunction(X3, (void*)&ARM::UpdateMode); + QuickCallFunction(X3, (void*)&UpdateModeTrampoline); PopRegs(true); } @@ -915,4 +920,4 @@ void Compiler::Comp_AddCycles_CD() ConstantCycles += cycles; } -}
\ No newline at end of file +} diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.h b/src/ARMJIT_A64/ARMJIT_Compiler.h index af7497a..a79e9da 100644 --- a/src/ARMJIT_A64/ARMJIT_Compiler.h +++ b/src/ARMJIT_A64/ARMJIT_Compiler.h @@ -187,6 +187,7 @@ public: void Comp_RegShiftReg(int op, bool S, Op2& op2, Arm64Gen::ARM64Reg rs); bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr); + enum { memop_Writeback = 1 << 0, @@ -213,8 +214,8 @@ public: return (u8*)entry - GetRXBase(); } - bool IsJITFault(u64 pc); - s64 RewriteMemAccess(u64 pc); + bool IsJITFault(u8* pc); + u8* RewriteMemAccess(u8* pc); void SwapCodeRegion() { diff --git a/src/ARMJIT_A64/ARMJIT_Linkage.s b/src/ARMJIT_A64/ARMJIT_Linkage.S index 7886315..7886315 100644 --- a/src/ARMJIT_A64/ARMJIT_Linkage.s +++ b/src/ARMJIT_A64/ARMJIT_Linkage.S diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp index 86e257a..2c14dc6 100644 --- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp @@ -9,37 +9,34 @@ using namespace Arm64Gen; namespace ARMJIT { -bool Compiler::IsJITFault(u64 pc) +bool Compiler::IsJITFault(u8* pc) { - return pc >= (u64)GetRXBase() && pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize); + return (u64)pc >= (u64)GetRXBase() && (u64)pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize); } -s64 Compiler::RewriteMemAccess(u64 pc) +u8* Compiler::RewriteMemAccess(u8* pc) { - ptrdiff_t pcOffset = pc - (u64)GetRXBase(); + ptrdiff_t pcOffset = pc - GetRXBase(); auto it = LoadStorePatches.find(pcOffset); if (it != LoadStorePatches.end()) { LoadStorePatch patch = it->second; + LoadStorePatches.erase(it); ptrdiff_t curCodeOffset = GetCodeOffset(); SetCodePtrUnsafe(pcOffset + patch.PatchOffset); BL(patch.PatchFunc); - for (int i = 0; i < patch.PatchSize / 4 - 1; i++) HINT(HINT_NOP); - FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr()); SetCodePtrUnsafe(curCodeOffset); - LoadStorePatches.erase(it); - - return 
patch.PatchOffset; + return pc + (ptrdiff_t)patch.PatchOffset; } printf("this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc)); abort(); @@ -192,7 +189,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) else { LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7); - if (size == 32) + if (size == 32 && !addrIsStatic) { UBFIZ(W0, W0, 3, 2); RORV(rdMapped, rdMapped, W0); diff --git a/src/ARMJIT_Internal.h b/src/ARMJIT_Internal.h index 4244470..b1e35f5 100644 --- a/src/ARMJIT_Internal.h +++ b/src/ARMJIT_Internal.h @@ -216,9 +216,9 @@ template <u32 Num> void LinkBlock(ARM* cpu, u32 codeOffset); template <typename T, int ConsoleType> T SlowRead9(u32 addr, ARMv5* cpu); -template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, T val); +template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, u32 val); template <typename T, int ConsoleType> T SlowRead7(u32 addr); -template <typename T, int ConsoleType> void SlowWrite7(u32 addr, T val); +template <typename T, int ConsoleType> void SlowWrite7(u32 addr, u32 val); template <bool Write, int ConsoleType> void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu); template <bool Write, int ConsoleType> void SlowBlockTransfer7(u32 addr, u64* data, u32 num); diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp index d321d2f..f9f82aa 100644 --- a/src/ARMJIT_Memory.cpp +++ b/src/ARMJIT_Memory.cpp @@ -10,6 +10,12 @@ #include <signal.h> #endif +#if defined(__ANDROID__) +#include <dlfcn.h> +#include <linux/ashmem.h> +#include <sys/ioctl.h> +#endif + #include "ARMJIT_Memory.h" #include "ARMJIT_Internal.h" @@ -22,7 +28,9 @@ #include "NDSCart.h" #include "SPU.h" +#ifndef __APPLE__ #include <malloc.h> +#endif /* We're handling fastmem here. @@ -40,7 +48,8 @@ We handle this by only mapping those regions which are actually used and by praying the games don't go wild. - Beware, this file is full of platform specific code. 
+ Beware, this file is full of platform specific code and copied + from Dolphin, so enjoy the copied comments! */ @@ -49,12 +58,16 @@ namespace ARMJIT_Memory struct FaultDescription { u32 EmulatedFaultAddr; - u64 FaultPC; + u8* FaultPC; }; -bool FaultHandler(FaultDescription* faultDesc, s32& offset); +bool FaultHandler(FaultDescription& faultDesc); } +#if defined(__ANDROID__) +#define ASHMEM_DEVICE "/dev/ashmem" +#endif + #if defined(__SWITCH__) // with LTO the symbols seem to be not properly overriden // if they're somewhere else @@ -75,7 +88,7 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx) ARMJIT_Memory::FaultDescription desc; u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); desc.EmulatedFaultAddr = (u8*)ctx->far.x - curArea; - desc.FaultPC = ctx->pc.x; + desc.FaultPC = (u8*)ctx->pc.x; u64 integerRegisters[33]; memcpy(integerRegisters, &ctx->cpu_gprs[0].x, 8*29); @@ -84,10 +97,9 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx) integerRegisters[31] = ctx->sp.x; integerRegisters[32] = ctx->pc.x; - s32 offset; - if (ARMJIT_Memory::FaultHandler(&desc, offset)) + if (ARMJIT_Memory::FaultHandler(desc, offset)) { - integerRegisters[32] += offset; + integerRegisters[32] = (u64)desc.FaultPC; ARM_RestoreContext(integerRegisters); } @@ -117,12 +129,11 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo) ARMJIT_Memory::FaultDescription desc; u8* curArea = (u8*)(NDS::CurCPU == 0 ? 
ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); desc.EmulatedFaultAddr = (u8*)exceptionInfo->ExceptionRecord->ExceptionInformation[1] - curArea; - desc.FaultPC = exceptionInfo->ContextRecord->Rip; + desc.FaultPC = (u8*)exceptionInfo->ContextRecord->Rip; - s32 offset = 0; - if (ARMJIT_Memory::FaultHandler(&desc, offset)) + if (ARMJIT_Memory::FaultHandler(desc)) { - exceptionInfo->ContextRecord->Rip += offset; + exceptionInfo->ContextRecord->Rip = (u64)desc.FaultPC; return EXCEPTION_CONTINUE_EXECUTION; } @@ -131,50 +142,75 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo) #else -struct sigaction NewSa; -struct sigaction OldSa; +static struct sigaction OldSaSegv; +static struct sigaction OldSaBus; static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext) { + if (sig != SIGSEGV && sig != SIGBUS) + { + // We are not interested in other signals - handle it as usual. + return; + } + if (info->si_code != SEGV_MAPERR && info->si_code != SEGV_ACCERR) + { + // Huh? Return. + return; + } + ucontext_t* context = (ucontext_t*)rawContext; - + ARMJIT_Memory::FaultDescription desc; u8* curArea = (u8*)(NDS::CurCPU == 0 ? 
ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); #ifdef __x86_64__ desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea; - desc.FaultPC = context->uc_mcontext.gregs[REG_RIP]; + #ifdef __APPLE__ + desc.FaultPC = (u8*)context->uc_mcontext->__ss.__rip; + #else + desc.FaultPC = (u8*)context->uc_mcontext.gregs[REG_RIP]; + #endif + #else desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea; - desc.FaultPC = context->uc_mcontext.pc; + desc.FaultPC = (u8*)context->uc_mcontext.pc; #endif - s32 offset = 0; - if (ARMJIT_Memory::FaultHandler(&desc, offset)) + if (ARMJIT_Memory::FaultHandler(desc)) { #ifdef __x86_64__ - context->uc_mcontext.gregs[REG_RIP] += offset; + #ifdef __APPLE__ + context->uc_mcontext->__ss.__rip = (u64)desc.FaultPC; + #else + context->uc_mcontext.gregs[REG_RIP] = (u64)desc.FaultPC; + #endif #else - context->uc_mcontext.pc += offset; + context->uc_mcontext.pc = (u64)desc.FaultPC; #endif return; } - if (OldSa.sa_flags & SA_SIGINFO) + struct sigaction* oldSa; + if (sig == SIGSEGV) + oldSa = &OldSaSegv; + else + oldSa = &OldSaBus; + + if (oldSa->sa_flags & SA_SIGINFO) { - OldSa.sa_sigaction(sig, info, rawContext); + oldSa->sa_sigaction(sig, info, rawContext); return; } - if (OldSa.sa_handler == SIG_DFL) + if (oldSa->sa_handler == SIG_DFL) { signal(sig, SIG_DFL); return; } - if (OldSa.sa_handler == SIG_IGN) + if (oldSa->sa_handler == SIG_IGN) { // Ignore signal return; } - OldSa.sa_handler(sig); + oldSa->sa_handler(sig); } #endif @@ -231,7 +267,7 @@ enum { memstate_Unmapped, memstate_MappedRW, - // on switch this is unmapped as well + // on Switch this is unmapped as well memstate_MappedProtected, }; @@ -314,14 +350,16 @@ struct Mapping void Unmap(int region) { + u32 dtcmStart = NDS::ARM9->DTCMBase; + u32 dtcmSize = NDS::ARM9->DTCMSize; bool skipDTCM = Num == 0 && region != memregion_DTCM; u8* statuses = Num == 0 ? 
MappingStatus9 : MappingStatus7; u32 offset = 0; while (offset < Size) { - if (skipDTCM && Addr + offset == NDS::ARM9->DTCMBase) + if (skipDTCM && Addr + offset == dtcmStart) { - offset += NDS::ARM9->DTCMSize; + offset += dtcmSize; } else { @@ -329,7 +367,7 @@ struct Mapping u8 status = statuses[(Addr + offset) >> 12]; while (statuses[(Addr + offset) >> 12] == status && offset < Size - && (!skipDTCM || Addr + offset != NDS::ARM9->DTCMBase)) + && (!skipDTCM || Addr + offset != dtcmStart)) { assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped); statuses[(Addr + offset) >> 12] = memstate_Unmapped; @@ -347,9 +385,33 @@ struct Mapping #endif } } + #ifndef __SWITCH__ - bool succeded = UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size); - assert(succeded); +#ifndef _WIN32 + u32 dtcmEnd = dtcmStart + dtcmSize; + if (Num == 0 + && dtcmEnd >= Addr + && dtcmStart < Addr + Size) + { + bool success; + if (dtcmStart > Addr) + { + success = UnmapFromRange(Addr, 0, OffsetsPerRegion[region] + LocalOffset, dtcmStart - Addr); + assert(success); + } + if (dtcmEnd < Addr + Size) + { + u32 offset = dtcmStart - Addr + dtcmSize; + success = UnmapFromRange(dtcmEnd, 0, OffsetsPerRegion[region] + LocalOffset + offset, Size - offset); + assert(success); + } + } + else +#endif + { + bool succeded = UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size); + assert(succeded); + } #endif } }; @@ -418,10 +480,10 @@ void RemapDTCM(u32 newBase, u32 newSize) printf("unmapping %d %x %x %x %x\n", region, mapping.Addr, mapping.Size, mapping.Num, mapping.LocalOffset); - bool oldOverlap = NDS::ARM9->DTCMSize > 0 && !(oldDTCMBase >= end || oldDTCBEnd <= start); - bool newOverlap = newSize > 0 && !(newBase >= end || newEnd <= start); + bool overlap = (NDS::ARM9->DTCMSize > 0 && oldDTCMBase < end && oldDTCBEnd > start) + || (newSize > 0 && newBase < end && newEnd > start); - if (mapping.Num == 0 && (oldOverlap || newOverlap)) + if (mapping.Num == 0 && overlap) { 
mapping.Unmap(region); Mappings[region].Remove(i); @@ -445,8 +507,8 @@ void RemapNWRAM(int num) for (int i = 0; i < Mappings[memregion_SharedWRAM].Length;) { Mapping& mapping = Mappings[memregion_SharedWRAM][i]; - if (!(DSi::NWRAMStart[mapping.Num][num] >= mapping.Addr + mapping.Size - || DSi::NWRAMEnd[mapping.Num][num] < mapping.Addr)) + if (DSi::NWRAMStart[mapping.Num][num] < mapping.Addr + mapping.Size + && DSi::NWRAMEnd[mapping.Num][num] > mapping.Addr) { mapping.Unmap(memregion_SharedWRAM); Mappings[memregion_SharedWRAM].Remove(i); @@ -469,7 +531,7 @@ void RemapSWRAM() for (int i = 0; i < Mappings[memregion_WRAM7].Length;) { Mapping& mapping = Mappings[memregion_WRAM7][i]; - if (mapping.Addr + mapping.Size < 0x03800000) + if (mapping.Addr + mapping.Size <= 0x03800000) { mapping.Unmap(memregion_WRAM7); Mappings[memregion_WRAM7].Remove(i); @@ -501,26 +563,53 @@ bool MapAtAddress(u32 addr) return false; u8* states = num == 0 ? MappingStatus9 : MappingStatus7; - printf("trying to create mapping %x, %x %x %d %d\n", mirrorStart, mirrorSize, memoryOffset, region, num); + printf("mapping mirror %x, %x %x %d %d\n", mirrorStart, mirrorSize, memoryOffset, region, num); bool isExecutable = ARMJIT::CodeMemRegions[region]; + u32 dtcmStart = NDS::ARM9->DTCMBase; + u32 dtcmSize = NDS::ARM9->DTCMSize; + u32 dtcmEnd = dtcmStart + dtcmSize; #ifndef __SWITCH__ - bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize); - assert(succeded); +#ifndef _WIN32 + if (num == 0 + && dtcmEnd >= mirrorStart + && dtcmStart < mirrorStart + mirrorSize) + { + bool success; + if (dtcmStart > mirrorStart) + { + success = MapIntoRange(mirrorStart, 0, OffsetsPerRegion[region] + memoryOffset, dtcmStart - mirrorStart); + assert(success); + } + if (dtcmEnd < mirrorStart + mirrorSize) + { + u32 offset = dtcmStart - mirrorStart + dtcmSize; + success = MapIntoRange(dtcmEnd, 0, OffsetsPerRegion[region] + memoryOffset + offset, mirrorSize - offset); + 
assert(success); + } + } + else +#endif + { + bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize); + assert(succeded); + } #endif ARMJIT::AddressRange* range = ARMJIT::CodeMemRegions[region] + memoryOffset / 512; // this overcomplicated piece of code basically just finds whole pieces of code memory - // which can be mapped + // which can be mapped/protected u32 offset = 0; bool skipDTCM = num == 0 && region != memregion_DTCM; while (offset < mirrorSize) { - if (skipDTCM && mirrorStart + offset == NDS::ARM9->DTCMBase) + if (skipDTCM && mirrorStart + offset == dtcmStart) { - SetCodeProtectionRange(NDS::ARM9->DTCMBase, NDS::ARM9->DTCMSize, 0, 0); - offset += NDS::ARM9->DTCMSize; +#ifdef _WIN32 + SetCodeProtectionRange(dtcmStart, dtcmSize, 0, 0); +#endif + offset += dtcmSize; } else { @@ -557,26 +646,25 @@ bool MapAtAddress(u32 addr) Mapping mapping{mirrorStart, mirrorSize, memoryOffset, num}; Mappings[region].Add(mapping); - printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + mirrorSize - 1); + //printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + mirrorSize - 1); return true; } -bool FaultHandler(FaultDescription* faultDesc, s32& offset) +bool FaultHandler(FaultDescription& faultDesc) { - if (ARMJIT::JITCompiler->IsJITFault(faultDesc->FaultPC)) + if (ARMJIT::JITCompiler->IsJITFault(faultDesc.FaultPC)) { bool rewriteToSlowPath = true; - u32 addr = faultDesc->EmulatedFaultAddr; + u8* memStatus = NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7; - if ((NDS::CurCPU == 0 ? 
MappingStatus9 : MappingStatus7)[addr >> 12] == memstate_Unmapped) - rewriteToSlowPath = !MapAtAddress(faultDesc->EmulatedFaultAddr); + if (memStatus[faultDesc.EmulatedFaultAddr >> 12] == memstate_Unmapped) + rewriteToSlowPath = !MapAtAddress(faultDesc.EmulatedFaultAddr); if (rewriteToSlowPath) - { - offset = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc->FaultPC); - } + faultDesc.FaultPC = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc.FaultPC); + return true; } return false; @@ -624,22 +712,52 @@ void Init() u8* basePtr = MemoryBase; #else - FastMem9Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); - FastMem7Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); - - MemoryBase = (u8*)mmap(NULL, MemoryTotalSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); - + // this used to be allocated with three different mmaps + // The idea was to give the OS more freedom where to position the buffers, + // but something was bad about this so instead we take this vmem eating monster + // which seems to work better. 
+ MemoryBase = (u8*)mmap(NULL, AddrSpaceSize*4, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); + munmap(MemoryBase, AddrSpaceSize*4); + FastMem9Start = MemoryBase; + FastMem7Start = MemoryBase + AddrSpaceSize; + MemoryBase = MemoryBase + AddrSpaceSize*2; + +#if defined(__ANDROID__) + static void* libandroid = dlopen("libandroid.so", RTLD_LAZY | RTLD_LOCAL); + using type_ASharedMemory_create = int(*)(const char* name, size_t size); + static void* symbol = dlsym(libandroid, "ASharedMemory_create"); + static auto shared_memory_create = reinterpret_cast<type_ASharedMemory_create>(symbol); + + if (shared_memory_create) + { + MemoryFile = shared_memory_create("melondsfastmem", MemoryTotalSize); + } + else + { + int fd = open(ASHMEM_DEVICE, O_RDWR); + ioctl(fd, ASHMEM_SET_NAME, "melondsfastmem"); + ioctl(fd, ASHMEM_SET_SIZE, MemoryTotalSize); + MemoryFile = fd; + } +#elif defined(__APPLE__) + char* fastmemPidName = new char[snprintf(NULL, 0, "melondsfastmem%d", getpid()) + 1]; + sprintf(fastmemPidName, "melondsfastmem%d", getpid()); + MemoryFile = shm_open(fastmemPidName, O_RDWR|O_CREAT, 0600); + delete[] fastmemPidName; +#else MemoryFile = memfd_create("melondsfastmem", 0); +#endif ftruncate(MemoryFile, MemoryTotalSize); - NewSa.sa_flags = SA_SIGINFO; - sigemptyset(&NewSa.sa_mask); - NewSa.sa_sigaction = SigsegvHandler; - sigaction(SIGSEGV, &NewSa, &OldSa); - - munmap(MemoryBase, MemoryTotalSize); - munmap(FastMem9Start, AddrSpaceSize); - munmap(FastMem7Start, AddrSpaceSize); + struct sigaction sa; + sa.sa_handler = nullptr; + sa.sa_sigaction = &SigsegvHandler; + sa.sa_flags = SA_SIGINFO; + sigemptyset(&sa.sa_mask); + sigaction(SIGSEGV, &sa, &OldSaSegv); +#ifdef __APPLE__ + sigaction(SIGBUS, &sa, &OldSaBus); +#endif mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0); @@ -657,17 +775,30 @@ void Init() void DeInit() { #if defined(__SWITCH__) - virtmemFree(FastMem9Start, 0x100000000); - virtmemFree(FastMem7Start, 0x100000000); + 
virtmemFree(FastMem9Start, AddrSpaceSize); + virtmemFree(FastMem7Start, AddrSpaceSize); svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize); virtmemFree(MemoryBaseCodeMem, MemoryTotalSize); free(MemoryBase); +#elif defined(__APPLE__) + char* fastmemPidName = new char[snprintf(NULL, 0, "melondsfastmem%d", getpid()) + 1]; + sprintf(fastmemPidName, "melondsfastmem%d", getpid()); + shm_unlink(fastmemPidName); + delete[] fastmemPidName; #elif defined(_WIN32) assert(UnmapViewOfFile(MemoryBase)); CloseHandle(MemoryFile); RemoveVectoredExceptionHandler(ExceptionHandlerHandle); +#else + sigaction(SIGSEGV, &OldSaSegv, nullptr); +#ifdef __APPLE__ + sigaction(SIGBUS, &OldSaBus, nullptr); +#endif + + munmap(MemoryBase, MemoryTotalSize); + close(MemoryFile); #endif } @@ -997,9 +1128,11 @@ int ClassifyAddress7(u32 addr) case 0x06000000: case 0x06800000: return memregion_VWRAM; + + default: + return memregion_Other; } } - return memregion_Other; } void WifiWrite32(u32 addr, u32 val) @@ -1176,4 +1309,4 @@ void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) return NULL; } -}
\ No newline at end of file +} diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp index 819fe3c..70ec781 100644 --- a/src/ARMJIT_x64/ARMJIT_Branch.cpp +++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp @@ -130,6 +130,16 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); } +void ARMv4JumpToTrampoline(ARMv4* arm, u32 addr, bool restorecpsr) +{ + arm->JumpTo(addr, restorecpsr); +} + +void ARMv5JumpToTrampoline(ARMv5* arm, u32 addr, bool restorecpsr) +{ + arm->JumpTo(addr, restorecpsr); +} + void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) { IrregularCycles = true; @@ -146,9 +156,9 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) else MOV(32, R(ABI_PARAM3), Imm32(true)); // what a waste if (Num == 0) - CALL((void*)&ARMv5::JumpTo); + CALL((void*)&ARMv5JumpToTrampoline); else - CALL((void*)&ARMv4::JumpTo); + CALL((void*)&ARMv4JumpToTrampoline); PopRegs(restoreCPSR); @@ -269,4 +279,4 @@ void Compiler::T_Comp_BL_Merged() Comp_JumpTo(target); } -}
\ No newline at end of file +} diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index c6419c9..cc4ad80 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -101,6 +101,11 @@ void Compiler::A_Comp_MRS() MOV(32, rd, R(RCPSR)); } +void UpdateModeTrampoline(ARM* arm, u32 oldmode, u32 newmode) +{ + arm->UpdateMode(oldmode, newmode); +} + void Compiler::A_Comp_MSR() { Comp_AddCycles_C(); @@ -185,7 +190,7 @@ void Compiler::A_Comp_MSR() MOV(32, R(ABI_PARAM3), R(RCPSR)); MOV(32, R(ABI_PARAM2), R(RSCRATCH3)); MOV(64, R(ABI_PARAM1), R(RCPU)); - CALL((void*)&ARM::UpdateMode); + CALL((void*)&UpdateModeTrampoline); PopRegs(true); } @@ -216,6 +221,8 @@ Compiler::Compiler() #ifdef _WIN32 DWORD dummy; VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy); + #elif defined(__APPLE__) + pageAligned = (u8*)mmap(NULL, 1024*1024*32, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS ,-1, 0); #else mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE); #endif @@ -340,7 +347,7 @@ Compiler::Compiler() ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8); if (consoleType == 0) { - switch ((8 << size) | num) + switch ((8 << size) | num) { case 32: ABI_CallFunction(SlowWrite9<u32, 0>); break; case 33: ABI_CallFunction(SlowWrite7<u32, 0>); break; @@ -352,7 +359,7 @@ Compiler::Compiler() } else { - switch ((8 << size) | num) + switch ((8 << size) | num) { case 32: ABI_CallFunction(SlowWrite9<u32, 1>); break; case 33: ABI_CallFunction(SlowWrite7<u32, 1>); break; @@ -375,7 +382,7 @@ Compiler::Compiler() ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8); if (consoleType == 0) { - switch ((8 << size) | num) + switch ((8 << size) | num) { case 32: ABI_CallFunction(SlowRead9<u32, 0>); break; case 33: ABI_CallFunction(SlowRead7<u32, 0>); break; @@ -387,7 +394,7 @@ Compiler::Compiler() } else { - switch ((8 << size) | num) + switch ((8 << size) | num) { case 32: 
ABI_CallFunction(SlowRead9<u32, 1>); break; case 33: ABI_CallFunction(SlowRead7<u32, 1>); break; @@ -612,9 +619,9 @@ void Compiler::Reset() LoadStorePatches.clear(); } -bool Compiler::IsJITFault(u64 addr) +bool Compiler::IsJITFault(u8* addr) { - return addr >= (u64)CodeMemory && addr < (u64)CodeMemory + sizeof(CodeMemory); + return (u64)addr >= (u64)ResetStart && (u64)addr < (u64)ResetStart + CodeMemSize; } void Compiler::Comp_SpecialBranchBehaviour(bool taken) @@ -896,5 +903,4 @@ void Compiler::Comp_AddCycles_CD() else ConstantCycles += cycles; } - -}
\ No newline at end of file +} diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h index 3e900c3..57aab7b 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.h +++ b/src/ARMJIT_x64/ARMJIT_Compiler.h @@ -208,9 +208,9 @@ public: SetCodePtr(FarCode); } - bool IsJITFault(u64 addr); + bool IsJITFault(u8* addr); - s32 RewriteMemAccess(u64 pc); + u8* RewriteMemAccess(u8* pc); u8* FarCode; u8* NearCode; diff --git a/src/ARMJIT_x64/ARMJIT_Linkage.s b/src/ARMJIT_x64/ARMJIT_Linkage.S index 0a84df0..8cc0b5f 100644 --- a/src/ARMJIT_x64/ARMJIT_Linkage.s +++ b/src/ARMJIT_x64/ARMJIT_Linkage.S @@ -29,8 +29,13 @@ .p2align 4,,15 +#ifdef __APPLE__ +.global _ARM_Dispatch +_ARM_Dispatch: +#else .global ARM_Dispatch ARM_Dispatch: +#endif #ifdef WIN64 push rdi push rsi @@ -54,8 +59,13 @@ ARM_Dispatch: .p2align 4,,15 +#ifdef __APPLE__ +.global _ARM_Ret +_ARM_Ret: +#else .global ARM_Ret ARM_Ret: +#endif mov [RCPU + ARM_CPSR_offset], RCPSR #ifdef WIN64 diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp index 8b4e8fe..d80b25b 100644 --- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp @@ -15,28 +15,24 @@ int squeezePointer(T* ptr) return truncated; } -s32 Compiler::RewriteMemAccess(u64 pc) +u8* Compiler::RewriteMemAccess(u8* pc) { - auto it = LoadStorePatches.find((u8*)pc); + auto it = LoadStorePatches.find(pc); if (it != LoadStorePatches.end()) { LoadStorePatch patch = it->second; LoadStorePatches.erase(it); - u8* curCodePtr = GetWritableCodePtr(); - u8* rewritePtr = (u8*)pc + (ptrdiff_t)patch.Offset; - SetCodePtr(rewritePtr); + //printf("rewriting memory access %p %d %d\n", (u8*)pc-ResetStart, patch.Offset, patch.Size); - CALL(patch.PatchFunc); - u32 remainingSize = patch.Size - (GetWritableCodePtr() - rewritePtr); + XEmitter emitter(pc + (ptrdiff_t)patch.Offset); + emitter.CALL(patch.PatchFunc); + ptrdiff_t remainingSize = (ptrdiff_t)patch.Size - 5; + assert(remainingSize >= 0); if (remainingSize > 0) - 
NOP(remainingSize); + emitter.NOP(remainingSize); - //printf("rewriting memory access %p %d %d\n", patch.PatchFunc, patch.Offset, patch.Size); - - SetCodePtr(curCodePtr); - - return patch.Offset; + return pc + (ptrdiff_t)patch.Offset; } printf("this is a JIT bug %llx\n", pc); @@ -192,6 +188,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag u8* memopStart = GetWritableCodePtr(); LoadStorePatch patch; + assert(rdMapped.GetSimpleReg() >= 0 && rdMapped.GetSimpleReg() < 16); patch.PatchFunc = flags & memop_Store ? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped.GetSimpleReg()] : PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped.GetSimpleReg()]; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d6c3897..c16da9f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -80,9 +80,8 @@ if (ENABLE_JIT) ARMJIT_x64/ARMJIT_LoadStore.cpp ARMJIT_x64/ARMJIT_Branch.cpp - ARMJIT_x64/ARMJIT_Linkage.s + ARMJIT_x64/ARMJIT_Linkage.S ) - set_source_files_properties(ARMJIT_x64/ARMJIT_Linkage.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp") endif() if (ARCHITECTURE STREQUAL ARM64) target_sources(core PRIVATE @@ -94,16 +93,22 @@ if (ENABLE_JIT) ARMJIT_A64/ARMJIT_LoadStore.cpp ARMJIT_A64/ARMJIT_Branch.cpp - ARMJIT_A64/ARMJIT_Linkage.s + ARMJIT_A64/ARMJIT_Linkage.S ) - set_source_files_properties(ARMJIT_A64/ARMJIT_Linkage.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp") endif() endif() +if (APPLE) + target_include_directories(core PUBLIC /usr/local/include) + target_link_directories(core PUBLIC /usr/local/lib) +endif() + if (ENABLE_OGLRENDERER) if (WIN32) target_link_libraries(core ole32 comctl32 ws2_32 opengl32) - else() + elseif (APPLE) + target_link_libraries(core "-framework OpenGL") + else() target_link_libraries(core GL EGL) endif() else() diff --git a/src/Config.cpp b/src/Config.cpp index 341b14c..f7db252 100644 --- a/src/Config.cpp +++ b/src/Config.cpp 
@@ -73,7 +73,11 @@ ConfigEntry ConfigFile[] = {"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 32, NULL, 0}, {"JIT_BranchOptimisations", 0, &JIT_BranchOptimisations, 1, NULL, 0}, {"JIT_LiteralOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0}, - {"JIT_FastMemory", 0, &JIT_FastMemory, 1, NULL, 0}, + #ifdef __APPLE__ + {"JIT_FastMemory", 0, &JIT_FastMemory, 0, NULL, 0}, + #else + {"JIT_FastMemory", 0, &JIT_FastMemory, 1, NULL, 0}, + #endif #endif {"", -1, NULL, 0, NULL, 0} diff --git a/src/DMA.cpp b/src/DMA.cpp index 18b8a2f..8ad3918 100644 --- a/src/DMA.cpp +++ b/src/DMA.cpp @@ -77,21 +77,6 @@ void DMA::Reset() Running = false; InProgress = false; - - if (NDS::ConsoleType == 1) - { - BusRead16 = (CPU==0) ? DSi::ARM9Read16 : DSi::ARM7Read16; - BusRead32 = (CPU==0) ? DSi::ARM9Read32 : DSi::ARM7Read32; - BusWrite16 = (CPU==0) ? DSi::ARM9Write16 : DSi::ARM7Write16; - BusWrite32 = (CPU==0) ? DSi::ARM9Write32 : DSi::ARM7Write32; - } - else - { - BusRead16 = (CPU==0) ? NDS::ARM9Read16 : NDS::ARM7Read16; - BusRead32 = (CPU==0) ? NDS::ARM9Read32 : NDS::ARM7Read32; - BusWrite16 = (CPU==0) ? NDS::ARM9Write16 : NDS::ARM7Write16; - BusWrite32 = (CPU==0) ? 
NDS::ARM9Write32 : NDS::ARM7Write32; - } } void DMA::DoSavestate(Savestate* file) @@ -198,13 +183,7 @@ void DMA::Start() NDS::StopCPU(CPU, 1<<Num); } -void DMA::Run() -{ - if (!Running) return; - if (CPU == 0) return Run9(); - else return Run7(); -} - +template <int ConsoleType> void DMA::Run9() { if (NDS::ARM9Timestamp >= NDS::ARM9Target) return; @@ -242,7 +221,10 @@ void DMA::Run9() { NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift); - BusWrite16(CurDstAddr, BusRead16(CurSrcAddr)); + if (ConsoleType == 1) + DSi::ARM9Write16(CurDstAddr, DSi::ARM9Read16(CurSrcAddr)); + else + NDS::ARM9Write16(CurDstAddr, NDS::ARM9Read16(CurSrcAddr)); CurSrcAddr += SrcAddrInc<<1; CurDstAddr += DstAddrInc<<1; @@ -278,7 +260,10 @@ void DMA::Run9() { NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift); - BusWrite32(CurDstAddr, BusRead32(CurSrcAddr)); + if (ConsoleType == 1) + DSi::ARM9Write32(CurDstAddr, DSi::ARM9Read32(CurSrcAddr)); + else + NDS::ARM9Write32(CurDstAddr, NDS::ARM9Read32(CurSrcAddr)); CurSrcAddr += SrcAddrInc<<2; CurDstAddr += DstAddrInc<<2; @@ -317,6 +302,7 @@ void DMA::Run9() NDS::ResumeCPU(0, 1<<Num); } +template <int ConsoleType> void DMA::Run7() { if (NDS::ARM7Timestamp >= NDS::ARM7Target) return; @@ -354,7 +340,10 @@ void DMA::Run7() { NDS::ARM7Timestamp += unitcycles; - BusWrite16(CurDstAddr, BusRead16(CurSrcAddr)); + if (ConsoleType == 1) + DSi::ARM7Write16(CurDstAddr, DSi::ARM7Read16(CurSrcAddr)); + else + NDS::ARM7Write16(CurDstAddr, NDS::ARM7Read16(CurSrcAddr)); CurSrcAddr += SrcAddrInc<<1; CurDstAddr += DstAddrInc<<1; @@ -390,7 +379,10 @@ void DMA::Run7() { NDS::ARM7Timestamp += unitcycles; - BusWrite32(CurDstAddr, BusRead32(CurSrcAddr)); + if (ConsoleType == 1) + DSi::ARM7Write32(CurDstAddr, DSi::ARM7Read32(CurSrcAddr)); + else + NDS::ARM7Write32(CurDstAddr, NDS::ARM7Read32(CurSrcAddr)); CurSrcAddr += SrcAddrInc<<2; CurDstAddr += DstAddrInc<<2; @@ -425,3 +417,14 @@ void DMA::Run7() InProgress = false; NDS::ResumeCPU(1, 1<<Num); } + +template 
<int ConsoleType> +void DMA::Run() +{ + if (!Running) return; + if (CPU == 0) return Run9<ConsoleType>(); + else return Run7<ConsoleType>(); +} + +template void DMA::Run<0>(); +template void DMA::Run<1>();
\ No newline at end of file @@ -34,9 +34,12 @@ public: void WriteCnt(u32 val); void Start(); + template <int ConsoleType> void Run(); + template <int ConsoleType> void Run9(); + template <int ConsoleType> void Run7(); bool IsInMode(u32 mode) @@ -86,11 +89,6 @@ private: bool Stall; bool IsGXFIFODMA; - - u16 (*BusRead16)(u32 addr); - u32 (*BusRead32)(u32 addr); - void (*BusWrite16)(u32 addr, u16 val); - void (*BusWrite32)(u32 addr, u32 val); }; #endif diff --git a/src/DSi.cpp b/src/DSi.cpp index 0e62f5b..bcc1f92 100644 --- a/src/DSi.cpp +++ b/src/DSi.cpp @@ -543,15 +543,15 @@ void MapNWRAM_A(u32 num, u8 val) return; } -#ifdef JIT_ENABLED - ARMJIT_Memory::RemapNWRAM(0); -#endif - int mbkn = 0, mbks = 8*num; u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF; if (oldval == val) return; +#ifdef JIT_ENABLED + ARMJIT_Memory::RemapNWRAM(0); +#endif + MBK[0][mbkn] &= ~(0xFF << mbks); MBK[0][mbkn] |= (val << mbks); MBK[1][mbkn] = MBK[0][mbkn]; @@ -578,15 +578,15 @@ void MapNWRAM_B(u32 num, u8 val) return; } -#ifdef JIT_ENABLED - ARMJIT_Memory::RemapNWRAM(1); -#endif - int mbkn = 1+(num>>2), mbks = 8*(num&3); u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF; if (oldval == val) return; +#ifdef JIT_ENABLED + ARMJIT_Memory::RemapNWRAM(1); +#endif + MBK[0][mbkn] &= ~(0xFF << mbks); MBK[0][mbkn] |= (val << mbks); MBK[1][mbkn] = MBK[0][mbkn]; @@ -617,15 +617,15 @@ void MapNWRAM_C(u32 num, u8 val) return; } -#ifdef JIT_ENABLED - ARMJIT_Memory::RemapNWRAM(2); -#endif - int mbkn = 3+(num>>2), mbks = 8*(num&3); u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF; if (oldval == val) return; +#ifdef JIT_ENABLED + ARMJIT_Memory::RemapNWRAM(2); +#endif + MBK[0][mbkn] &= ~(0xFF << mbks); MBK[0][mbkn] |= (val << mbks); MBK[1][mbkn] = MBK[0][mbkn]; diff --git a/src/GPU.cpp b/src/GPU.cpp index 7989750..e6b24e0 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -49,8 +49,8 @@ u8 VRAM_F[ 16*1024]; u8 VRAM_G[ 16*1024]; u8 VRAM_H[ 32*1024]; u8 VRAM_I[ 16*1024]; -u8* VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, 
VRAM_G, VRAM_H, VRAM_I}; -u32 VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF}; +u8* const VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I}; +u32 const VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF}; u8 VRAMCNT[9]; u8 VRAMSTAT; @@ -85,6 +85,62 @@ bool Accelerated; GPU2D* GPU2D_A; GPU2D* GPU2D_B; +/* + VRAM invalidation tracking + + - we want to know when a VRAM region used for graphics changed + - for some regions unmapping is mandatory to modify them (Texture, TexPal and ExtPal) and + we don't want to completely invalidate them every time they're unmapped and remapped + + For this reason we don't track the dirtiness per mapping region, but instead per VRAM bank + with VRAMDirty. Writes to LCDC go directly into VRAMDirty, while writes via other mapping regions + like BG or OBJ are first tracked in VRAMWritten_* and need to be flushed using SyncDirtyFlags. + + This is more or less a description of VRAMTrackingSet::DeriveState + Each time before the memory is read two things could have happened + to each 16kb piece (16kb is the smallest unit in which mappings can + be made thus also the size VRAMMap_* use): + - this piece was remapped compared to last time we checked, + which means this location in memory is invalid. + - this piece wasn't remapped, which means we need to check whether + it was changed. This can be achieved by checking VRAMDirty. + VRAMDirty needs to be reset for the respective VRAM bank. 
+*/ + +VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG; +VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ; +VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG; +VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ; + +VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal; +VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal; +VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal; +VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal; + +VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture; +VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal; + + +NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG; +NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ; +NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG; +NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ; +NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7; + +NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9]; + +u8 VRAMFlat_ABG[512*1024]; +u8 VRAMFlat_BBG[128*1024]; +u8 VRAMFlat_AOBJ[256*1024]; +u8 VRAMFlat_BOBJ[128*1024]; + +u8 VRAMFlat_ABGExtPal[32*1024]; +u8 VRAMFlat_BBGExtPal[32*1024]; +u8 VRAMFlat_AOBJExtPal[8*1024]; +u8 VRAMFlat_BOBJExtPal[8*1024]; + +u8 VRAMFlat_Texture[512*1024]; +u8 VRAMFlat_TexPal[128*1024]; bool Init() { @@ -113,6 +169,30 @@ void DeInit() if (Framebuffer[1][1]) delete[] Framebuffer[1][1]; } +void ResetVRAMCache() +{ + for (int i = 0; i < 9; i++) + VRAMDirty[i] = NonStupidBitField<128*1024/VRAMDirtyGranularity>(); + + VRAMDirty_ABG.Reset(); + VRAMDirty_BBG.Reset(); + VRAMDirty_AOBJ.Reset(); + VRAMDirty_BOBJ.Reset(); + VRAMDirty_ABGExtPal.Reset(); + VRAMDirty_BBGExtPal.Reset(); + VRAMDirty_AOBJExtPal.Reset(); + VRAMDirty_BOBJExtPal.Reset(); + + memset(VRAMFlat_ABG, 0, sizeof(VRAMFlat_ABG)); + memset(VRAMFlat_BBG, 0, sizeof(VRAMFlat_BBG)); + memset(VRAMFlat_AOBJ, 0, sizeof(VRAMFlat_AOBJ)); + memset(VRAMFlat_BOBJ, 0, sizeof(VRAMFlat_BOBJ)); + memset(VRAMFlat_ABGExtPal, 0, sizeof(VRAMFlat_ABGExtPal)); + 
memset(VRAMFlat_BBGExtPal, 0, sizeof(VRAMFlat_BBGExtPal)); + memset(VRAMFlat_AOBJExtPal, 0, sizeof(VRAMFlat_AOBJExtPal)); + memset(VRAMFlat_BOBJExtPal, 0, sizeof(VRAMFlat_BOBJExtPal)); +} + void Reset() { VCount = 0; @@ -186,6 +266,8 @@ void Reset() GPU2D_B->SetFramebuffer(Framebuffer[backbuf][0]); ResetRenderer(); + + ResetVRAMCache(); } void Stop() @@ -261,6 +343,8 @@ void DoSavestate(Savestate* file) GPU2D_A->DoSavestate(file); GPU2D_B->DoSavestate(file); GPU3D::DoSavestate(file); + + ResetVRAMCache(); } void AssignFramebuffers() @@ -411,18 +495,8 @@ void SetRenderSettings(int renderer, RenderSettings& settings) u8* GetUniqueBankPtr(u32 mask, u32 offset) { - if (!mask) return NULL; - - int num = 0; - if (!(mask & 0xFF)) { mask >>= 8; num += 8; } - else - { - if (!(mask & 0xF)) { mask >>= 4; num += 4; } - if (!(mask & 0x3)) { mask >>= 2; num += 2; } - if (!(mask & 0x1)) { mask >>= 1; num += 1; } - } - if (mask != 1) return NULL; - + if (!mask || (mask & (mask - 1)) != 0) return NULL; + int num = __builtin_ctz(mask); return &VRAM[num][offset & VRAMMask[num]]; } @@ -606,8 +680,6 @@ void MapVRAM_E(u32 bank, u8 cnt) case 4: // ABG ext palette UNMAP_RANGE(ABGExtPal, 0, 4); - GPU2D_A->BGExtPalDirty(0); - GPU2D_A->BGExtPalDirty(2); break; } } @@ -634,8 +706,6 @@ void MapVRAM_E(u32 bank, u8 cnt) case 4: // ABG ext palette MAP_RANGE(ABGExtPal, 0, 4); - GPU2D_A->BGExtPalDirty(0); - GPU2D_A->BGExtPalDirty(2); break; } } @@ -687,12 +757,10 @@ void MapVRAM_FG(u32 bank, u8 cnt) case 4: // ABG ext palette VRAMMap_ABGExtPal[((oldofs & 0x1) << 1)] &= ~bankmask; VRAMMap_ABGExtPal[((oldofs & 0x1) << 1) + 1] &= ~bankmask; - GPU2D_A->BGExtPalDirty((oldofs & 0x1) << 1); break; case 5: // AOBJ ext palette VRAMMap_AOBJExtPal &= ~bankmask; - GPU2D_A->OBJExtPalDirty(); break; } } @@ -732,12 +800,10 @@ void MapVRAM_FG(u32 bank, u8 cnt) case 4: // ABG ext palette VRAMMap_ABGExtPal[((ofs & 0x1) << 1)] |= bankmask; VRAMMap_ABGExtPal[((ofs & 0x1) << 1) + 1] |= bankmask; - 
GPU2D_A->BGExtPalDirty((ofs & 0x1) << 1); break; case 5: // AOBJ ext palette VRAMMap_AOBJExtPal |= bankmask; - GPU2D_A->OBJExtPalDirty(); break; } } @@ -773,8 +839,6 @@ void MapVRAM_H(u32 bank, u8 cnt) case 2: // BBG ext palette UNMAP_RANGE(BBGExtPal, 0, 4); - GPU2D_B->BGExtPalDirty(0); - GPU2D_B->BGExtPalDirty(2); break; } } @@ -800,8 +864,6 @@ void MapVRAM_H(u32 bank, u8 cnt) case 2: // BBG ext palette MAP_RANGE(BBGExtPal, 0, 4); - GPU2D_B->BGExtPalDirty(0); - GPU2D_B->BGExtPalDirty(2); break; } } @@ -841,7 +903,6 @@ void MapVRAM_I(u32 bank, u8 cnt) case 3: // BOBJ ext palette VRAMMap_BOBJExtPal &= ~bankmask; - GPU2D_B->OBJExtPalDirty(); break; } } @@ -871,7 +932,6 @@ void MapVRAM_I(u32 bank, u8 cnt) case 3: // BOBJ ext palette VRAMMap_BOBJExtPal |= bankmask; - GPU2D_B->OBJExtPalDirty(); break; } } @@ -937,6 +997,8 @@ void StartHBlank(u32 line) DispStat[0] |= (1<<1); DispStat[1] |= (1<<1); + SyncDirtyFlags(); + if (VCount < 192) { // draw @@ -1096,4 +1158,224 @@ void SetVCount(u16 val) NextVCount = val; } +template <u32 Size, u32 MappingGranularity> +NonStupidBitField<Size/VRAMDirtyGranularity> VRAMTrackingSet<Size, MappingGranularity>::DeriveState(u32* currentMappings) +{ + NonStupidBitField<Size/VRAMDirtyGranularity> result; + u16 banksToBeZeroed = 0; + for (u32 i = 0; i < Size / MappingGranularity; i++) + { + if (currentMappings[i] != Mapping[i]) + { + result |= NonStupidBitField<Size/VRAMDirtyGranularity>(i*VRAMBitsPerMapping, VRAMBitsPerMapping); + banksToBeZeroed |= currentMappings[i]; + Mapping[i] = currentMappings[i]; + } + else + { + u32 mapping = Mapping[i]; + + banksToBeZeroed |= mapping; + + while (mapping != 0) + { + u32 num = __builtin_ctz(mapping); + mapping &= ~(1 << num); + + // hack for **speed** + // this could probably be done less ugly but then we would rely + // on the compiler for vectorisation + static_assert(VRAMDirtyGranularity == 512); + if (MappingGranularity == 16*1024) + { + u32 dirty = ((u32*)VRAMDirty[num].Data)[i & (VRAMMask[num] 
>> 14)]; + ((u32*)result.Data)[i] |= dirty; + } + else if (MappingGranularity == 8*1024) + { + u16 dirty = ((u16*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 13)]; + ((u16*)result.Data)[i] |= dirty; + } + else if (MappingGranularity == 128*1024) + { + ((u64*)result.Data)[i * 4 + 0] |= ((u64*)VRAMDirty[num].Data)[0]; + ((u64*)result.Data)[i * 4 + 1] |= ((u64*)VRAMDirty[num].Data)[1]; + ((u64*)result.Data)[i * 4 + 2] |= ((u64*)VRAMDirty[num].Data)[2]; + ((u64*)result.Data)[i * 4 + 3] |= ((u64*)VRAMDirty[num].Data)[3]; + } + else + { + // welp + abort(); + } + } + } + } + + while (banksToBeZeroed != 0) + { + u32 num = __builtin_ctz(banksToBeZeroed); + banksToBeZeroed &= ~(1 << num); + memset(VRAMDirty[num].Data, 0, sizeof(VRAMDirty[num].Data)); + } + + return result; +} + +template NonStupidBitField<32*1024/VRAMDirtyGranularity> VRAMTrackingSet<32*1024, 8*1024>::DeriveState(u32*); +template NonStupidBitField<8*1024/VRAMDirtyGranularity> VRAMTrackingSet<8*1024, 8*1024>::DeriveState(u32*); +template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 128*1024>::DeriveState(u32*); +template NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMTrackingSet<128*1024, 16*1024>::DeriveState(u32*); +template NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMTrackingSet<256*1024, 16*1024>::DeriveState(u32*); +template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 16*1024>::DeriveState(u32*); + +template <u32 Size> +void SyncDirtyFlags(u32* mappings, NonStupidBitField<Size>& writtenFlags) +{ + const u32 VRAMWrittenBitsPer16KB = 16*1024/VRAMDirtyGranularity; + + for (typename NonStupidBitField<Size>::Iterator it = writtenFlags.Begin(); it != writtenFlags.End(); it++) + { + u32 mapping = mappings[*it / VRAMWrittenBitsPer16KB]; + while (mapping != 0) + { + u32 num = __builtin_ctz(mapping); + + VRAMDirty[num][*it & (VRAMMask[num] / VRAMDirtyGranularity)] = true; + + mapping &= ~(1 << num); + } + } + memset(writtenFlags.Data, 0, 
sizeof(writtenFlags.Data)); +} + +void SyncDirtyFlags() +{ + SyncDirtyFlags(VRAMMap_ABG, VRAMWritten_ABG); + SyncDirtyFlags(VRAMMap_AOBJ, VRAMWritten_AOBJ); + SyncDirtyFlags(VRAMMap_BBG, VRAMWritten_BBG); + SyncDirtyFlags(VRAMMap_BOBJ, VRAMWritten_BOBJ); + SyncDirtyFlags(VRAMMap_ARM7, VRAMWritten_ARM7); +} + +template <u32 MappingGranularity, u32 Size> +inline bool CopyLinearVRAM(u8* flat, u32* mappings, NonStupidBitField<Size>& dirty, u64 (*slowAccess)(u32 addr)) +{ + const u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity; + + bool change = false; + + typename NonStupidBitField<Size>::Iterator it = dirty.Begin(); + while (it != dirty.End()) + { + u32 offset = *it * VRAMDirtyGranularity; + u8* dst = flat + offset; + u8* fastAccess = GetUniqueBankPtr(mappings[*it / VRAMBitsPerMapping], offset); + if (fastAccess) + { + memcpy(dst, fastAccess, VRAMDirtyGranularity); + } + else + { + for (u32 i = 0; i < VRAMDirtyGranularity; i += 8) + *(u64*)&dst[i] = slowAccess(offset + i); + } + change = true; + it++; + } + return change; +} + +bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<128*1024>(VRAMFlat_Texture, VRAMMap_Texture, dirty, ReadVRAM_Texture<u64>); +} +bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<16*1024>(VRAMFlat_TexPal, VRAMMap_TexPal, dirty, ReadVRAM_TexPal<u64>); +} + +bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<16*1024>(VRAMFlat_ABG, VRAMMap_ABG, dirty, ReadVRAM_ABG<u64>); +} +bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<16*1024>(VRAMFlat_BBG, VRAMMap_BBG, dirty, ReadVRAM_BBG<u64>); +} + +bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<16*1024>(VRAMFlat_AOBJ, VRAMMap_AOBJ, dirty, ReadVRAM_AOBJ<u64>); 
+} +bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<16*1024>(VRAMFlat_BOBJ, VRAMMap_BOBJ, dirty, ReadVRAM_BOBJ<u64>); } + +template<typename T> +T ReadVRAM_ABGExtPal(u32 addr) +{ + u32 mask = VRAMMap_ABGExtPal[(addr >> 13) & 0x3]; + + T ret = 0; + if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0x7FFF]; + if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF]; + if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF]; + + return ret; +} + +template<typename T> +T ReadVRAM_BBGExtPal(u32 addr) +{ + u32 mask = VRAMMap_BBGExtPal[(addr >> 13) & 0x3]; + + T ret = 0; + if (mask & (1<<7)) ret |= *(T*)&VRAM_H[addr & 0x7FFF]; + + return ret; +} + +template<typename T> +T ReadVRAM_AOBJExtPal(u32 addr) +{ + u32 mask = VRAMMap_AOBJExtPal; + + T ret = 0; + if (mask & (1<<4)) ret |= *(T*)&VRAM_F[addr & 0x1FFF]; + if (mask & (1<<5)) ret |= *(T*)&VRAM_G[addr & 0x1FFF]; + + return ret; +} + +template<typename T> +T ReadVRAM_BOBJExtPal(u32 addr) +{ + u32 mask = VRAMMap_BOBJExtPal; + + T ret = 0; + if (mask & (1<<8)) ret |= *(T*)&VRAM_I[addr & 0x1FFF]; + + return ret; +} + +bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<8*1024>(VRAMFlat_ABGExtPal, VRAMMap_ABGExtPal, dirty, ReadVRAM_ABGExtPal<u64>); +} +bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<8*1024>(VRAMFlat_BBGExtPal, VRAMMap_BBGExtPal, dirty, ReadVRAM_BBGExtPal<u64>); +} + +bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<8*1024>(VRAMFlat_AOBJExtPal, &VRAMMap_AOBJExtPal, dirty, ReadVRAM_AOBJExtPal<u64>); +} +bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<8*1024>(VRAMFlat_BOBJExtPal, &VRAMMap_BOBJExtPal, dirty, ReadVRAM_BOBJExtPal<u64>); +} + +}
\ No newline at end of file @@ -20,6 +20,7 @@ #define GPU_H #include "GPU2D.h" +#include "NonStupidBitfield.h" namespace GPU { @@ -45,7 +46,7 @@ extern u8 VRAM_G[ 16*1024]; extern u8 VRAM_H[ 32*1024]; extern u8 VRAM_I[ 16*1024]; -extern u8* VRAM[9]; +extern u8* const VRAM[9]; extern u32 VRAMMap_LCDC; extern u32 VRAMMap_ABG[0x20]; @@ -73,6 +74,73 @@ extern GPU2D* GPU2D_B; extern int Renderer; +const u32 VRAMDirtyGranularity = 512; + +extern NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG; +extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ; +extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG; +extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ; +extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7; + +extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9]; + +template <u32 Size, u32 MappingGranularity> +struct VRAMTrackingSet +{ + u16 Mapping[Size / MappingGranularity]; + + const u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity; + + void Reset() + { + memset(Mapping, 0, sizeof(Mapping)); + } + NonStupidBitField<Size/VRAMDirtyGranularity> DeriveState(u32* currentMappings); +}; + +extern VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG; +extern VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ; +extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG; +extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ; + +extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal; +extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal; +extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal; +extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal; + +extern VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture; +extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal; + +extern u8 VRAMFlat_ABG[512*1024]; +extern u8 VRAMFlat_BBG[128*1024]; +extern u8 VRAMFlat_AOBJ[256*1024]; +extern u8 VRAMFlat_BOBJ[128*1024]; + +extern u8 
VRAMFlat_ABGExtPal[32*1024]; +extern u8 VRAMFlat_BBGExtPal[32*1024]; + +extern u8 VRAMFlat_AOBJExtPal[8*1024]; +extern u8 VRAMFlat_BOBJExtPal[8*1024]; + +extern u8 VRAMFlat_Texture[512*1024]; +extern u8 VRAMFlat_TexPal[128*1024]; + +bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty); +bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty); + +bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty); +bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty); + +bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty); +bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty); + +bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty); +bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty); + +bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty); +bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty); + +void SyncDirtyFlags(); typedef struct { @@ -233,7 +301,11 @@ void WriteVRAM_LCDC(u32 addr, T val) default: return; } - if (VRAMMap_LCDC & (1<<bank)) *(T*)&VRAM[bank][addr] = val; + if (VRAMMap_LCDC & (1<<bank)) + { + *(T*)&VRAM[bank][addr] = val; + VRAMDirty[bank][addr / VRAMDirtyGranularity] = true; + } } @@ -262,6 +334,8 @@ void WriteVRAM_ABG(u32 addr, T val) { u32 mask = VRAMMap_ABG[(addr >> 14) & 0x1F]; + VRAMWritten_ABG[(addr & 0x7FFFF) / VRAMDirtyGranularity] = true; + if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val; if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val; if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; @@ -295,6 +369,8 @@ void WriteVRAM_AOBJ(u32 addr, T val) { u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF]; + VRAMWritten_AOBJ[(addr & 0x3FFFF) / VRAMDirtyGranularity] = true; + if (mask & (1<<0)) 
*(T*)&VRAM_A[addr & 0x1FFFF] = val; if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val; if (mask & (1<<4)) *(T*)&VRAM_E[addr & 0xFFFF] = val; @@ -324,6 +400,8 @@ void WriteVRAM_BBG(u32 addr, T val) { u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7]; + VRAMWritten_BBG[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; if (mask & (1<<7)) *(T*)&VRAM_H[addr & 0x7FFF] = val; if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val; @@ -350,11 +428,12 @@ void WriteVRAM_BOBJ(u32 addr, T val) { u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7]; + VRAMWritten_BOBJ[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val; if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val; } - template<typename T> T ReadVRAM_ARM7(u32 addr) { @@ -372,6 +451,8 @@ void WriteVRAM_ARM7(u32 addr, T val) { u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1]; + VRAMWritten_ARM7[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val; } diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp index 7774c65..c1a2d47 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -148,12 +148,6 @@ void GPU2D::Reset() CaptureCnt = 0; MasterBrightness = 0; - - BGExtPalStatus[0] = 0; - BGExtPalStatus[1] = 0; - BGExtPalStatus[2] = 0; - BGExtPalStatus[3] = 0; - OBJExtPalStatus = 0; } void GPU2D::DoSavestate(Savestate* file) @@ -208,13 +202,6 @@ void GPU2D::DoSavestate(Savestate* file) if (!file->Saving) { - // refresh those - BGExtPalStatus[0] = 0; - BGExtPalStatus[1] = 0; - BGExtPalStatus[2] = 0; - BGExtPalStatus[3] = 0; - OBJExtPalStatus = 0; - CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]]; CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[0]]; } @@ -228,9 +215,6 @@ void GPU2D::SetFramebuffer(u32* buf) void GPU2D::SetRenderSettings(bool accel) { Accelerated = accel; - - if (Accelerated) DrawPixel = DrawPixel_Accel; - else DrawPixel = 
DrawPixel_Normal; } @@ -761,6 +745,25 @@ void GPU2D::DrawScanline(u32 line) int n3dline = line; line = GPU::VCount; + if (Num == 0) + { + auto bgDirty = GPU::VRAMDirty_ABG.DeriveState(GPU::VRAMMap_ABG); + GPU::MakeVRAMFlat_ABGCoherent(bgDirty); + auto bgExtPalDirty = GPU::VRAMDirty_ABGExtPal.DeriveState(GPU::VRAMMap_ABGExtPal); + GPU::MakeVRAMFlat_ABGExtPalCoherent(bgExtPalDirty); + auto objExtPalDirty = GPU::VRAMDirty_AOBJExtPal.DeriveState(&GPU::VRAMMap_AOBJExtPal); + GPU::MakeVRAMFlat_AOBJExtPalCoherent(objExtPalDirty); + } + else + { + auto bgDirty = GPU::VRAMDirty_BBG.DeriveState(GPU::VRAMMap_BBG); + GPU::MakeVRAMFlat_BBGCoherent(bgDirty); + auto bgExtPalDirty = GPU::VRAMDirty_BBGExtPal.DeriveState(GPU::VRAMMap_BBGExtPal); + GPU::MakeVRAMFlat_BBGExtPalCoherent(bgExtPalDirty); + auto objExtPalDirty = GPU::VRAMDirty_BOBJExtPal.DeriveState(&GPU::VRAMMap_BOBJExtPal); + GPU::MakeVRAMFlat_BOBJExtPalCoherent(objExtPalDirty); + } + bool forceblank = false; // scanlines that end up outside of the GPU drawing range @@ -973,6 +976,9 @@ void GPU2D::DoCapture(u32 line, u32 width) u16* dst = (u16*)GPU::VRAM[dstvram]; u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width); + static_assert(GPU::VRAMDirtyGranularity == 512); + GPU::VRAMDirty[dstvram][(dstaddr & 0x1FFFF) / GPU::VRAMDirtyGranularity] = true; + // TODO: handle 3D in accelerated mode!! 
u32* srcA; @@ -1191,85 +1197,20 @@ void GPU2D::SampleFIFO(u32 offset, u32 num) } } - -void GPU2D::BGExtPalDirty(u32 base) -{ - BGExtPalStatus[base] = 0; - BGExtPalStatus[base+1] = 0; -} - -void GPU2D::OBJExtPalDirty() -{ - OBJExtPalStatus = 0; -} - - u16* GPU2D::GetBGExtPal(u32 slot, u32 pal) { - u16* dst = &BGExtPalCache[slot][pal << 8]; - - if (!(BGExtPalStatus[slot] & (1<<pal))) - { - if (Num) - { - if (GPU::VRAMMap_BBGExtPal[slot] & (1<<7)) - memcpy(dst, &GPU::VRAM_H[(slot << 13) + (pal << 9)], 256*2); - else - memset(dst, 0, 256*2); - } - else - { - memset(dst, 0, 256*2); - - if (GPU::VRAMMap_ABGExtPal[slot] & (1<<4)) - for (int i = 0; i < 256; i+=2) - *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_E[(slot << 13) + (pal << 9) + (i << 1)]; - - if (GPU::VRAMMap_ABGExtPal[slot] & (1<<5)) - for (int i = 0; i < 256; i+=2) - *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_F[((slot&1) << 13) + (pal << 9) + (i << 1)]; - - if (GPU::VRAMMap_ABGExtPal[slot] & (1<<6)) - for (int i = 0; i < 256; i+=2) - *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_G[((slot&1) << 13) + (pal << 9) + (i << 1)]; - } - - BGExtPalStatus[slot] |= (1<<pal); - } - - return dst; + const u32 PaletteSize = 256 * 2; + const u32 SlotSize = PaletteSize * 16; + return (u16*)&(Num == 0 + ? GPU::VRAMFlat_ABGExtPal + : GPU::VRAMFlat_BBGExtPal)[slot * SlotSize + pal * PaletteSize]; } u16* GPU2D::GetOBJExtPal() { - u16* dst = OBJExtPalCache; - - if (!OBJExtPalStatus) - { - if (Num) - { - if (GPU::VRAMMap_BOBJExtPal & (1<<8)) - memcpy(dst, &GPU::VRAM_I[0], 16*256*2); - else - memset(dst, 0, 16*256*2); - } - else - { - memset(dst, 0, 16*256*2); - - if (GPU::VRAMMap_AOBJExtPal & (1<<5)) - for (int i = 0; i < 16*256; i+=2) - *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_F[i << 1]; - - if (GPU::VRAMMap_AOBJExtPal & (1<<6)) - for (int i = 0; i < 16*256; i+=2) - *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_G[i << 1]; - } - - OBJExtPalStatus = 1; - } - - return dst; + return Num == 0 + ? 
(u16*)GPU::VRAMFlat_AOBJExtPal + : (u16*)GPU::VRAMFlat_BOBJExtPal; } @@ -1330,10 +1271,36 @@ void GPU2D::CalculateWindowMask(u32 line) #define DoDrawBG(type, line, num) \ - { if ((BGCnt[num] & 0x0040) && (BGMosaicSize[0] > 0)) DrawBG_##type<true>(line, num); else DrawBG_##type<false>(line, num); } + { \ + if ((BGCnt[num] & 0x0040) && (BGMosaicSize[0] > 0)) \ + { \ + if (Accelerated) DrawBG_##type<true, DrawPixel_Accel>(line, num); \ + else DrawBG_##type<true, DrawPixel_Normal>(line, num); \ + } \ + else \ + { \ + if (Accelerated) DrawBG_##type<false, DrawPixel_Accel>(line, num); \ + else DrawBG_##type<false, DrawPixel_Normal>(line, num); \ + } \ + } #define DoDrawBG_Large(line) \ - { if ((BGCnt[2] & 0x0040) && (BGMosaicSize[0] > 0)) DrawBG_Large<true>(line); else DrawBG_Large<false>(line); } + do \ + { \ + if ((BGCnt[2] & 0x0040) && (BGMosaicSize[0] > 0)) \ + { \ + if (Accelerated) DrawBG_Large<true, DrawPixel_Accel>(line); \ + else DrawBG_Large<true, DrawPixel_Normal>(line); \ + } \ + else \ + { \ + if (Accelerated) DrawBG_Large<false, DrawPixel_Accel>(line); \ + else DrawBG_Large<false, DrawPixel_Normal>(line); \ + } \ + } while (false) + +#define DoInterleaveSprites(prio) \ + if (Accelerated) InterleaveSprites<DrawPixel_Accel>(prio); else InterleaveSprites<DrawPixel_Normal>(prio); template<u32 bgmode> void GPU2D::DrawScanlineBGMode(u32 line) @@ -1382,7 +1349,7 @@ void GPU2D::DrawScanlineBGMode(u32 line) } } if ((DispCnt & 0x1000) && NumSprites) - InterleaveSprites(0x40000 | (i<<16)); + DoInterleaveSprites(0x40000 | (i<<16)); } } @@ -1394,7 +1361,7 @@ void GPU2D::DrawScanlineBGMode6(u32 line) { if (DispCnt & 0x0400) { - DoDrawBG_Large(line) + DoDrawBG_Large(line); } } if ((BGCnt[0] & 0x3) == i) @@ -1406,7 +1373,7 @@ void GPU2D::DrawScanlineBGMode6(u32 line) } } if ((DispCnt & 0x1000) && NumSprites) - InterleaveSprites(0x40000 | (i<<16)); + DoInterleaveSprites(0x40000 | (i<<16)) } } @@ -1434,7 +1401,7 @@ void GPU2D::DrawScanlineBGMode7(u32 line) } } if ((DispCnt & 
0x1000) && NumSprites) - InterleaveSprites(0x40000 | (i<<16)); + DoInterleaveSprites(0x40000 | (i<<16)) } } @@ -1674,7 +1641,21 @@ void GPU2D::DrawBG_3D() } } -template<bool mosaic> +void GetBGVRAM(u32 num, u8*& data, u32& mask) +{ + if (num == 0) + { + data = GPU::VRAMFlat_ABG; + mask = 0x7FFFF; + } + else + { + data = GPU::VRAMFlat_BBG; + mask = 0x1FFFF; + } +} + +template<bool mosaic, GPU2D::DrawPixel drawPixel> void GPU2D::DrawBG_Text(u32 line, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -1697,17 +1678,20 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) extpal = (DispCnt & 0x40000000); if (extpal) extpalslot = ((bgnum<2) && (bgcnt&0x2000)) ? (2+bgnum) : bgnum; + u8* bgvram; + u32 bgvrammask; + GetBGVRAM(Num, bgvram, bgvrammask); if (Num) { - tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((bgcnt & 0x003C) << 12); + tilemapaddr = ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0x400]; } else { - tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0]; } @@ -1735,7 +1719,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) // preload shit as needed if ((xoff & 0x7) || mosaic) { - curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)); + curtile = *(u16*)&bgvram[(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)) & bgvrammask]; if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12); else curpal = pal; @@ -1756,7 +1740,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) (mosaic && ((xpos >> 3) != (lastxpos >> 3)))) { // load a new tile - curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)); + curtile = 
*(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask]; if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12); else curpal = pal; @@ -1771,10 +1755,10 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) if (WindowMask[i] & (1<<bgnum)) { u32 tilexoff = (curtile & 0x0400) ? (7-(xpos&0x7)) : (xpos&0x7); - color = GPU::ReadVRAM_BG<u8>(pixelsaddr + tilexoff); + color = bgvram[(pixelsaddr + tilexoff) & bgvrammask]; if (color) - DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); + drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); } xoff++; @@ -1787,7 +1771,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) // preload shit as needed if ((xoff & 0x7) || mosaic) { - curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)); + curtile = *(u16*)&bgvram[((tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3))) & bgvrammask]; curpal = pal + ((curtile & 0xF000) >> 8); pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5) + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); @@ -1805,7 +1789,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) (mosaic && ((xpos >> 3) != (lastxpos >> 3)))) { // load a new tile - curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)); + curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask]; curpal = pal + ((curtile & 0xF000) >> 8); pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5) + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); @@ -1819,15 +1803,15 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) u32 tilexoff = (curtile & 0x0400) ? 
(7-(xpos&0x7)) : (xpos&0x7); if (tilexoff & 0x1) { - color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) >> 4; + color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] >> 4; } else { - color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) & 0x0F; + color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] & 0x0F; } if (color) - DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); + drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); } xoff++; @@ -1835,7 +1819,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) } } -template<bool mosaic> +template<bool mosaic, GPU2D::DrawPixel drawPixel> void GPU2D::DrawBG_Affine(u32 line, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -1872,17 +1856,20 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum) rotY -= (BGMosaicY * rotD); } + u8* bgvram; + u32 bgvrammask; + if (Num) { - tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((bgcnt & 0x003C) << 12); + tilemapaddr = ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0x400]; } else { - tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0]; } @@ -1911,16 +1898,16 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum) if ((!((finalX|finalY) & overflowmask))) { - curtile = GPU::ReadVRAM_BG<u8>(tilemapaddr + ((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11))); + curtile = bgvram[(tilemapaddr + ((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11))) & bgvrammask]; // draw pixel u32 tilexoff = (finalX >> 8) & 0x7; u32 tileyoff = (finalY >> 8) & 0x7; - color = GPU::ReadVRAM_BG<u8>(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff); + color = 
bgvram[(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff) & bgvrammask]; if (color) - DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum); + drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum); } } @@ -1932,7 +1919,7 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum) BGYRefInternal[bgnum-2] += rotD; } -template<bool mosaic> +template<bool mosaic, GPU2D::DrawPixel drawPixel> void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -1941,6 +1928,10 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) u16* pal; u32 extpal; + u8* bgvram; + u32 bgvrammask; + GetBGVRAM(Num, bgvram, bgvrammask); + extpal = (DispCnt & 0x40000000); s16 rotA = BGRotA[bgnum-2]; @@ -1984,8 +1975,8 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) ofymask = ~ymask; } - if (Num) tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 6); - else tilemapaddr = 0x06000000 + ((bgcnt & 0x1F00) << 6); + if (Num) tilemapaddr = ((bgcnt & 0x1F00) << 6); + else tilemapaddr = ((bgcnt & 0x1F00) << 6); if (bgcnt & 0x0004) { @@ -2012,10 +2003,10 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) if (!(finalX & ofxmask) && !(finalY & ofymask)) { - color = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)); + color = *(u16*)&bgvram[(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)) & bgvrammask]; if (color & 0x8000) - DrawPixel(&BGOBJLine[i], color, 0x01000000<<bgnum); + drawPixel(&BGOBJLine[i], color, 0x01000000<<bgnum); } } @@ -2051,10 +2042,10 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) if (!(finalX & ofxmask) && !(finalY & ofymask)) { - color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)); + color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask]; if (color) - DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum); + drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum); 
} } @@ -2083,15 +2074,15 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) if (Num) { - tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((bgcnt & 0x003C) << 12); + tilemapaddr = ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0x400]; } else { - tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0]; } @@ -2121,7 +2112,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) if ((!((finalX|finalY) & overflowmask))) { - curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1)); + curtile = *(u16*)&bgvram[(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1)) & bgvrammask]; if (extpal) curpal = GetBGExtPal(bgnum, curtile>>12); else curpal = pal; @@ -2133,10 +2124,10 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) if (curtile & 0x0400) tilexoff = 7-tilexoff; if (curtile & 0x0800) tileyoff = 7-tileyoff; - color = GPU::ReadVRAM_BG<u8>(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff); + color = bgvram[(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff) & bgvrammask]; if (color) - DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); + drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); } } @@ -2149,7 +2140,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) BGYRefInternal[bgnum-2] += rotD; } -template<bool mosaic> +template<bool mosaic, GPU2D::DrawPixel drawPixel> void GPU2D::DrawBG_Large(u32 line) // BG is always BG2 { u16 bgcnt = BGCnt[2]; @@ -2199,8 +2190,9 @@ void GPU2D::DrawBG_Large(u32 line) // BG is always BG2 rotY -= (BGMosaicY * 
rotD); } - if (Num) tilemapaddr = 0x06200000; - else tilemapaddr = 0x06000000; + u8* bgvram; + u32 bgvrammask; + GetBGVRAM(Num, bgvram, bgvrammask); // 256-color bitmap @@ -2228,10 +2220,10 @@ void GPU2D::DrawBG_Large(u32 line) // BG is always BG2 if (!(finalX & ofxmask) && !(finalY & ofymask)) { - color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)); + color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask]; if (color) - DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2); + drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2); } } @@ -2274,6 +2266,7 @@ void GPU2D::ApplySpriteMosaicX() } } +template <GPU2D::DrawPixel drawPixel> void GPU2D::InterleaveSprites(u32 prio) { u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; @@ -2297,7 +2290,7 @@ void GPU2D::InterleaveSprites(u32 prio) else color = extpal[pixel & 0xFFF]; - DrawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); + drawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); } } else @@ -2317,11 +2310,25 @@ void GPU2D::InterleaveSprites(u32 prio) else color = pal[pixel & 0xFF]; - DrawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); + drawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); } } } +void GetOBJVRAM(u32 num, u8*& data, u32& mask) +{ + if (num == 0) + { + data = GPU::VRAMFlat_AOBJ; + mask = 0x3FFFF; + } + else + { + data = GPU::VRAMFlat_BOBJ; + mask = 0x1FFFF; + } +} + #define DoDrawSprite(type, ...) 
\ if (iswin) \ { \ @@ -2346,6 +2353,17 @@ void GPU2D::DrawSprites(u32 line) OBJMosaicYCount = 0; } + if (Num == 0) + { + auto objDirty = GPU::VRAMDirty_AOBJ.DeriveState(GPU::VRAMMap_AOBJ); + GPU::MakeVRAMFlat_AOBJCoherent(objDirty); + } + else + { + auto objDirty = GPU::VRAMDirty_BOBJ.DeriveState(GPU::VRAMMap_BOBJ); + GPU::MakeVRAMFlat_BOBJCoherent(objDirty); + } + NumSprites = 0; memset(OBJLine, 0, 256*4); memset(OBJWindow, 0, 256); @@ -2458,6 +2476,10 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi u32 ytilefactor; + u8* objvram; + u32 objvrammask; + GetOBJVRAM(Num, objvram, objvrammask); + s32 centerX = boundwidth >> 1; s32 centerY = boundheight >> 1; @@ -2501,6 +2523,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi pixelattr |= (0xC0000000 | (alpha << 24)); + u32 pixelsaddr; if (DispCnt & 0x40) { if (DispCnt & 0x20) @@ -2512,7 +2535,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi } else { - tilenum <<= (7 + ((DispCnt >> 22) & 0x1)); + pixelsaddr = tilenum << (7 + ((DispCnt >> 22) & 0x1)); ytilefactor = ((width >> 8) * 2); } } @@ -2520,23 +2543,21 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi { if (DispCnt & 0x20) { - tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); + pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); ytilefactor = (256 * 2); } else { - tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); + pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); ytilefactor = (128 * 2); } } - u32 pixelsaddr = (Num ? 
0x06600000 : 0x06400000) + tilenum; - for (; xoff < boundwidth;) { if ((u32)rotX < width && (u32)rotY < height) { - color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)); + color = *(u16*)&objvram[(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)) & objvrammask]; if (color & 0x8000) { @@ -2561,9 +2582,10 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi } else { + u32 pixelsaddr = tilenum; if (DispCnt & 0x10) { - tilenum <<= ((DispCnt >> 20) & 0x3); + pixelsaddr <<= ((DispCnt >> 20) & 0x3); ytilefactor = (width >> 11) << ((attrib[0] & 0x2000) ? 1:0); } else @@ -2574,12 +2596,12 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi if (spritemode == 1) pixelattr |= 0x80000000; else pixelattr |= 0x10000000; + ytilefactor <<= 5; + pixelsaddr <<= 5; + if (attrib[0] & 0x2000) { // 256-color - tilenum <<= 5; - ytilefactor <<= 5; - u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; if (!window) { @@ -2593,7 +2615,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi { if ((u32)rotX < width && (u32)rotY < height) { - color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)); + color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)) & objvrammask]; if (color) { @@ -2619,10 +2641,6 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi else { // 16-color - tilenum <<= 5; - ytilefactor <<= 5; - u32 pixelsaddr = (Num ? 
0x06600000 : 0x06400000) + tilenum; - if (!window) { pixelattr |= 0x1000; @@ -2633,7 +2651,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi { if ((u32)rotX < width && (u32)rotY < height) { - color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)); + color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)) & objvrammask]; if (rotX & 0x100) color >>= 4; else @@ -2681,6 +2699,10 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos pixelattr |= 0x100000; } + u8* objvram; + u32 objvrammask; + GetOBJVRAM(Num, objvram, objvrammask); + // yflip if (attrib[1] & 0x2000) ypos = height-1 - ypos; @@ -2711,6 +2733,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos pixelattr |= (0xC0000000 | (alpha << 24)); + u32 pixelsaddr = tilenum; if (DispCnt & 0x40) { if (DispCnt & 0x20) @@ -2722,25 +2745,24 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos } else { - tilenum <<= (7 + ((DispCnt >> 22) & 0x1)); - tilenum += (ypos * width * 2); + pixelsaddr <<= (7 + ((DispCnt >> 22) & 0x1)); + pixelsaddr += (ypos * width * 2); } } else { if (DispCnt & 0x20) { - tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); - tilenum += (ypos * 256 * 2); + pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); + pixelsaddr += (ypos * 256 * 2); } else { - tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); - tilenum += (ypos * 128 * 2); + pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); + pixelsaddr += (ypos * 128 * 2); } } - u32 pixelsaddr = (Num ? 
0x06600000 : 0x06400000) + tilenum; s32 pixelstride; if (attrib[1] & 0x1000) // xflip @@ -2757,7 +2779,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos for (; xoff < xend;) { - color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr); + color = *(u16*)&objvram[pixelsaddr & objvrammask]; pixelsaddr += pixelstride; @@ -2781,14 +2803,15 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos } else { + u32 pixelsaddr = tilenum; if (DispCnt & 0x10) { - tilenum <<= ((DispCnt >> 20) & 0x3); - tilenum += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0); + pixelsaddr <<= ((DispCnt >> 20) & 0x3); + pixelsaddr += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0); } else { - tilenum += ((ypos >> 3) * 0x20); + pixelsaddr += ((ypos >> 3) * 0x20); } if (spritemode == 1) pixelattr |= 0x80000000; @@ -2797,8 +2820,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos if (attrib[0] & 0x2000) { // 256-color - tilenum <<= 5; - u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; + pixelsaddr <<= 5; pixelsaddr += ((ypos & 0x7) << 3); s32 pixelstride; @@ -2827,7 +2849,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos for (; xoff < xend;) { - color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr); + color = objvram[pixelsaddr]; pixelsaddr += pixelstride; @@ -2853,8 +2875,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos else { // 16-color - tilenum <<= 5; - u32 pixelsaddr = (Num ? 
0x06600000 : 0x06400000) + tilenum; + pixelsaddr <<= 5; pixelsaddr += ((ypos & 0x7) << 2); s32 pixelstride; @@ -2886,13 +2907,13 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos { if (attrib[1] & 0x1000) { - if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) & 0x0F; pixelsaddr--; } - else color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) >> 4; + if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] & 0x0F; pixelsaddr--; } + else color = objvram[pixelsaddr & objvrammask] >> 4; } else { - if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) >> 4; pixelsaddr++; } - else color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) & 0x0F; + if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] >> 4; pixelsaddr++; } + else color = objvram[pixelsaddr & objvrammask] & 0x0F; } if (color) diff --git a/src/GPU2D.h b/src/GPU2D.h index 521adf0..db15adc 100644 --- a/src/GPU2D.h +++ b/src/GPU2D.h @@ -59,9 +59,6 @@ public: void CheckWindows(u32 line); - void BGExtPalDirty(u32 base); - void OBJExtPalDirty(); - u16* GetBGExtPal(u32 slot, u32 pal); u16* GetOBJExtPal(); @@ -128,9 +125,6 @@ private: u16 MasterBrightness; u16 BGExtPalCache[4][16*256]; - u16 OBJExtPalCache[16*256]; - u32 BGExtPalStatus[4]; - u32 OBJExtPalStatus; u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb); u32 ColorBlend5(u32 val1, u32 val2); @@ -147,15 +141,17 @@ private: static void DrawPixel_Normal(u32* dst, u16 color, u32 flag); static void DrawPixel_Accel(u32* dst, u16 color, u32 flag); - void (*DrawPixel)(u32* dst, u16 color, u32 flag); + + typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag); void DrawBG_3D(); - template<bool mosaic> void DrawBG_Text(u32 line, u32 bgnum); - template<bool mosaic> void DrawBG_Affine(u32 line, u32 bgnum); - template<bool mosaic> void DrawBG_Extended(u32 line, u32 bgnum); - template<bool mosaic> void DrawBG_Large(u32 line); + template<bool mosaic, DrawPixel drawPixel> void DrawBG_Text(u32 line, u32 bgnum); + template<bool mosaic, 
DrawPixel drawPixel> void DrawBG_Affine(u32 line, u32 bgnum); + template<bool mosaic, DrawPixel drawPixel> void DrawBG_Extended(u32 line, u32 bgnum); + template<bool mosaic, DrawPixel drawPixel> void DrawBG_Large(u32 line); void ApplySpriteMosaicX(); + template<DrawPixel drawPixel> void InterleaveSprites(u32 prio); template<bool window> void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos); template<bool window> void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos); diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 74debfe..4e6ac42 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -179,6 +179,8 @@ u8 RenderFogDensityTable[34]; u32 RenderClearAttr1, RenderClearAttr2; +bool RenderFrameIdentical; + u32 ZeroDotWLimit; u32 GXStat; @@ -2491,6 +2493,19 @@ void VBlank() } RenderNumPolygons = NumPolygons; + RenderFrameIdentical = false; + } + else + { + RenderFrameIdentical = RenderDispCnt == DispCnt + && RenderAlphaRef == AlphaRef + && RenderClearAttr1 == ClearAttr1 + && RenderClearAttr2 == ClearAttr2 + && RenderFogColor == FogColor + && RenderFogOffset == FogOffset * 0x200 + && memcmp(RenderEdgeTable, EdgeTable, 8*2) == 0 + && memcmp(RenderFogDensityTable + 1, FogDensityTable, 32) == 0 + && memcmp(RenderToonTable, ToonTable, 32*2) == 0; } RenderDispCnt = DispCnt; diff --git a/src/GPU3D.h b/src/GPU3D.h index c69adde..0477c4f 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -87,6 +87,8 @@ extern u8 RenderFogDensityTable[34]; extern u32 RenderClearAttr1, RenderClearAttr2; +extern bool RenderFrameIdentical; + extern std::array<Polygon*,2048> RenderPolygonRAM; extern u32 RenderNumPolygons; diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp index 658b261..ba9548e 100644 --- a/src/GPU3D_OpenGL.cpp +++ b/src/GPU3D_OpenGL.cpp @@ -74,11 +74,11 @@ typedef struct Polygon* PolyData; u32 NumIndices; - u16* Indices; + u32 IndicesOffset; GLuint PrimType; u32 NumEdgeIndices; - u16* EdgeIndices; + u32 
EdgeIndicesOffset; u32 RenderKey; @@ -107,7 +107,11 @@ u32 VertexBuffer[10240 * 7]; u32 NumVertices; GLuint VertexArrayID; +GLuint IndexBufferID; u16 IndexBuffer[2048 * 40]; +u32 NumIndices, NumEdgeIndices; + +const u32 EdgeIndicesOffset = 2048 * 30; GLuint TexMemID; GLuint TexPalMemID; @@ -320,6 +324,9 @@ bool Init() glEnableVertexAttribArray(3); // attrib glVertexAttribIPointer(3, 3, GL_UNSIGNED_INT, 7*4, (void*)(4*4)); + glGenBuffers(1, &IndexBufferID); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, IndexBufferID); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(IndexBuffer), NULL, GL_DYNAMIC_DRAW); glGenFramebuffers(4, &FramebufferID[0]); glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[0]); @@ -563,15 +570,15 @@ void BuildPolygons(RendererPolygon* polygons, int npolys) u32* vptr = &VertexBuffer[0]; u32 vidx = 0; - u16* iptr = &IndexBuffer[0]; - u16* eiptr = &IndexBuffer[2048*30]; + u32 iidx = 0; + u32 eidx = EdgeIndicesOffset; for (int i = 0; i < npolys; i++) { RendererPolygon* rp = &polygons[i]; Polygon* poly = rp->PolyData; - rp->Indices = iptr; + rp->IndicesOffset = iidx; rp->NumIndices = 0; u32 vidx_first = vidx; @@ -606,7 +613,7 @@ void BuildPolygons(RendererPolygon* polygons, int npolys) vptr = SetupVertex(poly, j, vtx, vtxattr, vptr); - *iptr++ = vidx; + IndexBuffer[iidx++] = vidx; rp->NumIndices++; vidx++; @@ -627,9 +634,9 @@ void BuildPolygons(RendererPolygon* polygons, int npolys) } // build a triangle - *iptr++ = vidx_first; - *iptr++ = vidx - 2; - *iptr++ = vidx - 1; + IndexBuffer[iidx++] = vidx_first; + IndexBuffer[iidx++] = vidx - 2; + IndexBuffer[iidx++] = vidx - 1; rp->NumIndices += 3; } else // quad, pentagon, etc @@ -649,9 +656,9 @@ void BuildPolygons(RendererPolygon* polygons, int npolys) if (j >= 2) { // build a triangle - *iptr++ = vidx_first; - *iptr++ = vidx - 1; - *iptr++ = vidx; + IndexBuffer[iidx++] = vidx_first; + IndexBuffer[iidx++] = vidx - 1; + IndexBuffer[iidx++] = vidx; rp->NumIndices += 3; } @@ -743,46 +750,48 @@ void 
BuildPolygons(RendererPolygon* polygons, int npolys) if (j >= 1) { // build a triangle - *iptr++ = vidx_first; - *iptr++ = vidx - 1; - *iptr++ = vidx; + IndexBuffer[iidx++] = vidx_first; + IndexBuffer[iidx++] = vidx - 1; + IndexBuffer[iidx++] = vidx; rp->NumIndices += 3; } vidx++; } - *iptr++ = vidx_first; - *iptr++ = vidx - 1; - *iptr++ = vidx_first + 1; + IndexBuffer[iidx++] = vidx_first; + IndexBuffer[iidx++] = vidx - 1; + IndexBuffer[iidx++] = vidx_first + 1; rp->NumIndices += 3; } } - rp->EdgeIndices = eiptr; + rp->EdgeIndicesOffset = eidx; rp->NumEdgeIndices = 0; u32 vidx_cur = vidx_first; for (int j = 1; j < poly->NumVertices; j++) { - *eiptr++ = vidx_cur; - *eiptr++ = vidx_cur + 1; + IndexBuffer[eidx++] = vidx_cur; + IndexBuffer[eidx++] = vidx_cur + 1; vidx_cur++; rp->NumEdgeIndices += 2; } - *eiptr++ = vidx_cur; - *eiptr++ = vidx_first; + IndexBuffer[eidx++] = vidx_cur; + IndexBuffer[eidx++] = vidx_first; rp->NumEdgeIndices += 2; } NumVertices = vidx; + NumIndices = iidx; + NumEdgeIndices = eidx - EdgeIndicesOffset; } void RenderSinglePolygon(int i) { RendererPolygon* rp = &PolygonList[i]; - glDrawElements(rp->PrimType, rp->NumIndices, GL_UNSIGNED_SHORT, rp->Indices); + glDrawElements(rp->PrimType, rp->NumIndices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->IndicesOffset * 2)); } int RenderPolygonBatch(int i) @@ -803,7 +812,7 @@ int RenderPolygonBatch(int i) numindices += cur_rp->NumIndices; } - glDrawElements(primtype, numindices, GL_UNSIGNED_SHORT, rp->Indices); + glDrawElements(primtype, numindices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->IndicesOffset * 2)); return numpolys; } @@ -823,7 +832,7 @@ int RenderPolygonEdgeBatch(int i) numindices += cur_rp->NumEdgeIndices; } - glDrawElements(GL_LINES, numindices, GL_UNSIGNED_SHORT, rp->EdgeIndices); + glDrawElements(GL_LINES, numindices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->EdgeIndicesOffset * 2)); return numpolys; } @@ -1320,6 +1329,11 @@ void RenderFrame() glBindBuffer(GL_ARRAY_BUFFER, 
VertexBufferID); glBufferSubData(GL_ARRAY_BUFFER, 0, NumVertices*7*4, VertexBuffer); + // bind to access the index buffer + glBindVertexArray(VertexArrayID); + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, NumIndices * 2, IndexBuffer); + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, EdgeIndicesOffset * 2, NumEdgeIndices * 2, IndexBuffer + EdgeIndicesOffset); + RenderSceneChunk(0, 192); } diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index e9d8e75..d66eb76 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -58,15 +58,17 @@ bool PrevIsShadowMask; bool Enabled; +bool FrameIdentical; + // threading bool Threaded; -void* RenderThread; +Platform::Thread* RenderThread; bool RenderThreadRunning; bool RenderThreadRendering; -void* Sema_RenderStart; -void* Sema_RenderDone; -void* Sema_ScanlineCount; +Platform::Semaphore* Sema_RenderStart; +Platform::Semaphore* Sema_RenderDone; +Platform::Semaphore* Sema_ScanlineCount; void RenderThreadFunc(); @@ -550,6 +552,16 @@ typedef struct RendererPolygon PolygonList[2048]; +template <typename T> +inline T ReadVRAM_Texture(u32 addr) +{ + return *(T*)&GPU::VRAMFlat_Texture[addr & 0x7FFFF]; +} +template <typename T> +inline T ReadVRAM_TexPal(u32 addr) +{ + return *(T*)&GPU::VRAMFlat_TexPal[addr & 0x1FFFF]; +} void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) { @@ -606,10 +618,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 1: // A3I5 { vramaddr += ((t * width) + s); - u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); + u8 pixel = ReadVRAM_Texture<u8>(vramaddr); texpal <<= 4; - *color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1)); + *color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1)); *alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6); } break; @@ -617,12 +629,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 2: // 4-color { vramaddr += (((t * width) + s) >> 2); - u8 pixel = 
GPU::ReadVRAM_Texture<u8>(vramaddr); + u8 pixel = ReadVRAM_Texture<u8>(vramaddr); pixel >>= ((s & 0x3) << 1); pixel &= 0x3; texpal <<= 3; - *color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1)); + *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1)); *alpha = (pixel==0) ? alpha0 : 31; } break; @@ -630,12 +642,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 3: // 16-color { vramaddr += (((t * width) + s) >> 1); - u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); + u8 pixel = ReadVRAM_Texture<u8>(vramaddr); if (s & 0x1) pixel >>= 4; else pixel &= 0xF; texpal <<= 4; - *color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1)); + *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1)); *alpha = (pixel==0) ? alpha0 : 31; } break; @@ -643,10 +655,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 4: // 256-color { vramaddr += ((t * width) + s); - u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); + u8 pixel = ReadVRAM_Texture<u8>(vramaddr); texpal <<= 4; - *color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1)); + *color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1)); *alpha = (pixel==0) ? 
alpha0 : 31; } break; @@ -660,30 +672,30 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha if (vramaddr >= 0x40000) slot1addr += 0x10000; - u8 val = GPU::ReadVRAM_Texture<u8>(vramaddr); + u8 val = ReadVRAM_Texture<u8>(vramaddr); val >>= (2 * (s & 0x3)); - u16 palinfo = GPU::ReadVRAM_Texture<u16>(slot1addr); + u16 palinfo = ReadVRAM_Texture<u16>(slot1addr); u32 paloffset = (palinfo & 0x3FFF) << 2; texpal <<= 4; switch (val & 0x3) { case 0: - *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); + *color = ReadVRAM_TexPal<u16>(texpal + paloffset); *alpha = 31; break; case 1: - *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); + *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 2); *alpha = 31; break; case 2: if ((palinfo >> 14) == 1) { - u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); - u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); + u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset); + u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2); u32 r0 = color0 & 0x001F; u32 g0 = color0 & 0x03E0; @@ -700,8 +712,8 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha } else if ((palinfo >> 14) == 3) { - u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); - u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); + u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset); + u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2); u32 r0 = color0 & 0x001F; u32 g0 = color0 & 0x03E0; @@ -717,20 +729,20 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha *color = r | g | b; } else - *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 4); + *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 4); *alpha = 31; break; case 3: if ((palinfo >> 14) == 2) { - *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 6); + *color = ReadVRAM_TexPal<u16>(texpal + paloffset + 6); *alpha = 31; } else if ((palinfo >> 14) == 3) { - 
u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); - u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); + u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset); + u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2); u32 r0 = color0 & 0x001F; u32 g0 = color0 & 0x03E0; @@ -759,10 +771,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 6: // A5I3 { vramaddr += ((t * width) + s); - u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); + u8 pixel = ReadVRAM_Texture<u8>(vramaddr); texpal <<= 4; - *color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1)); + *color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1)); *alpha = (pixel >> 3); } break; @@ -770,7 +782,7 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 7: // direct color { vramaddr += (((t * width) + s) << 1); - *color = GPU::ReadVRAM_Texture<u16>(vramaddr); + *color = ReadVRAM_Texture<u16>(vramaddr); *alpha = (*color & 0x8000) ? 31 : 0; } break; @@ -2007,8 +2019,8 @@ void ClearBuffers() { for (int x = 0; x < 256; x++) { - u16 val2 = GPU::ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1)); - u16 val3 = GPU::ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1)); + u16 val2 = ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1)); + u16 val3 = ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1)); // TODO: confirm color conversion u32 r = (val2 << 1) & 0x3E; if (r) r++; @@ -2088,11 +2100,19 @@ void VCount144() void RenderFrame() { + auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture); + auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal); + + bool textureChanged = GPU::MakeVRAMFlat_TextureCoherent(textureDirty); + bool texPalChanged = GPU::MakeVRAMFlat_TexPalCoherent(texPalDirty); + + FrameIdentical = !(textureChanged || texPalChanged) && RenderFrameIdentical; + if (RenderThreadRunning) { Platform::Semaphore_Post(Sema_RenderStart); } - else + 
else if (!FrameIdentical) { ClearBuffers(); RenderPolygons(false, &RenderPolygonRAM[0], RenderNumPolygons); @@ -2107,8 +2127,15 @@ void RenderThreadFunc() if (!RenderThreadRunning) return; RenderThreadRendering = true; - ClearBuffers(); - RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons); + if (FrameIdentical) + { + Platform::Semaphore_Post(Sema_ScanlineCount, 192); + } + else + { + ClearBuffers(); + RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons); + } Platform::Semaphore_Post(Sema_RenderDone); RenderThreadRendering = false; diff --git a/src/NDS.cpp b/src/NDS.cpp index 1781dd5..b313db0 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -210,13 +210,13 @@ bool Init() void DeInit() { - delete ARM9; - delete ARM7; - #ifdef JIT_ENABLED ARMJIT::DeInit(); #endif + delete ARM9; + delete ARM7; + for (int i = 0; i < 8; i++) delete DMAs[i]; @@ -908,7 +908,7 @@ void RunSystem(u64 timestamp) } } -template <bool EnableJIT> +template <bool EnableJIT, int ConsoleType> u32 RunFrame() { FrameStartTimestamp = SysTimestamp; @@ -934,10 +934,10 @@ u32 RunFrame() } else if (CPUStop & 0x0FFF) { - DMAs[0]->Run(); - if (!(CPUStop & 0x80000000)) DMAs[1]->Run(); - if (!(CPUStop & 0x80000000)) DMAs[2]->Run(); - if (!(CPUStop & 0x80000000)) DMAs[3]->Run(); + DMAs[0]->Run<ConsoleType>(); + if (!(CPUStop & 0x80000000)) DMAs[1]->Run<ConsoleType>(); + if (!(CPUStop & 0x80000000)) DMAs[2]->Run<ConsoleType>(); + if (!(CPUStop & 0x80000000)) DMAs[3]->Run<ConsoleType>(); if (ConsoleType == 1) DSi::RunNDMAs(0); } else @@ -962,10 +962,10 @@ u32 RunFrame() if (CPUStop & 0x0FFF0000) { - DMAs[4]->Run(); - DMAs[5]->Run(); - DMAs[6]->Run(); - DMAs[7]->Run(); + DMAs[4]->Run<ConsoleType>(); + DMAs[5]->Run<ConsoleType>(); + DMAs[6]->Run<ConsoleType>(); + DMAs[7]->Run<ConsoleType>(); if (ConsoleType == 1) DSi::RunNDMAs(1); } else @@ -999,6 +999,9 @@ u32 RunFrame() ARM7Timestamp-SysTimestamp, GPU3D::Timestamp-SysTimestamp); #endif + SPU::TransferOutput(); + + NDSCart::FlushSRAMFile(); 
NumFrames++; @@ -1009,10 +1012,14 @@ u32 RunFrame() { #ifdef JIT_ENABLED if (Config::JIT_Enable) - return RunFrame<true>(); + return NDS::ConsoleType == 1 + ? RunFrame<true, 1>() + : RunFrame<true, 0>(); else #endif - return RunFrame<false>(); + return NDS::ConsoleType == 1 + ? RunFrame<false, 1>() + : RunFrame<false, 0>(); } void Reschedule(u64 target) @@ -3130,6 +3137,10 @@ void ARM9IOWrite8(u32 addr, u8 val) NDSCart::WriteSPIData(val); return; + case 0x04000188: + ARM9IOWrite32(addr, val | (val << 8) | (val << 16) | (val << 24)); + return; + case 0x040001A8: NDSCart::ROMCommand[0] = val; return; case 0x040001A9: NDSCart::ROMCommand[1] = val; return; case 0x040001AA: NDSCart::ROMCommand[2] = val; return; @@ -3246,6 +3257,10 @@ void ARM9IOWrite16(u32 addr, u16 val) IPCFIFOCnt9 = val & 0x8404; return; + case 0x04000188: + ARM9IOWrite32(addr, val | (val << 16)); + return; + case 0x040001A0: if (!(ExMemCnt[0] & (1<<11))) NDSCart::WriteSPICnt(val); return; @@ -3733,6 +3748,10 @@ void ARM7IOWrite8(u32 addr, u8 val) case 0x04000138: RTC::Write(val, true); return; + case 0x04000188: + ARM7IOWrite32(addr, val | (val << 8) | (val << 16) | (val << 24)); + return; + case 0x040001A0: if (ExMemCnt[0] & (1<<11)) { @@ -3841,6 +3860,10 @@ void ARM7IOWrite16(u32 addr, u16 val) IPCFIFOCnt7 = val & 0x8404; return; + case 0x04000188: + ARM7IOWrite32(addr, val | (val << 16)); + return; + case 0x040001A0: if (ExMemCnt[0] & (1<<11)) NDSCart::WriteSPICnt(val); diff --git a/src/NDSCart.cpp b/src/NDSCart.cpp index 077bf48..2d8396a 100644 --- a/src/NDSCart.cpp +++ b/src/NDSCart.cpp @@ -37,6 +37,7 @@ u8* SRAM; u32 SRAMLength; char SRAMPath[1024]; +bool SRAMFileDirty; void (*WriteFunc)(u8 val, bool islast); @@ -445,14 +446,21 @@ void Write(u8 val, u32 hold) break; } - if (islast && (CurCmd == 0x02 || CurCmd == 0x0A) && (SRAMLength > 0)) + SRAMFileDirty |= islast && (CurCmd == 0x02 || CurCmd == 0x0A) && (SRAMLength > 0); +} + +void FlushSRAMFile() +{ + if (!SRAMFileDirty) + return; + + 
SRAMFileDirty = false; + + FILE* f = Platform::OpenFile(SRAMPath, "wb"); + if (f) { - FILE* f = Platform::OpenFile(SRAMPath, "wb"); - if (f) - { - fwrite(SRAM, SRAMLength, 1, f); - fclose(f); - } + fwrite(SRAM, SRAMLength, 1, f); + fclose(f); } } @@ -1034,6 +1042,11 @@ void RelocateSave(const char* path, bool write) NDSCart_SRAM::RelocateSave(path, write); } +void FlushSRAMFile() +{ + NDSCart_SRAM::FlushSRAMFile(); +} + int ImportSRAM(const u8* data, u32 length) { memcpy(NDSCart_SRAM::SRAM, data, std::min(length, NDSCart_SRAM::SRAMLength)); diff --git a/src/NDSCart.h b/src/NDSCart.h index 9fe916d..7d3f4a1 100644 --- a/src/NDSCart.h +++ b/src/NDSCart.h @@ -46,6 +46,9 @@ void DoSavestate(Savestate* file); void DecryptSecureArea(u8* out); bool LoadROM(const char* path, const char* sram, bool direct); + +void FlushSRAMFile(); + void RelocateSave(const char* path, bool write); int ImportSRAM(const u8* data, u32 length); diff --git a/src/NonStupidBitfield.h b/src/NonStupidBitfield.h new file mode 100644 index 0000000..124ba76 --- /dev/null +++ b/src/NonStupidBitfield.h @@ -0,0 +1,149 @@ +#ifndef NONSTUPIDBITFIELD_H +#define NONSTUPIDBITFIELD_H + +#include "types.h" + +#include <memory.h> + +#include <initializer_list> +#include <algorithm> + +// like std::bitset but less stupid and optimised for +// our use case (keeping track of memory invalidations) + +template <u32 Size> +struct NonStupidBitField +{ + static_assert((Size % 8) == 0, "bitfield size must be a multiple of 8"); + static const u32 DataLength = Size / 8; + u8 Data[DataLength]; + + struct Ref + { + NonStupidBitField<Size>& BitField; + u32 Idx; + + operator bool() + { + return BitField.Data[Idx >> 3] & (1 << (Idx & 0x7)); + } + + Ref& operator=(bool set) + { + BitField.Data[Idx >> 3] &= ~(1 << (Idx & 0x7)); + BitField.Data[Idx >> 3] |= ((u8)set << (Idx & 0x7)); + return *this; + } + }; + + struct Iterator + { + NonStupidBitField<Size>& BitField; + u32 DataIdx; + u32 BitIdx; + u64 RemainingBits; + + u32 
operator*() { return DataIdx * 8 + BitIdx; } + + bool operator==(const Iterator& other) { return other.DataIdx == DataIdx; } + bool operator!=(const Iterator& other) { return other.DataIdx != DataIdx; } + + template <typename T> + void Next() + { + while (RemainingBits == 0 && DataIdx < DataLength) + { + DataIdx += sizeof(T); + RemainingBits = *(T*)&BitField.Data[DataIdx]; + } + + BitIdx = __builtin_ctzll(RemainingBits); + RemainingBits &= ~(1ULL << BitIdx); + } + + Iterator operator++(int) + { + Iterator prev(*this); + ++*this; + return prev; + } + + Iterator& operator++() + { + if ((DataLength % 8) == 0) + Next<u64>(); + else if ((DataLength % 4) == 0) + Next<u32>(); + else if ((DataLength % 2) == 0) + Next<u16>(); + else + Next<u8>(); + + return *this; + } + }; + + NonStupidBitField(u32 start, u32 size) + { + memset(Data, 0, sizeof(Data)); + + if (size == 0) + return; + + u32 roundedStartBit = (start + 7) & ~7; + u32 roundedEndBit = (start + size) & ~7; + if (roundedStartBit != roundedEndBit) + memset(Data + roundedStartBit / 8, 0xFF, (roundedEndBit - roundedStartBit) / 8); + + if (start & 0x7) + Data[start >> 3] = 0xFF << (start & 0x7); + if ((start + size) & 0x7) + Data[(start + size) >> 3] = 0xFF >> ((start + size) & 0x7); + } + + NonStupidBitField() + { + memset(Data, 0, sizeof(Data)); + } + + Iterator End() + { + return Iterator{*this, DataLength, 0, 0}; + } + Iterator Begin() + { + if ((DataLength % 8) == 0) + return ++Iterator{*this, 0, 0, *(u64*)Data}; + else if ((DataLength % 4) == 0) + return ++Iterator{*this, 0, 0, *(u32*)Data}; + else if ((DataLength % 2) == 0) + return ++Iterator{*this, 0, 0, *(u16*)Data}; + else + return ++Iterator{*this, 0, 0, *Data}; + } + + Ref operator[](u32 idx) + { + return Ref{*this, idx}; + } + + NonStupidBitField& operator|=(const NonStupidBitField<Size>& other) + { + for (u32 i = 0; i < DataLength; i++) + { + Data[i] |= other.Data[i]; + } + return *this; + } + NonStupidBitField& operator&=(const NonStupidBitField<Size>& 
other) + { + for (u32 i = 0; i < DataLength; i++) + { + Data[i] &= other.Data[i]; + } + return *this; + } +}; + + +#endif
\ No newline at end of file diff --git a/src/OpenGLSupport.h b/src/OpenGLSupport.h index 925c0ad..44c511f 100644 --- a/src/OpenGLSupport.h +++ b/src/OpenGLSupport.h @@ -23,8 +23,13 @@ #include <string.h> // TODO: different includes for each platform -#include <GL/gl.h> -#include <GL/glext.h> +#ifdef __APPLE__ + #include <OpenGL/gl3.h> + #include <OpenGL/gl3ext.h> +#else + #include <GL/gl.h> + #include <GL/glext.h> +#endif #include "Platform.h" @@ -61,6 +66,11 @@ #endif +#ifdef __APPLE__ + +#define DO_PROCLIST(func) + +#else #define DO_PROCLIST(func) \ DO_PROCLIST_1_3(func) \ @@ -128,6 +138,7 @@ \ func(GLGETSTRINGI, glGetStringi); \ +#endif namespace OpenGL { diff --git a/src/Platform.h b/src/Platform.h index fea98dd..b4dda9e 100644 --- a/src/Platform.h +++ b/src/Platform.h @@ -67,15 +67,24 @@ inline bool LocalFileExists(const char* name) return true; } -void* Thread_Create(void (*func)()); -void Thread_Free(void* thread); -void Thread_Wait(void* thread); - -void* Semaphore_Create(); -void Semaphore_Free(void* sema); -void Semaphore_Reset(void* sema); -void Semaphore_Wait(void* sema); -void Semaphore_Post(void* sema); +struct Thread; +Thread* Thread_Create(void (*func)()); +void Thread_Free(Thread* thread); +void Thread_Wait(Thread* thread); + +struct Semaphore; +Semaphore* Semaphore_Create(); +void Semaphore_Free(Semaphore* sema); +void Semaphore_Reset(Semaphore* sema); +void Semaphore_Wait(Semaphore* sema); +void Semaphore_Post(Semaphore* sema, int count = 1); + +struct Mutex; +Mutex* Mutex_Create(); +void Mutex_Free(Mutex* mutex); +void Mutex_Lock(Mutex* mutex); +void Mutex_Unlock(Mutex* mutex); +bool Mutex_TryLock(Mutex* mutex); void* GL_GetProcAddress(const char* proc); diff --git a/src/SPU.cpp b/src/SPU.cpp index 5b74bda..fe798c7 100644 --- a/src/SPU.cpp +++ b/src/SPU.cpp @@ -18,6 +18,7 @@ #include <stdio.h> #include <string.h> +#include "Platform.h" #include "NDS.h" #include "DSi.h" #include "SPU.h" @@ -61,13 +62,15 @@ const s16 PSGTable[8][8] = {-0x7FFF, 
-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF} }; -const u32 kSamplesPerRun = 1; +const u32 OutputBufferSize = 2*2048; +s16 OutputBackbuffer[2 * OutputBufferSize]; +u32 OutputBackbufferWritePosition; -const u32 OutputBufferSize = 2*1024; -s16 OutputBuffer[2 * OutputBufferSize]; -volatile u32 OutputReadOffset; -volatile u32 OutputWriteOffset; +s16 OutputFrontBuffer[2 * OutputBufferSize]; +u32 OutputFrontBufferWritePosition; +u32 OutputFrontBufferReadPosition; +Platform::Mutex* AudioLock; u16 Cnt; u8 MasterVolume; @@ -85,6 +88,8 @@ bool Init() Capture[0] = new CaptureUnit(0); Capture[1] = new CaptureUnit(1); + AudioLock = Platform::Mutex_Create(); + return true; } @@ -95,6 +100,8 @@ void DeInit() delete Capture[0]; delete Capture[1]; + + Platform::Mutex_Free(AudioLock); } void Reset() @@ -111,15 +118,18 @@ void Reset() Capture[0]->Reset(); Capture[1]->Reset(); - NDS::ScheduleEvent(NDS::Event_SPU, true, 1024*kSamplesPerRun, Mix, kSamplesPerRun); + NDS::ScheduleEvent(NDS::Event_SPU, true, 1024, Mix, 0); } void Stop() { - memset(OutputBuffer, 0, 2*OutputBufferSize*2); + Platform::Mutex_Lock(AudioLock); + memset(OutputFrontBuffer, 0, 2*OutputBufferSize*2); - OutputReadOffset = 0; - OutputWriteOffset = 0; + OutputBackbufferWritePosition = 0; + OutputFrontBufferReadPosition = 0; + OutputFrontBufferWritePosition = 0; + Platform::Mutex_Unlock(AudioLock); } void DoSavestate(Savestate* file) @@ -416,11 +426,11 @@ void Channel::NextSample_Noise() } template<u32 type> -void Channel::Run(s32* buf, u32 samples) +s32 Channel::Run() { - if (!(Cnt & (1<<31))) return; + if (!(Cnt & (1<<31))) return 0; - if ((type < 3) && ((Length+LoopPos) < 16)) return; + if ((type < 3) && ((Length+LoopPos) < 16)) return 0; if (KeyOn) { @@ -428,45 +438,32 @@ void Channel::Run(s32* buf, u32 samples) KeyOn = false; } - for (u32 s = 0; s < samples; s++) + Timer += 512; // 1 sample = 512 cycles at 16MHz + + while (Timer >> 16) { - Timer += 512; // 1 sample = 512 cycles at 16MHz + Timer = 
TimerReload + (Timer - 0x10000); - while (Timer >> 16) + switch (type) { - Timer = TimerReload + (Timer - 0x10000); - - switch (type) - { - case 0: NextSample_PCM8(); break; - case 1: NextSample_PCM16(); break; - case 2: NextSample_ADPCM(); break; - case 3: NextSample_PSG(); break; - case 4: NextSample_Noise(); break; - } + case 0: NextSample_PCM8(); break; + case 1: NextSample_PCM16(); break; + case 2: NextSample_ADPCM(); break; + case 3: NextSample_PSG(); break; + case 4: NextSample_Noise(); break; } - - s32 val = (s32)CurSample; - val <<= VolumeShift; - val *= Volume; - buf[s] = val; - - if (!(Cnt & (1<<31))) break; } + + s32 val = (s32)CurSample; + val <<= VolumeShift; + val *= Volume; + return val; } -void Channel::PanOutput(s32* inbuf, u32 samples, s32* leftbuf, s32* rightbuf) +void Channel::PanOutput(s32 in, s32& left, s32& right) { - for (u32 s = 0; s < samples; s++) - { - s32 val = (s32)inbuf[s]; - - s32 l = ((s64)val * (128-Pan)) >> 10; - s32 r = ((s64)val * Pan) >> 10; - - leftbuf[s] += l; - rightbuf[s] += r; - } + left += ((s64)in * (128-Pan)) >> 10; + right += ((s64)in * Pan) >> 10; } @@ -602,39 +599,31 @@ void CaptureUnit::Run(s32 sample) } -void Mix(u32 samples) +void Mix(u32 dummy) { - s32 channelbuf[32]; - s32 leftbuf[32], rightbuf[32]; - s32 ch0buf[32], ch1buf[32], ch2buf[32], ch3buf[32]; - s32 leftoutput[32], rightoutput[32]; - - for (u32 s = 0; s < samples; s++) - { - leftbuf[s] = 0; rightbuf[s] = 0; - leftoutput[s] = 0; rightoutput[s] = 0; - } + s32 left = 0, right = 0; + s32 leftoutput = 0, rightoutput = 0; if (Cnt & (1<<15)) { - Channels[0]->DoRun(ch0buf, samples); - Channels[1]->DoRun(ch1buf, samples); - Channels[2]->DoRun(ch2buf, samples); - Channels[3]->DoRun(ch3buf, samples); + s32 ch0 = Channels[0]->DoRun(); + s32 ch1 = Channels[1]->DoRun(); + s32 ch2 = Channels[2]->DoRun(); + s32 ch3 = Channels[3]->DoRun(); // TODO: addition from capture registers - Channels[0]->PanOutput(ch0buf, samples, leftbuf, rightbuf); - 
Channels[2]->PanOutput(ch2buf, samples, leftbuf, rightbuf); + Channels[0]->PanOutput(ch0, left, right); + Channels[2]->PanOutput(ch2, left, right); - if (!(Cnt & (1<<12))) Channels[1]->PanOutput(ch1buf, samples, leftbuf, rightbuf); - if (!(Cnt & (1<<13))) Channels[3]->PanOutput(ch3buf, samples, leftbuf, rightbuf); + if (!(Cnt & (1<<12))) Channels[1]->PanOutput(ch1, left, right); + if (!(Cnt & (1<<13))) Channels[3]->PanOutput(ch3, left, right); for (int i = 4; i < 16; i++) { Channel* chan = Channels[i]; - chan->DoRun(channelbuf, samples); - chan->PanOutput(channelbuf, samples, leftbuf, rightbuf); + s32 channel = chan->DoRun(); + chan->PanOutput(channel, left, right); } // sound capture @@ -642,32 +631,24 @@ void Mix(u32 samples) if (Capture[0]->Cnt & (1<<7)) { - for (u32 s = 0; s < samples; s++) - { - s32 val = leftbuf[s]; + s32 val = left; - val >>= 8; - if (val < -0x8000) val = -0x8000; - else if (val > 0x7FFF) val = 0x7FFF; + val >>= 8; + if (val < -0x8000) val = -0x8000; + else if (val > 0x7FFF) val = 0x7FFF; - Capture[0]->Run(val); - if (!(Capture[0]->Cnt & (1<<7))) break; - } + Capture[0]->Run(val); } if (Capture[1]->Cnt & (1<<7)) { - for (u32 s = 0; s < samples; s++) - { - s32 val = rightbuf[s]; + s32 val = right; - val >>= 8; - if (val < -0x8000) val = -0x8000; - else if (val > 0x7FFF) val = 0x7FFF; + val >>= 8; + if (val < -0x8000) val = -0x8000; + else if (val > 0x7FFF) val = 0x7FFF; - Capture[1]->Run(val); - if (!(Capture[1]->Cnt & (1<<7))) break; - } + Capture[1]->Run(val); } // final output @@ -675,31 +656,25 @@ void Mix(u32 samples) switch (Cnt & 0x0300) { case 0x0000: // left mixer - { - for (u32 s = 0; s < samples; s++) - leftoutput[s] = leftbuf[s]; - } + leftoutput = left; break; case 0x0100: // channel 1 { s32 pan = 128 - Channels[1]->Pan; - for (u32 s = 0; s < samples; s++) - leftoutput[s] = ((s64)ch1buf[s] * pan) >> 10; + leftoutput = ((s64)ch1 * pan) >> 10; } break; case 0x0200: // channel 3 { s32 pan = 128 - Channels[3]->Pan; - for (u32 s = 0; 
s < samples; s++) - leftoutput[s] = ((s64)ch3buf[s] * pan) >> 10; + leftoutput = ((s64)ch3 * pan) >> 10; } break; case 0x0300: // channel 1+3 { s32 pan1 = 128 - Channels[1]->Pan; s32 pan3 = 128 - Channels[3]->Pan; - for (u32 s = 0; s < samples; s++) - leftoutput[s] = (((s64)ch1buf[s] * pan1) >> 10) + (((s64)ch3buf[s] * pan3) >> 10); + leftoutput = (((s64)ch1 * pan1) >> 10) + (((s64)ch3 * pan3) >> 10); } break; } @@ -707,105 +682,122 @@ void Mix(u32 samples) switch (Cnt & 0x0C00) { case 0x0000: // right mixer - { - for (u32 s = 0; s < samples; s++) - rightoutput[s] = rightbuf[s]; - } + rightoutput = right; break; case 0x0400: // channel 1 { s32 pan = Channels[1]->Pan; - for (u32 s = 0; s < samples; s++) - rightoutput[s] = ((s64)ch1buf[s] * pan) >> 10; + rightoutput = ((s64)ch1 * pan) >> 10; } break; case 0x0800: // channel 3 { s32 pan = Channels[3]->Pan; - for (u32 s = 0; s < samples; s++) - rightoutput[s] = ((s64)ch3buf[s] * pan) >> 10; + rightoutput = ((s64)ch3 * pan) >> 10; } break; case 0x0C00: // channel 1+3 { s32 pan1 = Channels[1]->Pan; s32 pan3 = Channels[3]->Pan; - for (u32 s = 0; s < samples; s++) - rightoutput[s] = (((s64)ch1buf[s] * pan1) >> 10) + (((s64)ch3buf[s] * pan3) >> 10); + rightoutput = (((s64)ch1 * pan1) >> 10) + (((s64)ch3 * pan3) >> 10); } break; } } - for (u32 s = 0; s < samples; s++) + leftoutput = ((s64)leftoutput * MasterVolume) >> 7; + rightoutput = ((s64)rightoutput * MasterVolume) >> 7; + + leftoutput >>= 8; + if (leftoutput < -0x8000) leftoutput = -0x8000; + else if (leftoutput > 0x7FFF) leftoutput = 0x7FFF; + rightoutput >>= 8; + if (rightoutput < -0x8000) rightoutput = -0x8000; + else if (rightoutput > 0x7FFF) rightoutput = 0x7FFF; + + // OutputBufferFrame can never get full because it's + // transfered to OutputBuffer at the end of the frame + OutputBackbuffer[OutputBackbufferWritePosition ] = leftoutput >> 1; + OutputBackbuffer[OutputBackbufferWritePosition + 1] = rightoutput >> 1; + OutputBackbufferWritePosition += 2; + + 
NDS::ScheduleEvent(NDS::Event_SPU, true, 1024, Mix, 0); +} + +void TransferOutput() +{ + Platform::Mutex_Lock(AudioLock); + for (u32 i = 0; i < OutputBackbufferWritePosition; i += 2) { - s32 l = leftoutput[s]; - s32 r = rightoutput[s]; - - l = ((s64)l * MasterVolume) >> 7; - r = ((s64)r * MasterVolume) >> 7; - - l >>= 8; - if (l < -0x8000) l = -0x8000; - else if (l > 0x7FFF) l = 0x7FFF; - r >>= 8; - if (r < -0x8000) r = -0x8000; - else if (r > 0x7FFF) r = 0x7FFF; - - OutputBuffer[OutputWriteOffset ] = l >> 1; - OutputBuffer[OutputWriteOffset + 1] = r >> 1; - OutputWriteOffset += 2; - OutputWriteOffset &= ((2*OutputBufferSize)-1); - if (OutputWriteOffset == OutputReadOffset) + OutputFrontBuffer[OutputFrontBufferWritePosition ] = OutputBackbuffer[i ]; + OutputFrontBuffer[OutputFrontBufferWritePosition + 1] = OutputBackbuffer[i + 1]; + + OutputFrontBufferWritePosition += 2; + OutputFrontBufferWritePosition &= OutputBufferSize*2-1; + if (OutputFrontBufferWritePosition == OutputFrontBufferReadPosition) { - //printf("!! 
SOUND FIFO OVERFLOW %d\n", OutputWriteOffset>>1); // advance the read position too, to avoid losing the entire FIFO - OutputReadOffset += 2; - OutputReadOffset &= ((2*OutputBufferSize)-1); + OutputFrontBufferReadPosition += 2; + OutputFrontBufferReadPosition &= OutputBufferSize*2-1; } } - - NDS::ScheduleEvent(NDS::Event_SPU, true, 1024*kSamplesPerRun, Mix, kSamplesPerRun); + OutputBackbufferWritePosition = 0; + Platform::Mutex_Unlock(AudioLock); } - void TrimOutput() { + Platform::Mutex_Lock(AudioLock); const int halflimit = (OutputBufferSize / 2); - int readpos = OutputWriteOffset - (halflimit*2); + int readpos = OutputFrontBufferWritePosition - (halflimit*2); if (readpos < 0) readpos += (OutputBufferSize*2); - OutputReadOffset = readpos; + OutputFrontBufferReadPosition = readpos; + Platform::Mutex_Unlock(AudioLock); } void DrainOutput() { - OutputReadOffset = 0; - OutputWriteOffset = 0; + Platform::Mutex_Lock(AudioLock); + OutputFrontBufferWritePosition = 0; + OutputFrontBufferReadPosition = 0; + Platform::Mutex_Unlock(AudioLock); } void InitOutput() { - memset(OutputBuffer, 0, 2*OutputBufferSize*2); - OutputReadOffset = 0; - OutputWriteOffset = OutputBufferSize; + Platform::Mutex_Lock(AudioLock); + memset(OutputBackbuffer, 0, 2*OutputBufferSize*2); + memset(OutputFrontBuffer, 0, 2*OutputBufferSize*2); + OutputFrontBufferReadPosition = 0; + OutputFrontBufferWritePosition = 0; + Platform::Mutex_Unlock(AudioLock); } int GetOutputSize() { + Platform::Mutex_Lock(AudioLock); + int ret; - if (OutputWriteOffset >= OutputReadOffset) - ret = OutputWriteOffset - OutputReadOffset; + if (OutputFrontBufferWritePosition >= OutputFrontBufferReadPosition) + ret = OutputFrontBufferWritePosition - OutputFrontBufferReadPosition; else - ret = (OutputBufferSize*2) - OutputReadOffset + OutputWriteOffset; + ret = (OutputBufferSize*2) - OutputFrontBufferReadPosition + OutputFrontBufferWritePosition; ret >>= 1; + + Platform::Mutex_Unlock(AudioLock); return ret; } void Sync(bool wait) { + 
// this function is currently not used anywhere + // depending on the usage context the thread safety measures could be made + // a lot faster + // sync to audio output in case the core is running too fast // * wait=true: wait until enough audio data has been played // * wait=false: merely skip some audio data to avoid a FIFO overflow @@ -819,32 +811,42 @@ void Sync(bool wait) } else if (GetOutputSize() > halflimit) { - int readpos = OutputWriteOffset - (halflimit*2); + Platform::Mutex_Lock(AudioLock); + + int readpos = OutputFrontBufferWritePosition - (halflimit*2); if (readpos < 0) readpos += (OutputBufferSize*2); - OutputReadOffset = readpos; + OutputFrontBufferReadPosition = readpos; + + Platform::Mutex_Unlock(AudioLock); } } int ReadOutput(s16* data, int samples) { - if (OutputReadOffset == OutputWriteOffset) + Platform::Mutex_Lock(AudioLock); + if (OutputFrontBufferReadPosition == OutputFrontBufferWritePosition) + { + Platform::Mutex_Unlock(AudioLock); return 0; + } for (int i = 0; i < samples; i++) { - *data++ = OutputBuffer[OutputReadOffset]; - *data++ = OutputBuffer[OutputReadOffset + 1]; + *data++ = OutputFrontBuffer[OutputFrontBufferReadPosition]; + *data++ = OutputFrontBuffer[OutputFrontBufferReadPosition + 1]; + + OutputFrontBufferReadPosition += 2; + OutputFrontBufferReadPosition &= ((2*OutputBufferSize)-1); - //if (OutputReadOffset != OutputWriteOffset) + if (OutputFrontBufferWritePosition == OutputFrontBufferReadPosition) { - OutputReadOffset += 2; - OutputReadOffset &= ((2*OutputBufferSize)-1); - } - if (OutputReadOffset == OutputWriteOffset) + Platform::Mutex_Unlock(AudioLock); return i+1; + } } + Platform::Mutex_Unlock(AudioLock); return samples; } @@ -33,7 +33,7 @@ void DoSavestate(Savestate* file); void SetBias(u16 bias); -void Mix(u32 samples); +void Mix(u32 dummy); void TrimOutput(); void DrainOutput(); @@ -41,6 +41,7 @@ void InitOutput(); int GetOutputSize(); void Sync(bool wait); int ReadOutput(s16* data, int samples); +void 
TransferOutput(); u8 Read8(u32 addr); u16 Read16(u32 addr); @@ -123,26 +124,24 @@ public: void NextSample_PSG(); void NextSample_Noise(); - template<u32 type> void Run(s32* buf, u32 samples); + template<u32 type> s32 Run(); - void DoRun(s32* buf, u32 samples) + s32 DoRun() { - for (u32 s = 0; s < samples; s++) - buf[s] = 0; - switch ((Cnt >> 29) & 0x3) { - case 0: Run<0>(buf, samples); break; - case 1: Run<1>(buf, samples); break; - case 2: Run<2>(buf, samples); break; + case 0: return Run<0>(); break; + case 1: return Run<1>(); break; + case 2: return Run<2>(); break; case 3: - if (Num >= 14) Run<4>(buf, samples); - else if (Num >= 8) Run<3>(buf, samples); - break; + if (Num >= 14) return Run<4>(); + else if (Num >= 8) return Run<3>(); + default: + return 0; } } - void PanOutput(s32* inbuf, u32 samples, s32* leftbuf, s32* rightbuf); + void PanOutput(s32 in, s32& left, s32& right); private: u32 (*BusRead32)(u32 addr); diff --git a/src/frontend/SharedConfig.h b/src/frontend/SharedConfig.h new file mode 100644 index 0000000..b4b18c5 --- /dev/null +++ b/src/frontend/SharedConfig.h @@ -0,0 +1,13 @@ +#ifndef SHAREDCONFIG_H +#define SHAREDCONFIG_H + +namespace Config +{ + +extern int ConsoleType; +extern int DirectBoot; +extern int SavestateRelocSRAM; + +} + +#endif
\ No newline at end of file diff --git a/src/frontend/Util_ROM.cpp b/src/frontend/Util_ROM.cpp index f61c3e3..9f22f5f 100644 --- a/src/frontend/Util_ROM.cpp +++ b/src/frontend/Util_ROM.cpp @@ -21,7 +21,7 @@ #include "FrontendUtil.h" #include "Config.h" -#include "qt_sdl/PlatformConfig.h" // FIXME!!! +#include "SharedConfig.h" #include "Platform.h" #include "NDS.h" diff --git a/src/frontend/qt_sdl/CMakeLists.txt b/src/frontend/qt_sdl/CMakeLists.txt index 9a0a025..0d695d6 100644 --- a/src/frontend/qt_sdl/CMakeLists.txt +++ b/src/frontend/qt_sdl/CMakeLists.txt @@ -95,6 +95,19 @@ if (PORTABLE) add_definitions(-DPORTABLE) endif() +if (APPLE) + set_target_properties(melonDS PROPERTIES + MACOSX_BUNDLE true + MACOSX_BUNDLE_INFO_PLIST ${CMAKE_SOURCE_DIR}/melonDS.plist + OUTPUT_NAME melonDS + ) + + # Copy icon into the bundle + target_sources(melonDS PRIVATE "${CMAKE_SOURCE_DIR}/melonDS.icns") + set_source_files_properties("${CMAKE_SOURCE_DIR}/melonDS.icns" PROPERTIES MACOSX_PACKAGE_LOCATION Resources) + +endif() + install(FILES ../../../net.kuribo64.melonDS.desktop DESTINATION ${CMAKE_INSTALL_PREFIX}/share/applications) install(FILES ../../../icon/melon_16x16.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/16x16/apps RENAME net.kuribo64.melonDS.png) install(FILES ../../../icon/melon_32x32.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/32x32/apps RENAME net.kuribo64.melonDS.png) @@ -102,4 +115,4 @@ install(FILES ../../../icon/melon_48x48.png DESTINATION ${CMAKE_INSTALL_PREFIX}/ install(FILES ../../../icon/melon_64x64.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/64x64/apps RENAME net.kuribo64.melonDS.png) install(FILES ../../../icon/melon_128x128.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/128x128/apps RENAME net.kuribo64.melonDS.png) install(FILES ../../../icon/melon_256x256.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/256x256/apps RENAME net.kuribo64.melonDS.png) -install(TARGETS melonDS RUNTIME 
DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) +install(TARGETS melonDS BUNDLE DESTINATION ${CMAKE_BINARY_DIR} RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) diff --git a/src/frontend/qt_sdl/EmuSettingsDialog.cpp b/src/frontend/qt_sdl/EmuSettingsDialog.cpp index 79ce5ed..3183182 100644 --- a/src/frontend/qt_sdl/EmuSettingsDialog.cpp +++ b/src/frontend/qt_sdl/EmuSettingsDialog.cpp @@ -65,6 +65,9 @@ EmuSettingsDialog::EmuSettingsDialog(QWidget* parent) : QDialog(parent), ui(new ui->chkJITBranchOptimisations->setChecked(Config::JIT_BranchOptimisations != 0); ui->chkJITLiteralOptimisations->setChecked(Config::JIT_LiteralOptimisations != 0); ui->chkJITFastMemory->setChecked(Config::JIT_FastMemory != 0); + #ifdef __APPLE__ + ui->chkJITFastMemory->setDisabled(true); + #endif ui->spnJITMaximumBlockSize->setValue(Config::JIT_MaxBlockSize); #else ui->chkEnableJIT->setDisabled(true); @@ -329,6 +332,8 @@ void EmuSettingsDialog::on_chkEnableJIT_toggled() bool disabled = !ui->chkEnableJIT->isChecked(); ui->chkJITBranchOptimisations->setDisabled(disabled); ui->chkJITLiteralOptimisations->setDisabled(disabled); - ui->chkJITFastMemory->setDisabled(disabled); + #ifndef __APPLE__ + ui->chkJITFastMemory->setDisabled(disabled); + #endif ui->spnJITMaximumBlockSize->setDisabled(disabled); } diff --git a/src/frontend/qt_sdl/InputConfigDialog.cpp b/src/frontend/qt_sdl/InputConfigDialog.cpp index 9f08731..eaf1e9b 100644 --- a/src/frontend/qt_sdl/InputConfigDialog.cpp +++ b/src/frontend/qt_sdl/InputConfigDialog.cpp @@ -216,6 +216,7 @@ KeyMapButton::KeyMapButton(int* mapping, bool hotkey) : QPushButton() setCheckable(true); setText(mappingText()); + setFocusPolicy(Qt::StrongFocus); //Fixes binding keys in macOS connect(this, &KeyMapButton::clicked, this, &KeyMapButton::onClick); } diff --git a/src/frontend/qt_sdl/LAN_PCap.cpp b/src/frontend/qt_sdl/LAN_PCap.cpp index ce278bc..3381e80 100644 --- a/src/frontend/qt_sdl/LAN_PCap.cpp +++ b/src/frontend/qt_sdl/LAN_PCap.cpp @@ -33,7 +33,11 @@ #include 
<sys/types.h> #include <ifaddrs.h> #include <netinet/in.h> - #include <linux/if_packet.h> + #ifdef __linux__ + #include <linux/if_packet.h> + #else + #include <net/if_dl.h> + #endif #endif @@ -66,6 +70,9 @@ const char* PCapLibNames[] = #ifdef __WIN32__ // TODO: name for npcap in non-WinPCap mode "wpcap.dll", +#elif defined(__APPLE__) + "libpcap.A.dylib", + "libpcap.dylib", #else // Linux lib names "libpcap.so.1", @@ -276,6 +283,7 @@ bool Init(bool open_adapter) struct sockaddr_in* sa = (sockaddr_in*)curaddr->ifa_addr; memcpy(adata->IP_v4, &sa->sin_addr, 4); } + #ifdef __linux__ else if (af == AF_PACKET) { struct sockaddr_ll* sa = (sockaddr_ll*)curaddr->ifa_addr; @@ -284,7 +292,16 @@ bool Init(bool open_adapter) else memcpy(adata->MAC, sa->sll_addr, 6); } - + #else + else if (af == AF_LINK) + { + struct sockaddr_dl* sa = (sockaddr_dl*)curaddr->ifa_addr; + if (sa->sdl_alen != 6) + printf("weird MAC length %d for %s\n", sa->sdl_alen, curaddr->ifa_name); + else + memcpy(adata->MAC, LLADDR(sa), 6); + } + #endif curaddr = curaddr->ifa_next; } } diff --git a/src/frontend/qt_sdl/Platform.cpp b/src/frontend/qt_sdl/Platform.cpp index a716feb..d3480e4 100644 --- a/src/frontend/qt_sdl/Platform.cpp +++ b/src/frontend/qt_sdl/Platform.cpp @@ -23,6 +23,7 @@ #include <QDir> #include <QThread> #include <QSemaphore> +#include <QMutex> #include <QOpenGLContext> #include "Platform.h" @@ -187,53 +188,77 @@ FILE* OpenLocalFile(const char* path, const char* mode) return OpenFile(fullpath.toUtf8(), mode, mode[0] != 'w'); } -void* Thread_Create(void (* func)()) +Thread* Thread_Create(void (* func)()) { QThread* t = QThread::create(func); t->start(); - return (void*) t; + return (Thread*) t; } -void Thread_Free(void* thread) +void Thread_Free(Thread* thread) { QThread* t = (QThread*) thread; t->terminate(); delete t; } -void Thread_Wait(void* thread) +void Thread_Wait(Thread* thread) { ((QThread*) thread)->wait(); } -void* Semaphore_Create() +Semaphore* Semaphore_Create() { - return new 
QSemaphore(); + return (Semaphore*)new QSemaphore(); } -void Semaphore_Free(void* sema) +void Semaphore_Free(Semaphore* sema) { delete (QSemaphore*) sema; } -void Semaphore_Reset(void* sema) +void Semaphore_Reset(Semaphore* sema) { QSemaphore* s = (QSemaphore*) sema; s->acquire(s->available()); } -void Semaphore_Wait(void* sema) +void Semaphore_Wait(Semaphore* sema) { ((QSemaphore*) sema)->acquire(); } -void Semaphore_Post(void* sema) +void Semaphore_Post(Semaphore* sema, int count) { - ((QSemaphore*) sema)->release(); + ((QSemaphore*) sema)->release(count); } +Mutex* Mutex_Create() +{ + return (Mutex*)new QMutex(); +} + +void Mutex_Free(Mutex* mutex) +{ + delete (QMutex*) mutex; +} + +void Mutex_Lock(Mutex* mutex) +{ + ((QMutex*) mutex)->lock(); +} + +void Mutex_Unlock(Mutex* mutex) +{ + ((QMutex*) mutex)->unlock(); +} + +bool Mutex_TryLock(Mutex* mutex) +{ + return ((QMutex*) mutex)->try_lock(); +} void* GL_GetProcAddress(const char* proc) { diff --git a/src/frontend/qt_sdl/PlatformConfig.cpp b/src/frontend/qt_sdl/PlatformConfig.cpp index c2d40c4..9861662 100644 --- a/src/frontend/qt_sdl/PlatformConfig.cpp +++ b/src/frontend/qt_sdl/PlatformConfig.cpp @@ -120,7 +120,7 @@ ConfigEntry PlatformConfigFile[] = {"HKJoy_Reset", 0, &HKJoyMapping[HK_Reset], -1, NULL, 0}, {"HKJoy_FastForward", 0, &HKJoyMapping[HK_FastForward], -1, NULL, 0}, {"HKJoy_FastForwardToggle", 0, &HKJoyMapping[HK_FastForwardToggle], -1, NULL, 0}, - {"HKJoy_FastForwardToggle", 0, &HKJoyMapping[HK_FullscreenToggle], -1, NULL, 0}, + {"HKJoy_FullscreenToggle", 0, &HKJoyMapping[HK_FullscreenToggle], -1, NULL, 0}, {"HKJoy_SolarSensorDecrease", 0, &HKJoyMapping[HK_SolarSensorDecrease], -1, NULL, 0}, {"HKJoy_SolarSensorIncrease", 0, &HKJoyMapping[HK_SolarSensorIncrease], -1, NULL, 0}, diff --git a/src/frontend/qt_sdl/WifiSettingsDialog.cpp b/src/frontend/qt_sdl/WifiSettingsDialog.cpp index 67297ad..24b339d 100644 --- a/src/frontend/qt_sdl/WifiSettingsDialog.cpp +++ 
b/src/frontend/qt_sdl/WifiSettingsDialog.cpp @@ -54,7 +54,7 @@ WifiSettingsDialog::WifiSettingsDialog(QWidget* parent) : QDialog(parent), ui(ne LAN_Socket::Init(); haspcap = LAN_PCap::Init(false); - ui->cbDirectMode->setText("Direct mode (requires " PCAP_NAME " and ethernet connection)"); + ui->rbDirectMode->setText("Direct mode (requires " PCAP_NAME " and ethernet connection)"); ui->cbBindAnyAddr->setChecked(Config::SocketBindAnyAddr != 0); ui->cbRandomizeMAC->setChecked(Config::RandomizeMAC != 0); @@ -71,8 +71,9 @@ WifiSettingsDialog::WifiSettingsDialog(QWidget* parent) : QDialog(parent), ui(ne } ui->cbxDirectAdapter->setCurrentIndex(sel); - ui->cbDirectMode->setChecked(Config::DirectLAN != 0); - if (!haspcap) ui->cbDirectMode->setEnabled(false); + ui->rbDirectMode->setChecked(Config::DirectLAN != 0); + ui->rbIndirectMode->setChecked(Config::DirectLAN == 0); + if (!haspcap) ui->rbDirectMode->setEnabled(false); updateAdapterControls(); } @@ -101,7 +102,7 @@ void WifiSettingsDialog::done(int r) Config::SocketBindAnyAddr = ui->cbBindAnyAddr->isChecked() ? 1:0; Config::RandomizeMAC = randommac; - Config::DirectLAN = ui->cbDirectMode->isChecked() ? 1:0; + Config::DirectLAN = ui->rbDirectMode->isChecked() ? 
1:0; int sel = ui->cbxDirectAdapter->currentIndex(); if (sel < 0 || sel >= LAN_PCap::NumAdapters) sel = 0; @@ -125,11 +126,14 @@ void WifiSettingsDialog::done(int r) closeDlg(); } -void WifiSettingsDialog::on_cbDirectMode_stateChanged(int state) +void WifiSettingsDialog::on_rbDirectMode_clicked() +{ + updateAdapterControls(); +} +void WifiSettingsDialog::on_rbIndirectMode_clicked() { updateAdapterControls(); } - void WifiSettingsDialog::on_cbxDirectAdapter_currentIndexChanged(int sel) { if (!haspcap) return; @@ -153,7 +157,7 @@ void WifiSettingsDialog::on_cbxDirectAdapter_currentIndexChanged(int sel) void WifiSettingsDialog::updateAdapterControls() { - bool enable = haspcap && ui->cbDirectMode->isChecked(); + bool enable = haspcap && ui->rbDirectMode->isChecked(); ui->cbxDirectAdapter->setEnabled(enable); ui->lblAdapterMAC->setEnabled(enable); diff --git a/src/frontend/qt_sdl/WifiSettingsDialog.h b/src/frontend/qt_sdl/WifiSettingsDialog.h index 6c1f863..600941f 100644 --- a/src/frontend/qt_sdl/WifiSettingsDialog.h +++ b/src/frontend/qt_sdl/WifiSettingsDialog.h @@ -55,7 +55,8 @@ public: private slots: void done(int r); - void on_cbDirectMode_stateChanged(int state); + void on_rbDirectMode_clicked(); + void on_rbIndirectMode_clicked(); void on_cbxDirectAdapter_currentIndexChanged(int sel); private: diff --git a/src/frontend/qt_sdl/WifiSettingsDialog.ui b/src/frontend/qt_sdl/WifiSettingsDialog.ui index 6668d88..174a3dc 100644 --- a/src/frontend/qt_sdl/WifiSettingsDialog.ui +++ b/src/frontend/qt_sdl/WifiSettingsDialog.ui @@ -6,8 +6,8 @@ <rect> <x>0</x> <y>0</y> - <width>479</width> - <height>240</height> + <width>572</width> + <height>296</height> </rect> </property> <property name="sizePolicy"> @@ -58,67 +58,86 @@ <string>Online</string> </property> <layout class="QGridLayout" name="gridLayout_2"> - <item row="2" column="0"> - <widget class="QLabel" name="label_2"> - <property name="text"> - <string>MAC address:</string> + <item row="3" column="0" rowspan="3" 
colspan="2"> + <widget class="QGroupBox" name="groupBox_3"> + <property name="title"> + <string>Direct Mode Settings</string> </property> + <layout class="QGridLayout" name="gridLayout_3"> + <item row="0" column="0"> + <widget class="QLabel" name="label"> + <property name="text"> + <string>Network adapter:</string> + </property> + </widget> + </item> + <item row="0" column="1"> + <widget class="QComboBox" name="cbxDirectAdapter"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Expanding" vsizetype="Fixed"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="minimumSize"> + <size> + <width>300</width> + <height>0</height> + </size> + </property> + <property name="whatsThis"> + <string><html><head/><body><p>Selects the network adapter through which to route network traffic under direct mode.</p></body></html></string> + </property> + </widget> + </item> + <item row="1" column="0"> + <widget class="QLabel" name="label_2"> + <property name="text"> + <string>MAC address:</string> + </property> + </widget> + </item> + <item row="1" column="1"> + <widget class="QLabel" name="lblAdapterMAC"> + <property name="text"> + <string>[PLACEHOLDER]</string> + </property> + </widget> + </item> + <item row="2" column="0"> + <widget class="QLabel" name="label_3"> + <property name="text"> + <string>IP address:</string> + </property> + </widget> + </item> + <item row="2" column="1"> + <widget class="QLabel" name="lblAdapterIP"> + <property name="text"> + <string>[PLACEHOLDER]</string> + </property> + </widget> + </item> + </layout> </widget> </item> - <item row="0" column="0" colspan="2"> - <widget class="QCheckBox" name="cbDirectMode"> + <item row="1" column="0"> + <widget class="QRadioButton" name="rbIndirectMode"> <property name="whatsThis"> - <string><html><head/><body><p>Direct mode directly routes network traffic to the host network. 
It is the most reliable, but requires an ethernet connection.</p><p><br/></p><p>Non-direct mode uses a layer of emulation to get around this, but is more prone to problems.</p></body></html></string> + <string><html><head/><body><p>Indirect mode uses libslirp. It requires no extra setup and is easy to use.</p></body></html></string> </property> <property name="text"> - <string>Direct mode [TEXT PLACEHOLDER]</string> + <string>Indirect Mode (uses libslirp, recommended)</string> </property> </widget> </item> - <item row="1" column="1"> - <widget class="QComboBox" name="cbxDirectAdapter"> - <property name="sizePolicy"> - <sizepolicy hsizetype="Expanding" vsizetype="Fixed"> - <horstretch>0</horstretch> - <verstretch>0</verstretch> - </sizepolicy> - </property> - <property name="minimumSize"> - <size> - <width>350</width> - <height>0</height> - </size> - </property> + <item row="2" column="0"> + <widget class="QRadioButton" name="rbDirectMode"> <property name="whatsThis"> - <string><html><head/><body><p>Selects the network adapter through which to route network traffic under direct mode.</p></body></html></string> - </property> - </widget> - </item> - <item row="1" column="0"> - <widget class="QLabel" name="label"> - <property name="text"> - <string>Network adapter:</string> - </property> - </widget> - </item> - <item row="3" column="0"> - <widget class="QLabel" name="label_3"> - <property name="text"> - <string>IP address:</string> - </property> - </widget> - </item> - <item row="2" column="1"> - <widget class="QLabel" name="lblAdapterMAC"> - <property name="text"> - <string>[PLACEHOLDER]</string> + <string><html><head/><body><p>Direct mode directly routes network traffic to the host network. 
It is the most reliable, but requires an ethernet connection.</p><p><br/></p><p>Non-direct mode uses a layer of emulation to get around this, but is more prone to problems.</p></body></html></string> </property> - </widget> - </item> - <item row="3" column="1"> - <widget class="QLabel" name="lblAdapterIP"> <property name="text"> - <string>[PLACEHOLDER]</string> + <string>Direct mode [TEXT PLACEHOLDER]</string> </property> </widget> </item> diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index 3a735fb..2d3749d 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -355,10 +355,10 @@ void EmuThread::run() Input::Init(); u32 nframes = 0; - u32 starttick = SDL_GetTicks(); - u32 lasttick = starttick; - u32 lastmeasuretick = lasttick; - u32 fpslimitcount = 0; + double perfCountsSec = 1.0 / SDL_GetPerformanceFrequency(); + double lastTime = SDL_GetPerformanceCounter() * perfCountsSec; + double frameLimitError = 0.0; + double lastMeasureTime = lastTime; char melontitle[100]; @@ -492,49 +492,43 @@ void EmuThread::run() SDL_UnlockMutex(audioSyncLock); } - float framerate = (1000.0f * nlines) / (60.0f * 263.0f); + double frametimeStep = nlines / (60.0 * 263.0); { - u32 curtick = SDL_GetTicks(); - u32 delay = curtick - lasttick; - bool limitfps = Config::LimitFPS && !fastforward; - if (limitfps) - { - float wantedtickF = starttick + (framerate * (fpslimitcount+1)); - u32 wantedtick = (u32)ceil(wantedtickF); - if (curtick < wantedtick) SDL_Delay(wantedtick - curtick); - lasttick = SDL_GetTicks(); - fpslimitcount++; - if ((abs(wantedtickF - (float)wantedtick) < 0.001312) || (fpslimitcount > 60)) - { - fpslimitcount = 0; - starttick = lasttick; - } - } - else + double practicalFramelimit = limitfps ? 
frametimeStep : 1.0 / 1000.0; + + double curtime = SDL_GetPerformanceCounter() * perfCountsSec; + + frameLimitError += practicalFramelimit - (curtime - lastTime); + if (frameLimitError < -practicalFramelimit) + frameLimitError = -practicalFramelimit; + if (frameLimitError > practicalFramelimit) + frameLimitError = practicalFramelimit; + + if (round(frameLimitError * 1000.0) > 0.0) { - if (delay < 1) SDL_Delay(1); - lasttick = SDL_GetTicks(); + SDL_Delay(round(frameLimitError * 1000.0)); + double timeBeforeSleep = curtime; + curtime = SDL_GetPerformanceCounter() * perfCountsSec; + frameLimitError -= curtime - timeBeforeSleep; } + + lastTime = curtime; } nframes++; if (nframes >= 30) { - u32 tick = SDL_GetTicks(); - u32 diff = tick - lastmeasuretick; - lastmeasuretick = tick; + double time = SDL_GetPerformanceCounter() * perfCountsSec; + double dt = time - lastMeasureTime; + lastMeasureTime = time; - u32 fps; - if (diff < 1) fps = 77777; - else fps = (nframes * 1000) / diff; + u32 fps = round(nframes / dt); nframes = 0; - float fpstarget; - if (framerate < 1) fpstarget = 999; - else fpstarget = 1000.0f/framerate; + float fpstarget = 1.0/frametimeStep; sprintf(melontitle, "[%d/%.0f] melonDS " MELONDS_VERSION, fps, fpstarget); changeWindowTitle(melontitle); @@ -544,10 +538,8 @@ void EmuThread::run() { // paused nframes = 0; - lasttick = SDL_GetTicks(); - starttick = lasttick; - lastmeasuretick = lasttick; - fpslimitcount = 0; + lastTime = SDL_GetPerformanceCounter() * perfCountsSec; + lastMeasureTime = lastTime; emit windowUpdate(); @@ -1354,7 +1346,7 @@ void MainWindow::dragEnterEvent(QDragEnterEvent* event) QString filename = urls.at(0).toLocalFile(); QString ext = filename.right(3); - if (ext == "nds" || ext == "srl" || ext == "dsi" || (ext == "gba" && RunningSomething)) + if (ext == "nds" || ext == "srl" || ext == "dsi" || ext == "gba") event->acceptProposedAction(); } |