From ad7791f72661eece13e81dfd1323793293054c5e Mon Sep 17 00:00:00 2001 From: RSDuck Date: Mon, 2 Nov 2020 18:48:32 +0100 Subject: better framelimiter for reference: https://github.com/citra-emu/citra/blob/master/src/core/perf_stats.cpp#L129 --- src/frontend/qt_sdl/main.cpp | 60 +++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 34 deletions(-) (limited to 'src/frontend/qt_sdl') diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index 3a735fb..cfdf03d 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -355,10 +355,10 @@ void EmuThread::run() Input::Init(); u32 nframes = 0; - u32 starttick = SDL_GetTicks(); - u32 lasttick = starttick; - u32 lastmeasuretick = lasttick; - u32 fpslimitcount = 0; + double perfCountsSec = 1.0 / SDL_GetPerformanceFrequency(); + double lastTime = SDL_GetPerformanceCounter() * perfCountsSec; + double frameLimitError = 0.0; + double lastMeasureTime = lastTime; char melontitle[100]; @@ -492,49 +492,43 @@ void EmuThread::run() SDL_UnlockMutex(audioSyncLock); } - float framerate = (1000.0f * nlines) / (60.0f * 263.0f); + double frametimeStep = nlines / (60.0 * 263.0); { - u32 curtick = SDL_GetTicks(); - u32 delay = curtick - lasttick; + double curtime = SDL_GetPerformanceCounter() * perfCountsSec; bool limitfps = Config::LimitFPS && !fastforward; if (limitfps) { - float wantedtickF = starttick + (framerate * (fpslimitcount+1)); - u32 wantedtick = (u32)ceil(wantedtickF); - if (curtick < wantedtick) SDL_Delay(wantedtick - curtick); - - lasttick = SDL_GetTicks(); - fpslimitcount++; - if ((abs(wantedtickF - (float)wantedtick) < 0.001312) || (fpslimitcount > 60)) + frameLimitError += frametimeStep - (curtime - lastTime); + if (frameLimitError < -frametimeStep) + frameLimitError = -frametimeStep; + if (frameLimitError > frametimeStep) + frameLimitError = frametimeStep; + + if (round(frameLimitError * 1000.0) > 0.0) { - fpslimitcount = 0; - starttick = lasttick; + 
SDL_Delay(round(frameLimitError * 1000.0)); + double timeBeforeSleep = curtime; + curtime = SDL_GetPerformanceCounter() * perfCountsSec; + frameLimitError -= curtime - timeBeforeSleep; } } - else - { - if (delay < 1) SDL_Delay(1); - lasttick = SDL_GetTicks(); - } + + lastTime = curtime; } nframes++; if (nframes >= 30) { - u32 tick = SDL_GetTicks(); - u32 diff = tick - lastmeasuretick; - lastmeasuretick = tick; + double time = SDL_GetPerformanceCounter() * perfCountsSec; + double dt = time - lastMeasureTime; + lastMeasureTime = time; - u32 fps; - if (diff < 1) fps = 77777; - else fps = (nframes * 1000) / diff; + u32 fps = round(nframes / dt); nframes = 0; - float fpstarget; - if (framerate < 1) fpstarget = 999; - else fpstarget = 1000.0f/framerate; + float fpstarget = 1.0/frametimeStep; sprintf(melontitle, "[%d/%.0f] melonDS " MELONDS_VERSION, fps, fpstarget); changeWindowTitle(melontitle); @@ -544,10 +538,8 @@ void EmuThread::run() { // paused nframes = 0; - lasttick = SDL_GetTicks(); - starttick = lasttick; - lastmeasuretick = lasttick; - fpslimitcount = 0; + lastTime = SDL_GetPerformanceCounter() * perfCountsSec; + lastMeasureTime = lastTime; emit windowUpdate(); -- cgit v1.2.3 From 2720df965025b75a77656db523606dadbcbb6067 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Tue, 3 Nov 2020 19:40:14 +0100 Subject: make platform objects typesafer and add mutex --- src/GPU3D_Soft.cpp | 8 +++---- src/Platform.h | 27 ++++++++++++++++-------- src/frontend/qt_sdl/Platform.cpp | 45 +++++++++++++++++++++++++++++++--------- 3 files changed, 57 insertions(+), 23 deletions(-) (limited to 'src/frontend/qt_sdl') diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index e9d8e75..7ee9e5d 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -61,12 +61,12 @@ bool Enabled; // threading bool Threaded; -void* RenderThread; +Platform::Thread* RenderThread; bool RenderThreadRunning; bool RenderThreadRendering; -void* Sema_RenderStart; -void* Sema_RenderDone; -void* Sema_ScanlineCount; 
+Platform::Semaphore* Sema_RenderStart; +Platform::Semaphore* Sema_RenderDone; +Platform::Semaphore* Sema_ScanlineCount; void RenderThreadFunc(); diff --git a/src/Platform.h b/src/Platform.h index fea98dd..deb3785 100644 --- a/src/Platform.h +++ b/src/Platform.h @@ -67,15 +67,24 @@ inline bool LocalFileExists(const char* name) return true; } -void* Thread_Create(void (*func)()); -void Thread_Free(void* thread); -void Thread_Wait(void* thread); - -void* Semaphore_Create(); -void Semaphore_Free(void* sema); -void Semaphore_Reset(void* sema); -void Semaphore_Wait(void* sema); -void Semaphore_Post(void* sema); +struct Thread; +Thread* Thread_Create(void (*func)()); +void Thread_Free(Thread* thread); +void Thread_Wait(Thread* thread); + +struct Semaphore; +Semaphore* Semaphore_Create(); +void Semaphore_Free(Semaphore* sema); +void Semaphore_Reset(Semaphore* sema); +void Semaphore_Wait(Semaphore* sema); +void Semaphore_Post(Semaphore* sema); + +struct Mutex; +Mutex* Mutex_Create(); +void Mutex_Free(Mutex* mutex); +void Mutex_Lock(Mutex* mutex); +void Mutex_Unlock(Mutex* mutex); +bool Mutex_TryLock(Mutex* mutex); void* GL_GetProcAddress(const char* proc); diff --git a/src/frontend/qt_sdl/Platform.cpp b/src/frontend/qt_sdl/Platform.cpp index a716feb..a51a985 100644 --- a/src/frontend/qt_sdl/Platform.cpp +++ b/src/frontend/qt_sdl/Platform.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "Platform.h" @@ -187,53 +188,77 @@ FILE* OpenLocalFile(const char* path, const char* mode) return OpenFile(fullpath.toUtf8(), mode, mode[0] != 'w'); } -void* Thread_Create(void (* func)()) +Thread* Thread_Create(void (* func)()) { QThread* t = QThread::create(func); t->start(); - return (void*) t; + return (Thread*) t; } -void Thread_Free(void* thread) +void Thread_Free(Thread* thread) { QThread* t = (QThread*) thread; t->terminate(); delete t; } -void Thread_Wait(void* thread) +void Thread_Wait(Thread* thread) { ((QThread*) thread)->wait(); } -void* 
Semaphore_Create() +Semaphore* Semaphore_Create() { - return new QSemaphore(); + return (Semaphore*)new QSemaphore(); } -void Semaphore_Free(void* sema) +void Semaphore_Free(Semaphore* sema) { delete (QSemaphore*) sema; } -void Semaphore_Reset(void* sema) +void Semaphore_Reset(Semaphore* sema) { QSemaphore* s = (QSemaphore*) sema; s->acquire(s->available()); } -void Semaphore_Wait(void* sema) +void Semaphore_Wait(Semaphore* sema) { ((QSemaphore*) sema)->acquire(); } -void Semaphore_Post(void* sema) +void Semaphore_Post(Semaphore* sema) { ((QSemaphore*) sema)->release(); } +Mutex* Mutex_Create() +{ + return (Mutex*)new QMutex(); +} + +void Mutex_Free(Mutex* mutex) +{ + delete (QMutex*) mutex; +} + +void Mutex_Lock(Mutex* mutex) +{ + ((QMutex*) mutex)->lock(); +} + +void Mutex_Unlock(Mutex* mutex) +{ + ((QMutex*) mutex)->unlock(); +} + +bool Mutex_TryLock(Mutex* mutex) +{ + return ((QMutex*) mutex)->try_lock(); +} void* GL_GetProcAddress(const char* proc) { -- cgit v1.2.3 From 550241dbad5527fbf0e622193f8a0ef943c35557 Mon Sep 17 00:00:00 2001 From: Raphaël Zumer Date: Sun, 15 Nov 2020 15:15:09 +0000 Subject: Fix GBA file drag-and-drop when the system is off (#817) --- src/frontend/qt_sdl/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/frontend/qt_sdl') diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index cfdf03d..6fcd8ce 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -1346,7 +1346,7 @@ void MainWindow::dragEnterEvent(QDragEnterEvent* event) QString filename = urls.at(0).toLocalFile(); QString ext = filename.right(3); - if (ext == "nds" || ext == "srl" || ext == "dsi" || (ext == "gba" && RunningSomething)) + if (ext == "nds" || ext == "srl" || ext == "dsi" || ext == "gba") event->acceptProposedAction(); } -- cgit v1.2.3 From a1cf1967acb72861fefa3fc4ef14f9f2fb9db5d0 Mon Sep 17 00:00:00 2001 From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com> Date: Sun, 22 Nov 2020 
12:00:18 +0000 Subject: Fix fullscreen toggle with joysticks (#821) --- src/frontend/qt_sdl/PlatformConfig.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/frontend/qt_sdl') diff --git a/src/frontend/qt_sdl/PlatformConfig.cpp b/src/frontend/qt_sdl/PlatformConfig.cpp index c2d40c4..9861662 100644 --- a/src/frontend/qt_sdl/PlatformConfig.cpp +++ b/src/frontend/qt_sdl/PlatformConfig.cpp @@ -120,7 +120,7 @@ ConfigEntry PlatformConfigFile[] = {"HKJoy_Reset", 0, &HKJoyMapping[HK_Reset], -1, NULL, 0}, {"HKJoy_FastForward", 0, &HKJoyMapping[HK_FastForward], -1, NULL, 0}, {"HKJoy_FastForwardToggle", 0, &HKJoyMapping[HK_FastForwardToggle], -1, NULL, 0}, - {"HKJoy_FastForwardToggle", 0, &HKJoyMapping[HK_FullscreenToggle], -1, NULL, 0}, + {"HKJoy_FullscreenToggle", 0, &HKJoyMapping[HK_FullscreenToggle], -1, NULL, 0}, {"HKJoy_SolarSensorDecrease", 0, &HKJoyMapping[HK_SolarSensorDecrease], -1, NULL, 0}, {"HKJoy_SolarSensorIncrease", 0, &HKJoyMapping[HK_SolarSensorIncrease], -1, NULL, 0}, -- cgit v1.2.3 From f11d53c69c6c2be0363d66b7ab0d941dd4564d65 Mon Sep 17 00:00:00 2001 From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com> Date: Sun, 22 Nov 2020 14:31:29 +0000 Subject: Add radio buttons to switch between Direct and Indirect Mode (#822) --- src/frontend/qt_sdl/WifiSettingsDialog.cpp | 18 +++-- src/frontend/qt_sdl/WifiSettingsDialog.h | 3 +- src/frontend/qt_sdl/WifiSettingsDialog.ui | 121 +++++++++++++++++------------ 3 files changed, 83 insertions(+), 59 deletions(-) (limited to 'src/frontend/qt_sdl') diff --git a/src/frontend/qt_sdl/WifiSettingsDialog.cpp b/src/frontend/qt_sdl/WifiSettingsDialog.cpp index 67297ad..24b339d 100644 --- a/src/frontend/qt_sdl/WifiSettingsDialog.cpp +++ b/src/frontend/qt_sdl/WifiSettingsDialog.cpp @@ -54,7 +54,7 @@ WifiSettingsDialog::WifiSettingsDialog(QWidget* parent) : QDialog(parent), ui(ne LAN_Socket::Init(); haspcap = LAN_PCap::Init(false); - ui->cbDirectMode->setText("Direct mode (requires " PCAP_NAME 
" and ethernet connection)"); + ui->rbDirectMode->setText("Direct mode (requires " PCAP_NAME " and ethernet connection)"); ui->cbBindAnyAddr->setChecked(Config::SocketBindAnyAddr != 0); ui->cbRandomizeMAC->setChecked(Config::RandomizeMAC != 0); @@ -71,8 +71,9 @@ WifiSettingsDialog::WifiSettingsDialog(QWidget* parent) : QDialog(parent), ui(ne } ui->cbxDirectAdapter->setCurrentIndex(sel); - ui->cbDirectMode->setChecked(Config::DirectLAN != 0); - if (!haspcap) ui->cbDirectMode->setEnabled(false); + ui->rbDirectMode->setChecked(Config::DirectLAN != 0); + ui->rbIndirectMode->setChecked(Config::DirectLAN == 0); + if (!haspcap) ui->rbDirectMode->setEnabled(false); updateAdapterControls(); } @@ -101,7 +102,7 @@ void WifiSettingsDialog::done(int r) Config::SocketBindAnyAddr = ui->cbBindAnyAddr->isChecked() ? 1:0; Config::RandomizeMAC = randommac; - Config::DirectLAN = ui->cbDirectMode->isChecked() ? 1:0; + Config::DirectLAN = ui->rbDirectMode->isChecked() ? 1:0; int sel = ui->cbxDirectAdapter->currentIndex(); if (sel < 0 || sel >= LAN_PCap::NumAdapters) sel = 0; @@ -125,11 +126,14 @@ void WifiSettingsDialog::done(int r) closeDlg(); } -void WifiSettingsDialog::on_cbDirectMode_stateChanged(int state) +void WifiSettingsDialog::on_rbDirectMode_clicked() +{ + updateAdapterControls(); +} +void WifiSettingsDialog::on_rbIndirectMode_clicked() { updateAdapterControls(); } - void WifiSettingsDialog::on_cbxDirectAdapter_currentIndexChanged(int sel) { if (!haspcap) return; @@ -153,7 +157,7 @@ void WifiSettingsDialog::on_cbxDirectAdapter_currentIndexChanged(int sel) void WifiSettingsDialog::updateAdapterControls() { - bool enable = haspcap && ui->cbDirectMode->isChecked(); + bool enable = haspcap && ui->rbDirectMode->isChecked(); ui->cbxDirectAdapter->setEnabled(enable); ui->lblAdapterMAC->setEnabled(enable); diff --git a/src/frontend/qt_sdl/WifiSettingsDialog.h b/src/frontend/qt_sdl/WifiSettingsDialog.h index 6c1f863..600941f 100644 --- a/src/frontend/qt_sdl/WifiSettingsDialog.h +++ 
b/src/frontend/qt_sdl/WifiSettingsDialog.h @@ -55,7 +55,8 @@ public: private slots: void done(int r); - void on_cbDirectMode_stateChanged(int state); + void on_rbDirectMode_clicked(); + void on_rbIndirectMode_clicked(); void on_cbxDirectAdapter_currentIndexChanged(int sel); private: diff --git a/src/frontend/qt_sdl/WifiSettingsDialog.ui b/src/frontend/qt_sdl/WifiSettingsDialog.ui index 6668d88..174a3dc 100644 --- a/src/frontend/qt_sdl/WifiSettingsDialog.ui +++ b/src/frontend/qt_sdl/WifiSettingsDialog.ui @@ -6,8 +6,8 @@ 0 0 - 479 - 240 + 572 + 296 @@ -58,67 +58,86 @@ Online - - - - MAC address: + + + + Direct Mode Settings + + + + + Network adapter: + + + + + + + + 0 + 0 + + + + + 300 + 0 + + + + <html><head/><body><p>Selects the network adapter through which to route network traffic under direct mode.</p></body></html> + + + + + + + MAC address: + + + + + + + [PLACEHOLDER] + + + + + + + IP address: + + + + + + + [PLACEHOLDER] + + + + - - + + - <html><head/><body><p>Direct mode directly routes network traffic to the host network. It is the most reliable, but requires an ethernet connection.</p><p><br/></p><p>Non-direct mode uses a layer of emulation to get around this, but is more prone to problems.</p></body></html> + <html><head/><body><p>Indirect mode uses libslirp. It requires no extra setup and is easy to use.</p></body></html> - Direct mode [TEXT PLACEHOLDER] + Indirect Mode (uses libslirp, recommended) - - - - - 0 - 0 - - - - - 350 - 0 - - + + - <html><head/><body><p>Selects the network adapter through which to route network traffic under direct mode.</p></body></html> - - - - - - - Network adapter: - - - - - - - IP address: - - - - - - - [PLACEHOLDER] + <html><head/><body><p>Direct mode directly routes network traffic to the host network. 
It is the most reliable, but requires an ethernet connection.</p><p><br/></p><p>Non-direct mode uses a layer of emulation to get around this, but is more prone to problems.</p></body></html> - - - - - [PLACEHOLDER] + Direct mode [TEXT PLACEHOLDER] -- cgit v1.2.3 From 7da4550eea43a5ec83d1afc88022aaa62827febc Mon Sep 17 00:00:00 2001 From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com> Date: Sun, 29 Nov 2020 08:11:33 -0800 Subject: Add support for macOS (#771) * use shm_open() instead of memfd_create() on macOS malloc.h isn't a header on macOS * Change OpenGL headers + create ifdef for DO_PROCLIST macOS seems to already have the OpenGL functions defined, without the ifdef, it gives "ambiguous references" errors. * macOS doesn't have ->gregs in uc_mcontext and it doesn't have REG_RIP either https://github.com/gperftools/gperftools/blob/master/m4/pc_from_ucontext.m4 * use getpid() to make memory file name unique * #ifndef __APPLE__ for AF_PACKET and linux/if_packet.h * Add include and link directories for macOS and link the OpenGL framework * Add macOS CI * Use newly added libslirp package from Homebrew https://github.com/Homebrew/homebrew-core/pull/63412 * Use Apple's Clang instead of GNU GCC on macOS * Add macOS build instructions to README * Try to fix macOS undefined symbol * snprintf doesn't take null terminator into account * Map new memory on macOS for JIT * Only use gcc-ar if using GNU Compiler * re-add fastmem code - whoops! * Fix style issue - use camelCase not snake_case * Set Minimum macOS version * Switch Minimum OS X version to 10.9 * Add macOS libpcap library name * fix memory leak * Fix binding keys in macOS * Allow getting MAC address on macOS melonDS on Linux uses AF_PACKET, which doesn't exist on macOS. Instead, this commit uses AF_LINK on macOS to get the MAC address. 
* Remove unneeded macOS CI dependencies * Build melonDS app bundle on macOS Now it is no longer required to install the libraries on macOS, they come with the app bundle. * fix macOS CI not being able to find macdeployqt * copy melonDS.app with recursive because it's a folder * Disable fastmem checkbox on macOS * Disable fastmem by default in config * forgot a semicolon * Don't bundle libraries, causes issues on macOS <10.15 * Update README + allow finding version in Finder on macOS * Make sure fastmem checkbox stays uncheckable --- .github/workflows/build-macos.yml | 39 ++++++++++++++++++++++++++++++ CMakeLists.txt | 4 ++- README.md | 15 ++++++++++++ melonDS.icns | Bin 0 -> 401172 bytes melonDS.plist | 24 ++++++++++++++++++ src/ARMJIT_Memory.cpp | 31 +++++++++++++++++++++--- src/ARMJIT_x64/ARMJIT_Compiler.cpp | 2 ++ src/ARMJIT_x64/ARMJIT_Linkage.s | 10 ++++++++ src/CMakeLists.txt | 9 ++++++- src/Config.cpp | 6 ++++- src/OpenGLSupport.h | 15 ++++++++++-- src/frontend/qt_sdl/CMakeLists.txt | 15 +++++++++++- src/frontend/qt_sdl/EmuSettingsDialog.cpp | 7 +++++- src/frontend/qt_sdl/InputConfigDialog.cpp | 1 + src/frontend/qt_sdl/LAN_PCap.cpp | 21 ++++++++++++++-- 15 files changed, 186 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/build-macos.yml create mode 100644 melonDS.icns create mode 100644 melonDS.plist (limited to 'src/frontend/qt_sdl') diff --git a/.github/workflows/build-macos.yml b/.github/workflows/build-macos.yml new file mode 100644 index 0000000..e2b942a --- /dev/null +++ b/.github/workflows/build-macos.yml @@ -0,0 +1,39 @@ +name: CMake Build (macOS x86-64) + +on: + push: + branches: + - master + pull_request: + branches: + - master + +env: + BUILD_TYPE: Release + +jobs: + build: + + runs-on: macos-latest + + steps: + - uses: actions/checkout@v1 + - name: Install dependencies + working-directory: ${{runner.workspace}} + run: | + brew install cmake sdl2 qt5 libslirp + - name: Create build environment + run: mkdir 
${{runner.workspace}}/build + - name: Configure + working-directory: ${{runner.workspace}}/build + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DQt5_DIR=$(brew --prefix qt5)/lib/cmake/Qt5 + - name: Make + working-directory: ${{runner.workspace}}/build + run: | + make -j$(sysctl -n hw.ncpu) + mkdir dist + cp -r melonDS.app dist + - uses: actions/upload-artifact@v1 + with: + name: melonDS.app + path: ${{runner.workspace}}/build/dist diff --git a/CMakeLists.txt b/CMakeLists.txt index 04ad2a5..fcd4741 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,8 @@ if (POLICY CMP0076) cmake_policy(SET CMP0076 NEW) endif() +set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "Minimum OS X deployment version") + project(melonDS CXX) set(CMAKE_C_STANDARD 11) @@ -100,4 +102,4 @@ add_subdirectory(src) if (BUILD_QT_SDL) add_subdirectory(src/frontend/qt_sdl) -endif() +endif() \ No newline at end of file diff --git a/README.md b/README.md index 2fb2315..64489db 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,21 @@ If everything went well, melonDS and the libraries it needs should now be in the ``` If everything went well, melonDS should now be in the `dist` folder. +### macOS: +1. Install the [Homebrew Package Manager](https://brew.sh) +2. Install dependencies: `brew install git pkg-config cmake sdl2 qt5 libslirp` +3. Compile: + ```zsh + git clone https://github.com/Arisotura/melonDS.git + cd melonDS + mkdir build && cd build + cmake .. -DQt5_DIR=$(brew --prefix qt5)/lib/cmake/Qt5 + make -j$(sysctl -n hw.ncpu) + mkdir dist && cp -r melonDS.app dist + ``` +If everything went well, melonDS.app should now be in the `dist` folder. 
+ + ## TODO LIST * DSi emulation diff --git a/melonDS.icns b/melonDS.icns new file mode 100644 index 0000000..b4f3733 Binary files /dev/null and b/melonDS.icns differ diff --git a/melonDS.plist b/melonDS.plist new file mode 100644 index 0000000..1328777 --- /dev/null +++ b/melonDS.plist @@ -0,0 +1,24 @@ + + + + + CFBundleExecutable + melonDS + CFBundleIconFile + melonDS.icns + CFBundleIdentifier + net.kuribo64.melonDS + CFBundleDevelopmentRegion + English + CFBundlePackageType + APPL + CFBundleVersion + 0.9 + CFBundleShortVersionString + 0.9 + NSHumanReadableCopyright + Licensed under GPLv3 + NSHighResolutionCapable + + + \ No newline at end of file diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp index 70d18e6..cc0f149 100644 --- a/src/ARMJIT_Memory.cpp +++ b/src/ARMJIT_Memory.cpp @@ -22,7 +22,9 @@ #include "NDSCart.h" #include "SPU.h" +#ifndef __APPLE__ #include +#endif /* We're handling fastmem here. @@ -152,7 +154,12 @@ static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext) u8* curArea = (u8*)(NDS::CurCPU == 0 ? 
ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); #ifdef __x86_64__ desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea; - desc.FaultPC = (u8*)context->uc_mcontext.gregs[REG_RIP]; + #ifdef __APPLE__ + desc.FaultPC = (u8*)context->uc_mcontext->__ss.__rip; + #else + desc.FaultPC = (u8*)context->uc_mcontext.gregs[REG_RIP]; + #endif + #else desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea; desc.FaultPC = (u8*)context->uc_mcontext.pc; @@ -161,7 +168,11 @@ static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext) if (ARMJIT_Memory::FaultHandler(desc)) { #ifdef __x86_64__ - context->uc_mcontext.gregs[REG_RIP] = (u64)desc.FaultPC; + #ifdef __APPLE__ + context->uc_mcontext->__ss.__rip = (u64)desc.FaultPC; + #else + context->uc_mcontext.gregs[REG_RIP] = (u64)desc.FaultPC; + #endif #else context->uc_mcontext.pc = (u64)desc.FaultPC; #endif @@ -701,7 +712,14 @@ void Init() FastMem7Start = MemoryBase + AddrSpaceSize; MemoryBase = MemoryBase + AddrSpaceSize*2; - MemoryFile = memfd_create("melondsfastmem", 0); + #ifdef __APPLE__ + char* fastmemPidName = new char[snprintf(NULL, 0, "melondsfastmem%d", getpid()) + 1]; + sprintf(fastmemPidName, "melondsfastmem%d", getpid()); + MemoryFile = shm_open(fastmemPidName, O_RDWR|O_CREAT, 0600); + delete[] fastmemPidName; + #else + MemoryFile = memfd_create("melondsfastmem", 0); + #endif ftruncate(MemoryFile, MemoryTotalSize); struct sigaction sa; @@ -736,6 +754,11 @@ void DeInit() svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize); virtmemFree(MemoryBaseCodeMem, MemoryTotalSize); free(MemoryBase); +#elif defined(__APPLE__) + char* fastmemPidName = new char[snprintf(NULL, 0, "melondsfastmem%d", getpid()) + 1]; + sprintf(fastmemPidName, "melondsfastmem%d", getpid()); + shm_unlink(fastmemPidName); + delete[] fastmemPidName; #elif defined(_WIN32) assert(UnmapViewOfFile(MemoryBase)); CloseHandle(MemoryFile); @@ -1259,4 +1282,4 @@ void* 
GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) return NULL; } -} \ No newline at end of file +} diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index 076f48c..cc4ad80 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -221,6 +221,8 @@ Compiler::Compiler() #ifdef _WIN32 DWORD dummy; VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy); + #elif defined(__APPLE__) + pageAligned = (u8*)mmap(NULL, 1024*1024*32, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS ,-1, 0); #else mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE); #endif diff --git a/src/ARMJIT_x64/ARMJIT_Linkage.s b/src/ARMJIT_x64/ARMJIT_Linkage.s index 0a84df0..8cc0b5f 100644 --- a/src/ARMJIT_x64/ARMJIT_Linkage.s +++ b/src/ARMJIT_x64/ARMJIT_Linkage.s @@ -29,8 +29,13 @@ .p2align 4,,15 +#ifdef __APPLE__ +.global _ARM_Dispatch +_ARM_Dispatch: +#else .global ARM_Dispatch ARM_Dispatch: +#endif #ifdef WIN64 push rdi push rsi @@ -54,8 +59,13 @@ ARM_Dispatch: .p2align 4,,15 +#ifdef __APPLE__ +.global _ARM_Ret +_ARM_Ret: +#else .global ARM_Ret ARM_Ret: +#endif mov [RCPU + ARM_CPSR_offset], RCPSR #ifdef WIN64 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d6c3897..446480d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -100,10 +100,17 @@ if (ENABLE_JIT) endif() endif() +if (APPLE) + target_include_directories(core PUBLIC /usr/local/include) + target_link_directories(core PUBLIC /usr/local/lib) +endif() + if (ENABLE_OGLRENDERER) if (WIN32) target_link_libraries(core ole32 comctl32 ws2_32 opengl32) - else() + elseif (APPLE) + target_link_libraries(core "-framework OpenGL") + else() target_link_libraries(core GL EGL) endif() else() diff --git a/src/Config.cpp b/src/Config.cpp index 341b14c..f7db252 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -73,7 +73,11 @@ ConfigEntry ConfigFile[] = {"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 32, NULL, 0}, 
{"JIT_BranchOptimisations", 0, &JIT_BranchOptimisations, 1, NULL, 0}, {"JIT_LiteralOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0}, - {"JIT_FastMemory", 0, &JIT_FastMemory, 1, NULL, 0}, + #ifdef __APPLE__ + {"JIT_FastMemory", 0, &JIT_FastMemory, 0, NULL, 0}, + #else + {"JIT_FastMemory", 0, &JIT_FastMemory, 1, NULL, 0}, + #endif #endif {"", -1, NULL, 0, NULL, 0} diff --git a/src/OpenGLSupport.h b/src/OpenGLSupport.h index 925c0ad..44c511f 100644 --- a/src/OpenGLSupport.h +++ b/src/OpenGLSupport.h @@ -23,8 +23,13 @@ #include // TODO: different includes for each platform -#include -#include +#ifdef __APPLE__ + #include + #include +#else + #include + #include +#endif #include "Platform.h" @@ -61,6 +66,11 @@ #endif +#ifdef __APPLE__ + +#define DO_PROCLIST(func) + +#else #define DO_PROCLIST(func) \ DO_PROCLIST_1_3(func) \ @@ -128,6 +138,7 @@ \ func(GLGETSTRINGI, glGetStringi); \ +#endif namespace OpenGL { diff --git a/src/frontend/qt_sdl/CMakeLists.txt b/src/frontend/qt_sdl/CMakeLists.txt index 9a0a025..0d695d6 100644 --- a/src/frontend/qt_sdl/CMakeLists.txt +++ b/src/frontend/qt_sdl/CMakeLists.txt @@ -95,6 +95,19 @@ if (PORTABLE) add_definitions(-DPORTABLE) endif() +if (APPLE) + set_target_properties(melonDS PROPERTIES + MACOSX_BUNDLE true + MACOSX_BUNDLE_INFO_PLIST ${CMAKE_SOURCE_DIR}/melonDS.plist + OUTPUT_NAME melonDS + ) + + # Copy icon into the bundle + target_sources(melonDS PRIVATE "${CMAKE_SOURCE_DIR}/melonDS.icns") + set_source_files_properties("${CMAKE_SOURCE_DIR}/melonDS.icns" PROPERTIES MACOSX_PACKAGE_LOCATION Resources) + +endif() + install(FILES ../../../net.kuribo64.melonDS.desktop DESTINATION ${CMAKE_INSTALL_PREFIX}/share/applications) install(FILES ../../../icon/melon_16x16.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/16x16/apps RENAME net.kuribo64.melonDS.png) install(FILES ../../../icon/melon_32x32.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/32x32/apps RENAME net.kuribo64.melonDS.png) @@ -102,4 +115,4 @@ 
install(FILES ../../../icon/melon_48x48.png DESTINATION ${CMAKE_INSTALL_PREFIX}/ install(FILES ../../../icon/melon_64x64.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/64x64/apps RENAME net.kuribo64.melonDS.png) install(FILES ../../../icon/melon_128x128.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/128x128/apps RENAME net.kuribo64.melonDS.png) install(FILES ../../../icon/melon_256x256.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/256x256/apps RENAME net.kuribo64.melonDS.png) -install(TARGETS melonDS RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) +install(TARGETS melonDS BUNDLE DESTINATION ${CMAKE_BINARY_DIR} RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) diff --git a/src/frontend/qt_sdl/EmuSettingsDialog.cpp b/src/frontend/qt_sdl/EmuSettingsDialog.cpp index 79ce5ed..3183182 100644 --- a/src/frontend/qt_sdl/EmuSettingsDialog.cpp +++ b/src/frontend/qt_sdl/EmuSettingsDialog.cpp @@ -65,6 +65,9 @@ EmuSettingsDialog::EmuSettingsDialog(QWidget* parent) : QDialog(parent), ui(new ui->chkJITBranchOptimisations->setChecked(Config::JIT_BranchOptimisations != 0); ui->chkJITLiteralOptimisations->setChecked(Config::JIT_LiteralOptimisations != 0); ui->chkJITFastMemory->setChecked(Config::JIT_FastMemory != 0); + #ifdef __APPLE__ + ui->chkJITFastMemory->setDisabled(true); + #endif ui->spnJITMaximumBlockSize->setValue(Config::JIT_MaxBlockSize); #else ui->chkEnableJIT->setDisabled(true); @@ -329,6 +332,8 @@ void EmuSettingsDialog::on_chkEnableJIT_toggled() bool disabled = !ui->chkEnableJIT->isChecked(); ui->chkJITBranchOptimisations->setDisabled(disabled); ui->chkJITLiteralOptimisations->setDisabled(disabled); - ui->chkJITFastMemory->setDisabled(disabled); + #ifndef __APPLE__ + ui->chkJITFastMemory->setDisabled(disabled); + #endif ui->spnJITMaximumBlockSize->setDisabled(disabled); } diff --git a/src/frontend/qt_sdl/InputConfigDialog.cpp b/src/frontend/qt_sdl/InputConfigDialog.cpp index 9f08731..eaf1e9b 100644 --- 
a/src/frontend/qt_sdl/InputConfigDialog.cpp +++ b/src/frontend/qt_sdl/InputConfigDialog.cpp @@ -216,6 +216,7 @@ KeyMapButton::KeyMapButton(int* mapping, bool hotkey) : QPushButton() setCheckable(true); setText(mappingText()); + setFocusPolicy(Qt::StrongFocus); //Fixes binding keys in macOS connect(this, &KeyMapButton::clicked, this, &KeyMapButton::onClick); } diff --git a/src/frontend/qt_sdl/LAN_PCap.cpp b/src/frontend/qt_sdl/LAN_PCap.cpp index ce278bc..8138699 100644 --- a/src/frontend/qt_sdl/LAN_PCap.cpp +++ b/src/frontend/qt_sdl/LAN_PCap.cpp @@ -33,7 +33,11 @@ #include #include #include - #include + #ifdef __APPLE__ + #include + #else + #include + #endif #endif @@ -66,6 +70,9 @@ const char* PCapLibNames[] = #ifdef __WIN32__ // TODO: name for npcap in non-WinPCap mode "wpcap.dll", +#elif defined(__APPLE__) + "libpcap.A.dylib", + "libpcap.dylib", #else // Linux lib names "libpcap.so.1", @@ -276,6 +283,16 @@ bool Init(bool open_adapter) struct sockaddr_in* sa = (sockaddr_in*)curaddr->ifa_addr; memcpy(adata->IP_v4, &sa->sin_addr, 4); } + #ifdef __APPLE__ + else if (af == AF_LINK) + { + struct sockaddr_dl* sa = (sockaddr_dl*)curaddr->ifa_addr; + if (sa->sdl_alen != 6) + printf("weird MAC length %d for %s\n", sa->sdl_alen, curaddr->ifa_name); + else + memcpy(adata->MAC, LLADDR(sa), 6); + } + #else else if (af == AF_PACKET) { struct sockaddr_ll* sa = (sockaddr_ll*)curaddr->ifa_addr; @@ -284,7 +301,7 @@ bool Init(bool open_adapter) else memcpy(adata->MAC, sa->sll_addr, 6); } - + #endif curaddr = curaddr->ifa_next; } } -- cgit v1.2.3 From 6e8bac39091d0321f43a3e7574d3359255996e9f Mon Sep 17 00:00:00 2001 From: RSDuck Date: Mon, 30 Nov 2020 16:58:52 +0100 Subject: Merge vram dirty tracking Squashed commit of the following: commit b463a05d4b909372f0cd1ad91caa0c77a25e5901 Author: RSDuck Date: Mon Nov 30 01:55:35 2020 +0100 minor fix commit ce73cebbdf5da243d7ebade82d8799ded9cd6b28 Author: RSDuck Date: Mon Nov 30 00:43:08 2020 +0100 fix dirty flags of BG/OBJ mappings not being 
reset commit fc5d73a6178e3adc444398bdd23de8314b5ca8f8 Author: RSDuck Date: Mon Nov 30 00:11:13 2020 +0100 use flat vram for gpu2d everywhere commit 34ee9fe2bf04fcfa2a5a1c8d78d70007e606f1a2 Author: RSDuck Date: Sat Nov 28 19:10:34 2020 +0100 mark VRAM dirty for display capture commit e8778fa2f429c6df0eece19d6a5ee83ae23a0cf4 Author: RSDuck Date: Sat Nov 28 18:59:31 2020 +0100 use flat VRAM for textures and texpals also skip rendering if nothing changed and a bunch of fixes commit 53f2041e2e1a28b35702a2ed51de885c36689f71 Author: RSDuck Date: Fri Nov 27 18:29:56 2020 +0100 use vram dirty tracking for extpals also preparations to take this further commit 4cdfa329e95aed26d3b21319c8fd86a04abf20f7 Author: RSDuck Date: Mon Nov 16 23:32:22 2020 +0100 VRAM dirty tracking --- src/GPU.cpp | 338 +++++++++++++++++++++++++++++++++++---- src/GPU.h | 87 +++++++++- src/GPU2D.cpp | 294 +++++++++++++++++----------------- src/GPU2D.h | 6 - src/GPU3D.cpp | 15 ++ src/GPU3D.h | 2 + src/GPU3D_Soft.cpp | 83 ++++++---- src/NonStupidBitfield.h | 149 +++++++++++++++++ src/Platform.h | 2 +- src/frontend/qt_sdl/Platform.cpp | 4 +- 10 files changed, 765 insertions(+), 215 deletions(-) create mode 100644 src/NonStupidBitfield.h (limited to 'src/frontend/qt_sdl') diff --git a/src/GPU.cpp b/src/GPU.cpp index 7989750..e6b24e0 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -49,8 +49,8 @@ u8 VRAM_F[ 16*1024]; u8 VRAM_G[ 16*1024]; u8 VRAM_H[ 32*1024]; u8 VRAM_I[ 16*1024]; -u8* VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I}; -u32 VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF}; +u8* const VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I}; +u32 const VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF}; u8 VRAMCNT[9]; u8 VRAMSTAT; @@ -85,6 +85,62 @@ bool Accelerated; GPU2D* GPU2D_A; GPU2D* GPU2D_B; +/* + VRAM invalidation tracking + + - we want to know when a 
VRAM region used for graphics changed + - for some regions unmapping is mandatory to modify them (Texture, TexPal and ExtPal) and + we don't want to completely invalidate them every time they're unmapped and remapped + + For this reason we don't track the dirtiness per mapping region, but instead per VRAM bank + with VRAMDirty. Writes to LCDC go directly into VRAMDirty, while writes via other mapping regions + like BG or OBJ are first tracked in VRAMWritten_* and need to be flushed using SyncDirtyFlags. + + This is more or less a description of VRAMTrackingSet::DeriveState + Each time before the memory is read two things could have happened + to each 16kb piece (16kb is the smallest unit in which mappings can + be made thus also the size VRAMMap_* use): + - this piece was remapped compared to last time we checked, + which means this location in memory is invalid. + - this piece wasn't remapped, which means we need to check whether + it was changed. This can be achieved by checking VRAMDirty. + VRAMDirty needs to be reset for the respective VRAM bank. 
+*/ + +VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG; +VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ; +VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG; +VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ; + +VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal; +VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal; +VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal; +VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal; + +VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture; +VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal; + + +NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG; +NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ; +NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG; +NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ; +NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7; + +NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9]; + +u8 VRAMFlat_ABG[512*1024]; +u8 VRAMFlat_BBG[128*1024]; +u8 VRAMFlat_AOBJ[256*1024]; +u8 VRAMFlat_BOBJ[128*1024]; + +u8 VRAMFlat_ABGExtPal[32*1024]; +u8 VRAMFlat_BBGExtPal[32*1024]; +u8 VRAMFlat_AOBJExtPal[8*1024]; +u8 VRAMFlat_BOBJExtPal[8*1024]; + +u8 VRAMFlat_Texture[512*1024]; +u8 VRAMFlat_TexPal[128*1024]; bool Init() { @@ -113,6 +169,30 @@ void DeInit() if (Framebuffer[1][1]) delete[] Framebuffer[1][1]; } +void ResetVRAMCache() +{ + for (int i = 0; i < 9; i++) + VRAMDirty[i] = NonStupidBitField<128*1024/VRAMDirtyGranularity>(); + + VRAMDirty_ABG.Reset(); + VRAMDirty_BBG.Reset(); + VRAMDirty_AOBJ.Reset(); + VRAMDirty_BOBJ.Reset(); + VRAMDirty_ABGExtPal.Reset(); + VRAMDirty_BBGExtPal.Reset(); + VRAMDirty_AOBJExtPal.Reset(); + VRAMDirty_BOBJExtPal.Reset(); + + memset(VRAMFlat_ABG, 0, sizeof(VRAMFlat_ABG)); + memset(VRAMFlat_BBG, 0, sizeof(VRAMFlat_BBG)); + memset(VRAMFlat_AOBJ, 0, sizeof(VRAMFlat_AOBJ)); + memset(VRAMFlat_BOBJ, 0, sizeof(VRAMFlat_BOBJ)); + memset(VRAMFlat_ABGExtPal, 0, sizeof(VRAMFlat_ABGExtPal)); + 
memset(VRAMFlat_BBGExtPal, 0, sizeof(VRAMFlat_BBGExtPal)); + memset(VRAMFlat_AOBJExtPal, 0, sizeof(VRAMFlat_AOBJExtPal)); + memset(VRAMFlat_BOBJExtPal, 0, sizeof(VRAMFlat_BOBJExtPal)); +} + void Reset() { VCount = 0; @@ -186,6 +266,8 @@ void Reset() GPU2D_B->SetFramebuffer(Framebuffer[backbuf][0]); ResetRenderer(); + + ResetVRAMCache(); } void Stop() @@ -261,6 +343,8 @@ void DoSavestate(Savestate* file) GPU2D_A->DoSavestate(file); GPU2D_B->DoSavestate(file); GPU3D::DoSavestate(file); + + ResetVRAMCache(); } void AssignFramebuffers() @@ -411,18 +495,8 @@ void SetRenderSettings(int renderer, RenderSettings& settings) u8* GetUniqueBankPtr(u32 mask, u32 offset) { - if (!mask) return NULL; - - int num = 0; - if (!(mask & 0xFF)) { mask >>= 8; num += 8; } - else - { - if (!(mask & 0xF)) { mask >>= 4; num += 4; } - if (!(mask & 0x3)) { mask >>= 2; num += 2; } - if (!(mask & 0x1)) { mask >>= 1; num += 1; } - } - if (mask != 1) return NULL; - + if (!mask || (mask & (mask - 1)) != 0) return NULL; + int num = __builtin_ctz(mask); return &VRAM[num][offset & VRAMMask[num]]; } @@ -606,8 +680,6 @@ void MapVRAM_E(u32 bank, u8 cnt) case 4: // ABG ext palette UNMAP_RANGE(ABGExtPal, 0, 4); - GPU2D_A->BGExtPalDirty(0); - GPU2D_A->BGExtPalDirty(2); break; } } @@ -634,8 +706,6 @@ void MapVRAM_E(u32 bank, u8 cnt) case 4: // ABG ext palette MAP_RANGE(ABGExtPal, 0, 4); - GPU2D_A->BGExtPalDirty(0); - GPU2D_A->BGExtPalDirty(2); break; } } @@ -687,12 +757,10 @@ void MapVRAM_FG(u32 bank, u8 cnt) case 4: // ABG ext palette VRAMMap_ABGExtPal[((oldofs & 0x1) << 1)] &= ~bankmask; VRAMMap_ABGExtPal[((oldofs & 0x1) << 1) + 1] &= ~bankmask; - GPU2D_A->BGExtPalDirty((oldofs & 0x1) << 1); break; case 5: // AOBJ ext palette VRAMMap_AOBJExtPal &= ~bankmask; - GPU2D_A->OBJExtPalDirty(); break; } } @@ -732,12 +800,10 @@ void MapVRAM_FG(u32 bank, u8 cnt) case 4: // ABG ext palette VRAMMap_ABGExtPal[((ofs & 0x1) << 1)] |= bankmask; VRAMMap_ABGExtPal[((ofs & 0x1) << 1) + 1] |= bankmask; - 
GPU2D_A->BGExtPalDirty((ofs & 0x1) << 1); break; case 5: // AOBJ ext palette VRAMMap_AOBJExtPal |= bankmask; - GPU2D_A->OBJExtPalDirty(); break; } } @@ -773,8 +839,6 @@ void MapVRAM_H(u32 bank, u8 cnt) case 2: // BBG ext palette UNMAP_RANGE(BBGExtPal, 0, 4); - GPU2D_B->BGExtPalDirty(0); - GPU2D_B->BGExtPalDirty(2); break; } } @@ -800,8 +864,6 @@ void MapVRAM_H(u32 bank, u8 cnt) case 2: // BBG ext palette MAP_RANGE(BBGExtPal, 0, 4); - GPU2D_B->BGExtPalDirty(0); - GPU2D_B->BGExtPalDirty(2); break; } } @@ -841,7 +903,6 @@ void MapVRAM_I(u32 bank, u8 cnt) case 3: // BOBJ ext palette VRAMMap_BOBJExtPal &= ~bankmask; - GPU2D_B->OBJExtPalDirty(); break; } } @@ -871,7 +932,6 @@ void MapVRAM_I(u32 bank, u8 cnt) case 3: // BOBJ ext palette VRAMMap_BOBJExtPal |= bankmask; - GPU2D_B->OBJExtPalDirty(); break; } } @@ -937,6 +997,8 @@ void StartHBlank(u32 line) DispStat[0] |= (1<<1); DispStat[1] |= (1<<1); + SyncDirtyFlags(); + if (VCount < 192) { // draw @@ -1096,4 +1158,224 @@ void SetVCount(u16 val) NextVCount = val; } +template +NonStupidBitField VRAMTrackingSet::DeriveState(u32* currentMappings) +{ + NonStupidBitField result; + u16 banksToBeZeroed = 0; + for (u32 i = 0; i < Size / MappingGranularity; i++) + { + if (currentMappings[i] != Mapping[i]) + { + result |= NonStupidBitField(i*VRAMBitsPerMapping, VRAMBitsPerMapping); + banksToBeZeroed |= currentMappings[i]; + Mapping[i] = currentMappings[i]; + } + else + { + u32 mapping = Mapping[i]; + + banksToBeZeroed |= mapping; + + while (mapping != 0) + { + u32 num = __builtin_ctz(mapping); + mapping &= ~(1 << num); + + // hack for **speed** + // this could probably be done less ugly but then we would rely + // on the compiler for vectorisation + static_assert(VRAMDirtyGranularity == 512); + if (MappingGranularity == 16*1024) + { + u32 dirty = ((u32*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 14)]; + ((u32*)result.Data)[i] |= dirty; + } + else if (MappingGranularity == 8*1024) + { + u16 dirty = ((u16*)VRAMDirty[num].Data)[i & 
(VRAMMask[num] >> 13)]; + ((u16*)result.Data)[i] |= dirty; + } + else if (MappingGranularity == 128*1024) + { + ((u64*)result.Data)[i * 4 + 0] |= ((u64*)VRAMDirty[num].Data)[0]; + ((u64*)result.Data)[i * 4 + 1] |= ((u64*)VRAMDirty[num].Data)[1]; + ((u64*)result.Data)[i * 4 + 2] |= ((u64*)VRAMDirty[num].Data)[2]; + ((u64*)result.Data)[i * 4 + 3] |= ((u64*)VRAMDirty[num].Data)[3]; + } + else + { + // welp + abort(); + } + } + } + } + + while (banksToBeZeroed != 0) + { + u32 num = __builtin_ctz(banksToBeZeroed); + banksToBeZeroed &= ~(1 << num); + memset(VRAMDirty[num].Data, 0, sizeof(VRAMDirty[num].Data)); + } + + return result; +} + +template NonStupidBitField<32*1024/VRAMDirtyGranularity> VRAMTrackingSet<32*1024, 8*1024>::DeriveState(u32*); +template NonStupidBitField<8*1024/VRAMDirtyGranularity> VRAMTrackingSet<8*1024, 8*1024>::DeriveState(u32*); +template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 128*1024>::DeriveState(u32*); +template NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMTrackingSet<128*1024, 16*1024>::DeriveState(u32*); +template NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMTrackingSet<256*1024, 16*1024>::DeriveState(u32*); +template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 16*1024>::DeriveState(u32*); + +template +void SyncDirtyFlags(u32* mappings, NonStupidBitField& writtenFlags) +{ + const u32 VRAMWrittenBitsPer16KB = 16*1024/VRAMDirtyGranularity; + + for (typename NonStupidBitField::Iterator it = writtenFlags.Begin(); it != writtenFlags.End(); it++) + { + u32 mapping = mappings[*it / VRAMWrittenBitsPer16KB]; + while (mapping != 0) + { + u32 num = __builtin_ctz(mapping); + + VRAMDirty[num][*it & (VRAMMask[num] / VRAMDirtyGranularity)] = true; + + mapping &= ~(1 << num); + } + } + memset(writtenFlags.Data, 0, sizeof(writtenFlags.Data)); +} + +void SyncDirtyFlags() +{ + SyncDirtyFlags(VRAMMap_ABG, VRAMWritten_ABG); + SyncDirtyFlags(VRAMMap_AOBJ, VRAMWritten_AOBJ); + 
SyncDirtyFlags(VRAMMap_BBG, VRAMWritten_BBG); + SyncDirtyFlags(VRAMMap_BOBJ, VRAMWritten_BOBJ); + SyncDirtyFlags(VRAMMap_ARM7, VRAMWritten_ARM7); +} + +template +inline bool CopyLinearVRAM(u8* flat, u32* mappings, NonStupidBitField& dirty, u64 (*slowAccess)(u32 addr)) +{ + const u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity; + + bool change = false; + + typename NonStupidBitField::Iterator it = dirty.Begin(); + while (it != dirty.End()) + { + u32 offset = *it * VRAMDirtyGranularity; + u8* dst = flat + offset; + u8* fastAccess = GetUniqueBankPtr(mappings[*it / VRAMBitsPerMapping], offset); + if (fastAccess) + { + memcpy(dst, fastAccess, VRAMDirtyGranularity); + } + else + { + for (u32 i = 0; i < VRAMDirtyGranularity; i += 8) + *(u64*)&dst[i] = slowAccess(offset + i); + } + change = true; + it++; + } + return change; +} + +bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<128*1024>(VRAMFlat_Texture, VRAMMap_Texture, dirty, ReadVRAM_Texture); +} +bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<16*1024>(VRAMFlat_TexPal, VRAMMap_TexPal, dirty, ReadVRAM_TexPal); +} + +bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<16*1024>(VRAMFlat_ABG, VRAMMap_ABG, dirty, ReadVRAM_ABG); +} +bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<16*1024>(VRAMFlat_BBG, VRAMMap_BBG, dirty, ReadVRAM_BBG); +} + +bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<16*1024>(VRAMFlat_AOBJ, VRAMMap_AOBJ, dirty, ReadVRAM_AOBJ); +} +bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<16*1024>(VRAMFlat_BOBJ, VRAMMap_BOBJ, dirty, ReadVRAM_BOBJ); } + +template +T ReadVRAM_ABGExtPal(u32 addr) +{ + 
u32 mask = VRAMMap_ABGExtPal[(addr >> 13) & 0x3]; + + T ret = 0; + if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0x7FFF]; + if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF]; + if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF]; + + return ret; +} + +template +T ReadVRAM_BBGExtPal(u32 addr) +{ + u32 mask = VRAMMap_BBGExtPal[(addr >> 13) & 0x3]; + + T ret = 0; + if (mask & (1<<7)) ret |= *(T*)&VRAM_H[addr & 0x7FFF]; + + return ret; +} + +template +T ReadVRAM_AOBJExtPal(u32 addr) +{ + u32 mask = VRAMMap_AOBJExtPal; + + T ret = 0; + if (mask & (1<<4)) ret |= *(T*)&VRAM_F[addr & 0x1FFF]; + if (mask & (1<<5)) ret |= *(T*)&VRAM_G[addr & 0x1FFF]; + + return ret; +} + +template +T ReadVRAM_BOBJExtPal(u32 addr) +{ + u32 mask = VRAMMap_BOBJExtPal; + + T ret = 0; + if (mask & (1<<8)) ret |= *(T*)&VRAM_I[addr & 0x1FFF]; + + return ret; +} + +bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<8*1024>(VRAMFlat_ABGExtPal, VRAMMap_ABGExtPal, dirty, ReadVRAM_ABGExtPal); +} +bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<8*1024>(VRAMFlat_BBGExtPal, VRAMMap_BBGExtPal, dirty, ReadVRAM_BBGExtPal); +} + +bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<8*1024>(VRAMFlat_AOBJExtPal, &VRAMMap_AOBJExtPal, dirty, ReadVRAM_AOBJExtPal); +} +bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<8*1024>(VRAMFlat_BOBJExtPal, &VRAMMap_BOBJExtPal, dirty, ReadVRAM_BOBJExtPal); +} + +} \ No newline at end of file diff --git a/src/GPU.h b/src/GPU.h index 1564ef7..2f71da6 100644 --- a/src/GPU.h +++ b/src/GPU.h @@ -20,6 +20,7 @@ #define GPU_H #include "GPU2D.h" +#include "NonStupidBitfield.h" namespace GPU { @@ -45,7 +46,7 @@ extern u8 VRAM_G[ 16*1024]; extern u8 VRAM_H[ 32*1024]; extern u8 VRAM_I[ 16*1024]; -extern u8* 
VRAM[9]; +extern u8* const VRAM[9]; extern u32 VRAMMap_LCDC; extern u32 VRAMMap_ABG[0x20]; @@ -73,6 +74,73 @@ extern GPU2D* GPU2D_B; extern int Renderer; +const u32 VRAMDirtyGranularity = 512; + +extern NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG; +extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ; +extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG; +extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ; +extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7; + +extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9]; + +template +struct VRAMTrackingSet +{ + u16 Mapping[Size / MappingGranularity]; + + const u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity; + + void Reset() + { + memset(Mapping, 0, sizeof(Mapping)); + } + NonStupidBitField DeriveState(u32* currentMappings); +}; + +extern VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG; +extern VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ; +extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG; +extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ; + +extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal; +extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal; +extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal; +extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal; + +extern VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture; +extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal; + +extern u8 VRAMFlat_ABG[512*1024]; +extern u8 VRAMFlat_BBG[128*1024]; +extern u8 VRAMFlat_AOBJ[256*1024]; +extern u8 VRAMFlat_BOBJ[128*1024]; + +extern u8 VRAMFlat_ABGExtPal[32*1024]; +extern u8 VRAMFlat_BBGExtPal[32*1024]; + +extern u8 VRAMFlat_AOBJExtPal[8*1024]; +extern u8 VRAMFlat_BOBJExtPal[8*1024]; + +extern u8 VRAMFlat_Texture[512*1024]; +extern u8 VRAMFlat_TexPal[128*1024]; + +bool 
MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty); +bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty); + +bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty); +bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty); + +bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty); +bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty); + +bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty); +bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty); + +bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty); +bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty); + +void SyncDirtyFlags(); typedef struct { @@ -233,7 +301,11 @@ void WriteVRAM_LCDC(u32 addr, T val) default: return; } - if (VRAMMap_LCDC & (1<> 14) & 0x1F]; + VRAMWritten_ABG[(addr & 0x7FFFF) / VRAMDirtyGranularity] = true; + if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val; if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val; if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; @@ -295,6 +369,8 @@ void WriteVRAM_AOBJ(u32 addr, T val) { u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF]; + VRAMWritten_AOBJ[(addr & 0x3FFFF) / VRAMDirtyGranularity] = true; + if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val; if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val; if (mask & (1<<4)) *(T*)&VRAM_E[addr & 0xFFFF] = val; @@ -324,6 +400,8 @@ void WriteVRAM_BBG(u32 addr, T val) { u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7]; + VRAMWritten_BBG[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; if (mask & (1<<7)) *(T*)&VRAM_H[addr & 0x7FFF] = val; if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val; @@ -350,11 
+428,12 @@ void WriteVRAM_BOBJ(u32 addr, T val) { u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7]; + VRAMWritten_BOBJ[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val; if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val; } - template T ReadVRAM_ARM7(u32 addr) { @@ -372,6 +451,8 @@ void WriteVRAM_ARM7(u32 addr, T val) { u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1]; + VRAMWritten_ARM7[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val; } diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp index 27aa608..07b5b21 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -148,12 +148,6 @@ void GPU2D::Reset() CaptureCnt = 0; MasterBrightness = 0; - - BGExtPalStatus[0] = 0; - BGExtPalStatus[1] = 0; - BGExtPalStatus[2] = 0; - BGExtPalStatus[3] = 0; - OBJExtPalStatus = 0; } void GPU2D::DoSavestate(Savestate* file) @@ -208,13 +202,6 @@ void GPU2D::DoSavestate(Savestate* file) if (!file->Saving) { - // refresh those - BGExtPalStatus[0] = 0; - BGExtPalStatus[1] = 0; - BGExtPalStatus[2] = 0; - BGExtPalStatus[3] = 0; - OBJExtPalStatus = 0; - CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]]; CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[0]]; } @@ -758,6 +745,25 @@ void GPU2D::DrawScanline(u32 line) int n3dline = line; line = GPU::VCount; + if (Num == 0) + { + auto bgDirty = GPU::VRAMDirty_ABG.DeriveState(GPU::VRAMMap_ABG); + GPU::MakeVRAMFlat_ABGCoherent(bgDirty); + auto bgExtPalDirty = GPU::VRAMDirty_ABGExtPal.DeriveState(GPU::VRAMMap_ABGExtPal); + GPU::MakeVRAMFlat_ABGExtPalCoherent(bgExtPalDirty); + auto objExtPalDirty = GPU::VRAMDirty_AOBJExtPal.DeriveState(&GPU::VRAMMap_AOBJExtPal); + GPU::MakeVRAMFlat_AOBJExtPalCoherent(objExtPalDirty); + } + else + { + auto bgDirty = GPU::VRAMDirty_BBG.DeriveState(GPU::VRAMMap_BBG); + GPU::MakeVRAMFlat_BBGCoherent(bgDirty); + auto bgExtPalDirty = 
GPU::VRAMDirty_BBGExtPal.DeriveState(GPU::VRAMMap_BBGExtPal); + GPU::MakeVRAMFlat_BBGExtPalCoherent(bgExtPalDirty); + auto objExtPalDirty = GPU::VRAMDirty_BOBJExtPal.DeriveState(&GPU::VRAMMap_BOBJExtPal); + GPU::MakeVRAMFlat_BOBJExtPalCoherent(objExtPalDirty); + } + bool forceblank = false; // scanlines that end up outside of the GPU drawing range @@ -970,6 +976,9 @@ void GPU2D::DoCapture(u32 line, u32 width) u16* dst = (u16*)GPU::VRAM[dstvram]; u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width); + static_assert(GPU::VRAMDirtyGranularity == 512); + GPU::VRAMDirty[dstvram][(dstaddr & 0x1FFFF) / GPU::VRAMDirtyGranularity] = true; + // TODO: handle 3D in accelerated mode!! u32* srcA; @@ -1188,85 +1197,20 @@ void GPU2D::SampleFIFO(u32 offset, u32 num) } } - -void GPU2D::BGExtPalDirty(u32 base) -{ - BGExtPalStatus[base] = 0; - BGExtPalStatus[base+1] = 0; -} - -void GPU2D::OBJExtPalDirty() -{ - OBJExtPalStatus = 0; -} - - u16* GPU2D::GetBGExtPal(u32 slot, u32 pal) { - u16* dst = &BGExtPalCache[slot][pal << 8]; - - if (!(BGExtPalStatus[slot] & (1< void GPU2D::DrawBG_Text(u32 line, u32 bgnum) { @@ -1720,17 +1678,20 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) extpal = (DispCnt & 0x40000000); if (extpal) extpalslot = ((bgnum<2) && (bgcnt&0x2000)) ? 
(2+bgnum) : bgnum; + u8* bgvram; + u32 bgvrammask; + GetBGVRAM(Num, bgvram, bgvrammask); if (Num) { - tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((bgcnt & 0x003C) << 12); + tilemapaddr = ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0x400]; } else { - tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0]; } @@ -1758,7 +1719,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) // preload shit as needed if ((xoff & 0x7) || mosaic) { - curtile = GPU::ReadVRAM_BG(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)); + curtile = *(u16*)&bgvram[(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)) & bgvrammask]; if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12); else curpal = pal; @@ -1779,7 +1740,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) (mosaic && ((xpos >> 3) != (lastxpos >> 3)))) { // load a new tile - curtile = GPU::ReadVRAM_BG(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)); + curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask]; if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12); else curpal = pal; @@ -1794,7 +1755,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) if (WindowMask[i] & (1<(pixelsaddr + tilexoff); + color = bgvram[(pixelsaddr + tilexoff) & bgvrammask]; if (color) drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)); + curtile = *(u16*)&bgvram[((tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3))) & bgvrammask]; curpal = pal + ((curtile & 0xF000) >> 8); pixelsaddr = tilesetaddr + ((curtile & 
0x03FF) << 5) + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); @@ -1828,7 +1789,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) (mosaic && ((xpos >> 3) != (lastxpos >> 3)))) { // load a new tile - curtile = GPU::ReadVRAM_BG(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)); + curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask]; curpal = pal + ((curtile & 0xF000) >> 8); pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5) + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); @@ -1842,11 +1803,11 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) u32 tilexoff = (curtile & 0x0400) ? (7-(xpos&0x7)) : (xpos&0x7); if (tilexoff & 0x1) { - color = GPU::ReadVRAM_BG(pixelsaddr + (tilexoff >> 1)) >> 4; + color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] >> 4; } else { - color = GPU::ReadVRAM_BG(pixelsaddr + (tilexoff >> 1)) & 0x0F; + color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] & 0x0F; } if (color) @@ -1895,17 +1856,20 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum) rotY -= (BGMosaicY * rotD); } + u8* bgvram; + u32 bgvrammask; + if (Num) { - tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((bgcnt & 0x003C) << 12); + tilemapaddr = ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0x400]; } else { - tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0]; } @@ -1934,13 +1898,13 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum) if ((!((finalX|finalY) & overflowmask))) { - curtile = GPU::ReadVRAM_BG(tilemapaddr + ((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11))); + curtile = 
bgvram[(tilemapaddr + ((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11))) & bgvrammask]; // draw pixel u32 tilexoff = (finalX >> 8) & 0x7; u32 tileyoff = (finalY >> 8) & 0x7; - color = GPU::ReadVRAM_BG(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff); + color = bgvram[(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff) & bgvrammask]; if (color) drawPixel(&BGOBJLine[i], pal[color], 0x01000000<(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)); + color = *(u16*)&bgvram[(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)) & bgvrammask]; if (color & 0x8000) drawPixel(&BGOBJLine[i], color, 0x01000000<(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)); + color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask]; if (color) drawPixel(&BGOBJLine[i], pal[color], 0x01000000<> 8) + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0]; } @@ -2144,7 +2112,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) if ((!((finalX|finalY) & overflowmask))) { - curtile = GPU::ReadVRAM_BG(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1)); + curtile = *(u16*)&bgvram[(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1)) & bgvrammask]; if (extpal) curpal = GetBGExtPal(bgnum, curtile>>12); else curpal = pal; @@ -2156,7 +2124,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) if (curtile & 0x0400) tilexoff = 7-tilexoff; if (curtile & 0x0800) tileyoff = 7-tileyoff; - color = GPU::ReadVRAM_BG(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff); + color = 
bgvram[(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff) & bgvrammask]; if (color) drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)); + color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask]; if (color) drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2); @@ -2346,6 +2315,20 @@ void GPU2D::InterleaveSprites(u32 prio) } } +void GetOBJVRAM(u32 num, u8*& data, u32& mask) +{ + if (num == 0) + { + data = GPU::VRAMFlat_AOBJ; + mask = 0x3FFFF; + } + else + { + data = GPU::VRAMFlat_BOBJ; + mask = 0x1FFFF; + } +} + #define DoDrawSprite(type, ...) \ if (iswin) \ { \ @@ -2370,6 +2353,17 @@ void GPU2D::DrawSprites(u32 line) OBJMosaicYCount = 0; } + if (Num == 0) + { + auto objDirty = GPU::VRAMDirty_AOBJ.DeriveState(GPU::VRAMMap_AOBJ); + GPU::MakeVRAMFlat_AOBJCoherent(objDirty); + } + else + { + auto objDirty = GPU::VRAMDirty_BOBJ.DeriveState(GPU::VRAMMap_BOBJ); + GPU::MakeVRAMFlat_BOBJCoherent(objDirty); + } + NumSprites = 0; memset(OBJLine, 0, 256*4); memset(OBJWindow, 0, 256); @@ -2482,6 +2476,10 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi u32 ytilefactor; + u8* objvram; + u32 objvrammask; + GetOBJVRAM(Num, objvram, objvrammask); + s32 centerX = boundwidth >> 1; s32 centerY = boundheight >> 1; @@ -2525,6 +2523,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi pixelattr |= (0xC0000000 | (alpha << 24)); + u32 pixelsaddr; if (DispCnt & 0x40) { if (DispCnt & 0x20) @@ -2536,7 +2535,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi } else { - tilenum <<= (7 + ((DispCnt >> 22) & 0x1)); + pixelsaddr = tilenum << (7 + ((DispCnt >> 22) & 0x1)); ytilefactor = ((width >> 8) * 2); } } @@ -2544,23 +2543,21 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi { if (DispCnt & 0x20) { - tilenum = 
((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); + pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); ytilefactor = (256 * 2); } else { - tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); + pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); ytilefactor = (128 * 2); } } - u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; - for (; xoff < boundwidth;) { if ((u32)rotX < width && (u32)rotY < height) { - color = GPU::ReadVRAM_OBJ(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)); + color = *(u16*)&objvram[(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)) & objvrammask]; if (color & 0x8000) { @@ -2585,9 +2582,10 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi } else { + u32 pixelsaddr = tilenum; if (DispCnt & 0x10) { - tilenum <<= ((DispCnt >> 20) & 0x3); + pixelsaddr <<= ((DispCnt >> 20) & 0x3); ytilefactor = (width >> 11) << ((attrib[0] & 0x2000) ? 1:0); } else @@ -2601,9 +2599,8 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi if (attrib[0] & 0x2000) { // 256-color - tilenum <<= 5; ytilefactor <<= 5; - u32 pixelsaddr = (Num ? 
0x06600000 : 0x06400000) + tilenum; + pixelsaddr <<= 5; if (!window) { @@ -2617,7 +2614,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi { if ((u32)rotX < width && (u32)rotY < height) { - color = GPU::ReadVRAM_OBJ(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)); + color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)) & objvrammask]; if (color) { @@ -2657,7 +2654,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi { if ((u32)rotX < width && (u32)rotY < height) { - color = GPU::ReadVRAM_OBJ(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)); + color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)) & objvrammask]; if (rotX & 0x100) color >>= 4; else @@ -2705,6 +2702,10 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos pixelattr |= 0x100000; } + u8* objvram; + u32 objvrammask; + GetOBJVRAM(Num, objvram, objvrammask); + // yflip if (attrib[1] & 0x2000) ypos = height-1 - ypos; @@ -2735,6 +2736,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos pixelattr |= (0xC0000000 | (alpha << 24)); + u32 pixelsaddr = tilenum; if (DispCnt & 0x40) { if (DispCnt & 0x20) @@ -2746,25 +2748,24 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos } else { - tilenum <<= (7 + ((DispCnt >> 22) & 0x1)); - tilenum += (ypos * width * 2); + pixelsaddr <<= (7 + ((DispCnt >> 22) & 0x1)); + pixelsaddr += (ypos * width * 2); } } else { if (DispCnt & 0x20) { - tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); - tilenum += (ypos * 256 * 2); + pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); + pixelsaddr += (ypos * 256 * 2); } else { - tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); 
- tilenum += (ypos * 128 * 2); + pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); + pixelsaddr += (ypos * 128 * 2); } } - u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; s32 pixelstride; if (attrib[1] & 0x1000) // xflip @@ -2781,7 +2782,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos for (; xoff < xend;) { - color = GPU::ReadVRAM_OBJ(pixelsaddr); + color = *(u16*)&objvram[pixelsaddr & objvrammask]; pixelsaddr += pixelstride; @@ -2805,14 +2806,15 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos } else { + u32 pixelsaddr = tilenum; if (DispCnt & 0x10) { - tilenum <<= ((DispCnt >> 20) & 0x3); - tilenum += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0); + pixelsaddr <<= ((DispCnt >> 20) & 0x3); + pixelsaddr += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0); } else { - tilenum += ((ypos >> 3) * 0x20); + pixelsaddr += ((ypos >> 3) * 0x20); } if (spritemode == 1) pixelattr |= 0x80000000; @@ -2821,8 +2823,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos if (attrib[0] & 0x2000) { // 256-color - tilenum <<= 5; - u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; + pixelsaddr <<= 5; pixelsaddr += ((ypos & 0x7) << 3); s32 pixelstride; @@ -2851,7 +2852,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos for (; xoff < xend;) { - color = GPU::ReadVRAM_OBJ(pixelsaddr); + color = objvram[pixelsaddr]; pixelsaddr += pixelstride; @@ -2877,8 +2878,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos else { // 16-color - tilenum <<= 5; - u32 pixelsaddr = (Num ? 
0x06600000 : 0x06400000) + tilenum; + pixelsaddr <<= 5; pixelsaddr += ((ypos & 0x7) << 2); s32 pixelstride; @@ -2910,13 +2910,13 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos { if (attrib[1] & 0x1000) { - if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ(pixelsaddr) & 0x0F; pixelsaddr--; } - else color = GPU::ReadVRAM_OBJ(pixelsaddr) >> 4; + if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] & 0x0F; pixelsaddr--; } + else color = objvram[pixelsaddr & objvrammask] >> 4; } else { - if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ(pixelsaddr) >> 4; pixelsaddr++; } - else color = GPU::ReadVRAM_OBJ(pixelsaddr) & 0x0F; + if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] >> 4; pixelsaddr++; } + else color = objvram[pixelsaddr & objvrammask] & 0x0F; } if (color) diff --git a/src/GPU2D.h b/src/GPU2D.h index 469d6a2..db15adc 100644 --- a/src/GPU2D.h +++ b/src/GPU2D.h @@ -59,9 +59,6 @@ public: void CheckWindows(u32 line); - void BGExtPalDirty(u32 base); - void OBJExtPalDirty(); - u16* GetBGExtPal(u32 slot, u32 pal); u16* GetOBJExtPal(); @@ -128,9 +125,6 @@ private: u16 MasterBrightness; u16 BGExtPalCache[4][16*256]; - u16 OBJExtPalCache[16*256]; - u32 BGExtPalStatus[4]; - u32 OBJExtPalStatus; u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb); u32 ColorBlend5(u32 val1, u32 val2); diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 74debfe..4e6ac42 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -179,6 +179,8 @@ u8 RenderFogDensityTable[34]; u32 RenderClearAttr1, RenderClearAttr2; +bool RenderFrameIdentical; + u32 ZeroDotWLimit; u32 GXStat; @@ -2491,6 +2493,19 @@ void VBlank() } RenderNumPolygons = NumPolygons; + RenderFrameIdentical = false; + } + else + { + RenderFrameIdentical = RenderDispCnt == DispCnt + && RenderAlphaRef == AlphaRef + && RenderClearAttr1 == ClearAttr1 + && RenderClearAttr2 == ClearAttr2 + && RenderFogColor == FogColor + && RenderFogOffset == FogOffset * 0x200 + && memcmp(RenderEdgeTable, EdgeTable, 8*2) == 0 
+ && memcmp(RenderFogDensityTable + 1, FogDensityTable, 32) == 0 + && memcmp(RenderToonTable, ToonTable, 32*2) == 0; } RenderDispCnt = DispCnt; diff --git a/src/GPU3D.h b/src/GPU3D.h index c69adde..0477c4f 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -87,6 +87,8 @@ extern u8 RenderFogDensityTable[34]; extern u32 RenderClearAttr1, RenderClearAttr2; +extern bool RenderFrameIdentical; + extern std::array RenderPolygonRAM; extern u32 RenderNumPolygons; diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 7ee9e5d..d66eb76 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -58,6 +58,8 @@ bool PrevIsShadowMask; bool Enabled; +bool FrameIdentical; + // threading bool Threaded; @@ -550,6 +552,16 @@ typedef struct RendererPolygon PolygonList[2048]; +template +inline T ReadVRAM_Texture(u32 addr) +{ + return *(T*)&GPU::VRAMFlat_Texture[addr & 0x7FFFF]; +} +template +inline T ReadVRAM_TexPal(u32 addr) +{ + return *(T*)&GPU::VRAMFlat_TexPal[addr & 0x1FFFF]; +} void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) { @@ -606,10 +618,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 1: // A3I5 { vramaddr += ((t * width) + s); - u8 pixel = GPU::ReadVRAM_Texture(vramaddr); + u8 pixel = ReadVRAM_Texture(vramaddr); texpal <<= 4; - *color = GPU::ReadVRAM_TexPal(texpal + ((pixel&0x1F)<<1)); + *color = ReadVRAM_TexPal(texpal + ((pixel&0x1F)<<1)); *alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6); } break; @@ -617,12 +629,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 2: // 4-color { vramaddr += (((t * width) + s) >> 2); - u8 pixel = GPU::ReadVRAM_Texture(vramaddr); + u8 pixel = ReadVRAM_Texture(vramaddr); pixel >>= ((s & 0x3) << 1); pixel &= 0x3; texpal <<= 3; - *color = GPU::ReadVRAM_TexPal(texpal + (pixel<<1)); + *color = ReadVRAM_TexPal(texpal + (pixel<<1)); *alpha = (pixel==0) ? 
alpha0 : 31; } break; @@ -630,12 +642,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 3: // 16-color { vramaddr += (((t * width) + s) >> 1); - u8 pixel = GPU::ReadVRAM_Texture(vramaddr); + u8 pixel = ReadVRAM_Texture(vramaddr); if (s & 0x1) pixel >>= 4; else pixel &= 0xF; texpal <<= 4; - *color = GPU::ReadVRAM_TexPal(texpal + (pixel<<1)); + *color = ReadVRAM_TexPal(texpal + (pixel<<1)); *alpha = (pixel==0) ? alpha0 : 31; } break; @@ -643,10 +655,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 4: // 256-color { vramaddr += ((t * width) + s); - u8 pixel = GPU::ReadVRAM_Texture(vramaddr); + u8 pixel = ReadVRAM_Texture(vramaddr); texpal <<= 4; - *color = GPU::ReadVRAM_TexPal(texpal + (pixel<<1)); + *color = ReadVRAM_TexPal(texpal + (pixel<<1)); *alpha = (pixel==0) ? alpha0 : 31; } break; @@ -660,30 +672,30 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha if (vramaddr >= 0x40000) slot1addr += 0x10000; - u8 val = GPU::ReadVRAM_Texture(vramaddr); + u8 val = ReadVRAM_Texture(vramaddr); val >>= (2 * (s & 0x3)); - u16 palinfo = GPU::ReadVRAM_Texture(slot1addr); + u16 palinfo = ReadVRAM_Texture(slot1addr); u32 paloffset = (palinfo & 0x3FFF) << 2; texpal <<= 4; switch (val & 0x3) { case 0: - *color = GPU::ReadVRAM_TexPal(texpal + paloffset); + *color = ReadVRAM_TexPal(texpal + paloffset); *alpha = 31; break; case 1: - *color = GPU::ReadVRAM_TexPal(texpal + paloffset + 2); + *color = ReadVRAM_TexPal(texpal + paloffset + 2); *alpha = 31; break; case 2: if ((palinfo >> 14) == 1) { - u16 color0 = GPU::ReadVRAM_TexPal(texpal + paloffset); - u16 color1 = GPU::ReadVRAM_TexPal(texpal + paloffset + 2); + u16 color0 = ReadVRAM_TexPal(texpal + paloffset); + u16 color1 = ReadVRAM_TexPal(texpal + paloffset + 2); u32 r0 = color0 & 0x001F; u32 g0 = color0 & 0x03E0; @@ -700,8 +712,8 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha } else 
if ((palinfo >> 14) == 3) { - u16 color0 = GPU::ReadVRAM_TexPal(texpal + paloffset); - u16 color1 = GPU::ReadVRAM_TexPal(texpal + paloffset + 2); + u16 color0 = ReadVRAM_TexPal(texpal + paloffset); + u16 color1 = ReadVRAM_TexPal(texpal + paloffset + 2); u32 r0 = color0 & 0x001F; u32 g0 = color0 & 0x03E0; @@ -717,20 +729,20 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha *color = r | g | b; } else - *color = GPU::ReadVRAM_TexPal(texpal + paloffset + 4); + *color = ReadVRAM_TexPal(texpal + paloffset + 4); *alpha = 31; break; case 3: if ((palinfo >> 14) == 2) { - *color = GPU::ReadVRAM_TexPal(texpal + paloffset + 6); + *color = ReadVRAM_TexPal(texpal + paloffset + 6); *alpha = 31; } else if ((palinfo >> 14) == 3) { - u16 color0 = GPU::ReadVRAM_TexPal(texpal + paloffset); - u16 color1 = GPU::ReadVRAM_TexPal(texpal + paloffset + 2); + u16 color0 = ReadVRAM_TexPal(texpal + paloffset); + u16 color1 = ReadVRAM_TexPal(texpal + paloffset + 2); u32 r0 = color0 & 0x001F; u32 g0 = color0 & 0x03E0; @@ -759,10 +771,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 6: // A5I3 { vramaddr += ((t * width) + s); - u8 pixel = GPU::ReadVRAM_Texture(vramaddr); + u8 pixel = ReadVRAM_Texture(vramaddr); texpal <<= 4; - *color = GPU::ReadVRAM_TexPal(texpal + ((pixel&0x7)<<1)); + *color = ReadVRAM_TexPal(texpal + ((pixel&0x7)<<1)); *alpha = (pixel >> 3); } break; @@ -770,7 +782,7 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 7: // direct color { vramaddr += (((t * width) + s) << 1); - *color = GPU::ReadVRAM_Texture(vramaddr); + *color = ReadVRAM_Texture(vramaddr); *alpha = (*color & 0x8000) ? 
31 : 0; } break; @@ -2007,8 +2019,8 @@ void ClearBuffers() { for (int x = 0; x < 256; x++) { - u16 val2 = GPU::ReadVRAM_Texture(0x40000 + (yoff << 9) + (xoff << 1)); - u16 val3 = GPU::ReadVRAM_Texture(0x60000 + (yoff << 9) + (xoff << 1)); + u16 val2 = ReadVRAM_Texture(0x40000 + (yoff << 9) + (xoff << 1)); + u16 val3 = ReadVRAM_Texture(0x60000 + (yoff << 9) + (xoff << 1)); // TODO: confirm color conversion u32 r = (val2 << 1) & 0x3E; if (r) r++; @@ -2088,11 +2100,19 @@ void VCount144() void RenderFrame() { + auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture); + auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal); + + bool textureChanged = GPU::MakeVRAMFlat_TextureCoherent(textureDirty); + bool texPalChanged = GPU::MakeVRAMFlat_TexPalCoherent(texPalDirty); + + FrameIdentical = !(textureChanged || texPalChanged) && RenderFrameIdentical; + if (RenderThreadRunning) { Platform::Semaphore_Post(Sema_RenderStart); } - else + else if (!FrameIdentical) { ClearBuffers(); RenderPolygons(false, &RenderPolygonRAM[0], RenderNumPolygons); @@ -2107,8 +2127,15 @@ void RenderThreadFunc() if (!RenderThreadRunning) return; RenderThreadRendering = true; - ClearBuffers(); - RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons); + if (FrameIdentical) + { + Platform::Semaphore_Post(Sema_ScanlineCount, 192); + } + else + { + ClearBuffers(); + RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons); + } Platform::Semaphore_Post(Sema_RenderDone); RenderThreadRendering = false; diff --git a/src/NonStupidBitfield.h b/src/NonStupidBitfield.h new file mode 100644 index 0000000..124ba76 --- /dev/null +++ b/src/NonStupidBitfield.h @@ -0,0 +1,149 @@ +#ifndef NONSTUPIDBITFIELD_H +#define NONSTUPIDBITFIELD_H + +#include "types.h" + +#include + +#include +#include + +// like std::bitset but less stupid and optimised for +// our use case (keeping track of memory invalidations) + +template +struct NonStupidBitField +{ + static_assert((Size % 8) 
== 0, "bitfield size must be a multiple of 8"); + static const u32 DataLength = Size / 8; + u8 Data[DataLength]; + + struct Ref + { + NonStupidBitField& BitField; + u32 Idx; + + operator bool() + { + return BitField.Data[Idx >> 3] & (1 << (Idx & 0x7)); + } + + Ref& operator=(bool set) + { + BitField.Data[Idx >> 3] &= ~(1 << (Idx & 0x7)); + BitField.Data[Idx >> 3] |= ((u8)set << (Idx & 0x7)); + return *this; + } + }; + + struct Iterator + { + NonStupidBitField& BitField; + u32 DataIdx; + u32 BitIdx; + u64 RemainingBits; + + u32 operator*() { return DataIdx * 8 + BitIdx; } + + bool operator==(const Iterator& other) { return other.DataIdx == DataIdx; } + bool operator!=(const Iterator& other) { return other.DataIdx != DataIdx; } + + template + void Next() + { + while (RemainingBits == 0 && DataIdx < DataLength) + { + DataIdx += sizeof(T); + RemainingBits = *(T*)&BitField.Data[DataIdx]; + } + + BitIdx = __builtin_ctzll(RemainingBits); + RemainingBits &= ~(1ULL << BitIdx); + } + + Iterator operator++(int) + { + Iterator prev(*this); + ++*this; + return prev; + } + + Iterator& operator++() + { + if ((DataLength % 8) == 0) + Next(); + else if ((DataLength % 4) == 0) + Next(); + else if ((DataLength % 2) == 0) + Next(); + else + Next(); + + return *this; + } + }; + + NonStupidBitField(u32 start, u32 size) + { + memset(Data, 0, sizeof(Data)); + + if (size == 0) + return; + + u32 roundedStartBit = (start + 7) & ~7; + u32 roundedEndBit = (start + size) & ~7; + if (roundedStartBit != roundedEndBit) + memset(Data + roundedStartBit / 8, 0xFF, (roundedEndBit - roundedStartBit) / 8); + + if (start & 0x7) + Data[start >> 3] = 0xFF << (start & 0x7); + if ((start + size) & 0x7) + Data[(start + size) >> 3] = 0xFF >> ((start + size) & 0x7); + } + + NonStupidBitField() + { + memset(Data, 0, sizeof(Data)); + } + + Iterator End() + { + return Iterator{*this, DataLength, 0, 0}; + } + Iterator Begin() + { + if ((DataLength % 8) == 0) + return ++Iterator{*this, 0, 0, *(u64*)Data}; + else if 
((DataLength % 4) == 0) + return ++Iterator{*this, 0, 0, *(u32*)Data}; + else if ((DataLength % 2) == 0) + return ++Iterator{*this, 0, 0, *(u16*)Data}; + else + return ++Iterator{*this, 0, 0, *Data}; + } + + Ref operator[](u32 idx) + { + return Ref{*this, idx}; + } + + NonStupidBitField& operator|=(const NonStupidBitField& other) + { + for (u32 i = 0; i < DataLength; i++) + { + Data[i] |= other.Data[i]; + } + return *this; + } + NonStupidBitField& operator&=(const NonStupidBitField& other) + { + for (u32 i = 0; i < DataLength; i++) + { + Data[i] &= other.Data[i]; + } + return *this; + } +}; + + +#endif \ No newline at end of file diff --git a/src/Platform.h b/src/Platform.h index deb3785..b4dda9e 100644 --- a/src/Platform.h +++ b/src/Platform.h @@ -77,7 +77,7 @@ Semaphore* Semaphore_Create(); void Semaphore_Free(Semaphore* sema); void Semaphore_Reset(Semaphore* sema); void Semaphore_Wait(Semaphore* sema); -void Semaphore_Post(Semaphore* sema); +void Semaphore_Post(Semaphore* sema, int count = 1); struct Mutex; Mutex* Mutex_Create(); diff --git a/src/frontend/qt_sdl/Platform.cpp b/src/frontend/qt_sdl/Platform.cpp index a51a985..d3480e4 100644 --- a/src/frontend/qt_sdl/Platform.cpp +++ b/src/frontend/qt_sdl/Platform.cpp @@ -230,9 +230,9 @@ void Semaphore_Wait(Semaphore* sema) ((QSemaphore*) sema)->acquire(); } -void Semaphore_Post(Semaphore* sema) +void Semaphore_Post(Semaphore* sema, int count) { - ((QSemaphore*) sema)->release(); + ((QSemaphore*) sema)->release(count); } Mutex* Mutex_Create() -- cgit v1.2.3 From 07423492c434f2d0e54790e87562da25cd382bd9 Mon Sep 17 00:00:00 2001 From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com> Date: Tue, 1 Dec 2020 14:01:57 -0800 Subject: Use AF_LINK and net/if_dl.h on all non-Linux systems (#835) --- src/frontend/qt_sdl/LAN_PCap.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'src/frontend/qt_sdl') diff --git a/src/frontend/qt_sdl/LAN_PCap.cpp 
b/src/frontend/qt_sdl/LAN_PCap.cpp index 8138699..3381e80 100644 --- a/src/frontend/qt_sdl/LAN_PCap.cpp +++ b/src/frontend/qt_sdl/LAN_PCap.cpp @@ -33,10 +33,10 @@ #include #include #include - #ifdef __APPLE__ - #include + #ifdef __linux__ + #include #else - #include + #include #endif #endif @@ -283,16 +283,7 @@ bool Init(bool open_adapter) struct sockaddr_in* sa = (sockaddr_in*)curaddr->ifa_addr; memcpy(adata->IP_v4, &sa->sin_addr, 4); } - #ifdef __APPLE__ - else if (af == AF_LINK) - { - struct sockaddr_dl* sa = (sockaddr_dl*)curaddr->ifa_addr; - if (sa->sdl_alen != 6) - printf("weird MAC length %d for %s\n", sa->sdl_alen, curaddr->ifa_name); - else - memcpy(adata->MAC, LLADDR(sa), 6); - } - #else + #ifdef __linux__ else if (af == AF_PACKET) { struct sockaddr_ll* sa = (sockaddr_ll*)curaddr->ifa_addr; @@ -301,6 +292,15 @@ bool Init(bool open_adapter) else memcpy(adata->MAC, sa->sll_addr, 6); } + #else + else if (af == AF_LINK) + { + struct sockaddr_dl* sa = (sockaddr_dl*)curaddr->ifa_addr; + if (sa->sdl_alen != 6) + printf("weird MAC length %d for %s\n", sa->sdl_alen, curaddr->ifa_name); + else + memcpy(adata->MAC, LLADDR(sa), 6); + } #endif curaddr = curaddr->ifa_next; } -- cgit v1.2.3 From 42e083960e52cce31589714dcc7fab8e173efb81 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Fri, 4 Dec 2020 00:00:35 +0100 Subject: always cap FPS to 1000 --- src/frontend/qt_sdl/main.cpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'src/frontend/qt_sdl') diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index 6fcd8ce..2d3749d 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -495,24 +495,24 @@ void EmuThread::run() double frametimeStep = nlines / (60.0 * 263.0); { + bool limitfps = Config::LimitFPS && !fastforward; + + double practicalFramelimit = limitfps ? 
frametimeStep : 1.0 / 1000.0; + double curtime = SDL_GetPerformanceCounter() * perfCountsSec; - bool limitfps = Config::LimitFPS && !fastforward; - if (limitfps) + frameLimitError += practicalFramelimit - (curtime - lastTime); + if (frameLimitError < -practicalFramelimit) + frameLimitError = -practicalFramelimit; + if (frameLimitError > practicalFramelimit) + frameLimitError = practicalFramelimit; + + if (round(frameLimitError * 1000.0) > 0.0) { - frameLimitError += frametimeStep - (curtime - lastTime); - if (frameLimitError < -frametimeStep) - frameLimitError = -frametimeStep; - if (frameLimitError > frametimeStep) - frameLimitError = frametimeStep; - - if (round(frameLimitError * 1000.0) > 0.0) - { - SDL_Delay(round(frameLimitError * 1000.0)); - double timeBeforeSleep = curtime; - curtime = SDL_GetPerformanceCounter() * perfCountsSec; - frameLimitError -= curtime - timeBeforeSleep; - } + SDL_Delay(round(frameLimitError * 1000.0)); + double timeBeforeSleep = curtime; + curtime = SDL_GetPerformanceCounter() * perfCountsSec; + frameLimitError -= curtime - timeBeforeSleep; } lastTime = curtime; -- cgit v1.2.3 From 9673659db46dfbb3b2963b7acd0d68d58fb1c7fd Mon Sep 17 00:00:00 2001 From: RSDuck Date: Mon, 7 Dec 2020 17:00:53 +0100 Subject: fix toggle FPS hotkey --- src/frontend/qt_sdl/main.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/frontend/qt_sdl') diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index 2d3749d..6e27d61 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -266,6 +266,7 @@ EmuThread::EmuThread(QObject* parent) : QThread(parent) connect(this, SIGNAL(windowEmuStop()), mainWindow, SLOT(onEmuStop())); connect(this, SIGNAL(windowEmuPause()), mainWindow->actPause, SLOT(trigger())); connect(this, SIGNAL(windowEmuReset()), mainWindow->actReset, SLOT(trigger())); + connect(this, SIGNAL(windowLimitFPSChange()), mainWindow->actLimitFramerate, SLOT(trigger())); connect(this, 
SIGNAL(screenLayoutChange()), mainWindow->panel, SLOT(onScreenLayoutChanged())); connect(this, SIGNAL(windowFullscreenToggle()), mainWindow, SLOT(onFullscreenToggled())); -- cgit v1.2.3 From 1dbe69c6bec5e1b8e75108e5ccf11361acedc925 Mon Sep 17 00:00:00 2001 From: Arisotura Date: Thu, 10 Dec 2020 18:09:11 +0100 Subject: remove some debug crap --- src/ARM.cpp | 2 -- src/DMA.cpp | 2 +- src/NDS.cpp | 15 +++------------ src/frontend/qt_sdl/main.cpp | 9 +++++---- 4 files changed, 9 insertions(+), 19 deletions(-) (limited to 'src/frontend/qt_sdl') diff --git a/src/ARM.cpp b/src/ARM.cpp index c6c51ed..29110e5 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -570,8 +570,6 @@ void ARMv5::Execute() // actually execute u32 icode = (CurInstr >> 6) & 0x3FF; ARMInterpreter::THUMBInstrTable[icode](this); - - if (R[15]==0x0219A6B0) printf("CAM THREAD MSG: %02X %08X -> %08X\n", R[1], R[0], 0x0219A6B6+R[0]); } else { diff --git a/src/DMA.cpp b/src/DMA.cpp index 8ad3918..cd3465f 100644 --- a/src/DMA.cpp +++ b/src/DMA.cpp @@ -427,4 +427,4 @@ void DMA::Run() } template void DMA::Run<0>(); -template void DMA::Run<1>(); \ No newline at end of file +template void DMA::Run<1>(); diff --git a/src/NDS.cpp b/src/NDS.cpp index b313db0..b8b83f7 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -1811,15 +1811,6 @@ void StartSqrt() void debug(u32 param) { - if (param==1312) - { - u32 timer = 0x10000 - (Timers[3].Counter >> 16); - timer *= 16; - timer += (0x10000 - (Timers[2].Counter >> 16)); - printf("TIMER=%d (%04X/%04X)\n", timer, (Timers[2].Counter >> 16), (Timers[3].Counter >> 16)); - return; - } - printf("ARM9 PC=%08X LR=%08X %08X\n", ARM9->R[15], ARM9->R[14], ARM9->R_IRQ[1]); printf("ARM7 PC=%08X LR=%08X %08X\n", ARM7->R[15], ARM7->R[14], ARM7->R_IRQ[1]); @@ -1845,14 +1836,14 @@ void debug(u32 param) fclose(shit);*/ FILE* - shit = fopen("debug/cam9.bin", "wb"); + shit = fopen("debug/power9.bin", "wb"); for (u32 i = 0x02000000; i < 0x04000000; i+=4) { u32 val = DSi::ARM9Read32(i); fwrite(&val, 4, 1, 
shit); } fclose(shit); - shit = fopen("debug/cam7.bin", "wb"); + shit = fopen("debug/power7.bin", "wb"); for (u32 i = 0x02000000; i < 0x04000000; i+=4) { u32 val = DSi::ARM7Read32(i); @@ -3258,7 +3249,7 @@ void ARM9IOWrite16(u32 addr, u16 val) return; case 0x04000188: - ARM9IOWrite32(addr, val | (val << 16)); + ARM9IOWrite32(addr, val | (val << 16)); return; case 0x040001A0: diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index 6e27d61..0228446 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -371,7 +371,7 @@ void EmuThread::run() if (Input::HotkeyPressed(HK_Pause)) emit windowEmuPause(); if (Input::HotkeyPressed(HK_Reset)) emit windowEmuReset(); - + if (Input::HotkeyPressed(HK_FullscreenToggle)) emit windowFullscreenToggle(); if (GBACart::CartInserted && GBACart::HasSolarSensor) @@ -507,7 +507,7 @@ void EmuThread::run() frameLimitError = -practicalFramelimit; if (frameLimitError > practicalFramelimit) frameLimitError = practicalFramelimit; - + if (round(frameLimitError * 1000.0) > 0.0) { SDL_Delay(round(frameLimitError * 1000.0)); @@ -1324,6 +1324,7 @@ void MainWindow::keyPressEvent(QKeyEvent* event) { if (event->isAutoRepeat()) return; + // TODO!! REMOVE ME IN RELEASE BUILDS!! if (event->key() == Qt::Key_F11) NDS::debug(0); Input::KeyPress(event); @@ -1926,9 +1927,9 @@ void MainWindow::onTitleUpdate(QString title) void MainWindow::onFullscreenToggled() { - if (!mainWindow->isFullScreen()) + if (!mainWindow->isFullScreen()) { - mainWindow->showFullScreen(); + mainWindow->showFullScreen(); mainWindow->menuBar()->hide(); } else -- cgit v1.2.3