From c03d83b7befa8586422faebd55f7844db4e7a715 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Wed, 28 Oct 2020 19:45:50 +0100 Subject: remove qt_sdl dependency from frontend util --- src/frontend/SharedConfig.h | 13 +++++++++++++ src/frontend/Util_ROM.cpp | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 src/frontend/SharedConfig.h (limited to 'src') diff --git a/src/frontend/SharedConfig.h b/src/frontend/SharedConfig.h new file mode 100644 index 0000000..b4b18c5 --- /dev/null +++ b/src/frontend/SharedConfig.h @@ -0,0 +1,13 @@ +#ifndef SHAREDCONFIG_H +#define SHAREDCONFIG_H + +namespace Config +{ + +extern int ConsoleType; +extern int DirectBoot; +extern int SavestateRelocSRAM; + +} + +#endif \ No newline at end of file diff --git a/src/frontend/Util_ROM.cpp b/src/frontend/Util_ROM.cpp index f61c3e3..9f22f5f 100644 --- a/src/frontend/Util_ROM.cpp +++ b/src/frontend/Util_ROM.cpp @@ -21,7 +21,7 @@ #include "FrontendUtil.h" #include "Config.h" -#include "qt_sdl/PlatformConfig.h" // FIXME!!! +#include "SharedConfig.h" #include "Platform.h" #include "NDS.h" -- cgit v1.2.3 From 05e274a1f69b976ae5826e0660f9510c2b3878af Mon Sep 17 00:00:00 2001 From: Hypnotron Date: Thu, 29 Oct 2020 16:09:25 -0400 Subject: Added 8/16-bit IPCFIFOSEND writes --- src/NDS.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src') diff --git a/src/NDS.cpp b/src/NDS.cpp index d42e735..aef4d3c 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -3116,6 +3116,10 @@ void ARM9IOWrite8(u32 addr, u8 val) NDSCart::WriteSPIData(val); return; + case 0x04000188: + ARM9IOWrite32(addr, val | (val << 8) | (val << 16) | (val << 24)); + return; + case 0x040001A8: NDSCart::ROMCommand[0] = val; return; case 0x040001A9: NDSCart::ROMCommand[1] = val; return; case 0x040001AA: NDSCart::ROMCommand[2] = val; return; @@ -3232,6 +3236,10 @@ void ARM9IOWrite16(u32 addr, u16 val) IPCFIFOCnt9 = val & 0x8404; return; + case 0x04000188: + ARM9IOWrite32(addr, val | (val << 16)); + return; + case 0x040001A0: if (!(ExMemCnt[0] & (1<<11))) NDSCart::WriteSPICnt(val); return; @@ -3719,6 +3727,10 @@ void ARM7IOWrite8(u32 addr, u8 val) case 0x04000138: RTC::Write(val, true); return; + case 0x04000188: + ARM7IOWrite32(addr, val | (val << 8) | (val << 16) | (val << 24)); + return; + case 0x040001A0: if (ExMemCnt[0] & (1<<11)) { @@ -3827,6 +3839,10 @@ void ARM7IOWrite16(u32 addr, u16 val) IPCFIFOCnt7 = val & 0x8404; return; + case 0x04000188: + ARM7IOWrite32(addr, val | (val << 16)); + return; + case 0x040001A0: if (ExMemCnt[0] & (1<<11)) NDSCart::WriteSPICnt(val); -- cgit v1.2.3 From 9ac60a840a1447d583b32049dab4ab327dd3b65f Mon Sep 17 00:00:00 2001 From: RSDuck Date: Sat, 31 Oct 2020 13:43:33 +0100 Subject: SPU: work with scalars instead of arrays --- src/SPU.cpp | 195 +++++++++++++++++++++++------------------------------------- src/SPU.h | 24 ++++---- 2 files changed, 84 insertions(+), 135 deletions(-) (limited to 'src') diff --git a/src/SPU.cpp b/src/SPU.cpp index 5b74bda..bb15244 100644 --- a/src/SPU.cpp +++ b/src/SPU.cpp @@ -61,8 +61,6 @@ const s16 PSGTable[8][8] = {-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF} }; -const u32 kSamplesPerRun = 1; - const u32 OutputBufferSize = 2*1024; s16 OutputBuffer[2 * OutputBufferSize]; volatile u32 OutputReadOffset; @@ -111,7 +109,7 @@ void Reset() Capture[0]->Reset(); Capture[1]->Reset(); - NDS::ScheduleEvent(NDS::Event_SPU, true, 1024*kSamplesPerRun, Mix, kSamplesPerRun); + NDS::ScheduleEvent(NDS::Event_SPU, true, 1024, Mix, 0); } void Stop() @@ -416,11 +414,11 @@ void Channel::NextSample_Noise() } template -void Channel::Run(s32* buf, u32 samples) +s32 Channel::Run() { - if (!(Cnt & (1<<31))) return; + if (!(Cnt & (1<<31))) return 0; - if ((type < 3) && ((Length+LoopPos) < 16)) return; + if ((type < 3) && ((Length+LoopPos) < 16)) return 0; if (KeyOn) { @@ -428,45 +426,32 @@ void Channel::Run(s32* buf, u32 samples) KeyOn = false; } - for (u32 s = 0; s < samples; s++) + Timer += 512; // 1 sample = 512 cycles at 16MHz + + while (Timer >> 16) { - Timer += 512; // 1 sample = 512 cycles at 16MHz + Timer = TimerReload + (Timer - 0x10000); - while (Timer >> 16) + switch (type) { - Timer = TimerReload + (Timer - 0x10000); - - switch (type) - { - case 0: NextSample_PCM8(); break; - case 1: NextSample_PCM16(); break; - case 2: NextSample_ADPCM(); break; - case 3: NextSample_PSG(); break; - case 4: NextSample_Noise(); break; - } + case 0: NextSample_PCM8(); break; + case 1: NextSample_PCM16(); break; + case 2: NextSample_ADPCM(); break; + case 3: NextSample_PSG(); break; + case 4: NextSample_Noise(); break; } - - s32 val = (s32)CurSample; - val <<= VolumeShift; - val *= Volume; - buf[s] = val; - - if (!(Cnt & (1<<31))) break; } + + s32 val = (s32)CurSample; + val <<= VolumeShift; + val *= Volume; + return val; } -void Channel::PanOutput(s32* inbuf, u32 samples, s32* leftbuf, s32* rightbuf) +void Channel::PanOutput(s32 in, s32& left, s32& right) { - for (u32 s = 0; s < samples; s++) - { - s32 val = (s32)inbuf[s]; - - s32 l = ((s64)val * (128-Pan)) >> 10; - s32 r = ((s64)val * Pan) >> 10; - - leftbuf[s] += l; - rightbuf[s] += r; - } + left += ((s64)in * (128-Pan)) >> 10; + right += ((s64)in * Pan) >> 10; } @@ -602,39 +587,31 @@ void CaptureUnit::Run(s32 sample) } -void Mix(u32 samples) +void Mix(u32 dummy) { - s32 channelbuf[32]; - s32 leftbuf[32], rightbuf[32]; - s32 ch0buf[32], ch1buf[32], ch2buf[32], ch3buf[32]; - s32 leftoutput[32], rightoutput[32]; - - for (u32 s = 0; s < samples; s++) - { - leftbuf[s] = 0; rightbuf[s] = 0; - leftoutput[s] = 0; rightoutput[s] = 0; - } + s32 left = 0, right = 0; + s32 leftoutput = 0, rightoutput = 0; if (Cnt & (1<<15)) { - Channels[0]->DoRun(ch0buf, samples); - Channels[1]->DoRun(ch1buf, samples); - Channels[2]->DoRun(ch2buf, samples); - Channels[3]->DoRun(ch3buf, samples); + s32 ch0 = Channels[0]->DoRun(); + s32 ch1 = Channels[1]->DoRun(); + s32 ch2 = Channels[2]->DoRun(); + s32 ch3 = Channels[3]->DoRun(); // TODO: addition from capture registers - Channels[0]->PanOutput(ch0buf, samples, leftbuf, rightbuf); - Channels[2]->PanOutput(ch2buf, samples, leftbuf, rightbuf); + Channels[0]->PanOutput(ch0, left, right); + Channels[2]->PanOutput(ch2, left, right); - if (!(Cnt & (1<<12))) Channels[1]->PanOutput(ch1buf, samples, leftbuf, rightbuf); - if (!(Cnt & (1<<13))) Channels[3]->PanOutput(ch3buf, samples, leftbuf, rightbuf); + if (!(Cnt & (1<<12))) Channels[1]->PanOutput(ch1, left, right); + if (!(Cnt & (1<<13))) Channels[3]->PanOutput(ch3, left, right); for (int i = 4; i < 16; i++) { Channel* chan = Channels[i]; - chan->DoRun(channelbuf, samples); - chan->PanOutput(channelbuf, samples, leftbuf, rightbuf); + s32 channel = chan->DoRun(); + chan->PanOutput(channel, left, right); } // sound capture @@ -642,32 +619,24 @@ void Mix(u32 samples) if (Capture[0]->Cnt & (1<<7)) { - for (u32 s = 0; s < samples; s++) - { - s32 val = leftbuf[s]; + s32 val = left; - val >>= 8; - if (val < -0x8000) val = -0x8000; - else if (val > 0x7FFF) val = 0x7FFF; + val >>= 8; + if (val < -0x8000) val = -0x8000; + else if (val > 0x7FFF) val = 0x7FFF; - Capture[0]->Run(val); - if (!(Capture[0]->Cnt & (1<<7))) break; - } + Capture[0]->Run(val); } if (Capture[1]->Cnt & (1<<7)) { - for (u32 s = 0; s < samples; s++) - { - s32 val = rightbuf[s]; + s32 val = right; - val >>= 8; - if (val < -0x8000) val = -0x8000; - else if (val > 0x7FFF) val = 0x7FFF; + val >>= 8; + if (val < -0x8000) val = -0x8000; + else if (val > 0x7FFF) val = 0x7FFF; - Capture[1]->Run(val); - if (!(Capture[1]->Cnt & (1<<7))) break; - } + Capture[1]->Run(val); } // final output @@ -675,31 +644,25 @@ void Mix(u32 samples) switch (Cnt & 0x0300) { case 0x0000: // left mixer - { - for (u32 s = 0; s < samples; s++) - leftoutput[s] = leftbuf[s]; - } + leftoutput = left; break; case 0x0100: // channel 1 { s32 pan = 128 - Channels[1]->Pan; - for (u32 s = 0; s < samples; s++) - leftoutput[s] = ((s64)ch1buf[s] * pan) >> 10; + leftoutput = ((s64)ch1 * pan) >> 10; } break; case 0x0200: // channel 3 { s32 pan = 128 - Channels[3]->Pan; - for (u32 s = 0; s < samples; s++) - leftoutput[s] = ((s64)ch3buf[s] * pan) >> 10; + leftoutput = ((s64)ch3 * pan) >> 10; } break; case 0x0300: // channel 1+3 { s32 pan1 = 128 - Channels[1]->Pan; s32 pan3 = 128 - Channels[3]->Pan; - for (u32 s = 0; s < samples; s++) - leftoutput[s] = (((s64)ch1buf[s] * pan1) >> 10) + (((s64)ch3buf[s] * pan3) >> 10); + leftoutput = (((s64)ch1 * pan1) >> 10) + (((s64)ch3 * pan3) >> 10); } break; } @@ -707,65 +670,53 @@ void Mix(u32 samples) switch (Cnt & 0x0C00) { case 0x0000: // right mixer - { - for (u32 s = 0; s < samples; s++) - rightoutput[s] = rightbuf[s]; - } + rightoutput = right; break; case 0x0400: // channel 1 { s32 pan = Channels[1]->Pan; - for (u32 s = 0; s < samples; s++) - rightoutput[s] = ((s64)ch1buf[s] * pan) >> 10; + rightoutput = ((s64)ch1 * pan) >> 10; } break; case 0x0800: // channel 3 { s32 pan = Channels[3]->Pan; - for (u32 s = 0; s < samples; s++) - rightoutput[s] = ((s64)ch3buf[s] * pan) >> 10; + rightoutput = ((s64)ch3 * pan) >> 10; } break; case 0x0C00: // channel 1+3 { s32 pan1 = Channels[1]->Pan; s32 pan3 = Channels[3]->Pan; - for (u32 s = 0; s < samples; s++) - rightoutput[s] = (((s64)ch1buf[s] * pan1) >> 10) + (((s64)ch3buf[s] * pan3) >> 10); + rightoutput = (((s64)ch1 * pan1) >> 10) + (((s64)ch3 * pan3) >> 10); } break; } } - for (u32 s = 0; s < samples; s++) + leftoutput = ((s64)leftoutput * MasterVolume) >> 7; + rightoutput = ((s64)rightoutput * MasterVolume) >> 7; + + leftoutput >>= 8; + if (leftoutput < -0x8000) leftoutput = -0x8000; + else if (leftoutput > 0x7FFF) leftoutput = 0x7FFF; + rightoutput >>= 8; + if (rightoutput < -0x8000) rightoutput = -0x8000; + else if (rightoutput > 0x7FFF) rightoutput = 0x7FFF; + + OutputBuffer[OutputWriteOffset ] = leftoutput >> 1; + OutputBuffer[OutputWriteOffset + 1] = rightoutput >> 1; + OutputWriteOffset += 2; + OutputWriteOffset &= ((2*OutputBufferSize)-1); + if (OutputWriteOffset == OutputReadOffset) { - s32 l = leftoutput[s]; - s32 r = rightoutput[s]; - - l = ((s64)l * MasterVolume) >> 7; - r = ((s64)r * MasterVolume) >> 7; - - l >>= 8; - if (l < -0x8000) l = -0x8000; - else if (l > 0x7FFF) l = 0x7FFF; - r >>= 8; - if (r < -0x8000) r = -0x8000; - else if (r > 0x7FFF) r = 0x7FFF; - - OutputBuffer[OutputWriteOffset ] = l >> 1; - OutputBuffer[OutputWriteOffset + 1] = r >> 1; - OutputWriteOffset += 2; - OutputWriteOffset &= ((2*OutputBufferSize)-1); - if (OutputWriteOffset == OutputReadOffset) - { - //printf("!! SOUND FIFO OVERFLOW %d\n", OutputWriteOffset>>1); - // advance the read position too, to avoid losing the entire FIFO - OutputReadOffset += 2; - OutputReadOffset &= ((2*OutputBufferSize)-1); - } + //printf("!! SOUND FIFO OVERFLOW %d\n", OutputWriteOffset>>1); + // advance the read position too, to avoid losing the entire FIFO + OutputReadOffset += 2; + OutputReadOffset &= ((2*OutputBufferSize)-1); } - NDS::ScheduleEvent(NDS::Event_SPU, true, 1024*kSamplesPerRun, Mix, kSamplesPerRun); + NDS::ScheduleEvent(NDS::Event_SPU, true, 1024, Mix, 0); } diff --git a/src/SPU.h b/src/SPU.h index 964841d..397c733 100644 --- a/src/SPU.h +++ b/src/SPU.h @@ -33,7 +33,7 @@ void DoSavestate(Savestate* file); void SetBias(u16 bias); -void Mix(u32 samples); +void Mix(u32 dummy); void TrimOutput(); void DrainOutput(); @@ -123,26 +123,24 @@ public: void NextSample_PSG(); void NextSample_Noise(); - template void Run(s32* buf, u32 samples); + template s32 Run(); - void DoRun(s32* buf, u32 samples) + s32 DoRun() { - for (u32 s = 0; s < samples; s++) - buf[s] = 0; - switch ((Cnt >> 29) & 0x3) { - case 0: Run<0>(buf, samples); break; - case 1: Run<1>(buf, samples); break; - case 2: Run<2>(buf, samples); break; + case 0: return Run<0>(); break; + case 1: return Run<1>(); break; + case 2: return Run<2>(); break; case 3: - if (Num >= 14) Run<4>(buf, samples); - else if (Num >= 8) Run<3>(buf, samples); - break; + if (Num >= 14) return Run<4>(); + else if (Num >= 8) return Run<3>(); + default: + return 0; } } - void PanOutput(s32* inbuf, u32 samples, s32* leftbuf, s32* rightbuf); + void PanOutput(s32 in, s32& left, s32& right); private: u32 (*BusRead32)(u32 addr); -- cgit v1.2.3 From 45ea1fa9905d20d7c3a839e28f571aff0b3e76f5 Mon Sep 17 00:00:00 2001 From: Filippo Scognamiglio Date: Sat, 31 Oct 2020 17:40:05 +0100 Subject: Fix compilation issues on pedantic cpp compilers. (#783) * Fix compilation issues on pedantic cpp compilers. * Avoid using fullblown static function. --- src/ARMJIT.cpp | 3 ++- src/ARMJIT_A64/ARMJIT_Compiler.cpp | 9 +++++++-- src/ARMJIT_A64/ARMJIT_Compiler.h | 1 + src/ARMJIT_x64/ARMJIT_Compiler.cpp | 10 +++++++--- 4 files changed, 17 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index c9d2b62..82f1d79 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -594,7 +594,8 @@ void CompileBlock(ARM* cpu) u32 r15 = cpu->R[15]; u32 addressRanges[Config::JIT_MaxBlockSize]; - u32 addressMasks[Config::JIT_MaxBlockSize] = {0}; + u32 addressMasks[Config::JIT_MaxBlockSize]; + memset(addressMasks, 0, Config::JIT_MaxBlockSize * sizeof(u32)); u32 numAddressRanges = 0; u32 numLiterals = 0; diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp index 80c7f04..93563b9 100644 --- a/src/ARMJIT_A64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp @@ -68,6 +68,11 @@ void Compiler::A_Comp_MRS() MOV(rd, RCPSR); } +void UpdateModeTrampoline(ARM* arm, u32 oldmode, u32 newmode) +{ + arm->UpdateMode(oldmode, newmode); +} + void Compiler::A_Comp_MSR() { Comp_AddCycles_C(); @@ -139,7 +144,7 @@ void Compiler::A_Comp_MSR() PushRegs(true); - QuickCallFunction(X3, (void*)&ARM::UpdateMode); + QuickCallFunction(X3, (void*)&UpdateModeTrampoline); PopRegs(true); } @@ -915,4 +920,4 @@ void Compiler::Comp_AddCycles_CD() ConstantCycles += cycles; } -} \ No newline at end of file +} diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.h b/src/ARMJIT_A64/ARMJIT_Compiler.h index af7497a..ef40ea4 100644 --- a/src/ARMJIT_A64/ARMJIT_Compiler.h +++ b/src/ARMJIT_A64/ARMJIT_Compiler.h @@ -187,6 +187,7 @@ public: void Comp_RegShiftReg(int op, bool S, Op2& op2, Arm64Gen::ARM64Reg rs); bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr); + enum { memop_Writeback = 1 << 0, diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index c6419c9..9785ffc 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -101,6 +101,11 @@ void Compiler::A_Comp_MRS() MOV(32, rd, R(RCPSR)); } +void UpdateModeTrampoline(ARM* arm, u32 oldmode, u32 newmode) +{ + arm->UpdateMode(oldmode, newmode); +} + void Compiler::A_Comp_MSR() { Comp_AddCycles_C(); @@ -185,7 +190,7 @@ void Compiler::A_Comp_MSR() MOV(32, R(ABI_PARAM3), R(RCPSR)); MOV(32, R(ABI_PARAM2), R(RSCRATCH3)); MOV(64, R(ABI_PARAM1), R(RCPU)); - CALL((void*)&ARM::UpdateMode); + CALL((void*)&UpdateModeTrampoline); PopRegs(true); } @@ -896,5 +901,4 @@ void Compiler::Comp_AddCycles_CD() else ConstantCycles += cycles; } - -} \ No newline at end of file +} -- cgit v1.2.3 From fbca47381b9e37ee19a2bdfdcd4e393d0ea69a7d Mon Sep 17 00:00:00 2001 From: Filippo Scognamiglio Date: Sat, 31 Oct 2020 17:53:01 +0100 Subject: Fix a couple of wrong cpp function pointers. (#785) --- src/ARMJIT_x64/ARMJIT_Branch.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp index 819fe3c..70ec781 100644 --- a/src/ARMJIT_x64/ARMJIT_Branch.cpp +++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp @@ -130,6 +130,16 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); } +void ARMv4JumpToTrampoline(ARMv4* arm, u32 addr, bool restorecpsr) +{ + arm->JumpTo(addr, restorecpsr); +} + +void ARMv5JumpToTrampoline(ARMv5* arm, u32 addr, bool restorecpsr) +{ + arm->JumpTo(addr, restorecpsr); +} + void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) { IrregularCycles = true; @@ -146,9 +156,9 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) else MOV(32, R(ABI_PARAM3), Imm32(true)); // what a waste if (Num == 0) - CALL((void*)&ARMv5::JumpTo); + CALL((void*)&ARMv5JumpToTrampoline); else - CALL((void*)&ARMv4::JumpTo); + CALL((void*)&ARMv4JumpToTrampoline); PopRegs(restoreCPSR); @@ -269,4 +279,4 @@ void Compiler::T_Comp_BL_Merged() Comp_JumpTo(target); } -} \ No newline at end of file +} -- cgit v1.2.3 From ad7791f72661eece13e81dfd1323793293054c5e Mon Sep 17 00:00:00 2001 From: RSDuck Date: Mon, 2 Nov 2020 18:48:32 +0100 Subject: better framelimiter for reference: https://github.com/citra-emu/citra/blob/master/src/core/perf_stats.cpp#L129 --- src/frontend/qt_sdl/main.cpp | 60 +++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 34 deletions(-) (limited to 'src') diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index 3a735fb..cfdf03d 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -355,10 +355,10 @@ void EmuThread::run() Input::Init(); u32 nframes = 0; - u32 starttick = SDL_GetTicks(); - u32 lasttick = starttick; - u32 lastmeasuretick = lasttick; - u32 fpslimitcount = 0; + double perfCountsSec = 1.0 / SDL_GetPerformanceFrequency(); + double lastTime = SDL_GetPerformanceCounter() * perfCountsSec; + double frameLimitError = 0.0; + double lastMeasureTime = lastTime; char melontitle[100]; @@ -492,49 +492,43 @@ void EmuThread::run() SDL_UnlockMutex(audioSyncLock); } - float framerate = (1000.0f * nlines) / (60.0f * 263.0f); + double frametimeStep = nlines / (60.0 * 263.0); { - u32 curtick = SDL_GetTicks(); - u32 delay = curtick - lasttick; + double curtime = SDL_GetPerformanceCounter() * perfCountsSec; bool limitfps = Config::LimitFPS && !fastforward; if (limitfps) { - float wantedtickF = starttick + (framerate * (fpslimitcount+1)); - u32 wantedtick = (u32)ceil(wantedtickF); - if (curtick < wantedtick) SDL_Delay(wantedtick - curtick); - - lasttick = SDL_GetTicks(); - fpslimitcount++; - if ((abs(wantedtickF - (float)wantedtick) < 0.001312) || (fpslimitcount > 60)) + frameLimitError += frametimeStep - (curtime - lastTime); + if (frameLimitError < -frametimeStep) + frameLimitError = -frametimeStep; + if (frameLimitError > frametimeStep) + frameLimitError = frametimeStep; + + if (round(frameLimitError * 1000.0) > 0.0) { - fpslimitcount = 0; - starttick = lasttick; + SDL_Delay(round(frameLimitError * 1000.0)); + double timeBeforeSleep = curtime; + curtime = SDL_GetPerformanceCounter() * perfCountsSec; + frameLimitError -= curtime - timeBeforeSleep; } } - else - { - if (delay < 1) SDL_Delay(1); - lasttick = SDL_GetTicks(); - } + + lastTime = curtime; } nframes++; if (nframes >= 30) { - u32 tick = SDL_GetTicks(); - u32 diff = tick - lastmeasuretick; - lastmeasuretick = tick; + double time = SDL_GetPerformanceCounter() * perfCountsSec; + double dt = time - lastMeasureTime; + lastMeasureTime = time; - u32 fps; - if (diff < 1) fps = 77777; - else fps = (nframes * 1000) / diff; + u32 fps = round(nframes / dt); nframes = 0; - float fpstarget; - if (framerate < 1) fpstarget = 999; - else fpstarget = 1000.0f/framerate; + float fpstarget = 1.0/frametimeStep; sprintf(melontitle, "[%d/%.0f] melonDS " MELONDS_VERSION, fps, fpstarget); changeWindowTitle(melontitle); @@ -544,10 +538,8 @@ void EmuThread::run() { // paused nframes = 0; - lasttick = SDL_GetTicks(); - starttick = lasttick; - lastmeasuretick = lasttick; - fpslimitcount = 0; + lastTime = SDL_GetPerformanceCounter() * perfCountsSec; + lastMeasureTime = lastTime; emit windowUpdate(); -- cgit v1.2.3 From 78839f862e6bd1a8d0a3d99737bd377c5dccf11f Mon Sep 17 00:00:00 2001 From: RSDuck Date: Mon, 9 Nov 2020 20:43:31 +0100 Subject: JIT fixes - fix fastmem problems on linux - small fix memory leak - SlowWrite functions always take in a 32-bit variable so that the C compiler knows that the values aren't necessary zero extended - a few other stylistic things - handle SIGBUS as well (for macos) --- src/ARMJIT.cpp | 14 ++-- src/ARMJIT_A64/ARMJIT_Compiler.h | 4 +- src/ARMJIT_A64/ARMJIT_LoadStore.cpp | 17 ++-- src/ARMJIT_Internal.h | 4 +- src/ARMJIT_Memory.cpp | 153 ++++++++++++++++++++++++------------ src/ARMJIT_x64/ARMJIT_Compiler.cpp | 12 +-- src/ARMJIT_x64/ARMJIT_Compiler.h | 4 +- src/ARMJIT_x64/ARMJIT_LoadStore.cpp | 23 +++--- 8 files changed, 140 insertions(+), 91 deletions(-) (limited to 'src') diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index 82f1d79..1921f13 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -176,7 +176,7 @@ T SlowRead9(u32 addr, ARMv5* cpu) } template -void SlowWrite9(u32 addr, ARMv5* cpu, T val) +void SlowWrite9(u32 addr, ARMv5* cpu, u32 val) { addr &= ~(sizeof(T) - 1); @@ -224,7 +224,7 @@ T SlowRead7(u32 addr) } template -void SlowWrite7(u32 addr, T val) +void SlowWrite7(u32 addr, u32 val) { addr &= ~(sizeof(T) - 1); @@ -266,16 +266,16 @@ void SlowBlockTransfer7(u32 addr, u64* data, u32 num) #define INSTANTIATE_SLOWMEM(consoleType) \ template void SlowWrite9(u32, ARMv5*, u32); \ - template void SlowWrite9(u32, ARMv5*, u16); \ - template void SlowWrite9(u32, ARMv5*, u8); \ + template void SlowWrite9(u32, ARMv5*, u32); \ + template void SlowWrite9(u32, ARMv5*, u32); \ \ template u32 SlowRead9(u32, ARMv5*); \ template u16 SlowRead9(u32, ARMv5*); \ template u8 SlowRead9(u32, ARMv5*); \ \ template void SlowWrite7(u32, u32); \ - template void SlowWrite7(u32, u16); \ - template void SlowWrite7(u32, u8); \ + template void SlowWrite7(u32, u32); \ + template void SlowWrite7(u32, u32); \ \ template u32 SlowRead7(u32); \ template u16 SlowRead7(u32); \ @@ -298,6 +298,7 @@ void Init() void DeInit() { + ResetBlockCache(); ARMJIT_Memory::DeInit(); delete JITCompiler; @@ -1117,6 +1118,7 @@ void ResetBlockCache() range->Blocks.Clear(); range->Code = 0; } + delete block; } JitBlocks9.clear(); JitBlocks7.clear(); diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.h b/src/ARMJIT_A64/ARMJIT_Compiler.h index ef40ea4..a79e9da 100644 --- a/src/ARMJIT_A64/ARMJIT_Compiler.h +++ b/src/ARMJIT_A64/ARMJIT_Compiler.h @@ -214,8 +214,8 @@ public: return (u8*)entry - GetRXBase(); } - bool IsJITFault(u64 pc); - s64 RewriteMemAccess(u64 pc); + bool IsJITFault(u8* pc); + u8* RewriteMemAccess(u8* pc); void SwapCodeRegion() { diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp index 86e257a..2c14dc6 100644 --- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp @@ -9,37 +9,34 @@ using namespace Arm64Gen; namespace ARMJIT { -bool Compiler::IsJITFault(u64 pc) +bool Compiler::IsJITFault(u8* pc) { - return pc >= (u64)GetRXBase() && pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize); + return (u64)pc >= (u64)GetRXBase() && (u64)pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize); } -s64 Compiler::RewriteMemAccess(u64 pc) +u8* Compiler::RewriteMemAccess(u8* pc) { - ptrdiff_t pcOffset = pc - (u64)GetRXBase(); + ptrdiff_t pcOffset = pc - GetRXBase(); auto it = LoadStorePatches.find(pcOffset); if (it != LoadStorePatches.end()) { LoadStorePatch patch = it->second; + LoadStorePatches.erase(it); ptrdiff_t curCodeOffset = GetCodeOffset(); SetCodePtrUnsafe(pcOffset + patch.PatchOffset); BL(patch.PatchFunc); - for (int i = 0; i < patch.PatchSize / 4 - 1; i++) HINT(HINT_NOP); - FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr()); SetCodePtrUnsafe(curCodeOffset); - LoadStorePatches.erase(it); - - return patch.PatchOffset; + return pc + (ptrdiff_t)patch.PatchOffset; } printf("this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc)); abort(); @@ -192,7 +189,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) else { LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7); - if (size == 32) + if (size == 32 && !addrIsStatic) { UBFIZ(W0, W0, 3, 2); RORV(rdMapped, rdMapped, W0); diff --git a/src/ARMJIT_Internal.h b/src/ARMJIT_Internal.h index 4244470..b1e35f5 100644 --- a/src/ARMJIT_Internal.h +++ b/src/ARMJIT_Internal.h @@ -216,9 +216,9 @@ template void LinkBlock(ARM* cpu, u32 codeOffset); template T SlowRead9(u32 addr, ARMv5* cpu); -template void SlowWrite9(u32 addr, ARMv5* cpu, T val); +template void SlowWrite9(u32 addr, ARMv5* cpu, u32 val); template T SlowRead7(u32 addr); -template void SlowWrite7(u32 addr, T val); +template void SlowWrite7(u32 addr, u32 val); template void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu); template void SlowBlockTransfer7(u32 addr, u64* data, u32 num); diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp index d321d2f..355031e 100644 --- a/src/ARMJIT_Memory.cpp +++ b/src/ARMJIT_Memory.cpp @@ -40,7 +40,8 @@ We handle this by only mapping those regions which are actually used and by praying the games don't go wild. - Beware, this file is full of platform specific code. + Beware, this file is full of platform specific code and copied + from Dolphin, so enjoy the copied comments! */ @@ -49,10 +50,10 @@ namespace ARMJIT_Memory struct FaultDescription { u32 EmulatedFaultAddr; - u64 FaultPC; + u8* FaultPC; }; -bool FaultHandler(FaultDescription* faultDesc, s32& offset); +bool FaultHandler(FaultDescription& faultDesc); } #if defined(__SWITCH__) @@ -75,7 +76,7 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx) ARMJIT_Memory::FaultDescription desc; u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); desc.EmulatedFaultAddr = (u8*)ctx->far.x - curArea; - desc.FaultPC = ctx->pc.x; + desc.FaultPC = (u8*)ctx->pc.x; u64 integerRegisters[33]; memcpy(integerRegisters, &ctx->cpu_gprs[0].x, 8*29); @@ -84,10 +85,9 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx) integerRegisters[31] = ctx->sp.x; integerRegisters[32] = ctx->pc.x; - s32 offset; - if (ARMJIT_Memory::FaultHandler(&desc, offset)) + if (ARMJIT_Memory::FaultHandler(desc, offset)) { - integerRegisters[32] += offset; + integerRegisters[32] = (u64)desc.FaultPC; ARM_RestoreContext(integerRegisters); } @@ -117,12 +117,11 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo) ARMJIT_Memory::FaultDescription desc; u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); desc.EmulatedFaultAddr = (u8*)exceptionInfo->ExceptionRecord->ExceptionInformation[1] - curArea; - desc.FaultPC = exceptionInfo->ContextRecord->Rip; + desc.FaultPC = (u8*)exceptionInfo->ContextRecord->Rip; - s32 offset = 0; - if (ARMJIT_Memory::FaultHandler(&desc, offset)) + if (ARMJIT_Memory::FaultHandler(desc)) { - exceptionInfo->ContextRecord->Rip += offset; + exceptionInfo->ContextRecord->Rip = (u8*)desc.FaultPC; return EXCEPTION_CONTINUE_EXECUTION; } @@ -131,50 +130,66 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo) #else -struct sigaction NewSa; -struct sigaction OldSa; +static struct sigaction OldSaSegv; +static struct sigaction OldSaBus; static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext) { + if (sig != SIGSEGV && sig != SIGBUS) + { + // We are not interested in other signals - handle it as usual. + return; + } + if (info->si_code != SEGV_MAPERR && info->si_code != SEGV_ACCERR) + { + // Huh? Return. + return; + } + ucontext_t* context = (ucontext_t*)rawContext; - + ARMJIT_Memory::FaultDescription desc; u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); #ifdef __x86_64__ desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea; - desc.FaultPC = context->uc_mcontext.gregs[REG_RIP]; + desc.FaultPC = (u8*)context->uc_mcontext.gregs[REG_RIP]; #else desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea; - desc.FaultPC = context->uc_mcontext.pc; + desc.FaultPC = (u8*)context->uc_mcontext.pc; #endif - s32 offset = 0; - if (ARMJIT_Memory::FaultHandler(&desc, offset)) + if (ARMJIT_Memory::FaultHandler(desc)) { #ifdef __x86_64__ - context->uc_mcontext.gregs[REG_RIP] += offset; + context->uc_mcontext.gregs[REG_RIP] = (u64)desc.FaultPC; #else - context->uc_mcontext.pc += offset; + context->uc_mcontext.pc = (u64)desc.FaultPC; #endif return; } - if (OldSa.sa_flags & SA_SIGINFO) + struct sigaction* oldSa; + if (sig == SIGSEGV) + oldSa = &OldSaSegv; + else + oldSa = &OldSaBus; + + if (oldSa->sa_flags & SA_SIGINFO) { - OldSa.sa_sigaction(sig, info, rawContext); + oldSa->sa_sigaction(sig, info, rawContext); return; } - if (OldSa.sa_handler == SIG_DFL) + if (oldSa->sa_handler == SIG_DFL) { signal(sig, SIG_DFL); return; } - if (OldSa.sa_handler == SIG_IGN) + if (oldSa->sa_handler == SIG_IGN) { // Ignore signal return; } - OldSa.sa_handler(sig); + oldSa->sa_handler(sig); } #endif @@ -231,7 +246,7 @@ enum { memstate_Unmapped, memstate_MappedRW, - // on switch this is unmapped as well + // on Switch this is unmapped as well memstate_MappedProtected, }; @@ -505,6 +520,21 @@ bool MapAtAddress(u32 addr) bool isExecutable = ARMJIT::CodeMemRegions[region]; #ifndef __SWITCH__ + if (num == 0) + { + // if a DTCM mapping is mapped before the mapping below it + // we unmap it, so it won't just be overriden + for (int i = 0; i < Mappings[memregion_DTCM].Length; i++) + { + Mapping& mapping = Mappings[memregion_DTCM][i]; + if (mirrorStart < mapping.Addr + mapping.Size && mirrorStart + mirrorSize >= mapping.Addr) + { + mapping.Unmap(memregion_DTCM); + } + } + Mappings[memregion_DTCM].Clear(); + } + bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize); assert(succeded); #endif @@ -562,21 +592,20 @@ bool MapAtAddress(u32 addr) return true; } -bool FaultHandler(FaultDescription* faultDesc, s32& offset) +bool FaultHandler(FaultDescription& faultDesc) { - if (ARMJIT::JITCompiler->IsJITFault(faultDesc->FaultPC)) + if (ARMJIT::JITCompiler->IsJITFault(faultDesc.FaultPC)) { bool rewriteToSlowPath = true; - u32 addr = faultDesc->EmulatedFaultAddr; + u8* memStatus = NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7; - if ((NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7)[addr >> 12] == memstate_Unmapped) - rewriteToSlowPath = !MapAtAddress(faultDesc->EmulatedFaultAddr); + if (memStatus[faultDesc.EmulatedFaultAddr >> 12] == memstate_Unmapped) + rewriteToSlowPath = !MapAtAddress(faultDesc.EmulatedFaultAddr); if (rewriteToSlowPath) - { - offset = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc->FaultPC); - } + faultDesc.FaultPC = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc.FaultPC); + return true; } return false; @@ -624,22 +653,28 @@ void Init() u8* basePtr = MemoryBase; #else - FastMem9Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); - FastMem7Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); - - MemoryBase = (u8*)mmap(NULL, MemoryTotalSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); + // this used to be allocated with three different mmaps + // The idea was to give the OS more freedom where to position the buffers, + // but something was bad about this so instead we take this vmem eating monster + // which seems to work better. + MemoryBase = (u8*)mmap(NULL, AddrSpaceSize*4, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); + munmap(MemoryBase, AddrSpaceSize*4); + FastMem9Start = MemoryBase; + FastMem7Start = MemoryBase + AddrSpaceSize; + MemoryBase = MemoryBase + AddrSpaceSize*2; MemoryFile = memfd_create("melondsfastmem", 0); ftruncate(MemoryFile, MemoryTotalSize); - NewSa.sa_flags = SA_SIGINFO; - sigemptyset(&NewSa.sa_mask); - NewSa.sa_sigaction = SigsegvHandler; - sigaction(SIGSEGV, &NewSa, &OldSa); - - munmap(MemoryBase, MemoryTotalSize); - munmap(FastMem9Start, AddrSpaceSize); - munmap(FastMem7Start, AddrSpaceSize); + struct sigaction sa; + sa.sa_handler = nullptr; + sa.sa_sigaction = &SigsegvHandler; + sa.sa_flags = SA_SIGINFO; + sigemptyset(&sa.sa_mask); + sigaction(SIGSEGV, &sa, &OldSaSegv); +#ifdef __APPLE__ + sigaction(SIGBUS, &sa, &OldSaBus); +#endif mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0); @@ -657,8 +692,8 @@ void Init() void DeInit() { #if defined(__SWITCH__) - virtmemFree(FastMem9Start, 0x100000000); - virtmemFree(FastMem7Start, 0x100000000); + virtmemFree(FastMem9Start, AddrSpaceSize); + virtmemFree(FastMem7Start, AddrSpaceSize); svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize); virtmemFree(MemoryBaseCodeMem, MemoryTotalSize); @@ -668,6 +703,14 @@ void DeInit() CloseHandle(MemoryFile); RemoveVectoredExceptionHandler(ExceptionHandlerHandle); +#else + sigaction(SIGSEGV, &OldSaSegv, nullptr); +#ifdef __APPLE__ + sigaction(SIGBUS, &OldSaBus, nullptr); +#endif + + munmap(MemoryBase, MemoryTotalSize); + close(MemoryFile); #endif } @@ -702,7 +745,15 @@ bool IsFastmemCompatible(int region) || region == memregion_NewSharedWRAM_C) return false; #endif - return OffsetsPerRegion[region] != UINT32_MAX; + if (region == memregion_DTCM + || region == memregion_MainRAM + || region == memregion_NewSharedWRAM_A + || region == memregion_NewSharedWRAM_B + || region == memregion_NewSharedWRAM_C + || region == memregion_SharedWRAM) + return false; + //return OffsetsPerRegion[region] != UINT32_MAX; + return false; } bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mirrorStart, u32& mirrorSize) @@ -997,9 +1048,11 @@ int ClassifyAddress7(u32 addr) case 0x06000000: case 0x06800000: return memregion_VWRAM; + + default: + return memregion_Other; } } - return memregion_Other; } void WifiWrite32(u32 addr, u32 val) diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index 9785ffc..076f48c 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -345,7 +345,7 @@ Compiler::Compiler() ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8); if (consoleType == 0) { - switch ((8 << size) | num) + switch ((8 << size) | num) { case 32: ABI_CallFunction(SlowWrite9); break; case 33: ABI_CallFunction(SlowWrite7); break; @@ -357,7 +357,7 @@ Compiler::Compiler() } else { - switch ((8 << size) | num) + switch ((8 << size) | num) { case 32: ABI_CallFunction(SlowWrite9); break; case 33: ABI_CallFunction(SlowWrite7); break; @@ -380,7 +380,7 @@ Compiler::Compiler() ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8); if (consoleType == 0) { - switch ((8 << size) | num) + switch ((8 << size) | num) { case 32: ABI_CallFunction(SlowRead9); break; case 33: ABI_CallFunction(SlowRead7); break; @@ -392,7 +392,7 @@ Compiler::Compiler() } else { - switch ((8 << size) | num) + switch ((8 << size) | num) { case 32: ABI_CallFunction(SlowRead9); break; case 33: ABI_CallFunction(SlowRead7); break; @@ -617,9 +617,9 @@ void Compiler::Reset() LoadStorePatches.clear(); } -bool Compiler::IsJITFault(u64 addr) +bool Compiler::IsJITFault(u8* addr) { - return addr >= (u64)CodeMemory && addr < (u64)CodeMemory + sizeof(CodeMemory); + return (u64)addr >= (u64)ResetStart && (u64)addr < (u64)ResetStart + CodeMemSize; } void Compiler::Comp_SpecialBranchBehaviour(bool taken) diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h index 3e900c3..57aab7b 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.h +++ b/src/ARMJIT_x64/ARMJIT_Compiler.h @@ -208,9 +208,9 @@ public: SetCodePtr(FarCode); } - bool IsJITFault(u64 addr); + bool IsJITFault(u8* addr); - s32 RewriteMemAccess(u64 pc); + u8* RewriteMemAccess(u8* pc); u8* FarCode; u8* NearCode; diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp index 8b4e8fe..d80b25b 100644 --- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp @@ -15,28 +15,24 @@ int squeezePointer(T* ptr) return truncated; } -s32 Compiler::RewriteMemAccess(u64 pc) +u8* Compiler::RewriteMemAccess(u8* pc) { - auto it = LoadStorePatches.find((u8*)pc); + auto it = LoadStorePatches.find(pc); if (it != LoadStorePatches.end()) { LoadStorePatch patch = it->second; LoadStorePatches.erase(it); - u8* curCodePtr = GetWritableCodePtr(); - u8* rewritePtr = (u8*)pc + (ptrdiff_t)patch.Offset; - SetCodePtr(rewritePtr); + //printf("rewriting memory access %p %d %d\n", (u8*)pc-ResetStart, patch.Offset, patch.Size); - CALL(patch.PatchFunc); - u32 remainingSize = patch.Size - (GetWritableCodePtr() - rewritePtr); + XEmitter emitter(pc + (ptrdiff_t)patch.Offset); + emitter.CALL(patch.PatchFunc); + ptrdiff_t remainingSize = (ptrdiff_t)patch.Size - 5; + assert(remainingSize >= 0); if (remainingSize > 0) - NOP(remainingSize); + emitter.NOP(remainingSize); - //printf("rewriting memory access %p %d %d\n", patch.PatchFunc, patch.Offset, patch.Size); - - SetCodePtr(curCodePtr); - - return patch.Offset; + return pc + (ptrdiff_t)patch.Offset; } printf("this is a JIT bug %llx\n", pc); @@ -192,6 +188,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag u8* memopStart = GetWritableCodePtr(); LoadStorePatch patch; + assert(rdMapped.GetSimpleReg() >= 0 && rdMapped.GetSimpleReg() < 16); patch.PatchFunc = flags & memop_Store ? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped.GetSimpleReg()] : PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped.GetSimpleReg()]; -- cgit v1.2.3 From 052079afeb8b7efe9c87c0fa5599fe626b461145 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Mon, 9 Nov 2020 20:56:31 +0100 Subject: fix Windows --- src/ARMJIT_Memory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp index 355031e..1352ea7 100644 --- a/src/ARMJIT_Memory.cpp +++ b/src/ARMJIT_Memory.cpp @@ -121,7 +121,7 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo) if (ARMJIT_Memory::FaultHandler(desc)) { - exceptionInfo->ContextRecord->Rip = (u8*)desc.FaultPC; + exceptionInfo->ContextRecord->Rip = (u64)desc.FaultPC; return EXCEPTION_CONTINUE_EXECUTION; } -- cgit v1.2.3 From 2720df965025b75a77656db523606dadbcbb6067 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Tue, 3 Nov 2020 19:40:14 +0100 Subject: make platform objects typesafer and add mutex --- src/GPU3D_Soft.cpp | 8 +++---- src/Platform.h | 27 ++++++++++++++++-------- src/frontend/qt_sdl/Platform.cpp | 45 +++++++++++++++++++++++++++++++--------- 3 files changed, 57 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index e9d8e75..7ee9e5d 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -61,12 +61,12 @@ bool Enabled; // threading bool Threaded; -void* RenderThread; +Platform::Thread* RenderThread; bool RenderThreadRunning; bool RenderThreadRendering; -void* Sema_RenderStart; -void* Sema_RenderDone; -void* Sema_ScanlineCount; +Platform::Semaphore* Sema_RenderStart; +Platform::Semaphore* Sema_RenderDone; +Platform::Semaphore* Sema_ScanlineCount; void RenderThreadFunc(); diff --git a/src/Platform.h b/src/Platform.h index fea98dd..deb3785 100644 --- a/src/Platform.h +++ b/src/Platform.h @@ -67,15 +67,24 @@ inline bool LocalFileExists(const char* name) return true; } -void* Thread_Create(void (*func)()); -void Thread_Free(void* thread); -void Thread_Wait(void* thread); - -void* Semaphore_Create(); -void Semaphore_Free(void* sema); -void Semaphore_Reset(void* sema); -void Semaphore_Wait(void* sema); -void Semaphore_Post(void* sema); +struct Thread; +Thread* Thread_Create(void (*func)()); +void Thread_Free(Thread* thread); +void Thread_Wait(Thread* thread); + +struct Semaphore; +Semaphore* Semaphore_Create(); +void Semaphore_Free(Semaphore* sema); +void Semaphore_Reset(Semaphore* sema); +void Semaphore_Wait(Semaphore* sema); +void Semaphore_Post(Semaphore* sema); + +struct Mutex; +Mutex* Mutex_Create(); +void Mutex_Free(Mutex* mutex); +void Mutex_Lock(Mutex* mutex); +void Mutex_Unlock(Mutex* mutex); +bool Mutex_TryLock(Mutex* mutex); void* GL_GetProcAddress(const char* proc); diff --git a/src/frontend/qt_sdl/Platform.cpp b/src/frontend/qt_sdl/Platform.cpp index a716feb..a51a985 100644 --- a/src/frontend/qt_sdl/Platform.cpp +++ b/src/frontend/qt_sdl/Platform.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "Platform.h" @@ -187,53 +188,77 @@ FILE* OpenLocalFile(const char* path, const char* mode) return OpenFile(fullpath.toUtf8(), mode, mode[0] != 'w'); } -void* Thread_Create(void (* func)()) +Thread* Thread_Create(void (* func)()) { QThread* t = QThread::create(func); t->start(); - return (void*) t; + return (Thread*) t; } -void Thread_Free(void* thread) +void Thread_Free(Thread* thread) { QThread* t = (QThread*) thread; t->terminate(); delete t; } -void Thread_Wait(void* thread) +void Thread_Wait(Thread* thread) { ((QThread*) thread)->wait(); } -void* Semaphore_Create() +Semaphore* Semaphore_Create() { - return new QSemaphore(); + return (Semaphore*)new QSemaphore(); } -void Semaphore_Free(void* sema) +void Semaphore_Free(Semaphore* sema) { delete (QSemaphore*) sema; } -void Semaphore_Reset(void* sema) +void Semaphore_Reset(Semaphore* sema) { QSemaphore* s = (QSemaphore*) sema; s->acquire(s->available()); } -void Semaphore_Wait(void* sema) +void Semaphore_Wait(Semaphore* sema) { ((QSemaphore*) sema)->acquire(); } -void Semaphore_Post(void* sema) +void Semaphore_Post(Semaphore* sema) { ((QSemaphore*) sema)->release(); } +Mutex* Mutex_Create() +{ + return (Mutex*)new QMutex(); +} + +void Mutex_Free(Mutex* mutex) +{ + delete (QMutex*) mutex; +} + +void Mutex_Lock(Mutex* mutex) +{ + ((QMutex*) mutex)->lock(); +} + +void Mutex_Unlock(Mutex* mutex) +{ + ((QMutex*) mutex)->unlock(); +} + +bool Mutex_TryLock(Mutex* mutex) +{ + return ((QMutex*) mutex)->try_lock(); +} void* GL_GetProcAddress(const char* proc) { -- cgit v1.2.3 From 62e3f41f20866388287329218810d2fd8cea3515 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Wed, 11 Nov 2020 13:38:05 +0100 Subject: delay savefile flush to the end of the frame --- src/NDS.cpp | 2 ++ src/NDSCart.cpp | 27 ++++++++++++++++++++------- src/NDSCart.h | 3 +++ 3 files changed, 25 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/NDS.cpp b/src/NDS.cpp index aef4d3c..5181b34 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -1000,6 +1000,8 @@ u32 RunFrame() GPU3D::Timestamp-SysTimestamp); #endif + NDSCart::FlushSRAMFile(); + NumFrames++; return GPU::TotalScanlines; diff --git a/src/NDSCart.cpp b/src/NDSCart.cpp index 077bf48..2d8396a 100644 --- a/src/NDSCart.cpp +++ b/src/NDSCart.cpp @@ -37,6 +37,7 @@ u8* SRAM; u32 SRAMLength; char SRAMPath[1024]; +bool SRAMFileDirty; void (*WriteFunc)(u8 val, bool islast); @@ -445,14 +446,21 @@ void Write(u8 val, u32 hold) break; } - if (islast && (CurCmd == 0x02 || CurCmd == 0x0A) && (SRAMLength > 0)) + SRAMFileDirty |= islast && (CurCmd == 0x02 || CurCmd == 0x0A) && (SRAMLength > 0); +} + +void FlushSRAMFile() +{ + if (!SRAMFileDirty) + return; + + SRAMFileDirty = false; + + FILE* f = Platform::OpenFile(SRAMPath, "wb"); + if (f) { - FILE* f = Platform::OpenFile(SRAMPath, "wb"); - if (f) - { - fwrite(SRAM, SRAMLength, 1, f); - fclose(f); - } + fwrite(SRAM, SRAMLength, 1, f); + fclose(f); } } @@ -1034,6 +1042,11 @@ void RelocateSave(const char* path, bool write) NDSCart_SRAM::RelocateSave(path, write); } +void FlushSRAMFile() +{ + NDSCart_SRAM::FlushSRAMFile(); +} + int ImportSRAM(const u8* data, u32 length) { memcpy(NDSCart_SRAM::SRAM, data, std::min(length, NDSCart_SRAM::SRAMLength)); diff --git a/src/NDSCart.h b/src/NDSCart.h index 9fe916d..7d3f4a1 100644 --- a/src/NDSCart.h +++ b/src/NDSCart.h @@ -46,6 +46,9 @@ void DoSavestate(Savestate* file); void DecryptSecureArea(u8* out); bool LoadROM(const char* path, const char* sram, bool direct); + +void FlushSRAMFile(); + void RelocateSave(const char* path, bool write); int ImportSRAM(const u8* data, u32 length); -- cgit v1.2.3 From d697f9e0d245da0ed0061724e24e4a476091ee7e Mon Sep 17 00:00:00 2001 From: RSDuck Date: Fri, 13 Nov 2020 15:20:53 +0100 Subject: make fastmem work again --- src/ARMJIT_Memory.cpp | 108 ++++++++++++++++++++++++++++++++------------------ src/DSi.cpp | 24 +++++------ 2 files changed, 81 insertions(+), 51 deletions(-) (limited to 'src') diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp index 1352ea7..70d18e6 100644 --- a/src/ARMJIT_Memory.cpp +++ b/src/ARMJIT_Memory.cpp @@ -329,14 +329,16 @@ struct Mapping void Unmap(int region) { + u32 dtcmStart = NDS::ARM9->DTCMBase; + u32 dtcmSize = NDS::ARM9->DTCMSize; bool skipDTCM = Num == 0 && region != memregion_DTCM; u8* statuses = Num == 0 ? MappingStatus9 : MappingStatus7; u32 offset = 0; while (offset < Size) { - if (skipDTCM && Addr + offset == NDS::ARM9->DTCMBase) + if (skipDTCM && Addr + offset == dtcmStart) { - offset += NDS::ARM9->DTCMSize; + offset += dtcmSize; } else { @@ -344,7 +346,7 @@ struct Mapping u8 status = statuses[(Addr + offset) >> 12]; while (statuses[(Addr + offset) >> 12] == status && offset < Size - && (!skipDTCM || Addr + offset != NDS::ARM9->DTCMBase)) + && (!skipDTCM || Addr + offset != dtcmStart)) { assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped); statuses[(Addr + offset) >> 12] = memstate_Unmapped; @@ -362,9 +364,33 @@ struct Mapping #endif } } + #ifndef __SWITCH__ - bool succeded = UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size); - assert(succeded); +#ifndef _WIN32 + u32 dtcmEnd = dtcmStart + dtcmSize; + if (Num == 0 + && dtcmEnd >= Addr + && dtcmStart < Addr + Size) + { + bool success; + if (dtcmStart > Addr) + { + success = UnmapFromRange(Addr, 0, OffsetsPerRegion[region] + LocalOffset, dtcmStart - Addr); + assert(success); + } + if (dtcmEnd < Addr + Size) + { + u32 offset = dtcmStart - Addr + dtcmSize; + success = UnmapFromRange(dtcmEnd, 0, OffsetsPerRegion[region] + LocalOffset + offset, Size - offset); + assert(success); + } + } + else +#endif + { + bool succeded = UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size); + assert(succeded); + } #endif } }; @@ -433,10 +459,10 @@ void RemapDTCM(u32 newBase, u32 newSize) printf("unmapping %d %x %x %x %x\n", region, mapping.Addr, mapping.Size, mapping.Num, mapping.LocalOffset); - bool oldOverlap = NDS::ARM9->DTCMSize > 0 && !(oldDTCMBase >= end || oldDTCBEnd <= start); - bool newOverlap = newSize > 0 && !(newBase >= end || newEnd <= start); + bool overlap = (NDS::ARM9->DTCMSize > 0 && oldDTCMBase < end && oldDTCBEnd > start) + || (newSize > 0 && newBase < end && newEnd > start); - if (mapping.Num == 0 && (oldOverlap || newOverlap)) + if (mapping.Num == 0 && overlap) { mapping.Unmap(region); Mappings[region].Remove(i); @@ -460,8 +486,8 @@ void RemapNWRAM(int num) for (int i = 0; i < Mappings[memregion_SharedWRAM].Length;) { Mapping& mapping = Mappings[memregion_SharedWRAM][i]; - if (!(DSi::NWRAMStart[mapping.Num][num] >= mapping.Addr + mapping.Size - || DSi::NWRAMEnd[mapping.Num][num] < mapping.Addr)) + if (DSi::NWRAMStart[mapping.Num][num] < mapping.Addr + mapping.Size + && DSi::NWRAMEnd[mapping.Num][num] > mapping.Addr) { mapping.Unmap(memregion_SharedWRAM); Mappings[memregion_SharedWRAM].Remove(i); @@ -484,7 +510,7 @@ void RemapSWRAM() for (int i = 0; i < Mappings[memregion_WRAM7].Length;) { Mapping& mapping = Mappings[memregion_WRAM7][i]; - if (mapping.Addr + mapping.Size < 0x03800000) + if (mapping.Addr + mapping.Size <= 0x03800000) { mapping.Unmap(memregion_WRAM7); Mappings[memregion_WRAM7].Remove(i); @@ -516,41 +542,53 @@ bool MapAtAddress(u32 addr) return false; u8* states = num == 0 ? MappingStatus9 : MappingStatus7; - printf("trying to create mapping %x, %x %x %d %d\n", mirrorStart, mirrorSize, memoryOffset, region, num); + printf("mapping mirror %x, %x %x %d %d\n", mirrorStart, mirrorSize, memoryOffset, region, num); bool isExecutable = ARMJIT::CodeMemRegions[region]; + u32 dtcmStart = NDS::ARM9->DTCMBase; + u32 dtcmSize = NDS::ARM9->DTCMSize; + u32 dtcmEnd = dtcmStart + dtcmSize; #ifndef __SWITCH__ - if (num == 0) +#ifndef _WIN32 + if (num == 0 + && dtcmEnd >= mirrorStart + && dtcmStart < mirrorStart + mirrorSize) { - // if a DTCM mapping is mapped before the mapping below it - // we unmap it, so it won't just be overriden - for (int i = 0; i < Mappings[memregion_DTCM].Length; i++) + bool success; + if (dtcmStart > mirrorStart) { - Mapping& mapping = Mappings[memregion_DTCM][i]; - if (mirrorStart < mapping.Addr + mapping.Size && mirrorStart + mirrorSize >= mapping.Addr) - { - mapping.Unmap(memregion_DTCM); - } + success = MapIntoRange(mirrorStart, 0, OffsetsPerRegion[region] + memoryOffset, dtcmStart - mirrorStart); + assert(success); + } + if (dtcmEnd < mirrorStart + mirrorSize) + { + u32 offset = dtcmStart - mirrorStart + dtcmSize; + success = MapIntoRange(dtcmEnd, 0, OffsetsPerRegion[region] + memoryOffset + offset, mirrorSize - offset); + assert(success); } - Mappings[memregion_DTCM].Clear(); } - - bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize); - assert(succeded); + else +#endif + { + bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize); + assert(succeded); + } #endif ARMJIT::AddressRange* range = ARMJIT::CodeMemRegions[region] + memoryOffset / 512; // this overcomplicated piece of code basically just finds whole pieces of code memory - // which can be mapped + // which can be mapped/protected u32 offset = 0; bool skipDTCM = num == 0 && region != memregion_DTCM; while (offset < mirrorSize) { - if (skipDTCM && mirrorStart + offset == NDS::ARM9->DTCMBase) + if (skipDTCM && mirrorStart + offset == dtcmStart) { - SetCodeProtectionRange(NDS::ARM9->DTCMBase, NDS::ARM9->DTCMSize, 0, 0); - offset += NDS::ARM9->DTCMSize; +#ifdef _WIN32 + SetCodeProtectionRange(dtcmStart, dtcmSize, 0, 0); +#endif + offset += dtcmSize; } else { @@ -587,7 +625,7 @@ bool MapAtAddress(u32 addr) Mapping mapping{mirrorStart, mirrorSize, memoryOffset, num}; Mappings[region].Add(mapping); - printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + mirrorSize - 1); + //printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + mirrorSize - 1); return true; } @@ -745,15 +783,7 @@ bool IsFastmemCompatible(int region) || region == memregion_NewSharedWRAM_C) return false; #endif - if (region == memregion_DTCM - || region == memregion_MainRAM - || region == memregion_NewSharedWRAM_A - || region == memregion_NewSharedWRAM_B - || region == memregion_NewSharedWRAM_C - || region == memregion_SharedWRAM) - return false; - //return OffsetsPerRegion[region] != UINT32_MAX; - return false; + return OffsetsPerRegion[region] != UINT32_MAX; } bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mirrorStart, u32& mirrorSize) diff --git a/src/DSi.cpp b/src/DSi.cpp index e8b1231..aea9b5a 100644 --- a/src/DSi.cpp +++ b/src/DSi.cpp @@ -542,15 +542,15 @@ void MapNWRAM_A(u32 num, u8 val) return; } -#ifdef JIT_ENABLED - ARMJIT_Memory::RemapNWRAM(0); -#endif - int mbkn = 0, mbks = 8*num; u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF; if (oldval == val) return; +#ifdef JIT_ENABLED + ARMJIT_Memory::RemapNWRAM(0); +#endif + MBK[0][mbkn] &= ~(0xFF << mbks); MBK[0][mbkn] |= (val << mbks); MBK[1][mbkn] = MBK[0][mbkn]; @@ -577,15 +577,15 @@ void MapNWRAM_B(u32 num, u8 val) return; } -#ifdef JIT_ENABLED - ARMJIT_Memory::RemapNWRAM(1); -#endif - int mbkn = 1+(num>>2), mbks = 8*(num&3); u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF; if (oldval == val) return; +#ifdef JIT_ENABLED + ARMJIT_Memory::RemapNWRAM(1); +#endif + MBK[0][mbkn] &= ~(0xFF << mbks); MBK[0][mbkn] |= (val << mbks); MBK[1][mbkn] = MBK[0][mbkn]; @@ -616,15 +616,15 @@ void MapNWRAM_C(u32 num, u8 val) return; } -#ifdef JIT_ENABLED - ARMJIT_Memory::RemapNWRAM(2); -#endif - int mbkn = 3+(num>>2), mbks = 8*(num&3); u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF; if (oldval == val) return; +#ifdef JIT_ENABLED + ARMJIT_Memory::RemapNWRAM(2); +#endif + MBK[0][mbkn] &= ~(0xFF << mbks); MBK[0][mbkn] |= (val << mbks); MBK[1][mbkn] = MBK[0][mbkn]; -- cgit v1.2.3 From 05b94eff66e3ae0192dad2a638d18491b15654b0 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Wed, 11 Nov 2020 17:54:27 +0100 Subject: make audio output thread safe(r?) --- src/NDS.cpp | 1 + src/SPU.cpp | 127 ++++++++++++++++++++++++++++++++++++++++++------------------ src/SPU.h | 1 + 3 files changed, 91 insertions(+), 38 deletions(-) (limited to 'src') diff --git a/src/NDS.cpp b/src/NDS.cpp index 5181b34..d68045b 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -999,6 +999,7 @@ u32 RunFrame() ARM7Timestamp-SysTimestamp, GPU3D::Timestamp-SysTimestamp); #endif + SPU::TransferOutput(); NDSCart::FlushSRAMFile(); diff --git a/src/SPU.cpp b/src/SPU.cpp index bb15244..fe798c7 100644 --- a/src/SPU.cpp +++ b/src/SPU.cpp @@ -18,6 +18,7 @@ #include #include +#include "Platform.h" #include "NDS.h" #include "DSi.h" #include "SPU.h" @@ -61,11 +62,15 @@ const s16 PSGTable[8][8] = {-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF} }; -const u32 OutputBufferSize = 2*1024; -s16 OutputBuffer[2 * OutputBufferSize]; -volatile u32 OutputReadOffset; -volatile u32 OutputWriteOffset; +const u32 OutputBufferSize = 2*2048; +s16 OutputBackbuffer[2 * OutputBufferSize]; +u32 OutputBackbufferWritePosition; +s16 OutputFrontBuffer[2 * OutputBufferSize]; +u32 OutputFrontBufferWritePosition; +u32 OutputFrontBufferReadPosition; + +Platform::Mutex* AudioLock; u16 Cnt; u8 MasterVolume; @@ -83,6 +88,8 @@ bool Init() Capture[0] = new CaptureUnit(0); Capture[1] = new CaptureUnit(1); + AudioLock = Platform::Mutex_Create(); + return true; } @@ -93,6 +100,8 @@ void DeInit() delete Capture[0]; delete Capture[1]; + + Platform::Mutex_Free(AudioLock); } void Reset() @@ -114,10 +123,13 @@ void Reset() void Stop() { - memset(OutputBuffer, 0, 2*OutputBufferSize*2); + Platform::Mutex_Lock(AudioLock); + memset(OutputFrontBuffer, 0, 2*OutputBufferSize*2); - OutputReadOffset = 0; - OutputWriteOffset = 0; + OutputBackbufferWritePosition = 0; + OutputFrontBufferReadPosition = 0; + OutputFrontBufferWritePosition = 0; + Platform::Mutex_Unlock(AudioLock); } void DoSavestate(Savestate* file) @@ -704,59 +716,88 @@ void Mix(u32 dummy) if (rightoutput < -0x8000) rightoutput = -0x8000; else if (rightoutput > 0x7FFF) rightoutput = 0x7FFF; - OutputBuffer[OutputWriteOffset ] = leftoutput >> 1; - OutputBuffer[OutputWriteOffset + 1] = rightoutput >> 1; - OutputWriteOffset += 2; - OutputWriteOffset &= ((2*OutputBufferSize)-1); - if (OutputWriteOffset == OutputReadOffset) - { - //printf("!! SOUND FIFO OVERFLOW %d\n", OutputWriteOffset>>1); - // advance the read position too, to avoid losing the entire FIFO - OutputReadOffset += 2; - OutputReadOffset &= ((2*OutputBufferSize)-1); - } + // OutputBufferFrame can never get full because it's + // transfered to OutputBuffer at the end of the frame + OutputBackbuffer[OutputBackbufferWritePosition ] = leftoutput >> 1; + OutputBackbuffer[OutputBackbufferWritePosition + 1] = rightoutput >> 1; + OutputBackbufferWritePosition += 2; NDS::ScheduleEvent(NDS::Event_SPU, true, 1024, Mix, 0); } +void TransferOutput() +{ + Platform::Mutex_Lock(AudioLock); + for (u32 i = 0; i < OutputBackbufferWritePosition; i += 2) + { + OutputFrontBuffer[OutputFrontBufferWritePosition ] = OutputBackbuffer[i ]; + OutputFrontBuffer[OutputFrontBufferWritePosition + 1] = OutputBackbuffer[i + 1]; + + OutputFrontBufferWritePosition += 2; + OutputFrontBufferWritePosition &= OutputBufferSize*2-1; + if (OutputFrontBufferWritePosition == OutputFrontBufferReadPosition) + { + // advance the read position too, to avoid losing the entire FIFO + OutputFrontBufferReadPosition += 2; + OutputFrontBufferReadPosition &= OutputBufferSize*2-1; + } + } + OutputBackbufferWritePosition = 0; + Platform::Mutex_Unlock(AudioLock); +} void TrimOutput() { + Platform::Mutex_Lock(AudioLock); const int halflimit = (OutputBufferSize / 2); - int readpos = OutputWriteOffset - (halflimit*2); + int readpos = OutputFrontBufferWritePosition - (halflimit*2); if (readpos < 0) readpos += (OutputBufferSize*2); - OutputReadOffset = readpos; + OutputFrontBufferReadPosition = readpos; + Platform::Mutex_Unlock(AudioLock); } void DrainOutput() { - OutputReadOffset = 0; - OutputWriteOffset = 0; + Platform::Mutex_Lock(AudioLock); + OutputFrontBufferWritePosition = 0; + OutputFrontBufferReadPosition = 0; + Platform::Mutex_Unlock(AudioLock); } void InitOutput() { - memset(OutputBuffer, 0, 2*OutputBufferSize*2); - OutputReadOffset = 0; - OutputWriteOffset = OutputBufferSize; + Platform::Mutex_Lock(AudioLock); + memset(OutputBackbuffer, 0, 2*OutputBufferSize*2); + memset(OutputFrontBuffer, 0, 2*OutputBufferSize*2); + OutputFrontBufferReadPosition = 0; + OutputFrontBufferWritePosition = 0; + Platform::Mutex_Unlock(AudioLock); } int GetOutputSize() { + Platform::Mutex_Lock(AudioLock); + int ret; - if (OutputWriteOffset >= OutputReadOffset) - ret = OutputWriteOffset - OutputReadOffset; + if (OutputFrontBufferWritePosition >= OutputFrontBufferReadPosition) + ret = OutputFrontBufferWritePosition - OutputFrontBufferReadPosition; else - ret = (OutputBufferSize*2) - OutputReadOffset + OutputWriteOffset; + ret = (OutputBufferSize*2) - OutputFrontBufferReadPosition + OutputFrontBufferWritePosition; ret >>= 1; + + Platform::Mutex_Unlock(AudioLock); return ret; } void Sync(bool wait) { + // this function is currently not used anywhere + // depending on the usage context the thread safety measures could be made + // a lot faster + // sync to audio output in case the core is running too fast // * wait=true: wait until enough audio data has been played // * wait=false: merely skip some audio data to avoid a FIFO overflow @@ -770,32 +811,42 @@ void Sync(bool wait) } else if (GetOutputSize() > halflimit) { - int readpos = OutputWriteOffset - (halflimit*2); + Platform::Mutex_Lock(AudioLock); + + int readpos = OutputFrontBufferWritePosition - (halflimit*2); if (readpos < 0) readpos += (OutputBufferSize*2); - OutputReadOffset = readpos; + OutputFrontBufferReadPosition = readpos; + + Platform::Mutex_Unlock(AudioLock); } } int ReadOutput(s16* data, int samples) { - if (OutputReadOffset == OutputWriteOffset) + Platform::Mutex_Lock(AudioLock); + if (OutputFrontBufferReadPosition == OutputFrontBufferWritePosition) + { + Platform::Mutex_Unlock(AudioLock); return 0; + } for (int i = 0; i < samples; i++) { - *data++ = OutputBuffer[OutputReadOffset]; - *data++ = OutputBuffer[OutputReadOffset + 1]; + *data++ = OutputFrontBuffer[OutputFrontBufferReadPosition]; + *data++ = OutputFrontBuffer[OutputFrontBufferReadPosition + 1]; + + OutputFrontBufferReadPosition += 2; + OutputFrontBufferReadPosition &= ((2*OutputBufferSize)-1); - //if (OutputReadOffset != OutputWriteOffset) + if (OutputFrontBufferWritePosition == OutputFrontBufferReadPosition) { - OutputReadOffset += 2; - OutputReadOffset &= ((2*OutputBufferSize)-1); - } - if (OutputReadOffset == OutputWriteOffset) + Platform::Mutex_Unlock(AudioLock); return i+1; + } } + Platform::Mutex_Unlock(AudioLock); return samples; } diff --git a/src/SPU.h b/src/SPU.h index 397c733..c6b1c7f 100644 --- a/src/SPU.h +++ b/src/SPU.h @@ -41,6 +41,7 @@ void InitOutput(); int GetOutputSize(); void Sync(bool wait); int ReadOutput(s16* data, int samples); +void TransferOutput(); u8 Read8(u32 addr); u16 Read16(u32 addr); -- cgit v1.2.3 From 550241dbad5527fbf0e622193f8a0ef943c35557 Mon Sep 17 00:00:00 2001 From: Raphaël Zumer Date: Sun, 15 Nov 2020 15:15:09 +0000 Subject: Fix GBA file drag-and-drop when the system is off (#817) --- src/frontend/qt_sdl/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index cfdf03d..6fcd8ce 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -1346,7 +1346,7 @@ void MainWindow::dragEnterEvent(QDragEnterEvent* event) QString filename = urls.at(0).toLocalFile(); QString ext = filename.right(3); - if (ext == "nds" || ext == "srl" || ext == "dsi" || (ext == "gba" && RunningSomething)) + if (ext == "nds" || ext == "srl" || ext == "dsi" || ext == "gba") event->acceptProposedAction(); } -- cgit v1.2.3 From 21dbca95434d18914b62287c0bb81677c030e6a3 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Mon, 16 Nov 2020 15:57:56 +0100 Subject: use proper index buffers --- src/GPU3D_OpenGL.cpp | 66 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp index 658b261..8b9f06c 100644 --- a/src/GPU3D_OpenGL.cpp +++ b/src/GPU3D_OpenGL.cpp @@ -74,11 +74,11 @@ typedef struct Polygon* PolyData; u32 NumIndices; - u16* Indices; + u32 IndicesOffset; GLuint PrimType; u32 NumEdgeIndices; - u16* EdgeIndices; + u32 EdgeIndicesOffset; u32 RenderKey; @@ -107,7 +107,11 @@ u32 VertexBuffer[10240 * 7]; u32 NumVertices; GLuint VertexArrayID; +GLuint IndexBufferID; u16 IndexBuffer[2048 * 40]; +u32 NumIndices, NumEdgeIndices; + +const u32 EdgeIndicesOffset = 2048 * 30; GLuint TexMemID; GLuint TexPalMemID; @@ -320,6 +324,9 @@ bool Init() glEnableVertexAttribArray(3); // attrib glVertexAttribIPointer(3, 3, GL_UNSIGNED_INT, 7*4, (void*)(4*4)); + glGenBuffers(1, &IndexBufferID); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, IndexBufferID); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(IndexBuffer), NULL, GL_DYNAMIC_DRAW); glGenFramebuffers(4, &FramebufferID[0]); glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[0]); @@ -563,15 +570,15 @@ void BuildPolygons(RendererPolygon* polygons, int npolys) u32* vptr = &VertexBuffer[0]; u32 vidx = 0; - u16* iptr = &IndexBuffer[0]; - u16* eiptr = &IndexBuffer[2048*30]; + u32 iidx = 0; + u32 eidx = EdgeIndicesOffset; for (int i = 0; i < npolys; i++) { RendererPolygon* rp = &polygons[i]; Polygon* poly = rp->PolyData; - rp->Indices = iptr; + rp->IndicesOffset = iidx; rp->NumIndices = 0; u32 vidx_first = vidx; @@ -606,7 +613,7 @@ void BuildPolygons(RendererPolygon* polygons, int npolys) vptr = SetupVertex(poly, j, vtx, vtxattr, vptr); - *iptr++ = vidx; + IndexBuffer[iidx++] = vidx; rp->NumIndices++; vidx++; @@ -627,9 +634,9 @@ void BuildPolygons(RendererPolygon* polygons, int npolys) } // build a triangle - *iptr++ = vidx_first; - *iptr++ = vidx - 2; - *iptr++ = vidx - 1; + IndexBuffer[iidx++] = vidx_first; + IndexBuffer[iidx++] = vidx - 2; + IndexBuffer[iidx++] = vidx - 1; rp->NumIndices += 3; } else // quad, pentagon, etc @@ -649,9 +656,9 @@ void BuildPolygons(RendererPolygon* polygons, int npolys) if (j >= 2) { // build a triangle - *iptr++ = vidx_first; - *iptr++ = vidx - 1; - *iptr++ = vidx; + IndexBuffer[iidx++] = vidx_first; + IndexBuffer[iidx++] = vidx - 1; + IndexBuffer[iidx++] = vidx; rp->NumIndices += 3; } @@ -743,46 +750,48 @@ void BuildPolygons(RendererPolygon* polygons, int npolys) if (j >= 1) { // build a triangle - *iptr++ = vidx_first; - *iptr++ = vidx - 1; - *iptr++ = vidx; + IndexBuffer[iidx++] = vidx_first; + IndexBuffer[iidx++] = vidx - 1; + IndexBuffer[iidx++] = vidx; rp->NumIndices += 3; } vidx++; } - *iptr++ = vidx_first; - *iptr++ = vidx - 1; - *iptr++ = vidx_first + 1; + IndexBuffer[iidx++] = vidx_first; + IndexBuffer[iidx++] = vidx - 1; + IndexBuffer[iidx++] = vidx_first + 1; rp->NumIndices += 3; } } - rp->EdgeIndices = eiptr; + rp->EdgeIndicesOffset = eidx; rp->NumEdgeIndices = 0; u32 vidx_cur = vidx_first; for (int j = 1; j < poly->NumVertices; j++) { - *eiptr++ = vidx_cur; - *eiptr++ = vidx_cur + 1; + IndexBuffer[eidx++] = vidx_cur; + IndexBuffer[eidx++] = vidx_cur + 1; vidx_cur++; rp->NumEdgeIndices += 2; } - *eiptr++ = vidx_cur; - *eiptr++ = vidx_first; + IndexBuffer[eidx++] = vidx_cur; + IndexBuffer[eidx++] = vidx_first; rp->NumEdgeIndices += 2; } NumVertices = vidx; + NumIndices = iidx; + NumEdgeIndices = eidx; } void RenderSinglePolygon(int i) { RendererPolygon* rp = &PolygonList[i]; - glDrawElements(rp->PrimType, rp->NumIndices, GL_UNSIGNED_SHORT, rp->Indices); + glDrawElements(rp->PrimType, rp->NumIndices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->IndicesOffset * 2)); } int RenderPolygonBatch(int i) @@ -803,7 +812,7 @@ int RenderPolygonBatch(int i) numindices += cur_rp->NumIndices; } - glDrawElements(primtype, numindices, GL_UNSIGNED_SHORT, rp->Indices); + glDrawElements(primtype, numindices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->IndicesOffset * 2)); return numpolys; } @@ -823,7 +832,7 @@ int RenderPolygonEdgeBatch(int i) numindices += cur_rp->NumEdgeIndices; } - glDrawElements(GL_LINES, numindices, GL_UNSIGNED_SHORT, rp->EdgeIndices); + glDrawElements(GL_LINES, numindices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->EdgeIndicesOffset * 2)); return numpolys; } @@ -1320,6 +1329,11 @@ void RenderFrame() glBindBuffer(GL_ARRAY_BUFFER, VertexBufferID); glBufferSubData(GL_ARRAY_BUFFER, 0, NumVertices*7*4, VertexBuffer); + // bind to access the index buffer + glBindVertexArray(VertexArrayID); + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, NumIndices * 2, IndexBuffer); + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, EdgeIndicesOffset * 2, NumEdgeIndices * 2, IndexBuffer + EdgeIndicesOffset); + RenderSceneChunk(0, 192); } -- cgit v1.2.3 From 1085cc14a43fe09b423a0e4334acd32823873d1c Mon Sep 17 00:00:00 2001 From: RSDuck Date: Mon, 16 Nov 2020 17:03:24 +0100 Subject: prevent use after free --- src/NDS.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/NDS.cpp b/src/NDS.cpp index d68045b..2b468be 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -210,13 +210,13 @@ bool Init() void DeInit() { - delete ARM9; - delete ARM7; - #ifdef JIT_ENABLED ARMJIT::DeInit(); #endif + delete ARM9; + delete ARM7; + for (int i = 0; i < 8; i++) delete DMAs[i]; -- cgit v1.2.3 From 842379c4105954782e1145b919d468e596a090e8 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Mon, 16 Nov 2020 17:22:34 +0100 Subject: harmless DMA micro optimisation --- src/DMA.cpp | 55 +++++++++++++++++++++++++++++-------------------------- src/DMA.h | 8 +++----- src/NDS.cpp | 26 +++++++++++++++----------- 3 files changed, 47 insertions(+), 42 deletions(-) (limited to 'src') diff --git a/src/DMA.cpp b/src/DMA.cpp index 18b8a2f..8ad3918 100644 --- a/src/DMA.cpp +++ b/src/DMA.cpp @@ -77,21 +77,6 @@ void DMA::Reset() Running = false; InProgress = false; - - if (NDS::ConsoleType == 1) - { - BusRead16 = (CPU==0) ? DSi::ARM9Read16 : DSi::ARM7Read16; - BusRead32 = (CPU==0) ? DSi::ARM9Read32 : DSi::ARM7Read32; - BusWrite16 = (CPU==0) ? DSi::ARM9Write16 : DSi::ARM7Write16; - BusWrite32 = (CPU==0) ? DSi::ARM9Write32 : DSi::ARM7Write32; - } - else - { - BusRead16 = (CPU==0) ? NDS::ARM9Read16 : NDS::ARM7Read16; - BusRead32 = (CPU==0) ? NDS::ARM9Read32 : NDS::ARM7Read32; - BusWrite16 = (CPU==0) ? NDS::ARM9Write16 : NDS::ARM7Write16; - BusWrite32 = (CPU==0) ? NDS::ARM9Write32 : NDS::ARM7Write32; - } } void DMA::DoSavestate(Savestate* file) @@ -198,13 +183,7 @@ void DMA::Start() NDS::StopCPU(CPU, 1< void DMA::Run9() { if (NDS::ARM9Timestamp >= NDS::ARM9Target) return; @@ -242,7 +221,10 @@ void DMA::Run9() { NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift); - BusWrite16(CurDstAddr, BusRead16(CurSrcAddr)); + if (ConsoleType == 1) + DSi::ARM9Write16(CurDstAddr, DSi::ARM9Read16(CurSrcAddr)); + else + NDS::ARM9Write16(CurDstAddr, NDS::ARM9Read16(CurSrcAddr)); CurSrcAddr += SrcAddrInc<<1; CurDstAddr += DstAddrInc<<1; @@ -278,7 +260,10 @@ void DMA::Run9() { NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift); - BusWrite32(CurDstAddr, BusRead32(CurSrcAddr)); + if (ConsoleType == 1) + DSi::ARM9Write32(CurDstAddr, DSi::ARM9Read32(CurSrcAddr)); + else + NDS::ARM9Write32(CurDstAddr, NDS::ARM9Read32(CurSrcAddr)); CurSrcAddr += SrcAddrInc<<2; CurDstAddr += DstAddrInc<<2; @@ -317,6 +302,7 @@ void DMA::Run9() NDS::ResumeCPU(0, 1< void DMA::Run7() { if (NDS::ARM7Timestamp >= NDS::ARM7Target) return; @@ -354,7 +340,10 @@ void DMA::Run7() { NDS::ARM7Timestamp += unitcycles; - BusWrite16(CurDstAddr, BusRead16(CurSrcAddr)); + if (ConsoleType == 1) + DSi::ARM7Write16(CurDstAddr, DSi::ARM7Read16(CurSrcAddr)); + else + NDS::ARM7Write16(CurDstAddr, NDS::ARM7Read16(CurSrcAddr)); CurSrcAddr += SrcAddrInc<<1; CurDstAddr += DstAddrInc<<1; @@ -390,7 +379,10 @@ void DMA::Run7() { NDS::ARM7Timestamp += unitcycles; - BusWrite32(CurDstAddr, BusRead32(CurSrcAddr)); + if (ConsoleType == 1) + DSi::ARM7Write32(CurDstAddr, DSi::ARM7Read32(CurSrcAddr)); + else + NDS::ARM7Write32(CurDstAddr, NDS::ARM7Read32(CurSrcAddr)); CurSrcAddr += SrcAddrInc<<2; CurDstAddr += DstAddrInc<<2; @@ -425,3 +417,14 @@ void DMA::Run7() InProgress = false; NDS::ResumeCPU(1, 1< +void DMA::Run() +{ + if (!Running) return; + if (CPU == 0) return Run9(); + else return Run7(); +} + +template void DMA::Run<0>(); +template void DMA::Run<1>(); \ No newline at end of file diff --git a/src/DMA.h b/src/DMA.h index 0344fba..b0b4ab2 100644 --- a/src/DMA.h +++ b/src/DMA.h @@ -34,9 +34,12 @@ public: void WriteCnt(u32 val); void Start(); + template void Run(); + template void Run9(); + template void Run7(); bool IsInMode(u32 mode) @@ -86,11 +89,6 @@ private: bool Stall; bool IsGXFIFODMA; - - u16 (*BusRead16)(u32 addr); - u32 (*BusRead32)(u32 addr); - void (*BusWrite16)(u32 addr, u16 val); - void (*BusWrite32)(u32 addr, u32 val); }; #endif diff --git a/src/NDS.cpp b/src/NDS.cpp index 2b468be..0b0263b 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -908,7 +908,7 @@ void RunSystem(u64 timestamp) } } -template +template u32 RunFrame() { FrameStartTimestamp = SysTimestamp; @@ -934,10 +934,10 @@ u32 RunFrame() } else if (CPUStop & 0x0FFF) { - DMAs[0]->Run(); - if (!(CPUStop & 0x80000000)) DMAs[1]->Run(); - if (!(CPUStop & 0x80000000)) DMAs[2]->Run(); - if (!(CPUStop & 0x80000000)) DMAs[3]->Run(); + DMAs[0]->Run(); + if (!(CPUStop & 0x80000000)) DMAs[1]->Run(); + if (!(CPUStop & 0x80000000)) DMAs[2]->Run(); + if (!(CPUStop & 0x80000000)) DMAs[3]->Run(); if (ConsoleType == 1) DSi::RunNDMAs(0); } else @@ -962,10 +962,10 @@ u32 RunFrame() if (CPUStop & 0x0FFF0000) { - DMAs[4]->Run(); - DMAs[5]->Run(); - DMAs[6]->Run(); - DMAs[7]->Run(); + DMAs[4]->Run(); + DMAs[5]->Run(); + DMAs[6]->Run(); + DMAs[7]->Run(); if (ConsoleType == 1) DSi::RunNDMAs(1); } else @@ -1012,10 +1012,14 @@ u32 RunFrame() { #ifdef JIT_ENABLED if (Config::JIT_Enable) - return RunFrame(); + return NDS::ConsoleType == 1 + ? RunFrame() + : RunFrame(); else #endif - return RunFrame(); + return NDS::ConsoleType == 0 + ? RunFrame() + : RunFrame(); } void Reschedule(u64 target) -- cgit v1.2.3 From 690eed9e262ae794715ec915d4cc44db844a27b9 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Mon, 16 Nov 2020 18:33:58 +0100 Subject: GPU2D: don't an indirect call in tight loops --- src/GPU2D.cpp | 68 ++++++++++++++++++++++++++++++++++++++++------------------- src/GPU2D.h | 12 ++++++----- 2 files changed, 53 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp index 7774c65..27aa608 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -228,9 +228,6 @@ void GPU2D::SetFramebuffer(u32* buf) void GPU2D::SetRenderSettings(bool accel) { Accelerated = accel; - - if (Accelerated) DrawPixel = DrawPixel_Accel; - else DrawPixel = DrawPixel_Normal; } @@ -1330,10 +1327,36 @@ void GPU2D::CalculateWindowMask(u32 line) #define DoDrawBG(type, line, num) \ - { if ((BGCnt[num] & 0x0040) && (BGMosaicSize[0] > 0)) DrawBG_##type(line, num); else DrawBG_##type(line, num); } + { \ + if ((BGCnt[num] & 0x0040) && (BGMosaicSize[0] > 0)) \ + { \ + if (Accelerated) DrawBG_##type(line, num); \ + else DrawBG_##type(line, num); \ + } \ + else \ + { \ + if (Accelerated) DrawBG_##type(line, num); \ + else DrawBG_##type(line, num); \ + } \ + } #define DoDrawBG_Large(line) \ - { if ((BGCnt[2] & 0x0040) && (BGMosaicSize[0] > 0)) DrawBG_Large(line); else DrawBG_Large(line); } + do \ + { \ + if ((BGCnt[2] & 0x0040) && (BGMosaicSize[0] > 0)) \ + { \ + if (Accelerated) DrawBG_Large(line); \ + else DrawBG_Large(line); \ + } \ + else \ + { \ + if (Accelerated) DrawBG_Large(line); \ + else DrawBG_Large(line); \ + } \ + } while (false) + +#define DoInterleaveSprites(prio) \ + if (Accelerated) InterleaveSprites(prio); else InterleaveSprites(prio); template void GPU2D::DrawScanlineBGMode(u32 line) @@ -1382,7 +1405,7 @@ void GPU2D::DrawScanlineBGMode(u32 line) } } if ((DispCnt & 0x1000) && NumSprites) - InterleaveSprites(0x40000 | (i<<16)); + DoInterleaveSprites(0x40000 | (i<<16)); } } @@ -1394,7 +1417,7 @@ void GPU2D::DrawScanlineBGMode6(u32 line) { if (DispCnt & 0x0400) { - DoDrawBG_Large(line) + DoDrawBG_Large(line); } } if ((BGCnt[0] & 0x3) == i) @@ -1406,7 +1429,7 @@ void GPU2D::DrawScanlineBGMode6(u32 line) } } if ((DispCnt & 0x1000) && NumSprites) - InterleaveSprites(0x40000 | (i<<16)); + DoInterleaveSprites(0x40000 | (i<<16)) } } @@ -1434,7 +1457,7 @@ void GPU2D::DrawScanlineBGMode7(u32 line) } } if ((DispCnt & 0x1000) && NumSprites) - InterleaveSprites(0x40000 | (i<<16)); + DoInterleaveSprites(0x40000 | (i<<16)) } } @@ -1674,7 +1697,7 @@ void GPU2D::DrawBG_3D() } } -template +template void GPU2D::DrawBG_Text(u32 line, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -1774,7 +1797,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) color = GPU::ReadVRAM_BG(pixelsaddr + tilexoff); if (color) - DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000< +template void GPU2D::DrawBG_Affine(u32 line, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -1920,7 +1943,7 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum) color = GPU::ReadVRAM_BG(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff); if (color) - DrawPixel(&BGOBJLine[i], pal[color], 0x01000000< +template void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -2015,7 +2038,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) color = GPU::ReadVRAM_BG(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)); if (color & 0x8000) - DrawPixel(&BGOBJLine[i], color, 0x01000000<(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)); if (color) - DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff); if (color) - DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000< +template void GPU2D::DrawBG_Large(u32 line) // BG is always BG2 { u16 bgcnt = BGCnt[2]; @@ -2231,7 +2254,7 @@ void GPU2D::DrawBG_Large(u32 line) // BG is always BG2 color = GPU::ReadVRAM_BG(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)); if (color) - DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2); + drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2); } } @@ -2274,6 +2297,7 @@ void GPU2D::ApplySpriteMosaicX() } } +template void GPU2D::InterleaveSprites(u32 prio) { u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; @@ -2297,7 +2321,7 @@ void GPU2D::InterleaveSprites(u32 prio) else color = extpal[pixel & 0xFFF]; - DrawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); + drawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); } } else @@ -2317,7 +2341,7 @@ void GPU2D::InterleaveSprites(u32 prio) else color = pal[pixel & 0xFF]; - DrawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); + drawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); } } } diff --git a/src/GPU2D.h b/src/GPU2D.h index 521adf0..469d6a2 100644 --- a/src/GPU2D.h +++ b/src/GPU2D.h @@ -147,15 +147,17 @@ private: static void DrawPixel_Normal(u32* dst, u16 color, u32 flag); static void DrawPixel_Accel(u32* dst, u16 color, u32 flag); - void (*DrawPixel)(u32* dst, u16 color, u32 flag); + + typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag); void DrawBG_3D(); - template void DrawBG_Text(u32 line, u32 bgnum); - template void DrawBG_Affine(u32 line, u32 bgnum); - template void DrawBG_Extended(u32 line, u32 bgnum); - template void DrawBG_Large(u32 line); + template void DrawBG_Text(u32 line, u32 bgnum); + template void DrawBG_Affine(u32 line, u32 bgnum); + template void DrawBG_Extended(u32 line, u32 bgnum); + template void DrawBG_Large(u32 line); void ApplySpriteMosaicX(); + template void InterleaveSprites(u32 prio); template void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos); template void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos); -- cgit v1.2.3 From 50cdfd01378e72167e5ec5a5fb8c93d894afa566 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Thu, 19 Nov 2020 17:46:21 +0100 Subject: fix edge indices count --- src/GPU3D_OpenGL.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp index 8b9f06c..ba9548e 100644 --- a/src/GPU3D_OpenGL.cpp +++ b/src/GPU3D_OpenGL.cpp @@ -784,7 +784,7 @@ void BuildPolygons(RendererPolygon* polygons, int npolys) NumVertices = vidx; NumIndices = iidx; - NumEdgeIndices = eidx; + NumEdgeIndices = eidx - EdgeIndicesOffset; } void RenderSinglePolygon(int i) -- cgit v1.2.3 From a1cf1967acb72861fefa3fc4ef14f9f2fb9db5d0 Mon Sep 17 00:00:00 2001 From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com> Date: Sun, 22 Nov 2020 12:00:18 +0000 Subject: Fix fullscreen toggle with joysticks (#821) --- src/frontend/qt_sdl/PlatformConfig.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/frontend/qt_sdl/PlatformConfig.cpp b/src/frontend/qt_sdl/PlatformConfig.cpp index c2d40c4..9861662 100644 --- a/src/frontend/qt_sdl/PlatformConfig.cpp +++ b/src/frontend/qt_sdl/PlatformConfig.cpp @@ -120,7 +120,7 @@ ConfigEntry PlatformConfigFile[] = {"HKJoy_Reset", 0, &HKJoyMapping[HK_Reset], -1, NULL, 0}, {"HKJoy_FastForward", 0, &HKJoyMapping[HK_FastForward], -1, NULL, 0}, {"HKJoy_FastForwardToggle", 0, &HKJoyMapping[HK_FastForwardToggle], -1, NULL, 0}, - {"HKJoy_FastForwardToggle", 0, &HKJoyMapping[HK_FullscreenToggle], -1, NULL, 0}, + {"HKJoy_FullscreenToggle", 0, &HKJoyMapping[HK_FullscreenToggle], -1, NULL, 0}, {"HKJoy_SolarSensorDecrease", 0, &HKJoyMapping[HK_SolarSensorDecrease], -1, NULL, 0}, {"HKJoy_SolarSensorIncrease", 0, &HKJoyMapping[HK_SolarSensorIncrease], -1, NULL, 0}, -- cgit v1.2.3 From f11d53c69c6c2be0363d66b7ab0d941dd4564d65 Mon Sep 17 00:00:00 2001 From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com> Date: Sun, 22 Nov 2020 14:31:29 +0000 Subject: Add radio buttons to switch between Direct and Indirect Mode (#822) --- src/frontend/qt_sdl/WifiSettingsDialog.cpp | 18 +++-- src/frontend/qt_sdl/WifiSettingsDialog.h | 3 +- src/frontend/qt_sdl/WifiSettingsDialog.ui | 121 +++++++++++++++++------------ 3 files changed, 83 insertions(+), 59 deletions(-) (limited to 'src') diff --git a/src/frontend/qt_sdl/WifiSettingsDialog.cpp b/src/frontend/qt_sdl/WifiSettingsDialog.cpp index 67297ad..24b339d 100644 --- a/src/frontend/qt_sdl/WifiSettingsDialog.cpp +++ b/src/frontend/qt_sdl/WifiSettingsDialog.cpp @@ -54,7 +54,7 @@ WifiSettingsDialog::WifiSettingsDialog(QWidget* parent) : QDialog(parent), ui(ne LAN_Socket::Init(); haspcap = LAN_PCap::Init(false); - ui->cbDirectMode->setText("Direct mode (requires " PCAP_NAME " and ethernet connection)"); + ui->rbDirectMode->setText("Direct mode (requires " PCAP_NAME " and ethernet connection)"); ui->cbBindAnyAddr->setChecked(Config::SocketBindAnyAddr != 0); ui->cbRandomizeMAC->setChecked(Config::RandomizeMAC != 0); @@ -71,8 +71,9 @@ WifiSettingsDialog::WifiSettingsDialog(QWidget* parent) : QDialog(parent), ui(ne } ui->cbxDirectAdapter->setCurrentIndex(sel); - ui->cbDirectMode->setChecked(Config::DirectLAN != 0); - if (!haspcap) ui->cbDirectMode->setEnabled(false); + ui->rbDirectMode->setChecked(Config::DirectLAN != 0); + ui->rbIndirectMode->setChecked(Config::DirectLAN == 0); + if (!haspcap) ui->rbDirectMode->setEnabled(false); updateAdapterControls(); } @@ -101,7 +102,7 @@ void WifiSettingsDialog::done(int r) Config::SocketBindAnyAddr = ui->cbBindAnyAddr->isChecked() ? 1:0; Config::RandomizeMAC = randommac; - Config::DirectLAN = ui->cbDirectMode->isChecked() ? 1:0; + Config::DirectLAN = ui->rbDirectMode->isChecked() ? 1:0; int sel = ui->cbxDirectAdapter->currentIndex(); if (sel < 0 || sel >= LAN_PCap::NumAdapters) sel = 0; @@ -125,11 +126,14 @@ void WifiSettingsDialog::done(int r) closeDlg(); } -void WifiSettingsDialog::on_cbDirectMode_stateChanged(int state) +void WifiSettingsDialog::on_rbDirectMode_clicked() +{ + updateAdapterControls(); +} +void WifiSettingsDialog::on_rbIndirectMode_clicked() { updateAdapterControls(); } - void WifiSettingsDialog::on_cbxDirectAdapter_currentIndexChanged(int sel) { if (!haspcap) return; @@ -153,7 +157,7 @@ void WifiSettingsDialog::on_cbxDirectAdapter_currentIndexChanged(int sel) void WifiSettingsDialog::updateAdapterControls() { - bool enable = haspcap && ui->cbDirectMode->isChecked(); + bool enable = haspcap && ui->rbDirectMode->isChecked(); ui->cbxDirectAdapter->setEnabled(enable); ui->lblAdapterMAC->setEnabled(enable); diff --git a/src/frontend/qt_sdl/WifiSettingsDialog.h b/src/frontend/qt_sdl/WifiSettingsDialog.h index 6c1f863..600941f 100644 --- a/src/frontend/qt_sdl/WifiSettingsDialog.h +++ b/src/frontend/qt_sdl/WifiSettingsDialog.h @@ -55,7 +55,8 @@ public: private slots: void done(int r); - void on_cbDirectMode_stateChanged(int state); + void on_rbDirectMode_clicked(); + void on_rbIndirectMode_clicked(); void on_cbxDirectAdapter_currentIndexChanged(int sel); private: diff --git a/src/frontend/qt_sdl/WifiSettingsDialog.ui b/src/frontend/qt_sdl/WifiSettingsDialog.ui index 6668d88..174a3dc 100644 --- a/src/frontend/qt_sdl/WifiSettingsDialog.ui +++ b/src/frontend/qt_sdl/WifiSettingsDialog.ui @@ -6,8 +6,8 @@ 0 0 - 479 - 240 + 572 + 296 @@ -58,67 +58,86 @@ Online - - - - MAC address: + + + + Direct Mode Settings + + + + + Network adapter: + + + + + + + + 0 + 0 + + + + + 300 + 0 + + + + <html><head/><body><p>Selects the network adapter through which to route network traffic under direct mode.</p></body></html> + + + + + + + MAC address: + + + + + + + [PLACEHOLDER] + + + + + + + IP address: + + + + + + + [PLACEHOLDER] + + + + - - + + - <html><head/><body><p>Direct mode directly routes network traffic to the host network. It is the most reliable, but requires an ethernet connection.</p><p><br/></p><p>Non-direct mode uses a layer of emulation to get around this, but is more prone to problems.</p></body></html> + <html><head/><body><p>Indirect mode uses libslirp. It requires no extra setup and is easy to use.</p></body></html> - Direct mode [TEXT PLACEHOLDER] + Indirect Mode (uses libslirp, recommended) - - - - - 0 - 0 - - - - - 350 - 0 - - + + - <html><head/><body><p>Selects the network adapter through which to route network traffic under direct mode.</p></body></html> - - - - - - - Network adapter: - - - - - - - IP address: - - - - - - - [PLACEHOLDER] + <html><head/><body><p>Direct mode directly routes network traffic to the host network. It is the most reliable, but requires an ethernet connection.</p><p><br/></p><p>Non-direct mode uses a layer of emulation to get around this, but is more prone to problems.</p></body></html> - - - - - [PLACEHOLDER] + Direct mode [TEXT PLACEHOLDER] -- cgit v1.2.3 From 1ff4a1564f951ef1a60fe2f55afe78aedede674a Mon Sep 17 00:00:00 2001 From: RSDuck Date: Thu, 26 Nov 2020 00:04:19 +0100 Subject: fix DSi mode with interpreter I'm so stupid --- src/NDS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/NDS.cpp b/src/NDS.cpp index 0b0263b..bdbdb97 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -1017,7 +1017,7 @@ u32 RunFrame() : RunFrame(); else #endif - return NDS::ConsoleType == 0 + return NDS::ConsoleType == 1 ? RunFrame() : RunFrame(); } -- cgit v1.2.3 From 7da4550eea43a5ec83d1afc88022aaa62827febc Mon Sep 17 00:00:00 2001 From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com> Date: Sun, 29 Nov 2020 08:11:33 -0800 Subject: Add support for macOS (#771) * use shm_open() instead of memfd_create() on macOS malloc.h isn't a header on macOS * Change OpenGL headers + create ifdef for DO_PROCLIST macOS seems to already have the OpenGL functions defined, without the ifdef, it gives "ambiguous references" errors. * macOS doesn't have ->gregs in uc_mcontext and it doesn't have REG_RIP either https://github.com/gperftools/gperftools/blob/master/m4/pc_from_ucontext.m4 * use getpid() to make memory file name unique * #ifndef __APPLE__ for AF_PACKET and linux/if_packet.h * Add include and link directories for macOS and link the OpenGL framework * Add macOS CI * Use newly added libslirp package from Homebrew https://github.com/Homebrew/homebrew-core/pull/63412 * Use Apple's Clang instead of GNU GCC on macOS * Add macOS build instructions to README * Try to fix macOS undefined symbol * snprintf doesn't take null terminator into account * Map new memory on macOS for JIT * Only use gcc-ar if using GNU Compiler * re-add fastmem code - whoops! * Fix style issue - use camelCase not snake_case * Set Minimum macOS version * Switch Minimum OS X version to 10.9 * Add macOS libpcap library name * fix memory leak * Fix binding keys in macOS * Allow getting MAC address on macOS melonDS on Linux uses AF_PACKET, which doesn't exist on macOS. Instead, this commit uses AF_LINK on macOS to get the MAC address. * Remove unneeded macOS CI dependencies * Build melonDS app bundle on macOS Now it is no longer required to install the libraries on macOS, they come with the app bundle. * fix macOS CI not being able to find macdeployqt * copy melonDS.app with recursive because it's a folder * Disable fastmem checkbox on macOS * Disable fastmem by default in config * forgot a semicolon * Don't bundle libraries, causes issues on macOS <10.15 * Update README + allow finding version in Finder on macOS * Make sure fastmem checkbox stays uncheckable --- .github/workflows/build-macos.yml | 39 ++++++++++++++++++++++++++++++ CMakeLists.txt | 4 ++- README.md | 15 ++++++++++++ melonDS.icns | Bin 0 -> 401172 bytes melonDS.plist | 24 ++++++++++++++++++ src/ARMJIT_Memory.cpp | 31 +++++++++++++++++++++--- src/ARMJIT_x64/ARMJIT_Compiler.cpp | 2 ++ src/ARMJIT_x64/ARMJIT_Linkage.s | 10 ++++++++ src/CMakeLists.txt | 9 ++++++- src/Config.cpp | 6 ++++- src/OpenGLSupport.h | 15 ++++++++++-- src/frontend/qt_sdl/CMakeLists.txt | 15 +++++++++++- src/frontend/qt_sdl/EmuSettingsDialog.cpp | 7 +++++- src/frontend/qt_sdl/InputConfigDialog.cpp | 1 + src/frontend/qt_sdl/LAN_PCap.cpp | 21 ++++++++++++++-- 15 files changed, 186 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/build-macos.yml create mode 100644 melonDS.icns create mode 100644 melonDS.plist (limited to 'src') diff --git a/.github/workflows/build-macos.yml b/.github/workflows/build-macos.yml new file mode 100644 index 0000000..e2b942a --- /dev/null +++ b/.github/workflows/build-macos.yml @@ -0,0 +1,39 @@ +name: CMake Build (macOS x86-64) + +on: + push: + branches: + - master + pull_request: + branches: + - master + +env: + BUILD_TYPE: Release + +jobs: + build: + + runs-on: macos-latest + + steps: + - uses: actions/checkout@v1 + - name: Install dependencies + working-directory: ${{runner.workspace}} + run: | + brew install cmake sdl2 qt5 libslirp + - name: Create build environment + run: mkdir ${{runner.workspace}}/build + - name: Configure + working-directory: ${{runner.workspace}}/build + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DQt5_DIR=$(brew --prefix qt5)/lib/cmake/Qt5 + - name: Make + working-directory: ${{runner.workspace}}/build + run: | + make -j$(sysctl -n hw.ncpu) + mkdir dist + cp -r melonDS.app dist + - uses: actions/upload-artifact@v1 + with: + name: melonDS.app + path: ${{runner.workspace}}/build/dist diff --git a/CMakeLists.txt b/CMakeLists.txt index 04ad2a5..fcd4741 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,8 @@ if (POLICY CMP0076) cmake_policy(SET CMP0076 NEW) endif() +set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "Minimum OS X deployment version") + project(melonDS CXX) set(CMAKE_C_STANDARD 11) @@ -100,4 +102,4 @@ add_subdirectory(src) if (BUILD_QT_SDL) add_subdirectory(src/frontend/qt_sdl) -endif() +endif() \ No newline at end of file diff --git a/README.md b/README.md index 2fb2315..64489db 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,21 @@ If everything went well, melonDS and the libraries it needs should now be in the ``` If everything went well, melonDS should now be in the `dist` folder. +### macOS: +1. Install the [Homebrew Package Manager](https://brew.sh) +2. Install dependencies: `brew install git pkg-config cmake sdl2 qt5 libslirp` +3. Compile: + ```zsh + git clone https://github.com/Arisotura/melonDS.git + cd melonDS + mkdir build && cd build + cmake .. -DQt5_DIR=$(brew --prefix qt5)/lib/cmake/Qt5 + make -j$(sysctl -n hw.ncpu) + mkdir dist && cp -r melonDS.app dist + ``` +If everything went well, melonDS.app should now be in the `dist` folder. + + ## TODO LIST * DSi emulation diff --git a/melonDS.icns b/melonDS.icns new file mode 100644 index 0000000..b4f3733 Binary files /dev/null and b/melonDS.icns differ diff --git a/melonDS.plist b/melonDS.plist new file mode 100644 index 0000000..1328777 --- /dev/null +++ b/melonDS.plist @@ -0,0 +1,24 @@ + + + + + CFBundleExecutable + melonDS + CFBundleIconFile + melonDS.icns + CFBundleIdentifier + net.kuribo64.melonDS + CFBundleDevelopmentRegion + English + CFBundlePackageType + APPL + CFBundleVersion + 0.9 + CFBundleShortVersionString + 0.9 + NSHumanReadableCopyright + Licensed under GPLv3 + NSHighResolutionCapable + + + \ No newline at end of file diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp index 70d18e6..cc0f149 100644 --- a/src/ARMJIT_Memory.cpp +++ b/src/ARMJIT_Memory.cpp @@ -22,7 +22,9 @@ #include "NDSCart.h" #include "SPU.h" +#ifndef __APPLE__ #include +#endif /* We're handling fastmem here. @@ -152,7 +154,12 @@ static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext) u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); #ifdef __x86_64__ desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea; - desc.FaultPC = (u8*)context->uc_mcontext.gregs[REG_RIP]; + #ifdef __APPLE__ + desc.FaultPC = (u8*)context->uc_mcontext->__ss.__rip; + #else + desc.FaultPC = (u8*)context->uc_mcontext.gregs[REG_RIP]; + #endif + #else desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea; desc.FaultPC = (u8*)context->uc_mcontext.pc; @@ -161,7 +168,11 @@ static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext) if (ARMJIT_Memory::FaultHandler(desc)) { #ifdef __x86_64__ - context->uc_mcontext.gregs[REG_RIP] = (u64)desc.FaultPC; + #ifdef __APPLE__ + context->uc_mcontext->__ss.__rip = (u64)desc.FaultPC; + #else + context->uc_mcontext.gregs[REG_RIP] = (u64)desc.FaultPC; + #endif #else context->uc_mcontext.pc = (u64)desc.FaultPC; #endif @@ -701,7 +712,14 @@ void Init() FastMem7Start = MemoryBase + AddrSpaceSize; MemoryBase = MemoryBase + AddrSpaceSize*2; - MemoryFile = memfd_create("melondsfastmem", 0); + #ifdef __APPLE__ + char* fastmemPidName = new char[snprintf(NULL, 0, "melondsfastmem%d", getpid()) + 1]; + sprintf(fastmemPidName, "melondsfastmem%d", getpid()); + MemoryFile = shm_open(fastmemPidName, O_RDWR|O_CREAT, 0600); + delete[] fastmemPidName; + #else + MemoryFile = memfd_create("melondsfastmem", 0); + #endif ftruncate(MemoryFile, MemoryTotalSize); struct sigaction sa; @@ -736,6 +754,11 @@ void DeInit() svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize); virtmemFree(MemoryBaseCodeMem, MemoryTotalSize); free(MemoryBase); +#elif defined(__APPLE__) + char* fastmemPidName = new char[snprintf(NULL, 0, "melondsfastmem%d", getpid()) + 1]; + sprintf(fastmemPidName, "melondsfastmem%d", getpid()); + shm_unlink(fastmemPidName); + delete[] fastmemPidName; #elif defined(_WIN32) assert(UnmapViewOfFile(MemoryBase)); CloseHandle(MemoryFile); @@ -1259,4 +1282,4 @@ void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) return NULL; } -} \ No newline at end of file +} diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index 076f48c..cc4ad80 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -221,6 +221,8 @@ Compiler::Compiler() #ifdef _WIN32 DWORD dummy; VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy); + #elif defined(__APPLE__) + pageAligned = (u8*)mmap(NULL, 1024*1024*32, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS ,-1, 0); #else mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE); #endif diff --git a/src/ARMJIT_x64/ARMJIT_Linkage.s b/src/ARMJIT_x64/ARMJIT_Linkage.s index 0a84df0..8cc0b5f 100644 --- a/src/ARMJIT_x64/ARMJIT_Linkage.s +++ b/src/ARMJIT_x64/ARMJIT_Linkage.s @@ -29,8 +29,13 @@ .p2align 4,,15 +#ifdef __APPLE__ +.global _ARM_Dispatch +_ARM_Dispatch: +#else .global ARM_Dispatch ARM_Dispatch: +#endif #ifdef WIN64 push rdi push rsi @@ -54,8 +59,13 @@ ARM_Dispatch: .p2align 4,,15 +#ifdef __APPLE__ +.global _ARM_Ret +_ARM_Ret: +#else .global ARM_Ret ARM_Ret: +#endif mov [RCPU + ARM_CPSR_offset], RCPSR #ifdef WIN64 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d6c3897..446480d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -100,10 +100,17 @@ if (ENABLE_JIT) endif() endif() +if (APPLE) + target_include_directories(core PUBLIC /usr/local/include) + target_link_directories(core PUBLIC /usr/local/lib) +endif() + if (ENABLE_OGLRENDERER) if (WIN32) target_link_libraries(core ole32 comctl32 ws2_32 opengl32) - else() + elseif (APPLE) + target_link_libraries(core "-framework OpenGL") + else() target_link_libraries(core GL EGL) endif() else() diff --git a/src/Config.cpp b/src/Config.cpp index 341b14c..f7db252 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -73,7 +73,11 @@ ConfigEntry ConfigFile[] = {"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 32, NULL, 0}, {"JIT_BranchOptimisations", 0, &JIT_BranchOptimisations, 1, NULL, 0}, {"JIT_LiteralOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0}, - {"JIT_FastMemory", 0, &JIT_FastMemory, 1, NULL, 0}, + #ifdef __APPLE__ + {"JIT_FastMemory", 0, &JIT_FastMemory, 0, NULL, 0}, + #else + {"JIT_FastMemory", 0, &JIT_FastMemory, 1, NULL, 0}, + #endif #endif {"", -1, NULL, 0, NULL, 0} diff --git a/src/OpenGLSupport.h b/src/OpenGLSupport.h index 925c0ad..44c511f 100644 --- a/src/OpenGLSupport.h +++ b/src/OpenGLSupport.h @@ -23,8 +23,13 @@ #include // TODO: different includes for each platform -#include -#include +#ifdef __APPLE__ + #include + #include +#else + #include + #include +#endif #include "Platform.h" @@ -61,6 +66,11 @@ #endif +#ifdef __APPLE__ + +#define DO_PROCLIST(func) + +#else #define DO_PROCLIST(func) \ DO_PROCLIST_1_3(func) \ @@ -128,6 +138,7 @@ \ func(GLGETSTRINGI, glGetStringi); \ +#endif namespace OpenGL { diff --git a/src/frontend/qt_sdl/CMakeLists.txt b/src/frontend/qt_sdl/CMakeLists.txt index 9a0a025..0d695d6 100644 --- a/src/frontend/qt_sdl/CMakeLists.txt +++ b/src/frontend/qt_sdl/CMakeLists.txt @@ -95,6 +95,19 @@ if (PORTABLE) add_definitions(-DPORTABLE) endif() +if (APPLE) + set_target_properties(melonDS PROPERTIES + MACOSX_BUNDLE true + MACOSX_BUNDLE_INFO_PLIST ${CMAKE_SOURCE_DIR}/melonDS.plist + OUTPUT_NAME melonDS + ) + + # Copy icon into the bundle + target_sources(melonDS PRIVATE "${CMAKE_SOURCE_DIR}/melonDS.icns") + set_source_files_properties("${CMAKE_SOURCE_DIR}/melonDS.icns" PROPERTIES MACOSX_PACKAGE_LOCATION Resources) + +endif() + install(FILES ../../../net.kuribo64.melonDS.desktop DESTINATION ${CMAKE_INSTALL_PREFIX}/share/applications) install(FILES ../../../icon/melon_16x16.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/16x16/apps RENAME net.kuribo64.melonDS.png) install(FILES ../../../icon/melon_32x32.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/32x32/apps RENAME net.kuribo64.melonDS.png) @@ -102,4 +115,4 @@ install(FILES ../../../icon/melon_48x48.png DESTINATION ${CMAKE_INSTALL_PREFIX}/ install(FILES ../../../icon/melon_64x64.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/64x64/apps RENAME net.kuribo64.melonDS.png) install(FILES ../../../icon/melon_128x128.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/128x128/apps RENAME net.kuribo64.melonDS.png) install(FILES ../../../icon/melon_256x256.png DESTINATION ${CMAKE_INSTALL_PREFIX}/share/icons/hicolor/256x256/apps RENAME net.kuribo64.melonDS.png) -install(TARGETS melonDS RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) +install(TARGETS melonDS BUNDLE DESTINATION ${CMAKE_BINARY_DIR} RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) diff --git a/src/frontend/qt_sdl/EmuSettingsDialog.cpp b/src/frontend/qt_sdl/EmuSettingsDialog.cpp index 79ce5ed..3183182 100644 --- a/src/frontend/qt_sdl/EmuSettingsDialog.cpp +++ b/src/frontend/qt_sdl/EmuSettingsDialog.cpp @@ -65,6 +65,9 @@ EmuSettingsDialog::EmuSettingsDialog(QWidget* parent) : QDialog(parent), ui(new ui->chkJITBranchOptimisations->setChecked(Config::JIT_BranchOptimisations != 0); ui->chkJITLiteralOptimisations->setChecked(Config::JIT_LiteralOptimisations != 0); ui->chkJITFastMemory->setChecked(Config::JIT_FastMemory != 0); + #ifdef __APPLE__ + ui->chkJITFastMemory->setDisabled(true); + #endif ui->spnJITMaximumBlockSize->setValue(Config::JIT_MaxBlockSize); #else ui->chkEnableJIT->setDisabled(true); @@ -329,6 +332,8 @@ void EmuSettingsDialog::on_chkEnableJIT_toggled() bool disabled = !ui->chkEnableJIT->isChecked(); ui->chkJITBranchOptimisations->setDisabled(disabled); ui->chkJITLiteralOptimisations->setDisabled(disabled); - ui->chkJITFastMemory->setDisabled(disabled); + #ifndef __APPLE__ + ui->chkJITFastMemory->setDisabled(disabled); + #endif ui->spnJITMaximumBlockSize->setDisabled(disabled); } diff --git a/src/frontend/qt_sdl/InputConfigDialog.cpp b/src/frontend/qt_sdl/InputConfigDialog.cpp index 9f08731..eaf1e9b 100644 --- a/src/frontend/qt_sdl/InputConfigDialog.cpp +++ b/src/frontend/qt_sdl/InputConfigDialog.cpp @@ -216,6 +216,7 @@ KeyMapButton::KeyMapButton(int* mapping, bool hotkey) : QPushButton() setCheckable(true); setText(mappingText()); + setFocusPolicy(Qt::StrongFocus); //Fixes binding keys in macOS connect(this, &KeyMapButton::clicked, this, &KeyMapButton::onClick); } diff --git a/src/frontend/qt_sdl/LAN_PCap.cpp b/src/frontend/qt_sdl/LAN_PCap.cpp index ce278bc..8138699 100644 --- a/src/frontend/qt_sdl/LAN_PCap.cpp +++ b/src/frontend/qt_sdl/LAN_PCap.cpp @@ -33,7 +33,11 @@ #include #include #include - #include + #ifdef __APPLE__ + #include + #else + #include + #endif #endif @@ -66,6 +70,9 @@ const char* PCapLibNames[] = #ifdef __WIN32__ // TODO: name for npcap in non-WinPCap mode "wpcap.dll", +#elif defined(__APPLE__) + "libpcap.A.dylib", + "libpcap.dylib", #else // Linux lib names "libpcap.so.1", @@ -276,6 +283,16 @@ bool Init(bool open_adapter) struct sockaddr_in* sa = (sockaddr_in*)curaddr->ifa_addr; memcpy(adata->IP_v4, &sa->sin_addr, 4); } + #ifdef __APPLE__ + else if (af == AF_LINK) + { + struct sockaddr_dl* sa = (sockaddr_dl*)curaddr->ifa_addr; + if (sa->sdl_alen != 6) + printf("weird MAC length %d for %s\n", sa->sdl_alen, curaddr->ifa_name); + else + memcpy(adata->MAC, LLADDR(sa), 6); + } + #else else if (af == AF_PACKET) { struct sockaddr_ll* sa = (sockaddr_ll*)curaddr->ifa_addr; @@ -284,7 +301,7 @@ bool Init(bool open_adapter) else memcpy(adata->MAC, sa->sll_addr, 6); } - + #endif curaddr = curaddr->ifa_next; } } -- cgit v1.2.3 From acb272ed782d7682deb39901ce9ea1ea86269af3 Mon Sep 17 00:00:00 2001 From: Filippo Scognamiglio Date: Mon, 30 Nov 2020 15:33:43 +0100 Subject: Use ashmem instead of memfd_create on Android. (#816) * Use ashmem instead of memfd_create on Android. * Fix code styling issues. * fix small mistake in merge commit Co-authored-by: RSDuck --- src/ARMJIT_Memory.cpp | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp index cc0f149..f9f82aa 100644 --- a/src/ARMJIT_Memory.cpp +++ b/src/ARMJIT_Memory.cpp @@ -10,6 +10,12 @@ #include #endif +#if defined(__ANDROID__) +#include +#include +#include +#endif + #include "ARMJIT_Memory.h" #include "ARMJIT_Internal.h" @@ -58,6 +64,10 @@ struct FaultDescription bool FaultHandler(FaultDescription& faultDesc); } +#if defined(__ANDROID__) +#define ASHMEM_DEVICE "/dev/ashmem" +#endif + #if defined(__SWITCH__) // with LTO the symbols seem to be not properly overriden // if they're somewhere else @@ -712,14 +722,31 @@ void Init() FastMem7Start = MemoryBase + AddrSpaceSize; MemoryBase = MemoryBase + AddrSpaceSize*2; - #ifdef __APPLE__ - char* fastmemPidName = new char[snprintf(NULL, 0, "melondsfastmem%d", getpid()) + 1]; - sprintf(fastmemPidName, "melondsfastmem%d", getpid()); - MemoryFile = shm_open(fastmemPidName, O_RDWR|O_CREAT, 0600); - delete[] fastmemPidName; - #else - MemoryFile = memfd_create("melondsfastmem", 0); - #endif +#if defined(__ANDROID__) + static void* libandroid = dlopen("libandroid.so", RTLD_LAZY | RTLD_LOCAL); + using type_ASharedMemory_create = int(*)(const char* name, size_t size); + static void* symbol = dlsym(libandroid, "ASharedMemory_create"); + static auto shared_memory_create = reinterpret_cast(symbol); + + if (shared_memory_create) + { + MemoryFile = shared_memory_create("melondsfastmem", MemoryTotalSize); + } + else + { + int fd = open(ASHMEM_DEVICE, O_RDWR); + ioctl(fd, ASHMEM_SET_NAME, "melondsfastmem"); + ioctl(fd, ASHMEM_SET_SIZE, MemoryTotalSize); + MemoryFile = fd; + } +#elif defined(__APPLE__) + char* fastmemPidName = new char[snprintf(NULL, 0, "melondsfastmem%d", getpid()) + 1]; + sprintf(fastmemPidName, "melondsfastmem%d", getpid()); + MemoryFile = shm_open(fastmemPidName, O_RDWR|O_CREAT, 0600); + delete[] fastmemPidName; +#else + MemoryFile = memfd_create("melondsfastmem", 0); +#endif ftruncate(MemoryFile, MemoryTotalSize); struct sigaction sa; -- cgit v1.2.3 From 6e8bac39091d0321f43a3e7574d3359255996e9f Mon Sep 17 00:00:00 2001 From: RSDuck Date: Mon, 30 Nov 2020 16:58:52 +0100 Subject: Merge vram dirty tracking Squashed commit of the following: commit b463a05d4b909372f0cd1ad91caa0c77a25e5901 Author: RSDuck Date: Mon Nov 30 01:55:35 2020 +0100 minor fix commit ce73cebbdf5da243d7ebade82d8799ded9cd6b28 Author: RSDuck Date: Mon Nov 30 00:43:08 2020 +0100 fix dirty flags of BG/OBJ mappings not being reset commit fc5d73a6178e3adc444398bdd23de8314b5ca8f8 Author: RSDuck Date: Mon Nov 30 00:11:13 2020 +0100 use flat vram for gpu2d everywhere commit 34ee9fe2bf04fcfa2a5a1c8d78d70007e606f1a2 Author: RSDuck Date: Sat Nov 28 19:10:34 2020 +0100 mark VRAM dirty for display capture commit e8778fa2f429c6df0eece19d6a5ee83ae23a0cf4 Author: RSDuck Date: Sat Nov 28 18:59:31 2020 +0100 use flat VRAM for textures and texpals also skip rendering if nothing changed and a bunch of fixes commit 53f2041e2e1a28b35702a2ed51de885c36689f71 Author: RSDuck Date: Fri Nov 27 18:29:56 2020 +0100 use vram dirty tracking for extpals also preparations to take this further commit 4cdfa329e95aed26d3b21319c8fd86a04abf20f7 Author: RSDuck Date: Mon Nov 16 23:32:22 2020 +0100 VRAM dirty tracking --- src/GPU.cpp | 338 +++++++++++++++++++++++++++++++++++---- src/GPU.h | 87 +++++++++- src/GPU2D.cpp | 294 +++++++++++++++++----------------- src/GPU2D.h | 6 - src/GPU3D.cpp | 15 ++ src/GPU3D.h | 2 + src/GPU3D_Soft.cpp | 83 ++++++---- src/NonStupidBitfield.h | 149 +++++++++++++++++ src/Platform.h | 2 +- src/frontend/qt_sdl/Platform.cpp | 4 +- 10 files changed, 765 insertions(+), 215 deletions(-) create mode 100644 src/NonStupidBitfield.h (limited to 'src') diff --git a/src/GPU.cpp b/src/GPU.cpp index 7989750..e6b24e0 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -49,8 +49,8 @@ u8 VRAM_F[ 16*1024]; u8 VRAM_G[ 16*1024]; u8 VRAM_H[ 32*1024]; u8 VRAM_I[ 16*1024]; -u8* VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I}; -u32 VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF}; +u8* const VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I}; +u32 const VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF}; u8 VRAMCNT[9]; u8 VRAMSTAT; @@ -85,6 +85,62 @@ bool Accelerated; GPU2D* GPU2D_A; GPU2D* GPU2D_B; +/* + VRAM invalidation tracking + + - we want to know when a VRAM region used for graphics changed + - for some regions unmapping is mandatory to modify them (Texture, TexPal and ExtPal) and + we don't want to completely invalidate them every time they're unmapped and remapped + + For this reason we don't track the dirtyness per mapping region, but instead per VRAM bank + with VRAMDirty. Writes to LCDC go directly into VRAMDirty, while writes via other mapping regions + like BG or OBJ are first tracked in VRAMWritten_* and need to be flushed using SyncDirtyFlags. + + This is more or less a description of VRAMTrackingSet::DeriveState + Each time before the memory is read two things could have happened + to each 16kb piece (16kb is the smallest unit in which mappings can + be made thus also the size VRAMMap_* use): + - this piece was remapped compared to last time we checked, + which means this location in memory is invalid. + - this piece wasn't remapped, which means we need to check whether + it was changed. This can be archived by checking VRAMDirty. + VRAMDirty need to be reset for the respective VRAM bank. +*/ + +VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG; +VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ; +VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG; +VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ; + +VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal; +VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal; +VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal; +VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal; + +VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture; +VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal; + + +NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG; +NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ; +NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG; +NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ; +NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7; + +NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9]; + +u8 VRAMFlat_ABG[512*1024]; +u8 VRAMFlat_BBG[128*1024]; +u8 VRAMFlat_AOBJ[256*1024]; +u8 VRAMFlat_BOBJ[128*1024]; + +u8 VRAMFlat_ABGExtPal[32*1024]; +u8 VRAMFlat_BBGExtPal[32*1024]; +u8 VRAMFlat_AOBJExtPal[8*1024]; +u8 VRAMFlat_BOBJExtPal[8*1024]; + +u8 VRAMFlat_Texture[512*1024]; +u8 VRAMFlat_TexPal[128*1024]; bool Init() { @@ -113,6 +169,30 @@ void DeInit() if (Framebuffer[1][1]) delete[] Framebuffer[1][1]; } +void ResetVRAMCache() +{ + for (int i = 0; i < 9; i++) + VRAMDirty[i] = NonStupidBitField<128*1024/VRAMDirtyGranularity>(); + + VRAMDirty_ABG.Reset(); + VRAMDirty_BBG.Reset(); + VRAMDirty_AOBJ.Reset(); + VRAMDirty_BOBJ.Reset(); + VRAMDirty_ABGExtPal.Reset(); + VRAMDirty_BBGExtPal.Reset(); + VRAMDirty_AOBJExtPal.Reset(); + VRAMDirty_BOBJExtPal.Reset(); + + memset(VRAMFlat_ABG, 0, sizeof(VRAMFlat_ABG)); + memset(VRAMFlat_BBG, 0, sizeof(VRAMFlat_BBG)); + memset(VRAMFlat_AOBJ, 0, sizeof(VRAMFlat_AOBJ)); + memset(VRAMFlat_BOBJ, 0, sizeof(VRAMFlat_BOBJ)); + memset(VRAMFlat_ABGExtPal, 0, sizeof(VRAMFlat_ABGExtPal)); + memset(VRAMFlat_BBGExtPal, 0, sizeof(VRAMFlat_BBGExtPal)); + memset(VRAMFlat_AOBJExtPal, 0, sizeof(VRAMFlat_AOBJExtPal)); + memset(VRAMFlat_BOBJExtPal, 0, sizeof(VRAMFlat_BOBJExtPal)); +} + void Reset() { VCount = 0; @@ -186,6 +266,8 @@ void Reset() GPU2D_B->SetFramebuffer(Framebuffer[backbuf][0]); ResetRenderer(); + + ResetVRAMCache(); } void Stop() @@ -261,6 +343,8 @@ void DoSavestate(Savestate* file) GPU2D_A->DoSavestate(file); GPU2D_B->DoSavestate(file); GPU3D::DoSavestate(file); + + ResetVRAMCache(); } void AssignFramebuffers() @@ -411,18 +495,8 @@ void SetRenderSettings(int renderer, RenderSettings& settings) u8* GetUniqueBankPtr(u32 mask, u32 offset) { - if (!mask) return NULL; - - int num = 0; - if (!(mask & 0xFF)) { mask >>= 8; num += 8; } - else - { - if (!(mask & 0xF)) { mask >>= 4; num += 4; } - if (!(mask & 0x3)) { mask >>= 2; num += 2; } - if (!(mask & 0x1)) { mask >>= 1; num += 1; } - } - if (mask != 1) return NULL; - + if (!mask || (mask & (mask - 1)) != 0) return NULL; + int num = __builtin_ctz(mask); return &VRAM[num][offset & VRAMMask[num]]; } @@ -606,8 +680,6 @@ void MapVRAM_E(u32 bank, u8 cnt) case 4: // ABG ext palette UNMAP_RANGE(ABGExtPal, 0, 4); - GPU2D_A->BGExtPalDirty(0); - GPU2D_A->BGExtPalDirty(2); break; } } @@ -634,8 +706,6 @@ void MapVRAM_E(u32 bank, u8 cnt) case 4: // ABG ext palette MAP_RANGE(ABGExtPal, 0, 4); - GPU2D_A->BGExtPalDirty(0); - GPU2D_A->BGExtPalDirty(2); break; } } @@ -687,12 +757,10 @@ void MapVRAM_FG(u32 bank, u8 cnt) case 4: // ABG ext palette VRAMMap_ABGExtPal[((oldofs & 0x1) << 1)] &= ~bankmask; VRAMMap_ABGExtPal[((oldofs & 0x1) << 1) + 1] &= ~bankmask; - GPU2D_A->BGExtPalDirty((oldofs & 0x1) << 1); break; case 5: // AOBJ ext palette VRAMMap_AOBJExtPal &= ~bankmask; - GPU2D_A->OBJExtPalDirty(); break; } } @@ -732,12 +800,10 @@ void MapVRAM_FG(u32 bank, u8 cnt) case 4: // ABG ext palette VRAMMap_ABGExtPal[((ofs & 0x1) << 1)] |= bankmask; VRAMMap_ABGExtPal[((ofs & 0x1) << 1) + 1] |= bankmask; - GPU2D_A->BGExtPalDirty((ofs & 0x1) << 1); break; case 5: // AOBJ ext palette VRAMMap_AOBJExtPal |= bankmask; - GPU2D_A->OBJExtPalDirty(); break; } } @@ -773,8 +839,6 @@ void MapVRAM_H(u32 bank, u8 cnt) case 2: // BBG ext palette UNMAP_RANGE(BBGExtPal, 0, 4); - GPU2D_B->BGExtPalDirty(0); - GPU2D_B->BGExtPalDirty(2); break; } } @@ -800,8 +864,6 @@ void MapVRAM_H(u32 bank, u8 cnt) case 2: // BBG ext palette MAP_RANGE(BBGExtPal, 0, 4); - GPU2D_B->BGExtPalDirty(0); - GPU2D_B->BGExtPalDirty(2); break; } } @@ -841,7 +903,6 @@ void MapVRAM_I(u32 bank, u8 cnt) case 3: // BOBJ ext palette VRAMMap_BOBJExtPal &= ~bankmask; - GPU2D_B->OBJExtPalDirty(); break; } } @@ -871,7 +932,6 @@ void MapVRAM_I(u32 bank, u8 cnt) case 3: // BOBJ ext palette VRAMMap_BOBJExtPal |= bankmask; - GPU2D_B->OBJExtPalDirty(); break; } } @@ -937,6 +997,8 @@ void StartHBlank(u32 line) DispStat[0] |= (1<<1); DispStat[1] |= (1<<1); + SyncDirtyFlags(); + if (VCount < 192) { // draw @@ -1096,4 +1158,224 @@ void SetVCount(u16 val) NextVCount = val; } +template +NonStupidBitField VRAMTrackingSet::DeriveState(u32* currentMappings) +{ + NonStupidBitField result; + u16 banksToBeZeroed = 0; + for (u32 i = 0; i < Size / MappingGranularity; i++) + { + if (currentMappings[i] != Mapping[i]) + { + result |= NonStupidBitField(i*VRAMBitsPerMapping, VRAMBitsPerMapping); + banksToBeZeroed |= currentMappings[i]; + Mapping[i] = currentMappings[i]; + } + else + { + u32 mapping = Mapping[i]; + + banksToBeZeroed |= mapping; + + while (mapping != 0) + { + u32 num = __builtin_ctz(mapping); + mapping &= ~(1 << num); + + // hack for **speed** + // this could probably be done less ugly but then we would rely + // on the compiler for vectorisation + static_assert(VRAMDirtyGranularity == 512); + if (MappingGranularity == 16*1024) + { + u32 dirty = ((u32*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 14)]; + ((u32*)result.Data)[i] |= dirty; + } + else if (MappingGranularity == 8*1024) + { + u16 dirty = ((u16*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 13)]; + ((u16*)result.Data)[i] |= dirty; + } + else if (MappingGranularity == 128*1024) + { + ((u64*)result.Data)[i * 4 + 0] |= ((u64*)VRAMDirty[num].Data)[0]; + ((u64*)result.Data)[i * 4 + 1] |= ((u64*)VRAMDirty[num].Data)[1]; + ((u64*)result.Data)[i * 4 + 2] |= ((u64*)VRAMDirty[num].Data)[2]; + ((u64*)result.Data)[i * 4 + 3] |= ((u64*)VRAMDirty[num].Data)[3]; + } + else + { + // welp + abort(); + } + } + } + } + + while (banksToBeZeroed != 0) + { + u32 num = __builtin_ctz(banksToBeZeroed); + banksToBeZeroed &= ~(1 << num); + memset(VRAMDirty[num].Data, 0, sizeof(VRAMDirty[num].Data)); + } + + return result; +} + +template NonStupidBitField<32*1024/VRAMDirtyGranularity> VRAMTrackingSet<32*1024, 8*1024>::DeriveState(u32*); +template NonStupidBitField<8*1024/VRAMDirtyGranularity> VRAMTrackingSet<8*1024, 8*1024>::DeriveState(u32*); +template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 128*1024>::DeriveState(u32*); +template NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMTrackingSet<128*1024, 16*1024>::DeriveState(u32*); +template NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMTrackingSet<256*1024, 16*1024>::DeriveState(u32*); +template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 16*1024>::DeriveState(u32*); + +template +void SyncDirtyFlags(u32* mappings, NonStupidBitField& writtenFlags) +{ + const u32 VRAMWrittenBitsPer16KB = 16*1024/VRAMDirtyGranularity; + + for (typename NonStupidBitField::Iterator it = writtenFlags.Begin(); it != writtenFlags.End(); it++) + { + u32 mapping = mappings[*it / VRAMWrittenBitsPer16KB]; + while (mapping != 0) + { + u32 num = __builtin_ctz(mapping); + + VRAMDirty[num][*it & (VRAMMask[num] / VRAMDirtyGranularity)] = true; + + mapping &= ~(1 << num); + } + } + memset(writtenFlags.Data, 0, sizeof(writtenFlags.Data)); +} + +void SyncDirtyFlags() +{ + SyncDirtyFlags(VRAMMap_ABG, VRAMWritten_ABG); + SyncDirtyFlags(VRAMMap_AOBJ, VRAMWritten_AOBJ); + SyncDirtyFlags(VRAMMap_BBG, VRAMWritten_BBG); + SyncDirtyFlags(VRAMMap_BOBJ, VRAMWritten_BOBJ); + SyncDirtyFlags(VRAMMap_ARM7, VRAMWritten_ARM7); +} + +template +inline bool CopyLinearVRAM(u8* flat, u32* mappings, NonStupidBitField& dirty, u64 (*slowAccess)(u32 addr)) +{ + const u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity; + + bool change = false; + + typename NonStupidBitField::Iterator it = dirty.Begin(); + while (it != dirty.End()) + { + u32 offset = *it * VRAMDirtyGranularity; + u8* dst = flat + offset; + u8* fastAccess = GetUniqueBankPtr(mappings[*it / VRAMBitsPerMapping], offset); + if (fastAccess) + { + memcpy(dst, fastAccess, VRAMDirtyGranularity); + } + else + { + for (u32 i = 0; i < VRAMDirtyGranularity; i += 8) + *(u64*)&dst[i] = slowAccess(offset + i); + } + change = true; + it++; + } + return change; +} + +bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<128*1024>(VRAMFlat_Texture, VRAMMap_Texture, dirty, ReadVRAM_Texture); +} +bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<16*1024>(VRAMFlat_TexPal, VRAMMap_TexPal, dirty, ReadVRAM_TexPal); +} + +bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<16*1024>(VRAMFlat_ABG, VRAMMap_ABG, dirty, ReadVRAM_ABG); +} +bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<16*1024>(VRAMFlat_BBG, VRAMMap_BBG, dirty, ReadVRAM_BBG); +} + +bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<16*1024>(VRAMFlat_AOBJ, VRAMMap_AOBJ, dirty, ReadVRAM_AOBJ); +} +bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<16*1024>(VRAMFlat_BOBJ, VRAMMap_BOBJ, dirty, ReadVRAM_BOBJ); } + +template +T ReadVRAM_ABGExtPal(u32 addr) +{ + u32 mask = VRAMMap_ABGExtPal[(addr >> 13) & 0x3]; + + T ret = 0; + if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0x7FFF]; + if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF]; + if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF]; + + return ret; +} + +template +T ReadVRAM_BBGExtPal(u32 addr) +{ + u32 mask = VRAMMap_BBGExtPal[(addr >> 13) & 0x3]; + + T ret = 0; + if (mask & (1<<7)) ret |= *(T*)&VRAM_H[addr & 0x7FFF]; + + return ret; +} + +template +T ReadVRAM_AOBJExtPal(u32 addr) +{ + u32 mask = VRAMMap_AOBJExtPal; + + T ret = 0; + if (mask & (1<<4)) ret |= *(T*)&VRAM_F[addr & 0x1FFF]; + if (mask & (1<<5)) ret |= *(T*)&VRAM_G[addr & 0x1FFF]; + + return ret; +} + +template +T ReadVRAM_BOBJExtPal(u32 addr) +{ + u32 mask = VRAMMap_BOBJExtPal; + + T ret = 0; + if (mask & (1<<8)) ret |= *(T*)&VRAM_I[addr & 0x1FFF]; + + return ret; +} + +bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<8*1024>(VRAMFlat_ABGExtPal, VRAMMap_ABGExtPal, dirty, ReadVRAM_ABGExtPal); +} +bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<8*1024>(VRAMFlat_BBGExtPal, VRAMMap_BBGExtPal, dirty, ReadVRAM_BBGExtPal); +} + +bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<8*1024>(VRAMFlat_AOBJExtPal, &VRAMMap_AOBJExtPal, dirty, ReadVRAM_AOBJExtPal); +} +bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty) +{ + return CopyLinearVRAM<8*1024>(VRAMFlat_BOBJExtPal, &VRAMMap_BOBJExtPal, dirty, ReadVRAM_BOBJExtPal); +} + +} \ No newline at end of file diff --git a/src/GPU.h b/src/GPU.h index 1564ef7..2f71da6 100644 --- a/src/GPU.h +++ b/src/GPU.h @@ -20,6 +20,7 @@ #define GPU_H #include "GPU2D.h" +#include "NonStupidBitfield.h" namespace GPU { @@ -45,7 +46,7 @@ extern u8 VRAM_G[ 16*1024]; extern u8 VRAM_H[ 32*1024]; extern u8 VRAM_I[ 16*1024]; -extern u8* VRAM[9]; +extern u8* const VRAM[9]; extern u32 VRAMMap_LCDC; extern u32 VRAMMap_ABG[0x20]; @@ -73,6 +74,73 @@ extern GPU2D* GPU2D_B; extern int Renderer; +const u32 VRAMDirtyGranularity = 512; + +extern NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMWritten_ABG; +extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_AOBJ; +extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BBG; +extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMWritten_BOBJ; +extern NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMWritten_ARM7; + +extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9]; + +template +struct VRAMTrackingSet +{ + u16 Mapping[Size / MappingGranularity]; + + const u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity; + + void Reset() + { + memset(Mapping, 0, sizeof(Mapping)); + } + NonStupidBitField DeriveState(u32* currentMappings); +}; + +extern VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG; +extern VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ; +extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG; +extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ; + +extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal; +extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal; +extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal; +extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal; + +extern VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture; +extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal; + +extern u8 VRAMFlat_ABG[512*1024]; +extern u8 VRAMFlat_BBG[128*1024]; +extern u8 VRAMFlat_AOBJ[256*1024]; +extern u8 VRAMFlat_BOBJ[128*1024]; + +extern u8 VRAMFlat_ABGExtPal[32*1024]; +extern u8 VRAMFlat_BBGExtPal[32*1024]; + +extern u8 VRAMFlat_AOBJExtPal[8*1024]; +extern u8 VRAMFlat_BOBJExtPal[8*1024]; + +extern u8 VRAMFlat_Texture[512*1024]; +extern u8 VRAMFlat_TexPal[128*1024]; + +bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty); +bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty); + +bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty); +bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty); + +bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty); +bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty); + +bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty); +bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty); + +bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty); +bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty); + +void SyncDirtyFlags(); typedef struct { @@ -233,7 +301,11 @@ void WriteVRAM_LCDC(u32 addr, T val) default: return; } - if (VRAMMap_LCDC & (1<> 14) & 0x1F]; + VRAMWritten_ABG[(addr & 0x7FFFF) / VRAMDirtyGranularity] = true; + if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val; if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val; if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; @@ -295,6 +369,8 @@ void WriteVRAM_AOBJ(u32 addr, T val) { u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF]; + VRAMWritten_AOBJ[(addr & 0x3FFFF) / VRAMDirtyGranularity] = true; + if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val; if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val; if (mask & (1<<4)) *(T*)&VRAM_E[addr & 0xFFFF] = val; @@ -324,6 +400,8 @@ void WriteVRAM_BBG(u32 addr, T val) { u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7]; + VRAMWritten_BBG[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; if (mask & (1<<7)) *(T*)&VRAM_H[addr & 0x7FFF] = val; if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val; @@ -350,11 +428,12 @@ void WriteVRAM_BOBJ(u32 addr, T val) { u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7]; + VRAMWritten_BOBJ[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val; if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val; } - template T ReadVRAM_ARM7(u32 addr) { @@ -372,6 +451,8 @@ void WriteVRAM_ARM7(u32 addr, T val) { u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1]; + VRAMWritten_ARM7[(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val; } diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp index 27aa608..07b5b21 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -148,12 +148,6 @@ void GPU2D::Reset() CaptureCnt = 0; MasterBrightness = 0; - - BGExtPalStatus[0] = 0; - BGExtPalStatus[1] = 0; - BGExtPalStatus[2] = 0; - BGExtPalStatus[3] = 0; - OBJExtPalStatus = 0; } void GPU2D::DoSavestate(Savestate* file) @@ -208,13 +202,6 @@ void GPU2D::DoSavestate(Savestate* file) if (!file->Saving) { - // refresh those - BGExtPalStatus[0] = 0; - BGExtPalStatus[1] = 0; - BGExtPalStatus[2] = 0; - BGExtPalStatus[3] = 0; - OBJExtPalStatus = 0; - CurBGXMosaicTable = MosaicTable[BGMosaicSize[0]]; CurOBJXMosaicTable = MosaicTable[OBJMosaicSize[0]]; } @@ -758,6 +745,25 @@ void GPU2D::DrawScanline(u32 line) int n3dline = line; line = GPU::VCount; + if (Num == 0) + { + auto bgDirty = GPU::VRAMDirty_ABG.DeriveState(GPU::VRAMMap_ABG); + GPU::MakeVRAMFlat_ABGCoherent(bgDirty); + auto bgExtPalDirty = GPU::VRAMDirty_ABGExtPal.DeriveState(GPU::VRAMMap_ABGExtPal); + GPU::MakeVRAMFlat_ABGExtPalCoherent(bgExtPalDirty); + auto objExtPalDirty = GPU::VRAMDirty_AOBJExtPal.DeriveState(&GPU::VRAMMap_AOBJExtPal); + GPU::MakeVRAMFlat_AOBJExtPalCoherent(objExtPalDirty); + } + else + { + auto bgDirty = GPU::VRAMDirty_BBG.DeriveState(GPU::VRAMMap_BBG); + GPU::MakeVRAMFlat_BBGCoherent(bgDirty); + auto bgExtPalDirty = GPU::VRAMDirty_BBGExtPal.DeriveState(GPU::VRAMMap_BBGExtPal); + GPU::MakeVRAMFlat_BBGExtPalCoherent(bgExtPalDirty); + auto objExtPalDirty = GPU::VRAMDirty_BOBJExtPal.DeriveState(&GPU::VRAMMap_BOBJExtPal); + GPU::MakeVRAMFlat_BOBJExtPalCoherent(objExtPalDirty); + } + bool forceblank = false; // scanlines that end up outside of the GPU drawing range @@ -970,6 +976,9 @@ void GPU2D::DoCapture(u32 line, u32 width) u16* dst = (u16*)GPU::VRAM[dstvram]; u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width); + static_assert(GPU::VRAMDirtyGranularity == 512); + GPU::VRAMDirty[dstvram][(dstaddr & 0x1FFFF) / GPU::VRAMDirtyGranularity] = true; + // TODO: handle 3D in accelerated mode!! u32* srcA; @@ -1188,85 +1197,20 @@ void GPU2D::SampleFIFO(u32 offset, u32 num) } } - -void GPU2D::BGExtPalDirty(u32 base) -{ - BGExtPalStatus[base] = 0; - BGExtPalStatus[base+1] = 0; -} - -void GPU2D::OBJExtPalDirty() -{ - OBJExtPalStatus = 0; -} - - u16* GPU2D::GetBGExtPal(u32 slot, u32 pal) { - u16* dst = &BGExtPalCache[slot][pal << 8]; - - if (!(BGExtPalStatus[slot] & (1< void GPU2D::DrawBG_Text(u32 line, u32 bgnum) { @@ -1720,17 +1678,20 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) extpal = (DispCnt & 0x40000000); if (extpal) extpalslot = ((bgnum<2) && (bgcnt&0x2000)) ? (2+bgnum) : bgnum; + u8* bgvram; + u32 bgvrammask; + GetBGVRAM(Num, bgvram, bgvrammask); if (Num) { - tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((bgcnt & 0x003C) << 12); + tilemapaddr = ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0x400]; } else { - tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0]; } @@ -1758,7 +1719,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) // preload shit as needed if ((xoff & 0x7) || mosaic) { - curtile = GPU::ReadVRAM_BG(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)); + curtile = *(u16*)&bgvram[(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)) & bgvrammask]; if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12); else curpal = pal; @@ -1779,7 +1740,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) (mosaic && ((xpos >> 3) != (lastxpos >> 3)))) { // load a new tile - curtile = GPU::ReadVRAM_BG(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)); + curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask]; if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12); else curpal = pal; @@ -1794,7 +1755,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) if (WindowMask[i] & (1<(pixelsaddr + tilexoff); + color = bgvram[(pixelsaddr + tilexoff) & bgvrammask]; if (color) drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)); + curtile = *(u16*)&bgvram[((tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3))) & bgvrammask]; curpal = pal + ((curtile & 0xF000) >> 8); pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5) + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); @@ -1828,7 +1789,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) (mosaic && ((xpos >> 3) != (lastxpos >> 3)))) { // load a new tile - curtile = GPU::ReadVRAM_BG(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)); + curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask]; curpal = pal + ((curtile & 0xF000) >> 8); pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5) + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); @@ -1842,11 +1803,11 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) u32 tilexoff = (curtile & 0x0400) ? (7-(xpos&0x7)) : (xpos&0x7); if (tilexoff & 0x1) { - color = GPU::ReadVRAM_BG(pixelsaddr + (tilexoff >> 1)) >> 4; + color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] >> 4; } else { - color = GPU::ReadVRAM_BG(pixelsaddr + (tilexoff >> 1)) & 0x0F; + color = bgvram[(pixelsaddr + (tilexoff >> 1)) & bgvrammask] & 0x0F; } if (color) @@ -1895,17 +1856,20 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum) rotY -= (BGMosaicY * rotD); } + u8* bgvram; + u32 bgvrammask; + if (Num) { - tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((bgcnt & 0x003C) << 12); + tilemapaddr = ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0x400]; } else { - tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0]; } @@ -1934,13 +1898,13 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum) if ((!((finalX|finalY) & overflowmask))) { - curtile = GPU::ReadVRAM_BG(tilemapaddr + ((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11))); + curtile = bgvram[(tilemapaddr + ((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11))) & bgvrammask]; // draw pixel u32 tilexoff = (finalX >> 8) & 0x7; u32 tileyoff = (finalY >> 8) & 0x7; - color = GPU::ReadVRAM_BG(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff); + color = bgvram[(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff) & bgvrammask]; if (color) drawPixel(&BGOBJLine[i], pal[color], 0x01000000<(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)); + color = *(u16*)&bgvram[(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)) & bgvrammask]; if (color & 0x8000) drawPixel(&BGOBJLine[i], color, 0x01000000<(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)); + color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask]; if (color) drawPixel(&BGOBJLine[i], pal[color], 0x01000000<> 8) + ((bgcnt & 0x003C) << 12); - tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU::Palette[0]; } @@ -2144,7 +2112,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) if ((!((finalX|finalY) & overflowmask))) { - curtile = GPU::ReadVRAM_BG(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1)); + curtile = *(u16*)&bgvram[(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1)) & bgvrammask]; if (extpal) curpal = GetBGExtPal(bgnum, curtile>>12); else curpal = pal; @@ -2156,7 +2124,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) if (curtile & 0x0400) tilexoff = 7-tilexoff; if (curtile & 0x0800) tileyoff = 7-tileyoff; - color = GPU::ReadVRAM_BG(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff); + color = bgvram[(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff) & bgvrammask]; if (color) drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)); + color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask]; if (color) drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2); @@ -2346,6 +2315,20 @@ void GPU2D::InterleaveSprites(u32 prio) } } +void GetOBJVRAM(u32 num, u8*& data, u32& mask) +{ + if (num == 0) + { + data = GPU::VRAMFlat_AOBJ; + mask = 0x3FFFF; + } + else + { + data = GPU::VRAMFlat_BOBJ; + mask = 0x1FFFF; + } +} + #define DoDrawSprite(type, ...) \ if (iswin) \ { \ @@ -2370,6 +2353,17 @@ void GPU2D::DrawSprites(u32 line) OBJMosaicYCount = 0; } + if (Num == 0) + { + auto objDirty = GPU::VRAMDirty_AOBJ.DeriveState(GPU::VRAMMap_AOBJ); + GPU::MakeVRAMFlat_AOBJCoherent(objDirty); + } + else + { + auto objDirty = GPU::VRAMDirty_BOBJ.DeriveState(GPU::VRAMMap_BOBJ); + GPU::MakeVRAMFlat_BOBJCoherent(objDirty); + } + NumSprites = 0; memset(OBJLine, 0, 256*4); memset(OBJWindow, 0, 256); @@ -2482,6 +2476,10 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi u32 ytilefactor; + u8* objvram; + u32 objvrammask; + GetOBJVRAM(Num, objvram, objvrammask); + s32 centerX = boundwidth >> 1; s32 centerY = boundheight >> 1; @@ -2525,6 +2523,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi pixelattr |= (0xC0000000 | (alpha << 24)); + u32 pixelsaddr; if (DispCnt & 0x40) { if (DispCnt & 0x20) @@ -2536,7 +2535,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi } else { - tilenum <<= (7 + ((DispCnt >> 22) & 0x1)); + pixelsaddr = tilenum << (7 + ((DispCnt >> 22) & 0x1)); ytilefactor = ((width >> 8) * 2); } } @@ -2544,23 +2543,21 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi { if (DispCnt & 0x20) { - tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); + pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); ytilefactor = (256 * 2); } else { - tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); + pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); ytilefactor = (128 * 2); } } - u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; - for (; xoff < boundwidth;) { if ((u32)rotX < width && (u32)rotY < height) { - color = GPU::ReadVRAM_OBJ(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)); + color = *(u16*)&objvram[(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)) & objvrammask]; if (color & 0x8000) { @@ -2585,9 +2582,10 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi } else { + u32 pixelsaddr = tilenum; if (DispCnt & 0x10) { - tilenum <<= ((DispCnt >> 20) & 0x3); + pixelsaddr <<= ((DispCnt >> 20) & 0x3); ytilefactor = (width >> 11) << ((attrib[0] & 0x2000) ? 1:0); } else @@ -2601,9 +2599,8 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi if (attrib[0] & 0x2000) { // 256-color - tilenum <<= 5; ytilefactor <<= 5; - u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; + pixelsaddr <<= 5; if (!window) { @@ -2617,7 +2614,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi { if ((u32)rotX < width && (u32)rotY < height) { - color = GPU::ReadVRAM_OBJ(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)); + color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)) & objvrammask]; if (color) { @@ -2657,7 +2654,7 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi { if ((u32)rotX < width && (u32)rotY < height) { - color = GPU::ReadVRAM_OBJ(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)); + color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)) & objvrammask]; if (rotX & 0x100) color >>= 4; else @@ -2705,6 +2702,10 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos pixelattr |= 0x100000; } + u8* objvram; + u32 objvrammask; + GetOBJVRAM(Num, objvram, objvrammask); + // yflip if (attrib[1] & 0x2000) ypos = height-1 - ypos; @@ -2735,6 +2736,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos pixelattr |= (0xC0000000 | (alpha << 24)); + u32 pixelsaddr = tilenum; if (DispCnt & 0x40) { if (DispCnt & 0x20) @@ -2746,25 +2748,24 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos } else { - tilenum <<= (7 + ((DispCnt >> 22) & 0x1)); - tilenum += (ypos * width * 2); + pixelsaddr <<= (7 + ((DispCnt >> 22) & 0x1)); + pixelsaddr += (ypos * width * 2); } } else { if (DispCnt & 0x20) { - tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); - tilenum += (ypos * 256 * 2); + pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); + pixelsaddr += (ypos * 256 * 2); } else { - tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); - tilenum += (ypos * 128 * 2); + pixelsaddr = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); + pixelsaddr += (ypos * 128 * 2); } } - u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; s32 pixelstride; if (attrib[1] & 0x1000) // xflip @@ -2781,7 +2782,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos for (; xoff < xend;) { - color = GPU::ReadVRAM_OBJ(pixelsaddr); + color = *(u16*)&objvram[pixelsaddr & objvrammask]; pixelsaddr += pixelstride; @@ -2805,14 +2806,15 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos } else { + u32 pixelsaddr = tilenum; if (DispCnt & 0x10) { - tilenum <<= ((DispCnt >> 20) & 0x3); - tilenum += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0); + pixelsaddr <<= ((DispCnt >> 20) & 0x3); + pixelsaddr += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0); } else { - tilenum += ((ypos >> 3) * 0x20); + pixelsaddr += ((ypos >> 3) * 0x20); } if (spritemode == 1) pixelattr |= 0x80000000; @@ -2821,8 +2823,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos if (attrib[0] & 0x2000) { // 256-color - tilenum <<= 5; - u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; + pixelsaddr <<= 5; pixelsaddr += ((ypos & 0x7) << 3); s32 pixelstride; @@ -2851,7 +2852,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos for (; xoff < xend;) { - color = GPU::ReadVRAM_OBJ(pixelsaddr); + color = objvram[pixelsaddr]; pixelsaddr += pixelstride; @@ -2877,8 +2878,7 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos else { // 16-color - tilenum <<= 5; - u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; + pixelsaddr <<= 5; pixelsaddr += ((ypos & 0x7) << 2); s32 pixelstride; @@ -2910,13 +2910,13 @@ void GPU2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos { if (attrib[1] & 0x1000) { - if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ(pixelsaddr) & 0x0F; pixelsaddr--; } - else color = GPU::ReadVRAM_OBJ(pixelsaddr) >> 4; + if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] & 0x0F; pixelsaddr--; } + else color = objvram[pixelsaddr & objvrammask] >> 4; } else { - if (xoff & 0x1) { color = GPU::ReadVRAM_OBJ(pixelsaddr) >> 4; pixelsaddr++; } - else color = GPU::ReadVRAM_OBJ(pixelsaddr) & 0x0F; + if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] >> 4; pixelsaddr++; } + else color = objvram[pixelsaddr & objvrammask] & 0x0F; } if (color) diff --git a/src/GPU2D.h b/src/GPU2D.h index 469d6a2..db15adc 100644 --- a/src/GPU2D.h +++ b/src/GPU2D.h @@ -59,9 +59,6 @@ public: void CheckWindows(u32 line); - void BGExtPalDirty(u32 base); - void OBJExtPalDirty(); - u16* GetBGExtPal(u32 slot, u32 pal); u16* GetOBJExtPal(); @@ -128,9 +125,6 @@ private: u16 MasterBrightness; u16 BGExtPalCache[4][16*256]; - u16 OBJExtPalCache[16*256]; - u32 BGExtPalStatus[4]; - u32 OBJExtPalStatus; u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb); u32 ColorBlend5(u32 val1, u32 val2); diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 74debfe..4e6ac42 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -179,6 +179,8 @@ u8 RenderFogDensityTable[34]; u32 RenderClearAttr1, RenderClearAttr2; +bool RenderFrameIdentical; + u32 ZeroDotWLimit; u32 GXStat; @@ -2491,6 +2493,19 @@ void VBlank() } RenderNumPolygons = NumPolygons; + RenderFrameIdentical = false; + } + else + { + RenderFrameIdentical = RenderDispCnt == DispCnt + && RenderAlphaRef == AlphaRef + && RenderClearAttr1 == ClearAttr1 + && RenderClearAttr2 == ClearAttr2 + && RenderFogColor == FogColor + && RenderFogOffset == FogOffset * 0x200 + && memcmp(RenderEdgeTable, EdgeTable, 8*2) == 0 + && memcmp(RenderFogDensityTable + 1, FogDensityTable, 32) == 0 + && memcmp(RenderToonTable, ToonTable, 32*2) == 0; } RenderDispCnt = DispCnt; diff --git a/src/GPU3D.h b/src/GPU3D.h index c69adde..0477c4f 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -87,6 +87,8 @@ extern u8 RenderFogDensityTable[34]; extern u32 RenderClearAttr1, RenderClearAttr2; +extern bool RenderFrameIdentical; + extern std::array RenderPolygonRAM; extern u32 RenderNumPolygons; diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 7ee9e5d..d66eb76 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -58,6 +58,8 @@ bool PrevIsShadowMask; bool Enabled; +bool FrameIdentical; + // threading bool Threaded; @@ -550,6 +552,16 @@ typedef struct RendererPolygon PolygonList[2048]; +template +inline T ReadVRAM_Texture(u32 addr) +{ + return *(T*)&GPU::VRAMFlat_Texture[addr & 0x7FFFF]; +} +template +inline T ReadVRAM_TexPal(u32 addr) +{ + return *(T*)&GPU::VRAMFlat_TexPal[addr & 0x1FFFF]; +} void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) { @@ -606,10 +618,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 1: // A3I5 { vramaddr += ((t * width) + s); - u8 pixel = GPU::ReadVRAM_Texture(vramaddr); + u8 pixel = ReadVRAM_Texture(vramaddr); texpal <<= 4; - *color = GPU::ReadVRAM_TexPal(texpal + ((pixel&0x1F)<<1)); + *color = ReadVRAM_TexPal(texpal + ((pixel&0x1F)<<1)); *alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6); } break; @@ -617,12 +629,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 2: // 4-color { vramaddr += (((t * width) + s) >> 2); - u8 pixel = GPU::ReadVRAM_Texture(vramaddr); + u8 pixel = ReadVRAM_Texture(vramaddr); pixel >>= ((s & 0x3) << 1); pixel &= 0x3; texpal <<= 3; - *color = GPU::ReadVRAM_TexPal(texpal + (pixel<<1)); + *color = ReadVRAM_TexPal(texpal + (pixel<<1)); *alpha = (pixel==0) ? alpha0 : 31; } break; @@ -630,12 +642,12 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 3: // 16-color { vramaddr += (((t * width) + s) >> 1); - u8 pixel = GPU::ReadVRAM_Texture(vramaddr); + u8 pixel = ReadVRAM_Texture(vramaddr); if (s & 0x1) pixel >>= 4; else pixel &= 0xF; texpal <<= 4; - *color = GPU::ReadVRAM_TexPal(texpal + (pixel<<1)); + *color = ReadVRAM_TexPal(texpal + (pixel<<1)); *alpha = (pixel==0) ? alpha0 : 31; } break; @@ -643,10 +655,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 4: // 256-color { vramaddr += ((t * width) + s); - u8 pixel = GPU::ReadVRAM_Texture(vramaddr); + u8 pixel = ReadVRAM_Texture(vramaddr); texpal <<= 4; - *color = GPU::ReadVRAM_TexPal(texpal + (pixel<<1)); + *color = ReadVRAM_TexPal(texpal + (pixel<<1)); *alpha = (pixel==0) ? alpha0 : 31; } break; @@ -660,30 +672,30 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha if (vramaddr >= 0x40000) slot1addr += 0x10000; - u8 val = GPU::ReadVRAM_Texture(vramaddr); + u8 val = ReadVRAM_Texture(vramaddr); val >>= (2 * (s & 0x3)); - u16 palinfo = GPU::ReadVRAM_Texture(slot1addr); + u16 palinfo = ReadVRAM_Texture(slot1addr); u32 paloffset = (palinfo & 0x3FFF) << 2; texpal <<= 4; switch (val & 0x3) { case 0: - *color = GPU::ReadVRAM_TexPal(texpal + paloffset); + *color = ReadVRAM_TexPal(texpal + paloffset); *alpha = 31; break; case 1: - *color = GPU::ReadVRAM_TexPal(texpal + paloffset + 2); + *color = ReadVRAM_TexPal(texpal + paloffset + 2); *alpha = 31; break; case 2: if ((palinfo >> 14) == 1) { - u16 color0 = GPU::ReadVRAM_TexPal(texpal + paloffset); - u16 color1 = GPU::ReadVRAM_TexPal(texpal + paloffset + 2); + u16 color0 = ReadVRAM_TexPal(texpal + paloffset); + u16 color1 = ReadVRAM_TexPal(texpal + paloffset + 2); u32 r0 = color0 & 0x001F; u32 g0 = color0 & 0x03E0; @@ -700,8 +712,8 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha } else if ((palinfo >> 14) == 3) { - u16 color0 = GPU::ReadVRAM_TexPal(texpal + paloffset); - u16 color1 = GPU::ReadVRAM_TexPal(texpal + paloffset + 2); + u16 color0 = ReadVRAM_TexPal(texpal + paloffset); + u16 color1 = ReadVRAM_TexPal(texpal + paloffset + 2); u32 r0 = color0 & 0x001F; u32 g0 = color0 & 0x03E0; @@ -717,20 +729,20 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha *color = r | g | b; } else - *color = GPU::ReadVRAM_TexPal(texpal + paloffset + 4); + *color = ReadVRAM_TexPal(texpal + paloffset + 4); *alpha = 31; break; case 3: if ((palinfo >> 14) == 2) { - *color = GPU::ReadVRAM_TexPal(texpal + paloffset + 6); + *color = ReadVRAM_TexPal(texpal + paloffset + 6); *alpha = 31; } else if ((palinfo >> 14) == 3) { - u16 color0 = GPU::ReadVRAM_TexPal(texpal + paloffset); - u16 color1 = GPU::ReadVRAM_TexPal(texpal + paloffset + 2); + u16 color0 = ReadVRAM_TexPal(texpal + paloffset); + u16 color1 = ReadVRAM_TexPal(texpal + paloffset + 2); u32 r0 = color0 & 0x001F; u32 g0 = color0 & 0x03E0; @@ -759,10 +771,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 6: // A5I3 { vramaddr += ((t * width) + s); - u8 pixel = GPU::ReadVRAM_Texture(vramaddr); + u8 pixel = ReadVRAM_Texture(vramaddr); texpal <<= 4; - *color = GPU::ReadVRAM_TexPal(texpal + ((pixel&0x7)<<1)); + *color = ReadVRAM_TexPal(texpal + ((pixel&0x7)<<1)); *alpha = (pixel >> 3); } break; @@ -770,7 +782,7 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha case 7: // direct color { vramaddr += (((t * width) + s) << 1); - *color = GPU::ReadVRAM_Texture(vramaddr); + *color = ReadVRAM_Texture(vramaddr); *alpha = (*color & 0x8000) ? 31 : 0; } break; @@ -2007,8 +2019,8 @@ void ClearBuffers() { for (int x = 0; x < 256; x++) { - u16 val2 = GPU::ReadVRAM_Texture(0x40000 + (yoff << 9) + (xoff << 1)); - u16 val3 = GPU::ReadVRAM_Texture(0x60000 + (yoff << 9) + (xoff << 1)); + u16 val2 = ReadVRAM_Texture(0x40000 + (yoff << 9) + (xoff << 1)); + u16 val3 = ReadVRAM_Texture(0x60000 + (yoff << 9) + (xoff << 1)); // TODO: confirm color conversion u32 r = (val2 << 1) & 0x3E; if (r) r++; @@ -2088,11 +2100,19 @@ void VCount144() void RenderFrame() { + auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture); + auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal); + + bool textureChanged = GPU::MakeVRAMFlat_TextureCoherent(textureDirty); + bool texPalChanged = GPU::MakeVRAMFlat_TexPalCoherent(texPalDirty); + + FrameIdentical = !(textureChanged || texPalChanged) && RenderFrameIdentical; + if (RenderThreadRunning) { Platform::Semaphore_Post(Sema_RenderStart); } - else + else if (!FrameIdentical) { ClearBuffers(); RenderPolygons(false, &RenderPolygonRAM[0], RenderNumPolygons); @@ -2107,8 +2127,15 @@ void RenderThreadFunc() if (!RenderThreadRunning) return; RenderThreadRendering = true; - ClearBuffers(); - RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons); + if (FrameIdentical) + { + Platform::Semaphore_Post(Sema_ScanlineCount, 192); + } + else + { + ClearBuffers(); + RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons); + } Platform::Semaphore_Post(Sema_RenderDone); RenderThreadRendering = false; diff --git a/src/NonStupidBitfield.h b/src/NonStupidBitfield.h new file mode 100644 index 0000000..124ba76 --- /dev/null +++ b/src/NonStupidBitfield.h @@ -0,0 +1,149 @@ +#ifndef NONSTUPIDBITFIELD_H +#define NONSTUPIDBITFIELD_H + +#include "types.h" + +#include + +#include +#include + +// like std::bitset but less stupid and optimised for +// our use case (keeping track of memory invalidations) + +template +struct NonStupidBitField +{ + static_assert((Size % 8) == 0, "bitfield size must be a multiple of 8"); + static const u32 DataLength = Size / 8; + u8 Data[DataLength]; + + struct Ref + { + NonStupidBitField& BitField; + u32 Idx; + + operator bool() + { + return BitField.Data[Idx >> 3] & (1 << (Idx & 0x7)); + } + + Ref& operator=(bool set) + { + BitField.Data[Idx >> 3] &= ~(1 << (Idx & 0x7)); + BitField.Data[Idx >> 3] |= ((u8)set << (Idx & 0x7)); + return *this; + } + }; + + struct Iterator + { + NonStupidBitField& BitField; + u32 DataIdx; + u32 BitIdx; + u64 RemainingBits; + + u32 operator*() { return DataIdx * 8 + BitIdx; } + + bool operator==(const Iterator& other) { return other.DataIdx == DataIdx; } + bool operator!=(const Iterator& other) { return other.DataIdx != DataIdx; } + + template + void Next() + { + while (RemainingBits == 0 && DataIdx < DataLength) + { + DataIdx += sizeof(T); + RemainingBits = *(T*)&BitField.Data[DataIdx]; + } + + BitIdx = __builtin_ctzll(RemainingBits); + RemainingBits &= ~(1ULL << BitIdx); + } + + Iterator operator++(int) + { + Iterator prev(*this); + ++*this; + return prev; + } + + Iterator& operator++() + { + if ((DataLength % 8) == 0) + Next(); + else if ((DataLength % 4) == 0) + Next(); + else if ((DataLength % 2) == 0) + Next(); + else + Next(); + + return *this; + } + }; + + NonStupidBitField(u32 start, u32 size) + { + memset(Data, 0, sizeof(Data)); + + if (size == 0) + return; + + u32 roundedStartBit = (start + 7) & ~7; + u32 roundedEndBit = (start + size) & ~7; + if (roundedStartBit != roundedEndBit) + memset(Data + roundedStartBit / 8, 0xFF, (roundedEndBit - roundedStartBit) / 8); + + if (start & 0x7) + Data[start >> 3] = 0xFF << (start & 0x7); + if ((start + size) & 0x7) + Data[(start + size) >> 3] = 0xFF >> ((start + size) & 0x7); + } + + NonStupidBitField() + { + memset(Data, 0, sizeof(Data)); + } + + Iterator End() + { + return Iterator{*this, DataLength, 0, 0}; + } + Iterator Begin() + { + if ((DataLength % 8) == 0) + return ++Iterator{*this, 0, 0, *(u64*)Data}; + else if ((DataLength % 4) == 0) + return ++Iterator{*this, 0, 0, *(u32*)Data}; + else if ((DataLength % 2) == 0) + return ++Iterator{*this, 0, 0, *(u16*)Data}; + else + return ++Iterator{*this, 0, 0, *Data}; + } + + Ref operator[](u32 idx) + { + return Ref{*this, idx}; + } + + NonStupidBitField& operator|=(const NonStupidBitField& other) + { + for (u32 i = 0; i < DataLength; i++) + { + Data[i] |= other.Data[i]; + } + return *this; + } + NonStupidBitField& operator&=(const NonStupidBitField& other) + { + for (u32 i = 0; i < DataLength; i++) + { + Data[i] &= other.Data[i]; + } + return *this; + } +}; + + +#endif \ No newline at end of file diff --git a/src/Platform.h b/src/Platform.h index deb3785..b4dda9e 100644 --- a/src/Platform.h +++ b/src/Platform.h @@ -77,7 +77,7 @@ Semaphore* Semaphore_Create(); void Semaphore_Free(Semaphore* sema); void Semaphore_Reset(Semaphore* sema); void Semaphore_Wait(Semaphore* sema); -void Semaphore_Post(Semaphore* sema); +void Semaphore_Post(Semaphore* sema, int count = 1); struct Mutex; Mutex* Mutex_Create(); diff --git a/src/frontend/qt_sdl/Platform.cpp b/src/frontend/qt_sdl/Platform.cpp index a51a985..d3480e4 100644 --- a/src/frontend/qt_sdl/Platform.cpp +++ b/src/frontend/qt_sdl/Platform.cpp @@ -230,9 +230,9 @@ void Semaphore_Wait(Semaphore* sema) ((QSemaphore*) sema)->acquire(); } -void Semaphore_Post(Semaphore* sema) +void Semaphore_Post(Semaphore* sema, int count) { - ((QSemaphore*) sema)->release(); + ((QSemaphore*) sema)->release(count); } Mutex* Mutex_Create() -- cgit v1.2.3 From 298b958e2a105ba29f07ee110cd9f856da1f66d7 Mon Sep 17 00:00:00 2001 From: webgeek1234 Date: Tue, 1 Dec 2020 12:48:46 -0600 Subject: Rename jit linkage asm files (#836) An extension of lower case s indicates to not run the preprocessor while upper case S does. These files have defines. --- src/ARMJIT_A64/ARMJIT_Linkage.S | 68 +++++++++++++++++++++++++++++++ src/ARMJIT_A64/ARMJIT_Linkage.s | 68 ------------------------------- src/ARMJIT_x64/ARMJIT_Linkage.S | 88 +++++++++++++++++++++++++++++++++++++++++ src/ARMJIT_x64/ARMJIT_Linkage.s | 88 ----------------------------------------- src/CMakeLists.txt | 6 +-- 5 files changed, 158 insertions(+), 160 deletions(-) create mode 100644 src/ARMJIT_A64/ARMJIT_Linkage.S delete mode 100644 src/ARMJIT_A64/ARMJIT_Linkage.s create mode 100644 src/ARMJIT_x64/ARMJIT_Linkage.S delete mode 100644 src/ARMJIT_x64/ARMJIT_Linkage.s (limited to 'src') diff --git a/src/ARMJIT_A64/ARMJIT_Linkage.S b/src/ARMJIT_A64/ARMJIT_Linkage.S new file mode 100644 index 0000000..7886315 --- /dev/null +++ b/src/ARMJIT_A64/ARMJIT_Linkage.S @@ -0,0 +1,68 @@ +#include "../ARMJIT_x64/ARMJIT_Offsets.h" + +.text + +#define RCPSR w27 +#define RCycles w28 +#define RCPU x29 + +.p2align 4,,15 + +.global ARM_Dispatch +ARM_Dispatch: + stp x19, x20, [sp, #-96]! + stp x21, x22, [sp, #16] + stp x23, x24, [sp, #32] + stp x25, x26, [sp, #48] + stp x27, x28, [sp, #64] + stp x29, x30, [sp, #80] + + mov RCPU, x0 + ldr RCycles, [RCPU, ARM_Cycles_offset] + ldr RCPSR, [RCPU, ARM_CPSR_offset] + + br x1 + +.p2align 4,,15 + +.global ARM_Ret +ARM_Ret: + str RCycles, [RCPU, ARM_Cycles_offset] + str RCPSR, [RCPU, ARM_CPSR_offset] + + ldp x29, x30, [sp, #80] + ldp x27, x28, [sp, #64] + ldp x25, x26, [sp, #48] + ldp x23, x24, [sp, #32] + ldp x21, x22, [sp, #16] + ldp x19, x20, [sp], #96 + + ret + +.p2align 4,,15 + +.global ARM_RestoreContext +ARM_RestoreContext: + mov sp, x0 + + ldp x0, x1, [sp] + ldp x2, x3, [sp, #16] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #48] + ldp x8, x9, [sp, #64] + ldp x10, x11, [sp, #80] + ldp x12, x13, [sp, #96] + ldp x14, x15, [sp, #112] + ldp x16, x17, [sp, #128] + ldp x18, x19, [sp, #144] + ldp x20, x21, [sp, #160] + ldp x22, x23, [sp, #176] + ldp x24, x25, [sp, #192] + ldp x26, x27, [sp, #208] + ldp x28, x29, [sp, #224] + ldr x30, [sp, #240] + + ldp x17, x18, [sp, #248] + mov sp, x17 + + br x18 \ No newline at end of file diff --git a/src/ARMJIT_A64/ARMJIT_Linkage.s b/src/ARMJIT_A64/ARMJIT_Linkage.s deleted file mode 100644 index 7886315..0000000 --- a/src/ARMJIT_A64/ARMJIT_Linkage.s +++ /dev/null @@ -1,68 +0,0 @@ -#include "../ARMJIT_x64/ARMJIT_Offsets.h" - -.text - -#define RCPSR w27 -#define RCycles w28 -#define RCPU x29 - -.p2align 4,,15 - -.global ARM_Dispatch -ARM_Dispatch: - stp x19, x20, [sp, #-96]! - stp x21, x22, [sp, #16] - stp x23, x24, [sp, #32] - stp x25, x26, [sp, #48] - stp x27, x28, [sp, #64] - stp x29, x30, [sp, #80] - - mov RCPU, x0 - ldr RCycles, [RCPU, ARM_Cycles_offset] - ldr RCPSR, [RCPU, ARM_CPSR_offset] - - br x1 - -.p2align 4,,15 - -.global ARM_Ret -ARM_Ret: - str RCycles, [RCPU, ARM_Cycles_offset] - str RCPSR, [RCPU, ARM_CPSR_offset] - - ldp x29, x30, [sp, #80] - ldp x27, x28, [sp, #64] - ldp x25, x26, [sp, #48] - ldp x23, x24, [sp, #32] - ldp x21, x22, [sp, #16] - ldp x19, x20, [sp], #96 - - ret - -.p2align 4,,15 - -.global ARM_RestoreContext -ARM_RestoreContext: - mov sp, x0 - - ldp x0, x1, [sp] - ldp x2, x3, [sp, #16] - ldp x4, x5, [sp, #32] - ldp x6, x7, [sp, #48] - ldp x8, x9, [sp, #64] - ldp x10, x11, [sp, #80] - ldp x12, x13, [sp, #96] - ldp x14, x15, [sp, #112] - ldp x16, x17, [sp, #128] - ldp x18, x19, [sp, #144] - ldp x20, x21, [sp, #160] - ldp x22, x23, [sp, #176] - ldp x24, x25, [sp, #192] - ldp x26, x27, [sp, #208] - ldp x28, x29, [sp, #224] - ldr x30, [sp, #240] - - ldp x17, x18, [sp, #248] - mov sp, x17 - - br x18 \ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_Linkage.S b/src/ARMJIT_x64/ARMJIT_Linkage.S new file mode 100644 index 0000000..8cc0b5f --- /dev/null +++ b/src/ARMJIT_x64/ARMJIT_Linkage.S @@ -0,0 +1,88 @@ +.intel_syntax noprefix + +#include "ARMJIT_Offsets.h" + +.text + +#define RCPU rbp +#define RCPSR r15d + +#ifdef WIN64 +#define ARG1_REG ecx +#define ARG2_REG edx +#define ARG3_REG r8d +#define ARG4_REG r9d +#define ARG1_REG64 rcx +#define ARG2_REG64 rdx +#define ARG3_REG64 r8 +#define ARG4_REG64 r9 +#else +#define ARG1_REG edi +#define ARG2_REG esi +#define ARG3_REG edx +#define ARG4_REG ecx +#define ARG1_REG64 rdi +#define ARG2_REG64 rsi +#define ARG3_REG64 rdx +#define ARG4_REG64 rcx +#endif + +.p2align 4,,15 + +#ifdef __APPLE__ +.global _ARM_Dispatch +_ARM_Dispatch: +#else +.global ARM_Dispatch +ARM_Dispatch: +#endif +#ifdef WIN64 + push rdi + push rsi +#endif + push rbx + push r12 + push r13 + push r14 + push r15 + push rbp + +#ifdef WIN64 + sub rsp, 0x28 +#else + sub rsp, 0x8 +#endif + mov RCPU, ARG1_REG64 + mov RCPSR, [RCPU + ARM_CPSR_offset] + + jmp ARG2_REG64 + +.p2align 4,,15 + +#ifdef __APPLE__ +.global _ARM_Ret +_ARM_Ret: +#else +.global ARM_Ret +ARM_Ret: +#endif + mov [RCPU + ARM_CPSR_offset], RCPSR + +#ifdef WIN64 + add rsp, 0x28 +#else + add rsp, 0x8 +#endif + + pop rbp + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx +#ifdef WIN64 + pop rsi + pop rdi +#endif + + ret diff --git a/src/ARMJIT_x64/ARMJIT_Linkage.s b/src/ARMJIT_x64/ARMJIT_Linkage.s deleted file mode 100644 index 8cc0b5f..0000000 --- a/src/ARMJIT_x64/ARMJIT_Linkage.s +++ /dev/null @@ -1,88 +0,0 @@ -.intel_syntax noprefix - -#include "ARMJIT_Offsets.h" - -.text - -#define RCPU rbp -#define RCPSR r15d - -#ifdef WIN64 -#define ARG1_REG ecx -#define ARG2_REG edx -#define ARG3_REG r8d -#define ARG4_REG r9d -#define ARG1_REG64 rcx -#define ARG2_REG64 rdx -#define ARG3_REG64 r8 -#define ARG4_REG64 r9 -#else -#define ARG1_REG edi -#define ARG2_REG esi -#define ARG3_REG edx -#define ARG4_REG ecx -#define ARG1_REG64 rdi -#define ARG2_REG64 rsi -#define ARG3_REG64 rdx -#define ARG4_REG64 rcx -#endif - -.p2align 4,,15 - -#ifdef __APPLE__ -.global _ARM_Dispatch -_ARM_Dispatch: -#else -.global ARM_Dispatch -ARM_Dispatch: -#endif -#ifdef WIN64 - push rdi - push rsi -#endif - push rbx - push r12 - push r13 - push r14 - push r15 - push rbp - -#ifdef WIN64 - sub rsp, 0x28 -#else - sub rsp, 0x8 -#endif - mov RCPU, ARG1_REG64 - mov RCPSR, [RCPU + ARM_CPSR_offset] - - jmp ARG2_REG64 - -.p2align 4,,15 - -#ifdef __APPLE__ -.global _ARM_Ret -_ARM_Ret: -#else -.global ARM_Ret -ARM_Ret: -#endif - mov [RCPU + ARM_CPSR_offset], RCPSR - -#ifdef WIN64 - add rsp, 0x28 -#else - add rsp, 0x8 -#endif - - pop rbp - pop r15 - pop r14 - pop r13 - pop r12 - pop rbx -#ifdef WIN64 - pop rsi - pop rdi -#endif - - ret diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 446480d..c16da9f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -80,9 +80,8 @@ if (ENABLE_JIT) ARMJIT_x64/ARMJIT_LoadStore.cpp ARMJIT_x64/ARMJIT_Branch.cpp - ARMJIT_x64/ARMJIT_Linkage.s + ARMJIT_x64/ARMJIT_Linkage.S ) - set_source_files_properties(ARMJIT_x64/ARMJIT_Linkage.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp") endif() if (ARCHITECTURE STREQUAL ARM64) target_sources(core PRIVATE @@ -94,9 +93,8 @@ if (ENABLE_JIT) ARMJIT_A64/ARMJIT_LoadStore.cpp ARMJIT_A64/ARMJIT_Branch.cpp - ARMJIT_A64/ARMJIT_Linkage.s + ARMJIT_A64/ARMJIT_Linkage.S ) - set_source_files_properties(ARMJIT_A64/ARMJIT_Linkage.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp") endif() endif() -- cgit v1.2.3 From 07423492c434f2d0e54790e87562da25cd382bd9 Mon Sep 17 00:00:00 2001 From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com> Date: Tue, 1 Dec 2020 14:01:57 -0800 Subject: Use AF_LINK and net/if_dl.h on all non-Linux systems (#835) --- src/frontend/qt_sdl/LAN_PCap.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/frontend/qt_sdl/LAN_PCap.cpp b/src/frontend/qt_sdl/LAN_PCap.cpp index 8138699..3381e80 100644 --- a/src/frontend/qt_sdl/LAN_PCap.cpp +++ b/src/frontend/qt_sdl/LAN_PCap.cpp @@ -33,10 +33,10 @@ #include #include #include - #ifdef __APPLE__ - #include + #ifdef __linux__ + #include #else - #include + #include #endif #endif @@ -283,16 +283,7 @@ bool Init(bool open_adapter) struct sockaddr_in* sa = (sockaddr_in*)curaddr->ifa_addr; memcpy(adata->IP_v4, &sa->sin_addr, 4); } - #ifdef __APPLE__ - else if (af == AF_LINK) - { - struct sockaddr_dl* sa = (sockaddr_dl*)curaddr->ifa_addr; - if (sa->sdl_alen != 6) - printf("weird MAC length %d for %s\n", sa->sdl_alen, curaddr->ifa_name); - else - memcpy(adata->MAC, LLADDR(sa), 6); - } - #else + #ifdef __linux__ else if (af == AF_PACKET) { struct sockaddr_ll* sa = (sockaddr_ll*)curaddr->ifa_addr; @@ -301,6 +292,15 @@ bool Init(bool open_adapter) else memcpy(adata->MAC, sa->sll_addr, 6); } + #else + else if (af == AF_LINK) + { + struct sockaddr_dl* sa = (sockaddr_dl*)curaddr->ifa_addr; + if (sa->sdl_alen != 6) + printf("weird MAC length %d for %s\n", sa->sdl_alen, curaddr->ifa_name); + else + memcpy(adata->MAC, LLADDR(sa), 6); + } #endif curaddr = curaddr->ifa_next; } -- cgit v1.2.3 From 906521e7e9ab32f0502a680244db22f53f23b0b7 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Thu, 3 Dec 2020 14:52:36 +0100 Subject: fix 4-bit affine sprites --- src/GPU2D.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp index 07b5b21..c1a2d47 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -2596,11 +2596,12 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi if (spritemode == 1) pixelattr |= 0x80000000; else pixelattr |= 0x10000000; + ytilefactor <<= 5; + pixelsaddr <<= 5; + if (attrib[0] & 0x2000) { // 256-color - ytilefactor <<= 5; - pixelsaddr <<= 5; if (!window) { @@ -2640,10 +2641,6 @@ void GPU2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 wi else { // 16-color - tilenum <<= 5; - ytilefactor <<= 5; - u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; - if (!window) { pixelattr |= 0x1000; -- cgit v1.2.3 From 42e083960e52cce31589714dcc7fab8e173efb81 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Fri, 4 Dec 2020 00:00:35 +0100 Subject: always cap FPS to 1000 --- src/frontend/qt_sdl/main.cpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index 6fcd8ce..2d3749d 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -495,24 +495,24 @@ void EmuThread::run() double frametimeStep = nlines / (60.0 * 263.0); { + bool limitfps = Config::LimitFPS && !fastforward; + + double practicalFramelimit = limitfps ? frametimeStep : 1.0 / 1000.0; + double curtime = SDL_GetPerformanceCounter() * perfCountsSec; - bool limitfps = Config::LimitFPS && !fastforward; - if (limitfps) + frameLimitError += practicalFramelimit - (curtime - lastTime); + if (frameLimitError < -practicalFramelimit) + frameLimitError = -practicalFramelimit; + if (frameLimitError > practicalFramelimit) + frameLimitError = practicalFramelimit; + + if (round(frameLimitError * 1000.0) > 0.0) { - frameLimitError += frametimeStep - (curtime - lastTime); - if (frameLimitError < -frametimeStep) - frameLimitError = -frametimeStep; - if (frameLimitError > frametimeStep) - frameLimitError = frametimeStep; - - if (round(frameLimitError * 1000.0) > 0.0) - { - SDL_Delay(round(frameLimitError * 1000.0)); - double timeBeforeSleep = curtime; - curtime = SDL_GetPerformanceCounter() * perfCountsSec; - frameLimitError -= curtime - timeBeforeSleep; - } + SDL_Delay(round(frameLimitError * 1000.0)); + double timeBeforeSleep = curtime; + curtime = SDL_GetPerformanceCounter() * perfCountsSec; + frameLimitError -= curtime - timeBeforeSleep; } lastTime = curtime; -- cgit v1.2.3