From 8a96dfce18b8de93d8e6d5b21d7fa0ba9bbcab23 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Sat, 25 Jul 2020 20:59:53 +0200 Subject: fix build with JIT disabled fixes #675 and #674 --- src/ARM.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/ARM.cpp') diff --git a/src/ARM.cpp b/src/ARM.cpp index 8530795..ecf94cd 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -80,7 +80,7 @@ ARM::~ARM() ARMv5::ARMv5() : ARM(0) { #ifndef JIT_ENABLED - DTCM = new u8[DTCMSize]; + DTCM = new u8[DTCMPhysicalSize]; #endif } -- cgit v1.2.3 From 5903b11bda0aa181f2914a06650b2cbaf28aa9f1 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Mon, 27 Jul 2020 23:14:23 +0200 Subject: subtract cycles after checking IRQ and Halt also switch back to adding to ARM::Cycles instead of subtracting from them --- src/ARM.cpp | 45 ++++++++++++++++++-------------------- src/ARM.h | 32 +++++++++++++-------------- src/ARMJIT_A64/ARMJIT_Branch.cpp | 10 ++++----- src/ARMJIT_A64/ARMJIT_Compiler.cpp | 16 +++++++------- src/ARMJIT_x64/ARMJIT_Branch.cpp | 2 +- src/ARMJIT_x64/ARMJIT_Compiler.cpp | 16 +++++++------- 6 files changed, 59 insertions(+), 62 deletions(-) (limited to 'src/ARM.cpp') diff --git a/src/ARM.cpp b/src/ARM.cpp index ecf94cd..c1743ea 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -274,15 +274,15 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) if (addr & 0x2) { NextInstr[0] = CodeRead32(addr-2, true) >> 16; - Cycles -= CodeCycles; + Cycles += CodeCycles; NextInstr[1] = CodeRead32(addr+2, false); - Cycles -= CodeCycles; + Cycles += CodeCycles; } else { NextInstr[0] = CodeRead32(addr, true); NextInstr[1] = NextInstr[0] >> 16; - Cycles -= CodeCycles; + Cycles += CodeCycles; } CPSR |= 0x20; @@ -295,9 +295,9 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) if (newregion != oldregion) SetupCodeMem(addr); NextInstr[0] = CodeRead32(addr, true); - Cycles -= CodeCycles; + Cycles += CodeCycles; NextInstr[1] = CodeRead32(addr+4, false); - Cycles -= CodeCycles; + Cycles += CodeCycles; CPSR &= ~0x20; } @@ -337,7 +337,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) NextInstr[0] = CodeRead16(addr); NextInstr[1] = CodeRead16(addr+2); - Cycles -= NDS::ARM7MemTimings[CodeCycles][0] + NDS::ARM7MemTimings[CodeCycles][1]; + Cycles += NDS::ARM7MemTimings[CodeCycles][0] + NDS::ARM7MemTimings[CodeCycles][1]; CPSR |= 0x20; } @@ -350,7 +350,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) NextInstr[0] = CodeRead32(addr); NextInstr[1] = CodeRead32(addr+4); - Cycles -= NDS::ARM7MemTimings[CodeCycles][2] + NDS::ARM7MemTimings[CodeCycles][3]; + Cycles += NDS::ARM7MemTimings[CodeCycles][2] + NDS::ARM7MemTimings[CodeCycles][3]; CPSR &= ~0x20; } @@ -609,7 +609,7 @@ void ARMv5::Execute() }*/ if (IRQ) TriggerIRQ(); - NDS::ARM9Timestamp -= Cycles; + NDS::ARM9Timestamp += Cycles; Cycles = 0; } @@ -643,9 +643,6 @@ void ARMv5::ExecuteJIT() { u32 instrAddr = R[15] - ((CPSR&0x20)?2:4); - // hack so Cycles <= 0 becomes Cycles < 0 - Cycles = NDS::ARM9Target - NDS::ARM9Timestamp - 1; - if ((instrAddr < FastBlockLookupStart || instrAddr >= (FastBlockLookupStart + FastBlockLookupSize)) && !ARMJIT::SetupExecutableRegion(0, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize)) { @@ -661,13 +658,8 @@ void ARMv5::ExecuteJIT() else ARMJIT::CompileBlock(this); - NDS::ARM9Timestamp = NDS::ARM9Target - Cycles - 1; - if (StopExecution) { - if (IRQ) - TriggerIRQ(); - if (Halted || IdleLoop) { bool idleLoop = IdleLoop; @@ -678,7 +670,13 @@ void ARMv5::ExecuteJIT() } break; } + + if (IRQ) + TriggerIRQ(); } + + NDS::ARM9Timestamp += Cycles; + Cycles = 0; } if (Halted == 2) @@ -755,7 +753,7 @@ void ARMv4::Execute() }*/ if (IRQ) TriggerIRQ(); - NDS::ARM7Timestamp -= Cycles; + NDS::ARM7Timestamp += Cycles; Cycles = 0; } @@ -795,8 +793,6 @@ void ARMv4::ExecuteJIT() { u32 instrAddr = R[15] - ((CPSR&0x20)?2:4); - Cycles = NDS::ARM7Target - NDS::ARM7Timestamp - 1; - if ((instrAddr < FastBlockLookupStart || instrAddr >= (FastBlockLookupStart + FastBlockLookupSize)) && !ARMJIT::SetupExecutableRegion(1, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize)) { @@ -812,14 +808,9 @@ void ARMv4::ExecuteJIT() else ARMJIT::CompileBlock(this); - NDS::ARM7Timestamp = NDS::ARM7Target - Cycles - 1; - // TODO optimize this shit!!! if (StopExecution) { - if (IRQ) - TriggerIRQ(); - if (Halted || IdleLoop) { bool idleLoop = IdleLoop; @@ -830,7 +821,13 @@ void ARMv4::ExecuteJIT() } break; } + + if (IRQ) + TriggerIRQ(); } + + NDS::ARM7Timestamp += Cycles; + Cycles = 0; } if (Halted == 2) diff --git a/src/ARM.h b/src/ARM.h index ee6ac96..deacbee 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -202,14 +202,14 @@ public: { // code only. always nonseq 32-bit for ARM9. s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - Cycles -= numC; + Cycles += numC; } void AddCycles_CI(s32 numI) { // code+internal s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - Cycles -= numC + numI; + Cycles += numC + numI; } void AddCycles_CDI() @@ -220,9 +220,9 @@ public: s32 numD = DataCycles; //if (DataRegion != CodeRegion) - Cycles -= std::max(numC + numD - 6, std::max(numC, numD)); + Cycles += std::max(numC + numD - 6, std::max(numC, numD)); //else - // Cycles -= numC + numD; + // Cycles += numC + numD; } void AddCycles_CD() @@ -232,9 +232,9 @@ public: s32 numD = DataCycles; //if (DataRegion != CodeRegion) - Cycles -= std::max(numC + numD - 6, std::max(numC, numD)); + Cycles += std::max(numC + numD - 6, std::max(numC, numD)); //else - // Cycles -= numC + numD; + // Cycles += numC + numD; } void GetCodeMemRegion(u32 addr, NDS::MemRegion* region); @@ -396,13 +396,13 @@ public: void AddCycles_C() { // code only. this code fetch is sequential. - Cycles -= NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?1:3]; + Cycles += NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?1:3]; } void AddCycles_CI(s32 num) { // code+internal. results in a nonseq code fetch. - Cycles -= NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2] + num; + Cycles += NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2] + num; } void AddCycles_CDI() @@ -414,21 +414,21 @@ public: if ((DataRegion >> 24) == 0x02) // mainRAM { if (CodeRegion == 0x02) - Cycles -= numC + numD; + Cycles += numC + numD; else { numC++; - Cycles -= std::max(numC + numD - 3, std::max(numC, numD)); + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); } } else if (CodeRegion == 0x02) { numD++; - Cycles -= std::max(numC + numD - 3, std::max(numC, numD)); + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); } else { - Cycles -= numC + numD + 1; + Cycles += numC + numD + 1; } } @@ -441,17 +441,17 @@ public: if ((DataRegion >> 24) == 0x02) { if (CodeRegion == 0x02) - Cycles -= numC + numD; + Cycles += numC + numD; else - Cycles -= std::max(numC + numD - 3, std::max(numC, numD)); + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); } else if (CodeRegion == 0x02) { - Cycles -= std::max(numC + numD - 3, std::max(numC, numD)); + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); } else { - Cycles -= numC + numD; + Cycles += numC + numD; } } }; diff --git a/src/ARMJIT_A64/ARMJIT_Branch.cpp b/src/ARMJIT_A64/ARMJIT_Branch.cpp index f130938..117eaa0 100644 --- a/src/ARMJIT_A64/ARMJIT_Branch.cpp +++ b/src/ARMJIT_A64/ARMJIT_Branch.cpp @@ -143,7 +143,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles) ConstantCycles += cycles; else - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); } @@ -181,7 +181,7 @@ void* Compiler::Gen_JumpTo9(int kind) STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15])); ADD(W1, W1, W1); - SUB(RCycles, RCycles, W1); + ADD(RCycles, RCycles, W1); RET(); } @@ -201,7 +201,7 @@ void* Compiler::Gen_JumpTo9(int kind) ADD(W2, W1, W1); TSTI2R(W0, 0x2); CSEL(W1, W1, W2, CC_EQ); - SUB(RCycles, RCycles, W1); + ADD(RCycles, RCycles, W1); RET(); } @@ -229,7 +229,7 @@ void* Compiler::Gen_JumpTo7(int kind) UBFX(W2, W3, 0, 8); UBFX(W3, W3, 8, 8); ADD(W2, W3, W2); - SUB(RCycles, RCycles, W2); + ADD(RCycles, RCycles, W2); ANDI2R(W0, W0, ~3); @@ -253,7 +253,7 @@ void* Compiler::Gen_JumpTo7(int kind) UBFX(W2, W3, 16, 8); UBFX(W3, W3, 24, 8); ADD(W2, W3, W2); - SUB(RCycles, RCycles, W2); + ADD(RCycles, RCycles, W2); ANDI2R(W0, W0, ~1); diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp index 413c673..b046123 100644 --- a/src/ARMJIT_A64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp @@ -629,7 +629,7 @@ void Compiler::Comp_BranchSpecialBehaviour(bool taken) { RegCache.PrepareExit(); - SUB(RCycles, RCycles, ConstantCycles); + ADD(RCycles, RCycles, ConstantCycles); QuickTailCall(X0, ARM_Ret); } } @@ -770,7 +770,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[] RegCache.Flush(); - SUB(RCycles, RCycles, ConstantCycles); + ADD(RCycles, RCycles, ConstantCycles); QuickTailCall(X0, ARM_Ret); FlushIcache(); @@ -800,7 +800,7 @@ void Compiler::Comp_AddCycles_C(bool forceNonConstant) if (forceNonConstant) ConstantCycles += cycles; else - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); } void Compiler::Comp_AddCycles_CI(u32 numI) @@ -814,7 +814,7 @@ void Compiler::Comp_AddCycles_CI(u32 numI) if (Thumb || CurInstr.Cond() == 0xE) ConstantCycles += cycles; else - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); } void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift) @@ -825,11 +825,11 @@ void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift) NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2] : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + c; - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); if (Thumb || CurInstr.Cond() >= 0xE) ConstantCycles += cycles; else - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); } void Compiler::Comp_AddCycles_CDI() @@ -866,7 +866,7 @@ void Compiler::Comp_AddCycles_CDI() } if (!Thumb && CurInstr.Cond() < 0xE) - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); else ConstantCycles += cycles; } @@ -910,7 +910,7 @@ void Compiler::Comp_AddCycles_CD() } if ((!Thumb && CurInstr.Cond() < 0xE) && IrregularCycles) - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); else ConstantCycles += cycles; } diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp index bda9e52..819fe3c 100644 --- a/src/ARMJIT_x64/ARMJIT_Branch.cpp +++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp @@ -127,7 +127,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles) ConstantCycles += cycles; else - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); } void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index 7f32f31..1fdbaf8 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -627,7 +627,7 @@ void Compiler::Comp_SpecialBranchBehaviour(bool taken) { RegCache.PrepareExit(); - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); JMP((u8*)&ARM_Ret, true); } } @@ -760,7 +760,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[] RegCache.Flush(); - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); JMP((u8*)ARM_Ret, true); /*FILE* codeout = fopen("codeout", "a"); @@ -779,7 +779,7 @@ void Compiler::Comp_AddCycles_C(bool forceNonConstant) : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles); if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant) - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } @@ -791,7 +791,7 @@ void Compiler::Comp_AddCycles_CI(u32 i) : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i; if (!Thumb && CurInstr.Cond() < 0xE) - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } @@ -805,12 +805,12 @@ void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add) if (!Thumb && CurInstr.Cond() < 0xE) { LEA(32, RSCRATCH, MDisp(i, add + cycles)); - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH)); } else { ConstantCycles += cycles; - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i)); } } @@ -848,7 +848,7 @@ void Compiler::Comp_AddCycles_CDI() } if (!Thumb && CurInstr.Cond() < 0xE) - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } @@ -892,7 +892,7 @@ void Compiler::Comp_AddCycles_CD() } if (IrregularCycles && !Thumb && CurInstr.Cond() < 0xE) - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } -- cgit v1.2.3 From f56aa60eb693ed41212960db60e01ceecf5d64c5 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Tue, 28 Jul 2020 00:44:58 +0200 Subject: check IRQ first then Idle loop apparently I put it this way for a reason --- src/ARM.cpp | 26 +++++++++++++------------- src/ARMJIT.cpp | 10 ++++++---- 2 files changed, 19 insertions(+), 17 deletions(-) (limited to 'src/ARM.cpp') diff --git a/src/ARM.cpp b/src/ARM.cpp index c1743ea..7eeacb7 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -660,19 +660,20 @@ void ARMv5::ExecuteJIT() if (StopExecution) { + // this order is crucial otherwise idle loops waiting for an IRQ won't function + if (IRQ) + TriggerIRQ(); + if (Halted || IdleLoop) { - bool idleLoop = IdleLoop; - IdleLoop = 0; - if ((Halted == 1 || idleLoop) && NDS::ARM9Timestamp < NDS::ARM9Target) + if ((Halted == 1 || IdleLoop) && NDS::ARM9Timestamp < NDS::ARM9Target) { + Cycles = 0; NDS::ARM9Timestamp = NDS::ARM9Target; } + IdleLoop = 0; break; } - - if (IRQ) - TriggerIRQ(); } NDS::ARM9Timestamp += Cycles; @@ -808,22 +809,21 @@ void ARMv4::ExecuteJIT() else ARMJIT::CompileBlock(this); - // TODO optimize this shit!!! if (StopExecution) { + if (IRQ) + TriggerIRQ(); + if (Halted || IdleLoop) { - bool idleLoop = IdleLoop; - IdleLoop = 0; - if ((Halted == 1 || idleLoop) && NDS::ARM7Timestamp < NDS::ARM7Target) + if ((Halted == 1 || IdleLoop) && NDS::ARM7Timestamp < NDS::ARM7Target) { + Cycles = 0; NDS::ARM7Timestamp = NDS::ARM7Target; } + IdleLoop = 0; break; } - - if (IRQ) - TriggerIRQ(); } NDS::ARM7Timestamp += Cycles; diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index 72a3179..5b827e2 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -504,7 +504,7 @@ bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, bool hasLink return false; } -bool IsIdleLoop(FetchedInstr* instrs, int instrsCount) +bool IsIdleLoop(bool thumb, FetchedInstr* instrs, int instrsCount) { // see https://github.com/dolphin-emu/dolphin/blob/master/Source/Core/Core/PowerPC/PPCAnalyst.cpp#L678 // it basically checks if one iteration of a loop depends on another @@ -515,9 +515,11 @@ bool IsIdleLoop(FetchedInstr* instrs, int instrsCount) u16 regsDisallowedToWrite = 0; for (int i = 0; i < instrsCount; i++) { - JIT_DEBUGPRINT("instr %d %x regs(%x %x) %x %x\n", i, instrs[i].Instr, instrs[i].Info.DstRegs, instrs[i].Info.SrcRegs, regsWrittenTo, regsDisallowedToWrite); + JIT_DEBUGPRINT("instr %d %08x regs(%x %x) %x %x\n", i, instrs[i].Instr, instrs[i].Info.DstRegs, instrs[i].Info.SrcRegs, regsWrittenTo, regsDisallowedToWrite); if (instrs[i].Info.SpecialKind == ARMInstrInfo::special_WriteMem) return false; + if (!thumb && instrs[i].Info.Kind >= ARMInstrInfo::ak_MSR_IMM && instrs[i].Info.Kind <= ARMInstrInfo::ak_MRC) + return false; if (i < instrsCount - 1 && instrs[i].Info.Branches()) return false; @@ -852,10 +854,10 @@ void CompileBlock(ARM* cpu) { // we might have an idle loop u32 backwardsOffset = (instrs[i].Addr - target) / (thumb ? 2 : 4); - if (IsIdleLoop(&instrs[i - backwardsOffset], backwardsOffset + 1)) + if (IsIdleLoop(thumb, &instrs[i - backwardsOffset], backwardsOffset + 1)) { instrs[i].BranchFlags |= branch_IdleBranch; - JIT_DEBUGPRINT("found %s idle loop %d in block %x\n", thumb ? "thumb" : "arm", cpu->Num, blockAddr); + JIT_DEBUGPRINT("found %s idle loop %d in block %08x\n", thumb ? "thumb" : "arm", cpu->Num, blockAddr); } } else if (hasBranched && !isBackJump && i + 1 < Config::JIT_MaxBlockSize) -- cgit v1.2.3