diff options
Diffstat (limited to 'src/ARMJIT.cpp')
-rw-r--r-- | src/ARMJIT.cpp | 755 |
1 files changed, 644 insertions, 111 deletions
diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index 85cadf3..686bdd6 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -1,122 +1,137 @@ #include "ARMJIT.h" #include <string.h> +#include <assert.h> #include "Config.h" +#include "ARMJIT_Internal.h" #include "ARMJIT_x64/ARMJIT_Compiler.h" +#include "ARMInterpreter_ALU.h" +#include "ARMInterpreter_LoadStore.h" +#include "ARMInterpreter_Branch.h" +#include "ARMInterpreter.h" + +#include "GPU3D.h" +#include "SPU.h" +#include "Wifi.h" + namespace ARMJIT { +#define JIT_DEBUGPRINT(msg, ...) + Compiler* compiler; -BlockCache cache; -#define DUP2(x) x, x +const u32 ExeMemRegionSizes[] = { + 0x8000, // Unmapped Region (dummy) + 0x8000, // ITCM + 4*1024*1024, // Main RAM + 0x8000, // SWRAM + 0xA4000, // LCDC + 0x8000, // ARM9 BIOS + 0x4000, // ARM7 BIOS + 0x10000, // ARM7 WRAM + 0x40000 // ARM7 WVRAM +}; -static ptrdiff_t JIT_MEM[2][32] = { - //arm9 - { - /* 0X*/ DUP2(offsetof(BlockCache, ARM9_ITCM)), - /* 1X*/ DUP2(offsetof(BlockCache, ARM9_ITCM)), // mirror - /* 2X*/ DUP2(offsetof(BlockCache, MainRAM)), - /* 3X*/ DUP2(offsetof(BlockCache, SWRAM)), - /* 4X*/ DUP2(-1), - /* 5X*/ DUP2(-1), - /* 6X*/ -1, - offsetof(BlockCache, ARM9_LCDC), // Plain ARM9-CPU Access (LCDC mode) (max 656KB) - /* 7X*/ DUP2(-1), - /* 8X*/ DUP2(-1), - /* 9X*/ DUP2(-1), - /* AX*/ DUP2(-1), - /* BX*/ DUP2(-1), - /* CX*/ DUP2(-1), - /* DX*/ DUP2(-1), - /* EX*/ DUP2(-1), - /* FX*/ DUP2(offsetof(BlockCache, ARM9_BIOS)) - }, - //arm7 - { - /* 0X*/ DUP2(offsetof(BlockCache, ARM7_BIOS)), - /* 1X*/ DUP2(-1), - /* 2X*/ DUP2(offsetof(BlockCache, MainRAM)), - /* 3X*/ offsetof(BlockCache, SWRAM), - offsetof(BlockCache, ARM7_WRAM), - /* 4X*/ DUP2(-1), - /* 5X*/ DUP2(-1), - /* 6X*/ DUP2(offsetof(BlockCache, ARM7_WVRAM)), /* contrary to Gbatek, melonDS and itself, - DeSmuME doesn't mirror the 64 MB region at 0x6800000 */ - /* 7X*/ DUP2(-1), - /* 8X*/ DUP2(-1), - /* 9X*/ DUP2(-1), - /* AX*/ DUP2(-1), - /* BX*/ DUP2(-1), - /* CX*/ DUP2(-1), - /* DX*/ DUP2(-1), - /* EX*/ DUP2(-1), - /* FX*/ DUP2(-1) - } +const u32 ExeMemRegionOffsets[] = { + 0, + 0x8000, + 0x10000, + 0x410000, + 0x418000, + 0x4BC000, + 0x4C4000, + 0x4C8000, + 0x4D8000, + 0x518000, }; -static u32 JIT_MASK[2][32] = { +#define DUP2(x) x, x + +const static ExeMemKind JIT_MEM[2][32] = { //arm9 { - /* 0X*/ DUP2(0x00007FFF), - /* 1X*/ DUP2(0x00007FFF), - /* 2X*/ DUP2(0x003FFFFF), - /* 3X*/ DUP2(0x00007FFF), - /* 4X*/ DUP2(0x00000000), - /* 5X*/ DUP2(0x00000000), - /* 6X*/ 0x00000000, - 0x000FFFFF, - /* 7X*/ DUP2(0x00000000), - /* 8X*/ DUP2(0x00000000), - /* 9X*/ DUP2(0x00000000), - /* AX*/ DUP2(0x00000000), - /* BX*/ DUP2(0x00000000), - /* CX*/ DUP2(0x00000000), - /* DX*/ DUP2(0x00000000), - /* EX*/ DUP2(0x00000000), - /* FX*/ DUP2(0x00007FFF) + /* 0X*/ DUP2(exeMem_ITCM), + /* 1X*/ DUP2(exeMem_ITCM), // mirror + /* 2X*/ DUP2(exeMem_MainRAM), + /* 3X*/ DUP2(exeMem_SWRAM), + /* 4X*/ DUP2(exeMem_Unmapped), + /* 5X*/ DUP2(exeMem_Unmapped), + /* 6X*/ exeMem_Unmapped, + exeMem_LCDC, // Plain ARM9-CPU Access (LCDC mode) (max 656KB) + /* 7X*/ DUP2(exeMem_Unmapped), + /* 8X*/ DUP2(exeMem_Unmapped), + /* 9X*/ DUP2(exeMem_Unmapped), + /* AX*/ DUP2(exeMem_Unmapped), + /* BX*/ DUP2(exeMem_Unmapped), + /* CX*/ DUP2(exeMem_Unmapped), + /* DX*/ DUP2(exeMem_Unmapped), + /* EX*/ DUP2(exeMem_Unmapped), + /* FX*/ DUP2(exeMem_ARM9_BIOS) }, //arm7 { - /* 0X*/ DUP2(0x00003FFF), - /* 1X*/ DUP2(0x00000000), - /* 2X*/ DUP2(0x003FFFFF), - /* 3X*/ 0x00007FFF, - 0x0000FFFF, - /* 4X*/ 0x00000000, - 0x0000FFFF, - /* 5X*/ DUP2(0x00000000), - /* 6X*/ DUP2(0x0003FFFF), - /* 7X*/ DUP2(0x00000000), - /* 8X*/ DUP2(0x00000000), - /* 9X*/ DUP2(0x00000000), - /* AX*/ DUP2(0x00000000), - /* BX*/ DUP2(0x00000000), - /* CX*/ DUP2(0x00000000), - /* DX*/ DUP2(0x00000000), - /* EX*/ DUP2(0x00000000), - /* FX*/ DUP2(0x00000000) + /* 0X*/ DUP2(exeMem_ARM7_BIOS), + /* 1X*/ DUP2(exeMem_Unmapped), + /* 2X*/ DUP2(exeMem_MainRAM), + /* 3X*/ exeMem_SWRAM, + exeMem_ARM7_WRAM, + /* 4X*/ DUP2(exeMem_Unmapped), + /* 5X*/ DUP2(exeMem_Unmapped), + /* 6X*/ DUP2(exeMem_ARM7_WVRAM), /* contrary to Gbatek, melonDS and itself, + DeSmuME doesn't mirror the 64 MB region at 0x6800000 */ + /* 7X*/ DUP2(exeMem_Unmapped), + /* 8X*/ DUP2(exeMem_Unmapped), + /* 9X*/ DUP2(exeMem_Unmapped), + /* AX*/ DUP2(exeMem_Unmapped), + /* BX*/ DUP2(exeMem_Unmapped), + /* CX*/ DUP2(exeMem_Unmapped), + /* DX*/ DUP2(exeMem_Unmapped), + /* EX*/ DUP2(exeMem_Unmapped), + /* FX*/ DUP2(exeMem_Unmapped) } }; #undef DUP2 +/* + translates address to pseudo physical address + - more compact, eliminates mirroring, everything comes in a row + - we only need one translation table +*/ +u32 AddrTranslate9[0x2000]; +u32 AddrTranslate7[0x4000]; -void Init() +JitBlockEntry FastBlockAccess[ExeMemSpaceSize / 2]; +AddressRange CodeRanges[ExeMemSpaceSize / 256]; + +TinyVector<JitBlock*> JitBlocks; +JitBlock* RestoreCandidates[0x1000] = {NULL}; + +u32 HashRestoreCandidate(u32 pseudoPhysicalAddr) { - memset(&cache, 0, sizeof(BlockCache)); + return (u32)(((u64)pseudoPhysicalAddr * 11400714819323198485llu) >> 53); +} +void Init() +{ for (int i = 0; i < 0x2000; i++) - cache.AddrMapping9[i] = JIT_MEM[0][i >> 8] == -1 ? NULL : - (CompiledBlock*)((u8*)&cache + JIT_MEM[0][i >> 8]) - + (((i << 15) & JIT_MASK[0][i >> 8]) >> 1); + { + ExeMemKind kind = JIT_MEM[0][i >> 8]; + u32 size = ExeMemRegionSizes[kind]; + + AddrTranslate9[i] = ExeMemRegionOffsets[kind] + ((i << 15) & (size - 1)); + } for (int i = 0; i < 0x4000; i++) - cache.AddrMapping7[i] = JIT_MEM[1][i >> 9] == -1 ? NULL : - (CompiledBlock*)((u8*)&cache + JIT_MEM[1][i >> 9]) - + (((i << 14) & JIT_MASK[1][i >> 9]) >> 1); + { + ExeMemKind kind = JIT_MEM[1][i >> 9]; + u32 size = ExeMemRegionSizes[kind]; + + AddrTranslate7[i] = ExeMemRegionOffsets[kind] + ((i << 14) & (size - 1)); + } compiler = new Compiler(); } @@ -126,7 +141,7 @@ void DeInit() delete compiler; } -void floodFillSetFlags(FetchedInstr instrs[], int start, u8 flags) +void FloodFillSetFlags(FetchedInstr instrs[], int start, u8 flags) { for (int j = start; j >= 0; j--) { @@ -144,7 +159,154 @@ void floodFillSetFlags(FetchedInstr instrs[], int start, u8 flags) } } -CompiledBlock CompileBlock(ARM* cpu) +bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, u32& targetAddr) +{ + if (thumb) + { + u32 r15 = instr.Addr + 4; + cond = 0xE; + + if (instr.Info.Kind == ARMInstrInfo::tk_BL_LONG && !(instr.Instr & (1 << 12))) + { + targetAddr = r15 + ((s32)((instr.Instr & 0x7FF) << 21) >> 9); + targetAddr += ((instr.Instr >> 16) & 0x7FF) << 1; + return true; + } + else if (instr.Info.Kind == ARMInstrInfo::tk_B) + { + s32 offset = (s32)((instr.Instr & 0x7FF) << 21) >> 20; + targetAddr = r15 + offset; + return true; + } + else if (instr.Info.Kind == ARMInstrInfo::tk_BCOND) + { + cond = (instr.Instr >> 8) & 0xF; + s32 offset = (s32)(instr.Instr << 24) >> 23; + targetAddr = r15 + offset; + return true; + } + } + else + { + cond = instr.Cond(); + if (instr.Info.Kind == ARMInstrInfo::ak_BL + || instr.Info.Kind == ARMInstrInfo::ak_B) + { + s32 offset = (s32)(instr.Instr << 8) >> 6; + u32 r15 = instr.Addr + 8; + targetAddr = r15 + offset; + return true; + } + } + return false; +} + +bool IsIdleLoop(FetchedInstr* instrs, int instrsCount) +{ + // see https://github.com/dolphin-emu/dolphin/blob/master/Source/Core/Core/PowerPC/PPCAnalyst.cpp#L678 + // it basically checks if one iteration of a loop depends on another + // the rules are quite simple + + u16 regsWrittenTo = 0; + u16 regsDisallowedToWrite = 0; + for (int i = 0; i < instrsCount; i++) + { + //printf("instr %d %x regs(%x %x) %x %x\n", i, instrs[i].Instr, instrs[i].Info.DstRegs, instrs[i].Info.SrcRegs, regsWrittenTo, regsDisallowedToWrite); + if (instrs[i].Info.SpecialKind == ARMInstrInfo::special_WriteMem) + return false; + if (i < instrsCount - 1 && instrs[i].Info.Branches()) + return false; + + u16 srcRegs = instrs[i].Info.SrcRegs & ~(1 << 15); + u16 dstRegs = instrs[i].Info.DstRegs & ~(1 << 15); + + regsDisallowedToWrite |= srcRegs & ~regsWrittenTo; + + if (dstRegs & regsDisallowedToWrite) + return false; + regsWrittenTo |= dstRegs; + } + return true; +} + +typedef void (*InterpreterFunc)(ARM* cpu); + +#define F(x) &ARMInterpreter::A_##x +#define F_ALU(name, s) \ + F(name##_REG_LSL_IMM##s), F(name##_REG_LSR_IMM##s), F(name##_REG_ASR_IMM##s), F(name##_REG_ROR_IMM##s), \ + F(name##_REG_LSL_REG##s), F(name##_REG_LSR_REG##s), F(name##_REG_ASR_REG##s), F(name##_REG_ROR_REG##s), F(name##_IMM##s) +#define F_MEM_WB(name) \ + F(name##_REG_LSL), F(name##_REG_LSR), F(name##_REG_ASR), F(name##_REG_ROR), F(name##_IMM), \ + F(name##_POST_REG_LSL), F(name##_POST_REG_LSR), F(name##_POST_REG_ASR), F(name##_POST_REG_ROR), F(name##_POST_IMM) +#define F_MEM_HD(name) \ + F(name##_REG), F(name##_IMM), F(name##_POST_REG), F(name##_POST_IMM) +InterpreterFunc InterpretARM[ARMInstrInfo::ak_Count] = +{ + F_ALU(AND,), F_ALU(AND,_S), + F_ALU(EOR,), F_ALU(EOR,_S), + F_ALU(SUB,), F_ALU(SUB,_S), + F_ALU(RSB,), F_ALU(RSB,_S), + F_ALU(ADD,), F_ALU(ADD,_S), + F_ALU(ADC,), F_ALU(ADC,_S), + F_ALU(SBC,), F_ALU(SBC,_S), + F_ALU(RSC,), F_ALU(RSC,_S), + F_ALU(ORR,), F_ALU(ORR,_S), + F_ALU(MOV,), F_ALU(MOV,_S), + F_ALU(BIC,), F_ALU(BIC,_S), + F_ALU(MVN,), F_ALU(MVN,_S), + F_ALU(TST,), + F_ALU(TEQ,), + F_ALU(CMP,), + F_ALU(CMN,), + + F(MUL), F(MLA), F(UMULL), F(UMLAL), F(SMULL), F(SMLAL), F(SMLAxy), F(SMLAWy), F(SMULWy), F(SMLALxy), F(SMULxy), + F(CLZ), F(QADD), F(QDADD), F(QSUB), F(QDSUB), + + F_MEM_WB(STR), + F_MEM_WB(STRB), + F_MEM_WB(LDR), + F_MEM_WB(LDRB), + + F_MEM_HD(STRH), + F_MEM_HD(LDRD), + F_MEM_HD(STRD), + F_MEM_HD(LDRH), + F_MEM_HD(LDRSB), + F_MEM_HD(LDRSH), + + F(SWP), F(SWPB), + F(LDM), F(STM), + + F(B), F(BL), F(BLX_IMM), F(BX), F(BLX_REG), + F(UNK), F(MSR_IMM), F(MSR_REG), F(MRS), F(MCR), F(MRC), F(SVC) +}; +#undef F_ALU +#undef F_MEM_WB +#undef F_MEM_HD +#undef F + +#define F(x) ARMInterpreter::T_##x +InterpreterFunc InterpretTHUMB[ARMInstrInfo::tk_Count] = +{ + F(LSL_IMM), F(LSR_IMM), F(ASR_IMM), + F(ADD_REG_), F(SUB_REG_), F(ADD_IMM_), F(SUB_IMM_), + F(MOV_IMM), F(CMP_IMM), F(ADD_IMM), F(SUB_IMM), + F(AND_REG), F(EOR_REG), F(LSL_REG), F(LSR_REG), F(ASR_REG), + F(ADC_REG), F(SBC_REG), F(ROR_REG), F(TST_REG), F(NEG_REG), + F(CMP_REG), F(CMN_REG), F(ORR_REG), F(MUL_REG), F(BIC_REG), F(MVN_REG), + F(ADD_HIREG), F(CMP_HIREG), F(MOV_HIREG), + F(ADD_PCREL), F(ADD_SPREL), F(ADD_SP), + F(LDR_PCREL), F(STR_REG), F(STRB_REG), F(LDR_REG), F(LDRB_REG), F(STRH_REG), + F(LDRSB_REG), F(LDRH_REG), F(LDRSH_REG), F(STR_IMM), F(LDR_IMM), F(STRB_IMM), + F(LDRB_IMM), F(STRH_IMM), F(LDRH_IMM), F(STR_SPREL), F(LDR_SPREL), + F(PUSH), F(POP), F(LDMIA), F(STMIA), + F(BCOND), F(BX), F(BLX_REG), F(B), F(BL_LONG_1), F(BL_LONG_2), + F(UNK), F(SVC), + NULL // BL_LONG psudo opcode +}; +#undef F + +void CompileBlock(ARM* cpu) { bool thumb = cpu->CPSR & 0x20; @@ -153,17 +315,41 @@ CompiledBlock CompileBlock(ARM* cpu) if (Config::JIT_MaxBlockSize > 32) Config::JIT_MaxBlockSize = 32; + u32 blockAddr = cpu->R[15] - (thumb ? 2 : 4); + if (!(cpu->Num == 0 + ? IsMapped<0>(blockAddr) + : IsMapped<1>(blockAddr))) + { + printf("Trying to compile a block in unmapped memory: %x\n", blockAddr); + } + + u32 pseudoPhysicalAddr = cpu->Num == 0 + ? TranslateAddr<0>(blockAddr) + : TranslateAddr<1>(blockAddr); + FetchedInstr instrs[Config::JIT_MaxBlockSize]; int i = 0; - u32 blockAddr = cpu->R[15] - (thumb ? 2 : 4); u32 r15 = cpu->R[15]; + + u32 addresseRanges[32] = {}; + u32 numAddressRanges = 0; + cpu->FillPipeline(); u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]}; u32 nextInstrAddr[2] = {blockAddr, r15}; + + JIT_DEBUGPRINT("start block %x (%x) %p %p (region invalidates %dx)\n", + blockAddr, pseudoPhysicalAddr, FastBlockAccess[pseudoPhysicalAddr / 2], + cpu->Num == 0 ? LookUpBlock<0>(blockAddr) : LookUpBlock<1>(blockAddr), + CodeRanges[pseudoPhysicalAddr / 256].TimesInvalidated); + + u32 lastSegmentStart = blockAddr; + do { r15 += thumb ? 2 : 4; + instrs[i].BranchFlags = 0; instrs[i].SetFlags = 0; instrs[i].Instr = nextInstr[0]; instrs[i].NextInstr[0] = nextInstr[0] = nextInstr[1]; @@ -171,6 +357,25 @@ CompiledBlock CompileBlock(ARM* cpu) instrs[i].Addr = nextInstrAddr[0]; nextInstrAddr[0] = nextInstrAddr[1]; nextInstrAddr[1] = r15; + JIT_DEBUGPRINT("instr %08x %x\n", instrs[i].Instr & (thumb ? 0xFFFF : ~0), instrs[i].Addr); + + u32 translatedAddr = (cpu->Num == 0 + ? TranslateAddr<0>(instrs[i].Addr) + : TranslateAddr<1>(instrs[i].Addr)) & ~0xFF; + if (i == 0 || translatedAddr != addresseRanges[numAddressRanges - 1]) + { + bool returning = false; + for (int j = 0; j < numAddressRanges; j++) + { + if (addresseRanges[j] == translatedAddr) + { + returning = true; + break; + } + } + if (!returning) + addresseRanges[numAddressRanges++] = translatedAddr; + } if (cpu->Num == 0) { @@ -198,6 +403,34 @@ CompiledBlock CompileBlock(ARM* cpu) instrs[i].NextInstr[1] = nextInstr[1]; instrs[i].Info = ARMInstrInfo::Decode(thumb, cpu->Num, instrs[i].Instr); + cpu->R[15] = r15; + cpu->CurInstr = instrs[i].Instr; + cpu->CodeCycles = instrs[i].CodeCycles; + + if (thumb) + { + InterpretTHUMB[instrs[i].Info.Kind](cpu); + } + else + { + if (cpu->Num == 0 && instrs[i].Info.Kind == ARMInstrInfo::ak_BLX_IMM) + { + ARMInterpreter::A_BLX_IMM(cpu); + } + else + { + u32 icode = ((instrs[i].Instr >> 4) & 0xF) | ((instrs[i].Instr >> 16) & 0xFF0); + assert(InterpretARM[instrs[i].Info.Kind] == ARMInterpreter::ARMInstrTable[icode] || instrs[i].Info.Kind == ARMInstrInfo::ak_MOV_REG_LSL_IMM); + if (cpu->CheckCondition(instrs[i].Cond())) + InterpretARM[instrs[i].Info.Kind](cpu); + else + cpu->AddCycles_C(); + } + } + + instrs[i].DataCycles = cpu->DataCycles; + instrs[i].DataRegion = cpu->DataRegion; + if (thumb && instrs[i].Info.Kind == ARMInstrInfo::tk_BL_LONG_2 && i > 0 && instrs[i - 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_1) { @@ -208,40 +441,340 @@ CompiledBlock CompileBlock(ARM* cpu) instrs[i - 1].Info.EndBlock = true; i--; } - i++; + if (instrs[i].Info.Branches() && Config::JIT_BrancheOptimisations) + { + bool hasBranched = cpu->R[15] != r15; + + u32 cond, target; + bool staticBranch = DecodeBranch(thumb, instrs[i], cond, target); + JIT_DEBUGPRINT("branch cond %x target %x (%d)\n", cond, target, hasBranched); + + if (staticBranch) + { + bool isBackJump = false; + if (hasBranched) + { + for (int j = 0; j < i; j++) + { + if (instrs[i].Addr == target) + { + isBackJump = true; + break; + } + } + } + + if (cond < 0xE && target < instrs[i].Addr && target >= lastSegmentStart) + { + // we might have an idle loop + u32 offset = (target - blockAddr) / (thumb ? 2 : 4); + if (IsIdleLoop(instrs + offset, i - offset + 1)) + { + instrs[i].BranchFlags |= branch_IdleBranch; + JIT_DEBUGPRINT("found %s idle loop %d in block %x\n", thumb ? "thumb" : "arm", cpu->Num, blockAddr); + } + } + else if (hasBranched && (!thumb || cond == 0xE) && !isBackJump && i + 1 < Config::JIT_MaxBlockSize) + { + u32 targetPseudoPhysical = cpu->Num == 0 + ? TranslateAddr<0>(target) + : TranslateAddr<1>(target); + + r15 = target + (thumb ? 2 : 4); + assert(r15 == cpu->R[15]); + + JIT_DEBUGPRINT("block lengthened by static branch (target %x)\n", target); + + nextInstr[0] = cpu->NextInstr[0]; + nextInstr[1] = cpu->NextInstr[1]; + + nextInstrAddr[0] = target; + nextInstrAddr[1] = r15; + + lastSegmentStart = target; + + instrs[i].Info.EndBlock = false; + + if (cond < 0xE) + instrs[i].BranchFlags |= branch_FollowCondTaken; + } + } + + if (!hasBranched && cond < 0xE && i + 1 < Config::JIT_MaxBlockSize) + { + instrs[i].Info.EndBlock = false; + instrs[i].BranchFlags |= branch_FollowCondNotTaken; + } + } + + i++; bool canCompile = compiler->CanCompile(thumb, instrs[i - 1].Info.Kind); - if (instrs[i - 1].Info.ReadFlags != 0 || !canCompile) - floodFillSetFlags(instrs, i - 2, canCompile ? instrs[i - 1].Info.ReadFlags : 0xF); - } while(!instrs[i - 1].Info.EndBlock && i < Config::JIT_MaxBlockSize); + bool secondaryFlagReadCond = !canCompile || (instrs[i - 1].BranchFlags & (branch_FollowCondTaken | branch_FollowCondNotTaken)); + if (instrs[i - 1].Info.ReadFlags != 0 || secondaryFlagReadCond) + FloodFillSetFlags(instrs, i - 2, !secondaryFlagReadCond ? instrs[i - 1].Info.ReadFlags : 0xF); + } while(!instrs[i - 1].Info.EndBlock && i < Config::JIT_MaxBlockSize && !cpu->Halted); - floodFillSetFlags(instrs, i - 1, 0xF); + u32 restoreSlot = HashRestoreCandidate(pseudoPhysicalAddr); + JitBlock* prevBlock = RestoreCandidates[restoreSlot]; + bool mayRestore = true; + if (prevBlock && prevBlock->PseudoPhysicalAddr == pseudoPhysicalAddr) + { + RestoreCandidates[restoreSlot] = NULL; + if (prevBlock->NumInstrs == i) + { + for (int j = 0; j < i; j++) + { + if (prevBlock->Instrs()[j] != instrs[j].Instr) + { + mayRestore = false; + break; + } + } + } + else + mayRestore = false; - CompiledBlock block = compiler->CompileBlock(cpu, instrs, i); + if (prevBlock->NumAddresses == numAddressRanges) + { + for (int j = 0; j < numAddressRanges; j++) + { + if (prevBlock->AddressRanges()[j] != addresseRanges[j]) + { + mayRestore = false; + break; + } + } + } + else + mayRestore = false; + } + else + { + mayRestore = false; + prevBlock = NULL; + } - if (cpu->Num == 0) - InsertBlock<0>(blockAddr, block); + JitBlock* block; + if (!mayRestore) + { + if (prevBlock) + delete prevBlock; + + block = new JitBlock(i, numAddressRanges); + for (int j = 0; j < i; j++) + block->Instrs()[j] = instrs[j].Instr; + for (int j = 0; j < numAddressRanges; j++) + block->AddressRanges()[j] = addresseRanges[j]; + + block->StartAddr = blockAddr; + block->PseudoPhysicalAddr = pseudoPhysicalAddr; + + FloodFillSetFlags(instrs, i - 1, 0xF); + + block->EntryPoint = compiler->CompileBlock(cpu, thumb, instrs, i); + } else - InsertBlock<1>(blockAddr, block); + { + JIT_DEBUGPRINT("restored! %p\n", prevBlock); + block = prevBlock; + } + + for (int j = 0; j < numAddressRanges; j++) + { + assert(addresseRanges[j] == block->AddressRanges()[j]); + CodeRanges[addresseRanges[j] / 256].Blocks.Add(block); + } + + FastBlockAccess[block->PseudoPhysicalAddr / 2] = block->EntryPoint; - return block; + JitBlocks.Add(block); } -void InvalidateBlockCache() +void InvalidateByAddr(u32 pseudoPhysical) { - printf("Resetting JIT block cache...\n"); + JIT_DEBUGPRINT("invalidating by addr %x\n", pseudoPhysical); + AddressRange* range = &CodeRanges[pseudoPhysical / 256]; + int startLength = range->Blocks.Length; + for (int i = 0; i < range->Blocks.Length; i++) + { + assert(range->Blocks.Length == startLength); + JitBlock* block = range->Blocks[i]; + for (int j = 0; j < block->NumAddresses; j++) + { + u32 addr = block->AddressRanges()[j]; + if ((addr / 256) != (pseudoPhysical / 256)) + { + AddressRange* otherRange = &CodeRanges[addr / 256]; + assert(otherRange != range); + assert(otherRange->Blocks.RemoveByValue(block)); + } + } + + assert(JitBlocks.RemoveByValue(block)); + + FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL; - memset(cache.MainRAM, 0, sizeof(cache.MainRAM)); - memset(cache.SWRAM, 0, sizeof(cache.SWRAM)); - memset(cache.ARM9_BIOS, 0, sizeof(cache.ARM9_BIOS)); - memset(cache.ARM9_ITCM, 0, sizeof(cache.ARM9_ITCM)); - memset(cache.ARM9_LCDC, 0, sizeof(cache.ARM9_LCDC)); - memset(cache.ARM7_BIOS, 0, sizeof(cache.ARM7_BIOS)); - memset(cache.ARM7_WRAM, 0, sizeof(cache.ARM7_WRAM)); - memset(cache.ARM7_WVRAM, 0, sizeof(cache.ARM7_WVRAM)); + u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr); + if (RestoreCandidates[slot] && RestoreCandidates[slot] != block) + delete RestoreCandidates[slot]; + + RestoreCandidates[slot] = block; + } + if ((range->TimesInvalidated + 1) > range->TimesInvalidated) + range->TimesInvalidated++; + + range->Blocks.Clear(); +} + +void InvalidateByAddr7(u32 addr) +{ + u32 pseudoPhysical = TranslateAddr<1>(addr); + if (__builtin_expect(CodeRanges[pseudoPhysical / 256].Blocks.Length > 0, false)) + InvalidateByAddr(pseudoPhysical); +} + +void InvalidateITCM(u32 addr) +{ + u32 pseudoPhysical = addr + ExeMemRegionOffsets[exeMem_ITCM]; + if (CodeRanges[pseudoPhysical / 256].Blocks.Length > 0) + InvalidateByAddr(pseudoPhysical); +} + +void InvalidateAll() +{ + JIT_DEBUGPRINT("invalidating all %x\n", JitBlocks.Length); + for (int i = 0; i < JitBlocks.Length; i++) + { + JitBlock* block = JitBlocks[i]; + + FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL; + + for (int j = 0; j < block->NumAddresses; j++) + { + u32 addr = block->AddressRanges()[j]; + AddressRange* range = &CodeRanges[addr / 256]; + range->Blocks.Clear(); + if (range->TimesInvalidated + 1 > range->TimesInvalidated) + range->TimesInvalidated++; + } + + u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr); + if (RestoreCandidates[slot] && RestoreCandidates[slot] != block) + delete RestoreCandidates[slot]; + + RestoreCandidates[slot] = block; + } + + JitBlocks.Clear(); +} + +void ResetBlockCache() +{ + printf("Resetting JIT block cache...\n"); + + memset(FastBlockAccess, 0, sizeof(FastBlockAccess)); + for (int i = 0; i < sizeof(RestoreCandidates)/sizeof(RestoreCandidates[0]); i++) + { + if (RestoreCandidates[i]) + { + delete RestoreCandidates[i]; + RestoreCandidates[i] = NULL; + } + } + for (int i = 0; i < JitBlocks.Length; i++) + { + JitBlock* block = JitBlocks[i]; + for (int j = 0; j < block->NumAddresses; j++) + { + u32 addr = block->AddressRanges()[j]; + CodeRanges[addr / 256].Blocks.Clear(); + CodeRanges[addr / 256].TimesInvalidated = 0; + } + delete block; + } + JitBlocks.Clear(); compiler->Reset(); } +void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) +{ + if (cpu->Num == 0) + { + if ((addr & 0xFF000000) == 0x04000000) + { + /* + unfortunately we can't map GPU2D this way + since it's hidden inside an object + + though GPU3D registers are accessed much more intensive + */ + if (addr >= 0x04000320 && addr < 0x040006A4) + { + switch (size | store) + { + case 8: return (void*)GPU3D::Read8; + case 9: return (void*)GPU3D::Write8; + case 16: return (void*)GPU3D::Read16; + case 17: return (void*)GPU3D::Write16; + case 32: return (void*)GPU3D::Read32; + case 33: return (void*)GPU3D::Write32; + } + } + + switch (size | store) + { + case 8: return (void*)NDS::ARM9IORead8; + case 9: return (void*)NDS::ARM9IOWrite8; + case 16: return (void*)NDS::ARM9IORead16; + case 17: return (void*)NDS::ARM9IOWrite16; + case 32: return (void*)NDS::ARM9IORead32; + case 33: return (void*)NDS::ARM9IOWrite32; + } + } + } + else + { + switch (addr & 0xFF800000) + { + case 0x04000000: + if (addr >= 0x04000400 && addr < 0x04000520) + { + switch (size | store) + { + case 8: return (void*)SPU::Read8; + case 9: return (void*)SPU::Write8; + case 16: return (void*)SPU::Read16; + case 17: return (void*)SPU::Write16; + case 32: return (void*)SPU::Read32; + case 33: return (void*)SPU::Write32; + } + } + + switch (size | store) + { + case 8: return (void*)NDS::ARM7IORead8; + case 9: return (void*)NDS::ARM7IOWrite8; + case 16: return (void*)NDS::ARM7IORead16; + case 17: return (void*)NDS::ARM7IOWrite16; + case 32: return (void*)NDS::ARM7IORead32; + case 33: return (void*)NDS::ARM7IOWrite32; + } + break; + case 0x04800000: + if (addr < 0x04810000 && size == 16) + { + if (store) + return (void*)Wifi::Write; + else + return (void*)Wifi::Read; + } + break; + } + } + return NULL; +} + }
\ No newline at end of file |