diff options
author | RSDuck <rsduck@users.noreply.github.com> | 2020-07-23 17:43:25 +0200 |
---|---|---|
committer | RSDuck <rsduck@users.noreply.github.com> | 2020-07-23 17:43:25 +0200 |
commit | e63bd7e38c6bc75b9a34ab820ee53422cdcf8e63 (patch) | |
tree | 258217443a13d4759ed2bc5449b4a3490380157b | |
parent | 2f9a6b7c0346c31cbed34a006174d9a9b4efb79e (diff) |
for some reason tabs and spaces were mixed
-rw-r--r-- | src/ARMJIT.cpp | 1828 | ||||
-rw-r--r-- | src/ARMJIT_Internal.h | 270 | ||||
-rw-r--r-- | src/ARMJIT_Memory.cpp | 1702 | ||||
-rw-r--r-- | src/ARMJIT_Memory.h | 44 | ||||
-rw-r--r-- | src/ARMJIT_RegisterCache.h | 24 |
5 files changed, 1934 insertions, 1934 deletions
diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index 0a0b52f..72a3179 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -71,79 +71,79 @@ u64 FastBlockLookupNWRAM_C[DSi::NWRAMSize / 2]; const u32 CodeRegionSizes[ARMJIT_Memory::memregions_Count] = { - 0, - ITCMPhysicalSize, - 0, - sizeof(NDS::ARM9BIOS), - NDS::MainRAMMaxSize, - NDS::SharedWRAMSize, - 0, - 0x100000, - sizeof(NDS::ARM7BIOS), - NDS::ARM7WRAMSize, - 0, - 0, - 0x40000, - 0x10000, - 0x10000, - DSi::NWRAMSize, - DSi::NWRAMSize, - DSi::NWRAMSize, + 0, + ITCMPhysicalSize, + 0, + sizeof(NDS::ARM9BIOS), + NDS::MainRAMMaxSize, + NDS::SharedWRAMSize, + 0, + 0x100000, + sizeof(NDS::ARM7BIOS), + NDS::ARM7WRAMSize, + 0, + 0, + 0x40000, + 0x10000, + 0x10000, + DSi::NWRAMSize, + DSi::NWRAMSize, + DSi::NWRAMSize, }; AddressRange* const CodeMemRegions[ARMJIT_Memory::memregions_Count] = { - NULL, - CodeIndexITCM, - NULL, - CodeIndexARM9BIOS, - CodeIndexMainRAM, - CodeIndexSWRAM, - NULL, - CodeIndexVRAM, - CodeIndexARM7BIOS, - CodeIndexARM7WRAM, - NULL, - NULL, - CodeIndexARM7WVRAM, - CodeIndexBIOS9DSi, - CodeIndexBIOS7DSi, - CodeIndexNWRAM_A, - CodeIndexNWRAM_B, - CodeIndexNWRAM_C + NULL, + CodeIndexITCM, + NULL, + CodeIndexARM9BIOS, + CodeIndexMainRAM, + CodeIndexSWRAM, + NULL, + CodeIndexVRAM, + CodeIndexARM7BIOS, + CodeIndexARM7WRAM, + NULL, + NULL, + CodeIndexARM7WVRAM, + CodeIndexBIOS9DSi, + CodeIndexBIOS7DSi, + CodeIndexNWRAM_A, + CodeIndexNWRAM_B, + CodeIndexNWRAM_C }; u64* const FastBlockLookupRegions[ARMJIT_Memory::memregions_Count] = { - NULL, - FastBlockLookupITCM, - NULL, - FastBlockLookupARM9BIOS, - FastBlockLookupMainRAM, - FastBlockLookupSWRAM, - NULL, - FastBlockLookupVRAM, - FastBlockLookupARM7BIOS, - FastBlockLookupARM7WRAM, - NULL, - NULL, - FastBlockLookupARM7WVRAM, - FastBlockLookupBIOS9DSi, - FastBlockLookupBIOS7DSi, - FastBlockLookupNWRAM_A, - FastBlockLookupNWRAM_B, - FastBlockLookupNWRAM_C + NULL, + FastBlockLookupITCM, + NULL, + FastBlockLookupARM9BIOS, + FastBlockLookupMainRAM, + FastBlockLookupSWRAM, + NULL, + FastBlockLookupVRAM, + FastBlockLookupARM7BIOS, + FastBlockLookupARM7WRAM, + NULL, + NULL, + FastBlockLookupARM7WVRAM, + FastBlockLookupBIOS9DSi, + FastBlockLookupBIOS7DSi, + FastBlockLookupNWRAM_A, + FastBlockLookupNWRAM_B, + FastBlockLookupNWRAM_C }; u32 LocaliseCodeAddress(u32 num, u32 addr) { - int region = num == 0 - ? ARMJIT_Memory::ClassifyAddress9(addr) - : ARMJIT_Memory::ClassifyAddress7(addr); + int region = num == 0 + ? ARMJIT_Memory::ClassifyAddress9(addr) + : ARMJIT_Memory::ClassifyAddress7(addr); - if (CodeMemRegions[region]) - return ARMJIT_Memory::LocaliseAddress(region, num, addr); - return 0; + if (CodeMemRegions[region]) + return ARMJIT_Memory::LocaliseAddress(region, num, addr); + return 0; } TinyVector<u32> InvalidLiterals; @@ -151,137 +151,137 @@ TinyVector<u32> InvalidLiterals; template <typename T, int ConsoleType> T SlowRead9(u32 addr, ARMv5* cpu) { - u32 offset = addr & 0x3; - addr &= ~(sizeof(T) - 1); - - T val; - if (addr < cpu->ITCMSize) - val = *(T*)&cpu->ITCM[addr & 0x7FFF]; - else if (addr >= cpu->DTCMBase && addr < (cpu->DTCMBase + cpu->DTCMSize)) - val = *(T*)&cpu->DTCM[(addr - cpu->DTCMBase) & 0x3FFF]; - else if (std::is_same<T, u32>::value) - val = (ConsoleType == 0 ? NDS::ARM9Read32 : DSi::ARM9Read32)(addr); - else if (std::is_same<T, u16>::value) - val = (ConsoleType == 0 ? NDS::ARM9Read16 : DSi::ARM9Read16)(addr); - else - val = (ConsoleType == 0 ? NDS::ARM9Read8 : DSi::ARM9Read8)(addr); - - if (std::is_same<T, u32>::value) - return ROR(val, offset << 3); - else - return val; + u32 offset = addr & 0x3; + addr &= ~(sizeof(T) - 1); + + T val; + if (addr < cpu->ITCMSize) + val = *(T*)&cpu->ITCM[addr & 0x7FFF]; + else if (addr >= cpu->DTCMBase && addr < (cpu->DTCMBase + cpu->DTCMSize)) + val = *(T*)&cpu->DTCM[(addr - cpu->DTCMBase) & 0x3FFF]; + else if (std::is_same<T, u32>::value) + val = (ConsoleType == 0 ? NDS::ARM9Read32 : DSi::ARM9Read32)(addr); + else if (std::is_same<T, u16>::value) + val = (ConsoleType == 0 ? NDS::ARM9Read16 : DSi::ARM9Read16)(addr); + else + val = (ConsoleType == 0 ? NDS::ARM9Read8 : DSi::ARM9Read8)(addr); + + if (std::is_same<T, u32>::value) + return ROR(val, offset << 3); + else + return val; } template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, T val) { - addr &= ~(sizeof(T) - 1); + addr &= ~(sizeof(T) - 1); if (addr < cpu->ITCMSize) - { + { CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); - *(T*)&cpu->ITCM[addr & 0x7FFF] = val; - } - else if (addr >= cpu->DTCMBase && addr < (cpu->DTCMBase + cpu->DTCMSize)) - { - *(T*)&cpu->DTCM[(addr - cpu->DTCMBase) & 0x3FFF] = val; - } - else if (std::is_same<T, u32>::value) - { - (ConsoleType == 0 ? NDS::ARM9Write32 : DSi::ARM9Write32)(addr, val); - } - else if (std::is_same<T, u16>::value) - { - (ConsoleType == 0 ? NDS::ARM9Write16 : DSi::ARM9Write16)(addr, val); - } - else - { - (ConsoleType == 0 ? NDS::ARM9Write8 : DSi::ARM9Write8)(addr, val); - } + *(T*)&cpu->ITCM[addr & 0x7FFF] = val; + } + else if (addr >= cpu->DTCMBase && addr < (cpu->DTCMBase + cpu->DTCMSize)) + { + *(T*)&cpu->DTCM[(addr - cpu->DTCMBase) & 0x3FFF] = val; + } + else if (std::is_same<T, u32>::value) + { + (ConsoleType == 0 ? NDS::ARM9Write32 : DSi::ARM9Write32)(addr, val); + } + else if (std::is_same<T, u16>::value) + { + (ConsoleType == 0 ? NDS::ARM9Write16 : DSi::ARM9Write16)(addr, val); + } + else + { + (ConsoleType == 0 ? NDS::ARM9Write8 : DSi::ARM9Write8)(addr, val); + } } template <typename T, int ConsoleType> T SlowRead7(u32 addr) { - u32 offset = addr & 0x3; - addr &= ~(sizeof(T) - 1); - - T val; - if (std::is_same<T, u32>::value) - val = (ConsoleType == 0 ? NDS::ARM7Read32 : DSi::ARM7Read32)(addr); - else if (std::is_same<T, u16>::value) - val = (ConsoleType == 0 ? NDS::ARM7Read16 : DSi::ARM7Read16)(addr); - else - val = (ConsoleType == 0 ? NDS::ARM7Read8 : DSi::ARM7Read8)(addr); - - if (std::is_same<T, u32>::value) - return ROR(val, offset << 3); - else - return val; + u32 offset = addr & 0x3; + addr &= ~(sizeof(T) - 1); + + T val; + if (std::is_same<T, u32>::value) + val = (ConsoleType == 0 ? NDS::ARM7Read32 : DSi::ARM7Read32)(addr); + else if (std::is_same<T, u16>::value) + val = (ConsoleType == 0 ? NDS::ARM7Read16 : DSi::ARM7Read16)(addr); + else + val = (ConsoleType == 0 ? NDS::ARM7Read8 : DSi::ARM7Read8)(addr); + + if (std::is_same<T, u32>::value) + return ROR(val, offset << 3); + else + return val; } template <typename T, int ConsoleType> void SlowWrite7(u32 addr, T val) { - addr &= ~(sizeof(T) - 1); - - if (std::is_same<T, u32>::value) - (ConsoleType == 0 ? NDS::ARM7Write32 : DSi::ARM7Write32)(addr, val); - else if (std::is_same<T, u16>::value) - (ConsoleType == 0 ? NDS::ARM7Write16 : DSi::ARM7Write16)(addr, val); - else - (ConsoleType == 0 ? NDS::ARM7Write8 : DSi::ARM7Write8)(addr, val); + addr &= ~(sizeof(T) - 1); + + if (std::is_same<T, u32>::value) + (ConsoleType == 0 ? NDS::ARM7Write32 : DSi::ARM7Write32)(addr, val); + else if (std::is_same<T, u16>::value) + (ConsoleType == 0 ? NDS::ARM7Write16 : DSi::ARM7Write16)(addr, val); + else + (ConsoleType == 0 ? NDS::ARM7Write8 : DSi::ARM7Write8)(addr, val); } template <bool Write, int ConsoleType> void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu) { - addr &= ~0x3; - for (int i = 0; i < num; i++) - { - if (Write) - SlowWrite9<u32, ConsoleType>(addr, cpu, data[i]); - else - data[i] = SlowRead9<u32, ConsoleType>(addr, cpu); - addr += 4; - } + addr &= ~0x3; + for (int i = 0; i < num; i++) + { + if (Write) + SlowWrite9<u32, ConsoleType>(addr, cpu, data[i]); + else + data[i] = SlowRead9<u32, ConsoleType>(addr, cpu); + addr += 4; + } } template <bool Write, int ConsoleType> void SlowBlockTransfer7(u32 addr, u64* data, u32 num) { - addr &= ~0x3; - for (int i = 0; i < num; i++) - { - if (Write) - SlowWrite7<u32, ConsoleType>(addr, data[i]); - else - data[i] = SlowRead7<u32, ConsoleType>(addr); - addr += 4; - } + addr &= ~0x3; + for (int i = 0; i < num; i++) + { + if (Write) + SlowWrite7<u32, ConsoleType>(addr, data[i]); + else + data[i] = SlowRead7<u32, ConsoleType>(addr); + addr += 4; + } } #define INSTANTIATE_SLOWMEM(consoleType) \ - template void SlowWrite9<u32, consoleType>(u32, ARMv5*, u32); \ - template void SlowWrite9<u16, consoleType>(u32, ARMv5*, u16); \ - template void SlowWrite9<u8, consoleType>(u32, ARMv5*, u8); \ - \ - template u32 SlowRead9<u32, consoleType>(u32, ARMv5*); \ - template u16 SlowRead9<u16, consoleType>(u32, ARMv5*); \ - template u8 SlowRead9<u8, consoleType>(u32, ARMv5*); \ - \ - template void SlowWrite7<u32, consoleType>(u32, u32); \ - template void SlowWrite7<u16, consoleType>(u32, u16); \ - template void SlowWrite7<u8, consoleType>(u32, u8); \ - \ - template u32 SlowRead7<u32, consoleType>(u32); \ - template u16 SlowRead7<u16, consoleType>(u32); \ - template u8 SlowRead7<u8, consoleType>(u32); \ - \ - template void SlowBlockTransfer9<false, consoleType>(u32, u64*, u32, ARMv5*); \ - template void SlowBlockTransfer9<true, consoleType>(u32, u64*, u32, ARMv5*); \ - template void SlowBlockTransfer7<false, consoleType>(u32 addr, u64* data, u32 num); \ - template void SlowBlockTransfer7<true, consoleType>(u32 addr, u64* data, u32 num); \ + template void SlowWrite9<u32, consoleType>(u32, ARMv5*, u32); \ + template void SlowWrite9<u16, consoleType>(u32, ARMv5*, u16); \ + template void SlowWrite9<u8, consoleType>(u32, ARMv5*, u8); \ + \ + template u32 SlowRead9<u32, consoleType>(u32, ARMv5*); \ + template u16 SlowRead9<u16, consoleType>(u32, ARMv5*); \ + template u8 SlowRead9<u8, consoleType>(u32, ARMv5*); \ + \ + template void SlowWrite7<u32, consoleType>(u32, u32); \ + template void SlowWrite7<u16, consoleType>(u32, u16); \ + template void SlowWrite7<u8, consoleType>(u32, u8); \ + \ + template u32 SlowRead7<u32, consoleType>(u32); \ + template u16 SlowRead7<u16, consoleType>(u32); \ + template u8 SlowRead7<u8, consoleType>(u32); \ + \ + template void SlowBlockTransfer9<false, consoleType>(u32, u64*, u32, ARMv5*); \ + template void SlowBlockTransfer9<true, consoleType>(u32, u64*, u32, ARMv5*); \ + template void SlowBlockTransfer7<false, consoleType>(u32 addr, u64* data, u32 num); \ + template void SlowBlockTransfer7<true, consoleType>(u32 addr, u64* data, u32 num); \ INSTANTIATE_SLOWMEM(0) INSTANTIATE_SLOWMEM(1) @@ -289,248 +289,248 @@ INSTANTIATE_SLOWMEM(1) template <typename K, typename V, int Size, V InvalidValue> struct UnreliableHashTable { - struct Bucket - { - K KeyA, KeyB; - V ValA, ValB; - }; - - Bucket Table[Size]; - - void Reset() - { - for (int i = 0; i < Size; i++) - { - Table[i].ValA = Table[i].ValB = InvalidValue; - } - } - - UnreliableHashTable() - { - Reset(); - } - - V Insert(K key, V value) - { - u32 slot = XXH3_64bits(&key, sizeof(K)) & (Size - 1); - Bucket* bucket = &Table[slot]; - - if (bucket->ValA == value || bucket->ValB == value) - { - return InvalidValue; - } - else if (bucket->ValA == InvalidValue) - { - bucket->KeyA = key; - bucket->ValA = value; - } - else if (bucket->ValB == InvalidValue) - { - bucket->KeyB = key; - bucket->ValB = value; - } - else - { - V prevVal = bucket->ValB; - bucket->KeyB = bucket->KeyA; - bucket->ValB = bucket->ValA; - bucket->KeyA = key; - bucket->ValA = value; - return prevVal; - } - - return InvalidValue; - } - - void Remove(K key) - { - u32 slot = XXH3_64bits(&key, sizeof(K)) & (Size - 1); - Bucket* bucket = &Table[slot]; - - if (bucket->KeyA == key && bucket->ValA != InvalidValue) - { - bucket->ValA = InvalidValue; - if (bucket->ValB != InvalidValue) - { - bucket->KeyA = bucket->KeyB; - bucket->ValA = bucket->ValB; - bucket->ValB = InvalidValue; - } - } - if (bucket->KeyB == key && bucket->ValB != InvalidValue) - bucket->ValB = InvalidValue; - } - - V LookUp(K addr) - { - u32 slot = XXH3_64bits(&addr, 4) & (Size - 1); - Bucket* bucket = &Table[slot]; - - if (bucket->ValA != InvalidValue && bucket->KeyA == addr) - return bucket->ValA; - if (bucket->ValB != InvalidValue && bucket->KeyB == addr) - return bucket->ValB; - - return InvalidValue; - } + struct Bucket + { + K KeyA, KeyB; + V ValA, ValB; + }; + + Bucket Table[Size]; + + void Reset() + { + for (int i = 0; i < Size; i++) + { + Table[i].ValA = Table[i].ValB = InvalidValue; + } + } + + UnreliableHashTable() + { + Reset(); + } + + V Insert(K key, V value) + { + u32 slot = XXH3_64bits(&key, sizeof(K)) & (Size - 1); + Bucket* bucket = &Table[slot]; + + if (bucket->ValA == value || bucket->ValB == value) + { + return InvalidValue; + } + else if (bucket->ValA == InvalidValue) + { + bucket->KeyA = key; + bucket->ValA = value; + } + else if (bucket->ValB == InvalidValue) + { + bucket->KeyB = key; + bucket->ValB = value; + } + else + { + V prevVal = bucket->ValB; + bucket->KeyB = bucket->KeyA; + bucket->ValB = bucket->ValA; + bucket->KeyA = key; + bucket->ValA = value; + return prevVal; + } + + return InvalidValue; + } + + void Remove(K key) + { + u32 slot = XXH3_64bits(&key, sizeof(K)) & (Size - 1); + Bucket* bucket = &Table[slot]; + + if (bucket->KeyA == key && bucket->ValA != InvalidValue) + { + bucket->ValA = InvalidValue; + if (bucket->ValB != InvalidValue) + { + bucket->KeyA = bucket->KeyB; + bucket->ValA = bucket->ValB; + bucket->ValB = InvalidValue; + } + } + if (bucket->KeyB == key && bucket->ValB != InvalidValue) + bucket->ValB = InvalidValue; + } + + V LookUp(K addr) + { + u32 slot = XXH3_64bits(&addr, 4) & (Size - 1); + Bucket* bucket = &Table[slot]; + + if (bucket->ValA != InvalidValue && bucket->KeyA == addr) + return bucket->ValA; + if (bucket->ValB != InvalidValue && bucket->KeyB == addr) + return bucket->ValB; + + return InvalidValue; + } }; UnreliableHashTable<u32, JitBlock*, 0x800, nullptr> RestoreCandidates; void Init() { - JITCompiler = new Compiler(); + JITCompiler = new Compiler(); - ARMJIT_Memory::Init(); + ARMJIT_Memory::Init(); } void DeInit() { - ARMJIT_Memory::DeInit(); + ARMJIT_Memory::DeInit(); - delete JITCompiler; + delete JITCompiler; } void Reset() { - ResetBlockCache(); + ResetBlockCache(); - ARMJIT_Memory::Reset(); + ARMJIT_Memory::Reset(); } void FloodFillSetFlags(FetchedInstr instrs[], int start, u8 flags) { - for (int j = start; j >= 0; j--) - { - u8 match = instrs[j].Info.WriteFlags & flags; - u8 matchMaybe = (instrs[j].Info.WriteFlags >> 4) & flags; - if (matchMaybe) // writes flags maybe - instrs[j].SetFlags |= matchMaybe; - if (match) - { - instrs[j].SetFlags |= match; - flags &= ~match; - if (!flags) - return; - } - } + for (int j = start; j >= 0; j--) + { + u8 match = instrs[j].Info.WriteFlags & flags; + u8 matchMaybe = (instrs[j].Info.WriteFlags >> 4) & flags; + if (matchMaybe) // writes flags maybe + instrs[j].SetFlags |= matchMaybe; + if (match) + { + instrs[j].SetFlags |= match; + flags &= ~match; + if (!flags) + return; + } + } } bool DecodeLiteral(bool thumb, const FetchedInstr& instr, u32& addr) { - if (!thumb) - { - switch (instr.Info.Kind) - { - case ARMInstrInfo::ak_LDR_IMM: - case ARMInstrInfo::ak_LDRB_IMM: - addr = (instr.Addr + 8) + ((instr.Instr & 0xFFF) * (instr.Instr & (1 << 23) ? 1 : -1)); - return true; - case ARMInstrInfo::ak_LDRH_IMM: - addr = (instr.Addr + 8) + (((instr.Instr & 0xF00) >> 4 | (instr.Instr & 0xF)) * (instr.Instr & (1 << 23) ? 1 : -1)); - return true; - default: - break; - } - } - else if (instr.Info.Kind == ARMInstrInfo::tk_LDR_PCREL) - { - addr = ((instr.Addr + 4) & ~0x2) + ((instr.Instr & 0xFF) << 2); - return true; - } - - JIT_DEBUGPRINT("Literal %08x %x not recognised %d\n", instr.Instr, instr.Addr, instr.Info.Kind); - return false; + if (!thumb) + { + switch (instr.Info.Kind) + { + case ARMInstrInfo::ak_LDR_IMM: + case ARMInstrInfo::ak_LDRB_IMM: + addr = (instr.Addr + 8) + ((instr.Instr & 0xFFF) * (instr.Instr & (1 << 23) ? 1 : -1)); + return true; + case ARMInstrInfo::ak_LDRH_IMM: + addr = (instr.Addr + 8) + (((instr.Instr & 0xF00) >> 4 | (instr.Instr & 0xF)) * (instr.Instr & (1 << 23) ? 1 : -1)); + return true; + default: + break; + } + } + else if (instr.Info.Kind == ARMInstrInfo::tk_LDR_PCREL) + { + addr = ((instr.Addr + 4) & ~0x2) + ((instr.Instr & 0xFF) << 2); + return true; + } + + JIT_DEBUGPRINT("Literal %08x %x not recognised %d\n", instr.Instr, instr.Addr, instr.Info.Kind); + return false; } bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, bool hasLink, u32 lr, bool& link, - u32& linkAddr, u32& targetAddr) + u32& linkAddr, u32& targetAddr) { - if (thumb) - { - u32 r15 = instr.Addr + 4; - cond = 0xE; - - link = instr.Info.Kind == ARMInstrInfo::tk_BL_LONG; - linkAddr = instr.Addr + 4; - - if (instr.Info.Kind == ARMInstrInfo::tk_BL_LONG && !(instr.Instr & (1 << 12))) - { - targetAddr = r15 + ((s32)((instr.Instr & 0x7FF) << 21) >> 9); - targetAddr += ((instr.Instr >> 16) & 0x7FF) << 1; - return true; - } - else if (instr.Info.Kind == ARMInstrInfo::tk_B) - { - s32 offset = (s32)((instr.Instr & 0x7FF) << 21) >> 20; - targetAddr = r15 + offset; - return true; - } - else if (instr.Info.Kind == ARMInstrInfo::tk_BCOND) - { - cond = (instr.Instr >> 8) & 0xF; - s32 offset = (s32)(instr.Instr << 24) >> 23; - targetAddr = r15 + offset; - return true; - } - else if (hasLink && instr.Info.Kind == ARMInstrInfo::tk_BX && instr.A_Reg(3) == 14) - { - JIT_DEBUGPRINT("returning!\n"); - targetAddr = lr; - return true; - } - } - else - { - link = instr.Info.Kind == ARMInstrInfo::ak_BL; - linkAddr = instr.Addr + 4; - - cond = instr.Cond(); - if (instr.Info.Kind == ARMInstrInfo::ak_BL - || instr.Info.Kind == ARMInstrInfo::ak_B) - { - s32 offset = (s32)(instr.Instr << 8) >> 6; - u32 r15 = instr.Addr + 8; - targetAddr = r15 + offset; - return true; - } - else if (hasLink && instr.Info.Kind == ARMInstrInfo::ak_BX && instr.A_Reg(0) == 14) - { - JIT_DEBUGPRINT("returning!\n"); - targetAddr = lr; - return true; - } - } - return false; + if (thumb) + { + u32 r15 = instr.Addr + 4; + cond = 0xE; + + link = instr.Info.Kind == ARMInstrInfo::tk_BL_LONG; + linkAddr = instr.Addr + 4; + + if (instr.Info.Kind == ARMInstrInfo::tk_BL_LONG && !(instr.Instr & (1 << 12))) + { + targetAddr = r15 + ((s32)((instr.Instr & 0x7FF) << 21) >> 9); + targetAddr += ((instr.Instr >> 16) & 0x7FF) << 1; + return true; + } + else if (instr.Info.Kind == ARMInstrInfo::tk_B) + { + s32 offset = (s32)((instr.Instr & 0x7FF) << 21) >> 20; + targetAddr = r15 + offset; + return true; + } + else if (instr.Info.Kind == ARMInstrInfo::tk_BCOND) + { + cond = (instr.Instr >> 8) & 0xF; + s32 offset = (s32)(instr.Instr << 24) >> 23; + targetAddr = r15 + offset; + return true; + } + else if (hasLink && instr.Info.Kind == ARMInstrInfo::tk_BX && instr.A_Reg(3) == 14) + { + JIT_DEBUGPRINT("returning!\n"); + targetAddr = lr; + return true; + } + } + else + { + link = instr.Info.Kind == ARMInstrInfo::ak_BL; + linkAddr = instr.Addr + 4; + + cond = instr.Cond(); + if (instr.Info.Kind == ARMInstrInfo::ak_BL + || instr.Info.Kind == ARMInstrInfo::ak_B) + { + s32 offset = (s32)(instr.Instr << 8) >> 6; + u32 r15 = instr.Addr + 8; + targetAddr = r15 + offset; + return true; + } + else if (hasLink && instr.Info.Kind == ARMInstrInfo::ak_BX && instr.A_Reg(0) == 14) + { + JIT_DEBUGPRINT("returning!\n"); + targetAddr = lr; + return true; + } + } + return false; } bool IsIdleLoop(FetchedInstr* instrs, int instrsCount) { - // see https://github.com/dolphin-emu/dolphin/blob/master/Source/Core/Core/PowerPC/PPCAnalyst.cpp#L678 - // it basically checks if one iteration of a loop depends on another - // the rules are quite simple - - JIT_DEBUGPRINT("checking potential idle loop\n"); - u16 regsWrittenTo = 0; - u16 regsDisallowedToWrite = 0; - for (int i = 0; i < instrsCount; i++) - { - JIT_DEBUGPRINT("instr %d %x regs(%x %x) %x %x\n", i, instrs[i].Instr, instrs[i].Info.DstRegs, instrs[i].Info.SrcRegs, regsWrittenTo, regsDisallowedToWrite); - if (instrs[i].Info.SpecialKind == ARMInstrInfo::special_WriteMem) - return false; - if (i < instrsCount - 1 && instrs[i].Info.Branches()) - return false; - - u16 srcRegs = instrs[i].Info.SrcRegs & ~(1 << 15); - u16 dstRegs = instrs[i].Info.DstRegs & ~(1 << 15); - - regsDisallowedToWrite |= srcRegs & ~regsWrittenTo; - - if (dstRegs & regsDisallowedToWrite) - return false; - regsWrittenTo |= dstRegs; - } - return true; + // see https://github.com/dolphin-emu/dolphin/blob/master/Source/Core/Core/PowerPC/PPCAnalyst.cpp#L678 + // it basically checks if one iteration of a loop depends on another + // the rules are quite simple + + JIT_DEBUGPRINT("checking potential idle loop\n"); + u16 regsWrittenTo = 0; + u16 regsDisallowedToWrite = 0; + for (int i = 0; i < instrsCount; i++) + { + JIT_DEBUGPRINT("instr %d %x regs(%x %x) %x %x\n", i, instrs[i].Instr, instrs[i].Info.DstRegs, instrs[i].Info.SrcRegs, regsWrittenTo, regsDisallowedToWrite); + if (instrs[i].Info.SpecialKind == ARMInstrInfo::special_WriteMem) + return false; + if (i < instrsCount - 1 && instrs[i].Info.Branches()) + return false; + + u16 srcRegs = instrs[i].Info.SrcRegs & ~(1 << 15); + u16 dstRegs = instrs[i].Info.DstRegs & ~(1 << 15); + + regsDisallowedToWrite |= srcRegs & ~regsWrittenTo; + + if (dstRegs & regsDisallowedToWrite) + return false; + regsWrittenTo |= dstRegs; + } + return true; } typedef void (*InterpreterFunc)(ARM* cpu); @@ -539,53 +539,53 @@ void NOP(ARM* cpu) {} #define F(x) &ARMInterpreter::A_##x #define F_ALU(name, s) \ - F(name##_REG_LSL_IMM##s), F(name##_REG_LSR_IMM##s), F(name##_REG_ASR_IMM##s), F(name##_REG_ROR_IMM##s), \ - F(name##_REG_LSL_REG##s), F(name##_REG_LSR_REG##s), F(name##_REG_ASR_REG##s), F(name##_REG_ROR_REG##s), F(name##_IMM##s) + F(name##_REG_LSL_IMM##s), F(name##_REG_LSR_IMM##s), F(name##_REG_ASR_IMM##s), F(name##_REG_ROR_IMM##s), \ + F(name##_REG_LSL_REG##s), F(name##_REG_LSR_REG##s), F(name##_REG_ASR_REG##s), F(name##_REG_ROR_REG##s), F(name##_IMM##s) #define F_MEM_WB(name) \ - F(name##_REG_LSL), F(name##_REG_LSR), F(name##_REG_ASR), F(name##_REG_ROR), F(name##_IMM), \ - F(name##_POST_REG_LSL), F(name##_POST_REG_LSR), F(name##_POST_REG_ASR), F(name##_POST_REG_ROR), F(name##_POST_IMM) + F(name##_REG_LSL), F(name##_REG_LSR), F(name##_REG_ASR), F(name##_REG_ROR), F(name##_IMM), \ + F(name##_POST_REG_LSL), F(name##_POST_REG_LSR), F(name##_POST_REG_ASR), F(name##_POST_REG_ROR), F(name##_POST_IMM) #define F_MEM_HD(name) \ - F(name##_REG), F(name##_IMM), F(name##_POST_REG), F(name##_POST_IMM) + F(name##_REG), F(name##_IMM), F(name##_POST_REG), F(name##_POST_IMM) InterpreterFunc InterpretARM[ARMInstrInfo::ak_Count] = { - F_ALU(AND,), F_ALU(AND,_S), - F_ALU(EOR,), F_ALU(EOR,_S), - F_ALU(SUB,), F_ALU(SUB,_S), - F_ALU(RSB,), F_ALU(RSB,_S), - F_ALU(ADD,), F_ALU(ADD,_S), - F_ALU(ADC,), F_ALU(ADC,_S), - F_ALU(SBC,), F_ALU(SBC,_S), - F_ALU(RSC,), F_ALU(RSC,_S), - F_ALU(ORR,), F_ALU(ORR,_S), - F_ALU(MOV,), F_ALU(MOV,_S), - F_ALU(BIC,), F_ALU(BIC,_S), - F_ALU(MVN,), F_ALU(MVN,_S), - F_ALU(TST,), - F_ALU(TEQ,), - F_ALU(CMP,), - F_ALU(CMN,), - - F(MUL), F(MLA), F(UMULL), F(UMLAL), F(SMULL), F(SMLAL), F(SMLAxy), F(SMLAWy), F(SMULWy), F(SMLALxy), F(SMULxy), - F(CLZ), F(QADD), F(QDADD), F(QSUB), F(QDSUB), - - F_MEM_WB(STR), - F_MEM_WB(STRB), - F_MEM_WB(LDR), - F_MEM_WB(LDRB), - - F_MEM_HD(STRH), - F_MEM_HD(LDRD), - F_MEM_HD(STRD), - F_MEM_HD(LDRH), - F_MEM_HD(LDRSB), - F_MEM_HD(LDRSH), - - F(SWP), F(SWPB), - F(LDM), F(STM), - - F(B), F(BL), F(BLX_IMM), F(BX), F(BLX_REG), - F(UNK), F(MSR_IMM), F(MSR_REG), F(MRS), F(MCR), F(MRC), F(SVC), - NOP + F_ALU(AND,), F_ALU(AND,_S), + F_ALU(EOR,), F_ALU(EOR,_S), + F_ALU(SUB,), F_ALU(SUB,_S), + F_ALU(RSB,), F_ALU(RSB,_S), + F_ALU(ADD,), F_ALU(ADD,_S), + F_ALU(ADC,), F_ALU(ADC,_S), + F_ALU(SBC,), F_ALU(SBC,_S), + F_ALU(RSC,), F_ALU(RSC,_S), + F_ALU(ORR,), F_ALU(ORR,_S), + F_ALU(MOV,), F_ALU(MOV,_S), + F_ALU(BIC,), F_ALU(BIC,_S), + F_ALU(MVN,), F_ALU(MVN,_S), + F_ALU(TST,), + F_ALU(TEQ,), + F_ALU(CMP,), + F_ALU(CMN,), + + F(MUL), F(MLA), F(UMULL), F(UMLAL), F(SMULL), F(SMLAL), F(SMLAxy), F(SMLAWy), F(SMULWy), F(SMLALxy), F(SMULxy), + F(CLZ), F(QADD), F(QDADD), F(QSUB), F(QDSUB), + + F_MEM_WB(STR), + F_MEM_WB(STRB), + F_MEM_WB(LDR), + F_MEM_WB(LDRB), + + F_MEM_HD(STRH), + F_MEM_HD(LDRD), + F_MEM_HD(STRD), + F_MEM_HD(LDRH), + F_MEM_HD(LDRSB), + F_MEM_HD(LDRSH), + + F(SWP), F(SWPB), + F(LDM), F(STM), + + F(B), F(BL), F(BLX_IMM), F(BX), F(BLX_REG), + F(UNK), F(MSR_IMM), F(MSR_REG), F(MRS), F(MCR), F(MRC), F(SVC), + NOP }; #undef F_ALU #undef F_MEM_WB @@ -594,29 +594,29 @@ InterpreterFunc InterpretARM[ARMInstrInfo::ak_Count] = void T_BL_LONG(ARM* cpu) { - ARMInterpreter::T_BL_LONG_1(cpu); - cpu->R[15] += 2; - ARMInterpreter::T_BL_LONG_2(cpu); + ARMInterpreter::T_BL_LONG_1(cpu); + cpu->R[15] += 2; + ARMInterpreter::T_BL_LONG_2(cpu); } #define F(x) ARMInterpreter::T_##x InterpreterFunc InterpretTHUMB[ARMInstrInfo::tk_Count] = { - F(LSL_IMM), F(LSR_IMM), F(ASR_IMM), - F(ADD_REG_), F(SUB_REG_), F(ADD_IMM_), F(SUB_IMM_), - F(MOV_IMM), F(CMP_IMM), F(ADD_IMM), F(SUB_IMM), - F(AND_REG), F(EOR_REG), F(LSL_REG), F(LSR_REG), F(ASR_REG), - F(ADC_REG), F(SBC_REG), F(ROR_REG), F(TST_REG), F(NEG_REG), - F(CMP_REG), F(CMN_REG), F(ORR_REG), F(MUL_REG), F(BIC_REG), F(MVN_REG), - F(ADD_HIREG), F(CMP_HIREG), F(MOV_HIREG), - F(ADD_PCREL), F(ADD_SPREL), F(ADD_SP), - F(LDR_PCREL), F(STR_REG), F(STRB_REG), F(LDR_REG), F(LDRB_REG), F(STRH_REG), - F(LDRSB_REG), F(LDRH_REG), F(LDRSH_REG), F(STR_IMM), F(LDR_IMM), F(STRB_IMM), - F(LDRB_IMM), F(STRH_IMM), F(LDRH_IMM), F(STR_SPREL), F(LDR_SPREL), - F(PUSH), F(POP), F(LDMIA), F(STMIA), - F(BCOND), F(BX), F(BLX_REG), F(B), F(BL_LONG_1), F(BL_LONG_2), - F(UNK), F(SVC), - T_BL_LONG // BL_LONG psudo opcode + F(LSL_IMM), F(LSR_IMM), F(ASR_IMM), + F(ADD_REG_), F(SUB_REG_), F(ADD_IMM_), F(SUB_IMM_), + F(MOV_IMM), F(CMP_IMM), F(ADD_IMM), F(SUB_IMM), + F(AND_REG), F(EOR_REG), F(LSL_REG), F(LSR_REG), F(ASR_REG), + F(ADC_REG), F(SBC_REG), F(ROR_REG), F(TST_REG), F(NEG_REG), + F(CMP_REG), F(CMN_REG), F(ORR_REG), F(MUL_REG), F(BIC_REG), F(MVN_REG), + F(ADD_HIREG), F(CMP_HIREG), F(MOV_HIREG), + F(ADD_PCREL), F(ADD_SPREL), F(ADD_SP), + F(LDR_PCREL), F(STR_REG), F(STRB_REG), F(LDR_REG), F(LDRB_REG), F(STRH_REG), + F(LDRSB_REG), F(LDRH_REG), F(LDRSH_REG), F(STR_IMM), F(LDR_IMM), F(STRB_IMM), + F(LDRB_IMM), F(STRH_IMM), F(LDRH_IMM), F(STR_SPREL), F(LDR_SPREL), + F(PUSH), F(POP), F(LDMIA), F(STMIA), + F(BCOND), F(BX), F(BLX_REG), F(B), F(BL_LONG_1), F(BL_LONG_2), + F(UNK), F(SVC), + T_BL_LONG // BL_LONG psudo opcode }; #undef F @@ -624,106 +624,106 @@ void CompileBlock(ARM* cpu) { bool thumb = cpu->CPSR & 0x20; - if (Config::JIT_MaxBlockSize < 1) - Config::JIT_MaxBlockSize = 1; - if (Config::JIT_MaxBlockSize > 32) - Config::JIT_MaxBlockSize = 32; - - u32 blockAddr = cpu->R[15] - (thumb ? 2 : 4); - - u32 localAddr = LocaliseCodeAddress(cpu->Num, blockAddr); - if (!localAddr) - { - printf("trying to compile non executable code? %x\n", blockAddr); - } - - auto& map = cpu->Num == 0 ? JitBlocks9 : JitBlocks7; - auto existingBlockIt = map.find(blockAddr); - if (existingBlockIt != map.end()) - { - // there's already a block, though it's not inside the fast map - // could be that there are two blocks at the same physical addr - // but different mirrors - u32 otherLocalAddr = existingBlockIt->second->StartAddrLocal; - - if (localAddr == otherLocalAddr) - { - JIT_DEBUGPRINT("switching out block %x %x %x\n", localAddr, blockAddr, existingBlockIt->second->StartAddr); - - u64* entry = &FastBlockLookupRegions[localAddr >> 27][(localAddr & 0x7FFFFFF) / 2]; - *entry = ((u64)blockAddr | cpu->Num) << 32; - *entry |= JITCompiler->SubEntryOffset(existingBlockIt->second->EntryPoint); - return; - } - - // some memory has been remapped - JitBlock* prevBlock = RestoreCandidates.Insert(existingBlockIt->second->InstrHash, existingBlockIt->second); - if (prevBlock) - delete prevBlock; - - map.erase(existingBlockIt); - } + if (Config::JIT_MaxBlockSize < 1) + Config::JIT_MaxBlockSize = 1; + if (Config::JIT_MaxBlockSize > 32) + Config::JIT_MaxBlockSize = 32; + + u32 blockAddr = cpu->R[15] - (thumb ? 2 : 4); + + u32 localAddr = LocaliseCodeAddress(cpu->Num, blockAddr); + if (!localAddr) + { + printf("trying to compile non executable code? %x\n", blockAddr); + } + + auto& map = cpu->Num == 0 ? JitBlocks9 : JitBlocks7; + auto existingBlockIt = map.find(blockAddr); + if (existingBlockIt != map.end()) + { + // there's already a block, though it's not inside the fast map + // could be that there are two blocks at the same physical addr + // but different mirrors + u32 otherLocalAddr = existingBlockIt->second->StartAddrLocal; + + if (localAddr == otherLocalAddr) + { + JIT_DEBUGPRINT("switching out block %x %x %x\n", localAddr, blockAddr, existingBlockIt->second->StartAddr); + + u64* entry = &FastBlockLookupRegions[localAddr >> 27][(localAddr & 0x7FFFFFF) / 2]; + *entry = ((u64)blockAddr | cpu->Num) << 32; + *entry |= JITCompiler->SubEntryOffset(existingBlockIt->second->EntryPoint); + return; + } + + // some memory has been remapped + JitBlock* prevBlock = RestoreCandidates.Insert(existingBlockIt->second->InstrHash, existingBlockIt->second); + if (prevBlock) + delete prevBlock; + + map.erase(existingBlockIt); + } FetchedInstr instrs[Config::JIT_MaxBlockSize]; int i = 0; u32 r15 = cpu->R[15]; - u32 addressRanges[Config::JIT_MaxBlockSize]; - u32 addressMasks[Config::JIT_MaxBlockSize] = {0}; - u32 numAddressRanges = 0; + u32 addressRanges[Config::JIT_MaxBlockSize]; + u32 addressMasks[Config::JIT_MaxBlockSize] = {0}; + u32 numAddressRanges = 0; - u32 numLiterals = 0; - u32 literalLoadAddrs[Config::JIT_MaxBlockSize]; - // they are going to be hashed - u32 literalValues[Config::JIT_MaxBlockSize]; - u32 instrValues[Config::JIT_MaxBlockSize]; + u32 numLiterals = 0; + u32 literalLoadAddrs[Config::JIT_MaxBlockSize]; + // they are going to be hashed + u32 literalValues[Config::JIT_MaxBlockSize]; + u32 instrValues[Config::JIT_MaxBlockSize]; - cpu->FillPipeline(); + cpu->FillPipeline(); u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]}; - u32 nextInstrAddr[2] = {blockAddr, r15}; + u32 nextInstrAddr[2] = {blockAddr, r15}; - JIT_DEBUGPRINT("start block %x %08x (%x)\n", blockAddr, cpu->CPSR, localAddr); + JIT_DEBUGPRINT("start block %x %08x (%x)\n", blockAddr, cpu->CPSR, localAddr); - u32 lastSegmentStart = blockAddr; - u32 lr; - bool hasLink = false; + u32 lastSegmentStart = blockAddr; + u32 lr; + bool hasLink = false; do { r15 += thumb ? 2 : 4; - instrs[i].BranchFlags = 0; - instrs[i].SetFlags = 0; + instrs[i].BranchFlags = 0; + instrs[i].SetFlags = 0; instrs[i].Instr = nextInstr[0]; nextInstr[0] = nextInstr[1]; - - instrs[i].Addr = nextInstrAddr[0]; - nextInstrAddr[0] = nextInstrAddr[1]; - nextInstrAddr[1] = r15; - JIT_DEBUGPRINT("instr %08x %x\n", instrs[i].Instr & (thumb ? 0xFFFF : ~0), instrs[i].Addr); - - instrValues[i] = instrs[i].Instr; - - u32 translatedAddr = LocaliseCodeAddress(cpu->Num, instrs[i].Addr); - assert(translatedAddr >> 27); - u32 translatedAddrRounded = translatedAddr & ~0x1FF; - if (i == 0 || translatedAddrRounded != addressRanges[numAddressRanges - 1]) - { - bool returning = false; - for (int j = 0; j < numAddressRanges; j++) - { - if (addressRanges[j] == translatedAddrRounded) - { - std::swap(addressRanges[j], addressRanges[numAddressRanges - 1]); - std::swap(addressMasks[j], addressMasks[numAddressRanges - 1]); - returning = true; - break; - } - } - if (!returning) - addressRanges[numAddressRanges++] = translatedAddrRounded; - } - addressMasks[numAddressRanges - 1] |= 1 << ((translatedAddr & 0x1FF) / 16); + + instrs[i].Addr = nextInstrAddr[0]; + nextInstrAddr[0] = nextInstrAddr[1]; + nextInstrAddr[1] = r15; + JIT_DEBUGPRINT("instr %08x %x\n", instrs[i].Instr & (thumb ? 0xFFFF : ~0), instrs[i].Addr); + + instrValues[i] = instrs[i].Instr; + + u32 translatedAddr = LocaliseCodeAddress(cpu->Num, instrs[i].Addr); + assert(translatedAddr >> 27); + u32 translatedAddrRounded = translatedAddr & ~0x1FF; + if (i == 0 || translatedAddrRounded != addressRanges[numAddressRanges - 1]) + { + bool returning = false; + for (int j = 0; j < numAddressRanges; j++) + { + if (addressRanges[j] == translatedAddrRounded) + { + std::swap(addressRanges[j], addressRanges[numAddressRanges - 1]); + std::swap(addressMasks[j], addressMasks[numAddressRanges - 1]); + returning = true; + break; + } + } + if (!returning) + addressRanges[numAddressRanges++] = translatedAddrRounded; + } + addressMasks[numAddressRanges - 1] |= 1 << ((translatedAddr & 0x1FF) / 16); if (cpu->Num == 0) { @@ -750,392 +750,392 @@ void CompileBlock(ARM* cpu) } instrs[i].Info = ARMInstrInfo::Decode(thumb, cpu->Num, instrs[i].Instr); - cpu->R[15] = r15; - cpu->CurInstr = instrs[i].Instr; - cpu->CodeCycles = instrs[i].CodeCycles; - - if (instrs[i].Info.DstRegs & (1 << 14) - || (!thumb - && (instrs[i].Info.Kind == ARMInstrInfo::ak_MSR_IMM || instrs[i].Info.Kind == ARMInstrInfo::ak_MSR_REG) - && instrs[i].Instr & (1 << 16))) - hasLink = false; - - if (thumb) - { - InterpretTHUMB[instrs[i].Info.Kind](cpu); - } - else - { - if (cpu->Num == 0 && instrs[i].Info.Kind == ARMInstrInfo::ak_BLX_IMM) - { - ARMInterpreter::A_BLX_IMM(cpu); - } - else - { + cpu->R[15] = r15; + cpu->CurInstr = instrs[i].Instr; + cpu->CodeCycles = instrs[i].CodeCycles; + + if (instrs[i].Info.DstRegs & (1 << 14) + || (!thumb + && (instrs[i].Info.Kind == ARMInstrInfo::ak_MSR_IMM || instrs[i].Info.Kind == ARMInstrInfo::ak_MSR_REG) + && instrs[i].Instr & (1 << 16))) + hasLink = false; + + if (thumb) + { + InterpretTHUMB[instrs[i].Info.Kind](cpu); + } + else + { + if (cpu->Num == 0 && instrs[i].Info.Kind == ARMInstrInfo::ak_BLX_IMM) + { + ARMInterpreter::A_BLX_IMM(cpu); + } + else + { u32 icode = ((instrs[i].Instr >> 4) & 0xF) | ((instrs[i].Instr >> 16) & 0xFF0); - assert(InterpretARM[instrs[i].Info.Kind] == ARMInterpreter::ARMInstrTable[icode] - || instrs[i].Info.Kind == ARMInstrInfo::ak_MOV_REG_LSL_IMM - || instrs[i].Info.Kind == ARMInstrInfo::ak_Nop - || instrs[i].Info.Kind == ARMInstrInfo::ak_UNK); - if (cpu->CheckCondition(instrs[i].Cond())) - InterpretARM[instrs[i].Info.Kind](cpu); - else - cpu->AddCycles_C(); - } - } - - instrs[i].DataCycles = cpu->DataCycles; - instrs[i].DataRegion = cpu->DataRegion; - - u32 literalAddr; - if (Config::JIT_LiteralOptimisations - && instrs[i].Info.SpecialKind == ARMInstrInfo::special_LoadLiteral - && DecodeLiteral(thumb, instrs[i], literalAddr)) - { - u32 translatedAddr = LocaliseCodeAddress(cpu->Num, literalAddr); - if (!translatedAddr) - { - printf("literal in non executable memory?\n"); - } - u32 translatedAddrRounded = translatedAddr & ~0x1FF; - - u32 j = 0; - for (; j < numAddressRanges; j++) - if (addressRanges[j] == translatedAddrRounded) - break; - if (j == numAddressRanges) - addressRanges[numAddressRanges++] = translatedAddrRounded; - addressMasks[j] |= 1 << ((translatedAddr & 0x1FF) / 16); - JIT_DEBUGPRINT("literal loading %08x %08x %08x %08x\n", literalAddr, translatedAddr, addressMasks[j], addressRanges[j]); - cpu->DataRead32(literalAddr, &literalValues[numLiterals]); - literalLoadAddrs[numLiterals++] = translatedAddr; - } - - if (thumb && instrs[i].Info.Kind == ARMInstrInfo::tk_BL_LONG_2 && i > 0 - && instrs[i - 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_1) - { - instrs[i - 1].Info.Kind = ARMInstrInfo::tk_BL_LONG; - instrs[i - 1].Instr = (instrs[i - 1].Instr & 0xFFFF) | (instrs[i].Instr << 16); - instrs[i - 1].Info.DstRegs = 0xC000; - instrs[i - 1].Info.SrcRegs = 0; - instrs[i - 1].Info.EndBlock = true; - i--; - } - - if (instrs[i].Info.Branches() && Config::JIT_BranchOptimisations) - { - bool hasBranched = cpu->R[15] != r15; - - bool link; - u32 cond, target, linkAddr; - bool staticBranch = DecodeBranch(thumb, instrs[i], cond, hasLink, lr, link, linkAddr, target); - JIT_DEBUGPRINT("branch cond %x target %x (%d)\n", cond, target, hasBranched); - - if (staticBranch) - { - instrs[i].BranchFlags |= branch_StaticTarget; - - bool isBackJump = false; - if (hasBranched) - { - for (int j = 0; j < i; j++) - { - if (instrs[i].Addr == target) - { - isBackJump = true; - break; - } - } - } - - if (cond < 0xE && target < instrs[i].Addr && target >= lastSegmentStart) - { - // we might have an idle loop - u32 backwardsOffset = (instrs[i].Addr - target) / (thumb ? 2 : 4); - if (IsIdleLoop(&instrs[i - backwardsOffset], backwardsOffset + 1)) - { - instrs[i].BranchFlags |= branch_IdleBranch; - JIT_DEBUGPRINT("found %s idle loop %d in block %x\n", thumb ? "thumb" : "arm", cpu->Num, blockAddr); - } - } - else if (hasBranched && !isBackJump && i + 1 < Config::JIT_MaxBlockSize) - { - if (link) - { - lr = linkAddr; - hasLink = true; - } - - r15 = target + (thumb ? 2 : 4); - assert(r15 == cpu->R[15]); - - JIT_DEBUGPRINT("block lengthened by static branch (target %x)\n", target); - - nextInstr[0] = cpu->NextInstr[0]; - nextInstr[1] = cpu->NextInstr[1]; - - nextInstrAddr[0] = target; - nextInstrAddr[1] = r15; - - lastSegmentStart = target; - - instrs[i].Info.EndBlock = false; - - if (cond < 0xE) - instrs[i].BranchFlags |= branch_FollowCondTaken; - } - } - - if (!hasBranched && cond < 0xE && i + 1 < Config::JIT_MaxBlockSize) - { - instrs[i].Info.EndBlock = false; - instrs[i].BranchFlags |= branch_FollowCondNotTaken; - } - } + assert(InterpretARM[instrs[i].Info.Kind] == ARMInterpreter::ARMInstrTable[icode] + || instrs[i].Info.Kind == ARMInstrInfo::ak_MOV_REG_LSL_IMM + || instrs[i].Info.Kind == ARMInstrInfo::ak_Nop + || instrs[i].Info.Kind == ARMInstrInfo::ak_UNK); + if (cpu->CheckCondition(instrs[i].Cond())) + InterpretARM[instrs[i].Info.Kind](cpu); + else + cpu->AddCycles_C(); + } + } + + instrs[i].DataCycles = cpu->DataCycles; + instrs[i].DataRegion = cpu->DataRegion; + + u32 literalAddr; + if (Config::JIT_LiteralOptimisations + && instrs[i].Info.SpecialKind == ARMInstrInfo::special_LoadLiteral + && DecodeLiteral(thumb, instrs[i], literalAddr)) + { + u32 translatedAddr = LocaliseCodeAddress(cpu->Num, literalAddr); + if (!translatedAddr) + { + printf("literal in non executable memory?\n"); + } + u32 translatedAddrRounded = translatedAddr & ~0x1FF; + + u32 j = 0; + for (; j < numAddressRanges; j++) + if (addressRanges[j] == translatedAddrRounded) + break; + if (j == numAddressRanges) + addressRanges[numAddressRanges++] = translatedAddrRounded; + addressMasks[j] |= 1 << ((translatedAddr & 0x1FF) / 16); + JIT_DEBUGPRINT("literal loading %08x %08x %08x %08x\n", literalAddr, translatedAddr, addressMasks[j], addressRanges[j]); + cpu->DataRead32(literalAddr, &literalValues[numLiterals]); + literalLoadAddrs[numLiterals++] = translatedAddr; + } + + if (thumb && instrs[i].Info.Kind == ARMInstrInfo::tk_BL_LONG_2 && i > 0 + && instrs[i - 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_1) + { + instrs[i - 1].Info.Kind = ARMInstrInfo::tk_BL_LONG; + instrs[i - 1].Instr = (instrs[i - 1].Instr & 0xFFFF) | (instrs[i].Instr << 16); + instrs[i - 1].Info.DstRegs = 0xC000; + instrs[i - 1].Info.SrcRegs = 0; + instrs[i - 1].Info.EndBlock = true; + i--; + } + + if (instrs[i].Info.Branches() && Config::JIT_BranchOptimisations) + { + bool hasBranched = cpu->R[15] != r15; + + bool link; + u32 cond, target, linkAddr; + bool staticBranch = DecodeBranch(thumb, instrs[i], cond, hasLink, lr, link, linkAddr, target); + JIT_DEBUGPRINT("branch cond %x target %x (%d)\n", cond, target, hasBranched); + + if (staticBranch) + { + instrs[i].BranchFlags |= branch_StaticTarget; + + bool isBackJump = false; + if (hasBranched) + { + for (int j = 0; j < i; j++) + { + if (instrs[i].Addr == target) + { + isBackJump = true; + break; + } + } + } + + if (cond < 0xE && target < instrs[i].Addr && target >= lastSegmentStart) + { + // we might have an idle loop + u32 backwardsOffset = (instrs[i].Addr - target) / (thumb ? 2 : 4); + if (IsIdleLoop(&instrs[i - backwardsOffset], backwardsOffset + 1)) + { + instrs[i].BranchFlags |= branch_IdleBranch; + JIT_DEBUGPRINT("found %s idle loop %d in block %x\n", thumb ? "thumb" : "arm", cpu->Num, blockAddr); + } + } + else if (hasBranched && !isBackJump && i + 1 < Config::JIT_MaxBlockSize) + { + if (link) + { + lr = linkAddr; + hasLink = true; + } + + r15 = target + (thumb ? 2 : 4); + assert(r15 == cpu->R[15]); + + JIT_DEBUGPRINT("block lengthened by static branch (target %x)\n", target); + + nextInstr[0] = cpu->NextInstr[0]; + nextInstr[1] = cpu->NextInstr[1]; + + nextInstrAddr[0] = target; + nextInstrAddr[1] = r15; + + lastSegmentStart = target; + + instrs[i].Info.EndBlock = false; + + if (cond < 0xE) + instrs[i].BranchFlags |= branch_FollowCondTaken; + } + } + + if (!hasBranched && cond < 0xE && i + 1 < Config::JIT_MaxBlockSize) + { + instrs[i].Info.EndBlock = false; + instrs[i].BranchFlags |= branch_FollowCondNotTaken; + } + } i++; - bool canCompile = JITCompiler->CanCompile(thumb, instrs[i - 1].Info.Kind); - bool secondaryFlagReadCond = !canCompile || (instrs[i - 1].BranchFlags & (branch_FollowCondTaken | branch_FollowCondNotTaken)); - if (instrs[i - 1].Info.ReadFlags != 0 || secondaryFlagReadCond) - FloodFillSetFlags(instrs, i - 2, !secondaryFlagReadCond ? instrs[i - 1].Info.ReadFlags : 0xF); + bool canCompile = JITCompiler->CanCompile(thumb, instrs[i - 1].Info.Kind); + bool secondaryFlagReadCond = !canCompile || (instrs[i - 1].BranchFlags & (branch_FollowCondTaken | branch_FollowCondNotTaken)); + if (instrs[i - 1].Info.ReadFlags != 0 || secondaryFlagReadCond) + FloodFillSetFlags(instrs, i - 2, !secondaryFlagReadCond ? instrs[i - 1].Info.ReadFlags : 0xF); } while(!instrs[i - 1].Info.EndBlock && i < Config::JIT_MaxBlockSize && !cpu->Halted && (!cpu->IRQ || (cpu->CPSR & 0x80))); - u32 literalHash = (u32)XXH3_64bits(literalValues, numLiterals * 4); - u32 instrHash = (u32)XXH3_64bits(instrValues, i * 4); - - JitBlock* prevBlock = RestoreCandidates.LookUp(instrHash); - bool mayRestore = true; - if (prevBlock) - { - RestoreCandidates.Remove(instrHash); - - mayRestore = prevBlock->StartAddr == blockAddr && prevBlock->LiteralHash == literalHash; - - if (mayRestore && prevBlock->NumAddresses == numAddressRanges) - { - for (int j = 0; j < numAddressRanges; j++) - { - if (prevBlock->AddressRanges()[j] != addressRanges[j] - || prevBlock->AddressMasks()[j] != addressMasks[j]) - { - mayRestore = false; - break; - } - } - } - else - mayRestore = false; - } - else - { - mayRestore = false; - prevBlock = NULL; - } - - JitBlock* block; - if (!mayRestore) - { - if (prevBlock) - delete prevBlock; - - block = new JitBlock(cpu->Num, i, numAddressRanges, numLiterals); - block->LiteralHash = literalHash; - block->InstrHash = instrHash; - for (int j = 0; j < numAddressRanges; j++) - block->AddressRanges()[j] = addressRanges[j]; - for (int j = 0; j < numAddressRanges; j++) - block->AddressMasks()[j] = addressMasks[j]; - for (int j = 0; j < numLiterals; j++) - block->Literals()[j] = literalLoadAddrs[j]; - - block->StartAddr = blockAddr; - block->StartAddrLocal = localAddr; - - FloodFillSetFlags(instrs, i - 1, 0xF); - - block->EntryPoint = JITCompiler->CompileBlock(cpu, thumb, instrs, i); - - JIT_DEBUGPRINT("block start %p\n", block->EntryPoint); - } - else - { - JIT_DEBUGPRINT("restored! %p\n", prevBlock); - block = prevBlock; - } - - assert((localAddr & 1) == 0); - for (int j = 0; j < numAddressRanges; j++) - { - assert(addressRanges[j] == block->AddressRanges()[j]); - assert(addressMasks[j] == block->AddressMasks()[j]); - assert(addressMasks[j] != 0); - - AddressRange* region = CodeMemRegions[addressRanges[j] >> 27]; - - if (!PageContainsCode(®ion[(addressRanges[j] & 0x7FFF000) / 512])) - ARMJIT_Memory::SetCodeProtection(addressRanges[j] >> 27, addressRanges[j] & 0x7FFFFFF, true); - - AddressRange* range = ®ion[(addressRanges[j] & 0x7FFFFFF) / 512]; - range->Code |= addressMasks[j]; - range->Blocks.Add(block); - } - - if (cpu->Num == 0) - JitBlocks9[blockAddr] = block; - else - JitBlocks7[blockAddr] = block; - - u64* entry = &FastBlockLookupRegions[(localAddr >> 27)][(localAddr & 0x7FFFFFF) / 2]; - *entry = ((u64)blockAddr | cpu->Num) << 32; - *entry |= JITCompiler->SubEntryOffset(block->EntryPoint); + u32 literalHash = (u32)XXH3_64bits(literalValues, numLiterals * 4); + u32 instrHash = (u32)XXH3_64bits(instrValues, i * 4); + + JitBlock* prevBlock = RestoreCandidates.LookUp(instrHash); + bool mayRestore = true; + if (prevBlock) + { + RestoreCandidates.Remove(instrHash); + + mayRestore = prevBlock->StartAddr == blockAddr && prevBlock->LiteralHash == literalHash; + + if (mayRestore && prevBlock->NumAddresses == numAddressRanges) + { + for (int j = 0; j < numAddressRanges; j++) + { + if (prevBlock->AddressRanges()[j] != addressRanges[j] + || prevBlock->AddressMasks()[j] != addressMasks[j]) + { + mayRestore = false; + break; + } + } + } + else + mayRestore = false; + } + else + { + mayRestore = false; + prevBlock = NULL; + } + + JitBlock* block; + if (!mayRestore) + { + if (prevBlock) + delete prevBlock; + + block = new JitBlock(cpu->Num, i, numAddressRanges, numLiterals); + block->LiteralHash = literalHash; + block->InstrHash = instrHash; + for (int j = 0; j < numAddressRanges; j++) + block->AddressRanges()[j] = addressRanges[j]; + for (int j = 0; j < numAddressRanges; j++) + block->AddressMasks()[j] = addressMasks[j]; + for (int j = 0; j < numLiterals; j++) + block->Literals()[j] = literalLoadAddrs[j]; + + block->StartAddr = blockAddr; + block->StartAddrLocal = localAddr; + + FloodFillSetFlags(instrs, i - 1, 0xF); + + block->EntryPoint = JITCompiler->CompileBlock(cpu, thumb, instrs, i); + + JIT_DEBUGPRINT("block start %p\n", block->EntryPoint); + } + else + { + JIT_DEBUGPRINT("restored! %p\n", prevBlock); + block = prevBlock; + } + + assert((localAddr & 1) == 0); + for (int j = 0; j < numAddressRanges; j++) + { + assert(addressRanges[j] == block->AddressRanges()[j]); + assert(addressMasks[j] == block->AddressMasks()[j]); + assert(addressMasks[j] != 0); + + AddressRange* region = CodeMemRegions[addressRanges[j] >> 27]; + + if (!PageContainsCode(®ion[(addressRanges[j] & 0x7FFF000) / 512])) + ARMJIT_Memory::SetCodeProtection(addressRanges[j] >> 27, addressRanges[j] & 0x7FFFFFF, true); + + AddressRange* range = ®ion[(addressRanges[j] & 0x7FFFFFF) / 512]; + range->Code |= addressMasks[j]; + range->Blocks.Add(block); + } + + if (cpu->Num == 0) + JitBlocks9[blockAddr] = block; + else + JitBlocks7[blockAddr] = block; + + u64* entry = &FastBlockLookupRegions[(localAddr >> 27)][(localAddr & 0x7FFFFFF) / 2]; + *entry = ((u64)blockAddr | cpu->Num) << 32; + *entry |= JITCompiler->SubEntryOffset(block->EntryPoint); } void InvalidateByAddr(u32 localAddr) { - JIT_DEBUGPRINT("invalidating by addr %x\n", localAddr); - - AddressRange* region = CodeMemRegions[localAddr >> 27]; - AddressRange* range = ®ion[(localAddr & 0x7FFFFFF) / 512]; - u32 mask = 1 << ((localAddr & 0x1FF) / 16); - - range->Code = 0; - for (int i = 0; i < range->Blocks.Length;) - { - JitBlock* block = range->Blocks[i]; - - bool invalidated = false; - u32 mask = 0; - for (int j = 0; j < block->NumAddresses; j++) - { - if (block->AddressRanges()[j] == (localAddr & ~0x1FF)) - { - mask = block->AddressMasks()[j]; - invalidated = block->AddressMasks()[j] & mask; - assert(mask); - break; - } - } - assert(mask); - if (!invalidated) - { - range->Code |= mask; - i++; - continue; - } - range->Blocks.Remove(i); - - if (range->Blocks.Length == 0 - && !PageContainsCode(®ion[(localAddr & 0x7FFF000) / 512])) - { - ARMJIT_Memory::SetCodeProtection(localAddr >> 27, localAddr & 0x7FFFFFF, false); - } - - bool literalInvalidation = false; - for (int j = 0; j < block->NumLiterals; j++) - { - u32 addr = block->Literals()[j]; - if (addr == localAddr) - { - if (InvalidLiterals.Find(localAddr) != -1) - { - InvalidLiterals.Add(localAddr); - JIT_DEBUGPRINT("found invalid literal %d\n", InvalidLiterals.Length); - } - literalInvalidation = true; - break; - } - } - for (int j = 0; j < block->NumAddresses; j++) - { - u32 addr = block->AddressRanges()[j]; - if ((addr / 512) != (localAddr / 512)) - { - AddressRange* otherRegion = CodeMemRegions[addr >> 27]; - AddressRange* otherRange = &otherRegion[(addr & 0x7FFFFFF) / 512]; - assert(otherRange != range); - - bool removed = otherRange->Blocks.RemoveByValue(block); - assert(removed); - - if (otherRange->Blocks.Length == 0) - { - if (!PageContainsCode(&otherRegion[(addr & 0x7FFF000) / 512])) - ARMJIT_Memory::SetCodeProtection(addr >> 27, addr & 0x7FFFFFF, false); - - otherRange->Code = 0; - } - } - } - - FastBlockLookupRegions[block->StartAddrLocal >> 27][(block->StartAddrLocal & 0x7FFFFFF) / 2] = (u64)UINT32_MAX << 32; - if (block->Num == 0) - JitBlocks9.erase(block->StartAddr); - else - JitBlocks7.erase(block->StartAddr); - - if (!literalInvalidation) - { - JitBlock* prevBlock = RestoreCandidates.Insert(block->InstrHash, block); - if (prevBlock) - delete prevBlock; - } - else - { - delete block; - } - } + JIT_DEBUGPRINT("invalidating by addr %x\n", localAddr); + + AddressRange* region = CodeMemRegions[localAddr >> 27]; + AddressRange* range = ®ion[(localAddr & 0x7FFFFFF) / 512]; + u32 mask = 1 << ((localAddr & 0x1FF) / 16); + + range->Code = 0; + for (int i = 0; i < range->Blocks.Length;) + { + JitBlock* block = range->Blocks[i]; + + bool invalidated = false; + u32 mask = 0; + for (int j = 0; j < block->NumAddresses; j++) + { + if (block->AddressRanges()[j] == (localAddr & ~0x1FF)) + { + mask = block->AddressMasks()[j]; + invalidated = block->AddressMasks()[j] & mask; + assert(mask); + break; + } + } + assert(mask); + if (!invalidated) + { + range->Code |= mask; + i++; + continue; + } + range->Blocks.Remove(i); + + if (range->Blocks.Length == 0 + && !PageContainsCode(®ion[(localAddr & 0x7FFF000) / 512])) + { + ARMJIT_Memory::SetCodeProtection(localAddr >> 27, localAddr & 0x7FFFFFF, false); + } + + bool literalInvalidation = false; + for (int j = 0; j < block->NumLiterals; j++) + { + u32 addr = block->Literals()[j]; + if (addr == localAddr) + { + if (InvalidLiterals.Find(localAddr) != -1) + { + InvalidLiterals.Add(localAddr); + JIT_DEBUGPRINT("found invalid literal %d\n", InvalidLiterals.Length); + } + literalInvalidation = true; + break; + } + } + for (int j = 0; j < block->NumAddresses; j++) + { + u32 addr = block->AddressRanges()[j]; + if ((addr / 512) != (localAddr / 512)) + { + AddressRange* otherRegion = CodeMemRegions[addr >> 27]; + AddressRange* otherRange = &otherRegion[(addr & 0x7FFFFFF) / 512]; + assert(otherRange != range); + + bool removed = otherRange->Blocks.RemoveByValue(block); + assert(removed); + + if (otherRange->Blocks.Length == 0) + { + if (!PageContainsCode(&otherRegion[(addr & 0x7FFF000) / 512])) + ARMJIT_Memory::SetCodeProtection(addr >> 27, addr & 0x7FFFFFF, false); + + otherRange->Code = 0; + } + } + } + + FastBlockLookupRegions[block->StartAddrLocal >> 27][(block->StartAddrLocal & 0x7FFFFFF) / 2] = (u64)UINT32_MAX << 32; + if (block->Num == 0) + JitBlocks9.erase(block->StartAddr); + else + JitBlocks7.erase(block->StartAddr); + + if (!literalInvalidation) + { + JitBlock* prevBlock = RestoreCandidates.Insert(block->InstrHash, block); + if (prevBlock) + delete prevBlock; + } + else + { + delete block; + } + } } void CheckAndInvalidateITCM() { - for (u32 i = 0; i < ITCMPhysicalSize; i+=16) - { - if (CodeIndexITCM[i / 512].Code & (1 << ((i & 0x1FF) / 16))) - { - InvalidateByAddr(i | (ARMJIT_Memory::memregion_ITCM << 27)); - } - } + for (u32 i = 0; i < ITCMPhysicalSize; i+=16) + { + if (CodeIndexITCM[i / 512].Code & (1 << ((i & 0x1FF) / 16))) + { + InvalidateByAddr(i | (ARMJIT_Memory::memregion_ITCM << 27)); + } + } } template <u32 num, int region> void CheckAndInvalidate(u32 addr) { - u32 localAddr = ARMJIT_Memory::LocaliseAddress(region, num, addr); - if (CodeMemRegions[region][(localAddr & 0x7FFFFFF) / 512].Code & (1 << ((localAddr & 0x1FF) / 16))) - InvalidateByAddr(localAddr); + u32 localAddr = ARMJIT_Memory::LocaliseAddress(region, num, addr); + if (CodeMemRegions[region][(localAddr & 0x7FFFFFF) / 512].Code & (1 << ((localAddr & 0x1FF) / 16))) + InvalidateByAddr(localAddr); } JitBlockEntry LookUpBlock(u32 num, u64* entries, u32 offset, u32 addr) { - u64* entry = &entries[offset / 2]; - if (*entry >> 32 == (addr | num)) - return JITCompiler->AddEntryOffset((u32)*entry); - return NULL; + u64* entry = &entries[offset / 2]; + if (*entry >> 32 == (addr | num)) + return JITCompiler->AddEntryOffset((u32)*entry); + return NULL; } void blockSanityCheck(u32 num, u32 blockAddr, JitBlockEntry entry) { - u32 localAddr = LocaliseCodeAddress(num, blockAddr); - assert(JITCompiler->AddEntryOffset((u32)FastBlockLookupRegions[localAddr >> 27][(localAddr & 0x7FFFFFF) / 2]) == entry); + u32 localAddr = LocaliseCodeAddress(num, blockAddr); + assert(JITCompiler->AddEntryOffset((u32)FastBlockLookupRegions[localAddr >> 27][(localAddr & 0x7FFFFFF) / 2]) == entry); } bool SetupExecutableRegion(u32 num, u32 blockAddr, u64*& entry, u32& start, u32& size) { - // amazingly ignoring the DTCM is the proper behaviour for code fetches - int region = num == 0 - ? ARMJIT_Memory::ClassifyAddress9(blockAddr) - : ARMJIT_Memory::ClassifyAddress7(blockAddr); - - u32 memoryOffset; - if (FastBlockLookupRegions[region] - && ARMJIT_Memory::GetMirrorLocation(region, num, blockAddr, memoryOffset, start, size)) - { - //printf("setup exec region %d %d %08x %08x %x %x\n", num, region, blockAddr, start, size, memoryOffset); - entry = FastBlockLookupRegions[region] + memoryOffset / 2; - return true; - } - return false; + // amazingly ignoring the DTCM is the proper behaviour for code fetches + int region = num == 0 + ? ARMJIT_Memory::ClassifyAddress9(blockAddr) + : ARMJIT_Memory::ClassifyAddress7(blockAddr); + + u32 memoryOffset; + if (FastBlockLookupRegions[region] + && ARMJIT_Memory::GetMirrorLocation(region, num, blockAddr, memoryOffset, start, size)) + { + //printf("setup exec region %d %d %08x %08x %x %x\n", num, region, blockAddr, start, size, memoryOffset); + entry = FastBlockLookupRegions[region] + memoryOffset / 2; + return true; + } + return false; } template void CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(u32); @@ -1155,52 +1155,52 @@ template void CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_C>(u3 void ResetBlockCache() { - printf("Resetting JIT block cache...\n"); - - InvalidLiterals.Clear(); - for (int i = 0; i < ARMJIT_Memory::memregions_Count; i++) - memset(FastBlockLookupRegions[i], 0xFF, CodeRegionSizes[i] * sizeof(u64) / 2); - RestoreCandidates.Reset(); - for (int i = 0; i < sizeof(RestoreCandidates.Table)/sizeof(RestoreCandidates.Table[0]); i++) - { - if (RestoreCandidates.Table[i].ValA) - { - delete RestoreCandidates.Table[i].ValA; - RestoreCandidates.Table[i].ValA = NULL; - } - if (RestoreCandidates.Table[i].ValA) - { - delete RestoreCandidates.Table[i].ValB; - RestoreCandidates.Table[i].ValB = NULL; - } - } - for (auto it : JitBlocks9) - { - JitBlock* block = it.second; - for (int j = 0; j < block->NumAddresses; j++) - { - u32 addr = block->AddressRanges()[j]; - AddressRange* range = &CodeMemRegions[addr >> 27][(addr & 0x7FFFFFF) / 512]; - range->Blocks.Clear(); - range->Code = 0; - } - delete block; - } - for (auto it : JitBlocks7) - { - JitBlock* block = it.second; - for (int j = 0; j < block->NumAddresses; j++) - { - u32 addr = block->AddressRanges()[j]; - AddressRange* range = &CodeMemRegions[addr >> 27][(addr & 0x7FFFFFF) / 512]; - range->Blocks.Clear(); - range->Code = 0; - } - } - JitBlocks9.clear(); - JitBlocks7.clear(); - - JITCompiler->Reset(); + printf("Resetting JIT block cache...\n"); + + InvalidLiterals.Clear(); + for (int i = 0; i < ARMJIT_Memory::memregions_Count; i++) + memset(FastBlockLookupRegions[i], 0xFF, CodeRegionSizes[i] * sizeof(u64) / 2); + RestoreCandidates.Reset(); + for (int i = 0; i < sizeof(RestoreCandidates.Table)/sizeof(RestoreCandidates.Table[0]); i++) + { + if (RestoreCandidates.Table[i].ValA) + { + delete RestoreCandidates.Table[i].ValA; + RestoreCandidates.Table[i].ValA = NULL; + } + if (RestoreCandidates.Table[i].ValA) + { + delete RestoreCandidates.Table[i].ValB; + RestoreCandidates.Table[i].ValB = NULL; + } + } + for (auto it : JitBlocks9) + { + JitBlock* block = it.second; + for (int j = 0; j < block->NumAddresses; j++) + { + u32 addr = block->AddressRanges()[j]; + AddressRange* range = &CodeMemRegions[addr >> 27][(addr & 0x7FFFFFF) / 512]; + range->Blocks.Clear(); + range->Code = 0; + } + delete block; + } + for (auto it : JitBlocks7) + { + JitBlock* block = it.second; + for (int j = 0; j < block->NumAddresses; j++) + { + u32 addr = block->AddressRanges()[j]; + AddressRange* range = &CodeMemRegions[addr >> 27][(addr & 0x7FFFFFF) / 512]; + range->Blocks.Clear(); + range->Code = 0; + } + } + JitBlocks9.clear(); + JitBlocks7.clear(); + + JITCompiler->Reset(); } } diff --git a/src/ARMJIT_Internal.h b/src/ARMJIT_Internal.h index c87e1b3..bb6621f 100644 --- a/src/ARMJIT_Internal.h +++ b/src/ARMJIT_Internal.h @@ -16,10 +16,10 @@ namespace ARMJIT enum { - branch_IdleBranch = 1 << 0, - branch_FollowCondTaken = 1 << 1, - branch_FollowCondNotTaken = 1 << 2, - branch_StaticTarget = 1 << 3, + branch_IdleBranch = 1 << 0, + branch_FollowCondTaken = 1 << 1, + branch_FollowCondNotTaken = 1 << 2, + branch_StaticTarget = 1 << 3, }; struct FetchedInstr @@ -39,155 +39,155 @@ struct FetchedInstr return Instr >> 28; } - u8 BranchFlags; - u8 SetFlags; + u8 BranchFlags; + u8 SetFlags; u32 Instr; - u32 Addr; + u32 Addr; - u8 DataCycles; + u8 DataCycles; u16 CodeCycles; - u32 DataRegion; + u32 DataRegion; ARMInstrInfo::Info Info; }; /* - TinyVector - - because reinventing the wheel is the best! - - - meant to be used very often, with not so many elements - max 1 << 16 elements - - doesn't allocate while no elements are inserted - - not stl confirmant of course - - probably only works with POD types - - remove operations don't preserve order, but O(1)! + TinyVector + - because reinventing the wheel is the best! + + - meant to be used very often, with not so many elements + max 1 << 16 elements + - doesn't allocate while no elements are inserted + - not stl confirmant of course + - probably only works with POD types + - remove operations don't preserve order, but O(1)! */ template <typename T> struct __attribute__((packed)) TinyVector { - T* Data = NULL; - u16 Capacity = 0; - u16 Length = 0; - - ~TinyVector() - { - delete[] Data; - } - - void MakeCapacity(u32 capacity) - { - assert(capacity <= UINT16_MAX); - assert(capacity > Capacity); - T* newMem = new T[capacity]; - if (Data != NULL) - memcpy(newMem, Data, sizeof(T) * Length); - - T* oldData = Data; - Data = newMem; - if (oldData != NULL) - delete[] oldData; - - Capacity = capacity; - } - - void SetLength(u16 length) - { - if (Capacity < length) - MakeCapacity(length); - - Length = length; - } - - void Clear() - { - Length = 0; - } - - void Add(T element) - { - assert(Length + 1 <= UINT16_MAX); - if (Length + 1 > Capacity) - MakeCapacity(((Capacity + 4) * 3) / 2); - - Data[Length++] = element; - } - - void Remove(int index) - { - assert(index >= 0 && index < Length); - - Length--; - Data[index] = Data[Length]; - /*for (int i = index; i < Length; i++) - Data[i] = Data[i + 1];*/ - } - - int Find(T needle) - { - for (int i = 0; i < Length; i++) - { - if (Data[i] == needle) - return i; - } - return -1; - } - - bool RemoveByValue(T needle) - { - for (int i = 0; i < Length; i++) - { - if (Data[i] == needle) - { - Remove(i); - return true; - } - } - return false; - } - - T& operator[](int index) - { - assert(index >= 0 && index < Length); - return Data[index]; - } + T* Data = NULL; + u16 Capacity = 0; + u16 Length = 0; + + ~TinyVector() + { + delete[] Data; + } + + void MakeCapacity(u32 capacity) + { + assert(capacity <= UINT16_MAX); + assert(capacity > Capacity); + T* newMem = new T[capacity]; + if (Data != NULL) + memcpy(newMem, Data, sizeof(T) * Length); + + T* oldData = Data; + Data = newMem; + if (oldData != NULL) + delete[] oldData; + + Capacity = capacity; + } + + void SetLength(u16 length) + { + if (Capacity < length) + MakeCapacity(length); + + Length = length; + } + + void Clear() + { + Length = 0; + } + + void Add(T element) + { + assert(Length + 1 <= UINT16_MAX); + if (Length + 1 > Capacity) + MakeCapacity(((Capacity + 4) * 3) / 2); + + Data[Length++] = element; + } + + void Remove(int index) + { + assert(index >= 0 && index < Length); + + Length--; + Data[index] = Data[Length]; + /*for (int i = index; i < Length; i++) + Data[i] = Data[i + 1];*/ + } + + int Find(T needle) + { + for (int i = 0; i < Length; i++) + { + if (Data[i] == needle) + return i; + } + return -1; + } + + bool RemoveByValue(T needle) + { + for (int i = 0; i < Length; i++) + { + if (Data[i] == needle) + { + Remove(i); + return true; + } + } + return false; + } + + T& operator[](int index) + { + assert(index >= 0 && index < Length); + return Data[index]; + } }; class JitBlock { public: - JitBlock(u32 num, u32 literalHash, u32 numAddresses, u32 numLiterals) - { - Num = num; - NumAddresses = numAddresses; - NumLiterals = numLiterals; - Data.SetLength(numAddresses * 2 + numLiterals); - } - - u32 StartAddr; - u32 StartAddrLocal; - u32 InstrHash, LiteralHash; - u8 Num; - u16 NumAddresses; - u16 NumLiterals; - - JitBlockEntry EntryPoint; - - u32* AddressRanges() - { return &Data[0]; } - u32* AddressMasks() - { return &Data[NumAddresses]; } - u32* Literals() - { return &Data[NumAddresses * 2]; } + JitBlock(u32 num, u32 literalHash, u32 numAddresses, u32 numLiterals) + { + Num = num; + NumAddresses = numAddresses; + NumLiterals = numLiterals; + Data.SetLength(numAddresses * 2 + numLiterals); + } + + u32 StartAddr; + u32 StartAddrLocal; + u32 InstrHash, LiteralHash; + u8 Num; + u16 NumAddresses; + u16 NumLiterals; + + JitBlockEntry EntryPoint; + + u32* AddressRanges() + { return &Data[0]; } + u32* AddressMasks() + { return &Data[NumAddresses]; } + u32* Literals() + { return &Data[NumAddresses * 2]; } private: - TinyVector<u32> Data; + TinyVector<u32> Data; }; // size should be 16 bytes because I'm to lazy to use mul and whatnot struct __attribute__((packed)) AddressRange { - TinyVector<JitBlock*> Blocks; - u32 Code; + TinyVector<JitBlock*> Blocks; + u32 Code; }; @@ -201,12 +201,12 @@ extern AddressRange* const CodeMemRegions[ARMJIT_Memory::memregions_Count]; inline bool PageContainsCode(AddressRange* range) { - for (int i = 0; i < 8; i++) - { - if (range[i].Blocks.Length > 0) - return true; - } - return false; + for (int i = 0; i < 8; i++) + { + if (range[i].Blocks.Length > 0) + return true; + } + return false; } u32 LocaliseCodeAddress(u32 num, u32 addr); diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp index ec83905..2364023 100644 --- a/src/ARMJIT_Memory.cpp +++ b/src/ARMJIT_Memory.cpp @@ -25,22 +25,22 @@ #include <malloc.h> /* - We're handling fastmem here. + We're handling fastmem here. - Basically we're repurposing a big piece of virtual memory - and map the memory regions as they're structured on the DS - in it. + Basically we're repurposing a big piece of virtual memory + and map the memory regions as they're structured on the DS + in it. - On most systems you have a single piece of main ram, - maybe some video ram and faster cache RAM and that's about it. - Here we have not only a lot more different memory regions, - but also two address spaces. Not only that but they all have - mirrors (the worst case is 16kb SWRAM which is mirrored 1024x). + On most systems you have a single piece of main ram, + maybe some video ram and faster cache RAM and that's about it. + Here we have not only a lot more different memory regions, + but also two address spaces. Not only that but they all have + mirrors (the worst case is 16kb SWRAM which is mirrored 1024x). - We handle this by only mapping those regions which are actually - used and by praying the games don't go wild. + We handle this by only mapping those regions which are actually + used and by praying the games don't go wild. - Beware, this file is full of platform specific code. + Beware, this file is full of platform specific code. */ @@ -48,8 +48,8 @@ namespace ARMJIT_Memory { struct FaultDescription { - u32 EmulatedFaultAddr; - u64 FaultPC; + u32 EmulatedFaultAddr; + u64 FaultPC; }; bool FaultHandler(FaultDescription* faultDesc, s32& offset); @@ -61,7 +61,7 @@ bool FaultHandler(FaultDescription* faultDesc, s32& offset); extern "C" { - + void ARM_RestoreContext(u64* registers) __attribute__((noreturn)); extern char __start__; @@ -72,35 +72,35 @@ u64 __nx_exception_stack_size = 0x8000; void __libnx_exception_handler(ThreadExceptionDump* ctx) { - ARMJIT_Memory::FaultDescription desc; - u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); - desc.EmulatedFaultAddr = (u8*)ctx->far.x - curArea; - desc.FaultPC = ctx->pc.x; - - u64 integerRegisters[33]; - memcpy(integerRegisters, &ctx->cpu_gprs[0].x, 8*29); - integerRegisters[29] = ctx->fp.x; - integerRegisters[30] = ctx->lr.x; - integerRegisters[31] = ctx->sp.x; - integerRegisters[32] = ctx->pc.x; - - s32 offset; - if (ARMJIT_Memory::FaultHandler(&desc, offset)) - { - integerRegisters[32] += offset; - - ARM_RestoreContext(integerRegisters); - } - - if (ctx->pc.x >= (u64)&__start__ && ctx->pc.x < (u64)&__rodata_start) - { - printf("unintentional fault in .text at 0x%x (type %d) (trying to access 0x%x?)\n", - ctx->pc.x - (u64)&__start__, ctx->error_desc, ctx->far.x); - } - else - { - printf("unintentional fault somewhere in deep (address) space at %x (type %d)\n", ctx->pc.x, ctx->error_desc); - } + ARMJIT_Memory::FaultDescription desc; + u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); + desc.EmulatedFaultAddr = (u8*)ctx->far.x - curArea; + desc.FaultPC = ctx->pc.x; + + u64 integerRegisters[33]; + memcpy(integerRegisters, &ctx->cpu_gprs[0].x, 8*29); + integerRegisters[29] = ctx->fp.x; + integerRegisters[30] = ctx->lr.x; + integerRegisters[31] = ctx->sp.x; + integerRegisters[32] = ctx->pc.x; + + s32 offset; + if (ARMJIT_Memory::FaultHandler(&desc, offset)) + { + integerRegisters[32] += offset; + + ARM_RestoreContext(integerRegisters); + } + + if (ctx->pc.x >= (u64)&__start__ && ctx->pc.x < (u64)&__rodata_start) + { + printf("unintentional fault in .text at 0x%x (type %d) (trying to access 0x%x?)\n", + ctx->pc.x - (u64)&__start__, ctx->error_desc, ctx->far.x); + } + else + { + printf("unintentional fault somewhere in deep (address) space at %x (type %d)\n", ctx->pc.x, ctx->error_desc); + } } } @@ -109,25 +109,25 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx) static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo) { - if (exceptionInfo->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION) - { - printf("narg\n"); - return EXCEPTION_CONTINUE_SEARCH; - } - - ARMJIT_Memory::FaultDescription desc; - u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); - desc.EmulatedFaultAddr = (u8*)exceptionInfo->ExceptionRecord->ExceptionInformation[1] - curArea; - desc.FaultPC = exceptionInfo->ContextRecord->Rip; - - s32 offset = 0; - if (ARMJIT_Memory::FaultHandler(&desc, offset)) - { - exceptionInfo->ContextRecord->Rip += offset; - return EXCEPTION_CONTINUE_EXECUTION; - } - - return EXCEPTION_CONTINUE_SEARCH; + if (exceptionInfo->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION) + { + printf("narg\n"); + return EXCEPTION_CONTINUE_SEARCH; + } + + ARMJIT_Memory::FaultDescription desc; + u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); + desc.EmulatedFaultAddr = (u8*)exceptionInfo->ExceptionRecord->ExceptionInformation[1] - curArea; + desc.FaultPC = exceptionInfo->ContextRecord->Rip; + + s32 offset = 0; + if (ARMJIT_Memory::FaultHandler(&desc, offset)) + { + exceptionInfo->ContextRecord->Rip += offset; + return EXCEPTION_CONTINUE_EXECUTION; + } + + return EXCEPTION_CONTINUE_SEARCH; } #else @@ -137,28 +137,28 @@ struct sigaction OldSa; static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext) { - ucontext_t* context = (ucontext_t*)rawContext; - - ARMJIT_Memory::FaultDescription desc; - u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); + ucontext_t* context = (ucontext_t*)rawContext; + + ARMJIT_Memory::FaultDescription desc; + u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); #ifdef __x86_64__ - desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea; - desc.FaultPC = context->uc_mcontext.gregs[REG_RIP]; + desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea; + desc.FaultPC = context->uc_mcontext.gregs[REG_RIP]; #else - desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea; - desc.FaultPC = context->uc_mcontext.pc; + desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea; + desc.FaultPC = context->uc_mcontext.pc; #endif - s32 offset = 0; - if (ARMJIT_Memory::FaultHandler(&desc, offset)) - { + s32 offset = 0; + if (ARMJIT_Memory::FaultHandler(&desc, offset)) + { #ifdef __x86_64__ - context->uc_mcontext.gregs[REG_RIP] += offset; + context->uc_mcontext.gregs[REG_RIP] += offset; #else - context->uc_mcontext.pc += offset; + context->uc_mcontext.pc += offset; #endif - return; - } + return; + } if (OldSa.sa_flags & SA_SIGINFO) { @@ -188,12 +188,12 @@ void* FastMem9Start, *FastMem7Start; #ifdef _WIN32 inline u32 RoundUp(u32 size) { - return (size + 0xFFFF) & ~0xFFFF; + return (size + 0xFFFF) & ~0xFFFF; } #else inline u32 RoundUp(u32 size) { - return size; + return size; } #endif @@ -208,32 +208,32 @@ const u32 MemoryTotalSize = MemBlockNWRAM_COffset + RoundUp(DSi::NWRAMSize); const u32 OffsetsPerRegion[memregions_Count] = { - UINT32_MAX, - UINT32_MAX, - MemBlockDTCMOffset, - UINT32_MAX, - MemBlockMainRAMOffset, - MemBlockSWRAMOffset, - UINT32_MAX, - UINT32_MAX, - UINT32_MAX, - MemBlockARM7WRAMOffset, - UINT32_MAX, - UINT32_MAX, - UINT32_MAX, - UINT32_MAX, - UINT32_MAX, - MemBlockNWRAM_AOffset, - MemBlockNWRAM_BOffset, - MemBlockNWRAM_COffset + UINT32_MAX, + UINT32_MAX, + MemBlockDTCMOffset, + UINT32_MAX, + MemBlockMainRAMOffset, + MemBlockSWRAMOffset, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + MemBlockARM7WRAMOffset, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + MemBlockNWRAM_AOffset, + MemBlockNWRAM_BOffset, + MemBlockNWRAM_COffset }; enum { - memstate_Unmapped, - memstate_MappedRW, - // on switch this is unmapped as well - memstate_MappedProtected, + memstate_Unmapped, + memstate_MappedRW, + // on switch this is unmapped as well + memstate_MappedProtected, }; u8 MappingStatus9[1 << (32-12)]; @@ -253,925 +253,925 @@ int MemoryFile; bool MapIntoRange(u32 addr, u32 num, u32 offset, u32 size) { - u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr; + u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr; #ifdef __SWITCH__ - Result r = (svcMapProcessMemory(dst, envGetOwnProcessHandle(), - (u64)(MemoryBaseCodeMem + offset), size)); - return R_SUCCEEDED(r); + Result r = (svcMapProcessMemory(dst, envGetOwnProcessHandle(), + (u64)(MemoryBaseCodeMem + offset), size)); + return R_SUCCEEDED(r); #elif defined(_WIN32) - bool r = MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, offset, size, dst) == dst; - return r; + bool r = MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, offset, size, dst) == dst; + return r; #else - return mmap(dst, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, offset) != MAP_FAILED; + return mmap(dst, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, offset) != MAP_FAILED; #endif } bool UnmapFromRange(u32 addr, u32 num, u32 offset, u32 size) { - u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr; + u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr; #ifdef __SWITCH__ - Result r = svcUnmapProcessMemory(dst, envGetOwnProcessHandle(), - (u64)(MemoryBaseCodeMem + offset), size); - return R_SUCCEEDED(r); + Result r = svcUnmapProcessMemory(dst, envGetOwnProcessHandle(), + (u64)(MemoryBaseCodeMem + offset), size); + return R_SUCCEEDED(r); #elif defined(_WIN32) - return UnmapViewOfFile(dst); + return UnmapViewOfFile(dst); #else - return munmap(dst, size) == 0; + return munmap(dst, size) == 0; #endif } void SetCodeProtectionRange(u32 addr, u32 size, u32 num, int protection) { - u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr; + u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr; #if defined(_WIN32) - DWORD winProtection, oldProtection; - if (protection == 0) - winProtection = PAGE_NOACCESS; - else if (protection == 1) - winProtection = PAGE_READONLY; - else - winProtection = PAGE_READWRITE; - bool success = VirtualProtect(dst, size, winProtection, &oldProtection); - assert(success); + DWORD winProtection, oldProtection; + if (protection == 0) + winProtection = PAGE_NOACCESS; + else if (protection == 1) + winProtection = PAGE_READONLY; + else + winProtection = PAGE_READWRITE; + bool success = VirtualProtect(dst, size, winProtection, &oldProtection); + assert(success); #else - int posixProt; - if (protection == 0) - posixProt = PROT_NONE; - else if (protection == 1) - posixProt = PROT_READ; - else - posixProt = PROT_READ | PROT_WRITE; - mprotect(dst, size, posixProt); + int posixProt; + if (protection == 0) + posixProt = PROT_NONE; + else if (protection == 1) + posixProt = PROT_READ; + else + posixProt = PROT_READ | PROT_WRITE; + mprotect(dst, size, posixProt); #endif } struct Mapping { - u32 Addr; - u32 Size, LocalOffset; - u32 Num; - - void Unmap(int region) - { - bool skipDTCM = Num == 0 && region != memregion_DTCM; - u8* statuses = Num == 0 ? MappingStatus9 : MappingStatus7; - u32 offset = 0; - while (offset < Size) - { - if (skipDTCM && Addr + offset == NDS::ARM9->DTCMBase) - { - offset += NDS::ARM9->DTCMSize; - } - else - { - u32 segmentOffset = offset; - u8 status = statuses[(Addr + offset) >> 12]; - while (statuses[(Addr + offset) >> 12] == status - && offset < Size - && (!skipDTCM || Addr + offset != NDS::ARM9->DTCMBase)) - { - assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped); - statuses[(Addr + offset) >> 12] = memstate_Unmapped; - offset += 0x1000; - } + u32 Addr; + u32 Size, LocalOffset; + u32 Num; + + void Unmap(int region) + { + bool skipDTCM = Num == 0 && region != memregion_DTCM; + u8* statuses = Num == 0 ? MappingStatus9 : MappingStatus7; + u32 offset = 0; + while (offset < Size) + { + if (skipDTCM && Addr + offset == NDS::ARM9->DTCMBase) + { + offset += NDS::ARM9->DTCMSize; + } + else + { + u32 segmentOffset = offset; + u8 status = statuses[(Addr + offset) >> 12]; + while (statuses[(Addr + offset) >> 12] == status + && offset < Size + && (!skipDTCM || Addr + offset != NDS::ARM9->DTCMBase)) + { + assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped); + statuses[(Addr + offset) >> 12] = memstate_Unmapped; + offset += 0x1000; + } #ifdef __SWITCH__ - if (status == memstate_MappedRW) - { - u32 segmentSize = offset - segmentOffset; - printf("unmapping %x %x %x %x\n", Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize); - bool success = UnmapFromRange(Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize); - assert(success); - } + if (status == memstate_MappedRW) + { + u32 segmentSize = offset - segmentOffset; + printf("unmapping %x %x %x %x\n", Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize); + bool success = UnmapFromRange(Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize); + assert(success); + } #endif - } - } + } + } #ifndef __SWITCH__ - bool succeded = UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size); - assert(succeded); + bool succeded = UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size); + assert(succeded); #endif - } + } }; ARMJIT::TinyVector<Mapping> Mappings[memregions_Count]; void SetCodeProtection(int region, u32 offset, bool protect) { - offset &= ~0xFFF; - printf("set code protection %d %x %d\n", region, offset, protect); + offset &= ~0xFFF; + printf("set code protection %d %x %d\n", region, offset, protect); - for (int i = 0; i < Mappings[region].Length; i++) - { - Mapping& mapping = Mappings[region][i]; + for (int i = 0; i < Mappings[region].Length; i++) + { + Mapping& mapping = Mappings[region][i]; - if (offset < mapping.LocalOffset || offset >= mapping.LocalOffset + mapping.Size) - continue; + if (offset < mapping.LocalOffset || offset >= mapping.LocalOffset + mapping.Size) + continue; - u32 effectiveAddr = mapping.Addr + (offset - mapping.LocalOffset); - if (mapping.Num == 0 - && region != memregion_DTCM - && effectiveAddr >= NDS::ARM9->DTCMBase - && effectiveAddr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize)) - continue; + u32 effectiveAddr = mapping.Addr + (offset - mapping.LocalOffset); + if (mapping.Num == 0 + && region != memregion_DTCM + && effectiveAddr >= NDS::ARM9->DTCMBase + && effectiveAddr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize)) + continue; - u8* states = (u8*)(mapping.Num == 0 ? MappingStatus9 : MappingStatus7); + u8* states = (u8*)(mapping.Num == 0 ? MappingStatus9 : MappingStatus7); - printf("%x %d %x %x %x %d\n", effectiveAddr, mapping.Num, mapping.Addr, mapping.LocalOffset, mapping.Size, states[effectiveAddr >> 12]); - assert(states[effectiveAddr >> 12] == (protect ? memstate_MappedRW : memstate_MappedProtected)); - states[effectiveAddr >> 12] = protect ? memstate_MappedProtected : memstate_MappedRW; + printf("%x %d %x %x %x %d\n", effectiveAddr, mapping.Num, mapping.Addr, mapping.LocalOffset, mapping.Size, states[effectiveAddr >> 12]); + assert(states[effectiveAddr >> 12] == (protect ? memstate_MappedRW : memstate_MappedProtected)); + states[effectiveAddr >> 12] = protect ? memstate_MappedProtected : memstate_MappedRW; #if defined(__SWITCH__) - bool success; - if (protect) - success = UnmapFromRange(effectiveAddr, mapping.Num, OffsetsPerRegion[region] + offset, 0x1000); - else - success = MapIntoRange(effectiveAddr, mapping.Num, OffsetsPerRegion[region] + offset, 0x1000); - assert(success); + bool success; + if (protect) + success = UnmapFromRange(effectiveAddr, mapping.Num, OffsetsPerRegion[region] + offset, 0x1000); + else + success = MapIntoRange(effectiveAddr, mapping.Num, OffsetsPerRegion[region] + offset, 0x1000); + assert(success); #else - SetCodeProtectionRange(effectiveAddr, 0x1000, mapping.Num, protect ? 1 : 2); + SetCodeProtectionRange(effectiveAddr, 0x1000, mapping.Num, protect ? 1 : 2); #endif - } + } } void RemapDTCM(u32 newBase, u32 newSize) { - // this first part could be made more efficient - // by unmapping DTCM first and then map the holes - u32 oldDTCMBase = NDS::ARM9->DTCMBase; - u32 oldDTCBEnd = oldDTCMBase + NDS::ARM9->DTCMSize; - - u32 newEnd = newBase + newSize; - - printf("remapping DTCM %x %x %x %x\n", newBase, newEnd, oldDTCMBase, oldDTCBEnd); - // unmap all regions containing the old or the current DTCM mapping - for (int region = 0; region < memregions_Count; region++) - { - if (region == memregion_DTCM) - continue; - - for (int i = 0; i < Mappings[region].Length;) - { - Mapping& mapping = Mappings[region][i]; - - u32 start = mapping.Addr; - u32 end = mapping.Addr + mapping.Size; - - printf("mapping %d %x %x %x %x\n", region, mapping.Addr, mapping.Size, mapping.Num, mapping.LocalOffset); - - bool oldOverlap = NDS::ARM9->DTCMSize > 0 && !(oldDTCMBase >= end || oldDTCBEnd <= start); - bool newOverlap = newSize > 0 && !(newBase >= end || newEnd <= start); - - if (mapping.Num == 0 && (oldOverlap || newOverlap)) - { - mapping.Unmap(region); - Mappings[region].Remove(i); - } - else - { - i++; - } - } - } - - for (int i = 0; i < Mappings[memregion_DTCM].Length; i++) - { - Mappings[memregion_DTCM][i].Unmap(memregion_DTCM); - } - Mappings[memregion_DTCM].Clear(); + // this first part could be made more efficient + // by unmapping DTCM first and then map the holes + u32 oldDTCMBase = NDS::ARM9->DTCMBase; + u32 oldDTCBEnd = oldDTCMBase + NDS::ARM9->DTCMSize; + + u32 newEnd = newBase + newSize; + + printf("remapping DTCM %x %x %x %x\n", newBase, newEnd, oldDTCMBase, oldDTCBEnd); + // unmap all regions containing the old or the current DTCM mapping + for (int region = 0; region < memregions_Count; region++) + { + if (region == memregion_DTCM) + continue; + + for (int i = 0; i < Mappings[region].Length;) + { + Mapping& mapping = Mappings[region][i]; + + u32 start = mapping.Addr; + u32 end = mapping.Addr + mapping.Size; + + printf("mapping %d %x %x %x %x\n", region, mapping.Addr, mapping.Size, mapping.Num, mapping.LocalOffset); + + bool oldOverlap = NDS::ARM9->DTCMSize > 0 && !(oldDTCMBase >= end || oldDTCBEnd <= start); + bool newOverlap = newSize > 0 && !(newBase >= end || newEnd <= start); + + if (mapping.Num == 0 && (oldOverlap || newOverlap)) + { + mapping.Unmap(region); + Mappings[region].Remove(i); + } + else + { + i++; + } + } + } + + for (int i = 0; i < Mappings[memregion_DTCM].Length; i++) + { + Mappings[memregion_DTCM][i].Unmap(memregion_DTCM); + } + Mappings[memregion_DTCM].Clear(); } void RemapNWRAM(int num) { - for (int i = 0; i < Mappings[memregion_SharedWRAM].Length;) - { - Mapping& mapping = Mappings[memregion_SharedWRAM][i]; - if (!(DSi::NWRAMStart[mapping.Num][num] >= mapping.Addr + mapping.Size - || DSi::NWRAMEnd[mapping.Num][num] < mapping.Addr)) - { - mapping.Unmap(memregion_SharedWRAM); - Mappings[memregion_SharedWRAM].Remove(i); - } - else - { - i++; - } - } - for (int i = 0; i < Mappings[memregion_NewSharedWRAM_A + num].Length; i++) - { - Mappings[memregion_NewSharedWRAM_A + num][i].Unmap(memregion_NewSharedWRAM_A + num); - } - Mappings[memregion_NewSharedWRAM_A + num].Clear(); + for (int i = 0; i < Mappings[memregion_SharedWRAM].Length;) + { + Mapping& mapping = Mappings[memregion_SharedWRAM][i]; + if (!(DSi::NWRAMStart[mapping.Num][num] >= mapping.Addr + mapping.Size + || DSi::NWRAMEnd[mapping.Num][num] < mapping.Addr)) + { + mapping.Unmap(memregion_SharedWRAM); + Mappings[memregion_SharedWRAM].Remove(i); + } + else + { + i++; + } + } + for (int i = 0; i < Mappings[memregion_NewSharedWRAM_A + num].Length; i++) + { + Mappings[memregion_NewSharedWRAM_A + num][i].Unmap(memregion_NewSharedWRAM_A + num); + } + Mappings[memregion_NewSharedWRAM_A + num].Clear(); } void RemapSWRAM() { - printf("remapping SWRAM\n"); - for (int i = 0; i < Mappings[memregion_WRAM7].Length;) - { - Mapping& mapping = Mappings[memregion_WRAM7][i]; - if (mapping.Addr + mapping.Size < 0x03800000) - { - mapping.Unmap(memregion_WRAM7); - Mappings[memregion_WRAM7].Remove(i); - } - else - i++; - } - for (int i = 0; i < Mappings[memregion_SharedWRAM].Length; i++) - { - Mappings[memregion_SharedWRAM][i].Unmap(memregion_SharedWRAM); - } - Mappings[memregion_SharedWRAM].Clear(); + printf("remapping SWRAM\n"); + for (int i = 0; i < Mappings[memregion_WRAM7].Length;) + { + Mapping& mapping = Mappings[memregion_WRAM7][i]; + if (mapping.Addr + mapping.Size < 0x03800000) + { + mapping.Unmap(memregion_WRAM7); + Mappings[memregion_WRAM7].Remove(i); + } + else + i++; + } + for (int i = 0; i < Mappings[memregion_SharedWRAM].Length; i++) + { + Mappings[memregion_SharedWRAM][i].Unmap(memregion_SharedWRAM); + } + Mappings[memregion_SharedWRAM].Clear(); } bool MapAtAddress(u32 addr) { - u32 num = NDS::CurCPU; + u32 num = NDS::CurCPU; - int region = num == 0 - ? ClassifyAddress9(addr) - : ClassifyAddress7(addr); + int region = num == 0 + ? ClassifyAddress9(addr) + : ClassifyAddress7(addr); - if (!IsFastmemCompatible(region)) - return false; + if (!IsFastmemCompatible(region)) + return false; - u32 mirrorStart, mirrorSize, memoryOffset; - bool isMapped = GetMirrorLocation(region, num, addr, memoryOffset, mirrorStart, mirrorSize); - if (!isMapped) - return false; + u32 mirrorStart, mirrorSize, memoryOffset; + bool isMapped = GetMirrorLocation(region, num, addr, memoryOffset, mirrorStart, mirrorSize); + if (!isMapped) + return false; - u8* states = num == 0 ? MappingStatus9 : MappingStatus7; - printf("trying to create mapping %x, %x %x %d %d\n", mirrorStart, mirrorSize, memoryOffset, region, num); - bool isExecutable = ARMJIT::CodeMemRegions[region]; + u8* states = num == 0 ? MappingStatus9 : MappingStatus7; + printf("trying to create mapping %x, %x %x %d %d\n", mirrorStart, mirrorSize, memoryOffset, region, num); + bool isExecutable = ARMJIT::CodeMemRegions[region]; #ifndef __SWITCH__ - bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize); - assert(succeded); + bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize); + assert(succeded); #endif - ARMJIT::AddressRange* range = ARMJIT::CodeMemRegions[region] + memoryOffset / 512; - - // this overcomplicated piece of code basically just finds whole pieces of code memory - // which can be mapped - u32 offset = 0; - bool skipDTCM = num == 0 && region != memregion_DTCM; - while (offset < mirrorSize) - { - if (skipDTCM && mirrorStart + offset == NDS::ARM9->DTCMBase) - { - SetCodeProtectionRange(NDS::ARM9->DTCMBase, NDS::ARM9->DTCMSize, 0, 0); - offset += NDS::ARM9->DTCMSize; - } - else - { - u32 sectionOffset = offset; - bool hasCode = isExecutable && ARMJIT::PageContainsCode(&range[offset / 512]); - while ((!isExecutable || ARMJIT::PageContainsCode(&range[offset / 512]) == hasCode) - && offset < mirrorSize - && (!skipDTCM || mirrorStart + offset != NDS::ARM9->DTCMBase)) - { - assert(states[(mirrorStart + offset) >> 12] == memstate_Unmapped); - states[(mirrorStart + offset) >> 12] = hasCode ? memstate_MappedProtected : memstate_MappedRW; - offset += 0x1000; - } - - u32 sectionSize = offset - sectionOffset; + ARMJIT::AddressRange* range = ARMJIT::CodeMemRegions[region] + memoryOffset / 512; + + // this overcomplicated piece of code basically just finds whole pieces of code memory + // which can be mapped + u32 offset = 0; + bool skipDTCM = num == 0 && region != memregion_DTCM; + while (offset < mirrorSize) + { + if (skipDTCM && mirrorStart + offset == NDS::ARM9->DTCMBase) + { + SetCodeProtectionRange(NDS::ARM9->DTCMBase, NDS::ARM9->DTCMSize, 0, 0); + offset += NDS::ARM9->DTCMSize; + } + else + { + u32 sectionOffset = offset; + bool hasCode = isExecutable && ARMJIT::PageContainsCode(&range[offset / 512]); + while ((!isExecutable || ARMJIT::PageContainsCode(&range[offset / 512]) == hasCode) + && offset < mirrorSize + && (!skipDTCM || mirrorStart + offset != NDS::ARM9->DTCMBase)) + { + assert(states[(mirrorStart + offset) >> 12] == memstate_Unmapped); + states[(mirrorStart + offset) >> 12] = hasCode ? memstate_MappedProtected : memstate_MappedRW; + offset += 0x1000; + } + + u32 sectionSize = offset - sectionOffset; #if defined(__SWITCH__) - if (!hasCode) - { - printf("trying to map %x (size: %x) from %x\n", mirrorStart + sectionOffset, sectionSize, sectionOffset + memoryOffset + OffsetsPerRegion[region]); - bool succeded = MapIntoRange(mirrorStart + sectionOffset, num, sectionOffset + memoryOffset + OffsetsPerRegion[region], sectionSize); - assert(succeded); - } + if (!hasCode) + { + printf("trying to map %x (size: %x) from %x\n", mirrorStart + sectionOffset, sectionSize, sectionOffset + memoryOffset + OffsetsPerRegion[region]); + bool succeded = MapIntoRange(mirrorStart + sectionOffset, num, sectionOffset + memoryOffset + OffsetsPerRegion[region], sectionSize); + assert(succeded); + } #else - if (hasCode) - { - SetCodeProtectionRange(mirrorStart + sectionOffset, sectionSize, num, 1); - } + if (hasCode) + { + SetCodeProtectionRange(mirrorStart + sectionOffset, sectionSize, num, 1); + } #endif - } - } + } + } - assert(num == 0 || num == 1); - Mapping mapping{mirrorStart, mirrorSize, memoryOffset, num}; - Mappings[region].Add(mapping); + assert(num == 0 || num == 1); + Mapping mapping{mirrorStart, mirrorSize, memoryOffset, num}; + Mappings[region].Add(mapping); - printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + mirrorSize - 1); + printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + mirrorSize - 1); - return true; + return true; } bool FaultHandler(FaultDescription* faultDesc, s32& offset) { - if (ARMJIT::JITCompiler->IsJITFault(faultDesc->FaultPC)) - { - bool rewriteToSlowPath = true; - - u32 addr = faultDesc->EmulatedFaultAddr; - - if ((NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7)[addr >> 12] == memstate_Unmapped) - rewriteToSlowPath = !MapAtAddress(faultDesc->EmulatedFaultAddr); - - if (rewriteToSlowPath) - { - offset = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc->FaultPC); - } - return true; - } - return false; + if (ARMJIT::JITCompiler->IsJITFault(faultDesc->FaultPC)) + { + bool rewriteToSlowPath = true; + + u32 addr = faultDesc->EmulatedFaultAddr; + + if ((NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7)[addr >> 12] == memstate_Unmapped) + rewriteToSlowPath = !MapAtAddress(faultDesc->EmulatedFaultAddr); + + if (rewriteToSlowPath) + { + offset = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc->FaultPC); + } + return true; + } + return false; } void Init() { - const u64 AddrSpaceSize = 0x100000000; + const u64 AddrSpaceSize = 0x100000000; #if defined(__SWITCH__) MemoryBase = (u8*)memalign(0x1000, MemoryTotalSize); - MemoryBaseCodeMem = (u8*)virtmemReserve(MemoryTotalSize); + MemoryBaseCodeMem = (u8*)virtmemReserve(MemoryTotalSize); bool succeded = R_SUCCEEDED(svcMapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize)); assert(succeded); - succeded = R_SUCCEEDED(svcSetProcessMemoryPermission(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, + succeded = R_SUCCEEDED(svcSetProcessMemoryPermission(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, MemoryTotalSize, Perm_Rw)); - assert(succeded); + assert(succeded); - // 8 GB of address space, just don't ask... - FastMem9Start = virtmemReserve(AddrSpaceSize); - assert(FastMem9Start); - FastMem7Start = virtmemReserve(AddrSpaceSize); - assert(FastMem7Start); + // 8 GB of address space, just don't ask... + FastMem9Start = virtmemReserve(AddrSpaceSize); + assert(FastMem9Start); + FastMem7Start = virtmemReserve(AddrSpaceSize); + assert(FastMem7Start); - u8* basePtr = MemoryBaseCodeMem; + u8* basePtr = MemoryBaseCodeMem; #elif defined(_WIN32) - ExceptionHandlerHandle = AddVectoredExceptionHandler(1, ExceptionHandler); + ExceptionHandlerHandle = AddVectoredExceptionHandler(1, ExceptionHandler); - MemoryFile = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, MemoryTotalSize, NULL); + MemoryFile = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, MemoryTotalSize, NULL); - MemoryBase = (u8*)VirtualAlloc(NULL, MemoryTotalSize, MEM_RESERVE, PAGE_READWRITE); + MemoryBase = (u8*)VirtualAlloc(NULL, MemoryTotalSize, MEM_RESERVE, PAGE_READWRITE); - FastMem9Start = VirtualAlloc(NULL, AddrSpaceSize, MEM_RESERVE, PAGE_READWRITE); - FastMem7Start = VirtualAlloc(NULL, AddrSpaceSize, MEM_RESERVE, PAGE_READWRITE); + FastMem9Start = VirtualAlloc(NULL, AddrSpaceSize, MEM_RESERVE, PAGE_READWRITE); + FastMem7Start = VirtualAlloc(NULL, AddrSpaceSize, MEM_RESERVE, PAGE_READWRITE); - // only free them after they have all been reserved - // so they can't overlap - VirtualFree(MemoryBase, 0, MEM_RELEASE); - VirtualFree(FastMem9Start, 0, MEM_RELEASE); - VirtualFree(FastMem7Start, 0, MEM_RELEASE); + // only free them after they have all been reserved + // so they can't overlap + VirtualFree(MemoryBase, 0, MEM_RELEASE); + VirtualFree(FastMem9Start, 0, MEM_RELEASE); + VirtualFree(FastMem7Start, 0, MEM_RELEASE); - MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, MemoryTotalSize, MemoryBase); + MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, MemoryTotalSize, MemoryBase); - u8* basePtr = MemoryBase; + u8* basePtr = MemoryBase; #else - FastMem9Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); - FastMem7Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); + FastMem9Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); + FastMem7Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); - MemoryBase = (u8*)mmap(NULL, MemoryTotalSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); + MemoryBase = (u8*)mmap(NULL, MemoryTotalSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); - MemoryFile = memfd_create("melondsfastmem", 0); - ftruncate(MemoryFile, MemoryTotalSize); + MemoryFile = memfd_create("melondsfastmem", 0); + ftruncate(MemoryFile, MemoryTotalSize); - NewSa.sa_flags = SA_SIGINFO; - sigemptyset(&NewSa.sa_mask); - NewSa.sa_sigaction = SigsegvHandler; - sigaction(SIGSEGV, &NewSa, &OldSa); + NewSa.sa_flags = SA_SIGINFO; + sigemptyset(&NewSa.sa_mask); + NewSa.sa_sigaction = SigsegvHandler; + sigaction(SIGSEGV, &NewSa, &OldSa); - munmap(MemoryBase, MemoryTotalSize); - munmap(FastMem9Start, AddrSpaceSize); - munmap(FastMem7Start, AddrSpaceSize); + munmap(MemoryBase, MemoryTotalSize); + munmap(FastMem9Start, AddrSpaceSize); + munmap(FastMem7Start, AddrSpaceSize); - mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0); + mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0); - u8* basePtr = MemoryBase; + u8* basePtr = MemoryBase; #endif - NDS::MainRAM = basePtr + MemBlockMainRAMOffset; - NDS::SharedWRAM = basePtr + MemBlockSWRAMOffset; - NDS::ARM7WRAM = basePtr + MemBlockARM7WRAMOffset; - NDS::ARM9->DTCM = basePtr + MemBlockDTCMOffset; - DSi::NWRAM_A = basePtr + MemBlockNWRAM_AOffset; - DSi::NWRAM_B = basePtr + MemBlockNWRAM_BOffset; - DSi::NWRAM_C = basePtr + MemBlockNWRAM_COffset; + NDS::MainRAM = basePtr + MemBlockMainRAMOffset; + NDS::SharedWRAM = basePtr + MemBlockSWRAMOffset; + NDS::ARM7WRAM = basePtr + MemBlockARM7WRAMOffset; + NDS::ARM9->DTCM = basePtr + MemBlockDTCMOffset; + DSi::NWRAM_A = basePtr + MemBlockNWRAM_AOffset; + DSi::NWRAM_B = basePtr + MemBlockNWRAM_BOffset; + DSi::NWRAM_C = basePtr + MemBlockNWRAM_COffset; } void DeInit() { #if defined(__SWITCH__) - virtmemFree(FastMem9Start, 0x100000000); - virtmemFree(FastMem7Start, 0x100000000); + virtmemFree(FastMem9Start, 0x100000000); + virtmemFree(FastMem7Start, 0x100000000); svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize); - virtmemFree(MemoryBaseCodeMem, MemoryTotalSize); + virtmemFree(MemoryBaseCodeMem, MemoryTotalSize); free(MemoryBase); #elif defined(_WIN32) - assert(UnmapViewOfFile(MemoryBase)); - CloseHandle(MemoryFile); + assert(UnmapViewOfFile(MemoryBase)); + CloseHandle(MemoryFile); - RemoveVectoredExceptionHandler(ExceptionHandlerHandle); + RemoveVectoredExceptionHandler(ExceptionHandlerHandle); #endif } void Reset() { - for (int region = 0; region < memregions_Count; region++) - { - for (int i = 0; i < Mappings[region].Length; i++) - Mappings[region][i].Unmap(region); - Mappings[region].Clear(); - } - - for (int i = 0; i < sizeof(MappingStatus9); i++) - { - assert(MappingStatus9[i] == memstate_Unmapped); - assert(MappingStatus7[i] == memstate_Unmapped); - } - - printf("done resetting jit mem\n"); + for (int region = 0; region < memregions_Count; region++) + { + for (int i = 0; i < Mappings[region].Length; i++) + Mappings[region][i].Unmap(region); + Mappings[region].Clear(); + } + + for (int i = 0; i < sizeof(MappingStatus9); i++) + { + assert(MappingStatus9[i] == memstate_Unmapped); + assert(MappingStatus7[i] == memstate_Unmapped); + } + + printf("done resetting jit mem\n"); } bool IsFastmemCompatible(int region) { #ifdef _WIN32 - /* - TODO: with some hacks, the smaller shared WRAM regions - could be mapped in some occaisons as well - */ - if (region == memregion_DTCM - || region == memregion_SharedWRAM - || region == memregion_NewSharedWRAM_B - || region == memregion_NewSharedWRAM_C) - return false; + /* + TODO: with some hacks, the smaller shared WRAM regions + could be mapped in some occaisons as well + */ + if (region == memregion_DTCM + || region == memregion_SharedWRAM + || region == memregion_NewSharedWRAM_B + || region == memregion_NewSharedWRAM_C) + return false; #endif - return OffsetsPerRegion[region] != UINT32_MAX; + return OffsetsPerRegion[region] != UINT32_MAX; } bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mirrorStart, u32& mirrorSize) { - memoryOffset = 0; - switch (region) - { - case memregion_ITCM: - if (num == 0) - { - mirrorStart = addr & ~(ITCMPhysicalSize - 1); - mirrorSize = ITCMPhysicalSize; - return true; - } - return false; - case memregion_DTCM: - if (num == 0) - { - mirrorStart = addr & ~(DTCMPhysicalSize - 1); - mirrorSize = DTCMPhysicalSize; - return true; - } - return false; - case memregion_MainRAM: - mirrorStart = addr & ~NDS::MainRAMMask; - mirrorSize = NDS::MainRAMMask + 1; - return true; - case memregion_BIOS9: - if (num == 0) - { - mirrorStart = addr & ~0xFFF; - mirrorSize = 0x1000; - return true; - } - return false; - case memregion_BIOS7: - if (num == 1) - { - mirrorStart = 0; - mirrorSize = 0x4000; - return true; - } - return false; - case memregion_SharedWRAM: - if (num == 0 && NDS::SWRAM_ARM9.Mem) - { - mirrorStart = addr & ~NDS::SWRAM_ARM9.Mask; - mirrorSize = NDS::SWRAM_ARM9.Mask + 1; - memoryOffset = NDS::SWRAM_ARM9.Mem - NDS::SharedWRAM; - return true; - } - else if (num == 1 && NDS::SWRAM_ARM7.Mem) - { - mirrorStart = addr & ~NDS::SWRAM_ARM7.Mask; - mirrorSize = NDS::SWRAM_ARM7.Mask + 1; - memoryOffset = NDS::SWRAM_ARM7.Mem - NDS::SharedWRAM; - return true; - } - return false; - case memregion_WRAM7: - if (num == 1) - { - mirrorStart = addr & ~(NDS::ARM7WRAMSize - 1); - mirrorSize = NDS::ARM7WRAMSize; - return true; - } - return false; - case memregion_VRAM: - if (num == 0) - { - mirrorStart = addr & ~0xFFFFF; - mirrorSize = 0x100000; - } - return false; - case memregion_VWRAM: - if (num == 1) - { - mirrorStart = addr & ~0x3FFFF; - mirrorSize = 0x40000; - return true; - } - return false; - case memregion_NewSharedWRAM_A: - { - u8* ptr = DSi::NWRAMMap_A[num][(addr >> 16) & DSi::NWRAMMask[num][0]]; - if (ptr) - { - memoryOffset = ptr - DSi::NWRAM_A; - mirrorStart = addr & ~0xFFFF; - mirrorSize = 0x10000; - return true; - } - return false; // zero filled memory - } - case memregion_NewSharedWRAM_B: - { - u8* ptr = DSi::NWRAMMap_B[num][(addr >> 15) & DSi::NWRAMMask[num][1]]; - if (ptr) - { - memoryOffset = ptr - DSi::NWRAM_B; - mirrorStart = addr & ~0x7FFF; - mirrorSize = 0x8000; - return true; - } - return false; // zero filled memory - } - case memregion_NewSharedWRAM_C: - { - u8* ptr = DSi::NWRAMMap_C[num][(addr >> 15) & DSi::NWRAMMask[num][2]]; - if (ptr) - { - memoryOffset = ptr - DSi::NWRAM_C; - mirrorStart = addr & ~0x7FFF; - mirrorSize = 0x8000; - return true; - } - return false; // zero filled memory - } - case memregion_BIOS9DSi: - if (num == 0) - { - mirrorStart = addr & ~0xFFFF; - mirrorSize = DSi::SCFG_BIOS & (1<<0) ? 0x8000 : 0x10000; - return true; - } - return false; - case memregion_BIOS7DSi: - if (num == 1) - { - mirrorStart = addr & ~0xFFFF; - mirrorSize = DSi::SCFG_BIOS & (1<<8) ? 0x8000 : 0x10000; - return true; - } - return false; - default: - assert(false && "For the time being this should only be used for code"); - return false; - } + memoryOffset = 0; + switch (region) + { + case memregion_ITCM: + if (num == 0) + { + mirrorStart = addr & ~(ITCMPhysicalSize - 1); + mirrorSize = ITCMPhysicalSize; + return true; + } + return false; + case memregion_DTCM: + if (num == 0) + { + mirrorStart = addr & ~(DTCMPhysicalSize - 1); + mirrorSize = DTCMPhysicalSize; + return true; + } + return false; + case memregion_MainRAM: + mirrorStart = addr & ~NDS::MainRAMMask; + mirrorSize = NDS::MainRAMMask + 1; + return true; + case memregion_BIOS9: + if (num == 0) + { + mirrorStart = addr & ~0xFFF; + mirrorSize = 0x1000; + return true; + } + return false; + case memregion_BIOS7: + if (num == 1) + { + mirrorStart = 0; + mirrorSize = 0x4000; + return true; + } + return false; + case memregion_SharedWRAM: + if (num == 0 && NDS::SWRAM_ARM9.Mem) + { + mirrorStart = addr & ~NDS::SWRAM_ARM9.Mask; + mirrorSize = NDS::SWRAM_ARM9.Mask + 1; + memoryOffset = NDS::SWRAM_ARM9.Mem - NDS::SharedWRAM; + return true; + } + else if (num == 1 && NDS::SWRAM_ARM7.Mem) + { + mirrorStart = addr & ~NDS::SWRAM_ARM7.Mask; + mirrorSize = NDS::SWRAM_ARM7.Mask + 1; + memoryOffset = NDS::SWRAM_ARM7.Mem - NDS::SharedWRAM; + return true; + } + return false; + case memregion_WRAM7: + if (num == 1) + { + mirrorStart = addr & ~(NDS::ARM7WRAMSize - 1); + mirrorSize = NDS::ARM7WRAMSize; + return true; + } + return false; + case memregion_VRAM: + if (num == 0) + { + mirrorStart = addr & ~0xFFFFF; + mirrorSize = 0x100000; + } + return false; + case memregion_VWRAM: + if (num == 1) + { + mirrorStart = addr & ~0x3FFFF; + mirrorSize = 0x40000; + return true; + } + return false; + case memregion_NewSharedWRAM_A: + { + u8* ptr = DSi::NWRAMMap_A[num][(addr >> 16) & DSi::NWRAMMask[num][0]]; + if (ptr) + { + memoryOffset = ptr - DSi::NWRAM_A; + mirrorStart = addr & ~0xFFFF; + mirrorSize = 0x10000; + return true; + } + return false; // zero filled memory + } + case memregion_NewSharedWRAM_B: + { + u8* ptr = DSi::NWRAMMap_B[num][(addr >> 15) & DSi::NWRAMMask[num][1]]; + if (ptr) + { + memoryOffset = ptr - DSi::NWRAM_B; + mirrorStart = addr & ~0x7FFF; + mirrorSize = 0x8000; + return true; + } + return false; // zero filled memory + } + case memregion_NewSharedWRAM_C: + { + u8* ptr = DSi::NWRAMMap_C[num][(addr >> 15) & DSi::NWRAMMask[num][2]]; + if (ptr) + { + memoryOffset = ptr - DSi::NWRAM_C; + mirrorStart = addr & ~0x7FFF; + mirrorSize = 0x8000; + return true; + } + return false; // zero filled memory + } + case memregion_BIOS9DSi: + if (num == 0) + { + mirrorStart = addr & ~0xFFFF; + mirrorSize = DSi::SCFG_BIOS & (1<<0) ? 0x8000 : 0x10000; + return true; + } + return false; + case memregion_BIOS7DSi: + if (num == 1) + { + mirrorStart = addr & ~0xFFFF; + mirrorSize = DSi::SCFG_BIOS & (1<<8) ? 0x8000 : 0x10000; + return true; + } + return false; + default: + assert(false && "For the time being this should only be used for code"); + return false; + } } u32 LocaliseAddress(int region, u32 num, u32 addr) { - switch (region) - { - case memregion_ITCM: - return (addr & (ITCMPhysicalSize - 1)) | (memregion_ITCM << 27); - case memregion_MainRAM: - return (addr & NDS::MainRAMMask) | (memregion_MainRAM << 27); - case memregion_BIOS9: - return (addr & 0xFFF) | (memregion_BIOS9 << 27); - case memregion_BIOS7: - return (addr & 0x3FFF) | (memregion_BIOS7 << 27); - case memregion_SharedWRAM: - if (num == 0) - return ((addr & NDS::SWRAM_ARM9.Mask) + (NDS::SWRAM_ARM9.Mem - NDS::SharedWRAM)) | (memregion_SharedWRAM << 27); - else - return ((addr & NDS::SWRAM_ARM7.Mask) + (NDS::SWRAM_ARM7.Mem - NDS::SharedWRAM)) | (memregion_SharedWRAM << 27); - case memregion_WRAM7: - return (addr & (NDS::ARM7WRAMSize - 1)) | (memregion_WRAM7 << 27); - case memregion_VRAM: - // TODO: take mapping properly into account - return (addr & 0xFFFFF) | (memregion_VRAM << 27); - case memregion_VWRAM: - // same here - return (addr & 0x3FFFF) | (memregion_VWRAM << 27); - case memregion_NewSharedWRAM_A: - { - u8* ptr = DSi::NWRAMMap_A[num][(addr >> 16) & DSi::NWRAMMask[num][0]]; - if (ptr) - return (ptr - DSi::NWRAM_A + (addr & 0xFFFF)) | (memregion_NewSharedWRAM_A << 27); - else - return memregion_Other << 27; // zero filled memory - } - case memregion_NewSharedWRAM_B: - { - u8* ptr = DSi::NWRAMMap_B[num][(addr >> 15) & DSi::NWRAMMask[num][1]]; - if (ptr) - return (ptr - DSi::NWRAM_B + (addr & 0x7FFF)) | (memregion_NewSharedWRAM_B << 27); - else - return memregion_Other << 27; - } - case memregion_NewSharedWRAM_C: - { - u8* ptr = DSi::NWRAMMap_C[num][(addr >> 15) & DSi::NWRAMMask[num][2]]; - if (ptr) - return (ptr - DSi::NWRAM_C + (addr & 0x7FFF)) | (memregion_NewSharedWRAM_C << 27); - else - return memregion_Other << 27; - } - case memregion_BIOS9DSi: - case memregion_BIOS7DSi: - return (addr & 0xFFFF) | (region << 27); - default: - assert(false && "This should only be needed for regions which can contain code"); - return memregion_Other << 27; - } + switch (region) + { + case memregion_ITCM: + return (addr & (ITCMPhysicalSize - 1)) | (memregion_ITCM << 27); + case memregion_MainRAM: + return (addr & NDS::MainRAMMask) | (memregion_MainRAM << 27); + case memregion_BIOS9: + return (addr & 0xFFF) | (memregion_BIOS9 << 27); + case memregion_BIOS7: + return (addr & 0x3FFF) | (memregion_BIOS7 << 27); + case memregion_SharedWRAM: + if (num == 0) + return ((addr & NDS::SWRAM_ARM9.Mask) + (NDS::SWRAM_ARM9.Mem - NDS::SharedWRAM)) | (memregion_SharedWRAM << 27); + else + return ((addr & NDS::SWRAM_ARM7.Mask) + (NDS::SWRAM_ARM7.Mem - NDS::SharedWRAM)) | (memregion_SharedWRAM << 27); + case memregion_WRAM7: + return (addr & (NDS::ARM7WRAMSize - 1)) | (memregion_WRAM7 << 27); + case memregion_VRAM: + // TODO: take mapping properly into account + return (addr & 0xFFFFF) | (memregion_VRAM << 27); + case memregion_VWRAM: + // same here + return (addr & 0x3FFFF) | (memregion_VWRAM << 27); + case memregion_NewSharedWRAM_A: + { + u8* ptr = DSi::NWRAMMap_A[num][(addr >> 16) & DSi::NWRAMMask[num][0]]; + if (ptr) + return (ptr - DSi::NWRAM_A + (addr & 0xFFFF)) | (memregion_NewSharedWRAM_A << 27); + else + return memregion_Other << 27; // zero filled memory + } + case memregion_NewSharedWRAM_B: + { + u8* ptr = DSi::NWRAMMap_B[num][(addr >> 15) & DSi::NWRAMMask[num][1]]; + if (ptr) + return (ptr - DSi::NWRAM_B + (addr & 0x7FFF)) | (memregion_NewSharedWRAM_B << 27); + else + return memregion_Other << 27; + } + case memregion_NewSharedWRAM_C: + { + u8* ptr = DSi::NWRAMMap_C[num][(addr >> 15) & DSi::NWRAMMask[num][2]]; + if (ptr) + return (ptr - DSi::NWRAM_C + (addr & 0x7FFF)) | (memregion_NewSharedWRAM_C << 27); + else + return memregion_Other << 27; + } + case memregion_BIOS9DSi: + case memregion_BIOS7DSi: + return (addr & 0xFFFF) | (region << 27); + default: + assert(false && "This should only be needed for regions which can contain code"); + return memregion_Other << 27; + } } int ClassifyAddress9(u32 addr) { - if (addr < NDS::ARM9->ITCMSize) - { - return memregion_ITCM; - } - else if (addr >= NDS::ARM9->DTCMBase && addr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize)) - { - return memregion_DTCM; - } - else - { - if (NDS::ConsoleType == 1 && addr >= 0xFFFF0000 && !(DSi::SCFG_BIOS & (1<<1))) - { - if ((addr >= 0xFFFF8000) && (DSi::SCFG_BIOS & (1<<0))) - return memregion_Other; - - return memregion_BIOS9DSi; - } - else if ((addr & 0xFFFFF000) == 0xFFFF0000) - { - return memregion_BIOS9; - } - - switch (addr & 0xFF000000) - { - case 0x02000000: - return memregion_MainRAM; - case 0x03000000: - if (NDS::ConsoleType == 1) - { - if (addr >= DSi::NWRAMStart[0][0] && addr < DSi::NWRAMEnd[0][0]) - return memregion_NewSharedWRAM_A; - if (addr >= DSi::NWRAMStart[0][1] && addr < DSi::NWRAMEnd[0][1]) - return memregion_NewSharedWRAM_B; - if (addr >= DSi::NWRAMStart[0][2] && addr < DSi::NWRAMEnd[0][2]) - return memregion_NewSharedWRAM_C; - } - - if (NDS::SWRAM_ARM9.Mem) - return memregion_SharedWRAM; - return memregion_Other; - case 0x04000000: - return memregion_IO9; - case 0x06000000: - return memregion_VRAM; - default: - return memregion_Other; - } - } + if (addr < NDS::ARM9->ITCMSize) + { + return memregion_ITCM; + } + else if (addr >= NDS::ARM9->DTCMBase && addr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize)) + { + return memregion_DTCM; + } + else + { + if (NDS::ConsoleType == 1 && addr >= 0xFFFF0000 && !(DSi::SCFG_BIOS & (1<<1))) + { + if ((addr >= 0xFFFF8000) && (DSi::SCFG_BIOS & (1<<0))) + return memregion_Other; + + return memregion_BIOS9DSi; + } + else if ((addr & 0xFFFFF000) == 0xFFFF0000) + { + return memregion_BIOS9; + } + + switch (addr & 0xFF000000) + { + case 0x02000000: + return memregion_MainRAM; + case 0x03000000: + if (NDS::ConsoleType == 1) + { + if (addr >= DSi::NWRAMStart[0][0] && addr < DSi::NWRAMEnd[0][0]) + return memregion_NewSharedWRAM_A; + if (addr >= DSi::NWRAMStart[0][1] && addr < DSi::NWRAMEnd[0][1]) + return memregion_NewSharedWRAM_B; + if (addr >= DSi::NWRAMStart[0][2] && addr < DSi::NWRAMEnd[0][2]) + return memregion_NewSharedWRAM_C; + } + + if (NDS::SWRAM_ARM9.Mem) + return memregion_SharedWRAM; + return memregion_Other; + case 0x04000000: + return memregion_IO9; + case 0x06000000: + return memregion_VRAM; + default: + return memregion_Other; + } + } } int ClassifyAddress7(u32 addr) { - if (NDS::ConsoleType == 1 && addr < 0x00010000 && !(DSi::SCFG_BIOS & (1<<9))) + if (NDS::ConsoleType == 1 && addr < 0x00010000 && !(DSi::SCFG_BIOS & (1<<9))) { if (addr >= 0x00008000 && DSi::SCFG_BIOS & (1<<8)) return memregion_Other; return memregion_BIOS7DSi; } - else if (addr < 0x00004000) - { - return memregion_BIOS7; - } - else - { - switch (addr & 0xFF800000) - { - case 0x02000000: - case 0x02800000: - return memregion_MainRAM; - case 0x03000000: - if (NDS::ConsoleType == 1) - { - if (addr >= DSi::NWRAMStart[1][0] && addr < DSi::NWRAMEnd[1][0]) - return memregion_NewSharedWRAM_A; - if (addr >= DSi::NWRAMStart[1][1] && addr < DSi::NWRAMEnd[1][1]) - return memregion_NewSharedWRAM_B; - if (addr >= DSi::NWRAMStart[1][2] && addr < DSi::NWRAMEnd[1][2]) - return memregion_NewSharedWRAM_C; - } - - if (NDS::SWRAM_ARM7.Mem) - return memregion_SharedWRAM; - return memregion_WRAM7; - case 0x03800000: - return memregion_WRAM7; - case 0x04000000: - return memregion_IO7; - case 0x04800000: - return memregion_Wifi; - case 0x06000000: - case 0x06800000: - return memregion_VWRAM; - } - } - return memregion_Other; + else if (addr < 0x00004000) + { + return memregion_BIOS7; + } + else + { + switch (addr & 0xFF800000) + { + case 0x02000000: + case 0x02800000: + return memregion_MainRAM; + case 0x03000000: + if (NDS::ConsoleType == 1) + { + if (addr >= DSi::NWRAMStart[1][0] && addr < DSi::NWRAMEnd[1][0]) + return memregion_NewSharedWRAM_A; + if (addr >= DSi::NWRAMStart[1][1] && addr < DSi::NWRAMEnd[1][1]) + return memregion_NewSharedWRAM_B; + if (addr >= DSi::NWRAMStart[1][2] && addr < DSi::NWRAMEnd[1][2]) + return memregion_NewSharedWRAM_C; + } + + if (NDS::SWRAM_ARM7.Mem) + return memregion_SharedWRAM; + return memregion_WRAM7; + case 0x03800000: + return memregion_WRAM7; + case 0x04000000: + return memregion_IO7; + case 0x04800000: + return memregion_Wifi; + case 0x06000000: + case 0x06800000: + return memregion_VWRAM; + } + } + return memregion_Other; } void WifiWrite32(u32 addr, u32 val) { - Wifi::Write(addr, val & 0xFFFF); - Wifi::Write(addr + 2, val >> 16); + Wifi::Write(addr, val & 0xFFFF); + Wifi::Write(addr + 2, val >> 16); } u32 WifiRead32(u32 addr) { - return Wifi::Read(addr) | (Wifi::Read(addr + 2) << 16); + return Wifi::Read(addr) | (Wifi::Read(addr + 2) << 16); } template <typename T> void VRAMWrite(u32 addr, T val) { - switch (addr & 0x00E00000) - { - case 0x00000000: GPU::WriteVRAM_ABG<T>(addr, val); return; - case 0x00200000: GPU::WriteVRAM_BBG<T>(addr, val); return; - case 0x00400000: GPU::WriteVRAM_AOBJ<T>(addr, val); return; - case 0x00600000: GPU::WriteVRAM_BOBJ<T>(addr, val); return; - default: GPU::WriteVRAM_LCDC<T>(addr, val); return; - } + switch (addr & 0x00E00000) + { + case 0x00000000: GPU::WriteVRAM_ABG<T>(addr, val); return; + case 0x00200000: GPU::WriteVRAM_BBG<T>(addr, val); return; + case 0x00400000: GPU::WriteVRAM_AOBJ<T>(addr, val); return; + case 0x00600000: GPU::WriteVRAM_BOBJ<T>(addr, val); return; + default: GPU::WriteVRAM_LCDC<T>(addr, val); return; + } } template <typename T> T VRAMRead(u32 addr) { - switch (addr & 0x00E00000) - { - case 0x00000000: return GPU::ReadVRAM_ABG<T>(addr); - case 0x00200000: return GPU::ReadVRAM_BBG<T>(addr); - case 0x00400000: return GPU::ReadVRAM_AOBJ<T>(addr); - case 0x00600000: return GPU::ReadVRAM_BOBJ<T>(addr); - default: return GPU::ReadVRAM_LCDC<T>(addr); - } + switch (addr & 0x00E00000) + { + case 0x00000000: return GPU::ReadVRAM_ABG<T>(addr); + case 0x00200000: return GPU::ReadVRAM_BBG<T>(addr); + case 0x00400000: return GPU::ReadVRAM_AOBJ<T>(addr); + case 0x00600000: return GPU::ReadVRAM_BOBJ<T>(addr); + default: return GPU::ReadVRAM_LCDC<T>(addr); + } } void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) { - if (cpu->Num == 0) - { - switch (addr & 0xFF000000) - { - case 0x04000000: - if (!store && size == 32 && addr == 0x04100010 && NDS::ExMemCnt[0] & (1<<11)) - return (void*)NDSCart::ReadROMData; - - /* - unfortunately we can't map GPU2D this way - since it's hidden inside an object - - though GPU3D registers are accessed much more intensive - */ - if (addr >= 0x04000320 && addr < 0x040006A4) - { - switch (size | store) - { - case 8: return (void*)GPU3D::Read8; - case 9: return (void*)GPU3D::Write8; - case 16: return (void*)GPU3D::Read16; - case 17: return (void*)GPU3D::Write16; - case 32: return (void*)GPU3D::Read32; - case 33: return (void*)GPU3D::Write32; - } - } - - if (NDS::ConsoleType == 0) - { - switch (size | store) - { - case 8: return (void*)NDS::ARM9IORead8; - case 9: return (void*)NDS::ARM9IOWrite8; - case 16: return (void*)NDS::ARM9IORead16; - case 17: return (void*)NDS::ARM9IOWrite16; - case 32: return (void*)NDS::ARM9IORead32; - case 33: return (void*)NDS::ARM9IOWrite32; - } - } - else - { - switch (size | store) - { - case 8: return (void*)DSi::ARM9IORead8; - case 9: return (void*)DSi::ARM9IOWrite8; - case 16: return (void*)DSi::ARM9IORead16; - case 17: return (void*)DSi::ARM9IOWrite16; - case 32: return (void*)DSi::ARM9IORead32; - case 33: return (void*)DSi::ARM9IOWrite32; - } - } - break; - case 0x06000000: - switch (size | store) - { - case 8: return (void*)VRAMRead<u8>; - case 9: return NULL; - case 16: return (void*)VRAMRead<u16>; - case 17: return (void*)VRAMWrite<u16>; - case 32: return (void*)VRAMRead<u32>; - case 33: return (void*)VRAMWrite<u32>; - } - break; - } - } - else - { - switch (addr & 0xFF800000) - { - case 0x04000000: - if (addr >= 0x04000400 && addr < 0x04000520) - { - switch (size | store) - { - case 8: return (void*)SPU::Read8; - case 9: return (void*)SPU::Write8; - case 16: return (void*)SPU::Read16; - case 17: return (void*)SPU::Write16; - case 32: return (void*)SPU::Read32; - case 33: return (void*)SPU::Write32; - } - } - - if (NDS::ConsoleType == 0) - { - switch (size | store) - { - case 8: return (void*)NDS::ARM7IORead8; - case 9: return (void*)NDS::ARM7IOWrite8; - case 16: return (void*)NDS::ARM7IORead16; - case 17: return (void*)NDS::ARM7IOWrite16; - case 32: return (void*)NDS::ARM7IORead32; - case 33: return (void*)NDS::ARM7IOWrite32; - } - } - else - { - switch (size | store) - { - case 8: return (void*)DSi::ARM7IORead8; - case 9: return (void*)DSi::ARM7IOWrite8; - case 16: return (void*)DSi::ARM7IORead16; - case 17: return (void*)DSi::ARM7IOWrite16; - case 32: return (void*)DSi::ARM7IORead32; - case 33: return (void*)DSi::ARM7IOWrite32; - } - } - break; - case 0x04800000: - if (addr < 0x04810000 && size >= 16) - { - switch (size | store) - { - case 16: return (void*)Wifi::Read; - case 17: return (void*)Wifi::Write; - case 32: return (void*)WifiRead32; - case 33: return (void*)WifiWrite32; - } - } - break; - case 0x06000000: - case 0x06800000: - switch (size | store) - { - case 8: return (void*)GPU::ReadVRAM_ARM7<u8>; - case 9: return (void*)GPU::WriteVRAM_ARM7<u8>; - case 16: return (void*)GPU::ReadVRAM_ARM7<u16>; - case 17: return (void*)GPU::WriteVRAM_ARM7<u16>; - case 32: return (void*)GPU::ReadVRAM_ARM7<u32>; - case 33: return (void*)GPU::WriteVRAM_ARM7<u32>; - } - } - } - return NULL; + if (cpu->Num == 0) + { + switch (addr & 0xFF000000) + { + case 0x04000000: + if (!store && size == 32 && addr == 0x04100010 && NDS::ExMemCnt[0] & (1<<11)) + return (void*)NDSCart::ReadROMData; + + /* + unfortunately we can't map GPU2D this way + since it's hidden inside an object + + though GPU3D registers are accessed much more intensive + */ + if (addr >= 0x04000320 && addr < 0x040006A4) + { + switch (size | store) + { + case 8: return (void*)GPU3D::Read8; + case 9: return (void*)GPU3D::Write8; + case 16: return (void*)GPU3D::Read16; + case 17: return (void*)GPU3D::Write16; + case 32: return (void*)GPU3D::Read32; + case 33: return (void*)GPU3D::Write32; + } + } + + if (NDS::ConsoleType == 0) + { + switch (size | store) + { + case 8: return (void*)NDS::ARM9IORead8; + case 9: return (void*)NDS::ARM9IOWrite8; + case 16: return (void*)NDS::ARM9IORead16; + case 17: return (void*)NDS::ARM9IOWrite16; + case 32: return (void*)NDS::ARM9IORead32; + case 33: return (void*)NDS::ARM9IOWrite32; + } + } + else + { + switch (size | store) + { + case 8: return (void*)DSi::ARM9IORead8; + case 9: return (void*)DSi::ARM9IOWrite8; + case 16: return (void*)DSi::ARM9IORead16; + case 17: return (void*)DSi::ARM9IOWrite16; + case 32: return (void*)DSi::ARM9IORead32; + case 33: return (void*)DSi::ARM9IOWrite32; + } + } + break; + case 0x06000000: + switch (size | store) + { + case 8: return (void*)VRAMRead<u8>; + case 9: return NULL; + case 16: return (void*)VRAMRead<u16>; + case 17: return (void*)VRAMWrite<u16>; + case 32: return (void*)VRAMRead<u32>; + case 33: return (void*)VRAMWrite<u32>; + } + break; + } + } + else + { + switch (addr & 0xFF800000) + { + case 0x04000000: + if (addr >= 0x04000400 && addr < 0x04000520) + { + switch (size | store) + { + case 8: return (void*)SPU::Read8; + case 9: return (void*)SPU::Write8; + case 16: return (void*)SPU::Read16; + case 17: return (void*)SPU::Write16; + case 32: return (void*)SPU::Read32; + case 33: return (void*)SPU::Write32; + } + } + + if (NDS::ConsoleType == 0) + { + switch (size | store) + { + case 8: return (void*)NDS::ARM7IORead8; + case 9: return (void*)NDS::ARM7IOWrite8; + case 16: return (void*)NDS::ARM7IORead16; + case 17: return (void*)NDS::ARM7IOWrite16; + case 32: return (void*)NDS::ARM7IORead32; + case 33: return (void*)NDS::ARM7IOWrite32; + } + } + else + { + switch (size | store) + { + case 8: return (void*)DSi::ARM7IORead8; + case 9: return (void*)DSi::ARM7IOWrite8; + case 16: return (void*)DSi::ARM7IORead16; + case 17: return (void*)DSi::ARM7IOWrite16; + case 32: return (void*)DSi::ARM7IORead32; + case 33: return (void*)DSi::ARM7IOWrite32; + } + } + break; + case 0x04800000: + if (addr < 0x04810000 && size >= 16) + { + switch (size | store) + { + case 16: return (void*)Wifi::Read; + case 17: return (void*)Wifi::Write; + case 32: return (void*)WifiRead32; + case 33: return (void*)WifiWrite32; + } + } + break; + case 0x06000000: + case 0x06800000: + switch (size | store) + { + case 8: return (void*)GPU::ReadVRAM_ARM7<u8>; + case 9: return (void*)GPU::WriteVRAM_ARM7<u8>; + case 16: return (void*)GPU::ReadVRAM_ARM7<u16>; + case 17: return (void*)GPU::WriteVRAM_ARM7<u16>; + case 32: return (void*)GPU::ReadVRAM_ARM7<u32>; + case 33: return (void*)GPU::WriteVRAM_ARM7<u32>; + } + } + } + return NULL; } }
\ No newline at end of file diff --git a/src/ARMJIT_Memory.h b/src/ARMJIT_Memory.h index 123e18e..4912449 100644 --- a/src/ARMJIT_Memory.h +++ b/src/ARMJIT_Memory.h @@ -18,28 +18,28 @@ void Reset(); enum { - memregion_Other = 0, - memregion_ITCM, - memregion_DTCM, - memregion_BIOS9, - memregion_MainRAM, - memregion_SharedWRAM, - memregion_IO9, - memregion_VRAM, - memregion_BIOS7, - memregion_WRAM7, - memregion_IO7, - memregion_Wifi, - memregion_VWRAM, - - // DSi - memregion_BIOS9DSi, - memregion_BIOS7DSi, - memregion_NewSharedWRAM_A, - memregion_NewSharedWRAM_B, - memregion_NewSharedWRAM_C, - - memregions_Count + memregion_Other = 0, + memregion_ITCM, + memregion_DTCM, + memregion_BIOS9, + memregion_MainRAM, + memregion_SharedWRAM, + memregion_IO9, + memregion_VRAM, + memregion_BIOS7, + memregion_WRAM7, + memregion_IO7, + memregion_Wifi, + memregion_VWRAM, + + // DSi + memregion_BIOS9DSi, + memregion_BIOS7DSi, + memregion_NewSharedWRAM_A, + memregion_NewSharedWRAM_B, + memregion_NewSharedWRAM_C, + + memregions_Count }; int ClassifyAddress9(u32 addr); diff --git a/src/ARMJIT_RegisterCache.h b/src/ARMJIT_RegisterCache.h index 0547c84..feb2d35 100644 --- a/src/ARMJIT_RegisterCache.h +++ b/src/ARMJIT_RegisterCache.h @@ -18,8 +18,8 @@ public: RegisterCache() {} - RegisterCache(T* compiler, FetchedInstr instrs[], int instrsCount, bool pcAllocatableAsSrc = false) - : Compiler(compiler), Instrs(instrs), InstrsCount(instrsCount) + RegisterCache(T* compiler, FetchedInstr instrs[], int instrsCount, bool pcAllocatableAsSrc = false) + : Compiler(compiler), Instrs(instrs), InstrsCount(instrsCount) { for (int i = 0; i < 16; i++) Mapping[i] = (Reg)-1; @@ -95,7 +95,7 @@ public: LiteralsLoaded = 0; } - void Prepare(bool thumb, int i) + void Prepare(bool thumb, int i) { FetchedInstr instr = Instrs[i]; @@ -175,23 +175,23 @@ public: DirtyRegs |= (LoadedRegs & instr.Info.DstRegs) & ~(1 << 15); } - static const Reg NativeRegAllocOrder[]; - static const int NativeRegsAvailable; + static const Reg NativeRegAllocOrder[]; + static const int NativeRegsAvailable; - Reg Mapping[16]; + Reg Mapping[16]; u32 LiteralValues[16]; u16 LiteralsLoaded = 0; - u32 NativeRegsUsed = 0; - u16 LoadedRegs = 0; - u16 DirtyRegs = 0; + u32 NativeRegsUsed = 0; + u16 LoadedRegs = 0; + u16 DirtyRegs = 0; u16 PCAllocatableAsSrc = 0; - T* Compiler; + T* Compiler; - FetchedInstr* Instrs; - int InstrsCount; + FetchedInstr* Instrs; + int InstrsCount; }; } |