diff options
author | WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com> | 2020-08-05 15:06:15 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-08-05 15:06:15 +0100 |
commit | 6d71f9c83293006b02a96ce0f5a5f9f65a47cd18 (patch) | |
tree | 5558a3a2ae148e7e17fdd56ab0296b883da0aa09 /src | |
parent | 7e5eafe345017dc93a68572528e896f896a6e175 (diff) | |
parent | e4b1526b477bc66996bce8f0a2f81c2f1cffba63 (diff) |
Merge branch 'master' into feature/zip-support
Diffstat (limited to 'src')
51 files changed, 3430 insertions, 3061 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp index 8530795..7eeacb7 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -80,7 +80,7 @@ ARM::~ARM() ARMv5::ARMv5() : ARM(0) { #ifndef JIT_ENABLED - DTCM = new u8[DTCMSize]; + DTCM = new u8[DTCMPhysicalSize]; #endif } @@ -274,15 +274,15 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) if (addr & 0x2) { NextInstr[0] = CodeRead32(addr-2, true) >> 16; - Cycles -= CodeCycles; + Cycles += CodeCycles; NextInstr[1] = CodeRead32(addr+2, false); - Cycles -= CodeCycles; + Cycles += CodeCycles; } else { NextInstr[0] = CodeRead32(addr, true); NextInstr[1] = NextInstr[0] >> 16; - Cycles -= CodeCycles; + Cycles += CodeCycles; } CPSR |= 0x20; @@ -295,9 +295,9 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) if (newregion != oldregion) SetupCodeMem(addr); NextInstr[0] = CodeRead32(addr, true); - Cycles -= CodeCycles; + Cycles += CodeCycles; NextInstr[1] = CodeRead32(addr+4, false); - Cycles -= CodeCycles; + Cycles += CodeCycles; CPSR &= ~0x20; } @@ -337,7 +337,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) NextInstr[0] = CodeRead16(addr); NextInstr[1] = CodeRead16(addr+2); - Cycles -= NDS::ARM7MemTimings[CodeCycles][0] + NDS::ARM7MemTimings[CodeCycles][1]; + Cycles += NDS::ARM7MemTimings[CodeCycles][0] + NDS::ARM7MemTimings[CodeCycles][1]; CPSR |= 0x20; } @@ -350,7 +350,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) NextInstr[0] = CodeRead32(addr); NextInstr[1] = CodeRead32(addr+4); - Cycles -= NDS::ARM7MemTimings[CodeCycles][2] + NDS::ARM7MemTimings[CodeCycles][3]; + Cycles += NDS::ARM7MemTimings[CodeCycles][2] + NDS::ARM7MemTimings[CodeCycles][3]; CPSR &= ~0x20; } @@ -609,7 +609,7 @@ void ARMv5::Execute() }*/ if (IRQ) TriggerIRQ(); - NDS::ARM9Timestamp -= Cycles; + NDS::ARM9Timestamp += Cycles; Cycles = 0; } @@ -643,9 +643,6 @@ void ARMv5::ExecuteJIT() { u32 instrAddr = R[15] - ((CPSR&0x20)?2:4); - // hack so Cycles <= 0 becomes Cycles < 0 - Cycles = NDS::ARM9Target - NDS::ARM9Timestamp - 1; - if ((instrAddr < FastBlockLookupStart || 
instrAddr >= (FastBlockLookupStart + FastBlockLookupSize)) && !ARMJIT::SetupExecutableRegion(0, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize)) { @@ -661,24 +658,26 @@ void ARMv5::ExecuteJIT() else ARMJIT::CompileBlock(this); - NDS::ARM9Timestamp = NDS::ARM9Target - Cycles - 1; - if (StopExecution) { + // this order is crucial otherwise idle loops waiting for an IRQ won't function if (IRQ) TriggerIRQ(); if (Halted || IdleLoop) { - bool idleLoop = IdleLoop; - IdleLoop = 0; - if ((Halted == 1 || idleLoop) && NDS::ARM9Timestamp < NDS::ARM9Target) + if ((Halted == 1 || IdleLoop) && NDS::ARM9Timestamp < NDS::ARM9Target) { + Cycles = 0; NDS::ARM9Timestamp = NDS::ARM9Target; } + IdleLoop = 0; break; } } + + NDS::ARM9Timestamp += Cycles; + Cycles = 0; } if (Halted == 2) @@ -755,7 +754,7 @@ void ARMv4::Execute() }*/ if (IRQ) TriggerIRQ(); - NDS::ARM7Timestamp -= Cycles; + NDS::ARM7Timestamp += Cycles; Cycles = 0; } @@ -795,8 +794,6 @@ void ARMv4::ExecuteJIT() { u32 instrAddr = R[15] - ((CPSR&0x20)?2:4); - Cycles = NDS::ARM7Target - NDS::ARM7Timestamp - 1; - if ((instrAddr < FastBlockLookupStart || instrAddr >= (FastBlockLookupStart + FastBlockLookupSize)) && !ARMJIT::SetupExecutableRegion(1, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize)) { @@ -812,9 +809,6 @@ void ARMv4::ExecuteJIT() else ARMJIT::CompileBlock(this); - NDS::ARM7Timestamp = NDS::ARM7Target - Cycles - 1; - - // TODO optimize this shit!!! 
if (StopExecution) { if (IRQ) @@ -822,15 +816,18 @@ void ARMv4::ExecuteJIT() if (Halted || IdleLoop) { - bool idleLoop = IdleLoop; - IdleLoop = 0; - if ((Halted == 1 || idleLoop) && NDS::ARM7Timestamp < NDS::ARM7Target) + if ((Halted == 1 || IdleLoop) && NDS::ARM7Timestamp < NDS::ARM7Target) { + Cycles = 0; NDS::ARM7Timestamp = NDS::ARM7Target; } + IdleLoop = 0; break; } } + + NDS::ARM7Timestamp += Cycles; + Cycles = 0; } if (Halted == 2) @@ -57,7 +57,7 @@ public: } virtual void Execute() = 0; -#ifdef ENABLE_JIT +#ifdef JIT_ENABLED virtual void ExecuteJIT() = 0; #endif @@ -202,14 +202,14 @@ public: { // code only. always nonseq 32-bit for ARM9. s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - Cycles -= numC; + Cycles += numC; } void AddCycles_CI(s32 numI) { // code+internal s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - Cycles -= numC + numI; + Cycles += numC + numI; } void AddCycles_CDI() @@ -220,9 +220,9 @@ public: s32 numD = DataCycles; //if (DataRegion != CodeRegion) - Cycles -= std::max(numC + numD - 6, std::max(numC, numD)); + Cycles += std::max(numC + numD - 6, std::max(numC, numD)); //else - // Cycles -= numC + numD; + // Cycles += numC + numD; } void AddCycles_CD() @@ -232,9 +232,9 @@ public: s32 numD = DataCycles; //if (DataRegion != CodeRegion) - Cycles -= std::max(numC + numD - 6, std::max(numC, numD)); + Cycles += std::max(numC + numD - 6, std::max(numC, numD)); //else - // Cycles -= numC + numD; + // Cycles += numC + numD; } void GetCodeMemRegion(u32 addr, NDS::MemRegion* region); @@ -396,13 +396,13 @@ public: void AddCycles_C() { // code only. this code fetch is sequential. - Cycles -= NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?1:3]; + Cycles += NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?1:3]; } void AddCycles_CI(s32 num) { // code+internal. results in a nonseq code fetch. 
- Cycles -= NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2] + num; + Cycles += NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2] + num; } void AddCycles_CDI() @@ -414,21 +414,21 @@ public: if ((DataRegion >> 24) == 0x02) // mainRAM { if (CodeRegion == 0x02) - Cycles -= numC + numD; + Cycles += numC + numD; else { numC++; - Cycles -= std::max(numC + numD - 3, std::max(numC, numD)); + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); } } else if (CodeRegion == 0x02) { numD++; - Cycles -= std::max(numC + numD - 3, std::max(numC, numD)); + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); } else { - Cycles -= numC + numD + 1; + Cycles += numC + numD + 1; } } @@ -441,17 +441,17 @@ public: if ((DataRegion >> 24) == 0x02) { if (CodeRegion == 0x02) - Cycles -= numC + numD; + Cycles += numC + numD; else - Cycles -= std::max(numC + numD - 3, std::max(numC, numD)); + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); } else if (CodeRegion == 0x02) { - Cycles -= std::max(numC + numD - 3, std::max(numC, numD)); + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); } else { - Cycles -= numC + numD; + Cycles += numC + numD; } } }; diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 545667a..2095432 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -126,6 +126,11 @@ namespace ARMInterpreter #define A_CALC_OP2_IMM \ u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); +#define A_CALC_OP2_IMM_S \ + u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); \ + if ((cpu->CurInstr>>7)&0x1E) \ + cpu->SetC(b & 0x80000000); + #define A_CALC_OP2_REG_SHIFT_IMM(shiftop) \ u32 b = cpu->R[cpu->CurInstr&0xF]; \ u32 s = (cpu->CurInstr>>7)&0x1F; \ @@ -186,7 +191,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \ } \ void A_##x##_IMM_S(ARM* cpu) \ { \ - A_CALC_OP2_IMM \ + A_CALC_OP2_IMM##s \ A_##x##_S(0) \ } \ void A_##x##_REG_LSL_IMM_S(ARM* cpu) \ @@ -234,7 +239,7 @@ void A_##x##_REG_ROR_REG_S(ARM* cpu) \ \ void 
A_##x##_IMM(ARM* cpu) \ { \ - A_CALC_OP2_IMM \ + A_CALC_OP2_IMM##s \ A_##x(0) \ } \ void A_##x##_REG_LSL_IMM(ARM* cpu) \ @@ -1078,7 +1083,7 @@ void A_QDADD(ARM* cpu) u32 rm = cpu->R[cpu->CurInstr & 0xF]; u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; - if (rn & 0x40000000) + if (OVERFLOW_ADD(rn, rn, rn<<1)) { rn = (rn & 0x80000000) ? 0x80000000 : 0x7FFFFFFF; cpu->CPSR |= 0x08000000; // CHECKME @@ -1104,7 +1109,7 @@ void A_QDSUB(ARM* cpu) u32 rm = cpu->R[cpu->CurInstr & 0xF]; u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; - if (rn & 0x40000000) + if (OVERFLOW_ADD(rn, rn, rn<<1)) { rn = (rn & 0x80000000) ? 0x80000000 : 0x7FFFFFFF; cpu->CPSR |= 0x08000000; // CHECKME diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index 0a0b52f..0eb792c 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -71,79 +71,79 @@ u64 FastBlockLookupNWRAM_C[DSi::NWRAMSize / 2]; const u32 CodeRegionSizes[ARMJIT_Memory::memregions_Count] = { - 0, - ITCMPhysicalSize, - 0, - sizeof(NDS::ARM9BIOS), - NDS::MainRAMMaxSize, - NDS::SharedWRAMSize, - 0, - 0x100000, - sizeof(NDS::ARM7BIOS), - NDS::ARM7WRAMSize, - 0, - 0, - 0x40000, - 0x10000, - 0x10000, - DSi::NWRAMSize, - DSi::NWRAMSize, - DSi::NWRAMSize, + 0, + ITCMPhysicalSize, + 0, + sizeof(NDS::ARM9BIOS), + NDS::MainRAMMaxSize, + NDS::SharedWRAMSize, + 0, + 0x100000, + sizeof(NDS::ARM7BIOS), + NDS::ARM7WRAMSize, + 0, + 0, + 0x40000, + 0x10000, + 0x10000, + DSi::NWRAMSize, + DSi::NWRAMSize, + DSi::NWRAMSize, }; AddressRange* const CodeMemRegions[ARMJIT_Memory::memregions_Count] = { - NULL, - CodeIndexITCM, - NULL, - CodeIndexARM9BIOS, - CodeIndexMainRAM, - CodeIndexSWRAM, - NULL, - CodeIndexVRAM, - CodeIndexARM7BIOS, - CodeIndexARM7WRAM, - NULL, - NULL, - CodeIndexARM7WVRAM, - CodeIndexBIOS9DSi, - CodeIndexBIOS7DSi, - CodeIndexNWRAM_A, - CodeIndexNWRAM_B, - CodeIndexNWRAM_C + NULL, + CodeIndexITCM, + NULL, + CodeIndexARM9BIOS, + CodeIndexMainRAM, + CodeIndexSWRAM, + NULL, + CodeIndexVRAM, + CodeIndexARM7BIOS, + CodeIndexARM7WRAM, + NULL, + NULL, + 
CodeIndexARM7WVRAM, + CodeIndexBIOS9DSi, + CodeIndexBIOS7DSi, + CodeIndexNWRAM_A, + CodeIndexNWRAM_B, + CodeIndexNWRAM_C }; u64* const FastBlockLookupRegions[ARMJIT_Memory::memregions_Count] = { - NULL, - FastBlockLookupITCM, - NULL, - FastBlockLookupARM9BIOS, - FastBlockLookupMainRAM, - FastBlockLookupSWRAM, - NULL, - FastBlockLookupVRAM, - FastBlockLookupARM7BIOS, - FastBlockLookupARM7WRAM, - NULL, - NULL, - FastBlockLookupARM7WVRAM, - FastBlockLookupBIOS9DSi, - FastBlockLookupBIOS7DSi, - FastBlockLookupNWRAM_A, - FastBlockLookupNWRAM_B, - FastBlockLookupNWRAM_C + NULL, + FastBlockLookupITCM, + NULL, + FastBlockLookupARM9BIOS, + FastBlockLookupMainRAM, + FastBlockLookupSWRAM, + NULL, + FastBlockLookupVRAM, + FastBlockLookupARM7BIOS, + FastBlockLookupARM7WRAM, + NULL, + NULL, + FastBlockLookupARM7WVRAM, + FastBlockLookupBIOS9DSi, + FastBlockLookupBIOS7DSi, + FastBlockLookupNWRAM_A, + FastBlockLookupNWRAM_B, + FastBlockLookupNWRAM_C }; u32 LocaliseCodeAddress(u32 num, u32 addr) { - int region = num == 0 - ? ARMJIT_Memory::ClassifyAddress9(addr) - : ARMJIT_Memory::ClassifyAddress7(addr); + int region = num == 0 + ? ARMJIT_Memory::ClassifyAddress9(addr) + : ARMJIT_Memory::ClassifyAddress7(addr); - if (CodeMemRegions[region]) - return ARMJIT_Memory::LocaliseAddress(region, num, addr); - return 0; + if (CodeMemRegions[region]) + return ARMJIT_Memory::LocaliseAddress(region, num, addr); + return 0; } TinyVector<u32> InvalidLiterals; @@ -151,137 +151,137 @@ TinyVector<u32> InvalidLiterals; template <typename T, int ConsoleType> T SlowRead9(u32 addr, ARMv5* cpu) { - u32 offset = addr & 0x3; - addr &= ~(sizeof(T) - 1); - - T val; - if (addr < cpu->ITCMSize) - val = *(T*)&cpu->ITCM[addr & 0x7FFF]; - else if (addr >= cpu->DTCMBase && addr < (cpu->DTCMBase + cpu->DTCMSize)) - val = *(T*)&cpu->DTCM[(addr - cpu->DTCMBase) & 0x3FFF]; - else if (std::is_same<T, u32>::value) - val = (ConsoleType == 0 ? 
NDS::ARM9Read32 : DSi::ARM9Read32)(addr); - else if (std::is_same<T, u16>::value) - val = (ConsoleType == 0 ? NDS::ARM9Read16 : DSi::ARM9Read16)(addr); - else - val = (ConsoleType == 0 ? NDS::ARM9Read8 : DSi::ARM9Read8)(addr); - - if (std::is_same<T, u32>::value) - return ROR(val, offset << 3); - else - return val; + u32 offset = addr & 0x3; + addr &= ~(sizeof(T) - 1); + + T val; + if (addr < cpu->ITCMSize) + val = *(T*)&cpu->ITCM[addr & 0x7FFF]; + else if (addr >= cpu->DTCMBase && addr < (cpu->DTCMBase + cpu->DTCMSize)) + val = *(T*)&cpu->DTCM[(addr - cpu->DTCMBase) & 0x3FFF]; + else if (std::is_same<T, u32>::value) + val = (ConsoleType == 0 ? NDS::ARM9Read32 : DSi::ARM9Read32)(addr); + else if (std::is_same<T, u16>::value) + val = (ConsoleType == 0 ? NDS::ARM9Read16 : DSi::ARM9Read16)(addr); + else + val = (ConsoleType == 0 ? NDS::ARM9Read8 : DSi::ARM9Read8)(addr); + + if (std::is_same<T, u32>::value) + return ROR(val, offset << 3); + else + return val; } template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, T val) { - addr &= ~(sizeof(T) - 1); + addr &= ~(sizeof(T) - 1); if (addr < cpu->ITCMSize) - { + { CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); - *(T*)&cpu->ITCM[addr & 0x7FFF] = val; - } - else if (addr >= cpu->DTCMBase && addr < (cpu->DTCMBase + cpu->DTCMSize)) - { - *(T*)&cpu->DTCM[(addr - cpu->DTCMBase) & 0x3FFF] = val; - } - else if (std::is_same<T, u32>::value) - { - (ConsoleType == 0 ? NDS::ARM9Write32 : DSi::ARM9Write32)(addr, val); - } - else if (std::is_same<T, u16>::value) - { - (ConsoleType == 0 ? NDS::ARM9Write16 : DSi::ARM9Write16)(addr, val); - } - else - { - (ConsoleType == 0 ? NDS::ARM9Write8 : DSi::ARM9Write8)(addr, val); - } + *(T*)&cpu->ITCM[addr & 0x7FFF] = val; + } + else if (addr >= cpu->DTCMBase && addr < (cpu->DTCMBase + cpu->DTCMSize)) + { + *(T*)&cpu->DTCM[(addr - cpu->DTCMBase) & 0x3FFF] = val; + } + else if (std::is_same<T, u32>::value) + { + (ConsoleType == 0 ? 
NDS::ARM9Write32 : DSi::ARM9Write32)(addr, val); + } + else if (std::is_same<T, u16>::value) + { + (ConsoleType == 0 ? NDS::ARM9Write16 : DSi::ARM9Write16)(addr, val); + } + else + { + (ConsoleType == 0 ? NDS::ARM9Write8 : DSi::ARM9Write8)(addr, val); + } } template <typename T, int ConsoleType> T SlowRead7(u32 addr) { - u32 offset = addr & 0x3; - addr &= ~(sizeof(T) - 1); - - T val; - if (std::is_same<T, u32>::value) - val = (ConsoleType == 0 ? NDS::ARM7Read32 : DSi::ARM7Read32)(addr); - else if (std::is_same<T, u16>::value) - val = (ConsoleType == 0 ? NDS::ARM7Read16 : DSi::ARM7Read16)(addr); - else - val = (ConsoleType == 0 ? NDS::ARM7Read8 : DSi::ARM7Read8)(addr); - - if (std::is_same<T, u32>::value) - return ROR(val, offset << 3); - else - return val; + u32 offset = addr & 0x3; + addr &= ~(sizeof(T) - 1); + + T val; + if (std::is_same<T, u32>::value) + val = (ConsoleType == 0 ? NDS::ARM7Read32 : DSi::ARM7Read32)(addr); + else if (std::is_same<T, u16>::value) + val = (ConsoleType == 0 ? NDS::ARM7Read16 : DSi::ARM7Read16)(addr); + else + val = (ConsoleType == 0 ? NDS::ARM7Read8 : DSi::ARM7Read8)(addr); + + if (std::is_same<T, u32>::value) + return ROR(val, offset << 3); + else + return val; } template <typename T, int ConsoleType> void SlowWrite7(u32 addr, T val) { - addr &= ~(sizeof(T) - 1); - - if (std::is_same<T, u32>::value) - (ConsoleType == 0 ? NDS::ARM7Write32 : DSi::ARM7Write32)(addr, val); - else if (std::is_same<T, u16>::value) - (ConsoleType == 0 ? NDS::ARM7Write16 : DSi::ARM7Write16)(addr, val); - else - (ConsoleType == 0 ? NDS::ARM7Write8 : DSi::ARM7Write8)(addr, val); + addr &= ~(sizeof(T) - 1); + + if (std::is_same<T, u32>::value) + (ConsoleType == 0 ? NDS::ARM7Write32 : DSi::ARM7Write32)(addr, val); + else if (std::is_same<T, u16>::value) + (ConsoleType == 0 ? NDS::ARM7Write16 : DSi::ARM7Write16)(addr, val); + else + (ConsoleType == 0 ? 
NDS::ARM7Write8 : DSi::ARM7Write8)(addr, val); } template <bool Write, int ConsoleType> void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu) { - addr &= ~0x3; - for (int i = 0; i < num; i++) - { - if (Write) - SlowWrite9<u32, ConsoleType>(addr, cpu, data[i]); - else - data[i] = SlowRead9<u32, ConsoleType>(addr, cpu); - addr += 4; - } + addr &= ~0x3; + for (int i = 0; i < num; i++) + { + if (Write) + SlowWrite9<u32, ConsoleType>(addr, cpu, data[i]); + else + data[i] = SlowRead9<u32, ConsoleType>(addr, cpu); + addr += 4; + } } template <bool Write, int ConsoleType> void SlowBlockTransfer7(u32 addr, u64* data, u32 num) { - addr &= ~0x3; - for (int i = 0; i < num; i++) - { - if (Write) - SlowWrite7<u32, ConsoleType>(addr, data[i]); - else - data[i] = SlowRead7<u32, ConsoleType>(addr); - addr += 4; - } + addr &= ~0x3; + for (int i = 0; i < num; i++) + { + if (Write) + SlowWrite7<u32, ConsoleType>(addr, data[i]); + else + data[i] = SlowRead7<u32, ConsoleType>(addr); + addr += 4; + } } #define INSTANTIATE_SLOWMEM(consoleType) \ - template void SlowWrite9<u32, consoleType>(u32, ARMv5*, u32); \ - template void SlowWrite9<u16, consoleType>(u32, ARMv5*, u16); \ - template void SlowWrite9<u8, consoleType>(u32, ARMv5*, u8); \ - \ - template u32 SlowRead9<u32, consoleType>(u32, ARMv5*); \ - template u16 SlowRead9<u16, consoleType>(u32, ARMv5*); \ - template u8 SlowRead9<u8, consoleType>(u32, ARMv5*); \ - \ - template void SlowWrite7<u32, consoleType>(u32, u32); \ - template void SlowWrite7<u16, consoleType>(u32, u16); \ - template void SlowWrite7<u8, consoleType>(u32, u8); \ - \ - template u32 SlowRead7<u32, consoleType>(u32); \ - template u16 SlowRead7<u16, consoleType>(u32); \ - template u8 SlowRead7<u8, consoleType>(u32); \ - \ - template void SlowBlockTransfer9<false, consoleType>(u32, u64*, u32, ARMv5*); \ - template void SlowBlockTransfer9<true, consoleType>(u32, u64*, u32, ARMv5*); \ - template void SlowBlockTransfer7<false, consoleType>(u32 addr, u64* data, 
u32 num); \ - template void SlowBlockTransfer7<true, consoleType>(u32 addr, u64* data, u32 num); \ + template void SlowWrite9<u32, consoleType>(u32, ARMv5*, u32); \ + template void SlowWrite9<u16, consoleType>(u32, ARMv5*, u16); \ + template void SlowWrite9<u8, consoleType>(u32, ARMv5*, u8); \ + \ + template u32 SlowRead9<u32, consoleType>(u32, ARMv5*); \ + template u16 SlowRead9<u16, consoleType>(u32, ARMv5*); \ + template u8 SlowRead9<u8, consoleType>(u32, ARMv5*); \ + \ + template void SlowWrite7<u32, consoleType>(u32, u32); \ + template void SlowWrite7<u16, consoleType>(u32, u16); \ + template void SlowWrite7<u8, consoleType>(u32, u8); \ + \ + template u32 SlowRead7<u32, consoleType>(u32); \ + template u16 SlowRead7<u16, consoleType>(u32); \ + template u8 SlowRead7<u8, consoleType>(u32); \ + \ + template void SlowBlockTransfer9<false, consoleType>(u32, u64*, u32, ARMv5*); \ + template void SlowBlockTransfer9<true, consoleType>(u32, u64*, u32, ARMv5*); \ + template void SlowBlockTransfer7<false, consoleType>(u32 addr, u64* data, u32 num); \ + template void SlowBlockTransfer7<true, consoleType>(u32 addr, u64* data, u32 num); \ INSTANTIATE_SLOWMEM(0) INSTANTIATE_SLOWMEM(1) @@ -289,248 +289,250 @@ INSTANTIATE_SLOWMEM(1) template <typename K, typename V, int Size, V InvalidValue> struct UnreliableHashTable { - struct Bucket - { - K KeyA, KeyB; - V ValA, ValB; - }; - - Bucket Table[Size]; - - void Reset() - { - for (int i = 0; i < Size; i++) - { - Table[i].ValA = Table[i].ValB = InvalidValue; - } - } - - UnreliableHashTable() - { - Reset(); - } - - V Insert(K key, V value) - { - u32 slot = XXH3_64bits(&key, sizeof(K)) & (Size - 1); - Bucket* bucket = &Table[slot]; - - if (bucket->ValA == value || bucket->ValB == value) - { - return InvalidValue; - } - else if (bucket->ValA == InvalidValue) - { - bucket->KeyA = key; - bucket->ValA = value; - } - else if (bucket->ValB == InvalidValue) - { - bucket->KeyB = key; - bucket->ValB = value; - } - else - { - V prevVal = 
bucket->ValB; - bucket->KeyB = bucket->KeyA; - bucket->ValB = bucket->ValA; - bucket->KeyA = key; - bucket->ValA = value; - return prevVal; - } - - return InvalidValue; - } - - void Remove(K key) - { - u32 slot = XXH3_64bits(&key, sizeof(K)) & (Size - 1); - Bucket* bucket = &Table[slot]; - - if (bucket->KeyA == key && bucket->ValA != InvalidValue) - { - bucket->ValA = InvalidValue; - if (bucket->ValB != InvalidValue) - { - bucket->KeyA = bucket->KeyB; - bucket->ValA = bucket->ValB; - bucket->ValB = InvalidValue; - } - } - if (bucket->KeyB == key && bucket->ValB != InvalidValue) - bucket->ValB = InvalidValue; - } - - V LookUp(K addr) - { - u32 slot = XXH3_64bits(&addr, 4) & (Size - 1); - Bucket* bucket = &Table[slot]; - - if (bucket->ValA != InvalidValue && bucket->KeyA == addr) - return bucket->ValA; - if (bucket->ValB != InvalidValue && bucket->KeyB == addr) - return bucket->ValB; - - return InvalidValue; - } + struct Bucket + { + K KeyA, KeyB; + V ValA, ValB; + }; + + Bucket Table[Size]; + + void Reset() + { + for (int i = 0; i < Size; i++) + { + Table[i].ValA = Table[i].ValB = InvalidValue; + } + } + + UnreliableHashTable() + { + Reset(); + } + + V Insert(K key, V value) + { + u32 slot = XXH3_64bits(&key, sizeof(K)) & (Size - 1); + Bucket* bucket = &Table[slot]; + + if (bucket->ValA == value || bucket->ValB == value) + { + return InvalidValue; + } + else if (bucket->ValA == InvalidValue) + { + bucket->KeyA = key; + bucket->ValA = value; + } + else if (bucket->ValB == InvalidValue) + { + bucket->KeyB = key; + bucket->ValB = value; + } + else + { + V prevVal = bucket->ValB; + bucket->KeyB = bucket->KeyA; + bucket->ValB = bucket->ValA; + bucket->KeyA = key; + bucket->ValA = value; + return prevVal; + } + + return InvalidValue; + } + + void Remove(K key) + { + u32 slot = XXH3_64bits(&key, sizeof(K)) & (Size - 1); + Bucket* bucket = &Table[slot]; + + if (bucket->KeyA == key && bucket->ValA != InvalidValue) + { + bucket->ValA = InvalidValue; + if (bucket->ValB != 
InvalidValue) + { + bucket->KeyA = bucket->KeyB; + bucket->ValA = bucket->ValB; + bucket->ValB = InvalidValue; + } + } + if (bucket->KeyB == key && bucket->ValB != InvalidValue) + bucket->ValB = InvalidValue; + } + + V LookUp(K addr) + { + u32 slot = XXH3_64bits(&addr, 4) & (Size - 1); + Bucket* bucket = &Table[slot]; + + if (bucket->ValA != InvalidValue && bucket->KeyA == addr) + return bucket->ValA; + if (bucket->ValB != InvalidValue && bucket->KeyB == addr) + return bucket->ValB; + + return InvalidValue; + } }; UnreliableHashTable<u32, JitBlock*, 0x800, nullptr> RestoreCandidates; void Init() { - JITCompiler = new Compiler(); + JITCompiler = new Compiler(); - ARMJIT_Memory::Init(); + ARMJIT_Memory::Init(); } void DeInit() { - ARMJIT_Memory::DeInit(); + ARMJIT_Memory::DeInit(); - delete JITCompiler; + delete JITCompiler; } void Reset() { - ResetBlockCache(); + ResetBlockCache(); - ARMJIT_Memory::Reset(); + ARMJIT_Memory::Reset(); } void FloodFillSetFlags(FetchedInstr instrs[], int start, u8 flags) { - for (int j = start; j >= 0; j--) - { - u8 match = instrs[j].Info.WriteFlags & flags; - u8 matchMaybe = (instrs[j].Info.WriteFlags >> 4) & flags; - if (matchMaybe) // writes flags maybe - instrs[j].SetFlags |= matchMaybe; - if (match) - { - instrs[j].SetFlags |= match; - flags &= ~match; - if (!flags) - return; - } - } + for (int j = start; j >= 0; j--) + { + u8 match = instrs[j].Info.WriteFlags & flags; + u8 matchMaybe = (instrs[j].Info.WriteFlags >> 4) & flags; + if (matchMaybe) // writes flags maybe + instrs[j].SetFlags |= matchMaybe; + if (match) + { + instrs[j].SetFlags |= match; + flags &= ~match; + if (!flags) + return; + } + } } bool DecodeLiteral(bool thumb, const FetchedInstr& instr, u32& addr) { - if (!thumb) - { - switch (instr.Info.Kind) - { - case ARMInstrInfo::ak_LDR_IMM: - case ARMInstrInfo::ak_LDRB_IMM: - addr = (instr.Addr + 8) + ((instr.Instr & 0xFFF) * (instr.Instr & (1 << 23) ? 
1 : -1)); - return true; - case ARMInstrInfo::ak_LDRH_IMM: - addr = (instr.Addr + 8) + (((instr.Instr & 0xF00) >> 4 | (instr.Instr & 0xF)) * (instr.Instr & (1 << 23) ? 1 : -1)); - return true; - default: - break; - } - } - else if (instr.Info.Kind == ARMInstrInfo::tk_LDR_PCREL) - { - addr = ((instr.Addr + 4) & ~0x2) + ((instr.Instr & 0xFF) << 2); - return true; - } - - JIT_DEBUGPRINT("Literal %08x %x not recognised %d\n", instr.Instr, instr.Addr, instr.Info.Kind); - return false; + if (!thumb) + { + switch (instr.Info.Kind) + { + case ARMInstrInfo::ak_LDR_IMM: + case ARMInstrInfo::ak_LDRB_IMM: + addr = (instr.Addr + 8) + ((instr.Instr & 0xFFF) * (instr.Instr & (1 << 23) ? 1 : -1)); + return true; + case ARMInstrInfo::ak_LDRH_IMM: + addr = (instr.Addr + 8) + (((instr.Instr & 0xF00) >> 4 | (instr.Instr & 0xF)) * (instr.Instr & (1 << 23) ? 1 : -1)); + return true; + default: + break; + } + } + else if (instr.Info.Kind == ARMInstrInfo::tk_LDR_PCREL) + { + addr = ((instr.Addr + 4) & ~0x2) + ((instr.Instr & 0xFF) << 2); + return true; + } + + JIT_DEBUGPRINT("Literal %08x %x not recognised %d\n", instr.Instr, instr.Addr, instr.Info.Kind); + return false; } bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, bool hasLink, u32 lr, bool& link, - u32& linkAddr, u32& targetAddr) + u32& linkAddr, u32& targetAddr) { - if (thumb) - { - u32 r15 = instr.Addr + 4; - cond = 0xE; - - link = instr.Info.Kind == ARMInstrInfo::tk_BL_LONG; - linkAddr = instr.Addr + 4; - - if (instr.Info.Kind == ARMInstrInfo::tk_BL_LONG && !(instr.Instr & (1 << 12))) - { - targetAddr = r15 + ((s32)((instr.Instr & 0x7FF) << 21) >> 9); - targetAddr += ((instr.Instr >> 16) & 0x7FF) << 1; - return true; - } - else if (instr.Info.Kind == ARMInstrInfo::tk_B) - { - s32 offset = (s32)((instr.Instr & 0x7FF) << 21) >> 20; - targetAddr = r15 + offset; - return true; - } - else if (instr.Info.Kind == ARMInstrInfo::tk_BCOND) - { - cond = (instr.Instr >> 8) & 0xF; - s32 offset = (s32)(instr.Instr << 24) 
>> 23; - targetAddr = r15 + offset; - return true; - } - else if (hasLink && instr.Info.Kind == ARMInstrInfo::tk_BX && instr.A_Reg(3) == 14) - { - JIT_DEBUGPRINT("returning!\n"); - targetAddr = lr; - return true; - } - } - else - { - link = instr.Info.Kind == ARMInstrInfo::ak_BL; - linkAddr = instr.Addr + 4; - - cond = instr.Cond(); - if (instr.Info.Kind == ARMInstrInfo::ak_BL - || instr.Info.Kind == ARMInstrInfo::ak_B) - { - s32 offset = (s32)(instr.Instr << 8) >> 6; - u32 r15 = instr.Addr + 8; - targetAddr = r15 + offset; - return true; - } - else if (hasLink && instr.Info.Kind == ARMInstrInfo::ak_BX && instr.A_Reg(0) == 14) - { - JIT_DEBUGPRINT("returning!\n"); - targetAddr = lr; - return true; - } - } - return false; + if (thumb) + { + u32 r15 = instr.Addr + 4; + cond = 0xE; + + link = instr.Info.Kind == ARMInstrInfo::tk_BL_LONG; + linkAddr = instr.Addr + 4; + + if (instr.Info.Kind == ARMInstrInfo::tk_BL_LONG && !(instr.Instr & (1 << 12))) + { + targetAddr = r15 + ((s32)((instr.Instr & 0x7FF) << 21) >> 9); + targetAddr += ((instr.Instr >> 16) & 0x7FF) << 1; + return true; + } + else if (instr.Info.Kind == ARMInstrInfo::tk_B) + { + s32 offset = (s32)((instr.Instr & 0x7FF) << 21) >> 20; + targetAddr = r15 + offset; + return true; + } + else if (instr.Info.Kind == ARMInstrInfo::tk_BCOND) + { + cond = (instr.Instr >> 8) & 0xF; + s32 offset = (s32)(instr.Instr << 24) >> 23; + targetAddr = r15 + offset; + return true; + } + else if (hasLink && instr.Info.Kind == ARMInstrInfo::tk_BX && instr.A_Reg(3) == 14) + { + JIT_DEBUGPRINT("returning!\n"); + targetAddr = lr; + return true; + } + } + else + { + link = instr.Info.Kind == ARMInstrInfo::ak_BL; + linkAddr = instr.Addr + 4; + + cond = instr.Cond(); + if (instr.Info.Kind == ARMInstrInfo::ak_BL + || instr.Info.Kind == ARMInstrInfo::ak_B) + { + s32 offset = (s32)(instr.Instr << 8) >> 6; + u32 r15 = instr.Addr + 8; + targetAddr = r15 + offset; + return true; + } + else if (hasLink && instr.Info.Kind == ARMInstrInfo::ak_BX 
&& instr.A_Reg(0) == 14) + { + JIT_DEBUGPRINT("returning!\n"); + targetAddr = lr; + return true; + } + } + return false; } -bool IsIdleLoop(FetchedInstr* instrs, int instrsCount) +bool IsIdleLoop(bool thumb, FetchedInstr* instrs, int instrsCount) { - // see https://github.com/dolphin-emu/dolphin/blob/master/Source/Core/Core/PowerPC/PPCAnalyst.cpp#L678 - // it basically checks if one iteration of a loop depends on another - // the rules are quite simple - - JIT_DEBUGPRINT("checking potential idle loop\n"); - u16 regsWrittenTo = 0; - u16 regsDisallowedToWrite = 0; - for (int i = 0; i < instrsCount; i++) - { - JIT_DEBUGPRINT("instr %d %x regs(%x %x) %x %x\n", i, instrs[i].Instr, instrs[i].Info.DstRegs, instrs[i].Info.SrcRegs, regsWrittenTo, regsDisallowedToWrite); - if (instrs[i].Info.SpecialKind == ARMInstrInfo::special_WriteMem) - return false; - if (i < instrsCount - 1 && instrs[i].Info.Branches()) - return false; - - u16 srcRegs = instrs[i].Info.SrcRegs & ~(1 << 15); - u16 dstRegs = instrs[i].Info.DstRegs & ~(1 << 15); - - regsDisallowedToWrite |= srcRegs & ~regsWrittenTo; - - if (dstRegs & regsDisallowedToWrite) - return false; - regsWrittenTo |= dstRegs; - } - return true; + // see https://github.com/dolphin-emu/dolphin/blob/master/Source/Core/Core/PowerPC/PPCAnalyst.cpp#L678 + // it basically checks if one iteration of a loop depends on another + // the rules are quite simple + + JIT_DEBUGPRINT("checking potential idle loop\n"); + u16 regsWrittenTo = 0; + u16 regsDisallowedToWrite = 0; + for (int i = 0; i < instrsCount; i++) + { + JIT_DEBUGPRINT("instr %d %08x regs(%x %x) %x %x\n", i, instrs[i].Instr, instrs[i].Info.DstRegs, instrs[i].Info.SrcRegs, regsWrittenTo, regsDisallowedToWrite); + if (instrs[i].Info.SpecialKind == ARMInstrInfo::special_WriteMem) + return false; + if (!thumb && instrs[i].Info.Kind >= ARMInstrInfo::ak_MSR_IMM && instrs[i].Info.Kind <= ARMInstrInfo::ak_MRC) + return false; + if (i < instrsCount - 1 && instrs[i].Info.Branches()) + return 
false; + + u16 srcRegs = instrs[i].Info.SrcRegs & ~(1 << 15); + u16 dstRegs = instrs[i].Info.DstRegs & ~(1 << 15); + + regsDisallowedToWrite |= srcRegs & ~regsWrittenTo; + + if (dstRegs & regsDisallowedToWrite) + return false; + regsWrittenTo |= dstRegs; + } + return true; } typedef void (*InterpreterFunc)(ARM* cpu); @@ -539,53 +541,53 @@ void NOP(ARM* cpu) {} #define F(x) &ARMInterpreter::A_##x #define F_ALU(name, s) \ - F(name##_REG_LSL_IMM##s), F(name##_REG_LSR_IMM##s), F(name##_REG_ASR_IMM##s), F(name##_REG_ROR_IMM##s), \ - F(name##_REG_LSL_REG##s), F(name##_REG_LSR_REG##s), F(name##_REG_ASR_REG##s), F(name##_REG_ROR_REG##s), F(name##_IMM##s) + F(name##_REG_LSL_IMM##s), F(name##_REG_LSR_IMM##s), F(name##_REG_ASR_IMM##s), F(name##_REG_ROR_IMM##s), \ + F(name##_REG_LSL_REG##s), F(name##_REG_LSR_REG##s), F(name##_REG_ASR_REG##s), F(name##_REG_ROR_REG##s), F(name##_IMM##s) #define F_MEM_WB(name) \ - F(name##_REG_LSL), F(name##_REG_LSR), F(name##_REG_ASR), F(name##_REG_ROR), F(name##_IMM), \ - F(name##_POST_REG_LSL), F(name##_POST_REG_LSR), F(name##_POST_REG_ASR), F(name##_POST_REG_ROR), F(name##_POST_IMM) + F(name##_REG_LSL), F(name##_REG_LSR), F(name##_REG_ASR), F(name##_REG_ROR), F(name##_IMM), \ + F(name##_POST_REG_LSL), F(name##_POST_REG_LSR), F(name##_POST_REG_ASR), F(name##_POST_REG_ROR), F(name##_POST_IMM) #define F_MEM_HD(name) \ - F(name##_REG), F(name##_IMM), F(name##_POST_REG), F(name##_POST_IMM) + F(name##_REG), F(name##_IMM), F(name##_POST_REG), F(name##_POST_IMM) InterpreterFunc InterpretARM[ARMInstrInfo::ak_Count] = { - F_ALU(AND,), F_ALU(AND,_S), - F_ALU(EOR,), F_ALU(EOR,_S), - F_ALU(SUB,), F_ALU(SUB,_S), - F_ALU(RSB,), F_ALU(RSB,_S), - F_ALU(ADD,), F_ALU(ADD,_S), - F_ALU(ADC,), F_ALU(ADC,_S), - F_ALU(SBC,), F_ALU(SBC,_S), - F_ALU(RSC,), F_ALU(RSC,_S), - F_ALU(ORR,), F_ALU(ORR,_S), - F_ALU(MOV,), F_ALU(MOV,_S), - F_ALU(BIC,), F_ALU(BIC,_S), - F_ALU(MVN,), F_ALU(MVN,_S), - F_ALU(TST,), - F_ALU(TEQ,), - F_ALU(CMP,), - F_ALU(CMN,), - - F(MUL), F(MLA), 
F(UMULL), F(UMLAL), F(SMULL), F(SMLAL), F(SMLAxy), F(SMLAWy), F(SMULWy), F(SMLALxy), F(SMULxy), - F(CLZ), F(QADD), F(QDADD), F(QSUB), F(QDSUB), - - F_MEM_WB(STR), - F_MEM_WB(STRB), - F_MEM_WB(LDR), - F_MEM_WB(LDRB), - - F_MEM_HD(STRH), - F_MEM_HD(LDRD), - F_MEM_HD(STRD), - F_MEM_HD(LDRH), - F_MEM_HD(LDRSB), - F_MEM_HD(LDRSH), - - F(SWP), F(SWPB), - F(LDM), F(STM), - - F(B), F(BL), F(BLX_IMM), F(BX), F(BLX_REG), - F(UNK), F(MSR_IMM), F(MSR_REG), F(MRS), F(MCR), F(MRC), F(SVC), - NOP + F_ALU(AND,), F_ALU(AND,_S), + F_ALU(EOR,), F_ALU(EOR,_S), + F_ALU(SUB,), F_ALU(SUB,_S), + F_ALU(RSB,), F_ALU(RSB,_S), + F_ALU(ADD,), F_ALU(ADD,_S), + F_ALU(ADC,), F_ALU(ADC,_S), + F_ALU(SBC,), F_ALU(SBC,_S), + F_ALU(RSC,), F_ALU(RSC,_S), + F_ALU(ORR,), F_ALU(ORR,_S), + F_ALU(MOV,), F_ALU(MOV,_S), + F_ALU(BIC,), F_ALU(BIC,_S), + F_ALU(MVN,), F_ALU(MVN,_S), + F_ALU(TST,), + F_ALU(TEQ,), + F_ALU(CMP,), + F_ALU(CMN,), + + F(MUL), F(MLA), F(UMULL), F(UMLAL), F(SMULL), F(SMLAL), F(SMLAxy), F(SMLAWy), F(SMULWy), F(SMLALxy), F(SMULxy), + F(CLZ), F(QADD), F(QDADD), F(QSUB), F(QDSUB), + + F_MEM_WB(STR), + F_MEM_WB(STRB), + F_MEM_WB(LDR), + F_MEM_WB(LDRB), + + F_MEM_HD(STRH), + F_MEM_HD(LDRD), + F_MEM_HD(STRD), + F_MEM_HD(LDRH), + F_MEM_HD(LDRSB), + F_MEM_HD(LDRSH), + + F(SWP), F(SWPB), + F(LDM), F(STM), + + F(B), F(BL), F(BLX_IMM), F(BX), F(BLX_REG), + F(UNK), F(MSR_IMM), F(MSR_REG), F(MRS), F(MCR), F(MRC), F(SVC), + NOP }; #undef F_ALU #undef F_MEM_WB @@ -594,29 +596,29 @@ InterpreterFunc InterpretARM[ARMInstrInfo::ak_Count] = void T_BL_LONG(ARM* cpu) { - ARMInterpreter::T_BL_LONG_1(cpu); - cpu->R[15] += 2; - ARMInterpreter::T_BL_LONG_2(cpu); + ARMInterpreter::T_BL_LONG_1(cpu); + cpu->R[15] += 2; + ARMInterpreter::T_BL_LONG_2(cpu); } #define F(x) ARMInterpreter::T_##x InterpreterFunc InterpretTHUMB[ARMInstrInfo::tk_Count] = { - F(LSL_IMM), F(LSR_IMM), F(ASR_IMM), - F(ADD_REG_), F(SUB_REG_), F(ADD_IMM_), F(SUB_IMM_), - F(MOV_IMM), F(CMP_IMM), F(ADD_IMM), F(SUB_IMM), - F(AND_REG), F(EOR_REG), 
F(LSL_REG), F(LSR_REG), F(ASR_REG), - F(ADC_REG), F(SBC_REG), F(ROR_REG), F(TST_REG), F(NEG_REG), - F(CMP_REG), F(CMN_REG), F(ORR_REG), F(MUL_REG), F(BIC_REG), F(MVN_REG), - F(ADD_HIREG), F(CMP_HIREG), F(MOV_HIREG), - F(ADD_PCREL), F(ADD_SPREL), F(ADD_SP), - F(LDR_PCREL), F(STR_REG), F(STRB_REG), F(LDR_REG), F(LDRB_REG), F(STRH_REG), - F(LDRSB_REG), F(LDRH_REG), F(LDRSH_REG), F(STR_IMM), F(LDR_IMM), F(STRB_IMM), - F(LDRB_IMM), F(STRH_IMM), F(LDRH_IMM), F(STR_SPREL), F(LDR_SPREL), - F(PUSH), F(POP), F(LDMIA), F(STMIA), - F(BCOND), F(BX), F(BLX_REG), F(B), F(BL_LONG_1), F(BL_LONG_2), - F(UNK), F(SVC), - T_BL_LONG // BL_LONG psudo opcode + F(LSL_IMM), F(LSR_IMM), F(ASR_IMM), + F(ADD_REG_), F(SUB_REG_), F(ADD_IMM_), F(SUB_IMM_), + F(MOV_IMM), F(CMP_IMM), F(ADD_IMM), F(SUB_IMM), + F(AND_REG), F(EOR_REG), F(LSL_REG), F(LSR_REG), F(ASR_REG), + F(ADC_REG), F(SBC_REG), F(ROR_REG), F(TST_REG), F(NEG_REG), + F(CMP_REG), F(CMN_REG), F(ORR_REG), F(MUL_REG), F(BIC_REG), F(MVN_REG), + F(ADD_HIREG), F(CMP_HIREG), F(MOV_HIREG), + F(ADD_PCREL), F(ADD_SPREL), F(ADD_SP), + F(LDR_PCREL), F(STR_REG), F(STRB_REG), F(LDR_REG), F(LDRB_REG), F(STRH_REG), + F(LDRSB_REG), F(LDRH_REG), F(LDRSH_REG), F(STR_IMM), F(LDR_IMM), F(STRB_IMM), + F(LDRB_IMM), F(STRH_IMM), F(LDRH_IMM), F(STR_SPREL), F(LDR_SPREL), + F(PUSH), F(POP), F(LDMIA), F(STMIA), + F(BCOND), F(BX), F(BLX_REG), F(B), F(BL_LONG_1), F(BL_LONG_2), + F(UNK), F(SVC), + T_BL_LONG // BL_LONG psudo opcode }; #undef F @@ -624,106 +626,106 @@ void CompileBlock(ARM* cpu) { bool thumb = cpu->CPSR & 0x20; - if (Config::JIT_MaxBlockSize < 1) - Config::JIT_MaxBlockSize = 1; - if (Config::JIT_MaxBlockSize > 32) - Config::JIT_MaxBlockSize = 32; - - u32 blockAddr = cpu->R[15] - (thumb ? 2 : 4); - - u32 localAddr = LocaliseCodeAddress(cpu->Num, blockAddr); - if (!localAddr) - { - printf("trying to compile non executable code? %x\n", blockAddr); - } - - auto& map = cpu->Num == 0 ? 
JitBlocks9 : JitBlocks7; - auto existingBlockIt = map.find(blockAddr); - if (existingBlockIt != map.end()) - { - // there's already a block, though it's not inside the fast map - // could be that there are two blocks at the same physical addr - // but different mirrors - u32 otherLocalAddr = existingBlockIt->second->StartAddrLocal; - - if (localAddr == otherLocalAddr) - { - JIT_DEBUGPRINT("switching out block %x %x %x\n", localAddr, blockAddr, existingBlockIt->second->StartAddr); - - u64* entry = &FastBlockLookupRegions[localAddr >> 27][(localAddr & 0x7FFFFFF) / 2]; - *entry = ((u64)blockAddr | cpu->Num) << 32; - *entry |= JITCompiler->SubEntryOffset(existingBlockIt->second->EntryPoint); - return; - } - - // some memory has been remapped - JitBlock* prevBlock = RestoreCandidates.Insert(existingBlockIt->second->InstrHash, existingBlockIt->second); - if (prevBlock) - delete prevBlock; - - map.erase(existingBlockIt); - } + if (Config::JIT_MaxBlockSize < 1) + Config::JIT_MaxBlockSize = 1; + if (Config::JIT_MaxBlockSize > 32) + Config::JIT_MaxBlockSize = 32; + + u32 blockAddr = cpu->R[15] - (thumb ? 2 : 4); + + u32 localAddr = LocaliseCodeAddress(cpu->Num, blockAddr); + if (!localAddr) + { + printf("trying to compile non executable code? %x\n", blockAddr); + } + + auto& map = cpu->Num == 0 ? 
JitBlocks9 : JitBlocks7; + auto existingBlockIt = map.find(blockAddr); + if (existingBlockIt != map.end()) + { + // there's already a block, though it's not inside the fast map + // could be that there are two blocks at the same physical addr + // but different mirrors + u32 otherLocalAddr = existingBlockIt->second->StartAddrLocal; + + if (localAddr == otherLocalAddr) + { + JIT_DEBUGPRINT("switching out block %x %x %x\n", localAddr, blockAddr, existingBlockIt->second->StartAddr); + + u64* entry = &FastBlockLookupRegions[localAddr >> 27][(localAddr & 0x7FFFFFF) / 2]; + *entry = ((u64)blockAddr | cpu->Num) << 32; + *entry |= JITCompiler->SubEntryOffset(existingBlockIt->second->EntryPoint); + return; + } + + // some memory has been remapped + JitBlock* prevBlock = RestoreCandidates.Insert(existingBlockIt->second->InstrHash, existingBlockIt->second); + if (prevBlock) + delete prevBlock; + + map.erase(existingBlockIt); + } FetchedInstr instrs[Config::JIT_MaxBlockSize]; int i = 0; u32 r15 = cpu->R[15]; - u32 addressRanges[Config::JIT_MaxBlockSize]; - u32 addressMasks[Config::JIT_MaxBlockSize] = {0}; - u32 numAddressRanges = 0; + u32 addressRanges[Config::JIT_MaxBlockSize]; + u32 addressMasks[Config::JIT_MaxBlockSize] = {0}; + u32 numAddressRanges = 0; - u32 numLiterals = 0; - u32 literalLoadAddrs[Config::JIT_MaxBlockSize]; - // they are going to be hashed - u32 literalValues[Config::JIT_MaxBlockSize]; - u32 instrValues[Config::JIT_MaxBlockSize]; + u32 numLiterals = 0; + u32 literalLoadAddrs[Config::JIT_MaxBlockSize]; + // they are going to be hashed + u32 literalValues[Config::JIT_MaxBlockSize]; + u32 instrValues[Config::JIT_MaxBlockSize]; - cpu->FillPipeline(); + cpu->FillPipeline(); u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]}; - u32 nextInstrAddr[2] = {blockAddr, r15}; + u32 nextInstrAddr[2] = {blockAddr, r15}; - JIT_DEBUGPRINT("start block %x %08x (%x)\n", blockAddr, cpu->CPSR, localAddr); + JIT_DEBUGPRINT("start block %x %08x (%x)\n", blockAddr, 
cpu->CPSR, localAddr); - u32 lastSegmentStart = blockAddr; - u32 lr; - bool hasLink = false; + u32 lastSegmentStart = blockAddr; + u32 lr; + bool hasLink = false; do { r15 += thumb ? 2 : 4; - instrs[i].BranchFlags = 0; - instrs[i].SetFlags = 0; + instrs[i].BranchFlags = 0; + instrs[i].SetFlags = 0; instrs[i].Instr = nextInstr[0]; nextInstr[0] = nextInstr[1]; - - instrs[i].Addr = nextInstrAddr[0]; - nextInstrAddr[0] = nextInstrAddr[1]; - nextInstrAddr[1] = r15; - JIT_DEBUGPRINT("instr %08x %x\n", instrs[i].Instr & (thumb ? 0xFFFF : ~0), instrs[i].Addr); - - instrValues[i] = instrs[i].Instr; - - u32 translatedAddr = LocaliseCodeAddress(cpu->Num, instrs[i].Addr); - assert(translatedAddr >> 27); - u32 translatedAddrRounded = translatedAddr & ~0x1FF; - if (i == 0 || translatedAddrRounded != addressRanges[numAddressRanges - 1]) - { - bool returning = false; - for (int j = 0; j < numAddressRanges; j++) - { - if (addressRanges[j] == translatedAddrRounded) - { - std::swap(addressRanges[j], addressRanges[numAddressRanges - 1]); - std::swap(addressMasks[j], addressMasks[numAddressRanges - 1]); - returning = true; - break; - } - } - if (!returning) - addressRanges[numAddressRanges++] = translatedAddrRounded; - } - addressMasks[numAddressRanges - 1] |= 1 << ((translatedAddr & 0x1FF) / 16); + + instrs[i].Addr = nextInstrAddr[0]; + nextInstrAddr[0] = nextInstrAddr[1]; + nextInstrAddr[1] = r15; + JIT_DEBUGPRINT("instr %08x %x\n", instrs[i].Instr & (thumb ? 
0xFFFF : ~0), instrs[i].Addr); + + instrValues[i] = instrs[i].Instr; + + u32 translatedAddr = LocaliseCodeAddress(cpu->Num, instrs[i].Addr); + assert(translatedAddr >> 27); + u32 translatedAddrRounded = translatedAddr & ~0x1FF; + if (i == 0 || translatedAddrRounded != addressRanges[numAddressRanges - 1]) + { + bool returning = false; + for (int j = 0; j < numAddressRanges; j++) + { + if (addressRanges[j] == translatedAddrRounded) + { + std::swap(addressRanges[j], addressRanges[numAddressRanges - 1]); + std::swap(addressMasks[j], addressMasks[numAddressRanges - 1]); + returning = true; + break; + } + } + if (!returning) + addressRanges[numAddressRanges++] = translatedAddrRounded; + } + addressMasks[numAddressRanges - 1] |= 1 << ((translatedAddr & 0x1FF) / 16); if (cpu->Num == 0) { @@ -750,392 +752,392 @@ void CompileBlock(ARM* cpu) } instrs[i].Info = ARMInstrInfo::Decode(thumb, cpu->Num, instrs[i].Instr); - cpu->R[15] = r15; - cpu->CurInstr = instrs[i].Instr; - cpu->CodeCycles = instrs[i].CodeCycles; - - if (instrs[i].Info.DstRegs & (1 << 14) - || (!thumb - && (instrs[i].Info.Kind == ARMInstrInfo::ak_MSR_IMM || instrs[i].Info.Kind == ARMInstrInfo::ak_MSR_REG) - && instrs[i].Instr & (1 << 16))) - hasLink = false; - - if (thumb) - { - InterpretTHUMB[instrs[i].Info.Kind](cpu); - } - else - { - if (cpu->Num == 0 && instrs[i].Info.Kind == ARMInstrInfo::ak_BLX_IMM) - { - ARMInterpreter::A_BLX_IMM(cpu); - } - else - { + cpu->R[15] = r15; + cpu->CurInstr = instrs[i].Instr; + cpu->CodeCycles = instrs[i].CodeCycles; + + if (instrs[i].Info.DstRegs & (1 << 14) + || (!thumb + && (instrs[i].Info.Kind == ARMInstrInfo::ak_MSR_IMM || instrs[i].Info.Kind == ARMInstrInfo::ak_MSR_REG) + && instrs[i].Instr & (1 << 16))) + hasLink = false; + + if (thumb) + { + InterpretTHUMB[instrs[i].Info.Kind](cpu); + } + else + { + if (cpu->Num == 0 && instrs[i].Info.Kind == ARMInstrInfo::ak_BLX_IMM) + { + ARMInterpreter::A_BLX_IMM(cpu); + } + else + { u32 icode = ((instrs[i].Instr >> 4) & 0xF) | 
((instrs[i].Instr >> 16) & 0xFF0); - assert(InterpretARM[instrs[i].Info.Kind] == ARMInterpreter::ARMInstrTable[icode] - || instrs[i].Info.Kind == ARMInstrInfo::ak_MOV_REG_LSL_IMM - || instrs[i].Info.Kind == ARMInstrInfo::ak_Nop - || instrs[i].Info.Kind == ARMInstrInfo::ak_UNK); - if (cpu->CheckCondition(instrs[i].Cond())) - InterpretARM[instrs[i].Info.Kind](cpu); - else - cpu->AddCycles_C(); - } - } - - instrs[i].DataCycles = cpu->DataCycles; - instrs[i].DataRegion = cpu->DataRegion; - - u32 literalAddr; - if (Config::JIT_LiteralOptimisations - && instrs[i].Info.SpecialKind == ARMInstrInfo::special_LoadLiteral - && DecodeLiteral(thumb, instrs[i], literalAddr)) - { - u32 translatedAddr = LocaliseCodeAddress(cpu->Num, literalAddr); - if (!translatedAddr) - { - printf("literal in non executable memory?\n"); - } - u32 translatedAddrRounded = translatedAddr & ~0x1FF; - - u32 j = 0; - for (; j < numAddressRanges; j++) - if (addressRanges[j] == translatedAddrRounded) - break; - if (j == numAddressRanges) - addressRanges[numAddressRanges++] = translatedAddrRounded; - addressMasks[j] |= 1 << ((translatedAddr & 0x1FF) / 16); - JIT_DEBUGPRINT("literal loading %08x %08x %08x %08x\n", literalAddr, translatedAddr, addressMasks[j], addressRanges[j]); - cpu->DataRead32(literalAddr, &literalValues[numLiterals]); - literalLoadAddrs[numLiterals++] = translatedAddr; - } - - if (thumb && instrs[i].Info.Kind == ARMInstrInfo::tk_BL_LONG_2 && i > 0 - && instrs[i - 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_1) - { - instrs[i - 1].Info.Kind = ARMInstrInfo::tk_BL_LONG; - instrs[i - 1].Instr = (instrs[i - 1].Instr & 0xFFFF) | (instrs[i].Instr << 16); - instrs[i - 1].Info.DstRegs = 0xC000; - instrs[i - 1].Info.SrcRegs = 0; - instrs[i - 1].Info.EndBlock = true; - i--; - } - - if (instrs[i].Info.Branches() && Config::JIT_BranchOptimisations) - { - bool hasBranched = cpu->R[15] != r15; - - bool link; - u32 cond, target, linkAddr; - bool staticBranch = DecodeBranch(thumb, instrs[i], cond, hasLink, 
lr, link, linkAddr, target); - JIT_DEBUGPRINT("branch cond %x target %x (%d)\n", cond, target, hasBranched); - - if (staticBranch) - { - instrs[i].BranchFlags |= branch_StaticTarget; - - bool isBackJump = false; - if (hasBranched) - { - for (int j = 0; j < i; j++) - { - if (instrs[i].Addr == target) - { - isBackJump = true; - break; - } - } - } - - if (cond < 0xE && target < instrs[i].Addr && target >= lastSegmentStart) - { - // we might have an idle loop - u32 backwardsOffset = (instrs[i].Addr - target) / (thumb ? 2 : 4); - if (IsIdleLoop(&instrs[i - backwardsOffset], backwardsOffset + 1)) - { - instrs[i].BranchFlags |= branch_IdleBranch; - JIT_DEBUGPRINT("found %s idle loop %d in block %x\n", thumb ? "thumb" : "arm", cpu->Num, blockAddr); - } - } - else if (hasBranched && !isBackJump && i + 1 < Config::JIT_MaxBlockSize) - { - if (link) - { - lr = linkAddr; - hasLink = true; - } - - r15 = target + (thumb ? 2 : 4); - assert(r15 == cpu->R[15]); - - JIT_DEBUGPRINT("block lengthened by static branch (target %x)\n", target); - - nextInstr[0] = cpu->NextInstr[0]; - nextInstr[1] = cpu->NextInstr[1]; - - nextInstrAddr[0] = target; - nextInstrAddr[1] = r15; - - lastSegmentStart = target; - - instrs[i].Info.EndBlock = false; - - if (cond < 0xE) - instrs[i].BranchFlags |= branch_FollowCondTaken; - } - } - - if (!hasBranched && cond < 0xE && i + 1 < Config::JIT_MaxBlockSize) - { - instrs[i].Info.EndBlock = false; - instrs[i].BranchFlags |= branch_FollowCondNotTaken; - } - } + assert(InterpretARM[instrs[i].Info.Kind] == ARMInterpreter::ARMInstrTable[icode] + || instrs[i].Info.Kind == ARMInstrInfo::ak_MOV_REG_LSL_IMM + || instrs[i].Info.Kind == ARMInstrInfo::ak_Nop + || instrs[i].Info.Kind == ARMInstrInfo::ak_UNK); + if (cpu->CheckCondition(instrs[i].Cond())) + InterpretARM[instrs[i].Info.Kind](cpu); + else + cpu->AddCycles_C(); + } + } + + instrs[i].DataCycles = cpu->DataCycles; + instrs[i].DataRegion = cpu->DataRegion; + + u32 literalAddr; + if 
(Config::JIT_LiteralOptimisations + && instrs[i].Info.SpecialKind == ARMInstrInfo::special_LoadLiteral + && DecodeLiteral(thumb, instrs[i], literalAddr)) + { + u32 translatedAddr = LocaliseCodeAddress(cpu->Num, literalAddr); + if (!translatedAddr) + { + printf("literal in non executable memory?\n"); + } + u32 translatedAddrRounded = translatedAddr & ~0x1FF; + + u32 j = 0; + for (; j < numAddressRanges; j++) + if (addressRanges[j] == translatedAddrRounded) + break; + if (j == numAddressRanges) + addressRanges[numAddressRanges++] = translatedAddrRounded; + addressMasks[j] |= 1 << ((translatedAddr & 0x1FF) / 16); + JIT_DEBUGPRINT("literal loading %08x %08x %08x %08x\n", literalAddr, translatedAddr, addressMasks[j], addressRanges[j]); + cpu->DataRead32(literalAddr, &literalValues[numLiterals]); + literalLoadAddrs[numLiterals++] = translatedAddr; + } + + if (thumb && instrs[i].Info.Kind == ARMInstrInfo::tk_BL_LONG_2 && i > 0 + && instrs[i - 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_1) + { + instrs[i - 1].Info.Kind = ARMInstrInfo::tk_BL_LONG; + instrs[i - 1].Instr = (instrs[i - 1].Instr & 0xFFFF) | (instrs[i].Instr << 16); + instrs[i - 1].Info.DstRegs = 0xC000; + instrs[i - 1].Info.SrcRegs = 0; + instrs[i - 1].Info.EndBlock = true; + i--; + } + + if (instrs[i].Info.Branches() && Config::JIT_BranchOptimisations) + { + bool hasBranched = cpu->R[15] != r15; + + bool link; + u32 cond, target, linkAddr; + bool staticBranch = DecodeBranch(thumb, instrs[i], cond, hasLink, lr, link, linkAddr, target); + JIT_DEBUGPRINT("branch cond %x target %x (%d)\n", cond, target, hasBranched); + + if (staticBranch) + { + instrs[i].BranchFlags |= branch_StaticTarget; + + bool isBackJump = false; + if (hasBranched) + { + for (int j = 0; j < i; j++) + { + if (instrs[i].Addr == target) + { + isBackJump = true; + break; + } + } + } + + if (cond < 0xE && target < instrs[i].Addr && target >= lastSegmentStart) + { + // we might have an idle loop + u32 backwardsOffset = (instrs[i].Addr - target) / 
(thumb ? 2 : 4); + if (IsIdleLoop(thumb, &instrs[i - backwardsOffset], backwardsOffset + 1)) + { + instrs[i].BranchFlags |= branch_IdleBranch; + JIT_DEBUGPRINT("found %s idle loop %d in block %08x\n", thumb ? "thumb" : "arm", cpu->Num, blockAddr); + } + } + else if (hasBranched && !isBackJump && i + 1 < Config::JIT_MaxBlockSize) + { + if (link) + { + lr = linkAddr; + hasLink = true; + } + + r15 = target + (thumb ? 2 : 4); + assert(r15 == cpu->R[15]); + + JIT_DEBUGPRINT("block lengthened by static branch (target %x)\n", target); + + nextInstr[0] = cpu->NextInstr[0]; + nextInstr[1] = cpu->NextInstr[1]; + + nextInstrAddr[0] = target; + nextInstrAddr[1] = r15; + + lastSegmentStart = target; + + instrs[i].Info.EndBlock = false; + + if (cond < 0xE) + instrs[i].BranchFlags |= branch_FollowCondTaken; + } + } + + if (!hasBranched && cond < 0xE && i + 1 < Config::JIT_MaxBlockSize) + { + instrs[i].Info.EndBlock = false; + instrs[i].BranchFlags |= branch_FollowCondNotTaken; + } + } i++; - bool canCompile = JITCompiler->CanCompile(thumb, instrs[i - 1].Info.Kind); - bool secondaryFlagReadCond = !canCompile || (instrs[i - 1].BranchFlags & (branch_FollowCondTaken | branch_FollowCondNotTaken)); - if (instrs[i - 1].Info.ReadFlags != 0 || secondaryFlagReadCond) - FloodFillSetFlags(instrs, i - 2, !secondaryFlagReadCond ? instrs[i - 1].Info.ReadFlags : 0xF); + bool canCompile = JITCompiler->CanCompile(thumb, instrs[i - 1].Info.Kind); + bool secondaryFlagReadCond = !canCompile || (instrs[i - 1].BranchFlags & (branch_FollowCondTaken | branch_FollowCondNotTaken)); + if (instrs[i - 1].Info.ReadFlags != 0 || secondaryFlagReadCond) + FloodFillSetFlags(instrs, i - 2, !secondaryFlagReadCond ? 
instrs[i - 1].Info.ReadFlags : 0xF); } while(!instrs[i - 1].Info.EndBlock && i < Config::JIT_MaxBlockSize && !cpu->Halted && (!cpu->IRQ || (cpu->CPSR & 0x80))); - u32 literalHash = (u32)XXH3_64bits(literalValues, numLiterals * 4); - u32 instrHash = (u32)XXH3_64bits(instrValues, i * 4); - - JitBlock* prevBlock = RestoreCandidates.LookUp(instrHash); - bool mayRestore = true; - if (prevBlock) - { - RestoreCandidates.Remove(instrHash); - - mayRestore = prevBlock->StartAddr == blockAddr && prevBlock->LiteralHash == literalHash; - - if (mayRestore && prevBlock->NumAddresses == numAddressRanges) - { - for (int j = 0; j < numAddressRanges; j++) - { - if (prevBlock->AddressRanges()[j] != addressRanges[j] - || prevBlock->AddressMasks()[j] != addressMasks[j]) - { - mayRestore = false; - break; - } - } - } - else - mayRestore = false; - } - else - { - mayRestore = false; - prevBlock = NULL; - } - - JitBlock* block; - if (!mayRestore) - { - if (prevBlock) - delete prevBlock; - - block = new JitBlock(cpu->Num, i, numAddressRanges, numLiterals); - block->LiteralHash = literalHash; - block->InstrHash = instrHash; - for (int j = 0; j < numAddressRanges; j++) - block->AddressRanges()[j] = addressRanges[j]; - for (int j = 0; j < numAddressRanges; j++) - block->AddressMasks()[j] = addressMasks[j]; - for (int j = 0; j < numLiterals; j++) - block->Literals()[j] = literalLoadAddrs[j]; - - block->StartAddr = blockAddr; - block->StartAddrLocal = localAddr; - - FloodFillSetFlags(instrs, i - 1, 0xF); - - block->EntryPoint = JITCompiler->CompileBlock(cpu, thumb, instrs, i); - - JIT_DEBUGPRINT("block start %p\n", block->EntryPoint); - } - else - { - JIT_DEBUGPRINT("restored! 
%p\n", prevBlock); - block = prevBlock; - } - - assert((localAddr & 1) == 0); - for (int j = 0; j < numAddressRanges; j++) - { - assert(addressRanges[j] == block->AddressRanges()[j]); - assert(addressMasks[j] == block->AddressMasks()[j]); - assert(addressMasks[j] != 0); - - AddressRange* region = CodeMemRegions[addressRanges[j] >> 27]; - - if (!PageContainsCode(®ion[(addressRanges[j] & 0x7FFF000) / 512])) - ARMJIT_Memory::SetCodeProtection(addressRanges[j] >> 27, addressRanges[j] & 0x7FFFFFF, true); - - AddressRange* range = ®ion[(addressRanges[j] & 0x7FFFFFF) / 512]; - range->Code |= addressMasks[j]; - range->Blocks.Add(block); - } - - if (cpu->Num == 0) - JitBlocks9[blockAddr] = block; - else - JitBlocks7[blockAddr] = block; - - u64* entry = &FastBlockLookupRegions[(localAddr >> 27)][(localAddr & 0x7FFFFFF) / 2]; - *entry = ((u64)blockAddr | cpu->Num) << 32; - *entry |= JITCompiler->SubEntryOffset(block->EntryPoint); + u32 literalHash = (u32)XXH3_64bits(literalValues, numLiterals * 4); + u32 instrHash = (u32)XXH3_64bits(instrValues, i * 4); + + JitBlock* prevBlock = RestoreCandidates.LookUp(instrHash); + bool mayRestore = true; + if (prevBlock) + { + RestoreCandidates.Remove(instrHash); + + mayRestore = prevBlock->StartAddr == blockAddr && prevBlock->LiteralHash == literalHash; + + if (mayRestore && prevBlock->NumAddresses == numAddressRanges) + { + for (int j = 0; j < numAddressRanges; j++) + { + if (prevBlock->AddressRanges()[j] != addressRanges[j] + || prevBlock->AddressMasks()[j] != addressMasks[j]) + { + mayRestore = false; + break; + } + } + } + else + mayRestore = false; + } + else + { + mayRestore = false; + prevBlock = NULL; + } + + JitBlock* block; + if (!mayRestore) + { + if (prevBlock) + delete prevBlock; + + block = new JitBlock(cpu->Num, i, numAddressRanges, numLiterals); + block->LiteralHash = literalHash; + block->InstrHash = instrHash; + for (int j = 0; j < numAddressRanges; j++) + block->AddressRanges()[j] = addressRanges[j]; + for (int j = 0; j 
< numAddressRanges; j++) + block->AddressMasks()[j] = addressMasks[j]; + for (int j = 0; j < numLiterals; j++) + block->Literals()[j] = literalLoadAddrs[j]; + + block->StartAddr = blockAddr; + block->StartAddrLocal = localAddr; + + FloodFillSetFlags(instrs, i - 1, 0xF); + + block->EntryPoint = JITCompiler->CompileBlock(cpu, thumb, instrs, i); + + JIT_DEBUGPRINT("block start %p\n", block->EntryPoint); + } + else + { + JIT_DEBUGPRINT("restored! %p\n", prevBlock); + block = prevBlock; + } + + assert((localAddr & 1) == 0); + for (int j = 0; j < numAddressRanges; j++) + { + assert(addressRanges[j] == block->AddressRanges()[j]); + assert(addressMasks[j] == block->AddressMasks()[j]); + assert(addressMasks[j] != 0); + + AddressRange* region = CodeMemRegions[addressRanges[j] >> 27]; + + if (!PageContainsCode(®ion[(addressRanges[j] & 0x7FFF000) / 512])) + ARMJIT_Memory::SetCodeProtection(addressRanges[j] >> 27, addressRanges[j] & 0x7FFFFFF, true); + + AddressRange* range = ®ion[(addressRanges[j] & 0x7FFFFFF) / 512]; + range->Code |= addressMasks[j]; + range->Blocks.Add(block); + } + + if (cpu->Num == 0) + JitBlocks9[blockAddr] = block; + else + JitBlocks7[blockAddr] = block; + + u64* entry = &FastBlockLookupRegions[(localAddr >> 27)][(localAddr & 0x7FFFFFF) / 2]; + *entry = ((u64)blockAddr | cpu->Num) << 32; + *entry |= JITCompiler->SubEntryOffset(block->EntryPoint); } void InvalidateByAddr(u32 localAddr) { - JIT_DEBUGPRINT("invalidating by addr %x\n", localAddr); - - AddressRange* region = CodeMemRegions[localAddr >> 27]; - AddressRange* range = ®ion[(localAddr & 0x7FFFFFF) / 512]; - u32 mask = 1 << ((localAddr & 0x1FF) / 16); - - range->Code = 0; - for (int i = 0; i < range->Blocks.Length;) - { - JitBlock* block = range->Blocks[i]; - - bool invalidated = false; - u32 mask = 0; - for (int j = 0; j < block->NumAddresses; j++) - { - if (block->AddressRanges()[j] == (localAddr & ~0x1FF)) - { - mask = block->AddressMasks()[j]; - invalidated = block->AddressMasks()[j] & mask; - 
assert(mask); - break; - } - } - assert(mask); - if (!invalidated) - { - range->Code |= mask; - i++; - continue; - } - range->Blocks.Remove(i); - - if (range->Blocks.Length == 0 - && !PageContainsCode(®ion[(localAddr & 0x7FFF000) / 512])) - { - ARMJIT_Memory::SetCodeProtection(localAddr >> 27, localAddr & 0x7FFFFFF, false); - } - - bool literalInvalidation = false; - for (int j = 0; j < block->NumLiterals; j++) - { - u32 addr = block->Literals()[j]; - if (addr == localAddr) - { - if (InvalidLiterals.Find(localAddr) != -1) - { - InvalidLiterals.Add(localAddr); - JIT_DEBUGPRINT("found invalid literal %d\n", InvalidLiterals.Length); - } - literalInvalidation = true; - break; - } - } - for (int j = 0; j < block->NumAddresses; j++) - { - u32 addr = block->AddressRanges()[j]; - if ((addr / 512) != (localAddr / 512)) - { - AddressRange* otherRegion = CodeMemRegions[addr >> 27]; - AddressRange* otherRange = &otherRegion[(addr & 0x7FFFFFF) / 512]; - assert(otherRange != range); - - bool removed = otherRange->Blocks.RemoveByValue(block); - assert(removed); - - if (otherRange->Blocks.Length == 0) - { - if (!PageContainsCode(&otherRegion[(addr & 0x7FFF000) / 512])) - ARMJIT_Memory::SetCodeProtection(addr >> 27, addr & 0x7FFFFFF, false); - - otherRange->Code = 0; - } - } - } - - FastBlockLookupRegions[block->StartAddrLocal >> 27][(block->StartAddrLocal & 0x7FFFFFF) / 2] = (u64)UINT32_MAX << 32; - if (block->Num == 0) - JitBlocks9.erase(block->StartAddr); - else - JitBlocks7.erase(block->StartAddr); - - if (!literalInvalidation) - { - JitBlock* prevBlock = RestoreCandidates.Insert(block->InstrHash, block); - if (prevBlock) - delete prevBlock; - } - else - { - delete block; - } - } + JIT_DEBUGPRINT("invalidating by addr %x\n", localAddr); + + AddressRange* region = CodeMemRegions[localAddr >> 27]; + AddressRange* range = ®ion[(localAddr & 0x7FFFFFF) / 512]; + u32 mask = 1 << ((localAddr & 0x1FF) / 16); + + range->Code = 0; + for (int i = 0; i < range->Blocks.Length;) + { + 
JitBlock* block = range->Blocks[i]; + + bool invalidated = false; + u32 mask = 0; + for (int j = 0; j < block->NumAddresses; j++) + { + if (block->AddressRanges()[j] == (localAddr & ~0x1FF)) + { + mask = block->AddressMasks()[j]; + invalidated = block->AddressMasks()[j] & mask; + assert(mask); + break; + } + } + assert(mask); + if (!invalidated) + { + range->Code |= mask; + i++; + continue; + } + range->Blocks.Remove(i); + + if (range->Blocks.Length == 0 + && !PageContainsCode(®ion[(localAddr & 0x7FFF000) / 512])) + { + ARMJIT_Memory::SetCodeProtection(localAddr >> 27, localAddr & 0x7FFFFFF, false); + } + + bool literalInvalidation = false; + for (int j = 0; j < block->NumLiterals; j++) + { + u32 addr = block->Literals()[j]; + if (addr == localAddr) + { + if (InvalidLiterals.Find(localAddr) != -1) + { + InvalidLiterals.Add(localAddr); + JIT_DEBUGPRINT("found invalid literal %d\n", InvalidLiterals.Length); + } + literalInvalidation = true; + break; + } + } + for (int j = 0; j < block->NumAddresses; j++) + { + u32 addr = block->AddressRanges()[j]; + if ((addr / 512) != (localAddr / 512)) + { + AddressRange* otherRegion = CodeMemRegions[addr >> 27]; + AddressRange* otherRange = &otherRegion[(addr & 0x7FFFFFF) / 512]; + assert(otherRange != range); + + bool removed = otherRange->Blocks.RemoveByValue(block); + assert(removed); + + if (otherRange->Blocks.Length == 0) + { + if (!PageContainsCode(&otherRegion[(addr & 0x7FFF000) / 512])) + ARMJIT_Memory::SetCodeProtection(addr >> 27, addr & 0x7FFFFFF, false); + + otherRange->Code = 0; + } + } + } + + FastBlockLookupRegions[block->StartAddrLocal >> 27][(block->StartAddrLocal & 0x7FFFFFF) / 2] = (u64)UINT32_MAX << 32; + if (block->Num == 0) + JitBlocks9.erase(block->StartAddr); + else + JitBlocks7.erase(block->StartAddr); + + if (!literalInvalidation) + { + JitBlock* prevBlock = RestoreCandidates.Insert(block->InstrHash, block); + if (prevBlock) + delete prevBlock; + } + else + { + delete block; + } + } } void 
CheckAndInvalidateITCM() { - for (u32 i = 0; i < ITCMPhysicalSize; i+=16) - { - if (CodeIndexITCM[i / 512].Code & (1 << ((i & 0x1FF) / 16))) - { - InvalidateByAddr(i | (ARMJIT_Memory::memregion_ITCM << 27)); - } - } + for (u32 i = 0; i < ITCMPhysicalSize; i+=16) + { + if (CodeIndexITCM[i / 512].Code & (1 << ((i & 0x1FF) / 16))) + { + InvalidateByAddr(i | (ARMJIT_Memory::memregion_ITCM << 27)); + } + } } template <u32 num, int region> void CheckAndInvalidate(u32 addr) { - u32 localAddr = ARMJIT_Memory::LocaliseAddress(region, num, addr); - if (CodeMemRegions[region][(localAddr & 0x7FFFFFF) / 512].Code & (1 << ((localAddr & 0x1FF) / 16))) - InvalidateByAddr(localAddr); + u32 localAddr = ARMJIT_Memory::LocaliseAddress(region, num, addr); + if (CodeMemRegions[region][(localAddr & 0x7FFFFFF) / 512].Code & (1 << ((localAddr & 0x1FF) / 16))) + InvalidateByAddr(localAddr); } JitBlockEntry LookUpBlock(u32 num, u64* entries, u32 offset, u32 addr) { - u64* entry = &entries[offset / 2]; - if (*entry >> 32 == (addr | num)) - return JITCompiler->AddEntryOffset((u32)*entry); - return NULL; + u64* entry = &entries[offset / 2]; + if (*entry >> 32 == (addr | num)) + return JITCompiler->AddEntryOffset((u32)*entry); + return NULL; } void blockSanityCheck(u32 num, u32 blockAddr, JitBlockEntry entry) { - u32 localAddr = LocaliseCodeAddress(num, blockAddr); - assert(JITCompiler->AddEntryOffset((u32)FastBlockLookupRegions[localAddr >> 27][(localAddr & 0x7FFFFFF) / 2]) == entry); + u32 localAddr = LocaliseCodeAddress(num, blockAddr); + assert(JITCompiler->AddEntryOffset((u32)FastBlockLookupRegions[localAddr >> 27][(localAddr & 0x7FFFFFF) / 2]) == entry); } bool SetupExecutableRegion(u32 num, u32 blockAddr, u64*& entry, u32& start, u32& size) { - // amazingly ignoring the DTCM is the proper behaviour for code fetches - int region = num == 0 - ? 
ARMJIT_Memory::ClassifyAddress9(blockAddr) - : ARMJIT_Memory::ClassifyAddress7(blockAddr); - - u32 memoryOffset; - if (FastBlockLookupRegions[region] - && ARMJIT_Memory::GetMirrorLocation(region, num, blockAddr, memoryOffset, start, size)) - { - //printf("setup exec region %d %d %08x %08x %x %x\n", num, region, blockAddr, start, size, memoryOffset); - entry = FastBlockLookupRegions[region] + memoryOffset / 2; - return true; - } - return false; + // amazingly ignoring the DTCM is the proper behaviour for code fetches + int region = num == 0 + ? ARMJIT_Memory::ClassifyAddress9(blockAddr) + : ARMJIT_Memory::ClassifyAddress7(blockAddr); + + u32 memoryOffset; + if (FastBlockLookupRegions[region] + && ARMJIT_Memory::GetMirrorLocation(region, num, blockAddr, memoryOffset, start, size)) + { + //printf("setup exec region %d %d %08x %08x %x %x\n", num, region, blockAddr, start, size, memoryOffset); + entry = FastBlockLookupRegions[region] + memoryOffset / 2; + return true; + } + return false; } template void CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(u32); @@ -1155,52 +1157,56 @@ template void CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_C>(u3 void ResetBlockCache() { - printf("Resetting JIT block cache...\n"); - - InvalidLiterals.Clear(); - for (int i = 0; i < ARMJIT_Memory::memregions_Count; i++) - memset(FastBlockLookupRegions[i], 0xFF, CodeRegionSizes[i] * sizeof(u64) / 2); - RestoreCandidates.Reset(); - for (int i = 0; i < sizeof(RestoreCandidates.Table)/sizeof(RestoreCandidates.Table[0]); i++) - { - if (RestoreCandidates.Table[i].ValA) - { - delete RestoreCandidates.Table[i].ValA; - RestoreCandidates.Table[i].ValA = NULL; - } - if (RestoreCandidates.Table[i].ValA) - { - delete RestoreCandidates.Table[i].ValB; - RestoreCandidates.Table[i].ValB = NULL; - } - } - for (auto it : JitBlocks9) - { - JitBlock* block = it.second; - for (int j = 0; j < block->NumAddresses; j++) - { - u32 addr = block->AddressRanges()[j]; - AddressRange* range = 
&CodeMemRegions[addr >> 27][(addr & 0x7FFFFFF) / 512]; - range->Blocks.Clear(); - range->Code = 0; - } - delete block; - } - for (auto it : JitBlocks7) - { - JitBlock* block = it.second; - for (int j = 0; j < block->NumAddresses; j++) - { - u32 addr = block->AddressRanges()[j]; - AddressRange* range = &CodeMemRegions[addr >> 27][(addr & 0x7FFFFFF) / 512]; - range->Blocks.Clear(); - range->Code = 0; - } - } - JitBlocks9.clear(); - JitBlocks7.clear(); - - JITCompiler->Reset(); + printf("Resetting JIT block cache...\n"); + + // could be replace through a function which only resets + // the permissions but we're too lazy + ARMJIT_Memory::Reset(); + + InvalidLiterals.Clear(); + for (int i = 0; i < ARMJIT_Memory::memregions_Count; i++) + memset(FastBlockLookupRegions[i], 0xFF, CodeRegionSizes[i] * sizeof(u64) / 2); + RestoreCandidates.Reset(); + for (int i = 0; i < sizeof(RestoreCandidates.Table)/sizeof(RestoreCandidates.Table[0]); i++) + { + if (RestoreCandidates.Table[i].ValA) + { + delete RestoreCandidates.Table[i].ValA; + RestoreCandidates.Table[i].ValA = NULL; + } + if (RestoreCandidates.Table[i].ValA) + { + delete RestoreCandidates.Table[i].ValB; + RestoreCandidates.Table[i].ValB = NULL; + } + } + for (auto it : JitBlocks9) + { + JitBlock* block = it.second; + for (int j = 0; j < block->NumAddresses; j++) + { + u32 addr = block->AddressRanges()[j]; + AddressRange* range = &CodeMemRegions[addr >> 27][(addr & 0x7FFFFFF) / 512]; + range->Blocks.Clear(); + range->Code = 0; + } + delete block; + } + for (auto it : JitBlocks7) + { + JitBlock* block = it.second; + for (int j = 0; j < block->NumAddresses; j++) + { + u32 addr = block->AddressRanges()[j]; + AddressRange* range = &CodeMemRegions[addr >> 27][(addr & 0x7FFFFFF) / 512]; + range->Blocks.Clear(); + range->Code = 0; + } + } + JitBlocks9.clear(); + JitBlocks7.clear(); + + JITCompiler->Reset(); } } diff --git a/src/ARMJIT_A64/ARMJIT_ALU.cpp b/src/ARMJIT_A64/ARMJIT_ALU.cpp index 5f021a0..26a89cb 100644 --- 
a/src/ARMJIT_A64/ARMJIT_ALU.cpp +++ b/src/ARMJIT_A64/ARMJIT_ALU.cpp @@ -434,6 +434,19 @@ void Compiler::A_Comp_GetOp2(bool S, Op2& op2) if (CurInstr.Instr & (1 << 25)) { Comp_AddCycles_C(); + + u32 shift = (CurInstr.Instr >> 7) & 0x1E; + u32 imm = ROR(CurInstr.Instr & 0xFF, shift); + + if (S && shift && (CurInstr.SetFlags & 0x2)) + { + CPSRDirty = true; + if (imm & 0x80000000) + ORRI2R(RCPSR, RCPSR, 1 << 29); + else + ANDI2R(RCPSR, RCPSR, ~(1 << 29)); + } + op2 = Op2(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E)); } else diff --git a/src/ARMJIT_A64/ARMJIT_Branch.cpp b/src/ARMJIT_A64/ARMJIT_Branch.cpp index f130938..117eaa0 100644 --- a/src/ARMJIT_A64/ARMJIT_Branch.cpp +++ b/src/ARMJIT_A64/ARMJIT_Branch.cpp @@ -143,7 +143,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles) ConstantCycles += cycles; else - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); } @@ -181,7 +181,7 @@ void* Compiler::Gen_JumpTo9(int kind) STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15])); ADD(W1, W1, W1); - SUB(RCycles, RCycles, W1); + ADD(RCycles, RCycles, W1); RET(); } @@ -201,7 +201,7 @@ void* Compiler::Gen_JumpTo9(int kind) ADD(W2, W1, W1); TSTI2R(W0, 0x2); CSEL(W1, W1, W2, CC_EQ); - SUB(RCycles, RCycles, W1); + ADD(RCycles, RCycles, W1); RET(); } @@ -229,7 +229,7 @@ void* Compiler::Gen_JumpTo7(int kind) UBFX(W2, W3, 0, 8); UBFX(W3, W3, 8, 8); ADD(W2, W3, W2); - SUB(RCycles, RCycles, W2); + ADD(RCycles, RCycles, W2); ANDI2R(W0, W0, ~3); @@ -253,7 +253,7 @@ void* Compiler::Gen_JumpTo7(int kind) UBFX(W2, W3, 16, 8); UBFX(W3, W3, 24, 8); ADD(W2, W3, W2); - SUB(RCycles, RCycles, W2); + ADD(RCycles, RCycles, W2); ANDI2R(W0, W0, ~1); diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp index 62323ff..b046123 100644 --- a/src/ARMJIT_A64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp @@ -312,59 +312,93 @@ Compiler::Compiler() RET(); } - 
for (int num = 0; num < 2; num++) + for (int consoleType = 0; consoleType < 2; consoleType++) { - for (int size = 0; size < 3; size++) + for (int num = 0; num < 2; num++) { - for (int reg = 0; reg < 8; reg++) + for (int size = 0; size < 3; size++) { - ARM64Reg rdMapped = (ARM64Reg)(W19 + reg); - PatchedStoreFuncs[num][size][reg] = GetRXPtr(); - if (num == 0) + for (int reg = 0; reg < 8; reg++) { - MOV(X1, RCPU); - MOV(W2, rdMapped); - } - else - { - MOV(W1, rdMapped); - } - ABI_PushRegisters({30}); - switch ((8 << size) | num) - { - case 32: QuickCallFunction(X3, SlowWrite9<u32>); break; - case 33: QuickCallFunction(X3, SlowWrite7<u32>); break; - case 16: QuickCallFunction(X3, SlowWrite9<u16>); break; - case 17: QuickCallFunction(X3, SlowWrite7<u16>); break; - case 8: QuickCallFunction(X3, SlowWrite9<u8>); break; - case 9: QuickCallFunction(X3, SlowWrite7<u8>); break; - } - ABI_PopRegisters({30}); - RET(); - - for (int signextend = 0; signextend < 2; signextend++) - { - PatchedLoadFuncs[num][size][signextend][reg] = GetRXPtr(); + ARM64Reg rdMapped = (ARM64Reg)(W19 + reg); + PatchedStoreFuncs[consoleType][num][size][reg] = GetRXPtr(); if (num == 0) + { MOV(X1, RCPU); + MOV(W2, rdMapped); + } + else + { + MOV(W1, rdMapped); + } ABI_PushRegisters({30}); - switch ((8 << size) | num) + if (consoleType == 0) { - case 32: QuickCallFunction(X3, SlowRead9<u32>); break; - case 33: QuickCallFunction(X3, SlowRead7<u32>); break; - case 16: QuickCallFunction(X3, SlowRead9<u16>); break; - case 17: QuickCallFunction(X3, SlowRead7<u16>); break; - case 8: QuickCallFunction(X3, SlowRead9<u8>); break; - case 9: QuickCallFunction(X3, SlowRead7<u8>); break; + switch ((8 << size) | num) + { + case 32: QuickCallFunction(X3, SlowWrite9<u32, 0>); break; + case 33: QuickCallFunction(X3, SlowWrite7<u32, 0>); break; + case 16: QuickCallFunction(X3, SlowWrite9<u16, 0>); break; + case 17: QuickCallFunction(X3, SlowWrite7<u16, 0>); break; + case 8: QuickCallFunction(X3, SlowWrite9<u8, 0>); break; 
+ case 9: QuickCallFunction(X3, SlowWrite7<u8, 0>); break; + } } - ABI_PopRegisters({30}); - if (size == 32) - MOV(rdMapped, W0); - else if (signextend) - SBFX(rdMapped, W0, 0, 8 << size); else - UBFX(rdMapped, W0, 0, 8 << size); + { + switch ((8 << size) | num) + { + case 32: QuickCallFunction(X3, SlowWrite9<u32, 1>); break; + case 33: QuickCallFunction(X3, SlowWrite7<u32, 1>); break; + case 16: QuickCallFunction(X3, SlowWrite9<u16, 1>); break; + case 17: QuickCallFunction(X3, SlowWrite7<u16, 1>); break; + case 8: QuickCallFunction(X3, SlowWrite9<u8, 1>); break; + case 9: QuickCallFunction(X3, SlowWrite7<u8, 1>); break; + } + } + + ABI_PopRegisters({30}); RET(); + + for (int signextend = 0; signextend < 2; signextend++) + { + PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetRXPtr(); + if (num == 0) + MOV(X1, RCPU); + ABI_PushRegisters({30}); + if (consoleType == 0) + { + switch ((8 << size) | num) + { + case 32: QuickCallFunction(X3, SlowRead9<u32, 0>); break; + case 33: QuickCallFunction(X3, SlowRead7<u32, 0>); break; + case 16: QuickCallFunction(X3, SlowRead9<u16, 0>); break; + case 17: QuickCallFunction(X3, SlowRead7<u16, 0>); break; + case 8: QuickCallFunction(X3, SlowRead9<u8, 0>); break; + case 9: QuickCallFunction(X3, SlowRead7<u8, 0>); break; + } + } + else + { + switch ((8 << size) | num) + { + case 32: QuickCallFunction(X3, SlowRead9<u32, 1>); break; + case 33: QuickCallFunction(X3, SlowRead7<u32, 1>); break; + case 16: QuickCallFunction(X3, SlowRead9<u16, 1>); break; + case 17: QuickCallFunction(X3, SlowRead7<u16, 1>); break; + case 8: QuickCallFunction(X3, SlowRead9<u8, 1>); break; + case 9: QuickCallFunction(X3, SlowRead7<u8, 1>); break; + } + } + ABI_PopRegisters({30}); + if (size == 32) + MOV(rdMapped, W0); + else if (signextend) + SBFX(rdMapped, W0, 0, 8 << size); + else + UBFX(rdMapped, W0, 0, 8 << size); + RET(); + } } } } @@ -595,7 +629,7 @@ void Compiler::Comp_BranchSpecialBehaviour(bool taken) { RegCache.PrepareExit(); - 
SUB(RCycles, RCycles, ConstantCycles); + ADD(RCycles, RCycles, ConstantCycles); QuickTailCall(X0, ARM_Ret); } } @@ -736,7 +770,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[] RegCache.Flush(); - SUB(RCycles, RCycles, ConstantCycles); + ADD(RCycles, RCycles, ConstantCycles); QuickTailCall(X0, ARM_Ret); FlushIcache(); @@ -766,7 +800,7 @@ void Compiler::Comp_AddCycles_C(bool forceNonConstant) if (forceNonConstant) ConstantCycles += cycles; else - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); } void Compiler::Comp_AddCycles_CI(u32 numI) @@ -780,7 +814,7 @@ void Compiler::Comp_AddCycles_CI(u32 numI) if (Thumb || CurInstr.Cond() == 0xE) ConstantCycles += cycles; else - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); } void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift) @@ -791,11 +825,11 @@ void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift) NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2] : ((R15 & 0x2) ? 
0 : CurInstr.CodeCycles)) + c; - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); if (Thumb || CurInstr.Cond() >= 0xE) ConstantCycles += cycles; else - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); } void Compiler::Comp_AddCycles_CDI() @@ -832,7 +866,7 @@ void Compiler::Comp_AddCycles_CDI() } if (!Thumb && CurInstr.Cond() < 0xE) - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); else ConstantCycles += cycles; } @@ -876,7 +910,7 @@ void Compiler::Comp_AddCycles_CD() } if ((!Thumb && CurInstr.Cond() < 0xE) && IrregularCycles) - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); else ConstantCycles += cycles; } diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.h b/src/ARMJIT_A64/ARMJIT_Compiler.h index e4ffc63..0e7d54c 100644 --- a/src/ARMJIT_A64/ARMJIT_Compiler.h +++ b/src/ARMJIT_A64/ARMJIT_Compiler.h @@ -247,9 +247,9 @@ public: std::unordered_map<ptrdiff_t, LoadStorePatch> LoadStorePatches; - // [Num][Size][Sign Extend][Output register] - void* PatchedLoadFuncs[2][3][2][8]; - void* PatchedStoreFuncs[2][3][8]; + // [Console Type][Num][Size][Sign Extend][Output register] + void* PatchedLoadFuncs[2][2][3][2][8]; + void* PatchedStoreFuncs[2][2][3][8]; RegisterCache<Compiler, Arm64Gen::ARM64Reg> RegCache; diff --git a/src/ARMJIT_A64/ARMJIT_Linkage.s b/src/ARMJIT_A64/ARMJIT_Linkage.s index 536a478..7886315 100644 --- a/src/ARMJIT_A64/ARMJIT_Linkage.s +++ b/src/ARMJIT_A64/ARMJIT_Linkage.s @@ -2,9 +2,9 @@ .text -#define RCPSR W27 -#define RCycles W28 -#define RCPU X29 +#define RCPSR w27 +#define RCycles w28 +#define RCPU x29 .p2align 4,,15 diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp index bdd9f43..6140ffc 100644 --- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp @@ -174,8 +174,8 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) LoadStorePatch patch; patch.PatchFunc = flags & memop_Store - ? 
PatchedStoreFuncs[Num][__builtin_ctz(size) - 3][rdMapped - W19] - : PatchedLoadFuncs[Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped - W19]; + ? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped - W19] + : PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped - W19]; assert(rdMapped - W19 >= 0 && rdMapped - W19 < 8); MOVP2R(X7, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); @@ -241,20 +241,26 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) if (flags & memop_Store) { MOV(W2, rdMapped); - switch (size) + switch (size | NDS::ConsoleType) { - case 32: QuickCallFunction(X3, SlowWrite9<u32>); break; - case 16: QuickCallFunction(X3, SlowWrite9<u16>); break; - case 8: QuickCallFunction(X3, SlowWrite9<u8>); break; + case 32: QuickCallFunction(X3, SlowWrite9<u32, 0>); break; + case 33: QuickCallFunction(X3, SlowWrite9<u32, 1>); break; + case 16: QuickCallFunction(X3, SlowWrite9<u16, 0>); break; + case 17: QuickCallFunction(X3, SlowWrite9<u16, 1>); break; + case 8: QuickCallFunction(X3, SlowWrite9<u8, 0>); break; + case 9: QuickCallFunction(X3, SlowWrite9<u8, 1>); break; } } else { - switch (size) + switch (size | NDS::ConsoleType) { - case 32: QuickCallFunction(X3, SlowRead9<u32>); break; - case 16: QuickCallFunction(X3, SlowRead9<u16>); break; - case 8: QuickCallFunction(X3, SlowRead9<u8>); break; + case 32: QuickCallFunction(X3, SlowRead9<u32, 0>); break; + case 33: QuickCallFunction(X3, SlowRead9<u32, 1>); break; + case 16: QuickCallFunction(X3, SlowRead9<u16, 0>); break; + case 17: QuickCallFunction(X3, SlowRead9<u16, 1>); break; + case 8: QuickCallFunction(X3, SlowRead9<u8, 0>); break; + case 9: QuickCallFunction(X3, SlowRead9<u8, 1>); break; } } } @@ -263,20 +269,26 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) if (flags & memop_Store) { MOV(W1, rdMapped); - switch (size) + switch 
(size | NDS::ConsoleType) { - case 32: QuickCallFunction(X3, SlowWrite7<u32>); break; - case 16: QuickCallFunction(X3, SlowWrite7<u16>); break; - case 8: QuickCallFunction(X3, SlowWrite7<u8>); break; + case 32: QuickCallFunction(X3, SlowWrite7<u32, 0>); break; + case 33: QuickCallFunction(X3, SlowWrite7<u32, 1>); break; + case 16: QuickCallFunction(X3, SlowWrite7<u16, 0>); break; + case 17: QuickCallFunction(X3, SlowWrite7<u16, 1>); break; + case 8: QuickCallFunction(X3, SlowWrite7<u8, 0>); break; + case 9: QuickCallFunction(X3, SlowWrite7<u8, 1>); break; } } else { - switch (size) + switch (size | NDS::ConsoleType) { - case 32: QuickCallFunction(X3, SlowRead7<u32>); break; - case 16: QuickCallFunction(X3, SlowRead7<u16>); break; - case 8: QuickCallFunction(X3, SlowRead7<u8>); break; + case 32: QuickCallFunction(X3, SlowRead7<u32, 0>); break; + case 33: QuickCallFunction(X3, SlowRead7<u32, 1>); break; + case 16: QuickCallFunction(X3, SlowRead7<u16, 0>); break; + case 17: QuickCallFunction(X3, SlowRead7<u16, 1>); break; + case 8: QuickCallFunction(X3, SlowRead7<u8, 0>); break; + case 9: QuickCallFunction(X3, SlowRead7<u8, 1>); break; } } } @@ -465,15 +477,25 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc if (decrement) { - SUB(W0, MapReg(rn), regsCount * 4); - ANDI2R(W0, W0, ~3); - preinc ^= true; + s32 offset = -regsCount * 4 + (preinc ? 0 : 4); + if (offset) + { + ADDI2R(W0, MapReg(rn), offset); + ANDI2R(W0, W0, ~3); + } + else + { + ANDI2R(W0, MapReg(rn), ~3); + } } else { ANDI2R(W0, MapReg(rn), ~3); + if (preinc) + ADD(W0, W0, 4); } + u8* patchFunc; if (compileFastPath) { ptrdiff_t fastPathStart = GetCodeOffset(); @@ -482,7 +504,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc MOVP2R(X1, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); ADD(X1, X1, X0); - u32 offset = preinc ? 
4 : 0; + u32 offset = 0; BitSet16::Iterator it = regs.begin(); u32 i = 0; @@ -545,7 +567,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc LoadStorePatch patch; patch.PatchSize = GetCodeOffset() - fastPathStart; SwapCodeRegion(); - patch.PatchFunc = GetRXPtr(); + patchFunc = (u8*)GetRXPtr(); + patch.PatchFunc = patchFunc; for (i = 0; i < regsCount; i++) { patch.PatchOffset = fastPathStart - loadStoreOffsets[i]; @@ -620,22 +643,22 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc if (Num == 0) { MOV(X3, RCPU); - switch (preinc * 2 | store) + switch ((u32)store * 2 | NDS::ConsoleType) { - case 0: QuickCallFunction(X4, SlowBlockTransfer9<false, false>); break; - case 1: QuickCallFunction(X4, SlowBlockTransfer9<false, true>); break; - case 2: QuickCallFunction(X4, SlowBlockTransfer9<true, false>); break; - case 3: QuickCallFunction(X4, SlowBlockTransfer9<true, true>); break; + case 0: QuickCallFunction(X4, SlowBlockTransfer9<false, 0>); break; + case 1: QuickCallFunction(X4, SlowBlockTransfer9<false, 1>); break; + case 2: QuickCallFunction(X4, SlowBlockTransfer9<true, 0>); break; + case 3: QuickCallFunction(X4, SlowBlockTransfer9<true, 1>); break; } } else { - switch (preinc * 2 | store) + switch ((u32)store * 2 | NDS::ConsoleType) { - case 0: QuickCallFunction(X4, SlowBlockTransfer7<false, false>); break; - case 1: QuickCallFunction(X4, SlowBlockTransfer7<false, true>); break; - case 2: QuickCallFunction(X4, SlowBlockTransfer7<true, false>); break; - case 3: QuickCallFunction(X4, SlowBlockTransfer7<true, true>); break; + case 0: QuickCallFunction(X4, SlowBlockTransfer7<false, 0>); break; + case 1: QuickCallFunction(X4, SlowBlockTransfer7<false, 1>); break; + case 2: QuickCallFunction(X4, SlowBlockTransfer7<true, 0>); break; + case 3: QuickCallFunction(X4, SlowBlockTransfer7<true, 1>); break; } } @@ -705,7 +728,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc 
ABI_PopRegisters({30}); RET(); - FlushIcacheSection((u8*)patch.PatchFunc, (u8*)GetRXPtr()); + FlushIcacheSection(patchFunc, (u8*)GetRXPtr()); SwapCodeRegion(); } diff --git a/src/ARMJIT_Internal.h b/src/ARMJIT_Internal.h index c87e1b3..4244470 100644 --- a/src/ARMJIT_Internal.h +++ b/src/ARMJIT_Internal.h @@ -16,10 +16,10 @@ namespace ARMJIT enum { - branch_IdleBranch = 1 << 0, - branch_FollowCondTaken = 1 << 1, - branch_FollowCondNotTaken = 1 << 2, - branch_StaticTarget = 1 << 3, + branch_IdleBranch = 1 << 0, + branch_FollowCondTaken = 1 << 1, + branch_FollowCondNotTaken = 1 << 2, + branch_StaticTarget = 1 << 3, }; struct FetchedInstr @@ -39,155 +39,156 @@ struct FetchedInstr return Instr >> 28; } - u8 BranchFlags; - u8 SetFlags; + u8 BranchFlags; + u8 SetFlags; u32 Instr; - u32 Addr; + u32 Addr; - u8 DataCycles; + u8 DataCycles; u16 CodeCycles; - u32 DataRegion; + u32 DataRegion; ARMInstrInfo::Info Info; }; /* - TinyVector - - because reinventing the wheel is the best! - - - meant to be used very often, with not so many elements - max 1 << 16 elements - - doesn't allocate while no elements are inserted - - not stl confirmant of course - - probably only works with POD types - - remove operations don't preserve order, but O(1)! + TinyVector + - because reinventing the wheel is the best! + + - meant to be used very often, with not so many elements + max 1 << 16 elements + - doesn't allocate while no elements are inserted + - not stl confirmant of course + - probably only works with POD types + - remove operations don't preserve order, but O(1)! 
*/ template <typename T> struct __attribute__((packed)) TinyVector { - T* Data = NULL; - u16 Capacity = 0; - u16 Length = 0; - - ~TinyVector() - { - delete[] Data; - } - - void MakeCapacity(u32 capacity) - { - assert(capacity <= UINT16_MAX); - assert(capacity > Capacity); - T* newMem = new T[capacity]; - if (Data != NULL) - memcpy(newMem, Data, sizeof(T) * Length); - - T* oldData = Data; - Data = newMem; - if (oldData != NULL) - delete[] oldData; - - Capacity = capacity; - } - - void SetLength(u16 length) - { - if (Capacity < length) - MakeCapacity(length); - - Length = length; - } - - void Clear() - { - Length = 0; - } - - void Add(T element) - { - assert(Length + 1 <= UINT16_MAX); - if (Length + 1 > Capacity) - MakeCapacity(((Capacity + 4) * 3) / 2); - - Data[Length++] = element; - } - - void Remove(int index) - { - assert(index >= 0 && index < Length); - - Length--; - Data[index] = Data[Length]; - /*for (int i = index; i < Length; i++) - Data[i] = Data[i + 1];*/ - } - - int Find(T needle) - { - for (int i = 0; i < Length; i++) - { - if (Data[i] == needle) - return i; - } - return -1; - } - - bool RemoveByValue(T needle) - { - for (int i = 0; i < Length; i++) - { - if (Data[i] == needle) - { - Remove(i); - return true; - } - } - return false; - } - - T& operator[](int index) - { - assert(index >= 0 && index < Length); - return Data[index]; - } + T* Data = NULL; + u16 Capacity = 0; + u16 Length = 0; + + ~TinyVector() + { + delete[] Data; + } + + void MakeCapacity(u32 capacity) + { + assert(capacity <= UINT16_MAX); + assert(capacity > Capacity); + T* newMem = new T[capacity]; + if (Data != NULL) + memcpy(newMem, Data, sizeof(T) * Length); + + T* oldData = Data; + Data = newMem; + if (oldData != NULL) + delete[] oldData; + + Capacity = capacity; + } + + void SetLength(u16 length) + { + if (Capacity < length) + MakeCapacity(length); + + Length = length; + } + + void Clear() + { + Length = 0; + } + + void Add(T element) + { + assert(Length + 1 <= UINT16_MAX); + if 
(Length + 1 > Capacity) + MakeCapacity(((Capacity + 4) * 3) / 2); + + Data[Length++] = element; + } + + void Remove(int index) + { + assert(Length > 0); + assert(index >= 0 && index < Length); + + Length--; + Data[index] = Data[Length]; + /*for (int i = index; i < Length; i++) + Data[i] = Data[i + 1];*/ + } + + int Find(T needle) + { + for (int i = 0; i < Length; i++) + { + if (Data[i] == needle) + return i; + } + return -1; + } + + bool RemoveByValue(T needle) + { + for (int i = 0; i < Length; i++) + { + if (Data[i] == needle) + { + Remove(i); + return true; + } + } + return false; + } + + T& operator[](int index) + { + assert(index >= 0 && index < Length); + return Data[index]; + } }; class JitBlock { public: - JitBlock(u32 num, u32 literalHash, u32 numAddresses, u32 numLiterals) - { - Num = num; - NumAddresses = numAddresses; - NumLiterals = numLiterals; - Data.SetLength(numAddresses * 2 + numLiterals); - } - - u32 StartAddr; - u32 StartAddrLocal; - u32 InstrHash, LiteralHash; - u8 Num; - u16 NumAddresses; - u16 NumLiterals; - - JitBlockEntry EntryPoint; - - u32* AddressRanges() - { return &Data[0]; } - u32* AddressMasks() - { return &Data[NumAddresses]; } - u32* Literals() - { return &Data[NumAddresses * 2]; } + JitBlock(u32 num, u32 literalHash, u32 numAddresses, u32 numLiterals) + { + Num = num; + NumAddresses = numAddresses; + NumLiterals = numLiterals; + Data.SetLength(numAddresses * 2 + numLiterals); + } + + u32 StartAddr; + u32 StartAddrLocal; + u32 InstrHash, LiteralHash; + u8 Num; + u16 NumAddresses; + u16 NumLiterals; + + JitBlockEntry EntryPoint; + + u32* AddressRanges() + { return &Data[0]; } + u32* AddressMasks() + { return &Data[NumAddresses]; } + u32* Literals() + { return &Data[NumAddresses * 2]; } private: - TinyVector<u32> Data; + TinyVector<u32> Data; }; // size should be 16 bytes because I'm to lazy to use mul and whatnot struct __attribute__((packed)) AddressRange { - TinyVector<JitBlock*> Blocks; - u32 Code; + TinyVector<JitBlock*> Blocks; 
+ u32 Code; }; @@ -201,12 +202,12 @@ extern AddressRange* const CodeMemRegions[ARMJIT_Memory::memregions_Count]; inline bool PageContainsCode(AddressRange* range) { - for (int i = 0; i < 8; i++) - { - if (range[i].Blocks.Length > 0) - return true; - } - return false; + for (int i = 0; i < 8; i++) + { + if (range[i].Blocks.Length > 0) + return true; + } + return false; } u32 LocaliseCodeAddress(u32 num, u32 addr); diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp index ec83905..35cfdf0 100644 --- a/src/ARMJIT_Memory.cpp +++ b/src/ARMJIT_Memory.cpp @@ -25,22 +25,22 @@ #include <malloc.h> /* - We're handling fastmem here. + We're handling fastmem here. - Basically we're repurposing a big piece of virtual memory - and map the memory regions as they're structured on the DS - in it. + Basically we're repurposing a big piece of virtual memory + and map the memory regions as they're structured on the DS + in it. - On most systems you have a single piece of main ram, - maybe some video ram and faster cache RAM and that's about it. - Here we have not only a lot more different memory regions, - but also two address spaces. Not only that but they all have - mirrors (the worst case is 16kb SWRAM which is mirrored 1024x). + On most systems you have a single piece of main ram, + maybe some video ram and faster cache RAM and that's about it. + Here we have not only a lot more different memory regions, + but also two address spaces. Not only that but they all have + mirrors (the worst case is 16kb SWRAM which is mirrored 1024x). - We handle this by only mapping those regions which are actually - used and by praying the games don't go wild. + We handle this by only mapping those regions which are actually + used and by praying the games don't go wild. - Beware, this file is full of platform specific code. + Beware, this file is full of platform specific code. 
*/ @@ -48,8 +48,8 @@ namespace ARMJIT_Memory { struct FaultDescription { - u32 EmulatedFaultAddr; - u64 FaultPC; + u32 EmulatedFaultAddr; + u64 FaultPC; }; bool FaultHandler(FaultDescription* faultDesc, s32& offset); @@ -61,7 +61,7 @@ bool FaultHandler(FaultDescription* faultDesc, s32& offset); extern "C" { - + void ARM_RestoreContext(u64* registers) __attribute__((noreturn)); extern char __start__; @@ -72,35 +72,35 @@ u64 __nx_exception_stack_size = 0x8000; void __libnx_exception_handler(ThreadExceptionDump* ctx) { - ARMJIT_Memory::FaultDescription desc; - u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); - desc.EmulatedFaultAddr = (u8*)ctx->far.x - curArea; - desc.FaultPC = ctx->pc.x; - - u64 integerRegisters[33]; - memcpy(integerRegisters, &ctx->cpu_gprs[0].x, 8*29); - integerRegisters[29] = ctx->fp.x; - integerRegisters[30] = ctx->lr.x; - integerRegisters[31] = ctx->sp.x; - integerRegisters[32] = ctx->pc.x; - - s32 offset; - if (ARMJIT_Memory::FaultHandler(&desc, offset)) - { - integerRegisters[32] += offset; - - ARM_RestoreContext(integerRegisters); - } - - if (ctx->pc.x >= (u64)&__start__ && ctx->pc.x < (u64)&__rodata_start) - { - printf("unintentional fault in .text at 0x%x (type %d) (trying to access 0x%x?)\n", - ctx->pc.x - (u64)&__start__, ctx->error_desc, ctx->far.x); - } - else - { - printf("unintentional fault somewhere in deep (address) space at %x (type %d)\n", ctx->pc.x, ctx->error_desc); - } + ARMJIT_Memory::FaultDescription desc; + u8* curArea = (u8*)(NDS::CurCPU == 0 ? 
ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); + desc.EmulatedFaultAddr = (u8*)ctx->far.x - curArea; + desc.FaultPC = ctx->pc.x; + + u64 integerRegisters[33]; + memcpy(integerRegisters, &ctx->cpu_gprs[0].x, 8*29); + integerRegisters[29] = ctx->fp.x; + integerRegisters[30] = ctx->lr.x; + integerRegisters[31] = ctx->sp.x; + integerRegisters[32] = ctx->pc.x; + + s32 offset; + if (ARMJIT_Memory::FaultHandler(&desc, offset)) + { + integerRegisters[32] += offset; + + ARM_RestoreContext(integerRegisters); + } + + if (ctx->pc.x >= (u64)&__start__ && ctx->pc.x < (u64)&__rodata_start) + { + printf("unintentional fault in .text at 0x%x (type %d) (trying to access 0x%x?)\n", + ctx->pc.x - (u64)&__start__, ctx->error_desc, ctx->far.x); + } + else + { + printf("unintentional fault somewhere in deep (address) space at %x (type %d)\n", ctx->pc.x, ctx->error_desc); + } } } @@ -109,25 +109,24 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx) static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo) { - if (exceptionInfo->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION) - { - printf("narg\n"); - return EXCEPTION_CONTINUE_SEARCH; - } - - ARMJIT_Memory::FaultDescription desc; - u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); - desc.EmulatedFaultAddr = (u8*)exceptionInfo->ExceptionRecord->ExceptionInformation[1] - curArea; - desc.FaultPC = exceptionInfo->ContextRecord->Rip; - - s32 offset = 0; - if (ARMJIT_Memory::FaultHandler(&desc, offset)) - { - exceptionInfo->ContextRecord->Rip += offset; - return EXCEPTION_CONTINUE_EXECUTION; - } - - return EXCEPTION_CONTINUE_SEARCH; + if (exceptionInfo->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION) + { + return EXCEPTION_CONTINUE_SEARCH; + } + + ARMJIT_Memory::FaultDescription desc; + u8* curArea = (u8*)(NDS::CurCPU == 0 ? 
ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); + desc.EmulatedFaultAddr = (u8*)exceptionInfo->ExceptionRecord->ExceptionInformation[1] - curArea; + desc.FaultPC = exceptionInfo->ContextRecord->Rip; + + s32 offset = 0; + if (ARMJIT_Memory::FaultHandler(&desc, offset)) + { + exceptionInfo->ContextRecord->Rip += offset; + return EXCEPTION_CONTINUE_EXECUTION; + } + + return EXCEPTION_CONTINUE_SEARCH; } #else @@ -137,28 +136,28 @@ struct sigaction OldSa; static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext) { - ucontext_t* context = (ucontext_t*)rawContext; - - ARMJIT_Memory::FaultDescription desc; - u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); + ucontext_t* context = (ucontext_t*)rawContext; + + ARMJIT_Memory::FaultDescription desc; + u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); #ifdef __x86_64__ - desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea; - desc.FaultPC = context->uc_mcontext.gregs[REG_RIP]; + desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea; + desc.FaultPC = context->uc_mcontext.gregs[REG_RIP]; #else - desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea; - desc.FaultPC = context->uc_mcontext.pc; + desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea; + desc.FaultPC = context->uc_mcontext.pc; #endif - s32 offset = 0; - if (ARMJIT_Memory::FaultHandler(&desc, offset)) - { + s32 offset = 0; + if (ARMJIT_Memory::FaultHandler(&desc, offset)) + { #ifdef __x86_64__ - context->uc_mcontext.gregs[REG_RIP] += offset; + context->uc_mcontext.gregs[REG_RIP] += offset; #else - context->uc_mcontext.pc += offset; + context->uc_mcontext.pc += offset; #endif - return; - } + return; + } if (OldSa.sa_flags & SA_SIGINFO) { @@ -188,12 +187,12 @@ void* FastMem9Start, *FastMem7Start; #ifdef _WIN32 inline u32 RoundUp(u32 size) { - return (size + 0xFFFF) & ~0xFFFF; + return (size + 0xFFFF) & 
~0xFFFF; } #else inline u32 RoundUp(u32 size) { - return size; + return size; } #endif @@ -208,32 +207,32 @@ const u32 MemoryTotalSize = MemBlockNWRAM_COffset + RoundUp(DSi::NWRAMSize); const u32 OffsetsPerRegion[memregions_Count] = { - UINT32_MAX, - UINT32_MAX, - MemBlockDTCMOffset, - UINT32_MAX, - MemBlockMainRAMOffset, - MemBlockSWRAMOffset, - UINT32_MAX, - UINT32_MAX, - UINT32_MAX, - MemBlockARM7WRAMOffset, - UINT32_MAX, - UINT32_MAX, - UINT32_MAX, - UINT32_MAX, - UINT32_MAX, - MemBlockNWRAM_AOffset, - MemBlockNWRAM_BOffset, - MemBlockNWRAM_COffset + UINT32_MAX, + UINT32_MAX, + MemBlockDTCMOffset, + UINT32_MAX, + MemBlockMainRAMOffset, + MemBlockSWRAMOffset, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + MemBlockARM7WRAMOffset, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + UINT32_MAX, + MemBlockNWRAM_AOffset, + MemBlockNWRAM_BOffset, + MemBlockNWRAM_COffset }; enum { - memstate_Unmapped, - memstate_MappedRW, - // on switch this is unmapped as well - memstate_MappedProtected, + memstate_Unmapped, + memstate_MappedRW, + // on switch this is unmapped as well + memstate_MappedProtected, }; u8 MappingStatus9[1 << (32-12)]; @@ -253,925 +252,925 @@ int MemoryFile; bool MapIntoRange(u32 addr, u32 num, u32 offset, u32 size) { - u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr; + u8* dst = (u8*)(num == 0 ? 
FastMem9Start : FastMem7Start) + addr; #ifdef __SWITCH__ - Result r = (svcMapProcessMemory(dst, envGetOwnProcessHandle(), - (u64)(MemoryBaseCodeMem + offset), size)); - return R_SUCCEEDED(r); + Result r = (svcMapProcessMemory(dst, envGetOwnProcessHandle(), + (u64)(MemoryBaseCodeMem + offset), size)); + return R_SUCCEEDED(r); #elif defined(_WIN32) - bool r = MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, offset, size, dst) == dst; - return r; + bool r = MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, offset, size, dst) == dst; + return r; #else - return mmap(dst, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, offset) != MAP_FAILED; + return mmap(dst, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, offset) != MAP_FAILED; #endif } bool UnmapFromRange(u32 addr, u32 num, u32 offset, u32 size) { - u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr; + u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr; #ifdef __SWITCH__ - Result r = svcUnmapProcessMemory(dst, envGetOwnProcessHandle(), - (u64)(MemoryBaseCodeMem + offset), size); - return R_SUCCEEDED(r); + Result r = svcUnmapProcessMemory(dst, envGetOwnProcessHandle(), + (u64)(MemoryBaseCodeMem + offset), size); + return R_SUCCEEDED(r); #elif defined(_WIN32) - return UnmapViewOfFile(dst); + return UnmapViewOfFile(dst); #else - return munmap(dst, size) == 0; + return munmap(dst, size) == 0; #endif } void SetCodeProtectionRange(u32 addr, u32 size, u32 num, int protection) { - u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr; + u8* dst = (u8*)(num == 0 ? 
FastMem9Start : FastMem7Start) + addr; #if defined(_WIN32) - DWORD winProtection, oldProtection; - if (protection == 0) - winProtection = PAGE_NOACCESS; - else if (protection == 1) - winProtection = PAGE_READONLY; - else - winProtection = PAGE_READWRITE; - bool success = VirtualProtect(dst, size, winProtection, &oldProtection); - assert(success); + DWORD winProtection, oldProtection; + if (protection == 0) + winProtection = PAGE_NOACCESS; + else if (protection == 1) + winProtection = PAGE_READONLY; + else + winProtection = PAGE_READWRITE; + bool success = VirtualProtect(dst, size, winProtection, &oldProtection); + assert(success); #else - int posixProt; - if (protection == 0) - posixProt = PROT_NONE; - else if (protection == 1) - posixProt = PROT_READ; - else - posixProt = PROT_READ | PROT_WRITE; - mprotect(dst, size, posixProt); + int posixProt; + if (protection == 0) + posixProt = PROT_NONE; + else if (protection == 1) + posixProt = PROT_READ; + else + posixProt = PROT_READ | PROT_WRITE; + mprotect(dst, size, posixProt); #endif } struct Mapping { - u32 Addr; - u32 Size, LocalOffset; - u32 Num; - - void Unmap(int region) - { - bool skipDTCM = Num == 0 && region != memregion_DTCM; - u8* statuses = Num == 0 ? MappingStatus9 : MappingStatus7; - u32 offset = 0; - while (offset < Size) - { - if (skipDTCM && Addr + offset == NDS::ARM9->DTCMBase) - { - offset += NDS::ARM9->DTCMSize; - } - else - { - u32 segmentOffset = offset; - u8 status = statuses[(Addr + offset) >> 12]; - while (statuses[(Addr + offset) >> 12] == status - && offset < Size - && (!skipDTCM || Addr + offset != NDS::ARM9->DTCMBase)) - { - assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped); - statuses[(Addr + offset) >> 12] = memstate_Unmapped; - offset += 0x1000; - } + u32 Addr; + u32 Size, LocalOffset; + u32 Num; + + void Unmap(int region) + { + bool skipDTCM = Num == 0 && region != memregion_DTCM; + u8* statuses = Num == 0 ? 
MappingStatus9 : MappingStatus7; + u32 offset = 0; + while (offset < Size) + { + if (skipDTCM && Addr + offset == NDS::ARM9->DTCMBase) + { + offset += NDS::ARM9->DTCMSize; + } + else + { + u32 segmentOffset = offset; + u8 status = statuses[(Addr + offset) >> 12]; + while (statuses[(Addr + offset) >> 12] == status + && offset < Size + && (!skipDTCM || Addr + offset != NDS::ARM9->DTCMBase)) + { + assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped); + statuses[(Addr + offset) >> 12] = memstate_Unmapped; + offset += 0x1000; + } #ifdef __SWITCH__ - if (status == memstate_MappedRW) - { - u32 segmentSize = offset - segmentOffset; - printf("unmapping %x %x %x %x\n", Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize); - bool success = UnmapFromRange(Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize); - assert(success); - } + if (status == memstate_MappedRW) + { + u32 segmentSize = offset - segmentOffset; + printf("unmapping %x %x %x %x\n", Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize); + bool success = UnmapFromRange(Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize); + assert(success); + } #endif - } - } + } + } #ifndef __SWITCH__ - bool succeded = UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size); - assert(succeded); + bool succeded = UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size); + assert(succeded); #endif - } + } }; ARMJIT::TinyVector<Mapping> Mappings[memregions_Count]; void SetCodeProtection(int region, u32 offset, bool protect) { - offset &= ~0xFFF; - printf("set code protection %d %x %d\n", region, offset, protect); + offset &= ~0xFFF; + //printf("set code protection %d %x %d\n", region, offset, protect); - for (int i = 0; i < Mappings[region].Length; i++) - { - Mapping& mapping = Mappings[region][i]; + for (int i = 0; i < 
Mappings[region].Length; i++) + { + Mapping& mapping = Mappings[region][i]; - if (offset < mapping.LocalOffset || offset >= mapping.LocalOffset + mapping.Size) - continue; + if (offset < mapping.LocalOffset || offset >= mapping.LocalOffset + mapping.Size) + continue; - u32 effectiveAddr = mapping.Addr + (offset - mapping.LocalOffset); - if (mapping.Num == 0 - && region != memregion_DTCM - && effectiveAddr >= NDS::ARM9->DTCMBase - && effectiveAddr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize)) - continue; + u32 effectiveAddr = mapping.Addr + (offset - mapping.LocalOffset); + if (mapping.Num == 0 + && region != memregion_DTCM + && effectiveAddr >= NDS::ARM9->DTCMBase + && effectiveAddr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize)) + continue; - u8* states = (u8*)(mapping.Num == 0 ? MappingStatus9 : MappingStatus7); + u8* states = (u8*)(mapping.Num == 0 ? MappingStatus9 : MappingStatus7); - printf("%x %d %x %x %x %d\n", effectiveAddr, mapping.Num, mapping.Addr, mapping.LocalOffset, mapping.Size, states[effectiveAddr >> 12]); - assert(states[effectiveAddr >> 12] == (protect ? memstate_MappedRW : memstate_MappedProtected)); - states[effectiveAddr >> 12] = protect ? memstate_MappedProtected : memstate_MappedRW; + //printf("%x %d %x %x %x %d\n", effectiveAddr, mapping.Num, mapping.Addr, mapping.LocalOffset, mapping.Size, states[effectiveAddr >> 12]); + assert(states[effectiveAddr >> 12] == (protect ? memstate_MappedRW : memstate_MappedProtected)); + states[effectiveAddr >> 12] = protect ? 
memstate_MappedProtected : memstate_MappedRW; #if defined(__SWITCH__) - bool success; - if (protect) - success = UnmapFromRange(effectiveAddr, mapping.Num, OffsetsPerRegion[region] + offset, 0x1000); - else - success = MapIntoRange(effectiveAddr, mapping.Num, OffsetsPerRegion[region] + offset, 0x1000); - assert(success); + bool success; + if (protect) + success = UnmapFromRange(effectiveAddr, mapping.Num, OffsetsPerRegion[region] + offset, 0x1000); + else + success = MapIntoRange(effectiveAddr, mapping.Num, OffsetsPerRegion[region] + offset, 0x1000); + assert(success); #else - SetCodeProtectionRange(effectiveAddr, 0x1000, mapping.Num, protect ? 1 : 2); + SetCodeProtectionRange(effectiveAddr, 0x1000, mapping.Num, protect ? 1 : 2); #endif - } + } } void RemapDTCM(u32 newBase, u32 newSize) { - // this first part could be made more efficient - // by unmapping DTCM first and then map the holes - u32 oldDTCMBase = NDS::ARM9->DTCMBase; - u32 oldDTCBEnd = oldDTCMBase + NDS::ARM9->DTCMSize; - - u32 newEnd = newBase + newSize; - - printf("remapping DTCM %x %x %x %x\n", newBase, newEnd, oldDTCMBase, oldDTCBEnd); - // unmap all regions containing the old or the current DTCM mapping - for (int region = 0; region < memregions_Count; region++) - { - if (region == memregion_DTCM) - continue; - - for (int i = 0; i < Mappings[region].Length;) - { - Mapping& mapping = Mappings[region][i]; - - u32 start = mapping.Addr; - u32 end = mapping.Addr + mapping.Size; - - printf("mapping %d %x %x %x %x\n", region, mapping.Addr, mapping.Size, mapping.Num, mapping.LocalOffset); - - bool oldOverlap = NDS::ARM9->DTCMSize > 0 && !(oldDTCMBase >= end || oldDTCBEnd <= start); - bool newOverlap = newSize > 0 && !(newBase >= end || newEnd <= start); - - if (mapping.Num == 0 && (oldOverlap || newOverlap)) - { - mapping.Unmap(region); - Mappings[region].Remove(i); - } - else - { - i++; - } - } - } - - for (int i = 0; i < Mappings[memregion_DTCM].Length; i++) - { - 
Mappings[memregion_DTCM][i].Unmap(memregion_DTCM); - } - Mappings[memregion_DTCM].Clear(); + // this first part could be made more efficient + // by unmapping DTCM first and then map the holes + u32 oldDTCMBase = NDS::ARM9->DTCMBase; + u32 oldDTCBEnd = oldDTCMBase + NDS::ARM9->DTCMSize; + + u32 newEnd = newBase + newSize; + + printf("remapping DTCM %x %x %x %x\n", newBase, newEnd, oldDTCMBase, oldDTCBEnd); + // unmap all regions containing the old or the current DTCM mapping + for (int region = 0; region < memregions_Count; region++) + { + if (region == memregion_DTCM) + continue; + + for (int i = 0; i < Mappings[region].Length;) + { + Mapping& mapping = Mappings[region][i]; + + u32 start = mapping.Addr; + u32 end = mapping.Addr + mapping.Size; + + printf("unmapping %d %x %x %x %x\n", region, mapping.Addr, mapping.Size, mapping.Num, mapping.LocalOffset); + + bool oldOverlap = NDS::ARM9->DTCMSize > 0 && !(oldDTCMBase >= end || oldDTCBEnd <= start); + bool newOverlap = newSize > 0 && !(newBase >= end || newEnd <= start); + + if (mapping.Num == 0 && (oldOverlap || newOverlap)) + { + mapping.Unmap(region); + Mappings[region].Remove(i); + } + else + { + i++; + } + } + } + + for (int i = 0; i < Mappings[memregion_DTCM].Length; i++) + { + Mappings[memregion_DTCM][i].Unmap(memregion_DTCM); + } + Mappings[memregion_DTCM].Clear(); } void RemapNWRAM(int num) { - for (int i = 0; i < Mappings[memregion_SharedWRAM].Length;) - { - Mapping& mapping = Mappings[memregion_SharedWRAM][i]; - if (!(DSi::NWRAMStart[mapping.Num][num] >= mapping.Addr + mapping.Size - || DSi::NWRAMEnd[mapping.Num][num] < mapping.Addr)) - { - mapping.Unmap(memregion_SharedWRAM); - Mappings[memregion_SharedWRAM].Remove(i); - } - else - { - i++; - } - } - for (int i = 0; i < Mappings[memregion_NewSharedWRAM_A + num].Length; i++) - { - Mappings[memregion_NewSharedWRAM_A + num][i].Unmap(memregion_NewSharedWRAM_A + num); - } - Mappings[memregion_NewSharedWRAM_A + num].Clear(); + for (int i = 0; i < 
Mappings[memregion_SharedWRAM].Length;) + { + Mapping& mapping = Mappings[memregion_SharedWRAM][i]; + if (!(DSi::NWRAMStart[mapping.Num][num] >= mapping.Addr + mapping.Size + || DSi::NWRAMEnd[mapping.Num][num] < mapping.Addr)) + { + mapping.Unmap(memregion_SharedWRAM); + Mappings[memregion_SharedWRAM].Remove(i); + } + else + { + i++; + } + } + for (int i = 0; i < Mappings[memregion_NewSharedWRAM_A + num].Length; i++) + { + Mappings[memregion_NewSharedWRAM_A + num][i].Unmap(memregion_NewSharedWRAM_A + num); + } + Mappings[memregion_NewSharedWRAM_A + num].Clear(); } void RemapSWRAM() { - printf("remapping SWRAM\n"); - for (int i = 0; i < Mappings[memregion_WRAM7].Length;) - { - Mapping& mapping = Mappings[memregion_WRAM7][i]; - if (mapping.Addr + mapping.Size < 0x03800000) - { - mapping.Unmap(memregion_WRAM7); - Mappings[memregion_WRAM7].Remove(i); - } - else - i++; - } - for (int i = 0; i < Mappings[memregion_SharedWRAM].Length; i++) - { - Mappings[memregion_SharedWRAM][i].Unmap(memregion_SharedWRAM); - } - Mappings[memregion_SharedWRAM].Clear(); + printf("remapping SWRAM\n"); + for (int i = 0; i < Mappings[memregion_WRAM7].Length;) + { + Mapping& mapping = Mappings[memregion_WRAM7][i]; + if (mapping.Addr + mapping.Size < 0x03800000) + { + mapping.Unmap(memregion_WRAM7); + Mappings[memregion_WRAM7].Remove(i); + } + else + i++; + } + for (int i = 0; i < Mappings[memregion_SharedWRAM].Length; i++) + { + Mappings[memregion_SharedWRAM][i].Unmap(memregion_SharedWRAM); + } + Mappings[memregion_SharedWRAM].Clear(); } bool MapAtAddress(u32 addr) { - u32 num = NDS::CurCPU; + u32 num = NDS::CurCPU; - int region = num == 0 - ? ClassifyAddress9(addr) - : ClassifyAddress7(addr); + int region = num == 0 + ? 
ClassifyAddress9(addr) + : ClassifyAddress7(addr); - if (!IsFastmemCompatible(region)) - return false; + if (!IsFastmemCompatible(region)) + return false; - u32 mirrorStart, mirrorSize, memoryOffset; - bool isMapped = GetMirrorLocation(region, num, addr, memoryOffset, mirrorStart, mirrorSize); - if (!isMapped) - return false; + u32 mirrorStart, mirrorSize, memoryOffset; + bool isMapped = GetMirrorLocation(region, num, addr, memoryOffset, mirrorStart, mirrorSize); + if (!isMapped) + return false; - u8* states = num == 0 ? MappingStatus9 : MappingStatus7; - printf("trying to create mapping %x, %x %x %d %d\n", mirrorStart, mirrorSize, memoryOffset, region, num); - bool isExecutable = ARMJIT::CodeMemRegions[region]; + u8* states = num == 0 ? MappingStatus9 : MappingStatus7; + printf("trying to create mapping %x, %x %x %d %d\n", mirrorStart, mirrorSize, memoryOffset, region, num); + bool isExecutable = ARMJIT::CodeMemRegions[region]; #ifndef __SWITCH__ - bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize); - assert(succeded); + bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize); + assert(succeded); #endif - ARMJIT::AddressRange* range = ARMJIT::CodeMemRegions[region] + memoryOffset / 512; - - // this overcomplicated piece of code basically just finds whole pieces of code memory - // which can be mapped - u32 offset = 0; - bool skipDTCM = num == 0 && region != memregion_DTCM; - while (offset < mirrorSize) - { - if (skipDTCM && mirrorStart + offset == NDS::ARM9->DTCMBase) - { - SetCodeProtectionRange(NDS::ARM9->DTCMBase, NDS::ARM9->DTCMSize, 0, 0); - offset += NDS::ARM9->DTCMSize; - } - else - { - u32 sectionOffset = offset; - bool hasCode = isExecutable && ARMJIT::PageContainsCode(&range[offset / 512]); - while ((!isExecutable || ARMJIT::PageContainsCode(&range[offset / 512]) == hasCode) - && offset < mirrorSize - && (!skipDTCM || mirrorStart + offset != NDS::ARM9->DTCMBase)) 
- { - assert(states[(mirrorStart + offset) >> 12] == memstate_Unmapped); - states[(mirrorStart + offset) >> 12] = hasCode ? memstate_MappedProtected : memstate_MappedRW; - offset += 0x1000; - } - - u32 sectionSize = offset - sectionOffset; + ARMJIT::AddressRange* range = ARMJIT::CodeMemRegions[region] + memoryOffset / 512; + + // this overcomplicated piece of code basically just finds whole pieces of code memory + // which can be mapped + u32 offset = 0; + bool skipDTCM = num == 0 && region != memregion_DTCM; + while (offset < mirrorSize) + { + if (skipDTCM && mirrorStart + offset == NDS::ARM9->DTCMBase) + { + SetCodeProtectionRange(NDS::ARM9->DTCMBase, NDS::ARM9->DTCMSize, 0, 0); + offset += NDS::ARM9->DTCMSize; + } + else + { + u32 sectionOffset = offset; + bool hasCode = isExecutable && ARMJIT::PageContainsCode(&range[offset / 512]); + while ((!isExecutable || ARMJIT::PageContainsCode(&range[offset / 512]) == hasCode) + && offset < mirrorSize + && (!skipDTCM || mirrorStart + offset != NDS::ARM9->DTCMBase)) + { + assert(states[(mirrorStart + offset) >> 12] == memstate_Unmapped); + states[(mirrorStart + offset) >> 12] = hasCode ? 
memstate_MappedProtected : memstate_MappedRW; + offset += 0x1000; + } + + u32 sectionSize = offset - sectionOffset; #if defined(__SWITCH__) - if (!hasCode) - { - printf("trying to map %x (size: %x) from %x\n", mirrorStart + sectionOffset, sectionSize, sectionOffset + memoryOffset + OffsetsPerRegion[region]); - bool succeded = MapIntoRange(mirrorStart + sectionOffset, num, sectionOffset + memoryOffset + OffsetsPerRegion[region], sectionSize); - assert(succeded); - } + if (!hasCode) + { + printf("trying to map %x (size: %x) from %x\n", mirrorStart + sectionOffset, sectionSize, sectionOffset + memoryOffset + OffsetsPerRegion[region]); + bool succeded = MapIntoRange(mirrorStart + sectionOffset, num, sectionOffset + memoryOffset + OffsetsPerRegion[region], sectionSize); + assert(succeded); + } #else - if (hasCode) - { - SetCodeProtectionRange(mirrorStart + sectionOffset, sectionSize, num, 1); - } + if (hasCode) + { + SetCodeProtectionRange(mirrorStart + sectionOffset, sectionSize, num, 1); + } #endif - } - } + } + } - assert(num == 0 || num == 1); - Mapping mapping{mirrorStart, mirrorSize, memoryOffset, num}; - Mappings[region].Add(mapping); + assert(num == 0 || num == 1); + Mapping mapping{mirrorStart, mirrorSize, memoryOffset, num}; + Mappings[region].Add(mapping); - printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + mirrorSize - 1); + printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + mirrorSize - 1); - return true; + return true; } bool FaultHandler(FaultDescription* faultDesc, s32& offset) { - if (ARMJIT::JITCompiler->IsJITFault(faultDesc->FaultPC)) - { - bool rewriteToSlowPath = true; - - u32 addr = faultDesc->EmulatedFaultAddr; - - if ((NDS::CurCPU == 0 ? 
MappingStatus9 : MappingStatus7)[addr >> 12] == memstate_Unmapped) - rewriteToSlowPath = !MapAtAddress(faultDesc->EmulatedFaultAddr); - - if (rewriteToSlowPath) - { - offset = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc->FaultPC); - } - return true; - } - return false; + if (ARMJIT::JITCompiler->IsJITFault(faultDesc->FaultPC)) + { + bool rewriteToSlowPath = true; + + u32 addr = faultDesc->EmulatedFaultAddr; + + if ((NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7)[addr >> 12] == memstate_Unmapped) + rewriteToSlowPath = !MapAtAddress(faultDesc->EmulatedFaultAddr); + + if (rewriteToSlowPath) + { + offset = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc->FaultPC); + } + return true; + } + return false; } void Init() { - const u64 AddrSpaceSize = 0x100000000; + const u64 AddrSpaceSize = 0x100000000; #if defined(__SWITCH__) MemoryBase = (u8*)memalign(0x1000, MemoryTotalSize); - MemoryBaseCodeMem = (u8*)virtmemReserve(MemoryTotalSize); + MemoryBaseCodeMem = (u8*)virtmemReserve(MemoryTotalSize); bool succeded = R_SUCCEEDED(svcMapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize)); assert(succeded); - succeded = R_SUCCEEDED(svcSetProcessMemoryPermission(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, + succeded = R_SUCCEEDED(svcSetProcessMemoryPermission(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, MemoryTotalSize, Perm_Rw)); - assert(succeded); + assert(succeded); - // 8 GB of address space, just don't ask... - FastMem9Start = virtmemReserve(AddrSpaceSize); - assert(FastMem9Start); - FastMem7Start = virtmemReserve(AddrSpaceSize); - assert(FastMem7Start); + // 8 GB of address space, just don't ask... 
+ FastMem9Start = virtmemReserve(AddrSpaceSize); + assert(FastMem9Start); + FastMem7Start = virtmemReserve(AddrSpaceSize); + assert(FastMem7Start); - u8* basePtr = MemoryBaseCodeMem; + u8* basePtr = MemoryBaseCodeMem; #elif defined(_WIN32) - ExceptionHandlerHandle = AddVectoredExceptionHandler(1, ExceptionHandler); + ExceptionHandlerHandle = AddVectoredExceptionHandler(1, ExceptionHandler); - MemoryFile = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, MemoryTotalSize, NULL); + MemoryFile = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, MemoryTotalSize, NULL); - MemoryBase = (u8*)VirtualAlloc(NULL, MemoryTotalSize, MEM_RESERVE, PAGE_READWRITE); + MemoryBase = (u8*)VirtualAlloc(NULL, MemoryTotalSize, MEM_RESERVE, PAGE_READWRITE); - FastMem9Start = VirtualAlloc(NULL, AddrSpaceSize, MEM_RESERVE, PAGE_READWRITE); - FastMem7Start = VirtualAlloc(NULL, AddrSpaceSize, MEM_RESERVE, PAGE_READWRITE); + FastMem9Start = VirtualAlloc(NULL, AddrSpaceSize, MEM_RESERVE, PAGE_READWRITE); + FastMem7Start = VirtualAlloc(NULL, AddrSpaceSize, MEM_RESERVE, PAGE_READWRITE); - // only free them after they have all been reserved - // so they can't overlap - VirtualFree(MemoryBase, 0, MEM_RELEASE); - VirtualFree(FastMem9Start, 0, MEM_RELEASE); - VirtualFree(FastMem7Start, 0, MEM_RELEASE); + // only free them after they have all been reserved + // so they can't overlap + VirtualFree(MemoryBase, 0, MEM_RELEASE); + VirtualFree(FastMem9Start, 0, MEM_RELEASE); + VirtualFree(FastMem7Start, 0, MEM_RELEASE); - MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, MemoryTotalSize, MemoryBase); + MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, MemoryTotalSize, MemoryBase); - u8* basePtr = MemoryBase; + u8* basePtr = MemoryBase; #else - FastMem9Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); - FastMem7Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); + FastMem9Start = mmap(NULL, 
AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); + FastMem7Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); - MemoryBase = (u8*)mmap(NULL, MemoryTotalSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); + MemoryBase = (u8*)mmap(NULL, MemoryTotalSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); - MemoryFile = memfd_create("melondsfastmem", 0); - ftruncate(MemoryFile, MemoryTotalSize); + MemoryFile = memfd_create("melondsfastmem", 0); + ftruncate(MemoryFile, MemoryTotalSize); - NewSa.sa_flags = SA_SIGINFO; - sigemptyset(&NewSa.sa_mask); - NewSa.sa_sigaction = SigsegvHandler; - sigaction(SIGSEGV, &NewSa, &OldSa); + NewSa.sa_flags = SA_SIGINFO; + sigemptyset(&NewSa.sa_mask); + NewSa.sa_sigaction = SigsegvHandler; + sigaction(SIGSEGV, &NewSa, &OldSa); - munmap(MemoryBase, MemoryTotalSize); - munmap(FastMem9Start, AddrSpaceSize); - munmap(FastMem7Start, AddrSpaceSize); + munmap(MemoryBase, MemoryTotalSize); + munmap(FastMem9Start, AddrSpaceSize); + munmap(FastMem7Start, AddrSpaceSize); - mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0); + mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0); - u8* basePtr = MemoryBase; + u8* basePtr = MemoryBase; #endif - NDS::MainRAM = basePtr + MemBlockMainRAMOffset; - NDS::SharedWRAM = basePtr + MemBlockSWRAMOffset; - NDS::ARM7WRAM = basePtr + MemBlockARM7WRAMOffset; - NDS::ARM9->DTCM = basePtr + MemBlockDTCMOffset; - DSi::NWRAM_A = basePtr + MemBlockNWRAM_AOffset; - DSi::NWRAM_B = basePtr + MemBlockNWRAM_BOffset; - DSi::NWRAM_C = basePtr + MemBlockNWRAM_COffset; + NDS::MainRAM = basePtr + MemBlockMainRAMOffset; + NDS::SharedWRAM = basePtr + MemBlockSWRAMOffset; + NDS::ARM7WRAM = basePtr + MemBlockARM7WRAMOffset; + NDS::ARM9->DTCM = basePtr + MemBlockDTCMOffset; + DSi::NWRAM_A = basePtr + MemBlockNWRAM_AOffset; + DSi::NWRAM_B = basePtr + MemBlockNWRAM_BOffset; + DSi::NWRAM_C = basePtr + MemBlockNWRAM_COffset; } 
void DeInit() { #if defined(__SWITCH__) - virtmemFree(FastMem9Start, 0x100000000); - virtmemFree(FastMem7Start, 0x100000000); + virtmemFree(FastMem9Start, 0x100000000); + virtmemFree(FastMem7Start, 0x100000000); svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize); - virtmemFree(MemoryBaseCodeMem, MemoryTotalSize); + virtmemFree(MemoryBaseCodeMem, MemoryTotalSize); free(MemoryBase); #elif defined(_WIN32) - assert(UnmapViewOfFile(MemoryBase)); - CloseHandle(MemoryFile); + assert(UnmapViewOfFile(MemoryBase)); + CloseHandle(MemoryFile); - RemoveVectoredExceptionHandler(ExceptionHandlerHandle); + RemoveVectoredExceptionHandler(ExceptionHandlerHandle); #endif } void Reset() { - for (int region = 0; region < memregions_Count; region++) - { - for (int i = 0; i < Mappings[region].Length; i++) - Mappings[region][i].Unmap(region); - Mappings[region].Clear(); - } - - for (int i = 0; i < sizeof(MappingStatus9); i++) - { - assert(MappingStatus9[i] == memstate_Unmapped); - assert(MappingStatus7[i] == memstate_Unmapped); - } - - printf("done resetting jit mem\n"); + for (int region = 0; region < memregions_Count; region++) + { + for (int i = 0; i < Mappings[region].Length; i++) + Mappings[region][i].Unmap(region); + Mappings[region].Clear(); + } + + for (int i = 0; i < sizeof(MappingStatus9); i++) + { + assert(MappingStatus9[i] == memstate_Unmapped); + assert(MappingStatus7[i] == memstate_Unmapped); + } + + printf("done resetting jit mem\n"); } bool IsFastmemCompatible(int region) { #ifdef _WIN32 - /* - TODO: with some hacks, the smaller shared WRAM regions - could be mapped in some occaisons as well - */ - if (region == memregion_DTCM - || region == memregion_SharedWRAM - || region == memregion_NewSharedWRAM_B - || region == memregion_NewSharedWRAM_C) - return false; + /* + TODO: with some hacks, the smaller shared WRAM regions + could be mapped in some occaisons as well + */ + if (region == memregion_DTCM + || region == 
memregion_SharedWRAM + || region == memregion_NewSharedWRAM_B + || region == memregion_NewSharedWRAM_C) + return false; #endif - return OffsetsPerRegion[region] != UINT32_MAX; + return OffsetsPerRegion[region] != UINT32_MAX; } bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mirrorStart, u32& mirrorSize) { - memoryOffset = 0; - switch (region) - { - case memregion_ITCM: - if (num == 0) - { - mirrorStart = addr & ~(ITCMPhysicalSize - 1); - mirrorSize = ITCMPhysicalSize; - return true; - } - return false; - case memregion_DTCM: - if (num == 0) - { - mirrorStart = addr & ~(DTCMPhysicalSize - 1); - mirrorSize = DTCMPhysicalSize; - return true; - } - return false; - case memregion_MainRAM: - mirrorStart = addr & ~NDS::MainRAMMask; - mirrorSize = NDS::MainRAMMask + 1; - return true; - case memregion_BIOS9: - if (num == 0) - { - mirrorStart = addr & ~0xFFF; - mirrorSize = 0x1000; - return true; - } - return false; - case memregion_BIOS7: - if (num == 1) - { - mirrorStart = 0; - mirrorSize = 0x4000; - return true; - } - return false; - case memregion_SharedWRAM: - if (num == 0 && NDS::SWRAM_ARM9.Mem) - { - mirrorStart = addr & ~NDS::SWRAM_ARM9.Mask; - mirrorSize = NDS::SWRAM_ARM9.Mask + 1; - memoryOffset = NDS::SWRAM_ARM9.Mem - NDS::SharedWRAM; - return true; - } - else if (num == 1 && NDS::SWRAM_ARM7.Mem) - { - mirrorStart = addr & ~NDS::SWRAM_ARM7.Mask; - mirrorSize = NDS::SWRAM_ARM7.Mask + 1; - memoryOffset = NDS::SWRAM_ARM7.Mem - NDS::SharedWRAM; - return true; - } - return false; - case memregion_WRAM7: - if (num == 1) - { - mirrorStart = addr & ~(NDS::ARM7WRAMSize - 1); - mirrorSize = NDS::ARM7WRAMSize; - return true; - } - return false; - case memregion_VRAM: - if (num == 0) - { - mirrorStart = addr & ~0xFFFFF; - mirrorSize = 0x100000; - } - return false; - case memregion_VWRAM: - if (num == 1) - { - mirrorStart = addr & ~0x3FFFF; - mirrorSize = 0x40000; - return true; - } - return false; - case memregion_NewSharedWRAM_A: - { - u8* ptr = 
DSi::NWRAMMap_A[num][(addr >> 16) & DSi::NWRAMMask[num][0]]; - if (ptr) - { - memoryOffset = ptr - DSi::NWRAM_A; - mirrorStart = addr & ~0xFFFF; - mirrorSize = 0x10000; - return true; - } - return false; // zero filled memory - } - case memregion_NewSharedWRAM_B: - { - u8* ptr = DSi::NWRAMMap_B[num][(addr >> 15) & DSi::NWRAMMask[num][1]]; - if (ptr) - { - memoryOffset = ptr - DSi::NWRAM_B; - mirrorStart = addr & ~0x7FFF; - mirrorSize = 0x8000; - return true; - } - return false; // zero filled memory - } - case memregion_NewSharedWRAM_C: - { - u8* ptr = DSi::NWRAMMap_C[num][(addr >> 15) & DSi::NWRAMMask[num][2]]; - if (ptr) - { - memoryOffset = ptr - DSi::NWRAM_C; - mirrorStart = addr & ~0x7FFF; - mirrorSize = 0x8000; - return true; - } - return false; // zero filled memory - } - case memregion_BIOS9DSi: - if (num == 0) - { - mirrorStart = addr & ~0xFFFF; - mirrorSize = DSi::SCFG_BIOS & (1<<0) ? 0x8000 : 0x10000; - return true; - } - return false; - case memregion_BIOS7DSi: - if (num == 1) - { - mirrorStart = addr & ~0xFFFF; - mirrorSize = DSi::SCFG_BIOS & (1<<8) ? 
0x8000 : 0x10000; - return true; - } - return false; - default: - assert(false && "For the time being this should only be used for code"); - return false; - } + memoryOffset = 0; + switch (region) + { + case memregion_ITCM: + if (num == 0) + { + mirrorStart = addr & ~(ITCMPhysicalSize - 1); + mirrorSize = ITCMPhysicalSize; + return true; + } + return false; + case memregion_DTCM: + if (num == 0) + { + mirrorStart = addr & ~(DTCMPhysicalSize - 1); + mirrorSize = DTCMPhysicalSize; + return true; + } + return false; + case memregion_MainRAM: + mirrorStart = addr & ~NDS::MainRAMMask; + mirrorSize = NDS::MainRAMMask + 1; + return true; + case memregion_BIOS9: + if (num == 0) + { + mirrorStart = addr & ~0xFFF; + mirrorSize = 0x1000; + return true; + } + return false; + case memregion_BIOS7: + if (num == 1) + { + mirrorStart = 0; + mirrorSize = 0x4000; + return true; + } + return false; + case memregion_SharedWRAM: + if (num == 0 && NDS::SWRAM_ARM9.Mem) + { + mirrorStart = addr & ~NDS::SWRAM_ARM9.Mask; + mirrorSize = NDS::SWRAM_ARM9.Mask + 1; + memoryOffset = NDS::SWRAM_ARM9.Mem - NDS::SharedWRAM; + return true; + } + else if (num == 1 && NDS::SWRAM_ARM7.Mem) + { + mirrorStart = addr & ~NDS::SWRAM_ARM7.Mask; + mirrorSize = NDS::SWRAM_ARM7.Mask + 1; + memoryOffset = NDS::SWRAM_ARM7.Mem - NDS::SharedWRAM; + return true; + } + return false; + case memregion_WRAM7: + if (num == 1) + { + mirrorStart = addr & ~(NDS::ARM7WRAMSize - 1); + mirrorSize = NDS::ARM7WRAMSize; + return true; + } + return false; + case memregion_VRAM: + if (num == 0) + { + mirrorStart = addr & ~0xFFFFF; + mirrorSize = 0x100000; + } + return false; + case memregion_VWRAM: + if (num == 1) + { + mirrorStart = addr & ~0x3FFFF; + mirrorSize = 0x40000; + return true; + } + return false; + case memregion_NewSharedWRAM_A: + { + u8* ptr = DSi::NWRAMMap_A[num][(addr >> 16) & DSi::NWRAMMask[num][0]]; + if (ptr) + { + memoryOffset = ptr - DSi::NWRAM_A; + mirrorStart = addr & ~0xFFFF; + mirrorSize = 0x10000; + return 
true; + } + return false; // zero filled memory + } + case memregion_NewSharedWRAM_B: + { + u8* ptr = DSi::NWRAMMap_B[num][(addr >> 15) & DSi::NWRAMMask[num][1]]; + if (ptr) + { + memoryOffset = ptr - DSi::NWRAM_B; + mirrorStart = addr & ~0x7FFF; + mirrorSize = 0x8000; + return true; + } + return false; // zero filled memory + } + case memregion_NewSharedWRAM_C: + { + u8* ptr = DSi::NWRAMMap_C[num][(addr >> 15) & DSi::NWRAMMask[num][2]]; + if (ptr) + { + memoryOffset = ptr - DSi::NWRAM_C; + mirrorStart = addr & ~0x7FFF; + mirrorSize = 0x8000; + return true; + } + return false; // zero filled memory + } + case memregion_BIOS9DSi: + if (num == 0) + { + mirrorStart = addr & ~0xFFFF; + mirrorSize = DSi::SCFG_BIOS & (1<<0) ? 0x8000 : 0x10000; + return true; + } + return false; + case memregion_BIOS7DSi: + if (num == 1) + { + mirrorStart = addr & ~0xFFFF; + mirrorSize = DSi::SCFG_BIOS & (1<<8) ? 0x8000 : 0x10000; + return true; + } + return false; + default: + assert(false && "For the time being this should only be used for code"); + return false; + } } u32 LocaliseAddress(int region, u32 num, u32 addr) { - switch (region) - { - case memregion_ITCM: - return (addr & (ITCMPhysicalSize - 1)) | (memregion_ITCM << 27); - case memregion_MainRAM: - return (addr & NDS::MainRAMMask) | (memregion_MainRAM << 27); - case memregion_BIOS9: - return (addr & 0xFFF) | (memregion_BIOS9 << 27); - case memregion_BIOS7: - return (addr & 0x3FFF) | (memregion_BIOS7 << 27); - case memregion_SharedWRAM: - if (num == 0) - return ((addr & NDS::SWRAM_ARM9.Mask) + (NDS::SWRAM_ARM9.Mem - NDS::SharedWRAM)) | (memregion_SharedWRAM << 27); - else - return ((addr & NDS::SWRAM_ARM7.Mask) + (NDS::SWRAM_ARM7.Mem - NDS::SharedWRAM)) | (memregion_SharedWRAM << 27); - case memregion_WRAM7: - return (addr & (NDS::ARM7WRAMSize - 1)) | (memregion_WRAM7 << 27); - case memregion_VRAM: - // TODO: take mapping properly into account - return (addr & 0xFFFFF) | (memregion_VRAM << 27); - case memregion_VWRAM: - // same 
here - return (addr & 0x3FFFF) | (memregion_VWRAM << 27); - case memregion_NewSharedWRAM_A: - { - u8* ptr = DSi::NWRAMMap_A[num][(addr >> 16) & DSi::NWRAMMask[num][0]]; - if (ptr) - return (ptr - DSi::NWRAM_A + (addr & 0xFFFF)) | (memregion_NewSharedWRAM_A << 27); - else - return memregion_Other << 27; // zero filled memory - } - case memregion_NewSharedWRAM_B: - { - u8* ptr = DSi::NWRAMMap_B[num][(addr >> 15) & DSi::NWRAMMask[num][1]]; - if (ptr) - return (ptr - DSi::NWRAM_B + (addr & 0x7FFF)) | (memregion_NewSharedWRAM_B << 27); - else - return memregion_Other << 27; - } - case memregion_NewSharedWRAM_C: - { - u8* ptr = DSi::NWRAMMap_C[num][(addr >> 15) & DSi::NWRAMMask[num][2]]; - if (ptr) - return (ptr - DSi::NWRAM_C + (addr & 0x7FFF)) | (memregion_NewSharedWRAM_C << 27); - else - return memregion_Other << 27; - } - case memregion_BIOS9DSi: - case memregion_BIOS7DSi: - return (addr & 0xFFFF) | (region << 27); - default: - assert(false && "This should only be needed for regions which can contain code"); - return memregion_Other << 27; - } + switch (region) + { + case memregion_ITCM: + return (addr & (ITCMPhysicalSize - 1)) | (memregion_ITCM << 27); + case memregion_MainRAM: + return (addr & NDS::MainRAMMask) | (memregion_MainRAM << 27); + case memregion_BIOS9: + return (addr & 0xFFF) | (memregion_BIOS9 << 27); + case memregion_BIOS7: + return (addr & 0x3FFF) | (memregion_BIOS7 << 27); + case memregion_SharedWRAM: + if (num == 0) + return ((addr & NDS::SWRAM_ARM9.Mask) + (NDS::SWRAM_ARM9.Mem - NDS::SharedWRAM)) | (memregion_SharedWRAM << 27); + else + return ((addr & NDS::SWRAM_ARM7.Mask) + (NDS::SWRAM_ARM7.Mem - NDS::SharedWRAM)) | (memregion_SharedWRAM << 27); + case memregion_WRAM7: + return (addr & (NDS::ARM7WRAMSize - 1)) | (memregion_WRAM7 << 27); + case memregion_VRAM: + // TODO: take mapping properly into account + return (addr & 0xFFFFF) | (memregion_VRAM << 27); + case memregion_VWRAM: + // same here + return (addr & 0x3FFFF) | (memregion_VWRAM << 27); 
+ case memregion_NewSharedWRAM_A: + { + u8* ptr = DSi::NWRAMMap_A[num][(addr >> 16) & DSi::NWRAMMask[num][0]]; + if (ptr) + return (ptr - DSi::NWRAM_A + (addr & 0xFFFF)) | (memregion_NewSharedWRAM_A << 27); + else + return memregion_Other << 27; // zero filled memory + } + case memregion_NewSharedWRAM_B: + { + u8* ptr = DSi::NWRAMMap_B[num][(addr >> 15) & DSi::NWRAMMask[num][1]]; + if (ptr) + return (ptr - DSi::NWRAM_B + (addr & 0x7FFF)) | (memregion_NewSharedWRAM_B << 27); + else + return memregion_Other << 27; + } + case memregion_NewSharedWRAM_C: + { + u8* ptr = DSi::NWRAMMap_C[num][(addr >> 15) & DSi::NWRAMMask[num][2]]; + if (ptr) + return (ptr - DSi::NWRAM_C + (addr & 0x7FFF)) | (memregion_NewSharedWRAM_C << 27); + else + return memregion_Other << 27; + } + case memregion_BIOS9DSi: + case memregion_BIOS7DSi: + return (addr & 0xFFFF) | (region << 27); + default: + assert(false && "This should only be needed for regions which can contain code"); + return memregion_Other << 27; + } } int ClassifyAddress9(u32 addr) { - if (addr < NDS::ARM9->ITCMSize) - { - return memregion_ITCM; - } - else if (addr >= NDS::ARM9->DTCMBase && addr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize)) - { - return memregion_DTCM; - } - else - { - if (NDS::ConsoleType == 1 && addr >= 0xFFFF0000 && !(DSi::SCFG_BIOS & (1<<1))) - { - if ((addr >= 0xFFFF8000) && (DSi::SCFG_BIOS & (1<<0))) - return memregion_Other; - - return memregion_BIOS9DSi; - } - else if ((addr & 0xFFFFF000) == 0xFFFF0000) - { - return memregion_BIOS9; - } - - switch (addr & 0xFF000000) - { - case 0x02000000: - return memregion_MainRAM; - case 0x03000000: - if (NDS::ConsoleType == 1) - { - if (addr >= DSi::NWRAMStart[0][0] && addr < DSi::NWRAMEnd[0][0]) - return memregion_NewSharedWRAM_A; - if (addr >= DSi::NWRAMStart[0][1] && addr < DSi::NWRAMEnd[0][1]) - return memregion_NewSharedWRAM_B; - if (addr >= DSi::NWRAMStart[0][2] && addr < DSi::NWRAMEnd[0][2]) - return memregion_NewSharedWRAM_C; - } - - if (NDS::SWRAM_ARM9.Mem) 
- return memregion_SharedWRAM; - return memregion_Other; - case 0x04000000: - return memregion_IO9; - case 0x06000000: - return memregion_VRAM; - default: - return memregion_Other; - } - } + if (addr < NDS::ARM9->ITCMSize) + { + return memregion_ITCM; + } + else if (addr >= NDS::ARM9->DTCMBase && addr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize)) + { + return memregion_DTCM; + } + else + { + if (NDS::ConsoleType == 1 && addr >= 0xFFFF0000 && !(DSi::SCFG_BIOS & (1<<1))) + { + if ((addr >= 0xFFFF8000) && (DSi::SCFG_BIOS & (1<<0))) + return memregion_Other; + + return memregion_BIOS9DSi; + } + else if ((addr & 0xFFFFF000) == 0xFFFF0000) + { + return memregion_BIOS9; + } + + switch (addr & 0xFF000000) + { + case 0x02000000: + return memregion_MainRAM; + case 0x03000000: + if (NDS::ConsoleType == 1) + { + if (addr >= DSi::NWRAMStart[0][0] && addr < DSi::NWRAMEnd[0][0]) + return memregion_NewSharedWRAM_A; + if (addr >= DSi::NWRAMStart[0][1] && addr < DSi::NWRAMEnd[0][1]) + return memregion_NewSharedWRAM_B; + if (addr >= DSi::NWRAMStart[0][2] && addr < DSi::NWRAMEnd[0][2]) + return memregion_NewSharedWRAM_C; + } + + if (NDS::SWRAM_ARM9.Mem) + return memregion_SharedWRAM; + return memregion_Other; + case 0x04000000: + return memregion_IO9; + case 0x06000000: + return memregion_VRAM; + default: + return memregion_Other; + } + } } int ClassifyAddress7(u32 addr) { - if (NDS::ConsoleType == 1 && addr < 0x00010000 && !(DSi::SCFG_BIOS & (1<<9))) + if (NDS::ConsoleType == 1 && addr < 0x00010000 && !(DSi::SCFG_BIOS & (1<<9))) { if (addr >= 0x00008000 && DSi::SCFG_BIOS & (1<<8)) return memregion_Other; return memregion_BIOS7DSi; } - else if (addr < 0x00004000) - { - return memregion_BIOS7; - } - else - { - switch (addr & 0xFF800000) - { - case 0x02000000: - case 0x02800000: - return memregion_MainRAM; - case 0x03000000: - if (NDS::ConsoleType == 1) - { - if (addr >= DSi::NWRAMStart[1][0] && addr < DSi::NWRAMEnd[1][0]) - return memregion_NewSharedWRAM_A; - if (addr >= 
DSi::NWRAMStart[1][1] && addr < DSi::NWRAMEnd[1][1]) - return memregion_NewSharedWRAM_B; - if (addr >= DSi::NWRAMStart[1][2] && addr < DSi::NWRAMEnd[1][2]) - return memregion_NewSharedWRAM_C; - } - - if (NDS::SWRAM_ARM7.Mem) - return memregion_SharedWRAM; - return memregion_WRAM7; - case 0x03800000: - return memregion_WRAM7; - case 0x04000000: - return memregion_IO7; - case 0x04800000: - return memregion_Wifi; - case 0x06000000: - case 0x06800000: - return memregion_VWRAM; - } - } - return memregion_Other; + else if (addr < 0x00004000) + { + return memregion_BIOS7; + } + else + { + switch (addr & 0xFF800000) + { + case 0x02000000: + case 0x02800000: + return memregion_MainRAM; + case 0x03000000: + if (NDS::ConsoleType == 1) + { + if (addr >= DSi::NWRAMStart[1][0] && addr < DSi::NWRAMEnd[1][0]) + return memregion_NewSharedWRAM_A; + if (addr >= DSi::NWRAMStart[1][1] && addr < DSi::NWRAMEnd[1][1]) + return memregion_NewSharedWRAM_B; + if (addr >= DSi::NWRAMStart[1][2] && addr < DSi::NWRAMEnd[1][2]) + return memregion_NewSharedWRAM_C; + } + + if (NDS::SWRAM_ARM7.Mem) + return memregion_SharedWRAM; + return memregion_WRAM7; + case 0x03800000: + return memregion_WRAM7; + case 0x04000000: + return memregion_IO7; + case 0x04800000: + return memregion_Wifi; + case 0x06000000: + case 0x06800000: + return memregion_VWRAM; + } + } + return memregion_Other; } void WifiWrite32(u32 addr, u32 val) { - Wifi::Write(addr, val & 0xFFFF); - Wifi::Write(addr + 2, val >> 16); + Wifi::Write(addr, val & 0xFFFF); + Wifi::Write(addr + 2, val >> 16); } u32 WifiRead32(u32 addr) { - return Wifi::Read(addr) | (Wifi::Read(addr + 2) << 16); + return Wifi::Read(addr) | (Wifi::Read(addr + 2) << 16); } template <typename T> void VRAMWrite(u32 addr, T val) { - switch (addr & 0x00E00000) - { - case 0x00000000: GPU::WriteVRAM_ABG<T>(addr, val); return; - case 0x00200000: GPU::WriteVRAM_BBG<T>(addr, val); return; - case 0x00400000: GPU::WriteVRAM_AOBJ<T>(addr, val); return; - case 0x00600000: 
GPU::WriteVRAM_BOBJ<T>(addr, val); return; - default: GPU::WriteVRAM_LCDC<T>(addr, val); return; - } + switch (addr & 0x00E00000) + { + case 0x00000000: GPU::WriteVRAM_ABG<T>(addr, val); return; + case 0x00200000: GPU::WriteVRAM_BBG<T>(addr, val); return; + case 0x00400000: GPU::WriteVRAM_AOBJ<T>(addr, val); return; + case 0x00600000: GPU::WriteVRAM_BOBJ<T>(addr, val); return; + default: GPU::WriteVRAM_LCDC<T>(addr, val); return; + } } template <typename T> T VRAMRead(u32 addr) { - switch (addr & 0x00E00000) - { - case 0x00000000: return GPU::ReadVRAM_ABG<T>(addr); - case 0x00200000: return GPU::ReadVRAM_BBG<T>(addr); - case 0x00400000: return GPU::ReadVRAM_AOBJ<T>(addr); - case 0x00600000: return GPU::ReadVRAM_BOBJ<T>(addr); - default: return GPU::ReadVRAM_LCDC<T>(addr); - } + switch (addr & 0x00E00000) + { + case 0x00000000: return GPU::ReadVRAM_ABG<T>(addr); + case 0x00200000: return GPU::ReadVRAM_BBG<T>(addr); + case 0x00400000: return GPU::ReadVRAM_AOBJ<T>(addr); + case 0x00600000: return GPU::ReadVRAM_BOBJ<T>(addr); + default: return GPU::ReadVRAM_LCDC<T>(addr); + } } void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) { - if (cpu->Num == 0) - { - switch (addr & 0xFF000000) - { - case 0x04000000: - if (!store && size == 32 && addr == 0x04100010 && NDS::ExMemCnt[0] & (1<<11)) - return (void*)NDSCart::ReadROMData; - - /* - unfortunately we can't map GPU2D this way - since it's hidden inside an object - - though GPU3D registers are accessed much more intensive - */ - if (addr >= 0x04000320 && addr < 0x040006A4) - { - switch (size | store) - { - case 8: return (void*)GPU3D::Read8; - case 9: return (void*)GPU3D::Write8; - case 16: return (void*)GPU3D::Read16; - case 17: return (void*)GPU3D::Write16; - case 32: return (void*)GPU3D::Read32; - case 33: return (void*)GPU3D::Write32; - } - } - - if (NDS::ConsoleType == 0) - { - switch (size | store) - { - case 8: return (void*)NDS::ARM9IORead8; - case 9: return (void*)NDS::ARM9IOWrite8; - case 16: return 
(void*)NDS::ARM9IORead16; - case 17: return (void*)NDS::ARM9IOWrite16; - case 32: return (void*)NDS::ARM9IORead32; - case 33: return (void*)NDS::ARM9IOWrite32; - } - } - else - { - switch (size | store) - { - case 8: return (void*)DSi::ARM9IORead8; - case 9: return (void*)DSi::ARM9IOWrite8; - case 16: return (void*)DSi::ARM9IORead16; - case 17: return (void*)DSi::ARM9IOWrite16; - case 32: return (void*)DSi::ARM9IORead32; - case 33: return (void*)DSi::ARM9IOWrite32; - } - } - break; - case 0x06000000: - switch (size | store) - { - case 8: return (void*)VRAMRead<u8>; - case 9: return NULL; - case 16: return (void*)VRAMRead<u16>; - case 17: return (void*)VRAMWrite<u16>; - case 32: return (void*)VRAMRead<u32>; - case 33: return (void*)VRAMWrite<u32>; - } - break; - } - } - else - { - switch (addr & 0xFF800000) - { - case 0x04000000: - if (addr >= 0x04000400 && addr < 0x04000520) - { - switch (size | store) - { - case 8: return (void*)SPU::Read8; - case 9: return (void*)SPU::Write8; - case 16: return (void*)SPU::Read16; - case 17: return (void*)SPU::Write16; - case 32: return (void*)SPU::Read32; - case 33: return (void*)SPU::Write32; - } - } - - if (NDS::ConsoleType == 0) - { - switch (size | store) - { - case 8: return (void*)NDS::ARM7IORead8; - case 9: return (void*)NDS::ARM7IOWrite8; - case 16: return (void*)NDS::ARM7IORead16; - case 17: return (void*)NDS::ARM7IOWrite16; - case 32: return (void*)NDS::ARM7IORead32; - case 33: return (void*)NDS::ARM7IOWrite32; - } - } - else - { - switch (size | store) - { - case 8: return (void*)DSi::ARM7IORead8; - case 9: return (void*)DSi::ARM7IOWrite8; - case 16: return (void*)DSi::ARM7IORead16; - case 17: return (void*)DSi::ARM7IOWrite16; - case 32: return (void*)DSi::ARM7IORead32; - case 33: return (void*)DSi::ARM7IOWrite32; - } - } - break; - case 0x04800000: - if (addr < 0x04810000 && size >= 16) - { - switch (size | store) - { - case 16: return (void*)Wifi::Read; - case 17: return (void*)Wifi::Write; - case 32: return 
(void*)WifiRead32; - case 33: return (void*)WifiWrite32; - } - } - break; - case 0x06000000: - case 0x06800000: - switch (size | store) - { - case 8: return (void*)GPU::ReadVRAM_ARM7<u8>; - case 9: return (void*)GPU::WriteVRAM_ARM7<u8>; - case 16: return (void*)GPU::ReadVRAM_ARM7<u16>; - case 17: return (void*)GPU::WriteVRAM_ARM7<u16>; - case 32: return (void*)GPU::ReadVRAM_ARM7<u32>; - case 33: return (void*)GPU::WriteVRAM_ARM7<u32>; - } - } - } - return NULL; + if (cpu->Num == 0) + { + switch (addr & 0xFF000000) + { + case 0x04000000: + if (!store && size == 32 && addr == 0x04100010 && NDS::ExMemCnt[0] & (1<<11)) + return (void*)NDSCart::ReadROMData; + + /* + unfortunately we can't map GPU2D this way + since it's hidden inside an object + + though GPU3D registers are accessed much more intensive + */ + if (addr >= 0x04000320 && addr < 0x040006A4) + { + switch (size | store) + { + case 8: return (void*)GPU3D::Read8; + case 9: return (void*)GPU3D::Write8; + case 16: return (void*)GPU3D::Read16; + case 17: return (void*)GPU3D::Write16; + case 32: return (void*)GPU3D::Read32; + case 33: return (void*)GPU3D::Write32; + } + } + + if (NDS::ConsoleType == 0) + { + switch (size | store) + { + case 8: return (void*)NDS::ARM9IORead8; + case 9: return (void*)NDS::ARM9IOWrite8; + case 16: return (void*)NDS::ARM9IORead16; + case 17: return (void*)NDS::ARM9IOWrite16; + case 32: return (void*)NDS::ARM9IORead32; + case 33: return (void*)NDS::ARM9IOWrite32; + } + } + else + { + switch (size | store) + { + case 8: return (void*)DSi::ARM9IORead8; + case 9: return (void*)DSi::ARM9IOWrite8; + case 16: return (void*)DSi::ARM9IORead16; + case 17: return (void*)DSi::ARM9IOWrite16; + case 32: return (void*)DSi::ARM9IORead32; + case 33: return (void*)DSi::ARM9IOWrite32; + } + } + break; + case 0x06000000: + switch (size | store) + { + case 8: return (void*)VRAMRead<u8>; + case 9: return NULL; + case 16: return (void*)VRAMRead<u16>; + case 17: return (void*)VRAMWrite<u16>; + case 32: return 
(void*)VRAMRead<u32>; + case 33: return (void*)VRAMWrite<u32>; + } + break; + } + } + else + { + switch (addr & 0xFF800000) + { + case 0x04000000: + if (addr >= 0x04000400 && addr < 0x04000520) + { + switch (size | store) + { + case 8: return (void*)SPU::Read8; + case 9: return (void*)SPU::Write8; + case 16: return (void*)SPU::Read16; + case 17: return (void*)SPU::Write16; + case 32: return (void*)SPU::Read32; + case 33: return (void*)SPU::Write32; + } + } + + if (NDS::ConsoleType == 0) + { + switch (size | store) + { + case 8: return (void*)NDS::ARM7IORead8; + case 9: return (void*)NDS::ARM7IOWrite8; + case 16: return (void*)NDS::ARM7IORead16; + case 17: return (void*)NDS::ARM7IOWrite16; + case 32: return (void*)NDS::ARM7IORead32; + case 33: return (void*)NDS::ARM7IOWrite32; + } + } + else + { + switch (size | store) + { + case 8: return (void*)DSi::ARM7IORead8; + case 9: return (void*)DSi::ARM7IOWrite8; + case 16: return (void*)DSi::ARM7IORead16; + case 17: return (void*)DSi::ARM7IOWrite16; + case 32: return (void*)DSi::ARM7IORead32; + case 33: return (void*)DSi::ARM7IOWrite32; + } + } + break; + case 0x04800000: + if (addr < 0x04810000 && size >= 16) + { + switch (size | store) + { + case 16: return (void*)Wifi::Read; + case 17: return (void*)Wifi::Write; + case 32: return (void*)WifiRead32; + case 33: return (void*)WifiWrite32; + } + } + break; + case 0x06000000: + case 0x06800000: + switch (size | store) + { + case 8: return (void*)GPU::ReadVRAM_ARM7<u8>; + case 9: return (void*)GPU::WriteVRAM_ARM7<u8>; + case 16: return (void*)GPU::ReadVRAM_ARM7<u16>; + case 17: return (void*)GPU::WriteVRAM_ARM7<u16>; + case 32: return (void*)GPU::ReadVRAM_ARM7<u32>; + case 33: return (void*)GPU::WriteVRAM_ARM7<u32>; + } + } + } + return NULL; } }
\ No newline at end of file diff --git a/src/ARMJIT_Memory.h b/src/ARMJIT_Memory.h index 123e18e..4912449 100644 --- a/src/ARMJIT_Memory.h +++ b/src/ARMJIT_Memory.h @@ -18,28 +18,28 @@ void Reset(); enum { - memregion_Other = 0, - memregion_ITCM, - memregion_DTCM, - memregion_BIOS9, - memregion_MainRAM, - memregion_SharedWRAM, - memregion_IO9, - memregion_VRAM, - memregion_BIOS7, - memregion_WRAM7, - memregion_IO7, - memregion_Wifi, - memregion_VWRAM, - - // DSi - memregion_BIOS9DSi, - memregion_BIOS7DSi, - memregion_NewSharedWRAM_A, - memregion_NewSharedWRAM_B, - memregion_NewSharedWRAM_C, - - memregions_Count + memregion_Other = 0, + memregion_ITCM, + memregion_DTCM, + memregion_BIOS9, + memregion_MainRAM, + memregion_SharedWRAM, + memregion_IO9, + memregion_VRAM, + memregion_BIOS7, + memregion_WRAM7, + memregion_IO7, + memregion_Wifi, + memregion_VWRAM, + + // DSi + memregion_BIOS9DSi, + memregion_BIOS7DSi, + memregion_NewSharedWRAM_A, + memregion_NewSharedWRAM_B, + memregion_NewSharedWRAM_C, + + memregions_Count }; int ClassifyAddress9(u32 addr); diff --git a/src/ARMJIT_RegisterCache.h b/src/ARMJIT_RegisterCache.h index 0547c84..feb2d35 100644 --- a/src/ARMJIT_RegisterCache.h +++ b/src/ARMJIT_RegisterCache.h @@ -18,8 +18,8 @@ public: RegisterCache() {} - RegisterCache(T* compiler, FetchedInstr instrs[], int instrsCount, bool pcAllocatableAsSrc = false) - : Compiler(compiler), Instrs(instrs), InstrsCount(instrsCount) + RegisterCache(T* compiler, FetchedInstr instrs[], int instrsCount, bool pcAllocatableAsSrc = false) + : Compiler(compiler), Instrs(instrs), InstrsCount(instrsCount) { for (int i = 0; i < 16; i++) Mapping[i] = (Reg)-1; @@ -95,7 +95,7 @@ public: LiteralsLoaded = 0; } - void Prepare(bool thumb, int i) + void Prepare(bool thumb, int i) { FetchedInstr instr = Instrs[i]; @@ -175,23 +175,23 @@ public: DirtyRegs |= (LoadedRegs & instr.Info.DstRegs) & ~(1 << 15); } - static const Reg NativeRegAllocOrder[]; - static const int NativeRegsAvailable; + static 
const Reg NativeRegAllocOrder[]; + static const int NativeRegsAvailable; - Reg Mapping[16]; + Reg Mapping[16]; u32 LiteralValues[16]; u16 LiteralsLoaded = 0; - u32 NativeRegsUsed = 0; - u16 LoadedRegs = 0; - u16 DirtyRegs = 0; + u32 NativeRegsUsed = 0; + u16 LoadedRegs = 0; + u16 DirtyRegs = 0; u16 PCAllocatableAsSrc = 0; - T* Compiler; + T* Compiler; - FetchedInstr* Instrs; - int InstrsCount; + FetchedInstr* Instrs; + int InstrsCount; }; } diff --git a/src/ARMJIT_x64/ARMJIT_ALU.cpp b/src/ARMJIT_x64/ARMJIT_ALU.cpp index 43b94b6..57a38c4 100644 --- a/src/ARMJIT_x64/ARMJIT_ALU.cpp +++ b/src/ARMJIT_x64/ARMJIT_ALU.cpp @@ -103,16 +103,30 @@ void Compiler::Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed) // also calculates cycles OpArg Compiler::A_Comp_GetALUOp2(bool S, bool& carryUsed) { + S = S && (CurInstr.SetFlags & 0x2); + if (CurInstr.Instr & (1 << 25)) { Comp_AddCycles_C(); + + u32 shift = (CurInstr.Instr >> 7) & 0x1E; + u32 imm = ROR(CurInstr.Instr & 0xFF, shift); + carryUsed = false; - return Imm32(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E)); + if (S && shift) + { + CPSRDirty = true; + carryUsed = true; + if (imm & 0x80000000) + MOV(32, R(RSCRATCH2), Imm32(1)); + else + XOR(32, R(RSCRATCH2), R(RSCRATCH2)); + } + + return Imm32(imm); } else { - S = S && (CurInstr.SetFlags & 0x2); - int op = (CurInstr.Instr >> 5) & 0x3; if (CurInstr.Instr & (1 << 4)) { diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp index bda9e52..819fe3c 100644 --- a/src/ARMJIT_x64/ARMJIT_Branch.cpp +++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp @@ -127,7 +127,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles) ConstantCycles += cycles; else - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); } void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) diff --git 
a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index 6905010..1fdbaf8 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -315,7 +315,7 @@ Compiler::Compiler() { for (int reg = 0; reg < 16; reg++) { - if (reg == RSCRATCH || reg == ABI_PARAM1 || reg == ABI_PARAM2 || reg == ABI_PARAM3) + if (reg == RSCRATCH || reg == ABI_PARAM1 || reg == ABI_PARAM2) { PatchedStoreFuncs[consoleType][num][size][reg] = NULL; PatchedLoadFuncs[consoleType][num][size][0][reg] = NULL; @@ -330,7 +330,8 @@ Compiler::Compiler() if (num == 0) { MOV(64, R(ABI_PARAM2), R(RCPU)); - MOV(32, R(ABI_PARAM3), R(rdMapped)); + if (rdMapped != ABI_PARAM3) + MOV(32, R(ABI_PARAM3), R(rdMapped)); } else { @@ -626,7 +627,7 @@ void Compiler::Comp_SpecialBranchBehaviour(bool taken) { RegCache.PrepareExit(); - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); JMP((u8*)&ARM_Ret, true); } } @@ -759,7 +760,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[] RegCache.Flush(); - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); JMP((u8*)ARM_Ret, true); /*FILE* codeout = fopen("codeout", "a"); @@ -778,7 +779,7 @@ void Compiler::Comp_AddCycles_C(bool forceNonConstant) : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles); if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant) - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } @@ -790,7 +791,7 @@ void Compiler::Comp_AddCycles_CI(u32 i) : ((R15 & 0x2) ? 
0 : CurInstr.CodeCycles)) + i; if (!Thumb && CurInstr.Cond() < 0xE) - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } @@ -804,12 +805,12 @@ void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add) if (!Thumb && CurInstr.Cond() < 0xE) { LEA(32, RSCRATCH, MDisp(i, add + cycles)); - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH)); } else { ConstantCycles += cycles; - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i)); } } @@ -847,7 +848,7 @@ void Compiler::Comp_AddCycles_CDI() } if (!Thumb && CurInstr.Cond() < 0xE) - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } @@ -891,7 +892,7 @@ void Compiler::Comp_AddCycles_CD() } if (IrregularCycles && !Thumb && CurInstr.Cond() < 0xE) - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } diff --git a/src/ARM_InstrInfo.cpp b/src/ARM_InstrInfo.cpp index ccec951..74a5f87 100644 --- a/src/ARM_InstrInfo.cpp +++ b/src/ARM_InstrInfo.cpp @@ -7,7 +7,7 @@ namespace ARMInstrInfo { -#define ak(x) ((x) << 22) +#define ak(x) ((x) << 23) enum { A_Read0 = 1 << 0, @@ -37,9 +37,10 @@ enum { A_RRXReadC = 1 << 17, A_StaticShiftSetC = 1 << 18, A_SetC = 1 << 19, + A_SetCImm = 1 << 20, - A_WriteMem = 1 << 20, - A_LoadMem = 1 << 21 + A_WriteMem = 1 << 21, + A_LoadMem = 1 << 22 }; #define A_BIOP A_Read16 @@ -52,7 +53,7 @@ enum { #define A_ARITH_SHIFT_REG A_SetCV #define A_LOGIC_SHIFT_REG A_SetMaybeC #define A_ARITH_IMM A_SetCV -#define A_LOGIC_IMM 0 +#define A_LOGIC_IMM A_SetCImm #define A_IMPLEMENT_ALU_OP(x,k,a,c) \ const u32 A_##x##_IMM = A_Write12 | c | A_##k | ak(ak_##x##_IMM); \ @@ -410,7 +411,7 @@ Info Decode(bool thumb, 
u32 num, u32 instr) if (data & A_UnkOnARM7 && num == 1) data = A_UNK; - res.Kind = (data >> 22) & 0x1FF; + res.Kind = (data >> 23) & 0x1FF; if (res.Kind >= ak_SMLAxy && res.Kind <= ak_SMULxy && num == 1) { @@ -496,7 +497,9 @@ Info Decode(bool thumb, u32 num, u32 instr) res.ReadFlags |= flag_C; if ((data & A_RRXReadC) && !((instr >> 7) & 0x1F)) res.ReadFlags |= flag_C; - if ((data & A_SetC) || ((data & A_StaticShiftSetC) && ((instr >> 7) & 0x1F))) + if ((data & A_SetC) + || ((data & A_StaticShiftSetC) && ((instr >> 7) & 0x1F)) + || ((data & A_SetCImm) && ((instr >> 7) & 0x1E))) res.WriteFlags |= flag_C; if (data & A_WriteMem) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 491a583..b2d0744 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -90,6 +90,7 @@ if (ENABLE_JIT) ARMJIT_A64/ARMJIT_Linkage.s ) + set_source_files_properties(ARMJIT_A64/ARMJIT_Linkage.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp") endif() endif() diff --git a/src/CP15.cpp b/src/CP15.cpp index 992c83f..f6476ab 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -21,9 +21,11 @@ #include "NDS.h" #include "DSi.h" #include "ARM.h" + +#ifdef JIT_ENABLED #include "ARMJIT.h" #include "ARMJIT_Memory.h" - +#endif // access timing for cached regions // this would be an average between cache hits and cache misses @@ -105,7 +107,7 @@ void ARMv5::UpdateDTCMSetting() { newDTCMBase = DTCMSetting & 0xFFFFF000; newDTCMSize = 0x200 << ((DTCMSetting >> 1) & 0x1F); - //printf("DTCM [%08X] enabled at %08X, size %X\n", DTCMSetting, DTCMBase, DTCMSize); + //printf("DTCM [%08X] enabled at %08X, size %X\n", DTCMSetting, newDTCMBase, newDTCMSize); } else { @@ -115,7 +117,9 @@ void ARMv5::UpdateDTCMSetting() } if (newDTCMBase != DTCMBase || newDTCMSize != DTCMSize) { +#ifdef JIT_ENABLED ARMJIT_Memory::RemapDTCM(newDTCMBase, newDTCMSize); +#endif DTCMBase = newDTCMBase; DTCMSize = newDTCMSize; } @@ -631,7 +635,10 @@ void ARMv5::CP15Write(u32 id, u32 val) } - if ((id&0xF00)!=0x700) + if ((id & 0xF00) == 
0xF00) // test/debug shit? + return; + + if ((id & 0xF00) != 0x700) printf("unknown CP15 write op %03X %08X\n", id, val); } @@ -725,6 +732,9 @@ u32 ARMv5::CP15Read(u32 id) return ITCMSetting; } + if ((id & 0xF00) == 0xF00) // test/debug shit? + return 0; + printf("unknown CP15 read op %03X\n", id); return 0; } diff --git a/src/DSi.cpp b/src/DSi.cpp index 97a63cd..42541fe 100644 --- a/src/DSi.cpp +++ b/src/DSi.cpp @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. @@ -181,15 +181,17 @@ void SoftReset() // also, BPTWL[0x70] could be abused to quickly boot specific titles +#ifdef JIT_ENABLED + ARMJIT_Memory::Reset(); + ARMJIT::CheckAndInvalidateITCM(); +#endif + NDS::ARM9->Reset(); NDS::ARM7->Reset(); NDS::ARM9->CP15Reset(); memcpy(NDS::ARM9->ITCM, ITCMInit, 0x8000); -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidateITCM(); -#endif DSi_AES::Reset(); @@ -540,7 +542,9 @@ void MapNWRAM_A(u32 num, u8 val) return; } +#ifdef JIT_ENABLED ARMJIT_Memory::RemapNWRAM(0); +#endif int mbkn = 0, mbks = 8*num; @@ -573,7 +577,9 @@ void MapNWRAM_B(u32 num, u8 val) return; } +#ifdef JIT_ENABLED ARMJIT_Memory::RemapNWRAM(1); +#endif int mbkn = 1+(num>>2), mbks = 8*(num&3); @@ -610,7 +616,9 @@ void MapNWRAM_C(u32 num, u8 val) return; } +#ifdef JIT_ENABLED ARMJIT_Memory::RemapNWRAM(2); +#endif int mbkn = 3+(num>>2), mbks = 8*(num&3); @@ -644,7 +652,9 @@ void MapNWRAMRange(u32 cpu, u32 num, u32 val) u32 oldval = MBK[cpu][5+num]; if (oldval == val) return; +#ifdef JIT_ENABLED ARMJIT_Memory::RemapNWRAM(num); +#endif MBK[cpu][5+num] = val; @@ -850,7 +860,9 @@ void ARM9Write8(u32 addr, u8 val) if (ptr) { *(u8*)&ptr[addr & 0xFFFF] = val; +#ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); +#endif } return; } @@ -860,7 +872,9 @@ void ARM9Write8(u32 addr, u8 val) if (ptr) { *(u8*)&ptr[addr & 0x7FFF] = val; +#ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<0, 
ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); +#endif } return; } @@ -870,7 +884,9 @@ void ARM9Write8(u32 addr, u8 val) if (ptr) { *(u8*)&ptr[addr & 0x7FFF] = val; +#ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); +#endif } return; } @@ -895,7 +911,9 @@ void ARM9Write16(u32 addr, u16 val) if (ptr) { *(u16*)&ptr[addr & 0xFFFF] = val; +#ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); +#endif } return; } @@ -905,7 +923,9 @@ void ARM9Write16(u32 addr, u16 val) if (ptr) { *(u16*)&ptr[addr & 0x7FFF] = val; +#ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); +#endif } return; } @@ -915,7 +935,9 @@ void ARM9Write16(u32 addr, u16 val) if (ptr) { *(u16*)&ptr[addr & 0x7FFF] = val; +#ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); +#endif } return; } @@ -940,7 +962,9 @@ void ARM9Write32(u32 addr, u32 val) if (ptr) { *(u32*)&ptr[addr & 0xFFFF] = val; +#ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); +#endif } return; } @@ -950,7 +974,9 @@ void ARM9Write32(u32 addr, u32 val) if (ptr) { *(u32*)&ptr[addr & 0x7FFF] = val; +#ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); +#endif } return; } @@ -960,7 +986,9 @@ void ARM9Write32(u32 addr, u32 val) if (ptr) { *(u32*)&ptr[addr & 0x7FFF] = val; +#ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); +#endif } return; } @@ -1196,7 +1224,9 @@ void ARM7Write16(u32 addr, u16 val) if (ptr) { *(u16*)&ptr[addr & 0xFFFF] = val; +#ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); +#endif } return; } @@ -1206,7 +1236,9 @@ void ARM7Write16(u32 addr, u16 val) if (ptr) { *(u16*)&ptr[addr & 0x7FFF] = val; +#ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<1, 
ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); +#endif } return; } @@ -1216,7 +1248,9 @@ void ARM7Write16(u32 addr, u16 val) if (ptr) { *(u16*)&ptr[addr & 0x7FFF] = val; +#ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); +#endif } return; } @@ -1241,7 +1275,9 @@ void ARM7Write32(u32 addr, u32 val) if (ptr) { *(u32*)&ptr[addr & 0xFFFF] = val; +#ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); +#endif } return; } @@ -1251,7 +1287,9 @@ void ARM7Write32(u32 addr, u32 val) if (ptr) { *(u32*)&ptr[addr & 0x7FFF] = val; +#ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); +#endif } return; } @@ -1261,7 +1299,9 @@ void ARM7Write32(u32 addr, u32 val) if (ptr) { *(u32*)&ptr[addr & 0x7FFF] = val; +#ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); +#endif } return; } @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. diff --git a/src/DSi_AES.cpp b/src/DSi_AES.cpp index 6a8ffad..dfa67bd 100644 --- a/src/DSi_AES.cpp +++ b/src/DSi_AES.cpp @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. 
@@ -165,9 +165,37 @@ void ProcessBlock_CCM_Decrypt() //printf("AES-CCM: "); _printhex2(data, 16); Swap16(data_rev, data); + AES_CTR_xcrypt_buffer(&Ctx, data_rev, 16); + for (int i = 0; i < 16; i++) CurMAC[i] ^= data_rev[i]; + AES_ECB_encrypt(&Ctx, CurMAC); + + Swap16(data, data_rev); + + //printf(" -> "); _printhex2(data, 16); + + OutputFIFO->Write(*(u32*)&data[0]); + OutputFIFO->Write(*(u32*)&data[4]); + OutputFIFO->Write(*(u32*)&data[8]); + OutputFIFO->Write(*(u32*)&data[12]); +} + +void ProcessBlock_CCM_Encrypt() +{ + u8 data[16]; + u8 data_rev[16]; + + *(u32*)&data[0] = InputFIFO->Read(); + *(u32*)&data[4] = InputFIFO->Read(); + *(u32*)&data[8] = InputFIFO->Read(); + *(u32*)&data[12] = InputFIFO->Read(); + + //printf("AES-CCM: "); _printhex2(data, 16); + + Swap16(data_rev, data); for (int i = 0; i < 16; i++) CurMAC[i] ^= data_rev[i]; + AES_CTR_xcrypt_buffer(&Ctx, data_rev, 16); AES_ECB_encrypt(&Ctx, CurMAC); Swap16(data, data_rev); @@ -232,7 +260,6 @@ void WriteCnt(u32 val) OutputDMASize = dmasize_out[(val >> 14) & 0x3]; AESMode = (val >> 28) & 0x3; - if (AESMode == 1) printf("AES-CCM TODO\n"); if (val & (1<<24)) { @@ -245,6 +272,8 @@ void WriteCnt(u32 val) // transfer start (checkme) RemBlocks = BlkCnt >> 16; + if (AESMode == 0 && (!(val & (1<<20)))) printf("AES: CCM-DECRYPT MAC FROM WRFIFO, TODO\n"); + if (RemBlocks > 0) { u8 key[16]; @@ -365,14 +394,9 @@ void Update() switch (AESMode) { case 0: ProcessBlock_CCM_Decrypt(); break; + case 1: ProcessBlock_CCM_Encrypt(); break; case 2: case 3: ProcessBlock_CTR(); break; - default: - // dorp - OutputFIFO->Write(InputFIFO->Read()); - OutputFIFO->Write(InputFIFO->Read()); - OutputFIFO->Write(InputFIFO->Read()); - OutputFIFO->Write(InputFIFO->Read()); } RemBlocks--; @@ -398,6 +422,24 @@ void Update() if (CurMAC[15-i] != MAC[i]) Cnt &= ~(1<<21); } } + else if (AESMode == 1) + { + Ctx.Iv[13] = 0x00; + Ctx.Iv[14] = 0x00; + Ctx.Iv[15] = 0x00; + AES_CTR_xcrypt_buffer(&Ctx, CurMAC, 16); + + u8 finalmac[16]; + 
Swap16(finalmac, CurMAC); + + OutputFIFO->Write(*(u32*)&finalmac[0]); + OutputFIFO->Write(*(u32*)&finalmac[4]); + OutputFIFO->Write(*(u32*)&finalmac[8]); + OutputFIFO->Write(*(u32*)&finalmac[12]); + + // CHECKME + Cnt &= ~(1<<21); + } else { // CHECKME diff --git a/src/DSi_AES.h b/src/DSi_AES.h index 354c4a7..77ef464 100644 --- a/src/DSi_AES.h +++ b/src/DSi_AES.h @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. diff --git a/src/DSi_Camera.cpp b/src/DSi_Camera.cpp index 45061b2..cc44052 100644 --- a/src/DSi_Camera.cpp +++ b/src/DSi_Camera.cpp @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. diff --git a/src/DSi_Camera.h b/src/DSi_Camera.h index 78629b5..844a4d2 100644 --- a/src/DSi_Camera.h +++ b/src/DSi_Camera.h @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. diff --git a/src/DSi_I2C.cpp b/src/DSi_I2C.cpp index e22c708..d58a38c 100644 --- a/src/DSi_I2C.cpp +++ b/src/DSi_I2C.cpp @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. diff --git a/src/DSi_I2C.h b/src/DSi_I2C.h index d058be1..35f1ad8 100644 --- a/src/DSi_I2C.h +++ b/src/DSi_I2C.h @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. diff --git a/src/DSi_NDMA.cpp b/src/DSi_NDMA.cpp index 19c72b6..707c777 100644 --- a/src/DSi_NDMA.cpp +++ b/src/DSi_NDMA.cpp @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. diff --git a/src/DSi_NDMA.h b/src/DSi_NDMA.h index d7b7483..57da289 100644 --- a/src/DSi_NDMA.h +++ b/src/DSi_NDMA.h @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. 
diff --git a/src/DSi_NWifi.cpp b/src/DSi_NWifi.cpp index 79bc632..73cf4b4 100644 --- a/src/DSi_NWifi.cpp +++ b/src/DSi_NWifi.cpp @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. @@ -21,6 +21,8 @@ #include "DSi.h" #include "DSi_NWifi.h" #include "SPI.h" +#include "WifiAP.h" +#include "Platform.h" const u8 CIS0[256] = @@ -111,15 +113,37 @@ const u8 CIS1[256] = }; -// hax -DSi_NWifi* hax_wifi; -void triggerirq(u32 param) +DSi_NWifi* Ctx = nullptr; + + +DSi_NWifi::DSi_NWifi(DSi_SDHost* host) : DSi_SDDevice(host) { - hax_wifi->SetIRQ_F1_Counter(0); + // HACK + // the mailboxes are supposed to be 0x80 bytes + // however, as we do things instantly, emulating this is meaningless + // and only adds complication + for (int i = 0; i < 8; i++) + Mailbox[i] = new FIFO<u8>(0x600);//0x80); + + // extra mailbox acting as a bigger RX buffer + Mailbox[8] = new FIFO<u8>(0x8000); + + // this seems to control whether the firmware upload is done + EEPROMReady = 0; + + Ctx = this; } +DSi_NWifi::~DSi_NWifi() +{ + for (int i = 0; i < 9; i++) + delete Mailbox[i]; -DSi_NWifi::DSi_NWifi(DSi_SDHost* host) : DSi_SDDevice(host) + NDS::CancelEvent(NDS::Event_DSi_NWifi); + Ctx = nullptr; +} + +void DSi_NWifi::Reset() { TransferCmd = 0xFFFFFFFF; RemSize = 0; @@ -134,9 +158,8 @@ DSi_NWifi::DSi_NWifi(DSi_SDHost* host) : DSi_SDDevice(host) WindowReadAddr = 0; WindowWriteAddr = 0; - // TODO: check the actual mailbox size (presumably 0x200) - for (int i = 0; i < 8; i++) - Mailbox[i] = new FIFO<u8>(0x200); + for (int i = 0; i < 9; i++) + Mailbox[i]->Clear(); u8* mac = SPI_Firmware::GetWifiMAC(); printf("NWifi MAC: %02X:%02X:%02X:%02X:%02X:%02X\n", @@ -158,15 +181,17 @@ DSi_NWifi::DSi_NWifi(DSi_SDHost* host) : DSi_SDDevice(host) *(u16*)&EEPROM[0x004] = chk; - EEPROMReady = 0; - + // TODO: SDIO reset shouldn't reset this + // this is reset by the internal reset register, and that also resets EEPROM init BootPhase = 0; -} 
-DSi_NWifi::~DSi_NWifi() -{ - for (int i = 0; i < 8; i++) - delete Mailbox[i]; + ErrorMask = 0; + ScanTimer = 0; + + BeaconTimer = 0x10A2220ULL; + ConnectionStatus = 0; + + NDS::CancelEvent(NDS::Event_DSi_NWifi); } @@ -283,6 +308,7 @@ u8 DSi_NWifi::F1_Read(u32 addr) if (addr < 0x100) { u8 ret = Mailbox[4]->Read(); + if (addr == 0xFF) DrainRXBuffer(); UpdateIRQ_F1(); return ret; } @@ -348,6 +374,7 @@ u8 DSi_NWifi::F1_Read(u32 addr) else if (addr < 0x1000) { u8 ret = Mailbox[4]->Read(); + if (addr == 0xFFF) DrainRXBuffer(); UpdateIRQ_F1(); return ret; } @@ -372,11 +399,12 @@ u8 DSi_NWifi::F1_Read(u32 addr) else { u8 ret = Mailbox[4]->Read(); + if (addr == 0x3FFF) DrainRXBuffer(); UpdateIRQ_F1(); return ret; } - printf("NWIFI: unknown func1 read %05X\n", addr); + //printf("NWIFI: unknown func1 read %05X\n", addr); return 0; } @@ -663,13 +691,13 @@ void DSi_NWifi::HandleCommand() switch (BootPhase) { case 0: return BMI_Command(); - case 1: return WMI_Command(); + case 1: return HTC_Command(); + case 2: return WMI_Command(); } } void DSi_NWifi::BMI_Command() { - // HLE command handling stub u32 cmd = MB_Read32(0); switch (cmd) @@ -678,8 +706,8 @@ void DSi_NWifi::BMI_Command() { printf("BMI_DONE\n"); EEPROMReady = 1; // GROSS FUCKING HACK - u8 ready_msg[8] = {0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00}; - SendWMIFrame(ready_msg, 8, 0, 0x00, 0x0000); + u8 ready_msg[6] = {0x0A, 0x00, 0x08, 0x06, 0x16, 0x00}; + SendWMIEvent(0, 0x0001, ready_msg, 6); BootPhase = 1; } return; @@ -743,7 +771,7 @@ void DSi_NWifi::BMI_Command() { u32 len = MB_Read32(0); printf("BMI LZ write %08X\n", len); - //FILE* f = fopen("wififirm.bin", "ab"); + //FILE* f = fopen("debug/wififirm.bin", "ab"); for (int i = 0; i < len; i++) { @@ -762,15 +790,13 @@ void DSi_NWifi::BMI_Command() } } -void DSi_NWifi::WMI_Command() +void DSi_NWifi::HTC_Command() { - // HLE command handling stub u16 h0 = MB_Read16(0); u16 len = MB_Read16(0); u16 h2 = MB_Read16(0); u16 cmd = MB_Read16(0); - printf("WMI: cmd 
%04X\n", cmd); switch (cmd) { @@ -778,59 +804,631 @@ void DSi_NWifi::WMI_Command() { u16 svc_id = MB_Read16(0); u16 conn_flags = MB_Read16(0); - - u8 svc_resp[10]; - *(u16*)&svc_resp[0] = 0x0003; - *(u16*)&svc_resp[2] = svc_id; - svc_resp[4] = 0; - svc_resp[5] = (svc_id & 0xFF) + 1; - *(u16*)&svc_resp[6] = 0x0001; - *(u16*)&svc_resp[8] = 0x0001; - SendWMIFrame(svc_resp, 10, 0, 0x00, 0x0000); + printf("service connect %04X %04X %04X\n", svc_id, conn_flags, MB_Read16(0)); + + u8 svc_resp[8]; + // responses from hardware: + // 0003 0100 00 01 0602 00 00 + // 0003 0101 00 02 0600 00 00 + // 0003 0102 00 03 0600 00 00 + // 0003 0103 00 04 0600 00 00 + // 0003 0104 00 05 0600 00 00 + *(u16*)&svc_resp[0] = svc_id; + svc_resp[2] = 0; + svc_resp[3] = (svc_id & 0xFF) + 1; + *(u16*)&svc_resp[4] = (svc_id==0x0100) ? 0x0602 : 0x0600; // max message size + *(u16*)&svc_resp[6] = 0x0000; + SendWMIEvent(0, 0x0003, svc_resp, 8); } break; case 0x0004: // setup complete { - u8 ready_evt[14]; - memset(ready_evt, 0, 14); - *(u16*)&ready_evt[0] = 0x1001; - memcpy(&ready_evt[2], SPI_Firmware::GetWifiMAC(), 6); - ready_evt[8] = 0x02; - *(u32*)&ready_evt[10] = 0x23000024; - // ctrl[0] = trailer size - // trailer[1] = trailer extra size - // trailer[0] = trailer type??? 
- SendWMIFrame(ready_evt, 14, 1, 0x00, 0x0000); + u8 ready_evt[12]; + memcpy(&ready_evt[0], SPI_Firmware::GetWifiMAC(), 6); + ready_evt[6] = 0x02; + ready_evt[7] = 0; + *(u32*)&ready_evt[8] = 0x2300006C; + SendWMIEvent(1, 0x1001, ready_evt, 12); + + u8 regdomain_evt[4]; + *(u32*)&regdomain_evt[0] = 0x80000000 | (*(u16*)&EEPROM[0x008] & 0x0FFF); + SendWMIEvent(1, 0x1006, regdomain_evt, 4); + + BootPhase = 2; + NDS::ScheduleEvent(NDS::Event_DSi_NWifi, true, 33611, MSTimer, 0); } break; default: - printf("unknown WMI command %04X\n", cmd); + printf("unknown HTC command %04X\n", cmd); + for (int i = 0; i < len; i++) + { + printf("%02X ", Mailbox[0]->Read()); + if ((i&0xF)==0xF) printf("\n"); + } + printf("\n"); break; } MB_Drain(0); } -void DSi_NWifi::SendWMIFrame(u8* data, u32 len, u8 ep, u8 flags, u16 ctrl) +void DSi_NWifi::WMI_Command() +{ + u16 h0 = MB_Read16(0); + u16 len = MB_Read16(0); + u16 h2 = MB_Read16(0); + + u8 ep = h0 & 0xFF; + if (ep > 0x01) // data endpoints + { + WMI_SendPacket(len); + } + else + { + u16 cmd = MB_Read16(0); + + switch (cmd) + { + case 0x0001: // connect to network + { + WMI_ConnectToNetwork(); + } + break; + + case 0x0003: // disconnect + { + if (ConnectionStatus != 1) + printf("WMI: ?? trying to disconnect while not connected\n"); + + printf("WMI: disconnect\n"); + ConnectionStatus = 0; + + u8 reply[11]; + *(u16*)&reply[0] = 3; // checkme + memcpy(&reply[2], WifiAP::APMac, 6); + reply[8] = 3; // disconnect reason (via cmd) + reply[9] = 0; // assoc-response length (none here) + reply[10] = 0; // we need atleast one byte here, even if there is no assoc-response + SendWMIEvent(1, 0x1003, reply, 11); + } + break; + + case 0x0004: // synchronize + { + Mailbox[0]->Read(); + // TODO?? + } + break; + + case 0x0005: // create priority stream + { + // TODO??? + // there's a lot of crap in there.
+ } + break; + + case 0x0007: // start scan + { + u32 forcefg = MB_Read32(0); + u32 legacy = MB_Read32(0); + u32 scantime = MB_Read32(0); + u32 forceinterval = MB_Read32(0); + u8 scantype = Mailbox[0]->Read(); + u8 nchannels = Mailbox[0]->Read(); + + printf("WMI: start scan, forceFG=%d, legacy=%d, scanTime=%d, interval=%d, scanType=%d, chan=%d\n", + forcefg, legacy, scantime, forceinterval, scantype, nchannels); + + if (ScanTimer > 0) + { + printf("!! CHECKME: START SCAN BUT WAS ALREADY SCANNING (%d)\n", ScanTimer); + } + + // checkme + ScanTimer = scantime*5; + } + break; + + case 0x0008: // set scan params + { + // TODO: do something with the params!! + } + break; + + case 0x0009: // set BSS filter + { + // TODO: do something with the params!! + u8 bssfilter = Mailbox[0]->Read(); + Mailbox[0]->Read(); + Mailbox[0]->Read(); + Mailbox[0]->Read(); + u32 iemask = MB_Read32(0); + + printf("WMI: set BSS filter, filter=%02X, iemask=%08X\n", bssfilter, iemask); + } + break; + + case 0x000A: // set probed BSSID + { + u8 id = Mailbox[0]->Read(); + u8 flags = Mailbox[0]->Read(); + u8 len = Mailbox[0]->Read(); + + char ssid[33] = {0}; + for (int i = 0; i < len && i < 32; i++) + ssid[i] = Mailbox[0]->Read(); + + // TODO: store it somewhere + printf("WMI: set probed SSID: id=%d, flags=%02X, len=%d, SSID=%s\n", id, flags, len, ssid); + } + break; + + case 0x000D: // set disconnect timeout + { + Mailbox[0]->Read(); + // TODO?? + } + break; + + case 0x000E: // get channel list + { + int nchan = 11; // TODO: customize?? 
+ u8 reply[2 + (nchan*2) + 2]; + + reply[0] = 0; + reply[1] = nchan; + for (int i = 0; i < nchan; i++) + *(u16*)&reply[2 + (i*2)] = 2412 + (i*5); + *(u16*)&reply[2 + (nchan*2)] = 0; + + SendWMIEvent(1, 0x000E, reply, 4+(nchan*2)); + } + break; + + case 0x0011: // set channel params + { + Mailbox[0]->Read(); + u8 scan = Mailbox[0]->Read(); + u8 phymode = Mailbox[0]->Read(); + u8 len = Mailbox[0]->Read(); + + u16 channels[32]; + for (int i = 0; i < len && i < 32; i++) + channels[i] = MB_Read16(0); + + // TODO: store it somewhere + printf("WMI: set channel params: scan=%d, phymode=%d, len=%d, channels=", scan, phymode, len); + for (int i = 0; i < len && i < 32; i++) + printf("%d,", channels[i]); + printf("\n"); + } + break; + + case 0x0012: // set power mode + { + Mailbox[0]->Read(); + // TODO?? + } + break; + + case 0x0017: // dummy? + Mailbox[0]->Read(); + break; + + case 0x0022: // set error bitmask + { + ErrorMask = MB_Read32(0); + } + break; + + case 0x002E: // extension shit + { + u32 extcmd = MB_Read32(0); + switch (extcmd) + { + case 0x2008: // 'heartbeat'?? + { + u32 cookie = MB_Read32(0); + u32 source = MB_Read32(0); + + u8 reply[12]; + *(u32*)&reply[0] = 0x3007; + *(u32*)&reply[4] = cookie; + *(u32*)&reply[8] = source; + + SendWMIEvent(1, 0x1010, reply, 12); + } + break; + + default: + printf("WMI: unknown ext cmd 002E:%04X\n", extcmd); + break; + } + } + break; + + case 0x003D: // set keepalive interval + { + Mailbox[0]->Read(); + // TODO?? + } + break; + + case 0x0041: // 'WMI_SET_WSC_STATUS_CMD' + { + Mailbox[0]->Read(); + // TODO?? + } + break; + + case 0x0047: // cmd47 -- timer shenanigans?? + { + // + } + break; + + case 0x0048: // not supported by DSi?? + { + MB_Read32(0); + MB_Read32(0); + Mailbox[0]->Read(); + Mailbox[0]->Read(); + } + break; + + case 0x0049: // 'host exit notify' + { + // + } + break; + + case 0xF000: // set bitrate + { + // TODO! 
+ Mailbox[0]->Read(); + Mailbox[0]->Read(); + Mailbox[0]->Read(); + } + break; + + default: + printf("unknown WMI command %04X (header: %04X:%04X:%04X)\n", cmd, h0, len, h2); + for (int i = 0; i < len-2; i++) + { + printf("%02X ", Mailbox[0]->Read()); + if ((i&0xF)==0xF) printf("\n"); + } + printf("\n"); + break; + } + } + + if (h0 & (1<<8)) + SendWMIAck(ep); + + MB_Drain(0); +} + +void DSi_NWifi::WMI_ConnectToNetwork() { - u32 wlen = 0; + u8 type = Mailbox[0]->Read(); + u8 auth11 = Mailbox[0]->Read(); + u8 auth = Mailbox[0]->Read(); + u8 pCryptoType = Mailbox[0]->Read(); + u8 pCryptoLen = Mailbox[0]->Read(); + u8 gCryptoType = Mailbox[0]->Read(); + u8 gCryptoLen = Mailbox[0]->Read(); + u8 ssidLen = Mailbox[0]->Read(); + + char ssid[33] = {0}; + for (int i = 0; i < 32; i++) + ssid[i] = Mailbox[0]->Read(); + if (ssidLen <= 32) + ssid[ssidLen] = '\0'; + + u16 channel = MB_Read16(0); + + u8 bssid[6]; + *(u32*)&bssid[0] = MB_Read32(0); + *(u16*)&bssid[4] = MB_Read16(0); + + u32 flags = MB_Read32(0); + + if ((type != 0x01) || + (auth11 != 0x01) || + (auth != 0x01) || + (pCryptoType != 0x01) || + (gCryptoType != 0x01) || + (memcmp(bssid, WifiAP::APMac, 6))) + { + printf("WMI_Connect: bad parameters\n"); + // TODO: send disconnect?? + return; + } + + printf("WMI: connecting to network %s\n", ssid); - Mailbox[4]->Write(ep); // eid - Mailbox[4]->Write(flags); // flags - MB_Write16(4, len); // payload length - MB_Write16(4, ctrl); // ctrl - wlen += 6; + u8 reply[20]; + + // hope this is right! + *(u16*)&reply[0] = 2437; // channel + memcpy(&reply[2], WifiAP::APMac, 6); // BSSID + *(u16*)&reply[8] = 128; // listen interval + *(u16*)&reply[10] = 128; // beacon interval + *(u32*)&reply[12] = 0x01; // network type + + reply[16] = 0x16; // beaconIeLen ??? + reply[17] = 0x2F; // assocReqLen + reply[18] = 0x16; // assocRespLen + reply[19] = 0; // ????? 
+ + SendWMIEvent(1, 0x1002, reply, 20); + + ConnectionStatus = 1; +} + +void DSi_NWifi::WMI_SendPacket(u16 len) +{ + if (ConnectionStatus != 1) + { + printf("WMI: !! trying to send shit while not connected\n"); + // TODO: report error?? + return; + } + + // header??? + // packets with bit1=1 are something special (sync??) + // otherwise, ???? + // header is 001C on ARP frames, 0000 otherwise + u16 hdr = MB_Read16(0); + hdr = ((hdr & 0xFF00) >> 8) | ((hdr & 0x00FF) << 8); + u16 type = hdr & 0x0003; + + if (type == 2) // data sync + { + printf("WMI: data sync\n"); + + /*Mailbox[8]->Write(2); // eid + Mailbox[8]->Write(0x00); // flags + MB_Write16(8, 2); // data length + Mailbox[8]->Write(0); // + Mailbox[8]->Write(0); // + MB_Write16(8, 0x0200); // + + DrainRXBuffer();*/ + return; + } + + if (type) + { + printf("WMI: special frame %04X len=%d\n", hdr, len); + for (int i = 0; i < len-2; i++) + { + printf("%02X ", Mailbox[0]->Read()); + if ((i&0xF)==0xF) printf("\n"); + } + printf("\n"); + return; + } + + printf("WMI: send packet, hdr=%04X, len=%d\n", hdr, len); + + u8 dstmac[6]; + u8 srcmac[6]; + u16 plen; + + *(u32*)&dstmac[0] = MB_Read32(0); + *(u16*)&dstmac[4] = MB_Read16(0); + *(u32*)&srcmac[0] = MB_Read32(0); + *(u16*)&srcmac[4] = MB_Read16(0); + plen = MB_Read16(0); + plen = ((plen & 0xFF00) >> 8) | ((plen & 0x00FF) << 8); + + if (plen > len-16) + { + printf("WMI: bad packet length %d > %d\n", plen, len-16); + return; + } + + u32 h0 = MB_Read32(0); + u16 h1 = MB_Read16(0); + + if (h0 != 0x0003AAAA || h1 != 0x0000) + { + printf("WMI: bad LLC/SLIP header\n"); + return; + } + + u16 ethertype = MB_Read16(0); + + int lan_len = (plen - 8) + 14; + + memcpy(&LANBuffer[0], dstmac, 6); // destination MAC + memcpy(&LANBuffer[6], srcmac, 6); // source MAC + *(u16*)&LANBuffer[12] = ethertype; // type + for (int i = 0; i < lan_len-14; i++) + { + LANBuffer[14+i] = Mailbox[0]->Read(); + } + + /*for (int i = 0; i < lan_len; i++) + { + printf("%02X ", LANBuffer[i]); + if 
((i&0xF)==0xF) printf("\n"); + } + printf("\n");*/ + + Platform::LAN_SendPacket(LANBuffer, lan_len); +} + +void DSi_NWifi::SendWMIEvent(u8 ep, u16 id, u8* data, u32 len) +{ + if (!Mailbox[8]->CanFit(6+len+2+8)) + { + printf("NWifi: !! not enough space in RX buffer for WMI event %04X\n", id); + return; + } + + Mailbox[8]->Write(ep); // eid + Mailbox[8]->Write(0x02); // flags (trailer) + MB_Write16(8, len+2+8); // data length (plus event ID and trailer) + Mailbox[8]->Write(8); // trailer length + Mailbox[8]->Write(0); // + MB_Write16(8, id); // event ID for (int i = 0; i < len; i++) { - Mailbox[4]->Write(data[i]); - wlen++; + Mailbox[8]->Write(data[i]); } - for (; wlen & 0x7F; wlen++) - Mailbox[4]->Write(0); + // trailer + Mailbox[8]->Write(0x02); + Mailbox[8]->Write(0x06); + Mailbox[8]->Write(0x00); + Mailbox[8]->Write(0x00); + Mailbox[8]->Write(0x00); + Mailbox[8]->Write(0x00); + Mailbox[8]->Write(0x00); + Mailbox[8]->Write(0x00); + + DrainRXBuffer(); +} + +void DSi_NWifi::SendWMIAck(u8 ep) +{ + if (!Mailbox[8]->CanFit(6+12)) + { + printf("NWifi: !! not enough space in RX buffer for WMI ack (ep #%d)\n", ep); + return; + } + + Mailbox[8]->Write(0); // eid + Mailbox[8]->Write(0x02); // flags (trailer) + MB_Write16(8, 0xC); // data length (plus trailer) + Mailbox[8]->Write(0xC); // trailer length + Mailbox[8]->Write(0); // + + // credit report + Mailbox[8]->Write(0x01); + Mailbox[8]->Write(0x02); + Mailbox[8]->Write(ep); + Mailbox[8]->Write(0x01); + + // lookahead + Mailbox[8]->Write(0x02); + Mailbox[8]->Write(0x06); + Mailbox[8]->Write(0x00); + Mailbox[8]->Write(0x00); + Mailbox[8]->Write(0x00); + Mailbox[8]->Write(0x00); + Mailbox[8]->Write(0x00); + Mailbox[8]->Write(0x00); + + DrainRXBuffer(); +} + +void DSi_NWifi::SendWMIBSSInfo(u8 type, u8* data, u32 len) +{ + if (!Mailbox[8]->CanFit(6+len+2+16)) + { + printf("NWifi: !! not enough space in RX buffer for WMI BSSINFO event\n"); + return; + } + + // TODO: check when version>=2 frame type is used? 
+ // I observed the version<2 variant on my DSi + + Mailbox[8]->Write(1); // eid + Mailbox[8]->Write(0x00); // flags + MB_Write16(8, len+2+16); // data length (plus event ID and trailer) + Mailbox[8]->Write(0xFF); // trailer length + Mailbox[8]->Write(0xFF); // + MB_Write16(8, 0x1004); // event ID + + MB_Write16(8, 2437); // channel (6) (checkme!) + Mailbox[8]->Write(type); + Mailbox[8]->Write(0x1B); // 'snr' + MB_Write16(8, 0xFFBC); // RSSI + MB_Write32(8, *(u32*)&WifiAP::APMac[0]); + MB_Write16(8, *(u16*)&WifiAP::APMac[4]); + MB_Write32(8, 0); // ieMask + + for (int i = 0; i < len; i++) + { + Mailbox[8]->Write(data[i]); + } + + DrainRXBuffer(); +} + +void DSi_NWifi::CheckRX() +{ + if (!Mailbox[8]->CanFit(2048)) + return; + + int rxlen = Platform::LAN_RecvPacket(LANBuffer); + if (rxlen > 0) + { + //printf("WMI packet recv %04X %04X %04X\n", *(u16*)&LANBuffer[0], *(u16*)&LANBuffer[2], *(u16*)&LANBuffer[4]); + // check destination MAC + if (*(u32*)&LANBuffer[0] != 0xFFFFFFFF || *(u16*)&LANBuffer[4] != 0xFFFF) + { + if (memcmp(&LANBuffer[0], &EEPROM[0x00A], 6)) + return; + } + + // check source MAC, in case we get a packet we just sent out + if (!memcmp(&LANBuffer[6], &EEPROM[0x00A], 6)) + return; + + // packet is good + + printf("WMI: receive packet %04X, len=%d\n", *(u16*)&LANBuffer[12], rxlen); + + /*for (int i = 0; i < rxlen; i++) + { + printf("%02X ", LANBuffer[i]); + if ((i&0xF)==0xF) printf("\n"); + } + printf("\n");*/ + + int datalen = rxlen - 14; // length of packet body + + u16 hdr = 0x0000; + //if (*(u16*)&LANBuffer[12] == 0x0608) // HAX!!! + // hdr = 0x1C00; + hdr = 0x80; + + // TODO: not hardcode the endpoint ID!! 
+ u8 ep = 2; + + Mailbox[8]->Write(ep); + Mailbox[8]->Write(0x00); + MB_Write16(8, 16 + 8 + datalen); + Mailbox[8]->Write(0); + Mailbox[8]->Write(0); + + MB_Write16(8, hdr); + MB_Write32(8, *(u32*)&LANBuffer[0]); + MB_Write16(8, *(u16*)&LANBuffer[4]); + MB_Write32(8, *(u32*)&LANBuffer[6]); + MB_Write16(8, *(u16*)&LANBuffer[10]); + u16 plen = datalen + 8; + plen = ((plen & 0xFF00) >> 8) | ((plen & 0x00FF) << 8); + MB_Write16(8, plen); + + MB_Write16(8, 0xAAAA); + MB_Write16(8, 0x0003); + MB_Write16(8, 0x0000); + MB_Write16(8, *(u16*)&LANBuffer[12]); + + for (int i = 0; i < datalen; i++) + Mailbox[8]->Write(LANBuffer[14+i]); + + DrainRXBuffer(); + } } @@ -849,7 +1447,7 @@ u32 DSi_NWifi::WindowRead(u32 addr) // base address of EEPROM data // TODO find what the actual address is! return 0x1FFC00; - case 0x58: return EEPROMReady; // hax + case 0x58: return EEPROMReady; } return 0; @@ -879,3 +1477,69 @@ void DSi_NWifi::WindowWrite(u32 addr, u32 val) { printf("NWifi: window write %08X %08X\n", addr, val); } + + +void DSi_NWifi::_MSTimer() +{ + BeaconTimer++; + + if (ScanTimer > 0) + { + ScanTimer--; + + // send a beacon + if (!(BeaconTimer & 0x7F)) + { + u8 beacon[] = + { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // timestamp + 0x80, 0x00, // beacon interval + 0x21, 0x00, // capability, + 0x01, 0x08, 0x82, 0x84, 0x8B, 0x96, 0x0C, 0x12, 0x18, 0x24, // rates + 0x03, 0x01, 0x06, // channel + 0x05, 0x04, 0x00, 0x00, 0x00, 0x00, // TIM + 0x00, 0x07, 'm', 'e', 'l', 'o', 'n', 'A', 'P', // SSID + }; + + SendWMIBSSInfo(0x01, beacon, sizeof(beacon)); + printf("send beacon\n"); + } + + if (ScanTimer == 0) + { + u32 status = 0; + SendWMIEvent(1, 0x100A, (u8*)&status, 4); + } + } + + if (ConnectionStatus == 1) + { + //if (Mailbox[4]->IsEmpty()) + CheckRX(); + } +} + +void DSi_NWifi::DrainRXBuffer() +{ + while (Mailbox[8]->Level() >= 6) + { + u16 len = Mailbox[8]->Peek(2) | (Mailbox[8]->Peek(3) << 8); + u32 totallen = len + 6; + u32 required = (totallen + 0x7F) & ~0x7F; + + if 
(!Mailbox[4]->CanFit(required)) + break; + + u32 i = 0; + for (; i < totallen; i++) Mailbox[4]->Write(Mailbox[8]->Read()); + for (; i < required; i++) Mailbox[4]->Write(0); + } + + UpdateIRQ_F1(); +} + +void DSi_NWifi::MSTimer(u32 param) +{ + Ctx->_MSTimer(); + NDS::ScheduleEvent(NDS::Event_DSi_NWifi, true, 33611, MSTimer, 0); +} diff --git a/src/DSi_NWifi.h b/src/DSi_NWifi.h index e5fe637..a72d54d 100644 --- a/src/DSi_NWifi.h +++ b/src/DSi_NWifi.h @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. @@ -28,6 +28,8 @@ public: DSi_NWifi(DSi_SDHost* host); ~DSi_NWifi(); + void Reset(); + void SendCMD(u8 cmd, u32 param); void SendACMD(u8 cmd, u32 param); @@ -35,6 +37,10 @@ public: void SetIRQ_F1_Counter(u32 n); + void _MSTimer(); + + static void MSTimer(u32 param); + private: u32 TransferCmd; u32 TransferAddr; @@ -60,9 +66,18 @@ private: void HandleCommand(); void BMI_Command(); + void HTC_Command(); void WMI_Command(); - void SendWMIFrame(u8* data, u32 len, u8 ep, u8 flags, u16 ctrl); + void WMI_ConnectToNetwork(); + void WMI_SendPacket(u16 len); + + void SendWMIEvent(u8 ep, u16 id, u8* data, u32 len); + void SendWMIAck(u8 ep); + void SendWMIBSSInfo(u8 type, u8* data, u32 len); + + void CheckRX(); + void DrainRXBuffer(); u32 WindowRead(u32 addr); void WindowWrite(u32 addr, u32 val); @@ -102,7 +117,7 @@ private: while (!Mailbox[n]->IsEmpty()) Mailbox[n]->Read(); } - FIFO<u8>* Mailbox[8]; + FIFO<u8>* Mailbox[9]; u8 F0_IRQEnable; u8 F0_IRQStatus; @@ -116,6 +131,14 @@ private: u32 EEPROMReady; u32 BootPhase; + + u32 ErrorMask; + u32 ScanTimer; + + u64 BeaconTimer; + u32 ConnectionStatus; + + u8 LANBuffer[2048]; }; #endif // DSI_NWIFI_H diff --git a/src/DSi_SD.cpp b/src/DSi_SD.cpp index 7fdba0f..def7a33 100644 --- a/src/DSi_SD.cpp +++ b/src/DSi_SD.cpp @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. 
@@ -112,8 +112,8 @@ void DSi_SDHost::Reset() if (Ports[0]) delete Ports[0]; if (Ports[1]) delete Ports[1]; - Ports[0] = NULL; - Ports[1] = NULL; + Ports[0] = nullptr; + Ports[1] = nullptr; if (Num == 0) { @@ -135,6 +135,9 @@ void DSi_SDHost::Reset() Ports[0] = nwifi; } + + if (Ports[0]) Ports[0]->Reset(); + if (Ports[1]) Ports[1]->Reset(); } void DSi_SDHost::DoSavestate(Savestate* file) @@ -205,6 +208,18 @@ void DSi_SDHost::SetCardIRQ() } } +void DSi_SDHost::UpdateCardIRQ(u16 oldmask) +{ + u16 oldflags = CardIRQStatus & ~oldmask; + u16 newflags = CardIRQStatus & ~CardIRQMask; + + if ((oldflags == 0) && (newflags != 0)) // checkme + { + NDS::SetIRQ2(Num ? NDS::IRQ2_DSi_SDIO : NDS::IRQ2_DSi_SDMMC); + NDS::SetIRQ2(Num ? NDS::IRQ2_DSi_SDIO_Data1 : NDS::IRQ2_DSi_SD_Data1); + } +} + void DSi_SDHost::SendResponse(u32 val, bool last) { *(u32*)&ResponseBuffer[6] = *(u32*)&ResponseBuffer[4]; @@ -448,6 +463,7 @@ u16 DSi_SDHost::Read(u32 addr) case 0x0F6: return 0; // MMC write protect (always 0) case 0x100: return Data32IRQ; + case 0x102: return 0; case 0x104: return BlockLen32; case 0x108: return BlockCount32; } @@ -549,8 +565,8 @@ void DSi_SDHost::Write(u32 addr, u16 val) u32 oldmask = IRQMask; IRQMask = (IRQMask & 0x0000031D) | ((val & 0x8B7F) << 16); UpdateIRQ(oldmask); - if (!DataFIFO[CurFIFO]->IsEmpty()) SetIRQ(24); // checkme - if (DataFIFO[CurFIFO]->IsEmpty()) SetIRQ(25); // checkme + //if (!DataFIFO[CurFIFO]->IsEmpty()) SetIRQ(24); // checkme + //if (DataFIFO[CurFIFO]->IsEmpty()) SetIRQ(25); // checkme } return; @@ -571,8 +587,13 @@ void DSi_SDHost::Write(u32 addr, u16 val) CardIRQStatus &= val; return; case 0x038: - CardIRQMask = val & 0xC007; - SetCardIRQ(); + { + u16 oldmask = CardIRQMask; + CardIRQMask = val & 0xC007; + UpdateCardIRQ(oldmask); + } + //CardIRQMask = val & 0xC007; + //SetCardIRQ(); return; case 0x0D8: @@ -592,6 +613,9 @@ void DSi_SDHost::Write(u32 addr, u16 val) SDOption = 0x40EE; // TODO: CARD_IRQ_STAT // TODO: FIFO16 shit + + if (Ports[0]) 
Ports[0]->Reset(); + if (Ports[1]) Ports[1]->Reset(); } SoftReset = 0x0006 | (val & 0x0001); return; @@ -601,6 +625,7 @@ void DSi_SDHost::Write(u32 addr, u16 val) if (val & (1<<10)) DataFIFO32->Clear(); DataMode = ((DataCtl >> 1) & 0x1) & ((Data32IRQ >> 1) & 0x1); return; + case 0x102: return; case 0x104: BlockLen32 = val & 0x03FF; return; case 0x108: BlockCount32 = val; return; } @@ -701,6 +726,16 @@ DSi_MMCStorage::DSi_MMCStorage(DSi_SDHost* host, bool internal, const char* path File = Platform::OpenLocalFile(path, "w+b"); } } +} + +DSi_MMCStorage::~DSi_MMCStorage() +{ + if (File) fclose(File); +} + +void DSi_MMCStorage::Reset() +{ + // TODO: reset file access???? CSR = 0x00000100; // checkme @@ -723,11 +758,6 @@ DSi_MMCStorage::DSi_MMCStorage(DSi_SDHost* host, bool internal, const char* path RWCommand = 0; } -DSi_MMCStorage::~DSi_MMCStorage() -{ - if (File) fclose(File); -} - void DSi_MMCStorage::SendCMD(u8 cmd, u32 param) { if (CSR & (1<<5)) diff --git a/src/DSi_SD.h b/src/DSi_SD.h index 2862173..30da6c7 100644 --- a/src/DSi_SD.h +++ b/src/DSi_SD.h @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. 
@@ -95,6 +95,7 @@ private: void ClearIRQ(u32 irq); void SetIRQ(u32 irq); void UpdateIRQ(u32 oldmask); + void UpdateCardIRQ(u16 oldmask); }; @@ -104,6 +105,8 @@ public: DSi_SDDevice(DSi_SDHost* host) { Host = host; IRQ = false; } ~DSi_SDDevice() {} + virtual void Reset() = 0; + virtual void SendCMD(u8 cmd, u32 param) = 0; virtual void ContinueTransfer() = 0; @@ -120,6 +123,8 @@ public: DSi_MMCStorage(DSi_SDHost* host, bool internal, const char* path); ~DSi_MMCStorage(); + void Reset(); + void SetCID(u8* cid) { memcpy(CID, cid, 16); } void SendCMD(u8 cmd, u32 param); diff --git a/src/DSi_SPI_TSC.cpp b/src/DSi_SPI_TSC.cpp index 507005b..adff4a3 100644 --- a/src/DSi_SPI_TSC.cpp +++ b/src/DSi_SPI_TSC.cpp @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. diff --git a/src/DSi_SPI_TSC.h b/src/DSi_SPI_TSC.h index f3ffc32..c8e1164 100644 --- a/src/DSi_SPI_TSC.h +++ b/src/DSi_SPI_TSC.h @@ -1,5 +1,5 @@ /* - Copyright 2016-2019 Arisotura + Copyright 2016-2020 Arisotura This file is part of melonDS. 
@@ -102,6 +102,8 @@ public: bool IsEmpty() { return NumOccupied == 0; } bool IsFull() { return NumOccupied >= NumEntries; } + bool CanFit(u32 num) { return ((NumOccupied + num) <= NumEntries); } + private: u32 NumEntries; T* Entries; diff --git a/src/NDS.cpp b/src/NDS.cpp index 8cd7f93..823d39a 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -493,7 +493,7 @@ void Reset() printf("ARM7 BIOS loaded\n"); fclose(f); } - + #ifdef JIT_ENABLED ARMJIT::Reset(); #endif @@ -1137,7 +1137,9 @@ void MapSharedWRAM(u8 val) if (val == WRAMCnt) return; +#ifdef JIT_ENABLED ARMJIT_Memory::RemapSWRAM(); +#endif WRAMCnt = val; @@ -1268,6 +1270,9 @@ bool HaltInterrupted(u32 cpu) if (IF[cpu] & IE[cpu]) return true; + if ((ConsoleType == 1) && cpu && (IF2 & IE2)) + return true; + return false; } @@ -1813,14 +1818,14 @@ void debug(u32 param) } fclose(shit);*/ FILE* - /*shit = fopen("debug/dump9.bin", "wb"); + shit = fopen("debug/dump9.bin", "wb"); for (u32 i = 0x02000000; i < 0x04000000; i+=4) { u32 val = DSi::ARM9Read32(i); fwrite(&val, 4, 1, shit); } - fclose(shit);*/ - shit = fopen("debug/dump7_2.bin", "wb"); + fclose(shit); + shit = fopen("debug/dump7.bin", "wb"); for (u32 i = 0x02000000; i < 0x04000000; i+=4) { u32 val = DSi::ARM7Read32(i); diff --git a/src/RTC.cpp b/src/RTC.cpp index 0d80b2c..ba51dff 100644 --- a/src/RTC.cpp +++ b/src/RTC.cpp @@ -106,7 +106,6 @@ u8 BCD(u8 val) void ByteIn(u8 val) { - //printf("RTC IN: %02X\n", val); if (InputPos == 0) { if ((val & 0xF0) == 0x60) @@ -175,7 +174,6 @@ void ByteIn(u8 val) case 0x70: Output[0] = FreeReg; break; } } - InputPos++; return; } @@ -221,8 +219,6 @@ void ByteIn(u8 val) if (InputPos == 1) FreeReg = val; break; } - - InputPos++; } diff --git a/src/SPI.cpp b/src/SPI.cpp index ac40707..eff0a05 100644 --- a/src/SPI.cpp +++ b/src/SPI.cpp @@ -162,22 +162,26 @@ void Reset() UserSettings = userdata; - // fix touchscreen coords - #if 0 - *(u16*)&Firmware[userdata+0x58] = 0; - *(u16*)&Firmware[userdata+0x5A] = 0; - Firmware[userdata+0x5C] = 0; 
- Firmware[userdata+0x5D] = 0; - *(u16*)&Firmware[userdata+0x5E] = 255<<4; - *(u16*)&Firmware[userdata+0x60] = 191<<4; - Firmware[userdata+0x62] = 255; - Firmware[userdata+0x63] = 191; - - // disable autoboot - //Firmware[userdata+0x64] &= 0xBF; - - *(u16*)&Firmware[userdata+0x72] = CRC16(&Firmware[userdata], 0x70, 0xFFFF); + // TODO evetually: do this in DSi mode + if (NDS::ConsoleType == 0) + { + // fix touchscreen coords + *(u16*)&Firmware[userdata+0x58] = 0; + *(u16*)&Firmware[userdata+0x5A] = 0; + Firmware[userdata+0x5C] = 0; + Firmware[userdata+0x5D] = 0; + *(u16*)&Firmware[userdata+0x5E] = 255<<4; + *(u16*)&Firmware[userdata+0x60] = 191<<4; + Firmware[userdata+0x62] = 255; + Firmware[userdata+0x63] = 191; + + // disable autoboot + //Firmware[userdata+0x64] &= 0xBF; + + *(u16*)&Firmware[userdata+0x72] = CRC16(&Firmware[userdata], 0x70, 0xFFFF); + } +#if 0 // replace MAC address with random address // TODO: make optional? Firmware[0x36] = 0x00; diff --git a/src/SPU.cpp b/src/SPU.cpp index 9f6b107..cd5c5b8 100644 --- a/src/SPU.cpp +++ b/src/SPU.cpp @@ -160,6 +160,8 @@ void Channel::Reset() else BusRead32 = NDS::ARM7Read32; + KeyOn = false; + SetCnt(0); SrcAddr = 0; TimerReload = 0; @@ -187,6 +189,7 @@ void Channel::DoSavestate(Savestate* file) file->Var8(&VolumeShift); file->Var8(&Pan); + file->Var8((u8*)&KeyOn); file->Var32(&Timer); file->Var32((u32*)&Pos); file->Var16((u16*)&CurSample); @@ -417,6 +420,12 @@ void Channel::Run(s32* buf, u32 samples) { if (!(Cnt & (1<<31))) return; + if (KeyOn) + { + Start(); + KeyOn = false; + } + for (u32 s = 0; s < samples; s++) { Timer += 512; // 1 sample = 512 cycles at 16MHz @@ -69,6 +69,7 @@ public: u8 VolumeShift; u8 Pan; + bool KeyOn; u32 Timer; s32 Pos; s16 CurSample; @@ -105,7 +106,7 @@ public: if ((val & (1<<31)) && !(oldcnt & (1<<31))) { - Start(); + KeyOn = true; } } diff --git a/src/Wifi.cpp b/src/Wifi.cpp index ab82724..8188151 100644 --- a/src/Wifi.cpp +++ b/src/Wifi.cpp @@ -188,6 +188,8 @@ void Reset() 
IOPORT(0x000) = 0x1440; else if (console == 0x20) IOPORT(0x000) = 0xC340; + else if (NDS::ConsoleType == 1 && console == 0x57) + IOPORT(0x000) = 0xC340; // DSi has the modern DS-wifi variant else { printf("wifi: unknown console type %02X\n", console); diff --git a/src/WifiAP.cpp b/src/WifiAP.cpp index 3a5de37..ae9664c 100644 --- a/src/WifiAP.cpp +++ b/src/WifiAP.cpp @@ -30,9 +30,6 @@ namespace WifiAP { -#define AP_MAC 0x00, 0xF0, 0x77, 0x77, 0x77, 0x77 -#define AP_NAME "melonAP" - const u8 APMac[6] = {AP_MAC}; #define PWRITE_8(p, v) *p++ = v; @@ -130,6 +127,18 @@ void USTimer() } } +void MSTimer() +{ + USCounter += 0x400; + + u32 chk = (u32)USCounter; + if (!(chk & 0x1FC00)) + { + // send beacon every 128ms + BeaconDue = true; + } +} + int HandleManagementFrame(u8* data, int len) { diff --git a/src/WifiAP.h b/src/WifiAP.h index 97946a5..130383e 100644 --- a/src/WifiAP.h +++ b/src/WifiAP.h @@ -22,11 +22,17 @@ namespace WifiAP { +#define AP_MAC 0x00, 0xF0, 0x77, 0x77, 0x77, 0x77 +#define AP_NAME "melonAP" + +extern const u8 APMac[6]; + bool Init(); void DeInit(); void Reset(); void USTimer(); +void MSTimer(); // packet format: 12-byte TX header + original 802.11 frame int SendPacket(u8* data, int len); diff --git a/src/frontend/qt_sdl/CMakeLists.txt b/src/frontend/qt_sdl/CMakeLists.txt index 5617f25..20cde39 100644 --- a/src/frontend/qt_sdl/CMakeLists.txt +++ b/src/frontend/qt_sdl/CMakeLists.txt @@ -46,7 +46,9 @@ set(CMAKE_AUTORCC ON) find_package(Threads REQUIRED) find_package(PkgConfig REQUIRED) +find_package(Iconv REQUIRED) pkg_check_modules(SDL2 REQUIRED sdl2) +pkg_check_modules(SLIRP REQUIRED slirp) pkg_check_modules(LIBZIP REQUIRED libzip) if (WIN32 AND (CMAKE_BUILD_TYPE STREQUAL Release)) @@ -58,6 +60,7 @@ endif() target_link_libraries(melonDS ${CMAKE_THREAD_LIBS_INIT}) target_include_directories(melonDS PRIVATE ${SDL2_INCLUDE_DIRS}) +target_include_directories(melonDS PRIVATE ${SLIRP_INCLUDE_DIRS}) target_include_directories(melonDS PRIVATE 
${LIBZIP_INCLUDE_DIRS}) target_include_directories(melonDS PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") target_include_directories(melonDS PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..") @@ -65,9 +68,13 @@ target_include_directories(melonDS PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../..") target_link_libraries(melonDS core) if (BUILD_STATIC) - target_link_libraries(melonDS -static ${SDL2_LIBRARIES} ${LIBZIP_STATIC_LIBRARIES}) + target_link_libraries(melonDS -static ${SDL2_STATIC_LIBRARIES} ${SLIRP_STATIC_LIBRARIES} ${LIBZIP_STATIC_LIBRARIES}) else() - target_link_libraries(melonDS ${SDL2_LIBRARIES} ${LIBZIP_LIBRARIES}) + target_link_libraries(melonDS ${SDL2_LIBRARIES} ${SLIRP_LIBRARIES} ${LIBZIP_LIBRARIES}) +endif() + +if (NOT Iconv_IS_BUILT_IN) + target_link_libraries(melonDS iconv) endif() if (UNIX) diff --git a/src/frontend/qt_sdl/EmuSettingsDialog.cpp b/src/frontend/qt_sdl/EmuSettingsDialog.cpp index 77301b7..dc7eaf5 100644 --- a/src/frontend/qt_sdl/EmuSettingsDialog.cpp +++ b/src/frontend/qt_sdl/EmuSettingsDialog.cpp @@ -98,6 +98,7 @@ void EmuSettingsDialog::verifyFirmware() char filename[1024]; strncpy(filename, ui->txtFirmwarePath->text().toStdString().c_str(), 1023); filename[1023] = '\0'; FILE* f = Platform::OpenLocalFile(filename, "rb"); + if (!f) return; u8 chk1[0x180], chk2[0x180]; fseek(f, 0, SEEK_SET); @@ -163,9 +164,9 @@ void EmuSettingsDialog::done(int r) || strcmp(Config::DSiNANDPath, dsiNANDPath.c_str()) != 0) { if (RunningSomething - && QMessageBox::warning(this, "Reset necessary to apply changes", - "The emulation will be reset for the changes to take place", - QMessageBox::Yes, QMessageBox::Cancel) != QMessageBox::Yes) + && QMessageBox::warning(this, "Reset necessary to apply changes", + "The emulation will be reset for the changes to take place.", + QMessageBox::Ok, QMessageBox::Cancel) != QMessageBox::Ok) return; strncpy(Config::BIOS9Path, bios9Path.c_str(), 1023); Config::BIOS9Path[1023] = '\0'; @@ -290,4 +291,4 @@ void 
EmuSettingsDialog::on_chkEnableJIT_toggled() ui->chkJITLiteralOptimisations->setDisabled(disabled); ui->chkJITFastMemory->setDisabled(disabled); ui->spnJITMaximumBlockSize->setDisabled(disabled); -}
\ No newline at end of file +} diff --git a/src/frontend/qt_sdl/LAN_Socket.cpp b/src/frontend/qt_sdl/LAN_Socket.cpp index c6fbd4b..458c931 100644 --- a/src/frontend/qt_sdl/LAN_Socket.cpp +++ b/src/frontend/qt_sdl/LAN_Socket.cpp @@ -21,30 +21,20 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include "../Wifi.h" +#include "Wifi.h" #include "LAN_Socket.h" -#include "../Config.h" +#include "Config.h" +#include "FIFO.h" + +#include <slirp/libslirp.h> #ifdef __WIN32__ - #include <winsock2.h> #include <ws2tcpip.h> - #define socket_t SOCKET - #define sockaddr_t SOCKADDR #else - #include <unistd.h> - #include <arpa/inet.h> - #include <netinet/in.h> - #include <sys/types.h> - #include <sys/select.h> #include <sys/socket.h> #include <netdb.h> - #define socket_t int - #define sockaddr_t struct sockaddr - #define closesocket close -#endif - -#ifndef INVALID_SOCKET -#define INVALID_SOCKET (socket_t)-1 + #include <poll.h> + #include <time.h> #endif @@ -57,85 +47,192 @@ const u32 kDNSIP = kSubnet | 0x02; const u32 kClientIP = kSubnet | 0x10; const u8 kServerMAC[6] = {0x00, 0xAB, 0x33, 0x28, 0x99, 0x44}; -const u8 kDNSMAC[6] = {0x00, 0xAB, 0x33, 0x28, 0x99, 0x55}; -u8 PacketBuffer[2048]; -int PacketLen; -volatile int RXNum; +FIFO<u32>* RXBuffer = nullptr; + +u32 IPv4ID; + +Slirp* Ctx = nullptr; + +/*const int FDListMax = 64; +struct pollfd FDList[FDListMax]; +int FDListSize;*/ + + +#ifdef __WIN32__ + +#define poll WSAPoll + +// https://stackoverflow.com/questions/5404277/porting-clock-gettime-to-windows + +struct timespec { long tv_sec; long tv_nsec; }; +#define CLOCK_MONOTONIC 1312 + +int clock_gettime(int, struct timespec *spec) +{ + __int64 wintime; + GetSystemTimeAsFileTime((FILETIME*)&wintime); + wintime -=116444736000000000LL; //1jan1601 to 1jan1970 + spec->tv_sec = wintime / 10000000LL; //seconds + spec->tv_nsec = wintime % 10000000LL * 100; //nano-seconds + return 0; +} + +#endif // __WIN32__ + + +void RXEnqueue(const void* buf, int len) +{ + int alignedlen 
= (len + 3) & ~3; + int totallen = alignedlen + 4; + + if (!RXBuffer->CanFit(totallen >> 2)) + { + printf("slirp: !! NOT ENOUGH SPACE IN RX BUFFER\n"); + return; + } + + u32 header = (alignedlen & 0xFFFF) | (len << 16); + RXBuffer->Write(header); + for (int i = 0; i < alignedlen; i += 4) + RXBuffer->Write(((u32*)buf)[i>>2]); +} -u16 IPv4ID; +ssize_t SlirpCbSendPacket(const void* buf, size_t len, void* opaque) +{ + if (len > 2048) + { + printf("slirp: packet too big (%d)\n", len); + return 0; + } + printf("slirp: response packet of %d bytes, type %04X\n", len, ntohs(((u16*)buf)[6])); -// TODO: UDP sockets -// * use FIFO list -// * assign new socket when seeing new IP/port + RXEnqueue(buf, len); + return len; +} -typedef struct +void SlirpCbGuestError(const char* msg, void* opaque) { - u8 DestIP[4]; - u16 SourcePort; - u16 DestPort; + printf("SLIRP: error: %s\n", msg); +} - u32 SeqNum; // sequence number for incoming frames - u32 AckNum; +int64_t SlirpCbClockGetNS(void* opaque) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_sec * 1000000000LL + ts.tv_nsec; +} - // 0: unused - // 1: connected - u8 Status; +void* SlirpCbTimerNew(SlirpTimerCb cb, void* cb_opaque, void* opaque) +{ + return nullptr; +} - socket_t Backend; +void SlirpCbTimerFree(void* timer, void* opaque) +{ +} -} TCPSocket; +void SlirpCbTimerMod(void* timer, int64_t expire_time, void* opaque) +{ +} -typedef struct +void SlirpCbRegisterPollFD(int fd, void* opaque) { - u8 DestIP[4]; - u16 SourcePort; - u16 DestPort; + printf("Slirp: register poll FD %d\n", fd); + + /*if (FDListSize >= FDListMax) + { + printf("!! 
SLIRP FD LIST FULL\n"); + return; + } + + for (int i = 0; i < FDListSize; i++) + { + if (FDList[i].fd == fd) return; + } - socket_t Backend; - struct sockaddr_in BackendAddr; + FDList[FDListSize].fd = fd; + FDListSize++;*/ +} -} UDPSocket; +void SlirpCbUnregisterPollFD(int fd, void* opaque) +{ + printf("Slirp: unregister poll FD %d\n", fd); -TCPSocket TCPSocketList[16]; -UDPSocket UDPSocketList[4]; + /*if (FDListSize < 1) + { + printf("!! SLIRP FD LIST EMPTY\n"); + return; + } -int UDPSocketID = 0; + for (int i = 0; i < FDListSize; i++) + { + if (FDList[i].fd == fd) + { + FDListSize--; + FDList[i] = FDList[FDListSize]; + } + }*/ +} +void SlirpCbNotify(void* opaque) +{ + printf("Slirp: notify???\n"); +} + +SlirpCb cb = +{ + .send_packet = SlirpCbSendPacket, + .guest_error = SlirpCbGuestError, + .clock_get_ns = SlirpCbClockGetNS, + .timer_new = SlirpCbTimerNew, + .timer_free = SlirpCbTimerFree, + .timer_mod = SlirpCbTimerMod, + .register_poll_fd = SlirpCbRegisterPollFD, + .unregister_poll_fd = SlirpCbUnregisterPollFD, + .notify = SlirpCbNotify +}; bool Init() { - // TODO: how to deal with cases where an adapter is unplugged or changes config?? 
- //if (PCapLib) return true; + IPv4ID = 0; + + //FDListSize = 0; + //memset(FDList, 0, sizeof(FDList)); - //Lib = NULL; - PacketLen = 0; - RXNum = 0; + RXBuffer = new FIFO<u32>(0x8000 >> 2); - IPv4ID = 1; + SlirpConfig cfg; + memset(&cfg, 0, sizeof(cfg)); + cfg.version = 1; - memset(TCPSocketList, 0, sizeof(TCPSocketList)); - memset(UDPSocketList, 0, sizeof(UDPSocketList)); + cfg.in_enabled = true; + *(u32*)&cfg.vnetwork = htonl(kSubnet); + *(u32*)&cfg.vnetmask = htonl(0xFFFFFF00); + *(u32*)&cfg.vhost = htonl(kServerIP); + cfg.vhostname = "melonServer"; + *(u32*)&cfg.vdhcp_start = htonl(kClientIP); + *(u32*)&cfg.vnameserver = htonl(kDNSIP); - UDPSocketID = 0; + Ctx = slirp_new(&cfg, &cb, nullptr); return true; } void DeInit() { - for (int i = 0; i < (sizeof(TCPSocketList)/sizeof(TCPSocket)); i++) + if (Ctx) { - TCPSocket* sock = &TCPSocketList[i]; - if (sock->Backend) closesocket(sock->Backend); + slirp_cleanup(Ctx); + Ctx = nullptr; } - for (int i = 0; i < (sizeof(UDPSocketList)/sizeof(UDPSocket)); i++) + if (RXBuffer) { - UDPSocket* sock = &UDPSocketList[i]; - if (sock->Backend) closesocket(sock->Backend); + delete RXBuffer; + RXBuffer = nullptr; } } @@ -179,165 +276,6 @@ void FinishUDPFrame(u8* data, int len) *(u16*)&udpheader[6] = htons(tmp); } -void FinishTCPFrame(u8* data, int len) -{ - u8* ipheader = &data[0xE]; - u8* tcpheader = &data[0x22]; - - // lengths - *(u16*)&ipheader[2] = htons(len - 0xE); - - // IP checksum - u32 tmp = 0; - - for (int i = 0; i < 20; i += 2) - tmp += ntohs(*(u16*)&ipheader[i]); - while (tmp >> 16) - tmp = (tmp & 0xFFFF) + (tmp >> 16); - tmp ^= 0xFFFF; - *(u16*)&ipheader[10] = htons(tmp); - - u32 tcplen = ntohs(*(u16*)&ipheader[2]) - 0x14; - - // TCP checksum - tmp = 0; - tmp += ntohs(*(u16*)&ipheader[12]); - tmp += ntohs(*(u16*)&ipheader[14]); - tmp += ntohs(*(u16*)&ipheader[16]); - tmp += ntohs(*(u16*)&ipheader[18]); - tmp += ntohs(0x0600); - tmp += tcplen; - for (u8* i = tcpheader; i < &tcpheader[tcplen-1]; i += 2) - tmp += 
ntohs(*(u16*)i); - if (tcplen & 1) - tmp += ntohs((u_short)tcpheader[tcplen-1]); - while (tmp >> 16) - tmp = (tmp & 0xFFFF) + (tmp >> 16); - tmp ^= 0xFFFF; - *(u16*)&tcpheader[16] = htons(tmp); -} - - -void HandleDHCPFrame(u8* data, int len) -{ - u8 type = 0xFF; - - u32 transid = *(u32*)&data[0x2E]; - - u8* options = &data[0x11A]; - for (;;) - { - if (options >= &data[len]) break; - u8 opt = *options++; - if (opt == 255) break; - - u8 len = *options++; - switch (opt) - { - case 53: // frame type - type = options[0]; - break; - } - - options += len; - } - - if (type == 0xFF) - { - printf("DHCP: bad frame\n"); - return; - } - - printf("DHCP: frame type %d, transid %08X\n", type, transid); - - if (type == 1 || // discover - type == 3) // request - { - u8 resp[512]; - u8* out = &resp[0]; - - // ethernet - memcpy(out, &data[6], 6); out += 6; - memcpy(out, kServerMAC, 6); out += 6; - *(u16*)out = htons(0x0800); out += 2; - - // IP - u8* ipheader = out; - *out++ = 0x45; - *out++ = 0x00; - *(u16*)out = 0; out += 2; // total length - *(u16*)out = htons(IPv4ID); out += 2; IPv4ID++; - *out++ = 0x00; - *out++ = 0x00; - *out++ = 0x80; // TTL - *out++ = 0x11; // protocol (UDP) - *(u16*)out = 0; out += 2; // checksum - *(u32*)out = htonl(kServerIP); out += 4; // source IP - if (type == 1) - { - *(u32*)out = htonl(0xFFFFFFFF); out += 4; // destination IP - } - else if (type == 3) - { - *(u32*)out = htonl(kClientIP); out += 4; // destination IP - } - - // UDP - u8* udpheader = out; - *(u16*)out = htons(67); out += 2; // source port - *(u16*)out = htons(68); out += 2; // destination port - *(u16*)out = 0; out += 2; // length - *(u16*)out = 0; out += 2; // checksum - - // DHCP - u8* body = out; - *out++ = 0x02; - *out++ = 0x01; - *out++ = 0x06; - *out++ = 0x00; - *(u32*)out = transid; out += 4; - *(u16*)out = 0; out += 2; // seconds elapsed - *(u16*)out = 0; out += 2; - *(u32*)out = htonl(0x00000000); out += 4; // client IP - *(u32*)out = htonl(kClientIP); out += 4; // your IP - 
*(u32*)out = htonl(kServerIP); out += 4; // server IP - *(u32*)out = htonl(0x00000000); out += 4; // gateway IP - memcpy(out, &data[6], 6); out += 6; - memset(out, 0, 10); out += 10; - memset(out, 0, 192); out += 192; - *(u32*)out = 0x63538263; out += 4; // DHCP magic - - // DHCP options - *out++ = 53; *out++ = 1; - *out++ = (type==1) ? 2 : 5; // DHCP type: offer/ack - *out++ = 1; *out++ = 4; - *(u32*)out = htonl(0xFFFFFF00); out += 4; // subnet mask - *out++ = 3; *out++ = 4; - *(u32*)out = htonl(kServerIP); out += 4; // router - *out++ = 51; *out++ = 4; - *(u32*)out = htonl(442030); out += 4; // lease time - *out++ = 54; *out++ = 4; - *(u32*)out = htonl(kServerIP); out += 4; // DHCP server - *out++ = 6; *out++ = 4; - *(u32*)out = htonl(kDNSIP); out += 4; // DNS (hax) - - *out++ = 0xFF; - memset(out, 0, 20); out += 20; - - u32 framelen = (u32)(out - &resp[0]); - if (framelen & 1) { *out++ = 0; framelen++; } - FinishUDPFrame(resp, framelen); - - // TODO: if there is already a packet queued, this will overwrite it - // that being said, this will only happen during DHCP setup, so probably - // not a big deal - - PacketLen = framelen; - memcpy(PacketBuffer, resp, PacketLen); - RXNum = 1; - } -} - void HandleDNSFrame(u8* data, int len) { u8* ipheader = &data[0xE]; @@ -458,12 +396,13 @@ void HandleDNSFrame(u8* data, int len) while (p) { struct sockaddr_in* addr = (struct sockaddr_in*)p->ai_addr; - /*printf(" -> %d.%d.%d.%d", - addr->sin_addr.S_un.S_un_b.s_b1, addr->sin_addr.S_un.S_un_b.s_b2, - addr->sin_addr.S_un.S_un_b.s_b3, addr->sin_addr.S_un.S_un_b.s_b4);*/ - - //addr_res = addr->sin_addr.S_un.S_addr; addr_res = *(u32*)&addr->sin_addr; + + printf(" -> %d.%d.%d.%d", + addr_res & 0xFF, (addr_res >> 8) & 0xFF, + (addr_res >> 16) & 0xFF, addr_res >> 24); + + break; p = p->ai_next; } } @@ -490,653 +429,116 @@ void HandleDNSFrame(u8* data, int len) if (framelen & 1) { *out++ = 0; framelen++; } FinishUDPFrame(resp, framelen); - // TODO: if there is already a packet queued, 
this will overwrite it - // that being said, this will only happen during DHCP setup, so probably - // not a big deal - - PacketLen = framelen; - memcpy(PacketBuffer, resp, PacketLen); - RXNum = 1; + RXEnqueue(resp, framelen); } -void UDP_BuildIncomingFrame(UDPSocket* sock, u8* data, int len) -{ - u8 resp[2048]; - u8* out = &resp[0]; - - if (len > 1536) return; - - // ethernet - memcpy(out, Wifi::GetMAC(), 6); out += 6; // hurf - memcpy(out, kServerMAC, 6); out += 6; - *(u16*)out = htons(0x0800); out += 2; - - // IP - u8* resp_ipheader = out; - *out++ = 0x45; - *out++ = 0x00; - *(u16*)out = 0; out += 2; // total length - *(u16*)out = htons(IPv4ID); out += 2; IPv4ID++; - *out++ = 0x00; - *out++ = 0x00; - *out++ = 0x80; // TTL - *out++ = 0x11; // protocol (UDP) - *(u16*)out = 0; out += 2; // checksum - memcpy(out, sock->DestIP, 4); out += 4; // source IP - *(u32*)out = htonl(kClientIP); out += 4; // destination IP - - // UDP - u8* resp_tcpheader = out; - *(u16*)out = htons(sock->DestPort); out += 2; // source port - *(u16*)out = htons(sock->SourcePort); out += 2; // destination port - *(u16*)out = htons(len+8); out += 2; // length of header+data - *(u16*)out = 0; out += 2; // checksum - - memcpy(out, data, len); out += len; - - u32 framelen = (u32)(out - &resp[0]); - FinishUDPFrame(resp, framelen); - - // TODO: if there is already a packet queued, this will overwrite it - // that being said, this will only happen during DHCP setup, so probably - // not a big deal - - PacketLen = framelen; - memcpy(PacketBuffer, resp, PacketLen); - RXNum = 1; -} - -void HandleUDPFrame(u8* data, int len) +int SendPacket(u8* data, int len) { - u8* ipheader = &data[0xE]; - u8* udpheader = &data[0x22]; - - // debug - /*for (int j = 0; j < len; j += 16) - { - int rem = len - j; - if (rem > 16) rem = 16; - for (int i = 0; i < rem; i++) - { - printf("%02X ", data[i+j]); - } - printf("\n"); - }*/ - - u16 srcport = ntohs(*(u16*)&udpheader[0]); - u16 dstport = ntohs(*(u16*)&udpheader[2]); + if 
(!Ctx) return 0; - int sockid = -1; - UDPSocket* sock; - for (int i = 0; i < (sizeof(UDPSocketList)/sizeof(UDPSocket)); i++) + if (len > 2048) { - sock = &UDPSocketList[i]; - if (sock->Backend != 0 && !memcmp(&sock->DestIP, &ipheader[16], 4) && - sock->SourcePort == srcport && sock->DestPort == dstport) - { - sockid = i; - break; - } + printf("LAN_SendPacket: error: packet too long (%d)\n", len); + return 0; } - if (sockid == -1) - { - sockid = UDPSocketID; - sock = &UDPSocketList[sockid]; - - UDPSocketID++; - if (UDPSocketID >= (sizeof(UDPSocketList)/sizeof(UDPSocket))) - UDPSocketID = 0; + u16 ethertype = ntohs(*(u16*)&data[0xC]); - if (sock->Backend != 0) + if (ethertype == 0x800) + { + u8 protocol = data[0x17]; + if (protocol == 0x11) // UDP { - printf("LANMAGIC: closing previous UDP socket #%d\n", sockid); - closesocket(sock->Backend); + u16 dstport = ntohs(*(u16*)&data[0x24]); + if (dstport == 53 && htonl(*(u32*)&data[0x1E]) == kDNSIP) // DNS + { + HandleDNSFrame(data, len); + return len; + } } - - sock->Backend = socket(AF_INET, SOCK_DGRAM, 0); - - memcpy(sock->DestIP, &ipheader[16], 4); - sock->SourcePort = srcport; - sock->DestPort = dstport; - - memset(&sock->BackendAddr, 0, sizeof(sock->BackendAddr)); - sock->BackendAddr.sin_family = AF_INET; - sock->BackendAddr.sin_port = htons(dstport); - memcpy(&sock->BackendAddr.sin_addr, &ipheader[16], 4); - /*if (bind(sock->Backend, (struct sockaddr*)&sock->BackendAddr, sizeof(sock->BackendAddr)) == -1) - { - printf("bind() shat itself :(\n"); - }*/ - - printf("LANMAGIC: opening UDP socket #%d to %d.%d.%d.%d:%d, srcport %d\n", - sockid, - ipheader[16], ipheader[17], ipheader[18], ipheader[19], - dstport, srcport); } - u16 udplen = ntohs(*(u16*)&udpheader[4]) - 8; - - printf("UDP: socket %d sending %d bytes\n", sockid, udplen); - sendto(sock->Backend, (char*)&udpheader[8], udplen, 0, - (struct sockaddr*)&sock->BackendAddr, sizeof(sock->BackendAddr)); -} - -void TCP_SYNACK(TCPSocket* sock, u8* data, int len) -{ - u8 
resp[128]; - u8* out = &resp[0]; - - u8* ipheader = &data[0xE]; - u8* tcpheader = &data[0x22]; - - u32 seqnum = htonl(*(u32*)&tcpheader[4]); - seqnum++; - sock->AckNum = seqnum; - - //printf("SYNACK SEQ=%08X|%08X\n", sock->SeqNum, sock->AckNum); - - // ethernet - memcpy(out, &data[6], 6); out += 6; - memcpy(out, kServerMAC, 6); out += 6; - *(u16*)out = htons(0x0800); out += 2; - - // IP - u8* resp_ipheader = out; - *out++ = 0x45; - *out++ = 0x00; - *(u16*)out = 0; out += 2; // total length - *(u16*)out = htons(IPv4ID); out += 2; IPv4ID++; - *out++ = 0x00; - *out++ = 0x00; - *out++ = 0x80; // TTL - *out++ = 0x06; // protocol (TCP) - *(u16*)out = 0; out += 2; // checksum - *(u32*)out = *(u32*)&ipheader[16]; out += 4; // source IP - *(u32*)out = *(u32*)&ipheader[12]; out += 4; // destination IP - - // TCP - u8* resp_tcpheader = out; - *(u16*)out = *(u16*)&tcpheader[2]; out += 2; // source port - *(u16*)out = *(u16*)&tcpheader[0]; out += 2; // destination port - *(u32*)out = htonl(sock->SeqNum); out += 4; sock->SeqNum++; // seq number - *(u32*)out = htonl(seqnum); out += 4; // ack seq number - *(u16*)out = htons(0x8012); out += 2; // flags (SYN+ACK) - *(u16*)out = htons(0x7000); out += 2; // window size (uuuh) - *(u16*)out = 0; out += 2; // checksum - *(u16*)out = 0; out += 2; // urgent pointer - - // TCP options - *out++ = 0x02; *out++ = 0x04; // max segment size - *(u16*)out = htons(0x05B4); out += 2; - *out++ = 0x01; - *out++ = 0x01; - *out++ = 0x04; *out++ = 0x02; // SACK permitted - *out++ = 0x01; - *out++ = 0x03; *out++ = 0x03; // window size - *out++ = 0x08; - - u32 framelen = (u32)(out - &resp[0]); - //if (framelen & 1) { *out++ = 0; framelen++; } - FinishTCPFrame(resp, framelen); - - // TODO: if there is already a packet queued, this will overwrite it - // that being said, this will only happen during DHCP setup, so probably - // not a big deal - - PacketLen = framelen; - memcpy(PacketBuffer, resp, PacketLen); - RXNum = 1; -} - -void TCP_ACK(TCPSocket* sock, 
bool fin) -{ - u8 resp[64]; - u8* out = &resp[0]; - - u16 flags = 0x5010; - if (fin) flags |= 0x0001; - - //printf("%sACK SEQ=%08X|%08X\n", fin?"FIN":" ", sock->SeqNum, sock->AckNum); - - // ethernet - memcpy(out, Wifi::GetMAC(), 6); out += 6; - memcpy(out, kServerMAC, 6); out += 6; - *(u16*)out = htons(0x0800); out += 2; - - // IP - u8* resp_ipheader = out; - *out++ = 0x45; - *out++ = 0x00; - *(u16*)out = 0; out += 2; // total length - *(u16*)out = htons(IPv4ID); out += 2; IPv4ID++; - *out++ = 0x00; - *out++ = 0x00; - *out++ = 0x80; // TTL - *out++ = 0x06; // protocol (TCP) - *(u16*)out = 0; out += 2; // checksum - *(u32*)out = *(u32*)&sock->DestIP; out += 4; // source IP - *(u32*)out = htonl(kClientIP); out += 4; // destination IP - - // TCP - u8* resp_tcpheader = out; - *(u16*)out = htonl(sock->DestPort); out += 2; // source port - *(u16*)out = htonl(sock->SourcePort); out += 2; // destination port - *(u32*)out = htonl(sock->SeqNum); out += 4; // seq number - *(u32*)out = htonl(sock->AckNum); out += 4; // ack seq number - *(u16*)out = htons(flags); out += 2; // flags - *(u16*)out = htons(0x7000); out += 2; // window size (uuuh) - *(u16*)out = 0; out += 2; // checksum - *(u16*)out = 0; out += 2; // urgent pointer - - u32 framelen = (u32)(out - &resp[0]); - //if (framelen & 1) { *out++ = 0; framelen++; } - FinishTCPFrame(resp, framelen); - - // TODO: if there is already a packet queued, this will overwrite it - // that being said, this will only happen during DHCP setup, so probably - // not a big deal - - PacketLen = framelen; - memcpy(PacketBuffer, resp, PacketLen); - RXNum = 1; + slirp_input(Ctx, data, len); + return len; } -void TCP_BuildIncomingFrame(TCPSocket* sock, u8* data, int len) -{ - u8 resp[2048]; - u8* out = &resp[0]; - - if (len > 1536) return; -//printf("INCOMING SEQ=%08X|%08X\n", sock->SeqNum, sock->AckNum); - // ethernet - memcpy(out, Wifi::GetMAC(), 6); out += 6; // hurf - memcpy(out, kServerMAC, 6); out += 6; - *(u16*)out = htons(0x0800); out 
+= 2; - - // IP - u8* resp_ipheader = out; - *out++ = 0x45; - *out++ = 0x00; - *(u16*)out = 0; out += 2; // total length - *(u16*)out = htons(IPv4ID); out += 2; IPv4ID++; - *out++ = 0x00; - *out++ = 0x00; - *out++ = 0x80; // TTL - *out++ = 0x06; // protocol (TCP) - *(u16*)out = 0; out += 2; // checksum - memcpy(out, sock->DestIP, 4); out += 4; // source IP - *(u32*)out = htonl(kClientIP); out += 4; // destination IP - - // TCP - u8* resp_tcpheader = out; - *(u16*)out = htons(sock->DestPort); out += 2; // source port - *(u16*)out = htons(sock->SourcePort); out += 2; // destination port - *(u32*)out = htonl(sock->SeqNum); out += 4; // seq number - *(u32*)out = htonl(sock->AckNum); out += 4; // ack seq number - *(u16*)out = htons(0x5018); out += 2; // flags (ACK, PSH) - *(u16*)out = htons(0x7000); out += 2; // window size (uuuh) - *(u16*)out = 0; out += 2; // checksum - *(u16*)out = 0; out += 2; // urgent pointer - - memcpy(out, data, len); out += len; - - u32 framelen = (u32)(out - &resp[0]); - FinishTCPFrame(resp, framelen); - - // TODO: if there is already a packet queued, this will overwrite it - // that being said, this will only happen during DHCP setup, so probably - // not a big deal - - PacketLen = framelen; - memcpy(PacketBuffer, resp, PacketLen); - RXNum = 1; - - sock->SeqNum += len; -} +const int PollListMax = 64; +struct pollfd PollList[PollListMax]; +int PollListSize; -void HandleTCPFrame(u8* data, int len) +int SlirpCbAddPoll(int fd, int events, void* opaque) { - u8* ipheader = &data[0xE]; - u8* tcpheader = &data[0x22]; - - u16 srcport = ntohs(*(u16*)&tcpheader[0]); - u16 dstport = ntohs(*(u16*)&tcpheader[2]); - u16 flags = ntohs(*(u16*)&tcpheader[12]); - - u32 tcpheaderlen = 4 * (flags >> 12); - u32 tcplen = ntohs(*(u16*)&ipheader[2]) - 0x14; - u32 tcpdatalen = tcplen - tcpheaderlen; - - /*printf("tcpflags=%04X header=%d data=%d seq=%08X|%08X\n", - flags, tcpheaderlen, tcpdatalen, - ntohl(*(u32*)&tcpheader[4]), - ntohl(*(u32*)&tcpheader[8]));*/ - - if 
(flags & 0x002) // SYN + if (PollListSize >= PollListMax) { - int sockid = -1; - TCPSocket* sock; - for (int i = 0; i < (sizeof(TCPSocketList)/sizeof(TCPSocket)); i++) - { - sock = &TCPSocketList[i]; - if (sock->Status != 0 && !memcmp(&sock->DestIP, &ipheader[16], 4) && - sock->SourcePort == srcport && sock->DestPort == dstport) - { - printf("LANMAGIC: duplicate TCP socket\n"); - sockid = i; - break; - } - } - - if (sockid == -1) - { - for (int i = 0; i < (sizeof(TCPSocketList)/sizeof(TCPSocket)); i++) - { - sock = &TCPSocketList[i]; - if (sock->Status == 0) - { - sockid = i; - break; - } - } - } - - if (sockid == -1) - { - printf("LANMAGIC: !! TCP SOCKET LIST FULL\n"); - return; - } - - printf("LANMAGIC: opening TCP socket #%d to %d.%d.%d.%d:%d, srcport %d\n", - sockid, - ipheader[16], ipheader[17], ipheader[18], ipheader[19], - dstport, srcport); - - // keep track of it - sock->Status = 1; - memcpy(sock->DestIP, &ipheader[16], 4); - sock->DestPort = dstport; - sock->SourcePort = srcport; - sock->SeqNum = 0x13370000; - sock->AckNum = 0; - - // open backend socket - if (!sock->Backend) - { - sock->Backend = socket(AF_INET, SOCK_STREAM, 0); - } - - struct sockaddr_in conn_addr; - memset(&conn_addr, 0, sizeof(conn_addr)); - conn_addr.sin_family = AF_INET; - memcpy(&conn_addr.sin_addr, &ipheader[16], 4); - conn_addr.sin_port = htons(dstport); - if (connect(sock->Backend, (sockaddr*)&conn_addr, sizeof(conn_addr)) == -1) - { - printf("connect() shat itself :(\n"); - } - else - { - // acknowledge it - TCP_SYNACK(sock, data, len); - } + printf("slirp: POLL LIST FULL\n"); + return -1; } - else - { - int sockid = -1; - TCPSocket* sock; - for (int i = 0; i < (sizeof(TCPSocketList)/sizeof(TCPSocket)); i++) - { - sock = &TCPSocketList[i]; - if (sock->Status != 0 && !memcmp(&sock->DestIP, &ipheader[16], 4) && - sock->SourcePort == srcport && sock->DestPort == dstport) - { - sockid = i; - break; - } - } - if (sockid == -1) - { - printf("LANMAGIC: bad TCP packet\n"); - return; - 
} + int idx = PollListSize++; - // TODO: check those - u32 seqnum = ntohl(*(u32*)&tcpheader[4]); - u32 acknum = ntohl(*(u32*)&tcpheader[8]); - sock->SeqNum = acknum; - sock->AckNum = seqnum + tcpdatalen; + //printf("Slirp: add poll: fd=%d, idx=%d, events=%08X\n", fd, idx, events); - // send data over the socket - if (tcpdatalen > 0) - { - u8* tcpdata = &tcpheader[tcpheaderlen]; + u16 evt = 0; - printf("TCP: socket %d sending %d bytes (flags=%04X)\n", sockid, tcpdatalen, flags); - send(sock->Backend, (char*)tcpdata, tcpdatalen, 0); + if (events & SLIRP_POLL_IN) evt |= POLLIN; + if (events & SLIRP_POLL_OUT) evt |= POLLWRNORM; - // kind of a hack, there - TCP_ACK(sock, false); - } +#ifndef __WIN32__ + // CHECKME + if (events & SLIRP_POLL_PRI) evt |= POLLPRI; + if (events & SLIRP_POLL_ERR) evt |= POLLERR; + if (events & SLIRP_POLL_HUP) evt |= POLLHUP; +#endif // !__WIN32__ - if (flags & 0x001) // FIN - { - // TODO: timeout etc - printf("TCP: socket %d closing\n", sockid); + PollList[idx].fd = fd; + PollList[idx].events = evt; - sock->Status = 0; - closesocket(sock->Backend); - sock->Backend = 0; - } - } + return idx; } -void HandleARPFrame(u8* data, int len) +int SlirpCbGetREvents(int idx, void* opaque) { - u16 protocol = ntohs(*(u16*)&data[0x10]); - if (protocol != 0x0800) return; - - u16 op = ntohs(*(u16*)&data[0x14]); - u32 targetip = ntohl(*(u32*)&data[0x26]); - - // TODO: handle ARP to the client - // this only handles ARP to the DHCP/router - - if (op == 1) - { - // opcode 1=req 2=reply - // sender MAC - // sender IP - // target MAC - // target IP - - const u8* targetmac; - if (targetip == kServerIP) targetmac = kServerMAC; - else if (targetip == kDNSIP) targetmac = kDNSMAC; - else return; - - u8 resp[64]; - u8* out = &resp[0]; - - // ethernet - memcpy(out, &data[6], 6); out += 6; - memcpy(out, kServerMAC, 6); out += 6; - *(u16*)out = htons(0x0806); out += 2; - - // ARP - *(u16*)out = htons(0x0001); out += 2; // hardware type - *(u16*)out = htons(0x0800); out += 
2; // protocol - *out++ = 6; // MAC address size - *out++ = 4; // IP address size - *(u16*)out = htons(0x0002); out += 2; // opcode - memcpy(out, targetmac, 6); out += 6; - *(u32*)out = htonl(targetip); out += 4; - memcpy(out, &data[0x16], 6+4); out += 6+4; - - u32 framelen = (u32)(out - &resp[0]); - - // TODO: if there is already a packet queued, this will overwrite it - // that being said, this will only happen during DHCP setup, so probably - // not a big deal - - PacketLen = framelen; - memcpy(PacketBuffer, resp, PacketLen); - RXNum = 1; - } - else - { - printf("wat??\n"); - } -} + if (idx < 0 || idx >= PollListSize) + return 0; -void HandlePacket(u8* data, int len) -{ - u16 ethertype = ntohs(*(u16*)&data[0xC]); + //printf("Slirp: get revents, idx=%d, res=%04X\n", idx, FDList[idx].revents); - if (ethertype == 0x0800) // IPv4 - { - u8 protocol = data[0x17]; - if (protocol == 0x11) // UDP - { - u16 srcport = ntohs(*(u16*)&data[0x22]); - u16 dstport = ntohs(*(u16*)&data[0x24]); - if (srcport == 68 && dstport == 67) // DHCP - { - printf("LANMAGIC: DHCP packet\n"); - return HandleDHCPFrame(data, len); - } - else if (dstport == 53 && htonl(*(u32*)&data[0x1E]) == kDNSIP) // DNS - { - printf("LANMAGIC: DNS packet\n"); - return HandleDNSFrame(data, len); - } + u16 evt = PollList[idx].revents; + int ret = 0; - printf("LANMAGIC: UDP packet %d->%d\n", srcport, dstport); - return HandleUDPFrame(data, len); - } - else if (protocol == 0x06) // TCP - { - printf("LANMAGIC: TCP packet\n"); - return HandleTCPFrame(data, len); - } - else - printf("LANMAGIC: unsupported IP protocol %02X\n", protocol); - } - else if (ethertype == 0x0806) // ARP - { - printf("LANMAGIC: ARP packet\n"); - return HandleARPFrame(data, len); - } - else - printf("LANMAGIC: unsupported ethernet type %04X\n", ethertype); -} + if (evt & POLLIN) ret |= SLIRP_POLL_IN; + if (evt & POLLWRNORM) ret |= SLIRP_POLL_OUT; + if (evt & POLLPRI) ret |= SLIRP_POLL_PRI; + if (evt & POLLERR) ret |= SLIRP_POLL_ERR; + if (evt 
& POLLHUP) ret |= SLIRP_POLL_HUP; -int SendPacket(u8* data, int len) -{ - if (len > 2048) - { - printf("LAN_SendPacket: error: packet too long (%d)\n", len); - return 0; - } - - HandlePacket(data, len); - return len; + return ret; } int RecvPacket(u8* data) { + if (!Ctx) return 0; + int ret = 0; - if (RXNum > 0) - { - memcpy(data, PacketBuffer, PacketLen); - ret = PacketLen; - RXNum = 0; - } - for (int i = 0; i < (sizeof(TCPSocketList)/sizeof(TCPSocket)); i++) + //if (PollListSize > 0) { - TCPSocket* sock = &TCPSocketList[i]; - if (sock->Status != 1) continue; - - fd_set fd; - struct timeval tv; - - FD_ZERO(&fd); - FD_SET(sock->Backend, &fd); - tv.tv_sec = 0; - tv.tv_usec = 0; - - if (!select(sock->Backend+1, &fd, 0, 0, &tv)) - { - continue; - } - - u8 recvbuf[1024]; - int recvlen = recv(sock->Backend, (char*)recvbuf, 1024, 0); - if (recvlen < 1) - { - if (recvlen == 0) - { - // socket has closed from the other side - printf("TCP: socket %d closed from other side\n", i); - sock->Status = 2; - TCP_ACK(sock, true); - } - continue; - } - - printf("TCP: socket %d receiving %d bytes\n", i, recvlen); - TCP_BuildIncomingFrame(sock, recvbuf, recvlen); - - // debug - /*for (int j = 0; j < recvlen; j += 16) - { - int rem = recvlen - j; - if (rem > 16) rem = 16; - for (int k = 0; k < rem; k++) - { - printf("%02X ", recvbuf[k+j]); - } - printf("\n"); - }*/ - - //recvlen = recv(sock->Backend, (char*)recvbuf, 1024, 0); - //if (recvlen == 0) printf("it closed immediately after\n"); + u32 timeout = 0; + PollListSize = 0; + slirp_pollfds_fill(Ctx, &timeout, SlirpCbAddPoll, nullptr); + int res = poll(PollList, PollListSize, timeout); + slirp_pollfds_poll(Ctx, res<0, SlirpCbGetREvents, nullptr); } - for (int i = 0; i < (sizeof(UDPSocketList)/sizeof(UDPSocket)); i++) + if (!RXBuffer->IsEmpty()) { - UDPSocket* sock = &UDPSocketList[i]; - if (sock->Backend == 0) continue; - - fd_set fd; - struct timeval tv; - - FD_ZERO(&fd); - FD_SET(sock->Backend, &fd); - tv.tv_sec = 0; - tv.tv_usec = 
0; - - if (!select(sock->Backend+1, &fd, 0, 0, &tv)) - { - continue; - } - - u8 recvbuf[1024]; - sockaddr_t fromAddr; - socklen_t fromLen = sizeof(sockaddr_t); - int recvlen = recvfrom(sock->Backend, (char*)recvbuf, 1024, 0, &fromAddr, &fromLen); - if (recvlen < 1) continue; + u32 header = RXBuffer->Read(); + u32 len = header & 0xFFFF; - if (fromAddr.sa_family != AF_INET) continue; - struct sockaddr_in* fromAddrIn = (struct sockaddr_in*)&fromAddr; - if (memcmp(&fromAddrIn->sin_addr, sock->DestIP, 4)) continue; - if (ntohs(fromAddrIn->sin_port) != sock->DestPort) continue; + for (int i = 0; i < len; i += 4) + ((u32*)data)[i>>2] = RXBuffer->Read(); - printf("UDP: socket %d receiving %d bytes\n", i, recvlen); - UDP_BuildIncomingFrame(sock, recvbuf, recvlen); + ret = header >> 16; } return ret; diff --git a/src/frontend/qt_sdl/Platform.cpp b/src/frontend/qt_sdl/Platform.cpp index 43f358f..05a0c2d 100644 --- a/src/frontend/qt_sdl/Platform.cpp +++ b/src/frontend/qt_sdl/Platform.cpp @@ -111,6 +111,7 @@ void Init(int argc, char** argv) confdir = config.absolutePath() + "/melonDS/"; EmuDirectory = new char[confdir.length() + 1]; memcpy(EmuDirectory, confdir.toUtf8().data(), confdir.length()); + EmuDirectory[confdir.length()] = '\0'; #endif } diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index a867dc6..c48e506 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -259,6 +259,7 @@ EmuThread::EmuThread(QObject* parent) : QThread(parent) { EmuStatus = 0; EmuRunning = 2; + EmuPause = 0; RunningSomething = false; connect(this, SIGNAL(windowUpdate()), mainWindow->panel, SLOT(update())); @@ -568,6 +569,7 @@ void EmuThread::changeWindowTitle(char* title) void EmuThread::emuRun() { EmuRunning = 1; + EmuPause = 0; RunningSomething = true; // checkme @@ -578,6 +580,9 @@ void EmuThread::emuRun() void EmuThread::emuPause() { + EmuPause++; + if (EmuPause > 1) return; + PrevEmuStatus = EmuRunning; EmuRunning = 2; while (EmuStatus != 2); 
@@ -588,6 +593,11 @@ void EmuThread::emuPause() void EmuThread::emuUnpause() { + if (EmuPause < 1) return; + + EmuPause--; + if (EmuPause > 0) return; + EmuRunning = PrevEmuStatus; if (audioDevice) SDL_PauseAudioDevice(audioDevice, 0); @@ -597,6 +607,7 @@ void EmuThread::emuUnpause() void EmuThread::emuStop() { EmuRunning = 0; + EmuPause = 0; if (audioDevice) SDL_PauseAudioDevice(audioDevice, 1); if (micDevice) SDL_PauseAudioDevice(micDevice, 1); @@ -1289,6 +1300,8 @@ void MainWindow::keyPressEvent(QKeyEvent* event) { if (event->isAutoRepeat()) return; + if (event->key() == Qt::Key_F11) NDS::debug(0); + Input::KeyPress(event); } @@ -1694,11 +1707,10 @@ void MainWindow::onOpenEmuSettings() void MainWindow::onEmuSettingsDialogFinished(int res) { + emuThread->emuUnpause(); + if (EmuSettingsDialog::needsReset) - { - emuThread->emuUnpause(); onReset(); - } } void MainWindow::onOpenInputConfig() @@ -2128,12 +2140,12 @@ int CALLBACK WinMain(HINSTANCE hinst, HINSTANCE hprev, LPSTR cmdline, int cmdsho if (argv_w) LocalFree(argv_w); - if (AttachConsole(ATTACH_PARENT_PROCESS)) + /*if (AttachConsole(ATTACH_PARENT_PROCESS)) { freopen("CONOUT$", "w", stdout); freopen("CONOUT$", "w", stderr); printf("\n"); - } + }*/ int ret = main(argc, argv); diff --git a/src/frontend/qt_sdl/main.h b/src/frontend/qt_sdl/main.h index eec2a48..7f33973 100644 --- a/src/frontend/qt_sdl/main.h +++ b/src/frontend/qt_sdl/main.h @@ -74,6 +74,7 @@ private: volatile int EmuStatus; int PrevEmuStatus; int EmuRunning; + int EmuPause; QOffscreenSurface* oglSurface; QOpenGLContext* oglContext; |