Diffstat (limited to 'src/ARMJIT_A64/ARMJIT_LoadStore.cpp')
-rw-r--r-- | src/ARMJIT_A64/ARMJIT_LoadStore.cpp | 790
1 file changed, 368 insertions(+), 422 deletions(-)
diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp index 6cf710b..b307d0e 100644 --- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp @@ -2,286 +2,62 @@ #include "../Config.h" +#include "../ARMJIT_Memory.h" + using namespace Arm64Gen; namespace ARMJIT { -// W0 - address -// (if store) W1 - value to store -// W2 - code cycles -void* Compiler::Gen_MemoryRoutine9(int size, bool store) +bool Compiler::IsJITFault(u64 pc) { - AlignCode16(); - void* res = GetRXPtr(); - - u32 addressMask; - switch (size) - { - case 32: addressMask = ~3; break; - case 16: addressMask = ~1; break; - case 8: addressMask = ~0; break; - } - - LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, DTCMBase)); - LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMSize)); - SUB(W3, W0, W3); - CMP(W3, W4); - FixupBranch insideDTCM = B(CC_LO); - - UBFX(W4, W0, 24, 8); - CMP(W4, 0x02); - FixupBranch outsideMainRAM = B(CC_NEQ); - ANDI2R(W3, W0, addressMask & (MAIN_RAM_SIZE - 1)); - MOVP2R(X4, NDS::MainRAM); - if (!store && size == 32) - { - LDR(W3, X3, X4); - ANDI2R(W0, W0, 3); - LSL(W0, W0, 3); - RORV(W0, W3, W0); - } - else if (store) - STRGeneric(size, W1, X3, X4); - else - LDRGeneric(size, false, W0, X3, X4); - RET(); - - SetJumpTarget(outsideMainRAM); - - LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, ITCMSize)); - CMP(W0, W3); - FixupBranch insideITCM = B(CC_LO); - - if (store) - { - if (size > 8) - ANDI2R(W0, W0, addressMask); - - switch (size) - { - case 32: QuickTailCall(X4, NDS::ARM9Write32); break; - case 16: QuickTailCall(X4, NDS::ARM9Write16); break; - case 8: QuickTailCall(X4, NDS::ARM9Write8); break; - } - } - else - { - if (size == 32) - ABI_PushRegisters({0, 30}); - if (size > 8) - ANDI2R(W0, W0, addressMask); - - switch (size) - { - case 32: QuickCallFunction(X4, NDS::ARM9Read32); break; - case 16: QuickTailCall (X4, NDS::ARM9Read16); break; - case 8: QuickTailCall (X4, NDS::ARM9Read8 ); break; - } - if (size == 32) - { - ABI_PopRegisters({1, 30}); - ANDI2R(W1, W1, 3); - LSL(W1, W1, 3); - RORV(W0, W0, W1); - RET(); - } - } - - SetJumpTarget(insideDTCM); - ANDI2R(W3, W3, 0x3FFF & addressMask); - ADDI2R(W3, W3, offsetof(ARMv5, DTCM), W4); - if (!store && size == 32) - { - ANDI2R(W4, W0, 3); - LDR(W0, RCPU, W3); - LSL(W4, W4, 3); - RORV(W0, W0, W4); - } - else if (store) - STRGeneric(size, W1, RCPU, W3); - else - LDRGeneric(size, false, W0, RCPU, W3); - - RET(); - - SetJumpTarget(insideITCM); - ANDI2R(W3, W0, 0x7FFF & addressMask); - if (store) - { - ADDI2R(W0, W3, ExeMemRegionOffsets[exeMem_ITCM], W4); - LSR(W5, W0, 9); - MOVP2R(X4, CodeRanges); - ADD(X4, X4, X5, ArithOption(X5, ST_LSL, 4)); - static_assert(sizeof(AddressRange) == 16); - LDRH(INDEX_UNSIGNED, W4, X4, offsetof(AddressRange, Blocks.Length)); - FixupBranch null = CBZ(W4); - ABI_PushRegisters({1, 3, 30}); - QuickCallFunction(X4, InvalidateByAddr); - ABI_PopRegisters({1, 3, 30}); - SetJumpTarget(null); - } - ADDI2R(W3, W3, offsetof(ARMv5, ITCM), W4); - if (!store && size == 32) - { - ANDI2R(W4, W0, 3); - LDR(W0, RCPU, W3); - LSL(W4, W4, 3); - RORV(W0, W0, W4); - } - else if (store) - STRGeneric(size, W1, RCPU, W3); - else - LDRGeneric(size, false, W0, RCPU, W3); - RET(); - - return res; + return pc >= (u64)GetRXBase() && pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize); } -/* - W0 - base address - X1 - stack space - W2 - values count -*/ -void* Compiler::Gen_MemoryRoutine9Seq(bool store, bool preinc) +s64 Compiler::RewriteMemAccess(u64 pc) { - AlignCode16(); - void* res = 
GetRXPtr(); - - void* loopStart = GetRXPtr(); - SUB(W2, W2, 1); - - if (preinc) - ADD(W0, W0, 4); + ptrdiff_t pcOffset = pc - (u64)GetRXBase(); - LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMBase)); - LDR(INDEX_UNSIGNED, W5, RCPU, offsetof(ARMv5, DTCMSize)); - SUB(W4, W0, W4); - CMP(W4, W5); - FixupBranch insideDTCM = B(CC_LO); + auto it = LoadStorePatches.find(pcOffset); - LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, ITCMSize)); - CMP(W0, W4); - FixupBranch insideITCM = B(CC_LO); - - ABI_PushRegisters({0, 1, 2, 30}); // TODO: move SP only once - if (store) + if (it != LoadStorePatches.end()) { - LDR(X1, X1, ArithOption(X2, true)); - QuickCallFunction(X4, NDS::ARM9Write32); + LoadStorePatch patch = it->second; - ABI_PopRegisters({0, 1, 2, 30}); - } - else - { - QuickCallFunction(X4, NDS::ARM9Read32); - MOV(W4, W0); + ptrdiff_t curCodeOffset = GetCodeOffset(); - ABI_PopRegisters({0, 1, 2, 30}); + SetCodePtrUnsafe(pcOffset + patch.PatchOffset); - STR(X4, X1, ArithOption(X2, true)); - } + BL(patch.PatchFunc); - if (!preinc) - ADD(W0, W0, 4); - CBNZ(W2, loopStart); - RET(); + for (int i = 0; i < patch.PatchSize / 4 - 1; i++) + HINT(HINT_NOP); - SetJumpTarget(insideDTCM); + FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr()); - ANDI2R(W4, W4, ~3 & 0x3FFF); - ADDI2R(X4, X4, offsetof(ARMv5, DTCM)); - if (store) - { - LDR(X5, X1, ArithOption(X2, true)); - STR(W5, RCPU, X4); - } - else - { - LDR(W5, RCPU, X4); - STR(X5, X1, ArithOption(X2, true)); - } + SetCodePtrUnsafe(curCodeOffset); - if (!preinc) - ADD(W0, W0, 4); - CBNZ(W2, loopStart); - RET(); - - SetJumpTarget(insideITCM); - - ANDI2R(W4, W0, ~3 & 0x7FFF); - - ADDI2R(W6, W4, offsetof(ARMv5, ITCM), W5); - if (store) - { - LDR(X5, X1, ArithOption(X2, true)); - STR(W5, RCPU, X6); - } - else - { - LDR(W5, RCPU, X6); - STR(X5, X1, ArithOption(X2, true)); - } + LoadStorePatches.erase(it); - if (store) - { - ADDI2R(W4, W4, ExeMemRegionOffsets[exeMem_ITCM], W5); - LSR(W6, W4, 9); - MOVP2R(X5, CodeRanges); - ADD(X5, X5, X6, ArithOption(X6, ST_LSL, 4)); - static_assert(sizeof(AddressRange) == 16); - LDRH(INDEX_UNSIGNED, W5, X5, offsetof(AddressRange, Blocks.Length)); - FixupBranch null = CBZ(W5); - ABI_PushRegisters({0, 1, 2, 4, 30}); - MOV(W0, W4); - QuickCallFunction(X5, InvalidateByAddr); - ABI_PopRegisters({0, 1, 2, 4, 30}); - SetJumpTarget(null); + return patch.PatchOffset; } - - if (!preinc) - ADD(W0, W0, 4); - CBNZ(W2, loopStart); - RET(); - return res; + printf("this is a JIT bug! 
%08x\n", __builtin_bswap32(*(u32*)pc)); + assert(false); } -void* Compiler::Gen_MemoryRoutine7Seq(bool store, bool preinc) +bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr) { - AlignCode16(); - void* res = GetRXPtr(); + u32 localAddr = LocaliseCodeAddress(Num, addr); - void* loopStart = GetRXPtr(); - SUB(W2, W2, 1); - - if (preinc) - ADD(W0, W0, 4); - - ABI_PushRegisters({0, 1, 2, 30}); - if (store) + int invalidLiteralIdx = InvalidLiterals.Find(localAddr); + if (invalidLiteralIdx != -1) { - LDR(X1, X1, ArithOption(X2, true)); - QuickCallFunction(X4, NDS::ARM7Write32); - ABI_PopRegisters({0, 1, 2, 30}); + InvalidLiterals.Remove(invalidLiteralIdx); + return false; } - else - { - QuickCallFunction(X4, NDS::ARM7Read32); - MOV(W4, W0); - ABI_PopRegisters({0, 1, 2, 30}); - STR(X4, X1, ArithOption(X2, true)); - } - - if (!preinc) - ADD(W0, W0, 4); - CBNZ(W2, loopStart); - RET(); - return res; -} + Comp_AddCycles_CDI(); -void Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr) -{ u32 val; // make sure arm7 bios is accessible u32 tmpR15 = CurCPU->R[15]; @@ -309,6 +85,8 @@ void Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr) if (Thumb || CurInstr.Cond() == 0xE) RegCache.PutLiteral(rd, val); + + return true; } void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) @@ -318,163 +96,209 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) addressMask = ~3; if (size == 16) addressMask = ~1; + + if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback))) + { + u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1); + + if (Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr)) + return; + } if (flags & memop_Store) Comp_AddCycles_CD(); else Comp_AddCycles_CDI(); - if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback))) - { - u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1); - u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr); + ARM64Reg rdMapped = MapReg(rd); + ARM64Reg rnMapped = MapReg(rn); - if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 16)))) - { - Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr); - return; - } + if (Thumb && rn == 15) + { + ANDI2R(W3, rnMapped, ~2); + rnMapped = W3; } + ARM64Reg finalAddr = W0; + if (flags & memop_Post) { - ARM64Reg rdMapped = MapReg(rd); - ARM64Reg rnMapped = MapReg(rn); - - bool inlinePreparation = Num == 1; - u32 constLocalROR32 = 4; + finalAddr = rnMapped; + MOV(W0, rnMapped); + } - void* memFunc = Num == 0 - ? MemFunc9[size >> 4][!!(flags & memop_Store)] - : MemFunc7[size >> 4][!!((flags & memop_Store))]; + bool addrIsStatic = Config::JIT_LiteralOptimisations + && RegCache.IsLiteral(rn) && offset.IsImm && !(flags & (memop_Writeback|memop_Post)); + u32 staticAddress; + if (addrIsStatic) + staticAddress = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? 
-1 : 1); - if (Config::JIT_LiteralOptimisations && (rd != 15 || (flags & memop_Store)) && offset.IsImm && RegCache.IsLiteral(rn)) + if (!offset.IsImm) + Comp_RegShiftImm(offset.Reg.ShiftType, offset.Reg.ShiftAmount, false, offset, W2); + // offset might has become an immediate + if (offset.IsImm) + { + if (offset.Imm) + { + if (flags & memop_SubtractOffset) + SUB(finalAddr, rnMapped, offset.Imm); + else + ADD(finalAddr, rnMapped, offset.Imm); + } + else if (finalAddr != rnMapped) + MOV(finalAddr, rnMapped); + } + else + { + if (offset.Reg.ShiftType == ST_ROR) { - u32 addr = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1); + ROR_(W0, offset.Reg.Rm, offset.Reg.ShiftAmount); + offset = Op2(W0); + } - NDS::MemRegion region; - region.Mem = NULL; - if (Num == 0) - { - ARMv5* cpu5 = (ARMv5*)CurCPU; + if (flags & memop_SubtractOffset) + SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption()); + else + ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption()); + } - // stupid dtcm... - if (addr >= cpu5->DTCMBase && addr < (cpu5->DTCMBase + cpu5->DTCMSize)) - { - region.Mem = cpu5->DTCM; - region.Mask = 0x3FFF; - } - else - { - NDS::ARM9GetMemRegion(addr, flags & memop_Store, ®ion); - } - } - else - NDS::ARM7GetMemRegion(addr, flags & memop_Store, ®ion); + if (!(flags & memop_Post) && (flags & memop_Writeback)) + MOV(rnMapped, W0); - if (region.Mem != NULL) - { - void* ptr = ®ion.Mem[addr & addressMask & region.Mask]; + u32 expectedTarget = Num == 0 + ? ARMJIT_Memory::ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion) + : ARMJIT_Memory::ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion); - MOVP2R(X0, ptr); - if (flags & memop_Store) - STRGeneric(size, INDEX_UNSIGNED, rdMapped, X0, 0); - else - { - LDRGeneric(size, flags & memop_SignExtend, INDEX_UNSIGNED, rdMapped, X0, 0); - if (size == 32 && addr & ~0x3) - ROR_(rdMapped, rdMapped, (addr & 0x3) << 3); - } - return; - } + if (Config::JIT_FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || ARMJIT_Memory::IsMappable(expectedTarget))) + { + ptrdiff_t memopStart = GetCodeOffset(); + LoadStorePatch patch; - void* specialFunc = GetFuncForAddr(CurCPU, addr, flags & memop_Store, size); - if (specialFunc) - { - memFunc = specialFunc; - inlinePreparation = true; - constLocalROR32 = addr & 0x3; - } - } + patch.PatchFunc = flags & memop_Store + ? PatchedStoreFuncs[Num][__builtin_ctz(size) - 3][rdMapped - W19] + : PatchedLoadFuncs[Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped - W19]; + assert(rdMapped - W19 >= 0 && rdMapped - W19 < 8); - ARM64Reg finalAddr = W0; - if (flags & memop_Post) - { - finalAddr = rnMapped; - MOV(W0, rnMapped); - } + MOVP2R(X7, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); + // take a chance at fastmem + if (size > 8) + ANDI2R(W1, W0, addressMask); + + ptrdiff_t loadStorePosition = GetCodeOffset(); if (flags & memop_Store) - MOV(W1, rdMapped); - - if (!offset.IsImm) - Comp_RegShiftImm(offset.Reg.ShiftType, offset.Reg.ShiftAmount, false, offset, W2); - // offset might become an immediate - if (offset.IsImm) { - if (flags & memop_SubtractOffset) - SUB(finalAddr, rnMapped, offset.Imm); - else - ADD(finalAddr, rnMapped, offset.Imm); + STRGeneric(size, rdMapped, size > 8 ? X1 : X0, X7); } else { - if (offset.Reg.ShiftType == ST_ROR) + LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? 
X1 : X0, X7); + if (size == 32) { - ROR_(W0, offset.Reg.Rm, offset.Reg.ShiftAmount); - offset = Op2(W0); + UBFIZ(W0, W0, 3, 2); + RORV(rdMapped, rdMapped, W0); } - - if (flags & memop_SubtractOffset) - SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption()); - else - ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption()); } - if (!(flags & memop_Post) && (flags & memop_Writeback)) - MOV(rnMapped, W0); + patch.PatchOffset = memopStart - loadStorePosition; + patch.PatchSize = GetCodeOffset() - memopStart; + LoadStorePatches[loadStorePosition] = patch; + } + else + { + void* func = NULL; + if (addrIsStatic) + func = ARMJIT_Memory::GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size); - if (inlinePreparation) + if (func) { - if (size == 32 && !(flags & memop_Store) && constLocalROR32 == 4) - ANDI2R(rdMapped, W0, 3); - if (size > 8) - ANDI2R(W0, W0, addressMask); + if (flags & memop_Store) + MOV(W1, rdMapped); + QuickCallFunction(X2, (void (*)())func); + + if (!(flags & memop_Store)) + { + if (size == 32) + { + if (staticAddress & 0x3) + ROR_(rdMapped, W0, (staticAddress & 0x3) << 3); + else + MOV(rdMapped, W0); + } + else + { + if (flags & memop_SignExtend) + SBFX(rdMapped, W0, 0, size); + else + UBFX(rdMapped, W0, 0, size); + } + } } - QuickCallFunction(X2, memFunc); - if (!(flags & memop_Store)) + else { - if (inlinePreparation && !(flags & memop_Store) && size == 32) + if (Num == 0) { - if (constLocalROR32 == 4) + MOV(X1, RCPU); + if (flags & memop_Store) { - LSL(rdMapped, rdMapped, 3); - RORV(rdMapped, W0, rdMapped); + MOV(W2, rdMapped); + switch (size) + { + case 32: QuickCallFunction(X3, SlowWrite9<u32>); break; + case 16: QuickCallFunction(X3, SlowWrite9<u16>); break; + case 8: QuickCallFunction(X3, SlowWrite9<u8>); break; + } } - else if (constLocalROR32 > 0) - ROR_(rdMapped, W0, constLocalROR32 << 3); else - MOV(rdMapped, W0); + { + switch (size) + { + case 32: QuickCallFunction(X3, SlowRead9<u32>); break; + case 16: QuickCallFunction(X3, SlowRead9<u16>); break; + case 8: QuickCallFunction(X3, SlowRead9<u8>); break; + } + } } - else if (flags & memop_SignExtend) + else { - if (size == 16) - SXTH(rdMapped, W0); - else if (size == 8) - SXTB(rdMapped, W0); + if (flags & memop_Store) + { + MOV(W1, rdMapped); + switch (size) + { + case 32: QuickCallFunction(X3, SlowWrite7<u32>); break; + case 16: QuickCallFunction(X3, SlowWrite7<u16>); break; + case 8: QuickCallFunction(X3, SlowWrite7<u8>); break; + } + } else - assert("What's wrong with you?"); + { + switch (size) + { + case 32: QuickCallFunction(X3, SlowRead7<u32>); break; + case 16: QuickCallFunction(X3, SlowRead7<u16>); break; + case 8: QuickCallFunction(X3, SlowRead7<u8>); break; + } + } } - else - MOV(rdMapped, W0); - - if (CurInstr.Info.Branches()) + + if (!(flags & memop_Store)) { - if (size < 32) - printf("LDR size < 32 branching?\n"); - Comp_JumpTo(rdMapped, Num == 0, false); + if (size == 32) + MOV(rdMapped, W0); + else if (flags & memop_SignExtend) + SBFX(rdMapped, W0, 0, size); + else + UBFX(rdMapped, W0, 0, size); } } } + + if (CurInstr.Info.Branches()) + { + if (size < 32) + printf("LDR size < 32 branching?\n"); + Comp_JumpTo(rdMapped, Num == 0, false); + } } void Compiler::A_Comp_MemWB() @@ -589,19 +413,11 @@ void Compiler::T_Comp_MemImmHalf() void Compiler::T_Comp_LoadPCRel() { - u32 addr = (R15 & ~0x2) + ((CurInstr.Instr & 0xFF) << 2); + u32 offset = ((CurInstr.Instr & 0xFF) << 2); + u32 addr = (R15 & ~0x2) + offset; - if (Config::JIT_LiteralOptimisations) - { - Comp_MemLoadLiteral(32, false, 
CurInstr.T_Reg(8), addr); - Comp_AddCycles_CDI(); - } - else - { - bool negative = addr < R15; - u32 abs = negative ? R15 - addr : addr - R15; - Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(abs), 32, negative ? memop_SubtractOffset : 0); - } + if (!Config::JIT_LiteralOptimisations || !Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr)) + Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(offset), 32, 0); } void Compiler::T_Comp_MemSPRel() @@ -621,15 +437,138 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc if (regsCount == 0) return 0; // actually not the right behaviour TODO: fix me - SUB(SP, SP, ((regsCount + 1) & ~1) * 8); - if (store) + if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << *regs.begin())) { + int flags = 0; + if (store) + flags |= memop_Store; + if (decrement) + flags |= memop_SubtractOffset; + Op2 offset = preinc ? Op2(4) : Op2(0); + + Comp_MemAccess(*regs.begin(), rn, offset, 32, flags); + + return decrement ? -4 : 4; + } + + if (store) Comp_AddCycles_CD(); + else + Comp_AddCycles_CDI(); - if (usermode && (regs & BitSet16(0x7f00))) - UBFX(W0, RCPSR, 0, 5); + int expectedTarget = Num == 0 + ? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion) + : ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion); + + bool compileFastPath = Config::JIT_FastMemory + && store && !usermode && (CurInstr.Cond() < 0xE || ARMJIT_Memory::IsMappable(expectedTarget)); + + if (decrement) + { + SUB(W0, MapReg(rn), regsCount * 4); + ANDI2R(W0, W0, ~3); + preinc ^= true; + } + else + { + ANDI2R(W0, MapReg(rn), ~3); + } + + LoadStorePatch patch; + if (compileFastPath) + { + ptrdiff_t fastPathStart = GetCodeOffset(); + ptrdiff_t firstLoadStoreOffset; + + bool firstLoadStore = true; + + MOVP2R(X1, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); + ADD(X1, X1, X0); + + u32 offset = preinc ? 
4 : 0; + BitSet16::Iterator it = regs.begin(); + + if (regsCount & 1) + { + int reg = *it; + it++; + + ARM64Reg first = W3; + if (RegCache.LoadedRegs & (1 << reg)) + first = MapReg(reg); + else if (store) + LoadReg(reg, first); + + if (firstLoadStore) + { + firstLoadStoreOffset = GetCodeOffset(); + firstLoadStore = false; + } + + if (store) + STR(INDEX_UNSIGNED, first, X1, offset); + else + LDR(INDEX_UNSIGNED, first, X1, offset); + + if (!(RegCache.LoadedRegs & (1 << reg)) && !store) + SaveReg(reg, first); + + offset += 4; + } + + while (it != regs.end()) + { + int reg = *it; + it++; + int nextReg = *it; + it++; - int i = regsCount - 1; + ARM64Reg first = W3, second = W4; + if (RegCache.LoadedRegs & (1 << reg)) + first = MapReg(reg); + else if (store) + LoadReg(reg, first); + if (RegCache.LoadedRegs & (1 << nextReg)) + second = MapReg(nextReg); + else if (store) + LoadReg(nextReg, second); + + if (firstLoadStore) + { + firstLoadStoreOffset = GetCodeOffset(); + firstLoadStore = false; + } + + if (store) + STP(INDEX_SIGNED, first, second, X1, offset); + else + LDP(INDEX_SIGNED, first, second, X1, offset); + + if (!(RegCache.LoadedRegs & (1 << reg)) && !store) + SaveReg(reg, first); + if (!(RegCache.LoadedRegs & (1 << nextReg)) && !store) + SaveReg(nextReg, second); + + offset += 8; + } + + patch.PatchSize = GetCodeOffset() - fastPathStart; + patch.PatchOffset = fastPathStart - firstLoadStoreOffset; + SwapCodeRegion(); + patch.PatchFunc = GetRXPtr(); + + LoadStorePatches[firstLoadStoreOffset] = patch; + + ABI_PushRegisters({30}); + } + + int i = 0; + + SUB(SP, SP, ((regsCount + 1) & ~1) * 8); + if (store) + { + if (usermode && (regs & BitSet16(0x7f00))) + UBFX(W5, RCPSR, 0, 5); BitSet16::Iterator it = regs.begin(); while (it != regs.end()) @@ -641,7 +580,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc if (usermode && reg >= 8 && reg < 15) { - if (RegCache.Mapping[reg] != INVALID_REG) + if (RegCache.LoadedRegs & (1 << reg)) MOV(W3, MapReg(reg)); else LoadReg(reg, W3); @@ -651,55 +590,67 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc } else if (!usermode && nextReg != regs.end()) { - ARM64Reg first = W3; - ARM64Reg second = W4; + ARM64Reg first = W3, second = W4; - if (RegCache.Mapping[reg] != INVALID_REG) + if (RegCache.LoadedRegs & (1 << reg)) first = MapReg(reg); else LoadReg(reg, W3); - if (RegCache.Mapping[*nextReg] != INVALID_REG) + if (RegCache.LoadedRegs & (1 << *nextReg)) second = MapReg(*nextReg); else LoadReg(*nextReg, W4); - STP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, i * 8 - 8); + STP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8); - i--; + i++; it++; } - else if (RegCache.Mapping[reg] != INVALID_REG) + else if (RegCache.LoadedRegs & (1 << reg)) + { STR(INDEX_UNSIGNED, MapReg(reg), SP, i * 8); + } else { LoadReg(reg, W3); STR(INDEX_UNSIGNED, W3, SP, i * 8); } - i--; + i++; it++; } } - if (decrement) - { - SUB(W0, MapReg(rn), regsCount * 4); - preinc ^= true; - } - else - MOV(W0, MapReg(rn)); + ADD(X1, SP, 0); MOVI2R(W2, regsCount); - BL(Num ? 
MemFuncsSeq7[store][preinc] : MemFuncsSeq9[store][preinc]); + if (Num == 0) + { + MOV(X3, RCPU); + switch (preinc * 2 | store) + { + case 0: QuickCallFunction(X4, SlowBlockTransfer9<false, false>); break; + case 1: QuickCallFunction(X4, SlowBlockTransfer9<false, true>); break; + case 2: QuickCallFunction(X4, SlowBlockTransfer9<true, false>); break; + case 3: QuickCallFunction(X4, SlowBlockTransfer9<true, true>); break; + } + } + else + { + switch (preinc * 2 | store) + { + case 0: QuickCallFunction(X4, SlowBlockTransfer7<false, false>); break; + case 1: QuickCallFunction(X4, SlowBlockTransfer7<false, true>); break; + case 2: QuickCallFunction(X4, SlowBlockTransfer7<true, false>); break; + case 3: QuickCallFunction(X4, SlowBlockTransfer7<true, true>); break; + } + } if (!store) { - Comp_AddCycles_CDI(); - if (usermode && !regs[15] && (regs & BitSet16(0x7f00))) - UBFX(W0, RCPSR, 0, 5); + UBFX(W5, RCPSR, 0, 5); - int i = regsCount - 1; BitSet16::Iterator it = regs.begin(); while (it != regs.end()) { @@ -714,11 +665,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc MOVI2R(W1, reg - 8); BL(WriteBanked); FixupBranch alreadyWritten = CBNZ(W4); - if (RegCache.Mapping[reg] != INVALID_REG) - { + if (RegCache.LoadedRegs & (1 << reg)) MOV(MapReg(reg), W3); - RegCache.DirtyRegs |= 1 << reg; - } else SaveReg(reg, W3); SetJumpTarget(alreadyWritten); @@ -727,20 +675,12 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc { ARM64Reg first = W3, second = W4; - if (RegCache.Mapping[reg] != INVALID_REG) - { + if (RegCache.LoadedRegs & (1 << reg)) first = MapReg(reg); - if (reg != 15) - RegCache.DirtyRegs |= 1 << reg; - } - if (RegCache.Mapping[*nextReg] != INVALID_REG) - { + if (RegCache.LoadedRegs & (1 << *nextReg)) second = MapReg(*nextReg); - if (*nextReg != 15) - RegCache.DirtyRegs |= 1 << *nextReg; - } - LDP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, i * 8 - 8); + LDP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8); if (first == W3) SaveReg(reg, W3); @@ -748,15 +688,12 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc SaveReg(*nextReg, W4); it++; - i--; + i++; } - else if (RegCache.Mapping[reg] != INVALID_REG) + else if (RegCache.LoadedRegs & (1 << reg)) { ARM64Reg mapped = MapReg(reg); LDR(INDEX_UNSIGNED, mapped, SP, i * 8); - - if (reg != 15) - RegCache.DirtyRegs |= 1 << reg; } else { @@ -765,11 +702,20 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc } it++; - i--; + i++; } } ADD(SP, SP, ((regsCount + 1) & ~1) * 8); + if (compileFastPath) + { + ABI_PopRegisters({30}); + RET(); + + FlushIcacheSection((u8*)patch.PatchFunc, (u8*)GetRXPtr()); + SwapCodeRegion(); + } + if (!store && regs[15]) { ARM64Reg mapped = MapReg(15); |
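
Note on the fast-memory patch bookkeeping: the new path records a patch descriptor for every guarded load/store so that RewriteMemAccess can later swap the direct access for a call to a slow-path thunk. The sketch below reconstructs that bookkeeping from the fields this diff uses (PatchFunc, PatchOffset, PatchSize, and the LoadStorePatches map keyed by code-cache offset); the exact field types and the container choice are assumptions, since the real declarations live in the compiler header rather than in this file.

    #include <cstddef>
    #include <unordered_map>

    // Reconstructed from usage in this diff; field types are assumptions.
    struct LoadStorePatch
    {
        void* PatchFunc;        // slow-path thunk the rewritten BL will target
        ptrdiff_t PatchOffset;  // offset from the guarded load/store back to the start of the patchable region (non-positive)
        ptrdiff_t PatchSize;    // bytes to overwrite with BL + NOP padding on rewrite
    };

    // Keyed by the code-cache offset of the guarded load/store instruction.
    std::unordered_map<ptrdiff_t, LoadStorePatch> LoadStorePatches;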
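
Note on IsJITFault/RewriteMemAccess: these are the hooks a host fault handler calls when a fast-memory access touches an unmapped page. The handler checks that the faulting PC lies inside the JIT code region, asks the compiler to rewrite the access into a slow-path call, and resumes at the start of the rewritten sequence. The fragment below is only a hypothetical illustration of that flow (the real handler lives in ARMJIT_Memory, not in this file); the `compiler` pointer and the Linux/AArch64 ucontext layout are assumptions.

    #include <cstdint>
    #include <signal.h>
    #include <ucontext.h>

    // Assumption: some way for the handler to reach the active compiler instance.
    extern ARMJIT::Compiler* compiler;

    static void FaultHandler(int sig, siginfo_t* info, void* rawContext)
    {
        ucontext_t* context = (ucontext_t*)rawContext;
        uint64_t pc = context->uc_mcontext.pc;    // Linux/AArch64 layout assumed

        if (compiler->IsJITFault(pc))
        {
            // RewriteMemAccess overwrites the fastmem sequence with a BL to the
            // recorded slow-path thunk plus NOP padding and returns the
            // (non-positive) offset of the patch start, so execution resumes
            // at the rewritten code.
            context->uc_mcontext.pc = pc + compiler->RewriteMemAccess(pc);
            return;
        }
        // Not a JIT fault: fall through to the host's default handling.
    }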
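
Note on the unaligned-load rotation: for 32-bit loads the fast path ends with UBFIZ(W0, W0, 3, 2) followed by RORV, reproducing the ARM behaviour that an unaligned word load returns the aligned word rotated right by 8 * (address & 3). A plain C++ restatement of that sequence, for reference only:

    #include <cstdint>

    // Equivalent of the UBFIZ (shift = (addr & 3) * 8) + RORV (rotate right) pair.
    static inline uint32_t RotateUnalignedLoad(uint32_t alignedWord, uint32_t addr)
    {
        uint32_t shift = (addr & 3) * 8;
        return (alignedWord >> shift) | (alignedWord << ((32 - shift) & 31));
    }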