Diffstat (limited to 'src/ARMJIT_A64/ARMJIT_LoadStore.cpp')
-rw-r--r--  src/ARMJIT_A64/ARMJIT_LoadStore.cpp  848
1 files changed, 848 insertions, 0 deletions
diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
new file mode 100644
index 0000000..a5d0e3f
--- /dev/null
+++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
@@ -0,0 +1,848 @@
+#include "ARMJIT_Compiler.h"
+
+#include "../Config.h"
+
+using namespace Arm64Gen;
+
+namespace ARMJIT
+{
+
+// W0 - address
+// (if store) W1 - value to store
+// W2 - code cycles
+void* Compiler::Gen_MemoryRoutine9(int size, bool store)
+{
+    AlignCode16();
+    void* res = GetRXPtr();
+
+    u32 addressMask;
+    switch (size)
+    {
+    case 32: addressMask = ~3; break;
+    case 16: addressMask = ~1; break;
+    case 8:  addressMask = ~0; break;
+    }
+
+    LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, DTCMBase));
+    LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMSize));
+    SUB(W3, W0, W3);
+    CMP(W3, W4);
+    FixupBranch insideDTCM = B(CC_LO);
+
+    UBFX(W4, W0, 24, 8);
+    CMP(W4, 0x02);
+    FixupBranch outsideMainRAM = B(CC_NEQ);
+    ANDI2R(W3, W0, addressMask & (MAIN_RAM_SIZE - 1));
+    MOVP2R(X4, NDS::MainRAM);
+    if (!store && size == 32)
+    {
+        LDR(W3, X3, X4);
+        ANDI2R(W0, W0, 3);
+        LSL(W0, W0, 3);
+        RORV(W0, W3, W0);
+    }
+    else if (store)
+        STRGeneric(size, W1, X3, X4);
+    else
+        LDRGeneric(size, false, W0, X3, X4);
+    RET();
+
+    SetJumpTarget(outsideMainRAM);
+
+    LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, ITCMSize));
+    CMP(W0, W3);
+    FixupBranch insideITCM = B(CC_LO);
+
+    if (store)
+    {
+        if (size > 8)
+            ANDI2R(W0, W0, addressMask);
+
+        switch (size)
+        {
+        case 32: QuickTailCall(X4, NDS::ARM9Write32); break;
+        case 16: QuickTailCall(X4, NDS::ARM9Write16); break;
+        case 8:  QuickTailCall(X4, NDS::ARM9Write8); break;
+        }
+    }
+    else
+    {
+        if (size == 32)
+            ABI_PushRegisters({0, 30});
+        if (size > 8)
+            ANDI2R(W0, W0, addressMask);
+
+        switch (size)
+        {
+        case 32: QuickCallFunction(X4, NDS::ARM9Read32); break;
+        case 16: QuickTailCall(X4, NDS::ARM9Read16); break;
+        case 8:  QuickTailCall(X4, NDS::ARM9Read8); break;
+        }
+        if (size == 32)
+        {
+            ABI_PopRegisters({1, 30});
+            ANDI2R(W1, W1, 3);
+            LSL(W1, W1, 3);
+            RORV(W0, W0, W1);
+            RET();
+        }
+    }
+
+    SetJumpTarget(insideDTCM);
+    ANDI2R(W3, W3, 0x3FFF & addressMask);
+    ADDI2R(W3, W3, offsetof(ARMv5, DTCM), W4);
+    if (!store && size == 32)
+    {
+        ANDI2R(W4, W0, 3);
+        LDR(W0, RCPU, W3);
+        LSL(W4, W4, 3);
+        RORV(W0, W0, W4);
+    }
+    else if (store)
+        STRGeneric(size, W1, RCPU, W3);
+    else
+        LDRGeneric(size, false, W0, RCPU, W3);
+
+    RET();
+
+    SetJumpTarget(insideITCM);
+    ANDI2R(W3, W0, 0x7FFF & addressMask);
+    if (store)
+    {
+        LSR(W0, W3, 8);
+        ADDI2R(W0, W0, ExeMemRegionOffsets[exeMem_ITCM], W4);
+        MOVP2R(X4, CodeRanges);
+        ADD(X4, X4, X0, ArithOption(X0, ST_LSL, 4));
+        static_assert(sizeof(AddressRange) == 16);
+        LDR(INDEX_UNSIGNED, W4, X4, offsetof(AddressRange, Blocks.Length));
+        FixupBranch null = CBZ(W4);
+        ABI_PushRegisters({1, 3, 30});
+        QuickCallFunction(X4, InvalidateByAddr);
+        ABI_PopRegisters({1, 3, 30});
+        SetJumpTarget(null);
+    }
+    ADDI2R(W3, W3, offsetof(ARMv5, ITCM), W4);
+    if (!store && size == 32)
+    {
+        ANDI2R(W4, W0, 3);
+        LDR(W0, RCPU, W3);
+        LSL(W4, W4, 3);
+        RORV(W0, W0, W4);
+    }
+    else if (store)
+        STRGeneric(size, W1, RCPU, W3);
+    else
+        LDRGeneric(size, false, W0, RCPU, W3);
+    RET();
+
+    return res;
+}
+
+/*
+    W0 - base address
+    X1 - stack space
+    W2 - values count
+*/
+void* Compiler::Gen_MemoryRoutine9Seq(bool store, bool preinc)
+{
+    AlignCode16();
+    void* res = GetRXPtr();
+
+    void* loopStart = GetRXPtr();
+    SUB(W2, W2, 1);
+
+    if (preinc)
+        ADD(W0, W0, 4);
+
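+    // DTCM sits on the ARM9's local bus, so test it per iteration with an
+    // unsigned range check (addr - DTCMBase < DTCMSize) before anything else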
+    LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMBase));
+    LDR(INDEX_UNSIGNED, W5, RCPU, offsetof(ARMv5, DTCMSize));
+    SUB(W4, W0, W4);
+    CMP(W4, W5);
+    FixupBranch insideDTCM = B(CC_LO);
+
+    LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, ITCMSize));
+    CMP(W0, W4);
+    FixupBranch insideITCM = B(CC_LO);
+
+    ABI_PushRegisters({0, 1, 2, 30}); // TODO: move SP only once
+    if (store)
+    {
+        LDR(X1, X1, ArithOption(X2, true));
+        QuickCallFunction(X4, NDS::ARM9Write32);
+
+        ABI_PopRegisters({0, 1, 2, 30});
+    }
+    else
+    {
+        QuickCallFunction(X4, NDS::ARM9Read32);
+        MOV(W4, W0);
+
+        ABI_PopRegisters({0, 1, 2, 30});
+
+        STR(X4, X1, ArithOption(X2, true));
+    }
+
+    if (!preinc)
+        ADD(W0, W0, 4);
+    CBNZ(W2, loopStart);
+    RET();
+
+    SetJumpTarget(insideDTCM);
+
+    ANDI2R(W4, W4, ~3 & 0x3FFF);
+    ADDI2R(X4, X4, offsetof(ARMv5, DTCM));
+    if (store)
+    {
+        LDR(X5, X1, ArithOption(X2, true));
+        STR(W5, RCPU, X4);
+    }
+    else
+    {
+        LDR(W5, RCPU, X4);
+        STR(X5, X1, ArithOption(X2, true));
+    }
+
+    if (!preinc)
+        ADD(W0, W0, 4);
+    CBNZ(W2, loopStart);
+    RET();
+
+    SetJumpTarget(insideITCM);
+
+    ANDI2R(W4, W0, ~3 & 0x7FFF);
+
+    if (store)
+    {
+        LSR(W6, W4, 8);
+        ADDI2R(W6, W6, ExeMemRegionOffsets[exeMem_ITCM], W5);
+        MOVP2R(X5, CodeRanges);
+        ADD(X5, X5, X6, ArithOption(X6, ST_LSL, 4));
+        static_assert(sizeof(AddressRange) == 16);
+        LDR(INDEX_UNSIGNED, W5, X5, offsetof(AddressRange, Blocks.Length));
+        FixupBranch null = CBZ(W5);
+        ABI_PushRegisters({0, 1, 2, 4, 30});
+        MOV(W0, W6);
+        QuickCallFunction(X5, InvalidateByAddr);
+        ABI_PopRegisters({0, 1, 2, 4, 30});
+        SetJumpTarget(null);
+    }
+
+    ADDI2R(W4, W4, offsetof(ARMv5, ITCM), W5);
+    if (store)
+    {
+        LDR(X5, X1, ArithOption(X2, true));
+        STR(W5, RCPU, X4);
+    }
+    else
+    {
+        LDR(W5, RCPU, X4);
+        STR(X5, X1, ArithOption(X2, true));
+    }
+
+    if (!preinc)
+        ADD(W0, W0, 4);
+    CBNZ(W2, loopStart);
+    RET();
+    return res;
+}
+
+void* Compiler::Gen_MemoryRoutine7Seq(bool store, bool preinc)
+{
+    AlignCode16();
+    void* res = GetRXPtr();
+
+    void* loopStart = GetRXPtr();
+    SUB(W2, W2, 1);
+
+    if (preinc)
+        ADD(W0, W0, 4);
+
+    ABI_PushRegisters({0, 1, 2, 30});
+    if (store)
+    {
+        LDR(X1, X1, ArithOption(X2, true));
+        QuickCallFunction(X4, NDS::ARM7Write32);
+        ABI_PopRegisters({0, 1, 2, 30});
+    }
+    else
+    {
+        QuickCallFunction(X4, NDS::ARM7Read32);
+        MOV(W4, W0);
+        ABI_PopRegisters({0, 1, 2, 30});
+        STR(X4, X1, ArithOption(X2, true));
+    }
+
+    if (!preinc)
+        ADD(W0, W0, 4);
+    CBNZ(W2, loopStart);
+    RET();
+
+    return res;
+}
+
+void Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
+{
+    u32 val;
+    // make sure arm7 bios is accessible
+    u32 tmpR15 = CurCPU->R[15];
+    CurCPU->R[15] = R15;
+    if (size == 32)
+    {
+        CurCPU->DataRead32(addr & ~0x3, &val);
+        val = ROR(val, (addr & 0x3) << 3);
+    }
+    else if (size == 16)
+    {
+        CurCPU->DataRead16(addr & ~0x1, &val);
+        if (signExtend)
+            val = ((s32)val << 16) >> 16;
+    }
+    else
+    {
+        CurCPU->DataRead8(addr, &val);
+        if (signExtend)
+            val = ((s32)val << 24) >> 24;
+    }
+    CurCPU->R[15] = tmpR15;
+
+    MOVI2R(MapReg(rd), val);
+
+    if (Thumb || CurInstr.Cond() == 0xE)
+        RegCache.PutLiteral(rd, val);
+}
+
+void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
+{
+    u32 addressMask = ~0;
+    if (size == 32)
+        addressMask = ~3;
+    if (size == 16)
+        addressMask = ~1;
+
+    if (flags & memop_Store)
+        Comp_AddCycles_CD();
+    else
+        Comp_AddCycles_CDI();
+
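+    // a load from a constant address (PC-relative, immediate offset, no
+    // writeback) can be resolved once at compile time, as long as no code
+    // block in that range has had its literals invalidated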
+    if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
+    {
+        u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
+        u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
+
+        if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 16))))
+        {
+            Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr);
+            return;
+        }
+    }
+
+    {
+        ARM64Reg rdMapped = MapReg(rd);
+        ARM64Reg rnMapped = MapReg(rn);
+
+        bool inlinePreparation = Num == 1;
+        u32 constLocalROR32 = 4;
+
+        void* memFunc = Num == 0
+            ? MemFunc9[size >> 4][!!(flags & memop_Store)]
+            : MemFunc7[size >> 4][!!(flags & memop_Store)];
+
+        if (Config::JIT_LiteralOptimisations && (rd != 15 || (flags & memop_Store)) && offset.IsImm && RegCache.IsLiteral(rn))
+        {
+            u32 addr = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
+
+            NDS::MemRegion region;
+            region.Mem = NULL;
+            if (Num == 0)
+            {
+                ARMv5* cpu5 = (ARMv5*)CurCPU;
+
+                // stupid dtcm...
+                if (addr >= cpu5->DTCMBase && addr < (cpu5->DTCMBase + cpu5->DTCMSize))
+                {
+                    region.Mem = cpu5->DTCM;
+                    region.Mask = 0x3FFF;
+                }
+                else
+                {
+                    NDS::ARM9GetMemRegion(addr, flags & memop_Store, &region);
+                }
+            }
+            else
+                NDS::ARM7GetMemRegion(addr, flags & memop_Store, &region);
+
+            if (region.Mem != NULL)
+            {
+                void* ptr = &region.Mem[addr & addressMask & region.Mask];
+
+                MOVP2R(X0, ptr);
+                if (flags & memop_Store)
+                    STRGeneric(size, INDEX_UNSIGNED, rdMapped, X0, 0);
+                else
+                {
+                    LDRGeneric(size, flags & memop_SignExtend, INDEX_UNSIGNED, rdMapped, X0, 0);
+                    if (size == 32 && addr & ~0x3)
+                        ROR_(rdMapped, rdMapped, (addr & 0x3) << 3);
+                }
+                return;
+            }
+
+            void* specialFunc = GetFuncForAddr(CurCPU, addr, flags & memop_Store, size);
+            if (specialFunc)
+            {
+                memFunc = specialFunc;
+                inlinePreparation = true;
+                constLocalROR32 = addr & 0x3;
+            }
+        }
+
+        ARM64Reg finalAddr = W0;
+        if (flags & memop_Post)
+        {
+            finalAddr = rnMapped;
+            MOV(W0, rnMapped);
+        }
+
+        if (flags & memop_Store)
+            MOV(W1, rdMapped);
+
+        if (!offset.IsImm)
+            Comp_RegShiftImm(offset.Reg.ShiftType, offset.Reg.ShiftAmount, false, offset, W2);
+        // offset might become an immediate
+        if (offset.IsImm)
+        {
+            if (flags & memop_SubtractOffset)
+                SUB(finalAddr, rnMapped, offset.Imm);
+            else
+                ADD(finalAddr, rnMapped, offset.Imm);
+        }
+        else
+        {
+            if (offset.Reg.ShiftType == ST_ROR)
+            {
+                ROR_(W0, offset.Reg.Rm, offset.Reg.ShiftAmount);
+                offset = Op2(W0);
+            }
+
+            if (flags & memop_SubtractOffset)
+                SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
+            else
+                ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
+        }
+
+        if (!(flags & memop_Post) && (flags & memop_Writeback))
+            MOV(rnMapped, W0);
+
+        if (inlinePreparation)
+        {
+            if (size == 32 && !(flags & memop_Store) && constLocalROR32 == 4)
+                ANDI2R(rdMapped, W0, 3);
+            if (size > 8)
+                ANDI2R(W0, W0, addressMask);
+        }
+        QuickCallFunction(X2, memFunc);
+        if (!(flags & memop_Store))
+        {
+            if (inlinePreparation && size == 32)
+            {
+                if (constLocalROR32 == 4)
+                {
+                    LSL(rdMapped, rdMapped, 3);
+                    RORV(rdMapped, W0, rdMapped);
+                }
+                else if (constLocalROR32 > 0)
+                    ROR_(rdMapped, W0, constLocalROR32 << 3);
+                else
+                    MOV(rdMapped, W0);
+            }
+            else if (flags & memop_SignExtend)
+            {
+                if (size == 16)
+                    SXTH(rdMapped, W0);
+                else if (size == 8)
+                    SXTB(rdMapped, W0);
+                else
+                    assert("What's wrong with you?");
+            }
+            else
+                MOV(rdMapped, W0);
+
+            if (CurInstr.Info.Branches())
+            {
+                if (size < 32)
+                    printf("LDR size < 32 branching?\n");
+                Comp_JumpTo(rdMapped, Num == 0, false);
+            }
+        }
+    }
+}
+
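+// ARM single data transfer (LDR/STR/LDRB/STRB): decodes the register- or
+// immediate-offset form and the P/U/W bits into memop_* flags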
+void Compiler::A_Comp_MemWB()
+{
+    Op2 offset;
+    if (CurInstr.Instr & (1 << 25))
+        offset = Op2(MapReg(CurInstr.A_Reg(0)), (ShiftType)((CurInstr.Instr >> 5) & 0x3), (CurInstr.Instr >> 7) & 0x1F);
+    else
+        offset = Op2(CurInstr.Instr & 0xFFF);
+
+    bool load = CurInstr.Instr & (1 << 20);
+    bool byte = CurInstr.Instr & (1 << 22);
+
+    int flags = 0;
+    if (!load)
+        flags |= memop_Store;
+    if (!(CurInstr.Instr & (1 << 24)))
+        flags |= memop_Post;
+    if (CurInstr.Instr & (1 << 21))
+        flags |= memop_Writeback;
+    if (!(CurInstr.Instr & (1 << 23)))
+        flags |= memop_SubtractOffset;
+
+    Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, byte ? 8 : 32, flags);
+}
+
+void Compiler::A_Comp_MemHD()
+{
+    bool load = CurInstr.Instr & (1 << 20);
+    bool signExtend;
+    int op = (CurInstr.Instr >> 5) & 0x3;
+    int size;
+
+    if (load)
+    {
+        signExtend = op >= 2;
+        size = op == 2 ? 8 : 16;
+    }
+    else
+    {
+        size = 16;
+        signExtend = false;
+    }
+
+    Op2 offset;
+    if (CurInstr.Instr & (1 << 22))
+        offset = Op2((CurInstr.Instr & 0xF) | ((CurInstr.Instr >> 4) & 0xF0));
+    else
+        offset = Op2(MapReg(CurInstr.A_Reg(0)));
+
+    int flags = 0;
+    if (signExtend)
+        flags |= memop_SignExtend;
+    if (!load)
+        flags |= memop_Store;
+    if (!(CurInstr.Instr & (1 << 24)))
+        flags |= memop_Post;
+    if (!(CurInstr.Instr & (1 << 23)))
+        flags |= memop_SubtractOffset;
+    if (CurInstr.Instr & (1 << 21))
+        flags |= memop_Writeback;
+
+    Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
+}
+
+void Compiler::T_Comp_MemReg()
+{
+    int op = (CurInstr.Instr >> 10) & 0x3;
+    bool load = op & 0x2;
+    bool byte = op & 0x1;
+
+    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3),
+        Op2(MapReg(CurInstr.T_Reg(6))), byte ? 8 : 32, load ? 0 : memop_Store);
+}
+
+void Compiler::T_Comp_MemImm()
+{
+    int op = (CurInstr.Instr >> 11) & 0x3;
+    bool load = op & 0x1;
+    bool byte = op & 0x2;
+    u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4);
+
+    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset),
+        byte ? 8 : 32, load ? 0 : memop_Store);
+}
+
+void Compiler::T_Comp_MemRegHalf()
+{
+    int op = (CurInstr.Instr >> 10) & 0x3;
+    bool load = op != 0;
+    int size = op != 1 ? 16 : 8;
+    bool signExtend = op & 1;
+
+    int flags = 0;
+    if (signExtend)
+        flags |= memop_SignExtend;
+    if (!load)
+        flags |= memop_Store;
+
+    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(MapReg(CurInstr.T_Reg(6))),
+        size, flags);
+}
+
+void Compiler::T_Comp_MemImmHalf()
+{
+    u32 offset = (CurInstr.Instr >> 5) & 0x3E;
+    bool load = CurInstr.Instr & (1 << 11);
+
+    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset), 16,
+        load ? 0 : memop_Store);
+}
+
+void Compiler::T_Comp_LoadPCRel()
+{
+    u32 addr = (R15 & ~0x2) + ((CurInstr.Instr & 0xFF) << 2);
+
+    if (Config::JIT_LiteralOptimisations)
+    {
+        Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr);
+        Comp_AddCycles_CDI();
+    }
+    else
+    {
+        bool negative = addr < R15;
+        u32 abs = negative ? R15 - addr : addr - R15;
+        Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(abs), 32, negative ? memop_SubtractOffset : 0);
+    }
+}
+
+void Compiler::T_Comp_MemSPRel()
+{
+    u32 offset = (CurInstr.Instr & 0xFF) * 4;
+    bool load = CurInstr.Instr & (1 << 11);
+
+    Comp_MemAccess(CurInstr.T_Reg(8), 13, Op2(offset), 32, load ? 0 : memop_Store);
+}
+
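+// block transfer (LDM/STM): register values are staged through a buffer on
+// the host stack, then moved in a single call to the Seq memory routine;
+// returns the base adjustment in bytes (negative when decrementing)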
+s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
+{
+    IrregularCycles = true;
+
+    int regsCount = regs.Count();
+
+    if (regsCount == 0)
+        return 0; // actually not the right behaviour TODO: fix me
+
+    SUB(SP, SP, ((regsCount + 1) & ~1) * 8);
+    if (store)
+    {
+        Comp_AddCycles_CD();
+
+        if (usermode && (regs & BitSet16(0x7f00)))
+            UBFX(W0, RCPSR, 0, 5);
+
+        int i = regsCount - 1;
+
+        BitSet16::Iterator it = regs.begin();
+        while (it != regs.end())
+        {
+            BitSet16::Iterator nextReg = it;
+            nextReg++;
+
+            int reg = *it;
+
+            if (usermode && reg >= 8 && reg < 15)
+            {
+                if (RegCache.Mapping[reg] != INVALID_REG)
+                    MOV(W3, MapReg(reg));
+                else
+                    LoadReg(reg, W3);
+                MOVI2R(W1, reg - 8);
+                BL(ReadBanked);
+                STR(INDEX_UNSIGNED, W3, SP, i * 8);
+            }
+            else if (!usermode && nextReg != regs.end())
+            {
+                ARM64Reg first = W3;
+                ARM64Reg second = W4;
+
+                if (RegCache.Mapping[reg] != INVALID_REG)
+                    first = MapReg(reg);
+                else
+                    LoadReg(reg, W3);
+
+                if (RegCache.Mapping[*nextReg] != INVALID_REG)
+                    second = MapReg(*nextReg);
+                else
+                    LoadReg(*nextReg, W4);
+
+                STP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, i * 8 - 8);
+
+                i--;
+                it++;
+            }
+            else if (RegCache.Mapping[reg] != INVALID_REG)
+                STR(INDEX_UNSIGNED, MapReg(reg), SP, i * 8);
+            else
+            {
+                LoadReg(reg, W3);
+                STR(INDEX_UNSIGNED, W3, SP, i * 8);
+            }
+            i--;
+            it++;
+        }
+    }
+    if (decrement)
+    {
+        SUB(W0, MapReg(rn), regsCount * 4);
+        preinc ^= true;
+    }
+    else
+        MOV(W0, MapReg(rn));
+    ADD(X1, SP, 0);
+    MOVI2R(W2, regsCount);
+
+    BL(Num ? MemFuncsSeq7[store][preinc] : MemFuncsSeq9[store][preinc]);
+
+    if (!store)
+    {
+        Comp_AddCycles_CDI();
+
+        if (usermode && (regs & BitSet16(0x7f00)))
+            UBFX(W0, RCPSR, 0, 5);
+
+        int i = regsCount - 1;
+        BitSet16::Iterator it = regs.begin();
+        while (it != regs.end())
+        {
+            BitSet16::Iterator nextReg = it;
+            nextReg++;
+
+            int reg = *it;
+
+            if (usermode && reg >= 8 && reg < 15)
+            {
+                LDR(INDEX_UNSIGNED, W3, SP, i * 8);
+                MOVI2R(W1, reg - 8);
+                BL(WriteBanked);
+                FixupBranch alreadyWritten = CBNZ(W4);
+                if (RegCache.Mapping[reg] != INVALID_REG)
+                {
+                    MOV(MapReg(reg), W3);
+                    RegCache.DirtyRegs |= 1 << reg;
+                }
+                else
+                    SaveReg(reg, W3);
+                SetJumpTarget(alreadyWritten);
+            }
+            else if (!usermode && nextReg != regs.end())
+            {
+                ARM64Reg first = W3, second = W4;
+
+                if (RegCache.Mapping[reg] != INVALID_REG)
+                {
+                    first = MapReg(reg);
+                    if (reg != 15)
+                        RegCache.DirtyRegs |= 1 << reg;
+                }
+                if (RegCache.Mapping[*nextReg] != INVALID_REG)
+                {
+                    second = MapReg(*nextReg);
+                    if (*nextReg != 15)
+                        RegCache.DirtyRegs |= 1 << *nextReg;
+                }
+
+                LDP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, i * 8 - 8);
+
+                if (first == W3)
+                    SaveReg(reg, W3);
+                if (second == W4)
+                    SaveReg(*nextReg, W4);
+
+                it++;
+                i--;
+            }
+            else if (RegCache.Mapping[reg] != INVALID_REG)
+            {
+                ARM64Reg mapped = MapReg(reg);
+                LDR(INDEX_UNSIGNED, mapped, SP, i * 8);
+
+                if (reg != 15)
+                    RegCache.DirtyRegs |= 1 << reg;
+            }
+            else
+            {
+                LDR(INDEX_UNSIGNED, W3, SP, i * 8);
+                SaveReg(reg, W3);
+            }
+
+            it++;
+            i--;
+        }
+    }
+    ADD(SP, SP, ((regsCount + 1) & ~1) * 8);
+
+    if (!store && regs[15])
+    {
+        ARM64Reg mapped = MapReg(15);
+        Comp_JumpTo(mapped, Num == 0, usermode);
+    }
+
+    return regsCount * 4 * (decrement ? -1 : 1);
+}
+
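+// whether LDM with the base in the register list writes back depends on the
+// architecture: ARM9 (ARMv5) still writes back if the base is the only
+// register or not the last one listed; ARM7 (ARMv4) keeps the loaded value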
+void Compiler::A_Comp_LDM_STM()
+{
+    BitSet16 regs(CurInstr.Instr & 0xFFFF);
+
+    bool load = CurInstr.Instr & (1 << 20);
+    bool pre = CurInstr.Instr & (1 << 24);
+    bool add = CurInstr.Instr & (1 << 23);
+    bool writeback = CurInstr.Instr & (1 << 21);
+    bool usermode = CurInstr.Instr & (1 << 22);
+
+    ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
+
+    s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode);
+
+    if (load && writeback && regs[CurInstr.A_Reg(16)])
+        writeback = Num == 0
+            ? (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1))
+            : false;
+    if (writeback)
+    {
+        if (offset > 0)
+            ADD(rn, rn, offset);
+        else
+            SUB(rn, rn, -offset);
+    }
+}
+
+void Compiler::T_Comp_PUSH_POP()
+{
+    bool load = CurInstr.Instr & (1 << 11);
+    BitSet16 regs(CurInstr.Instr & 0xFF);
+    if (CurInstr.Instr & (1 << 8))
+    {
+        if (load)
+            regs[15] = true;
+        else
+            regs[14] = true;
+    }
+
+    ARM64Reg sp = MapReg(13);
+    s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false);
+
+    if (offset > 0)
+        ADD(sp, sp, offset);
+    else
+        SUB(sp, sp, -offset);
+}
+
+void Compiler::T_Comp_LDMIA_STMIA()
+{
+    BitSet16 regs(CurInstr.Instr & 0xFF);
+    ARM64Reg rb = MapReg(CurInstr.T_Reg(8));
+    bool load = CurInstr.Instr & (1 << 11);
+    u32 regsCount = regs.Count();
+
+    s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false);
+
+    if (!load || !regs[CurInstr.T_Reg(8)])
+    {
+        if (offset > 0)
+            ADD(rb, rb, offset);
+        else
+            SUB(rb, rb, -offset);
+    }
+}
+
+}
\ No newline at end of file