| author | RSDuck <rsduck@users.noreply.github.com> | 2019-07-12 03:43:45 +0200 |
|---|---|---|
| committer | RSDuck <rsduck@users.noreply.github.com> | 2020-04-26 13:02:59 +0200 |
| commit | 2efab201e936ab0f60baf1de8e957080141d2d93 (patch) | |
| tree | 1943d29467b261a4539e880477a1bd6ca774064d | |
| parent | c58fdbd66bab9f1b97e9522afa5436f212540b6d (diff) | |
jit: LDM/STM finally(!) working + MUL, MLA and CLZ
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | src/ARM.cpp | 7 |
| -rw-r--r-- | src/ARMJIT_x64/ARMJIT_ALU.cpp | 74 |
| -rw-r--r-- | src/ARMJIT_x64/ARMJIT_Branch.cpp | 7 |
| -rw-r--r-- | src/ARMJIT_x64/ARMJIT_Compiler.cpp | 108 |
| -rw-r--r-- | src/ARMJIT_x64/ARMJIT_Compiler.h | 14 |
| -rw-r--r-- | src/ARMJIT_x64/ARMJIT_LoadStore.cpp | 116 |

6 files changed, 279 insertions, 47 deletions
```diff
diff --git a/src/ARM.cpp b/src/ARM.cpp
index aca876d..a77fbc4 100644
--- a/src/ARM.cpp
+++ b/src/ARM.cpp
@@ -80,8 +80,15 @@ ARMv4::ARMv4() : ARM(1)
 // }
 
+namespace ARMJIT {extern int instructionPopularityARM[ARMInstrInfo::ak_Count];}
+
 void ARM::Reset()
 {
+    FILE* blabla = fopen("fhhg", "w");
+    for (int i = 0; i < ARMInstrInfo::ak_Count; i++)
+        fprintf(blabla, "%d -> %dx\n", i, ARMJIT::instructionPopularityARM[i]);
+    fclose(blabla);
+
     Cycles = 0;
     Halted = 0;
diff --git a/src/ARMJIT_x64/ARMJIT_ALU.cpp b/src/ARMJIT_x64/ARMJIT_ALU.cpp
index c22751e..cbe67fd 100644
--- a/src/ARMJIT_x64/ARMJIT_ALU.cpp
+++ b/src/ARMJIT_x64/ARMJIT_ALU.cpp
@@ -223,6 +223,73 @@ void Compiler::A_Comp_MovOp()
         Comp_JumpTo(rd.GetSimpleReg(), S);
 }
 
+void Compiler::A_Comp_CLZ()
+{
+    OpArg rd = MapReg(CurInstr.A_Reg(12));
+    OpArg rm = MapReg(CurInstr.A_Reg(0));
+
+    MOV(32, R(RSCRATCH), Imm32(32));
+    TEST(32, rm, rm);
+    FixupBranch skipZero = J_CC(CC_Z);
+    BSR(32, RSCRATCH, rm);
+    XOR(32, R(RSCRATCH), Imm8(0x1F)); // 31 - RSCRATCH
+    SetJumpTarget(skipZero);
+    MOV(32, rd, R(RSCRATCH));
+}
+
+void Compiler::Comp_MulOp(bool S, bool add, Gen::OpArg rd, Gen::OpArg rm, Gen::OpArg rs, Gen::OpArg rn)
+{
+    if (Num == 0)
+        Comp_AddCycles_CI(S ? 3 : 1);
+    else
+    {
+        XOR(32, R(RSCRATCH), R(RSCRATCH));
+        MOV(32, R(RSCRATCH3), rs);
+        TEST(32, R(RSCRATCH3), R(RSCRATCH3));
+        FixupBranch zeroBSR = J_CC(CC_Z);
+        BSR(32, RSCRATCH2, R(RSCRATCH3));
+        NOT(32, R(RSCRATCH3));
+        BSR(32, RSCRATCH, R(RSCRATCH3));
+        CMP(32, R(RSCRATCH2), R(RSCRATCH));
+        CMOVcc(32, RSCRATCH, R(RSCRATCH2), CC_L);
+        SHR(32, R(RSCRATCH), Imm8(3));
+        SetJumpTarget(zeroBSR); // fortunately that's even right
+        Comp_AddCycles_CI(RSCRATCH, add ? 2 : 1);
+    }
+
+    static_assert(EAX == RSCRATCH);
+    MOV(32, R(RSCRATCH), rm);
+    if (add)
+    {
+        IMUL(32, RSCRATCH, rs);
+        LEA(32, rd.GetSimpleReg(), MRegSum(RSCRATCH, rn.GetSimpleReg()));
+        TEST(32, rd, rd);
+    }
+    else
+    {
+        IMUL(32, RSCRATCH, rs);
+        MOV(32, rd, R(RSCRATCH));
+        TEST(32, R(RSCRATCH), R(RSCRATCH));
+    }
+
+    if (S)
+        Comp_RetriveFlags(false, false, false);
+}
+
+void Compiler::A_Comp_MUL_MLA()
+{
+    bool S = CurInstr.Instr & (1 << 20);
+    bool add = CurInstr.Instr & (1 << 21);
+    OpArg rd = MapReg(CurInstr.A_Reg(16));
+    OpArg rm = MapReg(CurInstr.A_Reg(0));
+    OpArg rs = MapReg(CurInstr.A_Reg(8));
+    OpArg rn;
+    if (add)
+        rn = MapReg(CurInstr.A_Reg(12));
+
+    Comp_MulOp(S, add, rd, rm, rs, rn);
+}
+
 void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed)
 {
     CPSRDirty = true;
@@ -455,6 +522,13 @@ void Compiler::T_Comp_ALU_Imm8()
     }
 }
 
+void Compiler::T_Comp_MUL()
+{
+    OpArg rd = MapReg(CurInstr.T_Reg(0));
+    OpArg rs = MapReg(CurInstr.T_Reg(3));
+    Comp_MulOp(true, false, rd, rd, rs, Imm8(-1));
+}
+
 void Compiler::T_Comp_ALU()
 {
     OpArg rd = MapReg(CurInstr.T_Reg(0));
diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp
index fb2acba..bd01ffb 100644
--- a/src/ARMJIT_x64/ARMJIT_Branch.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp
@@ -126,17 +126,14 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
 
 void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
 {
-    BitSet16 hiRegsLoaded(RegCache.DirtyRegs & 0xFFFF0000);
+    BitSet16 hiRegsLoaded(RegCache.DirtyRegs & 0xFF00);
     bool previouslyDirty = CPSRDirty;
     SaveCPSR();
 
     if (restoreCPSR)
     {
         if (Thumb || CurInstr.Cond() >= 0xE)
-        {
-            for (int reg : hiRegsLoaded)
-                RegCache.UnloadRegister(reg);
-        }
+            RegCache.Flush();
         else
         {
             // the ugly way...
```
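A note on the two bit tricks above (not part of the diff itself): A_Comp_CLZ emulates ARM's CLZ with x86 BSR, and Comp_MulOp estimates the ARM7's data-dependent multiply timing from the magnitude of the multiplier. The following is a plain C++ sketch of what the emitted x86 computes; the function names are illustrative, not from the source.

```cpp
#include <cstdint>

// CLZ via BSR: BSR yields the index of the highest set bit, so the result is
// 31 - index, which the JIT computes as XOR with 0x1F. Zero is special-cased
// to 32, matching the TEST/J_CC path.
uint32_t clz_emulated(uint32_t x)
{
    uint32_t result = 32;              // preloaded before the zero test
    if (x != 0)
    {
        int msb = 31;
        while (!(x & (1u << msb)))     // what BSR computes in hardware
            msb--;
        result = msb ^ 0x1F;           // same as 31 - msb, since msb <= 31
    }
    return result;
}

// Index of the most significant set bit, or a fallback for zero (mirrors the
// emitted code, where RSCRATCH is zeroed first and BSR with a zero source
// leaves it untouched).
static int bsr_or(uint32_t x, int fallback)
{
    for (int i = 31; i >= 0; i--)
        if (x & (1u << i))
            return i;
    return fallback;
}

// ARM7 multiplies terminate early depending on how many leading bytes of the
// multiplier are all zeros or all ones. The JIT approximates the extra
// internal cycles as min(BSR(rs), BSR(~rs)) >> 3, giving 0..3.
uint32_t mul_extra_cycles(uint32_t rs)
{
    if (rs == 0)
        return 0;                        // zeroBSR path
    int highestSet   = bsr_or(rs, 0);    // rs != 0 here, never falls back
    int highestClear = bsr_or(~rs, 0);   // all-ones rs falls back to 0
    int significant  = highestSet < highestClear ? highestSet : highestClear;
    return (uint32_t)significant >> 3;
}
```

That `min(BSR(rs), BSR(~rs)) >> 3` value is what ends up in RSCRATCH before the call to `Comp_AddCycles_CI(RSCRATCH, add ? 2 : 1)`.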
```diff
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
index 6799a90..8a895d1 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
@@ -26,10 +26,14 @@ const int RegisterCache<Compiler, X64Reg>::NativeRegsAvailable =
 #endif
 ;
 
+int instructionPopularityARM[ARMInstrInfo::ak_Count];
+
 Compiler::Compiler()
 {
     AllocCodeSpace(1024 * 1024 * 16);
 
+    memset(instructionPopularityARM, 0, sizeof(instructionPopularityARM));
+
     for (int i = 0; i < 3; i++)
     {
         for (int j = 0; j < 2; j++)
@@ -47,7 +51,88 @@ Compiler::Compiler()
             MemoryFuncsSeq7[i][j][1] = Gen_MemoryRoutineSeq7(i, j, true);
     }
 
-    ResetStart = GetWritableCodePtr();
+    {
+        // RSCRATCH mode
+        // ABI_PARAM2 reg number
+        // ABI_PARAM3 value in current mode
+        // ret - ABI_PARAM3
+        ReadBanked = (void*)GetWritableCodePtr();
+        CMP(32, R(RSCRATCH), Imm8(0x11));
+        FixupBranch fiq = J_CC(CC_E);
+        SUB(32, R(ABI_PARAM2), Imm8(13 - 8));
+        FixupBranch notEverything = J_CC(CC_L);
+        CMP(32, R(RSCRATCH), Imm8(0x12));
+        FixupBranch irq = J_CC(CC_E);
+        CMP(32, R(RSCRATCH), Imm8(0x13));
+        FixupBranch svc = J_CC(CC_E);
+        CMP(32, R(RSCRATCH), Imm8(0x17));
+        FixupBranch abt = J_CC(CC_E);
+        CMP(32, R(RSCRATCH), Imm8(0x1B));
+        FixupBranch und = J_CC(CC_E);
+        SetJumpTarget(notEverything);
+        RET();
+
+        SetJumpTarget(fiq);
+        MOV(32, R(ABI_PARAM3), MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_FIQ)));
+        RET();
+        SetJumpTarget(irq);
+        MOV(32, R(ABI_PARAM3), MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_IRQ)));
+        RET();
+        SetJumpTarget(svc);
+        MOV(32, R(ABI_PARAM3), MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_SVC)));
+        RET();
+        SetJumpTarget(abt);
+        MOV(32, R(ABI_PARAM3), MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_ABT)));
+        RET();
+        SetJumpTarget(und);
+        MOV(32, R(ABI_PARAM3), MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_UND)));
+        RET();
+    }
+    {
+        // RSCRATCH mode
+        // ABI_PARAM2 reg n
+        // ABI_PARAM3 value
+        // carry flag set if the register isn't banked
+        WriteBanked = (void*)GetWritableCodePtr();
+        CMP(32, R(RSCRATCH), Imm8(0x11));
+        FixupBranch fiq = J_CC(CC_E);
+        SUB(32, R(ABI_PARAM2), Imm8(13 - 8));
+        FixupBranch notEverything = J_CC(CC_L);
+        CMP(32, R(RSCRATCH), Imm8(0x12));
+        FixupBranch irq = J_CC(CC_E);
+        CMP(32, R(RSCRATCH), Imm8(0x13));
+        FixupBranch svc = J_CC(CC_E);
+        CMP(32, R(RSCRATCH), Imm8(0x17));
+        FixupBranch abt = J_CC(CC_E);
+        CMP(32, R(RSCRATCH), Imm8(0x1B));
+        FixupBranch und = J_CC(CC_E);
+        SetJumpTarget(notEverything);
+        STC();
+        RET();
+
+        SetJumpTarget(fiq);
+        MOV(32, MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_FIQ)), R(ABI_PARAM3));
+        CLC();
+        RET();
+        SetJumpTarget(irq);
+        MOV(32, MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_IRQ)), R(ABI_PARAM3));
+        CLC();
+        RET();
+        SetJumpTarget(svc);
+        MOV(32, MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_SVC)), R(ABI_PARAM3));
+        CLC();
+        RET();
+        SetJumpTarget(abt);
+        MOV(32, MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_ABT)), R(ABI_PARAM3));
+        CLC();
+        RET();
+        SetJumpTarget(und);
+        MOV(32, MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_UND)), R(ABI_PARAM3));
+        CLC();
+        RET();
+    }
+
+    ResetStart = (void*)GetWritableCodePtr();
 }
 
 void Compiler::LoadCPSR()
@@ -136,6 +221,9 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
         CurInstr = instrs[i];
 
         CompileFunc comp = GetCompFunc(CurInstr.Info.Kind);
+
+        if (!Thumb)
+            instructionPopularityARM[CurInstr.Info.Kind] += comp == NULL;
 
         if (comp == NULL || i == instrsCount - 1)
         {
@@ -287,9 +375,9 @@ CompileFunc Compiler::GetCompFunc(int kind)
         // CMN
         A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp,
         // Mul
-        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        A_Comp_MUL_MLA, A_Comp_MUL_MLA, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
         // ARMv5 stuff
-        NULL, NULL, NULL, NULL, NULL,
+        A_Comp_CLZ, NULL, NULL, NULL, NULL,
         // STR
         A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
         //NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -315,7 +403,7 @@ CompileFunc Compiler::GetCompFunc(int kind)
         // swap
         NULL, NULL,
         // LDM/STM
-        NULL, NULL,
+        A_Comp_LDM_STM, A_Comp_LDM_STM,
         // Branch
         A_Comp_BranchImm, A_Comp_BranchImm, A_Comp_BranchImm, A_Comp_BranchXchangeReg, A_Comp_BranchXchangeReg,
         // system stuff
@@ -333,7 +421,7 @@ CompileFunc Compiler::GetCompFunc(int kind)
         T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,
         T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,
         T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,
-        T_Comp_ALU, NULL, T_Comp_ALU, T_Comp_ALU,
+        T_Comp_ALU, T_Comp_MUL, T_Comp_ALU, T_Comp_ALU,
         // hi reg
         T_Comp_ALU_HiReg, T_Comp_ALU_HiReg, T_Comp_ALU_HiReg,
         // pc/sp relative
@@ -387,4 +475,14 @@ void Compiler::Comp_AddCycles_CI(u32 i)
     ConstantCycles += cycles;
 }
 
+void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add)
+{
+    s32 cycles = Num ?
+        NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
+        : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
+
+    LEA(32, RSCRATCH, MDisp(i, add + cycles));
+    ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH));
+}
+
 }
\ No newline at end of file
```
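The constructor now emits two small helper stubs, ReadBanked and WriteBanked, ahead of ResetStart: given a mode number in RSCRATCH (0x11 FIQ, 0x12 IRQ, 0x13 SVC, 0x17 ABT, 0x1B UND) and a pre-adjusted register index in ABI_PARAM2, they access the matching banked-register array in the ARM object, and WriteBanked reports "not banked" through the carry flag. Roughly equivalent C++ for orientation only; the struct layout, array sizes and helper name below are illustrative, not taken from the source.

```cpp
#include <cstdint>

// Illustrative stand-in for the banked register arrays in melonDS' ARM class.
struct ARMBanks
{
    uint32_t R_FIQ[8];   // FIQ banks r8-r14 (indexed as reg - 8)
    uint32_t R_SVC[3];   // the other modes only bank r13/r14
    uint32_t R_ABT[3];
    uint32_t R_IRQ[3];
    uint32_t R_UND[3];
};

// reg arrives pre-adjusted as (reg - 8), exactly like ABI_PARAM2 in the
// emitted code. FIQ uses it directly; every other mode first subtracts
// (13 - 8) and bails out if the index went negative, i.e. r8-r12 are not
// banked outside FIQ. Returns true if the register was banked (the emitted
// stub signals the opposite through the carry flag instead).
bool WriteBankedSketch(ARMBanks* cpu, uint32_t mode, int reg, uint32_t val)
{
    if (mode == 0x11) { cpu->R_FIQ[reg] = val; return true; }

    reg -= 13 - 8;
    if (reg < 0)
        return false;                       // not a banked register here

    switch (mode)
    {
    case 0x12: cpu->R_IRQ[reg] = val; return true;
    case 0x13: cpu->R_SVC[reg] = val; return true;
    case 0x17: cpu->R_ABT[reg] = val; return true;
    case 0x1B: cpu->R_UND[reg] = val; return true;
    default:   return false;                // USR/SYS: nothing to do
    }
}
```

A caller such as Comp_MemAccessBlock passes `reg - 8` in ABI_PARAM2, which is why only the non-FIQ paths subtract `13 - 8` before indexing.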
```diff
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h
index 45b488a..89dfe28 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.h
@@ -40,6 +40,7 @@ private:
 
     void Comp_AddCycles_C(bool forceNonConstant = false);
     void Comp_AddCycles_CI(u32 i);
+    void Comp_AddCycles_CI(Gen::X64Reg i, int add);
 
     enum
     {
@@ -55,6 +56,10 @@ private:
     void A_Comp_MovOp();
     void A_Comp_CmpOp();
 
+    void A_Comp_MUL_MLA();
+
+    void A_Comp_CLZ();
+
     void A_Comp_MemWB();
     void A_Comp_MemHalf();
     void A_Comp_LDM_STM();
@@ -62,11 +67,13 @@ private:
     void A_Comp_BranchImm();
     void A_Comp_BranchXchangeReg();
+
 
     void T_Comp_ShiftImm();
     void T_Comp_AddSub_();
     void T_Comp_ALU_Imm8();
     void T_Comp_ALU();
     void T_Comp_ALU_HiReg();
+    void T_Comp_MUL();
 
     void T_Comp_RelAddr();
     void T_Comp_AddSP();
@@ -88,7 +95,7 @@ private:
     void T_Comp_BL_Merged(FetchedInstr prefix);
 
     void Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size);
-    s32 Comp_MemAccessBlock(Gen::OpArg rb, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
+    s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
 
     void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
         Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
@@ -96,6 +103,8 @@ private:
         Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
     void Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed);
 
+    void Comp_MulOp(bool S, bool add, Gen::OpArg rd, Gen::OpArg rm, Gen::OpArg rs, Gen::OpArg rn);
+
     void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed);
 
     void* Gen_MemoryRoutine9(bool store, int size);
@@ -133,6 +142,9 @@ private:
     void* MemoryFuncsSeq9[2][2];
    void* MemoryFuncsSeq7[2][2][2];
 
+    void* ReadBanked;
+    void* WriteBanked;
+
     bool CPSRDirty = false;
 
     FetchedInstr CurInstr;
diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
index 69b324c..8fbcafd 100644
--- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
@@ -1,7 +1,5 @@
 #include "ARMJIT_Compiler.h"
 
-#include "../GPU.h"
-#include "../Wifi.h"
 
 using namespace Gen;
 
@@ -362,7 +360,7 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
     CMP(32, R(ABI_PARAM3), Imm8(1));
     FixupBranch skipSequential = J_CC(CC_E);
     SUB(32, R(ABI_PARAM3), Imm8(1));
-    IMUL(32, R(ABI_PARAM3));
+    IMUL(32, RSCRATCH, R(ABI_PARAM3));
     ADD(32, R(ABI_PARAM2), R(RSCRATCH));
     SetJumpTarget(skipSequential);
 
@@ -413,10 +411,11 @@ void* Compiler::Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM)
     POP(ABI_PARAM4);
     POP(ABI_PARAM3);
 
+    // TODO: optimise this
     CMP(32, R(ABI_PARAM3), Imm8(1));
     FixupBranch skipSequential = J_CC(CC_E);
     SUB(32, R(ABI_PARAM3), Imm8(1));
-    IMUL(32, R(ABI_PARAM3));
+    IMUL(32, RSCRATCH, R(ABI_PARAM3));
     ADD(32, R(ABI_PARAM2), R(RSCRATCH));
     SetJumpTarget(skipSequential);
 
@@ -458,25 +457,35 @@ void Compiler::Comp_MemAccess(OpArg rd, bool signExtend, bool store, int size)
     }
 }
 
-s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
+void printStuff2(u32 a, u32 b)
 {
+    printf("b %x %x\n", a, b);
+}
+
+s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
+{
+    FILE* f;
+    const u8* start = GetCodePtr();
+
     int regsCount = regs.Count();
 
     if (decrement)
     {
-        MOV_sum(32, ABI_PARAM1, rb, Imm32(-regsCount * 4));
+        MOV_sum(32, ABI_PARAM1, MapReg(rn), Imm32(-regsCount * 4));
         preinc ^= true;
     }
     else
-        MOV(32, R(ABI_PARAM1), rb);
+        MOV(32, R(ABI_PARAM1), MapReg(rn));
+
+    s32 offset = (regsCount * 4) * (decrement ? -1 : 1);
 
-    MOV(32, R(ABI_PARAM3), Imm32(regsCount));
-    u32 cycles = Num
+    u32 cycles = Num
         ? NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
         : (R15 & 0x2 ? 0 : CurInstr.CodeCycles);
 
     MOV(32, R(ABI_PARAM4), Imm32(cycles));
     if (!store)
     {
+        MOV(32, R(ABI_PARAM3), Imm32(regsCount));
         SUB(32, R(RSP), regsCount < 16 ? Imm8(regsCount * 8) : Imm32(regsCount * 8));
         MOV(64, R(ABI_PARAM2), R(RSP));
@@ -484,20 +493,29 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei
             ? MemoryFuncsSeq9[0][preinc]
             : MemoryFuncsSeq7[0][preinc][CodeRegion == 0x02]);
 
+        bool firstUserMode = true;
         for (int reg = 15; reg >= 0; reg--)
        {
             if (regs[reg])
             {
-                /*if (usermode && reg >= 8 && reg < 15)
+                if (usermode && reg >= 8 && reg < 15)
                 {
-                    MOV(32, R(RSCRATCH2), R(RCPSR));
-                    AND(32, R(RSCRATCH2), Imm8(0x1F));
-                    // (RSCRATCH2 - 0x11) * 8 + squeezePointer(userModeOffsets) + (reg - 8), algebra is great!
-                    MOVZX(32, 8, RSCRATCH2, MScaled(RSCRATCH2, SCALE_8, squeezePointer(userModeOffsets) - 0x10 * 8 + (reg - 8)));
-                    POP(RSCRATCH);
-                    MOV(32, MRegSum(RCPU, RSCRATCH2), R(RSCRATCH));
+                    if (firstUserMode)
+                    {
+                        MOV(32, R(RSCRATCH), R(RCPSR));
+                        AND(32, R(RSCRATCH), Imm8(0x1F));
+                        firstUserMode = false;
+                    }
+                    MOV(32, R(ABI_PARAM2), Imm32(reg - 8));
+                    POP(ABI_PARAM3);
+                    CALL(WriteBanked);
+                    FixupBranch sucessfulWritten = J_CC(CC_NC);
+                    if (RegCache.Mapping[reg] != INVALID_REG && RegCache.DirtyRegs & (1 << reg))
+                        MOV(32, R(RegCache.Mapping[reg]), R(ABI_PARAM3));
+                    SaveReg(reg, ABI_PARAM3);
+                    SetJumpTarget(sucessfulWritten);
                 }
-                else */if (RegCache.Mapping[reg] == INVALID_REG)
+                else if (RegCache.Mapping[reg] == INVALID_REG)
                 {
                     assert(reg != 15);
 
@@ -516,32 +534,48 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei
 
         if (regs[15])
         {
             if (Num == 1)
-                OR(32, MapReg(15), Imm8(1));
+            {
+                if (Thumb)
+                    OR(32, MapReg(15), Imm8(1));
+                else
+                    AND(32, MapReg(15), Imm8(0xFE));
+            }
             Comp_JumpTo(MapReg(15).GetSimpleReg(), usermode);
         }
     }
     else
     {
+        bool firstUserMode = true;
         for (int reg : regs)
         {
-            /*if (usermode && reg >= 8 && reg < 15)
+            if (usermode && reg >= 8 && reg < 15)
             {
-                MOV(32, R(RSCRATCH), R(RCPSR));
-                AND(32, R(RSCRATCH), Imm8(0x1F));
-                // (RSCRATCH2 - 0x11) * 8 + squeezePointer(userModeOffsets) + (reg - 8), algebra is great!
-                MOVZX(32, 8, RSCRATCH, MScaled(RSCRATCH, SCALE_8, squeezePointer(userModeOffsets) - 0x10 * 8 + (reg - 8)));
-                MOV(32, R(RSCRATCH), MRegSum(RCPU, RSCRATCH));
-                PUSH(RSCRATCH);
+                if (firstUserMode)
+                {
+                    MOV(32, R(RSCRATCH), R(RCPSR));
+                    AND(32, R(RSCRATCH), Imm8(0x1F));
+                    firstUserMode = false;
+                }
+                if (RegCache.Mapping[reg] == INVALID_REG)
+                    LoadReg(reg, ABI_PARAM3);
+                else
+                    MOV(32, R(ABI_PARAM3), R(RegCache.Mapping[reg]));
+                MOV(32, R(ABI_PARAM2), Imm32(reg - 8));
+                CALL(ReadBanked);
+                PUSH(ABI_PARAM3);
             }
-            else */if (RegCache.Mapping[reg] == INVALID_REG)
+            else if (RegCache.Mapping[reg] == INVALID_REG)
             {
                 LoadReg(reg, RSCRATCH);
                 PUSH(RSCRATCH);
             }
             else
+            {
                 PUSH(MapReg(reg).GetSimpleReg());
+            }
         }
 
         MOV(64, R(ABI_PARAM2), R(RSP));
+        MOV(32, R(ABI_PARAM3), Imm32(regsCount));
 
         CALL(Num == 0
             ? MemoryFuncsSeq9[1][preinc]
@@ -550,7 +584,14 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei
         ADD(32, R(RSP), regsCount < 16 ? Imm8(regsCount * 8) : Imm32(regsCount * 8));
     }
 
-    return (regsCount * 4) * (decrement ? -1 : 1);
+    if (usermode && !store)
+    {
+        f= fopen("ldm", "a");
+        fwrite(start, GetCodePtr() - start, 1, f);
+        fclose(f);
+    }
+
+    return offset;
 }
 
 OpArg Compiler::A_Comp_GetMemWBOffset()
@@ -697,16 +738,20 @@ void Compiler::A_Comp_LDM_STM()
 {
     BitSet16 regs(CurInstr.Instr & 0xFFFF);
 
-    bool load = (CurInstr.Instr >> 20) & 1;
-    bool pre = (CurInstr.Instr >> 24) & 1;
-    bool add = (CurInstr.Instr >> 23) & 1;
-    bool writeback = (CurInstr.Instr >> 21) & 1;
-    bool usermode = (CurInstr.Instr >> 22) & 1;
+    bool load = CurInstr.Instr & (1 << 20);
+    bool pre = CurInstr.Instr & (1 << 24);
+    bool add = CurInstr.Instr & (1 << 23);
+    bool writeback = CurInstr.Instr & (1 << 21);
+    bool usermode = CurInstr.Instr & (1 << 22);
 
     OpArg rn = MapReg(CurInstr.A_Reg(16));
 
-    s32 offset = Comp_MemAccessBlock(rn, regs, !load, pre, !add, false);
+    s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode);
 
+    if (load && writeback && regs[CurInstr.A_Reg(16)])
+        writeback = Num == 0
+            ? (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1))
+            : false;
     if (writeback)
         ADD(32, rn, offset >= INT8_MIN && offset < INT8_MAX ? Imm8(offset) : Imm32(offset));
 }
@@ -789,8 +834,7 @@ void Compiler::T_Comp_PUSH_POP()
     }
 
     OpArg sp = MapReg(13);
-
-    s32 offset = Comp_MemAccessBlock(sp, regs, !load, !load, !load, false);
+    s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false);
 
     ADD(32, sp, Imm8(offset)); // offset will be always be in range since PUSH accesses 9 regs max
 }
@@ -801,7 +845,7 @@ void Compiler::T_Comp_LDMIA_STMIA()
     OpArg rb = MapReg(CurInstr.T_Reg(8));
     bool load = CurInstr.Instr & (1 << 11);
 
-    s32 offset = Comp_MemAccessBlock(rb, regs, !load, false, false, false);
+    s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false);
 
     if (!load || !regs[CurInstr.T_Reg(8)])
        ADD(32, rb, Imm8(offset));
```
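Comp_MemAccessBlock now receives the base register number instead of a mapped OpArg, so A_Comp_LDM_STM can apply the base-in-list writeback rule itself. Below is a sketch of that predicate in plain C++; the bit expressions mirror the diff, while the function name is made up for illustration.

```cpp
#include <cstdint>

// Decides whether an LDM that both writes back and reloads its base register
// (rn) should still perform the writeback, per the expression added to
// A_Comp_LDM_STM. arm9 corresponds to Num == 0 in the JIT.
bool LDMWritebackAllowed(uint32_t regList, int rn, bool arm9)
{
    if (!arm9)
        return false;  // ARM7 path: the loaded value wins, writeback suppressed

    uint32_t rnBit = 1u << rn;
    bool rnIsOnlyReg  = (regList & ~rnBit) == 0;             // rn is the sole register
    bool regsAboveRn  = (regList & ~((2u << rn) - 1)) != 0;  // rn is not the last one transferred

    // ARM9 path: write back if rn is the only register in the list, or if it
    // is not the last register to be loaded; otherwise keep the loaded value.
    return rnIsOnlyReg || regsAboveRn;
}
```

Registers are transferred in ascending order, so "bits above rn are set" is the same as "rn is not the last register loaded", which is exactly what the `(2 << rn) - 1` mask tests for in the diff.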