diff options
Diffstat (limited to 'src/ARMJIT_x64')
| -rw-r--r-- | src/ARMJIT_x64/ARMJIT_ALU.cpp | 74 | ||||
| -rw-r--r-- | src/ARMJIT_x64/ARMJIT_Branch.cpp | 7 | ||||
| -rw-r--r-- | src/ARMJIT_x64/ARMJIT_Compiler.cpp | 108 | ||||
| -rw-r--r-- | src/ARMJIT_x64/ARMJIT_Compiler.h | 14 | ||||
| -rw-r--r-- | src/ARMJIT_x64/ARMJIT_LoadStore.cpp | 116 | 
5 files changed, 272 insertions, 47 deletions
| diff --git a/src/ARMJIT_x64/ARMJIT_ALU.cpp b/src/ARMJIT_x64/ARMJIT_ALU.cpp index c22751e..cbe67fd 100644 --- a/src/ARMJIT_x64/ARMJIT_ALU.cpp +++ b/src/ARMJIT_x64/ARMJIT_ALU.cpp @@ -223,6 +223,73 @@ void Compiler::A_Comp_MovOp()          Comp_JumpTo(rd.GetSimpleReg(), S);  } +void Compiler::A_Comp_CLZ() +{ +    OpArg rd = MapReg(CurInstr.A_Reg(12)); +    OpArg rm = MapReg(CurInstr.A_Reg(0)); + +    MOV(32, R(RSCRATCH), Imm32(32)); +    TEST(32, rm, rm); +    FixupBranch skipZero = J_CC(CC_Z); +    BSR(32, RSCRATCH, rm); +    XOR(32, R(RSCRATCH), Imm8(0x1F)); // 31 - RSCRATCH +    SetJumpTarget(skipZero); +    MOV(32, rd, R(RSCRATCH)); +} + +void Compiler::Comp_MulOp(bool S, bool add, Gen::OpArg rd, Gen::OpArg rm, Gen::OpArg rs, Gen::OpArg rn) +{ +    if (Num == 0) +        Comp_AddCycles_CI(S ? 3 : 1); +    else +    { +        XOR(32, R(RSCRATCH), R(RSCRATCH)); +        MOV(32, R(RSCRATCH3), rs); +        TEST(32, R(RSCRATCH3), R(RSCRATCH3)); +        FixupBranch zeroBSR = J_CC(CC_Z); +        BSR(32, RSCRATCH2, R(RSCRATCH3)); +        NOT(32, R(RSCRATCH3)); +        BSR(32, RSCRATCH, R(RSCRATCH3)); +        CMP(32, R(RSCRATCH2), R(RSCRATCH)); +        CMOVcc(32, RSCRATCH, R(RSCRATCH2), CC_L); +        SHR(32, R(RSCRATCH), Imm8(3)); +        SetJumpTarget(zeroBSR); // fortunately that's even right +        Comp_AddCycles_CI(RSCRATCH, add ? 2 : 1); +    } + +    static_assert(EAX == RSCRATCH); +    MOV(32, R(RSCRATCH), rm); +    if (add) +    { +        IMUL(32, RSCRATCH, rs); +        LEA(32, rd.GetSimpleReg(), MRegSum(RSCRATCH, rn.GetSimpleReg())); +        TEST(32, rd, rd); +    } +    else +    { +        IMUL(32, RSCRATCH, rs); +        MOV(32, rd, R(RSCRATCH)); +        TEST(32, R(RSCRATCH), R(RSCRATCH)); +    } + +    if (S) +        Comp_RetriveFlags(false, false, false); +} + +void Compiler::A_Comp_MUL_MLA() +{ +    bool S = CurInstr.Instr & (1 << 20); +    bool add = CurInstr.Instr & (1 << 21); +    OpArg rd = MapReg(CurInstr.A_Reg(16)); +    OpArg rm = MapReg(CurInstr.A_Reg(0)); +    OpArg rs = MapReg(CurInstr.A_Reg(8)); +    OpArg rn; +    if (add) +        rn = MapReg(CurInstr.A_Reg(12)); + +    Comp_MulOp(S, add, rd, rm, rs, rn); +} +  void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed)  {      CPSRDirty = true; @@ -455,6 +522,13 @@ void Compiler::T_Comp_ALU_Imm8()      }  } +void Compiler::T_Comp_MUL() +{ +    OpArg rd = MapReg(CurInstr.T_Reg(0)); +    OpArg rs = MapReg(CurInstr.T_Reg(3)); +    Comp_MulOp(true, false, rd, rd, rs, Imm8(-1)); +} +  void Compiler::T_Comp_ALU()  {      OpArg rd = MapReg(CurInstr.T_Reg(0)); diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp index fb2acba..bd01ffb 100644 --- a/src/ARMJIT_x64/ARMJIT_Branch.cpp +++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp @@ -126,17 +126,14 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)  void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)  { -    BitSet16 hiRegsLoaded(RegCache.DirtyRegs & 0xFFFF0000); +    BitSet16 hiRegsLoaded(RegCache.DirtyRegs & 0xFF00);      bool previouslyDirty = CPSRDirty;      SaveCPSR();      if (restoreCPSR)      {          if (Thumb || CurInstr.Cond() >= 0xE) -        { -            for (int reg : hiRegsLoaded) -                RegCache.UnloadRegister(reg); -        } +            RegCache.Flush();          else          {              // the ugly way... diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index 6799a90..8a895d1 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -26,10 +26,14 @@ const int RegisterCache<Compiler, X64Reg>::NativeRegsAvailable =  #endif  ; +int instructionPopularityARM[ARMInstrInfo::ak_Count]; +  Compiler::Compiler()  {      AllocCodeSpace(1024 * 1024 * 16); +    memset(instructionPopularityARM, 0, sizeof(instructionPopularityARM)); +      for (int i = 0; i < 3; i++)      {          for (int j = 0; j < 2; j++) @@ -47,7 +51,88 @@ Compiler::Compiler()              MemoryFuncsSeq7[i][j][1] = Gen_MemoryRoutineSeq7(i, j, true);          } -    ResetStart = GetWritableCodePtr(); +    { +        // RSCRATCH mode +        // ABI_PARAM2 reg number +        // ABI_PARAM3 value in current mode +        // ret - ABI_PARAM3 +        ReadBanked = (void*)GetWritableCodePtr(); +        CMP(32, R(RSCRATCH), Imm8(0x11)); +        FixupBranch fiq = J_CC(CC_E); +        SUB(32, R(ABI_PARAM2), Imm8(13 - 8)); +        FixupBranch notEverything = J_CC(CC_L); +        CMP(32, R(RSCRATCH), Imm8(0x12)); +        FixupBranch irq = J_CC(CC_E); +        CMP(32, R(RSCRATCH), Imm8(0x13)); +        FixupBranch svc = J_CC(CC_E); +        CMP(32, R(RSCRATCH), Imm8(0x17)); +        FixupBranch abt = J_CC(CC_E); +        CMP(32, R(RSCRATCH), Imm8(0x1B)); +        FixupBranch und = J_CC(CC_E); +        SetJumpTarget(notEverything); +        RET(); + +        SetJumpTarget(fiq); +        MOV(32, R(ABI_PARAM3), MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_FIQ))); +        RET(); +        SetJumpTarget(irq); +        MOV(32, R(ABI_PARAM3), MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_IRQ))); +        RET(); +        SetJumpTarget(svc); +        MOV(32, R(ABI_PARAM3), MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_SVC))); +        RET(); +        SetJumpTarget(abt); +        MOV(32, R(ABI_PARAM3), MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_ABT))); +        RET(); +        SetJumpTarget(und); +        MOV(32, R(ABI_PARAM3), MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_UND))); +        RET(); +        } +    { +        // RSCRATCH  mode +        // ABI_PARAM2 reg n +        // ABI_PARAM3 value +        // carry flag set if the register isn't banked +        WriteBanked = (void*)GetWritableCodePtr(); +        CMP(32, R(RSCRATCH), Imm8(0x11)); +        FixupBranch fiq = J_CC(CC_E); +        SUB(32, R(ABI_PARAM2), Imm8(13 - 8)); +        FixupBranch notEverything = J_CC(CC_L); +        CMP(32, R(RSCRATCH), Imm8(0x12)); +        FixupBranch irq = J_CC(CC_E); +        CMP(32, R(RSCRATCH), Imm8(0x13)); +        FixupBranch svc = J_CC(CC_E); +        CMP(32, R(RSCRATCH), Imm8(0x17)); +        FixupBranch abt = J_CC(CC_E); +        CMP(32, R(RSCRATCH), Imm8(0x1B)); +        FixupBranch und = J_CC(CC_E); +        SetJumpTarget(notEverything); +        STC(); +        RET(); + +        SetJumpTarget(fiq); +        MOV(32, MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_FIQ)), R(ABI_PARAM3)); +        CLC(); +        RET(); +        SetJumpTarget(irq); +        MOV(32, MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_IRQ)), R(ABI_PARAM3)); +        CLC(); +        RET(); +        SetJumpTarget(svc); +        MOV(32, MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_SVC)), R(ABI_PARAM3)); +        CLC(); +        RET(); +        SetJumpTarget(abt); +        MOV(32, MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_ABT)), R(ABI_PARAM3)); +        CLC(); +        RET(); +        SetJumpTarget(und); +        MOV(32, MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_UND)), R(ABI_PARAM3)); +        CLC(); +        RET(); +    } + +    ResetStart = (void*)GetWritableCodePtr();  }  void Compiler::LoadCPSR() @@ -136,6 +221,9 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs          CurInstr = instrs[i];          CompileFunc comp = GetCompFunc(CurInstr.Info.Kind); +         +        if (!Thumb) +            instructionPopularityARM[CurInstr.Info.Kind] += comp == NULL;          if (comp == NULL || i == instrsCount - 1)          { @@ -287,9 +375,9 @@ CompileFunc Compiler::GetCompFunc(int kind)          // CMN          A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp,          // Mul -        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +        A_Comp_MUL_MLA, A_Comp_MUL_MLA, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,          // ARMv5 stuff -        NULL, NULL, NULL, NULL, NULL, +        A_Comp_CLZ, NULL, NULL, NULL, NULL,          // STR          A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,          //NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -315,7 +403,7 @@ CompileFunc Compiler::GetCompFunc(int kind)          // swap          NULL, NULL,          // LDM/STM -        NULL, NULL, +        A_Comp_LDM_STM, A_Comp_LDM_STM,          // Branch          A_Comp_BranchImm, A_Comp_BranchImm, A_Comp_BranchImm, A_Comp_BranchXchangeReg, A_Comp_BranchXchangeReg,          // system stuff @@ -333,7 +421,7 @@ CompileFunc Compiler::GetCompFunc(int kind)          T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,          T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,          T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, -        T_Comp_ALU, NULL, T_Comp_ALU, T_Comp_ALU, +        T_Comp_ALU, T_Comp_MUL, T_Comp_ALU, T_Comp_ALU,          // hi reg          T_Comp_ALU_HiReg, T_Comp_ALU_HiReg, T_Comp_ALU_HiReg,          // pc/sp relative @@ -387,4 +475,14 @@ void Compiler::Comp_AddCycles_CI(u32 i)          ConstantCycles += cycles;  } +void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add) +{ +    s32 cycles = Num ? +        NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2] +        : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles); +     +    LEA(32, RSCRATCH, MDisp(i, add + cycles)); +    ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH)); +} +  }
\ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h index 45b488a..89dfe28 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.h +++ b/src/ARMJIT_x64/ARMJIT_Compiler.h @@ -40,6 +40,7 @@ private:      void Comp_AddCycles_C(bool forceNonConstant = false);      void Comp_AddCycles_CI(u32 i); +    void Comp_AddCycles_CI(Gen::X64Reg i, int add);      enum      { @@ -55,6 +56,10 @@ private:      void A_Comp_MovOp();      void A_Comp_CmpOp(); +    void A_Comp_MUL_MLA(); + +    void A_Comp_CLZ(); +          void A_Comp_MemWB();      void A_Comp_MemHalf();      void A_Comp_LDM_STM(); @@ -62,11 +67,13 @@ private:      void A_Comp_BranchImm();      void A_Comp_BranchXchangeReg(); +      void T_Comp_ShiftImm();      void T_Comp_AddSub_();      void T_Comp_ALU_Imm8();      void T_Comp_ALU();      void T_Comp_ALU_HiReg(); +    void T_Comp_MUL();      void T_Comp_RelAddr();      void T_Comp_AddSP(); @@ -88,7 +95,7 @@ private:      void T_Comp_BL_Merged(FetchedInstr prefix);      void Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size); -    s32 Comp_MemAccessBlock(Gen::OpArg rb, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode); +    s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);      void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),           Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags); @@ -96,6 +103,8 @@ private:          Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);      void Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed); +    void Comp_MulOp(bool S, bool add, Gen::OpArg rd, Gen::OpArg rm, Gen::OpArg rs, Gen::OpArg rn); +      void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed);      void* Gen_MemoryRoutine9(bool store, int size); @@ -133,6 +142,9 @@ private:      void* MemoryFuncsSeq9[2][2];      void* MemoryFuncsSeq7[2][2][2]; +    void* ReadBanked; +    void* WriteBanked; +      bool CPSRDirty = false;      FetchedInstr CurInstr; diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp index 69b324c..8fbcafd 100644 --- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp @@ -1,7 +1,5 @@  #include "ARMJIT_Compiler.h" -#include "../GPU.h" -#include "../Wifi.h"  using namespace Gen; @@ -362,7 +360,7 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)      CMP(32, R(ABI_PARAM3), Imm8(1));      FixupBranch skipSequential = J_CC(CC_E);      SUB(32, R(ABI_PARAM3), Imm8(1)); -    IMUL(32, R(ABI_PARAM3)); +    IMUL(32, RSCRATCH, R(ABI_PARAM3));      ADD(32, R(ABI_PARAM2), R(RSCRATCH));      SetJumpTarget(skipSequential); @@ -413,10 +411,11 @@ void* Compiler::Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM)      POP(ABI_PARAM4);      POP(ABI_PARAM3); +    // TODO: optimise this      CMP(32, R(ABI_PARAM3), Imm8(1));      FixupBranch skipSequential = J_CC(CC_E);      SUB(32, R(ABI_PARAM3), Imm8(1)); -    IMUL(32, R(ABI_PARAM3)); +    IMUL(32, RSCRATCH, R(ABI_PARAM3));      ADD(32, R(ABI_PARAM2), R(RSCRATCH));      SetJumpTarget(skipSequential); @@ -458,25 +457,35 @@ void Compiler::Comp_MemAccess(OpArg rd, bool signExtend, bool store, int size)      }  } -s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode) +void printStuff2(u32 a, u32 b)  { +    printf("b %x %x\n", a, b); +} + +s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode) +{ +    FILE* f; +    const u8* start = GetCodePtr(); +      int regsCount = regs.Count();      if (decrement)      { -        MOV_sum(32, ABI_PARAM1, rb, Imm32(-regsCount * 4)); +        MOV_sum(32, ABI_PARAM1, MapReg(rn), Imm32(-regsCount * 4));          preinc ^= true;      }      else -        MOV(32, R(ABI_PARAM1), rb); +        MOV(32, R(ABI_PARAM1), MapReg(rn)); + +    s32 offset = (regsCount * 4) * (decrement ? -1 : 1); -    MOV(32, R(ABI_PARAM3), Imm32(regsCount)); -        u32 cycles = Num +    u32 cycles = Num              ? NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]              : (R15 & 0x2 ? 0 : CurInstr.CodeCycles);      MOV(32, R(ABI_PARAM4), Imm32(cycles));      if (!store)      { +        MOV(32, R(ABI_PARAM3), Imm32(regsCount));          SUB(32, R(RSP), regsCount < 16 ? Imm8(regsCount * 8) : Imm32(regsCount * 8));          MOV(64, R(ABI_PARAM2), R(RSP)); @@ -484,20 +493,29 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei              ? MemoryFuncsSeq9[0][preinc]              : MemoryFuncsSeq7[0][preinc][CodeRegion == 0x02]); +        bool firstUserMode = true;          for (int reg = 15; reg >= 0; reg--)          {              if (regs[reg])              { -                /*if (usermode && reg >= 8 && reg < 15) +                if (usermode && reg >= 8 && reg < 15)                  { -                    MOV(32, R(RSCRATCH2), R(RCPSR)); -                    AND(32, R(RSCRATCH2), Imm8(0x1F)); -                    // (RSCRATCH2 - 0x11) * 8 + squeezePointer(userModeOffsets) + (reg - 8), algebra is great! -                    MOVZX(32, 8, RSCRATCH2, MScaled(RSCRATCH2, SCALE_8, squeezePointer(userModeOffsets) - 0x10 * 8 + (reg - 8))); -                    POP(RSCRATCH); -                    MOV(32, MRegSum(RCPU, RSCRATCH2), R(RSCRATCH)); +                    if (firstUserMode) +                    { +                        MOV(32, R(RSCRATCH), R(RCPSR)); +                        AND(32, R(RSCRATCH), Imm8(0x1F)); +                        firstUserMode = false; +                    } +                    MOV(32, R(ABI_PARAM2), Imm32(reg - 8)); +                    POP(ABI_PARAM3); +                    CALL(WriteBanked); +                    FixupBranch sucessfulWritten = J_CC(CC_NC); +                    if (RegCache.Mapping[reg] != INVALID_REG && RegCache.DirtyRegs & (1 << reg)) +                        MOV(32, R(RegCache.Mapping[reg]), R(ABI_PARAM3)); +                    SaveReg(reg, ABI_PARAM3); +                    SetJumpTarget(sucessfulWritten);                  } -                else */if (RegCache.Mapping[reg] == INVALID_REG) +                else if (RegCache.Mapping[reg] == INVALID_REG)                  {                      assert(reg != 15); @@ -516,32 +534,48 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei          if (regs[15])          {              if (Num == 1) -                OR(32, MapReg(15), Imm8(1)); +            { +                if (Thumb) +                    OR(32, MapReg(15), Imm8(1)); +                else +                    AND(32, MapReg(15), Imm8(0xFE)); +            }              Comp_JumpTo(MapReg(15).GetSimpleReg(), usermode);          }      }      else      { +        bool firstUserMode = true;          for (int reg : regs)          { -            /*if (usermode && reg >= 8 && reg < 15) +            if (usermode && reg >= 8 && reg < 15)              { -                MOV(32, R(RSCRATCH), R(RCPSR)); -                AND(32, R(RSCRATCH), Imm8(0x1F)); -                // (RSCRATCH2 - 0x11) * 8 + squeezePointer(userModeOffsets) + (reg - 8), algebra is great! -                MOVZX(32, 8, RSCRATCH, MScaled(RSCRATCH, SCALE_8, squeezePointer(userModeOffsets) - 0x10 * 8 + (reg - 8))); -                MOV(32, R(RSCRATCH), MRegSum(RCPU, RSCRATCH)); -                PUSH(RSCRATCH); +                if (firstUserMode) +                { +                    MOV(32, R(RSCRATCH), R(RCPSR)); +                    AND(32, R(RSCRATCH), Imm8(0x1F)); +                    firstUserMode = false; +                } +                if (RegCache.Mapping[reg] == INVALID_REG) +                    LoadReg(reg, ABI_PARAM3); +                else +                    MOV(32, R(ABI_PARAM3), R(RegCache.Mapping[reg])); +                MOV(32, R(ABI_PARAM2), Imm32(reg - 8)); +                CALL(ReadBanked); +                PUSH(ABI_PARAM3);              } -            else */if (RegCache.Mapping[reg] == INVALID_REG) +            else if (RegCache.Mapping[reg] == INVALID_REG)              {                  LoadReg(reg, RSCRATCH);                  PUSH(RSCRATCH);              }              else +            {                  PUSH(MapReg(reg).GetSimpleReg()); +            }          }          MOV(64, R(ABI_PARAM2), R(RSP)); +        MOV(32, R(ABI_PARAM3), Imm32(regsCount));          CALL(Num == 0              ? MemoryFuncsSeq9[1][preinc] @@ -550,7 +584,14 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei          ADD(32, R(RSP), regsCount < 16 ? Imm8(regsCount * 8) : Imm32(regsCount * 8));      } -    return (regsCount * 4) * (decrement ? -1 : 1); +    if (usermode && !store) +    { +        f= fopen("ldm", "a"); +        fwrite(start, GetCodePtr() - start, 1, f); +        fclose(f); +    } + +    return offset;  }  OpArg Compiler::A_Comp_GetMemWBOffset() @@ -697,16 +738,20 @@ void Compiler::A_Comp_LDM_STM()  {      BitSet16 regs(CurInstr.Instr & 0xFFFF); -    bool load = (CurInstr.Instr >> 20) & 1; -    bool pre = (CurInstr.Instr >> 24) & 1; -    bool add = (CurInstr.Instr >> 23) & 1; -    bool writeback = (CurInstr.Instr >> 21) & 1; -    bool usermode = (CurInstr.Instr >> 22) & 1; +    bool load = CurInstr.Instr & (1 << 20); +    bool pre = CurInstr.Instr & (1 << 24); +    bool add = CurInstr.Instr & (1 << 23); +    bool writeback = CurInstr.Instr & (1 << 21); +    bool usermode = CurInstr.Instr & (1 << 22);      OpArg rn = MapReg(CurInstr.A_Reg(16)); -    s32 offset = Comp_MemAccessBlock(rn, regs, !load, pre, !add, false); +    s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode); +    if (load && writeback && regs[CurInstr.A_Reg(16)]) +        writeback = Num == 0 +            ? (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1)) +            : false;      if (writeback)          ADD(32, rn, offset >= INT8_MIN && offset < INT8_MAX ? Imm8(offset) : Imm32(offset));  } @@ -789,8 +834,7 @@ void Compiler::T_Comp_PUSH_POP()      }      OpArg sp = MapReg(13); -     -    s32 offset = Comp_MemAccessBlock(sp, regs, !load, !load, !load, false); +    s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false);      ADD(32, sp, Imm8(offset)); // offset will be always be in range since PUSH accesses 9 regs max  } @@ -801,7 +845,7 @@ void Compiler::T_Comp_LDMIA_STMIA()      OpArg rb = MapReg(CurInstr.T_Reg(8));      bool load = CurInstr.Instr & (1 << 11); -    s32 offset = Comp_MemAccessBlock(rb, regs, !load, false, false, false); +    s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false);      if (!load || !regs[CurInstr.T_Reg(8)])          ADD(32, rb, Imm8(offset)); |