Diffstat (limited to 'src/ARMJIT_A64')
-rw-r--r--   src/ARMJIT_A64/ARMJIT_ALU.cpp       |  13
-rw-r--r--   src/ARMJIT_A64/ARMJIT_Branch.cpp    |  10
-rw-r--r--   src/ARMJIT_A64/ARMJIT_Compiler.cpp  | 134
-rw-r--r--   src/ARMJIT_A64/ARMJIT_Compiler.h    |   6
-rw-r--r--   src/ARMJIT_A64/ARMJIT_Linkage.s     |   6
-rw-r--r--   src/ARMJIT_A64/ARMJIT_LoadStore.cpp |  91
6 files changed, 165 insertions, 95 deletions
diff --git a/src/ARMJIT_A64/ARMJIT_ALU.cpp b/src/ARMJIT_A64/ARMJIT_ALU.cpp
index 5f021a0..26a89cb 100644
--- a/src/ARMJIT_A64/ARMJIT_ALU.cpp
+++ b/src/ARMJIT_A64/ARMJIT_ALU.cpp
@@ -434,6 +434,19 @@ void Compiler::A_Comp_GetOp2(bool S, Op2& op2)
     if (CurInstr.Instr & (1 << 25))
     {
         Comp_AddCycles_C();
+
+        u32 shift = (CurInstr.Instr >> 7) & 0x1E;
+        u32 imm = ROR(CurInstr.Instr & 0xFF, shift);
+
+        if (S && shift && (CurInstr.SetFlags & 0x2))
+        {
+            CPSRDirty = true;
+            if (imm & 0x80000000)
+                ORRI2R(RCPSR, RCPSR, 1 << 29);
+            else
+                ANDI2R(RCPSR, RCPSR, ~(1 << 29));
+        }
+
         op2 = Op2(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E));
     }
     else
diff --git a/src/ARMJIT_A64/ARMJIT_Branch.cpp b/src/ARMJIT_A64/ARMJIT_Branch.cpp
index f130938..117eaa0 100644
--- a/src/ARMJIT_A64/ARMJIT_Branch.cpp
+++ b/src/ARMJIT_A64/ARMJIT_Branch.cpp
@@ -143,7 +143,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
     if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
         ConstantCycles += cycles;
     else
-        SUB(RCycles, RCycles, cycles);
+        ADD(RCycles, RCycles, cycles);
 }
@@ -181,7 +181,7 @@ void* Compiler::Gen_JumpTo9(int kind)
         STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15]));
         ADD(W1, W1, W1);
-        SUB(RCycles, RCycles, W1);
+        ADD(RCycles, RCycles, W1);
         RET();
     }
@@ -201,7 +201,7 @@
         ADD(W2, W1, W1);
         TSTI2R(W0, 0x2);
         CSEL(W1, W1, W2, CC_EQ);
-        SUB(RCycles, RCycles, W1);
+        ADD(RCycles, RCycles, W1);
         RET();
     }
@@ -229,7 +229,7 @@ void* Compiler::Gen_JumpTo7(int kind)
         UBFX(W2, W3, 0, 8);
         UBFX(W3, W3, 8, 8);
         ADD(W2, W3, W2);
-        SUB(RCycles, RCycles, W2);
+        ADD(RCycles, RCycles, W2);
 
         ANDI2R(W0, W0, ~3);
@@ -253,7 +253,7 @@
         UBFX(W2, W3, 16, 8);
         UBFX(W3, W3, 24, 8);
         ADD(W2, W3, W2);
-        SUB(RCycles, RCycles, W2);
+        ADD(RCycles, RCycles, W2);
 
         ANDI2R(W0, W0, ~1);
diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
index 62323ff..b046123 100644
--- a/src/ARMJIT_A64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
@@ -312,59 +312,93 @@ Compiler::Compiler()
         RET();
     }
 
-    for (int num = 0; num < 2; num++)
+    for (int consoleType = 0; consoleType < 2; consoleType++)
     {
-        for (int size = 0; size < 3; size++)
+        for (int num = 0; num < 2; num++)
         {
-            for (int reg = 0; reg < 8; reg++)
+            for (int size = 0; size < 3; size++)
             {
-                ARM64Reg rdMapped = (ARM64Reg)(W19 + reg);
-                PatchedStoreFuncs[num][size][reg] = GetRXPtr();
-                if (num == 0)
+                for (int reg = 0; reg < 8; reg++)
                 {
-                    MOV(X1, RCPU);
-                    MOV(W2, rdMapped);
-                }
-                else
-                {
-                    MOV(W1, rdMapped);
-                }
-                ABI_PushRegisters({30});
-                switch ((8 << size) | num)
-                {
-                case 32: QuickCallFunction(X3, SlowWrite9<u32>); break;
-                case 33: QuickCallFunction(X3, SlowWrite7<u32>); break;
-                case 16: QuickCallFunction(X3, SlowWrite9<u16>); break;
-                case 17: QuickCallFunction(X3, SlowWrite7<u16>); break;
-                case 8: QuickCallFunction(X3, SlowWrite9<u8>); break;
-                case 9: QuickCallFunction(X3, SlowWrite7<u8>); break;
-                }
-                ABI_PopRegisters({30});
-                RET();
-
-                for (int signextend = 0; signextend < 2; signextend++)
-                {
-                    PatchedLoadFuncs[num][size][signextend][reg] = GetRXPtr();
+                    ARM64Reg rdMapped = (ARM64Reg)(W19 + reg);
+                    PatchedStoreFuncs[consoleType][num][size][reg] = GetRXPtr();
                     if (num == 0)
+                    {
                         MOV(X1, RCPU);
+                        MOV(W2, rdMapped);
+                    }
+                    else
+                    {
+                        MOV(W1, rdMapped);
+                    }
                     ABI_PushRegisters({30});
-                    switch ((8 << size) | num)
+                    if (consoleType == 0)
                     {
-                    case 32: QuickCallFunction(X3, SlowRead9<u32>); break;
-                    case 33: QuickCallFunction(X3, SlowRead7<u32>); break;
-                    case 16: QuickCallFunction(X3, SlowRead9<u16>); break;
-                    case 17: QuickCallFunction(X3, SlowRead7<u16>); break;
-                    case 8: QuickCallFunction(X3, SlowRead9<u8>); break;
-                    case 9: QuickCallFunction(X3, SlowRead7<u8>); break;
+                        switch ((8 << size) | num)
+                        {
+                        case 32: QuickCallFunction(X3, SlowWrite9<u32, 0>); break;
+                        case 33: QuickCallFunction(X3, SlowWrite7<u32, 0>); break;
+                        case 16: QuickCallFunction(X3, SlowWrite9<u16, 0>); break;
+                        case 17: QuickCallFunction(X3, SlowWrite7<u16, 0>); break;
+                        case 8: QuickCallFunction(X3, SlowWrite9<u8, 0>); break;
+                        case 9: QuickCallFunction(X3, SlowWrite7<u8, 0>); break;
+                        }
                     }
-                    ABI_PopRegisters({30});
-                    if (size == 32)
-                        MOV(rdMapped, W0);
-                    else if (signextend)
-                        SBFX(rdMapped, W0, 0, 8 << size);
                     else
-                        UBFX(rdMapped, W0, 0, 8 << size);
+                    {
+                        switch ((8 << size) | num)
+                        {
+                        case 32: QuickCallFunction(X3, SlowWrite9<u32, 1>); break;
+                        case 33: QuickCallFunction(X3, SlowWrite7<u32, 1>); break;
+                        case 16: QuickCallFunction(X3, SlowWrite9<u16, 1>); break;
+                        case 17: QuickCallFunction(X3, SlowWrite7<u16, 1>); break;
+                        case 8: QuickCallFunction(X3, SlowWrite9<u8, 1>); break;
+                        case 9: QuickCallFunction(X3, SlowWrite7<u8, 1>); break;
+                        }
+                    }
+
+                    ABI_PopRegisters({30});
                     RET();
+
+                    for (int signextend = 0; signextend < 2; signextend++)
+                    {
+                        PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetRXPtr();
+                        if (num == 0)
+                            MOV(X1, RCPU);
+                        ABI_PushRegisters({30});
+                        if (consoleType == 0)
+                        {
+                            switch ((8 << size) | num)
+                            {
+                            case 32: QuickCallFunction(X3, SlowRead9<u32, 0>); break;
+                            case 33: QuickCallFunction(X3, SlowRead7<u32, 0>); break;
+                            case 16: QuickCallFunction(X3, SlowRead9<u16, 0>); break;
+                            case 17: QuickCallFunction(X3, SlowRead7<u16, 0>); break;
+                            case 8: QuickCallFunction(X3, SlowRead9<u8, 0>); break;
+                            case 9: QuickCallFunction(X3, SlowRead7<u8, 0>); break;
+                            }
+                        }
+                        else
+                        {
+                            switch ((8 << size) | num)
+                            {
+                            case 32: QuickCallFunction(X3, SlowRead9<u32, 1>); break;
+                            case 33: QuickCallFunction(X3, SlowRead7<u32, 1>); break;
+                            case 16: QuickCallFunction(X3, SlowRead9<u16, 1>); break;
+                            case 17: QuickCallFunction(X3, SlowRead7<u16, 1>); break;
+                            case 8: QuickCallFunction(X3, SlowRead9<u8, 1>); break;
+                            case 9: QuickCallFunction(X3, SlowRead7<u8, 1>); break;
+                            }
+                        }
+                        ABI_PopRegisters({30});
+                        if (size == 32)
+                            MOV(rdMapped, W0);
+                        else if (signextend)
+                            SBFX(rdMapped, W0, 0, 8 << size);
+                        else
+                            UBFX(rdMapped, W0, 0, 8 << size);
+                        RET();
+                    }
                 }
             }
         }
@@ -595,7 +629,7 @@ void Compiler::Comp_BranchSpecialBehaviour(bool taken)
     {
         RegCache.PrepareExit();
 
-        SUB(RCycles, RCycles, ConstantCycles);
+        ADD(RCycles, RCycles, ConstantCycles);
         QuickTailCall(X0, ARM_Ret);
     }
 }
@@ -736,7 +770,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
 
     RegCache.Flush();
 
-    SUB(RCycles, RCycles, ConstantCycles);
+    ADD(RCycles, RCycles, ConstantCycles);
     QuickTailCall(X0, ARM_Ret);
 
     FlushIcache();
@@ -766,7 +800,7 @@ void Compiler::Comp_AddCycles_C(bool forceNonConstant)
     if (forceNonConstant)
         ConstantCycles += cycles;
    else
-        SUB(RCycles, RCycles, cycles);
+        ADD(RCycles, RCycles, cycles);
 }
 
 void Compiler::Comp_AddCycles_CI(u32 numI)
@@ -780,7 +814,7 @@
     if (Thumb || CurInstr.Cond() == 0xE)
         ConstantCycles += cycles;
     else
-        SUB(RCycles, RCycles, cycles);
+        ADD(RCycles, RCycles, cycles);
 }
 
 void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift)
@@ -791,11 +825,11 @@
         NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
         : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + c;
 
-    SUB(RCycles, RCycles, cycles);
+    ADD(RCycles, RCycles, cycles);
     if (Thumb || CurInstr.Cond() >= 0xE)
         ConstantCycles += cycles;
     else
-        SUB(RCycles, RCycles, cycles);
+        ADD(RCycles, RCycles, cycles);
 }
 
 void Compiler::Comp_AddCycles_CDI()
@@ -832,7 +866,7 @@
     }
 
     if (!Thumb && CurInstr.Cond() < 0xE)
-        SUB(RCycles, RCycles, cycles);
+        ADD(RCycles, RCycles, cycles);
     else
         ConstantCycles += cycles;
 }
@@ -876,7 +910,7 @@ void Compiler::Comp_AddCycles_CD()
     }
 
    if ((!Thumb && CurInstr.Cond() < 0xE) && IrregularCycles)
-        SUB(RCycles, RCycles, cycles);
+        ADD(RCycles, RCycles, cycles);
     else
         ConstantCycles += cycles;
 }
diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.h b/src/ARMJIT_A64/ARMJIT_Compiler.h
index e4ffc63..0e7d54c 100644
--- a/src/ARMJIT_A64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_A64/ARMJIT_Compiler.h
@@ -247,9 +247,9 @@ public:
 
     std::unordered_map<ptrdiff_t, LoadStorePatch> LoadStorePatches;
 
-    // [Num][Size][Sign Extend][Output register]
-    void* PatchedLoadFuncs[2][3][2][8];
-    void* PatchedStoreFuncs[2][3][8];
+    // [Console Type][Num][Size][Sign Extend][Output register]
+    void* PatchedLoadFuncs[2][2][3][2][8];
+    void* PatchedStoreFuncs[2][2][3][8];
 
     RegisterCache<Compiler, Arm64Gen::ARM64Reg> RegCache;
diff --git a/src/ARMJIT_A64/ARMJIT_Linkage.s b/src/ARMJIT_A64/ARMJIT_Linkage.s
index 536a478..7886315 100644
--- a/src/ARMJIT_A64/ARMJIT_Linkage.s
+++ b/src/ARMJIT_A64/ARMJIT_Linkage.s
@@ -2,9 +2,9 @@
 
 .text
 
-#define RCPSR W27
-#define RCycles W28
-#define RCPU X29
+#define RCPSR w27
+#define RCycles w28
+#define RCPU x29
 
 .p2align 4,,15
diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
index bdd9f43..6140ffc 100644
--- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
@@ -174,8 +174,8 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
 
         LoadStorePatch patch;
         patch.PatchFunc = flags & memop_Store
-            ? PatchedStoreFuncs[Num][__builtin_ctz(size) - 3][rdMapped - W19]
-            : PatchedLoadFuncs[Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped - W19];
+            ? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped - W19]
+            : PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped - W19];
         assert(rdMapped - W19 >= 0 && rdMapped - W19 < 8);
 
         MOVP2R(X7, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
@@ -241,20 +241,26 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
                if (flags & memop_Store)
                {
                    MOV(W2, rdMapped);
-                    switch (size)
+                    switch (size | NDS::ConsoleType)
                    {
-                    case 32: QuickCallFunction(X3, SlowWrite9<u32>); break;
-                    case 16: QuickCallFunction(X3, SlowWrite9<u16>); break;
-                    case 8: QuickCallFunction(X3, SlowWrite9<u8>); break;
+                    case 32: QuickCallFunction(X3, SlowWrite9<u32, 0>); break;
+                    case 33: QuickCallFunction(X3, SlowWrite9<u32, 1>); break;
+                    case 16: QuickCallFunction(X3, SlowWrite9<u16, 0>); break;
+                    case 17: QuickCallFunction(X3, SlowWrite9<u16, 1>); break;
+                    case 8: QuickCallFunction(X3, SlowWrite9<u8, 0>); break;
+                    case 9: QuickCallFunction(X3, SlowWrite9<u8, 1>); break;
                    }
                }
                else
                {
-                    switch (size)
+                    switch (size | NDS::ConsoleType)
                    {
-                    case 32: QuickCallFunction(X3, SlowRead9<u32>); break;
-                    case 16: QuickCallFunction(X3, SlowRead9<u16>); break;
-                    case 8: QuickCallFunction(X3, SlowRead9<u8>); break;
+                    case 32: QuickCallFunction(X3, SlowRead9<u32, 0>); break;
+                    case 33: QuickCallFunction(X3, SlowRead9<u32, 1>); break;
+                    case 16: QuickCallFunction(X3, SlowRead9<u16, 0>); break;
+                    case 17: QuickCallFunction(X3, SlowRead9<u16, 1>); break;
+                    case 8: QuickCallFunction(X3, SlowRead9<u8, 0>); break;
+                    case 9: QuickCallFunction(X3, SlowRead9<u8, 1>); break;
                    }
                }
            }
@@ -263,20 +269,26 @@
                if (flags & memop_Store)
                {
                    MOV(W1, rdMapped);
-                    switch (size)
+                    switch (size | NDS::ConsoleType)
                    {
-                    case 32: QuickCallFunction(X3, SlowWrite7<u32>); break;
-                    case 16: QuickCallFunction(X3, SlowWrite7<u16>); break;
-                    case 8: QuickCallFunction(X3, SlowWrite7<u8>); break;
+                    case 32: QuickCallFunction(X3, SlowWrite7<u32, 0>); break;
+                    case 33: QuickCallFunction(X3, SlowWrite7<u32, 1>); break;
+                    case 16: QuickCallFunction(X3, SlowWrite7<u16, 0>); break;
+                    case 17: QuickCallFunction(X3, SlowWrite7<u16, 1>); break;
+                    case 8: QuickCallFunction(X3, SlowWrite7<u8, 0>); break;
+                    case 9: QuickCallFunction(X3, SlowWrite7<u8, 1>); break;
                    }
                }
                else
                {
-                    switch (size)
+                    switch (size | NDS::ConsoleType)
                    {
-                    case 32: QuickCallFunction(X3, SlowRead7<u32>); break;
-                    case 16: QuickCallFunction(X3, SlowRead7<u16>); break;
-                    case 8: QuickCallFunction(X3, SlowRead7<u8>); break;
+                    case 32: QuickCallFunction(X3, SlowRead7<u32, 0>); break;
+                    case 33: QuickCallFunction(X3, SlowRead7<u32, 1>); break;
+                    case 16: QuickCallFunction(X3, SlowRead7<u16, 0>); break;
+                    case 17: QuickCallFunction(X3, SlowRead7<u16, 1>); break;
+                    case 8: QuickCallFunction(X3, SlowRead7<u8, 0>); break;
+                    case 9: QuickCallFunction(X3, SlowRead7<u8, 1>); break;
                    }
                }
            }
@@ -465,15 +477,25 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
 
     if (decrement)
     {
-        SUB(W0, MapReg(rn), regsCount * 4);
-        ANDI2R(W0, W0, ~3);
-        preinc ^= true;
+        s32 offset = -regsCount * 4 + (preinc ? 0 : 4);
+        if (offset)
+        {
+            ADDI2R(W0, MapReg(rn), offset);
+            ANDI2R(W0, W0, ~3);
+        }
+        else
+        {
+            ANDI2R(W0, MapReg(rn), ~3);
+        }
     }
     else
     {
         ANDI2R(W0, MapReg(rn), ~3);
+        if (preinc)
+            ADD(W0, W0, 4);
     }
 
+    u8* patchFunc;
     if (compileFastPath)
     {
         ptrdiff_t fastPathStart = GetCodeOffset();
@@ -482,7 +504,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
         MOVP2R(X1, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
         ADD(X1, X1, X0);
 
-        u32 offset = preinc ? 4 : 0;
+        u32 offset = 0;
         BitSet16::Iterator it = regs.begin();
         u32 i = 0;
@@ -545,7 +567,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
         LoadStorePatch patch;
         patch.PatchSize = GetCodeOffset() - fastPathStart;
         SwapCodeRegion();
-        patch.PatchFunc = GetRXPtr();
+        patchFunc = (u8*)GetRXPtr();
+        patch.PatchFunc = patchFunc;
         for (i = 0; i < regsCount; i++)
         {
             patch.PatchOffset = fastPathStart - loadStoreOffsets[i];
@@ -620,22 +643,22 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
     if (Num == 0)
     {
         MOV(X3, RCPU);
-        switch (preinc * 2 | store)
+        switch ((u32)store * 2 | NDS::ConsoleType)
         {
-        case 0: QuickCallFunction(X4, SlowBlockTransfer9<false, false>); break;
-        case 1: QuickCallFunction(X4, SlowBlockTransfer9<false, true>); break;
-        case 2: QuickCallFunction(X4, SlowBlockTransfer9<true, false>); break;
-        case 3: QuickCallFunction(X4, SlowBlockTransfer9<true, true>); break;
+        case 0: QuickCallFunction(X4, SlowBlockTransfer9<false, 0>); break;
+        case 1: QuickCallFunction(X4, SlowBlockTransfer9<false, 1>); break;
+        case 2: QuickCallFunction(X4, SlowBlockTransfer9<true, 0>); break;
+        case 3: QuickCallFunction(X4, SlowBlockTransfer9<true, 1>); break;
         }
     }
     else
     {
-        switch (preinc * 2 | store)
+        switch ((u32)store * 2 | NDS::ConsoleType)
        {
-        case 0: QuickCallFunction(X4, SlowBlockTransfer7<false, false>); break;
-        case 1: QuickCallFunction(X4, SlowBlockTransfer7<false, true>); break;
-        case 2: QuickCallFunction(X4, SlowBlockTransfer7<true, false>); break;
-        case 3: QuickCallFunction(X4, SlowBlockTransfer7<true, true>); break;
+        case 0: QuickCallFunction(X4, SlowBlockTransfer7<false, 0>); break;
+        case 1: QuickCallFunction(X4, SlowBlockTransfer7<false, 1>); break;
+        case 2: QuickCallFunction(X4, SlowBlockTransfer7<true, 0>); break;
+        case 3: QuickCallFunction(X4, SlowBlockTransfer7<true, 1>); break;
        }
     }
 
@@ -705,7 +728,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
         ABI_PopRegisters({30});
         RET();
 
-        FlushIcacheSection((u8*)patch.PatchFunc, (u8*)GetRXPtr());
+        FlushIcacheSection(patchFunc, (u8*)GetRXPtr());
         SwapCodeRegion();
     }