From 961b4252e21e217878ef0cf36bee18a3784add84 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Thu, 23 Jul 2020 19:07:33 +0000 Subject: Make it buildable on aarch64 --- src/ARMJIT_A64/ARMJIT_Compiler.cpp | 118 +++++++++++++++++++++++------------- src/ARMJIT_A64/ARMJIT_Compiler.h | 6 +- src/ARMJIT_A64/ARMJIT_Linkage.s | 6 +- src/ARMJIT_A64/ARMJIT_LoadStore.cpp | 65 +++++++++++++------- 4 files changed, 126 insertions(+), 69 deletions(-) (limited to 'src/ARMJIT_A64') diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp index 62323ff..413c673 100644 --- a/src/ARMJIT_A64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp @@ -312,59 +312,93 @@ Compiler::Compiler() RET(); } - for (int num = 0; num < 2; num++) + for (int consoleType = 0; consoleType < 2; consoleType++) { - for (int size = 0; size < 3; size++) + for (int num = 0; num < 2; num++) { - for (int reg = 0; reg < 8; reg++) + for (int size = 0; size < 3; size++) { - ARM64Reg rdMapped = (ARM64Reg)(W19 + reg); - PatchedStoreFuncs[num][size][reg] = GetRXPtr(); - if (num == 0) + for (int reg = 0; reg < 8; reg++) { - MOV(X1, RCPU); - MOV(W2, rdMapped); - } - else - { - MOV(W1, rdMapped); - } - ABI_PushRegisters({30}); - switch ((8 << size) | num) - { - case 32: QuickCallFunction(X3, SlowWrite9); break; - case 33: QuickCallFunction(X3, SlowWrite7); break; - case 16: QuickCallFunction(X3, SlowWrite9); break; - case 17: QuickCallFunction(X3, SlowWrite7); break; - case 8: QuickCallFunction(X3, SlowWrite9); break; - case 9: QuickCallFunction(X3, SlowWrite7); break; - } - ABI_PopRegisters({30}); - RET(); - - for (int signextend = 0; signextend < 2; signextend++) - { - PatchedLoadFuncs[num][size][signextend][reg] = GetRXPtr(); + ARM64Reg rdMapped = (ARM64Reg)(W19 + reg); + PatchedStoreFuncs[consoleType][num][size][reg] = GetRXPtr(); if (num == 0) + { MOV(X1, RCPU); + MOV(W2, rdMapped); + } + else + { + MOV(W1, rdMapped); + } ABI_PushRegisters({30}); - switch ((8 << size) | num) + if (consoleType == 0) { - case 32: QuickCallFunction(X3, SlowRead9); break; - case 33: QuickCallFunction(X3, SlowRead7); break; - case 16: QuickCallFunction(X3, SlowRead9); break; - case 17: QuickCallFunction(X3, SlowRead7); break; - case 8: QuickCallFunction(X3, SlowRead9); break; - case 9: QuickCallFunction(X3, SlowRead7); break; + switch ((8 << size) | num) + { + case 32: QuickCallFunction(X3, SlowWrite9); break; + case 33: QuickCallFunction(X3, SlowWrite7); break; + case 16: QuickCallFunction(X3, SlowWrite9); break; + case 17: QuickCallFunction(X3, SlowWrite7); break; + case 8: QuickCallFunction(X3, SlowWrite9); break; + case 9: QuickCallFunction(X3, SlowWrite7); break; + } } - ABI_PopRegisters({30}); - if (size == 32) - MOV(rdMapped, W0); - else if (signextend) - SBFX(rdMapped, W0, 0, 8 << size); else - UBFX(rdMapped, W0, 0, 8 << size); + { + switch ((8 << size) | num) + { + case 32: QuickCallFunction(X3, SlowWrite9); break; + case 33: QuickCallFunction(X3, SlowWrite7); break; + case 16: QuickCallFunction(X3, SlowWrite9); break; + case 17: QuickCallFunction(X3, SlowWrite7); break; + case 8: QuickCallFunction(X3, SlowWrite9); break; + case 9: QuickCallFunction(X3, SlowWrite7); break; + } + } + + ABI_PopRegisters({30}); RET(); + + for (int signextend = 0; signextend < 2; signextend++) + { + PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetRXPtr(); + if (num == 0) + MOV(X1, RCPU); + ABI_PushRegisters({30}); + if (consoleType == 0) + { + switch ((8 << size) | num) + { + case 32: QuickCallFunction(X3, SlowRead9); break; + case 33: QuickCallFunction(X3, SlowRead7); break; + case 16: QuickCallFunction(X3, SlowRead9); break; + case 17: QuickCallFunction(X3, SlowRead7); break; + case 8: QuickCallFunction(X3, SlowRead9); break; + case 9: QuickCallFunction(X3, SlowRead7); break; + } + } + else + { + switch ((8 << size) | num) + { + case 32: QuickCallFunction(X3, SlowRead9); break; + case 33: QuickCallFunction(X3, SlowRead7); break; + case 16: QuickCallFunction(X3, SlowRead9); break; + case 17: QuickCallFunction(X3, SlowRead7); break; + case 8: QuickCallFunction(X3, SlowRead9); break; + case 9: QuickCallFunction(X3, SlowRead7); break; + } + } + ABI_PopRegisters({30}); + if (size == 32) + MOV(rdMapped, W0); + else if (signextend) + SBFX(rdMapped, W0, 0, 8 << size); + else + UBFX(rdMapped, W0, 0, 8 << size); + RET(); + } } } } diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.h b/src/ARMJIT_A64/ARMJIT_Compiler.h index e4ffc63..0e7d54c 100644 --- a/src/ARMJIT_A64/ARMJIT_Compiler.h +++ b/src/ARMJIT_A64/ARMJIT_Compiler.h @@ -247,9 +247,9 @@ public: std::unordered_map LoadStorePatches; - // [Num][Size][Sign Extend][Output register] - void* PatchedLoadFuncs[2][3][2][8]; - void* PatchedStoreFuncs[2][3][8]; + // [Console Type][Num][Size][Sign Extend][Output register] + void* PatchedLoadFuncs[2][2][3][2][8]; + void* PatchedStoreFuncs[2][2][3][8]; RegisterCache RegCache; diff --git a/src/ARMJIT_A64/ARMJIT_Linkage.s b/src/ARMJIT_A64/ARMJIT_Linkage.s index 536a478..7886315 100644 --- a/src/ARMJIT_A64/ARMJIT_Linkage.s +++ b/src/ARMJIT_A64/ARMJIT_Linkage.s @@ -2,9 +2,9 @@ .text -#define RCPSR W27 -#define RCycles W28 -#define RCPU X29 +#define RCPSR w27 +#define RCycles w28 +#define RCPU x29 .p2align 4,,15 diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp index bdd9f43..6aad0eb 100644 --- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp @@ -174,8 +174,8 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) LoadStorePatch patch; patch.PatchFunc = flags & memop_Store - ? PatchedStoreFuncs[Num][__builtin_ctz(size) - 3][rdMapped - W19] - : PatchedLoadFuncs[Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped - W19]; + ? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped - W19] + : PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped - W19]; assert(rdMapped - W19 >= 0 && rdMapped - W19 < 8); MOVP2R(X7, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); @@ -241,20 +241,26 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) if (flags & memop_Store) { MOV(W2, rdMapped); - switch (size) + switch (size | NDS::ConsoleType) { - case 32: QuickCallFunction(X3, SlowWrite9); break; - case 16: QuickCallFunction(X3, SlowWrite9); break; - case 8: QuickCallFunction(X3, SlowWrite9); break; + case 32: QuickCallFunction(X3, SlowWrite9); break; + case 33: QuickCallFunction(X3, SlowWrite9); break; + case 16: QuickCallFunction(X3, SlowWrite9); break; + case 17: QuickCallFunction(X3, SlowWrite9); break; + case 8: QuickCallFunction(X3, SlowWrite9); break; + case 9: QuickCallFunction(X3, SlowWrite9); break; } } else { switch (size) { - case 32: QuickCallFunction(X3, SlowRead9); break; - case 16: QuickCallFunction(X3, SlowRead9); break; - case 8: QuickCallFunction(X3, SlowRead9); break; + case 32: QuickCallFunction(X3, SlowRead9); break; + case 33: QuickCallFunction(X3, SlowRead9); break; + case 16: QuickCallFunction(X3, SlowRead9); break; + case 17: QuickCallFunction(X3, SlowRead9); break; + case 8: QuickCallFunction(X3, SlowRead9); break; + case 9: QuickCallFunction(X3, SlowRead9); break; } } } @@ -265,18 +271,24 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) MOV(W1, rdMapped); switch (size) { - case 32: QuickCallFunction(X3, SlowWrite7); break; - case 16: QuickCallFunction(X3, SlowWrite7); break; - case 8: QuickCallFunction(X3, SlowWrite7); break; + case 32: QuickCallFunction(X3, SlowWrite7); break; + case 33: QuickCallFunction(X3, SlowWrite7); break; + case 16: QuickCallFunction(X3, SlowWrite7); break; + case 17: QuickCallFunction(X3, SlowWrite7); break; + case 8: QuickCallFunction(X3, SlowWrite7); break; + case 9: QuickCallFunction(X3, SlowWrite7); break; } } else { switch (size) { - case 32: QuickCallFunction(X3, SlowRead7); break; - case 16: QuickCallFunction(X3, SlowRead7); break; - case 8: QuickCallFunction(X3, SlowRead7); break; + case 32: QuickCallFunction(X3, SlowRead7); break; + case 33: QuickCallFunction(X3, SlowRead7); break; + case 16: QuickCallFunction(X3, SlowRead7); break; + case 17: QuickCallFunction(X3, SlowRead7); break; + case 8: QuickCallFunction(X3, SlowRead7); break; + case 9: QuickCallFunction(X3, SlowRead7); break; } } } @@ -465,15 +477,25 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc if (decrement) { - SUB(W0, MapReg(rn), regsCount * 4); - ANDI2R(W0, W0, ~3); - preinc ^= true; + s32 offset = -regsCount * 4 + (preinc ? 0 : 4); + if (offset) + { + ADDI2R(W0, MapReg(rn), offset); + ANDI2R(W0, W0, ~3); + } + else + { + ANDI2R(W0, MapReg(rn), ~3); + } } else { ANDI2R(W0, MapReg(rn), ~3); + if (preinc) + ADD(W0, W0, 4); } + u8* patchFunc; if (compileFastPath) { ptrdiff_t fastPathStart = GetCodeOffset(); @@ -482,7 +504,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc MOVP2R(X1, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); ADD(X1, X1, X0); - u32 offset = preinc ? 4 : 0; + u32 offset = 0; BitSet16::Iterator it = regs.begin(); u32 i = 0; @@ -545,7 +567,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc LoadStorePatch patch; patch.PatchSize = GetCodeOffset() - fastPathStart; SwapCodeRegion(); - patch.PatchFunc = GetRXPtr(); + patchFunc = (u8*)GetRXPtr(); + patch.PatchFunc = patchFunc; for (i = 0; i < regsCount; i++) { patch.PatchOffset = fastPathStart - loadStoreOffsets[i]; @@ -705,7 +728,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc ABI_PopRegisters({30}); RET(); - FlushIcacheSection((u8*)patch.PatchFunc, (u8*)GetRXPtr()); + FlushIcacheSection(patchFunc, (u8*)GetRXPtr()); SwapCodeRegion(); } -- cgit v1.2.3 From e85d2e2cf3439a79305f7132fbcd9774385e9020 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Thu, 23 Jul 2020 19:12:25 +0000 Subject: Use the correct slow path for block read/write --- src/ARMJIT_A64/ARMJIT_LoadStore.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src/ARMJIT_A64') diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp index 6aad0eb..0bddf74 100644 --- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp @@ -643,22 +643,22 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc if (Num == 0) { MOV(X3, RCPU); - switch (preinc * 2 | store) + switch (store * 2 | NDS::ConsoleType) { - case 0: QuickCallFunction(X4, SlowBlockTransfer9); break; - case 1: QuickCallFunction(X4, SlowBlockTransfer9); break; - case 2: QuickCallFunction(X4, SlowBlockTransfer9); break; - case 3: QuickCallFunction(X4, SlowBlockTransfer9); break; + case 0: QuickCallFunction(X4, SlowBlockTransfer9); break; + case 1: QuickCallFunction(X4, SlowBlockTransfer9); break; + case 2: QuickCallFunction(X4, SlowBlockTransfer9); break; + case 3: QuickCallFunction(X4, SlowBlockTransfer9); break; } } else { - switch (preinc * 2 | store) + switch (store * 2 | NDS::ConsoleType) { - case 0: QuickCallFunction(X4, SlowBlockTransfer7); break; - case 1: QuickCallFunction(X4, SlowBlockTransfer7); break; - case 2: QuickCallFunction(X4, SlowBlockTransfer7); break; - case 3: QuickCallFunction(X4, SlowBlockTransfer7); break; + case 0: QuickCallFunction(X4, SlowBlockTransfer7); break; + case 1: QuickCallFunction(X4, SlowBlockTransfer7); break; + case 2: QuickCallFunction(X4, SlowBlockTransfer7); break; + case 3: QuickCallFunction(X4, SlowBlockTransfer7); break; } } -- cgit v1.2.3 From f5130f82eb75376b6bd508b01a50f6d79669a20b Mon Sep 17 00:00:00 2001 From: RSDuck Date: Thu, 23 Jul 2020 19:56:09 +0000 Subject: Arisotura isn't the only derp --- src/ARMJIT_A64/ARMJIT_LoadStore.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/ARMJIT_A64') diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp index 0bddf74..c965d80 100644 --- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp @@ -253,7 +253,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) } else { - switch (size) + switch (size | NDS::ConsoleType) { case 32: QuickCallFunction(X3, SlowRead9); break; case 33: QuickCallFunction(X3, SlowRead9); break; @@ -269,7 +269,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) if (flags & memop_Store) { MOV(W1, rdMapped); - switch (size) + switch (size | NDS::ConsoleType) { case 32: QuickCallFunction(X3, SlowWrite7); break; case 33: QuickCallFunction(X3, SlowWrite7); break; @@ -281,7 +281,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) } else { - switch (size) + switch (size | NDS::ConsoleType) { case 32: QuickCallFunction(X3, SlowRead7); break; case 33: QuickCallFunction(X3, SlowRead7); break; -- cgit v1.2.3 From 116d831cfd1b46afd436a4026a046e38dfd4ada6 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Thu, 23 Jul 2020 20:06:44 +0000 Subject: Fix 16-bit DSi ARM9 read --- src/ARMJIT_A64/ARMJIT_LoadStore.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/ARMJIT_A64') diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp index c965d80..6140ffc 100644 --- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp @@ -258,7 +258,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) case 32: QuickCallFunction(X3, SlowRead9); break; case 33: QuickCallFunction(X3, SlowRead9); break; case 16: QuickCallFunction(X3, SlowRead9); break; - case 17: QuickCallFunction(X3, SlowRead9); break; + case 17: QuickCallFunction(X3, SlowRead9); break; case 8: QuickCallFunction(X3, SlowRead9); break; case 9: QuickCallFunction(X3, SlowRead9); break; } @@ -643,7 +643,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc if (Num == 0) { MOV(X3, RCPU); - switch (store * 2 | NDS::ConsoleType) + switch ((u32)store * 2 | NDS::ConsoleType) { case 0: QuickCallFunction(X4, SlowBlockTransfer9); break; case 1: QuickCallFunction(X4, SlowBlockTransfer9); break; @@ -653,7 +653,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc } else { - switch (store * 2 | NDS::ConsoleType) + switch ((u32)store * 2 | NDS::ConsoleType) { case 0: QuickCallFunction(X4, SlowBlockTransfer7); break; case 1: QuickCallFunction(X4, SlowBlockTransfer7); break; -- cgit v1.2.3 From 887ad27ed88fa3cd12ab14a8fe1c0c5bc63f37fb Mon Sep 17 00:00:00 2001 From: RSDuck Date: Sat, 25 Jul 2020 22:08:43 +0200 Subject: implement carry setting ALU op with imm --- src/ARMInterpreter_ALU.cpp | 9 +++++++-- src/ARMJIT_A64/ARMJIT_ALU.cpp | 13 +++++++++++++ src/ARMJIT_x64/ARMJIT_ALU.cpp | 20 +++++++++++++++++--- src/ARM_InstrInfo.cpp | 15 +++++++++------ 4 files changed, 46 insertions(+), 11 deletions(-) (limited to 'src/ARMJIT_A64') diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 2d185b5..2095432 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -126,6 +126,11 @@ namespace ARMInterpreter #define A_CALC_OP2_IMM \ u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); +#define A_CALC_OP2_IMM_S \ + u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); \ + if ((cpu->CurInstr>>7)&0x1E) \ + cpu->SetC(b & 0x80000000); + #define A_CALC_OP2_REG_SHIFT_IMM(shiftop) \ u32 b = cpu->R[cpu->CurInstr&0xF]; \ u32 s = (cpu->CurInstr>>7)&0x1F; \ @@ -186,7 +191,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \ } \ void A_##x##_IMM_S(ARM* cpu) \ { \ - A_CALC_OP2_IMM \ + A_CALC_OP2_IMM##s \ A_##x##_S(0) \ } \ void A_##x##_REG_LSL_IMM_S(ARM* cpu) \ @@ -234,7 +239,7 @@ void A_##x##_REG_ROR_REG_S(ARM* cpu) \ \ void A_##x##_IMM(ARM* cpu) \ { \ - A_CALC_OP2_IMM \ + A_CALC_OP2_IMM##s \ A_##x(0) \ } \ void A_##x##_REG_LSL_IMM(ARM* cpu) \ diff --git a/src/ARMJIT_A64/ARMJIT_ALU.cpp b/src/ARMJIT_A64/ARMJIT_ALU.cpp index 5f021a0..26a89cb 100644 --- a/src/ARMJIT_A64/ARMJIT_ALU.cpp +++ b/src/ARMJIT_A64/ARMJIT_ALU.cpp @@ -434,6 +434,19 @@ void Compiler::A_Comp_GetOp2(bool S, Op2& op2) if (CurInstr.Instr & (1 << 25)) { Comp_AddCycles_C(); + + u32 shift = (CurInstr.Instr >> 7) & 0x1E; + u32 imm = ROR(CurInstr.Instr & 0xFF, shift); + + if (S && shift && (CurInstr.SetFlags & 0x2)) + { + CPSRDirty = true; + if (imm & 0x80000000) + ORRI2R(RCPSR, RCPSR, 1 << 29); + else + ANDI2R(RCPSR, RCPSR, ~(1 << 29)); + } + op2 = Op2(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E)); } else diff --git a/src/ARMJIT_x64/ARMJIT_ALU.cpp b/src/ARMJIT_x64/ARMJIT_ALU.cpp index 43b94b6..57a38c4 100644 --- a/src/ARMJIT_x64/ARMJIT_ALU.cpp +++ b/src/ARMJIT_x64/ARMJIT_ALU.cpp @@ -103,16 +103,30 @@ void Compiler::Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed) // also calculates cycles OpArg Compiler::A_Comp_GetALUOp2(bool S, bool& carryUsed) { + S = S && (CurInstr.SetFlags & 0x2); + if (CurInstr.Instr & (1 << 25)) { Comp_AddCycles_C(); + + u32 shift = (CurInstr.Instr >> 7) & 0x1E; + u32 imm = ROR(CurInstr.Instr & 0xFF, shift); + carryUsed = false; - return Imm32(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E)); + if (S && shift) + { + CPSRDirty = true; + carryUsed = true; + if (imm & 0x80000000) + MOV(32, R(RSCRATCH2), Imm32(1)); + else + XOR(32, R(RSCRATCH2), R(RSCRATCH2)); + } + + return Imm32(imm); } else { - S = S && (CurInstr.SetFlags & 0x2); - int op = (CurInstr.Instr >> 5) & 0x3; if (CurInstr.Instr & (1 << 4)) { diff --git a/src/ARM_InstrInfo.cpp b/src/ARM_InstrInfo.cpp index ccec951..74a5f87 100644 --- a/src/ARM_InstrInfo.cpp +++ b/src/ARM_InstrInfo.cpp @@ -7,7 +7,7 @@ namespace ARMInstrInfo { -#define ak(x) ((x) << 22) +#define ak(x) ((x) << 23) enum { A_Read0 = 1 << 0, @@ -37,9 +37,10 @@ enum { A_RRXReadC = 1 << 17, A_StaticShiftSetC = 1 << 18, A_SetC = 1 << 19, + A_SetCImm = 1 << 20, - A_WriteMem = 1 << 20, - A_LoadMem = 1 << 21 + A_WriteMem = 1 << 21, + A_LoadMem = 1 << 22 }; #define A_BIOP A_Read16 @@ -52,7 +53,7 @@ enum { #define A_ARITH_SHIFT_REG A_SetCV #define A_LOGIC_SHIFT_REG A_SetMaybeC #define A_ARITH_IMM A_SetCV -#define A_LOGIC_IMM 0 +#define A_LOGIC_IMM A_SetCImm #define A_IMPLEMENT_ALU_OP(x,k,a,c) \ const u32 A_##x##_IMM = A_Write12 | c | A_##k | ak(ak_##x##_IMM); \ @@ -410,7 +411,7 @@ Info Decode(bool thumb, u32 num, u32 instr) if (data & A_UnkOnARM7 && num == 1) data = A_UNK; - res.Kind = (data >> 22) & 0x1FF; + res.Kind = (data >> 23) & 0x1FF; if (res.Kind >= ak_SMLAxy && res.Kind <= ak_SMULxy && num == 1) { @@ -496,7 +497,9 @@ Info Decode(bool thumb, u32 num, u32 instr) res.ReadFlags |= flag_C; if ((data & A_RRXReadC) && !((instr >> 7) & 0x1F)) res.ReadFlags |= flag_C; - if ((data & A_SetC) || ((data & A_StaticShiftSetC) && ((instr >> 7) & 0x1F))) + if ((data & A_SetC) + || ((data & A_StaticShiftSetC) && ((instr >> 7) & 0x1F)) + || ((data & A_SetCImm) && ((instr >> 7) & 0x1E))) res.WriteFlags |= flag_C; if (data & A_WriteMem) -- cgit v1.2.3 From 5903b11bda0aa181f2914a06650b2cbaf28aa9f1 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Mon, 27 Jul 2020 23:14:23 +0200 Subject: subtract cycles after checking IRQ and Halt also switch back to adding to ARM::Cycles instead of subtracting from them --- src/ARM.cpp | 45 ++++++++++++++++++-------------------- src/ARM.h | 32 +++++++++++++-------------- src/ARMJIT_A64/ARMJIT_Branch.cpp | 10 ++++----- src/ARMJIT_A64/ARMJIT_Compiler.cpp | 16 +++++++------- src/ARMJIT_x64/ARMJIT_Branch.cpp | 2 +- src/ARMJIT_x64/ARMJIT_Compiler.cpp | 16 +++++++------- 6 files changed, 59 insertions(+), 62 deletions(-) (limited to 'src/ARMJIT_A64') diff --git a/src/ARM.cpp b/src/ARM.cpp index ecf94cd..c1743ea 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -274,15 +274,15 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) if (addr & 0x2) { NextInstr[0] = CodeRead32(addr-2, true) >> 16; - Cycles -= CodeCycles; + Cycles += CodeCycles; NextInstr[1] = CodeRead32(addr+2, false); - Cycles -= CodeCycles; + Cycles += CodeCycles; } else { NextInstr[0] = CodeRead32(addr, true); NextInstr[1] = NextInstr[0] >> 16; - Cycles -= CodeCycles; + Cycles += CodeCycles; } CPSR |= 0x20; @@ -295,9 +295,9 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) if (newregion != oldregion) SetupCodeMem(addr); NextInstr[0] = CodeRead32(addr, true); - Cycles -= CodeCycles; + Cycles += CodeCycles; NextInstr[1] = CodeRead32(addr+4, false); - Cycles -= CodeCycles; + Cycles += CodeCycles; CPSR &= ~0x20; } @@ -337,7 +337,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) NextInstr[0] = CodeRead16(addr); NextInstr[1] = CodeRead16(addr+2); - Cycles -= NDS::ARM7MemTimings[CodeCycles][0] + NDS::ARM7MemTimings[CodeCycles][1]; + Cycles += NDS::ARM7MemTimings[CodeCycles][0] + NDS::ARM7MemTimings[CodeCycles][1]; CPSR |= 0x20; } @@ -350,7 +350,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) NextInstr[0] = CodeRead32(addr); NextInstr[1] = CodeRead32(addr+4); - Cycles -= NDS::ARM7MemTimings[CodeCycles][2] + NDS::ARM7MemTimings[CodeCycles][3]; + Cycles += NDS::ARM7MemTimings[CodeCycles][2] + NDS::ARM7MemTimings[CodeCycles][3]; CPSR &= ~0x20; } @@ -609,7 +609,7 @@ void ARMv5::Execute() }*/ if (IRQ) TriggerIRQ(); - NDS::ARM9Timestamp -= Cycles; + NDS::ARM9Timestamp += Cycles; Cycles = 0; } @@ -643,9 +643,6 @@ void ARMv5::ExecuteJIT() { u32 instrAddr = R[15] - ((CPSR&0x20)?2:4); - // hack so Cycles <= 0 becomes Cycles < 0 - Cycles = NDS::ARM9Target - NDS::ARM9Timestamp - 1; - if ((instrAddr < FastBlockLookupStart || instrAddr >= (FastBlockLookupStart + FastBlockLookupSize)) && !ARMJIT::SetupExecutableRegion(0, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize)) { @@ -661,13 +658,8 @@ void ARMv5::ExecuteJIT() else ARMJIT::CompileBlock(this); - NDS::ARM9Timestamp = NDS::ARM9Target - Cycles - 1; - if (StopExecution) { - if (IRQ) - TriggerIRQ(); - if (Halted || IdleLoop) { bool idleLoop = IdleLoop; @@ -678,7 +670,13 @@ void ARMv5::ExecuteJIT() } break; } + + if (IRQ) + TriggerIRQ(); } + + NDS::ARM9Timestamp += Cycles; + Cycles = 0; } if (Halted == 2) @@ -755,7 +753,7 @@ void ARMv4::Execute() }*/ if (IRQ) TriggerIRQ(); - NDS::ARM7Timestamp -= Cycles; + NDS::ARM7Timestamp += Cycles; Cycles = 0; } @@ -795,8 +793,6 @@ void ARMv4::ExecuteJIT() { u32 instrAddr = R[15] - ((CPSR&0x20)?2:4); - Cycles = NDS::ARM7Target - NDS::ARM7Timestamp - 1; - if ((instrAddr < FastBlockLookupStart || instrAddr >= (FastBlockLookupStart + FastBlockLookupSize)) && !ARMJIT::SetupExecutableRegion(1, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize)) { @@ -812,14 +808,9 @@ void ARMv4::ExecuteJIT() else ARMJIT::CompileBlock(this); - NDS::ARM7Timestamp = NDS::ARM7Target - Cycles - 1; - // TODO optimize this shit!!! if (StopExecution) { - if (IRQ) - TriggerIRQ(); - if (Halted || IdleLoop) { bool idleLoop = IdleLoop; @@ -830,7 +821,13 @@ void ARMv4::ExecuteJIT() } break; } + + if (IRQ) + TriggerIRQ(); } + + NDS::ARM7Timestamp += Cycles; + Cycles = 0; } if (Halted == 2) diff --git a/src/ARM.h b/src/ARM.h index ee6ac96..deacbee 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -202,14 +202,14 @@ public: { // code only. always nonseq 32-bit for ARM9. s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - Cycles -= numC; + Cycles += numC; } void AddCycles_CI(s32 numI) { // code+internal s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - Cycles -= numC + numI; + Cycles += numC + numI; } void AddCycles_CDI() @@ -220,9 +220,9 @@ public: s32 numD = DataCycles; //if (DataRegion != CodeRegion) - Cycles -= std::max(numC + numD - 6, std::max(numC, numD)); + Cycles += std::max(numC + numD - 6, std::max(numC, numD)); //else - // Cycles -= numC + numD; + // Cycles += numC + numD; } void AddCycles_CD() @@ -232,9 +232,9 @@ public: s32 numD = DataCycles; //if (DataRegion != CodeRegion) - Cycles -= std::max(numC + numD - 6, std::max(numC, numD)); + Cycles += std::max(numC + numD - 6, std::max(numC, numD)); //else - // Cycles -= numC + numD; + // Cycles += numC + numD; } void GetCodeMemRegion(u32 addr, NDS::MemRegion* region); @@ -396,13 +396,13 @@ public: void AddCycles_C() { // code only. this code fetch is sequential. - Cycles -= NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?1:3]; + Cycles += NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?1:3]; } void AddCycles_CI(s32 num) { // code+internal. results in a nonseq code fetch. - Cycles -= NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2] + num; + Cycles += NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2] + num; } void AddCycles_CDI() @@ -414,21 +414,21 @@ public: if ((DataRegion >> 24) == 0x02) // mainRAM { if (CodeRegion == 0x02) - Cycles -= numC + numD; + Cycles += numC + numD; else { numC++; - Cycles -= std::max(numC + numD - 3, std::max(numC, numD)); + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); } } else if (CodeRegion == 0x02) { numD++; - Cycles -= std::max(numC + numD - 3, std::max(numC, numD)); + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); } else { - Cycles -= numC + numD + 1; + Cycles += numC + numD + 1; } } @@ -441,17 +441,17 @@ public: if ((DataRegion >> 24) == 0x02) { if (CodeRegion == 0x02) - Cycles -= numC + numD; + Cycles += numC + numD; else - Cycles -= std::max(numC + numD - 3, std::max(numC, numD)); + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); } else if (CodeRegion == 0x02) { - Cycles -= std::max(numC + numD - 3, std::max(numC, numD)); + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); } else { - Cycles -= numC + numD; + Cycles += numC + numD; } } }; diff --git a/src/ARMJIT_A64/ARMJIT_Branch.cpp b/src/ARMJIT_A64/ARMJIT_Branch.cpp index f130938..117eaa0 100644 --- a/src/ARMJIT_A64/ARMJIT_Branch.cpp +++ b/src/ARMJIT_A64/ARMJIT_Branch.cpp @@ -143,7 +143,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles) ConstantCycles += cycles; else - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); } @@ -181,7 +181,7 @@ void* Compiler::Gen_JumpTo9(int kind) STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15])); ADD(W1, W1, W1); - SUB(RCycles, RCycles, W1); + ADD(RCycles, RCycles, W1); RET(); } @@ -201,7 +201,7 @@ void* Compiler::Gen_JumpTo9(int kind) ADD(W2, W1, W1); TSTI2R(W0, 0x2); CSEL(W1, W1, W2, CC_EQ); - SUB(RCycles, RCycles, W1); + ADD(RCycles, RCycles, W1); RET(); } @@ -229,7 +229,7 @@ void* Compiler::Gen_JumpTo7(int kind) UBFX(W2, W3, 0, 8); UBFX(W3, W3, 8, 8); ADD(W2, W3, W2); - SUB(RCycles, RCycles, W2); + ADD(RCycles, RCycles, W2); ANDI2R(W0, W0, ~3); @@ -253,7 +253,7 @@ void* Compiler::Gen_JumpTo7(int kind) UBFX(W2, W3, 16, 8); UBFX(W3, W3, 24, 8); ADD(W2, W3, W2); - SUB(RCycles, RCycles, W2); + ADD(RCycles, RCycles, W2); ANDI2R(W0, W0, ~1); diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp index 413c673..b046123 100644 --- a/src/ARMJIT_A64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp @@ -629,7 +629,7 @@ void Compiler::Comp_BranchSpecialBehaviour(bool taken) { RegCache.PrepareExit(); - SUB(RCycles, RCycles, ConstantCycles); + ADD(RCycles, RCycles, ConstantCycles); QuickTailCall(X0, ARM_Ret); } } @@ -770,7 +770,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[] RegCache.Flush(); - SUB(RCycles, RCycles, ConstantCycles); + ADD(RCycles, RCycles, ConstantCycles); QuickTailCall(X0, ARM_Ret); FlushIcache(); @@ -800,7 +800,7 @@ void Compiler::Comp_AddCycles_C(bool forceNonConstant) if (forceNonConstant) ConstantCycles += cycles; else - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); } void Compiler::Comp_AddCycles_CI(u32 numI) @@ -814,7 +814,7 @@ void Compiler::Comp_AddCycles_CI(u32 numI) if (Thumb || CurInstr.Cond() == 0xE) ConstantCycles += cycles; else - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); } void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift) @@ -825,11 +825,11 @@ void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift) NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2] : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + c; - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); if (Thumb || CurInstr.Cond() >= 0xE) ConstantCycles += cycles; else - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); } void Compiler::Comp_AddCycles_CDI() @@ -866,7 +866,7 @@ void Compiler::Comp_AddCycles_CDI() } if (!Thumb && CurInstr.Cond() < 0xE) - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); else ConstantCycles += cycles; } @@ -910,7 +910,7 @@ void Compiler::Comp_AddCycles_CD() } if ((!Thumb && CurInstr.Cond() < 0xE) && IrregularCycles) - SUB(RCycles, RCycles, cycles); + ADD(RCycles, RCycles, cycles); else ConstantCycles += cycles; } diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp index bda9e52..819fe3c 100644 --- a/src/ARMJIT_x64/ARMJIT_Branch.cpp +++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp @@ -127,7 +127,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles) ConstantCycles += cycles; else - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); } void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index 7f32f31..1fdbaf8 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -627,7 +627,7 @@ void Compiler::Comp_SpecialBranchBehaviour(bool taken) { RegCache.PrepareExit(); - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); JMP((u8*)&ARM_Ret, true); } } @@ -760,7 +760,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[] RegCache.Flush(); - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); JMP((u8*)ARM_Ret, true); /*FILE* codeout = fopen("codeout", "a"); @@ -779,7 +779,7 @@ void Compiler::Comp_AddCycles_C(bool forceNonConstant) : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles); if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant) - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } @@ -791,7 +791,7 @@ void Compiler::Comp_AddCycles_CI(u32 i) : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i; if (!Thumb && CurInstr.Cond() < 0xE) - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } @@ -805,12 +805,12 @@ void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add) if (!Thumb && CurInstr.Cond() < 0xE) { LEA(32, RSCRATCH, MDisp(i, add + cycles)); - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH)); } else { ConstantCycles += cycles; - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i)); } } @@ -848,7 +848,7 @@ void Compiler::Comp_AddCycles_CDI() } if (!Thumb && CurInstr.Cond() < 0xE) - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } @@ -892,7 +892,7 @@ void Compiler::Comp_AddCycles_CD() } if (IrregularCycles && !Thumb && CurInstr.Cond() < 0xE) - SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } -- cgit v1.2.3