diff options
author | RSDuck <rsduck@users.noreply.github.com> | 2019-08-25 12:28:48 +0200 |
---|---|---|
committer | RSDuck <rsduck@users.noreply.github.com> | 2020-06-16 11:55:44 +0200 |
commit | f378458c104f1879f30610dfe4010e4772218787 (patch) | |
tree | abac6775cf402756d516c6ac9479b69580b7f4c8 | |
parent | 316378092ac1791f4ada3b6b81b2681eab14d58d (diff) |
optimise away unneeded flag sets
- especially useful for thumb code and larger max block sizes
- can still be improved upon
-rw-r--r-- | src/ARMJIT.cpp | 24 | ||||
-rw-r--r-- | src/ARMJIT.h | 1 | ||||
-rw-r--r-- | src/ARMJIT_x64/ARMJIT_ALU.cpp | 64 | ||||
-rw-r--r-- | src/ARMJIT_x64/ARMJIT_Compiler.cpp | 9 | ||||
-rw-r--r-- | src/ARMJIT_x64/ARMJIT_Compiler.h | 6 | ||||
-rw-r--r-- | src/ARM_InstrInfo.cpp | 238 | ||||
-rw-r--r-- | src/ARM_InstrInfo.h | 13 |
7 files changed, 246 insertions, 109 deletions
diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index 949bc1c..3b6bc2e 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -126,6 +126,24 @@ void DeInit() delete compiler; } +void floodFillSetFlags(FetchedInstr instrs[], int start, u8 flags) +{ + for (int j = start; j >= 0; j--) + { + u8 match = instrs[j].Info.WriteFlags & flags; + u8 matchMaybe = (instrs[j].Info.WriteFlags >> 4) & flags; + if (matchMaybe) // writes flags maybe + instrs[j].SetFlags |= matchMaybe; + if (match) + { + instrs[j].SetFlags |= match; + flags &= ~match; + if (!flags) + return; + } + } +} + CompiledBlock CompileBlock(ARM* cpu) { bool thumb = cpu->CPSR & 0x20; @@ -175,8 +193,14 @@ CompiledBlock CompileBlock(ARM* cpu) instrs[i].Info = ARMInstrInfo::Decode(thumb, cpu->Num, instrs[i].Instr); i++; + + bool canCompile = compiler->CanCompile(thumb, instrs[i - 1].Info.Kind); + if (instrs[i - 1].Info.ReadFlags != 0 || !canCompile) + floodFillSetFlags(instrs, i - 2, canCompile ? instrs[i - 1].Info.ReadFlags : 0xF); } while(!instrs[i - 1].Info.EndBlock && i < Config::JIT_MaxBlockSize); + floodFillSetFlags(instrs, i - 1, 0xF); + CompiledBlock block = compiler->CompileBlock(cpu, instrs, i); if (cpu->Num == 0) diff --git a/src/ARMJIT.h b/src/ARMJIT.h index 0fc1c38..6197695 100644 --- a/src/ARMJIT.h +++ b/src/ARMJIT.h @@ -28,6 +28,7 @@ struct FetchedInstr return Instr >> 28; } + u8 SetFlags; u32 Instr; u32 NextInstr[2]; diff --git a/src/ARMJIT_x64/ARMJIT_ALU.cpp b/src/ARMJIT_x64/ARMJIT_ALU.cpp index f0bcf8e..6a7d711 100644 --- a/src/ARMJIT_x64/ARMJIT_ALU.cpp +++ b/src/ARMJIT_x64/ARMJIT_ALU.cpp @@ -111,6 +111,8 @@ OpArg Compiler::A_Comp_GetALUOp2(bool S, bool& carryUsed) } else { + S = S && (CurInstr.SetFlags & 0x2); + int op = (CurInstr.Instr >> 5) & 0x3; if (CurInstr.Instr & (1 << 4)) { @@ -215,7 +217,8 @@ void Compiler::A_Comp_MovOp() if (S) { - TEST(32, rd, rd); + if (FlagsNZRequired()) + TEST(32, rd, rd); Comp_RetriveFlags(false, false, carryUsed); } @@ -263,12 +266,14 @@ void Compiler::Comp_MulOp(bool S, bool add, Gen::OpArg rd, Gen::OpArg rm, Gen::O { IMUL(32, RSCRATCH, rs); LEA(32, rd.GetSimpleReg(), MRegSum(RSCRATCH, rn.GetSimpleReg())); - TEST(32, rd, rd); + if (S && FlagsNZRequired()) + TEST(32, rd, rd); } else { IMUL(32, RSCRATCH, rs); MOV(32, rd, R(RSCRATCH)); + if (S && FlagsNZRequired()) TEST(32, R(RSCRATCH), R(RSCRATCH)); } @@ -331,7 +336,7 @@ void Compiler::A_Comp_SMULL_SMLAL() else { IMUL(64, RSCRATCH2, R(RSCRATCH3)); - if (S) + if (S && FlagsNZRequired()) TEST(64, R(RSCRATCH2), R(RSCRATCH2)); } @@ -345,9 +350,20 @@ void Compiler::A_Comp_SMULL_SMLAL() void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed) { - CPSRDirty = true; + if (CurInstr.SetFlags == 0) + return; + if (retriveCV && !(CurInstr.SetFlags & 0x3)) + retriveCV = false; bool carryOnly = !retriveCV && carryUsed; + if (carryOnly && !(CurInstr.SetFlags & 0x2)) + { + carryUsed = false; + carryOnly = false; + } + + CPSRDirty = true; + if (retriveCV) { SETcc(CC_O, R(RSCRATCH)); @@ -355,19 +371,28 @@ void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed) LEA(32, RSCRATCH2, MComplex(RSCRATCH, RSCRATCH3, SCALE_2, 0)); } - SETcc(CC_S, R(RSCRATCH)); - SETcc(CC_Z, R(RSCRATCH3)); - LEA(32, RSCRATCH, MComplex(RSCRATCH3, RSCRATCH, SCALE_2, 0)); - int shiftAmount = 30; - if (retriveCV || carryUsed) + if (FlagsNZRequired()) { - LEA(32, RSCRATCH, MComplex(RSCRATCH2, RSCRATCH, carryOnly ? SCALE_2 : SCALE_4, 0)); - shiftAmount = carryOnly ? 29 : 28; - } - SHL(32, R(RSCRATCH), Imm8(shiftAmount)); + SETcc(CC_S, R(RSCRATCH)); + SETcc(CC_Z, R(RSCRATCH3)); + LEA(32, RSCRATCH, MComplex(RSCRATCH3, RSCRATCH, SCALE_2, 0)); + int shiftAmount = 30; + if (retriveCV || carryUsed) + { + LEA(32, RSCRATCH, MComplex(RSCRATCH2, RSCRATCH, carryOnly ? SCALE_2 : SCALE_4, 0)); + shiftAmount = carryOnly ? 29 : 28; + } + SHL(32, R(RSCRATCH), Imm8(shiftAmount)); - AND(32, R(RCPSR), Imm32(0x3FFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28))); - OR(32, R(RCPSR), R(RSCRATCH)); + AND(32, R(RCPSR), Imm32(0x3FFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28))); + OR(32, R(RCPSR), R(RSCRATCH)); + } + else + { + SHL(32, R(RSCRATCH2), Imm8(carryOnly ? 29 : 28)); + AND(32, R(RCPSR), Imm32(0xFFFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28))); + OR(32, R(RCPSR), R(RSCRATCH2)); + } } // always uses RSCRATCH, RSCRATCH2 only if S == true @@ -523,7 +548,8 @@ void Compiler::T_Comp_ShiftImm() if (shifted != rd) MOV(32, rd, shifted); - TEST(32, rd, rd); + if (FlagsNZRequired()) + TEST(32, rd, rd); Comp_RetriveFlags(false, false, carryUsed); } @@ -557,7 +583,8 @@ void Compiler::T_Comp_ALU_Imm8() { case 0x0: MOV(32, rd, imm); - TEST(32, rd, rd); + if (FlagsNZRequired()) + TEST(32, rd, rd); Comp_RetriveFlags(false, false, false); return; case 0x1: @@ -607,7 +634,8 @@ void Compiler::T_Comp_ALU() int shiftOp = op == 0x7 ? 3 : op - 0x2; bool carryUsed; OpArg shifted = Comp_RegShiftReg(shiftOp, rs, rd, true, carryUsed); - TEST(32, shifted, shifted); + if (FlagsNZRequired()) + TEST(32, shifted, shifted); MOV(32, rd, shifted); Comp_RetriveFlags(false, false, true); } diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index ab13cb6..6abb2bb 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -342,6 +342,11 @@ const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] = { }; #undef F +bool Compiler::CanCompile(bool thumb, u16 kind) +{ + return (thumb ? T_Comp[kind] : A_Comp[kind]) != NULL; +} + void Compiler::Reset() { memset(ResetStart, 0xcc, CodeMemSize); @@ -380,11 +385,15 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs // TODO: this is ugly as a whole, do better RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount); + printf("block start %d\n", Thumb); + for (int i = 0; i < instrsCount; i++) { R15 += Thumb ? 2 : 4; CurInstr = instrs[i]; + printf("%x %d %d %d\n", CurInstr.Instr, CurInstr.SetFlags, CurInstr.Info.WriteFlags, CurInstr.Info.ReadFlags); + CompileFunc comp = Thumb ? T_Comp[CurInstr.Info.Kind] : A_Comp[CurInstr.Info.Kind]; diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h index 3151cbc..8861884 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.h +++ b/src/ARMJIT_x64/ARMJIT_Compiler.h @@ -29,6 +29,8 @@ public: void LoadReg(int reg, Gen::X64Reg nativeReg); void SaveReg(int reg, Gen::X64Reg nativeReg); + bool CanCompile(bool thumb, u16 kind); + typedef void (Compiler::*CompileFunc)(); void Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR = false); @@ -64,7 +66,6 @@ public: void A_Comp_BranchImm(); void A_Comp_BranchXchangeReg(); - void T_Comp_ShiftImm(); void T_Comp_AddSub_(); void T_Comp_ALU_Imm8(); @@ -121,6 +122,9 @@ public: void LoadCPSR(); void SaveCPSR(); + bool FlagsNZRequired() + { return CurInstr.SetFlags & 0xC; } + Gen::FixupBranch CheckCondition(u32 cond); Gen::OpArg MapReg(int reg) diff --git a/src/ARM_InstrInfo.cpp b/src/ARM_InstrInfo.cpp index 4813799..ea6d827 100644 --- a/src/ARM_InstrInfo.cpp +++ b/src/ARM_InstrInfo.cpp @@ -5,7 +5,7 @@ namespace ARMInstrInfo { -#define ak(x) ((x) << 13) +#define ak(x) ((x) << 18) enum { A_Read0 = 1 << 0, @@ -26,69 +26,81 @@ enum { A_Link = 1 << 10, A_UnkOnARM7 = 1 << 11, + + A_SetNZ = 1 << 12, + A_SetCV = 1 << 13, + A_SetMaybeC = 1 << 14, + A_MulFlags = 1 << 15, + A_ReadC = 1 << 16, + A_RRXReadC = 1 << 17, }; #define A_BIOP A_Read16 #define A_MONOOP 0 -#define A_IMPLEMENT_ALU_OP(x,k) \ - const u32 A_##x##_IMM = A_Write12 | A_##k | ak(ak_##x##_IMM); \ - const u32 A_##x##_REG_LSL_IMM = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSL_IMM); \ - const u32 A_##x##_REG_LSR_IMM = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSR_IMM); \ - const u32 A_##x##_REG_ASR_IMM = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ASR_IMM); \ - const u32 A_##x##_REG_ROR_IMM = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ROR_IMM); \ - const u32 A_##x##_REG_LSL_REG = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG); \ - const u32 A_##x##_REG_LSR_REG = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG); \ - const u32 A_##x##_REG_ASR_REG = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG); \ - const u32 A_##x##_REG_ROR_REG = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG); \ +#define A_ARITH A_SetCV +#define A_LOGIC A_SetMaybeC +#define A_ARITH_IMM A_SetCV +#define A_LOGIC_IMM 0 + +#define A_IMPLEMENT_ALU_OP(x,k,a,c) \ + const u32 A_##x##_IMM = A_Write12 | c | A_##k | ak(ak_##x##_IMM); \ + const u32 A_##x##_REG_LSL_IMM = A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_LSL_IMM); \ + const u32 A_##x##_REG_LSR_IMM = A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_LSR_IMM); \ + const u32 A_##x##_REG_ASR_IMM = A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_ASR_IMM); \ + const u32 A_##x##_REG_ROR_IMM = A_RRXReadC | A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_ROR_IMM); \ + const u32 A_##x##_REG_LSL_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG); \ + const u32 A_##x##_REG_LSR_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG); \ + const u32 A_##x##_REG_ASR_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG); \ + const u32 A_##x##_REG_ROR_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG); \ \ - const u32 A_##x##_IMM_S = A_Write12 | A_##k | ak(ak_##x##_IMM_S); \ - const u32 A_##x##_REG_LSL_IMM_S = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSL_IMM_S); \ - const u32 A_##x##_REG_LSR_IMM_S = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSR_IMM_S); \ - const u32 A_##x##_REG_ASR_IMM_S = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ASR_IMM_S); \ - const u32 A_##x##_REG_ROR_IMM_S = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ROR_IMM_S); \ - const u32 A_##x##_REG_LSL_REG_S = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG_S); \ - const u32 A_##x##_REG_LSR_REG_S = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG_S); \ - const u32 A_##x##_REG_ASR_REG_S = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG_S); \ - const u32 A_##x##_REG_ROR_REG_S = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG_S); - -A_IMPLEMENT_ALU_OP(AND,BIOP) -A_IMPLEMENT_ALU_OP(EOR,BIOP) -A_IMPLEMENT_ALU_OP(SUB,BIOP) -A_IMPLEMENT_ALU_OP(RSB,BIOP) -A_IMPLEMENT_ALU_OP(ADD,BIOP) -A_IMPLEMENT_ALU_OP(ADC,BIOP) -A_IMPLEMENT_ALU_OP(SBC,BIOP) -A_IMPLEMENT_ALU_OP(RSC,BIOP) -A_IMPLEMENT_ALU_OP(ORR,BIOP) -A_IMPLEMENT_ALU_OP(MOV,MONOOP) -A_IMPLEMENT_ALU_OP(BIC,BIOP) -A_IMPLEMENT_ALU_OP(MVN,MONOOP) + const u32 A_##x##_IMM_S = A_SetNZ | c | A_##a##_IMM | A_Write12 | A_##k | ak(ak_##x##_IMM_S); \ + const u32 A_##x##_REG_LSL_IMM_S = A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSL_IMM_S); \ + const u32 A_##x##_REG_LSR_IMM_S = A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSR_IMM_S); \ + const u32 A_##x##_REG_ASR_IMM_S = A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ASR_IMM_S); \ + const u32 A_##x##_REG_ROR_IMM_S = A_RRXReadC | A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ROR_IMM_S); \ + const u32 A_##x##_REG_LSL_REG_S = A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG_S); \ + const u32 A_##x##_REG_LSR_REG_S = A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG_S); \ + const u32 A_##x##_REG_ASR_REG_S = A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG_S); \ + const u32 A_##x##_REG_ROR_REG_S = A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG_S); + +A_IMPLEMENT_ALU_OP(AND,BIOP,LOGIC,0) +A_IMPLEMENT_ALU_OP(EOR,BIOP,LOGIC,0) +A_IMPLEMENT_ALU_OP(SUB,BIOP,ARITH,0) +A_IMPLEMENT_ALU_OP(RSB,BIOP,ARITH,0) +A_IMPLEMENT_ALU_OP(ADD,BIOP,ARITH,0) +A_IMPLEMENT_ALU_OP(ADC,BIOP,ARITH,A_ReadC) +A_IMPLEMENT_ALU_OP(SBC,BIOP,ARITH,A_ReadC) +A_IMPLEMENT_ALU_OP(RSC,BIOP,ARITH,A_ReadC) +A_IMPLEMENT_ALU_OP(ORR,BIOP,LOGIC,0) +A_IMPLEMENT_ALU_OP(MOV,MONOOP,LOGIC,0) +A_IMPLEMENT_ALU_OP(BIC,BIOP,LOGIC,0) +A_IMPLEMENT_ALU_OP(MVN,MONOOP,LOGIC,0) const u32 A_MOV_REG_LSL_IMM_DBG = A_MOV_REG_LSL_IMM; -#define A_IMPLEMENT_ALU_TEST(x) \ - const u32 A_##x##_IMM = A_Read16 | A_Read0 | ak(ak_##x##_IMM); \ - const u32 A_##x##_REG_LSL_IMM = A_Read16 | A_Read0 | ak(ak_##x##_REG_LSL_IMM); \ - const u32 A_##x##_REG_LSR_IMM = A_Read16 | A_Read0 | ak(ak_##x##_REG_LSR_IMM); \ - const u32 A_##x##_REG_ASR_IMM = A_Read16 | A_Read0 | ak(ak_##x##_REG_ASR_IMM); \ - const u32 A_##x##_REG_ROR_IMM = A_Read16 | A_Read0 | ak(ak_##x##_REG_ROR_IMM); \ - const u32 A_##x##_REG_LSL_REG = A_Read16 | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG); \ - const u32 A_##x##_REG_LSR_REG = A_Read16 | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG); \ - const u32 A_##x##_REG_ASR_REG = A_Read16 | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG); \ - const u32 A_##x##_REG_ROR_REG = A_Read16 | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG); - -A_IMPLEMENT_ALU_TEST(TST) -A_IMPLEMENT_ALU_TEST(TEQ) -A_IMPLEMENT_ALU_TEST(CMP) -A_IMPLEMENT_ALU_TEST(CMN) - -const u32 A_MUL = A_Write16 | A_Read0 | A_Read8 | ak(ak_MUL); -const u32 A_MLA = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_MLA); -const u32 A_UMULL = A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(ak_UMULL); -const u32 A_UMLAL = A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_UMLAL); -const u32 A_SMULL = A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(ak_SMULL); -const u32 A_SMLAL = A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_SMLAL); +#define A_IMPLEMENT_ALU_TEST(x,a) \ + const u32 A_##x##_IMM = A_SetNZ | A_Read16 | A_##a | A_Read0 | ak(ak_##x##_IMM); \ + const u32 A_##x##_REG_LSL_IMM = A_SetNZ | A_Read16 | A_##a | A_Read0 | ak(ak_##x##_REG_LSL_IMM); \ + const u32 A_##x##_REG_LSR_IMM = A_SetNZ | A_Read16 | A_##a | A_Read0 | ak(ak_##x##_REG_LSR_IMM); \ + const u32 A_##x##_REG_ASR_IMM = A_SetNZ | A_Read16 | A_##a | A_Read0 | ak(ak_##x##_REG_ASR_IMM); \ + const u32 A_##x##_REG_ROR_IMM = A_RRXReadC | A_SetNZ | A_Read16 | A_##a | A_Read0 | ak(ak_##x##_REG_ROR_IMM); \ + const u32 A_##x##_REG_LSL_REG = A_SetNZ | A_Read16 | A_##a | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG); \ + const u32 A_##x##_REG_LSR_REG = A_SetNZ | A_Read16 | A_##a | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG); \ + const u32 A_##x##_REG_ASR_REG = A_SetNZ | A_Read16 | A_##a | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG); \ + const u32 A_##x##_REG_ROR_REG = A_SetNZ | A_Read16 | A_##a | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG); + +A_IMPLEMENT_ALU_TEST(TST,LOGIC) +A_IMPLEMENT_ALU_TEST(TEQ,LOGIC) +A_IMPLEMENT_ALU_TEST(CMP,ARITH) +A_IMPLEMENT_ALU_TEST(CMN,ARITH) + +const u32 A_MUL = A_MulFlags | A_Write16 | A_Read0 | A_Read8 | ak(ak_MUL); +const u32 A_MLA = A_MulFlags | A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_MLA); +const u32 A_UMULL = A_MulFlags | A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(ak_UMULL); +const u32 A_UMLAL = A_MulFlags | A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_UMLAL); +const u32 A_SMULL = A_MulFlags | A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(ak_SMULL); +const u32 A_SMLAL = A_MulFlags | A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_SMLAL); const u32 A_SMLAxy = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_SMLALxy); const u32 A_SMLAWy = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_SMLAWy); const u32 A_SMULWy = A_Write16 | A_Read0 | A_Read8 | ak(ak_SMULWy); @@ -161,7 +173,7 @@ const u32 A_SVC = A_BranchAlways | A_Link | ak(ak_SVC); // THUMB -#define tk(x) ((x) << 16) +#define tk(x) ((x) << 20) enum { T_Read0 = 1 << 0, @@ -183,42 +195,47 @@ enum { T_ReadR14 = 1 << 13, T_WriteR14 = 1 << 14, - T_PopPC = 1 << 15 + T_PopPC = 1 << 15, + + T_SetNZ = 1 << 16, + T_SetCV = 1 << 17, + T_SetMaybeC = 1 << 18, + T_ReadC = 1 << 19 }; -const u32 T_LSL_IMM = T_Write0 | T_Read3 | tk(tk_LSL_IMM); -const u32 T_LSR_IMM = T_Write0 | T_Read3 | tk(tk_LSR_IMM); -const u32 T_ASR_IMM = T_Write0 | T_Read3 | tk(tk_ASR_IMM); - -const u32 T_ADD_REG_ = T_Write0 | T_Read3 | T_Read6 | tk(tk_ADD_REG_); -const u32 T_SUB_REG_ = T_Write0 | T_Read3 | T_Read6 | tk(tk_SUB_REG_); -const u32 T_ADD_IMM_ = T_Write0 | T_Read3 | tk(tk_ADD_IMM_); -const u32 T_SUB_IMM_ = T_Write0 | T_Read3 | tk(tk_SUB_IMM_); - -const u32 T_MOV_IMM = T_Write8 | tk(tk_MOV_IMM); -const u32 T_CMP_IMM = T_Write8 | tk(tk_CMP_IMM); -const u32 T_ADD_IMM = T_Write8 | T_Read8 | tk(tk_ADD_IMM); -const u32 T_SUB_IMM = T_Write8 | T_Read8 | tk(tk_SUB_IMM); - -const u32 T_AND_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_AND_REG); -const u32 T_EOR_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_EOR_REG); -const u32 T_LSL_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_LSL_REG); -const u32 T_LSR_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_LSR_REG); -const u32 T_ASR_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_ASR_REG); -const u32 T_ADC_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_ADC_REG); -const u32 T_SBC_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_SBC_REG); -const u32 T_ROR_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_ROR_REG); -const u32 T_TST_REG = T_Read0 | T_Read3 | tk(tk_TST_REG); -const u32 T_NEG_REG = T_Write0 | T_Read3 | tk(tk_NEG_REG); -const u32 T_CMP_REG = T_Read0 | T_Read3 | tk(tk_CMP_REG); -const u32 T_CMN_REG = T_Read0 | T_Read3 | tk(tk_CMN_REG); -const u32 T_ORR_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_ORR_REG); -const u32 T_MUL_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_MUL_REG); -const u32 T_BIC_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_BIC_REG); -const u32 T_MVN_REG = T_Write0 | T_Read3 | tk(tk_MVN_REG); +const u32 T_LSL_IMM = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read3 | tk(tk_LSL_IMM); +const u32 T_LSR_IMM = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read3 | tk(tk_LSR_IMM); +const u32 T_ASR_IMM = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read3 | tk(tk_ASR_IMM); + +const u32 T_ADD_REG_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | T_Read6 | tk(tk_ADD_REG_); +const u32 T_SUB_REG_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | T_Read6 | tk(tk_SUB_REG_); +const u32 T_ADD_IMM_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | tk(tk_ADD_IMM_); +const u32 T_SUB_IMM_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | tk(tk_SUB_IMM_); + +const u32 T_MOV_IMM = T_SetNZ | T_Write8 | tk(tk_MOV_IMM); +const u32 T_CMP_IMM = T_SetNZ | T_SetCV | T_Write8 | tk(tk_CMP_IMM); +const u32 T_ADD_IMM = T_SetNZ | T_SetCV | T_Write8 | T_Read8 | tk(tk_ADD_IMM); +const u32 T_SUB_IMM = T_SetNZ | T_SetCV | T_Write8 | T_Read8 | tk(tk_SUB_IMM); + +const u32 T_AND_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_AND_REG); +const u32 T_EOR_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_EOR_REG); +const u32 T_LSL_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_LSL_REG); +const u32 T_LSR_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_LSR_REG); +const u32 T_ASR_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_ASR_REG); +const u32 T_ADC_REG = T_ReadC | T_SetNZ | T_SetCV | T_Write0 | T_Read0 | T_Read3 | tk(tk_ADC_REG); +const u32 T_SBC_REG = T_ReadC | T_SetNZ | T_SetCV | T_Write0 | T_Read0 | T_Read3 | tk(tk_SBC_REG); +const u32 T_ROR_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_ROR_REG); +const u32 T_TST_REG = T_SetNZ | T_Read0 | T_Read3 | tk(tk_TST_REG); +const u32 T_NEG_REG = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | tk(tk_NEG_REG); +const u32 T_CMP_REG = T_SetNZ | T_SetCV | T_Read0 | T_Read3 | tk(tk_CMP_REG); +const u32 T_CMN_REG = T_SetNZ | T_SetCV | T_Read0 | T_Read3 | tk(tk_CMN_REG); +const u32 T_ORR_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_ORR_REG); +const u32 T_MUL_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_MUL_REG); +const u32 T_BIC_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_BIC_REG); +const u32 T_MVN_REG = T_SetNZ | T_Write0 | T_Read3 | tk(tk_MVN_REG); const u32 T_ADD_HIREG = T_WriteHi0 | T_ReadHi0 | T_ReadHi3 | tk(tk_ADD_HIREG); -const u32 T_CMP_HIREG = T_ReadHi0 | T_ReadHi3 | tk(tk_CMP_HIREG); +const u32 T_CMP_HIREG = T_SetNZ | T_SetCV | T_ReadHi0 | T_ReadHi3 | tk(tk_CMP_HIREG); const u32 T_MOV_HIREG = T_WriteHi0 | T_ReadHi3 | tk(tk_MOV_HIREG); const u32 T_ADD_PCREL = T_Write8 | tk(tk_ADD_PCREL); @@ -268,10 +285,20 @@ const u32 T_SVC = T_BranchAlways | T_WriteR14 | tk(tk_SVC); Info Decode(bool thumb, u32 num, u32 instr) { + const u8 FlagsReadPerCond[7] = { + flag_Z, + flag_C, + flag_N, + flag_V, + flag_C | flag_Z, + flag_N | flag_V, + flag_Z | flag_N | flag_V}; + Info res = {0}; if (thumb) { u32 data = THUMBInstrTable[(instr >> 6) & 0x3FF]; + res.Kind = (data >> 20) & 0x3F; if (data & T_Read0) res.SrcRegs |= 1 << (instr & 0x7); @@ -309,7 +336,18 @@ Info Decode(bool thumb, u32 num, u32 instr) if (data & T_PopPC && instr & (1 << 8)) res.DstRegs |= 1 << 15; - res.Kind = (data >> 16) & 0x3F; + if (data & T_SetNZ) + res.WriteFlags |= flag_N | flag_Z; + if (data & T_SetCV) + res.WriteFlags |= flag_C | flag_V; + if (data & T_SetMaybeC) + res.WriteFlags |= flag_C << 4; + if (data & T_ReadC) + res.ReadFlags |= flag_C; + + if (res.Kind == tk_BCOND) + res.ReadFlags |= FlagsReadPerCond[(instr >> 9) & 0x7]; + res.EndBlock = res.Branches(); return res; @@ -323,7 +361,7 @@ Info Decode(bool thumb, u32 num, u32 instr) if (data & A_UnkOnARM7 && num != 0) data = A_UNK; - res.Kind = (data >> 13) & 0x1FF; + res.Kind = (data >> 18) & 0x1FF; if (res.Kind == ak_MCR) { @@ -382,6 +420,26 @@ Info Decode(bool thumb, u32 num, u32 instr) if (res.Kind == ak_LDM) res.DstRegs |= instr & (1 << 15); // this is right + if (data & A_SetNZ) + res.WriteFlags |= flag_N | flag_Z; + if (data & A_SetCV) + res.WriteFlags |= flag_C | flag_V; + if (data & A_SetMaybeC) + res.WriteFlags |= flag_C << 4; + if ((data & A_MulFlags) && (instr & (1 << 20))) + res.WriteFlags |= flag_N | flag_Z; + if (data & A_ReadC) + res.ReadFlags |= flag_C; + if ((data & A_RRXReadC) && !((instr >> 7) & 0x1F)) + res.ReadFlags |= flag_C; + + if ((instr >> 28) < 0xE) + { + // make non conditional flag sets conditional + res.WriteFlags = res.WriteFlags | (res.WriteFlags << 4); + res.ReadFlags |= FlagsReadPerCond[instr >> 29]; + } + res.EndBlock |= res.Branches(); return res; diff --git a/src/ARM_InstrInfo.h b/src/ARM_InstrInfo.h index 4fe9b10..5336837 100644 --- a/src/ARM_InstrInfo.h +++ b/src/ARM_InstrInfo.h @@ -215,11 +215,24 @@ enum tk_Count }; +enum +{ + flag_N = 1 << 3, + flag_Z = 1 << 2, + flag_C = 1 << 1, + flag_V = 1 << 0, +}; + struct Info { u16 DstRegs, SrcRegs; u16 Kind; + u8 ReadFlags; + // lower 4 bits - set always + // upper 4 bits - might set flag + u8 WriteFlags; + bool EndBlock; bool Branches() { |