aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author RSDuck <rsduck@users.noreply.github.com> 2019-08-25 12:28:48 +0200
committer RSDuck <rsduck@users.noreply.github.com> 2020-04-26 13:05:00 +0200
commit 5ea91b8a039e0735ac5cb102e2375c26c4f7a150 (patch)
tree 197761d920867febb0b6f7a5c1947fe6fc8e13b6 /src
parent 03ab7f1645f5a5c8427bc53a12f417845a17c980 (diff)
optimise away unneeded flag sets
- especially useful for thumb code and larger max block sizes
- can still be improved upon
Diffstat (limited to 'src')
-rw-r--r-- src/ARMJIT.cpp 24
-rw-r--r-- src/ARMJIT.h 1
-rw-r--r-- src/ARMJIT_x64/ARMJIT_ALU.cpp 64
-rw-r--r-- src/ARMJIT_x64/ARMJIT_Compiler.cpp 9
-rw-r--r-- src/ARMJIT_x64/ARMJIT_Compiler.h 6
-rw-r--r-- src/ARM_InstrInfo.cpp 238
-rw-r--r-- src/ARM_InstrInfo.h 13
-rw-r--r-- src/libui_sdl/main.cpp 2
8 files changed, 248 insertions, 109 deletions
diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp
index 949bc1c..3b6bc2e 100644
--- a/src/ARMJIT.cpp
+++ b/src/ARMJIT.cpp
@@ -126,6 +126,24 @@ void DeInit()
delete compiler;
}
+void floodFillSetFlags(FetchedInstr instrs[], int start, u8 flags) // backward pass over instrs[start]..instrs[0]: records in each instruction's SetFlags which of the requested `flags` bits it writes, stopping once every requested bit has a definite writer
+{
+ for (int j = start; j >= 0; j--) // start can be -1 (CompileBlock passes i - 2 after the first fetch); the loop then does nothing
+ {
+ u8 match = instrs[j].Info.WriteFlags & flags; // lower 4 bits of WriteFlags: flags this instruction always sets
+ u8 matchMaybe = (instrs[j].Info.WriteFlags >> 4) & flags; // upper 4 bits of WriteFlags: flags this instruction might set
+ if (matchMaybe) // writes flags maybe
+ instrs[j].SetFlags |= matchMaybe; // marked as needed, but the bit stays in `flags`, so the search for a definite writer continues; NOTE(review): SetFlags is only ever OR-ed here — presumably zeroed when the instruction is fetched, confirm
+ if (match)
+ {
+ instrs[j].SetFlags |= match;
+ flags &= ~match; // a definite writer was found, so earlier instructions are not asked for these bits by this request
+ if (!flags)
+ return; // nothing left to look for
+ }
+ }
+}
+
CompiledBlock CompileBlock(ARM* cpu)
{
bool thumb = cpu->CPSR & 0x20;
@@ -175,8 +193,14 @@ CompiledBlock CompileBlock(ARM* cpu)
instrs[i].Info = ARMInstrInfo::Decode(thumb, cpu->Num, instrs[i].Instr);
i++;
+
+ bool canCompile = compiler->CanCompile(thumb, instrs[i - 1].Info.Kind);
+ if (instrs[i - 1].Info.ReadFlags != 0 || !canCompile)
+ floodFillSetFlags(instrs, i - 2, canCompile ? instrs[i - 1].Info.ReadFlags : 0xF);
} while(!instrs[i - 1].Info.EndBlock && i < Config::JIT_MaxBlockSize);
+ floodFillSetFlags(instrs, i - 1, 0xF);
+
CompiledBlock block = compiler->CompileBlock(cpu, instrs, i);
if (cpu->Num == 0)
diff --git a/src/ARMJIT.h b/src/ARMJIT.h
index 0fc1c38..6197695 100644
--- a/src/ARMJIT.h
+++ b/src/ARMJIT.h
@@ -28,6 +28,7 @@ struct FetchedInstr
return Instr >> 28;
}
+ u8 SetFlags;
u32 Instr;
u32 NextInstr[2];
diff --git a/src/ARMJIT_x64/ARMJIT_ALU.cpp b/src/ARMJIT_x64/ARMJIT_ALU.cpp
index f0bcf8e..6a7d711 100644
--- a/src/ARMJIT_x64/ARMJIT_ALU.cpp
+++ b/src/ARMJIT_x64/ARMJIT_ALU.cpp
@@ -111,6 +111,8 @@ OpArg Compiler::A_Comp_GetALUOp2(bool S, bool& carryUsed)
}
else
{
+ S = S && (CurInstr.SetFlags & 0x2);
+
int op = (CurInstr.Instr >> 5) & 0x3;
if (CurInstr.Instr & (1 << 4))
{
@@ -215,7 +217,8 @@ void Compiler::A_Comp_MovOp()
if (S)
{
- TEST(32, rd, rd);
+ if (FlagsNZRequired())
+ TEST(32, rd, rd);
Comp_RetriveFlags(false, false, carryUsed);
}
@@ -263,12 +266,14 @@ void Compiler::Comp_MulOp(bool S, bool add, Gen::OpArg rd, Gen::OpArg rm, Gen::O
{
IMUL(32, RSCRATCH, rs);
LEA(32, rd.GetSimpleReg(), MRegSum(RSCRATCH, rn.GetSimpleReg()));
- TEST(32, rd, rd);
+ if (S && FlagsNZRequired())
+ TEST(32, rd, rd);
}
else
{
IMUL(32, RSCRATCH, rs);
MOV(32, rd, R(RSCRATCH));
+ if (S && FlagsNZRequired())
TEST(32, R(RSCRATCH), R(RSCRATCH));
}
@@ -331,7 +336,7 @@ void Compiler::A_Comp_SMULL_SMLAL()
else
{
IMUL(64, RSCRATCH2, R(RSCRATCH3));
- if (S)
+ if (S && FlagsNZRequired())
TEST(64, R(RSCRATCH2), R(RSCRATCH2));
}
@@ -345,9 +350,20 @@ void Compiler::A_Comp_SMULL_SMLAL()
void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed)
{
- CPSRDirty = true;
+ if (CurInstr.SetFlags == 0)
+ return;
+ if (retriveCV && !(CurInstr.SetFlags & 0x3))
+ retriveCV = false;
bool carryOnly = !retriveCV && carryUsed;
+ if (carryOnly && !(CurInstr.SetFlags & 0x2))
+ {
+ carryUsed = false;
+ carryOnly = false;
+ }
+
+ CPSRDirty = true;
+
if (retriveCV)
{
SETcc(CC_O, R(RSCRATCH));
@@ -355,19 +371,28 @@ void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed)
LEA(32, RSCRATCH2, MComplex(RSCRATCH, RSCRATCH3, SCALE_2, 0));
}
- SETcc(CC_S, R(RSCRATCH));
- SETcc(CC_Z, R(RSCRATCH3));
- LEA(32, RSCRATCH, MComplex(RSCRATCH3, RSCRATCH, SCALE_2, 0));
- int shiftAmount = 30;
- if (retriveCV || carryUsed)
+ if (FlagsNZRequired())
{
- LEA(32, RSCRATCH, MComplex(RSCRATCH2, RSCRATCH, carryOnly ? SCALE_2 : SCALE_4, 0));
- shiftAmount = carryOnly ? 29 : 28;
- }
- SHL(32, R(RSCRATCH), Imm8(shiftAmount));
+ SETcc(CC_S, R(RSCRATCH));
+ SETcc(CC_Z, R(RSCRATCH3));
+ LEA(32, RSCRATCH, MComplex(RSCRATCH3, RSCRATCH, SCALE_2, 0));
+ int shiftAmount = 30;
+ if (retriveCV || carryUsed)
+ {
+ LEA(32, RSCRATCH, MComplex(RSCRATCH2, RSCRATCH, carryOnly ? SCALE_2 : SCALE_4, 0));
+ shiftAmount = carryOnly ? 29 : 28;
+ }
+ SHL(32, R(RSCRATCH), Imm8(shiftAmount));
- AND(32, R(RCPSR), Imm32(0x3FFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28)));
- OR(32, R(RCPSR), R(RSCRATCH));
+ AND(32, R(RCPSR), Imm32(0x3FFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28)));
+ OR(32, R(RCPSR), R(RSCRATCH));
+ }
+ else
+ {
+ SHL(32, R(RSCRATCH2), Imm8(carryOnly ? 29 : 28));
+ AND(32, R(RCPSR), Imm32(0xFFFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28)));
+ OR(32, R(RCPSR), R(RSCRATCH2));
+ }
}
// always uses RSCRATCH, RSCRATCH2 only if S == true
@@ -523,7 +548,8 @@ void Compiler::T_Comp_ShiftImm()
if (shifted != rd)
MOV(32, rd, shifted);
- TEST(32, rd, rd);
+ if (FlagsNZRequired())
+ TEST(32, rd, rd);
Comp_RetriveFlags(false, false, carryUsed);
}
@@ -557,7 +583,8 @@ void Compiler::T_Comp_ALU_Imm8()
{
case 0x0:
MOV(32, rd, imm);
- TEST(32, rd, rd);
+ if (FlagsNZRequired())
+ TEST(32, rd, rd);
Comp_RetriveFlags(false, false, false);
return;
case 0x1:
@@ -607,7 +634,8 @@ void Compiler::T_Comp_ALU()
int shiftOp = op == 0x7 ? 3 : op - 0x2;
bool carryUsed;
OpArg shifted = Comp_RegShiftReg(shiftOp, rs, rd, true, carryUsed);
- TEST(32, shifted, shifted);
+ if (FlagsNZRequired())
+ TEST(32, shifted, shifted);
MOV(32, rd, shifted);
Comp_RetriveFlags(false, false, true);
}
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
index ab13cb6..6abb2bb 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
@@ -342,6 +342,11 @@ const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] = {
};
#undef F
+bool Compiler::CanCompile(bool thumb, u16 kind) // true when the handler table for the selected instruction set (T_Comp for thumb, A_Comp otherwise) has a non-NULL entry for `kind`
+{
+ return (thumb ? T_Comp[kind] : A_Comp[kind]) != NULL; // NOTE(review): `kind` is used as an index without a range check — presumably always a value produced by ARMInstrInfo::Decode; confirm
+}
+
void Compiler::Reset()
{
memset(ResetStart, 0xcc, CodeMemSize);
@@ -380,11 +385,15 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
// TODO: this is ugly as a whole, do better
RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount);
+ printf("block start %d\n", Thumb);
+
for (int i = 0; i < instrsCount; i++)
{
R15 += Thumb ? 2 : 4;
CurInstr = instrs[i];
+ printf("%x %d %d %d\n", CurInstr.Instr, CurInstr.SetFlags, CurInstr.Info.WriteFlags, CurInstr.Info.ReadFlags);
+
CompileFunc comp = Thumb
? T_Comp[CurInstr.Info.Kind]
: A_Comp[CurInstr.Info.Kind];
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h
index 3151cbc..8861884 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.h
@@ -29,6 +29,8 @@ public:
void LoadReg(int reg, Gen::X64Reg nativeReg);
void SaveReg(int reg, Gen::X64Reg nativeReg);
+ bool CanCompile(bool thumb, u16 kind);
+
typedef void (Compiler::*CompileFunc)();
void Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR = false);
@@ -64,7 +66,6 @@ public:
void A_Comp_BranchImm();
void A_Comp_BranchXchangeReg();
-
void T_Comp_ShiftImm();
void T_Comp_AddSub_();
void T_Comp_ALU_Imm8();
@@ -121,6 +122,9 @@ public:
void LoadCPSR();
void SaveCPSR();
+ bool FlagsNZRequired() // whether the current instruction has the N and/or Z bit set in SetFlags
+ { return CurInstr.SetFlags & 0xC; } // 0xC == flag_N | flag_Z (1 << 3 | 1 << 2 in ARM_InstrInfo.h)
+
Gen::FixupBranch CheckCondition(u32 cond);
Gen::OpArg MapReg(int reg)
diff --git a/src/ARM_InstrInfo.cpp b/src/ARM_InstrInfo.cpp
index 4813799..ea6d827 100644
--- a/src/ARM_InstrInfo.cpp
+++ b/src/ARM_InstrInfo.cpp
@@ -5,7 +5,7 @@
namespace ARMInstrInfo
{
-#define ak(x) ((x) << 13)
+#define ak(x) ((x) << 18)
enum {
A_Read0 = 1 << 0,
@@ -26,69 +26,81 @@ enum {
A_Link = 1 << 10,
A_UnkOnARM7 = 1 << 11,
+
+ A_SetNZ = 1 << 12,
+ A_SetCV = 1 << 13,
+ A_SetMaybeC = 1 << 14,
+ A_MulFlags = 1 << 15,
+ A_ReadC = 1 << 16,
+ A_RRXReadC = 1 << 17,
};
#define A_BIOP A_Read16
#define A_MONOOP 0
-#define A_IMPLEMENT_ALU_OP(x,k) \
- const u32 A_##x##_IMM = A_Write12 | A_##k | ak(ak_##x##_IMM); \
- const u32 A_##x##_REG_LSL_IMM = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSL_IMM); \
- const u32 A_##x##_REG_LSR_IMM = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSR_IMM); \
- const u32 A_##x##_REG_ASR_IMM = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ASR_IMM); \
- const u32 A_##x##_REG_ROR_IMM = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ROR_IMM); \
- const u32 A_##x##_REG_LSL_REG = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG); \
- const u32 A_##x##_REG_LSR_REG = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG); \
- const u32 A_##x##_REG_ASR_REG = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG); \
- const u32 A_##x##_REG_ROR_REG = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG); \
+#define A_ARITH A_SetCV
+#define A_LOGIC A_SetMaybeC
+#define A_ARITH_IMM A_SetCV
+#define A_LOGIC_IMM 0
+
+#define A_IMPLEMENT_ALU_OP(x,k,a,c) \
+ const u32 A_##x##_IMM = A_Write12 | c | A_##k | ak(ak_##x##_IMM); \
+ const u32 A_##x##_REG_LSL_IMM = A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_LSL_IMM); \
+ const u32 A_##x##_REG_LSR_IMM = A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_LSR_IMM); \
+ const u32 A_##x##_REG_ASR_IMM = A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_ASR_IMM); \
+ const u32 A_##x##_REG_ROR_IMM = A_RRXReadC | A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_ROR_IMM); \
+ const u32 A_##x##_REG_LSL_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG); \
+ const u32 A_##x##_REG_LSR_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG); \
+ const u32 A_##x##_REG_ASR_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG); \
+ const u32 A_##x##_REG_ROR_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG); \
\
- const u32 A_##x##_IMM_S = A_Write12 | A_##k | ak(ak_##x##_IMM_S); \
- const u32 A_##x##_REG_LSL_IMM_S = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSL_IMM_S); \
- const u32 A_##x##_REG_LSR_IMM_S = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSR_IMM_S); \
- const u32 A_##x##_REG_ASR_IMM_S = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ASR_IMM_S); \
- const u32 A_##x##_REG_ROR_IMM_S = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ROR_IMM_S); \
- const u32 A_##x##_REG_LSL_REG_S = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG_S); \
- const u32 A_##x##_REG_LSR_REG_S = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG_S); \
- const u32 A_##x##_REG_ASR_REG_S = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG_S); \
- const u32 A_##x##_REG_ROR_REG_S = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG_S);
-
-A_IMPLEMENT_ALU_OP(AND,BIOP)
-A_IMPLEMENT_ALU_OP(EOR,BIOP)
-A_IMPLEMENT_ALU_OP(SUB,BIOP)
-A_IMPLEMENT_ALU_OP(RSB,BIOP)
-A_IMPLEMENT_ALU_OP(ADD,BIOP)
-A_IMPLEMENT_ALU_OP(ADC,BIOP)
-A_IMPLEMENT_ALU_OP(SBC,BIOP)
-A_IMPLEMENT_ALU_OP(RSC,BIOP)
-A_IMPLEMENT_ALU_OP(ORR,BIOP)
-A_IMPLEMENT_ALU_OP(MOV,MONOOP)
-A_IMPLEMENT_ALU_OP(BIC,BIOP)
-A_IMPLEMENT_ALU_OP(MVN,MONOOP)
+ const u32 A_##x##_IMM_S = A_SetNZ | c | A_##a##_IMM | A_Write12 | A_##k | ak(ak_##x##_IMM_S); \
+ const u32 A_##x##_REG_LSL_IMM_S = A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSL_IMM_S); \
+ const u32 A_##x##_REG_LSR_IMM_S = A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSR_IMM_S); \
+ const u32 A_##x##_REG_ASR_IMM_S = A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ASR_IMM_S); \
+ const u32 A_##x##_REG_ROR_IMM_S = A_RRXReadC | A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ROR_IMM_S); \
+ const u32 A_##x##_REG_LSL_REG_S = A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG_S); \
+ const u32 A_##x##_REG_LSR_REG_S = A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG_S); \
+ const u32 A_##x##_REG_ASR_REG_S = A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG_S); \
+ const u32 A_##x##_REG_ROR_REG_S = A_SetNZ | c | A_##a | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG_S);
+
+A_IMPLEMENT_ALU_OP(AND,BIOP,LOGIC,0)
+A_IMPLEMENT_ALU_OP(EOR,BIOP,LOGIC,0)
+A_IMPLEMENT_ALU_OP(SUB,BIOP,ARITH,0)
+A_IMPLEMENT_ALU_OP(RSB,BIOP,ARITH,0)
+A_IMPLEMENT_ALU_OP(ADD,BIOP,ARITH,0)
+A_IMPLEMENT_ALU_OP(ADC,BIOP,ARITH,A_ReadC)
+A_IMPLEMENT_ALU_OP(SBC,BIOP,ARITH,A_ReadC)
+A_IMPLEMENT_ALU_OP(RSC,BIOP,ARITH,A_ReadC)
+A_IMPLEMENT_ALU_OP(ORR,BIOP,LOGIC,0)
+A_IMPLEMENT_ALU_OP(MOV,MONOOP,LOGIC,0)
+A_IMPLEMENT_ALU_OP(BIC,BIOP,LOGIC,0)
+A_IMPLEMENT_ALU_OP(MVN,MONOOP,LOGIC,0)
const u32 A_MOV_REG_LSL_IMM_DBG = A_MOV_REG_LSL_IMM;
-#define A_IMPLEMENT_ALU_TEST(x) \
- const u32 A_##x##_IMM = A_Read16 | A_Read0 | ak(ak_##x##_IMM); \
- const u32 A_##x##_REG_LSL_IMM = A_Read16 | A_Read0 | ak(ak_##x##_REG_LSL_IMM); \
- const u32 A_##x##_REG_LSR_IMM = A_Read16 | A_Read0 | ak(ak_##x##_REG_LSR_IMM); \
- const u32 A_##x##_REG_ASR_IMM = A_Read16 | A_Read0 | ak(ak_##x##_REG_ASR_IMM); \
- const u32 A_##x##_REG_ROR_IMM = A_Read16 | A_Read0 | ak(ak_##x##_REG_ROR_IMM); \
- const u32 A_##x##_REG_LSL_REG = A_Read16 | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG); \
- const u32 A_##x##_REG_LSR_REG = A_Read16 | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG); \
- const u32 A_##x##_REG_ASR_REG = A_Read16 | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG); \
- const u32 A_##x##_REG_ROR_REG = A_Read16 | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG);
-
-A_IMPLEMENT_ALU_TEST(TST)
-A_IMPLEMENT_ALU_TEST(TEQ)
-A_IMPLEMENT_ALU_TEST(CMP)
-A_IMPLEMENT_ALU_TEST(CMN)
-
-const u32 A_MUL = A_Write16 | A_Read0 | A_Read8 | ak(ak_MUL);
-const u32 A_MLA = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_MLA);
-const u32 A_UMULL = A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(ak_UMULL);
-const u32 A_UMLAL = A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_UMLAL);
-const u32 A_SMULL = A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(ak_SMULL);
-const u32 A_SMLAL = A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_SMLAL);
+#define A_IMPLEMENT_ALU_TEST(x,a) \
+ const u32 A_##x##_IMM = A_SetNZ | A_Read16 | A_##a | A_Read0 | ak(ak_##x##_IMM); \
+ const u32 A_##x##_REG_LSL_IMM = A_SetNZ | A_Read16 | A_##a | A_Read0 | ak(ak_##x##_REG_LSL_IMM); \
+ const u32 A_##x##_REG_LSR_IMM = A_SetNZ | A_Read16 | A_##a | A_Read0 | ak(ak_##x##_REG_LSR_IMM); \
+ const u32 A_##x##_REG_ASR_IMM = A_SetNZ | A_Read16 | A_##a | A_Read0 | ak(ak_##x##_REG_ASR_IMM); \
+ const u32 A_##x##_REG_ROR_IMM = A_RRXReadC | A_SetNZ | A_Read16 | A_##a | A_Read0 | ak(ak_##x##_REG_ROR_IMM); \
+ const u32 A_##x##_REG_LSL_REG = A_SetNZ | A_Read16 | A_##a | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG); \
+ const u32 A_##x##_REG_LSR_REG = A_SetNZ | A_Read16 | A_##a | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG); \
+ const u32 A_##x##_REG_ASR_REG = A_SetNZ | A_Read16 | A_##a | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG); \
+ const u32 A_##x##_REG_ROR_REG = A_SetNZ | A_Read16 | A_##a | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG);
+
+A_IMPLEMENT_ALU_TEST(TST,LOGIC)
+A_IMPLEMENT_ALU_TEST(TEQ,LOGIC)
+A_IMPLEMENT_ALU_TEST(CMP,ARITH)
+A_IMPLEMENT_ALU_TEST(CMN,ARITH)
+
+const u32 A_MUL = A_MulFlags | A_Write16 | A_Read0 | A_Read8 | ak(ak_MUL);
+const u32 A_MLA = A_MulFlags | A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_MLA);
+const u32 A_UMULL = A_MulFlags | A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(ak_UMULL);
+const u32 A_UMLAL = A_MulFlags | A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_UMLAL);
+const u32 A_SMULL = A_MulFlags | A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(ak_SMULL);
+const u32 A_SMLAL = A_MulFlags | A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_SMLAL);
const u32 A_SMLAxy = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_SMLALxy);
const u32 A_SMLAWy = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_SMLAWy);
const u32 A_SMULWy = A_Write16 | A_Read0 | A_Read8 | ak(ak_SMULWy);
@@ -161,7 +173,7 @@ const u32 A_SVC = A_BranchAlways | A_Link | ak(ak_SVC);
// THUMB
-#define tk(x) ((x) << 16)
+#define tk(x) ((x) << 20)
enum {
T_Read0 = 1 << 0,
@@ -183,42 +195,47 @@ enum {
T_ReadR14 = 1 << 13,
T_WriteR14 = 1 << 14,
- T_PopPC = 1 << 15
+ T_PopPC = 1 << 15,
+
+ T_SetNZ = 1 << 16,
+ T_SetCV = 1 << 17,
+ T_SetMaybeC = 1 << 18,
+ T_ReadC = 1 << 19
};
-const u32 T_LSL_IMM = T_Write0 | T_Read3 | tk(tk_LSL_IMM);
-const u32 T_LSR_IMM = T_Write0 | T_Read3 | tk(tk_LSR_IMM);
-const u32 T_ASR_IMM = T_Write0 | T_Read3 | tk(tk_ASR_IMM);
-
-const u32 T_ADD_REG_ = T_Write0 | T_Read3 | T_Read6 | tk(tk_ADD_REG_);
-const u32 T_SUB_REG_ = T_Write0 | T_Read3 | T_Read6 | tk(tk_SUB_REG_);
-const u32 T_ADD_IMM_ = T_Write0 | T_Read3 | tk(tk_ADD_IMM_);
-const u32 T_SUB_IMM_ = T_Write0 | T_Read3 | tk(tk_SUB_IMM_);
-
-const u32 T_MOV_IMM = T_Write8 | tk(tk_MOV_IMM);
-const u32 T_CMP_IMM = T_Write8 | tk(tk_CMP_IMM);
-const u32 T_ADD_IMM = T_Write8 | T_Read8 | tk(tk_ADD_IMM);
-const u32 T_SUB_IMM = T_Write8 | T_Read8 | tk(tk_SUB_IMM);
-
-const u32 T_AND_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_AND_REG);
-const u32 T_EOR_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_EOR_REG);
-const u32 T_LSL_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_LSL_REG);
-const u32 T_LSR_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_LSR_REG);
-const u32 T_ASR_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_ASR_REG);
-const u32 T_ADC_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_ADC_REG);
-const u32 T_SBC_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_SBC_REG);
-const u32 T_ROR_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_ROR_REG);
-const u32 T_TST_REG = T_Read0 | T_Read3 | tk(tk_TST_REG);
-const u32 T_NEG_REG = T_Write0 | T_Read3 | tk(tk_NEG_REG);
-const u32 T_CMP_REG = T_Read0 | T_Read3 | tk(tk_CMP_REG);
-const u32 T_CMN_REG = T_Read0 | T_Read3 | tk(tk_CMN_REG);
-const u32 T_ORR_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_ORR_REG);
-const u32 T_MUL_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_MUL_REG);
-const u32 T_BIC_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_BIC_REG);
-const u32 T_MVN_REG = T_Write0 | T_Read3 | tk(tk_MVN_REG);
+const u32 T_LSL_IMM = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read3 | tk(tk_LSL_IMM);
+const u32 T_LSR_IMM = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read3 | tk(tk_LSR_IMM);
+const u32 T_ASR_IMM = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read3 | tk(tk_ASR_IMM);
+
+const u32 T_ADD_REG_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | T_Read6 | tk(tk_ADD_REG_);
+const u32 T_SUB_REG_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | T_Read6 | tk(tk_SUB_REG_);
+const u32 T_ADD_IMM_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | tk(tk_ADD_IMM_);
+const u32 T_SUB_IMM_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | tk(tk_SUB_IMM_);
+
+const u32 T_MOV_IMM = T_SetNZ | T_Write8 | tk(tk_MOV_IMM);
+const u32 T_CMP_IMM = T_SetNZ | T_SetCV | T_Write8 | tk(tk_CMP_IMM);
+const u32 T_ADD_IMM = T_SetNZ | T_SetCV | T_Write8 | T_Read8 | tk(tk_ADD_IMM);
+const u32 T_SUB_IMM = T_SetNZ | T_SetCV | T_Write8 | T_Read8 | tk(tk_SUB_IMM);
+
+const u32 T_AND_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_AND_REG);
+const u32 T_EOR_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_EOR_REG);
+const u32 T_LSL_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_LSL_REG);
+const u32 T_LSR_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_LSR_REG);
+const u32 T_ASR_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_ASR_REG);
+const u32 T_ADC_REG = T_ReadC | T_SetNZ | T_SetCV | T_Write0 | T_Read0 | T_Read3 | tk(tk_ADC_REG);
+const u32 T_SBC_REG = T_ReadC | T_SetNZ | T_SetCV | T_Write0 | T_Read0 | T_Read3 | tk(tk_SBC_REG);
+const u32 T_ROR_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_ROR_REG);
+const u32 T_TST_REG = T_SetNZ | T_Read0 | T_Read3 | tk(tk_TST_REG);
+const u32 T_NEG_REG = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | tk(tk_NEG_REG);
+const u32 T_CMP_REG = T_SetNZ | T_SetCV | T_Read0 | T_Read3 | tk(tk_CMP_REG);
+const u32 T_CMN_REG = T_SetNZ | T_SetCV | T_Read0 | T_Read3 | tk(tk_CMN_REG);
+const u32 T_ORR_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_ORR_REG);
+const u32 T_MUL_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_MUL_REG);
+const u32 T_BIC_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_BIC_REG);
+const u32 T_MVN_REG = T_SetNZ | T_Write0 | T_Read3 | tk(tk_MVN_REG);
const u32 T_ADD_HIREG = T_WriteHi0 | T_ReadHi0 | T_ReadHi3 | tk(tk_ADD_HIREG);
-const u32 T_CMP_HIREG = T_ReadHi0 | T_ReadHi3 | tk(tk_CMP_HIREG);
+const u32 T_CMP_HIREG = T_SetNZ | T_SetCV | T_ReadHi0 | T_ReadHi3 | tk(tk_CMP_HIREG);
const u32 T_MOV_HIREG = T_WriteHi0 | T_ReadHi3 | tk(tk_MOV_HIREG);
const u32 T_ADD_PCREL = T_Write8 | tk(tk_ADD_PCREL);
@@ -268,10 +285,20 @@ const u32 T_SVC = T_BranchAlways | T_WriteR14 | tk(tk_SVC);
Info Decode(bool thumb, u32 num, u32 instr)
{
+ const u8 FlagsReadPerCond[7] = {
+ flag_Z,
+ flag_C,
+ flag_N,
+ flag_V,
+ flag_C | flag_Z,
+ flag_N | flag_V,
+ flag_Z | flag_N | flag_V};
+
Info res = {0};
if (thumb)
{
u32 data = THUMBInstrTable[(instr >> 6) & 0x3FF];
+ res.Kind = (data >> 20) & 0x3F;
if (data & T_Read0)
res.SrcRegs |= 1 << (instr & 0x7);
@@ -309,7 +336,18 @@ Info Decode(bool thumb, u32 num, u32 instr)
if (data & T_PopPC && instr & (1 << 8))
res.DstRegs |= 1 << 15;
- res.Kind = (data >> 16) & 0x3F;
+ if (data & T_SetNZ)
+ res.WriteFlags |= flag_N | flag_Z;
+ if (data & T_SetCV)
+ res.WriteFlags |= flag_C | flag_V;
+ if (data & T_SetMaybeC)
+ res.WriteFlags |= flag_C << 4;
+ if (data & T_ReadC)
+ res.ReadFlags |= flag_C;
+
+ if (res.Kind == tk_BCOND)
+ res.ReadFlags |= FlagsReadPerCond[(instr >> 9) & 0x7];
+
res.EndBlock = res.Branches();
return res;
@@ -323,7 +361,7 @@ Info Decode(bool thumb, u32 num, u32 instr)
if (data & A_UnkOnARM7 && num != 0)
data = A_UNK;
- res.Kind = (data >> 13) & 0x1FF;
+ res.Kind = (data >> 18) & 0x1FF;
if (res.Kind == ak_MCR)
{
@@ -382,6 +420,26 @@ Info Decode(bool thumb, u32 num, u32 instr)
if (res.Kind == ak_LDM)
res.DstRegs |= instr & (1 << 15); // this is right
+ if (data & A_SetNZ)
+ res.WriteFlags |= flag_N | flag_Z;
+ if (data & A_SetCV)
+ res.WriteFlags |= flag_C | flag_V;
+ if (data & A_SetMaybeC)
+ res.WriteFlags |= flag_C << 4;
+ if ((data & A_MulFlags) && (instr & (1 << 20)))
+ res.WriteFlags |= flag_N | flag_Z;
+ if (data & A_ReadC)
+ res.ReadFlags |= flag_C;
+ if ((data & A_RRXReadC) && !((instr >> 7) & 0x1F))
+ res.ReadFlags |= flag_C;
+
+ if ((instr >> 28) < 0xE)
+ {
+ // make non conditional flag sets conditional
+ res.WriteFlags = res.WriteFlags | (res.WriteFlags << 4);
+ res.ReadFlags |= FlagsReadPerCond[instr >> 29];
+ }
+
res.EndBlock |= res.Branches();
return res;
diff --git a/src/ARM_InstrInfo.h b/src/ARM_InstrInfo.h
index 4fe9b10..5336837 100644
--- a/src/ARM_InstrInfo.h
+++ b/src/ARM_InstrInfo.h
@@ -215,11 +215,24 @@ enum
tk_Count
};
+enum // bit masks used in Info::ReadFlags and in each nibble of Info::WriteFlags
+{
+ flag_N = 1 << 3, // 0xC (N | Z) is the mask tested by Compiler::FlagsNZRequired
+ flag_Z = 1 << 2,
+ flag_C = 1 << 1, // 0x2 is the mask the x64 ALU code tests before computing a shifter carry
+ flag_V = 1 << 0, // 0x3 (C | V) is the mask Comp_RetriveFlags tests before retrieving carry/overflow
+};
+
struct Info
{
u16 DstRegs, SrcRegs;
u16 Kind;
+ u8 ReadFlags;
+ // lower 4 bits - set always
+ // upper 4 bits - might set flag
+ u8 WriteFlags;
+
bool EndBlock;
bool Branches()
{
diff --git a/src/libui_sdl/main.cpp b/src/libui_sdl/main.cpp
index 0066668..c3db88d 100644
--- a/src/libui_sdl/main.cpp
+++ b/src/libui_sdl/main.cpp
@@ -2675,6 +2675,8 @@ void RecreateMainWindow(bool opengl)
int main(int argc, char** argv)
{
+ freopen("miauz.txt", "w", stdout);
+
srand(time(NULL));
printf("melonDS " MELONDS_VERSION "\n");