diff options
Diffstat (limited to 'src/ARMJIT_x64/ARMJIT_Compiler.cpp')
-rw-r--r-- | src/ARMJIT_x64/ARMJIT_Compiler.cpp | 899 |
1 files changed, 899 insertions, 0 deletions
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp new file mode 100644 index 0000000..d8bdd56 --- /dev/null +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -0,0 +1,899 @@ +#include "ARMJIT_Compiler.h" + +#include "../ARMInterpreter.h" +#include "../Config.h" + +#include <assert.h> + +#include "../dolphin/CommonFuncs.h" + +#ifdef _WIN32 +#include <windows.h> +#else +#include <sys/mman.h> +#include <unistd.h> +#endif + +using namespace Gen; + +extern "C" void ARM_Ret(); + +namespace ARMJIT +{ +template <> +const X64Reg RegisterCache<Compiler, X64Reg>::NativeRegAllocOrder[] = +{ +#ifdef _WIN32 + RBX, RSI, RDI, R12, R13, R14, // callee saved + R10, R11, // caller saved +#else + RBX, R12, R13, R14, // callee saved, this is sad + R9, R10, R11, // caller saved +#endif +}; +template <> +const int RegisterCache<Compiler, X64Reg>::NativeRegsAvailable = +#ifdef _WIN32 + 8 +#else + 7 +#endif +; + +#ifdef _WIN32 +const BitSet32 CallerSavedPushRegs({R10, R11}); +#else +const BitSet32 CallerSavedPushRegs({R9, R10, R11}); +#endif + +void Compiler::PushRegs(bool saveHiRegs) +{ + BitSet32 loadedRegs(RegCache.LoadedRegs); + + if (saveHiRegs) + { + BitSet32 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00); + for (int reg : hiRegsLoaded) + { + if (Thumb || CurInstr.Cond() == 0xE) + RegCache.UnloadRegister(reg); + else + SaveReg(reg, RegCache.Mapping[reg]); + // prevent saving the register twice + loadedRegs[reg] = false; + } + } + + for (int reg : loadedRegs) + if (BitSet32(1 << RegCache.Mapping[reg]) & ABI_ALL_CALLER_SAVED) + SaveReg(reg, RegCache.Mapping[reg]); +} + +void Compiler::PopRegs(bool saveHiRegs) +{ + BitSet32 loadedRegs(RegCache.LoadedRegs); + for (int reg : loadedRegs) + { + if ((saveHiRegs && reg >= 8 && reg < 15) + || BitSet32(1 << RegCache.Mapping[reg]) & ABI_ALL_CALLER_SAVED) + { + LoadReg(reg, RegCache.Mapping[reg]); + } + } +} + +void Compiler::A_Comp_MRS() +{ + Comp_AddCycles_C(); + + OpArg rd = MapReg(CurInstr.A_Reg(12)); + + if (CurInstr.Instr & (1 << 22)) + { + MOV(32, R(RSCRATCH), R(RCPSR)); + AND(32, R(RSCRATCH), Imm8(0x1F)); + XOR(32, R(RSCRATCH3), R(RSCRATCH3)); + MOV(32, R(RSCRATCH2), Imm32(15 - 8)); + CALL(ReadBanked); + MOV(32, rd, R(RSCRATCH3)); + } + else + MOV(32, rd, R(RCPSR)); +} + +void Compiler::A_Comp_MSR() +{ + Comp_AddCycles_C(); + + OpArg val = CurInstr.Instr & (1 << 25) + ? Imm32(ROR((CurInstr.Instr & 0xFF), ((CurInstr.Instr >> 7) & 0x1E))) + : MapReg(CurInstr.A_Reg(0)); + + u32 mask = 0; + if (CurInstr.Instr & (1<<16)) mask |= 0x000000FF; + if (CurInstr.Instr & (1<<17)) mask |= 0x0000FF00; + if (CurInstr.Instr & (1<<18)) mask |= 0x00FF0000; + if (CurInstr.Instr & (1<<19)) mask |= 0xFF000000; + + if (CurInstr.Instr & (1 << 22)) + { + MOV(32, R(RSCRATCH), R(RCPSR)); + AND(32, R(RSCRATCH), Imm8(0x1F)); + XOR(32, R(RSCRATCH3), R(RSCRATCH3)); + MOV(32, R(RSCRATCH2), Imm32(15 - 8)); + CALL(ReadBanked); + + MOV(32, R(RSCRATCH2), Imm32(mask)); + MOV(32, R(RSCRATCH4), R(RSCRATCH2)); + AND(32, R(RSCRATCH4), Imm32(0xFFFFFF00)); + MOV(32, R(RSCRATCH), R(RCPSR)); + AND(32, R(RSCRATCH), Imm8(0x1F)); + CMP(32, R(RSCRATCH), Imm8(0x10)); + CMOVcc(32, RSCRATCH2, R(RSCRATCH4), CC_E); + + MOV(32, R(RSCRATCH4), R(RSCRATCH2)); + NOT(32, R(RSCRATCH4)); + AND(32, R(RSCRATCH3), R(RSCRATCH4)); + + AND(32, R(RSCRATCH2), val); + OR(32, R(RSCRATCH3), R(RSCRATCH2)); + + MOV(32, R(RSCRATCH2), Imm32(15 - 8)); + CALL(WriteBanked); + } + else + { + mask &= 0xFFFFFFDF; + CPSRDirty = true; + + if ((mask & 0xFF) == 0) + { + AND(32, R(RCPSR), Imm32(~mask)); + if (!val.IsImm()) + { + MOV(32, R(RSCRATCH), val); + AND(32, R(RSCRATCH), Imm32(mask)); + OR(32, R(RCPSR), R(RSCRATCH)); + } + else + { + OR(32, R(RCPSR), Imm32(val.Imm32() & mask)); + } + } + else + { + MOV(32, R(RSCRATCH2), Imm32(mask)); + MOV(32, R(RSCRATCH3), R(RSCRATCH2)); + AND(32, R(RSCRATCH3), Imm32(0xFFFFFF00)); + MOV(32, R(RSCRATCH), R(RCPSR)); + AND(32, R(RSCRATCH), Imm8(0x1F)); + CMP(32, R(RSCRATCH), Imm8(0x10)); + CMOVcc(32, RSCRATCH2, R(RSCRATCH3), CC_E); + + MOV(32, R(RSCRATCH3), R(RCPSR)); + + // I need you ANDN + MOV(32, R(RSCRATCH), R(RSCRATCH2)); + NOT(32, R(RSCRATCH)); + AND(32, R(RCPSR), R(RSCRATCH)); + + AND(32, R(RSCRATCH2), val); + OR(32, R(RCPSR), R(RSCRATCH2)); + + PushRegs(true); + + MOV(32, R(ABI_PARAM3), R(RCPSR)); + MOV(32, R(ABI_PARAM2), R(RSCRATCH3)); + MOV(64, R(ABI_PARAM1), R(RCPU)); + CALL((void*)&ARM::UpdateMode); + + PopRegs(true); + } + } +} + +/* + We'll repurpose this .bss memory + + */ +u8 CodeMemory[1024 * 1024 * 32]; + +Compiler::Compiler() +{ + { + #ifdef _WIN32 + SYSTEM_INFO sysInfo; + GetSystemInfo(&sysInfo); + + u64 pageSize = (u64)sysInfo.dwPageSize; + #else + u64 pageSize = sysconf(_SC_PAGE_SIZE); + #endif + + u8* pageAligned = (u8*)(((u64)CodeMemory & ~(pageSize - 1)) + pageSize); + u64 alignedSize = (((u64)CodeMemory + sizeof(CodeMemory)) & ~(pageSize - 1)) - (u64)pageAligned; + + #ifdef _WIN32 + DWORD dummy; + VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy); + #else + mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE); + #endif + + ResetStart = pageAligned; + CodeMemSize = alignedSize; + } + + Reset(); + + { + // RSCRATCH mode + // RSCRATCH2 reg number + // RSCRATCH3 value in current mode + // ret - RSCRATCH3 + ReadBanked = (void*)GetWritableCodePtr(); + CMP(32, R(RSCRATCH), Imm8(0x11)); + FixupBranch fiq = J_CC(CC_E); + SUB(32, R(RSCRATCH2), Imm8(13 - 8)); + FixupBranch notEverything = J_CC(CC_L); + CMP(32, R(RSCRATCH), Imm8(0x12)); + FixupBranch irq = J_CC(CC_E); + CMP(32, R(RSCRATCH), Imm8(0x13)); + FixupBranch svc = J_CC(CC_E); + CMP(32, R(RSCRATCH), Imm8(0x17)); + FixupBranch abt = J_CC(CC_E); + CMP(32, R(RSCRATCH), Imm8(0x1B)); + FixupBranch und = J_CC(CC_E); + SetJumpTarget(notEverything); + RET(); + + SetJumpTarget(fiq); + MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_FIQ))); + RET(); + SetJumpTarget(irq); + MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_IRQ))); + RET(); + SetJumpTarget(svc); + MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_SVC))); + RET(); + SetJumpTarget(abt); + MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_ABT))); + RET(); + SetJumpTarget(und); + MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_UND))); + RET(); + } + { + // RSCRATCH mode + // RSCRATCH2 reg n + // RSCRATCH3 value + // carry flag set if the register isn't banked + WriteBanked = (void*)GetWritableCodePtr(); + CMP(32, R(RSCRATCH), Imm8(0x11)); + FixupBranch fiq = J_CC(CC_E); + SUB(32, R(RSCRATCH2), Imm8(13 - 8)); + FixupBranch notEverything = J_CC(CC_L); + CMP(32, R(RSCRATCH), Imm8(0x12)); + FixupBranch irq = J_CC(CC_E); + CMP(32, R(RSCRATCH), Imm8(0x13)); + FixupBranch svc = J_CC(CC_E); + CMP(32, R(RSCRATCH), Imm8(0x17)); + FixupBranch abt = J_CC(CC_E); + CMP(32, R(RSCRATCH), Imm8(0x1B)); + FixupBranch und = J_CC(CC_E); + SetJumpTarget(notEverything); + STC(); + RET(); + + SetJumpTarget(fiq); + MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_FIQ)), R(RSCRATCH3)); + CLC(); + RET(); + SetJumpTarget(irq); + MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_IRQ)), R(RSCRATCH3)); + CLC(); + RET(); + SetJumpTarget(svc); + MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_SVC)), R(RSCRATCH3)); + CLC(); + RET(); + SetJumpTarget(abt); + MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_ABT)), R(RSCRATCH3)); + CLC(); + RET(); + SetJumpTarget(und); + MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_UND)), R(RSCRATCH3)); + CLC(); + RET(); + } + + for (int consoleType = 0; consoleType < 2; consoleType++) + { + for (int num = 0; num < 2; num++) + { + for (int size = 0; size < 3; size++) + { + for (int reg = 0; reg < 16; reg++) + { + if (reg == RSCRATCH || reg == ABI_PARAM1 || reg == ABI_PARAM2 || reg == ABI_PARAM3) + { + PatchedStoreFuncs[consoleType][num][size][reg] = NULL; + PatchedLoadFuncs[consoleType][num][size][0][reg] = NULL; + PatchedLoadFuncs[consoleType][num][size][1][reg] = NULL; + continue; + } + + X64Reg rdMapped = (X64Reg)reg; + PatchedStoreFuncs[consoleType][num][size][reg] = GetWritableCodePtr(); + if (RSCRATCH3 != ABI_PARAM1) + MOV(32, R(ABI_PARAM1), R(RSCRATCH3)); + if (num == 0) + { + MOV(64, R(ABI_PARAM2), R(RCPU)); + MOV(32, R(ABI_PARAM3), R(rdMapped)); + } + else + { + MOV(32, R(ABI_PARAM2), R(rdMapped)); + } + ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8); + if (consoleType == 0) + { + switch ((8 << size) | num) + { + case 32: ABI_CallFunction(SlowWrite9<u32, 0>); break; + case 33: ABI_CallFunction(SlowWrite7<u32, 0>); break; + case 16: ABI_CallFunction(SlowWrite9<u16, 0>); break; + case 17: ABI_CallFunction(SlowWrite7<u16, 0>); break; + case 8: ABI_CallFunction(SlowWrite9<u8, 0>); break; + case 9: ABI_CallFunction(SlowWrite7<u8, 0>); break; + } + } + else + { + switch ((8 << size) | num) + { + case 32: ABI_CallFunction(SlowWrite9<u32, 1>); break; + case 33: ABI_CallFunction(SlowWrite7<u32, 1>); break; + case 16: ABI_CallFunction(SlowWrite9<u16, 1>); break; + case 17: ABI_CallFunction(SlowWrite7<u16, 1>); break; + case 8: ABI_CallFunction(SlowWrite9<u8, 1>); break; + case 9: ABI_CallFunction(SlowWrite7<u8, 1>); break; + } + } + ABI_PopRegistersAndAdjustStack(CallerSavedPushRegs, 8); + RET(); + + for (int signextend = 0; signextend < 2; signextend++) + { + PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetWritableCodePtr(); + if (RSCRATCH3 != ABI_PARAM1) + MOV(32, R(ABI_PARAM1), R(RSCRATCH3)); + if (num == 0) + MOV(64, R(ABI_PARAM2), R(RCPU)); + ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8); + if (consoleType == 0) + { + switch ((8 << size) | num) + { + case 32: ABI_CallFunction(SlowRead9<u32, 0>); break; + case 33: ABI_CallFunction(SlowRead7<u32, 0>); break; + case 16: ABI_CallFunction(SlowRead9<u16, 0>); break; + case 17: ABI_CallFunction(SlowRead7<u16, 0>); break; + case 8: ABI_CallFunction(SlowRead9<u8, 0>); break; + case 9: ABI_CallFunction(SlowRead7<u8, 0>); break; + } + } + else + { + switch ((8 << size) | num) + { + case 32: ABI_CallFunction(SlowRead9<u32, 1>); break; + case 33: ABI_CallFunction(SlowRead7<u32, 1>); break; + case 16: ABI_CallFunction(SlowRead9<u16, 1>); break; + case 17: ABI_CallFunction(SlowRead7<u16, 1>); break; + case 8: ABI_CallFunction(SlowRead9<u8, 1>); break; + case 9: ABI_CallFunction(SlowRead7<u8, 1>); break; + } + } + ABI_PopRegistersAndAdjustStack(CallerSavedPushRegs, 8); + if (signextend) + MOVSX(32, 8 << size, rdMapped, R(RSCRATCH)); + else + MOVZX(32, 8 << size, rdMapped, R(RSCRATCH)); + RET(); + } + } + } + } + } + + // move the region forward to prevent overwriting the generated functions + CodeMemSize -= GetWritableCodePtr() - ResetStart; + ResetStart = GetWritableCodePtr(); + + NearStart = ResetStart; + FarStart = ResetStart + 1024*1024*24; + + NearSize = FarStart - ResetStart; + FarSize = (ResetStart + CodeMemSize) - FarStart; +} + +void Compiler::LoadCPSR() +{ + assert(!CPSRDirty); + + MOV(32, R(RCPSR), MDisp(RCPU, offsetof(ARM, CPSR))); +} + +void Compiler::SaveCPSR(bool flagClean) +{ + if (CPSRDirty) + { + MOV(32, MDisp(RCPU, offsetof(ARM, CPSR)), R(RCPSR)); + if (flagClean) + CPSRDirty = false; + } +} + +void Compiler::LoadReg(int reg, X64Reg nativeReg) +{ + if (reg != 15) + MOV(32, R(nativeReg), MDisp(RCPU, offsetof(ARM, R[reg]))); + else + MOV(32, R(nativeReg), Imm32(R15)); +} + +void Compiler::SaveReg(int reg, X64Reg nativeReg) +{ + MOV(32, MDisp(RCPU, offsetof(ARM, R[reg])), R(nativeReg)); +} + +// invalidates RSCRATCH and RSCRATCH3 +Gen::FixupBranch Compiler::CheckCondition(u32 cond) +{ + // hack, ldm/stm can get really big TODO: make this better + bool ldmStm = !Thumb && + (CurInstr.Info.Kind == ARMInstrInfo::ak_LDM || CurInstr.Info.Kind == ARMInstrInfo::ak_STM); + if (cond >= 0x8) + { + static_assert(RSCRATCH3 == ECX, "RSCRATCH has to be equal to ECX!"); + MOV(32, R(RSCRATCH3), R(RCPSR)); + SHR(32, R(RSCRATCH3), Imm8(28)); + MOV(32, R(RSCRATCH), Imm32(1)); + SHL(32, R(RSCRATCH), R(RSCRATCH3)); + TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond])); + + return J_CC(CC_Z, ldmStm); + } + else + { + // could have used a LUT, but then where would be the fun? + TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1))))); + + return J_CC(cond & 1 ? CC_NZ : CC_Z, ldmStm); + } +} + +#define F(x) &Compiler::x +const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] = +{ + // AND + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // EOR + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // SUB + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // RSB + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // ADD + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // ADC + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // SBC + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // RSC + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // ORR + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // MOV + F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), + F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), + // BIC + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // MVN + F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), + F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), + // TST + F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), + // TEQ + F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), + // CMP + F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), + // CMN + F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), + // Mul + F(A_Comp_MUL_MLA), F(A_Comp_MUL_MLA), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), NULL, NULL, NULL, NULL, NULL, + // ARMv5 stuff + F(A_Comp_CLZ), NULL, NULL, NULL, NULL, + // STR + F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), + // STRB + F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), + // LDR + F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), + // LDRB + F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), + // STRH + F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), + // LDRD, STRD never used by anything so they stay interpreted (by anything I mean the 5 games I checked) + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + // LDRH + F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), + // LDRSB + F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), + // LDRSH + F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), + // swap + NULL, NULL, + // LDM/STM + F(A_Comp_LDM_STM), F(A_Comp_LDM_STM), + // Branch + F(A_Comp_BranchImm), F(A_Comp_BranchImm), F(A_Comp_BranchImm), F(A_Comp_BranchXchangeReg), F(A_Comp_BranchXchangeReg), + // system stuff + NULL, F(A_Comp_MSR), F(A_Comp_MSR), F(A_Comp_MRS), NULL, NULL, NULL, + F(Nop) +}; + +const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] = { + // Shift imm + F(T_Comp_ShiftImm), F(T_Comp_ShiftImm), F(T_Comp_ShiftImm), + // Three operand ADD/SUB + F(T_Comp_AddSub_), F(T_Comp_AddSub_), F(T_Comp_AddSub_), F(T_Comp_AddSub_), + // 8 bit imm + F(T_Comp_ALU_Imm8), F(T_Comp_ALU_Imm8), F(T_Comp_ALU_Imm8), F(T_Comp_ALU_Imm8), + // general ALU + F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), + F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), + F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), + F(T_Comp_ALU), F(T_Comp_MUL), F(T_Comp_ALU), F(T_Comp_ALU), + // hi reg + F(T_Comp_ALU_HiReg), F(T_Comp_ALU_HiReg), F(T_Comp_ALU_HiReg), + // pc/sp relative + F(T_Comp_RelAddr), F(T_Comp_RelAddr), F(T_Comp_AddSP), + // LDR pcrel + F(T_Comp_LoadPCRel), + // LDR/STR reg offset + F(T_Comp_MemReg), F(T_Comp_MemReg), F(T_Comp_MemReg), F(T_Comp_MemReg), + // LDR/STR sign extended, half + F(T_Comp_MemRegHalf), F(T_Comp_MemRegHalf), F(T_Comp_MemRegHalf), F(T_Comp_MemRegHalf), + // LDR/STR imm offset + F(T_Comp_MemImm), F(T_Comp_MemImm), F(T_Comp_MemImm), F(T_Comp_MemImm), + // LDR/STR half imm offset + F(T_Comp_MemImmHalf), F(T_Comp_MemImmHalf), + // LDR/STR sp rel + F(T_Comp_MemSPRel), F(T_Comp_MemSPRel), + // PUSH/POP + F(T_Comp_PUSH_POP), F(T_Comp_PUSH_POP), + // LDMIA, STMIA + F(T_Comp_LDMIA_STMIA), F(T_Comp_LDMIA_STMIA), + // Branch + F(T_Comp_BCOND), F(T_Comp_BranchXchangeReg), F(T_Comp_BranchXchangeReg), F(T_Comp_B), F(T_Comp_BL_LONG_1), F(T_Comp_BL_LONG_2), + // Unk, SVC + NULL, NULL, + F(T_Comp_BL_Merged) +}; +#undef F + +bool Compiler::CanCompile(bool thumb, u16 kind) +{ + return (thumb ? T_Comp[kind] : A_Comp[kind]) != NULL; +} + +void Compiler::Reset() +{ + memset(ResetStart, 0xcc, CodeMemSize); + SetCodePtr(ResetStart); + + NearCode = NearStart; + FarCode = FarStart; + + LoadStorePatches.clear(); +} + +bool Compiler::IsJITFault(u64 addr) +{ + return addr >= (u64)CodeMemory && addr < (u64)CodeMemory + sizeof(CodeMemory); +} + +void Compiler::Comp_SpecialBranchBehaviour(bool taken) +{ + if (taken && CurInstr.BranchFlags & branch_IdleBranch) + OR(8, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1)); + + if ((CurInstr.BranchFlags & branch_FollowCondNotTaken && taken) + || (CurInstr.BranchFlags & branch_FollowCondTaken && !taken)) + { + RegCache.PrepareExit(); + + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); + JMP((u8*)&ARM_Ret, true); + } +} + +JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount) +{ + if (NearSize - (NearCode - NearStart) < 1024 * 32) // guess... + { + printf("near reset\n"); + ResetBlockCache(); + } + if (FarSize - (FarCode - FarStart) < 1024 * 32) // guess... + { + printf("far reset\n"); + ResetBlockCache(); + } + + ConstantCycles = 0; + Thumb = thumb; + Num = cpu->Num; + CodeRegion = instrs[0].Addr >> 24; + CurCPU = cpu; + // CPSR might have been modified in a previous block + CPSRDirty = false; + + JitBlockEntry res = (JitBlockEntry)GetWritableCodePtr(); + + RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount); + + for (int i = 0; i < instrsCount; i++) + { + CurInstr = instrs[i]; + R15 = CurInstr.Addr + (Thumb ? 4 : 8); + CodeRegion = R15 >> 24; + + Exit = i == instrsCount - 1 || (CurInstr.BranchFlags & branch_FollowCondNotTaken); + + CompileFunc comp = Thumb + ? T_Comp[CurInstr.Info.Kind] + : A_Comp[CurInstr.Info.Kind]; + + bool isConditional = Thumb ? CurInstr.Info.Kind == ARMInstrInfo::tk_BCOND : CurInstr.Cond() < 0xE; + if (comp == NULL || (CurInstr.BranchFlags & branch_FollowCondTaken) || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional))) + { + MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(R15)); + if (comp == NULL) + { + MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurInstr.CodeCycles)); + MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurInstr.Instr)); + + SaveCPSR(); + } + } + + if (comp != NULL) + RegCache.Prepare(Thumb, i); + else + RegCache.Flush(); + + if (Thumb) + { + if (comp == NULL) + { + MOV(64, R(ABI_PARAM1), R(RCPU)); + + ABI_CallFunction(InterpretTHUMB[CurInstr.Info.Kind]); + } + else + (this->*comp)(); + } + else + { + u32 cond = CurInstr.Cond(); + if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM) + { + if (comp) + (this->*comp)(); + else + { + MOV(64, R(ABI_PARAM1), R(RCPU)); + ABI_CallFunction(ARMInterpreter::A_BLX_IMM); + } + } + else if (cond == 0xF) + { + Comp_AddCycles_C(); + } + else + { + IrregularCycles = false; + + FixupBranch skipExecute; + if (cond < 0xE) + skipExecute = CheckCondition(cond); + + if (comp == NULL) + { + MOV(64, R(ABI_PARAM1), R(RCPU)); + + ABI_CallFunction(InterpretARM[CurInstr.Info.Kind]); + } + else + (this->*comp)(); + + Comp_SpecialBranchBehaviour(true); + + if (CurInstr.Cond() < 0xE) + { + if (IrregularCycles || (CurInstr.BranchFlags & branch_FollowCondTaken)) + { + FixupBranch skipFailed = J(); + SetJumpTarget(skipExecute); + + Comp_AddCycles_C(true); + + Comp_SpecialBranchBehaviour(false); + + SetJumpTarget(skipFailed); + } + else + SetJumpTarget(skipExecute); + } + + } + } + + if (comp == NULL) + LoadCPSR(); + } + + RegCache.Flush(); + + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); + JMP((u8*)ARM_Ret, true); + + /*FILE* codeout = fopen("codeout", "a"); + fprintf(codeout, "beginning block argargarg__ %x!!!", instrs[0].Addr); + fwrite((u8*)res, GetWritableCodePtr() - (u8*)res, 1, codeout); + + fclose(codeout);*/ + + return res; +} + +void Compiler::Comp_AddCycles_C(bool forceNonConstant) +{ + s32 cycles = Num ? + NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3] + : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles); + + if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant) + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + else + ConstantCycles += cycles; +} + +void Compiler::Comp_AddCycles_CI(u32 i) +{ + s32 cycles = (Num ? + NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2] + : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i; + + if (!Thumb && CurInstr.Cond() < 0xE) + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + else + ConstantCycles += cycles; +} + +void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add) +{ + s32 cycles = Num ? + NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2] + : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles); + + if (!Thumb && CurInstr.Cond() < 0xE) + { + LEA(32, RSCRATCH, MDisp(i, add + cycles)); + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH)); + } + else + { + ConstantCycles += cycles; + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i)); + } +} + +void Compiler::Comp_AddCycles_CDI() +{ + if (Num == 0) + Comp_AddCycles_CD(); + else + { + IrregularCycles = true; + + s32 cycles; + + s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]; + s32 numD = CurInstr.DataCycles; + + if ((CurInstr.DataRegion >> 24) == 0x02) // mainRAM + { + if (CodeRegion == 0x02) + cycles = numC + numD; + else + { + numC++; + cycles = std::max(numC + numD - 3, std::max(numC, numD)); + } + } + else if (CodeRegion == 0x02) + { + numD++; + cycles = std::max(numC + numD - 3, std::max(numC, numD)); + } + else + { + cycles = numC + numD + 1; + } + + if (!Thumb && CurInstr.Cond() < 0xE) + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + else + ConstantCycles += cycles; + } +} + +void Compiler::Comp_AddCycles_CD() +{ + u32 cycles = 0; + if (Num == 0) + { + s32 numC = (R15 & 0x2) ? 0 : CurInstr.CodeCycles; + s32 numD = CurInstr.DataCycles; + + //if (DataRegion != CodeRegion) + cycles = std::max(numC + numD - 6, std::max(numC, numD)); + + IrregularCycles = cycles != numC; + } + else + { + s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]; + s32 numD = CurInstr.DataCycles; + + if ((CurInstr.DataRegion >> 4) == 0x02) + { + if (CodeRegion == 0x02) + cycles += numC + numD; + else + cycles += std::max(numC + numD - 3, std::max(numC, numD)); + } + else if (CodeRegion == 0x02) + { + cycles += std::max(numC + numD - 3, std::max(numC, numD)); + } + else + { + cycles += numC + numD; + } + + IrregularCycles = true; + } + + if (IrregularCycles && !Thumb && CurInstr.Cond() < 0xE) + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + else + ConstantCycles += cycles; +} + +}
\ No newline at end of file |