/* Copyright 2016-2022 melonDS team, RSDuck This file is part of melonDS. melonDS is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. melonDS is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with melonDS. If not, see http://www.gnu.org/licenses/. */ #include "ARMJIT_Compiler.h" #include "../ARMJIT_Internal.h" #include "../ARMInterpreter.h" #include "../ARMJIT.h" #include "../NDS.h" #if defined(__SWITCH__) #include extern char __start__; #elif defined(_WIN32) #include #else #include #include #endif #include using namespace Arm64Gen; extern "C" void ARM_Ret(); namespace melonDS { /* Recompiling classic ARM to ARMv8 code is at the same time easier and trickier than compiling to a less related architecture like x64. On the one hand you can translate a lot of instructions directly. But at the same time, there are a ton of exceptions, like for example ADD and SUB can't have a RORed second operand on ARMv8. When writing a JIT, if an instruction is recompiled into multiple ones, take care not to write back until you've read all the other operands! */ template <> const ARM64Reg RegisterCache::NativeRegAllocOrder[] = { W19, W20, W21, W22, W23, W24, W25, W8, W9, W10, W11, W12, W13, W14, W15 }; template <> const int RegisterCache::NativeRegsAvailable = 15; const BitSet32 CallerSavedPushRegs({W8, W9, W10, W11, W12, W13, W14, W15}); const int JitMemSize = 16 * 1024 * 1024; #ifndef __SWITCH__ u8 JitMem[JitMemSize]; #endif void Compiler::MovePC() { ADD(MapReg(15), MapReg(15), Thumb ? 
2 : 4); } void Compiler::A_Comp_MRS() { Comp_AddCycles_C(); ARM64Reg rd = MapReg(CurInstr.A_Reg(12)); if (CurInstr.Instr & (1 << 22)) { ANDI2R(W5, RCPSR, 0x1F); MOVI2R(W3, 0); MOVI2R(W1, 15 - 8); BL(ReadBanked); MOV(rd, W3); } else MOV(rd, RCPSR); } void UpdateModeTrampoline(ARM* arm, u32 oldmode, u32 newmode) { arm->UpdateMode(oldmode, newmode); } void Compiler::A_Comp_MSR() { Comp_AddCycles_C(); ARM64Reg val; if (CurInstr.Instr & (1 << 25)) { val = W0; MOVI2R(val, melonDS::ROR((CurInstr.Instr & 0xFF), ((CurInstr.Instr >> 7) & 0x1E))); } else { val = MapReg(CurInstr.A_Reg(0)); } u32 mask = 0; if (CurInstr.Instr & (1<<16)) mask |= 0x000000FF; if (CurInstr.Instr & (1<<17)) mask |= 0x0000FF00; if (CurInstr.Instr & (1<<18)) mask |= 0x00FF0000; if (CurInstr.Instr & (1<<19)) mask |= 0xFF000000; if (CurInstr.Instr & (1 << 22)) { ANDI2R(W5, RCPSR, 0x1F); MOVI2R(W3, 0); MOVI2R(W1, 15 - 8); BL(ReadBanked); MOVI2R(W1, mask); MOVI2R(W2, mask & 0xFFFFFF00); ANDI2R(W5, RCPSR, 0x1F); CMP(W5, 0x10); CSEL(W1, W2, W1, CC_EQ); BIC(W3, W3, W1); AND(W0, val, W1); ORR(W3, W3, W0); MOVI2R(W1, 15 - 8); BL(WriteBanked); } else { mask &= 0xFFFFFFDF; CPSRDirty = true; if ((mask & 0xFF) == 0) { ANDI2R(RCPSR, RCPSR, ~mask); ANDI2R(W0, val, mask); ORR(RCPSR, RCPSR, W0); } else { MOVI2R(W2, mask); MOVI2R(W3, mask & 0xFFFFFF00); ANDI2R(W1, RCPSR, 0x1F); // W1 = first argument CMP(W1, 0x10); CSEL(W2, W3, W2, CC_EQ); BIC(RCPSR, RCPSR, W2); AND(W0, val, W2); ORR(RCPSR, RCPSR, W0); MOV(W2, RCPSR); MOV(X0, RCPU); PushRegs(true, true); QuickCallFunction(X3, UpdateModeTrampoline); PopRegs(true, true); } } } void Compiler::PushRegs(bool saveHiRegs, bool saveRegsToBeChanged, bool allowUnload) { BitSet32 loadedRegs(RegCache.LoadedRegs); if (saveHiRegs) { BitSet32 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00); for (int reg : hiRegsLoaded) { if (Thumb || CurInstr.Cond() == 0xE) RegCache.UnloadRegister(reg); else SaveReg(reg, RegCache.Mapping[reg]); // prevent saving the register twice loadedRegs[reg] = false; 
} } for (int reg : loadedRegs) { if (CallerSavedPushRegs[RegCache.Mapping[reg]] && (saveRegsToBeChanged || !((1<= 8 && reg < 15) || (CallerSavedPushRegs[RegCache.Mapping[reg]] && (saveRegsToBeChanged || !((1< 8) { Log(LogLevel::Error, "couldn't find unmapped place for jit memory\n"); JitRXStart = NULL; } } assert(JitRXStart != NULL); bool succeded = R_SUCCEEDED(svcMapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize)); assert(succeded); succeded = R_SUCCEEDED(svcSetProcessMemoryPermission(envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize, Perm_Rx)); assert(succeded); succeded = R_SUCCEEDED(svcMapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize)); assert(succeded); virtmemUnlock(); SetCodeBase((u8*)JitRWStart, (u8*)JitRXStart); JitMemMainSize = JitMemSize; #else #ifdef _WIN32 SYSTEM_INFO sysInfo; GetSystemInfo(&sysInfo); u64 pageSize = (u64)sysInfo.dwPageSize; #else u64 pageSize = sysconf(_SC_PAGE_SIZE); #endif u8* pageAligned = (u8*)(((u64)JitMem & ~(pageSize - 1)) + pageSize); u64 alignedSize = (((u64)JitMem + sizeof(JitMem)) & ~(pageSize - 1)) - (u64)pageAligned; #if defined(_WIN32) DWORD dummy; VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy); #elif defined(__APPLE__) pageAligned = (u8*)mmap(NULL, 1024*1024*16, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT,-1, 0); nds.JIT.JitEnableWrite(); #else mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE); #endif SetCodeBase(pageAligned, pageAligned); JitMemMainSize = alignedSize; #endif SetCodePtr(0); for (int i = 0; i < 3; i++) { JumpToFuncs9[i] = Gen_JumpTo9(i); JumpToFuncs7[i] = Gen_JumpTo7(i); } /* W4 - whether the register was written to W5 - mode W1 - reg num W3 - in/out value of reg */ { ReadBanked = GetRXPtr(); ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2)); CMP(W5, 0x11); FixupBranch fiq = B(CC_EQ); SUBS(W1, W1, 13 - 8); ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2)); 
FixupBranch notEverything = B(CC_LT); CMP(W5, 0x12); FixupBranch irq = B(CC_EQ); CMP(W5, 0x13); FixupBranch svc = B(CC_EQ); CMP(W5, 0x17); FixupBranch abt = B(CC_EQ); CMP(W5, 0x1B); FixupBranch und = B(CC_EQ); SetJumpTarget(notEverything); RET(); SetJumpTarget(fiq); LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_FIQ)); RET(); SetJumpTarget(irq); LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_IRQ)); RET(); SetJumpTarget(svc); LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_SVC)); RET(); SetJumpTarget(abt); LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_ABT)); RET(); SetJumpTarget(und); LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_UND)); RET(); } { WriteBanked = GetRXPtr(); ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2)); CMP(W5, 0x11); FixupBranch fiq = B(CC_EQ); SUBS(W1, W1, 13 - 8); ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2)); FixupBranch notEverything = B(CC_LT); CMP(W5, 0x12); FixupBranch irq = B(CC_EQ); CMP(W5, 0x13); FixupBranch svc = B(CC_EQ); CMP(W5, 0x17); FixupBranch abt = B(CC_EQ); CMP(W5, 0x1B); FixupBranch und = B(CC_EQ); SetJumpTarget(notEverything); MOVI2R(W4, 0); RET(); SetJumpTarget(fiq); STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_FIQ)); MOVI2R(W4, 1); RET(); SetJumpTarget(irq); STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_IRQ)); MOVI2R(W4, 1); RET(); SetJumpTarget(svc); STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_SVC)); MOVI2R(W4, 1); RET(); SetJumpTarget(abt); STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_ABT)); MOVI2R(W4, 1); RET(); SetJumpTarget(und); STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_UND)); MOVI2R(W4, 1); RET(); } for (int consoleType = 0; consoleType < 2; consoleType++) { for (int num = 0; num < 2; num++) { for (int size = 0; size < 3; size++) { for (int reg = 0; reg < 32; reg++) { if (!(reg == W4 || (reg >= W8 && reg <= W15) || (reg >= W19 && reg <= W25))) continue; ARM64Reg rdMapped = (ARM64Reg)reg; PatchedStoreFuncs[consoleType][num][size][reg] = GetRXPtr(); if (num == 0) { MOV(X1, RCPU); MOV(W2, rdMapped); } else { MOV(W1, rdMapped); } 
ABI_PushRegisters(BitSet32({30}) | CallerSavedPushRegs); if (consoleType == 0) { switch ((8 << size) | num) { case 32: QuickCallFunction(X3, SlowWrite9); break; case 33: QuickCallFunction(X3, SlowWrite7); break; case 16: QuickCallFunction(X3, SlowWrite9); break; case 17: QuickCallFunction(X3, SlowWrite7); break; case 8: QuickCallFunction(X3, SlowWrite9); break; case 9: QuickCallFunction(X3, SlowWrite7); break; } } else { switch ((8 << size) | num) { case 32: QuickCallFunction(X3, SlowWrite9); break; case 33: QuickCallFunction(X3, SlowWrite7); break; case 16: QuickCallFunction(X3, SlowWrite9); break; case 17: QuickCallFunction(X3, SlowWrite7); break; case 8: QuickCallFunction(X3, SlowWrite9); break; case 9: QuickCallFunction(X3, SlowWrite7); break; } } ABI_PopRegisters(BitSet32({30}) | CallerSavedPushRegs); RET(); for (int signextend = 0; signextend < 2; signextend++) { PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetRXPtr(); if (num == 0) MOV(X1, RCPU); ABI_PushRegisters(BitSet32({30}) | CallerSavedPushRegs); if (consoleType == 0) { switch ((8 << size) | num) { case 32: QuickCallFunction(X3, SlowRead9); break; case 33: QuickCallFunction(X3, SlowRead7); break; case 16: QuickCallFunction(X3, SlowRead9); break; case 17: QuickCallFunction(X3, SlowRead7); break; case 8: QuickCallFunction(X3, SlowRead9); break; case 9: QuickCallFunction(X3, SlowRead7); break; } } else { switch ((8 << size) | num) { case 32: QuickCallFunction(X3, SlowRead9); break; case 33: QuickCallFunction(X3, SlowRead7); break; case 16: QuickCallFunction(X3, SlowRead9); break; case 17: QuickCallFunction(X3, SlowRead7); break; case 8: QuickCallFunction(X3, SlowRead9); break; case 9: QuickCallFunction(X3, SlowRead7); break; } } ABI_PopRegisters(BitSet32({30}) | CallerSavedPushRegs); if (size == 32) MOV(rdMapped, W0); else if (signextend) SBFX(rdMapped, W0, 0, 8 << size); else UBFX(rdMapped, W0, 0, 8 << size); RET(); } } } } } FlushIcache(); JitMemSecondarySize = 1024*1024*4; JitMemMainSize 
-= GetCodeOffset(); JitMemMainSize -= JitMemSecondarySize; SetCodeBase((u8*)GetRWPtr(), (u8*)GetRXPtr()); } Compiler::~Compiler() { #ifdef __SWITCH__ if (JitRWStart != NULL) { bool succeded = R_SUCCEEDED(svcUnmapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize)); assert(succeded); succeded = R_SUCCEEDED(svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize)); assert(succeded); free(JitRWBase); } #endif } void Compiler::LoadCycles() { LDR(INDEX_UNSIGNED, RCycles, RCPU, offsetof(ARM, Cycles)); } void Compiler::SaveCycles() { STR(INDEX_UNSIGNED, RCycles, RCPU, offsetof(ARM, Cycles)); } void Compiler::LoadReg(int reg, ARM64Reg nativeReg) { if (reg == 15) MOVI2R(nativeReg, R15); else LDR(INDEX_UNSIGNED, nativeReg, RCPU, offsetof(ARM, R) + reg*4); } void Compiler::SaveReg(int reg, ARM64Reg nativeReg) { STR(INDEX_UNSIGNED, nativeReg, RCPU, offsetof(ARM, R) + reg*4); } void Compiler::LoadCPSR() { assert(!CPSRDirty); LDR(INDEX_UNSIGNED, RCPSR, RCPU, offsetof(ARM, CPSR)); } void Compiler::SaveCPSR(bool markClean) { if (CPSRDirty) { STR(INDEX_UNSIGNED, RCPSR, RCPU, offsetof(ARM, CPSR)); CPSRDirty = CPSRDirty && !markClean; } } FixupBranch Compiler::CheckCondition(u32 cond) { if (cond >= 0x8) { LSR(W1, RCPSR, 28); MOVI2R(W2, 1); LSLV(W2, W2, W1); ANDI2R(W2, W2, ARM::ConditionTable[cond], W3); return CBZ(W2); } else { u8 bit = (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1))); if (cond & 1) return TBNZ(RCPSR, bit); else return TBZ(RCPSR, bit); } } #define F(x) &Compiler::A_Comp_##x const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] = { // AND F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), // EOR F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), 
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), // SUB F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), // RSB F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), // ADD F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), // ADC F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), // SBC F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), // RSC F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), // ORR F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), // MOV F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), 
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), // BIC F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), // MVN F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), // TST F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), // TEQ F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), // CMP F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), // CMN F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), // Mul F(Mul), F(Mul), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short), // ARMv5 exclusives F(Clz), NULL, NULL, NULL, NULL, // STR F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), // STRB F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), // LDR F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), // LDRB F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), // STRH F(MemHD), F(MemHD), F(MemHD), F(MemHD), // LDRD NULL, NULL, NULL, NULL, // STRD NULL, NULL, NULL, NULL, // LDRH F(MemHD), F(MemHD), F(MemHD), F(MemHD), // LDRSB F(MemHD), F(MemHD), F(MemHD), F(MemHD), // LDRSH F(MemHD), F(MemHD), F(MemHD), F(MemHD), // Swap NULL, NULL, // LDM, STM F(LDM_STM), F(LDM_STM), // 
Branch F(BranchImm), F(BranchImm), F(BranchImm), F(BranchXchangeReg), F(BranchXchangeReg), // Special NULL, F(MSR), F(MSR), F(MRS), NULL, NULL, NULL, &Compiler::Nop }; #undef F #define F(x) &Compiler::T_Comp_##x const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] = { // Shift imm F(ShiftImm), F(ShiftImm), F(ShiftImm), // Add/sub tri operand F(AddSub_), F(AddSub_), F(AddSub_), F(AddSub_), // 8 bit imm F(ALUImm8), F(ALUImm8), F(ALUImm8), F(ALUImm8), // ALU F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), // ALU hi reg F(ALU_HiReg), F(ALU_HiReg), F(ALU_HiReg), // PC/SP relative ops F(RelAddr), F(RelAddr), F(AddSP), // LDR PC rel F(LoadPCRel), // LDR/STR reg offset F(MemReg), F(MemReg), F(MemReg), F(MemReg), // LDR/STR sign extended, half F(MemRegHalf), F(MemRegHalf), F(MemRegHalf), F(MemRegHalf), // LDR/STR imm offset F(MemImm), F(MemImm), F(MemImm), F(MemImm), // LDR/STR half imm offset F(MemImmHalf), F(MemImmHalf), // LDR/STR sp rel F(MemSPRel), F(MemSPRel), // PUSH/POP F(PUSH_POP), F(PUSH_POP), // LDMIA, STMIA F(LDMIA_STMIA), F(LDMIA_STMIA), // Branch F(BCOND), F(BranchXchangeReg), F(BranchXchangeReg), F(B), F(BL_LONG_1), F(BL_LONG_2), // Unk, SVC NULL, NULL, F(BL_Merged) }; bool Compiler::CanCompile(bool thumb, u16 kind) { return (thumb ? 
T_Comp[kind] : A_Comp[kind]) != NULL; } void Compiler::Comp_BranchSpecialBehaviour(bool taken) { if (taken && CurInstr.BranchFlags & branch_IdleBranch) { MOVI2R(W0, 1); STRB(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, IdleLoop)); } if ((CurInstr.BranchFlags & branch_FollowCondNotTaken && taken) || (CurInstr.BranchFlags & branch_FollowCondTaken && !taken)) { RegCache.PrepareExit(); if (ConstantCycles) ADD(RCycles, RCycles, ConstantCycles); QuickTailCall(X0, ARM_Ret); } } JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount, bool hasMemInstr) { if (JitMemMainSize - GetCodeOffset() < 1024 * 16) { Log(LogLevel::Debug, "JIT near memory full, resetting...\n"); NDS.JIT.ResetBlockCache(); } if ((JitMemMainSize + JitMemSecondarySize) - OtherCodeRegion < 1024 * 8) { Log(LogLevel::Debug, "JIT far memory full, resetting...\n"); NDS.JIT.ResetBlockCache(); } JitBlockEntry res = (JitBlockEntry)GetRXPtr(); Thumb = thumb; Num = cpu->Num; CurCPU = cpu; ConstantCycles = 0; RegCache = RegisterCache(this, instrs, instrsCount, true); CPSRDirty = false; if (hasMemInstr) MOVP2R(RMemBase, Num == 0 ? NDS.JIT.Memory.FastMem9Start : NDS.JIT.Memory.FastMem7Start); for (int i = 0; i < instrsCount; i++) { CurInstr = instrs[i]; R15 = CurInstr.Addr + (Thumb ? 4 : 8); CodeRegion = R15 >> 24; CompileFunc comp = Thumb ? T_Comp[CurInstr.Info.Kind] : A_Comp[CurInstr.Info.Kind]; Exit = i == (instrsCount - 1) || (CurInstr.BranchFlags & branch_FollowCondNotTaken); //printf("%x instr %x regs: r%x w%x n%x flags: %x %x %x\n", R15, CurInstr.Instr, CurInstr.Info.SrcRegs, CurInstr.Info.DstRegs, CurInstr.Info.ReadFlags, CurInstr.Info.NotStrictlyNeeded, CurInstr.Info.WriteFlags, CurInstr.SetFlags); bool isConditional = Thumb ? 
CurInstr.Info.Kind == ARMInstrInfo::tk_BCOND : CurInstr.Cond() < 0xE; if (comp == NULL || (CurInstr.BranchFlags & branch_FollowCondTaken) || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional))) { MOVI2R(W0, R15); STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15])); if (comp == NULL) { MOVI2R(W0, CurInstr.Instr); STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CurInstr)); } if (Num == 0) { MOVI2R(W0, (s32)CurInstr.CodeCycles); STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles)); } } if (comp == NULL) { SaveCycles(); SaveCPSR(); RegCache.Flush(); } else RegCache.Prepare(Thumb, i); if (Thumb) { if (comp == NULL) { MOV(X0, RCPU); QuickCallFunction(X1, InterpretTHUMB[CurInstr.Info.Kind]); } else { (this->*comp)(); } } else { u32 cond = CurInstr.Cond(); if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM) { if (comp) (this->*comp)(); else { MOV(X0, RCPU); QuickCallFunction(X1, ARMInterpreter::A_BLX_IMM); } } else if (cond == 0xF) { Comp_AddCycles_C(); } else { IrregularCycles = comp == NULL; FixupBranch skipExecute; if (cond < 0xE) skipExecute = CheckCondition(cond); if (comp == NULL) { MOV(X0, RCPU); QuickCallFunction(X1, InterpretARM[CurInstr.Info.Kind]); } else { (this->*comp)(); } Comp_BranchSpecialBehaviour(true); if (cond < 0xE) { if (IrregularCycles || (CurInstr.BranchFlags & branch_FollowCondTaken)) { FixupBranch skipNop = B(); SetJumpTarget(skipExecute); if (IrregularCycles) Comp_AddCycles_C(true); Comp_BranchSpecialBehaviour(false); SetJumpTarget(skipNop); } else { SetJumpTarget(skipExecute); } } } } if (comp == NULL) { LoadCycles(); LoadCPSR(); } } RegCache.Flush(); if (ConstantCycles) ADD(RCycles, RCycles, ConstantCycles); QuickTailCall(X0, ARM_Ret); FlushIcache(); return res; } void Compiler::Reset() { LoadStorePatches.clear(); SetCodePtr(0); OtherCodeRegion = JitMemMainSize; const u32 brk_0 = 0xD4200000; for (int i = 0; i < (JitMemMainSize + JitMemSecondarySize) / 4; i++) *(((u32*)GetRWPtr()) + i) = brk_0; } void 
Compiler::Comp_AddCycles_C(bool forceNonConstant) { s32 cycles = Num ? NDS.ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3] : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles); if (forceNonConstant) ConstantCycles += cycles; else ADD(RCycles, RCycles, cycles); } void Compiler::Comp_AddCycles_CI(u32 numI) { IrregularCycles = true; s32 cycles = (Num ? NDS.ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2] : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + numI; if (Thumb || CurInstr.Cond() == 0xE) ConstantCycles += cycles; else ADD(RCycles, RCycles, cycles); } void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift) { IrregularCycles = true; s32 cycles = (Num ? NDS.ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2] : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + c; ADD(RCycles, RCycles, cycles); if (Thumb || CurInstr.Cond() >= 0xE) ConstantCycles += cycles; else ADD(RCycles, RCycles, cycles); } void Compiler::Comp_AddCycles_CDI() { if (Num == 0) Comp_AddCycles_CD(); else { IrregularCycles = true; s32 cycles; s32 numC = NDS.ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]; s32 numD = CurInstr.DataCycles; if ((CurInstr.DataRegion >> 24) == 0x02) // mainRAM { if (CodeRegion == 0x02) cycles = numC + numD; else { numC++; cycles = std::max(numC + numD - 3, std::max(numC, numD)); } } else if (CodeRegion == 0x02) { numD++; cycles = std::max(numC + numD - 3, std::max(numC, numD)); } else { cycles = numC + numD + 1; } if (!Thumb && CurInstr.Cond() < 0xE) ADD(RCycles, RCycles, cycles); else ConstantCycles += cycles; } } void Compiler::Comp_AddCycles_CD() { u32 cycles = 0; if (Num == 0) { s32 numC = (R15 & 0x2) ? 0 : CurInstr.CodeCycles; s32 numD = CurInstr.DataCycles; //if (DataRegion != CodeRegion) cycles = std::max(numC + numD - 6, std::max(numC, numD)); IrregularCycles = cycles != numC; } else { s32 numC = NDS.ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 
0 : 2]; s32 numD = CurInstr.DataCycles; if ((CurInstr.DataRegion >> 24) == 0x02) { if (CodeRegion == 0x02) cycles += numC + numD; else cycles += std::max(numC + numD - 3, std::max(numC, numD)); } else if (CodeRegion == 0x02) { cycles += std::max(numC + numD - 3, std::max(numC, numD)); } else { cycles += numC + numD; } IrregularCycles = true; } if ((!Thumb && CurInstr.Cond() < 0xE) && IrregularCycles) ADD(RCycles, RCycles, cycles); else ConstantCycles += cycles; } }