author     RSDuck <rsduck@users.noreply.github.com>  2020-06-30 23:50:41 +0200
committer  RSDuck <rsduck@users.noreply.github.com>  2020-06-30 23:50:41 +0200
commit     c5381d2911d47fb1fcbd6ec27a83f5da3606c4bd (patch)
tree       1dbf9eb1bbe418d14f07dc3a0e30821fb5deb258 /src/ARMJIT_x64
parent     ea6d03581b689738d0d1930b28d1588019cf4077 (diff)
reconcile DSi and JIT, fastmem for x64 and Windows
Diffstat (limited to 'src/ARMJIT_x64')
-rw-r--r--  src/ARMJIT_x64/ARMJIT_Compiler.cpp    109
-rw-r--r--  src/ARMJIT_x64/ARMJIT_Compiler.h       14
-rw-r--r--  src/ARMJIT_x64/ARMJIT_LoadStore.cpp   632
3 files changed, 354 insertions, 401 deletions
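
The change drops the old (commented-out) region-classified fast paths in favour of fault-based fastmem: every memory access is emitted as a plain x64 load or store against a reserved mirror of the guest address space, and an access that hits an unmapped page faults and gets backpatched into a call to a pre-generated slow-path thunk. A minimal sketch of the idea; FastMemBase, ResumeAt and HandleFault are hypothetical names for illustration, not the melonDS API:

    // Hypothetical sketch of fault-based fastmem (FastMemBase, ResumeAt
    // and HandleFault are illustrative names, not the melonDS API).
    u8* FastMemBase; // mirror of the guest address space, mostly unmapped

    void EmitLoad32(Gen::X64Reg rd, Gen::X64Reg guestAddr)
    {
        MOV(64, R(RSCRATCH), ImmPtr(FastMemBase));
        MOV(32, R(rd), MRegSum(RSCRATCH, guestAddr)); // faults if unmapped
    }

    bool HandleFault(u64 faultingPC) // called from the host fault handler
    {
        // RewriteMemAccess (below) backpatches the faulting fast path
        // into a CALL to the slow-path thunk and says where to resume.
        s32 offset = compiler->RewriteMemAccess(faultingPC);
        ResumeAt(faultingPC + offset);
        return true;
    }
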
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
index 34c1c91..d8bdd56 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
@@ -40,6 +40,12 @@ const int RegisterCache<Compiler, X64Reg>::NativeRegsAvailable =
#endif
;
+#ifdef _WIN32
+const BitSet32 CallerSavedPushRegs({R10, R11});
+#else
+const BitSet32 CallerSavedPushRegs({R9, R10, R11});
+#endif
+
void Compiler::PushRegs(bool saveHiRegs)
{
BitSet32 loadedRegs(RegCache.LoadedRegs);
@@ -301,6 +307,107 @@ Compiler::Compiler()
RET();
}
+ for (int consoleType = 0; consoleType < 2; consoleType++)
+ {
+ for (int num = 0; num < 2; num++)
+ {
+ for (int size = 0; size < 3; size++)
+ {
+ for (int reg = 0; reg < 16; reg++)
+ {
+ if (reg == RSCRATCH || reg == ABI_PARAM1 || reg == ABI_PARAM2 || reg == ABI_PARAM3)
+ {
+ PatchedStoreFuncs[consoleType][num][size][reg] = NULL;
+ PatchedLoadFuncs[consoleType][num][size][0][reg] = NULL;
+ PatchedLoadFuncs[consoleType][num][size][1][reg] = NULL;
+ continue;
+ }
+
+ X64Reg rdMapped = (X64Reg)reg;
+ PatchedStoreFuncs[consoleType][num][size][reg] = GetWritableCodePtr();
+ if (RSCRATCH3 != ABI_PARAM1)
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
+ if (num == 0)
+ {
+ MOV(64, R(ABI_PARAM2), R(RCPU));
+ MOV(32, R(ABI_PARAM3), R(rdMapped));
+ }
+ else
+ {
+ MOV(32, R(ABI_PARAM2), R(rdMapped));
+ }
+ ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
+ if (consoleType == 0)
+ {
+ switch ((8 << size) | num)
+ {
+ case 32: ABI_CallFunction(SlowWrite9<u32, 0>); break;
+ case 33: ABI_CallFunction(SlowWrite7<u32, 0>); break;
+ case 16: ABI_CallFunction(SlowWrite9<u16, 0>); break;
+ case 17: ABI_CallFunction(SlowWrite7<u16, 0>); break;
+ case 8: ABI_CallFunction(SlowWrite9<u8, 0>); break;
+ case 9: ABI_CallFunction(SlowWrite7<u8, 0>); break;
+ }
+ }
+ else
+ {
+ switch ((8 << size) | num)
+ {
+ case 32: ABI_CallFunction(SlowWrite9<u32, 1>); break;
+ case 33: ABI_CallFunction(SlowWrite7<u32, 1>); break;
+ case 16: ABI_CallFunction(SlowWrite9<u16, 1>); break;
+ case 17: ABI_CallFunction(SlowWrite7<u16, 1>); break;
+ case 8: ABI_CallFunction(SlowWrite9<u8, 1>); break;
+ case 9: ABI_CallFunction(SlowWrite7<u8, 1>); break;
+ }
+ }
+ ABI_PopRegistersAndAdjustStack(CallerSavedPushRegs, 8);
+ RET();
+
+ for (int signextend = 0; signextend < 2; signextend++)
+ {
+ PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetWritableCodePtr();
+ if (RSCRATCH3 != ABI_PARAM1)
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
+ if (num == 0)
+ MOV(64, R(ABI_PARAM2), R(RCPU));
+ ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
+ if (consoleType == 0)
+ {
+ switch ((8 << size) | num)
+ {
+ case 32: ABI_CallFunction(SlowRead9<u32, 0>); break;
+ case 33: ABI_CallFunction(SlowRead7<u32, 0>); break;
+ case 16: ABI_CallFunction(SlowRead9<u16, 0>); break;
+ case 17: ABI_CallFunction(SlowRead7<u16, 0>); break;
+ case 8: ABI_CallFunction(SlowRead9<u8, 0>); break;
+ case 9: ABI_CallFunction(SlowRead7<u8, 0>); break;
+ }
+ }
+ else
+ {
+ switch ((8 << size) | num)
+ {
+ case 32: ABI_CallFunction(SlowRead9<u32, 1>); break;
+ case 33: ABI_CallFunction(SlowRead7<u32, 1>); break;
+ case 16: ABI_CallFunction(SlowRead9<u16, 1>); break;
+ case 17: ABI_CallFunction(SlowRead7<u16, 1>); break;
+ case 8: ABI_CallFunction(SlowRead9<u8, 1>); break;
+ case 9: ABI_CallFunction(SlowRead7<u8, 1>); break;
+ }
+ }
+ ABI_PopRegistersAndAdjustStack(CallerSavedPushRegs, 8);
+ if (signextend)
+ MOVSX(32, 8 << size, rdMapped, R(RSCRATCH));
+ else
+ MOVZX(32, 8 << size, rdMapped, R(RSCRATCH));
+ RET();
+ }
+ }
+ }
+ }
+ }
+
// move the region forward to prevent overwriting the generated functions
CodeMemSize -= GetWritableCodePtr() - ResetStart;
ResetStart = GetWritableCodePtr();
@@ -500,6 +607,8 @@ void Compiler::Reset()
NearCode = NearStart;
FarCode = FarStart;
+
+ LoadStorePatches.clear();
}
bool Compiler::IsJITFault(u64 addr)
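
The constructor loop above pre-generates one slow-path thunk per combination of console type (DS/DSi), CPU (ARM9/ARM7), access size, destination register and, for loads, sign extension, skipping host registers that collide with RSCRATCH or the ABI argument registers. The switch key (8 << size) | num folds width and CPU into one value: size == 2, num == 1 gives (8 << 2) | 1 == 33, the 32-bit ARM7 case. A worked lookup against the array shapes declared in the header below:

    // Worked example: thunk for a sign-extended 16-bit ARM7 load into
    // rd on a DSi. Indices: [consoleType][num][sizeIdx][signExtend][reg],
    // where sizeIdx 0/1/2 selects 8/16/32-bit accesses.
    void* thunk = PatchedLoadFuncs[1 /*DSi*/][1 /*ARM7*/][1 /*16-bit*/]
                                  [1 /*sign-extend*/][rd];
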
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h
index d1a6c07..0fe0147 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.h
@@ -7,6 +7,8 @@
#include "../ARMJIT_Internal.h"
#include "../ARMJIT_RegisterCache.h"
+#include <unordered_map>
+
namespace ARMJIT
{
@@ -18,6 +20,13 @@ const Gen::X64Reg RSCRATCH2 = Gen::EDX;
const Gen::X64Reg RSCRATCH3 = Gen::ECX;
const Gen::X64Reg RSCRATCH4 = Gen::R8;
+struct LoadStorePatch
+{
+ void* PatchFunc;
+ s16 Offset;
+ u16 Size;
+};
+
struct Op2
{
Op2()
@@ -211,6 +220,11 @@ public:
u8* NearStart;
u8* FarStart;
+ void* PatchedStoreFuncs[2][2][3][16];
+ void* PatchedLoadFuncs[2][2][3][2][16];
+
+ std::unordered_map<u8*, LoadStorePatch> LoadStorePatches;
+
u8* ResetStart;
u32 CodeMemSize;
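
LoadStorePatches maps the address of the instruction expected to fault to its descriptor: Offset is the (non-positive) byte distance from that instruction back to the start of the emitted fast path, and Size is the fast path's total length, so a rewrite can replace the whole span with a CALL plus NOP padding. Registration at emit time, mirroring Comp_MemAccess in the diff below:

    LoadStorePatch patch;
    patch.PatchFunc = /* slow-path thunk from PatchedLoad/StoreFuncs */;
    patch.Offset = memopStart - memopLoadStoreLocation; // <= 0, fits s16
    patch.Size   = GetWritableCodePtr() - memopStart;   // whole fast path
    LoadStorePatches[memopLoadStoreLocation] = patch;   // key: faulting insn
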
diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
index b780c55..2da113b 100644
--- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
@@ -17,7 +17,30 @@ int squeezePointer(T* ptr)
s32 Compiler::RewriteMemAccess(u64 pc)
{
- return 0;
+ auto it = LoadStorePatches.find((u8*)pc);
+ if (it != LoadStorePatches.end())
+ {
+ LoadStorePatch patch = it->second;
+ LoadStorePatches.erase(it);
+
+ u8* curCodePtr = GetWritableCodePtr();
+ u8* rewritePtr = (u8*)pc + (ptrdiff_t)patch.Offset;
+ SetCodePtr(rewritePtr);
+
+ CALL(patch.PatchFunc);
+ u32 remainingSize = patch.Size - (GetWritableCodePtr() - rewritePtr);
+ if (remainingSize > 0)
+ NOP(remainingSize);
+
+ //printf("rewriting memory access %p %d %d\n", patch.PatchFunc, patch.Offset, patch.Size);
+
+ SetCodePtr(curCodePtr);
+
+ return patch.Offset;
+ }
+
+ printf("this is a JIT bug %x\n", pc);
+ abort();
}
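
After a successful rewrite the Size-byte span starting at pc + Offset holds a 5-byte CALL to the thunk followed by NOPs (hence the emitter's assert(patch.Size >= 5) further down), and the returned Offset tells the fault handler where to resume. Schematically:

    // Before (fast path; the access at `pc` faulted):
    //   mov rax, FastMemBase       <- pc + patch.Offset (rewritePtr)
    //   mov ebx, [rax + rcx]       <- pc
    // After RewriteMemAccess(pc):
    //   call PatchFunc             ; 5 bytes
    //   nop ...                    ; pads the remaining patch.Size bytes
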
/*
@@ -91,369 +114,213 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
return;
}
+ if (flags & memop_Store)
{
- if (flags & memop_Store)
- {
- Comp_AddCycles_CD();
- }
- else
- {
- Comp_AddCycles_CDI();
- }
+ Comp_AddCycles_CD();
+ }
+ else
+ {
+ Comp_AddCycles_CDI();
+ }
- bool addrIsStatic = Config::JIT_LiteralOptimisations
- && RegCache.IsLiteral(rn) && op2.IsImm && !(flags & (memop_Writeback|memop_Post));
- u32 staticAddress;
- if (addrIsStatic)
- staticAddress = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
- OpArg rdMapped = MapReg(rd);
+ bool addrIsStatic = Config::JIT_LiteralOptimisations
+ && RegCache.IsLiteral(rn) && op2.IsImm && !(flags & (memop_Writeback|memop_Post));
+ u32 staticAddress;
+ if (addrIsStatic)
+ staticAddress = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
+ OpArg rdMapped = MapReg(rd);
- if (true)
- {
- OpArg rnMapped = MapReg(rn);
- if (Thumb && rn == 15)
- rnMapped = Imm32(R15 & ~0x2);
+ OpArg rnMapped = MapReg(rn);
+ if (Thumb && rn == 15)
+ rnMapped = Imm32(R15 & ~0x2);
- X64Reg finalAddr = RSCRATCH3;
- if (flags & memop_Post)
- {
- MOV(32, R(RSCRATCH3), rnMapped);
+ X64Reg finalAddr = RSCRATCH3;
+ if (flags & memop_Post)
+ {
+ MOV(32, R(RSCRATCH3), rnMapped);
- finalAddr = rnMapped.GetSimpleReg();
- }
+ finalAddr = rnMapped.GetSimpleReg();
+ }
- if (op2.IsImm)
+ if (op2.IsImm)
+ {
+ MOV_sum(32, finalAddr, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
+ }
+ else
+ {
+ OpArg rm = MapReg(op2.Reg.Reg);
+
+ if (!(flags & memop_SubtractOffset) && rm.IsSimpleReg() && rnMapped.IsSimpleReg()
+ && op2.Reg.Op == 0 && op2.Reg.Amount > 0 && op2.Reg.Amount <= 3)
+ {
+ LEA(32, finalAddr,
+ MComplex(rnMapped.GetSimpleReg(), rm.GetSimpleReg(), 1 << op2.Reg.Amount, 0));
+ }
+ else
+ {
+ bool throwAway;
+ OpArg offset =
+ Comp_RegShiftImm(op2.Reg.Op, op2.Reg.Amount, rm, false, throwAway);
+
+ if (flags & memop_SubtractOffset)
{
- MOV_sum(32, finalAddr, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
+ if (R(finalAddr) != rnMapped)
+ MOV(32, R(finalAddr), rnMapped);
+ if (!offset.IsZero())
+ SUB(32, R(finalAddr), offset);
}
else
- {
- OpArg rm = MapReg(op2.Reg.Reg);
+ MOV_sum(32, finalAddr, rnMapped, offset);
+ }
+ }
- if (!(flags & memop_SubtractOffset) && rm.IsSimpleReg() && rnMapped.IsSimpleReg()
- && op2.Reg.Op == 0 && op2.Reg.Amount > 0 && op2.Reg.Amount <= 3)
- {
- LEA(32, finalAddr,
- MComplex(rnMapped.GetSimpleReg(), rm.GetSimpleReg(), 1 << op2.Reg.Amount, 0));
- }
- else
- {
- bool throwAway;
- OpArg offset =
- Comp_RegShiftImm(op2.Reg.Op, op2.Reg.Amount, rm, false, throwAway);
+ if ((flags & memop_Writeback) && !(flags & memop_Post))
+ MOV(32, rnMapped, R(finalAddr));
- if (flags & memop_SubtractOffset)
- {
- if (R(finalAddr) != rnMapped)
- MOV(32, R(finalAddr), rnMapped);
- if (!offset.IsZero())
- SUB(32, R(finalAddr), offset);
- }
- else
- MOV_sum(32, finalAddr, rnMapped, offset);
- }
- }
+ u32 expectedTarget = Num == 0
+ ? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion)
+ : ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion);
- if ((flags & memop_Writeback) && !(flags & memop_Post))
- MOV(32, rnMapped, R(finalAddr));
- }
+ if (Config::JIT_FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || ARMJIT_Memory::IsFastmemCompatible(expectedTarget)))
+ {
+ u8* memopStart = GetWritableCodePtr();
+ LoadStorePatch patch;
+
+ patch.PatchFunc = flags & memop_Store
+ ? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped.GetSimpleReg()]
+ : PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped.GetSimpleReg()];
- /*int expectedTarget = Num == 0
- ? ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion)
- : ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion);
- if (CurInstr.Cond() < 0xE)
- expectedTarget = memregion_Other;
+ assert(patch.PatchFunc != NULL);
- bool compileFastPath = false, compileSlowPath = !addrIsStatic || (flags & memop_Store);
+ MOV(64, R(RSCRATCH), ImmPtr(Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start));
- switch (expectedTarget)
+ X64Reg maskedAddr = RSCRATCH3;
+ if (size > 8)
{
- case memregion_MainRAM:
- case memregion_DTCM:
- case memregion_WRAM7:
- case memregion_SWRAM9:
- case memregion_SWRAM7:
- case memregion_IO9:
- case memregion_IO7:
- case memregion_VWRAM:
- compileFastPath = true;
- break;
- case memregion_Wifi:
- compileFastPath = size >= 16;
- break;
- case memregion_VRAM:
- compileFastPath = !(flags & memop_Store) || size >= 16;
- case memregion_BIOS9:
- compileFastPath = !(flags & memop_Store);
- break;
- default: break;
+ maskedAddr = RSCRATCH2;
+ MOV(32, R(RSCRATCH2), R(RSCRATCH3));
+ AND(32, R(RSCRATCH2), Imm8(addressMask));
}
- if (addrIsStatic && !compileFastPath)
+ u8* memopLoadStoreLocation = GetWritableCodePtr();
+ if (flags & memop_Store)
{
- compileFastPath = false;
- compileSlowPath = true;
+ MOV(size, MRegSum(RSCRATCH, maskedAddr), rdMapped);
}
-
- if (addrIsStatic && compileSlowPath)
- MOV(32, R(RSCRATCH3), Imm32(staticAddress));
-*/
- /*if (compileFastPath)
+ else
{
- FixupBranch slowPath;
- if (compileSlowPath)
- {
- MOV(32, R(RSCRATCH), R(RSCRATCH3));
- SHR(32, R(RSCRATCH), Imm8(9));
- if (flags & memop_Store)
- {
- CMP(8, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)), Imm8(expectedTarget));
- }
- else
- {
- MOVZX(32, 8, RSCRATCH, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)));
- AND(32, R(RSCRATCH), Imm8(~0x80));
- CMP(32, R(RSCRATCH), Imm8(expectedTarget));
- }
-
- slowPath = J_CC(CC_NE, true);
- }
+ if (flags & memop_SignExtend)
+ MOVSX(32, size, rdMapped.GetSimpleReg(), MRegSum(RSCRATCH, maskedAddr));
+ else
+ MOVZX(32, size, rdMapped.GetSimpleReg(), MRegSum(RSCRATCH, maskedAddr));
- if (expectedTarget == memregion_MainRAM || expectedTarget == memregion_WRAM7
- || expectedTarget == memregion_BIOS9)
+ if (size == 32)
{
- u8* data;
- u32 mask;
- if (expectedTarget == memregion_MainRAM)
- {
- data = NDS::MainRAM;
- mask = MAIN_RAM_SIZE - 1;
- }
- else if (expectedTarget == memregion_BIOS9)
- {
- data = NDS::ARM9BIOS;
- mask = 0xFFF;
- }
- else
- {
- data = NDS::ARM7WRAM;
- mask = 0xFFFF;
- }
- OpArg memLoc;
- if (addrIsStatic)
- {
- memLoc = M(data + ((staticAddress & mask & addressMask)));
- }
- else
- {
- MOV(32, R(RSCRATCH), R(RSCRATCH3));
- AND(32, R(RSCRATCH), Imm32(mask & addressMask));
- memLoc = MDisp(RSCRATCH, squeezePointer(data));
- }
- if (flags & memop_Store)
- MOV(size, memLoc, rdMapped);
- else if (flags & memop_SignExtend)
- MOVSX(32, size, rdMapped.GetSimpleReg(), memLoc);
- else
- MOVZX(32, size, rdMapped.GetSimpleReg(), memLoc);
- }
- else if (expectedTarget == memregion_DTCM)
- {
- if (addrIsStatic)
- MOV(32, R(RSCRATCH), Imm32(staticAddress));
- else
- MOV(32, R(RSCRATCH), R(RSCRATCH3));
- SUB(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
- AND(32, R(RSCRATCH), Imm32(0x3FFF & addressMask));
- OpArg memLoc = MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM));
- if (flags & memop_Store)
- MOV(size, memLoc, rdMapped);
- else if (flags & memop_SignExtend)
- MOVSX(32, size, rdMapped.GetSimpleReg(), memLoc);
- else
- MOVZX(32, size, rdMapped.GetSimpleReg(), memLoc);
- }
- else if (expectedTarget == memregion_SWRAM9 || expectedTarget == memregion_SWRAM7)
- {
- MOV(64, R(RSCRATCH2), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9 : &NDS::SWRAM_ARM7));
- if (addrIsStatic)
- {
- MOV(32, R(RSCRATCH), Imm32(staticAddress & addressMask));
- }
- else
- {
- MOV(32, R(RSCRATCH), R(RSCRATCH3));
- AND(32, R(RSCRATCH), Imm8(addressMask));
- }
- AND(32, R(RSCRATCH), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9Mask : &NDS::SWRAM_ARM7Mask));
- OpArg memLoc = MRegSum(RSCRATCH, RSCRATCH2);
- if (flags & memop_Store)
- MOV(size, memLoc, rdMapped);
- else if (flags & memop_SignExtend)
- MOVSX(32, size, rdMapped.GetSimpleReg(), memLoc);
- else
- MOVZX(32, size, rdMapped.GetSimpleReg(), memLoc);
+ AND(32, R(RSCRATCH3), Imm8(0x3));
+ SHL(32, R(RSCRATCH3), Imm8(3));
+ ROR_(32, rdMapped, R(RSCRATCH3));
}
- else
- {
- u32 maskedDataRegion;
-
- if (addrIsStatic)
- {
- maskedDataRegion = staticAddress;
- MOV(32, R(ABI_PARAM1), Imm32(staticAddress));
- }
- else
- {
- if (ABI_PARAM1 != RSCRATCH3)
- MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
- AND(32, R(ABI_PARAM1), Imm8(addressMask));
-
- maskedDataRegion = CurInstr.DataRegion;
- if (Num == 0)
- maskedDataRegion &= ~0xFFFFFF;
- else
- maskedDataRegion &= ~0x7FFFFF;
- }
+ }
- void* func = GetFuncForAddr(CurCPU, maskedDataRegion, flags & memop_Store, size);
+ patch.Offset = memopStart - memopLoadStoreLocation;
+ patch.Size = GetWritableCodePtr() - memopStart;
- if (flags & memop_Store)
- {
- PushRegs(false);
+ assert(patch.Size >= 5);
- MOV(32, R(ABI_PARAM2), rdMapped);
+ LoadStorePatches[memopLoadStoreLocation] = patch;
+ }
+ else
+ {
+ PushRegs(false);
- ABI_CallFunction((void(*)())func);
+ if (Num == 0)
+ {
+ MOV(64, R(ABI_PARAM2), R(RCPU));
+ if (ABI_PARAM1 != RSCRATCH3)
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
+ if (flags & memop_Store)
+ {
+ MOV(32, R(ABI_PARAM3), rdMapped);
- PopRegs(false);
- }
- else
+ switch (size | NDS::ConsoleType)
{
- if (!addrIsStatic)
- MOV(32, rdMapped, R(RSCRATCH3));
-
- PushRegs(false);
-
- ABI_CallFunction((void(*)())func);
-
- PopRegs(false);
-
- if (!addrIsStatic)
- MOV(32, R(RSCRATCH3), rdMapped);
-
- if (flags & memop_SignExtend)
- MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
- else
- MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ case 32: CALL((void*)&SlowWrite9<u32, 0>); break;
+ case 16: CALL((void*)&SlowWrite9<u16, 0>); break;
+ case 8: CALL((void*)&SlowWrite9<u8, 0>); break;
+ case 33: CALL((void*)&SlowWrite9<u32, 1>); break;
+ case 17: CALL((void*)&SlowWrite9<u16, 1>); break;
+ case 9: CALL((void*)&SlowWrite9<u8, 1>); break;
}
}
-
- if ((size == 32 && !(flags & memop_Store)))
+ else
{
- if (addrIsStatic)
- {
- if (staticAddress & 0x3)
- ROR_(32, rdMapped, Imm8((staticAddress & 0x3) * 8));
- }
- else
+ switch (size | NDS::ConsoleType)
{
- AND(32, R(RSCRATCH3), Imm8(0x3));
- SHL(32, R(RSCRATCH3), Imm8(3));
- ROR_(32, rdMapped, R(RSCRATCH3));
+ case 32: CALL((void*)&SlowRead9<u32, 0>); break;
+ case 16: CALL((void*)&SlowRead9<u16, 0>); break;
+ case 8: CALL((void*)&SlowRead9<u8, 0>); break;
+ case 33: CALL((void*)&SlowRead9<u32, 1>); break;
+ case 17: CALL((void*)&SlowRead9<u16, 1>); break;
+ case 9: CALL((void*)&SlowRead9<u8, 1>); break;
}
}
-
- if (compileSlowPath)
- {
- SwitchToFarCode();
- SetJumpTarget(slowPath);
- }
}
-*/
- if (true)
+ else
{
- PushRegs(false);
-
- if (Num == 0)
+ if (ABI_PARAM1 != RSCRATCH3)
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
+ if (flags & memop_Store)
{
- MOV(64, R(ABI_PARAM2), R(RCPU));
- if (ABI_PARAM1 != RSCRATCH3)
- MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
- if (flags & memop_Store)
- {
- MOV(32, R(ABI_PARAM3), rdMapped);
+ MOV(32, R(ABI_PARAM2), rdMapped);
- switch (size)
- {
- case 32: CALL((void*)&SlowWrite9<u32>); break;
- case 16: CALL((void*)&SlowWrite9<u16>); break;
- case 8: CALL((void*)&SlowWrite9<u8>); break;
- }
- }
- else
+ switch (size | NDS::ConsoleType)
{
- switch (size)
- {
- case 32: CALL((void*)&SlowRead9<u32>); break;
- case 16: CALL((void*)&SlowRead9<u16>); break;
- case 8: CALL((void*)&SlowRead9<u8>); break;
- }
+ case 32: CALL((void*)&SlowWrite7<u32, 0>); break;
+ case 16: CALL((void*)&SlowWrite7<u16, 0>); break;
+ case 8: CALL((void*)&SlowWrite7<u8, 0>); break;
+ case 33: CALL((void*)&SlowWrite7<u32, 1>); break;
+ case 17: CALL((void*)&SlowWrite7<u16, 1>); break;
+ case 9: CALL((void*)&SlowWrite7<u8, 1>); break;
}
}
else
{
- if (ABI_PARAM1 != RSCRATCH3)
- MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
- if (flags & memop_Store)
+ switch (size | NDS::ConsoleType)
{
- MOV(32, R(ABI_PARAM2), rdMapped);
-
- switch (size)
- {
- case 32: CALL((void*)&SlowWrite7<u32>); break;
- case 16: CALL((void*)&SlowWrite7<u16>); break;
- case 8: CALL((void*)&SlowWrite7<u8>); break;
- }
- }
- else
- {
- switch (size)
- {
- case 32: CALL((void*)&SlowRead7<u32>); break;
- case 16: CALL((void*)&SlowRead7<u16>); break;
- case 8: CALL((void*)&SlowRead7<u8>); break;
- }
+ case 32: CALL((void*)&SlowRead7<u32, 0>); break;
+ case 16: CALL((void*)&SlowRead7<u16, 0>); break;
+ case 8: CALL((void*)&SlowRead7<u8, 0>); break;
+ case 33: CALL((void*)&SlowRead7<u32, 1>); break;
+ case 17: CALL((void*)&SlowRead7<u16, 1>); break;
+ case 9: CALL((void*)&SlowRead7<u8, 1>); break;
}
}
+ }
- PopRegs(false);
+ PopRegs(false);
- if (!(flags & memop_Store))
- {
- if (flags & memop_SignExtend)
- MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
- else
- MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
- }
- }
-/*
- if (compileFastPath && compileSlowPath)
+ if (!(flags & memop_Store))
{
- FixupBranch ret = J(true);
- SwitchToNearCode();
- SetJumpTarget(ret);
- }*/
+ if (flags & memop_SignExtend)
+ MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ else
+ MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ }
+ }
- if (!(flags & memop_Store) && rd == 15)
+ if (!(flags & memop_Store) && rd == 15)
+ {
+ if (size < 32)
+ printf("!!! LDR <32 bit PC %08X %x\n", R15, CurInstr.Instr);
{
- if (size < 32)
- printf("!!! LDR <32 bit PC %08X %x\n", R15, CurInstr.Instr);
+ if (Num == 1)
{
- if (Num == 1)
- AND(32, rdMapped, Imm8(0xFE)); // immediate is sign extended
- Comp_JumpTo(rdMapped.GetSimpleReg());
+ if (Thumb)
+ OR(32, rdMapped, Imm8(0x1));
+ else
+ AND(32, rdMapped, Imm8(0xFE));
}
+ Comp_JumpTo(rdMapped.GetSimpleReg());
}
}
}
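
The slow-path dispatch above keys its switches on size | NDS::ConsoleType: size is 8, 16 or 32 and ConsoleType is 0 (DS) or 1 (DSi), so the low bit is free and every template instantiation gets a distinct case label:

    //   32 | 0 == 32 -> SlowWrite9<u32, 0>   (32-bit, DS)
    //   32 | 1 == 33 -> SlowWrite9<u32, 1>   (32-bit, DSi)
    //    8 | 1 ==  9 -> SlowWrite9<u8, 1>    ( 8-bit, DSi)
    static_assert((32 | 1) == 33 && (16 | 1) == 17 && (8 | 1) == 9,
                  "size and console type never collide");
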
@@ -470,7 +337,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
int flags = 0;
if (store)
flags |= memop_Store;
- if (decrement)
+ if (decrement && preinc)
flags |= memop_SubtractOffset;
Op2 offset = preinc ? Op2(4) : Op2(0);
@@ -481,96 +348,52 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
s32 offset = (regsCount * 4) * (decrement ? -1 : 1);
- // we need to make sure that the stack stays aligned to 16 bytes
-#ifdef _WIN32
- // include shadow
- u32 stackAlloc = ((regsCount + 4 + 1) & ~1) * 8;
-#else
- u32 stackAlloc = ((regsCount + 1) & ~1) * 8;
-#endif
- u32 allocOffset = stackAlloc - regsCount * 8;
-/*
int expectedTarget = Num == 0
- ? ClassifyAddress9(CurInstr.DataRegion)
- : ClassifyAddress7(CurInstr.DataRegion);
- if (usermode || CurInstr.Cond() < 0xE)
- expectedTarget = memregion_Other;
-
- bool compileFastPath = false;
+ ? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion)
+ : ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion);
- switch (expectedTarget)
- {
- case memregion_DTCM:
- case memregion_MainRAM:
- case memregion_SWRAM9:
- case memregion_SWRAM7:
- case memregion_WRAM7:
- compileFastPath = true;
- break;
- default:
- break;
- }
-*/
if (!store)
Comp_AddCycles_CDI();
else
Comp_AddCycles_CD();
+ bool compileFastPath = Config::JIT_FastMemory
+ && !usermode && (CurInstr.Cond() < 0xE || ARMJIT_Memory::IsFastmemCompatible(expectedTarget));
+
+ // we need to make sure that the stack stays aligned to 16 bytes
+#ifdef _WIN32
+ // include shadow
+ u32 stackAlloc = (((regsCount + 4 + 1) & ~1) + (compileFastPath ? 1 : 0)) * 8;
+#else
+ u32 stackAlloc = (((regsCount + 1) & ~1) + (compileFastPath ? 1 : 0)) * 8;
+#endif
+ u32 allocOffset = stackAlloc - regsCount * 8;
+
if (decrement)
- {
- MOV_sum(32, RSCRATCH4, MapReg(rn), Imm32(-regsCount * 4));
- preinc ^= true;
- }
+ MOV_sum(32, RSCRATCH4, MapReg(rn), Imm32(-regsCount * 4 + (preinc ? 0 : 4)));
else
- MOV(32, R(RSCRATCH4), MapReg(rn));
-/*
+ MOV_sum(32, RSCRATCH4, MapReg(rn), Imm32(preinc ? 4 : 0));
+
if (compileFastPath)
{
- assert(!usermode);
+ AND(32, R(RSCRATCH4), Imm8(~3));
- MOV(32, R(RSCRATCH), R(RSCRATCH4));
- SHR(32, R(RSCRATCH), Imm8(9));
+ u8* fastPathStart = GetWritableCodePtr();
+ u8* firstLoadStoreAddr;
- if (store)
- {
- CMP(8, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)), Imm8(expectedTarget));
- }
- else
- {
- MOVZX(32, 8, RSCRATCH, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)));
- AND(32, R(RSCRATCH), Imm8(~0x80));
- CMP(32, R(RSCRATCH), Imm8(expectedTarget));
- }
- FixupBranch slowPath = J_CC(CC_NE, true);
+ bool firstLoadStore = true;
+
+ MOV(64, R(RSCRATCH2), ImmPtr(Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start));
+ ADD(64, R(RSCRATCH2), R(RSCRATCH4));
+ MOV(32, R(RSCRATCH3), R(RSCRATCH4));
- if (expectedTarget == memregion_DTCM)
- {
- SUB(32, R(RSCRATCH4), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
- AND(32, R(RSCRATCH4), Imm32(0x3FFF & ~3));
- LEA(64, RSCRATCH4, MComplex(RCPU, RSCRATCH4, 1, offsetof(ARMv5, DTCM)));
- }
- else if (expectedTarget == memregion_MainRAM)
- {
- AND(32, R(RSCRATCH4), Imm32((MAIN_RAM_SIZE - 1) & ~3));
- ADD(64, R(RSCRATCH4), Imm32(squeezePointer(NDS::MainRAM)));
- }
- else if (expectedTarget == memregion_WRAM7)
- {
- AND(32, R(RSCRATCH4), Imm32(0xFFFF & ~3));
- ADD(64, R(RSCRATCH4), Imm32(squeezePointer(NDS::ARM7WRAM)));
- }
- else // SWRAM
- {
- AND(32, R(RSCRATCH4), Imm8(~3));
- AND(32, R(RSCRATCH4), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9Mask : &NDS::SWRAM_ARM7Mask));
- ADD(64, R(RSCRATCH4), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9 : &NDS::SWRAM_ARM7));
- }
u32 offset = 0;
for (int reg : regs)
{
- if (preinc)
- offset += 4;
- OpArg mem = MDisp(RSCRATCH4, offset);
+ if (firstLoadStore)
+ firstLoadStoreAddr = GetWritableCodePtr();
+
+ OpArg mem = MDisp(RSCRATCH2, offset);
if (store)
{
if (RegCache.LoadedRegs & (1 << reg))
@@ -580,6 +403,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
else
{
LoadReg(reg, RSCRATCH);
+ if (firstLoadStore)
+ firstLoadStoreAddr = GetWritableCodePtr();
MOV(32, mem, R(RSCRATCH));
}
}
@@ -595,13 +420,19 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
SaveReg(reg, RSCRATCH);
}
}
- if (!preinc)
- offset += 4;
+ offset += 4;
+
+ firstLoadStore = false;
}
+ LoadStorePatch patch;
+ patch.Size = GetWritableCodePtr() - fastPathStart;
+ patch.Offset = fastPathStart - firstLoadStoreAddr;
SwitchToFarCode();
- SetJumpTarget(slowPath);
- }*/
+ patch.PatchFunc = GetWritableCodePtr();
+
+ LoadStorePatches[firstLoadStoreAddr] = patch;
+ }
if (!store)
{
@@ -618,12 +449,12 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
if (Num == 0)
MOV(64, R(ABI_PARAM4), R(RCPU));
- switch (Num * 2 | preinc)
+ switch (Num * 2 | NDS::ConsoleType)
{
- case 0: CALL((void*)&SlowBlockTransfer9<false, false>); break;
- case 1: CALL((void*)&SlowBlockTransfer9<true, false>); break;
- case 2: CALL((void*)&SlowBlockTransfer7<false, false>); break;
- case 3: CALL((void*)&SlowBlockTransfer7<true, false>); break;
+ case 0: CALL((void*)&SlowBlockTransfer9<false, 0>); break;
+ case 1: CALL((void*)&SlowBlockTransfer9<false, 1>); break;
+ case 2: CALL((void*)&SlowBlockTransfer7<false, 0>); break;
+ case 3: CALL((void*)&SlowBlockTransfer7<false, 1>); break;
}
PopRegs(false);
@@ -715,25 +546,24 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
if (Num == 0)
MOV(64, R(ABI_PARAM4), R(RCPU));
- switch (Num * 2 | preinc)
+ switch (Num * 2 | NDS::ConsoleType)
{
- case 0: CALL((void*)&SlowBlockTransfer9<false, true>); break;
- case 1: CALL((void*)&SlowBlockTransfer9<true, true>); break;
- case 2: CALL((void*)&SlowBlockTransfer7<false, true>); break;
- case 3: CALL((void*)&SlowBlockTransfer7<true, true>); break;
+ case 0: CALL((void*)&SlowBlockTransfer9<true, 0>); break;
+ case 1: CALL((void*)&SlowBlockTransfer9<true, 1>); break;
+ case 2: CALL((void*)&SlowBlockTransfer7<true, 0>); break;
+ case 3: CALL((void*)&SlowBlockTransfer7<true, 1>); break;
}
ADD(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
PopRegs(false);
}
-/*
+
if (compileFastPath)
{
- FixupBranch ret = J(true);
+ RET();
SwitchToNearCode();
- SetJumpTarget(ret);
- }*/
+ }
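
For block transfers a single patch covers the whole run of contiguous accesses: the map key is the first load/store that can fault, Offset points back to fastPathStart, Size spans every MOV in the run, and PatchFunc is the slow path emitted in far code, whose trailing RET (added just above) drops execution back into near code right after the NOP-filled span. In outline:

    // Schematic of a rewritten block transfer (not literal emitter output):
    //   fastPathStart:   base/address setup      ; becomes: call PatchFunc
    //   firstLoadStore:  mov [base+0], eax       ;          nop ...
    //                    mov [base+4], ebx       ;          nop ...
    //   resume point:    ...                     ; PatchFunc RETs to here
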
if (!store && regs[15])
{