author     RSDuck <rsduck@users.noreply.github.com>    2020-06-14 21:04:25 +0200
committer  RSDuck <rsduck@users.noreply.github.com>    2020-06-16 12:11:19 +0200
commit     e335a8ca7615c702cfa2dcdb71deb69468088fd8 (patch)
tree       c09dcec016d87e7d82a6aec377f8eb3fa9949026
parent     fea9f95bba7475b2cd3b624a3ccd6cdee00a33f1 (diff)
first steps in bringing over the JIT refactor/fastmem
-rw-r--r--    src/ARM.cpp                            43
-rw-r--r--    src/ARM.h                              15
-rw-r--r--    src/ARMJIT.cpp                        771
-rw-r--r--    src/ARMJIT.h                           64
-rw-r--r--    src/ARMJIT_A64/ARMJIT_ALU.cpp         123
-rw-r--r--    src/ARMJIT_A64/ARMJIT_Branch.cpp       99
-rw-r--r--    src/ARMJIT_A64/ARMJIT_Compiler.cpp    383
-rw-r--r--    src/ARMJIT_A64/ARMJIT_Compiler.h       71
-rw-r--r--    src/ARMJIT_A64/ARMJIT_Linkage.s        68
-rw-r--r--    src/ARMJIT_A64/ARMJIT_LoadStore.cpp   790
-rw-r--r--    src/ARMJIT_Compiler.h                  12
-rw-r--r--    src/ARMJIT_Internal.h                  70
-rw-r--r--    src/ARMJIT_Memory.cpp                 822
-rw-r--r--    src/ARMJIT_Memory.h                    53
-rw-r--r--    src/ARMJIT_x64/ARMJIT_Compiler.cpp     92
-rw-r--r--    src/ARMJIT_x64/ARMJIT_Compiler.h       11
-rw-r--r--    src/ARMJIT_x64/ARMJIT_LoadStore.cpp    45
-rw-r--r--    src/ARM_InstrInfo.cpp                  73
-rw-r--r--    src/ARM_InstrInfo.h                     1
-rw-r--r--    src/CMakeLists.txt                      6
-rw-r--r--    src/CP15.cpp                           84
-rw-r--r--    src/Config.cpp                          6
-rw-r--r--    src/Config.h                            1
-rw-r--r--    src/NDS.cpp                           220
-rw-r--r--    src/NDS.h                              17
25 files changed, 2342 insertions, 1598 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp
index 92a3a9e..e529be8 100644
--- a/src/ARM.cpp
+++ b/src/ARM.cpp
@@ -21,6 +21,8 @@
#include "DSi.h"
#include "ARM.h"
#include "ARMInterpreter.h"
+#include "ARMJIT.h"
+#include "Config.h"
#include "AREngine.h"
#include "ARMJIT.h"
#include "Config.h"
@@ -74,7 +76,9 @@ ARM::~ARM()
ARMv5::ARMv5() : ARM(0)
{
- //
+#ifndef JIT_ENABLED
+ DTCM = new u8[DTCMSize];
+#endif
}
ARMv4::ARMv4() : ARM(1)
@@ -82,6 +86,13 @@ ARMv4::ARMv4() : ARM(1)
//
}
+ARMv5::~ARMv5()
+{
+#ifndef JIT_ENABLED
+ delete[] DTCM;
+#endif
+}
+
void ARM::Reset()
{
Cycles = 0;
@@ -622,24 +633,26 @@ void ARMv5::ExecuteJIT()
while (NDS::ARM9Timestamp < NDS::ARM9Target)
{
u32 instrAddr = R[15] - ((CPSR&0x20)?2:4);
- u32 translatedAddr = ARMJIT::TranslateAddr9(instrAddr);
- if (!translatedAddr)
+
+ // hack so Cycles <= 0 becomes Cycles < 0
+ Cycles = NDS::ARM9Target - NDS::ARM9Timestamp - 1;
+
+ if ((instrAddr < FastBlockLookupStart || instrAddr >= (FastBlockLookupStart + FastBlockLookupSize))
+ && !ARMJIT::SetupExecutableRegion(0, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize))
{
NDS::ARM9Timestamp = NDS::ARM9Target;
printf("ARMv5 PC in non executable region %08X\n", R[15]);
return;
}
- // hack so Cycles <= 0 becomes Cycles < 0
- Cycles = NDS::ARM9Target - NDS::ARM9Timestamp - 1;
-
- ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry<0>(translatedAddr);
+ ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock(0, FastBlockLookup,
+ instrAddr - FastBlockLookupStart, instrAddr);
if (block)
ARM_Dispatch(this, block);
else
ARMJIT::CompileBlock(this);
- NDS::ARM9Timestamp = NDS::ARM9Target - (Cycles + 1);
+ NDS::ARM9Timestamp = NDS::ARM9Target - Cycles - 1;
if (StopExecution)
{
@@ -766,23 +779,25 @@ void ARMv4::ExecuteJIT()
while (NDS::ARM7Timestamp < NDS::ARM7Target)
{
u32 instrAddr = R[15] - ((CPSR&0x20)?2:4);
- u32 translatedAddr = ARMJIT::TranslateAddr7(instrAddr);
- if (!translatedAddr)
+
+ Cycles = NDS::ARM7Target - NDS::ARM7Timestamp - 1;
+
+ if ((instrAddr < FastBlockLookupStart || instrAddr >= (FastBlockLookupStart + FastBlockLookupSize))
+ && !ARMJIT::SetupExecutableRegion(1, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize))
{
NDS::ARM7Timestamp = NDS::ARM7Target;
printf("ARMv4 PC in non executable region %08X\n", R[15]);
return;
}
- Cycles = NDS::ARM7Target - NDS::ARM7Timestamp - 1;
-
- ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry<1>(translatedAddr);
+ ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock(1, FastBlockLookup,
+ instrAddr - FastBlockLookupStart, instrAddr);
if (block)
ARM_Dispatch(this, block);
else
ARMJIT::CompileBlock(this);
- NDS::ARM7Timestamp = NDS::ARM7Target - (Cycles + 1);
+ NDS::ARM7Timestamp = NDS::ARM7Target - Cycles - 1;
// TODO optimize this shit!!!
if (StopExecution)
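The reworked dispatch loops above hand the JITed block a down-counting cycle budget rather than tracking the absolute timestamp directly; a minimal standalone sketch of that bookkeeping, with purely illustrative numbers:

    #include <cstdint>
    #include <cassert>

    int main()
    {
        // illustrative values; in the emulator these are NDS::ARM9Timestamp/Target
        int64_t timestamp = 100, target = 164;
        int32_t cycles = (int32_t)(target - timestamp - 1); // "hack so Cycles <= 0 becomes Cycles < 0"
        cycles -= 40;                                        // pretend the dispatched block consumed 40 cycles
        timestamp = target - cycles - 1;                     // recover the absolute time afterwards
        assert(timestamp == 140);                            // exactly 100 + 40 cycles elapsed
        return 0;
    }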
diff --git a/src/ARM.h b/src/ARM.h
index b1e8053..b7f16d6 100644
--- a/src/ARM.h
+++ b/src/ARM.h
@@ -32,11 +32,14 @@ enum
RWFlags_ForceUser = (1<<21),
};
+const u32 ITCMPhysicalSize = 0x8000;
+const u32 DTCMPhysicalSize = 0x4000;
+
class ARM
{
public:
ARM(u32 num);
- ~ARM(); // destroy shit
+ virtual ~ARM(); // destroy shit
virtual void Reset();
@@ -143,6 +146,11 @@ public:
NDS::MemRegion CodeMem;
+#ifdef JIT_ENABLED
+ u32 FastBlockLookupStart = 0, FastBlockLookupSize = 0;
+ u64* FastBlockLookup;
+#endif
+
static u32 ConditionTable[16];
protected:
@@ -158,6 +166,7 @@ class ARMv5 : public ARM
{
public:
ARMv5();
+ ~ARMv5();
void Reset();
@@ -260,8 +269,8 @@ public:
u32 DTCMBase, DTCMSize;
s32 RegionCodeCycles;
- u8 ITCM[0x8000];
- u8 DTCM[0x4000];
+ u8 ITCM[ITCMPhysicalSize];
+ u8* DTCM;
u8 ICache[0x2000];
u32 ICacheTags[64*4];
diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp
index 8d87c76..53b28c1 100644
--- a/src/ARMJIT.cpp
+++ b/src/ARMJIT.cpp
@@ -10,13 +10,8 @@
#include "Config.h"
#include "ARMJIT_Internal.h"
-#if defined(__x86_64__)
-#include "ARMJIT_x64/ARMJIT_Compiler.h"
-#elif defined(__aarch64__)
-#include "ARMJIT_A64/ARMJIT_Compiler.h"
-#else
-#error "The current target platform doesn't have a JIT backend"
-#endif
+#include "ARMJIT_Memory.h"
+#include "ARMJIT_Compiler.h"
#include "ARMInterpreter_ALU.h"
#include "ARMInterpreter_LoadStore.h"
@@ -29,6 +24,11 @@
#include "Wifi.h"
#include "NDSCart.h"
+#include "ARMJIT_x64/ARMJIT_Offsets.h"
+static_assert(offsetof(ARM, CPSR) == ARM_CPSR_offset);
+static_assert(offsetof(ARM, Cycles) == ARM_Cycles_offset);
+static_assert(offsetof(ARM, StopExecution) == ARM_StopExecution_offset);
+
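The static_asserts above tie the C++ object layout to the offset constants consumed by the hand-written linkage code; a tiny sketch of the same guard pattern, with hypothetical names standing in for the generated constants:

    #include <cstddef>

    // Hypothetical stand-ins for the offset constants kept in a separate header.
    struct Example { int cpsr; int cycles; };
    constexpr std::size_t Example_cpsr_offset = 0;

    static_assert(offsetof(Example, cpsr) == Example_cpsr_offset,
                  "assembly-side offsets must match the C++ object layout");

    int main() { return 0; }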
namespace ARMJIT
{
@@ -37,281 +37,100 @@ namespace ARMJIT
Compiler* JITCompiler;
-const u32 ExeMemRegionSizes[] =
-{
- 0x8000, // Unmapped Region (dummy)
- 0x8000, // ITCM
- 4*1024*1024, // Main RAM
- 0x8000, // SWRAM
- 0xA4000, // LCDC
- 0x8000, // ARM9 BIOS
- 0x4000, // ARM7 BIOS
- 0x10000, // ARM7 WRAM
- 0x40000 // ARM7 WVRAM
-};
-
-const u32 ExeMemRegionOffsets[] =
-{
- 0,
- 0x8000,
- 0x10000,
- 0x410000,
- 0x418000,
- 0x4BC000,
- 0x4C4000,
- 0x4C8000,
- 0x4D8000,
- 0x518000,
-};
-
-/*
- translates address to pseudo physical address
- - more compact, eliminates mirroring, everything comes in a row
- - we only need one translation table
-*/
-
-u32 TranslateAddr9(u32 addr)
-{
- switch (ClassifyAddress9(addr))
- {
- case memregion_MainRAM: return ExeMemRegionOffsets[exeMem_MainRAM] + (addr & (MAIN_RAM_SIZE - 1));
- case memregion_SWRAM9:
- if (NDS::SWRAM_ARM9)
- return ExeMemRegionOffsets[exeMem_SWRAM] + (NDS::SWRAM_ARM9 - NDS::SharedWRAM) + (addr & NDS::SWRAM_ARM9Mask);
- else
- return 0;
- case memregion_ITCM: return ExeMemRegionOffsets[exeMem_ITCM] + (addr & 0x7FFF);
- case memregion_VRAM: return (addr >= 0x6800000 && addr < 0x68A4000) ? ExeMemRegionOffsets[exeMem_LCDC] + (addr - 0x6800000) : 0;
- case memregion_BIOS9: return ExeMemRegionOffsets[exeMem_ARM9_BIOS] + (addr & 0xFFF);
- default: return 0;
- }
-}
-
-u32 TranslateAddr7(u32 addr)
-{
- switch (ClassifyAddress7(addr))
- {
- case memregion_MainRAM: return ExeMemRegionOffsets[exeMem_MainRAM] + (addr & (MAIN_RAM_SIZE - 1));
- case memregion_SWRAM7:
- if (NDS::SWRAM_ARM7)
- return ExeMemRegionOffsets[exeMem_SWRAM] + (NDS::SWRAM_ARM7 - NDS::SharedWRAM) + (addr & NDS::SWRAM_ARM7Mask);
- else
- return 0;
- case memregion_BIOS7: return ExeMemRegionOffsets[exeMem_ARM7_BIOS] + addr;
- case memregion_WRAM7: return ExeMemRegionOffsets[exeMem_ARM7_WRAM] + (addr & 0xFFFF);
- case memregion_VWRAM: return ExeMemRegionOffsets[exeMem_ARM7_WVRAM] + (addr & 0x1FFFF);
- default: return 0;
- }
-}
-
-AddressRange CodeRanges[ExeMemSpaceSize / 512];
-
-TinyVector<u32> InvalidLiterals;
+AddressRange CodeIndexITCM[ITCMPhysicalSize / 512];
+AddressRange CodeIndexMainRAM[NDS::MainRAMSize / 512];
+AddressRange CodeIndexSWRAM[NDS::SharedWRAMSize / 512];
+AddressRange CodeIndexVRAM[0x100000 / 512];
+AddressRange CodeIndexARM9BIOS[sizeof(NDS::ARM9BIOS) / 512];
+AddressRange CodeIndexARM7BIOS[sizeof(NDS::ARM7BIOS) / 512];
+AddressRange CodeIndexARM7WRAM[NDS::ARM7WRAMSize / 512];
+AddressRange CodeIndexARM7WVRAM[0x40000 / 512];
std::unordered_map<u32, JitBlock*> JitBlocks9;
std::unordered_map<u32, JitBlock*> JitBlocks7;
-u8 MemoryStatus9[0x800000];
-u8 MemoryStatus7[0x800000];
+u64 FastBlockLookupITCM[ITCMPhysicalSize / 2];
+u64 FastBlockLookupMainRAM[NDS::MainRAMSize / 2];
+u64 FastBlockLookupSWRAM[NDS::SharedWRAMSize / 2];
+u64 FastBlockLookupVRAM[0x100000 / 2];
+u64 FastBlockLookupARM9BIOS[sizeof(NDS::ARM9BIOS) / 2];
+u64 FastBlockLookupARM7BIOS[sizeof(NDS::ARM7BIOS) / 2];
+u64 FastBlockLookupARM7WRAM[NDS::ARM7WRAMSize / 2];
+u64 FastBlockLookupARM7WVRAM[0x40000 / 2];
-int ClassifyAddress9(u32 addr)
+const u32 CodeRegionSizes[ARMJIT_Memory::memregions_Count] =
{
- if (addr < NDS::ARM9->ITCMSize)
- return memregion_ITCM;
- else if (addr >= NDS::ARM9->DTCMBase && addr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize))
- return memregion_DTCM;
- else if ((addr & 0xFFFFF000) == 0xFFFF0000)
- return memregion_BIOS9;
- else
- {
- switch (addr & 0xFF000000)
- {
- case 0x02000000:
- return memregion_MainRAM;
- case 0x03000000:
- return memregion_SWRAM9;
- case 0x04000000:
- return memregion_IO9;
- case 0x06000000:
- return memregion_VRAM;
- }
- }
- return memregion_Other;
-}
+ 0,
+ ITCMPhysicalSize,
+ 0,
+ sizeof(NDS::ARM9BIOS),
+ NDS::MainRAMSize,
+ NDS::SharedWRAMSize,
+ 0,
+ 0x100000,
+ sizeof(NDS::ARM7BIOS),
+ NDS::ARM7WRAMSize,
+ 0,
+ 0,
+ 0x40000,
+};
-int ClassifyAddress7(u32 addr)
+AddressRange* const CodeMemRegions[ARMJIT_Memory::memregions_Count] =
{
- if (addr < 0x00004000)
- return memregion_BIOS7;
- else
- {
- switch (addr & 0xFF800000)
- {
- case 0x02000000:
- case 0x02800000:
- return memregion_MainRAM;
- case 0x03000000:
- if (NDS::SWRAM_ARM7)
- return memregion_SWRAM7;
- else
- return memregion_WRAM7;
- case 0x03800000:
- return memregion_WRAM7;
- case 0x04000000:
- return memregion_IO7;
- case 0x04800000:
- return memregion_Wifi;
- case 0x06000000:
- case 0x06800000:
- return memregion_VWRAM;
- }
- }
- return memregion_Other;
-}
+ NULL,
+ CodeIndexITCM,
+ NULL,
+ CodeIndexARM9BIOS,
+ CodeIndexMainRAM,
+ CodeIndexSWRAM,
+ NULL,
+ CodeIndexVRAM,
+ CodeIndexARM7BIOS,
+ CodeIndexARM7WRAM,
+ NULL,
+ NULL,
+ CodeIndexARM7WVRAM,
+};
-void UpdateMemoryStatus9(u32 start, u32 end)
+u64* const FastBlockLookupRegions[ARMJIT_Memory::memregions_Count] =
{
- start >>= 12;
- end >>= 12;
-
- if (end == 0xFFFFF)
- end++;
-
- for (u32 i = start; i < end; i++)
- {
- u32 addr = i << 12;
-
- int region = ClassifyAddress9(addr);
- u32 pseudoPhyisical = TranslateAddr9(addr);
-
- for (u32 j = 0; j < 8; j++)
- {
- u8 val = region;
- if (CodeRanges[(pseudoPhyisical + (j << 12)) / 512].Blocks.Length)
- val |= 0x80;
- MemoryStatus9[i * 8 + j] = val;
- }
- }
-}
+ NULL,
+ FastBlockLookupITCM,
+ NULL,
+ FastBlockLookupARM9BIOS,
+ FastBlockLookupMainRAM,
+ FastBlockLookupSWRAM,
+ NULL,
+ FastBlockLookupVRAM,
+ FastBlockLookupARM7BIOS,
+ FastBlockLookupARM7WRAM,
+ NULL,
+ NULL,
+ FastBlockLookupARM7WVRAM
+};
-void UpdateMemoryStatus7(u32 start, u32 end)
+u32 LocaliseCodeAddress(u32 num, u32 addr)
{
- start >>= 12;
- end >>= 12;
-
- if (end == 0xFFFFF)
- end++;
-
- for (u32 i = start; i < end; i++)
+ int region = num == 0
+ ? ARMJIT_Memory::ClassifyAddress9(addr)
+ : ARMJIT_Memory::ClassifyAddress7(addr);
+
+ u32 mappingStart, mappingSize, memoryOffset, memorySize;
+ if (ARMJIT_Memory::GetRegionMapping(region, num, mappingStart,
+ mappingSize, memoryOffset, memorySize)
+ && CodeMemRegions[region])
{
- u32 addr = i << 12;
-
- int region = ClassifyAddress7(addr);
- u32 pseudoPhyisical = TranslateAddr7(addr);
-
- for (u32 j = 0; j < 8; j++)
- {
- u8 val = region;
- if (CodeRanges[(pseudoPhyisical + (j << 12)) / 512].Blocks.Length)
- val |= 0x80;
- MemoryStatus7[i * 8 + j] = val;
- }
+ addr = ((addr - mappingStart) & (memorySize - 1)) + memoryOffset;
+ addr |= (u32)region << 28;
+ return addr;
}
+ return 0;
}
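LocaliseCodeAddress packs the region index into the top 4 bits and the offset into the region's backing memory into the low 28 bits; a small sketch of that encoding and how the rest of the file unpacks it (the region index and offset here are made up):

    #include <cstdint>
    #include <cassert>

    // Sketch of the packing used by LocaliseCodeAddress.
    constexpr uint32_t MakeLocalAddr(uint32_t region, uint32_t offset)
    {
        return (region << 28) | (offset & 0xFFFFFFF);
    }

    int main()
    {
        uint32_t local = MakeLocalAddr(4, 0x1234);     // hypothetical region index / offset
        assert((local >> 28) == 4);                    // region recovered, as used for CodeMemRegions[]
        assert((local & 0xFFFFFFF) == 0x1234);         // offset recovered, as used for the 512-byte ranges
        return 0;
    }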
-void UpdateRegionByPseudoPhyiscal(u32 addr, bool invalidate)
-{
- for (u32 i = 1; i < exeMem_Count; i++)
- {
- if (addr >= ExeMemRegionOffsets[i] && addr < ExeMemRegionOffsets[i] + ExeMemRegionSizes[i])
- {
- for (u32 num = 0; num < 2; num++)
- {
- u32 physSize = ExeMemRegionSizes[i];
- u32 mapSize = 0;
- u32 mapStart = 0;
- switch (i)
- {
- case exeMem_ITCM:
- if (num == 0)
- mapStart = 0; mapSize = NDS::ARM9->ITCMSize;
- break;
- case exeMem_MainRAM: mapStart = 0x2000000; mapSize = 0x1000000; break;
- case exeMem_SWRAM:
- if (num == 0)
- {
- if (NDS::SWRAM_ARM9)
- mapStart = 0x3000000, mapSize = 0x1000000;
- else
- mapStart = mapSize = 0;
- }
- else
- {
- if (NDS::SWRAM_ARM7)
- mapStart = 0x3000000, mapSize = 0x800000;
- else
- mapStart = mapSize = 0;
- }
- break;
- case exeMem_LCDC:
- if (num == 0)
- mapStart = 0x6800000, mapSize = 0xA4000;
- break;
- case exeMem_ARM9_BIOS:
- if (num == 0)
- mapStart = 0xFFFF0000, mapSize = 0x10000;
- break;
- case exeMem_ARM7_BIOS:
- if (num == 1)
- mapStart = 0; mapSize = 0x4000;
- break;
- case exeMem_ARM7_WRAM:
- if (num == 1)
- {
- if (NDS::SWRAM_ARM7)
- mapStart = 0x3800000, mapSize = 0x800000;
- else
- mapStart = 0x3000000, mapSize = 0x1000000;
- }
- break;
- case exeMem_ARM7_WVRAM:
- if (num == 1)
- mapStart = 0x6000000, mapSize = 0x1000000;
- break;
- }
-
- for (u32 j = 0; j < mapSize / physSize; j++)
- {
- u32 virtAddr = mapStart + physSize * j + (addr - ExeMemRegionOffsets[i]);
- if (num == 0
- && virtAddr >= NDS::ARM9->DTCMBase && virtAddr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize))
- continue;
- if (invalidate)
- {
- if (num == 0)
- MemoryStatus9[virtAddr / 512] |= 0x80;
- else
- MemoryStatus7[virtAddr / 512] |= 0x80;
- }
- else
- {
- if (num == 0)
- MemoryStatus9[virtAddr / 512] &= ~0x80;
- else
- MemoryStatus7[virtAddr / 512] &= ~0x80;
- }
- }
-
- }
- return;
- }
- }
-
- assert(false);
-}
+TinyVector<u32> InvalidLiterals;
template <typename T>
-T SlowRead9(ARMv5* cpu, u32 addr)
+T SlowRead9(u32 addr, ARMv5* cpu)
{
u32 offset = addr & 0x3;
addr &= ~(sizeof(T) - 1);
@@ -335,13 +154,13 @@ T SlowRead9(ARMv5* cpu, u32 addr)
}
template <typename T>
-void SlowWrite9(ARMv5* cpu, u32 addr, T val)
+void SlowWrite9(u32 addr, ARMv5* cpu, T val)
{
addr &= ~(sizeof(T) - 1);
if (addr < cpu->ITCMSize)
{
- InvalidateITCMIfNecessary(addr);
+ CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
*(T*)&cpu->ITCM[addr & 0x7FFF] = val;
}
else if (addr >= cpu->DTCMBase && addr < (cpu->DTCMBase + cpu->DTCMSize))
@@ -362,13 +181,13 @@ void SlowWrite9(ARMv5* cpu, u32 addr, T val)
}
}
-template void SlowWrite9<u32>(ARMv5*, u32, u32);
-template void SlowWrite9<u16>(ARMv5*, u32, u16);
-template void SlowWrite9<u8>(ARMv5*, u32, u8);
+template void SlowWrite9<u32>(u32, ARMv5*, u32);
+template void SlowWrite9<u16>(u32, ARMv5*, u16);
+template void SlowWrite9<u8>(u32, ARMv5*, u8);
-template u32 SlowRead9<u32>(ARMv5*, u32);
-template u16 SlowRead9<u16>(ARMv5*, u32);
-template u8 SlowRead9<u8>(ARMv5*, u32);
+template u32 SlowRead9<u32>(u32, ARMv5*);
+template u16 SlowRead9<u16>(u32, ARMv5*);
+template u8 SlowRead9<u8>(u32, ARMv5*);
template <typename T>
T SlowRead7(u32 addr)
@@ -407,14 +226,15 @@ template <bool PreInc, bool Write>
void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu)
{
addr &= ~0x3;
+ if (PreInc)
+ addr += 4;
for (int i = 0; i < num; i++)
{
- addr += PreInc * 4;
if (Write)
- SlowWrite9<u32>(cpu, addr, data[i]);
+ SlowWrite9<u32>(addr, cpu, data[i]);
else
- data[i] = SlowRead9<u32>(cpu, addr);
- addr += !PreInc * 4;
+ data[i] = SlowRead9<u32>(addr, cpu);
+ addr += 4;
}
}
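Hoisting the pre-increment out of the loop leaves the generated address sequence unchanged; a quick standalone check of that equivalence (a sketch, not emulator code):

    #include <cstdint>
    #include <cassert>

    int main()
    {
        // Old form: bump before or after each access depending on PreInc.
        // New form: bump once up front for PreInc, then always post-increment.
        const bool PreInc = true;
        uint32_t a = 0x1000, b = 0x1000;
        uint32_t oldSeq[3], newSeq[3];

        for (int i = 0; i < 3; i++) { a += PreInc * 4; oldSeq[i] = a; a += !PreInc * 4; }

        if (PreInc) b += 4;
        for (int i = 0; i < 3; i++) { newSeq[i] = b; b += 4; }

        for (int i = 0; i < 3; i++) assert(oldSeq[i] == newSeq[i]); // identical address sequence
        return 0;
    }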
@@ -422,14 +242,15 @@ template <bool PreInc, bool Write>
void SlowBlockTransfer7(u32 addr, u64* data, u32 num)
{
addr &= ~0x3;
+ if (PreInc)
+ addr += 4;
for (int i = 0; i < num; i++)
{
- addr += PreInc * 4;
if (Write)
SlowWrite7<u32>(addr, data[i]);
else
data[i] = SlowRead7<u32>(addr);
- addr += !PreInc * 4;
+ addr += 4;
}
}
@@ -540,16 +361,18 @@ struct UnreliableHashTable
};
UnreliableHashTable<u32, JitBlock*, 0x800, nullptr> RestoreCandidates;
-UnreliableHashTable<u32, u32, 0x800, UINT32_MAX> FastBlockLookUp9;
-UnreliableHashTable<u32, u32, 0x800, UINT32_MAX> FastBlockLookUp7;
void Init()
{
JITCompiler = new Compiler();
+
+ ARMJIT_Memory::Init();
}
void DeInit()
{
+ ARMJIT_Memory::DeInit();
+
delete JITCompiler;
}
@@ -557,8 +380,7 @@ void Reset()
{
ResetBlockCache();
- UpdateMemoryStatus9(0, 0xFFFFFFFF);
- UpdateMemoryStatus7(0, 0xFFFFFFFF);
+ ARMJIT_Memory::Reset();
}
void FloodFillSetFlags(FetchedInstr instrs[], int start, u8 flags)
@@ -673,11 +495,12 @@ bool IsIdleLoop(FetchedInstr* instrs, int instrsCount)
// it basically checks if one iteration of a loop depends on another
// the rules are quite simple
+ JIT_DEBUGPRINT("checking potential idle loop\n");
u16 regsWrittenTo = 0;
u16 regsDisallowedToWrite = 0;
for (int i = 0; i < instrsCount; i++)
{
- //printf("instr %d %x regs(%x %x) %x %x\n", i, instrs[i].Instr, instrs[i].Info.DstRegs, instrs[i].Info.SrcRegs, regsWrittenTo, regsDisallowedToWrite);
+ JIT_DEBUGPRINT("instr %d %x regs(%x %x) %x %x\n", i, instrs[i].Instr, instrs[i].Info.DstRegs, instrs[i].Info.SrcRegs, regsWrittenTo, regsDisallowedToWrite);
if (instrs[i].Info.SpecialKind == ARMInstrInfo::special_WriteMem)
return false;
if (i < instrsCount - 1 && instrs[i].Info.Branches())
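For context, the register rule this loop enforces can be restated roughly as below; this is a simplified sketch based on the masks visible in the hunk (memory writes and mid-block branches disqualify a loop separately), not the exact function body:

    #include <cstdint>

    bool RegsLookIdle(const uint16_t* srcRegs, const uint16_t* dstRegs, int count)
    {
        uint16_t writtenTo = 0, disallowedToWrite = 0;
        for (int i = 0; i < count; i++)
        {
            // a register read before being written inside the loop carries state in from
            // the previous iteration; writing it later makes iterations depend on each
            // other, so the loop can't be skipped as idle
            disallowedToWrite |= srcRegs[i] & ~writtenTo;
            if (dstRegs[i] & disallowedToWrite)
                return false;
            writtenTo |= dstRegs[i];
        }
        return true;
    }

    int main() { return 0; }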
@@ -782,8 +605,6 @@ InterpreterFunc InterpretTHUMB[ARMInstrInfo::tk_Count] =
};
#undef F
-
-extern u32 literalsPerBlock;
void CompileBlock(ARM* cpu)
{
bool thumb = cpu->CPSR & 0x20;
@@ -794,14 +615,28 @@ void CompileBlock(ARM* cpu)
Config::JIT_MaxBlockSize = 32;
u32 blockAddr = cpu->R[15] - (thumb ? 2 : 4);
- u32 pseudoPhysicalAddr = cpu->Num == 0
- ? TranslateAddr9(blockAddr)
- : TranslateAddr7(blockAddr);
- if (pseudoPhysicalAddr < ExeMemRegionSizes[exeMem_Unmapped])
- {
- printf("Trying to compile a block in unmapped memory: %x\n", blockAddr);
- }
-
+
+ auto& map = cpu->Num == 0 ? JitBlocks9 : JitBlocks7;
+ auto existingBlockIt = map.find(blockAddr);
+ if (existingBlockIt != map.end())
+ {
+ // there's already a block, though it's not inside the fast map
+ // could be that there are two blocks at the same physical addr
+ // but different mirrors
+ u32 localAddr = existingBlockIt->second->StartAddrLocal;
+
+ u64* entry = &FastBlockLookupRegions[localAddr >> 28][localAddr & 0xFFFFFFF];
+ *entry = ((u64)blockAddr | cpu->Num) << 32;
+ *entry |= JITCompiler->SubEntryOffset(existingBlockIt->second->EntryPoint);
+ return;
+ }
+
+ u32 localAddr = LocaliseCodeAddress(cpu->Num, blockAddr);
+ if (!localAddr)
+ {
+ printf("trying to compile non executable code? %x\n", blockAddr);
+ }
+
FetchedInstr instrs[Config::JIT_MaxBlockSize];
int i = 0;
u32 r15 = cpu->R[15];
@@ -842,9 +677,8 @@ void CompileBlock(ARM* cpu)
instrValues[i] = instrs[i].Instr;
- u32 translatedAddr = cpu->Num == 0
- ? TranslateAddr9(instrs[i].Addr)
- : TranslateAddr7(instrs[i].Addr);
+ u32 translatedAddr = LocaliseCodeAddress(cpu->Num, instrs[i].Addr);
+ assert(translatedAddr);
u32 translatedAddrRounded = translatedAddr & ~0x1FF;
if (i == 0 || translatedAddrRounded != addressRanges[numAddressRanges - 1])
{
@@ -928,9 +762,11 @@ void CompileBlock(ARM* cpu)
&& instrs[i].Info.SpecialKind == ARMInstrInfo::special_LoadLiteral
&& DecodeLiteral(thumb, instrs[i], literalAddr))
{
- u32 translatedAddr = cpu->Num == 0
- ? TranslateAddr9(literalAddr)
- : TranslateAddr7(literalAddr);
+ u32 translatedAddr = LocaliseCodeAddress(cpu->Num, literalAddr);
+ if (!translatedAddr)
+ {
+ printf("literal in non executable memory?\n");
+ }
u32 translatedAddrRounded = translatedAddr & ~0x1FF;
u32 j = 0;
@@ -994,9 +830,7 @@ void CompileBlock(ARM* cpu)
}
else if (hasBranched && !isBackJump && i + 1 < Config::JIT_MaxBlockSize)
{
- u32 targetPseudoPhysical = cpu->Num == 0
- ? TranslateAddr9(target)
- : TranslateAddr7(target);
+ u32 targetLocalised = LocaliseCodeAddress(cpu->Num, target);
if (link)
{
@@ -1048,7 +882,7 @@ void CompileBlock(ARM* cpu)
{
RestoreCandidates.Remove(instrHash);
- mayRestore = prevBlock->PseudoPhysicalAddr == pseudoPhysicalAddr && prevBlock->LiteralHash == literalHash;
+ mayRestore = prevBlock->StartAddr == blockAddr && prevBlock->LiteralHash == literalHash;
if (mayRestore && prevBlock->NumAddresses == numAddressRanges)
{
@@ -1087,11 +921,12 @@ void CompileBlock(ARM* cpu)
for (int j = 0; j < numLiterals; j++)
block->Literals()[j] = literalLoadAddrs[j];
- block->PseudoPhysicalAddr = pseudoPhysicalAddr;
+ block->StartAddr = blockAddr;
+ block->StartAddrLocal = localAddr;
FloodFillSetFlags(instrs, i - 1, 0xF);
- block->EntryPoint = JITCompiler->CompileBlock(pseudoPhysicalAddr, cpu, thumb, instrs, i);
+ block->EntryPoint = JITCompiler->CompileBlock(cpu, thumb, instrs, i);
}
else
{
@@ -1104,30 +939,34 @@ void CompileBlock(ARM* cpu)
assert(addressRanges[j] == block->AddressRanges()[j]);
assert(addressMasks[j] == block->AddressMasks()[j]);
assert(addressMasks[j] != 0);
- CodeRanges[addressRanges[j] / 512].Code |= addressMasks[j];
- CodeRanges[addressRanges[j] / 512].Blocks.Add(block);
- UpdateRegionByPseudoPhyiscal(addressRanges[j], true);
+ AddressRange* region = CodeMemRegions[addressRanges[j] >> 28];
+
+ if (!PageContainsCode(&region[(addressRanges[j] & 0xFFFF000) / 512]))
+ ARMJIT_Memory::SetCodeProtection(addressRanges[j] >> 28, addressRanges[j] & 0xFFFFFFF, true);
+
+ AddressRange* range = &region[(addressRanges[j] & 0xFFFFFFF) / 512];
+ range->Code |= addressMasks[j];
+ range->Blocks.Add(block);
}
if (cpu->Num == 0)
- {
- JitBlocks9[pseudoPhysicalAddr] = block;
- FastBlockLookUp9.Insert(pseudoPhysicalAddr, JITCompiler->SubEntryOffset(block->EntryPoint));
- }
+ JitBlocks9[blockAddr] = block;
else
- {
- JitBlocks7[pseudoPhysicalAddr] = block;
- FastBlockLookUp7.Insert(pseudoPhysicalAddr, JITCompiler->SubEntryOffset(block->EntryPoint));
- }
+ JitBlocks7[blockAddr] = block;
+
+ u64* entry = &FastBlockLookupRegions[(localAddr >> 28)][(localAddr & 0xFFFFFFF) / 2];
+ *entry = ((u64)blockAddr | cpu->Num) << 32;
+ *entry |= JITCompiler->SubEntryOffset(block->EntryPoint);
}
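CompileBlock now publishes a block by writing one 64-bit slot into the region's fast-lookup table: the tag (block address ORed with the CPU number) goes in the upper half and the compiler entry offset in the lower half. A minimal sketch of that slot format, with illustrative values:

    #include <cstdint>
    #include <cassert>

    int main()
    {
        uint32_t blockAddr = 0x02004000, num = 0, entryOffset = 0x1A40; // illustrative
        uint64_t entry = ((uint64_t)(blockAddr | num) << 32) | entryOffset;

        // LookUpBlock accepts the slot only when the tag matches the address/CPU pair.
        assert((entry >> 32) == (blockAddr | num));
        assert((uint32_t)entry == entryOffset);
        return 0;
    }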
-void InvalidateByAddr(u32 pseudoPhysical)
+void InvalidateByAddr(u32 localAddr)
{
- JIT_DEBUGPRINT("invalidating by addr %x\n", pseudoPhysical);
+ JIT_DEBUGPRINT("invalidating by addr %x\n", localAddr);
- AddressRange* range = &CodeRanges[pseudoPhysical / 512];
- u32 mask = 1 << ((pseudoPhysical & 0x1FF) / 16);
+ AddressRange* region = CodeMemRegions[localAddr >> 28];
+ AddressRange* range = &region[(localAddr & 0xFFFFFFF) / 512];
+ u32 mask = 1 << ((localAddr & 0x1FF) / 16);
range->Code = 0;
for (int i = 0; i < range->Blocks.Length;)
@@ -1138,7 +977,7 @@ void InvalidateByAddr(u32 pseudoPhysical)
u32 mask = 0;
for (int j = 0; j < block->NumAddresses; j++)
{
- if (block->AddressRanges()[j] == (pseudoPhysical & ~0x1FF))
+ if (block->AddressRanges()[j] == (localAddr & ~0x1FF))
{
mask = block->AddressMasks()[j];
invalidated = block->AddressMasks()[j] & mask;
@@ -1154,15 +993,21 @@ void InvalidateByAddr(u32 pseudoPhysical)
}
range->Blocks.Remove(i);
+ if (range->Blocks.Length == 0
+ && !PageContainsCode(&region[(localAddr & 0xFFFF000) / 512]))
+ {
+ ARMJIT_Memory::SetCodeProtection(localAddr >> 28, localAddr & 0xFFFFFFF, false);
+ }
+
bool literalInvalidation = false;
for (int j = 0; j < block->NumLiterals; j++)
{
u32 addr = block->Literals()[j];
- if (addr == pseudoPhysical)
+ if (addr == localAddr)
{
- if (InvalidLiterals.Find(pseudoPhysical) != -1)
+ if (InvalidLiterals.Find(localAddr) != -1)
{
- InvalidLiterals.Add(pseudoPhysical);
+ InvalidLiterals.Add(localAddr);
JIT_DEBUGPRINT("found invalid literal %d\n", InvalidLiterals.Length);
}
literalInvalidation = true;
@@ -1172,35 +1017,30 @@ void InvalidateByAddr(u32 pseudoPhysical)
for (int j = 0; j < block->NumAddresses; j++)
{
u32 addr = block->AddressRanges()[j];
- if ((addr / 512) != (pseudoPhysical / 512))
+ if ((addr / 512) != (localAddr / 512))
{
- AddressRange* otherRange = &CodeRanges[addr / 512];
+ AddressRange* otherRegion = CodeMemRegions[addr >> 28];
+ AddressRange* otherRange = &otherRegion[(addr & 0xFFFFFFF) / 512];
assert(otherRange != range);
+
bool removed = otherRange->Blocks.RemoveByValue(block);
assert(removed);
if (otherRange->Blocks.Length == 0)
{
+ if (!PageContainsCode(&otherRegion[(addr & 0xFFFF000) / 512]))
+ ARMJIT_Memory::SetCodeProtection(addr >> 28, addr & 0xFFFFFFF, false);
+
otherRange->Code = 0;
- UpdateRegionByPseudoPhyiscal(addr, false);
}
}
}
- for (int j = 0; j < block->NumLinks(); j++)
- JITCompiler->UnlinkBlock(block->Links()[j]);
- block->ResetLinks();
-
+ FastBlockLookupRegions[block->StartAddrLocal >> 28][(block->StartAddrLocal & 0xFFFFFFF) / 2] = (u64)UINT32_MAX << 32;
if (block->Num == 0)
- {
- JitBlocks9.erase(block->PseudoPhysicalAddr);
- FastBlockLookUp9.Remove(block->PseudoPhysicalAddr);
- }
+ JitBlocks9.erase(block->StartAddr);
else
- {
- JitBlocks7.erase(block->PseudoPhysicalAddr);
- FastBlockLookUp7.Remove(block->PseudoPhysicalAddr);
- }
+ JitBlocks7.erase(block->StartAddr);
if (!literalInvalidation)
{
@@ -1213,24 +1053,66 @@ void InvalidateByAddr(u32 pseudoPhysical)
delete block;
}
}
+}
- if (range->Blocks.Length == 0)
- UpdateRegionByPseudoPhyiscal(pseudoPhysical, false);
+template <u32 num, int region>
+void CheckAndInvalidate(u32 addr)
+{
+ // let's hope this gets all properly inlined
+ u32 mappingStart, mappingSize, memoryOffset, memorySize;
+ if (ARMJIT_Memory::GetRegionMapping(region, num, mappingStart, mappingSize, memoryOffset, memorySize))
+ {
+ u32 localAddr = ((addr - mappingStart) & (memorySize - 1)) + memoryOffset;
+ if (CodeMemRegions[region][localAddr / 512].Code & (1 << ((localAddr & 0x1FF) / 16)))
+ InvalidateByAddr(localAddr | (region << 28));
+ }
+}
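CheckAndInvalidate leans on the 512-byte AddressRange granularity, where Code is a bitmap with one bit per 16-byte chunk; a small sketch of the index/mask arithmetic it uses (the offset is hypothetical):

    #include <cstdint>
    #include <cassert>

    int main()
    {
        uint32_t localAddr = 0x0123;                      // hypothetical offset inside a region
        uint32_t rangeIndex = localAddr / 512;            // which AddressRange covers it
        uint32_t mask = 1u << ((localAddr & 0x1FF) / 16); // which 16-byte chunk inside that range
        assert(rangeIndex == 0);
        assert(mask == (1u << 18));                       // 0x123 & 0x1FF = 0x123, / 16 = 18
        return 0;
    }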
+
+JitBlockEntry LookUpBlock(u32 num, u64* entries, u32 offset, u32 addr)
+{
+ u64* entry = &entries[offset / 2];
+ if (*entry >> 32 == (addr | num))
+ return JITCompiler->AddEntryOffset((u32)*entry);
+ return NULL;
}
-void InvalidateRegionIfNecessary(u32 pseudoPhyisical)
+bool SetupExecutableRegion(u32 num, u32 blockAddr, u64*& entry, u32& start, u32& size)
{
- if (CodeRanges[pseudoPhyisical / 512].Code & (1 << ((pseudoPhyisical & 0x1FF) / 16)))
- InvalidateByAddr(pseudoPhyisical);
+ int region = num == 0
+ ? ARMJIT_Memory::ClassifyAddress9(blockAddr)
+ : ARMJIT_Memory::ClassifyAddress7(blockAddr);
+
+ u32 mappingStart, mappingSize, memoryOffset, memorySize;
+ if (CodeMemRegions[region]
+ && ARMJIT_Memory::GetRegionMapping(region, num, mappingStart,
+ mappingSize, memoryOffset, memorySize))
+ {
+ entry = FastBlockLookupRegions[region] + memoryOffset / 2;
+ // evil, though it should work for everything except DTCM which is not relevant here
+ start = blockAddr & ~(memorySize - 1);
+ size = memorySize;
+ return true;
+ }
+ else
+ return false;
}
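The "evil" start/size computation above works because every mirrored region here has a power-of-two size, so masking off the low bits lands on the base of the mirror containing the block; a short sketch with made-up values:

    #include <cstdint>
    #include <cassert>

    int main()
    {
        uint32_t memorySize = 0x40000;                  // e.g. a 256 KB mirrored window
        uint32_t blockAddr  = 0x06041234;               // somewhere inside one mirror
        uint32_t start = blockAddr & ~(memorySize - 1);
        assert(start == 0x06040000);                    // base of the containing mirror
        assert(blockAddr - start < memorySize);         // offset stays a valid table index
        return 0;
    }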
+template void CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(u32);
+template void CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(u32);
+template void CheckAndInvalidate<0, ARMJIT_Memory::memregion_SWRAM>(u32);
+template void CheckAndInvalidate<1, ARMJIT_Memory::memregion_SWRAM>(u32);
+template void CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(u32);
+template void CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(u32);
+template void CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(u32);
+template void CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(u32);
+
void ResetBlockCache()
{
printf("Resetting JIT block cache...\n");
InvalidLiterals.Clear();
- FastBlockLookUp9.Reset();
- FastBlockLookUp7.Reset();
+ for (int i = 0; i < ARMJIT_Memory::memregions_Count; i++)
+ memset(FastBlockLookupRegions[i], 0xFF, CodeRegionSizes[i] * sizeof(u64) / 2);
RestoreCandidates.Reset();
for (int i = 0; i < sizeof(RestoreCandidates.Table)/sizeof(RestoreCandidates.Table[0]); i++)
{
@@ -1251,8 +1133,9 @@ void ResetBlockCache()
for (int j = 0; j < block->NumAddresses; j++)
{
u32 addr = block->AddressRanges()[j];
- CodeRanges[addr / 512].Blocks.Clear();
- CodeRanges[addr / 512].Code = 0;
+ AddressRange* range = &CodeMemRegions[addr >> 28][(addr & 0xFFFFFFF) / 512];
+ range->Blocks.Clear();
+ range->Code = 0;
}
delete block;
}
@@ -1262,8 +1145,9 @@ void ResetBlockCache()
for (int j = 0; j < block->NumAddresses; j++)
{
u32 addr = block->AddressRanges()[j];
- CodeRanges[addr / 512].Blocks.Clear();
- CodeRanges[addr / 512].Code = 0;
+ AddressRange* range = &CodeMemRegions[addr >> 28][(addr & 0xFFFFFFF) / 512];
+ range->Blocks.Clear();
+ range->Code = 0;
}
}
JitBlocks9.clear();
@@ -1272,191 +1156,4 @@ void ResetBlockCache()
JITCompiler->Reset();
}
-template <u32 Num>
-JitBlockEntry LookUpBlockEntry(u32 addr)
-{
- auto& fastMap = Num == 0 ? FastBlockLookUp9 : FastBlockLookUp7;
- u32 entryOffset = fastMap.LookUp(addr);
- if (entryOffset != UINT32_MAX)
- return JITCompiler->AddEntryOffset(entryOffset);
-
- auto& slowMap = Num == 0 ? JitBlocks9 : JitBlocks7;
- auto block = slowMap.find(addr);
- if (block != slowMap.end())
- {
- fastMap.Insert(addr, JITCompiler->SubEntryOffset(block->second->EntryPoint));
- return block->second->EntryPoint;
- }
- return NULL;
-}
-
-template JitBlockEntry LookUpBlockEntry<0>(u32);
-template JitBlockEntry LookUpBlockEntry<1>(u32);
-
-template <u32 Num>
-void LinkBlock(ARM* cpu, u32 codeOffset)
-{
- auto& blockMap = Num == 0 ? JitBlocks9 : JitBlocks7;
- u32 instrAddr = cpu->R[15] - ((cpu->CPSR&0x20)?2:4);
- u32 targetPseudoPhys = Num == 0 ? TranslateAddr9(instrAddr) : TranslateAddr7(instrAddr);
- auto block = blockMap.find(targetPseudoPhys);
- if (block == blockMap.end())
- {
- CompileBlock(cpu);
- block = blockMap.find(targetPseudoPhys);
- }
-
- JIT_DEBUGPRINT("linking to block %08x\n", targetPseudoPhys);
-
- block->second->AddLink(codeOffset);
- JITCompiler->LinkBlock(codeOffset, block->second->EntryPoint);
-}
-
-template void LinkBlock<0>(ARM*, u32);
-template void LinkBlock<1>(ARM*, u32);
-
-void WifiWrite32(u32 addr, u32 val)
-{
- Wifi::Write(addr, val & 0xFFFF);
- Wifi::Write(addr + 2, val >> 16);
-}
-
-u32 WifiRead32(u32 addr)
-{
- return Wifi::Read(addr) | (Wifi::Read(addr + 2) << 16);
-}
-
-template <typename T>
-void VRAMWrite(u32 addr, T val)
-{
- switch (addr & 0x00E00000)
- {
- case 0x00000000: GPU::WriteVRAM_ABG<T>(addr, val); return;
- case 0x00200000: GPU::WriteVRAM_BBG<T>(addr, val); return;
- case 0x00400000: GPU::WriteVRAM_AOBJ<T>(addr, val); return;
- case 0x00600000: GPU::WriteVRAM_BOBJ<T>(addr, val); return;
- default: GPU::WriteVRAM_LCDC<T>(addr, val); return;
- }
-}
-template <typename T>
-T VRAMRead(u32 addr)
-{
- switch (addr & 0x00E00000)
- {
- case 0x00000000: return GPU::ReadVRAM_ABG<T>(addr);
- case 0x00200000: return GPU::ReadVRAM_BBG<T>(addr);
- case 0x00400000: return GPU::ReadVRAM_AOBJ<T>(addr);
- case 0x00600000: return GPU::ReadVRAM_BOBJ<T>(addr);
- default: return GPU::ReadVRAM_LCDC<T>(addr);
- }
-}
-
-void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size)
-{
- if (cpu->Num == 0)
- {
- switch (addr & 0xFF000000)
- {
- case 0x04000000:
- if (!store && size == 32 && addr == 0x04100010 && NDS::ExMemCnt[0] & (1<<11))
- return (void*)NDSCart::ReadROMData;
-
- /*
- unfortunately we can't map GPU2D this way
- since it's hidden inside an object
-
- though GPU3D registers are accessed much more intensive
- */
- if (addr >= 0x04000320 && addr < 0x040006A4)
- {
- switch (size | store)
- {
- case 8: return (void*)GPU3D::Read8;
- case 9: return (void*)GPU3D::Write8;
- case 16: return (void*)GPU3D::Read16;
- case 17: return (void*)GPU3D::Write16;
- case 32: return (void*)GPU3D::Read32;
- case 33: return (void*)GPU3D::Write32;
- }
- }
-
- switch (size | store)
- {
- case 8: return (void*)NDS::ARM9IORead8;
- case 9: return (void*)NDS::ARM9IOWrite8;
- case 16: return (void*)NDS::ARM9IORead16;
- case 17: return (void*)NDS::ARM9IOWrite16;
- case 32: return (void*)NDS::ARM9IORead32;
- case 33: return (void*)NDS::ARM9IOWrite32;
- }
- break;
- case 0x06000000:
- switch (size | store)
- {
- case 8: return (void*)VRAMRead<u8>;
- case 9: return NULL;
- case 16: return (void*)VRAMRead<u16>;
- case 17: return (void*)VRAMWrite<u16>;
- case 32: return (void*)VRAMRead<u32>;
- case 33: return (void*)VRAMWrite<u32>;
- }
- break;
- }
- }
- else
- {
- switch (addr & 0xFF800000)
- {
- case 0x04000000:
- if (addr >= 0x04000400 && addr < 0x04000520)
- {
- switch (size | store)
- {
- case 8: return (void*)SPU::Read8;
- case 9: return (void*)SPU::Write8;
- case 16: return (void*)SPU::Read16;
- case 17: return (void*)SPU::Write16;
- case 32: return (void*)SPU::Read32;
- case 33: return (void*)SPU::Write32;
- }
- }
-
- switch (size | store)
- {
- case 8: return (void*)NDS::ARM7IORead8;
- case 9: return (void*)NDS::ARM7IOWrite8;
- case 16: return (void*)NDS::ARM7IORead16;
- case 17: return (void*)NDS::ARM7IOWrite16;
- case 32: return (void*)NDS::ARM7IORead32;
- case 33: return (void*)NDS::ARM7IOWrite32;
- }
- break;
- case 0x04800000:
- if (addr < 0x04810000 && size >= 16)
- {
- switch (size | store)
- {
- case 16: return (void*)Wifi::Read;
- case 17: return (void*)Wifi::Write;
- case 32: return (void*)WifiRead32;
- case 33: return (void*)WifiWrite32;
- }
- }
- break;
- case 0x06000000:
- case 0x06800000:
- switch (size | store)
- {
- case 8: return (void*)GPU::ReadVRAM_ARM7<u8>;
- case 9: return (void*)GPU::WriteVRAM_ARM7<u8>;
- case 16: return (void*)GPU::ReadVRAM_ARM7<u16>;
- case 17: return (void*)GPU::WriteVRAM_ARM7<u16>;
- case 32: return (void*)GPU::ReadVRAM_ARM7<u32>;
- case 33: return (void*)GPU::WriteVRAM_ARM7<u32>;
- }
- }
- }
- return NULL;
-}
-
}
diff --git a/src/ARMJIT.h b/src/ARMJIT.h
index 44a6140..2320b7b 100644
--- a/src/ARMJIT.h
+++ b/src/ARMJIT.h
@@ -9,32 +9,7 @@
namespace ARMJIT
{
-enum ExeMemKind
-{
- exeMem_Unmapped = 0,
- exeMem_ITCM,
- exeMem_MainRAM,
- exeMem_SWRAM,
- exeMem_LCDC,
- exeMem_ARM9_BIOS,
- exeMem_ARM7_BIOS,
- exeMem_ARM7_WRAM,
- exeMem_ARM7_WVRAM,
- exeMem_Count
-};
-
-extern const u32 ExeMemRegionOffsets[];
-extern const u32 ExeMemRegionSizes[];
-
-typedef u32 (*JitBlockEntry)();
-
-const u32 ExeMemSpaceSize = 0x518000; // I hate you C++, sometimes I really hate you...
-
-u32 TranslateAddr9(u32 addr);
-u32 TranslateAddr7(u32 addr);
-
-template <u32 Num>
-JitBlockEntry LookUpBlockEntry(u32 addr);
+typedef void (*JitBlockEntry)();
void Init();
void DeInit();
@@ -43,44 +18,15 @@ void Reset();
void InvalidateByAddr(u32 pseudoPhysical);
-void InvalidateRegionIfNecessary(u32 addr);
-
-inline void InvalidateMainRAMIfNecessary(u32 addr)
-{
- InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_MainRAM] + (addr & (MAIN_RAM_SIZE - 1)));
-}
-inline void InvalidateITCMIfNecessary(u32 addr)
-{
- InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_ITCM] + (addr & 0x7FFF));
-}
-inline void InvalidateLCDCIfNecessary(u32 addr)
-{
- if (addr < 0x68A3FFF)
- InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_LCDC] + (addr - 0x6800000));
-}
-inline void InvalidateSWRAM7IfNecessary(u32 addr)
-{
- InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_SWRAM] + (NDS::SWRAM_ARM7 - NDS::SharedWRAM) + (addr & NDS::SWRAM_ARM7Mask));
-}
-inline void InvalidateSWRAM9IfNecessary(u32 addr)
-{
- InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_SWRAM] + (NDS::SWRAM_ARM9 - NDS::SharedWRAM) + (addr & NDS::SWRAM_ARM9Mask));
-}
-inline void InvalidateARM7WRAMIfNecessary(u32 addr)
-{
- InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_ARM7_WRAM] + (addr & 0xFFFF));
-}
-inline void InvalidateARM7WVRAMIfNecessary(u32 addr)
-{
- InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_ARM7_WVRAM] + (addr & 0x1FFFF));
-}
+template <u32 num, int region>
+void CheckAndInvalidate(u32 addr);
void CompileBlock(ARM* cpu);
void ResetBlockCache();
-void UpdateMemoryStatus9(u32 start, u32 end);
-void UpdateMemoryStatus7(u32 start, u32 end);
+JitBlockEntry LookUpBlock(u32 num, u64* entries, u32 offset, u32 addr);
+bool SetupExecutableRegion(u32 num, u32 blockAddr, u64*& entry, u32& start, u32& size);
}
diff --git a/src/ARMJIT_A64/ARMJIT_ALU.cpp b/src/ARMJIT_A64/ARMJIT_ALU.cpp
index 0fe6a97..5f021a0 100644
--- a/src/ARMJIT_A64/ARMJIT_ALU.cpp
+++ b/src/ARMJIT_A64/ARMJIT_ALU.cpp
@@ -243,7 +243,7 @@ void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2
if (S && !CurInstr.SetFlags)
S = false;
- bool CVInGP = false;
+ bool CVInGPR = false;
switch (op)
{
case 0x2: // SUB
@@ -306,7 +306,7 @@ void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2
UBFX(W2, RCPSR, 29, 1);
if (S)
{
- CVInGP = true;
+ CVInGPR = true;
ADDS(W1, rn, W2);
CSET(W2, CC_CS);
CSET(W3, CC_VS);
@@ -335,7 +335,7 @@ void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2
ORN(W1, WZR, op2.Reg.Rm, op2.ToArithOption());
if (S)
{
- CVInGP = true;
+ CVInGPR = true;
ADDS(W1, W2, W1);
CSET(W2, CC_CS);
CSET(W3, CC_VS);
@@ -355,7 +355,7 @@ void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2
MVN(W1, rn);
if (S)
{
- CVInGP = true;
+ CVInGPR = true;
ADDS(W1, W2, W1);
CSET(W2, CC_CS);
CSET(W3, CC_VS);
@@ -379,12 +379,12 @@ void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2
if (S)
{
- if (CVInGP)
+ if (CVInGPR)
{
BFI(RCPSR, W2, 29, 1);
BFI(RCPSR, W3, 28, 1);
}
- Comp_RetriveFlags(!CVInGP);
+ Comp_RetriveFlags(!CVInGPR);
}
}
@@ -501,7 +501,23 @@ void Compiler::A_Comp_ALUMovOp()
MOVI2R(rd, op2.Imm);
}
else
- MOV(rd, op2.Reg.Rm, op2.ToArithOption());
+ {
+ // ORR with a shifted operand incurs extra cycle latency
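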
+ if (op2.Reg.ShiftAmount > 0)
+ {
+ switch (op2.Reg.ShiftType)
+ {
+ case ST_LSL: LSL(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
+ case ST_LSR: LSR(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
+ case ST_ASR: ASR(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
+ case ST_ROR: ROR_(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
+ }
+ }
+ else
+ {
+ MOV(rd, op2.Reg.Rm, op2.ToArithOption());
+ }
+ }
}
if (S)
@@ -558,10 +574,7 @@ void Compiler::Comp_Mul_Mla(bool S, bool mla, ARM64Reg rd, ARM64Reg rm, ARM64Reg
}
else
{
- CLZ(W0, rs);
- CLS(W1, rs);
- CMP(W0, W1);
- CSEL(W0, W0, W1, CC_GT);
+ CLS(W0, rs);
Comp_AddCycles_CI(mla ? 1 : 0, W0, ArithOption(W0, ST_LSR, 3));
}
@@ -594,10 +607,10 @@ void Compiler::A_Comp_Mul_Long()
}
else
{
- CLZ(W0, rs);
- CLS(W1, rs);
- CMP(W0, W1);
- CSEL(W0, W0, W1, CC_GT);
+ if (sign)
+ CLS(W0, rs);
+ else
+ CLZ(W0, rs);
Comp_AddCycles_CI(0, W0, ArithOption(W0, ST_LSR, 3));
}
@@ -628,6 +641,86 @@ void Compiler::A_Comp_Mul_Long()
Comp_RetriveFlags(false);
}
+void Compiler::A_Comp_Mul_Short()
+{
+ ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
+ ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
+ ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
+ u32 op = (CurInstr.Instr >> 21) & 0xF;
+
+ bool x = CurInstr.Instr & (1 << 5);
+ bool y = CurInstr.Instr & (1 << 6);
+
+ SBFX(W1, rs, y ? 16 : 0, 16);
+
+ if (op == 0b1000)
+ {
+ // SMLAxy
+
+ SBFX(W0, rm, x ? 16 : 0, 16);
+
+ MUL(W0, W0, W1);
+
+ ORRI2R(W1, RCPSR, 0x08000000);
+
+ ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
+ ADDS(rd, W0, rn);
+
+ CSEL(RCPSR, W1, RCPSR, CC_VS);
+
+ CPSRDirty = true;
+
+ Comp_AddCycles_C();
+ }
+ else if (op == 0b1011)
+ {
+ // SMULxy
+
+ SBFX(W0, rm, x ? 16 : 0, 16);
+
+ MUL(rd, W0, W1);
+
+ Comp_AddCycles_C();
+ }
+ else if (op == 0b1010)
+ {
+ // SMLALxy
+
+ ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
+
+ MOV(W2, rn);
+ BFI(X2, rd, 32, 32);
+
+ SBFX(W0, rm, x ? 16 : 0, 16);
+
+ SMADDL(EncodeRegTo64(rn), W0, W1, X2);
+
+ UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32);
+
+ Comp_AddCycles_CI(1);
+ }
+ else if (op == 0b1001)
+ {
+ // SMLAWy/SMULWy
+ SMULL(X0, rm, W1);
+ ASR(x ? EncodeRegTo64(rd) : X0, X0, 16);
+
+ if (!x)
+ {
+ ORRI2R(W1, RCPSR, 0x08000000);
+
+ ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
+ ADDS(rd, W0, rn);
+
+ CSEL(RCPSR, W1, RCPSR, CC_VS);
+
+ CPSRDirty = true;
+ }
+
+ Comp_AddCycles_C();
+ }
+}
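As a reference for the emitted sequences above, the halfword multiplies behave as follows; a hedged standalone sketch of SMLAxy semantics (the helper name is invented, and Q-flag handling is only noted in a comment):

    #include <cstdint>
    #include <cassert>

    // Pick a signed 16-bit half of each operand, multiply, accumulate into Rn.
    // Overflow of the accumulate sets the sticky Q flag, bit 27 of CPSR — the
    // 0x08000000 constant ORed in above.
    int32_t SMLAxy_ref(uint32_t rm, uint32_t rs, int32_t rn, bool x, bool y)
    {
        int32_t a = (int16_t)(rm >> (x ? 16 : 0));
        int32_t b = (int16_t)(rs >> (y ? 16 : 0));
        return a * b + rn;
    }

    int main()
    {
        assert(SMLAxy_ref(0xFFFF0003u, 0x00050002u, 10, false, false) == 3 * 2 + 10);
        assert(SMLAxy_ref(0xFFFF0003u, 0x00050002u, 10, true,  false) == -1 * 2 + 10);
        return 0;
    }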
+
void Compiler::A_Comp_Mul()
{
ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
diff --git a/src/ARMJIT_A64/ARMJIT_Branch.cpp b/src/ARMJIT_A64/ARMJIT_Branch.cpp
index 542f0b7..f130938 100644
--- a/src/ARMJIT_A64/ARMJIT_Branch.cpp
+++ b/src/ARMJIT_A64/ARMJIT_Branch.cpp
@@ -143,7 +143,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
ConstantCycles += cycles;
else
- ADD(RCycles, RCycles, cycles);
+ SUB(RCycles, RCycles, cycles);
}
@@ -152,23 +152,19 @@ void* Compiler::Gen_JumpTo9(int kind)
AlignCode16();
void* res = GetRXPtr();
- MOVI2R(W2, kCodeCacheTiming);
- // W1 - code cycles non branch
- // W2 - branch code cycles
LSR(W1, W0, 12);
- LSL(W1, W1, 2);
ADDI2R(W1, W1, offsetof(ARMv5, MemTimings), W2);
LDRB(W1, RCPU, W1);
- LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, ITCMSize));
+ LDR(INDEX_UNSIGNED, W2, RCPU, offsetof(ARMv5, ITCMSize));
STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARMv5, RegionCodeCycles));
- CMP(W0, W3);
- FixupBranch outsideITCM = B(CC_LO);
- MOVI2R(W1, 1);
- MOVI2R(W2, 1);
- SetJumpTarget(outsideITCM);
+ CMP(W1, 0xFF);
+ MOVI2R(W3, kCodeCacheTiming);
+ CSEL(W1, W3, W1, CC_EQ);
+ CMP(W0, W2);
+ CSINC(W1, W1, WZR, CC_HS);
FixupBranch switchToThumb;
if (kind == 0)
@@ -176,40 +172,36 @@ void* Compiler::Gen_JumpTo9(int kind)
if (kind == 0 || kind == 1)
{
- ANDI2R(W0, W0, ~3);
-
+ // ARM
if (kind == 0)
ANDI2R(RCPSR, RCPSR, ~0x20);
- ADD(W3, W0, 4);
- STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
-
- ADD(W1, W1, W2);
- ADD(RCycles, RCycles, W1);
+ ANDI2R(W0, W0, ~3);
+ ADD(W0, W0, 4);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15]));
+ ADD(W1, W1, W1);
+ SUB(RCycles, RCycles, W1);
RET();
}
+
if (kind == 0 || kind == 2)
{
+ // Thumb
if (kind == 0)
{
SetJumpTarget(switchToThumb);
-
ORRI2R(RCPSR, RCPSR, 0x20);
}
ANDI2R(W0, W0, ~1);
+ ADD(W0, W0, 2);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15]));
- ADD(W3, W0, 2);
- STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
-
- FixupBranch halfwordLoc = TBZ(W0, 1);
- ADD(W1, W1, W2);
- ADD(RCycles, RCycles, W1);
- RET();
-
- SetJumpTarget(halfwordLoc);
- ADD(RCycles, RCycles, W2);
+ ADD(W2, W1, W1);
+ TSTI2R(W0, 0x2);
+ CSEL(W1, W1, W2, CC_EQ);
+ SUB(RCycles, RCycles, W1);
RET();
}
@@ -237,7 +229,7 @@ void* Compiler::Gen_JumpTo7(int kind)
UBFX(W2, W3, 0, 8);
UBFX(W3, W3, 8, 8);
ADD(W2, W3, W2);
- ADD(RCycles, RCycles, W2);
+ SUB(RCycles, RCycles, W2);
ANDI2R(W0, W0, ~3);
@@ -261,7 +253,7 @@ void* Compiler::Gen_JumpTo7(int kind)
UBFX(W2, W3, 16, 8);
UBFX(W3, W3, 24, 8);
ADD(W2, W3, W2);
- ADD(RCycles, RCycles, W2);
+ SUB(RCycles, RCycles, W2);
ANDI2R(W0, W0, ~1);
@@ -287,22 +279,11 @@ void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool resto
}
else
{
- BitSet16 hiRegsLoaded(RegCache.DirtyRegs & 0xFF00);
- bool previouslyDirty = CPSRDirty;
+
+ bool cpsrDirty = CPSRDirty;
SaveCPSR();
-
- if (restoreCPSR)
- {
- if (Thumb || CurInstr.Cond() >= 0xE)
- RegCache.Flush();
- else
- {
- // the ugly way...
- // we only save them, to load and save them again
- for (int reg : hiRegsLoaded)
- SaveReg(reg, RegCache.Mapping[reg]);
- }
- }
+ SaveCycles();
+ PushRegs(restoreCPSR);
if (switchThumb)
MOV(W1, addr);
@@ -319,16 +300,12 @@ void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool resto
QuickCallFunction(X3, jumpToTrampoline<ARMv5>);
else
QuickCallFunction(X3, jumpToTrampoline<ARMv4>);
-
- if (!Thumb && restoreCPSR && CurInstr.Cond() < 0xE)
- {
- for (int reg : hiRegsLoaded)
- LoadReg(reg, RegCache.Mapping[reg]);
- }
- if (previouslyDirty)
- LoadCPSR();
- CPSRDirty = previouslyDirty;
+ PopRegs(restoreCPSR);
+ LoadCycles();
+ LoadCPSR();
+ if (CurInstr.Cond() < 0xE)
+ CPSRDirty = cpsrDirty;
}
}
@@ -368,21 +345,13 @@ void Compiler::T_Comp_BCOND()
s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
Comp_JumpTo(R15 + offset + 1, true);
- Comp_BranchSpecialBehaviour();
+ Comp_BranchSpecialBehaviour(true);
FixupBranch skipFailed = B();
SetJumpTarget(skipExecute);
Comp_AddCycles_C(true);
- if (CurInstr.BranchFlags & branch_FollowCondTaken)
- {
- SaveCPSR(false);
- RegCache.PrepareExit();
-
- ADD(W0, RCycles, ConstantCycles);
- ABI_PopRegisters(SavedRegs);
- RET();
- }
+ Comp_BranchSpecialBehaviour(false);
SetJumpTarget(skipFailed);
}
diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
index a67f357..42435ed 100644
--- a/src/ARMJIT_A64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
@@ -1,9 +1,3 @@
-#include "ARMJIT_Compiler.h"
-
-#include "../ARMInterpreter.h"
-
-#include "../ARMJIT_Internal.h"
-
#ifdef __SWITCH__
#include "../switch/compat_switch.h"
@@ -13,10 +7,17 @@ extern char __start__;
#include <unistd.h>
#endif
+#include "ARMJIT_Compiler.h"
+
+#include "../ARMJIT_Internal.h"
+#include "../ARMInterpreter.h"
+#include "../Config.h"
+
#include <malloc.h>
using namespace Arm64Gen;
+extern "C" void ARM_Ret();
namespace ARMJIT
{
@@ -28,7 +29,10 @@ namespace ARMJIT
like x64. At one hand you can translate a lot of instructions directly.
But at the same time, there are a ton of exceptions, like for
example ADD and SUB can't have a RORed second operand on ARMv8.
- */
+
+ When writing a JIT, remember: if an instruction is recompiled into multiple
+ native ones, don't write back the result until all other operands have been read!
+*/
template <>
const ARM64Reg RegisterCache<Compiler, ARM64Reg>::NativeRegAllocOrder[] =
@@ -46,6 +50,132 @@ void Compiler::MovePC()
ADD(MapReg(15), MapReg(15), Thumb ? 2 : 4);
}
+void Compiler::A_Comp_MRS()
+{
+ Comp_AddCycles_C();
+
+ ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
+
+ if (CurInstr.Instr & (1 << 22))
+ {
+ ANDI2R(W5, RCPSR, 0x1F);
+ MOVI2R(W3, 0);
+ MOVI2R(W1, 15 - 8);
+ BL(ReadBanked);
+ MOV(rd, W3);
+ }
+ else
+ MOV(rd, RCPSR);
+}
+
+void Compiler::A_Comp_MSR()
+{
+ Comp_AddCycles_C();
+
+ ARM64Reg val;
+ if (CurInstr.Instr & (1 << 25))
+ {
+ val = W0;
+ MOVI2R(val, ROR((CurInstr.Instr & 0xFF), ((CurInstr.Instr >> 7) & 0x1E)));
+ }
+ else
+ {
+ val = MapReg(CurInstr.A_Reg(0));
+ }
+
+ u32 mask = 0;
+ if (CurInstr.Instr & (1<<16)) mask |= 0x000000FF;
+ if (CurInstr.Instr & (1<<17)) mask |= 0x0000FF00;
+ if (CurInstr.Instr & (1<<18)) mask |= 0x00FF0000;
+ if (CurInstr.Instr & (1<<19)) mask |= 0xFF000000;
+
+ if (CurInstr.Instr & (1 << 22))
+ {
+ ANDI2R(W5, RCPSR, 0x1F);
+ MOVI2R(W3, 0);
+ MOVI2R(W1, 15 - 8);
+ BL(ReadBanked);
+
+ MOVI2R(W1, mask);
+ MOVI2R(W2, mask & 0xFFFFFF00);
+ ANDI2R(W5, RCPSR, 0x1F);
+ CMP(W5, 0x10);
+ CSEL(W1, W2, W1, CC_EQ);
+
+ BIC(W3, W3, W1);
+ AND(W0, val, W1);
+ ORR(W3, W3, W0);
+
+ MOVI2R(W1, 15 - 8);
+
+ BL(WriteBanked);
+ }
+ else
+ {
+ mask &= 0xFFFFFFDF;
+ CPSRDirty = true;
+
+ if ((mask & 0xFF) == 0)
+ {
+ ANDI2R(RCPSR, RCPSR, ~mask);
+ ANDI2R(W0, val, mask);
+ ORR(RCPSR, RCPSR, W0);
+ }
+ else
+ {
+ MOVI2R(W2, mask);
+ MOVI2R(W3, mask & 0xFFFFFF00);
+ ANDI2R(W1, RCPSR, 0x1F);
+ // W1 = first argument
+ CMP(W1, 0x10);
+ CSEL(W2, W3, W2, CC_EQ);
+
+ BIC(RCPSR, RCPSR, W2);
+ AND(W0, val, W2);
+ ORR(RCPSR, RCPSR, W0);
+
+ MOV(W2, RCPSR);
+ MOV(X0, RCPU);
+
+ PushRegs(true);
+
+ QuickCallFunction(X3, (void*)&ARM::UpdateMode);
+
+ PopRegs(true);
+ }
+ }
+}
+
+void Compiler::PushRegs(bool saveHiRegs)
+{
+ if (saveHiRegs)
+ {
+ if (Thumb || CurInstr.Cond() == 0xE)
+ {
+ BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
+ for (int reg : hiRegsLoaded)
+ RegCache.UnloadRegister(reg);
+ }
+ else
+ {
+ BitSet16 hiRegsDirty(RegCache.LoadedRegs & 0x7F00);
+ for (int reg : hiRegsDirty)
+ SaveReg(reg, RegCache.Mapping[reg]);
+ }
+ }
+}
+
+void Compiler::PopRegs(bool saveHiRegs)
+{
+ if (saveHiRegs)
+ {
+ BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
+
+ for (int reg : hiRegsLoaded)
+ LoadReg(reg, RegCache.Mapping[reg]);
+ }
+}
+
Compiler::Compiler()
{
#ifdef __SWITCH__
@@ -80,8 +210,7 @@ Compiler::Compiler()
assert(succeded);
SetCodeBase((u8*)JitRWStart, (u8*)JitRXStart);
- JitMemUseableSize = JitMemSize;
- Reset();
+ JitMemMainSize = JitMemSize;
#else
u64 pageSize = sysconf(_SC_PAGE_SIZE);
u8* pageAligned = (u8*)(((u64)JitMem & ~(pageSize - 1)) + pageSize);
@@ -90,31 +219,8 @@ Compiler::Compiler()
SetCodeBase(pageAligned, pageAligned);
JitMemUseableSize = alignedSize;
- Reset();
#endif
-
- for (int i = 0; i < 3; i++)
- {
- for (int j = 0; j < 2; j++)
- {
- MemFunc9[i][j] = Gen_MemoryRoutine9(8 << i, j);
- }
- }
- MemFunc7[0][0] = (void*)NDS::ARM7Read8;
- MemFunc7[1][0] = (void*)NDS::ARM7Read16;
- MemFunc7[2][0] = (void*)NDS::ARM7Read32;
- MemFunc7[0][1] = (void*)NDS::ARM7Write8;
- MemFunc7[1][1] = (void*)NDS::ARM7Write16;
- MemFunc7[2][1] = (void*)NDS::ARM7Write32;
-
- for (int i = 0; i < 2; i++)
- {
- for (int j = 0; j < 2; j++)
- {
- MemFuncsSeq9[i][j] = Gen_MemoryRoutine9Seq(i, j);
- MemFuncsSeq7[i][j] = Gen_MemoryRoutine7Seq(i, j);
- }
- }
+ SetCodePtr(0);
for (int i = 0; i < 3; i++)
{
@@ -123,26 +229,26 @@ Compiler::Compiler()
}
/*
- W0 - mode
+ W5 - mode
W1 - reg num
W3 - in/out value of reg
*/
{
ReadBanked = GetRXPtr();
- ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
- CMP(W0, 0x11);
+ ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
+ CMP(W5, 0x11);
FixupBranch fiq = B(CC_EQ);
SUBS(W1, W1, 13 - 8);
- ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
+ ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
FixupBranch notEverything = B(CC_LT);
- CMP(W0, 0x12);
+ CMP(W5, 0x12);
FixupBranch irq = B(CC_EQ);
- CMP(W0, 0x13);
+ CMP(W5, 0x13);
FixupBranch svc = B(CC_EQ);
- CMP(W0, 0x17);
+ CMP(W5, 0x17);
FixupBranch abt = B(CC_EQ);
- CMP(W0, 0x1B);
+ CMP(W5, 0x1B);
FixupBranch und = B(CC_EQ);
SetJumpTarget(notEverything);
RET();
@@ -166,19 +272,19 @@ Compiler::Compiler()
{
WriteBanked = GetRXPtr();
- ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
- CMP(W0, 0x11);
+ ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
+ CMP(W5, 0x11);
FixupBranch fiq = B(CC_EQ);
SUBS(W1, W1, 13 - 8);
- ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
+ ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
FixupBranch notEverything = B(CC_LT);
- CMP(W0, 0x12);
+ CMP(W5, 0x12);
FixupBranch irq = B(CC_EQ);
- CMP(W0, 0x13);
+ CMP(W5, 0x13);
FixupBranch svc = B(CC_EQ);
- CMP(W0, 0x17);
+ CMP(W5, 0x17);
FixupBranch abt = B(CC_EQ);
- CMP(W0, 0x1B);
+ CMP(W5, 0x1B);
FixupBranch und = B(CC_EQ);
SetJumpTarget(notEverything);
MOVI2R(W4, 0);
@@ -206,9 +312,71 @@ Compiler::Compiler()
RET();
}
- //FlushIcache();
+ for (int num = 0; num < 2; num++)
+ {
+ for (int size = 0; size < 3; size++)
+ {
+ for (int reg = 0; reg < 8; reg++)
+ {
+ ARM64Reg rdMapped = (ARM64Reg)(W19 + reg);
+ PatchedStoreFuncs[num][size][reg] = GetRXPtr();
+ if (num == 0)
+ {
+ MOV(X1, RCPU);
+ MOV(W2, rdMapped);
+ }
+ else
+ {
+ MOV(W1, rdMapped);
+ }
+ ABI_PushRegisters({30});
+ switch ((8 << size) | num)
+ {
+ case 32: QuickCallFunction(X3, SlowWrite9<u32>); break;
+ case 33: QuickCallFunction(X3, SlowWrite7<u32>); break;
+ case 16: QuickCallFunction(X3, SlowWrite9<u16>); break;
+ case 17: QuickCallFunction(X3, SlowWrite7<u16>); break;
+ case 8: QuickCallFunction(X3, SlowWrite9<u8>); break;
+ case 9: QuickCallFunction(X3, SlowWrite7<u8>); break;
+ }
+ ABI_PopRegisters({30});
+ RET();
+
+ for (int signextend = 0; signextend < 2; signextend++)
+ {
+ PatchedLoadFuncs[num][size][signextend][reg] = GetRXPtr();
+ if (num == 0)
+ MOV(X1, RCPU);
+ ABI_PushRegisters({30});
+ switch ((8 << size) | num)
+ {
+ case 32: QuickCallFunction(X3, SlowRead9<u32>); break;
+ case 33: QuickCallFunction(X3, SlowRead7<u32>); break;
+ case 16: QuickCallFunction(X3, SlowRead9<u16>); break;
+ case 17: QuickCallFunction(X3, SlowRead7<u16>); break;
+ case 8: QuickCallFunction(X3, SlowRead9<u8>); break;
+ case 9: QuickCallFunction(X3, SlowRead7<u8>); break;
+ }
+ ABI_PopRegisters({30});
+ if (size == 32)
+ MOV(rdMapped, W0);
+ else if (signextend)
+ SBFX(rdMapped, W0, 0, 8 << size);
+ else
+ UBFX(rdMapped, W0, 0, 8 << size);
+ RET();
+ }
+ }
+ }
+ }
+
+ FlushIcache();
+
+ JitMemSecondarySize = 1024*1024*4;
+
+ JitMemMainSize -= GetCodeOffset();
+ JitMemMainSize -= JitMemSecondarySize;
- JitMemUseableSize -= GetCodeOffset();
SetCodeBase((u8*)GetRWPtr(), (u8*)GetRXPtr());
}
@@ -227,6 +395,16 @@ Compiler::~Compiler()
#endif
}
+void Compiler::LoadCycles()
+{
+ LDR(INDEX_UNSIGNED, RCycles, RCPU, offsetof(ARM, Cycles));
+}
+
+void Compiler::SaveCycles()
+{
+ STR(INDEX_UNSIGNED, RCycles, RCPU, offsetof(ARM, Cycles));
+}
+
void Compiler::LoadReg(int reg, ARM64Reg nativeReg)
{
if (reg == 15)
@@ -325,7 +503,7 @@ const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
// CMN
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
// Mul
- F(Mul), F(Mul), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Long), NULL, NULL, NULL, NULL, NULL,
+ F(Mul), F(Mul), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short),
// ARMv5 exclusives
F(Clz), NULL, NULL, NULL, NULL,
@@ -356,7 +534,7 @@ const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
// Branch
F(BranchImm), F(BranchImm), F(BranchImm), F(BranchXchangeReg), F(BranchXchangeReg),
// Special
- NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, F(MSR), F(MSR), F(MRS), NULL, NULL, NULL,
&Compiler::Nop
};
#undef F
@@ -404,29 +582,34 @@ bool Compiler::CanCompile(bool thumb, u16 kind)
return (thumb ? T_Comp[kind] : A_Comp[kind]) != NULL;
}
-void Compiler::Comp_BranchSpecialBehaviour()
+void Compiler::Comp_BranchSpecialBehaviour(bool taken)
{
- if (CurInstr.BranchFlags & branch_IdleBranch)
+ if (taken && CurInstr.BranchFlags & branch_IdleBranch)
{
MOVI2R(W0, 1);
STRB(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, IdleLoop));
}
- if (CurInstr.BranchFlags & branch_FollowCondNotTaken)
+ if ((CurInstr.BranchFlags & branch_FollowCondNotTaken && taken)
+ || (CurInstr.BranchFlags & branch_FollowCondTaken && !taken))
{
- SaveCPSR(false);
RegCache.PrepareExit();
- ADD(W0, RCycles, ConstantCycles);
- ABI_PopRegisters(SavedRegs);
- RET();
+
+ SUB(RCycles, RCycles, ConstantCycles);
+ QuickTailCall(X0, ARM_Ret);
}
}
JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
{
- if (JitMemUseableSize - GetCodeOffset() < 1024 * 16)
+ if (JitMemMainSize - GetCodeOffset() < 1024 * 16)
+ {
+ printf("JIT near memory full, resetting...\n");
+ ResetBlockCache();
+ }
+ if ((JitMemMainSize + JitMemSecondarySize) - OtherCodeRegion < 1024 * 8)
{
- printf("JIT memory full, resetting...\n");
+ printf("JIT far memory full, resetting...\n");
ResetBlockCache();
}
@@ -437,21 +620,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
CurCPU = cpu;
ConstantCycles = 0;
RegCache = RegisterCache<Compiler, ARM64Reg>(this, instrs, instrsCount, true);
-
- //printf("compiling block at %x\n", R15 - (Thumb ? 2 : 4));
- const u32 ALL_CALLEE_SAVED = 0x7FF80000;
-
- SavedRegs = BitSet32((RegCache.GetPushRegs() | BitSet32(0x78000000)) & BitSet32(ALL_CALLEE_SAVED));
-
- //if (Num == 1)
- {
- ABI_PushRegisters(SavedRegs);
-
- MOVP2R(RCPU, CurCPU);
- MOVI2R(RCycles, 0);
-
- LoadCPSR();
- }
+ CPSRDirty = false;
for (int i = 0; i < instrsCount; i++)
{
@@ -486,6 +655,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
if (comp == NULL)
{
+ SaveCycles();
SaveCPSR();
RegCache.Flush();
}
@@ -535,25 +705,18 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
(this->*comp)();
}
- Comp_BranchSpecialBehaviour();
+ Comp_BranchSpecialBehaviour(true);
if (cond < 0xE)
{
- if (IrregularCycles)
+ if (IrregularCycles || (CurInstr.BranchFlags & branch_FollowCondTaken))
{
FixupBranch skipNop = B();
SetJumpTarget(skipExecute);
Comp_AddCycles_C();
- if (CurInstr.BranchFlags & branch_FollowCondTaken)
- {
- SaveCPSR(false);
- RegCache.PrepareExit();
- ADD(W0, RCycles, ConstantCycles);
- ABI_PopRegisters(SavedRegs);
- RET();
- }
+ Comp_BranchSpecialBehaviour(false);
SetJumpTarget(skipNop);
}
@@ -565,76 +728,74 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
}
if (comp == NULL)
+ {
+ LoadCycles();
LoadCPSR();
+ }
}
RegCache.Flush();
- //if (Num == 1)
- {
- SaveCPSR();
-
- ADD(W0, RCycles, ConstantCycles);
-
- ABI_PopRegisters(SavedRegs);
- }
- //else
- // ADD(RCycles, RCycles, ConstantCycles);
-
- RET();
+ SUB(RCycles, RCycles, ConstantCycles);
+ QuickTailCall(X0, ARM_Ret);
FlushIcache();
- //printf("finished\n");
-
return res;
}
void Compiler::Reset()
{
+ LoadStorePatches.clear();
+
SetCodePtr(0);
+ OtherCodeRegion = JitMemMainSize;
const u32 brk_0 = 0xD4200000;
- for (int i = 0; i < JitMemUseableSize / 4; i++)
+ for (int i = 0; i < (JitMemMainSize + JitMemSecondarySize) / 4; i++)
*(((u32*)GetRWPtr()) + i) = brk_0;
}
-void Compiler::Comp_AddCycles_C(bool nonConst)
+void Compiler::Comp_AddCycles_C(bool forceNonConstant)
{
s32 cycles = Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
- if (!nonConst && !CurInstr.Info.Branches())
+ if (forceNonConstant)
ConstantCycles += cycles;
else
- ADD(RCycles, RCycles, cycles);
+ SUB(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CI(u32 numI)
{
+ IrregularCycles = true;
+
s32 cycles = (Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + numI;
- if (Thumb || CurInstr.Cond() >= 0xE)
+ if (Thumb || CurInstr.Cond() == 0xE)
ConstantCycles += cycles;
else
- ADD(RCycles, RCycles, cycles);
+ SUB(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift)
{
+ IrregularCycles = true;
+
s32 cycles = (Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + c;
- ADD(RCycles, RCycles, numI, shift);
+ SUB(RCycles, RCycles, cycles);
if (Thumb || CurInstr.Cond() >= 0xE)
- ConstantCycles += c;
+ ConstantCycles += cycles;
else
- ADD(RCycles, RCycles, cycles);
+ SUB(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CDI()
@@ -671,7 +832,7 @@ void Compiler::Comp_AddCycles_CDI()
}
if (!Thumb && CurInstr.Cond() < 0xE)
- ADD(RCycles, RCycles, cycles);
+ SUB(RCycles, RCycles, cycles);
else
ConstantCycles += cycles;
}
@@ -715,7 +876,7 @@ void Compiler::Comp_AddCycles_CD()
}
if ((!Thumb && CurInstr.Cond() < 0xE) && IrregularCycles)
- ADD(RCycles, RCycles, cycles);
+ SUB(RCycles, RCycles, cycles);
else
ConstantCycles += cycles;
}
diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.h b/src/ARMJIT_A64/ARMJIT_Compiler.h
index 5c9ef41..e4ffc63 100644
--- a/src/ARMJIT_A64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_A64/ARMJIT_Compiler.h
@@ -9,6 +9,8 @@
#include "../ARMJIT_Internal.h"
#include "../ARMJIT_RegisterCache.h"
+#include <unordered_map>
+
namespace ARMJIT
{
@@ -64,7 +66,14 @@ struct Op2
};
};
-class Compiler : Arm64Gen::ARM64XEmitter
+struct LoadStorePatch
+{
+ void* PatchFunc;
+ s32 PatchOffset;
+ u32 PatchSize;
+};
+
+class Compiler : public Arm64Gen::ARM64XEmitter
{
public:
typedef void (Compiler::*CompileFunc)();
@@ -72,6 +81,9 @@ public:
Compiler();
~Compiler();
+ void PushRegs(bool saveHiRegs);
+ void PopRegs(bool saveHiRegs);
+
Arm64Gen::ARM64Reg MapReg(int reg)
{
assert(RegCache.Mapping[reg] != Arm64Gen::INVALID_REG);
@@ -89,7 +101,7 @@ public:
void Reset();
- void Comp_AddCycles_C(bool forceNonConst = false);
+ void Comp_AddCycles_C(bool forceNonConstant = false);
void Comp_AddCycles_CI(u32 numI);
void Comp_AddCycles_CI(u32 c, Arm64Gen::ARM64Reg numI, Arm64Gen::ArithOption shift);
void Comp_AddCycles_CD();
@@ -103,6 +115,9 @@ public:
void LoadCPSR();
void SaveCPSR(bool markClean = true);
+ void LoadCycles();
+ void SaveCycles();
+
void Nop() {}
void A_Comp_ALUTriOp();
@@ -111,6 +126,7 @@ public:
void A_Comp_Mul();
void A_Comp_Mul_Long();
+ void A_Comp_Mul_Short();
void A_Comp_Clz();
@@ -122,6 +138,8 @@ public:
void A_Comp_BranchImm();
void A_Comp_BranchXchangeReg();
+ void A_Comp_MRS();
+ void A_Comp_MSR();
void T_Comp_ShiftImm();
void T_Comp_AddSub_();
@@ -168,7 +186,7 @@ public:
void Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, Arm64Gen::ARM64Reg tmp = Arm64Gen::W0);
void Comp_RegShiftReg(int op, bool S, Op2& op2, Arm64Gen::ARM64Reg rs);
- void Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
+ bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
enum
{
memop_Writeback = 1 << 0,
@@ -179,16 +197,33 @@ public:
};
void Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags);
- void* Gen_MemoryRoutine9(int size, bool store);
-
- void* Gen_MemoryRoutine9Seq(bool store, bool preinc);
- void* Gen_MemoryRoutine7Seq(bool store, bool preinc);
-
// 0 = switch mode, 1 = stay arm, 2 = stay thumb
void* Gen_JumpTo9(int kind);
void* Gen_JumpTo7(int kind);
- void Comp_BranchSpecialBehaviour();
+ void Comp_BranchSpecialBehaviour(bool taken);
+
+ JitBlockEntry AddEntryOffset(u32 offset)
+ {
+ return (JitBlockEntry)(GetRXBase() + offset);
+ }
+
+ u32 SubEntryOffset(JitBlockEntry entry)
+ {
+ return (u8*)entry - GetRXBase();
+ }
+
+ bool IsJITFault(u64 pc);
+ s64 RewriteMemAccess(u64 pc);
+
+ void SwapCodeRegion()
+ {
+ ptrdiff_t offset = GetCodeOffset();
+ SetCodePtrUnsafe(OtherCodeRegion);
+ OtherCodeRegion = offset;
+ }
+
+ ptrdiff_t OtherCodeRegion;
bool Exit;
@@ -202,22 +237,20 @@ public:
BitSet32 SavedRegs;
- u32 JitMemUseableSize;
+ u32 JitMemSecondarySize;
+ u32 JitMemMainSize;
void* ReadBanked, *WriteBanked;
- // [size][store]
- void* MemFunc9[3][2];
- void* MemFunc7[3][2];
-
- // [store][pre increment]
- void* MemFuncsSeq9[2][2];
- // "[code in main ram]
- void* MemFuncsSeq7[2][2];
-
void* JumpToFuncs9[3];
void* JumpToFuncs7[3];
+ std::unordered_map<ptrdiff_t, LoadStorePatch> LoadStorePatches;
+
+ // [Num][Size][Sign Extend][Output register]
+ void* PatchedLoadFuncs[2][3][2][8];
+ void* PatchedStoreFuncs[2][3][8];
+
RegisterCache<Compiler, Arm64Gen::ARM64Reg> RegCache;
bool CPSRDirty = false;
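
Note: the new LoadStorePatch / LoadStorePatches members above are the bookkeeping for fastmem. Every speculatively emitted load or store records where its patchable region starts, how many bytes it spans, and which pre-generated slow-path routine (PatchedLoadFuncs / PatchedStoreFuncs) to fall back to. A minimal sketch of that lookup shape, assuming nothing beyond a std::unordered_map keyed by code offset; the offsets and sizes below are made up.

    #include <cstdint>
    #include <cstdio>
    #include <unordered_map>

    struct LoadStorePatch
    {
        void*    PatchFunc;   // slow-path routine to branch to instead
        int32_t  PatchOffset; // start of the patchable region, relative to the faulting instruction
        uint32_t PatchSize;   // bytes to overwrite (one BL plus NOP padding)
    };

    int main()
    {
        std::unordered_map<ptrdiff_t, LoadStorePatch> patches;

        // recorded while emitting a fastmem access at code offset 0x140;
        // PatchFunc would point at the matching PatchedLoad/StoreFuncs entry
        patches[0x140] = LoadStorePatch{ nullptr, -8, 16 };

        // later, a fault attributed to that offset looks the entry up
        auto it = patches.find(0x140);
        if (it != patches.end())
            printf("rewrite %u bytes starting at offset 0x%lx\n",
                   it->second.PatchSize, (long)(0x140 + it->second.PatchOffset));
        return 0;
    }
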
diff --git a/src/ARMJIT_A64/ARMJIT_Linkage.s b/src/ARMJIT_A64/ARMJIT_Linkage.s
new file mode 100644
index 0000000..536a478
--- /dev/null
+++ b/src/ARMJIT_A64/ARMJIT_Linkage.s
@@ -0,0 +1,68 @@
+#include "../ARMJIT_x64/ARMJIT_Offsets.h"
+
+.text
+
+#define RCPSR W27
+#define RCycles W28
+#define RCPU X29
+
+.p2align 4,,15
+
+.global ARM_Dispatch
+ARM_Dispatch:
+ stp x19, x20, [sp, #-96]!
+ stp x21, x22, [sp, #16]
+ stp x23, x24, [sp, #32]
+ stp x25, x26, [sp, #48]
+ stp x27, x28, [sp, #64]
+ stp x29, x30, [sp, #80]
+
+ mov RCPU, x0
+ ldr RCycles, [RCPU, ARM_Cycles_offset]
+ ldr RCPSR, [RCPU, ARM_CPSR_offset]
+
+ br x1
+
+.p2align 4,,15
+
+.global ARM_Ret
+ARM_Ret:
+ str RCycles, [RCPU, ARM_Cycles_offset]
+ str RCPSR, [RCPU, ARM_CPSR_offset]
+
+ ldp x29, x30, [sp, #80]
+ ldp x27, x28, [sp, #64]
+ ldp x25, x26, [sp, #48]
+ ldp x23, x24, [sp, #32]
+ ldp x21, x22, [sp, #16]
+ ldp x19, x20, [sp], #96
+
+ ret
+
+.p2align 4,,15
+
+.global ARM_RestoreContext
+ARM_RestoreContext:
+ mov sp, x0
+
+ ldp x0, x1, [sp]
+ ldp x2, x3, [sp, #16]
+ ldp x4, x5, [sp, #32]
+ ldp x6, x7, [sp, #48]
+ ldp x8, x9, [sp, #64]
+ ldp x10, x11, [sp, #80]
+ ldp x12, x13, [sp, #96]
+ ldp x14, x15, [sp, #112]
+ ldp x16, x17, [sp, #128]
+ ldp x18, x19, [sp, #144]
+ ldp x20, x21, [sp, #160]
+ ldp x22, x23, [sp, #176]
+ ldp x24, x25, [sp, #192]
+ ldp x26, x27, [sp, #208]
+ ldp x28, x29, [sp, #224]
+ ldr x30, [sp, #240]
+
+ ldp x17, x18, [sp, #248]
+ mov sp, x17
+
+ br x18 \ No newline at end of file
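
Note: the new linkage stubs pin the emulated state for the whole duration of a block. ARM_Dispatch saves the callee-saved registers once, loads Cycles and CPSR into RCycles/RCPSR and branches into the block; every block then tail-calls ARM_Ret, which writes both values back into the ARM object and restores the host registers. Below is a C++-level sketch of that round trip, minus the actual register pinning; the struct and function names are simplified stand-ins, not the real interface.

    #include <cstdint>
    #include <cstdio>

    struct CPUState { int32_t Cycles; uint32_t CPSR; };

    // stand-in for a compiled block entry; in the real backend the state
    // lives in RCycles/RCPSR across the whole block instead of being passed
    using BlockEntry = void (*)(CPUState&);

    static void exampleBlock(CPUState& s)
    {
        s.Cycles -= 3;          // work done by the block
        s.CPSR   |= 1u << 29;   // e.g. the block set the carry flag
    }

    static void dispatch(CPUState& cpu, BlockEntry entry)
    {
        CPUState pinned = cpu;  // ARM_Dispatch: load Cycles/CPSR into registers
        entry(pinned);          // br x1
        cpu = pinned;           // ARM_Ret: store them back, pop callee-saved registers
    }

    int main()
    {
        CPUState cpu{16, 0};
        dispatch(cpu, exampleBlock);
        printf("Cycles=%d CPSR=%08x\n", cpu.Cycles, cpu.CPSR);
        return 0;
    }
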
diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
index 6cf710b..b307d0e 100644
--- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
@@ -2,286 +2,62 @@
#include "../Config.h"
+#include "../ARMJIT_Memory.h"
+
using namespace Arm64Gen;
namespace ARMJIT
{
-// W0 - address
-// (if store) W1 - value to store
-// W2 - code cycles
-void* Compiler::Gen_MemoryRoutine9(int size, bool store)
+bool Compiler::IsJITFault(u64 pc)
{
- AlignCode16();
- void* res = GetRXPtr();
-
- u32 addressMask;
- switch (size)
- {
- case 32: addressMask = ~3; break;
- case 16: addressMask = ~1; break;
- case 8: addressMask = ~0; break;
- }
-
- LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, DTCMBase));
- LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMSize));
- SUB(W3, W0, W3);
- CMP(W3, W4);
- FixupBranch insideDTCM = B(CC_LO);
-
- UBFX(W4, W0, 24, 8);
- CMP(W4, 0x02);
- FixupBranch outsideMainRAM = B(CC_NEQ);
- ANDI2R(W3, W0, addressMask & (MAIN_RAM_SIZE - 1));
- MOVP2R(X4, NDS::MainRAM);
- if (!store && size == 32)
- {
- LDR(W3, X3, X4);
- ANDI2R(W0, W0, 3);
- LSL(W0, W0, 3);
- RORV(W0, W3, W0);
- }
- else if (store)
- STRGeneric(size, W1, X3, X4);
- else
- LDRGeneric(size, false, W0, X3, X4);
- RET();
-
- SetJumpTarget(outsideMainRAM);
-
- LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, ITCMSize));
- CMP(W0, W3);
- FixupBranch insideITCM = B(CC_LO);
-
- if (store)
- {
- if (size > 8)
- ANDI2R(W0, W0, addressMask);
-
- switch (size)
- {
- case 32: QuickTailCall(X4, NDS::ARM9Write32); break;
- case 16: QuickTailCall(X4, NDS::ARM9Write16); break;
- case 8: QuickTailCall(X4, NDS::ARM9Write8); break;
- }
- }
- else
- {
- if (size == 32)
- ABI_PushRegisters({0, 30});
- if (size > 8)
- ANDI2R(W0, W0, addressMask);
-
- switch (size)
- {
- case 32: QuickCallFunction(X4, NDS::ARM9Read32); break;
- case 16: QuickTailCall (X4, NDS::ARM9Read16); break;
- case 8: QuickTailCall (X4, NDS::ARM9Read8 ); break;
- }
- if (size == 32)
- {
- ABI_PopRegisters({1, 30});
- ANDI2R(W1, W1, 3);
- LSL(W1, W1, 3);
- RORV(W0, W0, W1);
- RET();
- }
- }
-
- SetJumpTarget(insideDTCM);
- ANDI2R(W3, W3, 0x3FFF & addressMask);
- ADDI2R(W3, W3, offsetof(ARMv5, DTCM), W4);
- if (!store && size == 32)
- {
- ANDI2R(W4, W0, 3);
- LDR(W0, RCPU, W3);
- LSL(W4, W4, 3);
- RORV(W0, W0, W4);
- }
- else if (store)
- STRGeneric(size, W1, RCPU, W3);
- else
- LDRGeneric(size, false, W0, RCPU, W3);
-
- RET();
-
- SetJumpTarget(insideITCM);
- ANDI2R(W3, W0, 0x7FFF & addressMask);
- if (store)
- {
- ADDI2R(W0, W3, ExeMemRegionOffsets[exeMem_ITCM], W4);
- LSR(W5, W0, 9);
- MOVP2R(X4, CodeRanges);
- ADD(X4, X4, X5, ArithOption(X5, ST_LSL, 4));
- static_assert(sizeof(AddressRange) == 16);
- LDRH(INDEX_UNSIGNED, W4, X4, offsetof(AddressRange, Blocks.Length));
- FixupBranch null = CBZ(W4);
- ABI_PushRegisters({1, 3, 30});
- QuickCallFunction(X4, InvalidateByAddr);
- ABI_PopRegisters({1, 3, 30});
- SetJumpTarget(null);
- }
- ADDI2R(W3, W3, offsetof(ARMv5, ITCM), W4);
- if (!store && size == 32)
- {
- ANDI2R(W4, W0, 3);
- LDR(W0, RCPU, W3);
- LSL(W4, W4, 3);
- RORV(W0, W0, W4);
- }
- else if (store)
- STRGeneric(size, W1, RCPU, W3);
- else
- LDRGeneric(size, false, W0, RCPU, W3);
- RET();
-
- return res;
+ return pc >= (u64)GetRXBase() && pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize);
}
-/*
- W0 - base address
- X1 - stack space
- W2 - values count
-*/
-void* Compiler::Gen_MemoryRoutine9Seq(bool store, bool preinc)
+s64 Compiler::RewriteMemAccess(u64 pc)
{
- AlignCode16();
- void* res = GetRXPtr();
-
- void* loopStart = GetRXPtr();
- SUB(W2, W2, 1);
-
- if (preinc)
- ADD(W0, W0, 4);
+ ptrdiff_t pcOffset = pc - (u64)GetRXBase();
- LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMBase));
- LDR(INDEX_UNSIGNED, W5, RCPU, offsetof(ARMv5, DTCMSize));
- SUB(W4, W0, W4);
- CMP(W4, W5);
- FixupBranch insideDTCM = B(CC_LO);
+ auto it = LoadStorePatches.find(pcOffset);
- LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, ITCMSize));
- CMP(W0, W4);
- FixupBranch insideITCM = B(CC_LO);
-
- ABI_PushRegisters({0, 1, 2, 30}); // TODO: move SP only once
- if (store)
+ if (it != LoadStorePatches.end())
{
- LDR(X1, X1, ArithOption(X2, true));
- QuickCallFunction(X4, NDS::ARM9Write32);
+ LoadStorePatch patch = it->second;
- ABI_PopRegisters({0, 1, 2, 30});
- }
- else
- {
- QuickCallFunction(X4, NDS::ARM9Read32);
- MOV(W4, W0);
+ ptrdiff_t curCodeOffset = GetCodeOffset();
- ABI_PopRegisters({0, 1, 2, 30});
+ SetCodePtrUnsafe(pcOffset + patch.PatchOffset);
- STR(X4, X1, ArithOption(X2, true));
- }
+ BL(patch.PatchFunc);
- if (!preinc)
- ADD(W0, W0, 4);
- CBNZ(W2, loopStart);
- RET();
+ for (int i = 0; i < patch.PatchSize / 4 - 1; i++)
+ HINT(HINT_NOP);
- SetJumpTarget(insideDTCM);
+ FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr());
- ANDI2R(W4, W4, ~3 & 0x3FFF);
- ADDI2R(X4, X4, offsetof(ARMv5, DTCM));
- if (store)
- {
- LDR(X5, X1, ArithOption(X2, true));
- STR(W5, RCPU, X4);
- }
- else
- {
- LDR(W5, RCPU, X4);
- STR(X5, X1, ArithOption(X2, true));
- }
+ SetCodePtrUnsafe(curCodeOffset);
- if (!preinc)
- ADD(W0, W0, 4);
- CBNZ(W2, loopStart);
- RET();
-
- SetJumpTarget(insideITCM);
-
- ANDI2R(W4, W0, ~3 & 0x7FFF);
-
- ADDI2R(W6, W4, offsetof(ARMv5, ITCM), W5);
- if (store)
- {
- LDR(X5, X1, ArithOption(X2, true));
- STR(W5, RCPU, X6);
- }
- else
- {
- LDR(W5, RCPU, X6);
- STR(X5, X1, ArithOption(X2, true));
- }
+ LoadStorePatches.erase(it);
- if (store)
- {
- ADDI2R(W4, W4, ExeMemRegionOffsets[exeMem_ITCM], W5);
- LSR(W6, W4, 9);
- MOVP2R(X5, CodeRanges);
- ADD(X5, X5, X6, ArithOption(X6, ST_LSL, 4));
- static_assert(sizeof(AddressRange) == 16);
- LDRH(INDEX_UNSIGNED, W5, X5, offsetof(AddressRange, Blocks.Length));
- FixupBranch null = CBZ(W5);
- ABI_PushRegisters({0, 1, 2, 4, 30});
- MOV(W0, W4);
- QuickCallFunction(X5, InvalidateByAddr);
- ABI_PopRegisters({0, 1, 2, 4, 30});
- SetJumpTarget(null);
+ return patch.PatchOffset;
}
-
- if (!preinc)
- ADD(W0, W0, 4);
- CBNZ(W2, loopStart);
- RET();
- return res;
+ printf("this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc));
+ assert(false);
}
-void* Compiler::Gen_MemoryRoutine7Seq(bool store, bool preinc)
+bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
{
- AlignCode16();
- void* res = GetRXPtr();
+ u32 localAddr = LocaliseCodeAddress(Num, addr);
- void* loopStart = GetRXPtr();
- SUB(W2, W2, 1);
-
- if (preinc)
- ADD(W0, W0, 4);
-
- ABI_PushRegisters({0, 1, 2, 30});
- if (store)
+ int invalidLiteralIdx = InvalidLiterals.Find(localAddr);
+ if (invalidLiteralIdx != -1)
{
- LDR(X1, X1, ArithOption(X2, true));
- QuickCallFunction(X4, NDS::ARM7Write32);
- ABI_PopRegisters({0, 1, 2, 30});
+ InvalidLiterals.Remove(invalidLiteralIdx);
+ return false;
}
- else
- {
- QuickCallFunction(X4, NDS::ARM7Read32);
- MOV(W4, W0);
- ABI_PopRegisters({0, 1, 2, 30});
- STR(X4, X1, ArithOption(X2, true));
- }
-
- if (!preinc)
- ADD(W0, W0, 4);
- CBNZ(W2, loopStart);
- RET();
- return res;
-}
+ Comp_AddCycles_CDI();
-void Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
-{
u32 val;
// make sure arm7 bios is accessible
u32 tmpR15 = CurCPU->R[15];
@@ -309,6 +85,8 @@ void Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
if (Thumb || CurInstr.Cond() == 0xE)
RegCache.PutLiteral(rd, val);
+
+ return true;
}
void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
@@ -318,163 +96,209 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
addressMask = ~3;
if (size == 16)
addressMask = ~1;
+
+ if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
+ {
+ u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
+
+ if (Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr))
+ return;
+ }
if (flags & memop_Store)
Comp_AddCycles_CD();
else
Comp_AddCycles_CDI();
- if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
- {
- u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
- u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
+ ARM64Reg rdMapped = MapReg(rd);
+ ARM64Reg rnMapped = MapReg(rn);
- if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 16))))
- {
- Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr);
- return;
- }
+ if (Thumb && rn == 15)
+ {
+ ANDI2R(W3, rnMapped, ~2);
+ rnMapped = W3;
}
+ ARM64Reg finalAddr = W0;
+ if (flags & memop_Post)
{
- ARM64Reg rdMapped = MapReg(rd);
- ARM64Reg rnMapped = MapReg(rn);
-
- bool inlinePreparation = Num == 1;
- u32 constLocalROR32 = 4;
+ finalAddr = rnMapped;
+ MOV(W0, rnMapped);
+ }
- void* memFunc = Num == 0
- ? MemFunc9[size >> 4][!!(flags & memop_Store)]
- : MemFunc7[size >> 4][!!((flags & memop_Store))];
+ bool addrIsStatic = Config::JIT_LiteralOptimisations
+ && RegCache.IsLiteral(rn) && offset.IsImm && !(flags & (memop_Writeback|memop_Post));
+ u32 staticAddress;
+ if (addrIsStatic)
+ staticAddress = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
- if (Config::JIT_LiteralOptimisations && (rd != 15 || (flags & memop_Store)) && offset.IsImm && RegCache.IsLiteral(rn))
+ if (!offset.IsImm)
+ Comp_RegShiftImm(offset.Reg.ShiftType, offset.Reg.ShiftAmount, false, offset, W2);
+ // offset might have become an immediate
+ if (offset.IsImm)
+ {
+ if (offset.Imm)
+ {
+ if (flags & memop_SubtractOffset)
+ SUB(finalAddr, rnMapped, offset.Imm);
+ else
+ ADD(finalAddr, rnMapped, offset.Imm);
+ }
+ else if (finalAddr != rnMapped)
+ MOV(finalAddr, rnMapped);
+ }
+ else
+ {
+ if (offset.Reg.ShiftType == ST_ROR)
{
- u32 addr = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
+ ROR_(W0, offset.Reg.Rm, offset.Reg.ShiftAmount);
+ offset = Op2(W0);
+ }
- NDS::MemRegion region;
- region.Mem = NULL;
- if (Num == 0)
- {
- ARMv5* cpu5 = (ARMv5*)CurCPU;
+ if (flags & memop_SubtractOffset)
+ SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
+ else
+ ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
+ }
- // stupid dtcm...
- if (addr >= cpu5->DTCMBase && addr < (cpu5->DTCMBase + cpu5->DTCMSize))
- {
- region.Mem = cpu5->DTCM;
- region.Mask = 0x3FFF;
- }
- else
- {
- NDS::ARM9GetMemRegion(addr, flags & memop_Store, &region);
- }
- }
- else
- NDS::ARM7GetMemRegion(addr, flags & memop_Store, &region);
+ if (!(flags & memop_Post) && (flags & memop_Writeback))
+ MOV(rnMapped, W0);
- if (region.Mem != NULL)
- {
- void* ptr = &region.Mem[addr & addressMask & region.Mask];
+ u32 expectedTarget = Num == 0
+ ? ARMJIT_Memory::ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion)
+ : ARMJIT_Memory::ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion);
- MOVP2R(X0, ptr);
- if (flags & memop_Store)
- STRGeneric(size, INDEX_UNSIGNED, rdMapped, X0, 0);
- else
- {
- LDRGeneric(size, flags & memop_SignExtend, INDEX_UNSIGNED, rdMapped, X0, 0);
- if (size == 32 && addr & ~0x3)
- ROR_(rdMapped, rdMapped, (addr & 0x3) << 3);
- }
- return;
- }
+ if (Config::JIT_FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || ARMJIT_Memory::IsMappable(expectedTarget)))
+ {
+ ptrdiff_t memopStart = GetCodeOffset();
+ LoadStorePatch patch;
- void* specialFunc = GetFuncForAddr(CurCPU, addr, flags & memop_Store, size);
- if (specialFunc)
- {
- memFunc = specialFunc;
- inlinePreparation = true;
- constLocalROR32 = addr & 0x3;
- }
- }
+ patch.PatchFunc = flags & memop_Store
+ ? PatchedStoreFuncs[Num][__builtin_ctz(size) - 3][rdMapped - W19]
+ : PatchedLoadFuncs[Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped - W19];
+ assert(rdMapped - W19 >= 0 && rdMapped - W19 < 8);
- ARM64Reg finalAddr = W0;
- if (flags & memop_Post)
- {
- finalAddr = rnMapped;
- MOV(W0, rnMapped);
- }
+ MOVP2R(X7, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
+ // take a chance at fastmem
+ if (size > 8)
+ ANDI2R(W1, W0, addressMask);
+
+ ptrdiff_t loadStorePosition = GetCodeOffset();
if (flags & memop_Store)
- MOV(W1, rdMapped);
-
- if (!offset.IsImm)
- Comp_RegShiftImm(offset.Reg.ShiftType, offset.Reg.ShiftAmount, false, offset, W2);
- // offset might become an immediate
- if (offset.IsImm)
{
- if (flags & memop_SubtractOffset)
- SUB(finalAddr, rnMapped, offset.Imm);
- else
- ADD(finalAddr, rnMapped, offset.Imm);
+ STRGeneric(size, rdMapped, size > 8 ? X1 : X0, X7);
}
else
{
- if (offset.Reg.ShiftType == ST_ROR)
+ LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7);
+ if (size == 32)
{
- ROR_(W0, offset.Reg.Rm, offset.Reg.ShiftAmount);
- offset = Op2(W0);
+ UBFIZ(W0, W0, 3, 2);
+ RORV(rdMapped, rdMapped, W0);
}
-
- if (flags & memop_SubtractOffset)
- SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
- else
- ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
}
- if (!(flags & memop_Post) && (flags & memop_Writeback))
- MOV(rnMapped, W0);
+ patch.PatchOffset = memopStart - loadStorePosition;
+ patch.PatchSize = GetCodeOffset() - memopStart;
+ LoadStorePatches[loadStorePosition] = patch;
+ }
+ else
+ {
+ void* func = NULL;
+ if (addrIsStatic)
+ func = ARMJIT_Memory::GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size);
- if (inlinePreparation)
+ if (func)
{
- if (size == 32 && !(flags & memop_Store) && constLocalROR32 == 4)
- ANDI2R(rdMapped, W0, 3);
- if (size > 8)
- ANDI2R(W0, W0, addressMask);
+ if (flags & memop_Store)
+ MOV(W1, rdMapped);
+ QuickCallFunction(X2, (void (*)())func);
+
+ if (!(flags & memop_Store))
+ {
+ if (size == 32)
+ {
+ if (staticAddress & 0x3)
+ ROR_(rdMapped, W0, (staticAddress & 0x3) << 3);
+ else
+ MOV(rdMapped, W0);
+ }
+ else
+ {
+ if (flags & memop_SignExtend)
+ SBFX(rdMapped, W0, 0, size);
+ else
+ UBFX(rdMapped, W0, 0, size);
+ }
+ }
}
- QuickCallFunction(X2, memFunc);
- if (!(flags & memop_Store))
+ else
{
- if (inlinePreparation && !(flags & memop_Store) && size == 32)
+ if (Num == 0)
{
- if (constLocalROR32 == 4)
+ MOV(X1, RCPU);
+ if (flags & memop_Store)
{
- LSL(rdMapped, rdMapped, 3);
- RORV(rdMapped, W0, rdMapped);
+ MOV(W2, rdMapped);
+ switch (size)
+ {
+ case 32: QuickCallFunction(X3, SlowWrite9<u32>); break;
+ case 16: QuickCallFunction(X3, SlowWrite9<u16>); break;
+ case 8: QuickCallFunction(X3, SlowWrite9<u8>); break;
+ }
}
- else if (constLocalROR32 > 0)
- ROR_(rdMapped, W0, constLocalROR32 << 3);
else
- MOV(rdMapped, W0);
+ {
+ switch (size)
+ {
+ case 32: QuickCallFunction(X3, SlowRead9<u32>); break;
+ case 16: QuickCallFunction(X3, SlowRead9<u16>); break;
+ case 8: QuickCallFunction(X3, SlowRead9<u8>); break;
+ }
+ }
}
- else if (flags & memop_SignExtend)
+ else
{
- if (size == 16)
- SXTH(rdMapped, W0);
- else if (size == 8)
- SXTB(rdMapped, W0);
+ if (flags & memop_Store)
+ {
+ MOV(W1, rdMapped);
+ switch (size)
+ {
+ case 32: QuickCallFunction(X3, SlowWrite7<u32>); break;
+ case 16: QuickCallFunction(X3, SlowWrite7<u16>); break;
+ case 8: QuickCallFunction(X3, SlowWrite7<u8>); break;
+ }
+ }
else
- assert("What's wrong with you?");
+ {
+ switch (size)
+ {
+ case 32: QuickCallFunction(X3, SlowRead7<u32>); break;
+ case 16: QuickCallFunction(X3, SlowRead7<u16>); break;
+ case 8: QuickCallFunction(X3, SlowRead7<u8>); break;
+ }
+ }
}
- else
- MOV(rdMapped, W0);
-
- if (CurInstr.Info.Branches())
+
+ if (!(flags & memop_Store))
{
- if (size < 32)
- printf("LDR size < 32 branching?\n");
- Comp_JumpTo(rdMapped, Num == 0, false);
+ if (size == 32)
+ MOV(rdMapped, W0);
+ else if (flags & memop_SignExtend)
+ SBFX(rdMapped, W0, 0, size);
+ else
+ UBFX(rdMapped, W0, 0, size);
}
}
}
+
+ if (CurInstr.Info.Branches())
+ {
+ if (size < 32)
+ printf("LDR size < 32 branching?\n");
+ Comp_JumpTo(rdMapped, Num == 0, false);
+ }
}
void Compiler::A_Comp_MemWB()
@@ -589,19 +413,11 @@ void Compiler::T_Comp_MemImmHalf()
void Compiler::T_Comp_LoadPCRel()
{
- u32 addr = (R15 & ~0x2) + ((CurInstr.Instr & 0xFF) << 2);
+ u32 offset = ((CurInstr.Instr & 0xFF) << 2);
+ u32 addr = (R15 & ~0x2) + offset;
- if (Config::JIT_LiteralOptimisations)
- {
- Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr);
- Comp_AddCycles_CDI();
- }
- else
- {
- bool negative = addr < R15;
- u32 abs = negative ? R15 - addr : addr - R15;
- Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(abs), 32, negative ? memop_SubtractOffset : 0);
- }
+ if (!Config::JIT_LiteralOptimisations || !Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr))
+ Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(offset), 32, 0);
}
void Compiler::T_Comp_MemSPRel()
@@ -621,15 +437,138 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
if (regsCount == 0)
return 0; // actually not the right behaviour TODO: fix me
- SUB(SP, SP, ((regsCount + 1) & ~1) * 8);
- if (store)
+ if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << *regs.begin()))
{
+ int flags = 0;
+ if (store)
+ flags |= memop_Store;
+ if (decrement)
+ flags |= memop_SubtractOffset;
+ Op2 offset = preinc ? Op2(4) : Op2(0);
+
+ Comp_MemAccess(*regs.begin(), rn, offset, 32, flags);
+
+ return decrement ? -4 : 4;
+ }
+
+ if (store)
Comp_AddCycles_CD();
+ else
+ Comp_AddCycles_CDI();
- if (usermode && (regs & BitSet16(0x7f00)))
- UBFX(W0, RCPSR, 0, 5);
+ int expectedTarget = Num == 0
+ ? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion)
+ : ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion);
+
+ bool compileFastPath = Config::JIT_FastMemory
+ && store && !usermode && (CurInstr.Cond() < 0xE || ARMJIT_Memory::IsMappable(expectedTarget));
+
+ if (decrement)
+ {
+ SUB(W0, MapReg(rn), regsCount * 4);
+ ANDI2R(W0, W0, ~3);
+ preinc ^= true;
+ }
+ else
+ {
+ ANDI2R(W0, MapReg(rn), ~3);
+ }
+
+ LoadStorePatch patch;
+ if (compileFastPath)
+ {
+ ptrdiff_t fastPathStart = GetCodeOffset();
+ ptrdiff_t firstLoadStoreOffset;
+
+ bool firstLoadStore = true;
+
+ MOVP2R(X1, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
+ ADD(X1, X1, X0);
+
+ u32 offset = preinc ? 4 : 0;
+ BitSet16::Iterator it = regs.begin();
+
+ if (regsCount & 1)
+ {
+ int reg = *it;
+ it++;
+
+ ARM64Reg first = W3;
+ if (RegCache.LoadedRegs & (1 << reg))
+ first = MapReg(reg);
+ else if (store)
+ LoadReg(reg, first);
+
+ if (firstLoadStore)
+ {
+ firstLoadStoreOffset = GetCodeOffset();
+ firstLoadStore = false;
+ }
+
+ if (store)
+ STR(INDEX_UNSIGNED, first, X1, offset);
+ else
+ LDR(INDEX_UNSIGNED, first, X1, offset);
+
+ if (!(RegCache.LoadedRegs & (1 << reg)) && !store)
+ SaveReg(reg, first);
+
+ offset += 4;
+ }
+
+ while (it != regs.end())
+ {
+ int reg = *it;
+ it++;
+ int nextReg = *it;
+ it++;
- int i = regsCount - 1;
+ ARM64Reg first = W3, second = W4;
+ if (RegCache.LoadedRegs & (1 << reg))
+ first = MapReg(reg);
+ else if (store)
+ LoadReg(reg, first);
+ if (RegCache.LoadedRegs & (1 << nextReg))
+ second = MapReg(nextReg);
+ else if (store)
+ LoadReg(nextReg, second);
+
+ if (firstLoadStore)
+ {
+ firstLoadStoreOffset = GetCodeOffset();
+ firstLoadStore = false;
+ }
+
+ if (store)
+ STP(INDEX_SIGNED, first, second, X1, offset);
+ else
+ LDP(INDEX_SIGNED, first, second, X1, offset);
+
+ if (!(RegCache.LoadedRegs & (1 << reg)) && !store)
+ SaveReg(reg, first);
+ if (!(RegCache.LoadedRegs & (1 << nextReg)) && !store)
+ SaveReg(nextReg, second);
+
+ offset += 8;
+ }
+
+ patch.PatchSize = GetCodeOffset() - fastPathStart;
+ patch.PatchOffset = fastPathStart - firstLoadStoreOffset;
+ SwapCodeRegion();
+ patch.PatchFunc = GetRXPtr();
+
+ LoadStorePatches[firstLoadStoreOffset] = patch;
+
+ ABI_PushRegisters({30});
+ }
+
+ int i = 0;
+
+ SUB(SP, SP, ((regsCount + 1) & ~1) * 8);
+ if (store)
+ {
+ if (usermode && (regs & BitSet16(0x7f00)))
+ UBFX(W5, RCPSR, 0, 5);
BitSet16::Iterator it = regs.begin();
while (it != regs.end())
@@ -641,7 +580,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
if (usermode && reg >= 8 && reg < 15)
{
- if (RegCache.Mapping[reg] != INVALID_REG)
+ if (RegCache.LoadedRegs & (1 << reg))
MOV(W3, MapReg(reg));
else
LoadReg(reg, W3);
@@ -651,55 +590,67 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
}
else if (!usermode && nextReg != regs.end())
{
- ARM64Reg first = W3;
- ARM64Reg second = W4;
+ ARM64Reg first = W3, second = W4;
- if (RegCache.Mapping[reg] != INVALID_REG)
+ if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
else
LoadReg(reg, W3);
- if (RegCache.Mapping[*nextReg] != INVALID_REG)
+ if (RegCache.LoadedRegs & (1 << *nextReg))
second = MapReg(*nextReg);
else
LoadReg(*nextReg, W4);
- STP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, i * 8 - 8);
+ STP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8);
- i--;
+ i++;
it++;
}
- else if (RegCache.Mapping[reg] != INVALID_REG)
+ else if (RegCache.LoadedRegs & (1 << reg))
+ {
STR(INDEX_UNSIGNED, MapReg(reg), SP, i * 8);
+ }
else
{
LoadReg(reg, W3);
STR(INDEX_UNSIGNED, W3, SP, i * 8);
}
- i--;
+ i++;
it++;
}
}
- if (decrement)
- {
- SUB(W0, MapReg(rn), regsCount * 4);
- preinc ^= true;
- }
- else
- MOV(W0, MapReg(rn));
+
ADD(X1, SP, 0);
MOVI2R(W2, regsCount);
- BL(Num ? MemFuncsSeq7[store][preinc] : MemFuncsSeq9[store][preinc]);
+ if (Num == 0)
+ {
+ MOV(X3, RCPU);
+ switch (preinc * 2 | store)
+ {
+ case 0: QuickCallFunction(X4, SlowBlockTransfer9<false, false>); break;
+ case 1: QuickCallFunction(X4, SlowBlockTransfer9<false, true>); break;
+ case 2: QuickCallFunction(X4, SlowBlockTransfer9<true, false>); break;
+ case 3: QuickCallFunction(X4, SlowBlockTransfer9<true, true>); break;
+ }
+ }
+ else
+ {
+ switch (preinc * 2 | store)
+ {
+ case 0: QuickCallFunction(X4, SlowBlockTransfer7<false, false>); break;
+ case 1: QuickCallFunction(X4, SlowBlockTransfer7<false, true>); break;
+ case 2: QuickCallFunction(X4, SlowBlockTransfer7<true, false>); break;
+ case 3: QuickCallFunction(X4, SlowBlockTransfer7<true, true>); break;
+ }
+ }
if (!store)
{
- Comp_AddCycles_CDI();
-
if (usermode && !regs[15] && (regs & BitSet16(0x7f00)))
- UBFX(W0, RCPSR, 0, 5);
+ UBFX(W5, RCPSR, 0, 5);
- int i = regsCount - 1;
BitSet16::Iterator it = regs.begin();
while (it != regs.end())
{
@@ -714,11 +665,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
MOVI2R(W1, reg - 8);
BL(WriteBanked);
FixupBranch alreadyWritten = CBNZ(W4);
- if (RegCache.Mapping[reg] != INVALID_REG)
- {
+ if (RegCache.LoadedRegs & (1 << reg))
MOV(MapReg(reg), W3);
- RegCache.DirtyRegs |= 1 << reg;
- }
else
SaveReg(reg, W3);
SetJumpTarget(alreadyWritten);
@@ -727,20 +675,12 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
{
ARM64Reg first = W3, second = W4;
- if (RegCache.Mapping[reg] != INVALID_REG)
- {
+ if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
- if (reg != 15)
- RegCache.DirtyRegs |= 1 << reg;
- }
- if (RegCache.Mapping[*nextReg] != INVALID_REG)
- {
+ if (RegCache.LoadedRegs & (1 << *nextReg))
second = MapReg(*nextReg);
- if (*nextReg != 15)
- RegCache.DirtyRegs |= 1 << *nextReg;
- }
- LDP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, i * 8 - 8);
+ LDP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8);
if (first == W3)
SaveReg(reg, W3);
@@ -748,15 +688,12 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
SaveReg(*nextReg, W4);
it++;
- i--;
+ i++;
}
- else if (RegCache.Mapping[reg] != INVALID_REG)
+ else if (RegCache.LoadedRegs & (1 << reg))
{
ARM64Reg mapped = MapReg(reg);
LDR(INDEX_UNSIGNED, mapped, SP, i * 8);
-
- if (reg != 15)
- RegCache.DirtyRegs |= 1 << reg;
}
else
{
@@ -765,11 +702,20 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
}
it++;
- i--;
+ i++;
}
}
ADD(SP, SP, ((regsCount + 1) & ~1) * 8);
+ if (compileFastPath)
+ {
+ ABI_PopRegisters({30});
+ RET();
+
+ FlushIcacheSection((u8*)patch.PatchFunc, (u8*)GetRXPtr());
+ SwapCodeRegion();
+ }
+
if (!store && regs[15])
{
ARM64Reg mapped = MapReg(15);
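
Note: the rewritten Comp_MemAccess above gambles on fastmem. When JIT_FastMemory is on, it emits a direct LDR/STR against FastMem9Start/FastMem7Start and records a LoadStorePatch; conditional instructions may take that path even for unmappable regions, because a fault simply gets rewritten to the slow path later. A compressed sketch of that per-access decision follows; the two predicates are placeholders for ClassifyAddress9/7 + IsMappable and for the config switch.

    #include <cstdint>
    #include <cstdio>

    enum class Path { FastMem, SlowCall };

    static bool fastMemEnabled = true;                                           // config switch stand-in
    static bool regionIsMappable(uint32_t addr) { return (addr >> 24) == 0x02; } // e.g. only main RAM here

    // roughly the choice made per compiled memory access: conditional
    // instructions can always try fastmem, since a fault just ends up
    // rewritten to a SlowRead*/SlowWrite* call
    static Path chooseAccessPath(uint32_t addr, bool conditional)
    {
        if (fastMemEnabled && (conditional || regionIsMappable(addr)))
            return Path::FastMem;   // direct LDR/STR + LoadStorePatch entry
        return Path::SlowCall;      // immediate call to the slow helpers
    }

    int main()
    {
        printf("%d %d\n",
               (int)chooseAccessPath(0x02000000, false),   // main RAM -> fastmem (0)
               (int)chooseAccessPath(0x04000010, false));  // IO -> slow call (1)
        return 0;
    }
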
diff --git a/src/ARMJIT_Compiler.h b/src/ARMJIT_Compiler.h
new file mode 100644
index 0000000..513c103
--- /dev/null
+++ b/src/ARMJIT_Compiler.h
@@ -0,0 +1,12 @@
+#if defined(__x86_64__)
+#include "ARMJIT_x64/ARMJIT_Compiler.h"
+#elif defined(__aarch64__)
+#include "ARMJIT_A64/ARMJIT_Compiler.h"
+#else
+#error "The current target platform doesn't have a JIT backend"
+#endif
+
+namespace ARMJIT
+{
+extern Compiler* JITCompiler;
+} \ No newline at end of file
diff --git a/src/ARMJIT_Internal.h b/src/ARMJIT_Internal.h
index 4e45760..19684c4 100644
--- a/src/ARMJIT_Internal.h
+++ b/src/ARMJIT_Internal.h
@@ -3,8 +3,11 @@
#include "types.h"
#include <stdint.h>
+#include <string.h>
+#include <assert.h>
#include "ARMJIT.h"
+#include "ARMJIT_Memory.h"
// here lands everything which doesn't fit into ARMJIT.h
// where it would be included by pretty much everything
@@ -160,8 +163,8 @@ public:
Data.SetLength(numAddresses * 2 + numLiterals);
}
- u32 PseudoPhysicalAddr;
-
+ u32 StartAddr;
+ u32 StartAddrLocal;
u32 InstrHash, LiteralHash;
u8 Num;
u16 NumAddresses;
@@ -175,28 +178,8 @@ public:
{ return &Data[NumAddresses]; }
u32* Literals()
{ return &Data[NumAddresses * 2]; }
- u32* Links()
- { return &Data[NumAddresses * 2 + NumLiterals]; }
-
- u32 NumLinks()
- { return Data.Length - NumAddresses * 2 - NumLiterals; }
-
- void AddLink(u32 link)
- {
- Data.Add(link);
- }
-
- void ResetLinks()
- {
- Data.SetLength(NumAddresses * 2 + NumLiterals);
- }
private:
- /*
- 0..<NumInstrs - the instructions of the block
- NumInstrs..<(NumLinks + NumInstrs) - pseudo physical addresses where the block is located
- (atleast one, the pseudo physical address of the block)
- */
TinyVector<u32> Data;
};
@@ -207,45 +190,32 @@ struct __attribute__((packed)) AddressRange
u32 Code;
};
-extern AddressRange CodeRanges[ExeMemSpaceSize / 512];
typedef void (*InterpreterFunc)(ARM* cpu);
extern InterpreterFunc InterpretARM[];
extern InterpreterFunc InterpretTHUMB[];
-extern u8 MemoryStatus9[0x800000];
-extern u8 MemoryStatus7[0x800000];
-
extern TinyVector<u32> InvalidLiterals;
-void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size);
-
-template <u32 Num>
-void LinkBlock(ARM* cpu, u32 codeOffset);
+extern AddressRange* const CodeMemRegions[ARMJIT_Memory::memregions_Count];
-enum
+inline bool PageContainsCode(AddressRange* range)
{
- memregion_Other = 0,
- memregion_ITCM,
- memregion_DTCM,
- memregion_BIOS9,
- memregion_MainRAM,
- memregion_SWRAM9,
- memregion_SWRAM7,
- memregion_IO9,
- memregion_VRAM,
- memregion_BIOS7,
- memregion_WRAM7,
- memregion_IO7,
- memregion_Wifi,
- memregion_VWRAM,
-};
+ for (int i = 0; i < 8; i++)
+ {
+ if (range[i].Blocks.Length > 0)
+ return true;
+ }
+ return false;
+}
+
+u32 LocaliseCodeAddress(u32 num, u32 addr);
-int ClassifyAddress9(u32 addr);
-int ClassifyAddress7(u32 addr);
+template <u32 Num>
+void LinkBlock(ARM* cpu, u32 codeOffset);
-template <typename T> T SlowRead9(ARMv5* cpu, u32 addr);
-template <typename T> void SlowWrite9(ARMv5* cpu, u32 addr, T val);
+template <typename T> T SlowRead9(u32 addr, ARMv5* cpu);
+template <typename T> void SlowWrite9(u32 addr, ARMv5* cpu, T val);
template <typename T> T SlowRead7(u32 addr);
template <typename T> void SlowWrite7(u32 addr, T val);
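
Note: PageContainsCode above is what the memory mapper uses to decide whether a 4 KB page may stay writable. Each 512-byte AddressRange tracks the blocks overlapping it, so one page covers eight of them. A small sketch of the same check over a toy array, with AddressRange reduced to just a block count; how the result feeds the writable/protected decision is visible further down in ARMJIT_Memory.cpp.

    #include <cstdint>
    #include <cstdio>

    // reduced AddressRange: only the per-512-byte block count matters here
    struct AddressRange { uint16_t BlockCount; };

    // mirrors PageContainsCode: a 4 KB page spans eight 512-byte ranges
    static bool pageContainsCode(const AddressRange* range)
    {
        for (int i = 0; i < 8; i++)
            if (range[i].BlockCount > 0)
                return true;
        return false;
    }

    int main()
    {
        AddressRange region[16] = {};   // two pages worth of ranges
        region[9].BlockCount = 1;       // a block was compiled somewhere in the second page

        for (int page = 0; page < 2; page++)
            printf("page %d: %s\n", page,
                   pageContainsCode(&region[page * 8]) ? "keep protected (contains code)"
                                                       : "map read-write");
        return 0;
    }
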
diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp
new file mode 100644
index 0000000..162827d
--- /dev/null
+++ b/src/ARMJIT_Memory.cpp
@@ -0,0 +1,822 @@
+#ifdef __SWITCH__
+#include "switch/compat_switch.h"
+#endif
+
+#include "ARMJIT_Memory.h"
+
+#include "ARMJIT_Internal.h"
+#include "ARMJIT_Compiler.h"
+
+#include "GPU.h"
+#include "GPU3D.h"
+#include "Wifi.h"
+#include "NDSCart.h"
+#include "SPU.h"
+
+#include <malloc.h>
+
+/*
+ We're handling fastmem here.
+
+ Basically we're repurposing a big piece of virtual memory
+ and mapping the memory regions into it as they're
+ structured on the DS.
+
+ On most systems you have a single piece of main ram,
+ maybe some video ram and faster cache RAM and that's about it.
+ Here we have not only many more distinct memory regions,
+ but also two address spaces. On top of that they all have
+ mirrors (the worst case is 16 KB SWRAM, which is mirrored 1024x).
+
+ We handle this by only mapping those regions which are actually
+ used and by praying the games don't go wild.
+
+ Beware, this file is full of platform specific code.
+
+*/
+
+namespace ARMJIT_Memory
+{
+#ifdef __aarch64__
+struct FaultDescription
+{
+ u64 IntegerRegisters[33];
+ u64 FaultAddr;
+
+ u32 GetEmulatedAddr()
+ {
+ // now this is podracing
+ return (u32)IntegerRegisters[0];
+ }
+ u64 RealAddr()
+ {
+ return FaultAddr;
+ }
+
+ u64 GetPC()
+ {
+ return IntegerRegisters[32];
+ }
+
+ void RestoreAndRepeat(s64 offset);
+};
+#else
+struct FaultDescription
+{
+ u64 GetPC()
+ {
+ return 0;
+ }
+
+ u32 GetEmulatedAddr()
+ {
+ return 0;
+ }
+ u64 RealAddr()
+ {
+ return 0;
+ }
+
+ void RestoreAndRepeat(s64 offset);
+};
+#endif
+
+void FaultHandler(FaultDescription* faultDesc);
+}
+
+
+#ifdef __aarch64__
+
+extern "C" void ARM_RestoreContext(u64* registers) __attribute__((noreturn));
+
+#endif
+
+#ifdef __SWITCH__
+// with LTO the symbols don't seem to be properly overridden
+// if they're defined somewhere else
+
+extern "C"
+{
+extern char __start__;
+extern char __rodata_start;
+
+alignas(16) u8 __nx_exception_stack[0x8000];
+u64 __nx_exception_stack_size = 0x8000;
+
+void __libnx_exception_handler(ThreadExceptionDump* ctx)
+{
+ ARMJIT_Memory::FaultDescription desc;
+ memcpy(desc.IntegerRegisters, &ctx->cpu_gprs[0].x, 8*29);
+ desc.IntegerRegisters[29] = ctx->fp.x;
+ desc.IntegerRegisters[30] = ctx->lr.x;
+ desc.IntegerRegisters[31] = ctx->sp.x;
+ desc.IntegerRegisters[32] = ctx->pc.x;
+
+ ARMJIT_Memory::FaultHandler(&desc);
+
+ if (ctx->pc.x >= (u64)&__start__ && ctx->pc.x < (u64)&__rodata_start)
+ {
+ printf("non JIT fault in .text at 0x%x (type %d) (trying to access 0x%x?)\n",
+ ctx->pc.x - (u64)&__start__, ctx->error_desc, ctx->far.x);
+ }
+ else
+ {
+ printf("non JIT fault somewhere in deep (address) space at %x (type %d)\n", ctx->pc.x, ctx->error_desc);
+ }
+}
+
+}
+#endif
+
+namespace ARMJIT_Memory
+{
+
+#ifdef __aarch64__
+void FaultDescription::RestoreAndRepeat(s64 offset)
+{
+ IntegerRegisters[32] += offset;
+
+ ARM_RestoreContext(IntegerRegisters);
+}
+#else
+void FaultDescription::RestoreAndRepeat(s64 offset)
+{
+
+}
+#endif
+
+void* FastMem9Start, *FastMem7Start;
+
+const u32 MemoryTotalSize =
+ NDS::MainRAMSize
+ + NDS::SharedWRAMSize
+ + NDS::ARM7WRAMSize
+ + DTCMPhysicalSize;
+
+const u32 MemBlockMainRAMOffset = 0;
+const u32 MemBlockSWRAMOffset = NDS::MainRAMSize;
+const u32 MemBlockARM7WRAMOffset = NDS::MainRAMSize + NDS::SharedWRAMSize;
+const u32 MemBlockDTCMOffset = NDS::MainRAMSize + NDS::SharedWRAMSize + NDS::ARM7WRAMSize;
+
+const u32 OffsetsPerRegion[memregions_Count] =
+{
+ UINT32_MAX,
+ UINT32_MAX,
+ MemBlockDTCMOffset,
+ UINT32_MAX,
+ MemBlockMainRAMOffset,
+ MemBlockSWRAMOffset,
+ UINT32_MAX,
+ UINT32_MAX,
+ UINT32_MAX,
+ MemBlockARM7WRAMOffset,
+ UINT32_MAX,
+ UINT32_MAX,
+ UINT32_MAX,
+};
+
+enum
+{
+ memstate_Unmapped,
+ memstate_MappedRW,
+ // on switch this is unmapped as well
+ memstate_MappedProtected,
+};
+
+u8 MappingStatus9[1 << (32-12)];
+u8 MappingStatus7[1 << (32-12)];
+
+#ifdef __SWITCH__
+u8* MemoryBase;
+u8* MemoryBaseCodeMem;
+#else
+u8* MemoryBase;
+#endif
+
+bool MapIntoRange(u32 addr, u32 num, u32 offset, u32 size)
+{
+ u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr;
+#ifdef __SWITCH__
+ Result r = (svcMapProcessMemory(dst, envGetOwnProcessHandle(),
+ (u64)(MemoryBaseCodeMem + offset), size));
+ return R_SUCCEEDED(r);
+#endif
+}
+
+bool UnmapFromRange(u32 addr, u32 num, u32 offset, u32 size)
+{
+ u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr;
+#ifdef __SWITCH__
+ Result r = svcUnmapProcessMemory(dst, envGetOwnProcessHandle(),
+ (u64)(MemoryBaseCodeMem + offset), size);
+ printf("%x\n", r);
+ return R_SUCCEEDED(r);
+#endif
+}
+
+struct Mapping
+{
+ u32 Addr;
+ u32 Size, LocalOffset;
+ u32 Num;
+
+ void Unmap(int region)
+ {
+ bool skipDTCM = Num == 0 && region != memregion_DTCM;
+ u8* statuses = Num == 0 ? MappingStatus9 : MappingStatus7;
+ u32 offset = 0;
+ while (offset < Size)
+ {
+ if (skipDTCM && Addr + offset == NDS::ARM9->DTCMBase)
+ {
+ offset += NDS::ARM9->DTCMSize;
+ printf("%x skip\n", NDS::ARM9->DTCMSize);
+ }
+ else
+ {
+ u32 segmentOffset = offset;
+ u8 status = statuses[(Addr + offset) >> 12];
+ while (statuses[(Addr + offset) >> 12] == status
+ && offset < Size
+ && (!skipDTCM || Addr + offset != NDS::ARM9->DTCMBase))
+ {
+ assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped);
+ statuses[(Addr + offset) >> 12] = memstate_Unmapped;
+ offset += 0x1000;
+ }
+
+ if (status == memstate_MappedRW)
+ {
+ u32 segmentSize = offset - segmentOffset;
+ printf("unmapping %x %x %x %x\n", Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize);
+ bool success = UnmapFromRange(Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize);
+ assert(success);
+ }
+ }
+ }
+ }
+};
+ARMJIT::TinyVector<Mapping> Mappings[memregions_Count];
+
+void SetCodeProtection(int region, u32 offset, bool protect)
+{
+ offset &= ~0xFFF;
+ printf("set code protection %d %x %d\n", region, offset, protect);
+
+ for (int i = 0; i < Mappings[region].Length; i++)
+ {
+ Mapping& mapping = Mappings[region][i];
+
+ u32 effectiveAddr = mapping.Addr + (offset - mapping.LocalOffset);
+ if (mapping.Num == 0
+ && region != memregion_DTCM
+ && effectiveAddr >= NDS::ARM9->DTCMBase
+ && effectiveAddr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize))
+ continue;
+
+ u8* states = (u8*)(mapping.Num == 0 ? MappingStatus9 : MappingStatus7);
+
+ printf("%d %x %d\n", states[effectiveAddr >> 12], effectiveAddr, mapping.Num);
+ assert(states[effectiveAddr >> 12] == (protect ? memstate_MappedRW : memstate_MappedProtected));
+ states[effectiveAddr >> 12] = protect ? memstate_MappedProtected : memstate_MappedRW;
+
+ bool success;
+ if (protect)
+ success = UnmapFromRange(effectiveAddr, mapping.Num, OffsetsPerRegion[region] + offset, 0x1000);
+ else
+ success = MapIntoRange(effectiveAddr, mapping.Num, OffsetsPerRegion[region] + offset, 0x1000);
+ assert(success);
+ }
+}
+
+void RemapDTCM(u32 newBase, u32 newSize)
+{
+ // this first part could be made more efficient
+ // by unmapping DTCM first and then mapping the holes
+ u32 oldDTCMBase = NDS::ARM9->DTCMBase;
+ u32 oldDTCBEnd = oldDTCMBase + NDS::ARM9->DTCMSize;
+
+ u32 newEnd = newBase + newSize;
+
+ printf("remapping DTCM %x %x %x %x\n", newBase, newEnd, oldDTCMBase, oldDTCBEnd);
+ // unmap all mappings overlapping the old or the new DTCM region
+ for (int region = 0; region < memregions_Count; region++)
+ {
+ if (region == memregion_DTCM)
+ continue;
+
+ for (int i = 0; i < Mappings[region].Length;)
+ {
+ Mapping& mapping = Mappings[region][i];
+
+ u32 start = mapping.Addr;
+ u32 end = mapping.Addr + mapping.Size;
+
+ printf("mapping %d %x %x %x %x\n", region, mapping.Addr, mapping.Size, mapping.Num, mapping.LocalOffset);
+
+ bool oldOverlap = NDS::ARM9->DTCMSize > 0 && ((oldDTCMBase >= start && oldDTCMBase < end) || (oldDTCBEnd >= start && oldDTCBEnd < end));
+ bool newOverlap = newSize > 0 && ((newBase >= start && newBase < end) || (newEnd >= start && newEnd < end));
+
+ if (mapping.Num == 0 && (oldOverlap || newOverlap))
+ {
+ mapping.Unmap(region);
+ Mappings[region].Remove(i);
+ }
+ else
+ {
+ i++;
+ }
+ }
+ }
+
+ for (int i = 0; i < Mappings[memregion_DTCM].Length; i++)
+ {
+ Mappings[memregion_DTCM][i].Unmap(memregion_DTCM);
+ }
+ Mappings[memregion_DTCM].Clear();
+}
+
+void RemapSWRAM()
+{
+ printf("remapping SWRAM\n");
+ for (int i = 0; i < Mappings[memregion_SWRAM].Length; i++)
+ {
+ Mappings[memregion_SWRAM][i].Unmap(memregion_SWRAM);
+ }
+ Mappings[memregion_SWRAM].Clear();
+ for (int i = 0; i < Mappings[memregion_WRAM7].Length; i++)
+ {
+ Mappings[memregion_WRAM7][i].Unmap(memregion_WRAM7);
+ }
+ Mappings[memregion_WRAM7].Clear();
+}
+
+bool MapAtAddress(u32 addr)
+{
+ u32 num = NDS::CurCPU;
+
+ int region = num == 0
+ ? ClassifyAddress9(addr)
+ : ClassifyAddress7(addr);
+
+ if (!IsMappable(region))
+ return false;
+
+ u32 mappingStart, mappingSize, memoryOffset, memorySize;
+ bool isMapped = GetRegionMapping(region, num, mappingStart, mappingSize, memoryOffset, memorySize);
+
+ if (!isMapped)
+ return false;
+
+ // this calculation even works with DTCM
+ // which doesn't have to be aligned to its own size
+ u32 mirrorStart = (addr - mappingStart) / memorySize * memorySize + mappingStart;
+
+ u8* states = num == 0 ? MappingStatus9 : MappingStatus7;
+ printf("trying to create mapping %08x %d %x %d %x\n", addr, num, memorySize, region, memoryOffset);
+ bool isExecutable = ARMJIT::CodeMemRegions[region];
+
+ ARMJIT::AddressRange* range = ARMJIT::CodeMemRegions[region] + memoryOffset;
+
+ // this overcomplicated piece of code basically just finds contiguous runs of pages
+ // without code in them, which can then be mapped
+ u32 offset = 0;
+ bool skipDTCM = num == 0 && region != memregion_DTCM;
+ while (offset < memorySize)
+ {
+ if (skipDTCM && mirrorStart + offset == NDS::ARM9->DTCMBase)
+ {
+ offset += NDS::ARM9->DTCMSize;
+ }
+ else
+ {
+ u32 sectionOffset = offset;
+ bool hasCode = isExecutable && ARMJIT::PageContainsCode(&range[offset / 512]);
+ while ((!isExecutable || ARMJIT::PageContainsCode(&range[offset / 512]) == hasCode)
+ && offset < memorySize
+ && (!skipDTCM || mirrorStart + offset != NDS::ARM9->DTCMBase))
+ {
+ assert(states[(mirrorStart + offset) >> 12] == memstate_Unmapped);
+ states[(mirrorStart + offset) >> 12] = hasCode ? memstate_MappedProtected : memstate_MappedRW;
+ offset += 0x1000;
+ }
+
+ u32 sectionSize = offset - sectionOffset;
+
+ if (!hasCode)
+ {
+ printf("trying to map %x (size: %x) from %x\n", mirrorStart + sectionOffset, sectionSize, sectionOffset + memoryOffset + OffsetsPerRegion[region]);
+ bool succeded = MapIntoRange(mirrorStart + sectionOffset, num, sectionOffset + memoryOffset + OffsetsPerRegion[region], sectionSize);
+ assert(succeded);
+ }
+ }
+ }
+
+ Mapping mapping{mirrorStart, memorySize, memoryOffset, num};
+ Mappings[region].Add(mapping);
+
+ printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + memorySize - 1);
+
+ return true;
+}
+
+void FaultHandler(FaultDescription* faultDesc)
+{
+ if (ARMJIT::JITCompiler->IsJITFault(faultDesc->GetPC()))
+ {
+ bool rewriteToSlowPath = true;
+
+ u32 addr = faultDesc->GetEmulatedAddr();
+
+ if ((NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7)[addr >> 12] == memstate_Unmapped)
+ rewriteToSlowPath = !MapAtAddress(faultDesc->GetEmulatedAddr());
+
+ s64 offset = 0;
+ if (rewriteToSlowPath)
+ {
+ offset = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc->GetPC());
+ }
+ faultDesc->RestoreAndRepeat(offset);
+ }
+}
+
+void Init()
+{
+#if defined(__SWITCH__)
+ MemoryBase = (u8*)memalign(0x1000, MemoryTotalSize);
+ MemoryBaseCodeMem = (u8*)virtmemReserve(MemoryTotalSize);
+
+ bool succeded = R_SUCCEEDED(svcMapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem,
+ (u64)MemoryBase, MemoryTotalSize));
+ assert(succeded);
+ succeded = R_SUCCEEDED(svcSetProcessMemoryPermission(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem,
+ MemoryTotalSize, Perm_Rw));
+ assert(succeded);
+
+ // 8 GB of address space, just don't ask...
+ FastMem9Start = virtmemReserve(0x100000000);
+ assert(FastMem9Start);
+ FastMem7Start = virtmemReserve(0x100000000);
+ assert(FastMem7Start);
+
+ NDS::MainRAM = MemoryBaseCodeMem + MemBlockMainRAMOffset;
+ NDS::SharedWRAM = MemoryBaseCodeMem + MemBlockSWRAMOffset;
+ NDS::ARM7WRAM = MemoryBaseCodeMem + MemBlockARM7WRAMOffset;
+ NDS::ARM9->DTCM = MemoryBaseCodeMem + MemBlockDTCMOffset;
+#else
+ MemoryBase = new u8[MemoryTotalSize];
+
+ NDS::MainRAM = MemoryBase + MemBlockMainRAMOffset;
+ NDS::SharedWRAM = MemoryBase + MemBlockSWRAMOffset;
+ NDS::ARM7WRAM = MemoryBase + MemBlockARM7WRAMOffset;
+ NDS::ARM9->DTCM = MemoryBase + MemBlockDTCMOffset;
+#endif
+}
+
+void DeInit()
+{
+#if defined(__SWITCH__)
+ virtmemFree(FastMem9Start, 0x100000000);
+ virtmemFree(FastMem7Start, 0x100000000);
+
+ svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize);
+ virtmemFree(MemoryBaseCodeMem, MemoryTotalSize);
+ free(MemoryBase);
+#else
+ delete[] MemoryBase;
+#endif
+}
+
+void Reset()
+{
+ for (int region = 0; region < memregions_Count; region++)
+ {
+ for (int i = 0; i < Mappings[region].Length; i++)
+ Mappings[region][i].Unmap(region);
+ Mappings[region].Clear();
+ }
+
+ for (int i = 0; i < sizeof(MappingStatus9); i++)
+ {
+ assert(MappingStatus9[i] == memstate_Unmapped);
+ assert(MappingStatus7[i] == memstate_Unmapped);
+ }
+
+ printf("done resetting jit mem\n");
+}
+
+bool IsMappable(int region)
+{
+ return OffsetsPerRegion[region] != UINT32_MAX;
+}
+
+bool GetRegionMapping(int region, u32 num, u32& mappingStart, u32& mappingSize, u32& memoryOffset, u32& memorySize)
+{
+ memoryOffset = 0;
+ switch (region)
+ {
+ case memregion_ITCM:
+ if (num == 0)
+ {
+ mappingStart = 0;
+ mappingSize = NDS::ARM9->ITCMSize;
+ memorySize = ITCMPhysicalSize;
+ return true;
+ }
+ return false;
+ case memregion_DTCM:
+ if (num == 0)
+ {
+ mappingStart = NDS::ARM9->DTCMBase;
+ mappingSize = NDS::ARM9->DTCMSize;
+ memorySize = DTCMPhysicalSize;
+ return true;
+ }
+ return false;
+ case memregion_BIOS9:
+ if (num == 0)
+ {
+ mappingStart = 0xFFFF0000;
+ mappingSize = 0x10000;
+ memorySize = 0x1000;
+ return true;
+ }
+ return false;
+ case memregion_MainRAM:
+ mappingStart = 0x2000000;
+ mappingSize = 0x1000000;
+ memorySize = NDS::MainRAMSize;
+ return true;
+ case memregion_SWRAM:
+ mappingStart = 0x3000000;
+ if (num == 0 && NDS::SWRAM_ARM9.Mem)
+ {
+ mappingSize = 0x1000000;
+ memoryOffset = NDS::SWRAM_ARM9.Mem - NDS::SharedWRAM;
+ memorySize = NDS::SWRAM_ARM9.Mask + 1;
+ return true;
+ }
+ else if (num == 1 && NDS::SWRAM_ARM7.Mem)
+ {
+ mappingSize = 0x800000;
+ memoryOffset = NDS::SWRAM_ARM7.Mem - NDS::SharedWRAM;
+ memorySize = NDS::SWRAM_ARM7.Mask + 1;
+ return true;
+ }
+ return false;
+ case memregion_VRAM:
+ if (num == 0)
+ {
+ // this is a gross simplification
+ // mostly to make code running from VRAM work
+ // it doesn't take any of the actual VRAM mappings into account
+ mappingStart = 0x6000000;
+ mappingSize = 0x1000000;
+ memorySize = 0x100000;
+ return true;
+ }
+ return false;
+ case memregion_BIOS7:
+ if (num == 1)
+ {
+ mappingStart = 0;
+ mappingSize = 0x4000;
+ memorySize = 0x4000;
+ return true;
+ }
+ return false;
+ case memregion_WRAM7:
+ if (num == 1)
+ {
+ if (NDS::SWRAM_ARM7.Mem)
+ {
+ mappingStart = 0x3800000;
+ mappingSize = 0x800000;
+ }
+ else
+ {
+ mappingStart = 0x3000000;
+ mappingSize = 0x1000000;
+ }
+ memorySize = NDS::ARM7WRAMSize;
+ return true;
+ }
+ return false;
+ case memregion_VWRAM:
+ if (num == 1)
+ {
+ mappingStart = 0x6000000;
+ mappingSize = 0x1000000;
+ memorySize = 0x20000;
+ return true;
+ }
+ return false;
+ default:
+ // for the JIT we don't care about the rest
+ return false;
+ }
+}
+
+int ClassifyAddress9(u32 addr)
+{
+ if (addr < NDS::ARM9->ITCMSize)
+ return memregion_ITCM;
+ else if (addr >= NDS::ARM9->DTCMBase && addr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize))
+ return memregion_DTCM;
+ else if ((addr & 0xFFFFF000) == 0xFFFF0000)
+ return memregion_BIOS9;
+ else
+ {
+ switch (addr & 0xFF000000)
+ {
+ case 0x02000000:
+ return memregion_MainRAM;
+ case 0x03000000:
+ if (NDS::SWRAM_ARM9.Mem)
+ return memregion_SWRAM;
+ else
+ return memregion_Other;
+ case 0x04000000:
+ return memregion_IO9;
+ case 0x06000000:
+ return memregion_VRAM;
+ }
+ }
+ return memregion_Other;
+}
+
+int ClassifyAddress7(u32 addr)
+{
+ if (addr < 0x00004000)
+ return memregion_BIOS7;
+ else
+ {
+ switch (addr & 0xFF800000)
+ {
+ case 0x02000000:
+ case 0x02800000:
+ return memregion_MainRAM;
+ case 0x03000000:
+ if (NDS::SWRAM_ARM7.Mem)
+ return memregion_SWRAM;
+ else
+ return memregion_WRAM7;
+ case 0x03800000:
+ return memregion_WRAM7;
+ case 0x04000000:
+ return memregion_IO7;
+ case 0x04800000:
+ return memregion_Wifi;
+ case 0x06000000:
+ case 0x06800000:
+ return memregion_VWRAM;
+ }
+ }
+ return memregion_Other;
+}
+
+void WifiWrite32(u32 addr, u32 val)
+{
+ Wifi::Write(addr, val & 0xFFFF);
+ Wifi::Write(addr + 2, val >> 16);
+}
+
+u32 WifiRead32(u32 addr)
+{
+ return Wifi::Read(addr) | (Wifi::Read(addr + 2) << 16);
+}
+
+template <typename T>
+void VRAMWrite(u32 addr, T val)
+{
+ switch (addr & 0x00E00000)
+ {
+ case 0x00000000: GPU::WriteVRAM_ABG<T>(addr, val); return;
+ case 0x00200000: GPU::WriteVRAM_BBG<T>(addr, val); return;
+ case 0x00400000: GPU::WriteVRAM_AOBJ<T>(addr, val); return;
+ case 0x00600000: GPU::WriteVRAM_BOBJ<T>(addr, val); return;
+ default: GPU::WriteVRAM_LCDC<T>(addr, val); return;
+ }
+}
+template <typename T>
+T VRAMRead(u32 addr)
+{
+ switch (addr & 0x00E00000)
+ {
+ case 0x00000000: return GPU::ReadVRAM_ABG<T>(addr);
+ case 0x00200000: return GPU::ReadVRAM_BBG<T>(addr);
+ case 0x00400000: return GPU::ReadVRAM_AOBJ<T>(addr);
+ case 0x00600000: return GPU::ReadVRAM_BOBJ<T>(addr);
+ default: return GPU::ReadVRAM_LCDC<T>(addr);
+ }
+}
+
+void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size)
+{
+ if (cpu->Num == 0)
+ {
+ switch (addr & 0xFF000000)
+ {
+ case 0x04000000:
+ if (!store && size == 32 && addr == 0x04100010 && NDS::ExMemCnt[0] & (1<<11))
+ return (void*)NDSCart::ReadROMData;
+
+ /*
+ unfortunately we can't map GPU2D this way
+ since it's hidden inside an object
+
+ though GPU3D registers are accessed much more intensively
+ */
+ if (addr >= 0x04000320 && addr < 0x040006A4)
+ {
+ switch (size | store)
+ {
+ case 8: return (void*)GPU3D::Read8;
+ case 9: return (void*)GPU3D::Write8;
+ case 16: return (void*)GPU3D::Read16;
+ case 17: return (void*)GPU3D::Write16;
+ case 32: return (void*)GPU3D::Read32;
+ case 33: return (void*)GPU3D::Write32;
+ }
+ }
+
+ switch (size | store)
+ {
+ case 8: return (void*)NDS::ARM9IORead8;
+ case 9: return (void*)NDS::ARM9IOWrite8;
+ case 16: return (void*)NDS::ARM9IORead16;
+ case 17: return (void*)NDS::ARM9IOWrite16;
+ case 32: return (void*)NDS::ARM9IORead32;
+ case 33: return (void*)NDS::ARM9IOWrite32;
+ }
+ break;
+ case 0x06000000:
+ switch (size | store)
+ {
+ case 8: return (void*)VRAMRead<u8>;
+ case 9: return NULL;
+ case 16: return (void*)VRAMRead<u16>;
+ case 17: return (void*)VRAMWrite<u16>;
+ case 32: return (void*)VRAMRead<u32>;
+ case 33: return (void*)VRAMWrite<u32>;
+ }
+ break;
+ }
+ }
+ else
+ {
+ switch (addr & 0xFF800000)
+ {
+ case 0x04000000:
+ if (addr >= 0x04000400 && addr < 0x04000520)
+ {
+ switch (size | store)
+ {
+ case 8: return (void*)SPU::Read8;
+ case 9: return (void*)SPU::Write8;
+ case 16: return (void*)SPU::Read16;
+ case 17: return (void*)SPU::Write16;
+ case 32: return (void*)SPU::Read32;
+ case 33: return (void*)SPU::Write32;
+ }
+ }
+
+ switch (size | store)
+ {
+ case 8: return (void*)NDS::ARM7IORead8;
+ case 9: return (void*)NDS::ARM7IOWrite8;
+ case 16: return (void*)NDS::ARM7IORead16;
+ case 17: return (void*)NDS::ARM7IOWrite16;
+ case 32: return (void*)NDS::ARM7IORead32;
+ case 33: return (void*)NDS::ARM7IOWrite32;
+ }
+ break;
+ case 0x04800000:
+ if (addr < 0x04810000 && size >= 16)
+ {
+ switch (size | store)
+ {
+ case 16: return (void*)Wifi::Read;
+ case 17: return (void*)Wifi::Write;
+ case 32: return (void*)WifiRead32;
+ case 33: return (void*)WifiWrite32;
+ }
+ }
+ break;
+ case 0x06000000:
+ case 0x06800000:
+ switch (size | store)
+ {
+ case 8: return (void*)GPU::ReadVRAM_ARM7<u8>;
+ case 9: return (void*)GPU::WriteVRAM_ARM7<u8>;
+ case 16: return (void*)GPU::ReadVRAM_ARM7<u16>;
+ case 17: return (void*)GPU::WriteVRAM_ARM7<u16>;
+ case 32: return (void*)GPU::ReadVRAM_ARM7<u32>;
+ case 33: return (void*)GPU::WriteVRAM_ARM7<u32>;
+ }
+ }
+ }
+ return NULL;
+}
+
+} \ No newline at end of file
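
Note: the fault handler above is where fastmem pays off or bails out. A fault inside JIT code either maps the missing mirror on demand (MapAtAddress) or, if the page can't be mapped, asks the compiler to rewrite the faulting access to the slow path, and then resumes at the adjusted PC. Below is a rough sketch of that decision flow with the platform specifics stripped out; everything except the shape of the branches is a simplified stand-in.

    #include <cstdint>
    #include <cstdio>

    // simplified stand-ins for the pieces the fault handler consults
    static bool    isJITFault(uint64_t pc)       { return pc >= 0x1000; }
    static bool    pageIsUnmapped(uint32_t addr) { (void)addr; return true; }                   // pretend nothing is mapped yet
    static bool    mapAtAddress(uint32_t addr)   { return (addr & 0xFF000000) == 0x02000000; }  // only main RAM mirrors succeed
    static int64_t rewriteMemAccess(uint64_t pc) { (void)pc; return -8; }                       // patch starts 8 bytes earlier

    // shape of the handler: map on demand if possible, otherwise rewrite the
    // access to the slow path, then restore the context at pc + offset and retry
    static uint64_t handleFault(uint64_t pc, uint32_t emulatedAddr)
    {
        if (!isJITFault(pc))
            return pc;                                  // not ours, just report it

        int64_t offset = 0;
        if (!pageIsUnmapped(emulatedAddr) || !mapAtAddress(emulatedAddr))
            offset = rewriteMemAccess(pc);              // fall back to SlowRead*/SlowWrite*

        return pc + offset;
    }

    int main()
    {
        printf("%llx\n", (unsigned long long)handleFault(0x2000, 0x04000130)); // IO -> rewritten, retry earlier
        printf("%llx\n", (unsigned long long)handleFault(0x2000, 0x02000100)); // RAM -> mapped, retry in place
        return 0;
    }
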
diff --git a/src/ARMJIT_Memory.h b/src/ARMJIT_Memory.h
new file mode 100644
index 0000000..1a59d98
--- /dev/null
+++ b/src/ARMJIT_Memory.h
@@ -0,0 +1,53 @@
+#ifndef ARMJIT_MEMORY
+#define ARMJIT_MEMORY
+
+#include "types.h"
+
+#include "ARM.h"
+
+namespace ARMJIT_Memory
+{
+
+extern void* FastMem9Start;
+extern void* FastMem7Start;
+
+void Init();
+void DeInit();
+
+void Reset();
+
+enum
+{
+ memregion_Other = 0,
+ memregion_ITCM,
+ memregion_DTCM,
+ memregion_BIOS9,
+ memregion_MainRAM,
+ memregion_SWRAM,
+ memregion_IO9,
+ memregion_VRAM,
+ memregion_BIOS7,
+ memregion_WRAM7,
+ memregion_IO7,
+ memregion_Wifi,
+ memregion_VWRAM,
+ memregions_Count
+};
+
+int ClassifyAddress9(u32 addr);
+int ClassifyAddress7(u32 addr);
+
+bool GetRegionMapping(int region, u32 num, u32& mappingStart, u32& mappingSize, u32& memoryOffset, u32& memorySize);
+
+bool IsMappable(int region);
+
+void RemapDTCM(u32 newBase, u32 newSize);
+void RemapSWRAM();
+
+void SetCodeProtection(int region, u32 offset, bool protect);
+
+void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size);
+
+}
+
+#endif \ No newline at end of file
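
Note: one detail worth calling out from the mapping code above is the mirror arithmetic in MapAtAddress: the start of the mirror containing an address is found with plain integer division, which also works for regions like DTCM that aren't aligned to their own size. A tiny worked example; the DTCM base below is hypothetical, and main RAM on the DS is 4 MB mirrored through a 16 MB window.

    #include <cstdint>
    #include <cstdio>

    // mirrorStart = (addr - mappingStart) / memorySize * memorySize + mappingStart
    static uint32_t mirrorStart(uint32_t addr, uint32_t mappingStart, uint32_t memorySize)
    {
        return (addr - mappingStart) / memorySize * memorySize + mappingStart;
    }

    int main()
    {
        // main RAM: 4 MB of memory mirrored through the 16 MB window at 0x02000000
        printf("%08x\n", mirrorStart(0x02481234, 0x02000000, 0x400000)); // 02400000
        // a hypothetical DTCM base of 0x027C0000 with 16 KB of memory
        printf("%08x\n", mirrorStart(0x027C3F00, 0x027C0000, 0x4000));   // 027c0000
        return 0;
    }
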
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
index fd3fb70..34c1c91 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
@@ -301,24 +301,6 @@ Compiler::Compiler()
RET();
}
- {
- CPSRDirty = true;
- BranchStub[0] = GetWritableCodePtr();
- SaveCPSR();
- MOV(64, R(ABI_PARAM1), R(RCPU));
- CALL((u8*)ARMJIT::LinkBlock<0>);
- LoadCPSR();
- JMP((u8*)ARM_Ret, true);
-
- CPSRDirty = true;
- BranchStub[1] = GetWritableCodePtr();
- SaveCPSR();
- MOV(64, R(ABI_PARAM1), R(RCPU));
- CALL((u8*)ARMJIT::LinkBlock<1>);
- LoadCPSR();
- JMP((u8*)ARM_Ret, true);
- }
-
// move the region forward to prevent overwriting the generated functions
CodeMemSize -= GetWritableCodePtr() - ResetStart;
ResetStart = GetWritableCodePtr();
@@ -520,6 +502,11 @@ void Compiler::Reset()
FarCode = FarStart;
}
+bool Compiler::IsJITFault(u64 addr)
+{
+ return addr >= (u64)CodeMemory && addr < (u64)CodeMemory + sizeof(CodeMemory);
+}
+
void Compiler::Comp_SpecialBranchBehaviour(bool taken)
{
if (taken && CurInstr.BranchFlags & branch_IdleBranch)
@@ -531,32 +518,11 @@ void Compiler::Comp_SpecialBranchBehaviour(bool taken)
RegCache.PrepareExit();
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
-
- if (Config::JIT_BrancheOptimisations == 2 && !(CurInstr.BranchFlags & branch_IdleBranch)
- && (!taken || (CurInstr.BranchFlags & branch_StaticTarget)))
- {
- FixupBranch ret = J_CC(CC_S);
- CMP(32, MDisp(RCPU, offsetof(ARM, StopExecution)), Imm8(0));
- FixupBranch ret2 = J_CC(CC_NZ);
-
- u8* rewritePart = GetWritableCodePtr();
- NOP(5);
-
- MOV(32, R(ABI_PARAM2), Imm32(rewritePart - ResetStart));
- JMP((u8*)BranchStub[Num], true);
-
- SetJumpTarget(ret);
- SetJumpTarget(ret2);
- JMP((u8*)ARM_Ret, true);
- }
- else
- {
- JMP((u8*)&ARM_Ret, true);
- }
+ JMP((u8*)&ARM_Ret, true);
}
}
-JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
+JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
{
if (NearSize - (NearCode - NearStart) < 1024 * 32) // guess...
{
@@ -575,7 +541,7 @@ JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, F
CodeRegion = instrs[0].Addr >> 24;
CurCPU = cpu;
// CPSR might have been modified in a previous block
- CPSRDirty = Config::JIT_BrancheOptimisations == 2;
+ CPSRDirty = false;
JitBlockEntry res = (JitBlockEntry)GetWritableCodePtr();
@@ -685,31 +651,7 @@ JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, F
RegCache.Flush();
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
-
- if (Config::JIT_BrancheOptimisations == 2
- && !(instrs[instrsCount - 1].BranchFlags & branch_IdleBranch)
- && (!instrs[instrsCount - 1].Info.Branches()
- || instrs[instrsCount - 1].BranchFlags & branch_FollowCondNotTaken
- || (instrs[instrsCount - 1].BranchFlags & branch_FollowCondTaken && instrs[instrsCount - 1].BranchFlags & branch_StaticTarget)))
- {
- FixupBranch ret = J_CC(CC_S);
- CMP(32, MDisp(RCPU, offsetof(ARM, StopExecution)), Imm8(0));
- FixupBranch ret2 = J_CC(CC_NZ);
-
- u8* rewritePart = GetWritableCodePtr();
- NOP(5);
-
- MOV(32, R(ABI_PARAM2), Imm32(rewritePart - ResetStart));
- JMP((u8*)BranchStub[Num], true);
-
- SetJumpTarget(ret);
- SetJumpTarget(ret2);
- JMP((u8*)ARM_Ret, true);
- }
- else
- {
- JMP((u8*)ARM_Ret, true);
- }
+ JMP((u8*)ARM_Ret, true);
/*FILE* codeout = fopen("codeout", "a");
fprintf(codeout, "beginning block argargarg__ %x!!!", instrs[0].Addr);
@@ -720,22 +662,6 @@ JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, F
return res;
}
-void Compiler::LinkBlock(u32 offset, JitBlockEntry entry)
-{
- u8* curPtr = GetWritableCodePtr();
- SetCodePtr(ResetStart + offset);
- JMP((u8*)entry, true);
- SetCodePtr(curPtr);
-}
-
-void Compiler::UnlinkBlock(u32 offset)
-{
- u8* curPtr = GetWritableCodePtr();
- SetCodePtr(ResetStart + offset);
- NOP(5);
- SetCodePtr(curPtr);
-}
-
void Compiler::Comp_AddCycles_C(bool forceNonConstant)
{
s32 cycles = Num ?
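
Note (illustrative, not part of this patch): IsJITFault is the hook a fault-driven fastmem path needs; it tests whether the faulting host PC lies inside the emitted code buffer before any memory-access rewrite is attempted. A trivial sketch of the same predicate:

    #include <cstddef>
    #include <cstdint>

    // checks whether a faulting instruction pointer falls inside a JIT code buffer,
    // mirroring the new Compiler::IsJITFault check against CodeMemory
    bool InsideCodeBuffer(std::uintptr_t faultPC, const std::uint8_t* codeMem, std::size_t codeSize)
    {
        return faultPC >= (std::uintptr_t)codeMem
            && faultPC <  (std::uintptr_t)codeMem + codeSize;
    }
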
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h
index f2fc301..09ac257 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.h
@@ -52,10 +52,7 @@ public:
void Reset();
- void LinkBlock(u32 offset, JitBlockEntry entry);
- void UnlinkBlock(u32 offset);
-
- JitBlockEntry CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
+ JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
void LoadReg(int reg, Gen::X64Reg nativeReg);
void SaveReg(int reg, Gen::X64Reg nativeReg);
@@ -202,6 +199,10 @@ public:
SetCodePtr(FarCode);
}
+ bool IsJITFault(u64 addr);
+
+ s32 RewriteMemAccess(u64 pc);
+
u8* FarCode;
u8* NearCode;
u32 FarSize;
@@ -216,8 +217,6 @@ public:
bool Exit;
bool IrregularCycles;
- void* BranchStub[2];
-
void* ReadBanked;
void* WriteBanked;
diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
index cf0bd23..0bf2f83 100644
--- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
@@ -15,6 +15,11 @@ int squeezePointer(T* ptr)
return truncated;
}
+s32 Compiler::RewriteMemAccess(u64 pc)
+{
+ return 0;
+}
+
/*
According to DeSmuME and my own research, approx. 99% (seriously, that's an empirical number)
of all memory load and store instructions always access addresses in the same region as
@@ -27,14 +32,15 @@ int squeezePointer(T* ptr)
bool Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
{
- u32 translatedAddr = Num == 0 ? TranslateAddr9(addr) : TranslateAddr7(addr);
+ return false;
+ //u32 translatedAddr = Num == 0 ? TranslateAddr9(addr) : TranslateAddr7(addr);
- int invalidLiteralIdx = InvalidLiterals.Find(translatedAddr);
+ /*int invalidLiteralIdx = InvalidLiterals.Find(translatedAddr);
if (invalidLiteralIdx != -1)
{
InvalidLiterals.Remove(invalidLiteralIdx);
return false;
- }
+ }*/
u32 val;
// make sure arm7 bios is accessible
@@ -95,7 +101,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
staticAddress = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
OpArg rdMapped = MapReg(rd);
- if (!addrIsStatic)
+ if (true)
{
OpArg rnMapped = MapReg(rn);
if (Thumb && rn == 15)
@@ -145,7 +151,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
MOV(32, rnMapped, R(finalAddr));
}
- int expectedTarget = Num == 0
+ /*int expectedTarget = Num == 0
? ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion)
: ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion);
if (CurInstr.Cond() < 0xE)
@@ -184,8 +190,8 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
if (addrIsStatic && compileSlowPath)
MOV(32, R(RSCRATCH3), Imm32(staticAddress));
-
- if (compileFastPath)
+*/
+ /*if (compileFastPath)
{
FixupBranch slowPath;
if (compileSlowPath)
@@ -357,15 +363,16 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
SetJumpTarget(slowPath);
}
}
-
- if (compileSlowPath)
+*/
+ if (true)
{
PushRegs(false);
if (Num == 0)
{
- MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
- MOV(64, R(ABI_PARAM1), R(RCPU));
+ MOV(64, R(ABI_PARAM2), R(RCPU));
+ if (ABI_PARAM1 != RSCRATCH3)
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
if (flags & memop_Store)
{
MOV(32, R(ABI_PARAM3), rdMapped);
@@ -423,13 +430,13 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
}
}
-
+/*
if (compileFastPath && compileSlowPath)
{
FixupBranch ret = J(true);
SwitchToNearCode();
SetJumpTarget(ret);
- }
+ }*/
if (!(flags & memop_Store) && rd == 15)
{
@@ -458,7 +465,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
u32 stackAlloc = ((regsCount + 1) & ~1) * 8;
#endif
u32 allocOffset = stackAlloc - regsCount * 8;
-
+/*
int expectedTarget = Num == 0
? ClassifyAddress9(CurInstr.DataRegion)
: ClassifyAddress7(CurInstr.DataRegion);
@@ -479,7 +486,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
default:
break;
}
-
+*/
if (!store)
Comp_AddCycles_CDI();
else
@@ -492,7 +499,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
}
else
MOV(32, R(RSCRATCH4), MapReg(rn));
-
+/*
if (compileFastPath)
{
assert(!usermode);
@@ -570,7 +577,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
SwitchToFarCode();
SetJumpTarget(slowPath);
- }
+ }*/
if (!store)
{
@@ -696,13 +703,13 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
PopRegs(false);
}
-
+/*
if (compileFastPath)
{
FixupBranch ret = J(true);
SwitchToNearCode();
SetJumpTarget(ret);
- }
+ }*/
if (!store && regs[15])
{
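
Note (illustrative, not part of this patch): with the fast path commented out, every access in this commit takes the slow-path handler call. The general shape the refactor is working toward is the usual fastmem split sketched below; fastBase, fastMask and slowPath are hypothetical parameters used only for this sketch, not the emitted x64 code.

    #include <cstdint>

    using SlowRead32 = std::uint32_t (*)(std::uint32_t addr);

    // directly index host memory when the region is linearly mapped,
    // otherwise fall back to a handler call (e.g. an IO read function)
    std::uint32_t Read32_Sketch(std::uint32_t addr, std::uint8_t* fastBase,
                                std::uint32_t fastMask, SlowRead32 slowPath)
    {
        if (fastBase)                                   // region is mappable
            return *(std::uint32_t*)&fastBase[addr & fastMask];
        return slowPath(addr);                          // slow path
    }
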
diff --git a/src/ARM_InstrInfo.cpp b/src/ARM_InstrInfo.cpp
index b50e821..ccec951 100644
--- a/src/ARM_InstrInfo.cpp
+++ b/src/ARM_InstrInfo.cpp
@@ -206,15 +206,14 @@ enum {
T_ReadR14 = 1 << 13,
T_WriteR14 = 1 << 14,
- T_PopPC = 1 << 15,
-
- T_SetNZ = 1 << 16,
- T_SetCV = 1 << 17,
- T_SetMaybeC = 1 << 18,
- T_ReadC = 1 << 19,
- T_SetC = 1 << 20,
+ T_SetNZ = 1 << 15,
+ T_SetCV = 1 << 16,
+ T_SetMaybeC = 1 << 17,
+ T_ReadC = 1 << 18,
+ T_SetC = 1 << 19,
- T_WriteMem = 1 << 21,
+ T_WriteMem = 1 << 20,
+ T_LoadMem = 1 << 21,
};
const u32 T_LSL_IMM = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read3 | tk(tk_LSL_IMM);
@@ -256,31 +255,31 @@ const u32 T_ADD_PCREL = T_Write8 | tk(tk_ADD_PCREL);
const u32 T_ADD_SPREL = T_Write8 | T_ReadR13 | tk(tk_ADD_SPREL);
const u32 T_ADD_SP = T_WriteR13 | T_ReadR13 | tk(tk_ADD_SP);
-const u32 T_LDR_PCREL = T_Write8 | tk(tk_LDR_PCREL);
+const u32 T_LDR_PCREL = T_Write8 | T_LoadMem | tk(tk_LDR_PCREL);
const u32 T_STR_REG = T_Read0 | T_Read3 | T_Read6 | T_WriteMem | tk(tk_STR_REG);
const u32 T_STRB_REG = T_Read0 | T_Read3 | T_Read6 | T_WriteMem | tk(tk_STRB_REG);
-const u32 T_LDR_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDR_REG);
-const u32 T_LDRB_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRB_REG);
+const u32 T_LDR_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDR_REG);
+const u32 T_LDRB_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRB_REG);
const u32 T_STRH_REG = T_Read0 | T_Read3 | T_Read6 | T_WriteMem | tk(tk_STRH_REG);
-const u32 T_LDRSB_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRSB_REG);
-const u32 T_LDRH_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRH_REG);
-const u32 T_LDRSH_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRSH_REG);
+const u32 T_LDRSB_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRSB_REG);
+const u32 T_LDRH_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRH_REG);
+const u32 T_LDRSH_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRSH_REG);
const u32 T_STR_IMM = T_Read0 | T_Read3 | T_WriteMem | tk(tk_STR_IMM);
-const u32 T_LDR_IMM = T_Write0 | T_Read3 | tk(tk_LDR_IMM);
+const u32 T_LDR_IMM = T_Write0 | T_Read3 | T_LoadMem | tk(tk_LDR_IMM);
const u32 T_STRB_IMM = T_Read0 | T_Read3 | T_WriteMem | tk(tk_STRB_IMM);
-const u32 T_LDRB_IMM = T_Write0 | T_Read3 | tk(tk_LDRB_IMM);
+const u32 T_LDRB_IMM = T_Write0 | T_Read3 | T_LoadMem | tk(tk_LDRB_IMM);
const u32 T_STRH_IMM = T_Read0 | T_Read3 | T_WriteMem | tk(tk_STRH_IMM);
-const u32 T_LDRH_IMM = T_Write0 | T_Read3 | tk(tk_LDRH_IMM);
+const u32 T_LDRH_IMM = T_Write0 | T_Read3 | T_LoadMem | tk(tk_LDRH_IMM);
const u32 T_STR_SPREL = T_Read8 | T_ReadR13 | T_WriteMem | tk(tk_STR_SPREL);
-const u32 T_LDR_SPREL = T_Write8 | T_ReadR13 | tk(tk_LDR_SPREL);
+const u32 T_LDR_SPREL = T_Write8 | T_ReadR13 | T_LoadMem | tk(tk_LDR_SPREL);
const u32 T_PUSH = T_ReadR13 | T_WriteR13 | T_WriteMem | tk(tk_PUSH);
-const u32 T_POP = T_PopPC | T_ReadR13 | T_WriteR13 | tk(tk_POP);
+const u32 T_POP = T_ReadR13 | T_WriteR13 | T_LoadMem | tk(tk_POP);
-const u32 T_LDMIA = T_Read8 | T_Write8 | tk(tk_LDMIA);
+const u32 T_LDMIA = T_Read8 | T_Write8 | T_LoadMem | tk(tk_LDMIA);
const u32 T_STMIA = T_Read8 | T_Write8 | T_WriteMem | tk(tk_STMIA);
const u32 T_BCOND = T_BranchAlways | tk(tk_BCOND);
@@ -347,7 +346,7 @@ Info Decode(bool thumb, u32 num, u32 instr)
if (data & T_BranchAlways)
res.DstRegs |= (1 << 15);
- if (data & T_PopPC && instr & (1 << 8))
+ if (res.Kind == tk_POP && instr & (1 << 8))
res.DstRegs |= 1 << 15;
if (data & T_SetNZ)
@@ -364,11 +363,18 @@ Info Decode(bool thumb, u32 num, u32 instr)
if (data & T_WriteMem)
res.SpecialKind = special_WriteMem;
- if (res.Kind == ARMInstrInfo::tk_LDR_PCREL)
+ if (data & T_LoadMem)
{
- if (!Config::JIT_LiteralOptimisations)
- res.SrcRegs |= 1 << 15;
- res.SpecialKind = special_LoadLiteral;
+ if (res.Kind == tk_LDR_PCREL)
+ {
+ if (!Config::JIT_LiteralOptimisations)
+ res.SrcRegs |= 1 << 15;
+ res.SpecialKind = special_LoadLiteral;
+ }
+ else
+ {
+ res.SpecialKind = special_LoadMem;
+ }
}
if (res.Kind == tk_LDMIA || res.Kind == tk_POP)
@@ -401,11 +407,17 @@ Info Decode(bool thumb, u32 num, u32 instr)
else if ((instr >> 28) == 0xF)
data = ak(ak_Nop);
- if (data & A_UnkOnARM7 && num != 0)
+ if (data & A_UnkOnARM7 && num == 1)
data = A_UNK;
res.Kind = (data >> 22) & 0x1FF;
+ if (res.Kind >= ak_SMLAxy && res.Kind <= ak_SMULxy && num == 1)
+ {
+ data = ak(ak_Nop);
+ res.Kind = ak_Nop;
+ }
+
if (res.Kind == ak_MCR)
{
u32 cn = (instr >> 16) & 0xF;
@@ -490,8 +502,13 @@ Info Decode(bool thumb, u32 num, u32 instr)
if (data & A_WriteMem)
res.SpecialKind = special_WriteMem;
- if ((data & A_LoadMem) && res.SrcRegs == (1 << 15))
- res.SpecialKind = special_LoadLiteral;
+ if (data & A_LoadMem)
+ {
+ if (res.SrcRegs == (1 << 15))
+ res.SpecialKind = special_LoadLiteral;
+ else
+ res.SpecialKind = special_LoadMem;
+ }
if (res.Kind == ak_LDM)
{
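
Note (illustrative, not part of this patch): with T_PopPC gone the thumb flag bits are repacked, and loads now carry an explicit T_LoadMem bit mirroring T_WriteMem, which Decode turns into special_LoadMem (or special_LoadLiteral for PC-relative loads). A minimal sketch of how such flag words are tested:

    #include <cstdint>

    // simplified stand-ins for the repacked thumb decode flags
    enum : std::uint32_t
    {
        T_WriteMem_Sketch = 1u << 20,
        T_LoadMem_Sketch  = 1u << 21,
    };

    // an LDR-style entry ORs T_LoadMem into its flag word; the decoder then
    // classifies the instruction as a memory load from that single bit
    bool IsMemoryLoad(std::uint32_t flags)  { return (flags & T_LoadMem_Sketch)  != 0; }
    bool IsMemoryStore(std::uint32_t flags) { return (flags & T_WriteMem_Sketch) != 0; }
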
diff --git a/src/ARM_InstrInfo.h b/src/ARM_InstrInfo.h
index 6ab4929..a702435 100644
--- a/src/ARM_InstrInfo.h
+++ b/src/ARM_InstrInfo.h
@@ -232,6 +232,7 @@ enum
{
special_NotSpecialAtAll = 0,
special_WriteMem,
+ special_LoadMem,
special_WaitForInterrupt,
special_LoadLiteral
};
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f35b3e9..84bbc2b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -55,9 +55,11 @@ if (ENABLE_JIT)
enable_language(ASM)
target_sources(core PRIVATE
- ARMJIT.cpp
ARM_InstrInfo.cpp
+ ARMJIT.cpp
+ ARMJIT_Memory.cpp
+
dolphin/CommonFuncs.cpp
)
@@ -85,6 +87,8 @@ if (ENABLE_JIT)
ARMJIT_A64/ARMJIT_ALU.cpp
ARMJIT_A64/ARMJIT_LoadStore.cpp
ARMJIT_A64/ARMJIT_Branch.cpp
+
+ ARMJIT_A64/ARMJIT_Linkage.s
)
endif()
endif()
diff --git a/src/CP15.cpp b/src/CP15.cpp
index 225847e..3d64259 100644
--- a/src/CP15.cpp
+++ b/src/CP15.cpp
@@ -22,6 +22,7 @@
#include "DSi.h"
#include "ARM.h"
#include "ARMJIT.h"
+#include "ARMJIT_Memory.h"
// access timing for cached regions
@@ -42,8 +43,8 @@ void ARMv5::CP15Reset()
DTCMSetting = 0;
ITCMSetting = 0;
- memset(ITCM, 0, 0x8000);
- memset(DTCM, 0, 0x4000);
+ memset(ITCM, 0, ITCMPhysicalSize);
+ memset(DTCM, 0, DTCMPhysicalSize);
ITCMSize = 0;
DTCMBase = 0xFFFFFFFF;
@@ -75,8 +76,8 @@ void ARMv5::CP15DoSavestate(Savestate* file)
file->Var32(&DTCMSetting);
file->Var32(&ITCMSetting);
- file->VarArray(ITCM, 0x8000);
- file->VarArray(DTCM, 0x4000);
+ file->VarArray(ITCM, ITCMPhysicalSize);
+ file->VarArray(DTCM, DTCMPhysicalSize);
file->Var32(&PU_CodeCacheable);
file->Var32(&PU_DataCacheable);
@@ -98,36 +99,30 @@ void ARMv5::CP15DoSavestate(Savestate* file)
void ARMv5::UpdateDTCMSetting()
{
-#ifdef JIT_ENABLED
- u32 oldDTCMBase = DTCMBase;
- u32 oldDTCMSize = DTCMSize;
-#endif
+ u32 newDTCMBase;
+ u32 newDTCMSize;
if (CP15Control & (1<<16))
{
- DTCMBase = DTCMSetting & 0xFFFFF000;
- DTCMSize = 0x200 << ((DTCMSetting >> 1) & 0x1F);
+ newDTCMBase = DTCMSetting & 0xFFFFF000;
+ newDTCMSize = 0x200 << ((DTCMSetting >> 1) & 0x1F);
//printf("DTCM [%08X] enabled at %08X, size %X\n", DTCMSetting, DTCMBase, DTCMSize);
}
else
{
- DTCMBase = 0xFFFFFFFF;
- DTCMSize = 0;
+ newDTCMBase = 0xFFFFFFFF;
+ newDTCMSize = 0;
//printf("DTCM disabled\n");
}
-#ifdef JIT_ENABLED
- if (oldDTCMBase != DTCMBase || oldDTCMSize != DTCMSize)
+ if (newDTCMBase != DTCMBase || newDTCMSize != DTCMSize)
{
- ARMJIT::UpdateMemoryStatus9(oldDTCMBase, oldDTCMBase + oldDTCMSize);
- ARMJIT::UpdateMemoryStatus9(DTCMBase, DTCMBase + DTCMSize);
+ ARMJIT_Memory::RemapDTCM(newDTCMBase, newDTCMSize);
+ DTCMBase = newDTCMBase;
+ DTCMSize = newDTCMSize;
}
-#endif
}
void ARMv5::UpdateITCMSetting()
{
-#ifdef JIT_ENABLED
- u32 oldITCMSize = ITCMSize;
-#endif
if (CP15Control & (1<<18))
{
ITCMSize = 0x200 << ((ITCMSetting >> 1) & 0x1F);
@@ -138,10 +133,6 @@ void ARMv5::UpdateITCMSetting()
ITCMSize = 0;
//printf("ITCM disabled\n");
}
-#ifdef JIT_ENABLED
- if (oldITCMSize != ITCMSize)
- ARMJIT::UpdateMemoryStatus9(0, std::max(oldITCMSize, ITCMSize));
-#endif
}
@@ -581,12 +572,15 @@ void ARMv5::CP15Write(u32 id, u32 val)
case 0x750:
ICacheInvalidateAll();
+ //Halt(255);
return;
case 0x751:
ICacheInvalidateByAddr(val);
+ //Halt(255);
return;
case 0x752:
printf("CP15: ICACHE INVALIDATE WEIRD. %08X\n", val);
+ //Halt(255);
return;
@@ -723,7 +717,7 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
if (addr < ITCMSize)
{
CodeCycles = 1;
- return *(u32*)&ITCM[addr & 0x7FFF];
+ return *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)];
}
CodeCycles = RegionCodeCycles;
@@ -750,13 +744,13 @@ void ARMv5::DataRead8(u32 addr, u32* val)
if (addr < ITCMSize)
{
DataCycles = 1;
- *val = *(u8*)&ITCM[addr & 0x7FFF];
+ *val = *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
- *val = *(u8*)&DTCM[(addr - DTCMBase) & 0x3FFF];
+ *val = *(u8*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)];
return;
}
@@ -773,13 +767,13 @@ void ARMv5::DataRead16(u32 addr, u32* val)
if (addr < ITCMSize)
{
DataCycles = 1;
- *val = *(u16*)&ITCM[addr & 0x7FFF];
+ *val = *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
- *val = *(u16*)&DTCM[(addr - DTCMBase) & 0x3FFF];
+ *val = *(u16*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)];
return;
}
@@ -796,13 +790,13 @@ void ARMv5::DataRead32(u32 addr, u32* val)
if (addr < ITCMSize)
{
DataCycles = 1;
- *val = *(u32*)&ITCM[addr & 0x7FFF];
+ *val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
- *val = *(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF];
+ *val = *(u32*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)];
return;
}
@@ -817,13 +811,13 @@ void ARMv5::DataRead32S(u32 addr, u32* val)
if (addr < ITCMSize)
{
DataCycles += 1;
- *val = *(u32*)&ITCM[addr & 0x7FFF];
+ *val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles += 1;
- *val = *(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF];
+ *val = *(u32*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)];
return;
}
@@ -838,16 +832,16 @@ void ARMv5::DataWrite8(u32 addr, u8 val)
if (addr < ITCMSize)
{
DataCycles = 1;
- *(u8*)&ITCM[addr & 0x7FFF] = val;
+ *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
#ifdef JIT_ENABLED
- ARMJIT::InvalidateITCMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
#endif
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
- *(u8*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val;
+ *(u8*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)] = val;
return;
}
@@ -864,16 +858,16 @@ void ARMv5::DataWrite16(u32 addr, u16 val)
if (addr < ITCMSize)
{
DataCycles = 1;
- *(u16*)&ITCM[addr & 0x7FFF] = val;
+ *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
#ifdef JIT_ENABLED
- ARMJIT::InvalidateITCMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
#endif
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
- *(u16*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val;
+ *(u16*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)] = val;
return;
}
@@ -890,16 +884,16 @@ void ARMv5::DataWrite32(u32 addr, u32 val)
if (addr < ITCMSize)
{
DataCycles = 1;
- *(u32*)&ITCM[addr & 0x7FFF] = val;
+ *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
#ifdef JIT_ENABLED
- ARMJIT::InvalidateITCMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
#endif
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
- *(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val;
+ *(u32*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)] = val;
return;
}
@@ -914,16 +908,16 @@ void ARMv5::DataWrite32S(u32 addr, u32 val)
if (addr < ITCMSize)
{
DataCycles += 1;
- *(u32*)&ITCM[addr & 0x7FFF] = val;
+ *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
#ifdef JIT_ENABLED
- ARMJIT::InvalidateITCMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
#endif
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles += 1;
- *(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val;
+ *(u32*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)] = val;
return;
}
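
Note (illustrative, not part of this patch): replacing the hard-coded 0x7FFF / 0x3FFF masks with (ITCMPhysicalSize - 1) / (DTCMPhysicalSize - 1) keeps the same power-of-two mirroring while defining the physical TCM buffer sizes in one place. A small sketch, assuming ITCMPhysicalSize is 0x8000 as the old mask implies:

    #include <cstdint>

    // assumed value for the sketch; matches the old hard-coded 0x7FFF mask
    constexpr std::uint32_t ITCMPhysSize_Sketch = 0x8000;

    // the mask both bounds the index and mirrors the TCM across its mapped window
    std::uint32_t ITCMIndex(std::uint32_t addr)
    {
        return addr & (ITCMPhysSize_Sketch - 1);
    }
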
diff --git a/src/Config.cpp b/src/Config.cpp
index 22e9c11..edf84f2 100644
--- a/src/Config.cpp
+++ b/src/Config.cpp
@@ -47,8 +47,9 @@ int JIT_LiteralOptimisations = true;
#ifdef JIT_ENABLED
int JIT_Enable = false;
int JIT_MaxBlockSize = 32;
-int JIT_BrancheOptimisations = 2;
+int JIT_BrancheOptimisations = true;
int JIT_LiteralOptimisations = true;
+int JIT_FastMemory = true;
#endif
ConfigEntry ConfigFile[] =
@@ -72,8 +73,9 @@ ConfigEntry ConfigFile[] =
#ifdef JIT_ENABLED
{"JIT_Enable", 0, &JIT_Enable, 0, NULL, 0},
{"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 32, NULL, 0},
- {"JIT_BranchOptimisations", 0, &JIT_BrancheOptimisations, 2, NULL, 0},
+ {"JIT_BranchOptimisations", 0, &JIT_BrancheOptimisations, 1, NULL, 0},
{"JIT_LiteralOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0},
+ {"JIT_FastMem", 0, &JIT_FastMemory, 1, NULL, 0},
#endif
{"", -1, NULL, 0, NULL, 0}
diff --git a/src/Config.h b/src/Config.h
index 31fa67a..7b19a4b 100644
--- a/src/Config.h
+++ b/src/Config.h
@@ -63,6 +63,7 @@ extern int JIT_Enable;
extern int JIT_MaxBlockSize;
extern int JIT_BrancheOptimisations;
extern int JIT_LiteralOptimisations;
+extern int JIT_FastMemory;
#endif
}
diff --git a/src/NDS.cpp b/src/NDS.cpp
index 657241f..3d65482 100644
--- a/src/NDS.cpp
+++ b/src/NDS.cpp
@@ -33,6 +33,7 @@
#include "AREngine.h"
#include "Platform.h"
#include "ARMJIT.h"
+#include "ARMJIT_Memory.h"
#include "DSi.h"
#include "DSi_SPI_TSC.h"
@@ -94,17 +95,17 @@ u32 CPUStop;
u8 ARM9BIOS[0x1000];
u8 ARM7BIOS[0x4000];
-u8 MainRAM[0x1000000];
+u8* MainRAM;
u32 MainRAMMask;
-u8 SharedWRAM[0x8000];
+u8* SharedWRAM;
u8 WRAMCnt;
-u8* SWRAM_ARM9;
-u8* SWRAM_ARM7;
-u32 SWRAM_ARM9Mask;
-u32 SWRAM_ARM7Mask;
-u8 ARM7WRAM[0x10000];
+// kept together so they are always adjacent in memory
+MemRegion SWRAM_ARM9;
+MemRegion SWRAM_ARM7;
+
+u8* ARM7WRAM;
u16 ExMemCnt[2];
@@ -171,6 +172,10 @@ bool Init()
#ifdef JIT_ENABLED
ARMJIT::Init();
+#else
+ MainRAM = new u8[MainRAMSize];
+ ARM7WRAM = new u8[ARM7WRAMSize];
+ SharedWRAM = new u8[SharedWRAMSize];
#endif
DMAs[0] = new DMA(0, 0);
@@ -485,6 +490,10 @@ void Reset()
printf("ARM7 BIOS loaded\n");
fclose(f);
}
+
+#ifdef JIT_ENABLED
+ ARMJIT::Reset();
+#endif
if (ConsoleType == 1)
{
@@ -510,7 +519,7 @@ void Reset()
InitTimings();
- memset(MainRAM, 0, 0x1000000);
+ memset(MainRAM, 0, MainRAMMask + 1);
memset(SharedWRAM, 0, 0x8000);
memset(ARM7WRAM, 0, 0x10000);
@@ -587,10 +596,6 @@ void Reset()
}
AREngine::Reset();
-
-#ifdef JIT_ENABLED
- ARMJIT::Reset();
-#endif
}
void Stop()
@@ -705,7 +710,7 @@ bool DoSavestate(Savestate* file)
file->VarArray(MainRAM, 0x400000);
file->VarArray(SharedWRAM, 0x8000);
- file->VarArray(ARM7WRAM, 0x10000);
+ file->VarArray(ARM7WRAM, ARM7WRAMSize);
file->VarArray(ExMemCnt, 2*sizeof(u16));
file->VarArray(ROMSeed0, 2*8);
@@ -1128,43 +1133,40 @@ void MapSharedWRAM(u8 val)
if (val == WRAMCnt)
return;
+ ARMJIT_Memory::RemapSWRAM();
+
WRAMCnt = val;
switch (WRAMCnt & 0x3)
{
case 0:
- SWRAM_ARM9 = &SharedWRAM[0];
- SWRAM_ARM9Mask = 0x7FFF;
- SWRAM_ARM7 = NULL;
- SWRAM_ARM7Mask = 0;
+ SWRAM_ARM9.Mem = &SharedWRAM[0];
+ SWRAM_ARM9.Mask = 0x7FFF;
+ SWRAM_ARM7.Mem = NULL;
+ SWRAM_ARM7.Mask = 0;
break;
case 1:
- SWRAM_ARM9 = &SharedWRAM[0x4000];
- SWRAM_ARM9Mask = 0x3FFF;
- SWRAM_ARM7 = &SharedWRAM[0];
- SWRAM_ARM7Mask = 0x3FFF;
+ SWRAM_ARM9.Mem = &SharedWRAM[0x4000];
+ SWRAM_ARM9.Mask = 0x3FFF;
+ SWRAM_ARM7.Mem = &SharedWRAM[0];
+ SWRAM_ARM7.Mask = 0x3FFF;
break;
case 2:
- SWRAM_ARM9 = &SharedWRAM[0];
- SWRAM_ARM9Mask = 0x3FFF;
- SWRAM_ARM7 = &SharedWRAM[0x4000];
- SWRAM_ARM7Mask = 0x3FFF;
+ SWRAM_ARM9.Mem = &SharedWRAM[0];
+ SWRAM_ARM9.Mask = 0x3FFF;
+ SWRAM_ARM7.Mem = &SharedWRAM[0x4000];
+ SWRAM_ARM7.Mask = 0x3FFF;
break;
case 3:
- SWRAM_ARM9 = NULL;
- SWRAM_ARM9Mask = 0;
- SWRAM_ARM7 = &SharedWRAM[0];
- SWRAM_ARM7Mask = 0x7FFF;
+ SWRAM_ARM9.Mem = NULL;
+ SWRAM_ARM9.Mask = 0;
+ SWRAM_ARM7.Mem = &SharedWRAM[0];
+ SWRAM_ARM7.Mask = 0x7FFF;
break;
}
-
-#ifdef JIT_ENABLED
- ARMJIT::UpdateMemoryStatus9(0x3000000, 0x3000000 + 0x1000000);
- ARMJIT::UpdateMemoryStatus7(0x3000000, 0x3000000 + 0x1000000);
-#endif
}
@@ -1835,12 +1837,12 @@ u8 ARM9Read8(u32 addr)
switch (addr & 0xFF000000)
{
case 0x02000000:
- return *(u8*)&MainRAM[addr & MainRAMMask];
+ return *(u8*)&MainRAM[addr & (MainRAMSize - 1)];
case 0x03000000:
- if (SWRAM_ARM9)
+ if (SWRAM_ARM9.Mem)
{
- return *(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask];
+ return *(u8*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask];
}
else
{
@@ -1900,12 +1902,12 @@ u16 ARM9Read16(u32 addr)
switch (addr & 0xFF000000)
{
case 0x02000000:
- return *(u16*)&MainRAM[addr & MainRAMMask];
+ return *(u16*)&MainRAM[addr & (MainRAMSize - 1)];
case 0x03000000:
- if (SWRAM_ARM9)
+ if (SWRAM_ARM9.Mem)
{
- return *(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask];
+ return *(u16*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask];
}
else
{
@@ -1968,9 +1970,9 @@ u32 ARM9Read32(u32 addr)
return *(u32*)&MainRAM[addr & MainRAMMask];
case 0x03000000:
- if (SWRAM_ARM9)
+ if (SWRAM_ARM9.Mem)
{
- return *(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask];
+ return *(u32*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask];
}
else
{
@@ -2026,7 +2028,7 @@ void ARM9Write8(u32 addr, u8 val)
{
case 0x02000000:
#ifdef JIT_ENABLED
- ARMJIT::InvalidateMainRAMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u8*)&MainRAM[addr & MainRAMMask] = val;
#ifdef JIT_ENABLED
@@ -2035,12 +2037,12 @@ void ARM9Write8(u32 addr, u8 val)
return;
case 0x03000000:
- if (SWRAM_ARM9)
+ if (SWRAM_ARM9.Mem)
{
#ifdef JIT_ENABLED
- ARMJIT::InvalidateSWRAM9IfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_SWRAM>(addr);
#endif
- *(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
+ *(u8*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask] = val;
}
return;
@@ -2085,7 +2087,7 @@ void ARM9Write16(u32 addr, u16 val)
{
case 0x02000000:
#ifdef JIT_ENABLED
- ARMJIT::InvalidateMainRAMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u16*)&MainRAM[addr & MainRAMMask] = val;
#ifdef JIT_ENABLED
@@ -2094,12 +2096,12 @@ void ARM9Write16(u32 addr, u16 val)
return;
case 0x03000000:
- if (SWRAM_ARM9)
+ if (SWRAM_ARM9.Mem)
{
#ifdef JIT_ENABLED
- ARMJIT::InvalidateSWRAM9IfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_SWRAM>(addr);
#endif
- *(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
+ *(u16*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask] = val;
}
return;
@@ -2113,18 +2115,16 @@ void ARM9Write16(u32 addr, u16 val)
return;
case 0x06000000:
+#ifdef JIT_ENABLED
+ ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(addr);
+#endif
switch (addr & 0x00E00000)
{
case 0x00000000: GPU::WriteVRAM_ABG<u16>(addr, val); return;
case 0x00200000: GPU::WriteVRAM_BBG<u16>(addr, val); return;
case 0x00400000: GPU::WriteVRAM_AOBJ<u16>(addr, val); return;
case 0x00600000: GPU::WriteVRAM_BOBJ<u16>(addr, val); return;
- default:
-#ifdef JIT_ENABLED
- ARMJIT::InvalidateLCDCIfNecessary(addr);
-#endif
- GPU::WriteVRAM_LCDC<u16>(addr, val);
- return;
+ default: GPU::WriteVRAM_LCDC<u16>(addr, val); return;
}
case 0x07000000:
@@ -2165,7 +2165,7 @@ void ARM9Write32(u32 addr, u32 val)
{
case 0x02000000:
#ifdef JIT_ENABLED
- ARMJIT::InvalidateMainRAMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u32*)&MainRAM[addr & MainRAMMask] = val;
#ifdef JIT_ENABLED
@@ -2174,12 +2174,12 @@ void ARM9Write32(u32 addr, u32 val)
return ;
case 0x03000000:
- if (SWRAM_ARM9)
+ if (SWRAM_ARM9.Mem)
{
#ifdef JIT_ENABLED
- ARMJIT::InvalidateSWRAM9IfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_SWRAM>(addr);
#endif
- *(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
+ *(u32*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask] = val;
}
return;
@@ -2193,18 +2193,16 @@ void ARM9Write32(u32 addr, u32 val)
return;
case 0x06000000:
+#ifdef JIT_ENABLED
+ ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(addr);
+#endif
switch (addr & 0x00E00000)
{
case 0x00000000: GPU::WriteVRAM_ABG<u32>(addr, val); return;
case 0x00200000: GPU::WriteVRAM_BBG<u32>(addr, val); return;
case 0x00400000: GPU::WriteVRAM_AOBJ<u32>(addr, val); return;
case 0x00600000: GPU::WriteVRAM_BOBJ<u32>(addr, val); return;
- default:
-#ifdef JIT_ENABLED
- ARMJIT::InvalidateLCDCIfNecessary(addr);
-#endif
- GPU::WriteVRAM_LCDC<u32>(addr, val);
- return;
+ default: GPU::WriteVRAM_LCDC<u32>(addr, val); return;
}
case 0x07000000:
@@ -2250,10 +2248,10 @@ bool ARM9GetMemRegion(u32 addr, bool write, MemRegion* region)
return true;
case 0x03000000:
- if (SWRAM_ARM9)
+ if (SWRAM_ARM9.Mem)
{
- region->Mem = SWRAM_ARM9;
- region->Mask = SWRAM_ARM9Mask;
+ region->Mem = SWRAM_ARM9.Mem;
+ region->Mask = SWRAM_ARM9.Mask;
return true;
}
break;
@@ -2292,17 +2290,17 @@ u8 ARM7Read8(u32 addr)
return *(u8*)&MainRAM[addr & MainRAMMask];
case 0x03000000:
- if (SWRAM_ARM7)
+ if (SWRAM_ARM7.Mem)
{
- return *(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask];
+ return *(u8*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask];
}
else
{
- return *(u8*)&ARM7WRAM[addr & 0xFFFF];
+ return *(u8*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
}
case 0x03800000:
- return *(u8*)&ARM7WRAM[addr & 0xFFFF];
+ return *(u8*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
case 0x04000000:
return ARM7IORead8(addr);
@@ -2352,17 +2350,17 @@ u16 ARM7Read16(u32 addr)
return *(u16*)&MainRAM[addr & MainRAMMask];
case 0x03000000:
- if (SWRAM_ARM7)
+ if (SWRAM_ARM7.Mem)
{
- return *(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask];
+ return *(u16*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask];
}
else
{
- return *(u16*)&ARM7WRAM[addr & 0xFFFF];
+ return *(u16*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
}
case 0x03800000:
- return *(u16*)&ARM7WRAM[addr & 0xFFFF];
+ return *(u16*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
case 0x04000000:
return ARM7IORead16(addr);
@@ -2419,17 +2417,17 @@ u32 ARM7Read32(u32 addr)
return *(u32*)&MainRAM[addr & MainRAMMask];
case 0x03000000:
- if (SWRAM_ARM7)
+ if (SWRAM_ARM7.Mem)
{
- return *(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask];
+ return *(u32*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask];
}
else
{
- return *(u32*)&ARM7WRAM[addr & 0xFFFF];
+ return *(u32*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
}
case 0x03800000:
- return *(u32*)&ARM7WRAM[addr & 0xFFFF];
+ return *(u32*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
case 0x04000000:
return ARM7IORead32(addr);
@@ -2474,7 +2472,7 @@ void ARM7Write8(u32 addr, u8 val)
case 0x02000000:
case 0x02800000:
#ifdef JIT_ENABLED
- ARMJIT::InvalidateMainRAMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u8*)&MainRAM[addr & MainRAMMask] = val;
#ifdef JIT_ENABLED
@@ -2483,28 +2481,28 @@ void ARM7Write8(u32 addr, u8 val)
return;
case 0x03000000:
- if (SWRAM_ARM7)
+ if (SWRAM_ARM7.Mem)
{
#ifdef JIT_ENABLED
- ARMJIT::InvalidateSWRAM7IfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_SWRAM>(addr);
#endif
- *(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
+ *(u8*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask] = val;
return;
}
else
{
#ifdef JIT_ENABLED
- ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
- *(u8*)&ARM7WRAM[addr & 0xFFFF] = val;
+ *(u8*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
}
case 0x03800000:
#ifdef JIT_ENABLED
- ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
- *(u8*)&ARM7WRAM[addr & 0xFFFF] = val;
+ *(u8*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
case 0x04000000:
@@ -2514,7 +2512,7 @@ void ARM7Write8(u32 addr, u8 val)
case 0x06000000:
case 0x06800000:
#ifdef JIT_ENABLED
- ARMJIT::InvalidateARM7WVRAMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr);
#endif
GPU::WriteVRAM_ARM7<u8>(addr, val);
return;
@@ -2551,7 +2549,7 @@ void ARM7Write16(u32 addr, u16 val)
case 0x02000000:
case 0x02800000:
#ifdef JIT_ENABLED
- ARMJIT::InvalidateMainRAMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u16*)&MainRAM[addr & MainRAMMask] = val;
#ifdef JIT_ENABLED
@@ -2560,28 +2558,28 @@ void ARM7Write16(u32 addr, u16 val)
return;
case 0x03000000:
- if (SWRAM_ARM7)
+ if (SWRAM_ARM7.Mem)
{
#ifdef JIT_ENABLED
- ARMJIT::InvalidateSWRAM7IfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_SWRAM>(addr);
#endif
- *(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
+ *(u16*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask] = val;
return;
}
else
{
#ifdef JIT_ENABLED
- ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
- *(u16*)&ARM7WRAM[addr & 0xFFFF] = val;
+ *(u16*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
}
case 0x03800000:
#ifdef JIT_ENABLED
- ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
- *(u16*)&ARM7WRAM[addr & 0xFFFF] = val;
+ *(u16*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
case 0x04000000:
@@ -2599,7 +2597,7 @@ void ARM7Write16(u32 addr, u16 val)
case 0x06000000:
case 0x06800000:
#ifdef JIT_ENABLED
- ARMJIT::InvalidateARM7WVRAMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr);
#endif
GPU::WriteVRAM_ARM7<u16>(addr, val);
return;
@@ -2638,7 +2636,7 @@ void ARM7Write32(u32 addr, u32 val)
case 0x02000000:
case 0x02800000:
#ifdef JIT_ENABLED
- ARMJIT::InvalidateMainRAMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u32*)&MainRAM[addr & MainRAMMask] = val;
#ifdef JIT_ENABLED
@@ -2647,28 +2645,28 @@ void ARM7Write32(u32 addr, u32 val)
return;
case 0x03000000:
- if (SWRAM_ARM7)
+ if (SWRAM_ARM7.Mem)
{
#ifdef JIT_ENABLED
- ARMJIT::InvalidateSWRAM7IfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_SWRAM>(addr);
#endif
- *(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
+ *(u32*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask] = val;
return;
}
else
{
#ifdef JIT_ENABLED
- ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
- *(u32*)&ARM7WRAM[addr & 0xFFFF] = val;
+ *(u32*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
}
case 0x03800000:
#ifdef JIT_ENABLED
- ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
- *(u32*)&ARM7WRAM[addr & 0xFFFF] = val;
+ *(u32*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
case 0x04000000:
@@ -2687,7 +2685,7 @@ void ARM7Write32(u32 addr, u32 val)
case 0x06000000:
case 0x06800000:
#ifdef JIT_ENABLED
- ARMJIT::InvalidateARM7WVRAMIfNecessary(addr);
+ ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr);
#endif
GPU::WriteVRAM_ARM7<u32>(addr, val);
return;
@@ -2736,17 +2734,17 @@ bool ARM7GetMemRegion(u32 addr, bool write, MemRegion* region)
// then access all the WRAM as one contiguous block starting at 0x037F8000
// this case needs a bit of a hack to cover
// it's not really worth bothering anyway
- if (!SWRAM_ARM7)
+ if (!SWRAM_ARM7.Mem)
{
region->Mem = ARM7WRAM;
- region->Mask = 0xFFFF;
+ region->Mask = ARM7WRAMSize-1;
return true;
}
break;
case 0x03800000:
region->Mem = ARM7WRAM;
- region->Mask = 0xFFFF;
+ region->Mask = ARM7WRAMSize-1;
return true;
}
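
Note (illustrative, not part of this patch): the separate SWRAM_ARM9 / SWRAM_ARM9Mask globals are folded into NDS::MemRegion so the pointer and mask travel together. A small sketch of a read through such a region; the struct and fallback value here are simplified stand-ins:

    #include <cstdint>

    // mirrors NDS::MemRegion: base pointer plus power-of-two mask
    struct MemRegionSketch
    {
        std::uint8_t* Mem;
        std::uint32_t Mask;
    };

    // a read through such a region; a null Mem means the bank is unmapped for this CPU
    std::uint16_t ReadSWRAM16_Sketch(const MemRegionSketch& r, std::uint32_t addr, std::uint16_t fallback)
    {
        if (!r.Mem)
            return fallback;                   // e.g. fall through to ARM7WRAM on the ARM7 side
        return *(std::uint16_t*)&r.Mem[addr & r.Mask];
    }
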
diff --git a/src/NDS.h b/src/NDS.h
index e9b56da..4b4f9a1 100644
--- a/src/NDS.h
+++ b/src/NDS.h
@@ -134,6 +134,7 @@ typedef struct
} MemRegion;
extern int ConsoleType;
+extern int CurCPU;
extern u8 ARM9MemTimings[0x40000][4];
extern u8 ARM7MemTimings[0x20000][4];
@@ -161,20 +162,20 @@ extern u8 ARM9BIOS[0x1000];
extern u8 ARM7BIOS[0x4000];
extern u16 ARM7BIOSProt;
-extern u8 MainRAM[0x1000000];
+extern u8* MainRAM;
extern u32 MainRAMMask;
-extern u8 SharedWRAM[0x8000];
-extern u8* SWRAM_ARM9;
-extern u8* SWRAM_ARM7;
-extern u32 SWRAM_ARM9Mask;
-extern u32 SWRAM_ARM7Mask;
-
-extern u8 ARM7WRAM[0x10000];
+const u32 SharedWRAMSize = 0x8000;
+extern u8* SharedWRAM;
+extern MemRegion SWRAM_ARM9;
+extern MemRegion SWRAM_ARM7;
extern u32 KeyInput;
+const u32 ARM7WRAMSize = 0x10000;
+extern u8* ARM7WRAM;
+
bool Init();
void DeInit();
void Reset();