about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/ARM.cpp 10
-rw-r--r-- src/ARM.h 24
-rw-r--r-- src/ARMJIT.cpp 905
-rw-r--r-- src/ARMJIT.h 65
-rw-r--r-- src/ARMJIT_A64/ARMJIT_Compiler.cpp 4
-rw-r--r-- src/ARMJIT_Internal.h 68
-rw-r--r-- src/ARMJIT_RegisterCache.h 18
-rw-r--r-- src/ARMJIT_x64/ARMJIT_Compiler.cpp 43
-rw-r--r-- src/ARMJIT_x64/ARMJIT_Compiler.h 34
-rw-r--r-- src/ARMJIT_x64/ARMJIT_LoadStore.cpp 935
-rw-r--r-- src/ARM_InstrInfo.cpp 16
-rw-r--r-- src/CP15.cpp 44
-rw-r--r-- src/NDS.cpp 105
-rw-r--r-- src/NDS.h 8
14 files changed, 1465 insertions, 814 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp
index 95d2b8b..205332d 100644
--- a/src/ARM.cpp
+++ b/src/ARM.cpp
@@ -579,7 +579,8 @@ void ARMv5::ExecuteJIT()
while (NDS::ARM9Timestamp < NDS::ARM9Target)
{
u32 instrAddr = R[15] - ((CPSR&0x20)?2:4);
- if (!ARMJIT::IsMapped<0>(instrAddr))
+ u32 translatedAddr = ARMJIT::TranslateAddr9(instrAddr);
+ if (!translatedAddr)
{
NDS::ARM9Timestamp = NDS::ARM9Target;
printf("ARMv5 PC in non executable region %08X\n", R[15]);
@@ -589,7 +590,7 @@ void ARMv5::ExecuteJIT()
// hack so Cycles <= 0 becomes Cycles < 0
Cycles = NDS::ARM9Target - NDS::ARM9Timestamp - 1;
- ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry(ARMJIT::TranslateAddr<0>(instrAddr));
+ ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry<0>(translatedAddr);
if (block)
ARM_Dispatch(this, block);
else
@@ -722,7 +723,8 @@ void ARMv4::ExecuteJIT()
while (NDS::ARM7Timestamp < NDS::ARM7Target)
{
u32 instrAddr = R[15] - ((CPSR&0x20)?2:4);
- if (!ARMJIT::IsMapped<1>(instrAddr))
+ u32 translatedAddr = ARMJIT::TranslateAddr7(instrAddr);
+ if (!translatedAddr)
{
NDS::ARM7Timestamp = NDS::ARM7Target;
printf("ARMv4 PC in non executable region %08X\n", R[15]);
@@ -731,7 +733,7 @@ void ARMv4::ExecuteJIT()
Cycles = NDS::ARM7Target - NDS::ARM7Timestamp - 1;
- ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry(ARMJIT::TranslateAddr<1>(instrAddr));
+ ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry<1>(translatedAddr);
if (block)
ARM_Dispatch(this, block);
else
diff --git a/src/ARM.h b/src/ARM.h
index 4877956..f64b7fe 100644
--- a/src/ARM.h
+++ b/src/ARM.h
@@ -308,7 +308,7 @@ public:
void DataRead8(u32 addr, u32* val)
{
*val = NDS::ARM7Read8(addr);
- DataRegion = addr >> 20;
+ DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
}
@@ -317,7 +317,7 @@ public:
addr &= ~1;
*val = NDS::ARM7Read16(addr);
- DataRegion = addr >> 20;
+ DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
}
@@ -326,7 +326,7 @@ public:
addr &= ~3;
*val = NDS::ARM7Read32(addr);
- DataRegion = addr >> 20;
+ DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][2];
}
@@ -341,7 +341,7 @@ public:
void DataWrite8(u32 addr, u8 val)
{
NDS::ARM7Write8(addr, val);
- DataRegion = addr >> 20;
+ DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
}
@@ -350,7 +350,7 @@ public:
addr &= ~1;
NDS::ARM7Write16(addr, val);
- DataRegion = addr >> 20;
+ DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
}
@@ -359,7 +359,7 @@ public:
addr &= ~3;
NDS::ARM7Write32(addr, val);
- DataRegion = addr >> 20;
+ DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][2];
}
@@ -390,7 +390,7 @@ public:
s32 numC = NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2];
s32 numD = DataCycles;
- if ((DataRegion >> 4) == 0x02) // mainRAM
+ if ((DataRegion >> 24) == 0x02) // mainRAM
{
if (CodeRegion == 0x02)
Cycles -= numC + numD;
@@ -417,7 +417,7 @@ public:
s32 numC = NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2];
s32 numD = DataCycles;
- if ((DataRegion >> 4) == 0x02)
+ if ((DataRegion >> 24) == 0x02)
{
if (CodeRegion == 0x02)
Cycles -= numC + numD;
@@ -443,4 +443,12 @@ void T_UNK(ARM* cpu);
}
+namespace NDS
+{
+
+extern ARMv5* ARM9;
+extern ARMv4* ARM7;
+
+}
+
#endif // ARM_H
diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp
index 46f71f1..9602aed 100644
--- a/src/ARMJIT.cpp
+++ b/src/ARMJIT.cpp
@@ -23,6 +23,7 @@
#include "ARMInterpreter_Branch.h"
#include "ARMInterpreter.h"
+#include "GPU.h"
#include "GPU3D.h"
#include "SPU.h"
#include "Wifi.h"
@@ -34,9 +35,10 @@ namespace ARMJIT
#define JIT_DEBUGPRINT(msg, ...)
//#define JIT_DEBUGPRINT(msg, ...) printf(msg, ## __VA_ARGS__)
-Compiler* compiler;
+Compiler* JITCompiler;
-const u32 ExeMemRegionSizes[] = {
+const u32 ExeMemRegionSizes[] =
+{
0x8000, // Unmapped Region (dummy)
0x8000, // ITCM
4*1024*1024, // Main RAM
@@ -48,7 +50,8 @@ const u32 ExeMemRegionSizes[] = {
0x40000 // ARM7 WVRAM
};
-const u32 ExeMemRegionOffsets[] = {
+const u32 ExeMemRegionOffsets[] =
+{
0,
0x8000,
0x10000,
@@ -61,65 +64,391 @@ const u32 ExeMemRegionOffsets[] = {
0x518000,
};
-#define DUP2(x) x, x
-
-const static ExeMemKind JIT_MEM[2][32] = {
- //arm9
- {
- /* 0X*/ DUP2(exeMem_ITCM),
- /* 1X*/ DUP2(exeMem_ITCM), // mirror
- /* 2X*/ DUP2(exeMem_MainRAM),
- /* 3X*/ DUP2(exeMem_SWRAM),
- /* 4X*/ DUP2(exeMem_Unmapped),
- /* 5X*/ DUP2(exeMem_Unmapped),
- /* 6X*/ exeMem_Unmapped,
- exeMem_LCDC, // Plain ARM9-CPU Access (LCDC mode) (max 656KB)
- /* 7X*/ DUP2(exeMem_Unmapped),
- /* 8X*/ DUP2(exeMem_Unmapped),
- /* 9X*/ DUP2(exeMem_Unmapped),
- /* AX*/ DUP2(exeMem_Unmapped),
- /* BX*/ DUP2(exeMem_Unmapped),
- /* CX*/ DUP2(exeMem_Unmapped),
- /* DX*/ DUP2(exeMem_Unmapped),
- /* EX*/ DUP2(exeMem_Unmapped),
- /* FX*/ DUP2(exeMem_ARM9_BIOS)
- },
- //arm7
- {
- /* 0X*/ DUP2(exeMem_ARM7_BIOS),
- /* 1X*/ DUP2(exeMem_Unmapped),
- /* 2X*/ DUP2(exeMem_MainRAM),
- /* 3X*/ exeMem_SWRAM,
- exeMem_ARM7_WRAM,
- /* 4X*/ DUP2(exeMem_Unmapped),
- /* 5X*/ DUP2(exeMem_Unmapped),
- /* 6X*/ DUP2(exeMem_ARM7_WVRAM), /* contrary to Gbatek, melonDS and itself,
- DeSmuME doesn't mirror the 64 MB region at 0x6800000 */
- /* 7X*/ DUP2(exeMem_Unmapped),
- /* 8X*/ DUP2(exeMem_Unmapped),
- /* 9X*/ DUP2(exeMem_Unmapped),
- /* AX*/ DUP2(exeMem_Unmapped),
- /* BX*/ DUP2(exeMem_Unmapped),
- /* CX*/ DUP2(exeMem_Unmapped),
- /* DX*/ DUP2(exeMem_Unmapped),
- /* EX*/ DUP2(exeMem_Unmapped),
- /* FX*/ DUP2(exeMem_Unmapped)
- }
-};
-
-#undef DUP2
-
/*
translates address to pseudo physical address
- more compact, eliminates mirroring, everything comes in a row
- we only need one translation table
*/
-u32 AddrTranslate9[0x2000];
-u32 AddrTranslate7[0x4000];
+
+// Maps an ARM9 virtual address to its pseudo physical address, i.e. an offset
+// into the flat executable memory space described by ExeMemRegionOffsets.
+// Returns 0 for every address that does not fall into one of the regions
+// handled below.
+u32 TranslateAddr9(u32 addr)
+{
+    const int region = ClassifyAddress9(addr);
+
+    if (region == memregion_MainRAM)
+        return ExeMemRegionOffsets[exeMem_MainRAM] + (addr & (MAIN_RAM_SIZE - 1));
+
+    if (region == memregion_SWRAM9)
+    {
+        // a null SWRAM_ARM9 pointer is treated as "nothing to translate to"
+        if (!NDS::SWRAM_ARM9)
+            return 0;
+        return ExeMemRegionOffsets[exeMem_SWRAM] + (NDS::SWRAM_ARM9 - NDS::SharedWRAM) + (addr & NDS::SWRAM_ARM9Mask);
+    }
+
+    if (region == memregion_ITCM)
+        return ExeMemRegionOffsets[exeMem_ITCM] + (addr & 0x7FFF);
+
+    if (region == memregion_VRAM)
+    {
+        // only the window 0x6800000..0x68A3FFF is translated
+        const bool insideWindow = addr >= 0x6800000 && addr < 0x68A4000;
+        return insideWindow ? ExeMemRegionOffsets[exeMem_LCDC] + (addr - 0x6800000) : 0;
+    }
+
+    if (region == memregion_BIOS9)
+        return ExeMemRegionOffsets[exeMem_ARM9_BIOS] + (addr & 0xFFF);
+
+    return 0;
+}
+
+// Maps an ARM7 virtual address to its pseudo physical address, i.e. an offset
+// into the flat executable memory space described by ExeMemRegionOffsets.
+// Returns 0 for every address that does not fall into one of the regions
+// handled below.
+u32 TranslateAddr7(u32 addr)
+{
+    const int region = ClassifyAddress7(addr);
+
+    if (region == memregion_MainRAM)
+        return ExeMemRegionOffsets[exeMem_MainRAM] + (addr & (MAIN_RAM_SIZE - 1));
+
+    if (region == memregion_SWRAM7)
+    {
+        // a null SWRAM_ARM7 pointer is treated as "nothing to translate to"
+        if (!NDS::SWRAM_ARM7)
+            return 0;
+        return ExeMemRegionOffsets[exeMem_SWRAM] + (NDS::SWRAM_ARM7 - NDS::SharedWRAM) + (addr & NDS::SWRAM_ARM7Mask);
+    }
+
+    // ClassifyAddress7 only reports BIOS7 for addr < 0x4000, so no mask is applied
+    if (region == memregion_BIOS7)
+        return ExeMemRegionOffsets[exeMem_ARM7_BIOS] + addr;
+
+    if (region == memregion_WRAM7)
+        return ExeMemRegionOffsets[exeMem_ARM7_WRAM] + (addr & 0xFFFF);
+
+    if (region == memregion_VWRAM)
+        return ExeMemRegionOffsets[exeMem_ARM7_WVRAM] + (addr & 0x1FFFF);
+
+    return 0;
+}
AddressRange CodeRanges[ExeMemSpaceSize / 512];
-std::unordered_map<u32, JitBlock*> JitBlocks;
+TinyVector<u32> InvalidLiterals;
+
+std::unordered_map<u32, JitBlock*> JitBlocks9;
+std::unordered_map<u32, JitBlock*> JitBlocks7;
+
+u8 MemoryStatus9[0x800000];
+u8 MemoryStatus7[0x800000];
+
+// Returns the memregion_* identifier an ARM9 virtual address belongs to.
+// ITCM, DTCM and the BIOS page are tested first (in that order); everything
+// else is decided by the top byte of the address.
+int ClassifyAddress9(u32 addr)
+{
+    if (addr < NDS::ARM9->ITCMSize)
+        return memregion_ITCM;
+
+    if (addr >= NDS::ARM9->DTCMBase && addr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize))
+        return memregion_DTCM;
+
+    if ((addr & 0xFFFFF000) == 0xFFFF0000)
+        return memregion_BIOS9;
+
+    switch (addr >> 24)
+    {
+    case 0x02: return memregion_MainRAM;
+    case 0x03: return memregion_SWRAM9;
+    case 0x04: return memregion_IO9;
+    case 0x06: return memregion_VRAM;
+    default:   return memregion_Other;
+    }
+}
+
+// Returns the memregion_* identifier an ARM7 virtual address belongs to.
+// The first 0x4000 bytes are the BIOS; everything else is decided in 8 MB
+// steps (top nine address bits).
+int ClassifyAddress7(u32 addr)
+{
+    if (addr < 0x00004000)
+        return memregion_BIOS7;
+
+    const u32 bank = addr & 0xFF800000;
+
+    if (bank == 0x02000000 || bank == 0x02800000)
+        return memregion_MainRAM;
+
+    // lower half of 0x03xxxxxx is shared WRAM only while SWRAM_ARM7 is non-null
+    if (bank == 0x03000000)
+        return NDS::SWRAM_ARM7 ? memregion_SWRAM7 : memregion_WRAM7;
+
+    if (bank == 0x03800000)
+        return memregion_WRAM7;
+
+    if (bank == 0x04000000)
+        return memregion_IO7;
+
+    if (bank == 0x04800000)
+        return memregion_Wifi;
+
+    if (bank == 0x06000000 || bank == 0x06800000)
+        return memregion_VWRAM;
+
+    return memregion_Other;
+}
+
+// Rebuilds the MemoryStatus9 entries for the ARM9 virtual range [start, end).
+//
+// Both bounds are rounded down to 4 KB pages. An end inside the very last
+// page (end >> 12 == 0xFFFFF, e.g. 0xFFFFFFFF) is bumped by one page so that
+// the whole address space can be covered with a u32 bound.
+//
+// Every page owns eight consecutive status bytes, one per 512 byte chunk.
+// Each byte holds the memregion_* id of the page, plus bit 0x80 when the
+// matching CodeRanges entry currently has compiled blocks.
+void UpdateMemoryStatus9(u32 start, u32 end)
+{
+    start >>= 12;
+    end >>= 12;
+
+    if (end == 0xFFFFF)
+        end++;
+
+    for (u32 i = start; i < end; i++)
+    {
+        u32 addr = i << 12;
+
+        int region = ClassifyAddress9(addr);
+        u32 pseudoPhysical = TranslateAddr9(addr);
+
+        for (u32 j = 0; j < 8; j++)
+        {
+            u8 val = region;
+            // status entry i * 8 + j describes the 512 byte chunk at
+            // addr + j * 512, so the code range has to be stepped by 512 bytes
+            // as well (it used to be stepped by 4 KB, probing unrelated ranges)
+            if (CodeRanges[(pseudoPhysical + j * 512) / 512].Blocks.Length)
+                val |= 0x80;
+            MemoryStatus9[i * 8 + j] = val;
+        }
+    }
+}
+
+// Rebuilds the MemoryStatus7 entries for the ARM7 virtual range [start, end).
+//
+// Both bounds are rounded down to 4 KB pages. An end inside the very last
+// page (end >> 12 == 0xFFFFF, e.g. 0xFFFFFFFF) is bumped by one page so that
+// the whole address space can be covered with a u32 bound.
+//
+// Every page owns eight consecutive status bytes, one per 512 byte chunk.
+// Each byte holds the memregion_* id of the page, plus bit 0x80 when the
+// matching CodeRanges entry currently has compiled blocks.
+void UpdateMemoryStatus7(u32 start, u32 end)
+{
+    start >>= 12;
+    end >>= 12;
+
+    if (end == 0xFFFFF)
+        end++;
+
+    for (u32 i = start; i < end; i++)
+    {
+        u32 addr = i << 12;
+
+        int region = ClassifyAddress7(addr);
+        u32 pseudoPhysical = TranslateAddr7(addr);
+
+        for (u32 j = 0; j < 8; j++)
+        {
+            u8 val = region;
+            // status entry i * 8 + j describes the 512 byte chunk at
+            // addr + j * 512, so the code range has to be stepped by 512 bytes
+            // as well (it used to be stepped by 4 KB, probing unrelated ranges)
+            if (CodeRanges[(pseudoPhysical + j * 512) / 512].Blocks.Length)
+                val |= 0x80;
+            MemoryStatus7[i * 8 + j] = val;
+        }
+    }
+}
+
+// Sets (invalidate == true) or clears (invalidate == false) bit 0x80 of the
+// MemoryStatus9 / MemoryStatus7 entries of every virtual mirror of the
+// pseudo physical address addr.
+//
+// The region containing addr is looked up in ExeMemRegionOffsets/Sizes
+// (index 0, the unmapped dummy, is never considered). For each CPU the
+// virtual window the region is visible in is determined and all
+// mapSize / physSize mirrors inside that window are updated. An address that
+// belongs to no region trips the assert at the end.
+void UpdateRegionByPseudoPhyiscal(u32 addr, bool invalidate)
+{
+    for (u32 i = 1; i < exeMem_Count; i++)
+    {
+        if (addr < ExeMemRegionOffsets[i] || addr >= ExeMemRegionOffsets[i] + ExeMemRegionSizes[i])
+            continue;
+
+        const u32 physSize = ExeMemRegionSizes[i];
+        const u32 offsetInRegion = addr - ExeMemRegionOffsets[i];
+
+        // num == 0 updates the ARM9 table, num == 1 the ARM7 table
+        for (u32 num = 0; num < 2; num++)
+        {
+            // a window size of 0 means "not visible to this CPU"
+            u32 mapSize = 0;
+            u32 mapStart = 0;
+            switch (i)
+            {
+            case exeMem_ITCM:
+                // both assignments must be guarded: previously only mapStart
+                // was, so the ARM7 table got flagged for ITCM code as well
+                if (num == 0)
+                {
+                    mapStart = 0;
+                    mapSize = NDS::ARM9->ITCMSize;
+                }
+                break;
+            case exeMem_MainRAM:
+                mapStart = 0x2000000;
+                mapSize = 0x1000000;
+                break;
+            case exeMem_SWRAM:
+                if (num == 0)
+                {
+                    if (NDS::SWRAM_ARM9)
+                    {
+                        mapStart = 0x3000000;
+                        mapSize = 0x1000000;
+                    }
+                }
+                else
+                {
+                    if (NDS::SWRAM_ARM7)
+                    {
+                        mapStart = 0x3000000;
+                        mapSize = 0x800000;
+                    }
+                }
+                break;
+            case exeMem_LCDC:
+                if (num == 0)
+                {
+                    mapStart = 0x6800000;
+                    mapSize = 0xA4000;
+                }
+                break;
+            case exeMem_ARM9_BIOS:
+                if (num == 0)
+                {
+                    mapStart = 0xFFFF0000;
+                    mapSize = 0x10000;
+                }
+                break;
+            case exeMem_ARM7_BIOS:
+                // same missing-braces defect as for the ITCM: the ARM9 table
+                // used to be flagged for ARM7 BIOS code
+                if (num == 1)
+                {
+                    mapStart = 0;
+                    mapSize = 0x4000;
+                }
+                break;
+            case exeMem_ARM7_WRAM:
+                if (num == 1)
+                {
+                    // without shared WRAM the ARM7 WRAM fills all of 0x03xxxxxx
+                    if (NDS::SWRAM_ARM7)
+                    {
+                        mapStart = 0x3800000;
+                        mapSize = 0x800000;
+                    }
+                    else
+                    {
+                        mapStart = 0x3000000;
+                        mapSize = 0x1000000;
+                    }
+                }
+                break;
+            case exeMem_ARM7_WVRAM:
+                if (num == 1)
+                {
+                    mapStart = 0x6000000;
+                    mapSize = 0x1000000;
+                }
+                break;
+            }
+
+            u8* status = num == 0 ? MemoryStatus9 : MemoryStatus7;
+
+            for (u32 j = 0; j < mapSize / physSize; j++)
+            {
+                u32 virtAddr = mapStart + physSize * j + offsetInRegion;
+                // on the ARM9, addresses covered by the DTCM are left untouched
+                if (num == 0
+                    && virtAddr >= NDS::ARM9->DTCMBase && virtAddr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize))
+                    continue;
+
+                if (invalidate)
+                    status[virtAddr / 512] |= 0x80;
+                else
+                    status[virtAddr / 512] &= ~0x80;
+            }
+        }
+        return;
+    }
+
+    assert(false);
+}
+
+// Generic ARM9 read of a u8/u16/u32: the address is aligned down to the size
+// of T, ITCM and DTCM are served directly from the CPU's arrays, and
+// everything else goes through NDS::ARM9Read*. For u32 the result is rotated
+// right by 8 bits per byte of misalignment of the original address.
+template <typename T>
+T SlowRead9(ARMv5* cpu, u32 addr)
+{
+    const u32 misalignment = addr & 0x3;
+    addr &= ~(sizeof(T) - 1);
+
+    T result;
+    if (addr < cpu->ITCMSize)
+    {
+        result = *(T*)&cpu->ITCM[addr & 0x7FFF];
+    }
+    else if (addr >= cpu->DTCMBase && addr < (cpu->DTCMBase + cpu->DTCMSize))
+    {
+        result = *(T*)&cpu->DTCM[(addr - cpu->DTCMBase) & 0x3FFF];
+    }
+    else
+    {
+        // pick the bus access matching the width of T
+        if (std::is_same<T, u32>::value)
+            result = NDS::ARM9Read32(addr);
+        else if (std::is_same<T, u16>::value)
+            result = NDS::ARM9Read16(addr);
+        else
+            result = NDS::ARM9Read8(addr);
+    }
+
+    if (!std::is_same<T, u32>::value)
+        return result;
+
+    return ROR(result, misalignment << 3);
+}
+
+// Generic ARM9 write of a u8/u16/u32: the address is aligned down to the size
+// of T, ITCM and DTCM are written directly into the CPU's arrays, and
+// everything else goes through NDS::ARM9Write*.
+template <typename T>
+void SlowWrite9(ARMv5* cpu, u32 addr, T val)
+{
+    addr &= ~(sizeof(T) - 1);
+
+    if (addr < cpu->ITCMSize)
+    {
+        // InvalidateITCMIfNecessary is called before the store itself
+        InvalidateITCMIfNecessary(addr);
+        *(T*)&cpu->ITCM[addr & 0x7FFF] = val;
+        return;
+    }
+
+    if (addr >= cpu->DTCMBase && addr < (cpu->DTCMBase + cpu->DTCMSize))
+    {
+        *(T*)&cpu->DTCM[(addr - cpu->DTCMBase) & 0x3FFF] = val;
+        return;
+    }
+
+    // pick the bus access matching the width of T
+    if (std::is_same<T, u32>::value)
+        NDS::ARM9Write32(addr, val);
+    else if (std::is_same<T, u16>::value)
+        NDS::ARM9Write16(addr, val);
+    else
+        NDS::ARM9Write8(addr, val);
+}
+
+// Explicit instantiations of the ARM9 slow path accessors for the three
+// access widths (u32, u16, u8).
+template void SlowWrite9<u32>(ARMv5*, u32, u32);
+template void SlowWrite9<u16>(ARMv5*, u32, u16);
+template void SlowWrite9<u8>(ARMv5*, u32, u8);
+
+template u32 SlowRead9<u32>(ARMv5*, u32);
+template u16 SlowRead9<u16>(ARMv5*, u32);
+template u8 SlowRead9<u8>(ARMv5*, u32);
+
+// Generic ARM7 read of a u8/u16/u32: the address is aligned down to the size
+// of T and the access is forwarded to NDS::ARM7Read*. For u32 the result is
+// rotated right by 8 bits per byte of misalignment of the original address.
+template <typename T>
+T SlowRead7(u32 addr)
+{
+    const u32 misalignment = addr & 0x3;
+    addr &= ~(sizeof(T) - 1);
+
+    T result;
+    if (std::is_same<T, u32>::value)
+        result = NDS::ARM7Read32(addr);
+    else if (std::is_same<T, u16>::value)
+        result = NDS::ARM7Read16(addr);
+    else
+        result = NDS::ARM7Read8(addr);
+
+    if (!std::is_same<T, u32>::value)
+        return result;
+
+    return ROR(result, misalignment << 3);
+}
+
+// Generic ARM7 write of a u8/u16/u32: the address is aligned down to the size
+// of T and the access is forwarded to the NDS::ARM7Write* of matching width.
+template <typename T>
+void SlowWrite7(u32 addr, T val)
+{
+    addr &= ~(sizeof(T) - 1);
+
+    if (std::is_same<T, u32>::value)
+    {
+        NDS::ARM7Write32(addr, val);
+        return;
+    }
+    if (std::is_same<T, u16>::value)
+    {
+        NDS::ARM7Write16(addr, val);
+        return;
+    }
+    NDS::ARM7Write8(addr, val);
+}
+
+// Transfers num consecutive words between ARM9 memory and data[], starting
+// at the word aligned addr.
+//
+// PreInc: the address is advanced by 4 before each access instead of after it.
+// Write:  data[] is stored to memory; otherwise memory is loaded into data[].
+// The slots of data[] are 64 bit wide, but every access is a 32 bit one:
+// stores pass data[i] to SlowWrite9<u32> (converted to u32), loads assign the
+// u32 result to data[i].
+template <bool PreInc, bool Write>
+void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu)
+{
+    addr &= ~0x3;
+    // the counter is unsigned like num, avoiding a signed/unsigned comparison
+    for (u32 i = 0; i < num; i++)
+    {
+        addr += PreInc * 4;
+        if (Write)
+            SlowWrite9<u32>(cpu, addr, data[i]);
+        else
+            data[i] = SlowRead9<u32>(cpu, addr);
+        addr += !PreInc * 4;
+    }
+}
+
+// Transfers num consecutive words between ARM7 memory and data[], starting
+// at the word aligned addr.
+//
+// PreInc: the address is advanced by 4 before each access instead of after it.
+// Write:  data[] is stored to memory; otherwise memory is loaded into data[].
+// The slots of data[] are 64 bit wide, but every access is a 32 bit one:
+// stores pass data[i] to SlowWrite7<u32> (converted to u32), loads assign the
+// u32 result to data[i].
+template <bool PreInc, bool Write>
+void SlowBlockTransfer7(u32 addr, u64* data, u32 num)
+{
+    addr &= ~0x3;
+    // the counter is unsigned like num, avoiding a signed/unsigned comparison
+    for (u32 i = 0; i < num; i++)
+    {
+        addr += PreInc * 4;
+        if (Write)
+            SlowWrite7<u32>(addr, data[i]);
+        else
+            data[i] = SlowRead7<u32>(addr);
+        addr += !PreInc * 4;
+    }
+}
+
+// Explicit instantiations of the ARM7 slow path accessors for the three
+// access widths (u32, u16, u8).
+template void SlowWrite7<u32>(u32, u32);
+template void SlowWrite7<u16>(u32, u16);
+template void SlowWrite7<u8>(u32, u8);
+
+template u32 SlowRead7<u32>(u32);
+template u16 SlowRead7<u16>(u32);
+template u8 SlowRead7<u8>(u32);
+
+// Explicit instantiations of the block transfers for every combination of
+// <PreInc, Write>, for both CPUs.
+template void SlowBlockTransfer9<false, false>(u32, u64*, u32, ARMv5*);
+template void SlowBlockTransfer9<false, true>(u32, u64*, u32, ARMv5*);
+template void SlowBlockTransfer9<true, false>(u32, u64*, u32, ARMv5*);
+template void SlowBlockTransfer9<true, true>(u32, u64*, u32, ARMv5*);
+template void SlowBlockTransfer7<false, false>(u32 addr, u64* data, u32 num);
+template void SlowBlockTransfer7<false, true>(u32 addr, u64* data, u32 num);
+template void SlowBlockTransfer7<true, false>(u32 addr, u64* data, u32 num);
+template void SlowBlockTransfer7<true, true>(u32 addr, u64* data, u32 num);
template <typename K, typename V, int Size, V InvalidValue>
struct UnreliableHashTable
@@ -211,31 +540,25 @@ struct UnreliableHashTable
};
UnreliableHashTable<u32, JitBlock*, 0x800, nullptr> RestoreCandidates;
-UnreliableHashTable<u32, u32, 0x1000, UINT32_MAX> FastBlockLookUp;
+UnreliableHashTable<u32, u32, 0x800, UINT32_MAX> FastBlockLookUp9;
+UnreliableHashTable<u32, u32, 0x800, UINT32_MAX> FastBlockLookUp7;
void Init()
{
- for (int i = 0; i < 0x2000; i++)
- {
- ExeMemKind kind = JIT_MEM[0][i >> 8];
- u32 size = ExeMemRegionSizes[kind];
-
- AddrTranslate9[i] = ExeMemRegionOffsets[kind] + ((i << 15) & (size - 1));
- }
- for (int i = 0; i < 0x4000; i++)
- {
- ExeMemKind kind = JIT_MEM[1][i >> 9];
- u32 size = ExeMemRegionSizes[kind];
-
- AddrTranslate7[i] = ExeMemRegionOffsets[kind] + ((i << 14) & (size - 1));
- }
-
- compiler = new Compiler();
+ JITCompiler = new Compiler();
}
void DeInit()
{
- delete compiler;
+ delete JITCompiler;
+}
+
+// Resets the JIT: empties the block cache, then rebuilds the status tables of
+// both CPUs over the whole 32 bit address space.
+void Reset()
+{
+    // must run first, the status tables are derived from the code ranges
+    ResetBlockCache();
+
+    const u32 spaceStart = 0;
+    const u32 spaceEnd = 0xFFFFFFFF;
+    UpdateMemoryStatus9(spaceStart, spaceEnd);
+    UpdateMemoryStatus7(spaceStart, spaceEnd);
+}
void FloodFillSetFlags(FetchedInstr instrs[], int start, u8 flags)
@@ -256,25 +579,31 @@ void FloodFillSetFlags(FetchedInstr instrs[], int start, u8 flags)
}
}
-bool DecodeLiteral(const FetchedInstr& instr, u32& addr)
+bool DecodeLiteral(bool thumb, const FetchedInstr& instr, u32& addr)
{
- switch (instr.Info.Kind)
+ if (!thumb)
{
- case ARMInstrInfo::ak_STR_IMM:
- case ARMInstrInfo::ak_STRB_IMM:
- addr = (instr.Addr + 8) + ((instr.Instr & 0xFFF) * (instr.Instr & (1 << 23) ? 1 : -1));
- return true;
- case ARMInstrInfo::ak_STRD_IMM:
- case ARMInstrInfo::ak_STRH_IMM:
- addr = (instr.Addr + 8) + (((instr.Instr & 0xF00) >> 4 | (instr.Instr & 0xF)) * (instr.Instr & (1 << 23) ? 1 : -1));
- return true;
- case ARMInstrInfo::ak_STM: // I honestly hope noone was ever crazy enough to do stm pc, {whatever}
- addr = instr.Addr + 8;
+ switch (instr.Info.Kind)
+ {
+ case ARMInstrInfo::ak_LDR_IMM:
+ case ARMInstrInfo::ak_LDRB_IMM:
+ addr = (instr.Addr + 8) + ((instr.Instr & 0xFFF) * (instr.Instr & (1 << 23) ? 1 : -1));
+ return true;
+ case ARMInstrInfo::ak_LDRH_IMM:
+ addr = (instr.Addr + 8) + (((instr.Instr & 0xF00) >> 4 | (instr.Instr & 0xF)) * (instr.Instr & (1 << 23) ? 1 : -1));
+ return true;
+ default:
+ break;
+ }
+ }
+ else if (instr.Info.Kind == ARMInstrInfo::tk_LDR_PCREL)
+ {
+ addr = ((instr.Addr + 4) & ~0x2) + ((instr.Instr & 0xFF) << 2);
return true;
- default:
- JIT_DEBUGPRINT("Literal %08x %x not recognised\n", instr.Instr, instr.Addr);
- return false;
}
+
+ JIT_DEBUGPRINT("Literal %08x %x not recognised %d\n", instr.Instr, instr.Addr, instr.Info.Kind);
+ return false;
}
bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, bool hasLink, u32 lr, bool& link,
@@ -453,6 +782,8 @@ InterpreterFunc InterpretTHUMB[ARMInstrInfo::tk_Count] =
};
#undef F
+
+extern u32 literalsPerBlock;
void CompileBlock(ARM* cpu)
{
bool thumb = cpu->CPSR & 0x20;
@@ -463,31 +794,33 @@ void CompileBlock(ARM* cpu)
Config::JIT_MaxBlockSize = 32;
u32 blockAddr = cpu->R[15] - (thumb ? 2 : 4);
- if (!(cpu->Num == 0
- ? IsMapped<0>(blockAddr)
- : IsMapped<1>(blockAddr)))
+ u32 pseudoPhysicalAddr = cpu->Num == 0
+ ? TranslateAddr9(blockAddr)
+ : TranslateAddr7(blockAddr);
+ if (pseudoPhysicalAddr < ExeMemRegionSizes[exeMem_Unmapped])
{
printf("Trying to compile a block in unmapped memory: %x\n", blockAddr);
}
- u32 pseudoPhysicalAddr = cpu->Num == 0
- ? TranslateAddr<0>(blockAddr)
- : TranslateAddr<1>(blockAddr);
-
FetchedInstr instrs[Config::JIT_MaxBlockSize];
int i = 0;
u32 r15 = cpu->R[15];
- u32 addresseRanges[32] = {};
+ u32 addressRanges[Config::JIT_MaxBlockSize];
+ u32 addressMasks[Config::JIT_MaxBlockSize] = {0};
u32 numAddressRanges = 0;
+ u32 numLiterals = 0;
+ u32 literalLoadAddrs[Config::JIT_MaxBlockSize];
+ // they are going to be hashed
+ u32 literalValues[Config::JIT_MaxBlockSize];
+ u32 instrValues[Config::JIT_MaxBlockSize];
+
cpu->FillPipeline();
u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]};
u32 nextInstrAddr[2] = {blockAddr, r15};
- JIT_DEBUGPRINT("start block %x %08x (%x) (region invalidates %dx)\n",
- blockAddr, cpu->CPSR, pseudoPhysicalAddr,
- CodeRanges[pseudoPhysicalAddr / 512].TimesInvalidated);
+ JIT_DEBUGPRINT("start block %x %08x (%x)\n", blockAddr, cpu->CPSR, pseudoPhysicalAddr);
u32 lastSegmentStart = blockAddr;
u32 lr;
@@ -507,23 +840,29 @@ void CompileBlock(ARM* cpu)
nextInstrAddr[1] = r15;
JIT_DEBUGPRINT("instr %08x %x\n", instrs[i].Instr & (thumb ? 0xFFFF : ~0), instrs[i].Addr);
- u32 translatedAddr = (cpu->Num == 0
- ? TranslateAddr<0>(instrs[i].Addr)
- : TranslateAddr<1>(instrs[i].Addr)) & ~0x1FF;
- if (i == 0 || translatedAddr != addresseRanges[numAddressRanges - 1])
+ instrValues[i] = instrs[i].Instr;
+
+ u32 translatedAddr = cpu->Num == 0
+ ? TranslateAddr9(instrs[i].Addr)
+ : TranslateAddr7(instrs[i].Addr);
+ u32 translatedAddrRounded = translatedAddr & ~0x1FF;
+ if (i == 0 || translatedAddrRounded != addressRanges[numAddressRanges - 1])
{
bool returning = false;
for (int j = 0; j < numAddressRanges; j++)
{
- if (addresseRanges[j] == translatedAddr)
+ if (addressRanges[j] == translatedAddrRounded)
{
+ std::swap(addressRanges[j], addressRanges[numAddressRanges - 1]);
+ std::swap(addressMasks[j], addressMasks[numAddressRanges - 1]);
returning = true;
break;
}
}
if (!returning)
- addresseRanges[numAddressRanges++] = translatedAddr;
+ addressRanges[numAddressRanges++] = translatedAddrRounded;
}
+ addressMasks[numAddressRanges - 1] |= 1 << ((translatedAddr & 0x1FF) / 16);
if (cpu->Num == 0)
{
@@ -572,7 +911,8 @@ void CompileBlock(ARM* cpu)
u32 icode = ((instrs[i].Instr >> 4) & 0xF) | ((instrs[i].Instr >> 16) & 0xFF0);
assert(InterpretARM[instrs[i].Info.Kind] == ARMInterpreter::ARMInstrTable[icode]
|| instrs[i].Info.Kind == ARMInstrInfo::ak_MOV_REG_LSL_IMM
- || instrs[i].Info.Kind == ARMInstrInfo::ak_Nop);
+ || instrs[i].Info.Kind == ARMInstrInfo::ak_Nop
+ || instrs[i].Info.Kind == ARMInstrInfo::ak_UNK);
if (cpu->CheckCondition(instrs[i].Cond()))
InterpretARM[instrs[i].Info.Kind](cpu);
else
@@ -583,21 +923,26 @@ void CompileBlock(ARM* cpu)
instrs[i].DataCycles = cpu->DataCycles;
instrs[i].DataRegion = cpu->DataRegion;
- if (instrs[i].Info.SpecialKind == ARMInstrInfo::special_WriteMem
- && instrs[i].Info.SrcRegs == (1 << 15)
- && instrs[i].Info.DstRegs == 0)
+ u32 literalAddr;
+ if (Config::JIT_LiteralOptimisations
+ && instrs[i].Info.SpecialKind == ARMInstrInfo::special_LoadLiteral
+ && DecodeLiteral(thumb, instrs[i], literalAddr))
{
- assert (!thumb);
-
- u32 addr;
- if (DecodeLiteral(instrs[i], addr))
- {
- JIT_DEBUGPRINT("pc relative write detected\n");
- u32 translatedAddr = cpu->Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
-
- ARMJIT::InvalidateByAddr(translatedAddr, false);
- CodeRanges[translatedAddr / 512].InvalidLiterals |= (1 << ((translatedAddr & 0x1FF) / 16));
- }
+ u32 translatedAddr = cpu->Num == 0
+ ? TranslateAddr9(literalAddr)
+ : TranslateAddr7(literalAddr);
+ u32 translatedAddrRounded = translatedAddr & ~0x1FF;
+
+ u32 j = 0;
+ for (; j < numAddressRanges; j++)
+ if (addressRanges[j] == translatedAddrRounded)
+ break;
+ if (j == numAddressRanges)
+ addressRanges[numAddressRanges++] = translatedAddrRounded;
+ addressMasks[j] |= 1 << ((translatedAddr & 0x1FF) / 16);
+ JIT_DEBUGPRINT("literal loading %08x %08x %08x %08x\n", literalAddr, translatedAddr, addressMasks[j], addressRanges[j]);
+ cpu->DataRead32(literalAddr, &literalValues[numLiterals]);
+ literalLoadAddrs[numLiterals++] = translatedAddr;
}
if (thumb && instrs[i].Info.Kind == ARMInstrInfo::tk_BL_LONG_2 && i > 0
@@ -650,8 +995,8 @@ void CompileBlock(ARM* cpu)
else if (hasBranched && !isBackJump && i + 1 < Config::JIT_MaxBlockSize)
{
u32 targetPseudoPhysical = cpu->Num == 0
- ? TranslateAddr<0>(target)
- : TranslateAddr<1>(target);
+ ? TranslateAddr9(target)
+ : TranslateAddr7(target);
if (link)
{
@@ -688,36 +1033,29 @@ void CompileBlock(ARM* cpu)
i++;
- bool canCompile = compiler->CanCompile(thumb, instrs[i - 1].Info.Kind);
+ bool canCompile = JITCompiler->CanCompile(thumb, instrs[i - 1].Info.Kind);
bool secondaryFlagReadCond = !canCompile || (instrs[i - 1].BranchFlags & (branch_FollowCondTaken | branch_FollowCondNotTaken));
if (instrs[i - 1].Info.ReadFlags != 0 || secondaryFlagReadCond)
FloodFillSetFlags(instrs, i - 2, !secondaryFlagReadCond ? instrs[i - 1].Info.ReadFlags : 0xF);
} while(!instrs[i - 1].Info.EndBlock && i < Config::JIT_MaxBlockSize && !cpu->Halted && (!cpu->IRQ || (cpu->CPSR & 0x80)));
+ u32 literalHash = (u32)XXH3_64bits(literalValues, numLiterals * 4);
+ u32 instrHash = (u32)XXH3_64bits(instrValues, i * 4);
+
JitBlock* prevBlock = RestoreCandidates.LookUp(pseudoPhysicalAddr);
bool mayRestore = true;
if (prevBlock)
{
RestoreCandidates.Remove(pseudoPhysicalAddr);
- if (prevBlock->NumInstrs == i)
- {
- for (int j = 0; j < i; j++)
- {
- if (prevBlock->Instrs()[j] != instrs[j].Instr)
- {
- mayRestore = false;
- break;
- }
- }
- }
- else
- mayRestore = false;
- if (prevBlock->NumAddresses == numAddressRanges)
+ mayRestore = prevBlock->LiteralHash == literalHash && prevBlock->InstrHash == instrHash;
+
+ if (mayRestore && prevBlock->NumAddresses == numAddressRanges)
{
for (int j = 0; j < numAddressRanges; j++)
{
- if (prevBlock->AddressRanges()[j] != addresseRanges[j])
+ if (prevBlock->AddressRanges()[j] != addressRanges[j]
+ || prevBlock->AddressMasks()[j] != addressMasks[j])
{
mayRestore = false;
break;
@@ -739,18 +1077,21 @@ void CompileBlock(ARM* cpu)
if (prevBlock)
delete prevBlock;
- block = new JitBlock(i, numAddressRanges);
- for (int j = 0; j < i; j++)
- block->Instrs()[j] = instrs[j].Instr;
+ block = new JitBlock(cpu->Num, i, numAddressRanges, numLiterals);
+ block->LiteralHash = literalHash;
+ block->InstrHash = instrHash;
+ for (int j = 0; j < numAddressRanges; j++)
+ block->AddressRanges()[j] = addressRanges[j];
for (int j = 0; j < numAddressRanges; j++)
- block->AddressRanges()[j] = addresseRanges[j];
+ block->AddressMasks()[j] = addressMasks[j];
+ for (int j = 0; j < numLiterals; j++)
+ block->Literals()[j] = literalLoadAddrs[j];
- block->StartAddr = blockAddr;
block->PseudoPhysicalAddr = pseudoPhysicalAddr;
FloodFillSetFlags(instrs, i - 1, 0xF);
- block->EntryPoint = compiler->CompileBlock(pseudoPhysicalAddr, cpu, thumb, instrs, i);
+ block->EntryPoint = JITCompiler->CompileBlock(pseudoPhysicalAddr, cpu, thumb, instrs, i);
}
else
{
@@ -760,23 +1101,73 @@ void CompileBlock(ARM* cpu)
for (int j = 0; j < numAddressRanges; j++)
{
- assert(addresseRanges[j] == block->AddressRanges()[j]);
- CodeRanges[addresseRanges[j] / 512].Blocks.Add(block);
+ assert(addressRanges[j] == block->AddressRanges()[j]);
+ assert(addressMasks[j] == block->AddressMasks()[j]);
+ assert(addressMasks[j] != 0);
+ CodeRanges[addressRanges[j] / 512].Code |= addressMasks[j];
+ CodeRanges[addressRanges[j] / 512].Blocks.Add(block);
+
+ UpdateRegionByPseudoPhyiscal(addressRanges[j], true);
}
- JitBlocks[pseudoPhysicalAddr] = block;
- FastBlockLookUp.Insert(pseudoPhysicalAddr, compiler->SubEntryOffset(block->EntryPoint));
+ if (cpu->Num == 0)
+ {
+ JitBlocks9[pseudoPhysicalAddr] = block;
+ FastBlockLookUp9.Insert(pseudoPhysicalAddr, JITCompiler->SubEntryOffset(block->EntryPoint));
+ }
+ else
+ {
+ JitBlocks7[pseudoPhysicalAddr] = block;
+ FastBlockLookUp7.Insert(pseudoPhysicalAddr, JITCompiler->SubEntryOffset(block->EntryPoint));
+ }
}
-void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore)
+void InvalidateByAddr(u32 pseudoPhysical)
{
JIT_DEBUGPRINT("invalidating by addr %x\n", pseudoPhysical);
AddressRange* range = &CodeRanges[pseudoPhysical / 512];
- int startLength = range->Blocks.Length;
- for (int i = 0; i < range->Blocks.Length; i++)
+ u32 mask = 1 << ((pseudoPhysical & 0x1FF) / 16);
+
+ range->Code = 0;
+ for (int i = 0; i < range->Blocks.Length;)
{
- assert(range->Blocks.Length == startLength);
JitBlock* block = range->Blocks[i];
+
+ bool invalidated = false;
+ u32 mask = 0;
+ for (int j = 0; j < block->NumAddresses; j++)
+ {
+ if (block->AddressRanges()[j] == (pseudoPhysical & ~0x1FF))
+ {
+ mask = block->AddressMasks()[j];
+ invalidated = block->AddressMasks()[j] & mask;
+ break;
+ }
+ }
+ assert(mask);
+ if (!invalidated)
+ {
+ range->Code |= mask;
+ i++;
+ continue;
+ }
+ range->Blocks.Remove(i);
+
+ bool literalInvalidation = false;
+ for (int j = 0; j < block->NumLiterals; j++)
+ {
+ u32 addr = block->Literals()[j];
+ if (addr == pseudoPhysical)
+ {
+ if (InvalidLiterals.Find(pseudoPhysical) != -1)
+ {
+ InvalidLiterals.Add(pseudoPhysical);
+ JIT_DEBUGPRINT("found invalid literal %d\n", InvalidLiterals.Length);
+ }
+ literalInvalidation = true;
+ break;
+ }
+ }
for (int j = 0; j < block->NumAddresses; j++)
{
u32 addr = block->AddressRanges()[j];
@@ -786,76 +1177,59 @@ void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore)
assert(otherRange != range);
bool removed = otherRange->Blocks.RemoveByValue(block);
assert(removed);
+
+ if (otherRange->Blocks.Length == 0)
+ {
+ otherRange->Code = 0;
+ UpdateRegionByPseudoPhyiscal(addr, false);
+ }
}
}
for (int j = 0; j < block->NumLinks(); j++)
- compiler->UnlinkBlock(block->Links()[j]);
+ JITCompiler->UnlinkBlock(block->Links()[j]);
+ block->ResetLinks();
- JitBlocks.erase(block->PseudoPhysicalAddr);
- FastBlockLookUp.Remove(block->PseudoPhysicalAddr);
+ if (block->Num == 0)
+ {
+ JitBlocks9.erase(block->PseudoPhysicalAddr);
+ FastBlockLookUp9.Remove(block->PseudoPhysicalAddr);
+ }
+ else
+ {
+ JitBlocks7.erase(block->PseudoPhysicalAddr);
+ FastBlockLookUp7.Remove(block->PseudoPhysicalAddr);
+ }
- if (mayRestore)
+ if (!literalInvalidation)
{
JitBlock* prevBlock = RestoreCandidates.Insert(block->PseudoPhysicalAddr, block);
if (prevBlock)
delete prevBlock;
}
+ else
+ {
+ delete block;
+ }
}
- if ((range->TimesInvalidated + 1) > range->TimesInvalidated)
- range->TimesInvalidated++;
-
- range->Blocks.Clear();
-}
-void InvalidateByAddr7(u32 addr)
-{
- u32 pseudoPhysical = TranslateAddr<1>(addr);
- if (__builtin_expect(CodeRanges[pseudoPhysical / 512].Blocks.Length > 0, false))
- InvalidateByAddr(pseudoPhysical);
+ if (range->Blocks.Length == 0)
+ UpdateRegionByPseudoPhyiscal(pseudoPhysical, false);
}
-void InvalidateITCM(u32 addr)
+void InvalidateRegionIfNecessary(u32 pseudoPhyisical)
{
- u32 pseudoPhysical = addr + ExeMemRegionOffsets[exeMem_ITCM];
- if (CodeRanges[pseudoPhysical / 512].Blocks.Length > 0)
- InvalidateByAddr(pseudoPhysical);
-}
-
-void InvalidateAll()
-{
- JIT_DEBUGPRINT("invalidating all %x\n", JitBlocks.size());
- for (auto it : JitBlocks)
- {
- JitBlock* block = it.second;
-
- FastBlockLookUp.Remove(block->PseudoPhysicalAddr);
-
- for (int i = 0; i < block->NumAddresses; i++)
- {
- u32 addr = block->AddressRanges()[i];
- AddressRange* range = &CodeRanges[addr / 512];
- range->Blocks.Clear();
- if (range->TimesInvalidated + 1 > range->TimesInvalidated)
- range->TimesInvalidated++;
- }
- for (int i = 0; i < block->NumLinks(); i++)
- compiler->UnlinkBlock(block->Links()[i]);
- block->ResetLinks();
-
- JitBlock* prevBlock = RestoreCandidates.Insert(block->PseudoPhysicalAddr, block);
- if (prevBlock)
- delete prevBlock;
- }
-
- JitBlocks.clear();
+ if (CodeRanges[pseudoPhyisical / 512].Code & (1 << ((pseudoPhyisical & 0x1FF) / 16)))
+ InvalidateByAddr(pseudoPhyisical);
}
void ResetBlockCache()
{
printf("Resetting JIT block cache...\n");
- FastBlockLookUp.Reset();
+ InvalidLiterals.Clear();
+ FastBlockLookUp9.Reset();
+ FastBlockLookUp7.Reset();
RestoreCandidates.Reset();
for (int i = 0; i < sizeof(RestoreCandidates.Table)/sizeof(RestoreCandidates.Table[0]); i++)
{
@@ -870,61 +1244,119 @@ void ResetBlockCache()
RestoreCandidates.Table[i].ValB = NULL;
}
}
- for (auto it : JitBlocks)
+ for (auto it : JitBlocks9)
{
JitBlock* block = it.second;
for (int j = 0; j < block->NumAddresses; j++)
{
u32 addr = block->AddressRanges()[j];
CodeRanges[addr / 512].Blocks.Clear();
- CodeRanges[addr / 512].TimesInvalidated = 0;
- CodeRanges[addr / 512].InvalidLiterals = 0;
+ CodeRanges[addr / 512].Code = 0;
}
delete block;
}
- JitBlocks.clear();
+ for (auto it : JitBlocks7)
+ {
+ JitBlock* block = it.second;
+ for (int j = 0; j < block->NumAddresses; j++)
+ {
+ u32 addr = block->AddressRanges()[j];
+ CodeRanges[addr / 512].Blocks.Clear();
+ CodeRanges[addr / 512].Code = 0;
+ }
+ }
+ JitBlocks9.clear();
+ JitBlocks7.clear();
- compiler->Reset();
+ JITCompiler->Reset();
}
+template <u32 Num>
JitBlockEntry LookUpBlockEntry(u32 addr)
{
- u32 entryOffset = FastBlockLookUp.LookUp(addr);
+ auto& fastMap = Num == 0 ? FastBlockLookUp9 : FastBlockLookUp7;
+ u32 entryOffset = fastMap.LookUp(addr);
if (entryOffset != UINT32_MAX)
- return compiler->AddEntryOffset(entryOffset);
+ return JITCompiler->AddEntryOffset(entryOffset);
- auto block = JitBlocks.find(addr);
- if (block != JitBlocks.end())
+ auto& slowMap = Num == 0 ? JitBlocks9 : JitBlocks7;
+ auto block = slowMap.find(addr);
+ if (block != slowMap.end())
{
- FastBlockLookUp.Insert(addr, compiler->SubEntryOffset(block->second->EntryPoint));
+ fastMap.Insert(addr, JITCompiler->SubEntryOffset(block->second->EntryPoint));
return block->second->EntryPoint;
}
return NULL;
}
+template JitBlockEntry LookUpBlockEntry<0>(u32);
+template JitBlockEntry LookUpBlockEntry<1>(u32);
+
template <u32 Num>
void LinkBlock(ARM* cpu, u32 codeOffset)
{
- u32 targetPseudoPhys = TranslateAddr<Num>(cpu->R[15] - ((cpu->CPSR&0x20)?2:4));
- auto block = JitBlocks.find(targetPseudoPhys);
- if (block == JitBlocks.end())
+ auto& blockMap = Num == 0 ? JitBlocks9 : JitBlocks7;
+ u32 instrAddr = cpu->R[15] - ((cpu->CPSR&0x20)?2:4);
+ u32 targetPseudoPhys = Num == 0 ? TranslateAddr9(instrAddr) : TranslateAddr7(instrAddr);
+ auto block = blockMap.find(targetPseudoPhys);
+ if (block == blockMap.end())
{
CompileBlock(cpu);
- block = JitBlocks.find(targetPseudoPhys);
+ block = blockMap.find(targetPseudoPhys);
}
JIT_DEBUGPRINT("linking to block %08x\n", targetPseudoPhys);
block->second->AddLink(codeOffset);
- compiler->LinkBlock(codeOffset, block->second->EntryPoint);
+ JITCompiler->LinkBlock(codeOffset, block->second->EntryPoint);
+}
+
+template void LinkBlock<0>(ARM*, u32);
+template void LinkBlock<1>(ARM*, u32);
+
+void WifiWrite32(u32 addr, u32 val)
+{
+ Wifi::Write(addr, val & 0xFFFF);
+ Wifi::Write(addr + 2, val >> 16);
+}
+
+u32 WifiRead32(u32 addr)
+{
+ return Wifi::Read(addr) | (Wifi::Read(addr + 2) << 16);
+}
+
+template <typename T>
+void VRAMWrite(u32 addr, T val)
+{
+ switch (addr & 0x00E00000)
+ {
+ case 0x00000000: GPU::WriteVRAM_ABG<T>(addr, val); return;
+ case 0x00200000: GPU::WriteVRAM_BBG<T>(addr, val); return;
+ case 0x00400000: GPU::WriteVRAM_AOBJ<T>(addr, val); return;
+ case 0x00600000: GPU::WriteVRAM_BOBJ<T>(addr, val); return;
+ default: GPU::WriteVRAM_LCDC<T>(addr, val); return;
+ }
+}
+template <typename T>
+T VRAMRead(u32 addr)
+{
+ switch (addr & 0x00E00000)
+ {
+ case 0x00000000: return GPU::ReadVRAM_ABG<T>(addr);
+ case 0x00200000: return GPU::ReadVRAM_BBG<T>(addr);
+ case 0x00400000: return GPU::ReadVRAM_AOBJ<T>(addr);
+ case 0x00600000: return GPU::ReadVRAM_BOBJ<T>(addr);
+ default: return GPU::ReadVRAM_LCDC<T>(addr);
+ }
}
void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size)
{
if (cpu->Num == 0)
{
- if ((addr & 0xFF000000) == 0x04000000)
+ switch (addr & 0xFF000000)
{
+ case 0x04000000:
if (!store && size == 32 && addr == 0x04100010 && NDS::ExMemCnt[0] & (1<<11))
return (void*)NDSCart::ReadROMData;
@@ -949,13 +1381,25 @@ void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size)
switch (size | store)
{
- case 8: return (void*)NDS::ARM9IORead8;
- case 9: return (void*)NDS::ARM9IOWrite8;
+ case 8: return (void*)NDS::ARM9IORead8;
+ case 9: return (void*)NDS::ARM9IOWrite8;
case 16: return (void*)NDS::ARM9IORead16;
case 17: return (void*)NDS::ARM9IOWrite16;
case 32: return (void*)NDS::ARM9IORead32;
case 33: return (void*)NDS::ARM9IOWrite32;
}
+ break;
+ case 0x06000000:
+ switch (size | store)
+ {
+ case 8: return (void*)VRAMRead<u8>;
+ case 9: return NULL;
+ case 16: return (void*)VRAMRead<u16>;
+ case 17: return (void*)VRAMWrite<u16>;
+ case 32: return (void*)VRAMRead<u32>;
+ case 33: return (void*)VRAMWrite<u32>;
+ }
+ break;
}
}
else
@@ -987,20 +1431,31 @@ void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size)
}
break;
case 0x04800000:
- if (addr < 0x04810000 && size == 16)
+ if (addr < 0x04810000 && size >= 16)
{
- if (store)
- return (void*)Wifi::Write;
- else
- return (void*)Wifi::Read;
+ switch (size | store)
+ {
+ case 16: return (void*)Wifi::Read;
+ case 17: return (void*)Wifi::Write;
+ case 32: return (void*)WifiRead32;
+ case 33: return (void*)WifiWrite32;
+ }
}
break;
+ case 0x06000000:
+ case 0x06800000:
+ switch (size | store)
+ {
+ case 8: return (void*)GPU::ReadVRAM_ARM7<u8>;
+ case 9: return (void*)GPU::WriteVRAM_ARM7<u8>;
+ case 16: return (void*)GPU::ReadVRAM_ARM7<u16>;
+ case 17: return (void*)GPU::WriteVRAM_ARM7<u16>;
+ case 32: return (void*)GPU::ReadVRAM_ARM7<u32>;
+ case 33: return (void*)GPU::WriteVRAM_ARM7<u32>;
+ }
}
}
return NULL;
}
}
-
-template void ARMJIT::LinkBlock<0>(ARM*, u32);
-template void ARMJIT::LinkBlock<1>(ARM*, u32);
diff --git a/src/ARMJIT.h b/src/ARMJIT.h
index cab385f..44a6140 100644
--- a/src/ARMJIT.h
+++ b/src/ARMJIT.h
@@ -28,45 +28,60 @@ extern const u32 ExeMemRegionSizes[];
typedef u32 (*JitBlockEntry)();
-extern u32 AddrTranslate9[0x2000];
-extern u32 AddrTranslate7[0x4000];
-
const u32 ExeMemSpaceSize = 0x518000; // I hate you C++, sometimes I really hate you...
-template <u32 num>
-inline bool IsMapped(u32 addr)
-{
- if (num == 0)
- return AddrTranslate9[(addr & 0xFFFFFFF) >> 15] >= ExeMemRegionSizes[exeMem_Unmapped];
- else
- return AddrTranslate7[(addr & 0xFFFFFFF) >> 14] >= ExeMemRegionSizes[exeMem_Unmapped];
-}
-
-template <u32 num>
-inline u32 TranslateAddr(u32 addr)
-{
- if (num == 0)
- return AddrTranslate9[(addr & 0xFFFFFFF) >> 15] + (addr & 0x7FFF);
- else
- return AddrTranslate7[(addr & 0xFFFFFFF) >> 14] + (addr & 0x3FFF);
-}
+u32 TranslateAddr9(u32 addr);
+u32 TranslateAddr7(u32 addr);
+template <u32 Num>
JitBlockEntry LookUpBlockEntry(u32 addr);
-
void Init();
void DeInit();
-void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore = true);
-void InvalidateAll();
+void Reset();
+
+void InvalidateByAddr(u32 pseudoPhysical);
+
+void InvalidateRegionIfNecessary(u32 addr);
-void InvalidateITCM(u32 addr);
-void InvalidateByAddr7(u32 addr);
+inline void InvalidateMainRAMIfNecessary(u32 addr)
+{
+ InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_MainRAM] + (addr & (MAIN_RAM_SIZE - 1)));
+}
+inline void InvalidateITCMIfNecessary(u32 addr)
+{
+ InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_ITCM] + (addr & 0x7FFF));
+}
+inline void InvalidateLCDCIfNecessary(u32 addr)
+{
+ if (addr < 0x68A3FFF)
+ InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_LCDC] + (addr - 0x6800000));
+}
+inline void InvalidateSWRAM7IfNecessary(u32 addr)
+{
+ InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_SWRAM] + (NDS::SWRAM_ARM7 - NDS::SharedWRAM) + (addr & NDS::SWRAM_ARM7Mask));
+}
+inline void InvalidateSWRAM9IfNecessary(u32 addr)
+{
+ InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_SWRAM] + (NDS::SWRAM_ARM9 - NDS::SharedWRAM) + (addr & NDS::SWRAM_ARM9Mask));
+}
+inline void InvalidateARM7WRAMIfNecessary(u32 addr)
+{
+ InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_ARM7_WRAM] + (addr & 0xFFFF));
+}
+inline void InvalidateARM7WVRAMIfNecessary(u32 addr)
+{
+ InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_ARM7_WVRAM] + (addr & 0x1FFFF));
+}
void CompileBlock(ARM* cpu);
void ResetBlockCache();
+void UpdateMemoryStatus9(u32 start, u32 end);
+void UpdateMemoryStatus7(u32 start, u32 end);
+
}
extern "C" void ARM_Dispatch(ARM* cpu, ARMJIT::JitBlockEntry entry);
diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
index 00fa436..a67f357 100644
--- a/src/ARMJIT_A64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
@@ -650,7 +650,7 @@ void Compiler::Comp_AddCycles_CDI()
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
s32 numD = CurInstr.DataCycles;
- if ((CurInstr.DataRegion >> 4) == 0x02) // mainRAM
+ if ((CurInstr.DataRegion >> 24) == 0x02) // mainRAM
{
if (CodeRegion == 0x02)
cycles = numC + numD;
@@ -695,7 +695,7 @@ void Compiler::Comp_AddCycles_CD()
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
s32 numD = CurInstr.DataCycles;
- if ((CurInstr.DataRegion >> 4) == 0x02)
+ if ((CurInstr.DataRegion >> 24) == 0x02)
{
if (CodeRegion == 0x02)
cycles += numC + numD;
diff --git a/src/ARMJIT_Internal.h b/src/ARMJIT_Internal.h
index 66d1808..4e45760 100644
--- a/src/ARMJIT_Internal.h
+++ b/src/ARMJIT_Internal.h
@@ -152,30 +152,34 @@ struct __attribute__((packed)) TinyVector
class JitBlock
{
public:
- JitBlock(u32 numInstrs, u32 numAddresses)
+ JitBlock(u32 num, u32 literalHash, u32 numAddresses, u32 numLiterals)
{
- NumInstrs = numInstrs;
+ Num = num;
NumAddresses = numAddresses;
- Data.SetLength(numInstrs + numAddresses);
+ NumLiterals = numLiterals;
+ Data.SetLength(numAddresses * 2 + numLiterals);
}
- u32 StartAddr;
u32 PseudoPhysicalAddr;
-
- u32 NumInstrs;
- u32 NumAddresses;
+
+ u32 InstrHash, LiteralHash;
+ u8 Num;
+ u16 NumAddresses;
+ u16 NumLiterals;
JitBlockEntry EntryPoint;
- u32* Instrs()
- { return &Data[0]; }
u32* AddressRanges()
- { return &Data[NumInstrs]; }
+ { return &Data[0]; }
+ u32* AddressMasks()
+ { return &Data[NumAddresses]; }
+ u32* Literals()
+ { return &Data[NumAddresses * 2]; }
u32* Links()
- { return &Data[NumInstrs + NumAddresses]; }
+ { return &Data[NumAddresses * 2 + NumLiterals]; }
u32 NumLinks()
- { return Data.Length - NumInstrs - NumAddresses; }
+ { return Data.Length - NumAddresses * 2 - NumLiterals; }
void AddLink(u32 link)
{
@@ -184,7 +188,7 @@ public:
void ResetLinks()
{
- Data.SetLength(NumInstrs + NumAddresses);
+ Data.SetLength(NumAddresses * 2 + NumLiterals);
}
private:
@@ -200,8 +204,7 @@ private:
struct __attribute__((packed)) AddressRange
{
TinyVector<JitBlock*> Blocks;
- u16 InvalidLiterals;
- u16 TimesInvalidated;
+ u32 Code;
};
extern AddressRange CodeRanges[ExeMemSpaceSize / 512];
@@ -210,14 +213,45 @@ typedef void (*InterpreterFunc)(ARM* cpu);
extern InterpreterFunc InterpretARM[];
extern InterpreterFunc InterpretTHUMB[];
-extern u8 MemRegion9[0x80000];
-extern u8 MemRegion7[0x80000];
+extern u8 MemoryStatus9[0x800000];
+extern u8 MemoryStatus7[0x800000];
+
+extern TinyVector<u32> InvalidLiterals;
void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size);
template <u32 Num>
void LinkBlock(ARM* cpu, u32 codeOffset);
+enum
+{
+ memregion_Other = 0,
+ memregion_ITCM,
+ memregion_DTCM,
+ memregion_BIOS9,
+ memregion_MainRAM,
+ memregion_SWRAM9,
+ memregion_SWRAM7,
+ memregion_IO9,
+ memregion_VRAM,
+ memregion_BIOS7,
+ memregion_WRAM7,
+ memregion_IO7,
+ memregion_Wifi,
+ memregion_VWRAM,
+};
+
+int ClassifyAddress9(u32 addr);
+int ClassifyAddress7(u32 addr);
+
+template <typename T> T SlowRead9(ARMv5* cpu, u32 addr);
+template <typename T> void SlowWrite9(ARMv5* cpu, u32 addr, T val);
+template <typename T> T SlowRead7(u32 addr);
+template <typename T> void SlowWrite7(u32 addr, T val);
+
+template <bool PreInc, bool Write> void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu);
+template <bool PreInc, bool Write> void SlowBlockTransfer7(u32 addr, u64* data, u32 num);
+
}
#endif \ No newline at end of file
diff --git a/src/ARMJIT_RegisterCache.h b/src/ARMJIT_RegisterCache.h
index 5e18e84..0547c84 100644
--- a/src/ARMJIT_RegisterCache.h
+++ b/src/ARMJIT_RegisterCache.h
@@ -95,20 +95,6 @@ public:
LiteralsLoaded = 0;
}
- BitSet32 GetPushRegs()
- {
- BitSet16 used;
- for (int i = 0; i < InstrsCount; i++)
- used |= BitSet16(Instrs[i].Info.SrcRegs | Instrs[i].Info.DstRegs);
-
- BitSet32 res;
- u32 registersMax = std::min((int)used.Count(), NativeRegsAvailable);
- for (int i = 0; i < registersMax; i++)
- res |= BitSet32(1 << (int)NativeRegAllocOrder[i]);
-
- return res;
- }
-
void Prepare(bool thumb, int i)
{
FetchedInstr instr = Instrs[i];
@@ -139,7 +125,6 @@ public:
UnloadRegister(reg);
u16 necessaryRegs = ((instr.Info.SrcRegs & PCAllocatableAsSrc) | instr.Info.DstRegs) & ~instr.Info.NotStrictlyNeeded;
- u16 writeRegs = instr.Info.DstRegs & ~instr.Info.NotStrictlyNeeded;
BitSet16 needToBeLoaded(necessaryRegs & ~LoadedRegs);
if (needToBeLoaded != BitSet16(0))
{
@@ -182,13 +167,12 @@ public:
if (left-- == 0)
break;
- writeRegs |= (1 << reg) & instr.Info.DstRegs;
LoadRegister(reg, !(thumb || instr.Cond() >= 0xE) || (1 << reg) & instr.Info.SrcRegs);
}
}
}
- DirtyRegs |= writeRegs & ~(1 << 15);
+ DirtyRegs |= (LoadedRegs & instr.Info.DstRegs) & ~(1 << 15);
}
static const Reg NativeRegAllocOrder[];
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
index dd20e3c..eee2e0f 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
@@ -195,26 +195,6 @@ Compiler::Compiler()
Reset();
- for (int i = 0; i < 3; i++)
- {
- for (int j = 0; j < 2; j++)
- MemoryFuncs9[i][j] = Gen_MemoryRoutine9(j, 8 << i);
- }
- MemoryFuncs7[0][0] = (void*)NDS::ARM7Read8;
- MemoryFuncs7[0][1] = (void*)NDS::ARM7Write8;
- MemoryFuncs7[1][0] = (void*)NDS::ARM7Read16;
- MemoryFuncs7[1][1] = (void*)NDS::ARM7Write16;
- MemoryFuncs7[2][0] = (void*)NDS::ARM7Read32;
- MemoryFuncs7[2][1] = (void*)NDS::ARM7Write32;
-
- for (int i = 0; i < 2; i++)
- for (int j = 0; j < 2; j++)
- {
- MemoryFuncsSeq9[i][j] = Gen_MemoryRoutineSeq9(i, j);
- MemoryFuncsSeq7[i][j][0] = Gen_MemoryRoutineSeq7(i, j, false);
- MemoryFuncsSeq7[i][j][1] = Gen_MemoryRoutineSeq7(i, j, true);
- }
-
{
// RSCRATCH mode
// RSCRATCH2 reg number
@@ -317,6 +297,12 @@ Compiler::Compiler()
// move the region forward to prevent overwriting the generated functions
CodeMemSize -= GetWritableCodePtr() - ResetStart;
ResetStart = GetWritableCodePtr();
+
+ NearStart = ResetStart;
+ FarStart = ResetStart + 1024*1024*24;
+
+ NearSize = FarStart - ResetStart;
+ FarSize = (ResetStart + CodeMemSize) - FarStart;
}
void Compiler::LoadCPSR()
@@ -504,6 +490,9 @@ void Compiler::Reset()
{
memset(ResetStart, 0xcc, CodeMemSize);
SetCodePtr(ResetStart);
+
+ NearCode = NearStart;
+ FarCode = FarStart;
}
void Compiler::Comp_SpecialBranchBehaviour(bool taken)
@@ -544,8 +533,16 @@ void Compiler::Comp_SpecialBranchBehaviour(bool taken)
JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
{
- if (CodeMemSize - (GetWritableCodePtr() - ResetStart) < 1024 * 32) // guess...
+ if (NearSize - (NearCode - NearStart) < 1024 * 32) // guess...
+ {
+ printf("near reset\n");
+ ResetBlockCache();
+ }
+ if (FarSize - (FarCode - FarStart) < 1024 * 32) // guess...
+ {
+ printf("far reset\n");
ResetBlockCache();
+ }
ConstantCycles = 0;
Thumb = thumb;
@@ -762,12 +759,14 @@ void Compiler::Comp_AddCycles_CDI()
Comp_AddCycles_CD();
else
{
+ IrregularCycles = true;
+
s32 cycles;
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
s32 numD = CurInstr.DataCycles;
- if ((CurInstr.DataRegion >> 4) == 0x02) // mainRAM
+ if ((CurInstr.DataRegion >> 24) == 0x02) // mainRAM
{
if (CodeRegion == 0x02)
cycles = numC + numD;
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h
index e0a4978..9df218b 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.h
@@ -140,7 +140,7 @@ public:
};
void Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags);
s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
- void Comp_MemLoadLiteral(int size, int rd, u32 addr);
+ bool Comp_MemLoadLiteral(int size, int rd, u32 addr);
void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
@@ -154,12 +154,6 @@ public:
void Comp_SpecialBranchBehaviour(bool taken);
- void* Gen_MemoryRoutine9(bool store, int size);
-
- void* Gen_MemoryRoutineSeq9(bool store, bool preinc);
- void* Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM);
-
- void* Gen_ChangeCPSRRoutine();
Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed);
Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed);
@@ -193,6 +187,26 @@ public:
return (u8*)entry - ResetStart;
}
+ void SwitchToNearCode()
+ {
+ FarCode = GetWritableCodePtr();
+ SetCodePtr(NearCode);
+ }
+
+ void SwitchToFarCode()
+ {
+ NearCode = GetWritableCodePtr();
+ SetCodePtr(FarCode);
+ }
+
+ u8* FarCode;
+ u8* NearCode;
+ u32 FarSize;
+ u32 NearSize;
+
+ u8* NearStart;
+ u8* FarStart;
+
u8* ResetStart;
u32 CodeMemSize;
@@ -201,12 +215,6 @@ public:
void* BranchStub[2];
- void* MemoryFuncs9[3][2];
- void* MemoryFuncs7[3][2];
-
- void* MemoryFuncsSeq9[2][2];
- void* MemoryFuncsSeq7[2][2][2];
-
void* ReadBanked;
void* WriteBanked;
diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
index b595e32..c13b779 100644
--- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
@@ -25,236 +25,17 @@ int squeezePointer(T* ptr)
improvement.
*/
-/*
- address - ABI_PARAM1 (a.k.a. ECX = RSCRATCH3 on Windows)
- store value - ABI_PARAM2 (a.k.a. RDX = RSCRATCH2 on Windows)
-*/
-void* Compiler::Gen_MemoryRoutine9(bool store, int size)
+bool Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
{
- u32 addressMask = ~(size == 32 ? 3 : (size == 16 ? 1 : 0));
- AlignCode4();
- void* res = GetWritableCodePtr();
-
- MOV(32, R(RSCRATCH), R(ABI_PARAM1));
- SUB(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
- CMP(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMSize)));
- FixupBranch insideDTCM = J_CC(CC_B);
-
- CMP(32, R(ABI_PARAM1), MDisp(RCPU, offsetof(ARMv5, ITCMSize)));
- FixupBranch insideITCM = J_CC(CC_B);
-
- if (store)
- {
- if (size > 8)
- AND(32, R(ABI_PARAM1), Imm32(addressMask));
- switch (size)
- {
- case 32: JMP((u8*)NDS::ARM9Write32, true); break;
- case 16: JMP((u8*)NDS::ARM9Write16, true); break;
- case 8: JMP((u8*)NDS::ARM9Write8, true); break;
- }
- }
- else
- {
- if (size == 32)
- {
- ABI_PushRegistersAndAdjustStack({ABI_PARAM1}, 8);
- AND(32, R(ABI_PARAM1), Imm32(addressMask));
- // everything's already in the appropriate register
- ABI_CallFunction(NDS::ARM9Read32);
- ABI_PopRegistersAndAdjustStack({ECX}, 8);
- AND(32, R(ECX), Imm8(3));
- SHL(32, R(ECX), Imm8(3));
- ROR_(32, R(RSCRATCH), R(ECX));
- RET();
- }
- else if (size == 16)
- {
- AND(32, R(ABI_PARAM1), Imm32(addressMask));
- JMP((u8*)NDS::ARM9Read16, true);
- }
- else
- JMP((u8*)NDS::ARM9Read8, true);
- }
-
- SetJumpTarget(insideDTCM);
- AND(32, R(RSCRATCH), Imm32(0x3FFF & addressMask));
- if (store)
- MOV(size, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)), R(ABI_PARAM2));
- else
- {
- MOVZX(32, size, RSCRATCH, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)));
- if (size == 32)
- {
- if (ABI_PARAM1 != ECX)
- MOV(32, R(ECX), R(ABI_PARAM1));
- AND(32, R(ECX), Imm8(3));
- SHL(32, R(ECX), Imm8(3));
- ROR_(32, R(RSCRATCH), R(ECX));
- }
- }
- RET();
+ u32 translatedAddr = Num == 0 ? TranslateAddr9(addr) : TranslateAddr7(addr);
- SetJumpTarget(insideITCM);
- MOV(32, R(ABI_PARAM3), R(ABI_PARAM1)); // free up ECX
- AND(32, R(ABI_PARAM3), Imm32(0x7FFF & addressMask));
- if (store)
- {
- MOV(size, MComplex(RCPU, ABI_PARAM3, SCALE_1, offsetof(ARMv5, ITCM)), R(ABI_PARAM2));
-
- // if CodeRanges[pseudoPhysical/256].Blocks.Length > 0 we're writing into code!
- static_assert(sizeof(AddressRange) == 16);
- LEA(32, ABI_PARAM1, MDisp(ABI_PARAM3, ExeMemRegionOffsets[exeMem_ITCM]));
- MOV(32, R(RSCRATCH), R(ABI_PARAM1));
- SHR(32, R(RSCRATCH), Imm8(9));
- SHL(32, R(RSCRATCH), Imm8(4));
- CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
- FixupBranch noCode = J_CC(CC_Z);
- JMP((u8*)InvalidateByAddr, true);
- SetJumpTarget(noCode);
- }
- else
+ int invalidLiteralIdx = InvalidLiterals.Find(translatedAddr);
+ if (invalidLiteralIdx != -1)
{
- MOVZX(32, size, RSCRATCH, MComplex(RCPU, ABI_PARAM3, SCALE_1, offsetof(ARMv5, ITCM)));
- if (size == 32)
- {
- if (ABI_PARAM1 != ECX)
- MOV(32, R(ECX), R(ABI_PARAM1));
- AND(32, R(ECX), Imm8(3));
- SHL(32, R(ECX), Imm8(3));
- ROR_(32, R(RSCRATCH), R(ECX));
- }
+ InvalidLiterals.Remove(invalidLiteralIdx);
+ return false;
}
- RET();
-
- static_assert(RSCRATCH == EAX, "Someone changed RSCRATCH!");
-
- return res;
-}
-
-#define MEMORY_SEQ_WHILE_COND \
- if (!store) \
- MOV(32, currentElement, R(EAX));\
- if (!preinc) \
- ADD(32, R(ABI_PARAM1), Imm8(4)); \
- \
- SUB(32, R(ABI_PARAM3), Imm8(1)); \
- J_CC(CC_NZ, repeat);
-
-/*
- ABI_PARAM1 address
- ABI_PARAM2 address where registers are stored
- ABI_PARAM3 how many values to read/write
-
- Dolphin x64CodeEmitter is my favourite assembler
- */
-void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
-{
- void* res = (void*)GetWritableCodePtr();
-
- const u8* repeat = GetCodePtr();
-
- if (preinc)
- ADD(32, R(ABI_PARAM1), Imm8(4));
- MOV(32, R(RSCRATCH), R(ABI_PARAM1));
- SUB(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
- CMP(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMSize)));
- FixupBranch insideDTCM = J_CC(CC_B);
-
- CMP(32, R(ABI_PARAM1), MDisp(RCPU, offsetof(ARMv5, ITCMSize)));
- FixupBranch insideITCM = J_CC(CC_B);
-
- OpArg currentElement = MComplex(ABI_PARAM2, ABI_PARAM3, SCALE_8, -8); // wasting stack space like a gangster
-
- ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
- AND(32, R(ABI_PARAM1), Imm8(~3));
- if (store)
- {
- MOV(32, R(ABI_PARAM2), currentElement);
- CALL((void*)NDS::ARM9Write32);
- }
- else
- CALL((void*)NDS::ARM9Read32);
- ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
-
- MEMORY_SEQ_WHILE_COND
- RET();
-
- SetJumpTarget(insideDTCM);
- AND(32, R(RSCRATCH), Imm32(0x3FFF & ~3));
- if (store)
- {
- MOV(32, R(ABI_PARAM4), currentElement);
- MOV(32, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)), R(ABI_PARAM4));
- }
- else
- MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)));
-
- MEMORY_SEQ_WHILE_COND
- RET();
-
- SetJumpTarget(insideITCM);
- MOV(32, R(RSCRATCH), R(ABI_PARAM1));
- AND(32, R(RSCRATCH), Imm32(0x7FFF & ~3));
- if (store)
- {
- MOV(32, R(ABI_PARAM4), currentElement);
- MOV(32, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, ITCM)), R(ABI_PARAM4));
-
- ADD(32, R(RSCRATCH), Imm32(ExeMemRegionOffsets[exeMem_ITCM]));
- MOV(32, R(ABI_PARAM4), R(RSCRATCH));
- SHR(32, R(RSCRATCH), Imm8(9));
- SHL(32, R(RSCRATCH), Imm8(4));
- CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
- FixupBranch noCode = J_CC(CC_Z);
- ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
- MOV(32, R(ABI_PARAM1), R(ABI_PARAM4));
- CALL((u8*)InvalidateByAddr);
- ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
- SetJumpTarget(noCode);
- }
- else
- MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, ITCM)));
-
- MEMORY_SEQ_WHILE_COND
- RET();
-
- return res;
-}
-
-void* Compiler::Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM)
-{
- void* res = (void*)GetWritableCodePtr();
-
- const u8* repeat = GetCodePtr();
-
- if (preinc)
- ADD(32, R(ABI_PARAM1), Imm8(4));
-
- OpArg currentElement = MComplex(ABI_PARAM2, ABI_PARAM3, SCALE_8, -8);
-
- ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
- AND(32, R(ABI_PARAM1), Imm8(~3));
- if (store)
- {
- MOV(32, R(ABI_PARAM2), currentElement);
- CALL((void*)NDS::ARM7Write32);
- }
- else
- CALL((void*)NDS::ARM7Read32);
- ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
-
- MEMORY_SEQ_WHILE_COND
- RET();
-
- return res;
-}
-
-#undef MEMORY_SEQ_WHILE_COND
-
-void Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
-{
u32 val;
// make sure arm7 bios is accessible
u32 tmpR15 = CurCPU->R[15];
@@ -276,12 +57,10 @@ void Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
RegCache.PutLiteral(rd, val);
Comp_AddCycles_CDI();
+
+ return true;
}
-/*void fault(u32 a, u32 b, u32 c, u32 d)
-{
- printf("actually not static! %x %x %x %x\n", a, b, c, d);
-}*/
void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags)
{
@@ -291,17 +70,12 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
if (size == 16)
addressMask = ~1;
- //bool check = false;
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_SignExtend|memop_Post|memop_Store|memop_Writeback)))
{
u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
- u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
-
- if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 16))))
- {
- Comp_MemLoadLiteral(size, rd, addr);
+
+ if (Comp_MemLoadLiteral(size, rd, addr))
return;
- }
}
{
@@ -314,173 +88,334 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
Comp_AddCycles_CDI();
}
+ bool addrIsStatic = Config::JIT_LiteralOptimisations
+ && RegCache.IsLiteral(rn) && op2.IsImm && !(flags & (memop_Writeback|memop_Post));
+ u32 staticAddress;
+ if (addrIsStatic)
+ staticAddress = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
OpArg rdMapped = MapReg(rd);
- OpArg rnMapped = MapReg(rn);
- if (Thumb && rn == 15)
- rnMapped = Imm32(R15 & ~0x2);
-
- bool inlinePreparation = Num == 1;
- u32 constLocalROR32 = 4;
-
- void* memoryFunc = Num == 0
- ? MemoryFuncs9[size >> 4][!!(flags & memop_Store)]
- : MemoryFuncs7[size >> 4][!!((flags & memop_Store))];
- if (Config::JIT_LiteralOptimisations && (rd != 15 || (flags & memop_Store)) && op2.IsImm && RegCache.IsLiteral(rn))
+ if (!addrIsStatic)
{
- u32 addr = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
-
- /*MOV(32, R(ABI_PARAM1), Imm32(CurInstr.Instr));
- MOV(32, R(ABI_PARAM1), Imm32(R15));
- MOV_sum(32, RSCRATCH, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
- CMP(32, R(RSCRATCH), Imm32(addr));
- FixupBranch eq = J_CC(CC_E);
- CALL((void*)fault);
- SetJumpTarget(eq);*/
-
- NDS::MemRegion region;
- region.Mem = NULL;
- if (Num == 0)
+ OpArg rnMapped = MapReg(rn);
+ if (Thumb && rn == 15)
+ rnMapped = Imm32(R15 & ~0x2);
+
+ X64Reg finalAddr = RSCRATCH3;
+ if (flags & memop_Post)
{
- ARMv5* cpu5 = (ARMv5*)CurCPU;
+ MOV(32, R(RSCRATCH3), rnMapped);
- // stupid dtcm...
- if (addr >= cpu5->DTCMBase && addr < (cpu5->DTCMBase + cpu5->DTCMSize))
- {
- // disable this for now as DTCM is located in heap
- // which might excced the RIP-addressable range
- //region.Mem = cpu5->DTCM;
- //region.Mask = 0x3FFF;
- }
- else
- {
- NDS::ARM9GetMemRegion(addr, flags & memop_Store, &region);
- }
+ finalAddr = rnMapped.GetSimpleReg();
}
- else
- NDS::ARM7GetMemRegion(addr, flags & memop_Store, &region);
- if (region.Mem != NULL)
+ if (op2.IsImm)
+ {
+ MOV_sum(32, finalAddr, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
+ }
+ else
{
- void* ptr = &region.Mem[addr & addressMask & region.Mask];
+ OpArg rm = MapReg(op2.Reg.Reg);
- if (flags & memop_Store)
+ if (!(flags & memop_SubtractOffset) && rm.IsSimpleReg() && rnMapped.IsSimpleReg()
+ && op2.Reg.Op == 0 && op2.Reg.Amount > 0 && op2.Reg.Amount <= 3)
{
- MOV(size, M(ptr), MapReg(rd));
+ LEA(32, finalAddr,
+ MComplex(rnMapped.GetSimpleReg(), rm.GetSimpleReg(), 1 << op2.Reg.Amount, 0));
}
else
{
- if (flags & memop_SignExtend)
- MOVSX(32, size, rdMapped.GetSimpleReg(), M(ptr));
- else
- MOVZX(32, size, rdMapped.GetSimpleReg(), M(ptr));
+ bool throwAway;
+ OpArg offset =
+ Comp_RegShiftImm(op2.Reg.Op, op2.Reg.Amount, rm, false, throwAway);
- if (size == 32 && addr & ~0x3)
+ if (flags & memop_SubtractOffset)
{
- ROR_(32, rdMapped, Imm8((addr & 0x3) << 3));
+ if (R(finalAddr) != rnMapped)
+ MOV(32, R(finalAddr), rnMapped);
+ if (!offset.IsZero())
+ SUB(32, R(finalAddr), offset);
}
+ else
+ MOV_sum(32, finalAddr, rnMapped, offset);
}
-
- return;
}
- void* specialFunc = GetFuncForAddr(CurCPU, addr, flags & memop_Store, size);
- if (specialFunc)
- {
- memoryFunc = specialFunc;
- inlinePreparation = true;
- constLocalROR32 = addr & 0x3;
- }
+ if ((flags & memop_Writeback) && !(flags & memop_Post))
+ MOV(32, rnMapped, R(finalAddr));
}
- X64Reg finalAddr = ABI_PARAM1;
- if (flags & memop_Post)
- {
- MOV(32, R(ABI_PARAM1), rnMapped);
+ int expectedTarget = Num == 0
+ ? ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion)
+ : ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion);
+ if (CurInstr.Cond() < 0xE)
+ expectedTarget = memregion_Other;
+
+ bool compileFastPath = false, compileSlowPath = !addrIsStatic || (flags & memop_Store);
- finalAddr = rnMapped.GetSimpleReg();
+ switch (expectedTarget)
+ {
+ case memregion_MainRAM:
+ case memregion_DTCM:
+ case memregion_WRAM7:
+ case memregion_SWRAM9:
+ case memregion_SWRAM7:
+ case memregion_IO9:
+ case memregion_IO7:
+ case memregion_VWRAM:
+ compileFastPath = true;
+ break;
+ case memregion_Wifi:
+ compileFastPath = size >= 16;
+ break;
+ case memregion_VRAM:
+ compileFastPath = !(flags & memop_Store) || size >= 16;
+ case memregion_BIOS9:
+ compileFastPath = !(flags & memop_Store);
+ break;
+ default: break;
}
- if (op2.IsImm)
+ if (addrIsStatic && !compileFastPath)
{
- MOV_sum(32, finalAddr, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
+ compileFastPath = false;
+ compileSlowPath = true;
}
- else
+
+ if (addrIsStatic && compileSlowPath)
+ MOV(32, R(RSCRATCH3), Imm32(staticAddress));
+
+ if (compileFastPath)
{
- OpArg rm = MapReg(op2.Reg.Reg);
+ FixupBranch slowPath;
+ if (compileSlowPath)
+ {
+ MOV(32, R(RSCRATCH), R(RSCRATCH3));
+ SHR(32, R(RSCRATCH), Imm8(9));
+ if (flags & memop_Store)
+ {
+ CMP(8, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)), Imm8(expectedTarget));
+ }
+ else
+ {
+ MOVZX(32, 8, RSCRATCH, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)));
+ AND(32, R(RSCRATCH), Imm8(~0x80));
+ CMP(32, R(RSCRATCH), Imm8(expectedTarget));
+ }
+
+ slowPath = J_CC(CC_NE, true);
+ }
- if (!(flags & memop_SubtractOffset) && rm.IsSimpleReg() && rnMapped.IsSimpleReg()
- && op2.Reg.Op == 0 && op2.Reg.Amount > 0 && op2.Reg.Amount <= 3)
+ if (expectedTarget == memregion_MainRAM || expectedTarget == memregion_WRAM7
+ || expectedTarget == memregion_BIOS9)
{
- LEA(32, finalAddr,
- MComplex(rnMapped.GetSimpleReg(), rm.GetSimpleReg(), 1 << op2.Reg.Amount, 0));
+ u8* data;
+ u32 mask;
+ if (expectedTarget == memregion_MainRAM)
+ {
+ data = NDS::MainRAM;
+ mask = MAIN_RAM_SIZE - 1;
+ }
+ else if (expectedTarget == memregion_BIOS9)
+ {
+ data = NDS::ARM9BIOS;
+ mask = 0xFFF;
+ }
+ else
+ {
+ data = NDS::ARM7WRAM;
+ mask = 0xFFFF;
+ }
+ OpArg memLoc;
+ if (addrIsStatic)
+ {
+ memLoc = M(data + ((staticAddress & mask & addressMask)));
+ }
+ else
+ {
+ MOV(32, R(RSCRATCH), R(RSCRATCH3));
+ AND(32, R(RSCRATCH), Imm32(mask & addressMask));
+ memLoc = MDisp(RSCRATCH, squeezePointer(data));
+ }
+ if (flags & memop_Store)
+ MOV(size, memLoc, rdMapped);
+ else if (flags & memop_SignExtend)
+ MOVSX(32, size, rdMapped.GetSimpleReg(), memLoc);
+ else
+ MOVZX(32, size, rdMapped.GetSimpleReg(), memLoc);
}
- else
+ else if (expectedTarget == memregion_DTCM)
+ {
+ if (addrIsStatic)
+ MOV(32, R(RSCRATCH), Imm32(staticAddress));
+ else
+ MOV(32, R(RSCRATCH), R(RSCRATCH3));
+ SUB(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
+ AND(32, R(RSCRATCH), Imm32(0x3FFF & addressMask));
+ OpArg memLoc = MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM));
+ if (flags & memop_Store)
+ MOV(size, memLoc, rdMapped);
+ else if (flags & memop_SignExtend)
+ MOVSX(32, size, rdMapped.GetSimpleReg(), memLoc);
+ else
+ MOVZX(32, size, rdMapped.GetSimpleReg(), memLoc);
+ }
+ else if (expectedTarget == memregion_SWRAM9 || expectedTarget == memregion_SWRAM7)
{
- bool throwAway;
- OpArg offset =
- Comp_RegShiftImm(op2.Reg.Op, op2.Reg.Amount, rm, false, throwAway);
-
- if (flags & memop_SubtractOffset)
+ MOV(64, R(RSCRATCH2), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9 : &NDS::SWRAM_ARM7));
+ if (addrIsStatic)
{
- if (R(finalAddr) != rnMapped)
- MOV(32, R(finalAddr), rnMapped);
- if (!offset.IsZero())
- SUB(32, R(finalAddr), offset);
+ MOV(32, R(RSCRATCH), Imm32(staticAddress & addressMask));
}
else
- MOV_sum(32, finalAddr, rnMapped, offset);
+ {
+ MOV(32, R(RSCRATCH), R(RSCRATCH3));
+ AND(32, R(RSCRATCH), Imm8(addressMask));
+ }
+ AND(32, R(RSCRATCH), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9Mask : &NDS::SWRAM_ARM7Mask));
+ OpArg memLoc = MRegSum(RSCRATCH, RSCRATCH2);
+ if (flags & memop_Store)
+ MOV(size, memLoc, rdMapped);
+ else if (flags & memop_SignExtend)
+ MOVSX(32, size, rdMapped.GetSimpleReg(), memLoc);
+ else
+ MOVZX(32, size, rdMapped.GetSimpleReg(), memLoc);
}
- }
+ else
+ {
+ u32 maskedDataRegion;
- if ((flags & memop_Writeback) && !(flags & memop_Post))
- MOV(32, rnMapped, R(finalAddr));
+ if (addrIsStatic)
+ {
+ maskedDataRegion = staticAddress;
+ MOV(32, R(ABI_PARAM1), Imm32(staticAddress));
+ }
+ else
+ {
+ if (ABI_PARAM1 != RSCRATCH3)
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
+ AND(32, R(ABI_PARAM1), Imm8(addressMask));
- if (flags & memop_Store)
- MOV(32, R(ABI_PARAM2), rdMapped);
+ maskedDataRegion = CurInstr.DataRegion;
+ if (Num == 0)
+ maskedDataRegion &= ~0xFFFFFF;
+ else
+ maskedDataRegion &= ~0x7FFFFF;
+ }
- if (!(flags & memop_Store) && inlinePreparation && constLocalROR32 == 4 && size == 32)
- MOV(32, rdMapped, R(ABI_PARAM1));
+ void* func = GetFuncForAddr(CurCPU, maskedDataRegion, flags & memop_Store, size);
- if (inlinePreparation && size > 8)
- AND(32, R(ABI_PARAM1), Imm8(addressMask));
+ if (flags & memop_Store)
+ {
+ MOV(32, R(ABI_PARAM2), rdMapped);
- CALL(memoryFunc);
+ ABI_CallFunction((void(*)())func);
+ }
+ else
+ {
+ if (!addrIsStatic)
+ MOV(32, rdMapped, R(RSCRATCH3));
- /*if (Num == 0 && check)
- {
- CMP(32, R(EAX), rdMapped);
- FixupBranch notEqual = J_CC(CC_E);
- ABI_PushRegistersAndAdjustStack({RSCRATCH}, 0);
- MOV(32, R(ABI_PARAM1), Imm32(R15 - (Thumb ? 4 : 8)));
- MOV(32, R(ABI_PARAM2), R(EAX));
- MOV(32, R(ABI_PARAM3), rdMapped);
- MOV(32, R(ABI_PARAM4), Imm32(CurInstr.Instr));
- CALL((u8*)fault);
- ABI_PopRegistersAndAdjustStack({RSCRATCH}, 0);
- SetJumpTarget(notEqual);
- }*/
-
- if (!(flags & memop_Store))
- {
- if (inlinePreparation && size == 32)
+ ABI_CallFunction((void(*)())func);
+
+ if (!addrIsStatic)
+ MOV(32, R(RSCRATCH3), rdMapped);
+
+ if (flags & memop_SignExtend)
+ MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ else
+ MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ }
+ }
+
+ if ((size == 32 && !(flags & memop_Store)))
{
- if (constLocalROR32 == 4)
+ if (addrIsStatic)
+ {
+ if (staticAddress & 0x3)
+ ROR_(32, rdMapped, Imm8((staticAddress & 0x3) * 8));
+ }
+ else
{
- static_assert(RSCRATCH3 == ECX);
- MOV(32, R(ECX), rdMapped);
- AND(32, R(ECX), Imm8(3));
- SHL(32, R(ECX), Imm8(3));
- ROR_(32, R(RSCRATCH), R(ECX));
+ AND(32, R(RSCRATCH3), Imm8(0x3));
+ SHL(32, R(RSCRATCH3), Imm8(3));
+ ROR_(32, rdMapped, R(RSCRATCH3));
}
- else if (constLocalROR32 != 0)
- ROR_(32, R(RSCRATCH), Imm8(constLocalROR32 << 3));
}
- if (flags & memop_SignExtend)
- MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ if (compileSlowPath)
+ {
+ SwitchToFarCode();
+ SetJumpTarget(slowPath);
+ }
+ }
+
+ if (compileSlowPath)
+ {
+ if (Num == 0)
+ {
+ MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
+ MOV(64, R(ABI_PARAM1), R(RCPU));
+ if (flags & memop_Store)
+ {
+ MOV(32, R(ABI_PARAM3), rdMapped);
+
+ switch (size)
+ {
+ case 32: CALL((void*)&SlowWrite9<u32>); break;
+ case 16: CALL((void*)&SlowWrite9<u16>); break;
+ case 8: CALL((void*)&SlowWrite9<u8>); break;
+ }
+ }
+ else
+ {
+ switch (size)
+ {
+ case 32: CALL((void*)&SlowRead9<u32>); break;
+ case 16: CALL((void*)&SlowRead9<u16>); break;
+ case 8: CALL((void*)&SlowRead9<u8>); break;
+ }
+ }
+ }
else
- MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ {
+ if (ABI_PARAM1 != RSCRATCH3)
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
+ if (flags & memop_Store)
+ {
+ MOV(32, R(ABI_PARAM2), rdMapped);
+
+ switch (size)
+ {
+ case 32: CALL((void*)&SlowWrite7<u32>); break;
+ case 16: CALL((void*)&SlowWrite7<u16>); break;
+ case 8: CALL((void*)&SlowWrite7<u8>); break;
+ }
+ }
+ else
+ {
+ switch (size)
+ {
+ case 32: CALL((void*)&SlowRead7<u32>); break;
+ case 16: CALL((void*)&SlowRead7<u16>); break;
+ case 8: CALL((void*)&SlowRead7<u8>); break;
+ }
+ }
+ }
+ if (!(flags & memop_Store))
+ {
+ if (flags & memop_SignExtend)
+ MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ else
+ MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ }
+ }
+
+ if (compileFastPath && compileSlowPath)
+ {
+ FixupBranch ret = J(true);
+ SwitchToNearCode();
+ SetJumpTarget(ret);
}
if (!(flags & memop_Store) && rd == 15)
@@ -498,100 +433,160 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
{
- IrregularCycles = true;
-
int regsCount = regs.Count();
s32 offset = (regsCount * 4) * (decrement ? -1 : 1);
// we need to make sure that the stack stays aligned to 16 bytes
+#ifdef _WIN32
+ // include shadow
+ u32 stackAlloc = ((regsCount + 4 + 1) & ~1) * 8;
+#else
u32 stackAlloc = ((regsCount + 1) & ~1) * 8;
+#endif
+ u32 allocOffset = stackAlloc - regsCount * 8;
- if (!store)
+ int expectedTarget = Num == 0
+ ? ClassifyAddress9(CurInstr.DataRegion)
+ : ClassifyAddress7(CurInstr.DataRegion);
+ if (usermode || CurInstr.Cond() < 0xE)
+ expectedTarget = memregion_Other;
+
+ bool compileFastPath = false;
+
+ switch (expectedTarget)
{
+ case memregion_DTCM:
+ case memregion_MainRAM:
+ case memregion_SWRAM9:
+ case memregion_SWRAM7:
+ case memregion_WRAM7:
+ compileFastPath = true;
+ break;
+ default:
+ break;
+ }
+
+ if (!store)
Comp_AddCycles_CDI();
+ else
+ Comp_AddCycles_CD();
- if (decrement)
+ if (decrement)
+ {
+ MOV_sum(32, RSCRATCH4, MapReg(rn), Imm32(-regsCount * 4));
+ preinc ^= true;
+ }
+ else
+ MOV(32, R(RSCRATCH4), MapReg(rn));
+
+ if (compileFastPath)
+ {
+ assert(!usermode);
+
+ MOV(32, R(RSCRATCH), R(RSCRATCH4));
+ SHR(32, R(RSCRATCH), Imm8(9));
+
+ if (store)
{
- MOV_sum(32, ABI_PARAM1, MapReg(rn), Imm32(-regsCount * 4));
- preinc ^= true;
+ CMP(8, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)), Imm8(expectedTarget));
}
else
- MOV(32, R(ABI_PARAM1), MapReg(rn));
-
- MOV(32, R(ABI_PARAM3), Imm32(regsCount));
- SUB(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
- MOV(64, R(ABI_PARAM2), R(RSP));
-
- CALL(Num == 0
- ? MemoryFuncsSeq9[0][preinc]
- : MemoryFuncsSeq7[0][preinc][CodeRegion == 0x02]);
+ {
+ MOVZX(32, 8, RSCRATCH, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)));
+ AND(32, R(RSCRATCH), Imm8(~0x80));
+ CMP(32, R(RSCRATCH), Imm8(expectedTarget));
+ }
+ FixupBranch slowPath = J_CC(CC_NE, true);
- bool firstUserMode = true;
- for (int reg = 15; reg >= 0; reg--)
+ if (expectedTarget == memregion_DTCM)
{
- if (regs[reg])
+ SUB(32, R(RSCRATCH4), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
+ AND(32, R(RSCRATCH4), Imm32(0x3FFF & ~3));
+ LEA(64, RSCRATCH4, MComplex(RCPU, RSCRATCH4, 1, offsetof(ARMv5, DTCM)));
+ }
+ else if (expectedTarget == memregion_MainRAM)
+ {
+ AND(32, R(RSCRATCH4), Imm32((MAIN_RAM_SIZE - 1) & ~3));
+ ADD(64, R(RSCRATCH4), Imm32(squeezePointer(NDS::MainRAM)));
+ }
+ else if (expectedTarget == memregion_WRAM7)
+ {
+ AND(32, R(RSCRATCH4), Imm32(0xFFFF & ~3));
+ ADD(64, R(RSCRATCH4), Imm32(squeezePointer(NDS::ARM7WRAM)));
+ }
+ else // SWRAM
+ {
+ AND(32, R(RSCRATCH4), Imm8(~3));
+ AND(32, R(RSCRATCH4), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9Mask : &NDS::SWRAM_ARM7Mask));
+ ADD(64, R(RSCRATCH4), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9 : &NDS::SWRAM_ARM7));
+ }
+ u32 offset = 0;
+ for (int reg : regs)
+ {
+ if (preinc)
+ offset += 4;
+ OpArg mem = MDisp(RSCRATCH4, offset);
+ if (store)
{
- if (usermode && !regs[15] && reg >= 8 && reg < 15)
+ if (RegCache.LoadedRegs & (1 << reg))
{
- if (firstUserMode)
- {
- MOV(32, R(RSCRATCH), R(RCPSR));
- AND(32, R(RSCRATCH), Imm8(0x1F));
- firstUserMode = false;
- }
- MOV(32, R(RSCRATCH2), Imm32(reg - 8));
- POP(RSCRATCH3);
- CALL(WriteBanked);
- FixupBranch sucessfulWritten = J_CC(CC_NC);
- if (RegCache.Mapping[reg] != INVALID_REG)
- MOV(32, R(RegCache.Mapping[reg]), R(RSCRATCH3));
- else
- SaveReg(reg, RSCRATCH3);
- SetJumpTarget(sucessfulWritten);
+ MOV(32, mem, MapReg(reg));
}
- else if (RegCache.Mapping[reg] == INVALID_REG)
+ else
{
- assert(reg != 15);
-
- POP(RSCRATCH);
- SaveReg(reg, RSCRATCH);
+ LoadReg(reg, RSCRATCH);
+ MOV(32, mem, R(RSCRATCH));
+ }
+ }
+ else
+ {
+ if (RegCache.LoadedRegs & (1 << reg))
+ {
+ MOV(32, MapReg(reg), mem);
}
else
{
- if (reg != 15)
- RegCache.DirtyRegs |= (1 << reg);
- POP(MapReg(reg).GetSimpleReg());
+ MOV(32, R(RSCRATCH), mem);
+ SaveReg(reg, RSCRATCH);
}
}
+ if (!preinc)
+ offset += 4;
}
- if (regsCount & 1)
- POP(RSCRATCH);
+ SwitchToFarCode();
+ SetJumpTarget(slowPath);
+ }
+
+ if (!store)
+ {
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
+ MOV(32, R(ABI_PARAM3), Imm32(regsCount));
+ SUB(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
+ if (allocOffset == 0)
+ MOV(64, R(ABI_PARAM2), R(RSP));
+ else
+ LEA(64, ABI_PARAM2, MDisp(RSP, allocOffset));
+
+ if (Num == 0)
+ MOV(64, R(ABI_PARAM4), R(RCPU));
- if (regs[15])
+ switch (Num * 2 | preinc)
{
- if (Num == 1)
- {
- if (Thumb)
- OR(32, MapReg(15), Imm8(1));
- else
- AND(32, MapReg(15), Imm8(0xFE));
- }
- Comp_JumpTo(MapReg(15).GetSimpleReg(), usermode);
+ case 0: CALL((void*)&SlowBlockTransfer9<false, false>); break;
+ case 1: CALL((void*)&SlowBlockTransfer9<true, false>); break;
+ case 2: CALL((void*)&SlowBlockTransfer7<false, false>); break;
+ case 3: CALL((void*)&SlowBlockTransfer7<true, false>); break;
}
- }
- else
- {
- Comp_AddCycles_CD();
- if (regsCount & 1)
- PUSH(RSCRATCH);
+ if (allocOffset)
+ ADD(64, R(RSP), Imm8(allocOffset));
bool firstUserMode = true;
for (int reg : regs)
{
- if (usermode && reg >= 8 && reg < 15)
+ if (usermode && !regs[15] && reg >= 8 && reg < 15)
{
if (firstUserMode)
{
@@ -599,43 +594,107 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
AND(32, R(RSCRATCH), Imm8(0x1F));
firstUserMode = false;
}
- if (RegCache.Mapping[reg] == INVALID_REG)
- LoadReg(reg, RSCRATCH3);
- else
- MOV(32, R(RSCRATCH3), R(RegCache.Mapping[reg]));
MOV(32, R(RSCRATCH2), Imm32(reg - 8));
- CALL(ReadBanked);
- PUSH(RSCRATCH3);
+ POP(RSCRATCH3);
+ CALL(WriteBanked);
+ FixupBranch sucessfulWritten = J_CC(CC_NC);
+ if (RegCache.LoadedRegs & (1 << reg))
+ MOV(32, R(RegCache.Mapping[reg]), R(RSCRATCH3));
+ else
+ SaveReg(reg, RSCRATCH3);
+ SetJumpTarget(sucessfulWritten);
}
- else if (RegCache.Mapping[reg] == INVALID_REG)
+ else if (!(RegCache.LoadedRegs & (1 << reg)))
{
- LoadReg(reg, RSCRATCH);
- PUSH(RSCRATCH);
+ assert(reg != 15);
+
+ POP(RSCRATCH);
+ SaveReg(reg, RSCRATCH);
}
else
{
- PUSH(MapReg(reg).GetSimpleReg());
+ POP(MapReg(reg).GetSimpleReg());
}
}
-
- if (decrement)
+ }
+ else
+ {
+ bool firstUserMode = true;
+ for (int reg = 15; reg >= 0; reg--)
{
- MOV_sum(32, ABI_PARAM1, MapReg(rn), Imm32(-regsCount * 4));
- preinc ^= true;
+ if (regs[reg])
+ {
+ if (usermode && reg >= 8 && reg < 15)
+ {
+ if (firstUserMode)
+ {
+ MOV(32, R(RSCRATCH), R(RCPSR));
+ AND(32, R(RSCRATCH), Imm8(0x1F));
+ firstUserMode = false;
+ }
+ if (RegCache.Mapping[reg] == INVALID_REG)
+ LoadReg(reg, RSCRATCH3);
+ else
+ MOV(32, R(RSCRATCH3), R(RegCache.Mapping[reg]));
+ MOV(32, R(RSCRATCH2), Imm32(reg - 8));
+ CALL(ReadBanked);
+ PUSH(RSCRATCH3);
+ }
+ else if (!(RegCache.LoadedRegs & (1 << reg)))
+ {
+ LoadReg(reg, RSCRATCH);
+ PUSH(RSCRATCH);
+ }
+ else
+ {
+ PUSH(MapReg(reg).GetSimpleReg());
+ }
+ }
}
- else
- MOV(32, R(ABI_PARAM1), MapReg(rn));
- MOV(64, R(ABI_PARAM2), R(RSP));
+ if (allocOffset)
+ SUB(64, R(RSP), Imm8(allocOffset));
+
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
+ if (allocOffset)
+ LEA(64, ABI_PARAM2, MDisp(RSP, allocOffset));
+ else
+ MOV(64, R(ABI_PARAM2), R(RSP));
+
MOV(32, R(ABI_PARAM3), Imm32(regsCount));
+ if (Num == 0)
+ MOV(64, R(ABI_PARAM4), R(RCPU));
- CALL(Num == 0
- ? MemoryFuncsSeq9[1][preinc]
- : MemoryFuncsSeq7[1][preinc][CodeRegion == 0x02]);
+ switch (Num * 2 | preinc)
+ {
+ case 0: CALL((void*)&SlowBlockTransfer9<false, true>); break;
+ case 1: CALL((void*)&SlowBlockTransfer9<true, true>); break;
+ case 2: CALL((void*)&SlowBlockTransfer7<false, true>); break;
+ case 3: CALL((void*)&SlowBlockTransfer7<true, true>); break;
+ }
ADD(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
}
+ if (compileFastPath)
+ {
+ FixupBranch ret = J(true);
+ SwitchToNearCode();
+ SetJumpTarget(ret);
+ }
+
+ if (!store && regs[15])
+ {
+ if (Num == 1)
+ {
+ if (Thumb)
+ OR(32, MapReg(15), Imm8(1));
+ else
+ AND(32, MapReg(15), Imm8(0xFE));
+ }
+ Comp_JumpTo(MapReg(15).GetSimpleReg(), usermode);
+ }
+
return offset;
}
@@ -786,9 +845,7 @@ void Compiler::T_Comp_LoadPCRel()
{
u32 offset = (CurInstr.Instr & 0xFF) << 2;
u32 addr = (R15 & ~0x2) + offset;
- if (Config::JIT_LiteralOptimisations)
- Comp_MemLoadLiteral(32, CurInstr.T_Reg(8), addr);
- else
+ if (!Config::JIT_LiteralOptimisations || !Comp_MemLoadLiteral(32, CurInstr.T_Reg(8), addr))
Comp_MemAccess(CurInstr.T_Reg(8), 15, ComplexOperand(offset), 32, 0);
}
diff --git a/src/ARM_InstrInfo.cpp b/src/ARM_InstrInfo.cpp
index 28362d9..b50e821 100644
--- a/src/ARM_InstrInfo.cpp
+++ b/src/ARM_InstrInfo.cpp
@@ -373,16 +373,16 @@ Info Decode(bool thumb, u32 num, u32 instr)
if (res.Kind == tk_LDMIA || res.Kind == tk_POP)
{
- u32 set = (instr & 0xFF) & ~(res.DstRegs|res.SrcRegs);
- res.NotStrictlyNeeded |= set;
+ u32 set = (instr & 0xFF);
+ res.NotStrictlyNeeded |= set & ~(res.DstRegs|res.SrcRegs);
res.DstRegs |= set;
}
if (res.Kind == tk_STMIA || res.Kind == tk_PUSH)
{
- u32 set = (instr & 0xFF) & ~(res.DstRegs|res.SrcRegs);
+ u32 set = (instr & 0xFF);
if (res.Kind == tk_PUSH && instr & (1 << 8))
set |= (1 << 14);
- res.NotStrictlyNeeded |= set;
+ res.NotStrictlyNeeded |= set & ~(res.DstRegs|res.SrcRegs);
res.SrcRegs |= set;
}
@@ -495,15 +495,15 @@ Info Decode(bool thumb, u32 num, u32 instr)
if (res.Kind == ak_LDM)
{
- u16 set = (instr & 0xFFFF) & ~(res.SrcRegs|res.DstRegs|(1<<15));
+ u16 set = (instr & 0xFFFF);
+ res.NotStrictlyNeeded |= set & ~(res.SrcRegs|res.DstRegs|(1<<15));
res.DstRegs |= set;
- res.NotStrictlyNeeded |= set;
}
if (res.Kind == ak_STM)
{
- u16 set = (instr & 0xFFFF) & ~(res.SrcRegs|res.DstRegs|(1<<15));
+ u16 set = (instr & 0xFFFF);
+ res.NotStrictlyNeeded |= set & ~(res.SrcRegs|res.DstRegs|(1<<15));
res.SrcRegs |= set;
- res.NotStrictlyNeeded |= set;
}
if ((instr >> 28) < 0xE)
diff --git a/src/CP15.cpp b/src/CP15.cpp
index 62258e9..e665dbd 100644
--- a/src/CP15.cpp
+++ b/src/CP15.cpp
@@ -97,6 +97,10 @@ void ARMv5::CP15DoSavestate(Savestate* file)
void ARMv5::UpdateDTCMSetting()
{
+#ifdef JIT_ENABLED
+ u32 oldDTCMBase = DTCMBase;
+ u32 oldDTCMSize = DTCMSize;
+#endif
if (CP15Control & (1<<16))
{
DTCMBase = DTCMSetting & 0xFFFFF000;
@@ -109,10 +113,20 @@ void ARMv5::UpdateDTCMSetting()
DTCMSize = 0;
//printf("DTCM disabled\n");
}
+#ifdef JIT_ENABLED
+ if (oldDTCMBase != DTCMBase || oldDTCMSize != DTCMSize)
+ {
+ ARMJIT::UpdateMemoryStatus9(oldDTCMBase, oldDTCMBase + oldDTCMSize);
+ ARMJIT::UpdateMemoryStatus9(DTCMBase, DTCMBase + DTCMSize);
+ }
+#endif
}
void ARMv5::UpdateITCMSetting()
{
+#ifdef JIT_ENABLED
+ u32 oldITCMSize = ITCMSize;
+#endif
if (CP15Control & (1<<18))
{
ITCMSize = 0x200 << ((ITCMSetting >> 1) & 0x1F);
@@ -123,6 +137,10 @@ void ARMv5::UpdateITCMSetting()
ITCMSize = 0;
//printf("ITCM disabled\n");
}
+#ifdef JIT_ENABLED
+ if (oldITCMSize != ITCMSize)
+ ARMJIT::UpdateMemoryStatus9(0, std::max(oldITCMSize, ITCMSize));
+#endif
}
@@ -561,15 +579,9 @@ void ARMv5::CP15Write(u32 id, u32 val)
case 0x750:
-#ifdef JIT_ENABLED
- ARMJIT::InvalidateAll();
-#endif
ICacheInvalidateAll();
return;
case 0x751:
-#ifdef JIT_ENABLED
- ARMJIT::InvalidateByAddr(ARMJIT::TranslateAddr<0>(val));
-#endif
ICacheInvalidateByAddr(val);
return;
case 0x752:
@@ -732,7 +744,7 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
void ARMv5::DataRead8(u32 addr, u32* val)
{
- DataRegion = addr >> 12;
+ DataRegion = addr;
if (addr < ITCMSize)
{
@@ -753,7 +765,7 @@ void ARMv5::DataRead8(u32 addr, u32* val)
void ARMv5::DataRead16(u32 addr, u32* val)
{
- DataRegion = addr >> 12;
+ DataRegion = addr;
addr &= ~1;
@@ -776,7 +788,7 @@ void ARMv5::DataRead16(u32 addr, u32* val)
void ARMv5::DataRead32(u32 addr, u32* val)
{
- DataRegion = addr >> 12;
+ DataRegion = addr;
addr &= ~3;
@@ -820,14 +832,14 @@ void ARMv5::DataRead32S(u32 addr, u32* val)
void ARMv5::DataWrite8(u32 addr, u8 val)
{
- DataRegion = addr >> 12;
+ DataRegion = addr;
if (addr < ITCMSize)
{
DataCycles = 1;
*(u8*)&ITCM[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
- ARMJIT::InvalidateITCM(addr & 0x7FFF);
+ ARMJIT::InvalidateITCMIfNecessary(addr);
#endif
return;
}
@@ -844,7 +856,7 @@ void ARMv5::DataWrite8(u32 addr, u8 val)
void ARMv5::DataWrite16(u32 addr, u16 val)
{
- DataRegion = addr >> 12;
+ DataRegion = addr;
addr &= ~1;
@@ -853,7 +865,7 @@ void ARMv5::DataWrite16(u32 addr, u16 val)
DataCycles = 1;
*(u16*)&ITCM[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
- ARMJIT::InvalidateITCM(addr & 0x7FFF);
+ ARMJIT::InvalidateITCMIfNecessary(addr);
#endif
return;
}
@@ -870,7 +882,7 @@ void ARMv5::DataWrite16(u32 addr, u16 val)
void ARMv5::DataWrite32(u32 addr, u32 val)
{
- DataRegion = addr >> 12;
+ DataRegion = addr;
addr &= ~3;
@@ -879,7 +891,7 @@ void ARMv5::DataWrite32(u32 addr, u32 val)
DataCycles = 1;
*(u32*)&ITCM[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
- ARMJIT::InvalidateITCM(addr & 0x7FFF);
+ ARMJIT::InvalidateITCMIfNecessary(addr);
#endif
return;
}
@@ -903,7 +915,7 @@ void ARMv5::DataWrite32S(u32 addr, u32 val)
DataCycles += 1;
*(u32*)&ITCM[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
- ARMJIT::InvalidateITCM(addr & 0x7FFF);
+ ARMJIT::InvalidateITCMIfNecessary(addr);
#endif
return;
}
diff --git a/src/NDS.cpp b/src/NDS.cpp
index 141c565..6e989a8 100644
--- a/src/NDS.cpp
+++ b/src/NDS.cpp
@@ -535,10 +535,6 @@ void Reset()
KeyCnt = 0;
RCnt = 0;
-#ifdef JIT_ENABLED
- ARMJIT::ResetBlockCache();
-#endif
-
NDSCart::Reset();
GBACart::Reset();
GPU::Reset();
@@ -548,6 +544,10 @@ void Reset()
Wifi::Reset();
AREngine::Reset();
+
+#ifdef JIT_ENABLED
+ ARMJIT::Reset();
+#endif
}
void Stop()
@@ -1058,6 +1058,9 @@ void Halt()
void MapSharedWRAM(u8 val)
{
+ if (val == WRAMCnt)
+ return;
+
WRAMCnt = val;
switch (WRAMCnt & 0x3)
@@ -1090,6 +1093,11 @@ void MapSharedWRAM(u8 val)
SWRAM_ARM7Mask = 0x7FFF;
break;
}
+
+#ifdef JIT_ENABLED
+ ARMJIT::UpdateMemoryStatus9(0x3000000, 0x3000000 + 0x1000000);
+ ARMJIT::UpdateMemoryStatus7(0x3000000, 0x3000000 + 0x1000000);
+#endif
}
@@ -1873,12 +1881,18 @@ void ARM9Write8(u32 addr, u8 val)
switch (addr & 0xFF000000)
{
case 0x02000000:
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateMainRAMIfNecessary(addr);
+#endif
*(u8*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
return;
case 0x03000000:
if (SWRAM_ARM9)
{
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateSWRAM9IfNecessary(addr);
+#endif
*(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
}
return;
@@ -1923,12 +1937,18 @@ void ARM9Write16(u32 addr, u16 val)
switch (addr & 0xFF000000)
{
case 0x02000000:
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateMainRAMIfNecessary(addr);
+#endif
*(u16*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
return;
case 0x03000000:
if (SWRAM_ARM9)
{
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateSWRAM9IfNecessary(addr);
+#endif
*(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
}
return;
@@ -1949,7 +1969,12 @@ void ARM9Write16(u32 addr, u16 val)
case 0x00200000: GPU::WriteVRAM_BBG<u16>(addr, val); return;
case 0x00400000: GPU::WriteVRAM_AOBJ<u16>(addr, val); return;
case 0x00600000: GPU::WriteVRAM_BOBJ<u16>(addr, val); return;
- default: GPU::WriteVRAM_LCDC<u16>(addr, val); return;
+ default:
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateLCDCIfNecessary(addr);
+#endif
+ GPU::WriteVRAM_LCDC<u16>(addr, val);
+ return;
}
case 0x07000000:
@@ -1989,12 +2014,18 @@ void ARM9Write32(u32 addr, u32 val)
switch (addr & 0xFF000000)
{
case 0x02000000:
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateMainRAMIfNecessary(addr);
+#endif
*(u32*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
return ;
case 0x03000000:
if (SWRAM_ARM9)
{
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateSWRAM9IfNecessary(addr);
+#endif
*(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
}
return;
@@ -2015,7 +2046,12 @@ void ARM9Write32(u32 addr, u32 val)
case 0x00200000: GPU::WriteVRAM_BBG<u32>(addr, val); return;
case 0x00400000: GPU::WriteVRAM_AOBJ<u32>(addr, val); return;
case 0x00600000: GPU::WriteVRAM_BOBJ<u32>(addr, val); return;
- default: GPU::WriteVRAM_LCDC<u32>(addr, val); return;
+ default:
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateLCDCIfNecessary(addr);
+#endif
+ GPU::WriteVRAM_LCDC<u32>(addr, val);
+ return;
}
case 0x07000000:
@@ -2279,30 +2315,38 @@ u32 ARM7Read32(u32 addr)
void ARM7Write8(u32 addr, u8 val)
{
-#ifdef JIT_ENABLED
- ARMJIT::InvalidateByAddr7(addr);
-#endif
-
switch (addr & 0xFF800000)
{
case 0x02000000:
case 0x02800000:
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateMainRAMIfNecessary(addr);
+#endif
*(u8*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
return;
case 0x03000000:
if (SWRAM_ARM7)
{
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateSWRAM7IfNecessary(addr);
+#endif
*(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
return;
}
else
{
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
+#endif
*(u8*)&ARM7WRAM[addr & 0xFFFF] = val;
return;
}
case 0x03800000:
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
+#endif
*(u8*)&ARM7WRAM[addr & 0xFFFF] = val;
return;
@@ -2312,6 +2356,9 @@ void ARM7Write8(u32 addr, u8 val)
case 0x06000000:
case 0x06800000:
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateARM7WVRAMIfNecessary(addr);
+#endif
GPU::WriteVRAM_ARM7<u8>(addr, val);
return;
@@ -2342,30 +2389,38 @@ void ARM7Write8(u32 addr, u8 val)
void ARM7Write16(u32 addr, u16 val)
{
-#ifdef JIT_ENABLED
- ARMJIT::InvalidateByAddr7(addr);
-#endif
-
switch (addr & 0xFF800000)
{
case 0x02000000:
case 0x02800000:
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateMainRAMIfNecessary(addr);
+#endif
*(u16*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
return;
case 0x03000000:
if (SWRAM_ARM7)
{
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateSWRAM7IfNecessary(addr);
+#endif
*(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
return;
}
else
{
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
+#endif
*(u16*)&ARM7WRAM[addr & 0xFFFF] = val;
return;
}
case 0x03800000:
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
+#endif
*(u16*)&ARM7WRAM[addr & 0xFFFF] = val;
return;
@@ -2383,6 +2438,9 @@ void ARM7Write16(u32 addr, u16 val)
case 0x06000000:
case 0x06800000:
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateARM7WVRAMIfNecessary(addr);
+#endif
GPU::WriteVRAM_ARM7<u16>(addr, val);
return;
@@ -2415,30 +2473,38 @@ void ARM7Write16(u32 addr, u16 val)
void ARM7Write32(u32 addr, u32 val)
{
-#ifdef JIT_ENABLED
- ARMJIT::InvalidateByAddr7(addr);
-#endif
-
switch (addr & 0xFF800000)
{
case 0x02000000:
case 0x02800000:
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateMainRAMIfNecessary(addr);
+#endif
*(u32*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
return;
case 0x03000000:
if (SWRAM_ARM7)
{
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateSWRAM7IfNecessary(addr);
+#endif
*(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
return;
}
else
{
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
+#endif
*(u32*)&ARM7WRAM[addr & 0xFFFF] = val;
return;
}
case 0x03800000:
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
+#endif
*(u32*)&ARM7WRAM[addr & 0xFFFF] = val;
return;
@@ -2457,6 +2523,9 @@ void ARM7Write32(u32 addr, u32 val)
case 0x06000000:
case 0x06800000:
+#ifdef JIT_ENABLED
+ ARMJIT::InvalidateARM7WVRAMIfNecessary(addr);
+#endif
GPU::WriteVRAM_ARM7<u32>(addr, val);
return;
diff --git a/src/NDS.h b/src/NDS.h
index c7b455e..163260b 100644
--- a/src/NDS.h
+++ b/src/NDS.h
@@ -120,6 +120,14 @@ extern u8 ROMSeed1[2*8];
extern u8 ARM9BIOS[0x1000];
extern u8 ARM7BIOS[0x4000];
+extern u8 SharedWRAM[0x8000];
+extern u8* SWRAM_ARM9;
+extern u8* SWRAM_ARM7;
+extern u32 SWRAM_ARM9Mask;
+extern u32 SWRAM_ARM7Mask;
+
+extern u8 ARM7WRAM[0x10000];
+
#define MAIN_RAM_SIZE 0x400000
extern u8 MainRAM[MAIN_RAM_SIZE];