aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorRSDuck <rsduck@users.noreply.github.com>2019-11-03 15:33:20 +0100
committerRSDuck <rsduck@users.noreply.github.com>2020-04-26 13:05:06 +0200
commit386100c053adad10ab7de066d37f383d58d5cfa1 (patch)
tree888c2b17c21805dc98bf78941c33180b29f6be17 /src
parent803c61e1266040c631a716a37105615a998a38af (diff)
make literal optimisation more reliable
fixes spanish Pokemon HeartGold
Diffstat (limited to 'src')
-rw-r--r--src/ARMJIT.cpp52
-rw-r--r--src/ARMJIT.h2
-rw-r--r--src/ARMJIT_Internal.h3
-rw-r--r--src/ARMJIT_x64/ARMJIT_LoadStore.cpp34
4 files changed, 77 insertions, 14 deletions
diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp
index 0695b85..c7387c9 100644
--- a/src/ARMJIT.cpp
+++ b/src/ARMJIT.cpp
@@ -161,6 +161,27 @@ void FloodFillSetFlags(FetchedInstr instrs[], int start, u8 flags)
}
}
+bool DecodeLiteral(const FetchedInstr& instr, u32& addr) // Works out the address written by a store whose base is the PC (the caller only passes instructions with SrcRegs == 1 << 15). Sets addr and returns true for the kinds handled below; returns false and leaves addr untouched otherwise.
+{
+ switch (instr.Info.Kind)
+ {
+ case ARMInstrInfo::ak_STR_IMM:
+ case ARMInstrInfo::ak_STRB_IMM:
+ addr = (instr.Addr + 8) + ((instr.Instr & 0xFFF) * (instr.Instr & (1 << 23) ? 1 : -1)); // base is instr.Addr + 8 (presumably the value R15 reads as in ARM mode; the caller asserts !thumb). The 12-bit immediate in bits 0-11 is added when bit 23 is set and subtracted when it is clear.
+ return true;
+ case ARMInstrInfo::ak_STRD_IMM:
+ case ARMInstrInfo::ak_STRH_IMM:
+ addr = (instr.Addr + 8) + (((instr.Instr & 0xF00) >> 4 | (instr.Instr & 0xF)) * (instr.Instr & (1 << 23) ? 1 : -1)); // 8-bit immediate assembled from bits 8-11 (high nibble) and bits 0-3 (low nibble); bit 23 selects add or subtract, same as the case above.
+ return true;
+ case ARMInstrInfo::ak_STM: // I honestly hope no one was ever crazy enough to do stm pc, {whatever}. No offset is decoded for STM: only the base address itself is reported.
+ addr = instr.Addr + 8;
+ return true;
+ default:
+ JIT_DEBUGPRINT("Literal %08x %x not recognised\n", instr.Instr, instr.Addr); // unhandled kind: log the raw instruction and its address; the caller skips invalidation when false is returned.
+ return false;
+ }
+}
+
bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, bool hasLink, u32 lr, bool& link,
u32& linkAddr, u32& targetAddr)
{
@@ -463,6 +484,23 @@ void CompileBlock(ARM* cpu)
instrs[i].DataCycles = cpu->DataCycles;
instrs[i].DataRegion = cpu->DataRegion;
+ if (instrs[i].Info.SpecialKind == ARMInstrInfo::special_WriteMem
+ && instrs[i].Info.SrcRegs == (1 << 15)
+ && instrs[i].Info.DstRegs == 0)
+ {
+ assert (!thumb);
+
+ u32 addr;
+ if (DecodeLiteral(instrs[i], addr))
+ {
+ JIT_DEBUGPRINT("pc relative write detected\n");
+ u32 translatedAddr = cpu->Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
+
+ ARMJIT::InvalidateByAddr(translatedAddr, false);
+ CodeRanges[translatedAddr / 512].InvalidLiterals |= (1 << ((translatedAddr & 0x1FF) / 16));
+ }
+ }
+
if (thumb && instrs[i].Info.Kind == ARMInstrInfo::tk_BL_LONG_2 && i > 0
&& instrs[i - 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_1)
{
@@ -631,7 +669,7 @@ void CompileBlock(ARM* cpu)
JitBlocks.Add(block);
}
-void InvalidateByAddr(u32 pseudoPhysical)
+void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore)
{
JIT_DEBUGPRINT("invalidating by addr %x\n", pseudoPhysical);
AddressRange* range = &CodeRanges[pseudoPhysical / 512];
@@ -657,11 +695,14 @@ void InvalidateByAddr(u32 pseudoPhysical)
FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL;
- u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr);
- if (RestoreCandidates[slot] && RestoreCandidates[slot] != block)
- delete RestoreCandidates[slot];
+ if (mayRestore)
+ {
+ u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr);
+ if (RestoreCandidates[slot] && RestoreCandidates[slot] != block)
+ delete RestoreCandidates[slot];
- RestoreCandidates[slot] = block;
+ RestoreCandidates[slot] = block;
+ }
}
if ((range->TimesInvalidated + 1) > range->TimesInvalidated)
range->TimesInvalidated++;
@@ -732,6 +773,7 @@ void ResetBlockCache()
u32 addr = block->AddressRanges()[j];
CodeRanges[addr / 512].Blocks.Clear();
CodeRanges[addr / 512].TimesInvalidated = 0;
+ CodeRanges[addr / 512].InvalidLiterals = 0;
}
delete block;
}
diff --git a/src/ARMJIT.h b/src/ARMJIT.h
index 1db4d66..09cc463 100644
--- a/src/ARMJIT.h
+++ b/src/ARMJIT.h
@@ -61,7 +61,7 @@ inline JitBlockEntry LookUpBlock(u32 addr)
void Init();
void DeInit();
-void InvalidateByAddr(u32 pseudoPhysical);
+void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore = true);
void InvalidateAll();
void InvalidateITCM(u32 addr);
diff --git a/src/ARMJIT_Internal.h b/src/ARMJIT_Internal.h
index 9e6713d..fb05f75 100644
--- a/src/ARMJIT_Internal.h
+++ b/src/ARMJIT_Internal.h
@@ -63,7 +63,7 @@ struct __attribute__((packed)) TinyVector
{
T* Data = NULL;
u16 Capacity = 0;
- u32 Length = 0; // make it 32 bit so we don't need movzx
+ u16 Length = 0;
~TinyVector()
{
@@ -181,6 +181,7 @@ private:
struct __attribute__((packed)) AddressRange // One entry of CodeRanges, which is indexed by pseudo-physical address / 512 (see InvalidateByAddr).
{ // NOTE(review): the x64 memory routines scale the range index with SHL 4, so sizeof(AddressRange) presumably has to stay 16 bytes - confirm before widening any field.
TinyVector<JitBlock*> Blocks; // blocks registered for this range; the generated store routines compare Blocks.Length against 0 to decide whether InvalidateByAddr has to run.
+ u16 InvalidLiterals; // bitmask of spots in this range hit by a pc-relative write; set in CompileBlock, tested in Comp_MemAccess before a literal load is folded, cleared in ResetBlockCache. Bit index is (addr & 0x1FF) / 16. NOTE(review): that index runs 0..31 but this field has only 16 bits, so offsets >= 256 look unrepresentable - confirm.
u16 TimesInvalidated; // incremented by InvalidateByAddr and reset to 0 by ResetBlockCache.
};
diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
index 3799774..82f80a7 100644
--- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
@@ -108,7 +108,7 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
SHR(32, R(RSCRATCH), Imm8(9));
SHL(32, R(RSCRATCH), Imm8(4));
- CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
+ CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
FixupBranch noCode = J_CC(CC_Z);
JMP((u8*)InvalidateByAddr, true);
SetJumpTarget(noCode);
@@ -206,7 +206,7 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
MOV(32, R(ABI_PARAM4), R(RSCRATCH));
SHR(32, R(RSCRATCH), Imm8(9));
SHL(32, R(RSCRATCH), Imm8(4));
- CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
+ CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
FixupBranch noCode = J_CC(CC_Z);
ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
MOV(32, R(ABI_PARAM1), R(ABI_PARAM4));
@@ -278,10 +278,10 @@ void Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
Comp_AddCycles_CDI();
}
-void fault(u32 a, u32 b)
+/*void fault(u32 a, u32 b, u32 c, u32 d)
{
- printf("actually not static! %x %x\n", a, b);
-}
+ printf("actually not static! %x %x %x %x\n", a, b, c, d);
+}*/
void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags)
{
@@ -291,11 +291,17 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
if (size == 16)
addressMask = ~1;
+ //bool check = false;
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_SignExtend|memop_Post|memop_Store|memop_Writeback)))
{
u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
- Comp_MemLoadLiteral(size, rd, addr);
- return;
+ u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
+
+ if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 16))))
+ {
+ Comp_MemLoadLiteral(size, rd, addr);
+ return;
+ }
}
{
@@ -438,6 +444,20 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
CALL(memoryFunc);
+ /*if (Num == 0 && check)
+ {
+ CMP(32, R(EAX), rdMapped);
+ FixupBranch notEqual = J_CC(CC_E);
+ ABI_PushRegistersAndAdjustStack({RSCRATCH}, 0);
+ MOV(32, R(ABI_PARAM1), Imm32(R15 - (Thumb ? 4 : 8)));
+ MOV(32, R(ABI_PARAM2), R(EAX));
+ MOV(32, R(ABI_PARAM3), rdMapped);
+ MOV(32, R(ABI_PARAM4), Imm32(CurInstr.Instr));
+ CALL((u8*)fault);
+ ABI_PopRegistersAndAdjustStack({RSCRATCH}, 0);
+ SetJumpTarget(notEqual);
+ }*/
+
if (!(flags & memop_Store))
{
if (inlinePreparation && size == 32)