author    RSDuck <rsduck@users.noreply.github.com>  2020-05-09 00:45:05 +0200
committer RSDuck <rsduck@users.noreply.github.com>  2020-05-09 00:45:05 +0200
commit    0f53a34551d60964345debb1766f81ca4686eb17 (patch)
tree      c3d004adff258e1060e72076e829c99cd49b368c /src/ARMJIT_x64
parent    bcc4b5c8dda5ec91127808a525e2b7dbda41a4f3 (diff)
rewrite JIT memory emulation
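Editorial summary (from the diff below): this commit drops the pre-generated per-size memory routines (Gen_MemoryRoutine9, Gen_MemoryRoutineSeq9/7) in favour of memory accesses compiled inline. The JIT code buffer is split into a "near" and a "far" region: the expected fast path of a load/store is emitted near, guarded by a per-page memory-status byte, while the generic fallback (the SlowRead/SlowWrite/SlowBlockTransfer helpers) is emitted far, keeping it out of the hot instruction stream. Literal loads now validate themselves against an InvalidLiterals list instead of the old CodeRanges bitmap.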
Diffstat (limited to 'src/ARMJIT_x64')
-rw-r--r--  src/ARMJIT_x64/ARMJIT_Compiler.cpp    43
-rw-r--r--  src/ARMJIT_x64/ARMJIT_Compiler.h      34
-rw-r--r--  src/ARMJIT_x64/ARMJIT_LoadStore.cpp  935
3 files changed, 538 insertions(+), 474 deletions(-)
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
index dd20e3c..eee2e0f 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
@@ -195,26 +195,6 @@ Compiler::Compiler()
Reset();
- for (int i = 0; i < 3; i++)
- {
- for (int j = 0; j < 2; j++)
- MemoryFuncs9[i][j] = Gen_MemoryRoutine9(j, 8 << i);
- }
- MemoryFuncs7[0][0] = (void*)NDS::ARM7Read8;
- MemoryFuncs7[0][1] = (void*)NDS::ARM7Write8;
- MemoryFuncs7[1][0] = (void*)NDS::ARM7Read16;
- MemoryFuncs7[1][1] = (void*)NDS::ARM7Write16;
- MemoryFuncs7[2][0] = (void*)NDS::ARM7Read32;
- MemoryFuncs7[2][1] = (void*)NDS::ARM7Write32;
-
- for (int i = 0; i < 2; i++)
- for (int j = 0; j < 2; j++)
- {
- MemoryFuncsSeq9[i][j] = Gen_MemoryRoutineSeq9(i, j);
- MemoryFuncsSeq7[i][j][0] = Gen_MemoryRoutineSeq7(i, j, false);
- MemoryFuncsSeq7[i][j][1] = Gen_MemoryRoutineSeq7(i, j, true);
- }
-
{
// RSCRATCH mode
// RSCRATCH2 reg number
@@ -317,6 +297,12 @@ Compiler::Compiler()
// move the region forward to prevent overwriting the generated functions
CodeMemSize -= GetWritableCodePtr() - ResetStart;
ResetStart = GetWritableCodePtr();
+
+ NearStart = ResetStart;
+ FarStart = ResetStart + 1024*1024*24;
+
+ NearSize = FarStart - ResetStart;
+ FarSize = (ResetStart + CodeMemSize) - FarStart;
}
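FarStart is pinned 24 MiB (1024*1024*24) past whatever remains of the code buffer once the stubs before it are generated, so NearSize is a fixed 24 MiB and FarSize is the remainder. Purely for illustration (CodeMemSize is defined elsewhere): with a 32 MiB buffer this would leave roughly 8 MiB of far space.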
void Compiler::LoadCPSR()
@@ -504,6 +490,9 @@ void Compiler::Reset()
{
memset(ResetStart, 0xcc, CodeMemSize);
SetCodePtr(ResetStart);
+
+ NearCode = NearStart;
+ FarCode = FarStart;
}
void Compiler::Comp_SpecialBranchBehaviour(bool taken)
@@ -544,8 +533,16 @@ void Compiler::Comp_SpecialBranchBehaviour(bool taken)
JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
{
- if (CodeMemSize - (GetWritableCodePtr() - ResetStart) < 1024 * 32) // guess...
+ if (NearSize - (NearCode - NearStart) < 1024 * 32) // guess...
+ {
+ printf("near reset\n");
+ ResetBlockCache();
+ }
+ if (FarSize - (FarCode - FarStart) < 1024 * 32) // guess...
+ {
+ printf("far reset\n");
ResetBlockCache();
+ }
ConstantCycles = 0;
Thumb = thumb;
@@ -762,12 +759,14 @@ void Compiler::Comp_AddCycles_CDI()
Comp_AddCycles_CD();
else
{
+ IrregularCycles = true;
+
s32 cycles;
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
s32 numD = CurInstr.DataCycles;
- if ((CurInstr.DataRegion >> 4) == 0x02) // mainRAM
+ if ((CurInstr.DataRegion >> 24) == 0x02) // mainRAM
{
if (CodeRegion == 0x02)
cycles = numC + numD;
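Note on this hunk: CurInstr.DataRegion previously held a pre-shifted region code, matched with (DataRegion >> 4) == 0x02; after the rewrite it carries a full 32-bit address, so the top byte is tested instead ((DataRegion >> 24) == 0x02). IrregularCycles is also now set on this ARM7 data-access path.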
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h
index e0a4978..9df218b 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.h
@@ -140,7 +140,7 @@ public:
};
void Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags);
s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
- void Comp_MemLoadLiteral(int size, int rd, u32 addr);
+ bool Comp_MemLoadLiteral(int size, int rd, u32 addr);
void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
@@ -154,12 +154,6 @@ public:
void Comp_SpecialBranchBehaviour(bool taken);
- void* Gen_MemoryRoutine9(bool store, int size);
-
- void* Gen_MemoryRoutineSeq9(bool store, bool preinc);
- void* Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM);
-
- void* Gen_ChangeCPSRRoutine();
Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed);
Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed);
@@ -193,6 +187,26 @@ public:
return (u8*)entry - ResetStart;
}
+ void SwitchToNearCode()
+ {
+ FarCode = GetWritableCodePtr();
+ SetCodePtr(NearCode);
+ }
+
+ void SwitchToFarCode()
+ {
+ NearCode = GetWritableCodePtr();
+ SetCodePtr(FarCode);
+ }
+
+ u8* FarCode;
+ u8* NearCode;
+ u32 FarSize;
+ u32 NearSize;
+
+ u8* NearStart;
+ u8* FarStart;
+
u8* ResetStart;
u32 CodeMemSize;
@@ -201,12 +215,6 @@ public:
void* BranchStub[2];
- void* MemoryFuncs9[3][2];
- void* MemoryFuncs7[3][2];
-
- void* MemoryFuncsSeq9[2][2];
- void* MemoryFuncsSeq7[2][2][2];
-
void* ReadBanked;
void* WriteBanked;
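The SwitchToNearCode/SwitchToFarCode pair above is the core of the new layout: each region keeps its own write cursor, and switching parks the current cursor before resuming at the other one. A minimal standalone model of the mechanism (illustrative, not melonDS code; Cursor stands in for the Dolphin emitter's Get/SetWritableCodePtr):

#include <cstdint>

struct EmitterModel
{
    uint8_t* NearCode; // write cursor for the hot, inlined fast paths
    uint8_t* FarCode;  // write cursor for cold fallbacks and slow paths
    uint8_t* Cursor;   // stands in for Get/SetWritableCodePtr()

    void SwitchToFarCode()  { NearCode = Cursor; Cursor = FarCode; }
    void SwitchToNearCode() { FarCode  = Cursor; Cursor = NearCode; }
};

// Emission pattern used below in Comp_MemAccess/Comp_MemAccessBlock:
// emit the guard and fast path near, SwitchToFarCode() for the fallback,
// then SwitchToNearCode() and patch the jump back.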
diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
index b595e32..c13b779 100644
--- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
@@ -25,236 +25,17 @@ int squeezePointer(T* ptr)
improvement.
*/
-/*
- address - ABI_PARAM1 (a.k.a. ECX = RSCRATCH3 on Windows)
- store value - ABI_PARAM2 (a.k.a. RDX = RSCRATCH2 on Windows)
-*/
-void* Compiler::Gen_MemoryRoutine9(bool store, int size)
+bool Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
{
- u32 addressMask = ~(size == 32 ? 3 : (size == 16 ? 1 : 0));
- AlignCode4();
- void* res = GetWritableCodePtr();
-
- MOV(32, R(RSCRATCH), R(ABI_PARAM1));
- SUB(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
- CMP(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMSize)));
- FixupBranch insideDTCM = J_CC(CC_B);
-
- CMP(32, R(ABI_PARAM1), MDisp(RCPU, offsetof(ARMv5, ITCMSize)));
- FixupBranch insideITCM = J_CC(CC_B);
-
- if (store)
- {
- if (size > 8)
- AND(32, R(ABI_PARAM1), Imm32(addressMask));
- switch (size)
- {
- case 32: JMP((u8*)NDS::ARM9Write32, true); break;
- case 16: JMP((u8*)NDS::ARM9Write16, true); break;
- case 8: JMP((u8*)NDS::ARM9Write8, true); break;
- }
- }
- else
- {
- if (size == 32)
- {
- ABI_PushRegistersAndAdjustStack({ABI_PARAM1}, 8);
- AND(32, R(ABI_PARAM1), Imm32(addressMask));
- // everything's already in the appropriate register
- ABI_CallFunction(NDS::ARM9Read32);
- ABI_PopRegistersAndAdjustStack({ECX}, 8);
- AND(32, R(ECX), Imm8(3));
- SHL(32, R(ECX), Imm8(3));
- ROR_(32, R(RSCRATCH), R(ECX));
- RET();
- }
- else if (size == 16)
- {
- AND(32, R(ABI_PARAM1), Imm32(addressMask));
- JMP((u8*)NDS::ARM9Read16, true);
- }
- else
- JMP((u8*)NDS::ARM9Read8, true);
- }
-
- SetJumpTarget(insideDTCM);
- AND(32, R(RSCRATCH), Imm32(0x3FFF & addressMask));
- if (store)
- MOV(size, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)), R(ABI_PARAM2));
- else
- {
- MOVZX(32, size, RSCRATCH, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)));
- if (size == 32)
- {
- if (ABI_PARAM1 != ECX)
- MOV(32, R(ECX), R(ABI_PARAM1));
- AND(32, R(ECX), Imm8(3));
- SHL(32, R(ECX), Imm8(3));
- ROR_(32, R(RSCRATCH), R(ECX));
- }
- }
- RET();
+ u32 translatedAddr = Num == 0 ? TranslateAddr9(addr) : TranslateAddr7(addr);
- SetJumpTarget(insideITCM);
- MOV(32, R(ABI_PARAM3), R(ABI_PARAM1)); // free up ECX
- AND(32, R(ABI_PARAM3), Imm32(0x7FFF & addressMask));
- if (store)
- {
- MOV(size, MComplex(RCPU, ABI_PARAM3, SCALE_1, offsetof(ARMv5, ITCM)), R(ABI_PARAM2));
-
- // if CodeRanges[pseudoPhysical/256].Blocks.Length > 0 we're writing into code!
- static_assert(sizeof(AddressRange) == 16);
- LEA(32, ABI_PARAM1, MDisp(ABI_PARAM3, ExeMemRegionOffsets[exeMem_ITCM]));
- MOV(32, R(RSCRATCH), R(ABI_PARAM1));
- SHR(32, R(RSCRATCH), Imm8(9));
- SHL(32, R(RSCRATCH), Imm8(4));
- CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
- FixupBranch noCode = J_CC(CC_Z);
- JMP((u8*)InvalidateByAddr, true);
- SetJumpTarget(noCode);
- }
- else
+ int invalidLiteralIdx = InvalidLiterals.Find(translatedAddr);
+ if (invalidLiteralIdx != -1)
{
- MOVZX(32, size, RSCRATCH, MComplex(RCPU, ABI_PARAM3, SCALE_1, offsetof(ARMv5, ITCM)));
- if (size == 32)
- {
- if (ABI_PARAM1 != ECX)
- MOV(32, R(ECX), R(ABI_PARAM1));
- AND(32, R(ECX), Imm8(3));
- SHL(32, R(ECX), Imm8(3));
- ROR_(32, R(RSCRATCH), R(ECX));
- }
+ InvalidLiterals.Remove(invalidLiteralIdx);
+ return false;
}
- RET();
-
- static_assert(RSCRATCH == EAX, "Someone changed RSCRATCH!");
-
- return res;
-}
-
-#define MEMORY_SEQ_WHILE_COND \
- if (!store) \
- MOV(32, currentElement, R(EAX));\
- if (!preinc) \
- ADD(32, R(ABI_PARAM1), Imm8(4)); \
- \
- SUB(32, R(ABI_PARAM3), Imm8(1)); \
- J_CC(CC_NZ, repeat);
-
-/*
- ABI_PARAM1 address
- ABI_PARAM2 address where registers are stored
- ABI_PARAM3 how many values to read/write
-
- Dolphin x64CodeEmitter is my favourite assembler
- */
-void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
-{
- void* res = (void*)GetWritableCodePtr();
-
- const u8* repeat = GetCodePtr();
-
- if (preinc)
- ADD(32, R(ABI_PARAM1), Imm8(4));
- MOV(32, R(RSCRATCH), R(ABI_PARAM1));
- SUB(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
- CMP(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMSize)));
- FixupBranch insideDTCM = J_CC(CC_B);
-
- CMP(32, R(ABI_PARAM1), MDisp(RCPU, offsetof(ARMv5, ITCMSize)));
- FixupBranch insideITCM = J_CC(CC_B);
-
- OpArg currentElement = MComplex(ABI_PARAM2, ABI_PARAM3, SCALE_8, -8); // wasting stack space like a gangster
-
- ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
- AND(32, R(ABI_PARAM1), Imm8(~3));
- if (store)
- {
- MOV(32, R(ABI_PARAM2), currentElement);
- CALL((void*)NDS::ARM9Write32);
- }
- else
- CALL((void*)NDS::ARM9Read32);
- ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
-
- MEMORY_SEQ_WHILE_COND
- RET();
-
- SetJumpTarget(insideDTCM);
- AND(32, R(RSCRATCH), Imm32(0x3FFF & ~3));
- if (store)
- {
- MOV(32, R(ABI_PARAM4), currentElement);
- MOV(32, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)), R(ABI_PARAM4));
- }
- else
- MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)));
-
- MEMORY_SEQ_WHILE_COND
- RET();
-
- SetJumpTarget(insideITCM);
- MOV(32, R(RSCRATCH), R(ABI_PARAM1));
- AND(32, R(RSCRATCH), Imm32(0x7FFF & ~3));
- if (store)
- {
- MOV(32, R(ABI_PARAM4), currentElement);
- MOV(32, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, ITCM)), R(ABI_PARAM4));
-
- ADD(32, R(RSCRATCH), Imm32(ExeMemRegionOffsets[exeMem_ITCM]));
- MOV(32, R(ABI_PARAM4), R(RSCRATCH));
- SHR(32, R(RSCRATCH), Imm8(9));
- SHL(32, R(RSCRATCH), Imm8(4));
- CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
- FixupBranch noCode = J_CC(CC_Z);
- ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
- MOV(32, R(ABI_PARAM1), R(ABI_PARAM4));
- CALL((u8*)InvalidateByAddr);
- ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
- SetJumpTarget(noCode);
- }
- else
- MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, ITCM)));
-
- MEMORY_SEQ_WHILE_COND
- RET();
-
- return res;
-}
-
-void* Compiler::Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM)
-{
- void* res = (void*)GetWritableCodePtr();
-
- const u8* repeat = GetCodePtr();
-
- if (preinc)
- ADD(32, R(ABI_PARAM1), Imm8(4));
-
- OpArg currentElement = MComplex(ABI_PARAM2, ABI_PARAM3, SCALE_8, -8);
-
- ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
- AND(32, R(ABI_PARAM1), Imm8(~3));
- if (store)
- {
- MOV(32, R(ABI_PARAM2), currentElement);
- CALL((void*)NDS::ARM7Write32);
- }
- else
- CALL((void*)NDS::ARM7Read32);
- ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
-
- MEMORY_SEQ_WHILE_COND
- RET();
-
- return res;
-}
-
-#undef MEMORY_SEQ_WHILE_COND
-
-void Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
-{
u32 val;
// make sure arm7 bios is accessible
u32 tmpR15 = CurCPU->R[15];
@@ -276,12 +57,10 @@ void Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
RegCache.PutLiteral(rd, val);
Comp_AddCycles_CDI();
+
+ return true;
}
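Comp_MemLoadLiteral now reports success: if the literal's translated address is on the InvalidLiterals list, the entry is consumed and false is returned, and the caller falls back to a real memory access (see the call sites below and in T_Comp_LoadPCRel at the end of this diff). Previously the callers checked the CodeRanges[...].InvalidLiterals bitmap themselves.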
-/*void fault(u32 a, u32 b, u32 c, u32 d)
-{
- printf("actually not static! %x %x %x %x\n", a, b, c, d);
-}*/
void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags)
{
@@ -291,17 +70,12 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
if (size == 16)
addressMask = ~1;
- //bool check = false;
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_SignExtend|memop_Post|memop_Store|memop_Writeback)))
{
u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
- u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
-
- if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 16))))
- {
- Comp_MemLoadLiteral(size, rd, addr);
+
+ if (Comp_MemLoadLiteral(size, rd, addr))
return;
- }
}
{
@@ -314,173 +88,334 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
Comp_AddCycles_CDI();
}
+ bool addrIsStatic = Config::JIT_LiteralOptimisations
+ && RegCache.IsLiteral(rn) && op2.IsImm && !(flags & (memop_Writeback|memop_Post));
+ u32 staticAddress;
+ if (addrIsStatic)
+ staticAddress = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
OpArg rdMapped = MapReg(rd);
- OpArg rnMapped = MapReg(rn);
- if (Thumb && rn == 15)
- rnMapped = Imm32(R15 & ~0x2);
-
- bool inlinePreparation = Num == 1;
- u32 constLocalROR32 = 4;
-
- void* memoryFunc = Num == 0
- ? MemoryFuncs9[size >> 4][!!(flags & memop_Store)]
- : MemoryFuncs7[size >> 4][!!((flags & memop_Store))];
- if (Config::JIT_LiteralOptimisations && (rd != 15 || (flags & memop_Store)) && op2.IsImm && RegCache.IsLiteral(rn))
+ if (!addrIsStatic)
{
- u32 addr = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
-
- /*MOV(32, R(ABI_PARAM1), Imm32(CurInstr.Instr));
- MOV(32, R(ABI_PARAM1), Imm32(R15));
- MOV_sum(32, RSCRATCH, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
- CMP(32, R(RSCRATCH), Imm32(addr));
- FixupBranch eq = J_CC(CC_E);
- CALL((void*)fault);
- SetJumpTarget(eq);*/
-
- NDS::MemRegion region;
- region.Mem = NULL;
- if (Num == 0)
+ OpArg rnMapped = MapReg(rn);
+ if (Thumb && rn == 15)
+ rnMapped = Imm32(R15 & ~0x2);
+
+ X64Reg finalAddr = RSCRATCH3;
+ if (flags & memop_Post)
{
- ARMv5* cpu5 = (ARMv5*)CurCPU;
+ MOV(32, R(RSCRATCH3), rnMapped);
- // stupid dtcm...
- if (addr >= cpu5->DTCMBase && addr < (cpu5->DTCMBase + cpu5->DTCMSize))
- {
- // disable this for now as DTCM is located in heap
- // which might excced the RIP-addressable range
- //region.Mem = cpu5->DTCM;
- //region.Mask = 0x3FFF;
- }
- else
- {
- NDS::ARM9GetMemRegion(addr, flags & memop_Store, &region);
- }
+ finalAddr = rnMapped.GetSimpleReg();
}
- else
- NDS::ARM7GetMemRegion(addr, flags & memop_Store, &region);
- if (region.Mem != NULL)
+ if (op2.IsImm)
+ {
+ MOV_sum(32, finalAddr, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
+ }
+ else
{
- void* ptr = &region.Mem[addr & addressMask & region.Mask];
+ OpArg rm = MapReg(op2.Reg.Reg);
- if (flags & memop_Store)
+ if (!(flags & memop_SubtractOffset) && rm.IsSimpleReg() && rnMapped.IsSimpleReg()
+ && op2.Reg.Op == 0 && op2.Reg.Amount > 0 && op2.Reg.Amount <= 3)
{
- MOV(size, M(ptr), MapReg(rd));
+ LEA(32, finalAddr,
+ MComplex(rnMapped.GetSimpleReg(), rm.GetSimpleReg(), 1 << op2.Reg.Amount, 0));
}
else
{
- if (flags & memop_SignExtend)
- MOVSX(32, size, rdMapped.GetSimpleReg(), M(ptr));
- else
- MOVZX(32, size, rdMapped.GetSimpleReg(), M(ptr));
+ bool throwAway;
+ OpArg offset =
+ Comp_RegShiftImm(op2.Reg.Op, op2.Reg.Amount, rm, false, throwAway);
- if (size == 32 && addr & ~0x3)
+ if (flags & memop_SubtractOffset)
{
- ROR_(32, rdMapped, Imm8((addr & 0x3) << 3));
+ if (R(finalAddr) != rnMapped)
+ MOV(32, R(finalAddr), rnMapped);
+ if (!offset.IsZero())
+ SUB(32, R(finalAddr), offset);
}
+ else
+ MOV_sum(32, finalAddr, rnMapped, offset);
}
-
- return;
}
- void* specialFunc = GetFuncForAddr(CurCPU, addr, flags & memop_Store, size);
- if (specialFunc)
- {
- memoryFunc = specialFunc;
- inlinePreparation = true;
- constLocalROR32 = addr & 0x3;
- }
+ if ((flags & memop_Writeback) && !(flags & memop_Post))
+ MOV(32, rnMapped, R(finalAddr));
}
- X64Reg finalAddr = ABI_PARAM1;
- if (flags & memop_Post)
- {
- MOV(32, R(ABI_PARAM1), rnMapped);
+ int expectedTarget = Num == 0
+ ? ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion)
+ : ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion);
+ if (CurInstr.Cond() < 0xE)
+ expectedTarget = memregion_Other;
+
+ bool compileFastPath = false, compileSlowPath = !addrIsStatic || (flags & memop_Store);
- finalAddr = rnMapped.GetSimpleReg();
+ switch (expectedTarget)
+ {
+ case memregion_MainRAM:
+ case memregion_DTCM:
+ case memregion_WRAM7:
+ case memregion_SWRAM9:
+ case memregion_SWRAM7:
+ case memregion_IO9:
+ case memregion_IO7:
+ case memregion_VWRAM:
+ compileFastPath = true;
+ break;
+ case memregion_Wifi:
+ compileFastPath = size >= 16;
+ break;
+ case memregion_VRAM:
+ compileFastPath = !(flags & memop_Store) || size >= 16;
+ case memregion_BIOS9:
+ compileFastPath = !(flags & memop_Store);
+ break;
+ default: break;
}
- if (op2.IsImm)
+ if (addrIsStatic && !compileFastPath)
{
- MOV_sum(32, finalAddr, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
+ compileFastPath = false;
+ compileSlowPath = true;
}
- else
+
+ if (addrIsStatic && compileSlowPath)
+ MOV(32, R(RSCRATCH3), Imm32(staticAddress));
+
+ if (compileFastPath)
{
- OpArg rm = MapReg(op2.Reg.Reg);
+ FixupBranch slowPath;
+ if (compileSlowPath)
+ {
+ MOV(32, R(RSCRATCH), R(RSCRATCH3));
+ SHR(32, R(RSCRATCH), Imm8(9));
+ if (flags & memop_Store)
+ {
+ CMP(8, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)), Imm8(expectedTarget));
+ }
+ else
+ {
+ MOVZX(32, 8, RSCRATCH, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)));
+ AND(32, R(RSCRATCH), Imm8(~0x80));
+ CMP(32, R(RSCRATCH), Imm8(expectedTarget));
+ }
+
+ slowPath = J_CC(CC_NE, true);
+ }
- if (!(flags & memop_SubtractOffset) && rm.IsSimpleReg() && rnMapped.IsSimpleReg()
- && op2.Reg.Op == 0 && op2.Reg.Amount > 0 && op2.Reg.Amount <= 3)
+ if (expectedTarget == memregion_MainRAM || expectedTarget == memregion_WRAM7
+ || expectedTarget == memregion_BIOS9)
{
- LEA(32, finalAddr,
- MComplex(rnMapped.GetSimpleReg(), rm.GetSimpleReg(), 1 << op2.Reg.Amount, 0));
+ u8* data;
+ u32 mask;
+ if (expectedTarget == memregion_MainRAM)
+ {
+ data = NDS::MainRAM;
+ mask = MAIN_RAM_SIZE - 1;
+ }
+ else if (expectedTarget == memregion_BIOS9)
+ {
+ data = NDS::ARM9BIOS;
+ mask = 0xFFF;
+ }
+ else
+ {
+ data = NDS::ARM7WRAM;
+ mask = 0xFFFF;
+ }
+ OpArg memLoc;
+ if (addrIsStatic)
+ {
+ memLoc = M(data + ((staticAddress & mask & addressMask)));
+ }
+ else
+ {
+ MOV(32, R(RSCRATCH), R(RSCRATCH3));
+ AND(32, R(RSCRATCH), Imm32(mask & addressMask));
+ memLoc = MDisp(RSCRATCH, squeezePointer(data));
+ }
+ if (flags & memop_Store)
+ MOV(size, memLoc, rdMapped);
+ else if (flags & memop_SignExtend)
+ MOVSX(32, size, rdMapped.GetSimpleReg(), memLoc);
+ else
+ MOVZX(32, size, rdMapped.GetSimpleReg(), memLoc);
}
- else
+ else if (expectedTarget == memregion_DTCM)
+ {
+ if (addrIsStatic)
+ MOV(32, R(RSCRATCH), Imm32(staticAddress));
+ else
+ MOV(32, R(RSCRATCH), R(RSCRATCH3));
+ SUB(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
+ AND(32, R(RSCRATCH), Imm32(0x3FFF & addressMask));
+ OpArg memLoc = MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM));
+ if (flags & memop_Store)
+ MOV(size, memLoc, rdMapped);
+ else if (flags & memop_SignExtend)
+ MOVSX(32, size, rdMapped.GetSimpleReg(), memLoc);
+ else
+ MOVZX(32, size, rdMapped.GetSimpleReg(), memLoc);
+ }
+ else if (expectedTarget == memregion_SWRAM9 || expectedTarget == memregion_SWRAM7)
{
- bool throwAway;
- OpArg offset =
- Comp_RegShiftImm(op2.Reg.Op, op2.Reg.Amount, rm, false, throwAway);
-
- if (flags & memop_SubtractOffset)
+ MOV(64, R(RSCRATCH2), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9 : &NDS::SWRAM_ARM7));
+ if (addrIsStatic)
{
- if (R(finalAddr) != rnMapped)
- MOV(32, R(finalAddr), rnMapped);
- if (!offset.IsZero())
- SUB(32, R(finalAddr), offset);
+ MOV(32, R(RSCRATCH), Imm32(staticAddress & addressMask));
}
else
- MOV_sum(32, finalAddr, rnMapped, offset);
+ {
+ MOV(32, R(RSCRATCH), R(RSCRATCH3));
+ AND(32, R(RSCRATCH), Imm8(addressMask));
+ }
+ AND(32, R(RSCRATCH), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9Mask : &NDS::SWRAM_ARM7Mask));
+ OpArg memLoc = MRegSum(RSCRATCH, RSCRATCH2);
+ if (flags & memop_Store)
+ MOV(size, memLoc, rdMapped);
+ else if (flags & memop_SignExtend)
+ MOVSX(32, size, rdMapped.GetSimpleReg(), memLoc);
+ else
+ MOVZX(32, size, rdMapped.GetSimpleReg(), memLoc);
}
- }
+ else
+ {
+ u32 maskedDataRegion;
- if ((flags & memop_Writeback) && !(flags & memop_Post))
- MOV(32, rnMapped, R(finalAddr));
+ if (addrIsStatic)
+ {
+ maskedDataRegion = staticAddress;
+ MOV(32, R(ABI_PARAM1), Imm32(staticAddress));
+ }
+ else
+ {
+ if (ABI_PARAM1 != RSCRATCH3)
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
+ AND(32, R(ABI_PARAM1), Imm8(addressMask));
- if (flags & memop_Store)
- MOV(32, R(ABI_PARAM2), rdMapped);
+ maskedDataRegion = CurInstr.DataRegion;
+ if (Num == 0)
+ maskedDataRegion &= ~0xFFFFFF;
+ else
+ maskedDataRegion &= ~0x7FFFFF;
+ }
- if (!(flags & memop_Store) && inlinePreparation && constLocalROR32 == 4 && size == 32)
- MOV(32, rdMapped, R(ABI_PARAM1));
+ void* func = GetFuncForAddr(CurCPU, maskedDataRegion, flags & memop_Store, size);
- if (inlinePreparation && size > 8)
- AND(32, R(ABI_PARAM1), Imm8(addressMask));
+ if (flags & memop_Store)
+ {
+ MOV(32, R(ABI_PARAM2), rdMapped);
- CALL(memoryFunc);
+ ABI_CallFunction((void(*)())func);
+ }
+ else
+ {
+ if (!addrIsStatic)
+ MOV(32, rdMapped, R(RSCRATCH3));
- /*if (Num == 0 && check)
- {
- CMP(32, R(EAX), rdMapped);
- FixupBranch notEqual = J_CC(CC_E);
- ABI_PushRegistersAndAdjustStack({RSCRATCH}, 0);
- MOV(32, R(ABI_PARAM1), Imm32(R15 - (Thumb ? 4 : 8)));
- MOV(32, R(ABI_PARAM2), R(EAX));
- MOV(32, R(ABI_PARAM3), rdMapped);
- MOV(32, R(ABI_PARAM4), Imm32(CurInstr.Instr));
- CALL((u8*)fault);
- ABI_PopRegistersAndAdjustStack({RSCRATCH}, 0);
- SetJumpTarget(notEqual);
- }*/
-
- if (!(flags & memop_Store))
- {
- if (inlinePreparation && size == 32)
+ ABI_CallFunction((void(*)())func);
+
+ if (!addrIsStatic)
+ MOV(32, R(RSCRATCH3), rdMapped);
+
+ if (flags & memop_SignExtend)
+ MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ else
+ MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ }
+ }
+
+ if ((size == 32 && !(flags & memop_Store)))
{
- if (constLocalROR32 == 4)
+ if (addrIsStatic)
+ {
+ if (staticAddress & 0x3)
+ ROR_(32, rdMapped, Imm8((staticAddress & 0x3) * 8));
+ }
+ else
{
- static_assert(RSCRATCH3 == ECX);
- MOV(32, R(ECX), rdMapped);
- AND(32, R(ECX), Imm8(3));
- SHL(32, R(ECX), Imm8(3));
- ROR_(32, R(RSCRATCH), R(ECX));
+ AND(32, R(RSCRATCH3), Imm8(0x3));
+ SHL(32, R(RSCRATCH3), Imm8(3));
+ ROR_(32, rdMapped, R(RSCRATCH3));
}
- else if (constLocalROR32 != 0)
- ROR_(32, R(RSCRATCH), Imm8(constLocalROR32 << 3));
}
- if (flags & memop_SignExtend)
- MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ if (compileSlowPath)
+ {
+ SwitchToFarCode();
+ SetJumpTarget(slowPath);
+ }
+ }
+
+ if (compileSlowPath)
+ {
+ if (Num == 0)
+ {
+ MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
+ MOV(64, R(ABI_PARAM1), R(RCPU));
+ if (flags & memop_Store)
+ {
+ MOV(32, R(ABI_PARAM3), rdMapped);
+
+ switch (size)
+ {
+ case 32: CALL((void*)&SlowWrite9<u32>); break;
+ case 16: CALL((void*)&SlowWrite9<u16>); break;
+ case 8: CALL((void*)&SlowWrite9<u8>); break;
+ }
+ }
+ else
+ {
+ switch (size)
+ {
+ case 32: CALL((void*)&SlowRead9<u32>); break;
+ case 16: CALL((void*)&SlowRead9<u16>); break;
+ case 8: CALL((void*)&SlowRead9<u8>); break;
+ }
+ }
+ }
else
- MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ {
+ if (ABI_PARAM1 != RSCRATCH3)
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
+ if (flags & memop_Store)
+ {
+ MOV(32, R(ABI_PARAM2), rdMapped);
+
+ switch (size)
+ {
+ case 32: CALL((void*)&SlowWrite7<u32>); break;
+ case 16: CALL((void*)&SlowWrite7<u16>); break;
+ case 8: CALL((void*)&SlowWrite7<u8>); break;
+ }
+ }
+ else
+ {
+ switch (size)
+ {
+ case 32: CALL((void*)&SlowRead7<u32>); break;
+ case 16: CALL((void*)&SlowRead7<u16>); break;
+ case 8: CALL((void*)&SlowRead7<u8>); break;
+ }
+ }
+ }
+ if (!(flags & memop_Store))
+ {
+ if (flags & memop_SignExtend)
+ MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ else
+ MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ }
+ }
+
+ if (compileFastPath && compileSlowPath)
+ {
+ FixupBranch ret = J(true);
+ SwitchToNearCode();
+ SetJumpTarget(ret);
}
if (!(flags & memop_Store) && rd == 15)
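The fast path in the hunk above is guarded by a byte table with one entry per 512-byte page (hence the SHR by 9): stores compare the raw status byte against the expected memregion_* value, loads mask off bit 7 first. Modeled as plain C++ (a sketch, not melonDS code; reading bit 7 as a "writes need the slow path" flag is an assumption drawn from how only stores honour it):

#include <cstdint>

// What the emitted guard computes before the inlined fast path; a mismatch
// branches (J_CC(CC_NE)) to the fallback emitted in the far region, which
// jumps back to near code once it is done.
bool TakesFastPath(const uint8_t* memoryStatus, uint32_t addr,
                   uint8_t expectedRegion, bool isStore)
{
    uint8_t status = memoryStatus[addr >> 9]; // one status byte per 512-byte page
    if (!isStore)
        status &= ~0x80; // loads ignore bit 7 (assumed write-special flag)
    return status == expectedRegion;
}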
@@ -498,100 +433,160 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
{
- IrregularCycles = true;
-
int regsCount = regs.Count();
s32 offset = (regsCount * 4) * (decrement ? -1 : 1);
// we need to make sure that the stack stays aligned to 16 bytes
+#ifdef _WIN32
+ // include shadow
+ u32 stackAlloc = ((regsCount + 4 + 1) & ~1) * 8;
+#else
u32 stackAlloc = ((regsCount + 1) & ~1) * 8;
+#endif
+ u32 allocOffset = stackAlloc - regsCount * 8;
- if (!store)
+ int expectedTarget = Num == 0
+ ? ClassifyAddress9(CurInstr.DataRegion)
+ : ClassifyAddress7(CurInstr.DataRegion);
+ if (usermode || CurInstr.Cond() < 0xE)
+ expectedTarget = memregion_Other;
+
+ bool compileFastPath = false;
+
+ switch (expectedTarget)
{
+ case memregion_DTCM:
+ case memregion_MainRAM:
+ case memregion_SWRAM9:
+ case memregion_SWRAM7:
+ case memregion_WRAM7:
+ compileFastPath = true;
+ break;
+ default:
+ break;
+ }
+
+ if (!store)
Comp_AddCycles_CDI();
+ else
+ Comp_AddCycles_CD();
- if (decrement)
+ if (decrement)
+ {
+ MOV_sum(32, RSCRATCH4, MapReg(rn), Imm32(-regsCount * 4));
+ preinc ^= true;
+ }
+ else
+ MOV(32, R(RSCRATCH4), MapReg(rn));
+
+ if (compileFastPath)
+ {
+ assert(!usermode);
+
+ MOV(32, R(RSCRATCH), R(RSCRATCH4));
+ SHR(32, R(RSCRATCH), Imm8(9));
+
+ if (store)
{
- MOV_sum(32, ABI_PARAM1, MapReg(rn), Imm32(-regsCount * 4));
- preinc ^= true;
+ CMP(8, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)), Imm8(expectedTarget));
}
else
- MOV(32, R(ABI_PARAM1), MapReg(rn));
-
- MOV(32, R(ABI_PARAM3), Imm32(regsCount));
- SUB(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
- MOV(64, R(ABI_PARAM2), R(RSP));
-
- CALL(Num == 0
- ? MemoryFuncsSeq9[0][preinc]
- : MemoryFuncsSeq7[0][preinc][CodeRegion == 0x02]);
+ {
+ MOVZX(32, 8, RSCRATCH, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)));
+ AND(32, R(RSCRATCH), Imm8(~0x80));
+ CMP(32, R(RSCRATCH), Imm8(expectedTarget));
+ }
+ FixupBranch slowPath = J_CC(CC_NE, true);
- bool firstUserMode = true;
- for (int reg = 15; reg >= 0; reg--)
+ if (expectedTarget == memregion_DTCM)
{
- if (regs[reg])
+ SUB(32, R(RSCRATCH4), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
+ AND(32, R(RSCRATCH4), Imm32(0x3FFF & ~3));
+ LEA(64, RSCRATCH4, MComplex(RCPU, RSCRATCH4, 1, offsetof(ARMv5, DTCM)));
+ }
+ else if (expectedTarget == memregion_MainRAM)
+ {
+ AND(32, R(RSCRATCH4), Imm32((MAIN_RAM_SIZE - 1) & ~3));
+ ADD(64, R(RSCRATCH4), Imm32(squeezePointer(NDS::MainRAM)));
+ }
+ else if (expectedTarget == memregion_WRAM7)
+ {
+ AND(32, R(RSCRATCH4), Imm32(0xFFFF & ~3));
+ ADD(64, R(RSCRATCH4), Imm32(squeezePointer(NDS::ARM7WRAM)));
+ }
+ else // SWRAM
+ {
+ AND(32, R(RSCRATCH4), Imm8(~3));
+ AND(32, R(RSCRATCH4), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9Mask : &NDS::SWRAM_ARM7Mask));
+ ADD(64, R(RSCRATCH4), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9 : &NDS::SWRAM_ARM7));
+ }
+ u32 offset = 0;
+ for (int reg : regs)
+ {
+ if (preinc)
+ offset += 4;
+ OpArg mem = MDisp(RSCRATCH4, offset);
+ if (store)
{
- if (usermode && !regs[15] && reg >= 8 && reg < 15)
+ if (RegCache.LoadedRegs & (1 << reg))
{
- if (firstUserMode)
- {
- MOV(32, R(RSCRATCH), R(RCPSR));
- AND(32, R(RSCRATCH), Imm8(0x1F));
- firstUserMode = false;
- }
- MOV(32, R(RSCRATCH2), Imm32(reg - 8));
- POP(RSCRATCH3);
- CALL(WriteBanked);
- FixupBranch sucessfulWritten = J_CC(CC_NC);
- if (RegCache.Mapping[reg] != INVALID_REG)
- MOV(32, R(RegCache.Mapping[reg]), R(RSCRATCH3));
- else
- SaveReg(reg, RSCRATCH3);
- SetJumpTarget(sucessfulWritten);
+ MOV(32, mem, MapReg(reg));
}
- else if (RegCache.Mapping[reg] == INVALID_REG)
+ else
{
- assert(reg != 15);
-
- POP(RSCRATCH);
- SaveReg(reg, RSCRATCH);
+ LoadReg(reg, RSCRATCH);
+ MOV(32, mem, R(RSCRATCH));
+ }
+ }
+ else
+ {
+ if (RegCache.LoadedRegs & (1 << reg))
+ {
+ MOV(32, MapReg(reg), mem);
}
else
{
- if (reg != 15)
- RegCache.DirtyRegs |= (1 << reg);
- POP(MapReg(reg).GetSimpleReg());
+ MOV(32, R(RSCRATCH), mem);
+ SaveReg(reg, RSCRATCH);
}
}
+ if (!preinc)
+ offset += 4;
}
- if (regsCount & 1)
- POP(RSCRATCH);
+ SwitchToFarCode();
+ SetJumpTarget(slowPath);
+ }
+
+ if (!store)
+ {
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
+ MOV(32, R(ABI_PARAM3), Imm32(regsCount));
+ SUB(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
+ if (allocOffset == 0)
+ MOV(64, R(ABI_PARAM2), R(RSP));
+ else
+ LEA(64, ABI_PARAM2, MDisp(RSP, allocOffset));
+
+ if (Num == 0)
+ MOV(64, R(ABI_PARAM4), R(RCPU));
- if (regs[15])
+ switch (Num * 2 | preinc)
{
- if (Num == 1)
- {
- if (Thumb)
- OR(32, MapReg(15), Imm8(1));
- else
- AND(32, MapReg(15), Imm8(0xFE));
- }
- Comp_JumpTo(MapReg(15).GetSimpleReg(), usermode);
+ case 0: CALL((void*)&SlowBlockTransfer9<false, false>); break;
+ case 1: CALL((void*)&SlowBlockTransfer9<true, false>); break;
+ case 2: CALL((void*)&SlowBlockTransfer7<false, false>); break;
+ case 3: CALL((void*)&SlowBlockTransfer7<true, false>); break;
}
- }
- else
- {
- Comp_AddCycles_CD();
- if (regsCount & 1)
- PUSH(RSCRATCH);
+ if (allocOffset)
+ ADD(64, R(RSP), Imm8(allocOffset));
bool firstUserMode = true;
for (int reg : regs)
{
- if (usermode && reg >= 8 && reg < 15)
+ if (usermode && !regs[15] && reg >= 8 && reg < 15)
{
if (firstUserMode)
{
@@ -599,43 +594,107 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
AND(32, R(RSCRATCH), Imm8(0x1F));
firstUserMode = false;
}
- if (RegCache.Mapping[reg] == INVALID_REG)
- LoadReg(reg, RSCRATCH3);
- else
- MOV(32, R(RSCRATCH3), R(RegCache.Mapping[reg]));
MOV(32, R(RSCRATCH2), Imm32(reg - 8));
- CALL(ReadBanked);
- PUSH(RSCRATCH3);
+ POP(RSCRATCH3);
+ CALL(WriteBanked);
+ FixupBranch sucessfulWritten = J_CC(CC_NC);
+ if (RegCache.LoadedRegs & (1 << reg))
+ MOV(32, R(RegCache.Mapping[reg]), R(RSCRATCH3));
+ else
+ SaveReg(reg, RSCRATCH3);
+ SetJumpTarget(sucessfulWritten);
}
- else if (RegCache.Mapping[reg] == INVALID_REG)
+ else if (!(RegCache.LoadedRegs & (1 << reg)))
{
- LoadReg(reg, RSCRATCH);
- PUSH(RSCRATCH);
+ assert(reg != 15);
+
+ POP(RSCRATCH);
+ SaveReg(reg, RSCRATCH);
}
else
{
- PUSH(MapReg(reg).GetSimpleReg());
+ POP(MapReg(reg).GetSimpleReg());
}
}
-
- if (decrement)
+ }
+ else
+ {
+ bool firstUserMode = true;
+ for (int reg = 15; reg >= 0; reg--)
{
- MOV_sum(32, ABI_PARAM1, MapReg(rn), Imm32(-regsCount * 4));
- preinc ^= true;
+ if (regs[reg])
+ {
+ if (usermode && reg >= 8 && reg < 15)
+ {
+ if (firstUserMode)
+ {
+ MOV(32, R(RSCRATCH), R(RCPSR));
+ AND(32, R(RSCRATCH), Imm8(0x1F));
+ firstUserMode = false;
+ }
+ if (RegCache.Mapping[reg] == INVALID_REG)
+ LoadReg(reg, RSCRATCH3);
+ else
+ MOV(32, R(RSCRATCH3), R(RegCache.Mapping[reg]));
+ MOV(32, R(RSCRATCH2), Imm32(reg - 8));
+ CALL(ReadBanked);
+ PUSH(RSCRATCH3);
+ }
+ else if (!(RegCache.LoadedRegs & (1 << reg)))
+ {
+ LoadReg(reg, RSCRATCH);
+ PUSH(RSCRATCH);
+ }
+ else
+ {
+ PUSH(MapReg(reg).GetSimpleReg());
+ }
+ }
}
- else
- MOV(32, R(ABI_PARAM1), MapReg(rn));
- MOV(64, R(ABI_PARAM2), R(RSP));
+ if (allocOffset)
+ SUB(64, R(RSP), Imm8(allocOffset));
+
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
+ if (allocOffset)
+ LEA(64, ABI_PARAM2, MDisp(RSP, allocOffset));
+ else
+ MOV(64, R(ABI_PARAM2), R(RSP));
+
MOV(32, R(ABI_PARAM3), Imm32(regsCount));
+ if (Num == 0)
+ MOV(64, R(ABI_PARAM4), R(RCPU));
- CALL(Num == 0
- ? MemoryFuncsSeq9[1][preinc]
- : MemoryFuncsSeq7[1][preinc][CodeRegion == 0x02]);
+ switch (Num * 2 | preinc)
+ {
+ case 0: CALL((void*)&SlowBlockTransfer9<false, true>); break;
+ case 1: CALL((void*)&SlowBlockTransfer9<true, true>); break;
+ case 2: CALL((void*)&SlowBlockTransfer7<false, true>); break;
+ case 3: CALL((void*)&SlowBlockTransfer7<true, true>); break;
+ }
ADD(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
}
+ if (compileFastPath)
+ {
+ FixupBranch ret = J(true);
+ SwitchToNearCode();
+ SetJumpTarget(ret);
+ }
+
+ if (!store && regs[15])
+ {
+ if (Num == 1)
+ {
+ if (Thumb)
+ OR(32, MapReg(15), Imm8(1));
+ else
+ AND(32, MapReg(15), Imm8(0xFE));
+ }
+ Comp_JumpTo(MapReg(15).GetSimpleReg(), usermode);
+ }
+
return offset;
}
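Two details from the block-transfer hunks above, worked through. First, the slow-path dispatch packs the CPU number and the pre-increment flag into a two-bit switch index (Num * 2 | preinc); judging by the call sites, the templates' second parameter distinguishes the store path (true) from the load path (false). Second, the stack reservation keeps RSP 16-byte aligned and, on Win64, also leaves the 32-byte shadow space a callee may clobber. A compile-time check of that arithmetic (illustrative only):

#include <cstdint>

constexpr uint32_t StackAlloc(int regsCount, bool win64)
{
    // (n + 1) & ~1 rounds the 8-byte register slots up to an even count,
    // keeping RSP 16-byte aligned; Win64 adds 4 slots of shadow space.
    return win64 ? ((regsCount + 4 + 1) & ~1) * 8
                 : ((regsCount + 1) & ~1) * 8;
}

// e.g. three registers on Win64: reserve 64 bytes; the register buffer then
// sits at RSP + allocOffset, above the 32-byte shadow area and its padding.
static_assert(StackAlloc(3, true) == 64, "shadow + regs + padding");
static_assert(StackAlloc(3, true) - 3 * 8 == 40, "allocOffset");
static_assert(StackAlloc(3, false) == 32, "regs + padding");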
@@ -786,9 +845,7 @@ void Compiler::T_Comp_LoadPCRel()
{
u32 offset = (CurInstr.Instr & 0xFF) << 2;
u32 addr = (R15 & ~0x2) + offset;
- if (Config::JIT_LiteralOptimisations)
- Comp_MemLoadLiteral(32, CurInstr.T_Reg(8), addr);
- else
+ if (!Config::JIT_LiteralOptimisations || !Comp_MemLoadLiteral(32, CurInstr.T_Reg(8), addr))
Comp_MemAccess(CurInstr.T_Reg(8), 15, ComplexOperand(offset), 32, 0);
}