aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/ARM.cpp411
-rw-r--r--src/ARM.h234
-rw-r--r--src/ARMInterpreter.cpp221
-rw-r--r--src/ARMInterpreter.h35
-rw-r--r--src/ARMInterpreter_ALU.cpp1461
-rw-r--r--src/ARMInterpreter_ALU.h135
-rw-r--r--src/ARMInterpreter_Branch.cpp116
-rw-r--r--src/ARMInterpreter_Branch.h39
-rw-r--r--src/ARMInterpreter_LoadStore.cpp729
-rw-r--r--src/ARMInterpreter_LoadStore.h95
-rw-r--r--src/ARM_InstrTable.h1979
-rw-r--r--src/CP15.cpp300
-rw-r--r--src/CP15.h44
-rw-r--r--src/DMA.cpp269
-rw-r--r--src/DMA.h64
-rw-r--r--src/FIFO.h93
-rw-r--r--src/GPU.cpp732
-rw-r--r--src/GPU.h395
-rw-r--r--src/GPU2D.cpp1604
-rw-r--r--src/GPU2D.h97
-rw-r--r--src/GPU3D.cpp1917
-rw-r--r--src/GPU3D.h98
-rw-r--r--src/GPU3D_Soft.cpp853
-rw-r--r--src/NDS.cpp2192
-rw-r--r--src/NDS.h181
-rw-r--r--src/NDSCart.cpp939
-rw-r--r--src/NDSCart.h55
-rw-r--r--src/RTC.cpp255
-rw-r--r--src/RTC.h36
-rw-r--r--src/SPI.cpp457
-rw-r--r--src/SPI.h46
-rw-r--r--src/Wifi.cpp120
-rw-r--r--src/Wifi.h35
-rw-r--r--src/main.cpp272
-rw-r--r--src/types.h31
35 files changed, 16540 insertions, 0 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp
new file mode 100644
index 0000000..536c78c
--- /dev/null
+++ b/src/ARM.cpp
@@ -0,0 +1,411 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include "NDS.h"
+#include "ARM.h"
+#include "ARMInterpreter.h"
+#include "GPU3D.h"
+
+
+// Condition lookup, indexed by the 4-bit condition field of an instruction.
+// Bit i of an entry is set when that condition passes for flag nibble i,
+// where i = CPSR>>28 (N=8, Z=4, C=2, V=1); see ARM::CheckCondition.
+u32 ARM::ConditionTable[16] =
+{
+ 0xF0F0, // EQ
+ 0x0F0F, // NE
+ 0xCCCC, // CS
+ 0x3333, // CC
+ 0xFF00, // MI
+ 0x00FF, // PL
+ 0xAAAA, // VS
+ 0x5555, // VC
+ 0x0C0C, // HI
+ 0xF3F3, // LS
+ 0xAA55, // GE
+ 0x55AA, // LT
+ 0x0A05, // GT
+ 0xF5FA, // LE
+ 0xFFFF, // AL
+ 0x0000 // NV (never passes)
+};
+
+
+// Builds a CPU core. num == 0 is set up with the ARM9 timings, any other
+// value with the ARM7 timings. Only Num and the waitstate table are
+// initialised here.
+ARM::ARM(u32 num)
+{
+    // One row per memory region (address bits 24-27) whose timing is listed
+    // explicitly. Costs are in Waitstates order: code16, code32, data16, data32.
+    struct RegionTiming
+    {
+        u32 region;
+        s32 cost[4];
+    };
+
+    static const RegionTiming arm9timings[] =
+    {
+        {0x2, { 1,  1,  1,  1}}, // main RAM timing, assuming cache hit
+        {0x3, { 4,  8,  2,  2}},
+        {0x4, { 4,  8,  2,  2}},
+        {0x5, { 5, 10,  2,  4}},
+        {0x6, { 5, 10,  2,  4}},
+        {0x7, { 4,  8,  2,  2}},
+        {0x8, {19, 38, 12, 24}},
+        {0x9, {19, 38, 12, 24}},
+        {0xA, { 1,  1, 20, 20}}, // code timings stay at the default of 1
+        {0xF, { 4,  8,  2,  2}},
+    };
+
+    static const RegionTiming arm7timings[] =
+    {
+        {0x0, { 1,  1,  1,  1}},
+        {0x2, { 1,  2,  1,  2}},
+        {0x3, { 1,  1,  1,  1}},
+        {0x4, { 1,  1,  1,  1}},
+        {0x6, { 1,  2,  1,  2}},
+        {0x8, { 6, 12,  6, 12}},
+        {0x9, { 6, 12,  6, 12}},
+        {0xA, { 1,  1, 10, 10}}, // code timings stay at the default of 1
+    };
+
+    // well uh
+    Num = num;
+
+    // every access costs one cycle unless a table row says otherwise
+    for (int kind = 0; kind < 4; kind++)
+    {
+        for (int region = 0; region < 16; region++)
+            Waitstates[kind][region] = 1;
+    }
+
+    const RegionTiming* table;
+    int rows;
+    if (num)
+    {
+        // ARM7
+        table = arm7timings;
+        rows = (int)(sizeof(arm7timings) / sizeof(arm7timings[0]));
+    }
+    else
+    {
+        // ARM9
+        table = arm9timings;
+        rows = (int)(sizeof(arm9timings) / sizeof(arm9timings[0]));
+    }
+
+    for (int row = 0; row < rows; row++)
+    {
+        for (int kind = 0; kind < 4; kind++)
+            Waitstates[kind][table[row].region] = table[row].cost[kind];
+    }
+}
+
+// Destructor: the body is empty, nothing is released here.
+ARM::~ARM()
+{
+ // dorp
+}
+
+// Returns the core to a known start state and begins fetching from the
+// reset vector (ExceptionBase + 0).
+void ARM::Reset()
+{
+    Cycles = 0;
+    Halted = 0;
+
+    for (int i = 0; i < 16; i++)
+        R[i] = 0;
+
+    // Clear the banked registers and saved PSRs as well. The constructor does
+    // not initialise them, and UpdateMode() swaps them into R[] on the first
+    // mode change, so leaving them alone would feed indeterminate values in.
+    for (int i = 0; i < 8; i++)
+        R_FIQ[i] = 0;
+
+    for (int i = 0; i < 3; i++)
+    {
+        R_SVC[i] = 0;
+        R_ABT[i] = 0;
+        R_IRQ[i] = 0;
+        R_UND[i] = 0;
+    }
+
+    // mode 0x13 (the R_SVC bank), T bit clear, bits 0x80 and 0x40 set
+    CPSR = 0x000000D3;
+
+    // core 0 takes its exception vectors from 0xFFFF0000, the other core from 0
+    ExceptionBase = Num ? 0x00000000 : 0xFFFF0000;
+
+    // zorp
+    JumpTo(ExceptionBase);
+}
+
+// Redirects execution to addr and refills the two-entry prefetch queue.
+// Bit 0 of addr selects THUMB (set) or ARM (clear). When restorecpsr is true,
+// CPSR is first reloaded through RestoreCPSR() and its T bit (0x20) replaces
+// bit 0 of addr.
+void ARM::JumpTo(u32 addr, bool restorecpsr)
+{
+    if (restorecpsr)
+    {
+        RestoreCPSR();
+        addr = (CPSR & 0x20) ? (addr | 0x1) : (addr & ~0x1);
+    }
+
+    const bool thumb = (addr & 0x1) != 0;
+
+    if (!thumb)
+    {
+        // ARM: word-aligned target, R15 ends up one instruction ahead of it
+        const u32 target = addr & ~0x3;
+        R[15] = target + 4;
+        NextInstr[0] = CodeRead32(target);
+        NextInstr[1] = CodeRead32(target + 4);
+        CPSR &= ~0x20;
+        return;
+    }
+
+    // THUMB: halfword-aligned target
+    const u32 target = addr & ~0x1;
+    R[15] = target + 2;
+    NextInstr[0] = CodeRead16(target);
+    NextInstr[1] = CodeRead16(target + 2);
+    CPSR |= 0x20;
+}
+
+// Reloads CPSR from the saved PSR slot of the current mode, then rebanks the
+// registers for whatever mode that value selects. In a mode without a saved
+// PSR slot a warning is printed and CPSR is left as it is.
+void ARM::RestoreCPSR()
+{
+    const u32 previous = CPSR;
+    const u32* saved = 0;
+
+    switch (previous & 0x1F)
+    {
+    case 0x11: saved = &R_FIQ[7]; break;
+    case 0x12: saved = &R_IRQ[2]; break;
+    case 0x13: saved = &R_SVC[2]; break;
+    case 0x17: saved = &R_ABT[2]; break;
+    case 0x1B: saved = &R_UND[2]; break;
+    }
+
+    if (saved)
+        CPSR = *saved;
+    else
+        printf("!! attempt to restore CPSR under bad mode %02X, %08X\n", CPSR&0x1F, R[15]);
+
+    UpdateMode(previous, CPSR);
+}
+
+// Rebanks R[] when the mode field (low 5 bits) changes between oldmode and
+// newmode; both arguments are full PSR values. Does nothing when the mode
+// field is the same.
+void ARM::UpdateMode(u32 oldmode, u32 newmode)
+{
+    const u32 leaving = oldmode & 0x1F;
+    const u32 entering = newmode & 0x1F;
+    if (leaving == entering) return;
+
+    // Exchanging R[] with a mode's bank is its own inverse, so one exchange
+    // routine serves both steps: first park the outgoing mode's registers,
+    // then bring in the incoming mode's.
+    const u32 modes[2] = { leaving, entering };
+
+    for (int pass = 0; pass < 2; pass++)
+    {
+        u32* bank;
+        int first; // lowest register number held in this mode's bank
+
+        switch (modes[pass])
+        {
+        case 0x11: bank = R_FIQ; first = 8;  break;
+        case 0x12: bank = R_IRQ; first = 13; break;
+        case 0x13: bank = R_SVC; first = 13; break;
+        case 0x17: bank = R_ABT; first = 13; break;
+        case 0x1B: bank = R_UND; first = 13; break;
+        default: continue; // no bank is exchanged for any other mode value
+        }
+
+        for (int reg = first; reg <= 14; reg++)
+        {
+            const u32 held = R[reg];
+            R[reg] = bank[reg - first];
+            bank[reg - first] = held;
+        }
+    }
+}
+
+// Enters the IRQ handler at ExceptionBase + 0x18 unless CPSR bit 0x80 is set.
+// The previous CPSR is kept in R_IRQ[2] and the return address in R14.
+void ARM::TriggerIRQ()
+{
+    const bool masked = (CPSR & 0x80) != 0;
+    if (masked)
+        return;
+
+    const u32 savedcpsr = CPSR;
+
+    // replace the low byte of CPSR with 0xD2 (mode 0x12, the R_IRQ bank)
+    CPSR = (savedcpsr & ~0xFF) | 0xD2;
+    UpdateMode(savedcpsr, CPSR);
+
+    R_IRQ[2] = savedcpsr;
+
+    // when interrupted in THUMB state the return address is two bytes further on
+    u32 returnaddr = R[15];
+    if (savedcpsr & 0x20)
+        returnaddr += 2;
+    R[14] = returnaddr;
+
+    JumpTo(ExceptionBase + 0x18);
+}
+
+// Runs instructions until at least CyclesToRun cycles have been counted or the
+// core is halted, and returns the number of cycles counted (Cycles).
+s32 ARM::Execute()
+{
+ // A halted core either wakes up (and takes the IRQ if IME bit 0 is set)
+ // or reports the whole timeslice as consumed without executing anything.
+ if (Halted)
+ {
+ if (NDS::HaltInterrupted(Num))
+ {
+ Halted = 0;
+ if (NDS::IME[Num]&1)
+ TriggerIRQ();
+ }
+ else
+ {
+ Cycles = CyclesToRun;
+ GPU3D::Run(CyclesToRun >> 1);
+ return Cycles;
+ }
+ }
+
+ Cycles = 0;
+ s32 lastcycles = 0;
+ // addr and cpsr are only written, never read (debug leftovers, see the end of the loop)
+ u32 addr = R[15] - (CPSR&0x20 ? 4:8);
+ u32 cpsr = CPSR;
+
+ while (Cycles < CyclesToRun)
+ {
+ //if(Num==1)printf("%08X %08X\n", R[15] - (CPSR&0x20 ? 4:8), NextInstr);
+
+ if (CPSR & 0x20) // THUMB
+ {
+ // prefetch
+ R[15] += 2;
+ CurInstr = NextInstr[0];
+ NextInstr[0] = NextInstr[1];
+ NextInstr[1] = CodeRead16(R[15]);
+
+ // actually execute
+ // the handler index is CurInstr >> 6 (the top 10 bits of a 16-bit opcode)
+ u32 icode = (CurInstr >> 6);
+ ARMInterpreter::THUMBInstrTable[icode](this);
+ }
+ else
+ {
+ // prefetch
+ R[15] += 4;
+ CurInstr = NextInstr[0];
+ NextInstr[0] = NextInstr[1];
+ NextInstr[1] = CodeRead32(R[15]);
+
+ // actually execute
+ if (CheckCondition(CurInstr >> 28))
+ {
+ // 12-bit handler index: instruction bits 20-27 (high part) and bits 4-7 (low part)
+ u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0);
+ ARMInterpreter::ARMInstrTable[icode](this);
+ }
+ // opcodes 0xFAxxxxxx/0xFBxxxxxx carry condition field 0xF, which never passes above
+ else if ((CurInstr & 0xFE000000) == 0xFA000000)
+ {
+ ARMInterpreter::A_BLX_IMM(this);
+ }
+ }
+
+ //if (R[15]==0x037F9364) printf("R8=%08X R9=%08X\n", R[8], R[9]);
+
+ // gross hack
+ // TODO, though: move timer code here too?
+ // quick testing shows that moving this to the NDS loop doesn't really slow things down
+ // core 0 only: run GPU3D for half the cycles elapsed since the last call;
+ // an odd leftover cycle is carried over by backing lastcycles off by one
+ if (Num==0)
+ {
+ s32 diff = Cycles - lastcycles;
+ GPU3D::Run(diff >> 1);
+ lastcycles = Cycles - (diff&1);
+ }
+
+ // TODO optimize this shit!!!
+ // a handler may have halted the core: leave the loop, and for Halted == 1
+ // also report the rest of the timeslice as consumed
+ if (Halted)
+ {
+ if (Halted == 1)
+ Cycles = CyclesToRun;
+ break;
+ }
+ if (NDS::HaltInterrupted(Num))
+ {
+ if (NDS::IME[Num]&1)
+ TriggerIRQ();
+ }
+
+ // temp. debug cruft
+ addr = R[15] - (CPSR&0x20 ? 4:8);
+ cpsr = CPSR;
+ }
+
+ // Halted == 2 only stops the current timeslice; it is cleared before returning
+ if (Halted == 2)
+ Halted = 0;
+
+ return Cycles;
+}
diff --git a/src/ARM.h b/src/ARM.h
new file mode 100644
index 0000000..79c2bce
--- /dev/null
+++ b/src/ARM.h
@@ -0,0 +1,234 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef ARM_H
+#define ARM_H
+
+#include "types.h"
+#include "NDS.h"
+#include "CP15.h"
+
+// lame
+// Identity wrappers: each one expands to its argument unchanged.
+// NOTE(review): presumably markers for sequential / nonsequential / internal
+// cycle counts -- confirm against the code that uses them.
+#define C_S(x) x
+#define C_N(x) x
+#define C_I(x) x
+
+// Rotates the 32-bit value x right by n bits. Both shift counts are reduced
+// modulo 32 so that n == 0 (or any multiple of 32) yields x unchanged instead
+// of shifting by the full operand width, which is undefined behaviour in C++.
+#define ROR(x, n) (((x) >> ((n) & 0x1F)) | ((x) << ((32-(n)) & 0x1F)))
+
+// One emulated CPU core. Num == 0 takes the ARM9 paths (CP15 hooks and
+// NDS::ARM9* memory accessors), any other value the ARM7 paths.
+class ARM
+{
+public:
+ ARM(u32 num);
+ ~ARM(); // destroy shit
+
+ void Reset();
+
+ // Branch to addr (bit 0 selects THUMB); optionally reload CPSR from the
+ // current mode's saved PSR first.
+ void JumpTo(u32 addr, bool restorecpsr = false);
+ void RestoreCPSR();
+
+ // Stores halt in Halted. Execute() treats any nonzero value as halted,
+ // and handles the values 1 and 2 differently.
+ void Halt(u32 halt)
+ {
+ Halted = halt;
+ }
+
+ s32 Execute();
+
+ // Returns true when condition code `code` (0-15) passes for the current
+ // N/Z/C/V flags. 0xE is short-circuited; everything else goes through
+ // ConditionTable, indexed by code and tested at bit CPSR>>28.
+ bool CheckCondition(u32 code)
+ {
+ if (code == 0xE) return true;
+ if (ConditionTable[code] & (1 << (CPSR>>28))) return true;
+ return false;
+ }
+
+ // Sets or clears the C flag (CPSR bit 29).
+ void SetC(bool c)
+ {
+ if (c) CPSR |= 0x20000000;
+ else CPSR &= ~0x20000000;
+ }
+
+ // Replaces the N (bit 31) and Z (bit 30) flags; C and V are untouched.
+ void SetNZ(bool n, bool z)
+ {
+ CPSR &= ~0xC0000000;
+ if (n) CPSR |= 0x80000000;
+ if (z) CPSR |= 0x40000000;
+ }
+
+ // Replaces all four flags: N (bit 31), Z (30), C (29), V (28).
+ void SetNZCV(bool n, bool z, bool c, bool v)
+ {
+ CPSR &= ~0xF0000000;
+ if (n) CPSR |= 0x80000000;
+ if (z) CPSR |= 0x40000000;
+ if (c) CPSR |= 0x20000000;
+ if (v) CPSR |= 0x10000000;
+ }
+
+ // Both arguments are full PSR values; only their low 5 bits (the mode) are compared.
+ void UpdateMode(u32 oldmode, u32 newmode);
+
+ void TriggerIRQ();
+
+
+ // Memory accessors. Each one performs the access for this core, then adds
+ // the cost from Waitstates[kind][(addr>>24)&0xF] to Cycles. On core 0 the
+ // CP15 hook is tried first and the NDS bus is used only when it returns false.
+
+ // 16-bit opcode fetch (waitstate kind 0).
+ u16 CodeRead16(u32 addr)
+ {
+ u16 val;
+ // TODO eventually: on ARM9, THUMB opcodes are prefetched with 32bit reads
+ if (!Num)
+ {
+ if (!CP15::HandleCodeRead16(addr, &val))
+ val = NDS::ARM9Read16(addr);
+ }
+ else
+ val = NDS::ARM7Read16(addr);
+
+ Cycles += Waitstates[0][(addr>>24)&0xF];
+ return val;
+ }
+
+ // 32-bit opcode fetch (waitstate kind 1).
+ u32 CodeRead32(u32 addr)
+ {
+ u32 val;
+ if (!Num)
+ {
+ if (!CP15::HandleCodeRead32(addr, &val))
+ val = NDS::ARM9Read32(addr);
+ }
+ else
+ val = NDS::ARM7Read32(addr);
+
+ Cycles += Waitstates[1][(addr>>24)&0xF];
+ return val;
+ }
+
+
+ // Data accessors. forceuser is passed through to the CP15 hooks only;
+ // the ARM7 path ignores it.
+
+ // 8-bit data read, charged at the data16 rate (kind 2).
+ u8 DataRead8(u32 addr, u32 forceuser=0)
+ {
+ u8 val;
+ if (!Num)
+ {
+ if (!CP15::HandleDataRead8(addr, &val, forceuser))
+ val = NDS::ARM9Read8(addr);
+ }
+ else
+ val = NDS::ARM7Read8(addr);
+
+ Cycles += Waitstates[2][(addr>>24)&0xF];
+ return val;
+ }
+
+ // 16-bit data read; the address is forced to halfword alignment.
+ u16 DataRead16(u32 addr, u32 forceuser=0)
+ {
+ u16 val;
+ addr &= ~1;
+ if (!Num)
+ {
+ if (!CP15::HandleDataRead16(addr, &val, forceuser))
+ val = NDS::ARM9Read16(addr);
+ }
+ else
+ val = NDS::ARM7Read16(addr);
+
+ Cycles += Waitstates[2][(addr>>24)&0xF];
+ return val;
+ }
+
+ // 32-bit data read; the address is forced to word alignment.
+ u32 DataRead32(u32 addr, u32 forceuser=0)
+ {
+ u32 val;
+ addr &= ~3;
+ if (!Num)
+ {
+ if (!CP15::HandleDataRead32(addr, &val, forceuser))
+ val = NDS::ARM9Read32(addr);
+ }
+ else
+ val = NDS::ARM7Read32(addr);
+
+ Cycles += Waitstates[3][(addr>>24)&0xF];
+ return val;
+ }
+
+ // 8-bit data write, charged at the data16 rate (kind 2).
+ void DataWrite8(u32 addr, u8 val, u32 forceuser=0)
+ {
+ if (!Num)
+ {
+ if (!CP15::HandleDataWrite8(addr, val, forceuser))
+ NDS::ARM9Write8(addr, val);
+ }
+ else
+ NDS::ARM7Write8(addr, val);
+
+ Cycles += Waitstates[2][(addr>>24)&0xF];
+ }
+
+ // 16-bit data write; the address is forced to halfword alignment.
+ void DataWrite16(u32 addr, u16 val, u32 forceuser=0)
+ {
+ addr &= ~1;
+ if (!Num)
+ {
+ if (!CP15::HandleDataWrite16(addr, val, forceuser))
+ NDS::ARM9Write16(addr, val);
+ }
+ else
+ NDS::ARM7Write16(addr, val);
+
+ Cycles += Waitstates[2][(addr>>24)&0xF];
+ }
+
+ // 32-bit data write; the address is forced to word alignment.
+ void DataWrite32(u32 addr, u32 val, u32 forceuser=0)
+ {
+ addr &= ~3;
+ if (!Num)
+ {
+ if (!CP15::HandleDataWrite32(addr, val, forceuser))
+ NDS::ARM9Write32(addr, val);
+ }
+ else
+ NDS::ARM7Write32(addr, val);
+
+ Cycles += Waitstates[3][(addr>>24)&0xF];
+ }
+
+
+ // core number given to the constructor (0 selects the ARM9 paths)
+ u32 Num;
+
+ // waitstates:
+ // 0=code16 1=code32 2=data16 3=data32
+ // second index is bits 24-27 of the accessed address
+ // TODO eventually: nonsequential waitstates
+ s32 Waitstates[4][16];
+
+ // Cycles: cycles counted so far in the current Execute() call.
+ // CyclesToRun: budget for that call; not written by any code in this class,
+ // presumably set by the caller before Execute() -- confirm.
+ s32 Cycles;
+ s32 CyclesToRun;
+ u32 Halted;
+
+ u32 R[16]; // heh
+ u32 CPSR;
+ // Banked registers per mode. R_FIQ[0..6] hold R8-R14 and R_FIQ[7] the saved
+ // PSR; the other banks hold R13, R14 and the saved PSR in [0], [1], [2].
+ u32 R_FIQ[8]; // holding SPSR too
+ u32 R_SVC[3];
+ u32 R_ABT[3];
+ u32 R_IRQ[3];
+ u32 R_UND[3];
+ // CurInstr is the opcode being executed, NextInstr the two prefetched ones.
+ u32 CurInstr;
+ u32 NextInstr[2];
+
+ // base address of the exception vectors (set in Reset())
+ u32 ExceptionBase;
+
+ static u32 ConditionTable[16];
+
+ u32 debug;
+};
+
+#endif // ARM_H
diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp
new file mode 100644
index 0000000..32b3a00
--- /dev/null
+++ b/src/ARMInterpreter.cpp
@@ -0,0 +1,221 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include "NDS.h"
+#include "CP15.h"
+#include "ARMInterpreter.h"
+#include "ARMInterpreter_ALU.h"
+#include "ARMInterpreter_Branch.h"
+#include "ARMInterpreter_LoadStore.h"
+
+
+namespace ARMInterpreter
+{
+
+
+// Handler for ARM opcodes with no implementation: logs the opcode, its
+// address and all sixteen registers, then halts the emulated system.
+void A_UNK(ARM* cpu)
+{
+    const int core = cpu->Num ? 7 : 9;
+    const u32 instraddr = cpu->R[15] - 8;
+    printf("undefined ARM%d instruction %08X @ %08X\n", core, cpu->CurInstr, instraddr);
+
+    int reg = 0;
+    while (reg < 16)
+    {
+        printf("R%d: %08X\n", reg, cpu->R[reg]);
+        reg++;
+    }
+
+    NDS::Halt();
+}
+
+// Handler for THUMB opcodes with no implementation: logs the opcode and its
+// address, then halts the emulated system.
+void T_UNK(ARM* cpu)
+{
+    const int core = cpu->Num ? 7 : 9;
+    const u32 instraddr = cpu->R[15] - 4;
+    printf("undefined THUMB%d instruction %04X @ %08X\n", core, cpu->CurInstr, instraddr);
+
+    NDS::Halt();
+}
+
+
+
+// MSR with an immediate operand: writes selected bytes of CPSR, or of the
+// current mode's saved PSR when instruction bit 22 is set.
+void A_MSR_IMM(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const bool tosaved = (instr & (1<<22)) != 0;
+
+    u32* target = &cpu->CPSR;
+    if (tosaved)
+    {
+        const u32 mode = cpu->CPSR & 0x1F;
+        if      (mode == 0x11) target = &cpu->R_FIQ[7];
+        else if (mode == 0x12) target = &cpu->R_IRQ[2];
+        else if (mode == 0x13) target = &cpu->R_SVC[2];
+        else if (mode == 0x17) target = &cpu->R_ABT[2];
+        else if (mode == 0x1B) target = &cpu->R_UND[2];
+        else
+        {
+            printf("bad CPU mode %08X\n", cpu->CPSR);
+            return;
+        }
+    }
+
+    const u32 before = *target;
+
+    // instruction bits 16-19 each enable one byte of the PSR for writing
+    u32 mask = 0;
+    for (int field = 0; field < 4; field++)
+    {
+        if (instr & (1 << (16 + field)))
+            mask |= 0xFFu << (field * 8);
+    }
+
+    // bit 0x20 of CPSR is never changed through this instruction
+    if (!tosaved)
+        mask &= 0xFFFFFFDF;
+
+    // in mode 0x10 the low byte is write-protected
+    if ((cpu->CPSR & 0x1F) == 0x10)
+        mask &= 0xFFFFFF00;
+
+    const u32 val = ROR((instr & 0xFF), ((instr >> 7) & 0x1E));
+
+    *target = (*target & ~mask) | (val & mask);
+
+    // a CPSR write may have changed the mode, so rebank the registers
+    if (!tosaved)
+        cpu->UpdateMode(before, cpu->CPSR);
+}
+
+// MSR with a register operand: writes selected bytes of CPSR, or of the
+// current mode's saved PSR when instruction bit 22 is set, from register Rm.
+void A_MSR_REG(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const bool tosaved = (instr & (1<<22)) != 0;
+
+    u32* target = &cpu->CPSR;
+    if (tosaved)
+    {
+        const u32 mode = cpu->CPSR & 0x1F;
+        if      (mode == 0x11) target = &cpu->R_FIQ[7];
+        else if (mode == 0x12) target = &cpu->R_IRQ[2];
+        else if (mode == 0x13) target = &cpu->R_SVC[2];
+        else if (mode == 0x17) target = &cpu->R_ABT[2];
+        else if (mode == 0x1B) target = &cpu->R_UND[2];
+        else
+        {
+            printf("bad CPU mode %08X\n", cpu->CPSR);
+            return;
+        }
+    }
+
+    const u32 before = *target;
+
+    // instruction bits 16-19 each enable one byte of the PSR for writing
+    u32 mask = 0;
+    for (int field = 0; field < 4; field++)
+    {
+        if (instr & (1 << (16 + field)))
+            mask |= 0xFFu << (field * 8);
+    }
+
+    // bit 0x20 of CPSR is never changed through this instruction
+    if (!tosaved)
+        mask &= 0xFFFFFFDF;
+
+    // in mode 0x10 the low byte is write-protected
+    if ((cpu->CPSR & 0x1F) == 0x10)
+        mask &= 0xFFFFFF00;
+
+    const u32 val = cpu->R[instr & 0xF];
+
+    *target = (*target & ~mask) | (val & mask);
+
+    // a CPSR write may have changed the mode, so rebank the registers
+    if (!tosaved)
+        cpu->UpdateMode(before, cpu->CPSR);
+}
+
+// MRS: copies CPSR, or the current mode's saved PSR when instruction bit 22
+// is set, into register Rd (instruction bits 12-15). In a mode without a
+// saved PSR a warning is printed and Rd is left unchanged.
+void A_MRS(ARM* cpu)
+{
+    const u32 rd = (cpu->CurInstr>>12) & 0xF;
+
+    if (!(cpu->CurInstr & (1<<22)))
+    {
+        cpu->R[rd] = cpu->CPSR;
+        return;
+    }
+
+    const u32* saved;
+    switch (cpu->CPSR & 0x1F)
+    {
+    case 0x11: saved = &cpu->R_FIQ[7]; break;
+    case 0x12: saved = &cpu->R_IRQ[2]; break;
+    case 0x13: saved = &cpu->R_SVC[2]; break;
+    case 0x17: saved = &cpu->R_ABT[2]; break;
+    case 0x1B: saved = &cpu->R_UND[2]; break;
+    default:
+        printf("bad CPU mode %08X\n", cpu->CPSR);
+        return;
+    }
+
+    cpu->R[rd] = *saved;
+}
+
+
+// MCR: writes register Rd (instruction bits 12-15) to a coprocessor register.
+// Only coprocessor 15 on core 0 is handled; anything else is logged.
+void A_MCR(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const u32 cp = (instr >> 8) & 0xF;
+    //u32 op = (instr >> 21) & 0x7;
+    const u32 cn = (instr >> 16) & 0xF;
+    const u32 cm = instr & 0xF;
+    const u32 cpinfo = (instr >> 5) & 0x7;
+
+    const bool handled = (cpu->Num == 0) && (cp == 15);
+    if (!handled)
+    {
+        printf("bad MCR opcode p%d,%d,%d,%d on ARM%d\n", cp, cn, cm, cpinfo, cpu->Num?7:9);
+    }
+    else
+    {
+        // the CP15 register id packs cn, cm and cpinfo into 12 bits
+        const u32 id = (cn<<8)|(cm<<4)|cpinfo;
+        CP15::Write(id, cpu->R[(instr>>12)&0xF]);
+    }
+
+    cpu->Cycles += 2; // TODO: checkme
+}
+
+// MRC: reads a coprocessor register into Rd (instruction bits 12-15).
+// Only coprocessor 15 on core 0 is handled; anything else is logged and Rd
+// is left unchanged.
+void A_MRC(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const u32 cp = (instr >> 8) & 0xF;
+    //u32 op = (instr >> 21) & 0x7;
+    const u32 cn = (instr >> 16) & 0xF;
+    const u32 cm = instr & 0xF;
+    const u32 cpinfo = (instr >> 5) & 0x7;
+
+    const bool handled = (cpu->Num == 0) && (cp == 15);
+    if (!handled)
+    {
+        printf("bad MRC opcode p%d,%d,%d,%d on ARM%d\n", cp, cn, cm, cpinfo, cpu->Num?7:9);
+    }
+    else
+    {
+        // the CP15 register id packs cn, cm and cpinfo into 12 bits
+        const u32 id = (cn<<8)|(cm<<4)|cpinfo;
+        cpu->R[(instr>>12)&0xF] = CP15::Read(id);
+    }
+
+    cpu->Cycles += 3; // TODO: checkme
+}
+
+
+
+// SVC from ARM state: switches to mode 0x13, keeps the old CPSR in R_SVC[2],
+// stores R15 - 4 as the return address and jumps to ExceptionBase + 0x08.
+void A_SVC(ARM* cpu)
+{
+    const u32 savedcpsr = cpu->CPSR;
+
+    // replace the low byte of CPSR with 0xD3
+    cpu->CPSR = (savedcpsr & ~0xFF) | 0xD3;
+    cpu->UpdateMode(savedcpsr, cpu->CPSR);
+
+    cpu->R_SVC[2] = savedcpsr;
+    cpu->R[14] = cpu->R[15] - 4;
+
+    cpu->JumpTo(cpu->ExceptionBase + 0x08);
+}
+
+// SVC from THUMB state: switches to mode 0x13, keeps the old CPSR in
+// R_SVC[2], stores R15 - 2 as the return address and jumps to
+// ExceptionBase + 0x08.
+void T_SVC(ARM* cpu)
+{
+    const u32 savedcpsr = cpu->CPSR;
+
+    // replace the low byte of CPSR with 0xD3
+    cpu->CPSR = (savedcpsr & ~0xFF) | 0xD3;
+    cpu->UpdateMode(savedcpsr, cpu->CPSR);
+
+    cpu->R_SVC[2] = savedcpsr;
+    cpu->R[14] = cpu->R[15] - 2;
+
+    cpu->JumpTo(cpu->ExceptionBase + 0x08);
+}
+
+
+
+// ARM_InstrTable.h is included with INSTRFUNC_PROTO(x) expanding to a
+// declarator for x as a pointer to a void(ARM*) handler.
+// NOTE(review): presumably this is where ARMInstrTable and THUMBInstrTable
+// get their definitions -- confirm in ARM_InstrTable.h.
+#define INSTRFUNC_PROTO(x) void (*x)(ARM* cpu)
+#include "ARM_InstrTable.h"
+#undef INSTRFUNC_PROTO
+
+}
diff --git a/src/ARMInterpreter.h b/src/ARMInterpreter.h
new file mode 100644
index 0000000..2d4c1a8
--- /dev/null
+++ b/src/ARMInterpreter.h
@@ -0,0 +1,35 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef ARMINTERPRETER_H
+#define ARMINTERPRETER_H
+
+#include "types.h"
+#include "ARM.h"
+
+namespace ARMInterpreter
+{
+
+// Handler tables, one void(ARM*) function pointer per decoded opcode index.
+// ARM::Execute indexes ARMInstrTable with a 12-bit value built from
+// instruction bits 20-27 and 4-7, and THUMBInstrTable with opcode >> 6.
+extern void (*ARMInstrTable[4096])(ARM* cpu);
+extern void (*THUMBInstrTable[1024])(ARM* cpu);
+
+// Called directly by ARM::Execute for opcodes matching 0xFA000000 under
+// mask 0xFE000000, instead of going through the table.
+void A_BLX_IMM(ARM* cpu); // I'm a special one look at me
+
+}
+
+#endif // ARMINTERPRETER_H
diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp
new file mode 100644
index 0000000..d6c5abd
--- /dev/null
+++ b/src/ARMInterpreter_ALU.cpp
@@ -0,0 +1,1461 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include "ARM.h"
+
+
+// Flag helpers for 32-bit unsigned operands. Every argument is parenthesised
+// so that expression arguments (e.g. `x + 1`, `x & y`) are evaluated as a unit.
+
+// Carry out of a + b: true when the sum does not fit in 32 bits.
+#define CARRY_ADD(a, b) ((0xFFFFFFFF-(a)) < (b))
+// Carry (i.e. "no borrow") out of a - b: true when a >= b.
+#define CARRY_SUB(a, b) ((a) >= (b))
+
+// Signed overflow of res = a + b: operands share a sign that the result lacks.
+#define OVERFLOW_ADD(a, b, res) ((!(((a) ^ (b)) & 0x80000000)) && (((a) ^ (res)) & 0x80000000))
+// Signed overflow of res = a - b: operand signs differ and the result's sign differs from a's.
+#define OVERFLOW_SUB(a, b, res) ((((a) ^ (b)) & 0x80000000) && (((a) ^ (res)) & 0x80000000))
+
+
+namespace ARMInterpreter
+{
+
+
+// Shifts by an immediate amount s (a 5-bit field, 0-31), applied in place to
+// x without touching the flags. These expand to bare statements, so they
+// rely on `cpu` being in scope and must be used at statement level.
+
+// Logical shift left by s.
+#define LSL_IMM(x, s) \
+ x <<= s;
+
+// Logical shift right; s == 0 produces 0.
+#define LSR_IMM(x, s) \
+ if (s == 0) x = 0; \
+ else x >>= s;
+
+// Arithmetic shift right; s == 0 fills the whole word with the sign bit.
+#define ASR_IMM(x, s) \
+ if (s == 0) x = ((s32)x) >> 31; \
+ else x = ((s32)x) >> s;
+
+// Rotate right; s == 0 shifts right by one and inserts the current C flag
+// (CPSR bit 29) at bit 31.
+#define ROR_IMM(x, s) \
+ if (s == 0) \
+ { \
+ x = (x >> 1) | ((cpu->CPSR & 0x20000000) << 2); \
+ } \
+ else \
+ { \
+ x = ROR(x, s); \
+ }
+
+// Flag-setting versions of the immediate shifts: besides shifting x in place
+// they store the last bit shifted out in the C flag through cpu->SetC().
+
+// s == 0 leaves both x and C unchanged.
+#define LSL_IMM_S(x, s) \
+ if (s > 0) \
+ { \
+ cpu->SetC(x & (1<<(32-s))); \
+ x <<= s; \
+ }
+
+// s == 0: C = bit 31 of x, result 0.
+#define LSR_IMM_S(x, s) \
+ if (s == 0) { \
+ cpu->SetC(x & (1<<31)); \
+ x = 0; \
+ } else { \
+ cpu->SetC(x & (1<<(s-1))); \
+ x >>= s; \
+ }
+
+// s == 0: C = bit 31 of x, result is the sign bit replicated.
+#define ASR_IMM_S(x, s) \
+ if (s == 0) { \
+ cpu->SetC(x & (1<<31)); \
+ x = ((s32)x) >> 31; \
+ } else { \
+ cpu->SetC(x & (1<<(s-1))); \
+ x = ((s32)x) >> s; \
+ }
+
+// s == 0: shift right by one, old C enters at bit 31, old bit 0 becomes C.
+#define ROR_IMM_S(x, s) \
+ if (s == 0) \
+ { \
+ u32 newc = (x & 1); \
+ x = (x >> 1) | ((cpu->CPSR & 0x20000000) << 2); \
+ cpu->SetC(newc); \
+ } \
+ else \
+ { \
+ cpu->SetC(x & (1<<(s-1))); \
+ x = ROR(x, s); \
+ }
+
+// Shifts by an amount s taken from a register, applied in place to x
+// without touching the flags. Amounts above 31 are handled explicitly.
+
+// Shift left; more than 31 gives 0.
+#define LSL_REG(x, s) \
+ if (s > 31) x = 0; \
+ else x <<= s;
+
+// Logical shift right; more than 31 gives 0.
+#define LSR_REG(x, s) \
+ if (s > 31) x = 0; \
+ else x >>= s;
+
+// Arithmetic shift right; more than 31 fills the word with the sign bit.
+#define ASR_REG(x, s) \
+ if (s > 31) x = ((s32)x) >> 31; \
+ else x = ((s32)x) >> s;
+
+// Rotate right by the low 5 bits of s.
+#define ROR_REG(x, s) \
+ x = ROR(x, (s&0x1F));
+
+// Flag-setting shifts by a register-supplied amount s. An amount of 0 leaves
+// both x and the C flag untouched. The carry-out rules follow the ARM
+// data-processing operand definitions for register-specified shifts.
+
+// LSL: amount 32 -> result 0, C = bit 0; above 32 -> result 0, C cleared.
+#define LSL_REG_S(x, s) \
+    if ((s) > 31) { cpu->SetC(((s) > 32) ? 0 : (x & (1<<0))); x = 0; } \
+    else if ((s) > 0) { cpu->SetC(x & (1<<(32-(s)))); x <<= (s); }
+
+// LSR: amount 32 -> result 0, C = bit 31; above 32 -> result 0, C cleared.
+#define LSR_REG_S(x, s) \
+    if ((s) > 31) { cpu->SetC(((s) > 32) ? 0 : (x & (1<<31))); x = 0; } \
+    else if ((s) > 0) { cpu->SetC(x & (1<<((s)-1))); x >>= (s); }
+
+// ASR: amount 32 or more -> C = bit 31, result is the sign bit replicated.
+#define ASR_REG_S(x, s) \
+    if ((s) > 31) { cpu->SetC(x & (1<<31)); x = ((s32)x) >> 31; } \
+    else if ((s) > 0) { cpu->SetC(x & (1<<((s)-1))); x = ((s32)x) >> (s); }
+
+// ROR: only the low 5 bits of the amount rotate. The carry bit index is
+// reduced modulo 32 as well, so amounts above 32 no longer shift 1 past
+// the width of the word; a multiple of 32 yields C = bit 31.
+#define ROR_REG_S(x, s) \
+    if ((s) > 0) cpu->SetC(x & (1<<(((s)-1)&0x1F))); \
+    x = ROR(x, ((s)&0x1F));
+
+
+
+// Operand-2 decoders. Each declares `u32 b`, the second operand consumed by
+// the A_<op> bodies that follow it inside the generated handler.
+
+// Immediate form: the low 8 bits of the opcode rotated right by twice the
+// 4-bit field in bits 8-11.
+#define A_CALC_OP2_IMM \
+ u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E);
+
+// Register shifted by an immediate: Rm (bits 0-3) run through shiftop with
+// the 5-bit amount from bits 7-11. Also declares `u32 s`.
+#define A_CALC_OP2_REG_SHIFT_IMM(shiftop) \
+ u32 b = cpu->R[cpu->CurInstr&0xF]; \
+ u32 s = (cpu->CurInstr>>7)&0x1F; \
+ shiftop(b, s);
+
+// Register shifted by a register: declares `u32 b` as Rm (bits 0-3) run
+// through shiftop with the amount held in Rs (bits 8-11). Only the low byte
+// of Rs supplies the shift amount, so the register is masked with 0xFF
+// before it reaches the shift macro. When Rm is R15 it reads 4 higher here.
+#define A_CALC_OP2_REG_SHIFT_REG(shiftop) \
+    u32 b = cpu->R[cpu->CurInstr&0xF]; \
+    if ((cpu->CurInstr&0xF)==15) b += 4; \
+    shiftop(b, (cpu->R[(cpu->CurInstr>>8)&0xF] & 0xFF));
+
+
+// Generates the 18 handlers of one data-processing opcode x: immediate and
+// the eight register-shift forms, each with and without flag setting.
+// The plain handlers expand A_<x>, the _S handlers expand A_<x>_S. The value
+// handed to those bodies is 0 for immediate-shift forms and 1 for
+// register-shift forms. s is pasted onto the shift macro name in the _S
+// handlers: pass _S to get the carry-updating shifts, or nothing to keep the
+// plain ones.
+#define A_IMPLEMENT_ALU_OP(x,s) \
+\
+void A_##x##_IMM(ARM* cpu) \
+{ \
+ A_CALC_OP2_IMM \
+ A_##x(0) \
+} \
+void A_##x##_REG_LSL_IMM(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_IMM(LSL_IMM) \
+ A_##x(0) \
+} \
+void A_##x##_REG_LSR_IMM(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_IMM(LSR_IMM) \
+ A_##x(0) \
+} \
+void A_##x##_REG_ASR_IMM(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_IMM(ASR_IMM) \
+ A_##x(0) \
+} \
+void A_##x##_REG_ROR_IMM(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_IMM(ROR_IMM) \
+ A_##x(0) \
+} \
+void A_##x##_REG_LSL_REG(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_REG(LSL_REG) \
+ A_##x(1) \
+} \
+void A_##x##_REG_LSR_REG(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_REG(LSR_REG) \
+ A_##x(1) \
+} \
+void A_##x##_REG_ASR_REG(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_REG(ASR_REG) \
+ A_##x(1) \
+} \
+void A_##x##_REG_ROR_REG(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_REG(ROR_REG) \
+ A_##x(1) \
+} \
+void A_##x##_IMM_S(ARM* cpu) \
+{ \
+ A_CALC_OP2_IMM \
+ A_##x##_S(0) \
+} \
+void A_##x##_REG_LSL_IMM_S(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_IMM(LSL_IMM##s) \
+ A_##x##_S(0) \
+} \
+void A_##x##_REG_LSR_IMM_S(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_IMM(LSR_IMM##s) \
+ A_##x##_S(0) \
+} \
+void A_##x##_REG_ASR_IMM_S(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_IMM(ASR_IMM##s) \
+ A_##x##_S(0) \
+} \
+void A_##x##_REG_ROR_IMM_S(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_IMM(ROR_IMM##s) \
+ A_##x##_S(0) \
+} \
+void A_##x##_REG_LSL_REG_S(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_REG(LSL_REG##s) \
+ A_##x##_S(1) \
+} \
+void A_##x##_REG_LSR_REG_S(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_REG(LSR_REG##s) \
+ A_##x##_S(1) \
+} \
+void A_##x##_REG_ASR_REG_S(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_REG(ASR_REG##s) \
+ A_##x##_S(1) \
+} \
+void A_##x##_REG_ROR_REG_S(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_REG(ROR_REG##s) \
+ A_##x##_S(1) \
+}
+
+// Generates the 9 handlers of one compare/test opcode x (immediate plus the
+// eight register-shift forms). All of them expand A_<x>, and all register
+// forms paste s onto the shift macro name: pass _S to get the
+// carry-updating shifts, or nothing to keep the plain ones.
+#define A_IMPLEMENT_ALU_TEST(x,s) \
+\
+void A_##x##_IMM(ARM* cpu) \
+{ \
+ A_CALC_OP2_IMM \
+ A_##x(0) \
+} \
+void A_##x##_REG_LSL_IMM(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_IMM(LSL_IMM##s) \
+ A_##x(0) \
+} \
+void A_##x##_REG_LSR_IMM(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_IMM(LSR_IMM##s) \
+ A_##x(0) \
+} \
+void A_##x##_REG_ASR_IMM(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_IMM(ASR_IMM##s) \
+ A_##x(0) \
+} \
+void A_##x##_REG_ROR_IMM(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_IMM(ROR_IMM##s) \
+ A_##x(0) \
+} \
+void A_##x##_REG_LSL_REG(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_REG(LSL_REG##s) \
+ A_##x(1) \
+} \
+void A_##x##_REG_LSR_REG(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_REG(LSR_REG##s) \
+ A_##x(1) \
+} \
+void A_##x##_REG_ASR_REG(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_REG(ASR_REG##s) \
+ A_##x(1) \
+} \
+void A_##x##_REG_ROR_REG(ARM* cpu) \
+{ \
+ A_CALC_OP2_REG_SHIFT_REG(ROR_REG##s) \
+ A_##x(1) \
+}
+
+
+// AND: Rd = Rn & b. `b` (operand 2) must already be declared; c is added to
+// cpu->Cycles. A destination of R15 branches through JumpTo instead.
+#define A_AND(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a & b; \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// ANDS: as above, plus N and Z from the result. A destination of R15 calls
+// JumpTo(res, true), which also reloads CPSR from the saved PSR.
+#define A_AND_S(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a & b; \
+ cpu->SetNZ(res & 0x80000000, \
+ !res); \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res, true); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// _S: the flag-setting handlers use the carry-updating shift macros
+A_IMPLEMENT_ALU_OP(AND,_S)
+
+
+// EOR: Rd = Rn ^ b. `b` (operand 2) must already be declared; c is added to
+// cpu->Cycles. A destination of R15 branches through JumpTo instead.
+#define A_EOR(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a ^ b; \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// EORS: as above, plus N and Z from the result. A destination of R15 calls
+// JumpTo(res, true), which also reloads CPSR from the saved PSR.
+#define A_EOR_S(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a ^ b; \
+ cpu->SetNZ(res & 0x80000000, \
+ !res); \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res, true); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// _S: the flag-setting handlers use the carry-updating shift macros
+A_IMPLEMENT_ALU_OP(EOR,_S)
+
+
+// SUB: Rd = Rn - b. `b` (operand 2) must already be declared; c is added to
+// cpu->Cycles. A destination of R15 branches through JumpTo instead.
+#define A_SUB(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a - b; \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// SUBS: as above, plus N, Z, C (set when no borrow occurs) and V from the
+// subtraction. A destination of R15 calls JumpTo(res, true).
+#define A_SUB_S(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a - b; \
+ cpu->SetNZCV(res & 0x80000000, \
+ !res, \
+ CARRY_SUB(a, b), \
+ OVERFLOW_SUB(a, b, res)); \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res, true); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// empty suffix: C comes from the arithmetic, so the plain shifts are used
+A_IMPLEMENT_ALU_OP(SUB,)
+
+
+// RSB: Rd = b - Rn (operands reversed). `b` (operand 2) must already be
+// declared; c is added to cpu->Cycles. A destination of R15 branches
+// through JumpTo instead.
+#define A_RSB(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = b - a; \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// RSBS: as above, plus N, Z, C (set when no borrow occurs) and V from the
+// subtraction. A destination of R15 calls JumpTo(res, true).
+#define A_RSB_S(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = b - a; \
+ cpu->SetNZCV(res & 0x80000000, \
+ !res, \
+ CARRY_SUB(b, a), \
+ OVERFLOW_SUB(b, a, res)); \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res, true); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// empty suffix: C comes from the arithmetic, so the plain shifts are used
+A_IMPLEMENT_ALU_OP(RSB,)
+
+
+// ADD: Rd = Rn + b. `b` (operand 2) must already be declared; c is added to
+// cpu->Cycles. A destination of R15 branches through JumpTo instead.
+#define A_ADD(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a + b; \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// ADDS: as above, plus N, Z, C (unsigned carry out) and V (signed overflow)
+// from the addition. A destination of R15 calls JumpTo(res, true).
+#define A_ADD_S(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a + b; \
+ cpu->SetNZCV(res & 0x80000000, \
+ !res, \
+ CARRY_ADD(a, b), \
+ OVERFLOW_ADD(a, b, res)); \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res, true); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// empty suffix: C comes from the arithmetic, so the plain shifts are used
+A_IMPLEMENT_ALU_OP(ADD,)
+
+
+// ADC: Rd = Rn + b + C. `b` (operand 2) must already be declared; c is
+// added to cpu->Cycles. A destination of R15 branches through JumpTo instead.
+#define A_ADC(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a + b + (cpu->CPSR&0x20000000 ? 1:0); \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// ADCS: the sum is formed in two steps (a + b, then + carry-in) so that a
+// carry or overflow produced by either step is reported in C / V.
+// A destination of R15 calls JumpTo(res, true).
+#define A_ADC_S(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res_tmp = a + b; \
+ u32 carry = (cpu->CPSR&0x20000000 ? 1:0); \
+ u32 res = res_tmp + carry; \
+ cpu->SetNZCV(res & 0x80000000, \
+ !res, \
+ CARRY_ADD(a, b) | CARRY_ADD(res_tmp, carry), \
+ OVERFLOW_ADD(a, b, res_tmp) | OVERFLOW_ADD(res_tmp, carry, res)); \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res, true); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// empty suffix: C comes from the arithmetic, so the plain shifts are used
+A_IMPLEMENT_ALU_OP(ADC,)
+
+
+// SBC: Rd = Rn - b - !C (one extra is subtracted when the C flag is clear).
+// `b` (operand 2) must already be declared; c is added to cpu->Cycles.
+// A destination of R15 branches through JumpTo instead.
+#define A_SBC(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a - b - (cpu->CPSR&0x20000000 ? 0:1); \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// SBCS: the difference is formed in two steps (a - b, then - borrow-in);
+// here `carry` holds the borrow-in (1 when C is clear). C ends up set only
+// if neither step borrowed, V if either step overflowed.
+// A destination of R15 calls JumpTo(res, true).
+#define A_SBC_S(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res_tmp = a - b; \
+ u32 carry = (cpu->CPSR&0x20000000 ? 0:1); \
+ u32 res = res_tmp - carry; \
+ cpu->SetNZCV(res & 0x80000000, \
+ !res, \
+ CARRY_SUB(a, b) & CARRY_SUB(res_tmp, carry), \
+ OVERFLOW_SUB(a, b, res_tmp) | OVERFLOW_SUB(res_tmp, carry, res)); \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res, true); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// empty suffix: C comes from the arithmetic, so the plain shifts are used
+A_IMPLEMENT_ALU_OP(SBC,)
+
+
+// RSC: Rd = b - Rn - !C (operands reversed; one extra is subtracted when the
+// C flag is clear). `b` (operand 2) must already be declared; c is added to
+// cpu->Cycles. A destination of R15 branches through JumpTo instead.
+#define A_RSC(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = b - a - (cpu->CPSR&0x20000000 ? 0:1); \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// RSCS: the difference is formed in two steps (b - a, then - borrow-in);
+// here `carry` holds the borrow-in (1 when C is clear). C ends up set only
+// if neither step borrowed, V if either step overflowed.
+// A destination of R15 calls JumpTo(res, true).
+#define A_RSC_S(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res_tmp = b - a; \
+ u32 carry = (cpu->CPSR&0x20000000 ? 0:1); \
+ u32 res = res_tmp - carry; \
+ cpu->SetNZCV(res & 0x80000000, \
+ !res, \
+ CARRY_SUB(b, a) & CARRY_SUB(res_tmp, carry), \
+ OVERFLOW_SUB(b, a, res_tmp) | OVERFLOW_SUB(res_tmp, carry, res)); \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res, true); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// empty suffix: C comes from the arithmetic, so the plain shifts are used
+A_IMPLEMENT_ALU_OP(RSC,)
+
+
+// TST: sets N and Z from Rn & b and discards the result. `b` (operand 2)
+// must already be declared; c is added to cpu->Cycles.
+#define A_TST(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a & b; \
+ cpu->SetNZ(res & 0x80000000, \
+ !res); \
+ cpu->Cycles += c;
+
+// _S: the handlers use the carry-updating shift macros
+A_IMPLEMENT_ALU_TEST(TST,_S)
+
+
+// TEQ: sets N and Z from Rn ^ b and discards the result. `b` (operand 2)
+// must already be declared; c is added to cpu->Cycles.
+#define A_TEQ(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a ^ b; \
+ cpu->SetNZ(res & 0x80000000, \
+ !res); \
+ cpu->Cycles += c;
+
+// _S: the handlers use the carry-updating shift macros
+A_IMPLEMENT_ALU_TEST(TEQ,_S)
+
+
+// CMP: computes Rn - op2 only to update N, Z, C and V; the result is
+// discarded. `cpu` and the shifter operand `b` must be in scope at the
+// expansion site; `c` is added to cpu->Cycles.
+#define A_CMP(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a - b; \
+ cpu->SetNZCV(res & 0x80000000, \
+ !res, \
+ CARRY_SUB(a, b), \
+ OVERFLOW_SUB(a, b, res)); \
+ cpu->Cycles += c;
+
+A_IMPLEMENT_ALU_TEST(CMP,)
+
+
+// CMN: computes Rn + op2 only to update N, Z, C and V; the result is
+// discarded. `cpu` and the shifter operand `b` must be in scope at the
+// expansion site; `c` is added to cpu->Cycles.
+#define A_CMN(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a + b; \
+ cpu->SetNZCV(res & 0x80000000, \
+ !res, \
+ CARRY_ADD(a, b), \
+ OVERFLOW_ADD(a, b, res)); \
+ cpu->Cycles += c;
+
+A_IMPLEMENT_ALU_TEST(CMN,)
+
+
+// ORR: Rd = Rn | op2, flags left untouched.
+// `cpu` and the shifter operand `b` must be in scope at the expansion site;
+// `c` is added to cpu->Cycles. A destination of R15 becomes a branch through
+// JumpTo().
+#define A_ORR(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a | b; \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// ORRS: same as ORR, additionally updating N and Z from the result.
+// With Rd == R15 the branch goes through JumpTo(res, true); the meaning of the
+// second argument is defined elsewhere (presumably "restore CPSR").
+#define A_ORR_S(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a | b; \
+ cpu->SetNZ(res & 0x80000000, \
+ !res); \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res, true); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+A_IMPLEMENT_ALU_OP(ORR,_S)
+
+
+// MOV: Rd = op2, flags left untouched.
+// `cpu` and the shifter operand `b` must be in scope at the expansion site;
+// `c` is added to cpu->Cycles. A destination of R15 becomes a branch through
+// JumpTo().
+#define A_MOV(c) \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(b); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = b; \
+ }
+
+// MOVS: same as MOV, additionally updating N and Z from the moved value.
+// With Rd == R15 the branch goes through JumpTo(b, true); the meaning of the
+// second argument is defined elsewhere (presumably "restore CPSR").
+#define A_MOV_S(c) \
+ cpu->SetNZ(b & 0x80000000, \
+ !b); \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(b, true); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = b; \
+ }
+
+A_IMPLEMENT_ALU_OP(MOV,_S)
+
+
+// BIC: Rd = Rn & ~op2 (bit clear), flags left untouched.
+// `cpu` and the shifter operand `b` must be in scope at the expansion site;
+// `c` is added to cpu->Cycles. A destination of R15 becomes a branch through
+// JumpTo().
+#define A_BIC(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a & ~b; \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+// BICS: same as BIC, additionally updating N and Z from the result.
+// With Rd == R15 the branch goes through JumpTo(res, true); the meaning of the
+// second argument is defined elsewhere (presumably "restore CPSR").
+#define A_BIC_S(c) \
+ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 res = a & ~b; \
+ cpu->SetNZ(res & 0x80000000, \
+ !res); \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(res, true); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \
+ }
+
+A_IMPLEMENT_ALU_OP(BIC,_S)
+
+
+// MVN: Rd = ~op2, flags left untouched.
+// The shifter operand `b` (from the expansion site) is inverted in place, so
+// it must be a modifiable lvalue there. `c` is added to cpu->Cycles.
+// A destination of R15 becomes a branch through JumpTo().
+#define A_MVN(c) \
+ b = ~b; \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(b); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = b; \
+ }
+
+// MVNS: same as MVN, additionally updating N and Z from the inverted value.
+// With Rd == R15 the branch goes through JumpTo(b, true); the meaning of the
+// second argument is defined elsewhere (presumably "restore CPSR").
+#define A_MVN_S(c) \
+ b = ~b; \
+ cpu->SetNZ(b & 0x80000000, \
+ !b); \
+ cpu->Cycles += c; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ cpu->JumpTo(b, true); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = b; \
+ }
+
+A_IMPLEMENT_ALU_OP(MVN,_S)
+
+
+
+// MUL{S}: Rd (bits 19:16) = Rm (bits 3:0) * Rs (bits 11:8), low 32 bits.
+// With the S bit (bit 20) set, N and Z follow the result, and the core with
+// Num == 1 additionally gets its carry flag cleared.
+// Costs 1 to 4 cycles depending on how many leading bytes of Rs are pure sign
+// extension (all zeroes or all ones).
+void A_MUL(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const u32 multiplicand = cpu->R[instr & 0xF];
+    const u32 multiplier = cpu->R[(instr >> 8) & 0xF];
+
+    const u32 product = multiplicand * multiplier;
+    cpu->R[(instr >> 16) & 0xF] = product;
+
+    if (instr & (1<<20))
+    {
+        cpu->SetNZ(product & 0x80000000, !product);
+        if (cpu->Num == 1)
+            cpu->SetC(0);
+    }
+
+    const u32 top24 = multiplier & 0xFFFFFF00;
+    const u32 top16 = multiplier & 0xFFFF0000;
+    const u32 top8 = multiplier & 0xFF000000;
+
+    u32 cost = 4;
+    if (top24 == 0x00000000 || top24 == 0xFFFFFF00) cost = 1;
+    else if (top16 == 0x00000000 || top16 == 0xFFFF0000) cost = 2;
+    else if (top8 == 0x00000000 || top8 == 0xFF000000) cost = 3;
+
+    cpu->Cycles += cost;
+}
+
+// MLA{S}: Rd (bits 19:16) = Rm (bits 3:0) * Rs (bits 11:8) + Rn (bits 15:12).
+// With the S bit (bit 20) set, N and Z follow the result, and the core with
+// Num == 1 additionally gets its carry flag cleared.
+// Costs 2 to 5 cycles depending on how many leading bytes of Rs are pure sign
+// extension (all zeroes or all ones).
+void A_MLA(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const u32 multiplicand = cpu->R[instr & 0xF];
+    const u32 multiplier = cpu->R[(instr >> 8) & 0xF];
+    const u32 addend = cpu->R[(instr >> 12) & 0xF];
+
+    const u32 total = (multiplicand * multiplier) + addend;
+    cpu->R[(instr >> 16) & 0xF] = total;
+
+    if (instr & (1<<20))
+    {
+        cpu->SetNZ(total & 0x80000000, !total);
+        if (cpu->Num == 1)
+            cpu->SetC(0);
+    }
+
+    const u32 top24 = multiplier & 0xFFFFFF00;
+    const u32 top16 = multiplier & 0xFFFF0000;
+    const u32 top8 = multiplier & 0xFF000000;
+
+    u32 cost = 5;
+    if (top24 == 0x00000000 || top24 == 0xFFFFFF00) cost = 2;
+    else if (top16 == 0x00000000 || top16 == 0xFFFF0000) cost = 3;
+    else if (top8 == 0x00000000 || top8 == 0xFF000000) cost = 4;
+
+    cpu->Cycles += cost;
+}
+
+// UMULL{S}: unsigned 32x32 -> 64 multiply of Rm (bits 3:0) and Rs (bits 11:8).
+// The low word goes to the register in bits 15:12, the high word to the one
+// in bits 19:16 (written last). With the S bit (bit 20) set, N is bit 63 of
+// the product and Z tells whether all 64 bits are zero; the core with
+// Num == 1 also gets its carry flag cleared.
+// Costs 2 to 5 cycles depending on how many leading bytes of Rs are zero.
+void A_UMULL(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const u32 multiplicand = cpu->R[instr & 0xF];
+    const u32 multiplier = cpu->R[(instr >> 8) & 0xF];
+
+    const u64 product = (u64)multiplicand * (u64)multiplier;
+
+    cpu->R[(instr >> 12) & 0xF] = (u32)product;
+    cpu->R[(instr >> 16) & 0xF] = (u32)(product >> 32ULL);
+
+    if (instr & (1<<20))
+    {
+        cpu->SetNZ((u32)(product >> 63ULL), !product);
+        if (cpu->Num == 1)
+            cpu->SetC(0);
+    }
+
+    u32 cost = 5;
+    if ((multiplier & 0xFFFFFF00) == 0x00000000) cost = 2;
+    else if ((multiplier & 0xFFFF0000) == 0x00000000) cost = 3;
+    else if ((multiplier & 0xFF000000) == 0x00000000) cost = 4;
+
+    cpu->Cycles += cost;
+}
+
+// UMLAL{S}: unsigned 32x32 -> 64 multiply of Rm (bits 3:0) and Rs (bits 11:8),
+// accumulated onto the 64-bit value held in the register pair
+// hi = bits 19:16, lo = bits 15:12, which also receives the sum (lo first,
+// then hi). With the S bit (bit 20) set, N is bit 63 of the sum and Z tells
+// whether all 64 bits are zero; the core with Num == 1 also gets its carry
+// flag cleared.
+// Costs 2 to 5 cycles depending on how many leading bytes of Rs are zero.
+void A_UMLAL(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const u32 lo_reg = (instr >> 12) & 0xF;
+    const u32 hi_reg = (instr >> 16) & 0xF;
+    const u32 multiplicand = cpu->R[instr & 0xF];
+    const u32 multiplier = cpu->R[(instr >> 8) & 0xF];
+
+    const u64 accum = (u64)cpu->R[lo_reg] | ((u64)cpu->R[hi_reg] << 32ULL);
+    const u64 total = (u64)multiplicand * (u64)multiplier + accum;
+
+    cpu->R[lo_reg] = (u32)total;
+    cpu->R[hi_reg] = (u32)(total >> 32ULL);
+
+    if (instr & (1<<20))
+    {
+        cpu->SetNZ((u32)(total >> 63ULL), !total);
+        if (cpu->Num == 1)
+            cpu->SetC(0);
+    }
+
+    u32 cost = 5;
+    if ((multiplier & 0xFFFFFF00) == 0x00000000) cost = 2;
+    else if ((multiplier & 0xFFFF0000) == 0x00000000) cost = 3;
+    else if ((multiplier & 0xFF000000) == 0x00000000) cost = 4;
+
+    cpu->Cycles += cost;
+}
+
+// SMULL{S}: signed 32x32 -> 64 multiply of Rm (bits 3:0) and Rs (bits 11:8).
+// The low word goes to the register in bits 15:12, the high word to the one
+// in bits 19:16 (written last). With the S bit (bit 20) set, N follows the
+// sign of the 64-bit product and Z tells whether it is zero; the core with
+// Num == 1 also gets its carry flag cleared.
+// Costs 2 to 5 cycles depending on how many leading bytes of Rs are pure sign
+// extension (all zeroes or all ones).
+void A_SMULL(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const u32 multiplicand = cpu->R[instr & 0xF];
+    const u32 multiplier = cpu->R[(instr >> 8) & 0xF];
+
+    const s64 product = (s64)(s32)multiplicand * (s64)(s32)multiplier;
+
+    cpu->R[(instr >> 12) & 0xF] = (u32)product;
+    cpu->R[(instr >> 16) & 0xF] = (u32)(product >> 32ULL);
+
+    if (instr & (1<<20))
+    {
+        cpu->SetNZ((u32)(product >> 63ULL), !product);
+        if (cpu->Num == 1)
+            cpu->SetC(0);
+    }
+
+    const u32 top24 = multiplier & 0xFFFFFF00;
+    const u32 top16 = multiplier & 0xFFFF0000;
+    const u32 top8 = multiplier & 0xFF000000;
+
+    u32 cost = 5;
+    if (top24 == 0x00000000 || top24 == 0xFFFFFF00) cost = 2;
+    else if (top16 == 0x00000000 || top16 == 0xFFFF0000) cost = 3;
+    else if (top8 == 0x00000000 || top8 == 0xFF000000) cost = 4;
+
+    cpu->Cycles += cost;
+}
+
+// SMLAL{S}: signed 32x32 -> 64 multiply of Rm (bits 3:0) and Rs (bits 11:8),
+// accumulated onto the 64-bit value held in the register pair
+// hi = bits 19:16, lo = bits 15:12, which also receives the sum (lo first,
+// then hi). With the S bit (bit 20) set, N follows the sign of the sum and Z
+// tells whether it is zero; the core with Num == 1 also gets its carry flag
+// cleared.
+// Costs 2 to 5 cycles depending on how many leading bytes of Rs are pure sign
+// extension (all zeroes or all ones).
+void A_SMLAL(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const u32 lo_reg = (instr >> 12) & 0xF;
+    const u32 hi_reg = (instr >> 16) & 0xF;
+    const u32 multiplicand = cpu->R[instr & 0xF];
+    const u32 multiplier = cpu->R[(instr >> 8) & 0xF];
+
+    s64 total = (s64)(s32)multiplicand * (s64)(s32)multiplier;
+    const s64 accum = (s64)((u64)cpu->R[lo_reg] | ((u64)cpu->R[hi_reg] << 32ULL));
+    total += accum;
+
+    cpu->R[lo_reg] = (u32)total;
+    cpu->R[hi_reg] = (u32)(total >> 32ULL);
+
+    if (instr & (1<<20))
+    {
+        cpu->SetNZ((u32)(total >> 63ULL), !total);
+        if (cpu->Num == 1)
+            cpu->SetC(0);
+    }
+
+    const u32 top24 = multiplier & 0xFFFFFF00;
+    const u32 top16 = multiplier & 0xFFFF0000;
+    const u32 top8 = multiplier & 0xFF000000;
+
+    u32 cost = 5;
+    if (top24 == 0x00000000 || top24 == 0xFFFFFF00) cost = 2;
+    else if (top16 == 0x00000000 || top16 == 0xFFFF0000) cost = 3;
+    else if (top8 == 0x00000000 || top8 == 0xFF000000) cost = 4;
+
+    cpu->Cycles += cost;
+}
+
+// SMLAxy: Rd (bits 19:16) = half(Rm) * half(Rs) + Rn (bits 15:12), where the
+// halves are signed 16-bit values. Bit 5 picks the top (1) or bottom (0) half
+// of Rm, bit 6 does the same for Rs.
+// If the accumulate overflows as a signed 32-bit addition, CPSR bit 27 (the
+// sticky Q flag) is set; the result itself is not saturated.
+void A_SMLAxy(ARM* cpu)
+{
+    // TODO: ARM9 only
+
+    const u32 instr = cpu->CurInstr;
+    const u32 rm_word = cpu->R[instr & 0xF];
+    const u32 rs_word = cpu->R[(instr >> 8) & 0xF];
+    const u32 addend = cpu->R[(instr >> 12) & 0xF];
+
+    const u32 rm_half = (instr & (1<<5)) ? (rm_word >> 16) : (rm_word & 0xFFFF);
+    const u32 rs_half = (instr & (1<<6)) ? (rs_word >> 16) : (rs_word & 0xFFFF);
+
+    const u32 product = ((s16)rm_half * (s16)rs_half);
+    const u32 total = product + addend;
+
+    cpu->R[(instr >> 16) & 0xF] = total;
+    if (OVERFLOW_ADD(product, addend, total))
+        cpu->CPSR |= 0x08000000;
+}
+
+// SMLAWy: Rd (bits 19:16) = ((Rm * half(Rs)) >> 16) + Rn (bits 15:12).
+// Rm is a signed 32-bit value and half(Rs) a signed 16-bit value; bit 6 picks
+// the top (1) or bottom (0) half of Rs. The 48-bit product needs a 64-bit
+// intermediate: computing it in 32 bits overflows and drops the upper bits
+// that the >> 16 is supposed to bring down.
+// If the accumulate overflows as a signed 32-bit addition, CPSR bit 27 (the
+// sticky Q flag) is set; the result itself is not saturated.
+void A_SMLAWy(ARM* cpu)
+{
+    // TODO: ARM9 only
+
+    u32 rm = cpu->R[cpu->CurInstr & 0xF];
+    u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
+    u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF];
+
+    if (cpu->CurInstr & (1<<6)) rs >>= 16;
+    else                        rs &= 0xFFFF;
+
+    // keep bits 47:16 of the full signed product
+    u32 res_mul = (u32)(((s64)(s32)rm * (s64)(s16)rs) >> 16);
+    u32 res = res_mul + rn;
+
+    cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
+    if (OVERFLOW_ADD(res_mul, rn, res))
+        cpu->CPSR |= 0x08000000;
+}
+
+// SMULxy: Rd (bits 19:16) = half(Rm) * half(Rs), both halves taken as signed
+// 16-bit values. Bit 5 picks the top (1) or bottom (0) half of Rm, bit 6 does
+// the same for Rs. No flags are touched.
+void A_SMULxy(ARM* cpu)
+{
+    // TODO: ARM9 only
+
+    const u32 instr = cpu->CurInstr;
+    const u32 rm_word = cpu->R[instr & 0xF];
+    const u32 rs_word = cpu->R[(instr >> 8) & 0xF];
+
+    const u32 rm_half = (instr & (1<<5)) ? (rm_word >> 16) : (rm_word & 0xFFFF);
+    const u32 rs_half = (instr & (1<<6)) ? (rs_word >> 16) : (rs_word & 0xFFFF);
+
+    const u32 product = ((s16)rm_half * (s16)rs_half);
+
+    cpu->R[(instr >> 16) & 0xF] = product;
+}
+
+// SMULWy: Rd (bits 19:16) = (Rm * half(Rs)) >> 16.
+// Rm is a signed 32-bit value and half(Rs) a signed 16-bit value; bit 6 picks
+// the top (1) or bottom (0) half of Rs. The 48-bit product needs a 64-bit
+// intermediate: computing it in 32 bits overflows and drops the upper bits
+// that the >> 16 is supposed to bring down. No flags are touched.
+void A_SMULWy(ARM* cpu)
+{
+    // TODO: ARM9 only
+
+    u32 rm = cpu->R[cpu->CurInstr & 0xF];
+    u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
+
+    if (cpu->CurInstr & (1<<6)) rs >>= 16;
+    else                        rs &= 0xFFFF;
+
+    // keep bits 47:16 of the full signed product
+    u32 res = (u32)(((s64)(s32)rm * (s64)(s16)rs) >> 16);
+
+    cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
+}
+
+// SMLALxy: multiplies two signed 16-bit halves (bit 5 selects the half of Rm,
+// bit 6 the half of Rs; 1 = top, 0 = bottom) and adds the product to the
+// 64-bit accumulator held in hi = bits 19:16, lo = bits 15:12. The sum is
+// written back lo first, then hi. No flags are touched; one cycle is added.
+void A_SMLALxy(ARM* cpu)
+{
+    // TODO: ARM9 only
+
+    const u32 instr = cpu->CurInstr;
+    const u32 lo_reg = (instr >> 12) & 0xF;
+    const u32 hi_reg = (instr >> 16) & 0xF;
+    const u32 rm_word = cpu->R[instr & 0xF];
+    const u32 rs_word = cpu->R[(instr >> 8) & 0xF];
+
+    const u32 rm_half = (instr & (1<<5)) ? (rm_word >> 16) : (rm_word & 0xFFFF);
+    const u32 rs_half = (instr & (1<<6)) ? (rs_word >> 16) : (rs_word & 0xFFFF);
+
+    s64 total = (s64)(s16)rm_half * (s64)(s16)rs_half;
+    const s64 accum = (s64)((u64)cpu->R[lo_reg] | ((u64)cpu->R[hi_reg] << 32ULL));
+    total += accum;
+
+    cpu->R[lo_reg] = (u32)total;
+    cpu->R[hi_reg] = (u32)(total >> 32ULL);
+
+    cpu->Cycles += 1;
+}
+
+
+
+// CLZ: Rd (bits 15:12) = number of leading zero bits in Rm (bits 3:0).
+// Ones are shifted in from the right while scanning, so both loops are
+// guaranteed to stop and an all-zero input yields 32.
+void A_CLZ(ARM* cpu)
+{
+    // TODO: ARM9 only
+
+    u32 bits = cpu->R[cpu->CurInstr & 0xF];
+    u32 count = 0;
+
+    // coarse pass: skip whole zero bytes
+    for (; (bits & 0xFF000000) == 0; count += 8)
+        bits = (bits << 8) | 0xFF;
+
+    // fine pass: remaining zero bits of the top byte
+    for (; (bits & 0x80000000) == 0; count++)
+        bits = (bits << 1) | 0x1;
+
+    cpu->R[(cpu->CurInstr >> 12) & 0xF] = count;
+}
+
+// QADD: Rd = saturate(Rm + Rn) as signed 32-bit values.
+// Encoding: Rm = bits 3:0, Rn = bits 19:16, Rd = bits 15:12. The result must
+// go to Rd; writing it to bits 19:16 would clobber the Rn operand instead.
+// On saturation the result is clamped to 0x7FFFFFFF / 0x80000000 and CPSR
+// bit 27 (the sticky Q flag) is set.
+void A_QADD(ARM* cpu)
+{
+    // TODO: ARM9 only
+
+    u32 rm = cpu->R[cpu->CurInstr & 0xF];
+    u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
+
+    u32 res = rm + rn;
+    if (OVERFLOW_ADD(rm, rn, res))
+    {
+        // a wrapped result that looks negative means the true sum was too large
+        res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000;
+        cpu->CPSR |= 0x08000000;
+    }
+
+    cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
+}
+
+// QSUB: Rd = saturate(Rm - Rn) as signed 32-bit values.
+// Encoding: Rm = bits 3:0, Rn = bits 19:16, Rd = bits 15:12. The result must
+// go to Rd; writing it to bits 19:16 would clobber the Rn operand instead.
+// On saturation the result is clamped to 0x7FFFFFFF / 0x80000000 and CPSR
+// bit 27 (the sticky Q flag) is set.
+void A_QSUB(ARM* cpu)
+{
+    // TODO: ARM9 only
+
+    u32 rm = cpu->R[cpu->CurInstr & 0xF];
+    u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
+
+    u32 res = rm - rn;
+    if (OVERFLOW_SUB(rm, rn, res))
+    {
+        // a wrapped result that looks negative means the true difference was too large
+        res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000;
+        cpu->CPSR |= 0x08000000;
+    }
+
+    cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
+}
+
+// QDADD: Rd = saturate(Rm + saturate(Rn * 2)) as signed 32-bit values.
+// Encoding: Rm = bits 3:0, Rn = bits 19:16, Rd = bits 15:12. The result must
+// go to Rd; writing it to bits 19:16 would clobber the Rn operand instead.
+// CPSR bit 27 (the sticky Q flag) is set if either the doubling or the
+// addition saturates.
+void A_QDADD(ARM* cpu)
+{
+    // TODO: ARM9 only
+
+    u32 rm = cpu->R[cpu->CurInstr & 0xF];
+    u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
+
+    // Doubling overflows exactly when bits 31 and 30 differ. Testing bit 30
+    // alone is wrong for negative inputs: -1 (0xFFFFFFFF) doubles to -2
+    // without saturating.
+    if ((rn ^ (rn << 1)) & 0x80000000)
+    {
+        rn = (rn & 0x80000000) ? 0x80000000 : 0x7FFFFFFF;
+        cpu->CPSR |= 0x08000000;
+    }
+    else
+        rn <<= 1;
+
+    u32 res = rm + rn;
+    if (OVERFLOW_ADD(rm, rn, res))
+    {
+        res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000;
+        cpu->CPSR |= 0x08000000;
+    }
+
+    cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
+}
+
+// QDSUB: Rd = saturate(Rm - saturate(Rn * 2)) as signed 32-bit values.
+// Encoding: Rm = bits 3:0, Rn = bits 19:16, Rd = bits 15:12. The result must
+// go to Rd; writing it to bits 19:16 would clobber the Rn operand instead.
+// CPSR bit 27 (the sticky Q flag) is set if either the doubling or the
+// subtraction saturates.
+void A_QDSUB(ARM* cpu)
+{
+    // TODO: ARM9 only
+
+    u32 rm = cpu->R[cpu->CurInstr & 0xF];
+    u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
+
+    // Doubling overflows exactly when bits 31 and 30 differ. Testing bit 30
+    // alone is wrong for negative inputs: -1 (0xFFFFFFFF) doubles to -2
+    // without saturating.
+    if ((rn ^ (rn << 1)) & 0x80000000)
+    {
+        rn = (rn & 0x80000000) ? 0x80000000 : 0x7FFFFFFF;
+        cpu->CPSR |= 0x08000000;
+    }
+    else
+        rn <<= 1;
+
+    u32 res = rm - rn;
+    if (OVERFLOW_SUB(rm, rn, res))
+    {
+        res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000;
+        cpu->CPSR |= 0x08000000;
+    }
+
+    cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
+}
+
+
+
+// ---- THUMB ----------------------------------
+
+
+
+// Thumb LSL Rd, Rs, #imm5: Rd (bits 2:0) = Rs (bits 5:3) shifted left by the
+// immediate in bits 10:6. The shift itself is done by LSL_IMM_S (defined
+// elsewhere in this file); N and Z are then set from the result.
+void T_LSL_IMM(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    u32 op = cpu->R[(instr >> 3) & 0x7];
+    u32 s = (instr >> 6) & 0x1F;
+
+    LSL_IMM_S(op, s);
+
+    cpu->R[instr & 0x7] = op;
+    cpu->SetNZ(op & 0x80000000, !op);
+}
+
+// Thumb LSR Rd, Rs, #imm5: Rd (bits 2:0) = Rs (bits 5:3) logically shifted
+// right by the immediate in bits 10:6. The shift itself is done by LSR_IMM_S
+// (defined elsewhere in this file); N and Z are then set from the result.
+void T_LSR_IMM(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    u32 op = cpu->R[(instr >> 3) & 0x7];
+    u32 s = (instr >> 6) & 0x1F;
+
+    LSR_IMM_S(op, s);
+
+    cpu->R[instr & 0x7] = op;
+    cpu->SetNZ(op & 0x80000000, !op);
+}
+
+// Thumb ASR Rd, Rs, #imm5: Rd (bits 2:0) = Rs (bits 5:3) arithmetically
+// shifted right by the immediate in bits 10:6. The shift itself is done by
+// ASR_IMM_S (defined elsewhere in this file); N and Z are then set from the
+// result.
+void T_ASR_IMM(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    u32 op = cpu->R[(instr >> 3) & 0x7];
+    u32 s = (instr >> 6) & 0x1F;
+
+    ASR_IMM_S(op, s);
+
+    cpu->R[instr & 0x7] = op;
+    cpu->SetNZ(op & 0x80000000, !op);
+}
+
+// Thumb three-register ADD: Rd (bits 2:0) = Rs (bits 5:3) + Rn (bits 8:6).
+// Updates N, Z, C and V.
+void T_ADD_REG_(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const u32 lhs = cpu->R[(instr >> 3) & 0x7];
+    const u32 rhs = cpu->R[(instr >> 6) & 0x7];
+    const u32 sum = lhs + rhs;
+
+    cpu->R[instr & 0x7] = sum;
+    cpu->SetNZCV(sum & 0x80000000,
+                 !sum,
+                 CARRY_ADD(lhs, rhs),
+                 OVERFLOW_ADD(lhs, rhs, sum));
+}
+
+// Thumb three-register SUB: Rd (bits 2:0) = Rs (bits 5:3) - Rn (bits 8:6).
+// Updates N, Z, C and V.
+void T_SUB_REG_(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const u32 lhs = cpu->R[(instr >> 3) & 0x7];
+    const u32 rhs = cpu->R[(instr >> 6) & 0x7];
+    const u32 diff = lhs - rhs;
+
+    cpu->R[instr & 0x7] = diff;
+    cpu->SetNZCV(diff & 0x80000000,
+                 !diff,
+                 CARRY_SUB(lhs, rhs),
+                 OVERFLOW_SUB(lhs, rhs, diff));
+}
+
+// Thumb ADD with 3-bit immediate: Rd (bits 2:0) = Rs (bits 5:3) + imm3
+// (bits 8:6). Updates N, Z, C and V.
+void T_ADD_IMM_(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const u32 lhs = cpu->R[(instr >> 3) & 0x7];
+    const u32 imm = (instr >> 6) & 0x7;
+    const u32 sum = lhs + imm;
+
+    cpu->R[instr & 0x7] = sum;
+    cpu->SetNZCV(sum & 0x80000000,
+                 !sum,
+                 CARRY_ADD(lhs, imm),
+                 OVERFLOW_ADD(lhs, imm, sum));
+}
+
+// Thumb SUB with 3-bit immediate: Rd (bits 2:0) = Rs (bits 5:3) - imm3
+// (bits 8:6). Updates N, Z, C and V.
+void T_SUB_IMM_(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const u32 lhs = cpu->R[(instr >> 3) & 0x7];
+    const u32 imm = (instr >> 6) & 0x7;
+    const u32 diff = lhs - imm;
+
+    cpu->R[instr & 0x7] = diff;
+    cpu->SetNZCV(diff & 0x80000000,
+                 !diff,
+                 CARRY_SUB(lhs, imm),
+                 OVERFLOW_SUB(lhs, imm, diff));
+}
+
+// Thumb MOV Rd, #imm8: Rd (bits 10:8) = imm8 (bits 7:0).
+// N is always cleared (an 8-bit value never has bit 31 set); Z reflects
+// whether the immediate is zero.
+void T_MOV_IMM(ARM* cpu)
+{
+    const u32 imm = cpu->CurInstr & 0xFF;
+    const u32 rd = (cpu->CurInstr >> 8) & 0x7;
+
+    cpu->R[rd] = imm;
+    cpu->SetNZ(0, !imm);
+}
+
+// Thumb CMP Rd, #imm8: computes Rd (bits 10:8) - imm8 (bits 7:0) purely to
+// update N, Z, C and V; no register is written.
+void T_CMP_IMM(ARM* cpu)
+{
+    const u32 lhs = cpu->R[(cpu->CurInstr >> 8) & 0x7];
+    const u32 imm = cpu->CurInstr & 0xFF;
+    const u32 diff = lhs - imm;
+
+    cpu->SetNZCV(diff & 0x80000000,
+                 !diff,
+                 CARRY_SUB(lhs, imm),
+                 OVERFLOW_SUB(lhs, imm, diff));
+}
+
+// Thumb ADD Rd, #imm8: Rd (bits 10:8) += imm8 (bits 7:0).
+// Updates N, Z, C and V.
+void T_ADD_IMM(ARM* cpu)
+{
+    const u32 rd = (cpu->CurInstr >> 8) & 0x7;
+    const u32 lhs = cpu->R[rd];
+    const u32 imm = cpu->CurInstr & 0xFF;
+    const u32 sum = lhs + imm;
+
+    cpu->R[rd] = sum;
+    cpu->SetNZCV(sum & 0x80000000,
+                 !sum,
+                 CARRY_ADD(lhs, imm),
+                 OVERFLOW_ADD(lhs, imm, sum));
+}
+
+// Thumb SUB Rd, #imm8: Rd (bits 10:8) -= imm8 (bits 7:0).
+// Updates N, Z, C and V.
+void T_SUB_IMM(ARM* cpu)
+{
+    const u32 rd = (cpu->CurInstr >> 8) & 0x7;
+    const u32 lhs = cpu->R[rd];
+    const u32 imm = cpu->CurInstr & 0xFF;
+    const u32 diff = lhs - imm;
+
+    cpu->R[rd] = diff;
+    cpu->SetNZCV(diff & 0x80000000,
+                 !diff,
+                 CARRY_SUB(lhs, imm),
+                 OVERFLOW_SUB(lhs, imm, diff));
+}
+
+
+// Thumb AND Rd, Rs: Rd (bits 2:0) &= Rs (bits 5:3). Updates N and Z.
+void T_AND_REG(ARM* cpu)
+{
+    const u32 rd = cpu->CurInstr & 0x7;
+    const u32 rs = (cpu->CurInstr >> 3) & 0x7;
+    const u32 masked = cpu->R[rd] & cpu->R[rs];
+
+    cpu->R[rd] = masked;
+    cpu->SetNZ(masked & 0x80000000, !masked);
+}
+
+// Thumb EOR Rd, Rs: Rd (bits 2:0) ^= Rs (bits 5:3). Updates N and Z.
+void T_EOR_REG(ARM* cpu)
+{
+    const u32 rd = cpu->CurInstr & 0x7;
+    const u32 rs = (cpu->CurInstr >> 3) & 0x7;
+    const u32 toggled = cpu->R[rd] ^ cpu->R[rs];
+
+    cpu->R[rd] = toggled;
+    cpu->SetNZ(toggled & 0x80000000, !toggled);
+}
+
+// Thumb LSL Rd, Rs: shifts Rd (bits 2:0) left by the low byte of Rs
+// (bits 5:3). The shift itself is done by LSL_REG_S (defined elsewhere in
+// this file); N and Z are then set from the result. Adds one cycle.
+void T_LSL_REG(ARM* cpu)
+{
+    const u32 rd = cpu->CurInstr & 0x7;
+    u32 a = cpu->R[rd];
+    u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF;
+
+    LSL_REG_S(a, b);
+
+    cpu->R[rd] = a;
+    cpu->SetNZ(a & 0x80000000, !a);
+    cpu->Cycles += 1;
+}
+
+// Thumb LSR Rd, Rs: logically shifts Rd (bits 2:0) right by the low byte of
+// Rs (bits 5:3). The shift itself is done by LSR_REG_S (defined elsewhere in
+// this file); N and Z are then set from the result. Adds one cycle.
+void T_LSR_REG(ARM* cpu)
+{
+    const u32 rd = cpu->CurInstr & 0x7;
+    u32 a = cpu->R[rd];
+    u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF;
+
+    LSR_REG_S(a, b);
+
+    cpu->R[rd] = a;
+    cpu->SetNZ(a & 0x80000000, !a);
+    cpu->Cycles += 1;
+}
+
+// Thumb ASR Rd, Rs: arithmetically shifts Rd (bits 2:0) right by the low byte
+// of Rs (bits 5:3). The shift itself is done by ASR_REG_S (defined elsewhere
+// in this file); N and Z are then set from the result. Adds one cycle.
+void T_ASR_REG(ARM* cpu)
+{
+    const u32 rd = cpu->CurInstr & 0x7;
+    u32 a = cpu->R[rd];
+    u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF;
+
+    ASR_REG_S(a, b);
+
+    cpu->R[rd] = a;
+    cpu->SetNZ(a & 0x80000000, !a);
+    cpu->Cycles += 1;
+}
+
+// Thumb ADC Rd, Rs: Rd (bits 2:0) = Rd + Rs (bits 5:3) + C.
+// The addition is done in two steps (a + b, then plus the carry-in):
+//  * C is set when either step carried out (both cannot carry at once).
+//  * V is set when exactly ONE step overflowed. If both overflow they do so
+//    in opposite directions and cancel out, e.g.
+//    0x80000000 + 0xFFFFFFFF + 1 == 0x80000000, which must leave V clear.
+void T_ADC_REG(ARM* cpu)
+{
+    u32 a = cpu->R[cpu->CurInstr & 0x7];
+    u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7];
+    u32 res_tmp = a + b;
+    u32 carry = (cpu->CPSR&0x20000000 ? 1:0);
+    u32 res = res_tmp + carry;
+    cpu->R[cpu->CurInstr & 0x7] = res;
+    cpu->SetNZCV(res & 0x80000000,
+                 !res,
+                 CARRY_ADD(a, b) | CARRY_ADD(res_tmp, carry),
+                 !!(OVERFLOW_ADD(a, b, res_tmp)) != !!(OVERFLOW_ADD(res_tmp, carry, res)));
+}
+
+// Thumb SBC Rd, Rs: Rd (bits 2:0) = Rd - Rs (bits 5:3) - (C ? 0 : 1).
+// The subtraction is done in two steps (a - b, then minus the borrow):
+//  * C is set only when neither step borrowed.
+//  * V is set when exactly ONE step overflowed. If both overflow they do so
+//    in opposite directions and cancel out, e.g.
+//    0x7FFFFFFF - 0xFFFFFFFF - 1 == 0x7FFFFFFF, which must leave V clear.
+void T_SBC_REG(ARM* cpu)
+{
+    u32 a = cpu->R[cpu->CurInstr & 0x7];
+    u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7];
+    u32 res_tmp = a - b;
+    u32 carry = (cpu->CPSR&0x20000000 ? 0:1);
+    u32 res = res_tmp - carry;
+    cpu->R[cpu->CurInstr & 0x7] = res;
+    cpu->SetNZCV(res & 0x80000000,
+                 !res,
+                 CARRY_SUB(a, b) & CARRY_SUB(res_tmp, carry),
+                 !!(OVERFLOW_SUB(a, b, res_tmp)) != !!(OVERFLOW_SUB(res_tmp, carry, res)));
+}
+
+// Thumb ROR Rd, Rs: rotates Rd (bits 2:0) right by the low byte of Rs
+// (bits 5:3). The rotation itself is done by ROR_REG_S (defined elsewhere in
+// this file); N and Z are then set from the result. Adds one cycle.
+void T_ROR_REG(ARM* cpu)
+{
+    const u32 rd = cpu->CurInstr & 0x7;
+    u32 a = cpu->R[rd];
+    u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF;
+
+    ROR_REG_S(a, b);
+
+    cpu->R[rd] = a;
+    cpu->SetNZ(a & 0x80000000, !a);
+    cpu->Cycles += 1;
+}
+
+// Thumb TST Rd, Rs: computes Rd (bits 2:0) & Rs (bits 5:3) purely to update
+// N and Z; no register is written.
+void T_TST_REG(ARM* cpu)
+{
+    const u32 lhs = cpu->R[cpu->CurInstr & 0x7];
+    const u32 rhs = cpu->R[(cpu->CurInstr >> 3) & 0x7];
+    const u32 masked = lhs & rhs;
+
+    cpu->SetNZ(masked & 0x80000000, !masked);
+}
+
+// Thumb NEG Rd, Rs: Rd (bits 2:0) = 0 - Rs (bits 5:3).
+// N, Z, C and V are set as for the subtraction 0 - Rs.
+void T_NEG_REG(ARM* cpu)
+{
+    const u32 src = cpu->R[(cpu->CurInstr >> 3) & 0x7];
+    const u32 negated = -src;
+
+    cpu->R[cpu->CurInstr & 0x7] = negated;
+    cpu->SetNZCV(negated & 0x80000000,
+                 !negated,
+                 CARRY_SUB(0, src),
+                 OVERFLOW_SUB(0, src, negated));
+}
+
+// Thumb CMP Rd, Rs (low registers): computes Rd (bits 2:0) - Rs (bits 5:3)
+// purely to update N, Z, C and V; no register is written.
+void T_CMP_REG(ARM* cpu)
+{
+    const u32 lhs = cpu->R[cpu->CurInstr & 0x7];
+    const u32 rhs = cpu->R[(cpu->CurInstr >> 3) & 0x7];
+    const u32 diff = lhs - rhs;
+
+    cpu->SetNZCV(diff & 0x80000000,
+                 !diff,
+                 CARRY_SUB(lhs, rhs),
+                 OVERFLOW_SUB(lhs, rhs, diff));
+}
+
+// Thumb CMN Rd, Rs: computes Rd (bits 2:0) + Rs (bits 5:3) purely to update
+// N, Z, C and V; no register is written.
+void T_CMN_REG(ARM* cpu)
+{
+    const u32 lhs = cpu->R[cpu->CurInstr & 0x7];
+    const u32 rhs = cpu->R[(cpu->CurInstr >> 3) & 0x7];
+    const u32 sum = lhs + rhs;
+
+    cpu->SetNZCV(sum & 0x80000000,
+                 !sum,
+                 CARRY_ADD(lhs, rhs),
+                 OVERFLOW_ADD(lhs, rhs, sum));
+}
+
+// Thumb ORR Rd, Rs: Rd (bits 2:0) |= Rs (bits 5:3). Updates N and Z.
+void T_ORR_REG(ARM* cpu)
+{
+    const u32 rd = cpu->CurInstr & 0x7;
+    const u32 rs = (cpu->CurInstr >> 3) & 0x7;
+    const u32 merged = cpu->R[rd] | cpu->R[rs];
+
+    cpu->R[rd] = merged;
+    cpu->SetNZ(merged & 0x80000000, !merged);
+}
+
+// Thumb MUL Rd, Rs: Rd (bits 2:0) *= Rs (bits 5:3). Updates N and Z.
+// Timing differs per core: the core with Num == 0 is charged a flat 3 cycles;
+// the other one has its carry flag cleared and is charged 1 to 4 cycles
+// depending on the highest non-zero byte of the original Rd value.
+void T_MUL_REG(ARM* cpu)
+{
+    const u32 rd = cpu->CurInstr & 0x7;
+    const u32 lhs = cpu->R[rd];
+    const u32 rhs = cpu->R[(cpu->CurInstr >> 3) & 0x7];
+    const u32 product = lhs * rhs;
+
+    cpu->R[rd] = product;
+    cpu->SetNZ(product & 0x80000000, !product);
+
+    if (cpu->Num == 0)
+    {
+        cpu->Cycles += 3;
+        return;
+    }
+
+    cpu->SetC(0); // carry flag destroyed, they say. whatever that means...
+
+    s32 cost = 1;
+    if (lhs & 0xFF000000) cost = 4;
+    else if (lhs & 0x00FF0000) cost = 3;
+    else if (lhs & 0x0000FF00) cost = 2;
+
+    cpu->Cycles += cost;
+}
+
+// Thumb BIC Rd, Rs: Rd (bits 2:0) &= ~Rs (bits 5:3). Updates N and Z.
+void T_BIC_REG(ARM* cpu)
+{
+    const u32 rd = cpu->CurInstr & 0x7;
+    const u32 rs = (cpu->CurInstr >> 3) & 0x7;
+    const u32 cleared = cpu->R[rd] & ~cpu->R[rs];
+
+    cpu->R[rd] = cleared;
+    cpu->SetNZ(cleared & 0x80000000, !cleared);
+}
+
+// Thumb MVN Rd, Rs: Rd (bits 2:0) = ~Rs (bits 5:3). Updates N and Z.
+void T_MVN_REG(ARM* cpu)
+{
+    const u32 inverted = ~cpu->R[(cpu->CurInstr >> 3) & 0x7];
+
+    cpu->R[cpu->CurInstr & 0x7] = inverted;
+    cpu->SetNZ(inverted & 0x80000000, !inverted);
+}
+
+
+// Thumb high-register ADD: Rd += Rs with full 4-bit register numbers
+// (Rd = bit 7 : bits 2:0, Rs = bits 6:3). Flags are not touched.
+// A destination of R15 turns into a branch; bit 0 of the target is forced on
+// before handing it to JumpTo().
+void T_ADD_HIREG(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const u32 rd = (instr & 0x7) | ((instr >> 4) & 0x8);
+    const u32 rs = (instr >> 3) & 0xF;
+
+    const u32 sum = cpu->R[rd] + cpu->R[rs];
+
+    if (rd == 15)
+    {
+        cpu->JumpTo(sum | 1);
+        return;
+    }
+
+    cpu->R[rd] = sum;
+}
+
+// Thumb high-register CMP: computes Rd - Rs with full 4-bit register numbers
+// (Rd = bit 7 : bits 2:0, Rs = bits 6:3) purely to update N, Z, C and V.
+void T_CMP_HIREG(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const u32 rd = (instr & 0x7) | ((instr >> 4) & 0x8);
+    const u32 rs = (instr >> 3) & 0xF;
+
+    const u32 lhs = cpu->R[rd];
+    const u32 rhs = cpu->R[rs];
+    const u32 diff = lhs - rhs;
+
+    cpu->SetNZCV(diff & 0x80000000,
+                 !diff,
+                 CARRY_SUB(lhs, rhs),
+                 OVERFLOW_SUB(lhs, rhs, diff));
+}
+
+// Thumb high-register MOV: Rd = Rs with full 4-bit register numbers
+// (Rd = bit 7 : bits 2:0, Rs = bits 6:3). Flags are not touched.
+// A destination of R15 turns into a branch; bit 0 of the target is forced on
+// before handing it to JumpTo().
+void T_MOV_HIREG(ARM* cpu)
+{
+    const u32 instr = cpu->CurInstr;
+    const u32 rd = (instr & 0x7) | ((instr >> 4) & 0x8);
+    const u32 value = cpu->R[(instr >> 3) & 0xF];
+
+    if (rd == 15)
+    {
+        cpu->JumpTo(value | 1);
+        return;
+    }
+
+    cpu->R[rd] = value;
+}
+
+
+// Thumb ADD Rd, PC, #imm8*4: Rd (bits 10:8) = (R15 with bit 1 cleared) plus
+// the 8-bit immediate (bits 7:0) scaled by 4.
+void T_ADD_PCREL(ARM* cpu)
+{
+    const u32 base = cpu->R[15] & ~2;
+    const u32 disp = (cpu->CurInstr & 0xFF) << 2;
+
+    cpu->R[(cpu->CurInstr >> 8) & 0x7] = base + disp;
+}
+
+// Thumb ADD Rd, SP, #imm8*4: Rd (bits 10:8) = R13 plus the 8-bit immediate
+// (bits 7:0) scaled by 4.
+void T_ADD_SPREL(ARM* cpu)
+{
+    const u32 base = cpu->R[13];
+    const u32 disp = (cpu->CurInstr & 0xFF) << 2;
+
+    cpu->R[(cpu->CurInstr >> 8) & 0x7] = base + disp;
+}
+
+// Thumb ADD/SUB SP, #imm7*4: adjusts R13 by the 7-bit immediate (bits 6:0)
+// scaled by 4. Bit 7 selects subtraction (1) or addition (0).
+void T_ADD_SP(ARM* cpu)
+{
+    const u32 delta = (cpu->CurInstr & 0x7F) << 2;
+    const u32 sp = cpu->R[13];
+
+    cpu->R[13] = (cpu->CurInstr & (1<<7)) ? (sp - delta) : (sp + delta);
+}
+
+
+}
diff --git a/src/ARMInterpreter_ALU.h b/src/ARMInterpreter_ALU.h
new file mode 100644
index 0000000..4cc3760
--- /dev/null
+++ b/src/ARMInterpreter_ALU.h
@@ -0,0 +1,135 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef ARMINTERPRETER_ALU_H
+#define ARMINTERPRETER_ALU_H
+
+namespace ARMInterpreter
+{
+
+// Declares the 18 handlers of one ARM data-processing opcode `x`:
+// an immediate form plus register forms for each shift type (LSL/LSR/ASR/ROR)
+// with the shift amount given either as an immediate (_IMM) or in a register
+// (_REG), each in a plain and an `_S` variant.
+#define A_PROTO_ALU_OP(x) \
+\
+void A_##x##_IMM(ARM* cpu); \
+void A_##x##_REG_LSL_IMM(ARM* cpu); \
+void A_##x##_REG_LSR_IMM(ARM* cpu); \
+void A_##x##_REG_ASR_IMM(ARM* cpu); \
+void A_##x##_REG_ROR_IMM(ARM* cpu); \
+void A_##x##_REG_LSL_REG(ARM* cpu); \
+void A_##x##_REG_LSR_REG(ARM* cpu); \
+void A_##x##_REG_ASR_REG(ARM* cpu); \
+void A_##x##_REG_ROR_REG(ARM* cpu); \
+void A_##x##_IMM_S(ARM* cpu); \
+void A_##x##_REG_LSL_IMM_S(ARM* cpu); \
+void A_##x##_REG_LSR_IMM_S(ARM* cpu); \
+void A_##x##_REG_ASR_IMM_S(ARM* cpu); \
+void A_##x##_REG_ROR_IMM_S(ARM* cpu); \
+void A_##x##_REG_LSL_REG_S(ARM* cpu); \
+void A_##x##_REG_LSR_REG_S(ARM* cpu); \
+void A_##x##_REG_ASR_REG_S(ARM* cpu); \
+void A_##x##_REG_ROR_REG_S(ARM* cpu);
+
+// Declares the 9 handlers of one ARM compare/test opcode `x`: the same
+// operand forms as A_PROTO_ALU_OP, but without separate `_S` variants.
+#define A_PROTO_ALU_TEST(x) \
+\
+void A_##x##_IMM(ARM* cpu); \
+void A_##x##_REG_LSL_IMM(ARM* cpu); \
+void A_##x##_REG_LSR_IMM(ARM* cpu); \
+void A_##x##_REG_ASR_IMM(ARM* cpu); \
+void A_##x##_REG_ROR_IMM(ARM* cpu); \
+void A_##x##_REG_LSL_REG(ARM* cpu); \
+void A_##x##_REG_LSR_REG(ARM* cpu); \
+void A_##x##_REG_ASR_REG(ARM* cpu); \
+void A_##x##_REG_ROR_REG(ARM* cpu);
+
+// Handler prototypes for the sixteen ARM data-processing opcodes.
+// TST/TEQ/CMP/CMN only set flags and therefore use the TEST form.
+A_PROTO_ALU_OP(AND)
+A_PROTO_ALU_OP(EOR)
+A_PROTO_ALU_OP(SUB)
+A_PROTO_ALU_OP(RSB)
+A_PROTO_ALU_OP(ADD)
+A_PROTO_ALU_OP(ADC)
+A_PROTO_ALU_OP(SBC)
+A_PROTO_ALU_OP(RSC)
+A_PROTO_ALU_TEST(TST)
+A_PROTO_ALU_TEST(TEQ)
+A_PROTO_ALU_TEST(CMP)
+A_PROTO_ALU_TEST(CMN)
+A_PROTO_ALU_OP(ORR)
+A_PROTO_ALU_OP(MOV)
+A_PROTO_ALU_OP(BIC)
+A_PROTO_ALU_OP(MVN)
+
+// ARM multiply family: 32-bit, 64-bit (long) and halfword (xy / Wy) forms.
+void A_MUL(ARM* cpu);
+void A_MLA(ARM* cpu);
+void A_UMULL(ARM* cpu);
+void A_UMLAL(ARM* cpu);
+void A_SMULL(ARM* cpu);
+void A_SMLAL(ARM* cpu);
+void A_SMLAxy(ARM* cpu);
+void A_SMLAWy(ARM* cpu);
+void A_SMULxy(ARM* cpu);
+void A_SMULWy(ARM* cpu);
+void A_SMLALxy(ARM* cpu);
+
+// Count-leading-zeros and saturating add/subtract.
+void A_CLZ(ARM* cpu);
+void A_QADD(ARM* cpu);
+void A_QSUB(ARM* cpu);
+void A_QDADD(ARM* cpu);
+void A_QDSUB(ARM* cpu);
+
+
+// Thumb: shift by immediate.
+void T_LSL_IMM(ARM* cpu);
+void T_LSR_IMM(ARM* cpu);
+void T_ASR_IMM(ARM* cpu);
+
+// Thumb: three-operand add/subtract (register or 3-bit immediate).
+void T_ADD_REG_(ARM* cpu);
+void T_SUB_REG_(ARM* cpu);
+void T_ADD_IMM_(ARM* cpu);
+void T_SUB_IMM_(ARM* cpu);
+
+// Thumb: move/compare/add/subtract with 8-bit immediate.
+void T_MOV_IMM(ARM* cpu);
+void T_CMP_IMM(ARM* cpu);
+void T_ADD_IMM(ARM* cpu);
+void T_SUB_IMM(ARM* cpu);
+
+// Thumb: two-operand ALU operations on low registers.
+void T_AND_REG(ARM* cpu);
+void T_EOR_REG(ARM* cpu);
+void T_LSL_REG(ARM* cpu);
+void T_LSR_REG(ARM* cpu);
+void T_ASR_REG(ARM* cpu);
+void T_ADC_REG(ARM* cpu);
+void T_SBC_REG(ARM* cpu);
+void T_ROR_REG(ARM* cpu);
+void T_TST_REG(ARM* cpu);
+void T_NEG_REG(ARM* cpu);
+void T_CMP_REG(ARM* cpu);
+void T_CMN_REG(ARM* cpu);
+void T_ORR_REG(ARM* cpu);
+void T_MUL_REG(ARM* cpu);
+void T_BIC_REG(ARM* cpu);
+void T_MVN_REG(ARM* cpu);
+
+// Thumb: operations that can address the high registers (R8-R15).
+void T_ADD_HIREG(ARM* cpu);
+void T_CMP_HIREG(ARM* cpu);
+void T_MOV_HIREG(ARM* cpu);
+
+// Thumb: PC-/SP-relative address generation and SP adjustment.
+void T_ADD_PCREL(ARM* cpu);
+void T_ADD_SPREL(ARM* cpu);
+void T_ADD_SP(ARM* cpu);
+
+}
+
+#endif
diff --git a/src/ARMInterpreter_Branch.cpp b/src/ARMInterpreter_Branch.cpp
new file mode 100644
index 0000000..88f316d
--- /dev/null
+++ b/src/ARMInterpreter_Branch.cpp
@@ -0,0 +1,116 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include "ARM.h"
+
+
+namespace ARMInterpreter
+{
+
+
+// ARM B: branches to R15 plus the signed 24-bit word offset held in
+// bits 23:0. Shifting left by 8 and arithmetically right by 6 sign-extends
+// the field and multiplies it by 4 in one go.
+void A_B(ARM* cpu)
+{
+    const s32 disp = (s32)(cpu->CurInstr << 8) >> 6;
+    const u32 target = cpu->R[15] + disp;
+
+    cpu->JumpTo(target);
+}
+
+// ARM BL: like B, but first stores R15 - 4 in R14 as the return address.
+void A_BL(ARM* cpu)
+{
+    const s32 disp = (s32)(cpu->CurInstr << 8) >> 6;
+
+    cpu->R[14] = cpu->R[15] - 4;
+
+    const u32 target = cpu->R[15] + disp;
+    cpu->JumpTo(target);
+}
+
+// ARM BLX (immediate): branch-with-link whose target address has bit 0 set
+// when passed to JumpTo(). Bit 24 of the instruction contributes an extra
+// halfword (+2) to the offset. R14 receives R15 - 4 as the return address.
+void A_BLX_IMM(ARM* cpu)
+{
+    s32 disp = (s32)(cpu->CurInstr << 8) >> 6;
+    if (cpu->CurInstr & 0x01000000)
+        disp += 2;
+
+    cpu->R[14] = cpu->R[15] - 4;
+
+    const u32 target = cpu->R[15] + disp + 1;
+    cpu->JumpTo(target);
+}
+
+// ARM BX Rm: jumps to the address held in Rm (bits 3:0), passed to JumpTo()
+// unmodified.
+void A_BX(ARM* cpu)
+{
+    const u32 target = cpu->R[cpu->CurInstr & 0xF];
+    cpu->JumpTo(target);
+}
+
+// ARM BLX Rm: jumps to the address held in Rm (bits 3:0) and stores the
+// return address (R15 - 4) in R14. The return address is captured before the
+// jump and written only afterwards, so the target is read from Rm first even
+// when Rm is R14.
+void A_BLX_REG(ARM* cpu)
+{
+    const u32 return_addr = cpu->R[15] - 4;
+    const u32 target = cpu->R[cpu->CurInstr & 0xF];
+
+    cpu->JumpTo(target);
+    cpu->R[14] = return_addr;
+}
+
+
+
+// Thumb conditional branch: when the condition in bits 11:8 holds, jumps to
+// R15 plus the signed 8-bit halfword offset in bits 7:0 (sign-extended and
+// doubled by the shift pair). Bit 0 of the target is set for JumpTo().
+void T_BCOND(ARM* cpu)
+{
+    if (!cpu->CheckCondition((cpu->CurInstr >> 8) & 0xF))
+        return;
+
+    const s32 disp = (s32)(cpu->CurInstr << 24) >> 23;
+    cpu->JumpTo(cpu->R[15] + disp + 1);
+}
+
+// Thumb BX Rs: jumps to the address held in the register selected by
+// bits 6:3, passed to JumpTo() unmodified.
+void T_BX(ARM* cpu)
+{
+    const u32 target = cpu->R[(cpu->CurInstr >> 3) & 0xF];
+    cpu->JumpTo(target);
+}
+
+// Thumb BLX Rs: jumps to the address held in the register selected by
+// bits 6:3 and stores R15 - 1 in R14 as the return address.
+// The ARM7 (Num == 1) does not have this instruction: it is logged and
+// ignored there.
+void T_BLX_REG(ARM* cpu)
+{
+    if (cpu->Num == 1)
+    {
+        printf("!! THUMB BLX_REG ON ARM7\n");
+        return;
+    }
+
+    // captured before the jump, written after it
+    const u32 return_addr = cpu->R[15] - 1;
+    const u32 target = cpu->R[(cpu->CurInstr >> 3) & 0xF];
+
+    cpu->JumpTo(target);
+    cpu->R[14] = return_addr;
+}
+
+// Thumb unconditional branch: jumps to R15 plus the signed 11-bit halfword
+// offset in bits 10:0 (sign-extended and doubled by the shift pair). Bit 0 of
+// the target is set for JumpTo().
+void T_B(ARM* cpu)
+{
+    const s32 disp = (s32)((cpu->CurInstr & 0x7FF) << 21) >> 20;
+    const u32 target = cpu->R[15] + disp + 1;
+
+    cpu->JumpTo(target);
+}
+
+// First half of the Thumb long branch-with-link pair: sign-extends the 11-bit
+// field in bits 10:0, shifts it up by 12 and parks R15 + that value in R14
+// for the second half to finish.
+void T_BL_LONG_1(ARM* cpu)
+{
+    const s32 upper = (s32)((cpu->CurInstr & 0x7FF) << 21) >> 9;
+
+    cpu->R[14] = cpu->R[15] + upper;
+}
+
+// Second half of the Thumb long branch-with-link pair: adds the low 11-bit
+// field (bits 10:0, doubled) to the partial target left in R14, then replaces
+// R14 with the return address (R15 - 2, bit 0 set).
+// Bit 0 of the target is set when running on the ARM7 (Num == 1) or when
+// bit 12 of the instruction is set; otherwise it is passed through as is.
+void T_BL_LONG_2(ARM* cpu)
+{
+    const s32 lower = (cpu->CurInstr & 0x7FF) << 1;
+    u32 target = cpu->R[14] + lower;
+
+    cpu->R[14] = (cpu->R[15] - 2) | 1;
+
+    const bool stay_thumb = (cpu->Num == 1) || (cpu->CurInstr & (1<<12));
+    if (stay_thumb)
+        target |= 1;
+
+    cpu->JumpTo(target);
+}
+
+
+
+}
+
diff --git a/src/ARMInterpreter_Branch.h b/src/ARMInterpreter_Branch.h
new file mode 100644
index 0000000..202f490
--- /dev/null
+++ b/src/ARMInterpreter_Branch.h
@@ -0,0 +1,39 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef ARMINTERPRETER_BRANCH_H
+#define ARMINTERPRETER_BRANCH_H
+
+namespace ARMInterpreter
+{
+
+// ARM-state branch handlers.
+void A_B(ARM* cpu);
+void A_BL(ARM* cpu);
+// Defined in ARMInterpreter_Branch.cpp alongside the others; declared here so
+// users of this header do not need their own forward declaration.
+void A_BLX_IMM(ARM* cpu);
+void A_BX(ARM* cpu);
+void A_BLX_REG(ARM* cpu);
+
+// Thumb-state branch handlers.
+void T_BCOND(ARM* cpu);
+void T_BX(ARM* cpu);
+void T_BLX_REG(ARM* cpu);
+void T_B(ARM* cpu);
+void T_BL_LONG_1(ARM* cpu);
+void T_BL_LONG_2(ARM* cpu);
+
+}
+
+#endif
diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp
new file mode 100644
index 0000000..ccbee34
--- /dev/null
+++ b/src/ARMInterpreter_LoadStore.cpp
@@ -0,0 +1,729 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include "ARM.h"
+
+
+namespace ARMInterpreter
+{
+
+
+// copypasta from ALU. bad
+// In-place immediate shifts for register offsets. Each macro expands to a
+// single assignment statement; a shift amount of 0 is special-cased for
+// LSR/ASR/ROR as written below.
+
+// x becomes x shifted left by s.
+#define LSL_IMM(x, s) \
+    x = x << s;
+
+// x becomes x shifted right (logical) by s; an amount of 0 yields 0.
+#define LSR_IMM(x, s) \
+    x = (s == 0) ? 0 : (x >> s);
+
+// x becomes x shifted right (arithmetic) by s; an amount of 0 shifts by 31.
+#define ASR_IMM(x, s) \
+    x = ((s32)x) >> ((s == 0) ? 31 : s);
+
+// x becomes x rotated right by s; an amount of 0 instead shifts x right by
+// one and inserts CPSR bit 29 as the new bit 31.
+#define ROR_IMM(x, s) \
+    x = (s == 0) \
+        ? ((x >> 1) | ((cpu->CPSR & 0x20000000) << 2)) \
+        : (ROR(x, s));
+
+
+
+// Declares `offset` as the 12-bit immediate of the current instruction,
+// negated (modulo 2^32) when instruction bit 23 is clear.
+#define A_WB_CALC_OFFSET_IMM \
+    u32 offset = cpu->CurInstr & 0xFFF; \
+    if ((cpu->CurInstr & (1<<23)) == 0) offset = 0 - offset;
+
+// Declares `offset` as the register selected by bits 0-3, shifted by the
+// 5-bit amount in bits 7-11 using the given shift macro, and negated
+// (modulo 2^32) when instruction bit 23 is clear.
+#define A_WB_CALC_OFFSET_REG(shiftop) \
+    u32 offset = cpu->R[cpu->CurInstr & 0xF]; \
+    u32 shift = (cpu->CurInstr >> 7) & 0x1F; \
+    shiftop(offset, shift); \
+    if ((cpu->CurInstr & (1<<23)) == 0) offset = 0 - offset;
+
+
+
+// Access bodies for word/byte loads and stores. Each expects `offset` to have
+// been declared by one of the A_WB_CALC_OFFSET_* macros. Register fields:
+// bits 16-19 select the base register, bits 12-15 the data register.
+
+// Word store at base + offset; the address is written back to the base
+// register when bit 21 is set.
+#define A_STR \
+ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ cpu->DataWrite32(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \
+ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset;
+
+// Word store at the base address, after which offset is added to the base
+// register. Bit 21 is forwarded as an extra DataWrite32 argument (its meaning
+// is defined by ARM::DataWrite32, not visible here).
+#define A_STR_POST \
+ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ cpu->DataWrite32(addr, cpu->R[(cpu->CurInstr>>12) & 0xF], cpu->CurInstr & (1<<21)); \
+ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
+
+// Byte store at base + offset, with optional writeback (bit 21).
+#define A_STRB \
+ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ cpu->DataWrite8(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \
+ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset;
+
+// Byte store at the base address, then base += offset. Bit 21 is forwarded
+// to DataWrite8 as an extra argument.
+#define A_STRB_POST \
+ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ cpu->DataWrite8(addr, cpu->R[(cpu->CurInstr>>12) & 0xF], cpu->CurInstr & (1<<21)); \
+ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
+
+// Word load from base + offset. The value read is rotated right by
+// 8 * (address & 3). Writeback (bit 21) happens before the destination is
+// written, so the loaded value wins when both name the same register.
+// A destination of 15 jumps to the value (bit 0 cleared first when Num is 1).
+#define A_LDR \
+ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 val = cpu->DataRead32(offset); val = ROR(val, ((offset&0x3)<<3)); \
+ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \
+ cpu->Cycles += 1; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ if (cpu->Num==1) val &= ~0x1; \
+ cpu->JumpTo(val); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
+ }
+
+// Word load from the base address, then base += offset. Same rotation and
+// R15 handling as A_LDR; bit 21 is forwarded to DataRead32.
+#define A_LDR_POST \
+ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 val = cpu->DataRead32(addr, cpu->CurInstr & (1<<21)); val = ROR(val, ((addr&0x3)<<3)); \
+ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \
+ cpu->Cycles += 1; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) \
+ { \
+ if (cpu->Num==1) val &= ~0x1; \
+ cpu->JumpTo(val); \
+ } \
+ else \
+ { \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
+ }
+
+// Byte load from base + offset with optional writeback. A destination of 15
+// is written directly (no JumpTo) and a diagnostic is printed.
+#define A_LDRB \
+ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 val = cpu->DataRead8(offset); \
+ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \
+ cpu->Cycles += 1; \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRB PC %08X\n", cpu->R[15]); \
+
+// Byte load from the base address, then base += offset. Same R15 diagnostic
+// as A_LDRB; bit 21 is forwarded to DataRead8.
+#define A_LDRB_POST \
+ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ u32 val = cpu->DataRead8(addr, cpu->CurInstr & (1<<21)); \
+ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \
+ cpu->Cycles += 1; \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRB PC %08X\n", cpu->R[15]); \
+
+
+
+// Generates the ten handlers for one word/byte load-store operation `x`:
+// immediate offset plus four shifted-register offsets (LSL/LSR/ASR/ROR), each
+// in a plain form (body A_<x>) and a _POST form (body A_<x>_POST). Every
+// handler first expands an offset-calculation macro, which declares `offset`,
+// and then the access body that consumes it.
+#define A_IMPLEMENT_WB_LDRSTR(x) \
+\
+void A_##x##_IMM(ARM* cpu) \
+{ \
+ A_WB_CALC_OFFSET_IMM \
+ A_##x \
+} \
+\
+void A_##x##_REG_LSL(ARM* cpu) \
+{ \
+ A_WB_CALC_OFFSET_REG(LSL_IMM) \
+ A_##x \
+} \
+\
+void A_##x##_REG_LSR(ARM* cpu) \
+{ \
+ A_WB_CALC_OFFSET_REG(LSR_IMM) \
+ A_##x \
+} \
+\
+void A_##x##_REG_ASR(ARM* cpu) \
+{ \
+ A_WB_CALC_OFFSET_REG(ASR_IMM) \
+ A_##x \
+} \
+\
+void A_##x##_REG_ROR(ARM* cpu) \
+{ \
+ A_WB_CALC_OFFSET_REG(ROR_IMM) \
+ A_##x \
+} \
+\
+void A_##x##_POST_IMM(ARM* cpu) \
+{ \
+ A_WB_CALC_OFFSET_IMM \
+ A_##x##_POST \
+} \
+\
+void A_##x##_POST_REG_LSL(ARM* cpu) \
+{ \
+ A_WB_CALC_OFFSET_REG(LSL_IMM) \
+ A_##x##_POST \
+} \
+\
+void A_##x##_POST_REG_LSR(ARM* cpu) \
+{ \
+ A_WB_CALC_OFFSET_REG(LSR_IMM) \
+ A_##x##_POST \
+} \
+\
+void A_##x##_POST_REG_ASR(ARM* cpu) \
+{ \
+ A_WB_CALC_OFFSET_REG(ASR_IMM) \
+ A_##x##_POST \
+} \
+\
+void A_##x##_POST_REG_ROR(ARM* cpu) \
+{ \
+ A_WB_CALC_OFFSET_REG(ROR_IMM) \
+ A_##x##_POST \
+}
+
+// Instantiate the handlers for the four word/byte operations.
+A_IMPLEMENT_WB_LDRSTR(STR)
+A_IMPLEMENT_WB_LDRSTR(STRB)
+A_IMPLEMENT_WB_LDRSTR(LDR)
+A_IMPLEMENT_WB_LDRSTR(LDRB)
+
+
+
+// Declares `offset` as the 8-bit immediate assembled from instruction bits
+// 0-3 (low nibble) and bits 8-11 (high nibble), negated (modulo 2^32) when
+// instruction bit 23 is clear.
+#define A_HD_CALC_OFFSET_IMM \
+    u32 offset = ((cpu->CurInstr >> 4) & 0xF0) | (cpu->CurInstr & 0xF); \
+    if ((cpu->CurInstr & (1<<23)) == 0) offset = 0 - offset;
+
+// Declares `offset` as the register selected by bits 0-3, negated
+// (modulo 2^32) when instruction bit 23 is clear.
+#define A_HD_CALC_OFFSET_REG \
+    u32 offset = cpu->R[cpu->CurInstr & 0xF]; \
+    if ((cpu->CurInstr & (1<<23)) == 0) offset = 0 - offset;
+
+
+
+// Access bodies for halfword/doubleword stores and doubleword loads. Each
+// expects `offset` from an A_HD_CALC_OFFSET_* macro. Bits 16-19 select the
+// base register, bits 12-15 the (first) data register.
+
+// Halfword store at base + offset, with optional writeback (bit 21).
+#define A_STRH \
+ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ cpu->DataWrite16(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \
+ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \
+
+// Halfword store at the base address, then base += offset.
+#define A_STRH_POST \
+ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ cpu->DataWrite16(addr, cpu->R[(cpu->CurInstr>>12) & 0xF]); \
+ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \
+
+// TODO: CHECK LDRD/STRD TIMINGS!! also, ARM9-only
+
+// Doubleword load: two consecutive words from base + offset into registers
+// r and r+1, after the optional writeback (bit 21).
+// NOTE(review): r is not range-checked, so r == 15 would index R[16] --
+// confirm the decoder or the size of R rules this out.
+#define A_LDRD \
+ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \
+ cpu->Cycles += 1; \
+ u32 r = (cpu->CurInstr>>12) & 0xF; \
+ cpu->R[r ] = cpu->DataRead32(offset ); \
+ cpu->R[r+1] = cpu->DataRead32(offset+4); \
+
+// Doubleword load from the original base address; the base register is
+// advanced by offset before the two reads are issued.
+#define A_LDRD_POST \
+ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \
+ cpu->Cycles += 1; \
+ u32 r = (cpu->CurInstr>>12) & 0xF; \
+ cpu->R[r ] = cpu->DataRead32(addr ); \
+ cpu->R[r+1] = cpu->DataRead32(addr+4); \
+
+// Doubleword store: registers r and r+1 to two consecutive words at
+// base + offset, after the optional writeback (bit 21).
+#define A_STRD \
+ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \
+ u32 r = (cpu->CurInstr>>12) & 0xF; \
+ cpu->DataWrite32(offset , cpu->R[r ]); \
+ cpu->DataWrite32(offset+4, cpu->R[r+1]); \
+
+// Post-indexed doubleword store. Expects `offset` from an
+// A_HD_CALC_OFFSET_* macro.
+//
+// The two words are stored at the ORIGINAL value of the base register (bits
+// 16-19), which is captured in `addr` before the base is advanced by offset.
+// Previously the stores were issued at `offset` / `offset+4`, i.e. at the raw
+// offset value rather than at the base address; this now mirrors
+// A_LDRD_POST.
+#define A_STRD_POST \
+    u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+    cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \
+    u32 r = (cpu->CurInstr>>12) & 0xF; \
+    cpu->DataWrite32(addr  , cpu->R[r  ]); \
+    cpu->DataWrite32(addr+4, cpu->R[r+1]);
+
+// Access bodies for halfword and sign-extending loads. Each expects `offset`
+// from an A_HD_CALC_OFFSET_* macro. The plain forms read from base + offset
+// after the optional writeback (bit 21); the _POST forms read from the
+// original base address after advancing the base register by offset. All of
+// them write the destination register directly and only print a diagnostic
+// when the destination is 15.
+
+// Halfword load, stored as returned by DataRead16.
+#define A_LDRH \
+ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = cpu->DataRead16(offset); \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRH PC %08X\n", cpu->R[15]); \
+
+// Post-indexed halfword load.
+#define A_LDRH_POST \
+ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = cpu->DataRead16(addr); \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRH PC %08X\n", cpu->R[15]); \
+
+// Byte load, sign-extended from 8 to 32 bits.
+#define A_LDRSB \
+ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->DataRead8(offset); \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSB PC %08X\n", cpu->R[15]); \
+
+// Post-indexed sign-extending byte load.
+#define A_LDRSB_POST \
+ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->DataRead8(addr); \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSB PC %08X\n", cpu->R[15]); \
+
+// Halfword load, sign-extended from 16 to 32 bits.
+#define A_LDRSH \
+ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->DataRead16(offset); \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSH PC %08X\n", cpu->R[15]); \
+
+// Post-indexed sign-extending halfword load.
+#define A_LDRSH_POST \
+ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
+ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \
+ cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->DataRead16(addr); \
+ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSH PC %08X\n", cpu->R[15]); \
+
+
+// Generates the four handlers for one halfword/doubleword/signed operation
+// `x`: immediate and register offset, each in a plain form (body A_<x>) and
+// a _POST form (body A_<x>_POST). The offset-calculation macro declares
+// `offset`, which the access body then consumes.
+#define A_IMPLEMENT_HD_LDRSTR(x) \
+\
+void A_##x##_IMM(ARM* cpu) \
+{ \
+ A_HD_CALC_OFFSET_IMM \
+ A_##x \
+} \
+\
+void A_##x##_REG(ARM* cpu) \
+{ \
+ A_HD_CALC_OFFSET_REG \
+ A_##x \
+} \
+void A_##x##_POST_IMM(ARM* cpu) \
+{ \
+ A_HD_CALC_OFFSET_IMM \
+ A_##x##_POST \
+} \
+\
+void A_##x##_POST_REG(ARM* cpu) \
+{ \
+ A_HD_CALC_OFFSET_REG \
+ A_##x##_POST \
+}
+
+// Instantiate the handlers for the six operations of this group.
+A_IMPLEMENT_HD_LDRSTR(STRH)
+A_IMPLEMENT_HD_LDRSTR(LDRD)
+A_IMPLEMENT_HD_LDRSTR(STRD)
+A_IMPLEMENT_HD_LDRSTR(LDRH)
+A_IMPLEMENT_HD_LDRSTR(LDRSB)
+A_IMPLEMENT_HD_LDRSTR(LDRSH)
+
+
+
+// Word swap: reads the word at the address held in the register selected by
+// bits 16-19, places it (rotated right by 8 * (address & 3)) in the register
+// selected by bits 12-15, then writes the register selected by bits 0-3 to
+// the same address.
+void A_SWP(ARM* cpu)
+{
+    // Both source values are sampled before the destination is written, so
+    // the swap is correct even when the registers overlap.
+    const u32 addr = cpu->R[(cpu->CurInstr >> 16) & 0xF];
+    const u32 newval = cpu->R[cpu->CurInstr & 0xF];
+
+    u32 oldval = cpu->DataRead32(addr);
+    oldval = ROR(oldval, 8*(addr&0x3));
+    cpu->R[(cpu->CurInstr >> 12) & 0xF] = oldval;
+
+    cpu->DataWrite32(addr, newval);
+
+    cpu->Cycles += 1;
+}
+
+// Byte swap: reads the byte at the address held in the register selected by
+// bits 16-19 into the register selected by bits 12-15, then writes the low
+// byte of the register selected by bits 0-3 to the same address.
+void A_SWPB(ARM* cpu)
+{
+    // Sample both inputs before the destination register is overwritten.
+    const u32 addr = cpu->R[(cpu->CurInstr >> 16) & 0xF];
+    const u32 newbyte = cpu->R[cpu->CurInstr & 0xF] & 0xFF;
+
+    u32 oldbyte = cpu->DataRead8(addr);
+    cpu->R[(cpu->CurInstr >> 12) & 0xF] = oldbyte;
+
+    cpu->DataWrite8(addr, newbyte);
+
+    cpu->Cycles += 1;
+}
+
+
+
+// Load-multiple handler.
+//
+// Instruction bits used: 16-19 base register, 0-15 register list, 24 whether
+// the address is bumped before or after each read, 23 clear means the block
+// lies below the base, 21 requests base writeback, 22 (when R15 is not in
+// the list) performs the loads with the register bank switched to mode 0x10.
+void A_LDM(ARM* cpu)
+{
+ u32 baseid = (cpu->CurInstr >> 16) & 0xF;
+ u32 base = cpu->R[baseid];
+ // Only assigned, and only read, when bit 21 (writeback) is set.
+ u32 wbbase;
+ u32 preinc = (cpu->CurInstr & (1<<24));
+
+ if (!(cpu->CurInstr & (1<<23)))
+ {
+ // Descending transfer: start 4 bytes lower for every listed register,
+ // then walk upwards through the block.
+ for (int i = 0; i < 16; i++)
+ {
+ if (cpu->CurInstr & (1<<i))
+ base -= 4;
+ }
+
+ if (cpu->CurInstr & (1<<21))
+ {
+ // pre writeback
+ wbbase = base;
+ }
+
+ // Walking upwards from the lowered base inverts the before/after sense.
+ preinc = !preinc;
+ }
+
+ cpu->Cycles += 1;
+
+ // Bit 22 without R15 in the list: switch the register bank to mode 0x10
+ // for the duration of the loads (restored further down).
+ if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15)))
+ cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10);
+
+ // R0-R14 are loaded here; R15 is handled separately because it jumps.
+ for (int i = 0; i < 15; i++)
+ {
+ if (cpu->CurInstr & (1<<i))
+ {
+ if (preinc) base += 4;
+ cpu->R[i] = cpu->DataRead32(base);
+ if (!preinc) base += 4;
+ }
+ }
+
+ if (cpu->CurInstr & (1<<15))
+ {
+ if (preinc) base += 4;
+ u32 pc = cpu->DataRead32(base);
+ if (!preinc) base += 4;
+
+ // CPU number 1 has bit 0 of the loaded address cleared before the jump.
+ if (cpu->Num == 1)
+ pc &= ~0x1;
+
+ // Bit 22 is forwarded as JumpTo's second argument (its meaning is
+ // defined by ARM::JumpTo, not visible here).
+ cpu->JumpTo(pc, cpu->CurInstr & (1<<22));
+ }
+
+ if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15)))
+ cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR);
+
+ if (cpu->CurInstr & (1<<21))
+ {
+ // post writeback
+ if (cpu->CurInstr & (1<<23))
+ wbbase = base;
+
+ if (cpu->CurInstr & (1 << baseid))
+ {
+ // Base register is also in the list. CPU number 0 writes back only
+ // when the base is the sole register listed or a higher-numbered
+ // register is also listed; other CPUs keep the loaded value.
+ if (cpu->Num == 0)
+ {
+ u32 rlist = cpu->CurInstr & 0xFFFF;
+ if ((!(rlist & ~(1 << baseid))) || (rlist & ~((2 << baseid) - 1)))
+ cpu->R[baseid] = wbbase;
+ }
+ }
+ else
+ cpu->R[baseid] = wbbase;
+ }
+}
+
+// Store-multiple handler.
+//
+// Instruction bits used: 16-19 base register, 0-15 register list, 24 whether
+// the address is bumped before or after each store, 23 clear means the block
+// lies below the base, 21 requests base writeback, 22 performs the stores
+// with the register bank switched to mode 0x10.
+//
+// When the base register is itself in the list (and is not banked away by
+// the mode switch), the original base value is stored if the CPU number is 0
+// or if the base is the lowest-numbered register in the list.
+void A_STM(ARM* cpu)
+{
+    u32 baseid = (cpu->CurInstr >> 16) & 0xF;
+    u32 base = cpu->R[baseid];
+    u32 oldbase = base;
+    u32 preinc = (cpu->CurInstr & (1<<24));
+
+    if (!(cpu->CurInstr & (1<<23)))
+    {
+        // Descending transfer: start 4 bytes lower for every listed register,
+        // then walk upwards through the block.
+        for (u32 i = 0; i < 16; i++)
+        {
+            if (cpu->CurInstr & (1<<i))
+                base -= 4;
+        }
+
+        // Descending writeback is applied up front; oldbase keeps the
+        // original value for the base-in-list case below.
+        if (cpu->CurInstr & (1<<21))
+            cpu->R[baseid] = base;
+
+        // Walking upwards from the lowered base inverts the before/after sense.
+        preinc = !preinc;
+    }
+
+    bool isbanked = false;
+    if (cpu->CurInstr & (1<<22))
+    {
+        // Decide whether the base register is one that the current mode
+        // banks: R8-R14 in mode 0x11, R13-R14 in every mode other than 0x10
+        // and 0x1F. In that case R[baseid] after the switch below is used
+        // as-is rather than the saved oldbase.
+        u32 mode = (cpu->CPSR & 0x1F);
+        if (mode == 0x11)
+            isbanked = (baseid >= 8 && baseid < 15);
+        else if (mode != 0x10 && mode != 0x1F)
+            isbanked = (baseid >= 13 && baseid < 15);
+
+        cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10);
+    }
+
+    for (u32 i = 0; i < 16; i++)
+    {
+        if (cpu->CurInstr & (1<<i))
+        {
+            if (preinc) base += 4;
+
+            if (i == baseid && !isbanked)
+            {
+                // "No lower-numbered register is listed" must be tested with
+                // the bitmask of registers below i, i.e. (1<<i)-1. The old
+                // code masked with the register NUMBER minus one (i-1), which
+                // tested unrelated list bits (and all bits for i == 0).
+                if ((cpu->Num == 0) || (!(cpu->CurInstr & ((1<<i)-1))))
+                    cpu->DataWrite32(base, oldbase);
+                else
+                    cpu->DataWrite32(base, base); // checkme
+                // NOTE(review): the fallback stores the current transfer
+                // address, which is neither the original base nor the final
+                // writeback value -- confirm against hardware documentation.
+            }
+            else
+                cpu->DataWrite32(base, cpu->R[i]);
+
+            if (!preinc) base += 4;
+        }
+    }
+
+    if (cpu->CurInstr & (1<<22))
+        cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR);
+
+    // Ascending writeback happens after the transfer, with the final address.
+    if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21)))
+        cpu->R[baseid] = base;
+}
+
+
+
+
+// ---- THUMB -----------------------
+
+
+
+// Word load relative to R15: the address is R15 with bit 1 cleared plus the
+// 8-bit immediate times four; the destination is selected by bits 8-10.
+void T_LDR_PCREL(ARM* cpu)
+{
+    u32 pcbase = cpu->R[15] & ~0x2;
+    u32 disp = (cpu->CurInstr & 0xFF) << 2;
+    u32 rd = (cpu->CurInstr >> 8) & 0x7;
+
+    cpu->R[rd] = cpu->DataRead32(pcbase + disp);
+
+    cpu->Cycles += 1;
+}
+
+
+// Word store with register offset: address = R[bits 3-5] + R[bits 6-8],
+// data = R[bits 0-2].
+void T_STR_REG(ARM* cpu)
+{
+    u32 rb = (cpu->CurInstr >> 3) & 0x7;
+    u32 ro = (cpu->CurInstr >> 6) & 0x7;
+    u32 rd = cpu->CurInstr & 0x7;
+
+    cpu->DataWrite32(cpu->R[rb] + cpu->R[ro], cpu->R[rd]);
+}
+
+// Byte store with register offset: address = R[bits 3-5] + R[bits 6-8],
+// data = R[bits 0-2].
+void T_STRB_REG(ARM* cpu)
+{
+    u32 rb = (cpu->CurInstr >> 3) & 0x7;
+    u32 ro = (cpu->CurInstr >> 6) & 0x7;
+    u32 rd = cpu->CurInstr & 0x7;
+
+    cpu->DataWrite8(cpu->R[rb] + cpu->R[ro], cpu->R[rd]);
+}
+
+// Word load with register offset: address = R[bits 3-5] + R[bits 6-8]. The
+// word read is rotated right by 8 * (address & 3) before it is stored in
+// R[bits 0-2].
+void T_LDR_REG(ARM* cpu)
+{
+    u32 rb = (cpu->CurInstr >> 3) & 0x7;
+    u32 ro = (cpu->CurInstr >> 6) & 0x7;
+    u32 src = cpu->R[rb] + cpu->R[ro];
+
+    u32 word = cpu->DataRead32(src);
+    word = ROR(word, 8*(src&0x3));
+    cpu->R[cpu->CurInstr & 0x7] = word;
+
+    cpu->Cycles += 1;
+}
+
+// Byte load with register offset: address = R[bits 3-5] + R[bits 6-8],
+// destination = R[bits 0-2].
+void T_LDRB_REG(ARM* cpu)
+{
+    u32 rb = (cpu->CurInstr >> 3) & 0x7;
+    u32 ro = (cpu->CurInstr >> 6) & 0x7;
+    u32 rd = cpu->CurInstr & 0x7;
+
+    cpu->R[rd] = cpu->DataRead8(cpu->R[rb] + cpu->R[ro]);
+
+    cpu->Cycles += 1;
+}
+
+
+// Halfword store with register offset: address = R[bits 3-5] + R[bits 6-8],
+// data = R[bits 0-2].
+void T_STRH_REG(ARM* cpu)
+{
+    u32 rb = (cpu->CurInstr >> 3) & 0x7;
+    u32 ro = (cpu->CurInstr >> 6) & 0x7;
+    u32 rd = cpu->CurInstr & 0x7;
+
+    cpu->DataWrite16(cpu->R[rb] + cpu->R[ro], cpu->R[rd]);
+}
+
+// Sign-extending byte load with register offset: address = R[bits 3-5] +
+// R[bits 6-8]; the byte is widened from 8 to 32 bits into R[bits 0-2].
+void T_LDRSB_REG(ARM* cpu)
+{
+    u32 rb = (cpu->CurInstr >> 3) & 0x7;
+    u32 ro = (cpu->CurInstr >> 6) & 0x7;
+    u32 rd = cpu->CurInstr & 0x7;
+
+    cpu->R[rd] = (s32)(s8)cpu->DataRead8(cpu->R[rb] + cpu->R[ro]);
+
+    cpu->Cycles += 1;
+}
+
+// Halfword load with register offset: address = R[bits 3-5] + R[bits 6-8],
+// destination = R[bits 0-2].
+void T_LDRH_REG(ARM* cpu)
+{
+    u32 rb = (cpu->CurInstr >> 3) & 0x7;
+    u32 ro = (cpu->CurInstr >> 6) & 0x7;
+    u32 rd = cpu->CurInstr & 0x7;
+
+    cpu->R[rd] = cpu->DataRead16(cpu->R[rb] + cpu->R[ro]);
+
+    cpu->Cycles += 1;
+}
+
+// Sign-extending halfword load with register offset: address = R[bits 3-5] +
+// R[bits 6-8]; the halfword is widened from 16 to 32 bits into R[bits 0-2].
+void T_LDRSH_REG(ARM* cpu)
+{
+    u32 rb = (cpu->CurInstr >> 3) & 0x7;
+    u32 ro = (cpu->CurInstr >> 6) & 0x7;
+    u32 rd = cpu->CurInstr & 0x7;
+
+    cpu->R[rd] = (s32)(s16)cpu->DataRead16(cpu->R[rb] + cpu->R[ro]);
+
+    cpu->Cycles += 1;
+}
+
+
+// Word store with immediate offset: the 5-bit immediate in bits 6-10 is
+// taken times four and added to R[bits 3-5]; data = R[bits 0-2].
+void T_STR_IMM(ARM* cpu)
+{
+    u32 disp = (cpu->CurInstr >> 4) & 0x7C;
+    u32 dst = cpu->R[(cpu->CurInstr >> 3) & 0x7] + disp;
+
+    cpu->DataWrite32(dst, cpu->R[cpu->CurInstr & 0x7]);
+}
+
+// Word load with immediate offset: the 5-bit immediate in bits 6-10 is taken
+// times four and added to R[bits 3-5]. The word read is rotated right by
+// 8 * (address & 3) before it is stored in R[bits 0-2].
+void T_LDR_IMM(ARM* cpu)
+{
+    u32 disp = (cpu->CurInstr >> 4) & 0x7C;
+    u32 src = cpu->R[(cpu->CurInstr >> 3) & 0x7] + disp;
+
+    u32 word = cpu->DataRead32(src);
+    word = ROR(word, 8*(src&0x3));
+    cpu->R[cpu->CurInstr & 0x7] = word;
+
+    cpu->Cycles += 1;
+}
+
+// Byte store with immediate offset: the 5-bit immediate in bits 6-10 is
+// added to R[bits 3-5]; data = R[bits 0-2].
+void T_STRB_IMM(ARM* cpu)
+{
+    u32 disp = (cpu->CurInstr >> 6) & 0x1F;
+    u32 dst = cpu->R[(cpu->CurInstr >> 3) & 0x7] + disp;
+
+    cpu->DataWrite8(dst, cpu->R[cpu->CurInstr & 0x7]);
+}
+
+// Byte load with immediate offset: the 5-bit immediate in bits 6-10 is added
+// to R[bits 3-5]; destination = R[bits 0-2].
+void T_LDRB_IMM(ARM* cpu)
+{
+    u32 disp = (cpu->CurInstr >> 6) & 0x1F;
+    u32 src = cpu->R[(cpu->CurInstr >> 3) & 0x7] + disp;
+
+    cpu->R[cpu->CurInstr & 0x7] = cpu->DataRead8(src);
+
+    cpu->Cycles += 1;
+}
+
+
+// Halfword store with immediate offset: the 5-bit immediate in bits 6-10 is
+// taken times two and added to R[bits 3-5]; data = R[bits 0-2].
+void T_STRH_IMM(ARM* cpu)
+{
+    u32 disp = (cpu->CurInstr >> 5) & 0x3E;
+    u32 dst = cpu->R[(cpu->CurInstr >> 3) & 0x7] + disp;
+
+    cpu->DataWrite16(dst, cpu->R[cpu->CurInstr & 0x7]);
+}
+
+// Halfword load with immediate offset: the 5-bit immediate in bits 6-10 is
+// taken times two and added to R[bits 3-5]; destination = R[bits 0-2].
+void T_LDRH_IMM(ARM* cpu)
+{
+    u32 disp = (cpu->CurInstr >> 5) & 0x3E;
+    u32 src = cpu->R[(cpu->CurInstr >> 3) & 0x7] + disp;
+
+    cpu->R[cpu->CurInstr & 0x7] = cpu->DataRead16(src);
+
+    cpu->Cycles += 1;
+}
+
+
+// Word store relative to R13: the 8-bit immediate is taken times four and
+// added to R13; data = R[bits 8-10].
+void T_STR_SPREL(ARM* cpu)
+{
+    u32 disp = (cpu->CurInstr << 2) & 0x3FC;
+    u32 dst = cpu->R[13] + disp;
+
+    cpu->DataWrite32(dst, cpu->R[(cpu->CurInstr >> 8) & 0x7]);
+}
+
+// Word load relative to R13: the 8-bit immediate is taken times four and
+// added to R13; destination = R[bits 8-10]. The value is stored as read (no
+// rotation is applied here).
+void T_LDR_SPREL(ARM* cpu)
+{
+    u32 disp = (cpu->CurInstr << 2) & 0x3FC;
+    u32 src = cpu->R[13] + disp;
+
+    cpu->R[(cpu->CurInstr >> 8) & 0x7] = cpu->DataRead32(src);
+
+    cpu->Cycles += 1;
+}
+
+
+// Push: bits 0-7 select R0-R7 and bit 8 selects R14. R13 is lowered by four
+// bytes per selected register first, then the registers are stored upwards
+// from the new R13 in ascending order, with R14 last.
+void T_PUSH(ARM* cpu)
+{
+    // Count every selected register, including the R14 bit, in one pass.
+    int count = 0;
+    for (int bit = 0; bit <= 8; bit++)
+    {
+        if (cpu->CurInstr & (1<<bit))
+            count++;
+    }
+
+    u32 addr = cpu->R[13] - (count<<2);
+    cpu->R[13] = addr;
+
+    for (int reg = 0; reg < 8; reg++)
+    {
+        if (!(cpu->CurInstr & (1<<reg)))
+            continue;
+
+        cpu->DataWrite32(addr, cpu->R[reg]);
+        addr += 4;
+    }
+
+    if (cpu->CurInstr & (1<<8))
+        cpu->DataWrite32(addr, cpu->R[14]);
+}
+
+// Pop: bits 0-7 select R0-R7, loaded upwards from R13 in ascending order.
+// Bit 8 additionally pops a jump target (bit 0 forced on for CPU number 1)
+// and jumps to it. R13 ends up just past the last word read.
+void T_POP(ARM* cpu)
+{
+    u32 sp = cpu->R[13];
+
+    cpu->Cycles += 1;
+
+    for (int reg = 0; reg < 8; reg++)
+    {
+        if (!(cpu->CurInstr & (1<<reg)))
+            continue;
+
+        cpu->R[reg] = cpu->DataRead32(sp);
+        sp += 4;
+    }
+
+    if (cpu->CurInstr & (1<<8))
+    {
+        u32 target = cpu->DataRead32(sp);
+        sp += 4;
+
+        if (cpu->Num==1) target |= 0x1;
+        cpu->JumpTo(target);
+    }
+
+    cpu->R[13] = sp;
+}
+
+// Store-multiple, increment-after: bits 8-10 select the base register and
+// bits 0-7 the registers stored upwards from it. The base register always
+// receives the final address.
+void T_STMIA(ARM* cpu)
+{
+    u32 rb = (cpu->CurInstr >> 8) & 0x7;
+    u32 addr = cpu->R[rb];
+
+    for (int reg = 0; reg < 8; reg++)
+    {
+        if (!(cpu->CurInstr & (1<<reg)))
+            continue;
+
+        cpu->DataWrite32(addr, cpu->R[reg]);
+        addr += 4;
+    }
+
+    // TODO: check "Rb included in Rlist" case
+    cpu->R[rb] = addr;
+}
+
+// Load-multiple, increment-after: bits 8-10 select the base register and
+// bits 0-7 the registers loaded upwards from it. The base register receives
+// the final address only when it is not itself in the list.
+void T_LDMIA(ARM* cpu)
+{
+    u32 rb = (cpu->CurInstr >> 8) & 0x7;
+    u32 addr = cpu->R[rb];
+
+    cpu->Cycles += 1;
+
+    for (int reg = 0; reg < 8; reg++)
+    {
+        if (!(cpu->CurInstr & (1<<reg)))
+            continue;
+
+        cpu->R[reg] = cpu->DataRead32(addr);
+        addr += 4;
+    }
+
+    if ((cpu->CurInstr & (1<<rb)) == 0)
+        cpu->R[rb] = addr;
+}
+
+
+}
+
diff --git a/src/ARMInterpreter_LoadStore.h b/src/ARMInterpreter_LoadStore.h
new file mode 100644
index 0000000..4ea0e54
--- /dev/null
+++ b/src/ARMInterpreter_LoadStore.h
@@ -0,0 +1,95 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef ARMINTERPRETER_LOADSTORE_H
+#define ARMINTERPRETER_LOADSTORE_H
+
+// Prototypes for the load/store instruction handlers. Each handler receives
+// the CPU whose CurInstr holds the instruction being executed.
+namespace ARMInterpreter
+{
+
+// Declares the ten handlers of one word/byte operation: immediate offset and
+// four shifted-register offsets, each in a plain and a _POST variant.
+#define A_PROTO_WB_LDRSTR(x) \
+\
+void A_##x##_IMM(ARM* cpu); \
+void A_##x##_REG_LSL(ARM* cpu); \
+void A_##x##_REG_LSR(ARM* cpu); \
+void A_##x##_REG_ASR(ARM* cpu); \
+void A_##x##_REG_ROR(ARM* cpu); \
+void A_##x##_POST_IMM(ARM* cpu); \
+void A_##x##_POST_REG_LSL(ARM* cpu); \
+void A_##x##_POST_REG_LSR(ARM* cpu); \
+void A_##x##_POST_REG_ASR(ARM* cpu); \
+void A_##x##_POST_REG_ROR(ARM* cpu);
+
+A_PROTO_WB_LDRSTR(STR)
+A_PROTO_WB_LDRSTR(STRB)
+A_PROTO_WB_LDRSTR(LDR)
+A_PROTO_WB_LDRSTR(LDRB)
+
+// Declares the four handlers of one halfword/doubleword/signed operation:
+// immediate and register offset, each in a plain and a _POST variant.
+#define A_PROTO_HD_LDRSTR(x) \
+\
+void A_##x##_IMM(ARM* cpu); \
+void A_##x##_REG(ARM* cpu); \
+void A_##x##_POST_IMM(ARM* cpu); \
+void A_##x##_POST_REG(ARM* cpu);
+
+A_PROTO_HD_LDRSTR(STRH)
+A_PROTO_HD_LDRSTR(LDRD)
+A_PROTO_HD_LDRSTR(STRD)
+A_PROTO_HD_LDRSTR(LDRH)
+A_PROTO_HD_LDRSTR(LDRSB)
+A_PROTO_HD_LDRSTR(LDRSH)
+
+// Multi-register transfers.
+void A_LDM(ARM* cpu);
+void A_STM(ARM* cpu);
+
+// Register/memory swaps (word and byte).
+void A_SWP(ARM* cpu);
+void A_SWPB(ARM* cpu);
+
+
+// T_-prefixed handlers (defined under the THUMB section of the .cpp file).
+
+// Load relative to R15.
+void T_LDR_PCREL(ARM* cpu);
+
+// Register-offset word/byte accesses.
+void T_STR_REG(ARM* cpu);
+void T_STRB_REG(ARM* cpu);
+void T_LDR_REG(ARM* cpu);
+void T_LDRB_REG(ARM* cpu);
+
+// Register-offset halfword and sign-extending accesses.
+void T_STRH_REG(ARM* cpu);
+void T_LDRSB_REG(ARM* cpu);
+void T_LDRH_REG(ARM* cpu);
+void T_LDRSH_REG(ARM* cpu);
+
+// Immediate-offset word/byte accesses.
+void T_STR_IMM(ARM* cpu);
+void T_LDR_IMM(ARM* cpu);
+void T_STRB_IMM(ARM* cpu);
+void T_LDRB_IMM(ARM* cpu);
+
+// Immediate-offset halfword accesses.
+void T_STRH_IMM(ARM* cpu);
+void T_LDRH_IMM(ARM* cpu);
+
+// Accesses relative to R13.
+void T_STR_SPREL(ARM* cpu);
+void T_LDR_SPREL(ARM* cpu);
+
+// Multi-register transfers.
+void T_PUSH(ARM* cpu);
+void T_POP(ARM* cpu);
+void T_STMIA(ARM* cpu);
+void T_LDMIA(ARM* cpu);
+
+}
+
+#endif
+
diff --git a/src/ARM_InstrTable.h b/src/ARM_InstrTable.h
new file mode 100644
index 0000000..830a2d0
--- /dev/null
+++ b/src/ARM_InstrTable.h
@@ -0,0 +1,1979 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+INSTRFUNC_PROTO(ARMInstrTable[4096]) =
+{
+ // 0000 0000 0000
+ A_AND_REG_LSL_IMM, A_AND_REG_LSL_REG, A_AND_REG_LSR_IMM, A_AND_REG_LSR_REG,
+ A_AND_REG_ASR_IMM, A_AND_REG_ASR_REG, A_AND_REG_ROR_IMM, A_AND_REG_ROR_REG,
+ A_AND_REG_LSL_IMM, A_MUL, A_AND_REG_LSR_IMM, A_STRH_POST_REG,
+ A_AND_REG_ASR_IMM, A_LDRD_POST_REG, A_AND_REG_ROR_IMM, A_STRD_POST_REG,
+
+ // 0000 0001 0000
+ A_AND_REG_LSL_IMM_S, A_AND_REG_LSL_REG_S, A_AND_REG_LSR_IMM_S, A_AND_REG_LSR_REG_S,
+ A_AND_REG_ASR_IMM_S, A_AND_REG_ASR_REG_S, A_AND_REG_ROR_IMM_S, A_AND_REG_ROR_REG_S,
+ A_AND_REG_LSL_IMM_S, A_MUL, A_AND_REG_LSR_IMM_S, A_LDRH_POST_REG,
+ A_AND_REG_ASR_IMM_S, A_LDRSB_POST_REG, A_AND_REG_ROR_IMM_S, A_LDRSH_POST_REG,
+
+ // 0000 0010 0000
+ A_EOR_REG_LSL_IMM, A_EOR_REG_LSL_REG, A_EOR_REG_LSR_IMM, A_EOR_REG_LSR_REG,
+ A_EOR_REG_ASR_IMM, A_EOR_REG_ASR_REG, A_EOR_REG_ROR_IMM, A_EOR_REG_ROR_REG,
+ A_EOR_REG_LSL_IMM, A_MLA, A_EOR_REG_LSR_IMM, A_UNK,
+ A_EOR_REG_ASR_IMM, A_UNK, A_EOR_REG_ROR_IMM, A_UNK,
+
+ // 0000 0011 0000
+ A_EOR_REG_LSL_IMM_S, A_EOR_REG_LSL_REG_S, A_EOR_REG_LSR_IMM_S, A_EOR_REG_LSR_REG_S,
+ A_EOR_REG_ASR_IMM_S, A_EOR_REG_ASR_REG_S, A_EOR_REG_ROR_IMM_S, A_EOR_REG_ROR_REG_S,
+ // Slots 8/10/12/14 repeat the immediate-shift handlers of slots 0/2/4/6
+ // (LSL, LSR, ASR, ROR), as in every other data-processing row of this
+ // table; slot 10 is therefore the LSR handler, not ROR.
+ A_EOR_REG_LSL_IMM_S, A_MLA, A_EOR_REG_LSR_IMM_S, A_UNK,
+ A_EOR_REG_ASR_IMM_S, A_UNK, A_EOR_REG_ROR_IMM_S, A_UNK,
+
+ // 0000 0100 0000
+ A_SUB_REG_LSL_IMM, A_SUB_REG_LSL_REG, A_SUB_REG_LSR_IMM, A_SUB_REG_LSR_REG,
+ A_SUB_REG_ASR_IMM, A_SUB_REG_ASR_REG, A_SUB_REG_ROR_IMM, A_SUB_REG_ROR_REG,
+ A_SUB_REG_LSL_IMM, A_UNK, A_SUB_REG_LSR_IMM, A_STRH_POST_IMM,
+ A_SUB_REG_ASR_IMM, A_LDRD_POST_IMM, A_SUB_REG_ROR_IMM, A_STRD_POST_IMM,
+
+ // 0000 0101 0000
+ A_SUB_REG_LSL_IMM_S, A_SUB_REG_LSL_REG_S, A_SUB_REG_LSR_IMM_S, A_SUB_REG_LSR_REG_S,
+ A_SUB_REG_ASR_IMM_S, A_SUB_REG_ASR_REG_S, A_SUB_REG_ROR_IMM_S, A_SUB_REG_ROR_REG_S,
+ A_SUB_REG_LSL_IMM_S, A_UNK, A_SUB_REG_LSR_IMM_S, A_LDRH_POST_IMM,
+ A_SUB_REG_ASR_IMM_S, A_LDRSB_POST_IMM, A_SUB_REG_ROR_IMM_S, A_LDRSH_POST_IMM,
+
+ // 0000 0110 0000
+ A_RSB_REG_LSL_IMM, A_RSB_REG_LSL_REG, A_RSB_REG_LSR_IMM, A_RSB_REG_LSR_REG,
+ A_RSB_REG_ASR_IMM, A_RSB_REG_ASR_REG, A_RSB_REG_ROR_IMM, A_RSB_REG_ROR_REG,
+ A_RSB_REG_LSL_IMM, A_UNK, A_RSB_REG_LSR_IMM, A_UNK,
+ A_RSB_REG_ASR_IMM, A_UNK, A_RSB_REG_ROR_IMM, A_UNK,
+
+ // 0000 0111 0000
+ A_RSB_REG_LSL_IMM_S, A_RSB_REG_LSL_REG_S, A_RSB_REG_LSR_IMM_S, A_RSB_REG_LSR_REG_S,
+ A_RSB_REG_ASR_IMM_S, A_RSB_REG_ASR_REG_S, A_RSB_REG_ROR_IMM_S, A_RSB_REG_ROR_REG_S,
+ A_RSB_REG_LSL_IMM_S, A_UNK, A_RSB_REG_LSR_IMM_S, A_UNK,
+ A_RSB_REG_ASR_IMM_S, A_UNK, A_RSB_REG_ROR_IMM_S, A_UNK,
+
+ // 0000 1000 0000
+ A_ADD_REG_LSL_IMM, A_ADD_REG_LSL_REG, A_ADD_REG_LSR_IMM, A_ADD_REG_LSR_REG,
+ A_ADD_REG_ASR_IMM, A_ADD_REG_ASR_REG, A_ADD_REG_ROR_IMM, A_ADD_REG_ROR_REG,
+ A_ADD_REG_LSL_IMM, A_UMULL, A_ADD_REG_LSR_IMM, A_STRH_POST_REG,
+ A_ADD_REG_ASR_IMM, A_LDRD_POST_REG, A_ADD_REG_ROR_IMM, A_STRD_POST_REG,
+
+ // 0000 1001 0000
+ A_ADD_REG_LSL_IMM_S, A_ADD_REG_LSL_REG_S, A_ADD_REG_LSR_IMM_S, A_ADD_REG_LSR_REG_S,
+ A_ADD_REG_ASR_IMM_S, A_ADD_REG_ASR_REG_S, A_ADD_REG_ROR_IMM_S, A_ADD_REG_ROR_REG_S,
+ A_ADD_REG_LSL_IMM_S, A_UMULL, A_ADD_REG_LSR_IMM_S, A_LDRH_POST_REG,
+ A_ADD_REG_ASR_IMM_S, A_LDRSB_POST_REG, A_ADD_REG_ROR_IMM_S, A_LDRSH_POST_REG,
+
+ // 0000 1010 0000
+ A_ADC_REG_LSL_IMM, A_ADC_REG_LSL_REG, A_ADC_REG_LSR_IMM, A_ADC_REG_LSR_REG,
+ A_ADC_REG_ASR_IMM, A_ADC_REG_ASR_REG, A_ADC_REG_ROR_IMM, A_ADC_REG_ROR_REG,
+ A_ADC_REG_LSL_IMM, A_UMLAL, A_ADC_REG_LSR_IMM, A_UNK,
+ A_ADC_REG_ASR_IMM, A_UNK, A_ADC_REG_ROR_IMM, A_UNK,
+
+ // 0000 1011 0000
+ A_ADC_REG_LSL_IMM_S, A_ADC_REG_LSL_REG_S, A_ADC_REG_LSR_IMM_S, A_ADC_REG_LSR_REG_S,
+ A_ADC_REG_ASR_IMM_S, A_ADC_REG_ASR_REG_S, A_ADC_REG_ROR_IMM_S, A_ADC_REG_ROR_REG_S,
+ A_ADC_REG_LSL_IMM_S, A_UMLAL, A_ADC_REG_LSR_IMM_S, A_UNK,
+ A_ADC_REG_ASR_IMM_S, A_UNK, A_ADC_REG_ROR_IMM_S, A_UNK,
+
+ // 0000 1100 0000
+ A_SBC_REG_LSL_IMM, A_SBC_REG_LSL_REG, A_SBC_REG_LSR_IMM, A_SBC_REG_LSR_REG,
+ A_SBC_REG_ASR_IMM, A_SBC_REG_ASR_REG, A_SBC_REG_ROR_IMM, A_SBC_REG_ROR_REG,
+ A_SBC_REG_LSL_IMM, A_SMULL, A_SBC_REG_LSR_IMM, A_STRH_POST_IMM,
+ A_SBC_REG_ASR_IMM, A_LDRD_POST_IMM, A_SBC_REG_ROR_IMM, A_STRD_POST_IMM,
+
+ // 0000 1101 0000
+ A_SBC_REG_LSL_IMM_S, A_SBC_REG_LSL_REG_S, A_SBC_REG_LSR_IMM_S, A_SBC_REG_LSR_REG_S,
+ A_SBC_REG_ASR_IMM_S, A_SBC_REG_ASR_REG_S, A_SBC_REG_ROR_IMM_S, A_SBC_REG_ROR_REG_S,
+ A_SBC_REG_LSL_IMM_S, A_SMULL, A_SBC_REG_LSR_IMM_S, A_LDRH_POST_IMM,
+ A_SBC_REG_ASR_IMM_S, A_LDRSB_POST_IMM, A_SBC_REG_ROR_IMM_S, A_LDRSH_POST_IMM,
+
+ // 0000 1110 0000
+ A_RSC_REG_LSL_IMM, A_RSC_REG_LSL_REG, A_RSC_REG_LSR_IMM, A_RSC_REG_LSR_REG,
+ A_RSC_REG_ASR_IMM, A_RSC_REG_ASR_REG, A_RSC_REG_ROR_IMM, A_RSC_REG_ROR_REG,
+ A_RSC_REG_LSL_IMM, A_SMLAL, A_RSC_REG_LSR_IMM, A_UNK,
+ A_RSC_REG_ASR_IMM, A_UNK, A_RSC_REG_ROR_IMM, A_UNK,
+
+ // 0000 1111 0000
+ A_RSC_REG_LSL_IMM_S, A_RSC_REG_LSL_REG_S, A_RSC_REG_LSR_IMM_S, A_RSC_REG_LSR_REG_S,
+ A_RSC_REG_ASR_IMM_S, A_RSC_REG_ASR_REG_S, A_RSC_REG_ROR_IMM_S, A_RSC_REG_ROR_REG_S,
+ A_RSC_REG_LSL_IMM_S, A_SMLAL, A_RSC_REG_LSR_IMM_S, A_UNK,
+ A_RSC_REG_ASR_IMM_S, A_UNK, A_RSC_REG_ROR_IMM_S, A_UNK,
+
+
+
+ // 0001 0000 0000
+ A_MRS, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_QADD, A_UNK, A_UNK,
+ A_SMLAxy, A_SWP, A_SMLAxy, A_STRH_REG,
+ A_SMLAxy, A_LDRD_REG, A_SMLAxy, A_STRD_REG,
+
+ // 0001 0001 0000
+ A_TST_REG_LSL_IMM, A_TST_REG_LSL_REG, A_TST_REG_LSR_IMM, A_TST_REG_LSR_REG,
+ A_TST_REG_ASR_IMM, A_TST_REG_ASR_REG, A_TST_REG_ROR_IMM, A_TST_REG_ROR_REG,
+ A_TST_REG_LSL_IMM, A_UNK, A_TST_REG_LSR_IMM, A_LDRH_REG,
+ A_TST_REG_ASR_IMM, A_LDRSB_REG, A_TST_REG_ROR_IMM, A_LDRSH_REG,
+
+ // 0001 0010 0000
+ A_MSR_REG, A_BX, A_UNK, A_BLX_REG,
+ A_UNK, A_QSUB, A_UNK, A_UNK,
+ A_SMLAWy, A_UNK, A_SMULWy, A_STRH_REG,
+ A_SMLAWy, A_LDRD_REG, A_SMULWy, A_STRD_REG,
+
+ // 0001 0011 0000
+ A_TEQ_REG_LSL_IMM, A_TEQ_REG_LSL_REG, A_TEQ_REG_LSR_IMM, A_TEQ_REG_LSR_REG,
+ A_TEQ_REG_ASR_IMM, A_TEQ_REG_ASR_REG, A_TEQ_REG_ROR_IMM, A_TEQ_REG_ROR_REG,
+ A_TEQ_REG_LSL_IMM, A_UNK, A_TEQ_REG_LSR_IMM, A_LDRH_REG,
+ A_TEQ_REG_ASR_IMM, A_LDRSB_REG, A_TEQ_REG_ROR_IMM, A_LDRSH_REG,
+
+ // 0001 0100 0000
+ A_MRS, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_QDADD, A_UNK, A_UNK,
+ A_SMLALxy, A_SWPB, A_SMLALxy, A_STRH_IMM,
+ A_SMLALxy, A_LDRD_IMM, A_SMLALxy, A_STRD_IMM,
+
+ // 0001 0101 0000
+ A_CMP_REG_LSL_IMM, A_CMP_REG_LSL_REG, A_CMP_REG_LSR_IMM, A_CMP_REG_LSR_REG,
+ A_CMP_REG_ASR_IMM, A_CMP_REG_ASR_REG, A_CMP_REG_ROR_IMM, A_CMP_REG_ROR_REG,
+ A_CMP_REG_LSL_IMM, A_UNK, A_CMP_REG_LSR_IMM, A_LDRH_IMM,
+ A_CMP_REG_ASR_IMM, A_LDRSB_IMM, A_CMP_REG_ROR_IMM, A_LDRSH_IMM,
+
+ // 0001 0110 0000
+ A_MSR_REG, A_CLZ, A_UNK, A_UNK,
+ A_UNK, A_QDSUB, A_UNK, A_UNK,
+ A_SMULxy, A_UNK, A_SMULxy, A_STRH_IMM,
+ A_SMULxy, A_LDRD_IMM, A_SMULxy, A_STRD_IMM,
+
+ // 0001 0111 0000
+ A_CMN_REG_LSL_IMM, A_CMN_REG_LSL_REG, A_CMN_REG_LSR_IMM, A_CMN_REG_LSR_REG,
+ A_CMN_REG_ASR_IMM, A_CMN_REG_ASR_REG, A_CMN_REG_ROR_IMM, A_CMN_REG_ROR_REG,
+ A_CMN_REG_LSL_IMM, A_UNK, A_CMN_REG_LSR_IMM, A_LDRH_IMM,
+ A_CMN_REG_ASR_IMM, A_LDRSB_IMM, A_CMN_REG_ROR_IMM, A_LDRSH_IMM,
+
+ // 0001 1000 0000
+ A_ORR_REG_LSL_IMM, A_ORR_REG_LSL_REG, A_ORR_REG_LSR_IMM, A_ORR_REG_LSR_REG,
+ A_ORR_REG_ASR_IMM, A_ORR_REG_ASR_REG, A_ORR_REG_ROR_IMM, A_ORR_REG_ROR_REG,
+ A_ORR_REG_LSL_IMM, A_UNK, A_ORR_REG_LSR_IMM, A_STRH_REG,
+ A_ORR_REG_ASR_IMM, A_LDRD_REG, A_ORR_REG_ROR_IMM, A_STRD_REG,
+
+ // 0001 1001 0000
+ A_ORR_REG_LSL_IMM_S, A_ORR_REG_LSL_REG_S, A_ORR_REG_LSR_IMM_S, A_ORR_REG_LSR_REG_S,
+ A_ORR_REG_ASR_IMM_S, A_ORR_REG_ASR_REG_S, A_ORR_REG_ROR_IMM_S, A_ORR_REG_ROR_REG_S,
+ A_ORR_REG_LSL_IMM_S, A_UNK, A_ORR_REG_LSR_IMM_S, A_LDRH_REG,
+ A_ORR_REG_ASR_IMM_S, A_LDRSB_REG, A_ORR_REG_ROR_IMM_S, A_LDRSH_REG,
+
+ // 0001 1010 0000
+ A_MOV_REG_LSL_IMM, A_MOV_REG_LSL_REG, A_MOV_REG_LSR_IMM, A_MOV_REG_LSR_REG,
+ A_MOV_REG_ASR_IMM, A_MOV_REG_ASR_REG, A_MOV_REG_ROR_IMM, A_MOV_REG_ROR_REG,
+ A_MOV_REG_LSL_IMM, A_UNK, A_MOV_REG_LSR_IMM, A_STRH_REG,
+ A_MOV_REG_ASR_IMM, A_LDRD_REG, A_MOV_REG_ROR_IMM, A_STRD_REG,
+
+ // 0001 1011 0000
+ A_MOV_REG_LSL_IMM_S, A_MOV_REG_LSL_REG_S, A_MOV_REG_LSR_IMM_S, A_MOV_REG_LSR_REG_S,
+ A_MOV_REG_ASR_IMM_S, A_MOV_REG_ASR_REG_S, A_MOV_REG_ROR_IMM_S, A_MOV_REG_ROR_REG_S,
+ A_MOV_REG_LSL_IMM_S, A_UNK, A_MOV_REG_LSR_IMM_S, A_LDRH_REG,
+ A_MOV_REG_ASR_IMM_S, A_LDRSB_REG, A_MOV_REG_ROR_IMM_S, A_LDRSH_REG,
+
+ // 0001 1100 0000
+ A_BIC_REG_LSL_IMM, A_BIC_REG_LSL_REG, A_BIC_REG_LSR_IMM, A_BIC_REG_LSR_REG,
+ A_BIC_REG_ASR_IMM, A_BIC_REG_ASR_REG, A_BIC_REG_ROR_IMM, A_BIC_REG_ROR_REG,
+ A_BIC_REG_LSL_IMM, A_UNK, A_BIC_REG_LSR_IMM, A_STRH_IMM,
+ A_BIC_REG_ASR_IMM, A_LDRD_IMM, A_BIC_REG_ROR_IMM, A_STRD_IMM,
+
+ // 0001 1101 0000
+ A_BIC_REG_LSL_IMM_S, A_BIC_REG_LSL_REG_S, A_BIC_REG_LSR_IMM_S, A_BIC_REG_LSR_REG_S,
+ A_BIC_REG_ASR_IMM_S, A_BIC_REG_ASR_REG_S, A_BIC_REG_ROR_IMM_S, A_BIC_REG_ROR_REG_S,
+ A_BIC_REG_LSL_IMM_S, A_UNK, A_BIC_REG_LSR_IMM_S, A_LDRH_IMM,
+ A_BIC_REG_ASR_IMM_S, A_LDRSB_IMM, A_BIC_REG_ROR_IMM_S, A_LDRSH_IMM,
+
+ // 0001 1110 0000
+ A_MVN_REG_LSL_IMM, A_MVN_REG_LSL_REG, A_MVN_REG_LSR_IMM, A_MVN_REG_LSR_REG,
+ A_MVN_REG_ASR_IMM, A_MVN_REG_ASR_REG, A_MVN_REG_ROR_IMM, A_MVN_REG_ROR_REG,
+ A_MVN_REG_LSL_IMM, A_UNK, A_MVN_REG_LSR_IMM, A_STRH_IMM,
+ A_MVN_REG_ASR_IMM, A_LDRD_IMM, A_MVN_REG_ROR_IMM, A_STRD_IMM,
+
+ // 0001 1111 0000
+ A_MVN_REG_LSL_IMM_S, A_MVN_REG_LSL_REG_S, A_MVN_REG_LSR_IMM_S, A_MVN_REG_LSR_REG_S,
+ A_MVN_REG_ASR_IMM_S, A_MVN_REG_ASR_REG_S, A_MVN_REG_ROR_IMM_S, A_MVN_REG_ROR_REG_S,
+ A_MVN_REG_LSL_IMM_S, A_UNK, A_MVN_REG_LSR_IMM_S, A_LDRH_IMM,
+ A_MVN_REG_ASR_IMM_S, A_LDRSB_IMM, A_MVN_REG_ROR_IMM_S, A_LDRSH_IMM,
+
+
+
+ // 0010 0000 0000
+ A_AND_IMM, A_AND_IMM, A_AND_IMM, A_AND_IMM,
+ A_AND_IMM, A_AND_IMM, A_AND_IMM, A_AND_IMM,
+ A_AND_IMM, A_AND_IMM, A_AND_IMM, A_AND_IMM,
+ A_AND_IMM, A_AND_IMM, A_AND_IMM, A_AND_IMM,
+
+ // 0010 0001 0000
+ A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S,
+ A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S,
+ A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S,
+ A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S,
+
+ // 0010 0010 0000
+ A_EOR_IMM, A_EOR_IMM, A_EOR_IMM, A_EOR_IMM,
+ A_EOR_IMM, A_EOR_IMM, A_EOR_IMM, A_EOR_IMM,
+ A_EOR_IMM, A_EOR_IMM, A_EOR_IMM, A_EOR_IMM,
+ A_EOR_IMM, A_EOR_IMM, A_EOR_IMM, A_EOR_IMM,
+
+ // 0010 0011 0000
+ A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S,
+ A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S,
+ A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S,
+ A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S,
+
+ // 0010 0100 0000
+ A_SUB_IMM, A_SUB_IMM, A_SUB_IMM, A_SUB_IMM,
+ A_SUB_IMM, A_SUB_IMM, A_SUB_IMM, A_SUB_IMM,
+ A_SUB_IMM, A_SUB_IMM, A_SUB_IMM, A_SUB_IMM,
+ A_SUB_IMM, A_SUB_IMM, A_SUB_IMM, A_SUB_IMM,
+
+ // 0010 0101 0000
+ A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S,
+ A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S,
+ A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S,
+ A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S,
+
+ // 0010 0110 0000
+ A_RSB_IMM, A_RSB_IMM, A_RSB_IMM, A_RSB_IMM,
+ A_RSB_IMM, A_RSB_IMM, A_RSB_IMM, A_RSB_IMM,
+ A_RSB_IMM, A_RSB_IMM, A_RSB_IMM, A_RSB_IMM,
+ A_RSB_IMM, A_RSB_IMM, A_RSB_IMM, A_RSB_IMM,
+
+ // 0010 0111 0000
+ A_RSB_IMM_S, A_RSB_IMM_S, A_RSB_IMM_S, A_RSB_IMM_S,
+ A_RSB_IMM_S, A_RSB_IMM_S, A_RSB_IMM_S, A_RSB_IMM_S,
+ A_RSB_IMM_S, A_RSB_IMM_S, A_RSB_IMM_S, A_RSB_IMM_S,
+ A_RSB_IMM_S, A_RSB_IMM_S, A_RSB_IMM_S, A_RSB_IMM_S,
+
+ // 0010 1000 0000
+ A_ADD_IMM, A_ADD_IMM, A_ADD_IMM, A_ADD_IMM,
+ A_ADD_IMM, A_ADD_IMM, A_ADD_IMM, A_ADD_IMM,
+ A_ADD_IMM, A_ADD_IMM, A_ADD_IMM, A_ADD_IMM,
+ A_ADD_IMM, A_ADD_IMM, A_ADD_IMM, A_ADD_IMM,
+
+ // 0010 1001 0000
+ A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S,
+ A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S,
+ A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S,
+ A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S,
+
+ // 0010 1010 0000
+ A_ADC_IMM, A_ADC_IMM, A_ADC_IMM, A_ADC_IMM,
+ A_ADC_IMM, A_ADC_IMM, A_ADC_IMM, A_ADC_IMM,
+ A_ADC_IMM, A_ADC_IMM, A_ADC_IMM, A_ADC_IMM,
+ A_ADC_IMM, A_ADC_IMM, A_ADC_IMM, A_ADC_IMM,
+
+ // 0010 1011 0000
+ A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S,
+ A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S,
+ A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S,
+ A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S,
+
+ // 0010 1100 0000
+ A_SBC_IMM, A_SBC_IMM, A_SBC_IMM, A_SBC_IMM,
+ A_SBC_IMM, A_SBC_IMM, A_SBC_IMM, A_SBC_IMM,
+ A_SBC_IMM, A_SBC_IMM, A_SBC_IMM, A_SBC_IMM,
+ A_SBC_IMM, A_SBC_IMM, A_SBC_IMM, A_SBC_IMM,
+
+ // 0010 1101 0000
+ A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S,
+ A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S,
+ A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S,
+ A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S,
+
+ // 0010 1110 0000
+ A_RSC_IMM, A_RSC_IMM, A_RSC_IMM, A_RSC_IMM,
+ A_RSC_IMM, A_RSC_IMM, A_RSC_IMM, A_RSC_IMM,
+ A_RSC_IMM, A_RSC_IMM, A_RSC_IMM, A_RSC_IMM,
+ A_RSC_IMM, A_RSC_IMM, A_RSC_IMM, A_RSC_IMM,
+
+ // 0010 1111 0000
+ A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S,
+ A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S,
+ A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S,
+ A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S,
+
+
+
+ // 0011 0000 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 0011 0001 0000
+ A_TST_IMM, A_TST_IMM, A_TST_IMM, A_TST_IMM,
+ A_TST_IMM, A_TST_IMM, A_TST_IMM, A_TST_IMM,
+ A_TST_IMM, A_TST_IMM, A_TST_IMM, A_TST_IMM,
+ A_TST_IMM, A_TST_IMM, A_TST_IMM, A_TST_IMM,
+
+ // 0011 0010 0000
+ A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM,
+ A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM,
+ A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM,
+ A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM,
+
+ // 0011 0011 0000
+ A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM,
+ A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM,
+ A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM,
+ A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM,
+
+ // 0011 0100 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 0011 0101 0000
+ A_CMP_IMM, A_CMP_IMM, A_CMP_IMM, A_CMP_IMM,
+ A_CMP_IMM, A_CMP_IMM, A_CMP_IMM, A_CMP_IMM,
+ A_CMP_IMM, A_CMP_IMM, A_CMP_IMM, A_CMP_IMM,
+ A_CMP_IMM, A_CMP_IMM, A_CMP_IMM, A_CMP_IMM,
+
+ // 0011 0110 0000
+ A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM,
+ A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM,
+ A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM,
+ A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM,
+
+ // 0011 0111 0000
+ A_CMN_IMM, A_CMN_IMM, A_CMN_IMM, A_CMN_IMM,
+ A_CMN_IMM, A_CMN_IMM, A_CMN_IMM, A_CMN_IMM,
+ A_CMN_IMM, A_CMN_IMM, A_CMN_IMM, A_CMN_IMM,
+ A_CMN_IMM, A_CMN_IMM, A_CMN_IMM, A_CMN_IMM,
+
+ // 0011 1000 0000
+ A_ORR_IMM, A_ORR_IMM, A_ORR_IMM, A_ORR_IMM,
+ A_ORR_IMM, A_ORR_IMM, A_ORR_IMM, A_ORR_IMM,
+ A_ORR_IMM, A_ORR_IMM, A_ORR_IMM, A_ORR_IMM,
+ A_ORR_IMM, A_ORR_IMM, A_ORR_IMM, A_ORR_IMM,
+
+ // 0011 1001 0000
+ A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S,
+ A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S,
+ A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S,
+ A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S,
+
+ // 0011 1010 0000
+ A_MOV_IMM, A_MOV_IMM, A_MOV_IMM, A_MOV_IMM,
+ A_MOV_IMM, A_MOV_IMM, A_MOV_IMM, A_MOV_IMM,
+ A_MOV_IMM, A_MOV_IMM, A_MOV_IMM, A_MOV_IMM,
+ A_MOV_IMM, A_MOV_IMM, A_MOV_IMM, A_MOV_IMM,
+
+ // 0011 1011 0000
+ A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S,
+ A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S,
+ A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S,
+ A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S,
+
+ // 0011 1100 0000
+ A_BIC_IMM, A_BIC_IMM, A_BIC_IMM, A_BIC_IMM,
+ A_BIC_IMM, A_BIC_IMM, A_BIC_IMM, A_BIC_IMM,
+ A_BIC_IMM, A_BIC_IMM, A_BIC_IMM, A_BIC_IMM,
+ A_BIC_IMM, A_BIC_IMM, A_BIC_IMM, A_BIC_IMM,
+
+ // 0011 1101 0000
+ A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S,
+ A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S,
+ A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S,
+ A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S,
+
+ // 0011 1110 0000
+ A_MVN_IMM, A_MVN_IMM, A_MVN_IMM, A_MVN_IMM,
+ A_MVN_IMM, A_MVN_IMM, A_MVN_IMM, A_MVN_IMM,
+ A_MVN_IMM, A_MVN_IMM, A_MVN_IMM, A_MVN_IMM,
+ A_MVN_IMM, A_MVN_IMM, A_MVN_IMM, A_MVN_IMM,
+
+ // 0011 1111 0000
+ A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S,
+ A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S,
+ A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S,
+ A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S,
+
+
+
+ // 0100 0000 0000
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+
+ // 0100 0001 0000
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+
+ // 0100 0010 0000
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+
+ // 0100 0011 0000
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+
+ // 0100 0100 0000
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+
+ // 0100 0101 0000
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+
+ // 0100 0110 0000
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+
+ // 0100 0111 0000
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+
+ // 0100 1000 0000
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+
+ // 0100 1001 0000
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+
+ // 0100 1010 0000
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+ A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM,
+
+ // 0100 1011 0000
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+ A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM,
+
+ // 0100 1100 0000
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+
+ // 0100 1101 0000
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+
+ // 0100 1110 0000
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+ A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM,
+
+ // 0100 1111 0000
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+ A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM,
+
+
+
+ // 0101 0000 0000
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+
+ // 0101 0001 0000
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+
+ // 0101 0010 0000
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+
+ // 0101 0011 0000
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+
+ // 0101 0100 0000
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+
+ // 0101 0101 0000
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+
+ // 0101 0110 0000
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+
+ // 0101 0111 0000
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+
+ // 0101 1000 0000
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+
+ // 0101 1001 0000
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+
+ // 0101 1010 0000
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+ A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM,
+
+ // 0101 1011 0000
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+ A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM,
+
+ // 0101 1100 0000
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+
+ // 0101 1101 0000
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+
+ // 0101 1110 0000
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+ A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM,
+
+ // 0101 1111 0000
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+ A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM,
+
+
+
+ // 0110 0000 0000
+ A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK,
+ A_STR_POST_REG_ASR, A_UNK, A_STR_POST_REG_ROR, A_UNK,
+ A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK,
+ A_STR_POST_REG_ASR, A_UNK, A_STR_POST_REG_ROR, A_UNK,
+
+ // 0110 0001 0000
+ A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK,
+ A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK,
+ A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK,
+ A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK,
+
+ // 0110 0010 0000
+ A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK,
+ A_STR_POST_REG_ASR, A_UNK, A_STR_POST_REG_ROR, A_UNK,
+ A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK,
+ A_STR_POST_REG_ASR, A_UNK, A_STR_POST_REG_ROR, A_UNK,
+
+ // 0110 0011 0000
+ A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK,
+ A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK,
+ A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK,
+ A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK,
+
+ // 0110 0100 0000
+ A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK,
+ A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK,
+ A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK,
+ A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK,
+
+ // 0110 0101 0000
+ A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK,
+ A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK,
+ A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK,
+ A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK,
+
+ // 0110 0110 0000
+ A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK,
+ A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK,
+ A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK,
+ A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK,
+
+ // 0110 0111 0000
+ A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK,
+ A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK,
+ A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK,
+ A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK,
+
+ // 0110 1000 0000
+ A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK,
+ A_STR_POST_REG_ASR, A_UNK, A_STR_POST_REG_ROR, A_UNK,
+ A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK,
+ A_STR_POST_REG_ASR, A_UNK, A_STR_POST_REG_ROR, A_UNK,
+
+ // 0110 1001 0000
+ A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK,
+ A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK,
+ A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK,
+ A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK,
+
+ // 0110 1010 0000
+ A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK,
+ A_STR_POST_REG_ASR, A_UNK, A_STR_POST_REG_ROR, A_UNK,
+ A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK,
+ A_STR_POST_REG_ASR, A_UNK, A_STR_POST_REG_ROR, A_UNK,
+
+ // 0110 1011 0000
+ A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK,
+ A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK,
+ A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK,
+ A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK,
+
+ // 0110 1100 0000
+ A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK,
+ A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK,
+ A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK,
+ A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK,
+
+ // 0110 1101 0000
+ A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK,
+ A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK,
+ A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK,
+ A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK,
+
+ // 0110 1110 0000
+ A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK,
+ A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK,
+ A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK,
+ A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK,
+
+ // 0110 1111 0000
+ A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK,
+ A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK,
+ A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK,
+ A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK,
+
+
+
+ // 0111 0000 0000
+ A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK,
+ A_STR_REG_ASR, A_UNK, A_STR_REG_ROR, A_UNK,
+ A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK,
+ A_STR_REG_ASR, A_UNK, A_STR_REG_ROR, A_UNK,
+
+ // 0111 0001 0000
+ A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK,
+ A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK,
+ A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK,
+ A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK,
+
+ // 0111 0010 0000
+ A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK,
+ A_STR_REG_ASR, A_UNK, A_STR_REG_ROR, A_UNK,
+ A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK,
+ A_STR_REG_ASR, A_UNK, A_STR_REG_ROR, A_UNK,
+
+ // 0111 0011 0000
+ A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK,
+ A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK,
+ A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK,
+ A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK,
+
+ // 0111 0100 0000
+ A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK,
+ A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK,
+ A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK,
+ A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK,
+
+ // 0111 0101 0000
+ A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK,
+ A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK,
+ A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK,
+ A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK,
+
+ // 0111 0110 0000
+ A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK,
+ A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK,
+ A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK,
+ A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK,
+
+ // 0111 0111 0000
+ A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK,
+ A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK,
+ A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK,
+ A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK,
+
+ // 0111 1000 0000
+ A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK,
+ A_STR_REG_ASR, A_UNK, A_STR_REG_ROR, A_UNK,
+ A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK,
+ A_STR_REG_ASR, A_UNK, A_STR_REG_ROR, A_UNK,
+
+ // 0111 1001 0000
+ A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK,
+ A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK,
+ A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK,
+ A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK,
+
+ // 0111 1010 0000
+ A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK,
+ A_STR_REG_ASR, A_UNK, A_STR_REG_ROR, A_UNK,
+ A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK,
+ A_STR_REG_ASR, A_UNK, A_STR_REG_ROR, A_UNK,
+
+ // 0111 1011 0000
+ A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK,
+ A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK,
+ A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK,
+ A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK,
+
+ // 0111 1100 0000
+ A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK,
+ A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK,
+ A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK,
+ A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK,
+
+ // 0111 1101 0000
+ A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK,
+ A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK,
+ A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK,
+ A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK,
+
+ // 0111 1110 0000
+ A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK,
+ A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK,
+ A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK,
+ A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK,
+
+ // 0111 1111 0000
+ A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK,
+ A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK,
+ A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK,
+ A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK,
+
+
+
+ // 1000 0000 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1000 0001 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+ // 1000 0010 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1000 0011 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+ // 1000 0100 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1000 0101 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+ // 1000 0110 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1000 0111 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+ // 1000 1000 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1000 1001 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+ // 1000 1010 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1000 1011 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+ // 1000 1100 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1000 1101 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+ // 1000 1110 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1000 1111 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+
+
+ // 1001 0000 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1001 0001 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+ // 1001 0010 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1001 0011 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+ // 1001 0100 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1001 0101 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+ // 1001 0110 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1001 0111 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+ // 1001 1000 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1001 1001 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+ // 1001 1010 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1001 1011 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+ // 1001 1100 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1001 1101 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+ // 1001 1110 0000
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+ A_STM, A_STM, A_STM, A_STM,
+
+ // 1001 1111 0000
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+ A_LDM, A_LDM, A_LDM, A_LDM,
+
+
+
+ // 1010 0000 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+ // 1010 0001 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+ // 1010 0010 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+ // 1010 0011 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+ // 1010 0100 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+ // 1010 0101 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+ // 1010 0110 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+ // 1010 0111 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+ // 1010 1000 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+ // 1010 1001 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+ // 1010 1010 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+ // 1010 1011 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+ // 1010 1100 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+ // 1010 1101 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+ // 1010 1110 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+ // 1010 1111 0000
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+ A_B, A_B, A_B, A_B,
+
+
+
+ // 1011 0000 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+ // 1011 0001 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+ // 1011 0010 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+ // 1011 0011 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+ // 1011 0100 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+ // 1011 0101 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+ // 1011 0110 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+ // 1011 0111 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+ // 1011 1000 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+ // 1011 1001 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+ // 1011 1010 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+ // 1011 1011 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+ // 1011 1100 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+ // 1011 1101 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+ // 1011 1110 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+ // 1011 1111 0000
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+ A_BL, A_BL, A_BL, A_BL,
+
+
+
+ // 1100 0000 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1100 0001 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1100 0010 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1100 0011 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1100 0100 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1100 0101 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1100 0110 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1100 0111 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1100 1000 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1100 1001 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1100 1010 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1100 1011 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1100 1100 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1100 1101 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1100 1110 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1100 1111 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+
+
+ // 1101 0000 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1101 0001 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1101 0010 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1101 0011 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1101 0100 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1101 0101 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1101 0110 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1101 0111 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1101 1000 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1101 1001 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1101 1010 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1101 1011 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1101 1100 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1101 1101 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1101 1110 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+ // 1101 1111 0000
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+ A_UNK, A_UNK, A_UNK, A_UNK,
+
+
+
+ // 1110 0000 0000
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+
+ // 1110 0001 0000
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+
+ // 1110 0010 0000
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+
+ // 1110 0011 0000
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+
+ // 1110 0100 0000
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+
+ // 1110 0101 0000
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+
+ // 1110 0110 0000
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+
+ // 1110 0111 0000
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+
+ // 1110 1000 0000
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+
+ // 1110 1001 0000
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+
+ // 1110 1010 0000
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+
+ // 1110 1011 0000
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+
+ // 1110 1100 0000
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+
+ // 1110 1101 0000
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+
+ // 1110 1110 0000
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+ A_UNK, A_MCR, A_UNK, A_MCR,
+
+ // 1110 1111 0000
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+ A_UNK, A_MRC, A_UNK, A_MRC,
+
+
+
+ // 1111 0000 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+
+ // 1111 0001 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+
+ // 1111 0010 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+
+ // 1111 0011 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+
+ // 1111 0100 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+
+ // 1111 0101 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+
+ // 1111 0110 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+
+ // 1111 0111 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+
+ // 1111 1000 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+
+ // 1111 1001 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+
+ // 1111 1010 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+
+ // 1111 1011 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+
+ // 1111 1100 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+
+ // 1111 1101 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+
+ // 1111 1110 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+
+ // 1111 1111 0000
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC,
+ A_SVC, A_SVC, A_SVC, A_SVC
+};
+
+INSTRFUNC_PROTO(THUMBInstrTable[1024]) =
+{ /* Thumb instruction handler table.
+     1024 entries, written as 64 groups of 16. The comment above each group is
+     a 10-bit pattern equal to the index of the group's first entry, so the
+     table is presumably indexed by the top 10 bits (15..6) of the 16-bit
+     Thumb opcode -- TODO confirm against the dispatch code.
+     T_UNK fills the slots that have no dedicated handler in this table. */
+ // 0000 0000 00
+ T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM,
+ T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM,
+ T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM,
+ T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM,
+
+ // 0000 0100 00
+ T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM,
+ T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM,
+ T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM,
+ T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM,
+
+ // 0000 1000 00
+ T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM,
+ T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM,
+ T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM,
+ T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM,
+
+ // 0000 1100 00
+ T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM,
+ T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM,
+ T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM,
+ T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM,
+
+ // 0001 0000 00
+ T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM,
+ T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM,
+ T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM,
+ T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM,
+
+ // 0001 0100 00
+ T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM,
+ T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM,
+ T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM,
+ T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM,
+
+ // 0001 1000 00
+ T_ADD_REG_, T_ADD_REG_, T_ADD_REG_, T_ADD_REG_,
+ T_ADD_REG_, T_ADD_REG_, T_ADD_REG_, T_ADD_REG_,
+ T_SUB_REG_, T_SUB_REG_, T_SUB_REG_, T_SUB_REG_,
+ T_SUB_REG_, T_SUB_REG_, T_SUB_REG_, T_SUB_REG_,
+
+ // 0001 1100 00
+ T_ADD_IMM_, T_ADD_IMM_, T_ADD_IMM_, T_ADD_IMM_,
+ T_ADD_IMM_, T_ADD_IMM_, T_ADD_IMM_, T_ADD_IMM_,
+ T_SUB_IMM_, T_SUB_IMM_, T_SUB_IMM_, T_SUB_IMM_,
+ T_SUB_IMM_, T_SUB_IMM_, T_SUB_IMM_, T_SUB_IMM_,
+
+ // 0010 0000 00
+ T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, T_MOV_IMM,
+ T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, T_MOV_IMM,
+ T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, T_MOV_IMM,
+ T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, T_MOV_IMM,
+
+ // 0010 0100 00
+ T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, T_MOV_IMM,
+ T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, T_MOV_IMM,
+ T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, T_MOV_IMM,
+ T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, T_MOV_IMM,
+
+ // 0010 1000 00
+ T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM,
+ T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM,
+ T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM,
+ T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM,
+
+ // 0010 1100 00
+ T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM,
+ T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM,
+ T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM,
+ T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM,
+
+ // 0011 0000 00
+ T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM,
+ T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM,
+ T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM,
+ T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM,
+
+ // 0011 0100 00
+ T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM,
+ T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM,
+ T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM,
+ T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM,
+
+ // 0011 1000 00
+ T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM,
+ T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM,
+ T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM,
+ T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM,
+
+ // 0011 1100 00
+ T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM,
+ T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM,
+ T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM,
+ T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM,
+
+
+
+ // 0100 0000 00
+ T_AND_REG, T_EOR_REG, T_LSL_REG, T_LSR_REG,
+ T_ASR_REG, T_ADC_REG, T_SBC_REG, T_ROR_REG,
+ T_TST_REG, T_NEG_REG, T_CMP_REG, T_CMN_REG,
+ T_ORR_REG, T_MUL_REG, T_BIC_REG, T_MVN_REG,
+
+ // 0100 0100 00
+ T_UNK, T_ADD_HIREG, T_ADD_HIREG, T_ADD_HIREG,
+ T_UNK, T_CMP_HIREG, T_CMP_HIREG, T_CMP_HIREG,
+ T_UNK, T_MOV_HIREG, T_MOV_HIREG, T_MOV_HIREG,
+ T_BX, T_BX, T_BLX_REG, T_BLX_REG,
+
+ // 0100 1000 00
+ T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL,
+ T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL,
+ T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL,
+ T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL,
+
+ // 0100 1100 00
+ T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL,
+ T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL,
+ T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL,
+ T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL,
+
+ // 0101 0000 00
+ T_STR_REG, T_STR_REG, T_STR_REG, T_STR_REG,
+ T_STR_REG, T_STR_REG, T_STR_REG, T_STR_REG,
+ T_STRH_REG, T_STRH_REG, T_STRH_REG, T_STRH_REG,
+ T_STRH_REG, T_STRH_REG, T_STRH_REG, T_STRH_REG,
+
+ // 0101 0100 00
+ T_STRB_REG, T_STRB_REG, T_STRB_REG, T_STRB_REG,
+ T_STRB_REG, T_STRB_REG, T_STRB_REG, T_STRB_REG,
+ T_LDRSB_REG, T_LDRSB_REG, T_LDRSB_REG, T_LDRSB_REG,
+ T_LDRSB_REG, T_LDRSB_REG, T_LDRSB_REG, T_LDRSB_REG,
+
+ // 0101 1000 00
+ T_LDR_REG, T_LDR_REG, T_LDR_REG, T_LDR_REG,
+ T_LDR_REG, T_LDR_REG, T_LDR_REG, T_LDR_REG,
+ T_LDRH_REG, T_LDRH_REG, T_LDRH_REG, T_LDRH_REG,
+ T_LDRH_REG, T_LDRH_REG, T_LDRH_REG, T_LDRH_REG,
+
+ // 0101 1100 00
+ T_LDRB_REG, T_LDRB_REG, T_LDRB_REG, T_LDRB_REG,
+ T_LDRB_REG, T_LDRB_REG, T_LDRB_REG, T_LDRB_REG,
+ T_LDRSH_REG, T_LDRSH_REG, T_LDRSH_REG, T_LDRSH_REG,
+ T_LDRSH_REG, T_LDRSH_REG, T_LDRSH_REG, T_LDRSH_REG,
+
+ // 0110 0000 00
+ T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM,
+ T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM,
+ T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM,
+ T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM,
+
+ // 0110 0100 00
+ T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM,
+ T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM,
+ T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM,
+ T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM,
+
+ // 0110 1000 00
+ T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, T_LDR_IMM,
+ T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, T_LDR_IMM,
+ T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, T_LDR_IMM,
+ T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, T_LDR_IMM,
+
+ // 0110 1100 00
+ T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, T_LDR_IMM,
+ T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, T_LDR_IMM,
+ T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, T_LDR_IMM,
+ T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, T_LDR_IMM,
+
+ // 0111 0000 00
+ T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM,
+ T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM,
+ T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM,
+ T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM,
+
+ // 0111 0100 00
+ T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM,
+ T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM,
+ T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM,
+ T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM,
+
+ // 0111 1000 00
+ T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM,
+ T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM,
+ T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM,
+ T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM,
+
+ // 0111 1100 00
+ T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM,
+ T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM,
+ T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM,
+ T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM,
+
+
+
+ // 1000 0000 00
+ T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM,
+ T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM,
+ T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM,
+ T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM,
+
+ // 1000 0100 00
+ T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM,
+ T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM,
+ T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM,
+ T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM,
+
+ // 1000 1000 00
+ T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM,
+ T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM,
+ T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM,
+ T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM,
+
+ // 1000 1100 00
+ T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM,
+ T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM,
+ T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM,
+ T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM,
+
+ // 1001 0000 00
+ T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, T_STR_SPREL,
+ T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, T_STR_SPREL,
+ T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, T_STR_SPREL,
+ T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, T_STR_SPREL,
+
+ // 1001 0100 00
+ T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, T_STR_SPREL,
+ T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, T_STR_SPREL,
+ T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, T_STR_SPREL,
+ T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, T_STR_SPREL,
+
+ // 1001 1000 00
+ T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL,
+ T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL,
+ T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL,
+ T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL,
+
+ // 1001 1100 00
+ T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL,
+ T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL,
+ T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL,
+ T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL,
+
+ // 1010 0000 00
+ T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL,
+ T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL,
+ T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL,
+ T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL,
+
+ // 1010 0100 00
+ T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL,
+ T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL,
+ T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL,
+ T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL,
+
+ // 1010 1000 00
+ T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL,
+ T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL,
+ T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL,
+ T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL,
+
+ // 1010 1100 00
+ T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL,
+ T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL,
+ T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL,
+ T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL,
+
+ // 1011 0000 00
+ T_ADD_SP, T_ADD_SP, T_ADD_SP, T_ADD_SP,
+ T_UNK, T_UNK, T_UNK, T_UNK,
+ T_UNK, T_UNK, T_UNK, T_UNK,
+ T_UNK, T_UNK, T_UNK, T_UNK,
+
+ // 1011 0100 00
+ T_PUSH, T_PUSH, T_PUSH, T_PUSH,
+ T_PUSH, T_PUSH, T_PUSH, T_PUSH,
+ T_UNK, T_UNK, T_UNK, T_UNK,
+ T_UNK, T_UNK, T_UNK, T_UNK,
+
+ // 1011 1000 00
+ T_UNK, T_UNK, T_UNK, T_UNK,
+ T_UNK, T_UNK, T_UNK, T_UNK,
+ T_UNK, T_UNK, T_UNK, T_UNK,
+ T_UNK, T_UNK, T_UNK, T_UNK,
+
+ // 1011 1100 00
+ T_POP, T_POP, T_POP, T_POP,
+ T_POP, T_POP, T_POP, T_POP,
+ T_UNK, T_UNK, T_UNK, T_UNK,
+ T_UNK, T_UNK, T_UNK, T_UNK,
+
+
+
+ // 1100 0000 00
+ T_STMIA, T_STMIA, T_STMIA, T_STMIA,
+ T_STMIA, T_STMIA, T_STMIA, T_STMIA,
+ T_STMIA, T_STMIA, T_STMIA, T_STMIA,
+ T_STMIA, T_STMIA, T_STMIA, T_STMIA,
+
+ // 1100 0100 00
+ T_STMIA, T_STMIA, T_STMIA, T_STMIA,
+ T_STMIA, T_STMIA, T_STMIA, T_STMIA,
+ T_STMIA, T_STMIA, T_STMIA, T_STMIA,
+ T_STMIA, T_STMIA, T_STMIA, T_STMIA,
+
+ // 1100 1000 00
+ T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA,
+ T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA,
+ T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA,
+ T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA,
+
+ // 1100 1100 00
+ T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA,
+ T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA,
+ T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA,
+ T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA,
+
+ // 1101 0000 00
+ T_BCOND, T_BCOND, T_BCOND, T_BCOND,
+ T_BCOND, T_BCOND, T_BCOND, T_BCOND,
+ T_BCOND, T_BCOND, T_BCOND, T_BCOND,
+ T_BCOND, T_BCOND, T_BCOND, T_BCOND,
+
+ // 1101 0100 00
+ T_BCOND, T_BCOND, T_BCOND, T_BCOND,
+ T_BCOND, T_BCOND, T_BCOND, T_BCOND,
+ T_BCOND, T_BCOND, T_BCOND, T_BCOND,
+ T_BCOND, T_BCOND, T_BCOND, T_BCOND,
+
+ // 1101 1000 00
+ T_BCOND, T_BCOND, T_BCOND, T_BCOND,
+ T_BCOND, T_BCOND, T_BCOND, T_BCOND,
+ T_BCOND, T_BCOND, T_BCOND, T_BCOND,
+ T_BCOND, T_BCOND, T_BCOND, T_BCOND,
+
+ // 1101 1100 00
+ T_BCOND, T_BCOND, T_BCOND, T_BCOND,
+ T_BCOND, T_BCOND, T_BCOND, T_BCOND,
+ T_UNK, T_UNK, T_UNK, T_UNK,
+ T_SVC, T_SVC, T_SVC, T_SVC,
+
+ // 1110 0000 00
+ T_B, T_B, T_B, T_B,
+ T_B, T_B, T_B, T_B,
+ T_B, T_B, T_B, T_B,
+ T_B, T_B, T_B, T_B,
+
+ // 1110 0100 00
+ T_B, T_B, T_B, T_B,
+ T_B, T_B, T_B, T_B,
+ T_B, T_B, T_B, T_B,
+ T_B, T_B, T_B, T_B,
+
+ // 1110 1000 00
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2,
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2,
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2,
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2,
+
+ // 1110 1100 00
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2,
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2,
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2,
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2,
+
+ // 1111 0000 00
+ T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1,
+ T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1,
+ T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1,
+ T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1,
+
+ // 1111 0100 00
+ T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1,
+ T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1,
+ T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1,
+ T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1,
+
+ // 1111 1000 00
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2,
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2,
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2,
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2,
+
+ // 1111 1100 00
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2,
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2,
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2,
+ T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2
+};
diff --git a/src/CP15.cpp b/src/CP15.cpp
new file mode 100644
index 0000000..4d1fee6
--- /dev/null
+++ b/src/CP15.cpp
@@ -0,0 +1,300 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "NDS.h"
+#include "ARM.h"
+#include "CP15.h"
+
+
+// Declaration of the ARM9 CPU object that is defined in namespace NDS.
+// Write() below uses it to halt the CPU.
+namespace NDS
+{
+extern ARM* ARM9;
+}
+
+namespace CP15
+{
+
+// Control register value (CP15 op 0x100). Bit 16 gates the DTCM mapping and
+// bit 18 gates the ITCM mapping (see UpdateDTCMSetting/UpdateITCMSetting).
+u32 Control;
+
+// Raw values last written to ops 0x910 (DTCM) and 0x911 (ITCM).
+u32 DTCMSetting, ITCMSetting;
+
+// ITCM backing store and the size of the range, starting at address 0,
+// that is currently served from it (0 = not mapped).
+u8 ITCM[0x8000];
+u32 ITCMSize;
+// DTCM backing store plus the base address and size of its current mapping
+// (base 0xFFFFFFFF / size 0 = not mapped).
+u8 DTCM[0x4000];
+u32 DTCMBase, DTCMSize;
+
+
+// Restores the initial CP15 state: TCM contents zeroed, both TCMs unmapped,
+// setting registers cleared and the control register set to its start value.
+void Reset()
+{
+    // TCM contents
+    memset(ITCM, 0, sizeof(ITCM));
+    memset(DTCM, 0, sizeof(DTCM));
+
+    // registers
+    Control     = 0x78; // dunno
+    ITCMSetting = 0;
+    DTCMSetting = 0;
+
+    // derived mapping state: nothing is mapped
+    ITCMSize = 0;
+    DTCMSize = 0;
+    DTCMBase = 0xFFFFFFFF;
+}
+
+
+// Recomputes DTCMBase/DTCMSize from Control bit 16 and DTCMSetting, and logs
+// the resulting mapping.
+void UpdateDTCMSetting()
+{
+    const bool enabled = (Control & (1<<16)) != 0;
+
+    if (!enabled)
+    {
+        // unmapped: no address can satisfy base <= addr < base+size
+        DTCMBase = 0xFFFFFFFF;
+        DTCMSize = 0;
+        printf("DTCM disabled\n");
+        return;
+    }
+
+    // bits 1-5 of the setting hold the size exponent, bits 12-31 the base
+    const u32 sizeshift = (DTCMSetting >> 1) & 0x1F;
+
+    DTCMBase = DTCMSetting & 0xFFFFF000;
+    DTCMSize = 0x200 << sizeshift;
+    printf("DTCM [%08X] enabled at %08X, size %X\n", DTCMSetting, DTCMBase, DTCMSize);
+}
+
+// Recomputes ITCMSize from Control bit 18 and ITCMSetting, and logs the
+// resulting mapping. ITCM is always treated as starting at address 0.
+void UpdateITCMSetting()
+{
+    const bool enabled = (Control & (1<<18)) != 0;
+
+    if (!enabled)
+    {
+        ITCMSize = 0;
+        printf("ITCM disabled\n");
+        return;
+    }
+
+    // bits 1-5 of the setting hold the size exponent
+    const u32 sizeshift = (ITCMSetting >> 1) & 0x1F;
+
+    ITCMSize = 0x200 << sizeshift;
+    printf("ITCM [%08X] enabled at %08X, size %X\n", ITCMSetting, 0, ITCMSize);
+}
+
+
+// Handles a write to CP15 operation `id` with value `val`.
+// Unrecognised operations are logged, except those in the 0x7xx group, which
+// are ignored silently.
+void Write(u32 id, u32 val)
+{
+    // bits of the control register that a write is allowed to change
+    const u32 ctrlmask = 0x000FF085;
+
+    switch (id)
+    {
+    case 0x100: // control register
+        Control = (Control & ~ctrlmask) | (val & ctrlmask);
+        // the TCM enable bits live in Control, so refresh both mappings
+        UpdateDTCMSetting();
+        UpdateITCMSetting();
+        break;
+
+    case 0x704:
+    case 0x782:
+        NDS::ARM9->Halt(1);
+        break;
+
+    case 0x761: // inval data cache (by address)
+    case 0x762: // inval data cache SI
+    case 0x7A1: // flush data cache (by address)
+    case 0x7A2: // flush data cache SI
+        // caches are not emulated; accepted and ignored
+        break;
+
+    case 0x910:
+        DTCMSetting = val;
+        UpdateDTCMSetting();
+        break;
+
+    case 0x911:
+        ITCMSetting = val;
+        UpdateITCMSetting();
+        break;
+
+    default:
+        if ((id & 0xF00) != 0x700)
+            printf("unknown CP15 write op %03X %08X\n", id, val);
+        break;
+    }
+}
+
+// Returns the value of CP15 operation `id`. Unrecognised operations are
+// logged and read as 0.
+u32 Read(u32 id)
+{
+    // op 0x000 and ops 0x003-0x007 all read back the CPU ID
+    if (id == 0x000 || (id >= 0x003 && id <= 0x007))
+        return 0x41059461;
+
+    switch (id)
+    {
+    case 0x001: return 0x0F0D2112;           // cache type
+    case 0x002: return (6 << 6) | (5 << 18); // TCM size
+
+    case 0x100: return Control;              // control reg
+
+    case 0x910: return DTCMSetting;
+    case 0x911: return ITCMSetting;
+    }
+
+    printf("unknown CP15 read op %03X\n", id);
+    return 0;
+}
+
+
+// TCM are handled here.
+// TODO: later on, handle PU, and maybe caches
+
+// 16-bit code fetch. If addr lies inside the mapped ITCM range, stores the
+// halfword in *val and returns true; otherwise returns false and leaves *val
+// untouched.
+bool HandleCodeRead16(u32 addr, u16* val)
+{
+    if (addr >= ITCMSize)
+        return false;
+
+    // the ITCM array is mirrored across the mapped range
+    *val = *(u16*)&ITCM[addr & 0x7FFF];
+    return true;
+}
+
+// 32-bit code fetch. If addr lies inside the mapped ITCM range, stores the
+// word in *val and returns true; otherwise returns false and leaves *val
+// untouched.
+bool HandleCodeRead32(u32 addr, u32* val)
+{
+    if (addr >= ITCMSize)
+        return false;
+
+    // the ITCM array is mirrored across the mapped range
+    *val = *(u32*)&ITCM[addr & 0x7FFF];
+    return true;
+}
+
+
+// 8-bit data read. ITCM is checked first, then DTCM. Returns true and fills
+// *val when one of them covers addr, false otherwise.
+// forceuser is currently unused.
+bool HandleDataRead8(u32 addr, u8* val, u32 forceuser)
+{
+    u8* src;
+
+    if (addr < ITCMSize)
+        src = &ITCM[addr & 0x7FFF];
+    else if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
+        src = &DTCM[(addr - DTCMBase) & 0x3FFF];
+    else
+        return false;
+
+    *val = *src;
+    return true;
+}
+
+// 16-bit data read. ITCM is checked first, then DTCM. Returns true and fills
+// *val when one of them covers addr, false otherwise.
+// forceuser is currently unused.
+bool HandleDataRead16(u32 addr, u16* val, u32 forceuser)
+{
+    u8* src;
+
+    if (addr < ITCMSize)
+        src = &ITCM[addr & 0x7FFF];
+    else if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
+        src = &DTCM[(addr - DTCMBase) & 0x3FFF];
+    else
+        return false;
+
+    *val = *(u16*)src;
+    return true;
+}
+
+// 32-bit data read. ITCM is checked first, then DTCM. Returns true and fills
+// *val when one of them covers addr, false otherwise.
+// forceuser is currently unused.
+bool HandleDataRead32(u32 addr, u32* val, u32 forceuser)
+{
+    u8* src;
+
+    if (addr < ITCMSize)
+        src = &ITCM[addr & 0x7FFF];
+    else if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
+        src = &DTCM[(addr - DTCMBase) & 0x3FFF];
+    else
+        return false;
+
+    *val = *(u32*)src;
+    return true;
+}
+
+// 8-bit data write. ITCM is checked first, then DTCM. Returns true when the
+// value was stored in one of them, false when addr is in neither.
+// forceuser is currently unused.
+bool HandleDataWrite8(u32 addr, u8 val, u32 forceuser)
+{
+    u8* dst;
+
+    if (addr < ITCMSize)
+        dst = &ITCM[addr & 0x7FFF];
+    else if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
+        dst = &DTCM[(addr - DTCMBase) & 0x3FFF];
+    else
+        return false;
+
+    *dst = val;
+    return true;
+}
+
+// 16-bit data write. ITCM is checked first, then DTCM. Returns true when the
+// value was stored in one of them, false when addr is in neither.
+// forceuser is currently unused.
+bool HandleDataWrite16(u32 addr, u16 val, u32 forceuser)
+{
+    u8* dst;
+
+    if (addr < ITCMSize)
+        dst = &ITCM[addr & 0x7FFF];
+    else if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
+        dst = &DTCM[(addr - DTCMBase) & 0x3FFF];
+    else
+        return false;
+
+    *(u16*)dst = val;
+    return true;
+}
+
+// 32-bit data write. ITCM is checked first, then DTCM. Returns true when the
+// value was stored in one of them, false when addr is in neither.
+// forceuser is currently unused.
+bool HandleDataWrite32(u32 addr, u32 val, u32 forceuser)
+{
+    u8* dst;
+
+    if (addr < ITCMSize)
+        dst = &ITCM[addr & 0x7FFF];
+    else if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
+        dst = &DTCM[(addr - DTCMBase) & 0x3FFF];
+    else
+        return false;
+
+    *(u32*)dst = val;
+    return true;
+}
+
+}
diff --git a/src/CP15.h b/src/CP15.h
new file mode 100644
index 0000000..eedea10
--- /dev/null
+++ b/src/CP15.h
@@ -0,0 +1,44 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef CP15_H
+#define CP15_H
+
+namespace CP15
+{
+
+// Resets the control register, the TCM setting registers and the TCM contents.
+void Reset();
+
+// Recompute the DTCM / ITCM mapping from the control register and the
+// corresponding TCM setting register.
+void UpdateDTCMSetting();
+void UpdateITCMSetting();
+
+// CP15 register access. id identifies the operation (e.g. 0x100 is the
+// control register, 0x910/0x911 the DTCM/ITCM settings).
+void Write(u32 id, u32 val);
+u32 Read(u32 id);
+
+// TCM access hooks. Each returns true when addr falls inside a mapped TCM and
+// the access was performed there, false when addr is not covered by a TCM.
+// Code reads only consult ITCM; data accesses consult ITCM first, then DTCM.
+// forceuser is accepted but not used by the current implementation.
+bool HandleCodeRead16(u32 addr, u16* val);
+bool HandleCodeRead32(u32 addr, u32* val);
+bool HandleDataRead8(u32 addr, u8* val, u32 forceuser=0);
+bool HandleDataRead16(u32 addr, u16* val, u32 forceuser=0);
+bool HandleDataRead32(u32 addr, u32* val, u32 forceuser=0);
+bool HandleDataWrite8(u32 addr, u8 val, u32 forceuser=0);
+bool HandleDataWrite16(u32 addr, u16 val, u32 forceuser=0);
+bool HandleDataWrite32(u32 addr, u32 val, u32 forceuser=0);
+
+}
+
+#endif
diff --git a/src/DMA.cpp b/src/DMA.cpp
new file mode 100644
index 0000000..b3e4f2f
--- /dev/null
+++ b/src/DMA.cpp
@@ -0,0 +1,269 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include "NDS.h"
+#include "DMA.h"
+#include "NDSCart.h"
+#include "GPU3D.h"
+
+
+// NOTES ON DMA SHIT
+//
+// * could use optimized code paths for common types of DMA transfers. for example, VRAM
+// * needs to eventually be made more accurate anyway. DMA isn't instant.
+
+
+// Sets up DMA channel `num` of CPU `cpu` (0 = ARM9, nonzero = ARM7): the
+// transfer-count mask, the per-region cycle costs, and the initial state.
+//
+// Waitstates[0] holds the 16-bit costs and Waitstates[1] the 32-bit costs;
+// both are indexed by bits 24-27 of the address (see Run()).
+DMA::DMA(u32 cpu, u32 num)
+{
+    CPU = cpu;
+    Num = num;
+
+    if (cpu != 0)
+        CountMask = (num==3 ? 0x0000FFFF : 0x00003FFF);
+    else
+        CountMask = 0x001FFFFF;
+
+    // TODO: merge with the one in ARM.cpp, somewhere
+    // every region costs 1 cycle unless overridden below
+    for (int width = 0; width < 2; width++)
+    {
+        for (int region = 0; region < 16; region++)
+            Waitstates[width][region] = 1;
+    }
+
+    // overrides shared by both CPUs
+    // note (from the ARM9 table): 33MHz cycles
+    Waitstates[0][0x8] = 6;
+    Waitstates[0][0x9] = 6;
+    Waitstates[0][0xA] = 10;
+
+    Waitstates[1][0x2] = 2;
+    Waitstates[1][0x6] = 2;
+    Waitstates[1][0x8] = 12;
+    Waitstates[1][0x9] = 12;
+    Waitstates[1][0xA] = 10;
+
+    // ARM9 only: 32-bit accesses to region 5 take 2 cycles
+    if (!cpu)
+        Waitstates[1][0x5] = 2;
+
+    Reset();
+}
+
+// Nothing to release: the class has no pointer members.
+DMA::~DMA()
+{
+}
+
+// Clears the channel registers and all in-flight transfer state; the channel
+// ends up idle. CPU, Num, CountMask and Waitstates are left untouched.
+void DMA::Reset()
+{
+    // idle
+    Running = false;
+
+    // registers
+    Cnt = 0;
+    SrcAddr = 0;
+    DstAddr = 0;
+
+    // state latched when a transfer is armed or started
+    StartMode = 0;
+    SrcAddrInc = 0;
+    DstAddrInc = 0;
+    CurSrcAddr = 0;
+    CurDstAddr = 0;
+    IterCount = 0;
+    RemCount = 0;
+}
+
+// Handles a write to the channel's control register. When the enable bit
+// (bit 31) goes from 0 to 1 the addresses, step directions and start mode are
+// latched, and the transfer is started at once if the mode is "immediate".
+void DMA::WriteCnt(u32 val)
+{
+    const bool wasenabled = (Cnt & 0x80000000) != 0;
+    Cnt = val;
+
+    // only a rising edge of the enable bit arms the channel
+    if (wasenabled || !(val & 0x80000000))
+        return;
+
+    CurSrcAddr = SrcAddr;
+    CurDstAddr = DstAddr;
+
+    // destination step, bits 21-22: 0 = inc, 1 = dec, 2 = fixed, 3 = inc
+    // (Start() additionally reloads the destination for mode 3)
+    const u32 dstmode = (Cnt >> 21) & 0x3;
+    if (dstmode == 1)
+        DstAddrInc = -1;
+    else if (dstmode == 2)
+        DstAddrInc = 0;
+    else
+        DstAddrInc = 1;
+
+    // source step, bits 23-24: 0 = inc, 1 = dec, 2 = fixed, 3 = invalid
+    const u32 srcmode = (Cnt >> 23) & 0x3;
+    if (srcmode == 1)
+        SrcAddrInc = -1;
+    else if (srcmode == 2)
+        SrcAddrInc = 0;
+    else
+    {
+        SrcAddrInc = 1;
+        if (srcmode == 3) printf("BAD DMA SRC INC MODE 3\n");
+    }
+
+    // ARM7 start modes get 0x10 ORed in so they stay distinct from ARM9 ones
+    if (CPU == 0)
+        StartMode = (Cnt >> 27) & 0x7;
+    else
+        StartMode = ((Cnt >> 28) & 0x3) | 0x10;
+
+    if ((StartMode & 0x7) == 0)
+        Start();
+    else if (StartMode == 0x07)
+        GPU3D::CheckFIFODMA();
+
+    const bool handled = (StartMode&7)==0x00 || (StartMode&7)==0x1 || StartMode==2 ||
+                         StartMode==0x05 || StartMode==0x12 || StartMode==0x07;
+    if (!handled)
+        printf("UNIMPLEMENTED ARM%d DMA%d START MODE %02X\n", CPU?7:9, Num, StartMode);
+}
+
+// Begins a transfer on this channel (no-op if one is already running).
+//
+// Loads the remaining unit count from Cnt (a count of 0 means the maximum,
+// CountMask+1), limits one burst to 112 units for start mode 0x07, and
+// reloads the destination address when the destination mode is 3.
+// Cart transfers that match the pattern below are completed immediately via
+// NDSCart::DMA(); everything else stops the owning CPU and is carried out
+// by Run().
+void DMA::Start()
+{
+    if (Running) return;
+
+    // CountMask is set once in the constructor from CPU/Num
+    RemCount = Cnt & CountMask;
+    if (!RemCount)
+        RemCount = CountMask+1;
+
+    if (StartMode == 0x07 && RemCount > 112)
+        IterCount = 112;
+    else
+        IterCount = RemCount;
+
+    if ((Cnt & 0x00600000) == 0x00600000)
+        CurDstAddr = DstAddr;
+
+    //printf("ARM%d DMA%d %08X %02X %08X->%08X %d bytes %dbit\n", CPU?7:9, Num, Cnt, StartMode, CurSrcAddr, CurDstAddr, RemCount*((Cnt&0x04000000)?4:2), (Cnt&0x04000000)?32:16);
+
+    // special path for cart DMA. this is a gross hack.
+    // emulating it properly requires emulating cart transfer delays, so uh... TODO
+    //
+    // The start-mode test is parenthesised on purpose: without the parentheses
+    // '&&' binds tighter than '||', so mode 0x12 alone would take this path
+    // regardless of source address, count and control bits.
+    if ((StartMode==0x05 || StartMode==0x12) &&
+        CurSrcAddr==0x04100010 && RemCount==1 && (Cnt & 0x07E00000)==0x07000000)
+    {
+        NDSCart::DMA(CurDstAddr);
+        Cnt &= ~0x80000000;
+        if (Cnt & 0x40000000)
+            NDS::SetIRQ(CPU, NDS::IRQ_DMA0 + Num);
+        return;
+    }
+
+    // TODO eventually: not stop if we're running code in ITCM
+
+    Running = true;
+    NDS::StopCPU(CPU, 1<<Num);
+}
+
+// Advances a running transfer using at most `cycles` cycles and returns the
+// cycles left over (the value can go negative, since a unit is only charged
+// after it has been moved). Returns `cycles` unchanged when the channel is idle.
+//
+// Each unit costs the source region's plus the destination region's
+// waitstates. When the whole transfer completes, the enable bit is cleared
+// unless the repeat bit (bit 25) is set, the IRQ is raised if bit 30 is set,
+// the CPU is resumed and 2 extra cycles are charged.
+s32 DMA::Run(s32 cycles)
+{
+    if (!Running)
+        return cycles;
+
+    const bool wide = (Cnt & 0x04000000) != 0;
+
+    if (wide)
+    {
+        // 32-bit units
+        u32 (*load)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32;
+        void (*store)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32;
+
+        for (; IterCount > 0 && cycles > 0; IterCount--, RemCount--)
+        {
+            store(CurDstAddr, load(CurSrcAddr));
+
+            cycles -= (Waitstates[1][(CurSrcAddr >> 24) & 0xF] + Waitstates[1][(CurDstAddr >> 24) & 0xF]);
+            CurSrcAddr += SrcAddrInc<<2;
+            CurDstAddr += DstAddrInc<<2;
+        }
+    }
+    else
+    {
+        // 16-bit units
+        u16 (*load)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16;
+        void (*store)(u32,u16) = CPU ? NDS::ARM7Write16 : NDS::ARM9Write16;
+
+        for (; IterCount > 0 && cycles > 0; IterCount--, RemCount--)
+        {
+            store(CurDstAddr, load(CurSrcAddr));
+
+            cycles -= (Waitstates[0][(CurSrcAddr >> 24) & 0xF] + Waitstates[0][(CurDstAddr >> 24) & 0xF]);
+            CurSrcAddr += SrcAddrInc<<1;
+            CurDstAddr += DstAddrInc<<1;
+        }
+    }
+
+    if (RemCount == 0)
+    {
+        // transfer complete
+        if (!(Cnt & 0x02000000))
+            Cnt &= ~0x80000000;
+
+        if (Cnt & 0x40000000)
+            NDS::SetIRQ(CPU, NDS::IRQ_DMA0 + Num);
+
+        Running = false;
+        NDS::ResumeCPU(CPU, 1<<Num);
+
+        return cycles - 2;
+    }
+
+    // still units left: write the remaining count back into the register
+    Cnt &= ~CountMask;
+    Cnt |= RemCount;
+
+    if (IterCount == 0)
+    {
+        // this burst is finished; release the CPU until the next trigger
+        Running = false;
+        NDS::ResumeCPU(CPU, 1<<Num);
+
+        if (StartMode == 0x07)
+            GPU3D::CheckFIFODMA();
+    }
+
+    return cycles;
+}
diff --git a/src/DMA.h b/src/DMA.h
new file mode 100644
index 0000000..59a7f03
--- /dev/null
+++ b/src/DMA.h
@@ -0,0 +1,64 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef DMA_H
+#define DMA_H
+
+#include "types.h"
+
+// One DMA channel of one CPU.
+// SrcAddr, DstAddr and Cnt are the channel's registers and are public so they
+// can be accessed directly; control-register writes go through WriteCnt().
+class DMA
+{
+public:
+    // cpu: 0 = ARM9, nonzero = ARM7. num: channel number.
+    DMA(u32 cpu, u32 num);
+    ~DMA();
+
+    // Clears the registers and any in-flight transfer state.
+    void Reset();
+
+    // Control-register write; arms the channel on a rising enable bit.
+    void WriteCnt(u32 val);
+    // Begins the transfer (no-op while one is already running).
+    void Start();
+
+    // Advances a running transfer; returns the cycles left over.
+    s32 Run(s32 cycles);
+
+    // Starts the channel if it is enabled (Cnt bit 31) and was armed with
+    // exactly this start mode.
+    void StartIfNeeded(u32 mode)
+    {
+        if (mode != StartMode) return;
+        if (!(Cnt & 0x80000000)) return;
+
+        Start();
+    }
+
+    u32 SrcAddr;
+    u32 DstAddr;
+    u32 Cnt;
+
+private:
+    u32 CPU, Num;
+
+    // cycle cost per address region: [0] = 16-bit, [1] = 32-bit accesses,
+    // indexed by address bits 24-27
+    s32 Waitstates[2][16];
+
+    u32 StartMode;  // latched by WriteCnt(); ARM7 modes have 0x10 set
+    u32 CurSrcAddr; // running source address
+    u32 CurDstAddr; // running destination address
+    u32 RemCount;   // units left in the whole transfer
+    u32 IterCount;  // units left in the current burst
+    u32 SrcAddrInc; // per-unit step: 1, 0 or (u32)-1
+    u32 DstAddrInc; // per-unit step: 1, 0 or (u32)-1
+    u32 CountMask;  // mask of the count field inside Cnt
+
+    bool Running;
+};
+
+#endif
diff --git a/src/FIFO.h b/src/FIFO.h
new file mode 100644
index 0000000..4130b85
--- /dev/null
+++ b/src/FIFO.h
@@ -0,0 +1,93 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef FIFO_H
+#define FIFO_H
+
+#include "types.h"
+
+// Fixed-capacity ring-buffer FIFO.
+//
+// The capacity is chosen at construction and never changes. Writing to a full
+// FIFO is silently ignored; reading from an empty FIFO returns the entry at
+// the read position without consuming anything.
+//
+// The object owns its heap buffer, so it is deliberately non-copyable: an
+// implicit copy would share the pointer and free it twice.
+template<typename T>
+class FIFO
+{
+public:
+    // Allocates room for `num` entries and starts out empty.
+    FIFO(u32 num)
+    {
+        NumEntries = num;
+        Entries = new T[num];
+        Clear();
+    }
+
+    ~FIFO()
+    {
+        delete[] Entries;
+    }
+
+
+    // Empties the FIFO. Only the entry at the (reset) read position is
+    // zeroed, so that Read()/Peek() on the freshly cleared FIFO yield zero
+    // bytes; the other slots keep their old contents.
+    void Clear()
+    {
+        NumOccupied = 0;
+        ReadPos = 0;
+        WritePos = 0;
+        memset(&Entries[ReadPos], 0, sizeof(T));
+    }
+
+
+    // Appends val; does nothing when the FIFO is full.
+    void Write(T val)
+    {
+        if (IsFull()) return;
+
+        Entries[WritePos] = val;
+
+        WritePos++;
+        if (WritePos >= NumEntries)
+            WritePos = 0;
+
+        NumOccupied++;
+    }
+
+    // Removes and returns the oldest entry. On an empty FIFO nothing is
+    // consumed and the entry currently at the read position is returned.
+    T Read()
+    {
+        T ret = Entries[ReadPos];
+        if (IsEmpty())
+            return ret;
+
+        ReadPos++;
+        if (ReadPos >= NumEntries)
+            ReadPos = 0;
+
+        NumOccupied--;
+        return ret;
+    }
+
+    // Returns the entry at the read position without consuming it.
+    T Peek() const
+    {
+        return Entries[ReadPos];
+    }
+
+    u32 Level() const { return NumOccupied; }
+    bool IsEmpty() const { return NumOccupied == 0; }
+    bool IsFull() const { return NumOccupied >= NumEntries; }
+
+private:
+    // Declared but never defined: copying is not supported (see class comment).
+    FIFO(const FIFO&);
+    FIFO& operator=(const FIFO&);
+
+    u32 NumEntries;
+    T* Entries;
+    u32 NumOccupied;
+    u32 ReadPos, WritePos;
+};
+
+#endif
diff --git a/src/GPU.cpp b/src/GPU.cpp
new file mode 100644
index 0000000..28c5d24
--- /dev/null
+++ b/src/GPU.cpp
@@ -0,0 +1,732 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "NDS.h"
+#include "GPU.h"
+
+
+namespace GPU
+{
+
+// Scanline timing, in the cycle unit used by NDS::ScheduleEvent:
+// StartScanline() schedules HBlank HBLANK_CYCLES after the start of a line,
+// and StartHBlank() schedules the next line LINE_CYCLES - HBLANK_CYCLES later.
+#define LINE_CYCLES (355*6)
+#define HBLANK_CYCLES (256*6)
+#define FRAME_CYCLES (LINE_CYCLES * 263)
+
+// Current scanline, set by StartScanline().
+u16 VCount;
+
+// Per-CPU display status and VCount match value (index 0 and 1 are the two
+// CPUs, as used with NDS::SetIRQ).
+u16 DispStat[2], VMatch[2];
+
+u8 Palette[2*1024];
+u8 OAM[2*1024];
+
+// Backing storage for the nine VRAM banks; VRAM[] gives them by bank index.
+u8 VRAM_A[128*1024];
+u8 VRAM_B[128*1024];
+u8 VRAM_C[128*1024];
+u8 VRAM_D[128*1024];
+u8 VRAM_E[ 64*1024];
+u8 VRAM_F[ 16*1024];
+u8 VRAM_G[ 16*1024];
+u8 VRAM_H[ 32*1024];
+u8 VRAM_I[ 16*1024];
+u8* VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I};
+
+// Last control value written for each bank (see the MapVRAM_* functions).
+u8 VRAMCNT[9];
+// Bit 0 / bit 1 are set while bank C / bank D is mapped to the ARM7.
+u8 VRAMSTAT;
+
+//u32 VRAM_Base[9];
+//u32 VRAM_Mask[9];
+
+// Mapping tables. Every entry is a bitmask of banks (bit n = bank n) that
+// are currently mapped to that slot of the region.
+u32 VRAMMap_LCDC;
+
+u32 VRAMMap_ABG[0x20];
+u32 VRAMMap_AOBJ[0x10];
+u32 VRAMMap_BBG[0x8];
+u32 VRAMMap_BOBJ[0x8];
+
+u32 VRAMMap_ABGExtPal[4];
+u32 VRAMMap_AOBJExtPal;
+u32 VRAMMap_BBGExtPal[4];
+u32 VRAMMap_BOBJExtPal;
+
+u32 VRAMMap_Texture[4];
+u32 VRAMMap_TexPal[6];
+
+u32 VRAMMap_ARM7[2];
+
+/*u8* VRAM_ABG[128];
+u8* VRAM_AOBJ[128];
+u8* VRAM_BBG[128];
+u8* VRAM_BOBJ[128];
+u8* VRAM_LCD[128];*/
+/*u8* VRAM_ARM7[2];
+
+u8* VRAM_ABGExtPal[4];
+u8* VRAM_AOBJExtPal;
+u8* VRAM_BBGExtPal[4];
+u8* VRAM_BOBJExtPal;*/
+
+// Output pixels for both screens: two consecutive 256x192 halves. Which 2D
+// engine draws into which half is set by Reset() and DisplaySwap().
+u32 Framebuffer[256*192*2];
+
+// The two 2D engines, created in Init() and destroyed in DeInit().
+GPU2D* GPU2D_A;
+GPU2D* GPU2D_B;
+
+
+// Creates the two 2D engines and initialises the 3D engine.
+// Returns false if the 3D engine fails to initialise; in that case the 2D
+// engines are released again and their pointers reset to NULL, so nothing
+// leaks and a later DeInit() still deletes safely.
+bool Init()
+{
+    GPU2D_A = new GPU2D(0);
+    GPU2D_B = new GPU2D(1);
+
+    if (!GPU3D::Init())
+    {
+        delete GPU2D_B;
+        delete GPU2D_A;
+        GPU2D_A = NULL;
+        GPU2D_B = NULL;
+        return false;
+    }
+
+    return true;
+}
+
+// Counterpart of Init(): destroys both 2D engines, then shuts down the 3D
+// engine.
+void DeInit()
+{
+ delete GPU2D_A;
+ delete GPU2D_B;
+ GPU3D::DeInit();
+}
+
+// Puts the whole GPU back into its initial state: display status cleared,
+// palette/OAM/VRAM zeroed, every VRAM bank unmapped, framebuffer filled with
+// 0xFFFFFFFF, and the 2D/3D engines reset. Engine A is attached to the second
+// framebuffer half and engine B to the first.
+void Reset()
+{
+    VCount = 0;
+
+    for (int cpu = 0; cpu < 2; cpu++)
+    {
+        DispStat[cpu] = 0;
+        VMatch[cpu] = 0;
+    }
+
+    memset(Palette, 0, sizeof(Palette));
+    memset(OAM, 0, sizeof(OAM));
+
+    // bank contents
+    memset(VRAM_A, 0, sizeof(VRAM_A));
+    memset(VRAM_B, 0, sizeof(VRAM_B));
+    memset(VRAM_C, 0, sizeof(VRAM_C));
+    memset(VRAM_D, 0, sizeof(VRAM_D));
+    memset(VRAM_E, 0, sizeof(VRAM_E));
+    memset(VRAM_F, 0, sizeof(VRAM_F));
+    memset(VRAM_G, 0, sizeof(VRAM_G));
+    memset(VRAM_H, 0, sizeof(VRAM_H));
+    memset(VRAM_I, 0, sizeof(VRAM_I));
+
+    // bank control registers
+    memset(VRAMCNT, 0, sizeof(VRAMCNT));
+    VRAMSTAT = 0;
+
+    // mapping tables: no bank mapped anywhere
+    VRAMMap_LCDC = 0;
+    VRAMMap_AOBJExtPal = 0;
+    VRAMMap_BOBJExtPal = 0;
+
+    memset(VRAMMap_ABG, 0, sizeof(VRAMMap_ABG));
+    memset(VRAMMap_AOBJ, 0, sizeof(VRAMMap_AOBJ));
+    memset(VRAMMap_BBG, 0, sizeof(VRAMMap_BBG));
+    memset(VRAMMap_BOBJ, 0, sizeof(VRAMMap_BOBJ));
+
+    memset(VRAMMap_ABGExtPal, 0, sizeof(VRAMMap_ABGExtPal));
+    memset(VRAMMap_BBGExtPal, 0, sizeof(VRAMMap_BBGExtPal));
+
+    memset(VRAMMap_Texture, 0, sizeof(VRAMMap_Texture));
+    memset(VRAMMap_TexPal, 0, sizeof(VRAMMap_TexPal));
+
+    memset(VRAMMap_ARM7, 0, sizeof(VRAMMap_ARM7));
+
+    // every pixel becomes 0xFFFFFFFF
+    memset(Framebuffer, 0xFF, sizeof(Framebuffer));
+
+    GPU2D_A->Reset();
+    GPU2D_B->Reset();
+    GPU3D::Reset();
+
+    GPU2D_A->SetFramebuffer(&Framebuffer[256*192]);
+    GPU2D_B->SetFramebuffer(&Framebuffer[256*0]);
+}
+
+
+// VRAM mapping notes
+//
+// mirroring:
+// unmapped range reads zero
+// LCD is mirrored every 0x100000 bytes, the gap between each mirror reads zero
+// ABG:
+// bank A,B,C,D,E mirror every 0x80000 bytes
+// bank F,G mirror at base+0x8000, mirror every 0x80000 bytes
+// AOBJ:
+// bank A,B,E mirror every 0x40000 bytes
+// bank F,G mirror at base+0x8000, mirror every 0x40000 bytes
+// BBG:
+// bank C mirrors every 0x20000 bytes
+// bank H mirrors every 0x10000 bytes
+// bank I mirrors at base+0x4000, mirrors every 0x10000 bytes
+// BOBJ:
+// bank D mirrors every 0x20000 bytes
+// bank I mirrors every 0x4000 bytes
+//
+// untested:
+// ARM7 (TODO)
+// extended palette (mirroring doesn't apply)
+// texture/texpal (does mirroring apply?)
+// -> trying to use extpal/texture/texpal with no VRAM mapped.
+// would likely read all black, but has to be tested.
+//
+// overlap:
+// when reading: values are read from each bank and ORed together
+// when writing: value is written to each bank
+
+// Set / clear the current bank's bit in n consecutive entries of a mapping
+// table, starting at index base. Both macros expect a variable named
+// `bankmask` to be in scope at the point of use, and expand to a bare `for`
+// statement (use them inside braces).
+#define MAP_RANGE(map, base, n) for (int i = 0; i < n; i++) map[(base)+i] |= bankmask;
+#define UNMAP_RANGE(map, base, n) for (int i = 0; i < n; i++) map[(base)+i] &= ~bankmask;
+
+// Applies control value `cnt` to VRAM bank `bank` (named for banks A and B):
+// the bank's bit is first removed from whatever the previous control value
+// mapped it to, then added to the region the new value selects.
+// Bit 7 of the control value is the enable bit, bits 0-1 select the region
+// and bits 3-4 the offset within it.
+void MapVRAM_AB(u32 bank, u8 cnt)
+{
+    const u8 prev = VRAMCNT[bank];
+    VRAMCNT[bank] = cnt;
+
+    // same value as before: the mapping is already right
+    if (prev == cnt) return;
+
+    const u32 bit = 1 << bank;
+
+    // tear down the previous mapping
+    if (prev & (1<<7))
+    {
+        const u8 prevmode = prev & 0x3;
+        const u8 prevofs = (prev >> 3) & 0x3;
+
+        if (prevmode == 0) // LCDC
+        {
+            VRAMMap_LCDC &= ~bit;
+        }
+        else if (prevmode == 1) // ABG
+        {
+            u32* slots = &VRAMMap_ABG[prevofs << 3];
+            for (int n = 0; n < 8; n++) slots[n] &= ~bit;
+        }
+        else if (prevmode == 2) // AOBJ: only offset bit 0 is used
+        {
+            u32* slots = &VRAMMap_AOBJ[(prevofs & 0x1) << 3];
+            for (int n = 0; n < 8; n++) slots[n] &= ~bit;
+        }
+        else // texture
+        {
+            VRAMMap_Texture[prevofs] &= ~bit;
+        }
+    }
+
+    // set up the new mapping
+    if (cnt & (1<<7))
+    {
+        const u8 mode = cnt & 0x3;
+        const u8 ofs = (cnt >> 3) & 0x3;
+
+        if (mode == 0) // LCDC
+        {
+            VRAMMap_LCDC |= bit;
+        }
+        else if (mode == 1) // ABG
+        {
+            u32* slots = &VRAMMap_ABG[ofs << 3];
+            for (int n = 0; n < 8; n++) slots[n] |= bit;
+        }
+        else if (mode == 2) // AOBJ: only offset bit 0 is used
+        {
+            u32* slots = &VRAMMap_AOBJ[(ofs & 0x1) << 3];
+            for (int n = 0; n < 8; n++) slots[n] |= bit;
+        }
+        else // texture
+        {
+            VRAMMap_Texture[ofs] |= bit;
+        }
+    }
+}
+
+// Applies control value `cnt` to VRAM bank C or D (bank = 2 or 3): the
+// bank's bit is removed from the region selected by the previous control
+// value and added to the region selected by the new one.
+// Bit 7 is the enable bit, bits 0-2 select the region, bits 3-4 the offset.
+// VRAMSTAT bit (bank-2) is set exactly while the bank is mapped to the ARM7.
+void MapVRAM_CD(u32 bank, u8 cnt)
+{
+    u8 oldcnt = VRAMCNT[bank];
+    VRAMCNT[bank] = cnt;
+
+    // Nothing changes when the same value is written again. This check must
+    // come before VRAMSTAT is touched, otherwise rewriting an ARM7 mapping
+    // would clear the status bit without ever setting it again.
+    if (oldcnt == cnt) return;
+
+    VRAMSTAT &= ~(1 << (bank-2));
+
+    // Only two offset bits are valid: VRAMMap_Texture has 4 entries and
+    // VRAMMap_ABG has 0x20 entries (ofs<<3 plus 8 slots), so a wider mask
+    // would index past the end of both tables.
+    u8 oldofs = (oldcnt >> 3) & 0x3;
+    u8 ofs = (cnt >> 3) & 0x3;
+    u32 bankmask = 1 << bank;
+
+    if (oldcnt & (1<<7))
+    {
+        switch (oldcnt & 0x7)
+        {
+        case 0: // LCDC
+            VRAMMap_LCDC &= ~bankmask;
+            break;
+
+        case 1: // ABG
+            UNMAP_RANGE(VRAMMap_ABG, oldofs<<3, 8);
+            break;
+
+        case 2: // ARM7 VRAM
+            oldofs &= 0x1;
+            VRAMMap_ARM7[oldofs] &= ~bankmask;
+            break;
+
+        case 3: // texture
+            VRAMMap_Texture[oldofs] &= ~bankmask;
+            break;
+
+        case 4: // BBG/BOBJ
+            if (bank == 2)
+            {
+                UNMAP_RANGE(VRAMMap_BBG, 0, 8);
+            }
+            else
+            {
+                UNMAP_RANGE(VRAMMap_BOBJ, 0, 8);
+            }
+            break;
+        }
+    }
+
+    if (cnt & (1<<7))
+    {
+        switch (cnt & 0x7)
+        {
+        case 0: // LCDC
+            VRAMMap_LCDC |= bankmask;
+            break;
+
+        case 1: // ABG
+            MAP_RANGE(VRAMMap_ABG, ofs<<3, 8);
+            break;
+
+        case 2: // ARM7 VRAM
+            ofs &= 0x1;
+            VRAMMap_ARM7[ofs] |= bankmask;
+            VRAMSTAT |= (1 << (bank-2));
+            break;
+
+        case 3: // texture
+            VRAMMap_Texture[ofs] |= bankmask;
+            break;
+
+        case 4: // BBG/BOBJ
+            if (bank == 2)
+            {
+                MAP_RANGE(VRAMMap_BBG, 0, 8);
+            }
+            else
+            {
+                MAP_RANGE(VRAMMap_BOBJ, 0, 8);
+            }
+            break;
+        }
+    }
+}
+
+// Applies control value `cnt` to VRAM bank `bank` (named for bank E). The
+// bank's bit is removed from the region the previous value selected and added
+// to the region the new value selects. Bit 7 is the enable bit and bits 0-2
+// select the region; every table region covers slots 0-3.
+// Engine A's BG extended palettes (slots 0 and 2) are flagged dirty whenever
+// the ext-palette mapping is removed or added.
+void MapVRAM_E(u32 bank, u8 cnt)
+{
+    const u8 prev = VRAMCNT[bank];
+    VRAMCNT[bank] = cnt;
+
+    if (prev == cnt) return;
+
+    const u32 bit = 1 << bank;
+
+    // tear down the previous mapping
+    if (prev & (1<<7))
+    {
+        const u8 prevmode = prev & 0x7;
+        u32* table = 0;
+
+        switch (prevmode)
+        {
+        case 0: VRAMMap_LCDC &= ~bit; break;        // LCDC
+        case 1: table = VRAMMap_ABG; break;         // ABG
+        case 2: table = VRAMMap_AOBJ; break;        // AOBJ
+        case 3: table = VRAMMap_TexPal; break;      // texture palette
+        case 4: table = VRAMMap_ABGExtPal; break;   // ABG ext palette
+        }
+
+        if (table)
+        {
+            for (int n = 0; n < 4; n++) table[n] &= ~bit;
+        }
+
+        if (prevmode == 4)
+        {
+            GPU2D_A->BGExtPalDirty(0);
+            GPU2D_A->BGExtPalDirty(2);
+        }
+    }
+
+    // set up the new mapping
+    if (cnt & (1<<7))
+    {
+        const u8 mode = cnt & 0x7;
+        u32* table = 0;
+
+        switch (mode)
+        {
+        case 0: VRAMMap_LCDC |= bit; break;         // LCDC
+        case 1: table = VRAMMap_ABG; break;         // ABG
+        case 2: table = VRAMMap_AOBJ; break;        // AOBJ
+        case 3: table = VRAMMap_TexPal; break;      // texture palette
+        case 4: table = VRAMMap_ABGExtPal; break;   // ABG ext palette
+        }
+
+        if (table)
+        {
+            for (int n = 0; n < 4; n++) table[n] |= bit;
+        }
+
+        if (mode == 4)
+        {
+            GPU2D_A->BGExtPalDirty(0);
+            GPU2D_A->BGExtPalDirty(2);
+        }
+    }
+}
+
+// Applies control value `cnt` to VRAM bank `bank` (named for banks F and G).
+// The bank's bit is removed from the region the previous value selected and
+// added to the region the new value selects. Bit 7 is the enable bit, bits
+// 0-2 select the region and bits 3-4 the offset.
+// For ABG/AOBJ/texture palette the offset picks slot (bit0) + 4*(bit1);
+// ABG and AOBJ additionally get the slot two entries further on.
+void MapVRAM_FG(u32 bank, u8 cnt)
+{
+    const u8 prev = VRAMCNT[bank];
+    VRAMCNT[bank] = cnt;
+
+    if (prev == cnt) return;
+
+    const u32 bit = 1 << bank;
+
+    // tear down the previous mapping
+    if (prev & (1<<7))
+    {
+        const u8 prevofs = (prev >> 3) & 0x7;
+        const int slot = (prevofs & 0x1) + ((prevofs & 0x2) << 1);
+        const int palslot = (prevofs & 0x1) << 1;
+
+        switch (prev & 0x7)
+        {
+        case 0: // LCDC
+            VRAMMap_LCDC &= ~bit;
+            break;
+
+        case 1: // ABG
+            VRAMMap_ABG[slot] &= ~bit;
+            VRAMMap_ABG[slot + 2] &= ~bit;
+            break;
+
+        case 2: // AOBJ
+            VRAMMap_AOBJ[slot] &= ~bit;
+            VRAMMap_AOBJ[slot + 2] &= ~bit;
+            break;
+
+        case 3: // texture palette
+            VRAMMap_TexPal[slot] &= ~bit;
+            break;
+
+        case 4: // ABG ext palette
+            VRAMMap_ABGExtPal[palslot] &= ~bit;
+            VRAMMap_ABGExtPal[palslot + 1] &= ~bit;
+            GPU2D_A->BGExtPalDirty(0);
+            GPU2D_A->BGExtPalDirty(2);
+            break;
+
+        case 5: // AOBJ ext palette
+            VRAMMap_AOBJExtPal &= ~bit;
+            GPU2D_A->OBJExtPalDirty();
+            break;
+        }
+    }
+
+    // set up the new mapping
+    if (cnt & (1<<7))
+    {
+        const u8 ofs = (cnt >> 3) & 0x7;
+        const int slot = (ofs & 0x1) + ((ofs & 0x2) << 1);
+        const int palslot = (ofs & 0x1) << 1;
+
+        switch (cnt & 0x7)
+        {
+        case 0: // LCDC
+            VRAMMap_LCDC |= bit;
+            break;
+
+        case 1: // ABG
+            VRAMMap_ABG[slot] |= bit;
+            VRAMMap_ABG[slot + 2] |= bit;
+            break;
+
+        case 2: // AOBJ
+            VRAMMap_AOBJ[slot] |= bit;
+            VRAMMap_AOBJ[slot + 2] |= bit;
+            break;
+
+        case 3: // texture palette
+            VRAMMap_TexPal[slot] |= bit;
+            break;
+
+        case 4: // ABG ext palette
+            VRAMMap_ABGExtPal[palslot] |= bit;
+            VRAMMap_ABGExtPal[palslot + 1] |= bit;
+            GPU2D_A->BGExtPalDirty(0);
+            GPU2D_A->BGExtPalDirty(2);
+            break;
+
+        case 5: // AOBJ ext palette
+            VRAMMap_AOBJExtPal |= bit;
+            GPU2D_A->OBJExtPalDirty();
+            break;
+        }
+    }
+}
+
+// Applies control value `cnt` to VRAM bank `bank` (named for bank H). The
+// bank's bit is removed from the region the previous value selected and added
+// to the region the new value selects. Bit 7 is the enable bit and bits 0-1
+// select the region: 0 = LCDC, 1 = BBG (slots 0, 1, 4, 5), 2 = BBG extended
+// palette (all four slots). Region 3 maps nothing.
+void MapVRAM_H(u32 bank, u8 cnt)
+{
+    static const int bbgslots[4] = {0, 1, 4, 5};
+
+    const u8 prev = VRAMCNT[bank];
+    VRAMCNT[bank] = cnt;
+
+    if (prev == cnt) return;
+
+    const u32 bit = 1 << bank;
+
+    // tear down the previous mapping
+    if (prev & (1<<7))
+    {
+        const u8 prevmode = prev & 0x3;
+
+        if (prevmode == 0) // LCDC
+        {
+            VRAMMap_LCDC &= ~bit;
+        }
+        else if (prevmode == 1) // BBG
+        {
+            for (int n = 0; n < 4; n++) VRAMMap_BBG[bbgslots[n]] &= ~bit;
+        }
+        else if (prevmode == 2) // BBG ext palette
+        {
+            for (int n = 0; n < 4; n++) VRAMMap_BBGExtPal[n] &= ~bit;
+            GPU2D_B->BGExtPalDirty(0);
+            GPU2D_B->BGExtPalDirty(2);
+        }
+    }
+
+    // set up the new mapping
+    if (cnt & (1<<7))
+    {
+        const u8 mode = cnt & 0x3;
+
+        if (mode == 0) // LCDC
+        {
+            VRAMMap_LCDC |= bit;
+        }
+        else if (mode == 1) // BBG
+        {
+            for (int n = 0; n < 4; n++) VRAMMap_BBG[bbgslots[n]] |= bit;
+        }
+        else if (mode == 2) // BBG ext palette
+        {
+            for (int n = 0; n < 4; n++) VRAMMap_BBGExtPal[n] |= bit;
+            GPU2D_B->BGExtPalDirty(0);
+            GPU2D_B->BGExtPalDirty(2);
+        }
+    }
+}
+
+// Applies control value `cnt` to VRAM bank `bank` (named for bank I). The
+// bank's bit is removed from the region the previous value selected and added
+// to the region the new value selects. Bit 7 is the enable bit and bits 0-1
+// select the region: 0 = LCDC, 1 = BBG (slots 2, 3, 6, 7), 2 = BOBJ (all
+// eight slots), 3 = BOBJ extended palette.
+void MapVRAM_I(u32 bank, u8 cnt)
+{
+    static const int bbgslots[4] = {2, 3, 6, 7};
+
+    const u8 prev = VRAMCNT[bank];
+    VRAMCNT[bank] = cnt;
+
+    if (prev == cnt) return;
+
+    const u32 bit = 1 << bank;
+
+    // tear down the previous mapping
+    if (prev & (1<<7))
+    {
+        const u8 prevmode = prev & 0x3;
+
+        if (prevmode == 0) // LCDC
+        {
+            VRAMMap_LCDC &= ~bit;
+        }
+        else if (prevmode == 1) // BBG
+        {
+            for (int n = 0; n < 4; n++) VRAMMap_BBG[bbgslots[n]] &= ~bit;
+        }
+        else if (prevmode == 2) // BOBJ
+        {
+            for (int n = 0; n < 8; n++) VRAMMap_BOBJ[n] &= ~bit;
+        }
+        else // BOBJ ext palette
+        {
+            VRAMMap_BOBJExtPal &= ~bit;
+            GPU2D_B->OBJExtPalDirty();
+        }
+    }
+
+    // set up the new mapping
+    if (cnt & (1<<7))
+    {
+        const u8 mode = cnt & 0x3;
+
+        if (mode == 0) // LCDC
+        {
+            VRAMMap_LCDC |= bit;
+        }
+        else if (mode == 1) // BBG
+        {
+            for (int n = 0; n < 4; n++) VRAMMap_BBG[bbgslots[n]] |= bit;
+        }
+        else if (mode == 2) // BOBJ
+        {
+            for (int n = 0; n < 8; n++) VRAMMap_BOBJ[n] |= bit;
+        }
+        else // BOBJ ext palette
+        {
+            VRAMMap_BOBJExtPal |= bit;
+            GPU2D_B->OBJExtPalDirty();
+        }
+    }
+}
+
+
+// Selects which half of Framebuffer each 2D engine renders into.
+// val != 0: engine A gets the first half and engine B the second.
+// val == 0: the other way round (the arrangement Reset() sets up).
+void DisplaySwap(u32 val)
+{
+    u32* firsthalf = &Framebuffer[256*0];
+    u32* secondhalf = &Framebuffer[256*192];
+
+    GPU2D_A->SetFramebuffer(val ? firsthalf : secondhalf);
+    GPU2D_B->SetFramebuffer(val ? secondhalf : firsthalf);
+}
+
+
+// Begins a new frame by starting scanline 0. StartScanline() and
+// StartHBlank() then schedule each other for the following lines.
+void StartFrame()
+{
+ StartScanline(0);
+}
+
+// Called when scanline `line` enters HBlank: sets the HBlank flag (DispStat
+// bit 1) for both CPUs, triggers the mode-0x02 DMAs of CPU 0 on lines below
+// 192, raises the HBlank IRQ for each CPU that has DispStat bit 4 set, and
+// schedules the next scanline unless this was line 262.
+void StartHBlank(u32 line)
+{
+    for (u32 cpu = 0; cpu < 2; cpu++)
+        DispStat[cpu] |= (1<<1);
+
+    if (line < 192)
+        NDS::CheckDMAs(0, 0x02);
+
+    for (u32 cpu = 0; cpu < 2; cpu++)
+    {
+        if (DispStat[cpu] & (1<<4))
+            NDS::SetIRQ(cpu, NDS::IRQ_HBlank);
+    }
+
+    // line 262 is the last one; the chain ends there
+    if (line >= 262)
+        return;
+
+    NDS::ScheduleEvent(NDS::Event_LCD, true, (LINE_CYCLES - HBLANK_CYCLES), StartScanline, line+1);
+}
+
+// Called at the start of scanline `line`.
+//  - updates VCount and clears the HBlank flag (DispStat bit 1) for both CPUs
+//  - updates the VCount-match flag (bit 2) per CPU and raises the VCount IRQ
+//    when it matches and bit 5 is set
+//  - lines 0-191: both 2D engines draw the line
+//  - line 192: VBlank begins (flag bit 0, VBlank DMAs, IRQ if bit 3 is set,
+//    VBlank notification to the 2D/3D engines)
+//  - line 262: the VBlank flag is cleared again
+// Finally the HBlank event for this line is scheduled.
+void StartScanline(u32 line)
+{
+    VCount = line;
+
+    // HBlank is over
+    for (u32 cpu = 0; cpu < 2; cpu++)
+        DispStat[cpu] &= ~(1<<1);
+
+    // VCount match
+    for (u32 cpu = 0; cpu < 2; cpu++)
+    {
+        if (line != VMatch[cpu])
+        {
+            DispStat[cpu] &= ~(1<<2);
+            continue;
+        }
+
+        DispStat[cpu] |= (1<<2);
+
+        if (DispStat[cpu] & (1<<5))
+            NDS::SetIRQ(cpu, NDS::IRQ_VCount);
+    }
+
+    if (line < 192)
+    {
+        // visible line: draw
+        GPU2D_A->DrawScanline(line);
+        GPU2D_B->DrawScanline(line);
+    }
+    else if (line == 262)
+    {
+        // frame end
+        DispStat[0] &= ~(1<<0);
+        DispStat[1] &= ~(1<<0);
+    }
+    else if (line == 192)
+    {
+        // VBlank
+        DispStat[0] |= (1<<0);
+        DispStat[1] |= (1<<0);
+
+        NDS::CheckDMAs(0, 0x01);
+        NDS::CheckDMAs(1, 0x11);
+
+        if (DispStat[0] & (1<<3)) NDS::SetIRQ(0, NDS::IRQ_VBlank);
+        if (DispStat[1] & (1<<3)) NDS::SetIRQ(1, NDS::IRQ_VBlank);
+
+        GPU2D_A->VBlank();
+        GPU2D_B->VBlank();
+        GPU3D::VBlank();
+    }
+
+    NDS::ScheduleEvent(NDS::Event_LCD, true, HBLANK_CYCLES, StartHBlank, line);
+}
+
+
+// Handles a write of `val` to the display status register of CPU `cpu`.
+// Bits 0-2 and bit 6 keep their current value; every other bit is taken from
+// val. The 9-bit VCount match value is rebuilt from bits 8-15 (low eight
+// bits) and bit 7 (bit 8 of the match value).
+void SetDispStat(u32 cpu, u16 val)
+{
+    const u16 written = val & 0xFFB8;
+    const u16 kept = DispStat[cpu] & 0x0047;
+
+    DispStat[cpu] = kept | written;
+
+    VMatch[cpu] = (written >> 8) | ((written & 0x80) << 1);
+}
+
+}
diff --git a/src/GPU.h b/src/GPU.h
new file mode 100644
index 0000000..a77f6c0
--- /dev/null
+++ b/src/GPU.h
@@ -0,0 +1,395 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef GPU_H
+#define GPU_H
+
+#include "GPU2D.h"
+#include "GPU3D.h"
+
+namespace GPU
+{
+
+extern u16 VCount; // scanline most recently started by StartScanline()
+
+extern u16 DispStat[2]; // display status register, one per CPU (index = 'cpu' argument of SetDispStat)
+
+extern u8 VRAMCNT[9]; // one control byte per VRAM bank (presumably the raw value given to MapVRAM_* - confirm in GPU.cpp)
+extern u8 VRAMSTAT;
+
+extern u8 Palette[2*1024]; // GPU2D uses offset 0 when Num is 0 and offset 0x400 otherwise
+extern u8 OAM[2*1024];
+
+extern u8 VRAM_A[128*1024]; // VRAM banks A-I, sized as declared
+extern u8 VRAM_B[128*1024];
+extern u8 VRAM_C[128*1024];
+extern u8 VRAM_D[128*1024];
+extern u8 VRAM_E[ 64*1024];
+extern u8 VRAM_F[ 16*1024];
+extern u8 VRAM_G[ 16*1024];
+extern u8 VRAM_H[ 32*1024];
+extern u8 VRAM_I[ 16*1024];
+
+extern u8* VRAM[9]; // bank storage indexed by bank number 0..8 (used by the LCDC accessors below)
+
+extern u32 VRAMMap_LCDC; // bit n set = bank n is accessible through Read/WriteVRAM_LCDC
+extern u32 VRAMMap_ABG[0x20]; // per-16K-slot bank bitmasks, indexed by (addr >> 14)
+extern u32 VRAMMap_AOBJ[0x10];
+extern u32 VRAMMap_BBG[0x8];
+extern u32 VRAMMap_BOBJ[0x8];
+extern u32 VRAMMap_ABGExtPal[4]; // per-slot bank bitmasks consulted by GPU2D::GetBGExtPal
+extern u32 VRAMMap_AOBJExtPal;
+extern u32 VRAMMap_BBGExtPal[4];
+extern u32 VRAMMap_BOBJExtPal;
+extern u32 VRAMMap_Texture[4]; // indexed by (addr >> 17)
+extern u32 VRAMMap_TexPal[6]; // indexed by (addr >> 14); addresses >= 0x18000 read as 0
+extern u32 VRAMMap_ARM7[2]; // indexed by (addr >> 17)
+
+extern u32 Framebuffer[256*192*2]; // output pixels; each GPU2D gets one 256*192 half via SetFramebuffer
+
+extern GPU2D* GPU2D_A; // the two 2D engines rendered by StartScanline()
+extern GPU2D* GPU2D_B;
+
+
+bool Init();
+void DeInit();
+void Reset();
+
+void MapVRAM_AB(u32 bank, u8 cnt); // NOTE(review): defined in GPU.cpp; presumably update the VRAMMap_* tables - confirm
+void MapVRAM_CD(u32 bank, u8 cnt);
+void MapVRAM_E(u32 bank, u8 cnt);
+void MapVRAM_FG(u32 bank, u8 cnt);
+void MapVRAM_H(u32 bank, u8 cnt);
+void MapVRAM_I(u32 bank, u8 cnt);
+
+
+// Reads a value of type T from VRAM through the LCDC window at 0x068xxxxx.
+// Address bits 20-22 are ignored. The 16K chunk number (address bits 14-19)
+// selects the bank and the offset mask; addresses outside the decoded chunks,
+// or banks whose bit is clear in VRAMMap_LCDC, read as 0.
+template<typename T>
+T ReadVRAM_LCDC(u32 addr)
+{
+    if ((addr & 0xFF800000) != 0x06800000) return 0;
+
+    const u32 chunk = (addr >> 14) & 0x3F;
+    int bank;
+    u32 offsetmask;
+
+    if (chunk < 32)
+    {
+        // banks 0-3: 128K each
+        bank = chunk >> 3;
+        offsetmask = 0x1FFFF;
+    }
+    else if (chunk < 36)
+    {
+        // bank 4: 64K
+        bank = 4;
+        offsetmask = 0xFFFF;
+    }
+    else if (chunk == 36)
+    {
+        bank = 5;
+        offsetmask = 0x3FFF;
+    }
+    else if (chunk == 37)
+    {
+        bank = 6;
+        offsetmask = 0x3FFF;
+    }
+    else if (chunk < 40)
+    {
+        // bank 7: 32K
+        bank = 7;
+        offsetmask = 0x7FFF;
+    }
+    else if (chunk == 40)
+    {
+        bank = 8;
+        offsetmask = 0x3FFF;
+    }
+    else
+        return 0;
+
+    if (!(VRAMMap_LCDC & (1<<bank))) return 0;
+
+    return *(T*)&VRAM[bank][addr & offsetmask];
+}
+
+// Writes a value of type T to VRAM through the LCDC window at 0x068xxxxx.
+// Address bits 20-22 are ignored. The 16K chunk number (address bits 14-19)
+// selects the bank and the offset mask; writes outside the decoded chunks,
+// or to banks whose bit is clear in VRAMMap_LCDC, are dropped.
+template<typename T>
+void WriteVRAM_LCDC(u32 addr, T val)
+{
+    if ((addr & 0xFF800000) != 0x06800000) return;
+
+    const u32 chunk = (addr >> 14) & 0x3F;
+    int bank;
+    u32 offsetmask;
+
+    if (chunk < 32)
+    {
+        // banks 0-3: 128K each
+        bank = chunk >> 3;
+        offsetmask = 0x1FFFF;
+    }
+    else if (chunk < 36)
+    {
+        // bank 4: 64K
+        bank = 4;
+        offsetmask = 0xFFFF;
+    }
+    else if (chunk == 36)
+    {
+        bank = 5;
+        offsetmask = 0x3FFF;
+    }
+    else if (chunk == 37)
+    {
+        bank = 6;
+        offsetmask = 0x3FFF;
+    }
+    else if (chunk < 40)
+    {
+        // bank 7: 32K
+        bank = 7;
+        offsetmask = 0x7FFF;
+    }
+    else if (chunk == 40)
+    {
+        bank = 8;
+        offsetmask = 0x3FFF;
+    }
+    else
+        return;
+
+    if (!(VRAMMap_LCDC & (1<<bank))) return;
+
+    *(T*)&VRAM[bank][addr & offsetmask] = val;
+}
+
+
+// Reads a value of type T through the VRAMMap_ABG mapping.
+// The 16K slot chosen by address bits 14-18 holds a bitmask of banks A-G;
+// the data of every bank flagged there is ORed into the result.
+template<typename T>
+T ReadVRAM_ABG(u32 addr)
+{
+    const u32 banks = VRAMMap_ABG[(addr >> 14) & 0x1F];
+    const u32 off128K = addr & 0x1FFFF;
+    const u32 off64K = addr & 0xFFFF;
+    const u32 off16K = addr & 0x3FFF;
+
+    u32 value = 0;
+
+    if (banks & (1<<0)) value |= *(T*)&VRAM_A[off128K];
+    if (banks & (1<<1)) value |= *(T*)&VRAM_B[off128K];
+    if (banks & (1<<2)) value |= *(T*)&VRAM_C[off128K];
+    if (banks & (1<<3)) value |= *(T*)&VRAM_D[off128K];
+    if (banks & (1<<4)) value |= *(T*)&VRAM_E[off64K];
+    if (banks & (1<<5)) value |= *(T*)&VRAM_F[off16K];
+    if (banks & (1<<6)) value |= *(T*)&VRAM_G[off16K];
+
+    return value;
+}
+
+// Writes a value of type T through the VRAMMap_ABG mapping: the value is
+// stored into every bank (A-G) flagged for the 16K slot of `addr`.
+template<typename T>
+void WriteVRAM_ABG(u32 addr, T val)
+{
+    const u32 banks = VRAMMap_ABG[(addr >> 14) & 0x1F];
+    const u32 off128K = addr & 0x1FFFF;
+    const u32 off64K = addr & 0xFFFF;
+    const u32 off16K = addr & 0x3FFF;
+
+    if (banks & (1<<0)) *(T*)&VRAM_A[off128K] = val;
+    if (banks & (1<<1)) *(T*)&VRAM_B[off128K] = val;
+    if (banks & (1<<2)) *(T*)&VRAM_C[off128K] = val;
+    if (banks & (1<<3)) *(T*)&VRAM_D[off128K] = val;
+    if (banks & (1<<4)) *(T*)&VRAM_E[off64K] = val;
+    if (banks & (1<<5)) *(T*)&VRAM_F[off16K] = val;
+    if (banks & (1<<6)) *(T*)&VRAM_G[off16K] = val;
+}
+
+
+// Reads a value of type T through the VRAMMap_AOBJ mapping.
+// The 16K slot chosen by address bits 14-17 holds a bitmask of banks
+// (A, B, E, F, G); the data of every flagged bank is ORed into the result.
+template<typename T>
+T ReadVRAM_AOBJ(u32 addr)
+{
+    const u32 banks = VRAMMap_AOBJ[(addr >> 14) & 0xF];
+    const u32 off128K = addr & 0x1FFFF;
+    const u32 off64K = addr & 0xFFFF;
+    const u32 off16K = addr & 0x3FFF;
+
+    u32 value = 0;
+
+    if (banks & (1<<0)) value |= *(T*)&VRAM_A[off128K];
+    if (banks & (1<<1)) value |= *(T*)&VRAM_B[off128K];
+    if (banks & (1<<4)) value |= *(T*)&VRAM_E[off64K];
+    if (banks & (1<<5)) value |= *(T*)&VRAM_F[off16K];
+    if (banks & (1<<6)) value |= *(T*)&VRAM_G[off16K];
+
+    return value;
+}
+
+// Writes a value of type T through the VRAMMap_AOBJ mapping: the value is
+// stored into every bank (A, B, E, F, G) flagged for the 16K slot of `addr`.
+template<typename T>
+void WriteVRAM_AOBJ(u32 addr, T val)
+{
+    const u32 banks = VRAMMap_AOBJ[(addr >> 14) & 0xF];
+    const u32 off128K = addr & 0x1FFFF;
+    const u32 off64K = addr & 0xFFFF;
+    const u32 off16K = addr & 0x3FFF;
+
+    if (banks & (1<<0)) *(T*)&VRAM_A[off128K] = val;
+    if (banks & (1<<1)) *(T*)&VRAM_B[off128K] = val;
+    if (banks & (1<<4)) *(T*)&VRAM_E[off64K] = val;
+    if (banks & (1<<5)) *(T*)&VRAM_F[off16K] = val;
+    if (banks & (1<<6)) *(T*)&VRAM_G[off16K] = val;
+}
+
+
+// Reads a value of type T through the VRAMMap_BBG mapping.
+// The 16K slot chosen by address bits 14-16 holds a bitmask of banks
+// (C, H, I); the data of every flagged bank is ORed into the result.
+template<typename T>
+T ReadVRAM_BBG(u32 addr)
+{
+    const u32 banks = VRAMMap_BBG[(addr >> 14) & 0x7];
+
+    u32 value = 0;
+
+    if (banks & (1<<2)) value |= *(T*)&VRAM_C[addr & 0x1FFFF];
+    if (banks & (1<<7)) value |= *(T*)&VRAM_H[addr & 0x7FFF];
+    if (banks & (1<<8)) value |= *(T*)&VRAM_I[addr & 0x3FFF];
+
+    return value;
+}
+
+// Writes a value of type T through the VRAMMap_BBG mapping: the value is
+// stored into every bank (C, H, I) flagged for the 16K slot of `addr`.
+template<typename T>
+void WriteVRAM_BBG(u32 addr, T val)
+{
+    const u32 banks = VRAMMap_BBG[(addr >> 14) & 0x7];
+
+    if (banks & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val;
+    if (banks & (1<<7)) *(T*)&VRAM_H[addr & 0x7FFF] = val;
+    if (banks & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val;
+}
+
+
+// Reads a value of type T through the VRAMMap_BOBJ mapping.
+// The 16K slot chosen by address bits 14-16 holds a bitmask of banks (D, I);
+// the data of every flagged bank is ORed into the result.
+template<typename T>
+T ReadVRAM_BOBJ(u32 addr)
+{
+    const u32 banks = VRAMMap_BOBJ[(addr >> 14) & 0x7];
+
+    u32 value = 0;
+
+    if (banks & (1<<3)) value |= *(T*)&VRAM_D[addr & 0x1FFFF];
+    if (banks & (1<<8)) value |= *(T*)&VRAM_I[addr & 0x3FFF];
+
+    return value;
+}
+
+// Writes a value of type T through the VRAMMap_BOBJ mapping: the value is
+// stored into every bank (D, I) flagged for the 16K slot of `addr`.
+template<typename T>
+void WriteVRAM_BOBJ(u32 addr, T val)
+{
+    const u32 banks = VRAMMap_BOBJ[(addr >> 14) & 0x7];
+
+    if (banks & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val;
+    if (banks & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val;
+}
+
+
+// Reads a value of type T through the VRAMMap_ARM7 mapping.
+// Address bit 17 picks one of two 128K slots; the data of every bank (C, D)
+// flagged for that slot is ORed into the result.
+template<typename T>
+T ReadVRAM_ARM7(u32 addr)
+{
+    const u32 banks = VRAMMap_ARM7[(addr >> 17) & 0x1];
+    const u32 offset = addr & 0x1FFFF;
+
+    u32 value = 0;
+
+    if (banks & (1<<2)) value |= *(T*)&VRAM_C[offset];
+    if (banks & (1<<3)) value |= *(T*)&VRAM_D[offset];
+
+    return value;
+}
+
+// Writes a value of type T through the VRAMMap_ARM7 mapping: the value is
+// stored into every bank (C, D) flagged for the 128K slot of `addr`.
+template<typename T>
+void WriteVRAM_ARM7(u32 addr, T val)
+{
+    const u32 banks = VRAMMap_ARM7[(addr >> 17) & 0x1];
+    const u32 offset = addr & 0x1FFFF;
+
+    if (banks & (1<<2)) *(T*)&VRAM_C[offset] = val;
+    if (banks & (1<<3)) *(T*)&VRAM_D[offset] = val;
+}
+
+
+// BG read dispatcher: addresses in the 2MB region at 0x06000000 go through
+// the ABG mapping, everything else through the BBG mapping.
+template<typename T>
+T ReadVRAM_BG(u32 addr)
+{
+    const bool isABG = ((addr & 0xFFE00000) == 0x06000000);
+
+    return isABG ? ReadVRAM_ABG<T>(addr) : ReadVRAM_BBG<T>(addr);
+}
+
+// OBJ read dispatcher: addresses in the 2MB region at 0x06400000 go through
+// the AOBJ mapping, everything else through the BOBJ mapping.
+template<typename T>
+T ReadVRAM_OBJ(u32 addr)
+{
+    const bool isAOBJ = ((addr & 0xFFE00000) == 0x06400000);
+
+    return isAOBJ ? ReadVRAM_AOBJ<T>(addr) : ReadVRAM_BOBJ<T>(addr);
+}
+
+
+// Reads a value of type T through the VRAMMap_Texture mapping.
+// Address bits 17-18 pick one of four 128K slots; the data of every bank
+// (A-D) flagged for that slot is ORed into the result.
+template<typename T>
+T ReadVRAM_Texture(u32 addr)
+{
+    const u32 banks = VRAMMap_Texture[(addr >> 17) & 0x3];
+    const u32 offset = addr & 0x1FFFF;
+
+    u32 value = 0;
+
+    if (banks & (1<<0)) value |= *(T*)&VRAM_A[offset];
+    if (banks & (1<<1)) value |= *(T*)&VRAM_B[offset];
+    if (banks & (1<<2)) value |= *(T*)&VRAM_C[offset];
+    if (banks & (1<<3)) value |= *(T*)&VRAM_D[offset];
+
+    return value;
+}
+
+// Reads a value of type T through the VRAMMap_TexPal mapping.
+// Only addresses below 0x18000 (six 16K slots) are decoded, anything above
+// reads as 0. The data of every bank (E, F, G) flagged for the slot is ORed
+// into the result.
+template<typename T>
+T ReadVRAM_TexPal(u32 addr)
+{
+    if (addr >= 0x18000) return 0;
+
+    const u32 banks = VRAMMap_TexPal[(addr >> 14) & 0x7];
+
+    u32 value = 0;
+
+    if (banks & (1<<4)) value |= *(T*)&VRAM_E[addr & 0xFFFF];
+    if (banks & (1<<5)) value |= *(T*)&VRAM_F[addr & 0x3FFF];
+    if (banks & (1<<6)) value |= *(T*)&VRAM_G[addr & 0x3FFF];
+
+    return value;
+}
+
+
+void DisplaySwap(u32 val); // assigns the two halves of Framebuffer to GPU2D_A / GPU2D_B (body in GPU.cpp)
+
+void StartFrame(); // begins a frame by starting scanline 0
+void StartScanline(u32 line); // updates VCount/DispStat, draws or handles VBlank, schedules HBlank
+
+void SetDispStat(u32 cpu, u16 val); // register write: updates the writable DispStat bits and the VCount match value
+
+}
+
+#endif
diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp
new file mode 100644
index 0000000..cedfe1e
--- /dev/null
+++ b/src/GPU2D.cpp
@@ -0,0 +1,1604 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "NDS.h"
+#include "GPU.h"
+
+
+// notes on color conversion
+//
+// * BLDCNT special effects are applied on 18bit colors
+// -> layers are converted to 18bit before being composited
+// -> 'brightness up' effect does: x = x + (63-x)*factor
+// * colors are converted as follows: 18bit = 15bit * 2
+// -> white comes out as 62,62,62 and not 63,63,63
+// * VRAM/FIFO display modes convert colors the same way
+// * 3D engine converts colors differently (18bit = 15bit * 2 + 1, except 0 = 0)
+// * 'screen disabled' white is 63,63,63
+//
+// oh also, changing DISPCNT bit16-17 midframe doesn't work (ignored? applied for next frame?)
+// TODO, eventually: check whether other DISPCNT bits can be changed midframe
+//
+// for VRAM display mode, VRAM must be mapped to LCDC
+//
+// sprite blending rules
+// * destination must be selected as 2nd target
+// * sprite must be semitransparent or bitmap sprite
+// * blending is applied instead of the selected color effect, even if it is 'none'.
+// * for bitmap sprites: EVA = alpha+1, EVB = 16-EVA
+// * for bitmap sprites: alpha=0 is always transparent, even if blending doesn't apply
+//
+// 3D blending rules
+//
+// 3D/3D blending seems to follow these equations:
+// dstColor = srcColor*srcAlpha + dstColor*(1-srcAlpha)
+// dstAlpha = max(srcAlpha, dstAlpha)
+// blending isn't applied if dstAlpha is zero.
+//
+// 3D/2D blending rules
+// * if destination selected as 2nd target:
+// blending is applied instead of the selected color effect, using full 5bit alpha from 3D layer
+// this even if the selected color effect is 'none'.
+// apparently this works even if BG0 isn't selected as 1st target
+// * if BG0 is selected as 1st target, destination not selected as 2nd target:
+// brightness up/down effect is applied if selected. if blending is selected, it doesn't apply.
+// * 3D layer pixels with alpha=0 are always transparent.
+
+
+// Creates a 2D engine. `num` is kept in Num; the rest of the class uses it
+// to tell the two engines apart (zero vs. nonzero).
+GPU2D::GPU2D(u32 num)
+{
+    this->Num = num;
+}
+
+// Nothing to release: the framebuffer pointer is not owned by this object.
+GPU2D::~GPU2D() {}
+
+// Puts every register and cache flag of this engine back to its power-on
+// value: all zero, except EVA which starts at 16.
+void GPU2D::Reset()
+{
+    // control registers
+    DispCnt = 0;
+    BlendCnt = 0;
+    CaptureCnt = 0;
+    MasterBrightness = 0;
+
+    // blend coefficients
+    EVA = 16;
+    EVB = 0;
+    EVY = 0;
+
+    // per-BG control and scroll registers (4 entries of 2 bytes)
+    memset(BGCnt, 0, 8);
+    memset(BGXPos, 0, 8);
+    memset(BGYPos, 0, 8);
+
+    // reference points (2 entries of 4 bytes)
+    memset(BGXRef, 0, 8);
+    memset(BGYRef, 0, 8);
+    memset(BGXRefInternal, 0, 8);
+    memset(BGYRefInternal, 0, 8);
+
+    // rotation/scaling parameters (2 entries of 2 bytes)
+    memset(BGRotA, 0, 4);
+    memset(BGRotB, 0, 4);
+    memset(BGRotC, 0, 4);
+    memset(BGRotD, 0, 4);
+
+    // mark every cached extended palette as not loaded
+    for (int slot = 0; slot < 4; slot++)
+        BGExtPalStatus[slot] = 0;
+    OBJExtPalStatus = 0;
+}
+
+// Selects the buffer DrawScanline() renders into; line N is written at
+// buf[256*N].
+void GPU2D::SetFramebuffer(u32* buf)
+{
+    this->Framebuffer = buf;
+}
+
+
+// 8-bit register reads are not implemented: the access is logged and 0 is
+// returned.
+u8 GPU2D::Read8(u32 addr)
+{
+    const u8 unimplemented = 0;
+
+    printf("!! GPU2D READ8 %08X\n", addr);
+    return unimplemented;
+}
+
+// 16-bit register read. Only the low 12 address bits select the register.
+// Unknown registers are logged and read as 0.
+u16 GPU2D::Read16(u32 addr)
+{
+    const u32 reg = addr & 0x00000FFF;
+
+    if (reg == 0x000) return DispCnt&0xFFFF;
+    if (reg == 0x002) return DispCnt>>16;
+
+    // the four BG control registers are consecutive halfwords from 0x008
+    if ((reg >= 0x008) && (reg <= 0x00E) && !(reg & 0x1))
+        return BGCnt[(reg - 0x008) >> 1];
+
+    if (reg == 0x050) return BlendCnt;
+
+    if (reg == 0x064) return CaptureCnt & 0xFFFF;
+    if (reg == 0x066) return CaptureCnt >> 16;
+
+    printf("unknown GPU read16 %08X\n", addr);
+    return 0;
+}
+
+// 32-bit register read. DispCnt (0x000) and CaptureCnt (0x064) are returned
+// whole; any other address is assembled from two 16-bit reads.
+u32 GPU2D::Read32(u32 addr)
+{
+    const u32 reg = addr & 0x00000FFF;
+
+    if (reg == 0x000) return DispCnt;
+    if (reg == 0x064) return CaptureCnt;
+
+    const u32 lo = Read16(addr);
+    const u32 hi = Read16(addr+2);
+
+    return lo | (hi << 16);
+}
+
+// 8-bit register writes are not implemented: the access is only logged.
+void GPU2D::Write8(u32 addr, u8 val)
+{
+    printf("!! GPU2D WRITE8 %08X %02X\n",
+           addr, val);
+}
+
+// 16-bit register write. Only the low 12 address bits select the register;
+// writes to registers not handled here are silently ignored.
+void GPU2D::Write16(u32 addr, u16 val)
+{
+    const u32 reg = addr & 0x00000FFF;
+
+    // every handled register sits on an even address
+    if (reg & 0x1) return;
+
+    if (reg == 0x000)
+    {
+        DispCnt = (DispCnt & 0xFFFF0000) | val;
+        return;
+    }
+    if (reg == 0x002)
+    {
+        DispCnt = (DispCnt & 0x0000FFFF) | (val << 16);
+        return;
+    }
+
+    // BG control registers: 0x008..0x00E
+    if ((reg >= 0x008) && (reg <= 0x00E))
+    {
+        BGCnt[(reg - 0x008) >> 1] = val;
+        return;
+    }
+
+    // BG scroll registers: X/Y pairs from 0x010 to 0x01E
+    if ((reg >= 0x010) && (reg <= 0x01E))
+    {
+        const u32 bg = (reg - 0x010) >> 2;
+
+        if (reg & 0x2) BGYPos[bg] = val;
+        else           BGXPos[bg] = val;
+        return;
+    }
+
+    // rotation/scaling parameters and reference points:
+    // one 16-byte group at 0x020 (entry 0) and one at 0x030 (entry 1)
+    if ((reg >= 0x020) && (reg <= 0x03E))
+    {
+        const u32 n = (reg >> 4) - 2;
+
+        switch (reg & 0xE)
+        {
+        case 0x0: BGRotA[n] = val; break;
+        case 0x2: BGRotB[n] = val; break;
+        case 0x4: BGRotC[n] = val; break;
+        case 0x6: BGRotD[n] = val; break;
+
+        case 0x8:
+            // X reference point, low half
+            BGXRef[n] = (BGXRef[n] & 0xFFFF0000) | val;
+            if (GPU::VCount < 192) BGXRefInternal[n] = BGXRef[n];
+            break;
+        case 0xA:
+            // X reference point, high half: sign-extend from bit 11
+            if (val & 0x0800) val |= 0xF000;
+            BGXRef[n] = (BGXRef[n] & 0xFFFF) | (val << 16);
+            if (GPU::VCount < 192) BGXRefInternal[n] = BGXRef[n];
+            break;
+        case 0xC:
+            // Y reference point, low half
+            BGYRef[n] = (BGYRef[n] & 0xFFFF0000) | val;
+            if (GPU::VCount < 192) BGYRefInternal[n] = BGYRef[n];
+            break;
+        case 0xE:
+            // Y reference point, high half: sign-extend from bit 11
+            if (val & 0x0800) val |= 0xF000;
+            BGYRef[n] = (BGYRef[n] & 0xFFFF) | (val << 16);
+            if (GPU::VCount < 192) BGYRefInternal[n] = BGYRef[n];
+            break;
+        }
+        return;
+    }
+
+    if (reg == 0x050)
+    {
+        BlendCnt = val;
+        return;
+    }
+    if (reg == 0x052)
+    {
+        // blend coefficients are 5 bits wide but saturate at 16
+        EVA = val & 0x1F;
+        if (EVA > 16) EVA = 16;
+        EVB = (val >> 8) & 0x1F;
+        if (EVB > 16) EVB = 16;
+        return;
+    }
+    if (reg == 0x054)
+    {
+        EVY = val & 0x1F;
+        if (EVY > 16) EVY = 16;
+        return;
+    }
+
+    if (reg == 0x06C)
+    {
+        MasterBrightness = val;
+        return;
+    }
+
+    //printf("unknown GPU write16 %08X %04X\n", addr, val);
+}
+
+// 32-bit register write. DispCnt, the four reference point registers and
+// CaptureCnt are handled as whole words; anything else is forwarded as two
+// 16-bit writes (low half first).
+void GPU2D::Write32(u32 addr, u32 val)
+{
+    const u32 reg = addr & 0x00000FFF;
+
+    if (reg == 0x000)
+    {
+        DispCnt = val;
+        return;
+    }
+
+    if ((reg == 0x028) || (reg == 0x02C) || (reg == 0x038) || (reg == 0x03C))
+    {
+        // reference points are 28-bit signed values: sign-extend from bit 27
+        if (val & 0x08000000) val |= 0xF0000000;
+
+        const u32 n = (reg >> 4) - 2;
+
+        if (reg & 0x4)
+        {
+            BGYRef[n] = val;
+            if (GPU::VCount < 192) BGYRefInternal[n] = BGYRef[n];
+        }
+        else
+        {
+            BGXRef[n] = val;
+            if (GPU::VCount < 192) BGXRefInternal[n] = BGXRef[n];
+        }
+        return;
+    }
+
+    if (reg == 0x064)
+    {
+        // TODO: check what happens when writing to it during display
+        // esp. if a capture is happening
+        CaptureCnt = val & 0xEF3F1F1F;
+        return;
+    }
+
+    Write16(addr, val&0xFFFF);
+    Write16(addr+2, val>>16);
+}
+
+
+void GPU2D::DrawScanline(u32 line) // renders scanline 'line' into Framebuffer: display mode, capture, master brightness, then 8-bit expansion
+{
+ u32* dst = &Framebuffer[256*line]; // output row; holds 6-bit-per-channel colors until the final conversion below
+
+ u32 dispmode = DispCnt >> 16; // display mode field starts at DispCnt bit 16
+ dispmode &= (Num ? 0x1 : 0x3); // only one mode bit is honored when Num is nonzero, two when Num is 0
+
+ switch (dispmode)
+ {
+ case 0: // screen off
+ {
+ for (int i = 0; i < 256; i++)
+ dst[i] = 0xFF3F3F3F; // all three 6-bit channels at 63 (white)
+ }
+ break;
+
+ case 1: // regular display
+ {
+ DrawScanline_Mode1(line, dst);
+ }
+ break;
+
+ case 2: // VRAM display
+ {
+ u32 vrambank = (DispCnt >> 18) & 0x3; // source bank 0-3 from DispCnt bits 18-19
+ if (GPU::VRAMMap_LCDC & (1<<vrambank)) // the bank must be mapped to LCDC, otherwise the line is black
+ {
+ u16* vram = (u16*)GPU::VRAM[vrambank];
+ vram = &vram[line * 256]; // one 256-pixel row of 16-bit colors per line
+
+ for (int i = 0; i < 256; i++)
+ {
+ u16 color = vram[i];
+ u8 r = (color & 0x001F) << 1; // each 5-bit channel is doubled into a 6-bit one
+ u8 g = (color & 0x03E0) >> 4;
+ u8 b = (color & 0x7C00) >> 9;
+
+ dst[i] = r | (g << 8) | (b << 16);
+ }
+ }
+ else
+ {
+ for (int i = 0; i < 256; i++)
+ {
+ dst[i] = 0;
+ }
+ }
+ }
+ break;
+
+ case 3: // FIFO display
+ {
+ // TODO (not implemented: dst keeps whatever it held before this call)
+ }
+ break;
+ }
+
+ // capture (only when Num is 0 and CaptureCnt bit 31 is set)
+ if ((!Num) && (CaptureCnt & (1<<31)))
+ {
+ u32 capwidth, capheight;
+ switch ((CaptureCnt >> 20) & 0x3) // capture size from CaptureCnt bits 20-21
+ {
+ case 0: capwidth = 128; capheight = 128; break;
+ case 1: capwidth = 256; capheight = 64; break;
+ case 2: capwidth = 256; capheight = 128; break;
+ case 3: capwidth = 256; capheight = 192; break;
+ }
+
+ if (line < capheight)
+ DoCapture(line, capwidth, dst); // runs before master brightness is applied
+ }
+
+ // master brightness (skipped when the screen is off)
+ if (dispmode != 0)
+ {
+ if ((MasterBrightness >> 14) == 1) // mode field in bits 14-15: 1 = up, 2 = down
+ {
+ // up
+ u32 factor = MasterBrightness & 0x1F;
+ if (factor > 16) factor = 16; // 5-bit factor saturates at 16
+
+ for (int i = 0; i < 256; i++)
+ {
+ u32 val = dst[i];
+
+ u32 r = val & 0x00003F; // channels are processed in place, without shifting them down
+ u32 g = val & 0x003F00;
+ u32 b = val & 0x3F0000;
+
+ r += (((0x00003F - r) * factor) >> 4); // x = x + (63-x)*factor/16
+ g += ((((0x003F00 - g) * factor) >> 4) & 0x003F00); // the mask drops fraction bits that land below the channel
+ b += ((((0x3F0000 - b) * factor) >> 4) & 0x3F0000);
+
+ dst[i] = r | g | b;
+ }
+ }
+ else if ((MasterBrightness >> 14) == 2)
+ {
+ // down
+ u32 factor = MasterBrightness & 0x1F;
+ if (factor > 16) factor = 16;
+
+ for (int i = 0; i < 256; i++)
+ {
+ u32 val = dst[i];
+
+ u32 r = val & 0x00003F;
+ u32 g = val & 0x003F00;
+ u32 b = val & 0x3F0000;
+
+ r -= ((r * factor) >> 4); // x = x - x*factor/16
+ g -= (((g * factor) >> 4) & 0x003F00);
+ b -= (((b * factor) >> 4) & 0x3F0000);
+
+ dst[i] = r | g | b;
+ }
+ }
+ }
+
+ // convert to 32-bit RGBA
+ for (int i = 0; i < 256; i++)
+ dst[i] = ((dst[i] & 0x003F3F3F) << 2) | // 6-bit channel into the top of each byte
+ ((dst[i] & 0x00303030) >> 4) | // its two highest bits replicated into the low bits
+ 0xFF000000; // top byte forced to 0xFF
+}
+
+// Called at the start of VBlank: reloads the internal reference points from
+// the register values and clears CaptureCnt bit 31 (the bit DrawScanline()
+// tests before capturing).
+void GPU2D::VBlank()
+{
+    for (int n = 0; n < 2; n++)
+    {
+        BGXRefInternal[n] = BGXRef[n];
+        BGYRefInternal[n] = BGYRef[n];
+    }
+
+    CaptureCnt &= 0x7FFFFFFF;
+}
+
+
+// Captures one scanline into VRAM as 16-bit pixels (5 bits per channel plus
+// an alpha bit in bit 15), as configured by CaptureCnt.
+//
+// line:  scanline being captured
+// width: number of pixels captured on this line
+// src:   source A line buffer: 6-bit channels at bits 0-5, 8-13 and 16-21,
+//        a nonzero top byte meaning "opaque". Replaced by the 3D engine's
+//        line when CaptureCnt bit 24 is set.
+//
+// CaptureCnt fields used here:
+//   bits 0-4   EVA (source A blend factor, saturates at 16)
+//   bits 8-12  EVB (source B blend factor, saturates at 16)
+//   bits 16-17 destination VRAM bank
+//   bits 18-19 destination offset, in 32K steps
+//   bit  24    source A: 0 = `src` as passed in, 1 = 3D line
+//   bit  25    source B: 0 = VRAM, 1 = FIFO (not implemented)
+//   bits 26-27 source B read offset, in 32K steps
+//   bits 29-30 capture source: 0 = A, 1 = B, 2/3 = A and B blended
+void GPU2D::DoCapture(u32 line, u32 width, u32* src)
+{
+    u32 dstvram = (CaptureCnt >> 16) & 0x3;
+
+    // TODO: confirm this
+    // it should work like VRAM display mode, which requires VRAM to be mapped to LCDC
+    if (!(GPU::VRAMMap_LCDC & (1<<dstvram)))
+        return;
+
+    // all addresses below are in 16-bit units, hence << 14 for 32K byte steps
+    u16* dst = (u16*)GPU::VRAM[dstvram];
+    u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width);
+
+    if (CaptureCnt & (1<<24))
+        src = (u32*)GPU3D::GetLine(line);
+
+    u16* srcB = NULL;
+    u32 srcBaddr = line * 256;
+
+    if (CaptureCnt & (1<<25))
+    {
+        // TODO: FIFO mode
+    }
+    else
+    {
+        // source B is the VRAM bank selected for VRAM display in DispCnt;
+        // it reads as empty when that bank isn't mapped to LCDC
+        u32 srcvram = (DispCnt >> 18) & 0x3;
+        if (GPU::VRAMMap_LCDC & (1<<srcvram))
+            srcB = (u16*)GPU::VRAM[srcvram];
+
+        // the read offset is ignored in VRAM display mode
+        if (((DispCnt >> 16) & 0x3) != 2)
+            srcBaddr += ((CaptureCnt >> 26) & 0x3) << 14;
+    }
+
+    // wrap within the 128K bank
+    dstaddr &= 0xFFFF;
+    srcBaddr &= 0xFFFF;
+
+    // the capture source and the blend factors are CaptureCnt fields;
+    // reading them from DispCnt would select the source from unrelated bits
+    switch ((CaptureCnt >> 29) & 0x3)
+    {
+    case 0: // source A
+        {
+            for (u32 i = 0; i < width; i++)
+            {
+                u32 val = src[i];
+
+                // TODO: check what happens when alpha=0
+
+                // 6-bit channels down to 5-bit
+                u32 r = (val >> 1) & 0x1F;
+                u32 g = (val >> 9) & 0x1F;
+                u32 b = (val >> 17) & 0x1F;
+                u32 a = ((val >> 24) != 0) ? 0x8000 : 0;
+
+                dst[dstaddr] = r | (g << 5) | (b << 10) | a;
+                dstaddr = (dstaddr + 1) & 0xFFFF;
+            }
+        }
+        break;
+
+    case 1: // source B
+        {
+            for (u32 i = 0; i < width; i++)
+            {
+                if (srcB)
+                {
+                    dst[dstaddr] = srcB[srcBaddr];
+                    srcBaddr = (srcBaddr + 1) & 0xFFFF;
+                }
+                else
+                    dst[dstaddr] = 0;
+
+                dstaddr = (dstaddr + 1) & 0xFFFF;
+            }
+        }
+        break;
+
+    case 2: // sources A+B
+    case 3:
+        {
+            u32 eva = CaptureCnt & 0x1F;
+            u32 evb = (CaptureCnt >> 8) & 0x1F;
+
+            // checkme
+            if (eva > 16) eva = 16;
+            if (evb > 16) evb = 16;
+
+            for (u32 i = 0; i < width; i++)
+            {
+                u32 val = src[i];
+
+                // TODO: check what happens when alpha=0
+
+                u32 rA = (val >> 1) & 0x1F;
+                u32 gA = (val >> 9) & 0x1F;
+                u32 bA = (val >> 17) & 0x1F;
+                u32 aA = ((val >> 24) != 0) ? 1 : 0;
+
+                // an unavailable source B contributes nothing
+                u32 rB = 0, gB = 0, bB = 0, aB = 0;
+                if (srcB)
+                {
+                    val = srcB[srcBaddr];
+
+                    rB = val & 0x1F;
+                    gB = (val >> 5) & 0x1F;
+                    bB = (val >> 10) & 0x1F;
+                    aB = val >> 15;
+
+                    srcBaddr = (srcBaddr + 1) & 0xFFFF;
+                }
+
+                u32 rD = ((rA * aA * eva) + (rB * aB * evb)) >> 4;
+                u32 gD = ((gA * aA * eva) + (gB * aB * evb)) >> 4;
+                u32 bD = ((bA * aA * eva) + (bB * aB * evb)) >> 4;
+                u32 aD = (eva>0 ? aA : 0) | (evb>0 ? aB : 0);
+
+                // with EVA+EVB > 16 the sum can reach 62; saturate so it
+                // doesn't spill into the neighbouring channel
+                if (rD > 0x1F) rD = 0x1F;
+                if (gD > 0x1F) gD = 0x1F;
+                if (bD > 0x1F) bD = 0x1F;
+
+                dst[dstaddr] = rD | (gD << 5) | (bD << 10) | (aD << 15);
+                dstaddr = (dstaddr + 1) & 0xFFFF;
+            }
+        }
+        break;
+    }
+}
+
+
+// Invalidates the cached BG extended palettes of slots `base` and `base+1`;
+// GetBGExtPal() reloads them from VRAM on next use.
+void GPU2D::BGExtPalDirty(u32 base)
+{
+    for (u32 slot = base; slot <= base+1; slot++)
+        BGExtPalStatus[slot] = 0;
+}
+
+// Invalidates every cached OBJ extended palette; GetOBJExtPal() reloads
+// them from VRAM on next use.
+void GPU2D::OBJExtPalDirty()
+{
+    const u32 noneLoaded = 0;
+    OBJExtPalStatus = noneLoaded;
+}
+
+
+// Returns the cached 256-color BG extended palette `pal` of `slot`, loading
+// it from VRAM first when its bit in BGExtPalStatus[slot] is clear.
+// When Num is nonzero the data comes from bank H. Otherwise the contents of
+// banks E, F and G are ORed together, for each bank flagged in
+// VRAMMap_ABGExtPal[slot]. Unmapped palettes read as all zero.
+u16* GPU2D::GetBGExtPal(u32 slot, u32 pal)
+{
+    u16* cache = &BGExtPalCache[slot][pal << 8];
+
+    // already loaded
+    if (BGExtPalStatus[slot] & (1<<pal))
+        return cache;
+
+    // byte offset of the palette inside its slot (512 bytes per palette)
+    const u32 paloff = pal << 9;
+
+    if (Num)
+    {
+        if (GPU::VRAMMap_BBGExtPal[slot] & (1<<7))
+            memcpy(cache, &GPU::VRAM_H[(slot << 13) + paloff], 256*2);
+        else
+            memset(cache, 0, 256*2);
+    }
+    else
+    {
+        memset(cache, 0, 256*2);
+
+        // merged one 32-bit word (two colors) at a time
+        u32* words = (u32*)cache;
+        const u32 mapping = GPU::VRAMMap_ABGExtPal[slot];
+
+        if (mapping & (1<<4))
+        {
+            for (u32 w = 0; w < 128; w++)
+                words[w] |= *(u32*)&GPU::VRAM_E[(slot << 13) + paloff + (w << 2)];
+        }
+
+        // F and G only hold two slots each, hence slot&1
+        if (mapping & (1<<5))
+        {
+            for (u32 w = 0; w < 128; w++)
+                words[w] |= *(u32*)&GPU::VRAM_F[((slot&1) << 13) + paloff + (w << 2)];
+        }
+
+        if (mapping & (1<<6))
+        {
+            for (u32 w = 0; w < 128; w++)
+                words[w] |= *(u32*)&GPU::VRAM_G[((slot&1) << 13) + paloff + (w << 2)];
+        }
+    }
+
+    BGExtPalStatus[slot] |= (1<<pal);
+
+    return cache;
+}
+
+// Returns the cached 256-color OBJ extended palette `pal`, loading it from
+// VRAM first when its bit in OBJExtPalStatus is clear.
+// When Num is nonzero the data comes from bank I. Otherwise the contents of
+// banks F and G are ORed together, for each bank flagged in
+// VRAMMap_AOBJExtPal. Unmapped palettes read as all zero.
+u16* GPU2D::GetOBJExtPal(u32 pal)
+{
+    u16* cache = &OBJExtPalCache[pal << 8];
+
+    // already loaded
+    if (OBJExtPalStatus & (1<<pal))
+        return cache;
+
+    // byte offset of the palette (512 bytes per palette)
+    const u32 paloff = pal << 9;
+
+    if (Num)
+    {
+        if (GPU::VRAMMap_BOBJExtPal & (1<<8))
+            memcpy(cache, &GPU::VRAM_I[paloff], 256*2);
+        else
+            memset(cache, 0, 256*2);
+    }
+    else
+    {
+        memset(cache, 0, 256*2);
+
+        // merged one 32-bit word (two colors) at a time
+        u32* words = (u32*)cache;
+
+        if (GPU::VRAMMap_AOBJExtPal & (1<<5))
+        {
+            for (u32 w = 0; w < 128; w++)
+                words[w] |= *(u32*)&GPU::VRAM_F[paloff + (w << 2)];
+        }
+
+        if (GPU::VRAMMap_AOBJExtPal & (1<<6))
+        {
+            for (u32 w = 0; w < 128; w++)
+                words[w] |= *(u32*)&GPU::VRAM_G[paloff + (w << 2)];
+        }
+    }
+
+    OBJExtPalStatus |= (1<<pal);
+
+    return cache;
+}
+
+
+// Composites the BG layers and sprites of one scanline for BG mode `bgmode`.
+// Priorities are walked from 3 down to 0 and, within one priority, BG3 is
+// drawn first and sprites last, so later draws end up on top. A layer is
+// drawn only when its enable bit in DispCnt (bits 8-12) is set.
+template<u32 bgmode>
+void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u32* dst)
+{
+    for (int prio = 3; prio >= 0; prio--)
+    {
+        // BG3: text in mode 0, extended in modes 3-5
+        if (((BGCnt[3] & 0x3) == prio) && (DispCnt & 0x0800))
+        {
+            if (bgmode >= 3)
+                DrawBG_Extended(line, dst, 3);
+            else if (bgmode == 0)
+                DrawBG_Text(line, dst, 3);
+            // modes 1-2: todo: rotscale
+        }
+
+        // BG2: extended in mode 5, text in modes 0, 1 and 3
+        if (((BGCnt[2] & 0x3) == prio) && (DispCnt & 0x0400))
+        {
+            if (bgmode == 5)
+                DrawBG_Extended(line, dst, 2);
+            else if ((bgmode != 4) && (bgmode != 2))
+                DrawBG_Text(line, dst, 2);
+            // modes 2 and 4: todo: rotscale
+        }
+
+        // BG1: always text
+        if (((BGCnt[1] & 0x3) == prio) && (DispCnt & 0x0200))
+            DrawBG_Text(line, dst, 1);
+
+        // BG0: 3D output when Num is 0 and DispCnt bit 3 is set, text otherwise
+        if (((BGCnt[0] & 0x3) == prio) && (DispCnt & 0x0100))
+        {
+            const bool use3D = (!Num) && (DispCnt & 0x8);
+
+            if (use3D) DrawBG_3D(line, dst);
+            else       DrawBG_Text(line, dst, 0);
+        }
+
+        // sprites of this priority
+        if (DispCnt & 0x1000)
+            InterleaveSprites(spritebuf, 0x8000 | (prio<<16), dst);
+    }
+}
+
+// Renders one scanline in regular display mode: fills the line with the
+// backdrop color, draws BG layers and sprites on top of it, then applies the
+// color special effects (BlendCnt / EVA / EVB / EVY) to produce `dst`.
+//
+// line: scanline to render
+// dst:  256 output pixels, 6 bits per channel at bits 0-5, 8-13 and 16-21
+//
+// The working buffer keeps two pixels per column: linebuf[i] is the topmost
+// pixel, linebuf[256+i] the one directly below it. The top byte of each
+// entry carries layer flags: bits 0-3 = BG0-3, bit 5 = backdrop,
+// bit 6 = 3D layer, bit 7 = sprite subject to blending.
+void GPU2D::DrawScanline_Mode1(u32 line, u32* dst)
+{
+    u32 linebuf[256*2];
+
+    u32 backdrop;
+    if (Num) backdrop = *(u16*)&GPU::Palette[0x400];
+    else     backdrop = *(u16*)&GPU::Palette[0];
+
+    {
+        u8 r = (backdrop & 0x001F) << 1;
+        u8 g = (backdrop & 0x03E0) >> 4;
+        u8 b = (backdrop & 0x7C00) >> 9;
+
+        backdrop = r | (g << 8) | (b << 16) | 0x20000000;
+
+        for (int i = 0; i < 256; i++)
+        {
+            linebuf[i] = backdrop;
+
+            // nothing lies below the backdrop. The second layer is read for
+            // every pixel further down, so it must not be left uninitialised:
+            // 0 carries no flags and therefore never counts as a blend target
+            linebuf[256+i] = 0;
+        }
+    }
+
+    // prerender sprites
+    u32 spritebuf[256];
+    memset(spritebuf, 0, 256*4);
+    if (DispCnt & 0x1000) DrawSprites(line, spritebuf);
+
+    switch (DispCnt & 0x7)
+    {
+    case 0: DrawScanlineBGMode<0>(line, spritebuf, linebuf); break;
+    case 1: DrawScanlineBGMode<1>(line, spritebuf, linebuf); break;
+    case 2: DrawScanlineBGMode<2>(line, spritebuf, linebuf); break;
+    case 3: DrawScanlineBGMode<3>(line, spritebuf, linebuf); break;
+    case 4: DrawScanlineBGMode<4>(line, spritebuf, linebuf); break;
+    case 5: DrawScanlineBGMode<5>(line, spritebuf, linebuf); break;
+    }
+
+    // color special effects
+    // can likely be optimized
+
+    u32 bldcnteffect = (BlendCnt >> 6) & 0x3;
+
+    for (int i = 0; i < 256; i++)
+    {
+        u32 val1 = linebuf[i];
+        u32 val2 = linebuf[256+i];
+
+        u32 coloreffect;
+        u32 eva = 0, evb = 0;
+
+        u32 flag1 = val1 >> 24;
+
+        // flags of the lower pixel, lined up with BlendCnt's 2nd target bits (8-15)
+        u32 target2 = (val2 >> 16) & 0xFF00;
+
+        if ((flag1 & 0x80) && (BlendCnt & target2))
+        {
+            // sprite blending
+
+            coloreffect = 1;
+
+            if (flag1 & 0x40)
+            {
+                // the sprite carries its own alpha in the flag byte
+                eva = flag1 & 0x1F;
+                evb = 16 - eva;
+            }
+            else
+            {
+                eva = EVA;
+                evb = EVB;
+            }
+        }
+        else if ((flag1 & 0x40) && (BlendCnt & target2))
+        {
+            // 3D layer blending: 5-bit alpha from the 3D pixel, 32 levels
+
+            eva = (flag1 & 0x1F) + 1;
+            evb = 32 - eva;
+
+            u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 5;
+            u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb)) >> 5) & 0x007F00;
+            u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb)) >> 5) & 0x7F0000;
+
+            if (eva <= 16)
+            {
+                r += 0x000001;
+                g += 0x000100;
+                b += 0x010000;
+            }
+
+            if (r > 0x00003F) r = 0x00003F;
+            if (g > 0x003F00) g = 0x003F00;
+            if (b > 0x3F0000) b = 0x3F0000;
+
+            dst[i] = r | g | b | 0xFF000000;
+
+            continue;
+        }
+        else if (BlendCnt & flag1)
+        {
+            // top pixel is a 1st target: apply the effect selected in BlendCnt
+            if ((bldcnteffect == 1) && (BlendCnt & target2))
+            {
+                coloreffect = 1;
+                eva = EVA;
+                evb = EVB;
+            }
+            else if (bldcnteffect >= 2)
+                coloreffect = bldcnteffect;
+            else
+                coloreffect = 0;
+        }
+        else
+            coloreffect = 0;
+
+        switch (coloreffect)
+        {
+        case 0: // none
+            dst[i] = val1;
+            break;
+
+        case 1: // alpha blending, saturating per channel
+            {
+                u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 4;
+                u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb)) >> 4) & 0x007F00;
+                u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb)) >> 4) & 0x7F0000;
+
+                if (r > 0x00003F) r = 0x00003F;
+                if (g > 0x003F00) g = 0x003F00;
+                if (b > 0x3F0000) b = 0x3F0000;
+
+                dst[i] = r | g | b | 0xFF000000;
+            }
+            break;
+
+        case 2: // brightness up: x = x + (63-x)*EVY/16
+            {
+                u32 r = val1 & 0x00003F;
+                u32 g = val1 & 0x003F00;
+                u32 b = val1 & 0x3F0000;
+
+                r += ((0x00003F - r) * EVY) >> 4;
+                g += (((0x003F00 - g) * EVY) >> 4) & 0x003F00;
+                b += (((0x3F0000 - b) * EVY) >> 4) & 0x3F0000;
+
+                dst[i] = r | g | b | 0xFF000000;
+            }
+            break;
+
+        case 3: // brightness down: x = x - x*EVY/16
+            {
+                u32 r = val1 & 0x00003F;
+                u32 g = val1 & 0x003F00;
+                u32 b = val1 & 0x3F0000;
+
+                r -= (r * EVY) >> 4;
+                g -= ((g * EVY) >> 4) & 0x003F00;
+                b -= ((b * EVY) >> 4) & 0x3F0000;
+
+                dst[i] = r | g | b | 0xFF000000;
+            }
+            break;
+        }
+    }
+}
+
+
+// Draws one pixel on top of the current one: the previous top pixel moves to
+// the second layer (256 entries further), then the 15-bit `color` is stored
+// as 6-bit channels (each 5-bit channel doubled) together with `flag`.
+void GPU2D::DrawPixel(u32* dst, u16 color, u32 flag)
+{
+    const u32 red   = (color & 0x001F) << 1;
+    const u32 green = (color & 0x03E0) >> 4;
+    const u32 blue  = (color & 0x7C00) >> 9;
+
+    dst[256] = dst[0];
+    dst[0] = red | (green << 8) | (blue << 16) | flag;
+}
+
+// Draws the 3D engine's output for `line` as BG0, shifted horizontally by
+// the BG0 X scroll value. Pixels whose top byte is zero are skipped; the
+// others push the current top pixel to the second layer and are stored with
+// flag 0x40000000.
+void GPU2D::DrawBG_3D(u32 line, u32* dst)
+{
+    // TODO: window, as for everything
+    // also check if window can prevent blending from happening
+
+    u32* line3d = GPU3D::GetLine(line);
+
+    u16 srcx = BGXPos[0];
+    int first = 0;
+    int last = 256;
+
+    // scroll bit 8 set: the leading part of the line isn't drawn
+    if (srcx & 0x100)
+    {
+        first = (0x100 - (srcx & 0xFF));
+        srcx += first;
+    }
+    // source would run past bit 8 before the line ends: stop early
+    if ((srcx - first + last - 1) & 0x100)
+    {
+        last -= (srcx & 0xFF);
+    }
+
+    for (int x = first; x < last; x++, srcx++)
+    {
+        const u32 pixel = line3d[srcx];
+
+        if ((pixel >> 24) != 0)
+        {
+            dst[x+256] = dst[x];
+            dst[x] = pixel | 0x40000000;
+        }
+    }
+}
+
+void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum) // draws one scanline of tiled (text) background 'bgnum' into dst, via DrawPixel
+{
+ u16 bgcnt = BGCnt[bgnum];
+
+ u32 tilesetaddr, tilemapaddr;
+ u16* pal;
+ u32 extpal, extpalslot;
+
+ u16 xoff = BGXPos[bgnum]; // horizontal scroll; incremented once per output pixel
+ u16 yoff = BGYPos[bgnum] + line; // vertical position in the BG for this scanline
+
+ u32 widexmask = (bgcnt & 0x4000) ? 0x100 : 0; // bgcnt bit 14: X bit 8 selects a second tilemap block
+
+ extpal = (DispCnt & 0x40000000); // DispCnt bit 30: use BG extended palettes (256-color tiles only)
+ if (extpal) extpalslot = ((bgnum<2) && (bgcnt&0x2000)) ? (2+bgnum) : bgnum; // BG0/BG1 use slot 2/3 when bgcnt bit 13 is set
+
+ if (Num)
+ {
+ tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12); // bgcnt bits 2-5: tileset base, 16K steps
+ tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3); // bgcnt bits 8-12: tilemap base, 2K steps
+
+ pal = (u16*)&GPU::Palette[0x400];
+ }
+ else
+ {
+ tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); // plus DispCnt bits 24-26, 64K steps
+ tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); // plus DispCnt bits 27-29, 64K steps
+
+ pal = (u16*)&GPU::Palette[0];
+ }
+
+ // adjust Y position in tilemap (each tile row is 32 entries of 2 bytes = 64 bytes)
+ if (bgcnt & 0x8000)
+ {
+ tilemapaddr += ((yoff & 0x1F8) << 3);
+ if (bgcnt & 0x4000)
+ tilemapaddr += ((yoff & 0x100) << 3); // skips one more 2K block when both size bits are set
+ }
+ else
+ tilemapaddr += ((yoff & 0xF8) << 3);
+
+ u16 curtile; // current tilemap entry: bits 0-9 tile number, bit 10 X flip, bit 11 Y flip, bits 12-15 palette
+ u16* curpal;
+ u32 pixelsaddr; // address of the current tile's pixel row
+
+ if (bgcnt & 0x0080)
+ {
+ // 256-color
+
+ // preload shit as needed (the line starts in the middle of a tile, so the loop below won't fetch it)
+ if (xoff & 0x7)
+ {
+ // load a new tile
+ curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3));
+
+ if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12);
+ else curpal = pal;
+
+ pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 6) // 64 bytes per tile
+ + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 3); // 8 bytes per row, mirrored when Y-flipped
+ }
+
+ for (int i = 0; i < 256; i++)
+ {
+ if (!(xoff & 0x7)) // crossing into a new tile
+ {
+ // load a new tile
+ curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3));
+
+ if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12);
+ else curpal = pal;
+
+ pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 6)
+ + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 3);
+ }
+
+ // draw pixel
+ u8 color;
+ u32 tilexoff = (curtile & 0x0400) ? (7-(xoff&0x7)) : (xoff&0x7); // mirrored when X-flipped
+ color = GPU::ReadVRAM_BG<u8>(pixelsaddr + tilexoff); // one byte per pixel
+
+ if (color) // color index 0 is transparent
+ DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum); // the flag bit identifies the BG layer
+
+ xoff++;
+ }
+ }
+ else
+ {
+ // 16-color
+
+ // preload shit as needed (the line starts in the middle of a tile, so the loop below won't fetch it)
+ if (xoff & 0x7)
+ {
+ // load a new tile
+ curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3));
+ curpal = pal + ((curtile & 0xF000) >> 8); // 16 colors per palette
+ pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5) // 32 bytes per tile
+ + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); // 4 bytes per row
+ }
+
+ for (int i = 0; i < 256; i++)
+ {
+ if (!(xoff & 0x7)) // crossing into a new tile
+ {
+ // load a new tile
+ curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3));
+ curpal = pal + ((curtile & 0xF000) >> 8);
+ pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5)
+ + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2);
+ }
+
+ // draw pixel
+ // TODO: optimize VRAM access
+ u8 color;
+ u32 tilexoff = (curtile & 0x0400) ? (7-(xoff&0x7)) : (xoff&0x7); // mirrored when X-flipped
+ if (tilexoff & 0x1)
+ {
+ color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) >> 4; // odd pixel: high nibble
+ }
+ else
+ {
+ color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) & 0x0F; // even pixel: low nibble
+ }
+
+ if (color) // color index 0 is transparent
+ DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum);
+
+ xoff++;
+ }
+ }
+}
+
+void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) // Draws 256 pixels of rot/scale BG layer 'bgnum' into dst. State is indexed with bgnum-2, so bgnum must be 2 or 3. 'line' is not read in this body.
+{
+ u16 bgcnt = BGCnt[bgnum]; // control word of this layer
+
+ u32 tilesetaddr, tilemapaddr;
+ u16* pal;
+ u32 extpal;
+
+ u32 coordmask; // mask applied to the fixed-point coordinates (8 fractional bits) to keep them inside the layer
+ u32 yshift; // log2 of the layer width in pixels; turns a row index into an offset
+ switch (bgcnt & 0xC000) // bits 14-15 of the control word select the layer size
+ {
+ case 0x0000: coordmask = 0x07800; yshift = 7; break; // 16 tiles (128 pixels) per side
+ case 0x4000: coordmask = 0x0F800; yshift = 8; break; // 32 tiles (256 pixels) per side
+ case 0x8000: coordmask = 0x1F800; yshift = 9; break; // 64 tiles (512 pixels) per side
+ case 0xC000: coordmask = 0x3F800; yshift = 10; break; // 128 tiles (1024 pixels) per side
+ }
+ // NOTE(review): the bitmap modes below reuse this square size table; GBATEK lists different bitmap sizes for the two largest settings - confirm
+ u32 overflowmask;
+ if (bgcnt & 0x2000) overflowmask = 0; // bit 13 set: no out-of-area test, coordinates just wrap through coordmask
+ else overflowmask = ~(coordmask | 0x7FF); // otherwise any coordinate bit outside the layer area makes the pixel be skipped
+
+ extpal = (DispCnt & 0x40000000); // bit 30 of DispCnt: use the extended BG palettes in tile mode
+
+ s16 rotA = BGRotA[bgnum-2]; // per-pixel X increment
+ s16 rotB = BGRotB[bgnum-2]; // per-scanline X increment (applied at the end of this function)
+ s16 rotC = BGRotC[bgnum-2]; // per-pixel Y increment
+ s16 rotD = BGRotD[bgnum-2]; // per-scanline Y increment (applied at the end of this function)
+
+ s32 rotX = BGXRefInternal[bgnum-2]; // texture coordinates of the first pixel of this scanline
+ s32 rotY = BGYRefInternal[bgnum-2];
+
+ if (bgcnt & 0x0080)
+ {
+ // bitmap modes
+ // tilemapaddr is used as the bitmap base here: the 5-bit field at bits 8-12 times 0x4000; second engine (Num != 0) uses the 0x06200000 region
+ if (Num) tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 6);
+ else tilemapaddr = 0x06000000 + ((bgcnt & 0x1F00) << 6);
+
+ coordmask |= 0x7FF; // keep the pixel-within-tile and fraction bits too: coordinates now address single pixels
+
+ if (bgcnt & 0x0004)
+ {
+ // direct color bitmap
+ // 16 bits per pixel; a pixel is drawn only when bit 15 of its value is set
+ for (int i = 0; i < 256; i++)
+ {
+ if (!((rotX|rotY) & overflowmask))
+ {
+ u16 color = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((rotY & coordmask) >> 8) << yshift) + ((rotX & coordmask) >> 8)) << 1));
+
+ if (color & 0x8000)
+ DrawPixel(&dst[i], color, 0x01000000<<bgnum);
+ }
+ // step to the next screen pixel
+ rotX += rotA;
+ rotY += rotC;
+ }
+ }
+ else
+ {
+ // 256-color bitmap
+ // 8 bits per pixel, looked up in the standard BG palette; index 0 is not drawn
+ if (Num) pal = (u16*)&GPU::Palette[0x400];
+ else pal = (u16*)&GPU::Palette[0];
+
+ for (int i = 0; i < 256; i++)
+ {
+ if (!((rotX|rotY) & overflowmask))
+ {
+ u8 color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((rotY & coordmask) >> 8) << yshift) + ((rotX & coordmask) >> 8));
+
+ if (color)
+ DrawPixel(&dst[i], pal[color], 0x01000000<<bgnum);
+ }
+ // step to the next screen pixel
+ rotX += rotA;
+ rotY += rotC;
+ }
+ }
+ }
+ else
+ {
+ // mixed affine/text mode
+ // 16-bit map entries (tile number, flip bits, palette number) addressed with affine coordinates; tiles are 8x8 at 8 bits per pixel
+ if (Num)
+ {
+ tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12); // 4-bit field at bits 2-5 times 0x4000
+ tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3); // 5-bit field at bits 8-12 times 0x800
+
+ pal = (u16*)&GPU::Palette[0x400];
+ }
+ else
+ {
+ tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); // first engine adds DispCnt bits 24-26 times 0x10000
+ tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); // first engine adds DispCnt bits 27-29 times 0x10000
+
+ pal = (u16*)&GPU::Palette[0];
+ }
+
+ u16 curtile;
+ u16* curpal;
+
+ yshift -= 3; // from pixels per row to tiles per row
+
+ for (int i = 0; i < 256; i++)
+ {
+ if (!((rotX|rotY) & overflowmask))
+ {
+ curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((rotY & coordmask) >> 11) << yshift) + ((rotX & coordmask) >> 11)) << 1));
+
+ if (extpal) curpal = GetBGExtPal(bgnum, curtile>>12); // top 4 bits of the map entry select the extended palette
+ else curpal = pal;
+
+ // draw pixel
+ u8 color;
+ u32 tilexoff = (rotX >> 8) & 0x7;
+ u32 tileyoff = (rotY >> 8) & 0x7;
+
+ if (curtile & 0x0400) tilexoff = 7-tilexoff; // horizontal flip
+ if (curtile & 0x0800) tileyoff = 7-tileyoff; // vertical flip
+
+ color = GPU::ReadVRAM_BG<u8>(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff);
+
+ if (color)
+ DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum);
+ }
+ // step to the next screen pixel
+ rotX += rotA;
+ rotY += rotC;
+ }
+ }
+ // advance the internal reference point so the next call starts on the following scanline
+ BGXRefInternal[bgnum-2] += rotB;
+ BGYRefInternal[bgnum-2] += rotD;
+}
+
+// Merges the sprite pixels of one priority level into an output scanline.
+//
+// buf:  256-entry sprite line buffer. As used here, bits 0-14 of an entry are
+//       the color, bits 15-19 are compared against 'prio', and the top 8 bits
+//       are forwarded to DrawPixel as the pixel flag.
+// prio: value that (entry & 0xF8000) must equal for the pixel to be drawn.
+// dst:  256-entry output scanline.
+void GPU2D::InterleaveSprites(u32* buf, u32 prio, u32* dst)
+{
+    for (u32 i = 0; i < 256; i++)
+    {
+        // skip entries that belong to another priority level (or are empty)
+        if ((buf[i] & 0xF8000) != prio)
+            continue;
+
+        DrawPixel(&dst[i], buf[i] & 0x7FFF, buf[i] & 0xFF000000);
+    }
+}
+
+void GPU2D::DrawSprites(u32 line, u32* dst) // Walks all 128 OAM entries and draws the part of each visible sprite that falls on scanline 'line' into the sprite line buffer dst.
+{
+ u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0]; // the second engine (Num != 0) uses the OAM at byte offset 0x400
+ // sprite dimensions, indexed by (attr0 bits 14-15) | (attr1 bits 14-15) << 2; entries of 0 are combinations with no size
+ const s32 spritewidth[16] =
+ {
+ 8, 16, 8, 0,
+ 16, 32, 8, 0,
+ 32, 32, 16, 0,
+ 64, 64, 32, 0
+ };
+ const s32 spriteheight[16] =
+ {
+ 8, 8, 16, 0,
+ 16, 8, 32, 0,
+ 32, 16, 32, 0,
+ 64, 32, 64, 0
+ };
+ // iterate attr2 bits 10-11 from value 3 down to 0, and sprites from 127 down to 0, so later (lower-numbered) draws overwrite earlier ones in dst
+ for (int bgnum = 0x0C00; bgnum >= 0x0000; bgnum -= 0x0400)
+ {
+ for (int sprnum = 127; sprnum >= 0; sprnum--)
+ {
+ u16* attrib = &oam[sprnum*4]; // each OAM entry is four 16-bit words
+
+ if ((attrib[2] & 0x0C00) != bgnum)
+ continue;
+
+ if (attrib[0] & 0x0100) // attr0 bit 8: rotation/scaling sprite
+ {
+ u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12);
+ s32 width = spritewidth[sizeparam];
+ s32 height = spriteheight[sizeparam];
+ s32 boundwidth = width;
+ s32 boundheight = height;
+
+ if (attrib[0] & 0x0200) // attr0 bit 9 on a rot/scale sprite: the on-screen bounding box is doubled
+ {
+ boundwidth <<= 1;
+ boundheight <<= 1;
+ }
+
+ u32 ypos = attrib[0] & 0xFF;
+ ypos = (line - ypos) & 0xFF; // row inside the bounding box, with 8-bit wraparound
+ if (ypos >= (u32)boundheight)
+ continue;
+
+ s32 xpos = (s32)(attrib[1] << 23) >> 23; // sign-extend the 9-bit X coordinate
+ if (xpos <= -boundwidth)
+ continue;
+
+ u32 rotparamgroup = (attrib[1] >> 9) & 0x1F;
+ // the parameters of a group live in the 4th halfword of 4 consecutive OAM entries, hence the *16 stride and +3 offset
+ DrawSprite_Rotscale(attrib, &oam[(rotparamgroup*16) + 3], boundwidth, boundheight, width, height, xpos, ypos, dst);
+ }
+ else
+ {
+ if (attrib[0] & 0x0200) // attr0 bit 9 on a normal sprite: not drawn
+ continue;
+
+ u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12);
+ s32 width = spritewidth[sizeparam];
+ s32 height = spriteheight[sizeparam];
+
+ u32 ypos = attrib[0] & 0xFF;
+ ypos = (line - ypos) & 0xFF; // row inside the sprite, with 8-bit wraparound
+ if (ypos >= (u32)height)
+ continue;
+
+ s32 xpos = (s32)(attrib[1] << 23) >> 23; // sign-extend the 9-bit X coordinate
+ if (xpos <= -width)
+ continue;
+
+ // yflip
+ if (attrib[1] & 0x2000)
+ ypos = height-1 - ypos;
+
+ DrawSprite_Normal(attrib, width, xpos, ypos, dst);
+ }
+ }
+ }
+}
+
+void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, u32 ypos, u32* dst) // Draws one row (ypos, relative to the bounding box) of a rotated/scaled sprite into dst. rotparams points at the group's first parameter; the others follow at a stride of 4 halfwords.
+{
+ u32 prio = ((attrib[2] & 0x0C00) << 6) | 0x8000; // attr2 bits 10-11 moved to bits 16-17, plus bit 15; OR'ed into every pixel written to dst
+ u32 tilenum = attrib[2] & 0x03FF;
+ u32 spritemode = (attrib[0] >> 10) & 0x3;
+
+ u32 ytilefactor; // distance between two tile rows of the sprite, in tile-number units
+ if (DispCnt & 0x10)
+ {
+ tilenum <<= ((DispCnt >> 20) & 0x3); // tile number scaled by the DispCnt bits 20-21 setting
+ ytilefactor = (width >> 3) << ((attrib[0] & 0x2000) ? 1:0); // rows follow each other directly; 256-color tiles take two units each
+ }
+ else
+ {
+ ytilefactor = 0x20; // fixed stride of 0x20 tile units per tile row
+ }
+
+ s32 centerX = boundwidth >> 1; // center of the bounding box, taken before boundwidth is clipped below
+ s32 centerY = boundheight >> 1;
+
+ u32 xoff; // first bounding-box column that is on screen
+ if (xpos >= 0)
+ {
+ xoff = 0;
+ if ((xpos+boundwidth) > 256)
+ boundwidth = 256-xpos; // clip at the right screen edge
+ }
+ else
+ {
+ xoff = -xpos; // skip the columns left of the screen
+ xpos = 0;
+ }
+
+ s16 rotA = (s16)rotparams[0];
+ s16 rotB = (s16)rotparams[4];
+ s16 rotC = (s16)rotparams[8];
+ s16 rotD = (s16)rotparams[12];
+ // texture coordinates (8 fractional bits) of the first drawn pixel: offset from the box center through the matrix, plus the sprite center (width<<7 is width/2 in this format)
+ s32 rotX = ((xoff-centerX) * rotA) + ((ypos-centerY) * rotB) + (width << 7);
+ s32 rotY = ((xoff-centerX) * rotC) + ((ypos-centerY) * rotD) + (height << 7);
+ // sprite size in the same fixed-point format, for the bounds tests in the loops
+ width <<= 8;
+ height <<= 8;
+
+ if (spritemode == 3)
+ {
+ // TODO
+ // nothing is drawn in this mode yet; only the alpha/priority value is computed
+ u32 alpha = attrib[2] >> 12;
+ if (!alpha) return;
+ alpha++;
+
+ prio |= (0xC0000000 | (alpha << 24));
+
+ // TODO
+ }
+ else
+ {
+ if (spritemode == 1) prio |= 0x80000000; // mode 1 pixels get flag bit 31
+ else prio |= 0x10000000; // other modes get flag bit 28
+
+ if (attrib[0] & 0x2000)
+ {
+ // 256-color
+ tilenum <<= 5; // tile units to bytes (32 bytes per unit)
+ ytilefactor <<= 5;
+ u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
+
+ u32 extpal = (DispCnt & 0x80000000); // bit 31 of DispCnt: use the extended OBJ palettes
+
+ u16* pal;
+ if (extpal) pal = GetOBJExtPal(attrib[2] >> 12);
+ else pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
+
+ for (; xoff < boundwidth;)
+ {
+ if ((u32)rotX < width && (u32)rotY < height) // the unsigned compare also rejects negative coordinates
+ {
+ u8 color;
+
+ // blaaaarg
+ color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)); // tile row, row inside tile (8 bytes each), tile column (64 bytes each), pixel inside row
+
+ if (color)
+ dst[xpos] = pal[color] | prio;
+ }
+
+ rotX += rotA;
+ rotY += rotC;
+ xoff++;
+ xpos++;
+ }
+ }
+ else
+ {
+ // 16-color
+ tilenum <<= 5; // tile units to bytes (32 bytes per unit)
+ ytilefactor <<= 5;
+ u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
+
+ u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
+ pal += (attrib[2] & 0xF000) >> 8; // attr2 bits 12-15 select one of the 16-entry palettes
+
+ for (; xoff < boundwidth;)
+ {
+ if ((u32)rotX < width && (u32)rotY < height) // the unsigned compare also rejects negative coordinates
+ {
+ u8 color;
+
+ // blaaaarg
+ color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)); // tile row, row inside tile (4 bytes each), tile column (32 bytes each), byte holding the pixel pair
+
+ if (rotX & 0x100) // odd pixel: high nibble, even pixel: low nibble
+ color >>= 4;
+ else
+ color &= 0x0F;
+
+ if (color)
+ dst[xpos] = pal[color] | prio;
+ }
+
+ rotX += rotA;
+ rotY += rotC;
+ xoff++;
+ xpos++;
+ }
+ }
+ }
+}
+
+void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* dst) // Draws row 'ypos' (already Y-flipped by the caller) of a non-rotated sprite of the given width into dst, starting at screen column xpos (may be negative).
+{
+ u32 prio = ((attrib[2] & 0x0C00) << 6) | 0x8000; // attr2 bits 10-11 moved to bits 16-17, plus bit 15; OR'ed into every pixel written to dst
+ u32 tilenum = attrib[2] & 0x03FF;
+ u32 spritemode = (attrib[0] >> 10) & 0x3;
+
+ u32 wmask = width - 8; // really ((width - 1) & ~0x7)
+
+ u32 xoff; // first sprite column that is on screen
+ u32 xend = width; // one past the last sprite column that is on screen
+ if (xpos >= 0)
+ {
+ xoff = 0;
+ if ((xpos+xend) > 256)
+ xend = 256-xpos; // clip at the right screen edge
+ }
+ else
+ {
+ xoff = -xpos; // skip the columns left of the screen
+ xpos = 0;
+ }
+
+ if (spritemode == 3)
+ {
+ // bitmap sprite
+ // the four DispCnt bit 5/6 combinations select how the tile number maps to a byte offset and how long a bitmap row is
+ if (DispCnt & 0x40)
+ {
+ if (DispCnt & 0x20)
+ {
+ // TODO ("reserved")
+ }
+ else
+ {
+ tilenum <<= (7 + ((DispCnt >> 22) & 0x1)); // 128 or 256 bytes per tile-number step, per DispCnt bit 22
+ tilenum += (ypos * width * 2); // rows are stored back to back, 2 bytes per pixel
+ }
+ }
+ else
+ {
+ if (DispCnt & 0x20)
+ {
+ tilenum = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); // low 5 bits: 16-byte column step; upper bits: row step
+ tilenum += (ypos * 256 * 2); // 256-pixel-wide source bitmap
+ }
+ else
+ {
+ tilenum = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); // low 4 bits: 16-byte column step; upper bits: row step
+ tilenum += (ypos * 128 * 2); // 128-pixel-wide source bitmap
+ }
+ }
+
+ u32 alpha = attrib[2] >> 12;
+ if (!alpha) return; // alpha field of 0: nothing is drawn
+ alpha++;
+
+ prio |= (0xC0000000 | (alpha << 24));
+
+ u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
+ pixelsaddr += (xoff << 1);
+ // note: the X-flip bit (attr1 bit 12) is not consulted in this path
+ for (; xoff < xend;)
+ {
+ u16 color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr);
+ pixelsaddr += 2;
+
+ if (color & 0x8000) // only pixels with bit 15 set are drawn
+ dst[xpos] = color | prio;
+
+ xoff++;
+ xpos++;
+ }
+ }
+ else
+ {
+ if (DispCnt & 0x10)
+ {
+ tilenum <<= ((DispCnt >> 20) & 0x3); // tile number scaled by the DispCnt bits 20-21 setting
+ tilenum += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0); // tile rows follow each other directly; 256-color tiles take two units each
+ }
+ else
+ {
+ tilenum += ((ypos >> 3) * 0x20); // fixed stride of 0x20 tile units per tile row
+ }
+
+ if (spritemode == 1) prio |= 0x80000000; // mode 1 pixels get flag bit 31
+ else prio |= 0x10000000; // other modes get flag bit 28
+
+ if (attrib[0] & 0x2000)
+ {
+ // 256-color
+ tilenum <<= 5; // tile units to bytes (32 bytes per unit)
+ u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
+ pixelsaddr += ((ypos & 0x7) << 3); // row inside the tile, 8 bytes per row
+
+ u32 extpal = (DispCnt & 0x80000000); // bit 31 of DispCnt: use the extended OBJ palettes
+
+ u16* pal;
+ if (extpal) pal = GetOBJExtPal(attrib[2] >> 12);
+ else pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
+
+ if (attrib[1] & 0x1000) // xflip. TODO: do better? oh well for now this works
+ {
+ pixelsaddr += (((width-1 - xoff) & wmask) << 3); // start in the tile holding the mirrored column (64 bytes per tile)
+ pixelsaddr += ((width-1 - xoff) & 0x7);
+
+ for (; xoff < xend;)
+ {
+ u8 color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr);
+ pixelsaddr--;
+
+ if (color)
+ dst[xpos] = pal[color] | prio;
+
+ xoff++;
+ xpos++;
+ if (!(xoff & 0x7)) pixelsaddr -= 56; // after 8 pixels, move to the same row of the previous tile (8 + 56 = 64 bytes)
+ }
+ }
+ else
+ {
+ pixelsaddr += ((xoff & wmask) << 3); // start in the tile holding column xoff (64 bytes per tile)
+ pixelsaddr += (xoff & 0x7);
+
+ for (; xoff < xend;)
+ {
+ u8 color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr);
+ pixelsaddr++;
+
+ if (color)
+ dst[xpos] = pal[color] | prio;
+
+ xoff++;
+ xpos++;
+ if (!(xoff & 0x7)) pixelsaddr += 56; // after 8 pixels, move to the same row of the next tile (8 + 56 = 64 bytes)
+ }
+ }
+ }
+ else
+ {
+ // 16-color
+ tilenum <<= 5; // tile units to bytes (32 bytes per unit)
+ u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
+ pixelsaddr += ((ypos & 0x7) << 2); // row inside the tile, 4 bytes per row
+
+ u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
+ pal += (attrib[2] & 0xF000) >> 8; // attr2 bits 12-15 select one of the 16-entry palettes
+
+ if (attrib[1] & 0x1000) // xflip. TODO: do better? oh well for now this works
+ {
+ pixelsaddr += (((width-1 - xoff) & wmask) << 2); // start in the tile holding the mirrored column (32 bytes per tile)
+ pixelsaddr += (((width-1 - xoff) & 0x7) >> 1);
+
+ for (; xoff < xend;)
+ {
+ u8 color;
+ if (xoff & 0x1) // odd xoff maps to an even source pixel when mirrored: low nibble, then step back one byte
+ {
+ color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) & 0x0F;
+ pixelsaddr--;
+ }
+ else
+ {
+ color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) >> 4;
+ }
+
+ if (color)
+ dst[xpos] = pal[color] | prio;
+
+ xoff++;
+ xpos++;
+ if (!(xoff & 0x7)) pixelsaddr -= 28; // after 8 pixels, move to the same row of the previous tile (4 + 28 = 32 bytes)
+ }
+ }
+ else
+ {
+ pixelsaddr += ((xoff & wmask) << 2); // start in the tile holding column xoff (32 bytes per tile)
+ pixelsaddr += ((xoff & 0x7) >> 1);
+
+ for (; xoff < xend;)
+ {
+ u8 color;
+ if (xoff & 0x1) // odd pixel: high nibble, then advance to the next byte
+ {
+ color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) >> 4;
+ pixelsaddr++;
+ }
+ else
+ {
+ color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) & 0x0F;
+ }
+
+ if (color)
+ dst[xpos] = pal[color] | prio;
+
+ xoff++;
+ xpos++;
+ if (!(xoff & 0x7)) pixelsaddr += 28; // after 8 pixels, move to the same row of the next tile (4 + 28 = 32 bytes)
+ }
+ }
+ }
+ }
+}
diff --git a/src/GPU2D.h b/src/GPU2D.h
new file mode 100644
index 0000000..4136440
--- /dev/null
+++ b/src/GPU2D.h
@@ -0,0 +1,97 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef GPU2D_H
+#define GPU2D_H
+
+class GPU2D // One 2D graphics engine; the scanline renderer draws backgrounds and sprites into 256-pixel line buffers.
+{
+public:
+ GPU2D(u32 num);
+ ~GPU2D();
+
+ void Reset();
+
+ void SetFramebuffer(u32* buf);
+ // register access by address and access width (presumably the engine's I/O registers - confirm in the .cpp)
+ u8 Read8(u32 addr);
+ u16 Read16(u32 addr);
+ u32 Read32(u32 addr);
+ void Write8(u32 addr, u8 val);
+ void Write16(u32 addr, u16 val);
+ void Write32(u32 addr, u32 val);
+
+ void DrawScanline(u32 line);
+ void VBlank();
+ // extended palette handling; the Dirty calls presumably invalidate the caches declared below - confirm
+ void BGExtPalDirty(u32 base);
+ void OBJExtPalDirty();
+
+ u16* GetBGExtPal(u32 slot, u32 pal);
+ u16* GetOBJExtPal(u32 pal);
+
+private:
+ u32 Num; // engine index; the drawing code selects the second set of VRAM/palette/OAM addresses when it is nonzero
+ u32* Framebuffer;
+
+ u32 DispCnt; // display control word, tested bit by bit by the drawing code
+ u16 BGCnt[4]; // per-layer control words
+
+ u16 BGXPos[4];
+ u16 BGYPos[4];
+ // affine state; index 0/1 corresponds to layer 2/3 (the drawing code indexes with bgnum-2)
+ s32 BGXRef[2];
+ s32 BGYRef[2];
+ s32 BGXRefInternal[2]; // running reference point, advanced by rotB per call of DrawBG_Extended
+ s32 BGYRefInternal[2]; // running reference point, advanced by rotD per call of DrawBG_Extended
+ s16 BGRotA[2];
+ s16 BGRotB[2];
+ s16 BGRotC[2];
+ s16 BGRotD[2];
+
+ u16 BlendCnt;
+ u8 EVA, EVB;
+ u8 EVY;
+
+ u32 CaptureCnt;
+
+ u16 MasterBrightness;
+ // extended palette caches: 16 palettes of 256 colors per BG slot and for sprites
+ u16 BGExtPalCache[4][16*256];
+ u16 OBJExtPalCache[16*256];
+ u32 BGExtPalStatus[4];
+ u32 OBJExtPalStatus;
+
+ template<u32 bgmode> void DrawScanlineBGMode(u32 line, u32* spritebuf, u32* dst);
+ void DrawScanline_Mode1(u32 line, u32* dst);
+
+ void DrawPixel(u32* dst, u16 color, u32 flag);
+ // per-layer renderers; each draws one 256-pixel scanline into dst
+ void DrawBG_3D(u32 line, u32* dst);
+ void DrawBG_Text(u32 line, u32* dst, u32 num);
+ void DrawBG_Extended(u32 line, u32* dst, u32 bgnum);
+ // sprite rendering: DrawSprites fills a sprite line buffer, InterleaveSprites copies one priority level of it into the output line
+ void InterleaveSprites(u32* buf, u32 prio, u32* dst);
+ void DrawSprites(u32 line, u32* dst);
+ void DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, u32 ypos, u32* dst);
+ void DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* dst);
+
+ void DoCapture(u32 line, u32 width, u32* src);
+};
+
+#endif
diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp
new file mode 100644
index 0000000..8758ce8
--- /dev/null
+++ b/src/GPU3D.cpp
@@ -0,0 +1,1917 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "NDS.h"
+#include "GPU.h"
+#include "FIFO.h"
+
+
+// 3D engine notes
+//
+// vertex/polygon RAM is filled when a complete polygon is defined, after it's been culled and clipped
+// 04000604 reads from bank used by renderer
+// bank used by renderer is emptied at scanline ~192
+// banks are swapped at scanline ~194
+// TODO: needs more investigation. it's weird.
+//
+// clipping rules:
+// * if a shared vertex in a strip is clipped, affected polygons are converted into single polygons
+// strip is resumed at the first eligible polygon
+//
+// clipping exhibits oddities on the real thing. bad precision? fancy algorithm? TODO: investigate.
+//
+// vertex color precision:
+// * vertex colors are kept at 5-bit during clipping. makes for shitty results.
+// * vertex colors are converted to 9-bit before drawing, as such:
+// if (x > 0) x = (x << 4) + 0xF
+// the added bias affects interpolation.
+//
+// depth buffer:
+// Z-buffering mode: val = ((Z * 0x800 * 0x1000) / W) + 0x7FFEFF
+// W-buffering mode: val = W
+//
+// formula for clear depth: (GBAtek is wrong there)
+// clearZ = (val * 0x200) + 0x1FF;
+// if (clearZ >= 0x010000 && clearZ < 0xFFFFFF) clearZ++;
+//
+// alpha is 5-bit
+
+
+namespace GPU3D
+{
+
+const u32 CmdNumParams[256] = // one entry per 8-bit command ID; presumably the number of 32-bit parameters the command takes - confirm against the command dispatcher
+{
+ // 0x00
+ 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0x10
+ 1, 0, 1, 1, 1, 0, 16, 12, 16, 12, 9, 3, 3,
+ 0, 0, 0,
+ // 0x20
+ 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ // 0x30
+ 1, 1, 1, 1, 32,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0x40
+ 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0x50
+ 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0x60
+ 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0x70
+ 3, 2, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0x80+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+const s32 CmdNumCycles[256] = // one entry per 8-bit command ID, laid out like CmdNumParams; presumably the execution cost of the command in cycles - confirm against the code that updates CycleCount
+{
+ // 0x00
+ 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0x10
+ 1, 17, 36, 17, 36, 19, 34, 30, 35, 31, 28, 22, 22,
+ 0, 0, 0,
+ // 0x20
+ 1, 9, 1, 9, 8, 8, 8, 8, 8, 1, 1, 1,
+ 0, 0, 0, 0,
+ // 0x30
+ 4, 4, 6, 1, 32,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0x40
+ 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0x50
+ 392,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0x60
+ 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0x70
+ 103, 9, 5,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0x80+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+typedef struct // element type of the CmdFIFO and CmdPIPE queues
+{
+ u8 Command; // presumably the command ID this entry belongs to (same 8-bit range as the CmdNumParams index) - confirm
+ u32 Param; // presumably one 32-bit parameter word for that command - confirm
+
+} CmdFIFOEntry;
+
+FIFO<CmdFIFOEntry>* CmdFIFO; // allocated in Init() with 256 entries
+FIFO<CmdFIFOEntry>* CmdPIPE; // allocated in Init() with 4 entries
+
+u32 NumCommands, CurCommand, ParamCount, TotalParams;
+
+u32 DispCnt;
+u32 AlphaRef;
+
+u16 ToonTable[32];
+u16 EdgeTable[8];
+
+u32 FogColor;
+u32 FogOffset;
+u8 FogDensityTable[32];
+
+u32 GXStat;
+
+u32 ExecParams[32]; // 32 entries, matching the largest value in CmdNumParams
+u32 ExecParamCount;
+s32 CycleCount;
+
+// matrices are 4x4, stored as 16 s32 values with 12 fractional bits (identity uses 0x1000)
+u32 MatrixMode;
+
+s32 ProjMatrix[16];
+s32 PosMatrix[16];
+s32 VecMatrix[16];
+s32 TexMatrix[16];
+
+s32 ClipMatrix[16]; // ProjMatrix combined with PosMatrix; rebuilt by UpdateClipMatrix()
+bool ClipMatrixDirty; // set when ClipMatrix needs to be rebuilt
+
+s32 Viewport[4];
+
+s32 ProjMatrixStack[16]; // single-entry stack
+s32 PosMatrixStack[31][16]; // 31-entry stack
+s32 VecMatrixStack[31][16]; // 31-entry stack; there is no separate stack pointer for it
+s32 TexMatrixStack[16]; // single-entry stack
+s32 ProjMatrixStackPointer;
+s32 PosMatrixStackPointer;
+s32 TexMatrixStackPointer;
+
+void MatrixLoadIdentity(s32* m); // forward declarations: both are called by Reset() before their definitions
+void UpdateClipMatrix();
+
+
+u32 PolygonMode; // bit 0 selects 4 vertices per polygon instead of 3; values >= 2 are strips (see SubmitPolygon)
+s16 CurVertex[3];
+u8 VertexColor[3];
+s16 TexCoords[2];
+s16 RawTexCoords[2];
+s16 Normal[3];
+
+s16 LightDirection[4][3];
+u8 LightColor[4][3];
+u8 MatDiffuse[3];
+u8 MatAmbient[3];
+u8 MatSpecular[3];
+u8 MatEmission[3];
+
+bool UseShininessTable;
+u8 ShininessTable[128];
+
+u32 PolygonAttr;
+u32 CurPolygonAttr; // the value SubmitPolygon tests for its culling and far-plane bits
+
+u32 TexParam;
+u32 TexPalette;
+
+Vertex TempVertexBuffer[4]; // vertices of the polygon being assembled (input of SubmitPolygon)
+u32 VertexNum;
+u32 VertexNumInPoly;
+u32 NumConsecutivePolygons;
+Polygon* LastStripPolygon; // previous polygon of the current strip, or NULL when the strip was broken
+
+Vertex VertexRAM[6144 * 2]; // two banks of 6144 vertices
+Polygon PolygonRAM[2048 * 2]; // two banks of 2048 polygons
+
+Vertex* CurVertexRAM; // start of the bank currently in use (Reset() selects bank 0)
+Polygon* CurPolygonRAM;
+u32 NumVertices, NumPolygons;
+u32 CurRAMBank;
+
+u32 ClearAttr1, ClearAttr2;
+
+u32 FlushRequest;
+u32 FlushAttributes;
+
+
+
+// Allocates the command FIFO (256 entries) and the command pipe (4 entries),
+// then initializes the software renderer.
+// Returns true on success, false if the software renderer failed to
+// initialize (the two queues stay allocated in that case).
+bool Init()
+{
+    CmdFIFO = new FIFO<CmdFIFOEntry>(256);
+    CmdPIPE = new FIFO<CmdFIFOEntry>(4);
+
+    if (SoftRenderer::Init())
+        return true;
+
+    return false;
+}
+
+// Shuts down the software renderer and frees the two command queues
+// allocated by Init().
+void DeInit()
+{
+    SoftRenderer::DeInit();
+
+    // released in reverse order of allocation
+    delete CmdPIPE;
+    delete CmdFIFO;
+}
+
+// Resets the 3D engine state: empties the command queues, zeroes the command
+// and execution bookkeeping, loads identity into the four current matrices,
+// rebuilds the clip matrix, clears the matrix stacks and the viewport,
+// selects vertex/polygon RAM bank 0 and finally resets the software renderer.
+void Reset()
+{
+    // command queues and the command currently being assembled
+    CmdFIFO->Clear();
+    CmdPIPE->Clear();
+    NumCommands = CurCommand = ParamCount = TotalParams = 0;
+
+    // registers
+    DispCnt = 0;
+    AlphaRef = 0;
+    GXStat = 0;
+
+    // command execution state
+    memset(ExecParams, 0, sizeof(ExecParams));
+    ExecParamCount = 0;
+    CycleCount = 0;
+
+    // current matrices
+    MatrixMode = 0;
+    s32* const matrices[4] = { ProjMatrix, PosMatrix, VecMatrix, TexMatrix };
+    for (int i = 0; i < 4; i++)
+        MatrixLoadIdentity(matrices[i]);
+
+    // force the clip matrix to be recomputed from the fresh matrices
+    ClipMatrixDirty = true;
+    UpdateClipMatrix();
+
+    memset(Viewport, 0, sizeof(Viewport));
+
+    // matrix stacks
+    memset(ProjMatrixStack, 0, sizeof(ProjMatrixStack));
+    memset(PosMatrixStack, 0, sizeof(PosMatrixStack));
+    memset(VecMatrixStack, 0, sizeof(VecMatrixStack));
+    memset(TexMatrixStack, 0, sizeof(TexMatrixStack));
+    ProjMatrixStackPointer = PosMatrixStackPointer = TexMatrixStackPointer = 0;
+
+    // vertex assembly
+    VertexNum = 0;
+    VertexNumInPoly = 0;
+
+    // geometry RAM: start on bank 0 with nothing stored
+    CurRAMBank = 0;
+    CurVertexRAM = &VertexRAM[0];
+    CurPolygonRAM = &PolygonRAM[0];
+    NumVertices = NumPolygons = 0;
+
+    ClearAttr1 = ClearAttr2 = 0;
+    FlushRequest = FlushAttributes = 0;
+
+    SoftRenderer::Reset();
+}
+
+
+
+// Writes the 4x4 identity matrix into m (16 entries).
+// Matrix entries are fixed-point with 12 fractional bits, so 1.0 is 0x1000.
+void MatrixLoadIdentity(s32* m)
+{
+    for (int i = 0; i < 16; i++)
+    {
+        // the diagonal entries are at indices 0, 5, 10 and 15
+        m[i] = (i % 5 == 0) ? 0x1000 : 0;
+    }
+}
+
+// Copies the 16 entries of the 4x4 matrix s into m.
+void MatrixLoad4x4(s32* m, s32* s)
+{
+    for (int i = 0; i < 16; i++)
+        m[i] = s[i];
+}
+
+// Expands the 4x3 matrix s (12 entries, 4 rows of 3) into the 4x4 matrix m.
+// The added fourth column is (0, 0, 0, 1.0), with 1.0 being 0x1000.
+void MatrixLoad4x3(s32* m, s32* s)
+{
+    for (int row = 0; row < 4; row++)
+    {
+        for (int col = 0; col < 3; col++)
+            m[row*4 + col] = s[row*3 + col];
+
+        m[row*4 + 3] = (row == 3) ? 0x1000 : 0;
+    }
+}
+
+// Multiplies the 4x4 matrix m in place by the 4x4 matrix s: m = s*m.
+// Entries have 12 fractional bits; each dot product is accumulated in 64 bits
+// and shifted right by 12 once at the end.
+void MatrixMult4x4(s32* m, s32* s)
+{
+    s32 old[16];
+    memcpy(old, m, sizeof(old));
+
+    for (int row = 0; row < 4; row++)
+    {
+        for (int col = 0; col < 4; col++)
+        {
+            s64 acc = 0;
+            for (int k = 0; k < 4; k++)
+                acc += (s64)s[row*4 + k] * old[k*4 + col];
+
+            m[row*4 + col] = acc >> 12;
+        }
+    }
+}
+
+// Multiplies the 4x4 matrix m in place by the 4x3 matrix s (12 entries, 4 rows
+// of 3): m = s*m, with s treated as a 4x4 matrix whose fourth column is
+// (0, 0, 0, 1.0).
+void MatrixMult4x3(s32* m, s32* s)
+{
+    s32 old[16];
+    memcpy(old, m, sizeof(old));
+
+    for (int row = 0; row < 4; row++)
+    {
+        for (int col = 0; col < 4; col++)
+        {
+            s64 acc = 0;
+            for (int k = 0; k < 3; k++)
+                acc += (s64)s[row*3 + k] * old[k*4 + col];
+
+            // implicit 1.0 in the fourth column of the last row of s
+            if (row == 3)
+                acc += (s64)0x1000 * old[12 + col];
+
+            m[row*4 + col] = acc >> 12;
+        }
+    }
+}
+
+// Multiplies the 4x4 matrix m in place by the 3x3 matrix s (9 entries):
+// m = s*m, with s treated as a 4x4 matrix that is identity outside its upper
+// left 3x3 part. Only the first three rows of m change; the last row
+// (m[12..15]) is left untouched.
+void MatrixMult3x3(s32* m, s32* s)
+{
+    s32 old[12];
+    memcpy(old, m, sizeof(old));
+
+    for (int row = 0; row < 3; row++)
+    {
+        for (int col = 0; col < 4; col++)
+        {
+            s64 acc = 0;
+            for (int k = 0; k < 3; k++)
+                acc += (s64)s[row*3 + k] * old[k*4 + col];
+
+            m[row*4 + col] = acc >> 12;
+        }
+    }
+}
+
+// Scales the 4x4 matrix m in place: row 0 by s[0], row 1 by s[1] and row 2 by
+// s[2] (12 fractional bits each). The last row (m[12..15]) is left untouched.
+void MatrixScale(s32* m, s32* s)
+{
+    for (int row = 0; row < 3; row++)
+    {
+        for (int col = 0; col < 4; col++)
+        {
+            s32* entry = &m[row*4 + col];
+            *entry = ((s64)s[row] * (*entry)) >> 12;
+        }
+    }
+}
+
+// Applies a translation to the 4x4 matrix m in place: m = T*m, where T is the
+// identity matrix with its last row replaced by (s[0], s[1], s[2], 1.0).
+// Entries have 12 fractional bits.
+//
+// Only the last row of m changes: it gains s[0]*row0 + s[1]*row1 + s[2]*row2.
+// This is done for all four columns, including the W column (m[15]), which
+// matters as soon as m[3], m[7] or m[11] is nonzero (e.g. when translating a
+// perspective projection matrix). The result is the same as calling
+// MatrixMult4x3 with the equivalent 4x3 translation matrix.
+void MatrixTranslate(s32* m, s32* s)
+{
+    for (int col = 0; col < 4; col++)
+    {
+        m[12 + col] += ((s64)s[0]*m[col] + (s64)s[1]*m[4 + col] + (s64)s[2]*m[8 + col]) >> 12;
+    }
+}
+
+// Rebuilds ClipMatrix if it has been marked dirty.
+// ClipMatrix starts as a copy of ProjMatrix and is then multiplied by
+// PosMatrix (MatrixMult4x4 computes m = s*m, so the result is
+// PosMatrix * ProjMatrix).
+void UpdateClipMatrix()
+{
+    if (ClipMatrixDirty)
+    {
+        ClipMatrixDirty = false;
+
+        MatrixLoad4x4(ClipMatrix, ProjMatrix);
+        MatrixMult4x4(ClipMatrix, PosMatrix);
+    }
+}
+
+
+
+// Computes the vertex where the segment from vin to vout crosses one clip
+// plane and stores it in *outbuf.
+//
+// comp:   which coordinate is clipped (0 = X, 1 = Y, 2 = Z)
+// plane:  +1 for the plane coord = W, -1 for the plane coord = -W
+// vout:   the endpoint that the caller found to be outside the plane
+// vin:    the endpoint that the caller found to be inside the plane
+//
+// Position, color and texture coordinates are interpolated linearly with the
+// factor num/den; the clipped coordinate itself is then set to plane*W so it
+// lies exactly on the plane. The result is flagged as Clipped. Fields of
+// Vertex that are not assigned here are copied out uninitialized.
+template<int comp, s32 plane>
+void ClipSegment(Vertex* outbuf, Vertex* vout, Vertex* vin)
+{
+    // signed distance of vin from the plane, and the distance between the
+    // two endpoints measured along the same axis
+    s64 num = vin->Position[3] - (plane*vin->Position[comp]);
+    s32 den = num - (vout->Position[3] - (plane*vout->Position[comp]));
+
+    Vertex clipped;
+#define LERP_FIELD(field) clipped.field = (vin->field + ((vout->field - vin->field) * num) / den)
+
+    // the clipped axis is skipped here, it is derived from W below
+    for (int axis = 0; axis < 3; axis++)
+    {
+        if (axis != comp) LERP_FIELD(Position[axis]);
+    }
+    LERP_FIELD(Position[3]);
+    clipped.Position[comp] = plane*clipped.Position[3];
+
+    for (int channel = 0; channel < 3; channel++)
+        LERP_FIELD(Color[channel]);
+
+    for (int t = 0; t < 2; t++)
+        LERP_FIELD(TexCoords[t]);
+
+    clipped.Clipped = true;
+
+#undef LERP_FIELD
+    *outbuf = clipped;
+}
+
+void SubmitPolygon()
+{
+ Vertex clippedvertices[2][10];
+ Vertex* reusedvertices[2];
+ int clipstart = 0;
+ int lastpolyverts = 0;
+
+ int nverts = PolygonMode & 0x1 ? 4:3;
+ int prev, next;
+ int c;
+
+ // culling
+
+ Vertex *v0, *v1, *v2;
+ s64 normalX, normalY, normalZ;
+ s64 dot;
+
+ v0 = &TempVertexBuffer[0];
+ v1 = &TempVertexBuffer[1];
+ v2 = &TempVertexBuffer[2];
+ normalX = (((s64)v0->Position[1] * v2->Position[3]) - ((s64)v0->Position[3] * v2->Position[1])) >> 12;
+ normalY = (((s64)v0->Position[3] * v2->Position[0]) - ((s64)v0->Position[0] * v2->Position[3])) >> 12;
+ normalZ = (((s64)v0->Position[0] * v2->Position[1]) - ((s64)v0->Position[1] * v2->Position[0])) >> 12;
+ dot = ((s64)(v1->Position[0] >> 0) * normalX) + ((s64)(v1->Position[1] >> 0) * normalY) + ((s64)(v1->Position[3] >> 0) * normalZ);
+
+ bool facingview = (dot < 0);
+
+ if (facingview)
+ {
+ if (!(CurPolygonAttr & (1<<7)))
+ {
+ LastStripPolygon = NULL;
+ return;
+ }
+ }
+ else if (dot > 0)
+ {
+ if (!(CurPolygonAttr & (1<<6)))
+ {
+ LastStripPolygon = NULL;
+ return;
+ }
+ }
+
+ // for strips, check whether we can attach to the previous polygon
+ // this requires two vertices shared with the previous polygon, and that
+ // the two polygons be of the same type
+
+ if (PolygonMode >= 2 && LastStripPolygon)
+ {
+ int id0, id1;
+ if (PolygonMode == 2)
+ {
+ if (NumConsecutivePolygons & 1)
+ {
+ id0 = 2;
+ id1 = 1;
+ }
+ else
+ {
+ id0 = 0;
+ id1 = 2;
+ }
+
+ lastpolyverts = 3;
+ }
+ else
+ {
+ id0 = 3;
+ id1 = 2;
+
+ lastpolyverts = 4;
+ }
+
+ if (LastStripPolygon->NumVertices == lastpolyverts &&
+ !LastStripPolygon->Vertices[id0]->Clipped &&
+ !LastStripPolygon->Vertices[id1]->Clipped)
+ {
+ reusedvertices[0] = LastStripPolygon->Vertices[id0];
+ reusedvertices[1] = LastStripPolygon->Vertices[id1];
+
+ clippedvertices[0][0] = *reusedvertices[0];
+ clippedvertices[0][1] = *reusedvertices[1];
+ clippedvertices[1][0] = *reusedvertices[0];
+ clippedvertices[1][1] = *reusedvertices[1];
+
+ clipstart = 2;
+ }
+ }
+
+ // clip.
+ // for each vertex:
+ // if it's outside, check if the previous and next vertices are inside
+ // if so, place a new vertex at the edge of the view volume
+
+ // X clipping
+
+ c = clipstart;
+ for (int i = clipstart; i < nverts; i++)
+ {
+ prev = i-1; if (prev < 0) prev = nverts-1;
+ next = i+1; if (next >= nverts) next = 0;
+
+ Vertex vtx = TempVertexBuffer[i];
+ if (vtx.Position[0] > vtx.Position[3])
+ {
+ Vertex* vprev = &TempVertexBuffer[prev];
+ if (vprev->Position[0] <= vprev->Position[3])
+ {
+ ClipSegment<0, 1>(&clippedvertices[0][c], &vtx, vprev);
+ c++;
+ }
+
+ Vertex* vnext = &TempVertexBuffer[next];
+ if (vnext->Position[0] <= vnext->Position[3])
+ {
+ ClipSegment<0, 1>(&clippedvertices[0][c], &vtx, vnext);
+ c++;
+ }
+ }
+ else
+ clippedvertices[0][c++] = vtx;
+ }
+
+ nverts = c; c = clipstart;
+ for (int i = clipstart; i < nverts; i++)
+ {
+ prev = i-1; if (prev < 0) prev = nverts-1;
+ next = i+1; if (next >= nverts) next = 0;
+
+ Vertex vtx = clippedvertices[0][i];
+ if (vtx.Position[0] < -vtx.Position[3])
+ {
+ Vertex* vprev = &clippedvertices[0][prev];
+ if (vprev->Position[0] >= -vprev->Position[3])
+ {
+ ClipSegment<0, -1>(&clippedvertices[1][c], &vtx, vprev);
+ c++;
+ }
+
+ Vertex* vnext = &clippedvertices[0][next];
+ if (vnext->Position[0] >= -vnext->Position[3])
+ {
+ ClipSegment<0, -1>(&clippedvertices[1][c], &vtx, vnext);
+ c++;
+ }
+ }
+ else
+ clippedvertices[1][c++] = vtx;
+ }
+
+ for (int i = 0; i < c; i++)
+ {
+ Vertex* vtx = &clippedvertices[1][i];
+
+ vtx->Color[0] &= ~0xFFF; vtx->Color[0] += 0xFFF;
+ vtx->Color[1] &= ~0xFFF; vtx->Color[1] += 0xFFF;
+ vtx->Color[2] &= ~0xFFF; vtx->Color[2] += 0xFFF;
+ }
+
+ // Y clipping
+
+ nverts = c; c = clipstart;
+ for (int i = clipstart; i < nverts; i++)
+ {
+ prev = i-1; if (prev < 0) prev = nverts-1;
+ next = i+1; if (next >= nverts) next = 0;
+
+ Vertex vtx = clippedvertices[1][i];
+ if (vtx.Position[1] > vtx.Position[3])
+ {
+ Vertex* vprev = &clippedvertices[1][prev];
+ if (vprev->Position[1] <= vprev->Position[3])
+ {
+ ClipSegment<1, 1>(&clippedvertices[0][c], &vtx, vprev);
+ c++;
+ }
+
+ Vertex* vnext = &clippedvertices[1][next];
+ if (vnext->Position[1] <= vnext->Position[3])
+ {
+ ClipSegment<1, 1>(&clippedvertices[0][c], &vtx, vnext);
+ c++;
+ }
+ }
+ else
+ clippedvertices[0][c++] = vtx;
+ }
+
+ nverts = c; c = clipstart;
+ for (int i = clipstart; i < nverts; i++)
+ {
+ prev = i-1; if (prev < 0) prev = nverts-1;
+ next = i+1; if (next >= nverts) next = 0;
+
+ Vertex vtx = clippedvertices[0][i];
+ if (vtx.Position[1] < -vtx.Position[3])
+ {
+ Vertex* vprev = &clippedvertices[0][prev];
+ if (vprev->Position[1] >= -vprev->Position[3])
+ {
+ ClipSegment<1, -1>(&clippedvertices[1][c], &vtx, vprev);
+ c++;
+ }
+
+ Vertex* vnext = &clippedvertices[0][next];
+ if (vnext->Position[1] >= -vnext->Position[3])
+ {
+ ClipSegment<1, -1>(&clippedvertices[1][c], &vtx, vnext);
+ c++;
+ }
+ }
+ else
+ clippedvertices[1][c++] = vtx;
+ }
+
+ for (int i = 0; i < c; i++)
+ {
+ Vertex* vtx = &clippedvertices[1][i];
+
+ vtx->Color[0] &= ~0xFFF; vtx->Color[0] += 0xFFF;
+ vtx->Color[1] &= ~0xFFF; vtx->Color[1] += 0xFFF;
+ vtx->Color[2] &= ~0xFFF; vtx->Color[2] += 0xFFF;
+ }
+
+ // Z clipping
+
+ bool farplaneclip = false;
+ nverts = c; c = clipstart;
+ for (int i = clipstart; i < nverts; i++)
+ {
+ prev = i-1; if (prev < 0) prev = nverts-1;
+ next = i+1; if (next >= nverts) next = 0;
+
+ Vertex vtx = clippedvertices[1][i];
+ if (vtx.Position[2] > vtx.Position[3])
+ {
+ farplaneclip = true;
+
+ Vertex* vprev = &clippedvertices[1][prev];
+ if (vprev->Position[2] <= vprev->Position[3])
+ {
+ ClipSegment<2, 1>(&clippedvertices[0][c], &vtx, vprev);
+ c++;
+ }
+
+ Vertex* vnext = &clippedvertices[1][next];
+ if (vnext->Position[2] <= vnext->Position[3])
+ {
+ ClipSegment<2, 1>(&clippedvertices[0][c], &vtx, vnext);
+ c++;
+ }
+ }
+ else
+ clippedvertices[0][c++] = vtx;
+ }
+
+ if (farplaneclip && (!(CurPolygonAttr & (1<<12))))
+ return;
+
+ nverts = c; c = clipstart;
+ for (int i = clipstart; i < nverts; i++)
+ {
+ prev = i-1; if (prev < 0) prev = nverts-1;
+ next = i+1; if (next >= nverts) next = 0;
+
+ Vertex vtx = clippedvertices[0][i];
+ if (vtx.Position[2] < -vtx.Position[3])
+ {
+ Vertex* vprev = &clippedvertices[0][prev];
+ if (vprev->Position[2] >= -vprev->Position[3])
+ {
+ ClipSegment<2, -1>(&clippedvertices[1][c], &vtx, vprev);
+ c++;
+ }
+
+ Vertex* vnext = &clippedvertices[0][next];
+ if (vnext->Position[2] >= -vnext->Position[3])
+ {
+ ClipSegment<2, -1>(&clippedvertices[1][c], &vtx, vnext);
+ c++;
+ }
+ }
+ else
+ clippedvertices[1][c++] = vtx;
+ }
+
+ for (int i = 0; i < c; i++)
+ {
+ Vertex* vtx = &clippedvertices[1][i];
+
+ vtx->Color[0] &= ~0xFFF; vtx->Color[0] += 0xFFF;
+ vtx->Color[1] &= ~0xFFF; vtx->Color[1] += 0xFFF;
+ vtx->Color[2] &= ~0xFFF; vtx->Color[2] += 0xFFF;
+ }
+
+ if (c == 0)
+ {
+ LastStripPolygon = NULL;
+ return;
+ }
+
+ // build the actual polygon
+
+ if (NumPolygons >= 2048 || NumVertices+c > 6144)
+ {
+ LastStripPolygon = NULL;
+ // TODO: set DISP3DCNT overflow flag
+ return;
+ }
+
+ Polygon* poly = &CurPolygonRAM[NumPolygons++];
+ poly->NumVertices = 0;
+
+ poly->Attr = CurPolygonAttr;
+ poly->TexParam = TexParam;
+ poly->TexPalette = TexPalette;
+
+ poly->FacingView = facingview;
+
+ u32 texfmt = (TexParam >> 26) & 0x7;
+ u32 polyalpha = (CurPolygonAttr >> 16) & 0x1F;
+ poly->Translucent = (texfmt == 1 || texfmt == 6 || (polyalpha > 0 && polyalpha < 31));
+
+ if (LastStripPolygon && clipstart > 0)
+ {
+ if (c == lastpolyverts)
+ {
+ poly->Vertices[0] = reusedvertices[0];
+ poly->Vertices[1] = reusedvertices[1];
+ }
+ else
+ {
+ Vertex v0 = *reusedvertices[0];
+ Vertex v1 = *reusedvertices[1];
+
+ CurVertexRAM[NumVertices] = v0;
+ poly->Vertices[0] = &CurVertexRAM[NumVertices];
+ CurVertexRAM[NumVertices+1] = v1;
+ poly->Vertices[1] = &CurVertexRAM[NumVertices+1];
+ NumVertices += 2;
+ }
+
+ poly->NumVertices += 2;
+ }
+
+ for (int i = clipstart; i < c; i++)
+ {
+ Vertex* vtx = &CurVertexRAM[NumVertices];
+ *vtx = clippedvertices[1][i];
+ poly->Vertices[i] = vtx;
+
+ NumVertices++;
+ poly->NumVertices++;
+
+ // viewport transform
+ s32 posX, posY, posZ;
+ s32 w = vtx->Position[3];
+ if (w == 0)
+ {
+ posX = 0;
+ posY = 0;
+ posZ = 0;
+ w = 0x1000;
+ }
+ else
+ {
+ posX = (((s64)(vtx->Position[0] + w) * Viewport[2]) / (((s64)w) << 1)) + Viewport[0];
+ posY = (((s64)(-vtx->Position[1] + w) * Viewport[3]) / (((s64)w) << 1)) + Viewport[1];
+
+ if (FlushAttributes & 0x2) posZ = w;
+ else posZ = (((s64)vtx->Position[2] * 0x800000) / w) + 0x7FFEFF;
+ }
+
+ if (posX < 0) posX = 0;
+ else if (posX > 256) posX = 256;
+ if (posY < 0) posY = 0;
+ else if (posY > 192) posY = 192;
+ if (posZ < 0) posZ = 0;
+ else if (posZ > 0xFFFFFF) posZ = 0xFFFFFF;
+
+ vtx->FinalPosition[0] = posX;
+ vtx->FinalPosition[1] = posY;
+ vtx->FinalPosition[2] = posZ;
+ vtx->FinalPosition[3] = w;
+
+ vtx->FinalColor[0] = vtx->Color[0] >> 12;
+ if (vtx->FinalColor[0]) vtx->FinalColor[0] = ((vtx->FinalColor[0] << 4) + 0xF);
+ vtx->FinalColor[1] = vtx->Color[1] >> 12;
+ if (vtx->FinalColor[1]) vtx->FinalColor[1] = ((vtx->FinalColor[1] << 4) + 0xF);
+ vtx->FinalColor[2] = vtx->Color[2] >> 12;
+ if (vtx->FinalColor[2]) vtx->FinalColor[2] = ((vtx->FinalColor[2] << 4) + 0xF);
+ }
+
+ // determine bounds of the polygon
+ u32 vtop = 0, vbot = 0;
+ s32 ytop = 192, ybot = 0;
+ s32 xtop = 256, xbot = 0;
+
+ for (int i = 0; i < c; i++)
+ {
+ Vertex* vtx = poly->Vertices[i];
+
+ if (vtx->FinalPosition[1] < ytop || (vtx->FinalPosition[1] == ytop && vtx->FinalPosition[0] < xtop))
+ {
+ xtop = vtx->FinalPosition[0];
+ ytop = vtx->FinalPosition[1];
+ vtop = i;
+ }
+ if (vtx->FinalPosition[1] > ybot || (vtx->FinalPosition[1] == ybot && vtx->FinalPosition[0] > xbot))
+ {
+ xbot = vtx->FinalPosition[0];
+ ybot = vtx->FinalPosition[1];
+ vbot = i;
+ }
+ }
+
+ poly->VTop = vtop; poly->VBottom = vbot;
+ poly->YTop = ytop; poly->YBottom = ybot;
+ poly->XTop = xtop; poly->XBottom = xbot;
+
+ if (PolygonMode >= 2)
+ LastStripPolygon = poly;
+ else
+ LastStripPolygon = NULL;
+}
+
+// Transforms the current vertex (CurVertex) by the clip matrix, stores the
+// result together with its colour and texture coordinates in the next free
+// TempVertexBuffer slot, and calls SubmitPolygon() whenever the buffered
+// vertices complete a primitive for the current PolygonMode.
+void SubmitVertex()
+{
+    // input position as a 4-component vector; the last component is the constant 0x1000
+    s64 pos[4] = {(s64)CurVertex[0], (s64)CurVertex[1], (s64)CurVertex[2], 0x1000};
+    Vertex* out = &TempVertexBuffer[VertexNumInPoly];
+
+    UpdateClipMatrix();
+
+    // output component 'col' is the dot product of pos with ClipMatrix[col], [4+col], [8+col], [12+col]
+    for (int col = 0; col < 4; col++)
+    {
+        out->Position[col] = (pos[0]*ClipMatrix[col] + pos[1]*ClipMatrix[4+col] + pos[2]*ClipMatrix[8+col] + pos[3]*ClipMatrix[12+col]) >> 12;
+    }
+
+    // colour channels are widened by 12 bits, with the low bits filled with ones
+    for (int ch = 0; ch < 3; ch++)
+    {
+        out->Color[ch] = (VertexColor[ch] << 12) + 0xFFF;
+    }
+
+    if ((TexParam >> 30) == 3)
+    {
+        // transform mode 3: texture coordinates are derived from the vertex position
+        out->TexCoords[0] = (pos[0]*TexMatrix[0] + pos[1]*TexMatrix[4] + pos[2]*TexMatrix[8] + pos[3]*(RawTexCoords[0]<<8)) >> 20;
+        out->TexCoords[1] = (pos[0]*TexMatrix[1] + pos[1]*TexMatrix[5] + pos[2]*TexMatrix[9] + pos[3]*(RawTexCoords[1]<<8)) >> 20;
+    }
+    else
+    {
+        out->TexCoords[0] = TexCoords[0];
+        out->TexCoords[1] = TexCoords[1];
+    }
+
+    out->Clipped = false;
+
+    VertexNum++;
+    VertexNumInPoly++;
+
+    switch (PolygonMode)
+    {
+    case 0: // triangle
+    case 1: // quad
+        {
+            // separate primitives: 3 vertices per triangle, 4 per quad
+            const int needed = (PolygonMode == 0) ? 3 : 4;
+            if (VertexNumInPoly == needed)
+            {
+                VertexNumInPoly = 0;
+                SubmitPolygon();
+                NumConsecutivePolygons++;
+            }
+        }
+        break;
+
+    case 2: // triangle strip
+        if (NumConsecutivePolygons & 1)
+        {
+            // odd-numbered strip triangle: the two carried-over vertices are swapped before submitting
+            Vertex swapped = TempVertexBuffer[1];
+            TempVertexBuffer[1] = TempVertexBuffer[0];
+            TempVertexBuffer[0] = swapped;
+
+            VertexNumInPoly = 2;
+            SubmitPolygon();
+            NumConsecutivePolygons++;
+
+            // slot 0 is kept, the newest vertex moves into slot 1
+            TempVertexBuffer[1] = TempVertexBuffer[2];
+        }
+        else if (VertexNumInPoly == 3)
+        {
+            VertexNumInPoly = 2;
+            SubmitPolygon();
+            NumConsecutivePolygons++;
+
+            // the last two vertices are carried over to the next triangle
+            TempVertexBuffer[0] = TempVertexBuffer[1];
+            TempVertexBuffer[1] = TempVertexBuffer[2];
+        }
+        break;
+
+    case 3: // quad strip
+        if (VertexNumInPoly == 4)
+        {
+            // the last two vertices are swapped before the quad is submitted
+            Vertex swapped = TempVertexBuffer[3];
+            TempVertexBuffer[3] = TempVertexBuffer[2];
+            TempVertexBuffer[2] = swapped;
+
+            VertexNumInPoly = 2;
+            SubmitPolygon();
+            NumConsecutivePolygons++;
+
+            // slots 3 and 2 (after the swap) are carried over as slots 0 and 1
+            TempVertexBuffer[0] = TempVertexBuffer[3];
+            TempVertexBuffer[1] = TempVertexBuffer[2];
+        }
+        break;
+    }
+}
+
+// Recomputes VertexColor from the current normal, the material colours and
+// every light enabled in the low four bits of CurPolygonAttr. When the
+// texture transform mode (TexParam >> 30) is 2, TexCoords is also recomputed
+// from the normal. Returns the number of lights that were processed.
+s32 CalculateLighting()
+{
+    if ((TexParam >> 30) == 2)
+    {
+        // transform mode 2: texture coordinates are offset by the normal times the texture matrix
+        for (int k = 0; k < 2; k++)
+        {
+            TexCoords[k] = RawTexCoords[k] + (((s64)Normal[0]*TexMatrix[k] + (s64)Normal[1]*TexMatrix[4+k] + (s64)Normal[2]*TexMatrix[8+k]) >> 21);
+        }
+    }
+
+    // normal multiplied by the upper-left 3x3 part of the vector matrix
+    s32 normaltrans[3];
+    for (int k = 0; k < 3; k++)
+    {
+        normaltrans[k] = (Normal[0]*VecMatrix[k] + Normal[1]*VecMatrix[4+k] + Normal[2]*VecMatrix[8+k]) >> 12;
+    }
+
+    // the emission colour is the starting point; each enabled light adds to it
+    for (int ch = 0; ch < 3; ch++)
+    {
+        VertexColor[ch] = MatEmission[ch];
+    }
+
+    s32 numlights = 0;
+    for (int light = 0; light < 4; light++)
+    {
+        if (!(CurPolygonAttr & (1<<light)))
+            continue;
+
+        // diffuse level: negated dot product of light direction and normal, clamped to 0..255
+        s32 difflevel = (-(LightDirection[light][0]*normaltrans[0] +
+                           LightDirection[light][1]*normaltrans[1] +
+                           LightDirection[light][2]*normaltrans[2])) >> 10;
+        if (difflevel < 0) difflevel = 0;
+        else if (difflevel > 255) difflevel = 255;
+
+        // specular level: same idea, using the halved light direction with 0x200 subtracted from Z
+        s32 shinelevel = -(((LightDirection[light][0]>>1)*normaltrans[0] +
+                            (LightDirection[light][1]>>1)*normaltrans[1] +
+                            ((LightDirection[light][2]-0x200)>>1)*normaltrans[2]) >> 10);
+        if (shinelevel < 0) shinelevel = 0;
+        shinelevel = ((shinelevel * shinelevel) >> 7) - 0x100; // really (2*shinelevel*shinelevel)-1
+        if (shinelevel < 0) shinelevel = 0;
+        else if (shinelevel > 255) shinelevel = 255;
+
+        if (UseShininessTable)
+        {
+            // checkme
+            shinelevel >>= 1;
+            shinelevel = ShininessTable[shinelevel];
+        }
+
+        // accumulate specular, diffuse and ambient contributions, then clamp each channel to 31
+        for (int ch = 0; ch < 3; ch++)
+        {
+            VertexColor[ch] += ((MatSpecular[ch] * LightColor[light][ch] * shinelevel) >> 13);
+            VertexColor[ch] += ((MatDiffuse[ch] * LightColor[light][ch] * difflevel) >> 13);
+            VertexColor[ch] += ((MatAmbient[ch] * LightColor[light][ch]) >> 5);
+
+            if (VertexColor[ch] > 31) VertexColor[ch] = 31;
+        }
+
+        numlights++;
+    }
+
+    // checkme: cycle count
+    return numlights;
+}
+
+
+
+// Queues one command entry. The entry goes straight into the pipe when the
+// FIFO is empty and the pipe has room; otherwise it is appended to the FIFO.
+void CmdFIFOWrite(CmdFIFOEntry& entry)
+{
+    if (CmdFIFO->IsEmpty() && !CmdPIPE->IsFull())
+    {
+        CmdPIPE->Write(entry);
+        return;
+    }
+
+    //printf("!!! GX FIFO FULL\n");
+    //return;
+
+    // temp. hack
+    // SM64DS seems to overflow the FIFO occasionally
+    // either leftover bugs in our implementation, or the game accidentally doing that
+    // TODO: investigate.
+    // TODO: implement this behavior properly (freezes the bus until the FIFO isn't full anymore)
+
+    // while the FIFO is full, execute queued commands until there is room again
+    while (CmdFIFO->IsFull())
+        ExecuteCommand();
+
+    CmdFIFO->Write(entry);
+}
+
+// Takes the next entry out of the pipe. When the pipe level drops to 2 or
+// less, up to two entries are moved over from the FIFO, and the FIFO DMA and
+// IRQ conditions are re-evaluated.
+CmdFIFOEntry CmdFIFORead()
+{
+    CmdFIFOEntry ret = CmdPIPE->Read();
+
+    // pipe still holds more than two entries: no refill needed
+    if (CmdPIPE->Level() > 2)
+        return ret;
+
+    for (int moved = 0; moved < 2 && !CmdFIFO->IsEmpty(); moved++)
+        CmdPIPE->Write(CmdFIFO->Read());
+
+    CheckFIFODMA();
+    CheckFIFOIRQ();
+
+    return ret;
+}
+
+
+
+// Pulls one entry from the command pipe and appends its parameter word to
+// ExecParams. Once as many words as the entry's command requires
+// (CmdNumParams) have been gathered, the command's cycle cost (CmdNumCycles)
+// is added to CycleCount and the command is executed.
+//
+// GXStat bits touched here: bit 14 is cleared before every command and set by
+// successful matrix push/pop, bit 15 is set on matrix stack errors, bit 27 is
+// set while CycleCount is positive.
+void ExecuteCommand()
+{
+    CmdFIFOEntry entry = CmdFIFORead();
+
+    //printf("FIFO: processing %02X %08X. Levels: FIFO=%d, PIPE=%d\n", entry.Command, entry.Param, CmdFIFO->Level(), CmdPIPE->Level());
+
+    ExecParams[ExecParamCount] = entry.Param;
+    ExecParamCount++;
+
+    // still waiting for more parameter words
+    if (ExecParamCount < CmdNumParams[entry.Command])
+        return;
+
+    CycleCount += CmdNumCycles[entry.Command];
+    ExecParamCount = 0;
+
+    GXStat &= ~(1<<14);
+    if (CycleCount > 0)
+        GXStat |= (1<<27);
+
+    switch (entry.Command)
+    {
+    case 0x10: // matrix mode
+        MatrixMode = ExecParams[0] & 0x3;
+        break;
+
+    case 0x11: // push matrix
+        if (MatrixMode == 0)
+        {
+            // the projection stack holds a single entry
+            if (ProjMatrixStackPointer > 0)
+            {
+                printf("!! PROJ MATRIX STACK OVERFLOW\n");
+                GXStat |= (1<<15);
+                break;
+            }
+
+            memcpy(ProjMatrixStack, ProjMatrix, 16*4);
+            ProjMatrixStackPointer++;
+            GXStat |= (1<<14);
+        }
+        else if (MatrixMode == 3)
+        {
+            // the texture stack holds a single entry
+            if (TexMatrixStackPointer > 0)
+            {
+                printf("!! TEX MATRIX STACK OVERFLOW\n");
+                GXStat |= (1<<15);
+                break;
+            }
+
+            memcpy(TexMatrixStack, TexMatrix, 16*4);
+            TexMatrixStackPointer++;
+            GXStat |= (1<<14);
+        }
+        else
+        {
+            // position stack: valid slots are 0..30
+            if (PosMatrixStackPointer > 30)
+            {
+                printf("!! POS MATRIX STACK OVERFLOW\n");
+                GXStat |= (1<<15);
+                break;
+            }
+
+            memcpy(PosMatrixStack[PosMatrixStackPointer], PosMatrix, 16*4);
+            if (MatrixMode == 2)
+                memcpy(VecMatrixStack[PosMatrixStackPointer], VecMatrix, 16*4);
+            PosMatrixStackPointer++;
+            GXStat |= (1<<14);
+        }
+        break;
+
+    case 0x12: // pop matrix
+        if (MatrixMode == 0)
+        {
+            if (ProjMatrixStackPointer <= 0)
+            {
+                printf("!! PROJ MATRIX STACK UNDERFLOW\n");
+                GXStat |= (1<<15);
+                break;
+            }
+
+            ProjMatrixStackPointer--;
+            memcpy(ProjMatrix, ProjMatrixStack, 16*4);
+            GXStat |= (1<<14);
+            ClipMatrixDirty = true;
+        }
+        else if (MatrixMode == 3)
+        {
+            if (TexMatrixStackPointer <= 0)
+            {
+                printf("!! TEX MATRIX STACK UNDERFLOW\n");
+                GXStat |= (1<<15);
+                break;
+            }
+
+            TexMatrixStackPointer--;
+            memcpy(TexMatrix, TexMatrixStack, 16*4);
+            GXStat |= (1<<14);
+        }
+        else
+        {
+            // the parameter is a signed 6-bit pop count (sign-extended from bit 5)
+            s32 offset = (s32)(ExecParams[0] << 26) >> 26;
+            PosMatrixStackPointer -= offset;
+
+            if (PosMatrixStackPointer < 0 || PosMatrixStackPointer > 30)
+            {
+                printf("!! POS MATRIX STACK UNDER/OVERFLOW %d\n", PosMatrixStackPointer);
+                // undo the pointer change so the stack state stays valid
+                PosMatrixStackPointer += offset;
+                GXStat |= (1<<15);
+                break;
+            }
+
+            memcpy(PosMatrix, PosMatrixStack[PosMatrixStackPointer], 16*4);
+            if (MatrixMode == 2)
+                memcpy(VecMatrix, VecMatrixStack[PosMatrixStackPointer], 16*4);
+            GXStat |= (1<<14);
+            ClipMatrixDirty = true;
+        }
+        break;
+
+    case 0x13: // store matrix
+        if (MatrixMode == 0)
+        {
+            memcpy(ProjMatrixStack, ProjMatrix, 16*4);
+        }
+        else if (MatrixMode == 3)
+        {
+            memcpy(TexMatrixStack, TexMatrix, 16*4);
+        }
+        else
+        {
+            u32 addr = ExecParams[0] & 0x1F;
+            if (addr > 30)
+            {
+                printf("!! POS MATRIX STORE ADDR 31\n");
+                GXStat |= (1<<15);
+                break;
+            }
+
+            memcpy(PosMatrixStack[addr], PosMatrix, 16*4);
+            if (MatrixMode == 2)
+                memcpy(VecMatrixStack[addr], VecMatrix, 16*4);
+        }
+        break;
+
+    case 0x14: // restore matrix
+        if (MatrixMode == 0)
+        {
+            memcpy(ProjMatrix, ProjMatrixStack, 16*4);
+            ClipMatrixDirty = true;
+        }
+        else if (MatrixMode == 3)
+        {
+            memcpy(TexMatrix, TexMatrixStack, 16*4);
+        }
+        else
+        {
+            u32 addr = ExecParams[0] & 0x1F;
+            if (addr > 30)
+            {
+                // message names the restore command (it used to repeat the store message)
+                printf("!! POS MATRIX RESTORE ADDR 31\n");
+                GXStat |= (1<<15);
+                break;
+            }
+
+            memcpy(PosMatrix, PosMatrixStack[addr], 16*4);
+            if (MatrixMode == 2)
+                memcpy(VecMatrix, VecMatrixStack[addr], 16*4);
+            ClipMatrixDirty = true;
+        }
+        break;
+
+    // For the matrix commands below: mode 0 targets the projection matrix,
+    // mode 3 the texture matrix, modes 1 and 2 the position matrix (mode 2
+    // also updates the vector matrix, except for scale). Any change to the
+    // projection or position matrix marks the clip matrix dirty.
+
+    case 0x15: // identity
+        if (MatrixMode == 0)
+        {
+            MatrixLoadIdentity(ProjMatrix);
+            ClipMatrixDirty = true;
+        }
+        else if (MatrixMode == 3)
+            MatrixLoadIdentity(TexMatrix);
+        else
+        {
+            MatrixLoadIdentity(PosMatrix);
+            if (MatrixMode == 2)
+                MatrixLoadIdentity(VecMatrix);
+            ClipMatrixDirty = true;
+        }
+        break;
+
+    case 0x16: // load 4x4
+        if (MatrixMode == 0)
+        {
+            MatrixLoad4x4(ProjMatrix, (s32*)ExecParams);
+            ClipMatrixDirty = true;
+        }
+        else if (MatrixMode == 3)
+            MatrixLoad4x4(TexMatrix, (s32*)ExecParams);
+        else
+        {
+            MatrixLoad4x4(PosMatrix, (s32*)ExecParams);
+            if (MatrixMode == 2)
+                MatrixLoad4x4(VecMatrix, (s32*)ExecParams);
+            ClipMatrixDirty = true;
+        }
+        break;
+
+    case 0x17: // load 4x3
+        if (MatrixMode == 0)
+        {
+            MatrixLoad4x3(ProjMatrix, (s32*)ExecParams);
+            ClipMatrixDirty = true;
+        }
+        else if (MatrixMode == 3)
+            MatrixLoad4x3(TexMatrix, (s32*)ExecParams);
+        else
+        {
+            MatrixLoad4x3(PosMatrix, (s32*)ExecParams);
+            if (MatrixMode == 2)
+                MatrixLoad4x3(VecMatrix, (s32*)ExecParams);
+            ClipMatrixDirty = true;
+        }
+        break;
+
+    case 0x18: // mult 4x4
+        if (MatrixMode == 0)
+        {
+            MatrixMult4x4(ProjMatrix, (s32*)ExecParams);
+            ClipMatrixDirty = true;
+        }
+        else if (MatrixMode == 3)
+            MatrixMult4x4(TexMatrix, (s32*)ExecParams);
+        else
+        {
+            MatrixMult4x4(PosMatrix, (s32*)ExecParams);
+            if (MatrixMode == 2)
+            {
+                // the second multiplication costs 30 extra cycles
+                MatrixMult4x4(VecMatrix, (s32*)ExecParams);
+                CycleCount += 30;
+            }
+            ClipMatrixDirty = true;
+        }
+        break;
+
+    case 0x19: // mult 4x3
+        if (MatrixMode == 0)
+        {
+            MatrixMult4x3(ProjMatrix, (s32*)ExecParams);
+            ClipMatrixDirty = true;
+        }
+        else if (MatrixMode == 3)
+            MatrixMult4x3(TexMatrix, (s32*)ExecParams);
+        else
+        {
+            MatrixMult4x3(PosMatrix, (s32*)ExecParams);
+            if (MatrixMode == 2)
+            {
+                MatrixMult4x3(VecMatrix, (s32*)ExecParams);
+                CycleCount += 30;
+            }
+            ClipMatrixDirty = true;
+        }
+        break;
+
+    case 0x1A: // mult 3x3
+        if (MatrixMode == 0)
+        {
+            MatrixMult3x3(ProjMatrix, (s32*)ExecParams);
+            ClipMatrixDirty = true;
+        }
+        else if (MatrixMode == 3)
+            MatrixMult3x3(TexMatrix, (s32*)ExecParams);
+        else
+        {
+            MatrixMult3x3(PosMatrix, (s32*)ExecParams);
+            if (MatrixMode == 2)
+            {
+                MatrixMult3x3(VecMatrix, (s32*)ExecParams);
+                CycleCount += 30;
+            }
+            ClipMatrixDirty = true;
+        }
+        break;
+
+    case 0x1B: // scale
+        if (MatrixMode == 0)
+        {
+            MatrixScale(ProjMatrix, (s32*)ExecParams);
+            ClipMatrixDirty = true;
+        }
+        else if (MatrixMode == 3)
+            MatrixScale(TexMatrix, (s32*)ExecParams);
+        else
+        {
+            // scale never touches the vector matrix, even in mode 2
+            MatrixScale(PosMatrix, (s32*)ExecParams);
+            ClipMatrixDirty = true;
+        }
+        break;
+
+    case 0x1C: // translate
+        if (MatrixMode == 0)
+        {
+            MatrixTranslate(ProjMatrix, (s32*)ExecParams);
+            ClipMatrixDirty = true;
+        }
+        else if (MatrixMode == 3)
+            MatrixTranslate(TexMatrix, (s32*)ExecParams);
+        else
+        {
+            MatrixTranslate(PosMatrix, (s32*)ExecParams);
+            if (MatrixMode == 2)
+                MatrixTranslate(VecMatrix, (s32*)ExecParams);
+            ClipMatrixDirty = true;
+        }
+        break;
+
+    case 0x20: // vertex color
+        {
+            // three 5-bit channels packed into the low 15 bits
+            u32 c = ExecParams[0];
+            u32 r = c & 0x1F;
+            u32 g = (c >> 5) & 0x1F;
+            u32 b = (c >> 10) & 0x1F;
+            VertexColor[0] = r;
+            VertexColor[1] = g;
+            VertexColor[2] = b;
+        }
+        break;
+
+    case 0x21: // normal
+        // three signed 10-bit components; each is moved to the top of an s16
+        // and shifted back down to sign-extend it
+        Normal[0] = (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6;
+        Normal[1] = (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6;
+        Normal[2] = (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6;
+        CycleCount += CalculateLighting();
+        break;
+
+    case 0x22: // texcoord
+        RawTexCoords[0] = ExecParams[0] & 0xFFFF;
+        RawTexCoords[1] = ExecParams[0] >> 16;
+        if ((TexParam >> 30) == 1)
+        {
+            // transform mode 1: texture coordinates go through the texture matrix
+            TexCoords[0] = (RawTexCoords[0]*TexMatrix[0] + RawTexCoords[1]*TexMatrix[4] + TexMatrix[8] + TexMatrix[12]) >> 12;
+            TexCoords[1] = (RawTexCoords[0]*TexMatrix[1] + RawTexCoords[1]*TexMatrix[5] + TexMatrix[9] + TexMatrix[13]) >> 12;
+        }
+        else
+        {
+            TexCoords[0] = RawTexCoords[0];
+            TexCoords[1] = RawTexCoords[1];
+        }
+        break;
+
+    case 0x23: // full vertex
+        CurVertex[0] = ExecParams[0] & 0xFFFF;
+        CurVertex[1] = ExecParams[0] >> 16;
+        CurVertex[2] = ExecParams[1] & 0xFFFF;
+        SubmitVertex();
+        break;
+
+    case 0x24: // 10-bit vertex
+        // each 10-bit component is placed in bits 6..15 of the coordinate
+        CurVertex[0] = (ExecParams[0] & 0x000003FF) << 6;
+        CurVertex[1] = (ExecParams[0] & 0x000FFC00) >> 4;
+        CurVertex[2] = (ExecParams[0] & 0x3FF00000) >> 14;
+        SubmitVertex();
+        break;
+
+    case 0x25: // vertex XY
+        CurVertex[0] = ExecParams[0] & 0xFFFF;
+        CurVertex[1] = ExecParams[0] >> 16;
+        SubmitVertex();
+        break;
+
+    case 0x26: // vertex XZ
+        CurVertex[0] = ExecParams[0] & 0xFFFF;
+        CurVertex[2] = ExecParams[0] >> 16;
+        SubmitVertex();
+        break;
+
+    case 0x27: // vertex YZ
+        CurVertex[1] = ExecParams[0] & 0xFFFF;
+        CurVertex[2] = ExecParams[0] >> 16;
+        SubmitVertex();
+        break;
+
+    case 0x28: // 10-bit delta vertex
+        // signed 10-bit deltas, sign-extended the same way as the normal
+        CurVertex[0] += (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6;
+        CurVertex[1] += (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6;
+        CurVertex[2] += (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6;
+        SubmitVertex();
+        break;
+
+    case 0x29: // polygon attributes
+        // only latched here; copied to CurPolygonAttr by the begin-polygons command
+        PolygonAttr = ExecParams[0];
+        break;
+
+    case 0x2A: // texture param
+        TexParam = ExecParams[0];
+        break;
+
+    case 0x2B: // texture palette
+        TexPalette = ExecParams[0] & 0x1FFF;
+        break;
+
+    case 0x30: // diffuse/ambient material
+        MatDiffuse[0] = ExecParams[0] & 0x1F;
+        MatDiffuse[1] = (ExecParams[0] >> 5) & 0x1F;
+        MatDiffuse[2] = (ExecParams[0] >> 10) & 0x1F;
+        MatAmbient[0] = (ExecParams[0] >> 16) & 0x1F;
+        MatAmbient[1] = (ExecParams[0] >> 21) & 0x1F;
+        MatAmbient[2] = (ExecParams[0] >> 26) & 0x1F;
+        if (ExecParams[0] & 0x8000)
+        {
+            // bit 15: also use the diffuse colour as the vertex colour
+            VertexColor[0] = MatDiffuse[0];
+            VertexColor[1] = MatDiffuse[1];
+            VertexColor[2] = MatDiffuse[2];
+        }
+        break;
+
+    case 0x31: // specular/emission material
+        MatSpecular[0] = ExecParams[0] & 0x1F;
+        MatSpecular[1] = (ExecParams[0] >> 5) & 0x1F;
+        MatSpecular[2] = (ExecParams[0] >> 10) & 0x1F;
+        MatEmission[0] = (ExecParams[0] >> 16) & 0x1F;
+        MatEmission[1] = (ExecParams[0] >> 21) & 0x1F;
+        MatEmission[2] = (ExecParams[0] >> 26) & 0x1F;
+        UseShininessTable = (ExecParams[0] & 0x8000) != 0;
+        break;
+
+    case 0x32: // light direction
+        {
+            // top two bits select the light; the direction is multiplied by the
+            // vector matrix at the time the command executes
+            u32 l = ExecParams[0] >> 30;
+            s16 dir[3];
+            dir[0] = (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6;
+            dir[1] = (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6;
+            dir[2] = (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6;
+            LightDirection[l][0] = (dir[0]*VecMatrix[0] + dir[1]*VecMatrix[4] + dir[2]*VecMatrix[8]) >> 12;
+            LightDirection[l][1] = (dir[0]*VecMatrix[1] + dir[1]*VecMatrix[5] + dir[2]*VecMatrix[9]) >> 12;
+            LightDirection[l][2] = (dir[0]*VecMatrix[2] + dir[1]*VecMatrix[6] + dir[2]*VecMatrix[10]) >> 12;
+        }
+        break;
+
+    case 0x33: // light color
+        {
+            u32 l = ExecParams[0] >> 30;
+            LightColor[l][0] = ExecParams[0] & 0x1F;
+            LightColor[l][1] = (ExecParams[0] >> 5) & 0x1F;
+            LightColor[l][2] = (ExecParams[0] >> 10) & 0x1F;
+        }
+        break;
+
+    case 0x34: // shininess table
+        {
+            // 32 parameter words, four table bytes each, lowest byte first
+            for (int i = 0; i < 128; i += 4)
+            {
+                u32 val = ExecParams[i >> 2];
+                ShininessTable[i + 0] = val & 0xFF;
+                ShininessTable[i + 1] = (val >> 8) & 0xFF;
+                ShininessTable[i + 2] = (val >> 16) & 0xFF;
+                ShininessTable[i + 3] = val >> 24;
+            }
+        }
+        break;
+
+    case 0x40: // begin polygons
+        PolygonMode = ExecParams[0] & 0x3;
+        VertexNum = 0;
+        VertexNumInPoly = 0;
+        NumConsecutivePolygons = 0;
+        LastStripPolygon = NULL;
+        CurPolygonAttr = PolygonAttr;
+        break;
+
+    case 0x50: // flush
+        // Run() stops executing commands until VBlank() clears FlushRequest
+        FlushRequest = 1;
+        FlushAttributes = ExecParams[0] & 0x3;
+        CycleCount = 392;
+        break;
+
+    case 0x60: // viewport x1,y1,x2,y2
+        // stored as x, y, width, height
+        Viewport[0] = ExecParams[0] & 0xFF;
+        Viewport[1] = (ExecParams[0] >> 8) & 0xFF;
+        Viewport[2] = ((ExecParams[0] >> 16) & 0xFF) - Viewport[0] + 1;
+        Viewport[3] = (ExecParams[0] >> 24) - Viewport[1] + 1;
+        break;
+
+    default:
+        //if (entry.Command != 0x41)
+        //printf("!! UNKNOWN GX COMMAND %02X %08X\n", entry.Command, entry.Param);
+        break;
+    }
+}
+
+// Advances the geometry engine by 'cycles'. Does nothing while a flush is
+// pending, or when no cycles remain and the pipe is empty. Otherwise the
+// cycle budget is reduced and queued commands are executed for as long as the
+// budget is used up and the pipe still holds entries.
+void Run(s32 cycles)
+{
+    if (FlushRequest || (CycleCount <= 0 && CmdPIPE->IsEmpty()))
+        return;
+
+    CycleCount -= cycles;
+
+    while (CycleCount <= 0 && !CmdPIPE->IsEmpty())
+        ExecuteCommand();
+
+    // nothing left to do: reset the counter and clear GXStat bits 27 and 14
+    if (CycleCount <= 0 && CmdPIPE->IsEmpty())
+    {
+        CycleCount = 0;
+        GXStat &= ~((1<<27)|(1<<14));
+    }
+}
+
+
+// Raises or clears the GXFIFO interrupt according to the mode held in the top
+// two bits of GXStat: mode 1 fires while the FIFO holds fewer than 128
+// entries, mode 2 fires while it is empty, any other mode never fires.
+void CheckFIFOIRQ()
+{
+    bool irq;
+    if ((GXStat >> 30) == 1)
+        irq = (CmdFIFO->Level() < 128);
+    else if ((GXStat >> 30) == 2)
+        irq = CmdFIFO->IsEmpty();
+    else
+        irq = false;
+
+    if (irq)
+        NDS::SetIRQ(0, NDS::IRQ_GXFIFO);
+    else
+        NDS::ClearIRQ(0, NDS::IRQ_GXFIFO);
+}
+
+// Asks the DMA controller to check its mode-0x07 transfers whenever the FIFO
+// holds fewer than 128 entries.
+void CheckFIFODMA()
+{
+    if (CmdFIFO->Level() >= 128)
+        return;
+
+    NDS::CheckDMAs(0, 0x07);
+}
+
+
+// Called at vertical blank. If a flush was requested, the accumulated polygon
+// list is rendered, the vertex/polygon RAM banks are swapped, and the
+// counters are reset for the next frame.
+void VBlank()
+{
+    if (!FlushRequest)
+        return;
+
+    SoftRenderer::RenderFrame(CurVertexRAM, CurPolygonRAM, NumPolygons);
+
+    // switch to the other bank: 6144 vertices and 2048 polygons per bank
+    CurRAMBank = CurRAMBank ? 0 : 1;
+    CurVertexRAM = &VertexRAM[CurRAMBank ? 6144 : 0];
+    CurPolygonRAM = &PolygonRAM[CurRAMBank ? 2048 : 0];
+
+    NumVertices = 0;
+    NumPolygons = 0;
+
+    FlushRequest = 0;
+}
+
+// Forwards to the software renderer and returns its buffer for scanline 'line'.
+u32* GetLine(int line)
+{
+    u32* pixels = SoftRenderer::GetLine(line);
+    return pixels;
+}
+
+
+// 8-bit register read. No 8-bit readable registers are handled: every access
+// is logged and reads as 0.
+u8 Read8(u32 addr)
+{
+    printf("unknown GPU3D read8 %08X\n", addr);
+
+    return 0;
+}
+
+// 16-bit register read. Only DispCnt (0x04000060) is handled; any other
+// address is logged and reads as 0.
+u16 Read16(u32 addr)
+{
+    if (addr == 0x04000060)
+        return DispCnt;
+
+    printf("unknown GPU3D read16 %08X\n", addr);
+    return 0;
+}
+
+// 32-bit register read: DispCnt, the fixed value at 0x04000320, the composed
+// status word at 0x04000600, the clip matrix (0x04000640..0x0400067C) and the
+// 3x3 part of the vector matrix (0x04000680..0x040006A0). Unhandled addresses
+// read as 0.
+u32 Read32(u32 addr)
+{
+    if (addr == 0x04000060)
+        return DispCnt;
+
+    if (addr == 0x04000320)
+        return 46; // TODO, eventually
+
+    if (addr == 0x04000600)
+    {
+        // stored flags combined with the live stack pointers and FIFO state
+        u32 fifolevel = CmdFIFO->Level();
+
+        return GXStat |
+               ((PosMatrixStackPointer & 0x1F) << 8) |
+               ((ProjMatrixStackPointer & 0x1) << 13) |
+               (fifolevel << 16) |
+               (fifolevel < 128 ? (1<<25) : 0) |
+               (fifolevel == 0 ? (1<<26) : 0);
+    }
+
+    // nine consecutive words map to the 3x3 part of VecMatrix: word n reads
+    // element 4*(n/3) + n%3 (indices 0,1,2, 4,5,6, 8,9,10); word-aligned addresses only
+    if (addr >= 0x04000680 && addr <= 0x040006A0 && !(addr & 0x3))
+    {
+        u32 n = (addr - 0x04000680) >> 2;
+        return VecMatrix[((n / 3) << 2) + (n % 3)];
+    }
+
+    if (addr >= 0x04000640 && addr < 0x04000680)
+    {
+        UpdateClipMatrix();
+        return ClipMatrix[(addr & 0x3C) >> 2];
+    }
+
+    //printf("unknown GPU3D read32 %08X\n", addr);
+    return 0;
+}
+
+// 8-bit register write: AlphaRef (0x04000340, low five bits) and the fog
+// density table (0x04000360..0x0400037F). Anything else is logged.
+void Write8(u32 addr, u8 val)
+{
+    if (addr == 0x04000340)
+    {
+        AlphaRef = val & 0x1F;
+        return;
+    }
+
+    if (addr >= 0x04000360 && addr < 0x04000380)
+    {
+        u32 index = addr - 0x04000360;
+        FogDensityTable[index] = val;
+        return;
+    }
+
+    printf("unknown GPU3D write8 %08X %02X\n", addr, val);
+}
+
+// 16-bit register write. Handles DispCnt, AlphaRef, either half of
+// ClearAttr1/ClearAttr2/FogColor, FogOffset, and the edge colour, fog density
+// and toon tables. Anything else is logged.
+void Write16(u32 addr, u16 val)
+{
+    switch (addr)
+    {
+    case 0x04000060: DispCnt = val; return;
+
+    case 0x04000340: AlphaRef = val & 0x1F; return;
+
+    // a 16-bit write replaces one half of the 32-bit value and keeps the other
+    case 0x04000350: ClearAttr1 = (ClearAttr1 & 0xFFFF0000) | val; return;
+    case 0x04000352: ClearAttr1 = (ClearAttr1 & 0xFFFF) | (val << 16); return;
+    case 0x04000354: ClearAttr2 = (ClearAttr2 & 0xFFFF0000) | val; return;
+    case 0x04000356: ClearAttr2 = (ClearAttr2 & 0xFFFF) | (val << 16); return;
+
+    case 0x04000358: FogColor = (FogColor & 0xFFFF0000) | val; return;
+    case 0x0400035A: FogColor = (FogColor & 0xFFFF) | (val << 16); return;
+    case 0x0400035C: FogOffset = val; return;
+    }
+
+    // edge table: one 16-bit entry per halfword
+    if (addr >= 0x04000330 && addr < 0x04000340)
+    {
+        u32 index = (addr - 0x04000330) >> 1;
+        EdgeTable[index] = val;
+        return;
+    }
+
+    // fog density table: one entry per byte, low byte first
+    if (addr >= 0x04000360 && addr < 0x04000380)
+    {
+        addr -= 0x04000360;
+        FogDensityTable[addr] = val & 0xFF;
+        FogDensityTable[addr+1] = val >> 8;
+        return;
+    }
+
+    // toon table: one 16-bit entry per halfword
+    if (addr >= 0x04000380 && addr < 0x040003C0)
+    {
+        u32 index = (addr - 0x04000380) >> 1;
+        ToonTable[index] = val;
+        return;
+    }
+
+    printf("unknown GPU3D write16 %08X %04X\n", addr, val);
+}
+
+// 32-bit register write. Handles the plain registers, the GXStat control
+// bits, the packed command port (0x04000400..0x0400043F), the direct command
+// ports (0x04000440..0x040005CB) and the edge colour, fog density and toon
+// tables. Anything else is logged.
+void Write32(u32 addr, u32 val)
+{
+    switch (addr)
+    {
+    case 0x04000060: DispCnt = val & 0xFFFF; return;
+
+    case 0x04000340: AlphaRef = val & 0x1F; return;
+
+    case 0x04000350: ClearAttr1 = val; return;
+    case 0x04000354: ClearAttr2 = val; return;
+
+    case 0x04000358: FogColor = val; return;
+    case 0x0400035C: FogOffset = val; return;
+
+    case 0x04000600:
+        // writing bit 15 clears that flag and resets the projection and texture stack pointers
+        if (val & 0x8000)
+        {
+            GXStat &= ~0x8000;
+            ProjMatrixStackPointer = 0;
+            //PosMatrixStackPointer = 0;
+            TexMatrixStackPointer = 0;
+        }
+        // only the top two bits (the FIFO IRQ mode) are taken from the written value
+        val &= 0xC0000000;
+        GXStat &= 0x3FFFFFFF;
+        GXStat |= val;
+        CheckFIFOIRQ();
+        return;
+    }
+
+    // packed command port: the first word carries up to four command bytes,
+    // the following words carry their parameters in order
+    if (addr >= 0x04000400 && addr < 0x04000440)
+    {
+        if (NumCommands == 0)
+        {
+            // new command word
+            NumCommands = 4;
+            CurCommand = val;
+            ParamCount = 0;
+            TotalParams = CmdNumParams[CurCommand & 0xFF];
+
+            // the first command takes parameters: wait for the next write
+            if (TotalParams > 0) return;
+        }
+        else
+            ParamCount++;
+
+        for (;;)
+        {
+            // command byte 0 is only queued when the entire command word was zero
+            if ((CurCommand & 0xFF) || (NumCommands == 4 && CurCommand == 0))
+            {
+                CmdFIFOEntry entry;
+                entry.Command = CurCommand & 0xFF;
+                entry.Param = val;
+                CmdFIFOWrite(entry);
+            }
+
+            // current command complete: move on to the next packed byte
+            if (ParamCount >= TotalParams)
+            {
+                CurCommand >>= 8;
+                NumCommands--;
+                if (NumCommands == 0) break;
+
+                ParamCount = 0;
+                TotalParams = CmdNumParams[CurCommand & 0xFF];
+            }
+            // the (new) current command still needs parameters: wait for more writes
+            if (ParamCount < TotalParams)
+                break;
+        }
+
+        return;
+    }
+
+    // direct command ports: the command number comes from the address
+    if (addr >= 0x04000440 && addr < 0x040005CC)
+    {
+        CmdFIFOEntry entry;
+        entry.Command = (addr & 0x1FC) >> 2;
+        entry.Param = val;
+        CmdFIFOWrite(entry);
+        return;
+    }
+
+    // edge table: two 16-bit entries per word, low half first
+    if (addr >= 0x04000330 && addr < 0x04000340)
+    {
+        addr = (addr - 0x04000330) >> 1;
+        EdgeTable[addr] = val & 0xFFFF;
+        EdgeTable[addr+1] = val >> 16;
+        return;
+    }
+
+    // fog density table: four byte entries per word, low byte first
+    if (addr >= 0x04000360 && addr < 0x04000380)
+    {
+        addr -= 0x04000360;
+        FogDensityTable[addr] = val & 0xFF;
+        FogDensityTable[addr+1] = (val >> 8) & 0xFF;
+        FogDensityTable[addr+2] = (val >> 16) & 0xFF;
+        FogDensityTable[addr+3] = val >> 24;
+        return;
+    }
+
+    // toon table: two 16-bit entries per word, low half first
+    if (addr >= 0x04000380 && addr < 0x040003C0)
+    {
+        addr = (addr - 0x04000380) >> 1;
+        ToonTable[addr] = val & 0xFFFF;
+        ToonTable[addr+1] = val >> 16;
+        return;
+    }
+
+    printf("unknown GPU3D write32 %08X %08X\n", addr, val);
+}
+
+}
+
diff --git a/src/GPU3D.h b/src/GPU3D.h
new file mode 100644
index 0000000..c1adc2f
--- /dev/null
+++ b/src/GPU3D.h
@@ -0,0 +1,98 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef GPU3D_H
+#define GPU3D_H
+
+namespace GPU3D
+{
+
+// One vertex as produced by the geometry engine.
+struct Vertex
+{
+    s32 Position[4];    // position after the clip matrix multiplication (x, y, z, w)
+    s32 Color[3];       // colour channels, widened by 12 bits
+    s16 TexCoords[2];   // texture coordinates
+
+    bool Clipped;       // cleared when the vertex is submitted
+
+    // final vertex attributes.
+    // allows them to be reused in polygon strips.
+
+    s32 FinalPosition[4];   // screen X, screen Y, depth, w
+    s32 FinalColor[3];      // colour channels after the final conversion
+
+};
+
+// One polygon ready for rendering.
+struct Polygon
+{
+    Vertex* Vertices[10];   // pointers to the polygon's vertices
+    u32 NumVertices;        // number of used entries in Vertices
+
+    u32 Attr;               // polygon attributes in effect when the polygon was built
+    u32 TexParam;           // texture parameters
+    u32 TexPalette;         // texture palette base
+
+    bool FacingView;
+    bool Translucent;
+
+    u32 VTop, VBottom; // vertex indices
+    s32 YTop, YBottom; // Y coords
+    s32 XTop, XBottom; // associated X coords
+
+};
+
+// registers shared with other units
+extern u32 DispCnt;
+extern u32 AlphaRef;
+extern s32 Viewport[4];
+extern u32 ClearAttr1, ClearAttr2;
+
+// lifecycle
+bool Init();
+void DeInit();
+void Reset();
+
+// command processing
+void ExecuteCommand();
+
+void Run(s32 cycles);
+void CheckFIFOIRQ();
+void CheckFIFODMA();
+
+// frame output
+void VBlank();
+u32* GetLine(int line);
+
+// register access
+u8 Read8(u32 addr);
+u16 Read16(u32 addr);
+u32 Read32(u32 addr);
+void Write8(u32 addr, u8 val);
+void Write16(u32 addr, u16 val);
+void Write32(u32 addr, u32 val);
+
+// software rasterizer backend
+namespace SoftRenderer
+{
+
+bool Init();
+void DeInit();
+void Reset();
+
+void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys);
+u32* GetLine(int line);
+
+}
+
+}
+
+#endif
diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
new file mode 100644
index 0000000..5c9dc8e
--- /dev/null
+++ b/src/GPU3D_Soft.cpp
@@ -0,0 +1,853 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "NDS.h"
+#include "GPU.h"
+
+
+namespace GPU3D
+{
+namespace SoftRenderer
+{
+
+// per-pixel render targets, 256x192, indexed as (y*256)+x
+// color buffer: R in bits 0-5, G in bits 8-13, B in bits 16-21, alpha in bits 24-28
+u32 ColorBuffer[256*192];
+u32 DepthBuffer[256*192];
+u32 AttrBuffer[256*192];
+
+// attribute buffer:
+// bit0-5: polygon ID
+// bit8: fog enable
+
+
+// Software renderer setup. There is nothing to allocate; always reports success.
+bool Init()
+{
+ return true;
+}
+
+// Software renderer teardown. Nothing to release.
+void DeInit()
+{
+}
+
+// Zeroes the color, depth and attribute buffers.
+void Reset()
+{
+    memset(ColorBuffer, 0, sizeof(ColorBuffer));
+    memset(DepthBuffer, 0, sizeof(DepthBuffer));
+    memset(AttrBuffer, 0, sizeof(AttrBuffer));
+}
+
+
+// Applies the wrap mode of one texture axis to a texel coordinate.
+// repeat=false: clamp to [0, size-1].
+// repeat=true: wrap around; with flip set, every other repetition is mirrored.
+static s16 WrapTexCoord(s16 coord, u32 size, bool repeat, bool flip)
+{
+    if (!repeat)
+    {
+        if (coord < 0) return 0;
+        if (coord >= size) return size-1;
+        return coord;
+    }
+
+    if (flip && (coord & size))
+        return (size-1) - (coord & (size-1));
+
+    return coord & (size-1);
+}
+
+// Mixes two 15-bit colors channel by channel:
+// (channel0*weight0 + channel1*weight1) >> shift
+static u16 BlendTexColors(u16 color0, u16 color1, u32 weight0, u32 weight1, u32 shift)
+{
+    u32 r0 = color0 & 0x001F;
+    u32 g0 = color0 & 0x03E0;
+    u32 b0 = color0 & 0x7C00;
+    u32 r1 = color1 & 0x001F;
+    u32 g1 = color1 & 0x03E0;
+    u32 b1 = color1 & 0x7C00;
+
+    u32 r = (r0*weight0 + r1*weight1) >> shift;
+    u32 g = ((g0*weight0 + g1*weight1) >> shift) & 0x03E0;
+    u32 b = ((b0*weight0 + b1*weight1) >> shift) & 0x7C00;
+
+    return r | g | b;
+}
+
+// Fetches one texel.
+// texparam/texpal: texture parameters and palette base of the polygon
+// s, t: texture coordinates (the low 4 bits are dropped before the lookup)
+// color: receives the 15-bit texel color
+// alpha: receives the texel alpha (0-31)
+// Nothing is written to color/alpha when the format field is 0.
+void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
+{
+    u32 vramaddr = (texparam & 0xFFFF) << 3;
+
+    u32 width = 8 << ((texparam >> 20) & 0x7);
+    u32 height = 8 << ((texparam >> 23) & 0x7);
+
+    s >>= 4;
+    t >>= 4;
+
+    // texture wrapping
+    // TODO: optimize this somehow
+    s = WrapTexCoord(s, width, (texparam & (1<<16)) != 0, (texparam & (1<<18)) != 0);
+    t = WrapTexCoord(t, height, (texparam & (1<<17)) != 0, (texparam & (1<<19)) != 0);
+
+    // alpha given to palette index 0 in the paletted formats
+    u8 alpha0 = (texparam & (1<<29)) ? 0 : 31;
+
+    // linear texel index, used by every format but the compressed one
+    u32 texel = (t * width) + s;
+
+    u32 format = (texparam >> 26) & 0x7;
+    switch (format)
+    {
+    case 1: // A3I5
+        {
+            u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr + texel);
+
+            texpal <<= 4;
+            *color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1));
+            // expand the 3-bit alpha to 5 bits
+            *alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6);
+        }
+        break;
+
+    case 2: // 4-color
+        {
+            u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr + (texel >> 2));
+            pixel >>= ((s & 0x3) << 1);
+            pixel &= 0x3;
+
+            texpal <<= 3;
+            *color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
+            *alpha = (pixel==0) ? alpha0 : 31;
+        }
+        break;
+
+    case 3: // 16-color
+        {
+            u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr + (texel >> 1));
+            if (s & 0x1) pixel >>= 4;
+            else         pixel &= 0xF;
+
+            texpal <<= 4;
+            *color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
+            *alpha = (pixel==0) ? alpha0 : 31;
+        }
+        break;
+
+    case 4: // 256-color
+        {
+            u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr + texel);
+
+            texpal <<= 4;
+            *color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1));
+            *alpha = (pixel==0) ? alpha0 : 31;
+        }
+        break;
+
+    case 5: // compressed
+        {
+            vramaddr += ((t & 0x3FC) * (width>>2)) + (s & 0x3FC);
+            vramaddr += (t & 0x3);
+
+            // address of the per-block palette info
+            u32 slot1addr = 0x20000 + ((vramaddr & 0x1FFFC) >> 1);
+            if (vramaddr >= 0x40000)
+                slot1addr += 0x10000;
+
+            u8 val = GPU::ReadVRAM_Texture<u8>(vramaddr);
+            val >>= (2 * (s & 0x3));
+
+            u16 palinfo = GPU::ReadVRAM_Texture<u16>(slot1addr);
+            u32 paloffset = (palinfo & 0x3FFF) << 2;
+            texpal <<= 4;
+
+            u32 palbase = texpal + paloffset;
+            u32 mode = palinfo >> 14;
+
+            switch (val & 0x3)
+            {
+            case 0:
+                *color = GPU::ReadVRAM_TexPal<u16>(palbase);
+                *alpha = 31;
+                break;
+
+            case 1:
+                *color = GPU::ReadVRAM_TexPal<u16>(palbase + 2);
+                *alpha = 31;
+                break;
+
+            case 2:
+                if (mode == 1)
+                {
+                    u16 color0 = GPU::ReadVRAM_TexPal<u16>(palbase);
+                    u16 color1 = GPU::ReadVRAM_TexPal<u16>(palbase + 2);
+                    *color = BlendTexColors(color0, color1, 1, 1, 1);
+                }
+                else if (mode == 3)
+                {
+                    u16 color0 = GPU::ReadVRAM_TexPal<u16>(palbase);
+                    u16 color1 = GPU::ReadVRAM_TexPal<u16>(palbase + 2);
+                    *color = BlendTexColors(color0, color1, 5, 3, 3);
+                }
+                else
+                {
+                    *color = GPU::ReadVRAM_TexPal<u16>(palbase + 4);
+                }
+                *alpha = 31;
+                break;
+
+            case 3:
+                if (mode == 2)
+                {
+                    *color = GPU::ReadVRAM_TexPal<u16>(palbase + 6);
+                    *alpha = 31;
+                }
+                else if (mode == 3)
+                {
+                    u16 color0 = GPU::ReadVRAM_TexPal<u16>(palbase);
+                    u16 color1 = GPU::ReadVRAM_TexPal<u16>(palbase + 2);
+                    *color = BlendTexColors(color0, color1, 3, 5, 3);
+                    *alpha = 31;
+                }
+                else
+                {
+                    // transparent texel
+                    *color = 0;
+                    *alpha = 0;
+                }
+                break;
+            }
+        }
+        break;
+
+    case 6: // A5I3
+        {
+            u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr + texel);
+
+            texpal <<= 4;
+            *color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1));
+            *alpha = (pixel >> 3);
+        }
+        break;
+
+    case 7: // direct color
+        {
+            *color = GPU::ReadVRAM_Texture<u16>(vramaddr + (texel << 1));
+            *alpha = (*color & 0x8000) ? 31 : 0;
+        }
+        break;
+    }
+}
+
+// Returns true when a pixel of depth z at (x, y) passes the depth test
+// against the value currently in DepthBuffer.
+bool DepthTest(Polygon* polygon, s32 x, s32 y, s32 z)
+{
+    u32 stored = DepthBuffer[(256*y) + x];
+
+    if (polygon->Attr & (1<<14))
+    {
+        // Attr bit 14: pass when z is within +/-0x200 of the stored depth
+        s32 delta = stored - z;
+        return (u32)(delta + 0x200) <= 0x400;
+    }
+
+    // default: pass when closer than the stored depth
+    return z < stored;
+}
+
+// Computes the color of one polygon pixel.
+// vr/vg/vb: interpolated vertex color; s/t: interpolated texture coordinates.
+// Returns R | G<<8 | B<<16 | alpha<<24.
+u32 RenderPixel(Polygon* polygon, s32 x, s32 y, s32 z, u8 vr, u8 vg, u8 vb, s16 s, s16 t)
+{
+    u32 polyalpha = (polygon->Attr >> 16) & 0x1F;
+    u32 texformat = (polygon->TexParam >> 26) & 0x7;
+
+    // untextured result: plain vertex color and polygon alpha
+    u8 r = vr;
+    u8 g = vg;
+    u8 b = vb;
+    u8 a = polyalpha;
+
+    if ((DispCnt & (1<<0)) && (texformat != 0))
+    {
+        u16 tcolor; u8 talpha;
+        TextureLookup(polygon->TexParam, polygon->TexPalette, s, t, &tcolor, &talpha);
+
+        // expand the 5-bit texel channels to 6 bits
+        u8 tr = (tcolor << 1) & 0x3E; if (tr) tr++;
+        u8 tg = (tcolor >> 4) & 0x3E; if (tg) tg++;
+        u8 tb = (tcolor >> 9) & 0x3E; if (tb) tb++;
+
+        // TODO: other blending modes
+        r = ((tr+1) * (vr+1) - 1) >> 6;
+        g = ((tg+1) * (vg+1) - 1) >> 6;
+        b = ((tb+1) * (vb+1) - 1) >> 6;
+        a = ((talpha+1) * (polyalpha+1) - 1) >> 5;
+    }
+
+    // polygon alpha 0 means wireframe; those pixels are drawn fully opaque
+    if (polyalpha == 0) a = 31;
+
+    return r | (g << 8) | (b << 16) | (a << 24);
+}
+
+// Rasterizes one polygon into ColorBuffer/DepthBuffer/AttrBuffer.
+//
+// The polygon is walked scanline by scanline from YTop to YBottom (capped to
+// the 192 visible lines). Two edge walkers (left and right) start at the top
+// vertex and advance in opposite directions around the vertex list; the
+// direction depends on FacingView. For each scanline the attributes are
+// first interpolated along both edges, then across the span.
+//
+// A polygon whose top and bottom Y are equal is drawn as a single-line span
+// between its leftmost and rightmost candidate vertices.
+void RenderPolygon(Polygon* polygon)
+{
+    int nverts = polygon->NumVertices;
+    bool isline = false;
+
+    int vtop = polygon->VTop, vbot = polygon->VBottom;
+    s32 ytop = polygon->YTop, ybot = polygon->YBottom;
+    s32 xtop = polygon->XTop, xbot = polygon->XBottom;
+
+    if (ytop > 191) return;
+
+    // draw, line per line
+
+    u32 polyalpha = (polygon->Attr >> 16) & 0x1F;
+    bool wireframe = (polyalpha == 0);
+
+    int lcur = vtop, rcur = vtop;
+    int lnext, rnext;
+
+    // dxl/dxr: X distance walked along the current left/right edge (12-bit fraction)
+    // lslope/rslope: X step per scanline of the current edges (12-bit fraction)
+    s32 dxl, dxr;
+    s32 lslope, rslope;
+    bool l_xmajor, r_xmajor;
+
+    if (ybot == ytop)
+    {
+        ybot++;
+        isline = true;
+
+        vtop = 0; vbot = 0;
+        xtop = 256; xbot = 0;
+        int i;
+
+        // pick the leftmost (vtop) and rightmost (vbot) among vertices 0, 1 and the last one
+        i = 1;
+        if (polygon->Vertices[i]->FinalPosition[0] < polygon->Vertices[vtop]->FinalPosition[0]) vtop = i;
+        if (polygon->Vertices[i]->FinalPosition[0] > polygon->Vertices[vbot]->FinalPosition[0]) vbot = i;
+
+        i = nverts - 1;
+        if (polygon->Vertices[i]->FinalPosition[0] < polygon->Vertices[vtop]->FinalPosition[0]) vtop = i;
+        if (polygon->Vertices[i]->FinalPosition[0] > polygon->Vertices[vbot]->FinalPosition[0]) vbot = i;
+
+        lcur = vtop; lnext = vtop;
+        rcur = vbot; rnext = vbot;
+
+        lslope = 0; l_xmajor = false;
+        rslope = 0; r_xmajor = false;
+    }
+    else
+    {
+        //while (polygon->Vertices[lnext]->FinalPosition[1] )
+        if (polygon->FacingView)
+        {
+            lnext = lcur + 1;
+            if (lnext >= nverts) lnext = 0;
+            rnext = rcur - 1;
+            if (rnext < 0) rnext = nverts - 1;
+        }
+        else
+        {
+            lnext = lcur - 1;
+            if (lnext < 0) lnext = nverts - 1;
+            rnext = rcur + 1;
+            if (rnext >= nverts) rnext = 0;
+        }
+
+        if (polygon->Vertices[lnext]->FinalPosition[1] == polygon->Vertices[lcur]->FinalPosition[1])
+            lslope = 0;
+        else
+            lslope = ((polygon->Vertices[lnext]->FinalPosition[0] - polygon->Vertices[lcur]->FinalPosition[0]) << 12) /
+                     (polygon->Vertices[lnext]->FinalPosition[1] - polygon->Vertices[lcur]->FinalPosition[1]);
+
+        if (polygon->Vertices[rnext]->FinalPosition[1] == polygon->Vertices[rcur]->FinalPosition[1])
+            rslope = 0;
+        else
+            rslope = ((polygon->Vertices[rnext]->FinalPosition[0] - polygon->Vertices[rcur]->FinalPosition[0]) << 12) /
+                     (polygon->Vertices[rnext]->FinalPosition[1] - polygon->Vertices[rcur]->FinalPosition[1]);
+
+        // an edge is X-major when it moves more than one pixel in X per scanline
+        l_xmajor = (lslope < -0x1000) || (lslope > 0x1000);
+        r_xmajor = (rslope < -0x1000) || (rslope > 0x1000);
+    }
+
+    if (l_xmajor)    dxl = (lslope > 0) ? 0x800 : (-lslope-0x800)+0x1000;
+    else if (lslope) dxl = (lslope > 0) ? 0 : 0x1000;
+    else             dxl = 0;
+
+    if (r_xmajor)    dxr = (rslope > 0) ? rslope-0x800 : 0x800+0x1000;
+    else if (rslope) dxr = (rslope > 0) ? 0 : 0x1000;
+    else             dxr = 0x1000;
+
+    if (ybot > 192) ybot = 192;
+    for (s32 y = ytop; y < ybot; y++)
+    {
+        if (!isline)
+        {
+            // advance the left edge once this scanline reaches its end vertex
+            if (y >= polygon->Vertices[lnext]->FinalPosition[1] && lcur != vbot)
+            {
+                while (y >= polygon->Vertices[lnext]->FinalPosition[1] && lcur != vbot)
+                {
+                    lcur = lnext;
+
+                    if (polygon->FacingView)
+                    {
+                        lnext = lcur + 1;
+                        if (lnext >= nverts) lnext = 0;
+                    }
+                    else
+                    {
+                        lnext = lcur - 1;
+                        if (lnext < 0) lnext = nverts - 1;
+                    }
+                }
+
+                if (polygon->Vertices[lnext]->FinalPosition[1] == polygon->Vertices[lcur]->FinalPosition[1])
+                    lslope = 0;
+                else
+                    lslope = ((polygon->Vertices[lnext]->FinalPosition[0] - polygon->Vertices[lcur]->FinalPosition[0]) << 12) /
+                             (polygon->Vertices[lnext]->FinalPosition[1] - polygon->Vertices[lcur]->FinalPosition[1]);
+
+                l_xmajor = (lslope < -0x1000) || (lslope > 0x1000);
+
+                if (l_xmajor)    dxl = (lslope > 0) ? 0x800 : (-lslope-0x800)+0x1000;
+                else if (lslope) dxl = (lslope > 0) ? 0 : 0x1000;
+                else             dxl = 0;
+            }
+
+            // same for the right edge
+            if (y >= polygon->Vertices[rnext]->FinalPosition[1] && rcur != vbot)
+            {
+                while (y >= polygon->Vertices[rnext]->FinalPosition[1] && rcur != vbot)
+                {
+                    rcur = rnext;
+
+                    if (polygon->FacingView)
+                    {
+                        rnext = rcur - 1;
+                        if (rnext < 0) rnext = nverts - 1;
+                    }
+                    else
+                    {
+                        rnext = rcur + 1;
+                        if (rnext >= nverts) rnext = 0;
+                    }
+                }
+
+                if (polygon->Vertices[rnext]->FinalPosition[1] == polygon->Vertices[rcur]->FinalPosition[1])
+                    rslope = 0;
+                else
+                    rslope = ((polygon->Vertices[rnext]->FinalPosition[0] - polygon->Vertices[rcur]->FinalPosition[0]) << 12) /
+                             (polygon->Vertices[rnext]->FinalPosition[1] - polygon->Vertices[rcur]->FinalPosition[1]);
+
+                r_xmajor = (rslope < -0x1000) || (rslope > 0x1000);
+
+                if (r_xmajor)    dxr = (rslope > 0) ? rslope-0x800 : 0x800+0x1000;
+                else if (rslope) dxr = (rslope > 0) ? 0 : 0x1000;
+                else             dxr = 0x1000;
+            }
+        }
+
+        Vertex *vlcur, *vlnext, *vrcur, *vrnext;
+        s32 xstart, xend;
+        s32 slope_start, slope_end;
+
+        // work out the span [xstart, xend] covered on this scanline
+        if (lslope == 0 && rslope == 0 &&
+            polygon->Vertices[lcur]->FinalPosition[0] == polygon->Vertices[rcur]->FinalPosition[0])
+        {
+            xstart = polygon->Vertices[lcur]->FinalPosition[0];
+            xend = xstart;
+        }
+        else
+        {
+            // the edge position is clamped to the X range of the edge's two vertices
+            if (lslope > 0)
+            {
+                xstart = polygon->Vertices[lcur]->FinalPosition[0] + (dxl >> 12);
+                if (xstart < polygon->Vertices[lcur]->FinalPosition[0])
+                    xstart = polygon->Vertices[lcur]->FinalPosition[0];
+                else if (xstart > polygon->Vertices[lnext]->FinalPosition[0]-1)
+                    xstart = polygon->Vertices[lnext]->FinalPosition[0]-1;
+            }
+            else if (lslope < 0)
+            {
+                xstart = polygon->Vertices[lcur]->FinalPosition[0] - (dxl >> 12);
+                if (xstart < polygon->Vertices[lnext]->FinalPosition[0])
+                    xstart = polygon->Vertices[lnext]->FinalPosition[0];
+                else if (xstart > polygon->Vertices[lcur]->FinalPosition[0]-1)
+                    xstart = polygon->Vertices[lcur]->FinalPosition[0]-1;
+            }
+            else
+                xstart = polygon->Vertices[lcur]->FinalPosition[0];
+
+            if (rslope > 0)
+            {
+                xend = polygon->Vertices[rcur]->FinalPosition[0] + (dxr >> 12);
+                if (xend < polygon->Vertices[rcur]->FinalPosition[0])
+                    xend = polygon->Vertices[rcur]->FinalPosition[0];
+                else if (xend > polygon->Vertices[rnext]->FinalPosition[0]-1)
+                    xend = polygon->Vertices[rnext]->FinalPosition[0]-1;
+            }
+            else if (rslope < 0)
+            {
+                xend = polygon->Vertices[rcur]->FinalPosition[0] - (dxr >> 12);
+                if (xend < polygon->Vertices[rnext]->FinalPosition[0])
+                    xend = polygon->Vertices[rnext]->FinalPosition[0];
+                else if (xend > polygon->Vertices[rcur]->FinalPosition[0]-1)
+                    xend = polygon->Vertices[rcur]->FinalPosition[0]-1;
+            }
+            else
+                xend = polygon->Vertices[rcur]->FinalPosition[0] - 1;
+        }
+
+        // if the left and right edges are swapped, render backwards.
+        // note: we 'forget' to swap the xmajor flags, on purpose
+        // the hardware has the same bug
+        if (xstart > xend)
+        {
+            vlcur = polygon->Vertices[rcur];
+            vlnext = polygon->Vertices[rnext];
+            vrcur = polygon->Vertices[lcur];
+            vrnext = polygon->Vertices[lnext];
+
+            slope_start = rslope;
+            slope_end = lslope;
+
+            s32 tmp = xstart; xstart = xend; xend = tmp;
+        }
+        else
+        {
+            vlcur = polygon->Vertices[lcur];
+            vlnext = polygon->Vertices[lnext];
+            vrcur = polygon->Vertices[rcur];
+            vrnext = polygon->Vertices[rnext];
+
+            slope_start = lslope;
+            slope_end = rslope;
+        }
+
+        // interpolate attributes along Y
+        // the weights are scaled by W of the opposite vertex (perspective correction)
+        s64 lfactor1, lfactor2;
+        s64 rfactor1, rfactor2;
+
+        if (l_xmajor)
+        {
+            lfactor1 = (vlnext->FinalPosition[0] - xstart) * vlnext->FinalPosition[3];
+            lfactor2 = (xstart - vlcur->FinalPosition[0]) * vlcur->FinalPosition[3];
+        }
+        else
+        {
+            lfactor1 = (vlnext->FinalPosition[1] - y) * vlnext->FinalPosition[3];
+            lfactor2 = (y - vlcur->FinalPosition[1]) * vlcur->FinalPosition[3];
+        }
+
+        s64 ldenom = lfactor1 + lfactor2;
+        if (ldenom == 0)
+        {
+            // avoid dividing by zero: take everything from the current vertex
+            lfactor1 = 0x1000;
+            lfactor2 = 0;
+            ldenom = 0x1000;
+        }
+
+        if (r_xmajor)
+        {
+            rfactor1 = (vrnext->FinalPosition[0] - xend+1) * vrnext->FinalPosition[3];
+            rfactor2 = (xend+1 - vrcur->FinalPosition[0]) * vrcur->FinalPosition[3];
+        }
+        else
+        {
+            rfactor1 = (vrnext->FinalPosition[1] - y) * vrnext->FinalPosition[3];
+            rfactor2 = (y - vrcur->FinalPosition[1]) * vrcur->FinalPosition[3];
+        }
+
+        s64 rdenom = rfactor1 + rfactor2;
+        if (rdenom == 0)
+        {
+            rfactor1 = 0x1000;
+            rfactor2 = 0;
+            rdenom = 0x1000;
+        }
+
+        s32 zl = ((lfactor1 * vlcur->FinalPosition[2]) + (lfactor2 * vlnext->FinalPosition[2])) / ldenom;
+        s32 zr = ((rfactor1 * vrcur->FinalPosition[2]) + (rfactor2 * vrnext->FinalPosition[2])) / rdenom;
+
+        s32 wl = ((lfactor1 * vlcur->FinalPosition[3]) + (lfactor2 * vlnext->FinalPosition[3])) / ldenom;
+        s32 wr = ((rfactor1 * vrcur->FinalPosition[3]) + (rfactor2 * vrnext->FinalPosition[3])) / rdenom;
+
+        s32 rl = ((lfactor1 * vlcur->FinalColor[0]) + (lfactor2 * vlnext->FinalColor[0])) / ldenom;
+        s32 gl = ((lfactor1 * vlcur->FinalColor[1]) + (lfactor2 * vlnext->FinalColor[1])) / ldenom;
+        s32 bl = ((lfactor1 * vlcur->FinalColor[2]) + (lfactor2 * vlnext->FinalColor[2])) / ldenom;
+
+        s32 sl = ((lfactor1 * vlcur->TexCoords[0]) + (lfactor2 * vlnext->TexCoords[0])) / ldenom;
+        s32 tl = ((lfactor1 * vlcur->TexCoords[1]) + (lfactor2 * vlnext->TexCoords[1])) / ldenom;
+
+        s32 rr = ((rfactor1 * vrcur->FinalColor[0]) + (rfactor2 * vrnext->FinalColor[0])) / rdenom;
+        s32 gr = ((rfactor1 * vrcur->FinalColor[1]) + (rfactor2 * vrnext->FinalColor[1])) / rdenom;
+        s32 br = ((rfactor1 * vrcur->FinalColor[2]) + (rfactor2 * vrnext->FinalColor[2])) / rdenom;
+
+        s32 sr = ((rfactor1 * vrcur->TexCoords[0]) + (rfactor2 * vrnext->TexCoords[0])) / rdenom;
+        s32 tr = ((rfactor1 * vrcur->TexCoords[1]) + (rfactor2 * vrnext->TexCoords[1])) / rdenom;
+
+        // calculate edges
+        // pixels left of l_edgeend / right of r_edgestart count as edge pixels
+        s32 l_edgeend, r_edgestart;
+
+        if (l_xmajor)
+        {
+            if (slope_start > 0) l_edgeend = vlcur->FinalPosition[0] + ((dxl + slope_start) >> 12);
+            else                 l_edgeend = vlcur->FinalPosition[0] - ((dxl - slope_start) >> 12);
+
+            if (l_edgeend == xstart) l_edgeend++;
+        }
+        else
+            l_edgeend = xstart + 1;
+
+        if (r_xmajor)
+        {
+            if (slope_end > 0) r_edgestart = vrcur->FinalPosition[0] + ((dxr + slope_end) >> 12);
+            else               r_edgestart = vrcur->FinalPosition[0] - ((dxr - slope_end) >> 12);
+
+            // compare against the span end, mirroring the left edge check above
+            // (this used to read a variable that was never assigned)
+            if (r_edgestart == xend) r_edgestart--;
+        }
+        else
+            r_edgestart = xend - 1;
+
+        // edge fill rules for opaque pixels:
+        // * right edge is filled if slope > 1
+        // * left edge is filled if slope <= 1
+        // * edges with slope = 0 are always filled
+        // edges are always filled if the pixels are translucent
+        // in wireframe mode, there are special rules for equal Z (TODO)
+
+        for (s32 x = xstart; x <= xend; x++)
+        {
+            if (x < 0) continue;
+            if (x > 255) break;
+
+            // edge flags: 0x1 left, 0x2 right, 0x4 top line, 0x8 bottom line
+            int edge = 0;
+            if (y == ytop) edge |= 0x4;
+            else if (y == ybot-1) edge |= 0x8;
+            if (x < l_edgeend) edge |= 0x1;
+            else if (x > r_edgestart) edge |= 0x2;
+
+            // wireframe polygons. really ugly, but works
+            if (wireframe && edge==0) continue;
+
+            // interpolate across the span, again weighted by the opposite side's W
+            s64 factor1 = (xend+1 - x) * wr;
+            s64 factor2 = (x - xstart) * wl;
+            s64 denom = factor1 + factor2;
+            if (denom == 0)
+            {
+                factor1 = 0x1000;
+                factor2 = 0;
+                denom = 0x1000;
+            }
+
+            s32 z = ((factor1 * zl) + (factor2 * zr)) / denom;
+            if (!DepthTest(polygon, x, y, z)) continue;
+
+            u32 vr = ((factor1 * rl) + (factor2 * rr)) / denom;
+            u32 vg = ((factor1 * gl) + (factor2 * gr)) / denom;
+            u32 vb = ((factor1 * bl) + (factor2 * br)) / denom;
+
+            s16 s = ((factor1 * sl) + (factor2 * sr)) / denom;
+            s16 t = ((factor1 * tl) + (factor2 * tr)) / denom;
+
+            u32 color = RenderPixel(polygon, x, y, z, vr>>3, vg>>3, vb>>3, s, t);
+            u32 attr = 0;
+            u32 pixeladdr = (y*256) + x;
+
+            u8 alpha = color >> 24;
+
+            // alpha test
+            if (DispCnt & (1<<2))
+            {
+                if (alpha <= AlphaRef) continue;
+            }
+            else
+            {
+                if (alpha == 0) continue;
+            }
+
+            // alpha blending disable
+            // TODO: check alpha test when blending is disabled
+            if (!(DispCnt & (1<<3)))
+                alpha = 31;
+
+            u32 dstcolor = ColorBuffer[pixeladdr];
+            u32 dstalpha = dstcolor >> 24;
+
+            if (alpha == 31)
+            {
+                // edge fill rules for opaque pixels
+                // TODO, eventually: antialiasing
+                if (!wireframe)
+                {
+                    if ((edge & 0x1) && slope_start > 0x1000)
+                        continue;
+                    if ((edge & 0x2) && (slope_end != 0 && slope_end <= 0x1000))
+                        continue;
+                }
+
+                DepthBuffer[pixeladdr] = z;
+            }
+            else if (dstalpha == 0)
+            {
+                // nothing to blend with: the source color is stored as is
+                // TODO: conditional Z-buffer update
+                DepthBuffer[pixeladdr] = z;
+            }
+            else
+            {
+                u32 srcR = color & 0x3F;
+                u32 srcG = (color >> 8) & 0x3F;
+                u32 srcB = (color >> 16) & 0x3F;
+
+                u32 dstR = dstcolor & 0x3F;
+                u32 dstG = (dstcolor >> 8) & 0x3F;
+                u32 dstB = (dstcolor >> 16) & 0x3F;
+
+                // blend with weights (alpha+1)/32 and (31-alpha)/32
+                alpha++;
+                dstR = ((srcR * alpha) + (dstR * (32-alpha))) >> 5;
+                dstG = ((srcG * alpha) + (dstG * (32-alpha))) >> 5;
+                dstB = ((srcB * alpha) + (dstB * (32-alpha))) >> 5;
+
+                // the resulting alpha is the larger of source and destination
+                alpha--;
+                if (alpha > dstalpha) dstalpha = alpha;
+
+                color = dstR | (dstG << 8) | (dstB << 16) | (dstalpha << 24);
+
+                // TODO: conditional Z-buffer update
+                DepthBuffer[pixeladdr] = z;
+            }
+
+            ColorBuffer[pixeladdr] = color;
+            AttrBuffer[pixeladdr] = attr;
+        }
+
+        // step both edge walkers to the next scanline
+        if (lslope > 0) dxl += lslope;
+        else            dxl -= lslope;
+        if (rslope > 0) dxr += rslope;
+        else            dxr -= rslope;
+    }
+}
+
+// Converts a 15-bit clear depth value into the depth format used by DepthBuffer.
+static u32 ClearDepthToZ(u32 val)
+{
+    u32 z = ((val & 0x7FFF) * 0x200) + 0x1FF;
+    if (z >= 0x10000 && z < 0xFFFFFF) z++;
+    return z;
+}
+
+// Renders one frame: clears the color/depth/attribute buffers (from the
+// clear bitmap when DispCnt bit 14 is set, from the flat clear values
+// otherwise), then draws all opaque polygons followed by all translucent ones.
+void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys)
+{
+    u32 polyid = (ClearAttr1 >> 24) & 0x3F;
+
+    if (DispCnt & (1<<14))
+    {
+        // clear bitmap: color is read from 0x40000, depth/fog from 0x60000,
+        // both scrolled by the 8-bit offsets in ClearAttr2
+        u8 xoff = (ClearAttr2 >> 16) & 0xFF;
+        u8 yoff = (ClearAttr2 >> 24) & 0xFF;
+
+        for (int line = 0; line < 192; line++)
+        {
+            int rowstart = line * 256;
+
+            for (int x = 0; x < 256; x++)
+            {
+                u16 val2 = GPU::ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1));
+                u16 val3 = GPU::ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1));
+
+                // TODO: confirm color conversion
+                u32 r = (val2 << 1) & 0x3E; if (r) r++;
+                u32 g = (val2 >> 4) & 0x3E; if (g) g++;
+                u32 b = (val2 >> 9) & 0x3E; if (b) b++;
+                u32 a = (val2 & 0x8000) ? 0x1F000000 : 0;
+
+                ColorBuffer[rowstart+x] = r | (g << 8) | (b << 16) | a;
+                DepthBuffer[rowstart+x] = ClearDepthToZ(val3);
+                AttrBuffer[rowstart+x] = polyid | ((val3 & 0x8000) >> 7);
+
+                xoff++;
+            }
+
+            yoff++;
+        }
+    }
+    else
+    {
+        // TODO: confirm color conversion
+        u32 r = (ClearAttr1 << 1) & 0x3E; if (r) r++;
+        u32 g = (ClearAttr1 >> 4) & 0x3E; if (g) g++;
+        u32 b = (ClearAttr1 >> 9) & 0x3E; if (b) b++;
+        u32 a = (ClearAttr1 >> 16) & 0x1F;
+
+        u32 clearcolor = r | (g << 8) | (b << 16) | (a << 24);
+        u32 cleardepth = ClearDepthToZ(ClearAttr2);
+        u32 clearattr = polyid | ((ClearAttr1 & 0x8000) >> 7);
+
+        for (int i = 0; i < 256*192; i++)
+        {
+            ColorBuffer[i] = clearcolor;
+            DepthBuffer[i] = cleardepth;
+            AttrBuffer[i] = clearattr;
+        }
+    }
+
+    // TODO: Y-sorting of translucent polygons
+
+    // pass 0 draws the opaque polygons, pass 1 the translucent ones
+    for (int pass = 0; pass < 2; pass++)
+    {
+        bool translucent = (pass == 1);
+
+        for (int i = 0; i < npolys; i++)
+        {
+            if (polygons[i].Translucent == translucent)
+                RenderPolygon(&polygons[i]);
+        }
+    }
+}
+
+// Returns a pointer to the first of the 256 rendered pixels of the given line.
+u32* GetLine(int line)
+{
+    return ColorBuffer + (line * 256);
+}
+
+}
+}
diff --git a/src/NDS.cpp b/src/NDS.cpp
new file mode 100644
index 0000000..574f557
--- /dev/null
+++ b/src/NDS.cpp
@@ -0,0 +1,2192 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "NDS.h"
+#include "ARM.h"
+#include "CP15.h"
+#include "NDSCart.h"
+#include "DMA.h"
+#include "FIFO.h"
+#include "GPU.h"
+#include "SPI.h"
+#include "RTC.h"
+#include "Wifi.h"
+
+
+namespace NDS
+{
+
+// TODO LIST
+// * stick all the variables in a big structure?
+// would make it easier to deal with savestates
+
+/*SchedEvent SchedBuffer[SCHED_BUF_LEN];
+SchedEvent* SchedQueue;
+
+bool NeedReschedule;*/
+
+// the two CPU cores; created in Init() as ARM(0) and ARM(1)
+ARM* ARM9;
+ARM* ARM7;
+
+/*s32 ARM9Cycles, ARM7Cycles;
+s32 CompensatedCycles;
+s32 SchedCycles;*/
+// length of the current scheduler iteration, see CalcIterationCycles()
+s32 CurIterationCycles;
+// cycle difference carried over between ARM7 runs, see RunFrame()
+s32 ARM7Offset;
+
+// scheduled events; bit i of SchedListMask is set while SchedList[i] is pending
+SchedEvent SchedList[Event_MAX];
+u32 SchedListMask;
+
+// CPU stop flags: low 16 bits for CPU 0, high 16 bits for CPU 1 (see StopCPU())
+u32 CPUStop;
+
+// BIOS images, loaded from bios9.bin / bios7.bin in Reset()
+u8 ARM9BIOS[0x1000];
+u8 ARM7BIOS[0x4000];
+
+u8 MainRAM[0x400000];
+
+// shared WRAM and its current split between the two CPUs (see MapSharedWRAM())
+u8 SharedWRAM[0x8000];
+u8 WRAMCnt;
+u8* SWRAM_ARM9;
+u8* SWRAM_ARM7;
+u32 SWRAM_ARM9Mask;
+u32 SWRAM_ARM7Mask;
+
+u8 ARM7WRAM[0x10000];
+
+u16 ExMemCnt[2];
+
+u8 ROMSeed0[2*8];
+u8 ROMSeed1[2*8];
+
+// I/O registers
+// interrupt registers, indexed by CPU
+u32 IME[2];
+u32 IE[2], IF[2];
+
+u8 PostFlag9;
+u8 PostFlag7;
+u16 PowerControl9;
+u16 PowerControl7;
+
+u16 ARM7BIOSProt;
+
+// timers: 0-3 count CPU 0 cycles, 4-7 count CPU 1 cycles
+Timer Timers[8];
+
+// DMA channels: 0-3 belong to CPU 0, 4-7 to CPU 1
+DMA* DMAs[8];
+u32 DMA9Fill[4];
+
+u16 IPCSync9, IPCSync7;
+u16 IPCFIFOCnt9, IPCFIFOCnt7;
+FIFO<u32>* IPCFIFO9; // FIFO in which the ARM9 writes
+FIFO<u32>* IPCFIFO7;
+
+u16 DivCnt;
+u32 DivNumerator[2];
+u32 DivDenominator[2];
+u32 DivQuotient[2];
+u32 DivRemainder[2];
+
+u16 SqrtCnt;
+u32 SqrtVal[2];
+u32 SqrtRes;
+
+// key state: PressKey() clears a key's bit, ReleaseKey() sets it
+u32 KeyInput;
+
+u16 _soundbias; // temp
+
+// RunFrame() does nothing while this is false
+bool Running;
+
+
+// Creates the CPU cores, DMA channels and IPC FIFOs, initializes the
+// subsystems, then resets the whole system.
+// Returns false as soon as one subsystem fails to initialize.
+bool Init()
+{
+    ARM9 = new ARM(0);
+    ARM7 = new ARM(1);
+
+    // DMAs[0..3] are channels 0-3 of CPU 0, DMAs[4..7] channels 0-3 of CPU 1
+    for (int i = 0; i < 8; i++)
+        DMAs[i] = new DMA(i >> 2, i & 0x3);
+
+    IPCFIFO9 = new FIFO<u32>(16);
+    IPCFIFO7 = new FIFO<u32>(16);
+
+    // stops at the first subsystem that reports failure
+    bool ok = NDSCart::Init()
+           && GPU::Init()
+           && SPI::Init()
+           && RTC::Init();
+    if (!ok) return false;
+
+    Reset();
+    return true;
+}
+
+// Releases everything allocated by Init() and shuts the subsystems down.
+void DeInit()
+{
+    delete ARM9;
+    delete ARM7;
+
+    for (int channel = 0; channel < 8; channel++)
+    {
+        delete DMAs[channel];
+    }
+
+    delete IPCFIFO9;
+    delete IPCFIFO7;
+
+    NDSCart::DeInit();
+    GPU::DeInit();
+    SPI::DeInit();
+    RTC::DeInit();
+}
+
+
+// Boots the loaded ROM directly: copies both binaries from the cartridge
+// into memory, writes the values the boot process would have left in main
+// RAM, configures CP15 and jumps both CPUs to their entry points.
+void SetupDirectBoot()
+{
+    // eight words at ROM offset 0x20: offset, entry point, RAM address and
+    // size of the ARM9 binary, then the same four for the ARM7 binary
+    u32 header[8];
+    memcpy(header, &NDSCart::CartROM[0x20], sizeof(header));
+
+    u32 arm9offset = header[0], arm9entry = header[1], arm9ram = header[2], arm9size = header[3];
+    u32 arm7offset = header[4], arm7entry = header[5], arm7ram = header[6], arm7size = header[7];
+
+    printf("ARM9: offset=%08X entry=%08X RAM=%08X size=%08X\n",
+           arm9offset, arm9entry, arm9ram, arm9size);
+    printf("ARM7: offset=%08X entry=%08X RAM=%08X size=%08X\n",
+           arm7offset, arm7entry, arm7ram, arm7size);
+
+    MapSharedWRAM(3);
+
+    // copy the ARM9 binary
+    for (u32 pos = 0; pos < arm9size; pos += 4)
+    {
+        u32 word = *(u32*)&NDSCart::CartROM[arm9offset+pos];
+        ARM9Write32(arm9ram+pos, word);
+    }
+
+    // copy the ARM7 binary
+    for (u32 pos = 0; pos < arm7size; pos += 4)
+    {
+        u32 word = *(u32*)&NDSCart::CartROM[arm7offset+pos];
+        ARM7Write32(arm7ram+pos, word);
+    }
+
+    // copy the first 0x170 bytes of the ROM to 0x027FFE00
+    for (u32 pos = 0; pos < 0x170; pos += 4)
+    {
+        u32 word = *(u32*)&NDSCart::CartROM[pos];
+        ARM9Write32(0x027FFE00+pos, word);
+    }
+
+    // two ROM halfwords that get written to both info blocks below
+    u16 rom15E = *(u16*)&NDSCart::CartROM[0x15E];
+    u16 rom6C = *(u16*)&NDSCart::CartROM[0x6C];
+
+    ARM9Write32(0x027FF800, 0x00001FC2);
+    ARM9Write32(0x027FF804, 0x00001FC2);
+    ARM9Write16(0x027FF808, rom15E);
+    ARM9Write16(0x027FF80A, rom6C);
+
+    ARM9Write16(0x027FF850, 0x5835);
+
+    ARM9Write32(0x027FFC00, 0x00001FC2);
+    ARM9Write32(0x027FFC04, 0x00001FC2);
+    ARM9Write16(0x027FFC08, rom15E);
+    ARM9Write16(0x027FFC0A, rom6C);
+
+    ARM9Write16(0x027FFC10, 0x5835);
+    ARM9Write16(0x027FFC30, 0xFFFF);
+    ARM9Write16(0x027FFC40, 0x0001);
+
+    CP15::Write(0x910, 0x0300000A);
+    CP15::Write(0x911, 0x00000020);
+    CP15::Write(0x100, 0x00050000);
+
+    ARM9->JumpTo(arm9entry);
+    ARM7->JumpTo(arm7entry);
+
+    PowerControl9 = 0x820F;
+    GPU::DisplaySwap(PowerControl9);
+
+    ARM7BIOSProt = 0x1204;
+}
+
+// Resets the whole system: reloads the BIOS images, clears RAM and the
+// I/O state, resets CPUs, DMA channels and subsystems, empties the
+// scheduler, then loads the test ROM.
+void Reset()
+{
+    FILE* f;
+    u32 i;
+
+    f = fopen("bios9.bin", "rb");
+    if (!f)
+        printf("ARM9 BIOS not found\n");
+    else
+    {
+        fseek(f, 0, SEEK_SET);
+        fread(ARM9BIOS, 0x1000, 1, f);
+
+        printf("ARM9 BIOS loaded\n");
+        fclose(f);
+    }
+
+    f = fopen("bios7.bin", "rb");
+    if (!f)
+        printf("ARM7 BIOS not found\n");
+    else
+    {
+        fseek(f, 0, SEEK_SET);
+        fread(ARM7BIOS, 0x4000, 1, f);
+
+        printf("ARM7 BIOS loaded\n");
+        fclose(f);
+    }
+
+    memset(MainRAM, 0, 0x400000);
+    memset(SharedWRAM, 0, 0x8000);
+    memset(ARM7WRAM, 0, 0x10000);
+
+    MapSharedWRAM(0);
+
+    ExMemCnt[0] = 0;
+    ExMemCnt[1] = 0;
+    memset(ROMSeed0, 0, 2*8);
+    memset(ROMSeed1, 0, 2*8);
+
+    // clear the whole interrupt state, not just IME: a reset issued while
+    // the system was running must not keep stale enables or pending IRQs
+    IME[0] = 0;
+    IME[1] = 0;
+    IE[0] = 0;
+    IE[1] = 0;
+    IF[0] = 0;
+    IF[1] = 0;
+
+    PostFlag9 = 0x00;
+    PostFlag7 = 0x00;
+    PowerControl9 = 0x0001;
+    PowerControl7 = 0x0001;
+
+    ARM7BIOSProt = 0;
+
+    IPCSync9 = 0;
+    IPCSync7 = 0;
+    IPCFIFOCnt9 = 0;
+    IPCFIFOCnt7 = 0;
+    IPCFIFO9->Clear();
+    IPCFIFO7->Clear();
+
+    DivCnt = 0;
+    SqrtCnt = 0;
+
+    ARM9->Reset();
+    ARM7->Reset();
+    CP15::Reset();
+
+    CPUStop = 0;
+
+    memset(Timers, 0, 8*sizeof(Timer));
+
+    for (i = 0; i < 8; i++) DMAs[i]->Reset();
+    memset(DMA9Fill, 0, 4*4);
+
+    NDSCart::Reset();
+    GPU::Reset();
+    SPI::Reset();
+    RTC::Reset();
+    Wifi::Reset();
+
+    // memset(SchedBuffer, 0, sizeof(SchedEvent)*SCHED_BUF_LEN);
+    // SchedQueue = NULL;
+    memset(SchedList, 0, sizeof(SchedList));
+    SchedListMask = 0;
+
+    /*ARM9Cycles = 0;
+    ARM7Cycles = 0;
+    SchedCycles = 0;*/
+    CurIterationCycles = 0;
+    ARM7Offset = 0;
+
+    // all keys released
+    KeyInput = 0x007F03FF;
+
+    _soundbias = 0;
+
+    // test
+    //LoadROM();
+    //LoadFirmware();
+    // a_interp2.nds a_rounding (10) (11) a_slope (5)
+    if (NDSCart::LoadROM("rom/nsmb.nds"))
+        Running = true; // hax
+}
+
+
+// Sets CurIterationCycles to the number of cycles until the nearest pending
+// event, capped at 16.
+void CalcIterationCycles()
+{
+    s32 shortest = 16;
+
+    for (int evt = 0; evt < Event_MAX; evt++)
+    {
+        bool pending = (SchedListMask & (1<<evt)) != 0;
+
+        if (pending && SchedList[evt].WaitCycles < shortest)
+            shortest = SchedList[evt].WaitCycles;
+    }
+
+    CurIterationCycles = shortest;
+}
+
+// Advances the timers by the cycles the CPUs just ran, then advances the
+// scheduled events by 'cycles' and fires those that are due.
+void RunSystem(s32 cycles)
+{
+    // timers 0-3 count ARM9 cycles (halved).
+    // this loop must stop at 4: timers 4-7 are advanced by the ARM7 loop
+    // below and would otherwise be counted twice.
+    for (int i = 0; i < 4; i++)
+    {
+        // enabled (bit 7) and not cascading (bit 2)
+        if ((Timers[i].Cnt & 0x84) == 0x80)
+            Timers[i].Counter += (ARM9->Cycles >> 1) << Timers[i].CycleShift;
+    }
+    // timers 4-7 count ARM7 cycles
+    for (int i = 4; i < 8; i++)
+    {
+        if ((Timers[i].Cnt & 0x84) == 0x80)
+            Timers[i].Counter += ARM7->Cycles << Timers[i].CycleShift;
+    }
+
+    for (int i = 0; i < Event_MAX; i++)
+    {
+        if (!(SchedListMask & (1<<i)))
+            continue;
+
+        SchedList[i].WaitCycles -= cycles;
+        if (SchedList[i].WaitCycles < 1)
+        {
+            // clear the pending bit first so the handler can reschedule itself
+            SchedListMask &= ~(1<<i);
+            SchedList[i].Func(SchedList[i].Param);
+        }
+    }
+}
+
+// Runs the system for one frame (560190 cycles), in iterations whose
+// length is chosen by CalcIterationCycles(). Does nothing unless Running.
+void RunFrame()
+{
+    if (!Running) return; // dorp
+
+    s32 framecycles = 560190;
+
+    GPU::StartFrame();
+
+    while (Running && framecycles>0)
+    {
+        s32 ndscyclestorun;
+
+        CalcIterationCycles();
+
+        if (CPUStop & 0xFFFF)
+        {
+            // ARM9 is stopped: let its DMA channels use up the iteration
+            s32 remaining = CurIterationCycles;
+            for (int i = 0; i < 4; i++)
+            {
+                remaining = DMAs[i]->Run(remaining);
+                if (remaining <= 0) break;
+            }
+            ndscyclestorun = CurIterationCycles - remaining;
+
+            // TODO: run other timing critical shit, like timers
+            GPU3D::Run(ndscyclestorun);
+        }
+        else
+        {
+            ARM9->CyclesToRun = CurIterationCycles << 1;
+            ARM9->Execute();
+            ndscyclestorun = ARM9->Cycles >> 1;
+        }
+
+        if (CPUStop & 0xFFFF0000)
+        {
+            // ARM7 is stopped: same thing with its DMA channels
+            s32 remaining = ndscyclestorun - ARM7Offset;
+            for (int i = 4; i < 8; i++)
+            {
+                remaining = DMAs[i]->Run(remaining);
+                if (remaining <= 0) break;
+            }
+            ARM7Offset = remaining;
+        }
+        else
+        {
+            ARM7->CyclesToRun = ndscyclestorun - ARM7Offset;
+            ARM7->Execute();
+            // remember by how much the ARM7 ran past its target
+            ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun;
+        }
+
+        RunSystem(ndscyclestorun);
+        //GPU3D::Run(ndscyclestorun);
+
+        /*while (ndscycles < ndscyclestorun)
+        {
+            ARM7->CyclesToRun = ndscyclestorun - ndscycles - ARM7Offset;
+            ARM7->Execute();
+            ARM7Offset = 0;
+
+            RunEvents(ARM7->Cycles);
+            ndscycles += ARM7->Cycles;
+        }
+
+        ARM7Offset = ndscycles - ndscyclestorun;*/
+
+        framecycles -= ndscyclestorun;
+    }
+}
+
+// Recomputes the iteration length and updates the ARM9's cycle target to match.
+// Called by ScheduleEvent() after a new event has been added.
+void Reschedule()
+{
+ CalcIterationCycles();
+
+ // the ARM9 cycle target is twice the iteration length
+ ARM9->CyclesToRun = CurIterationCycles << 1;
+ //ARM7->CyclesToRun = CurIterationCycles - ARM7Offset;
+ //ARM7->CyclesToRun = (ARM9->Cycles >> 1) - ARM7->Cycles - ARM7Offset;
+}
+
+// Schedules event 'id' to call func(param) after 'delay' cycles.
+// periodic: the delay is added to the event's previous wait count instead
+// of being counted from the current ARM9 position.
+// An event that is already pending is left untouched.
+void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 param)
+{
+    u32 idbit = (1<<id);
+
+    if (SchedListMask & idbit)
+    {
+        printf("!! EVENT %d ALREADY SCHEDULED\n", id);
+        return;
+    }
+
+    SchedEvent* evt = &SchedList[id];
+
+    if (periodic)
+    {
+        evt->WaitCycles += delay;
+    }
+    else
+    {
+        evt->WaitCycles = delay + (ARM9->Cycles >> 1);
+    }
+
+    evt->Func = func;
+    evt->Param = param;
+
+    SchedListMask |= idbit;
+
+    Reschedule();
+}
+
+// Removes event 'id' from the pending set.
+void CancelEvent(u32 id)
+{
+    u32 idbit = (1<<id);
+    SchedListMask &= ~idbit;
+}
+
+
+// Marks a key as pressed: its bit in KeyInput is cleared.
+void PressKey(u32 key)
+{
+    u32 keybit = (1 << key);
+    KeyInput &= ~keybit;
+}
+
+// Marks a key as released: its bit in KeyInput is set.
+void ReleaseKey(u32 key)
+{
+    u32 keybit = (1 << key);
+    KeyInput |= keybit;
+}
+
+// Forwards the touch position to the touchscreen controller.
+void TouchScreen(u16 x, u16 y)
+{
+ SPI_TSC::SetTouchCoords(x, y);
+}
+
+// Reports the screen as released by setting the coordinates to (0x000, 0xFFF).
+void ReleaseScreen()
+{
+ SPI_TSC::SetTouchCoords(0x000, 0xFFF);
+}
+
+
+// Stops emulation: logs the call and clears Running, which ends RunFrame()'s loop.
+void Halt()
+{
+ printf("Halt()\n");
+ Running = false;
+}
+
+
+// Stores the new WRAMCnt value and splits the 32K of shared WRAM between
+// the two CPUs according to its low two bits:
+// 0: all to ARM9, 1: upper half to ARM9 / lower half to ARM7,
+// 2: lower half to ARM9 / upper half to ARM7, 3: all to ARM7.
+void MapSharedWRAM(u8 val)
+{
+    // per mode: { ARM9 offset, ARM9 mask, ARM7 offset, ARM7 mask }
+    // a mask of 0 means that CPU gets nothing (its pointer is set to NULL)
+    static const u32 layout[4][4] =
+    {
+        { 0x0000, 0x7FFF, 0x0000, 0x0000 },
+        { 0x4000, 0x3FFF, 0x0000, 0x3FFF },
+        { 0x0000, 0x3FFF, 0x4000, 0x3FFF },
+        { 0x0000, 0x0000, 0x0000, 0x7FFF }
+    };
+
+    WRAMCnt = val;
+
+    const u32* cfg = layout[WRAMCnt & 0x3];
+
+    SWRAM_ARM9 = cfg[1] ? &SharedWRAM[cfg[0]] : NULL;
+    SWRAM_ARM9Mask = cfg[1];
+    SWRAM_ARM7 = cfg[3] ? &SharedWRAM[cfg[2]] : NULL;
+    SWRAM_ARM7Mask = cfg[3];
+}
+
+
+// Flags interrupt 'irq' as pending for the given CPU.
+void SetIRQ(u32 cpu, u32 irq)
+{
+    u32 irqbit = (1 << irq);
+    IF[cpu] |= irqbit;
+}
+
+// Clears the pending flag of interrupt 'irq' for the given CPU.
+void ClearIRQ(u32 cpu, u32 irq)
+{
+    u32 irqbit = (1 << irq);
+    IF[cpu] &= ~irqbit;
+}
+
+// Tells whether a pending, enabled interrupt should wake the given CPU.
+// For CPU 0 the master enable (IME bit 0) must be set as well; CPU 1 is
+// woken regardless of its IME.
+bool HaltInterrupted(u32 cpu)
+{
+    if (cpu == 0 && !(IME[0] & 0x1))
+        return false;
+
+    return (IF[cpu] & IE[cpu]) != 0;
+}
+
+// Sets stop-reason bits for a CPU in CPUStop. The mask is used as-is for
+// cpu 0 and shifted into the upper 16 bits for any other cpu.
+void StopCPU(u32 cpu, u32 mask)
+{
+    const u32 bits = cpu ? (mask << 16) : mask;
+    CPUStop |= bits;
+}
+
+// Clears stop-reason bits for a CPU in CPUStop. The mask is used as-is for
+// cpu 0 and shifted into the upper 16 bits for any other cpu.
+void ResumeCPU(u32 cpu, u32 mask)
+{
+    const u32 bits = cpu ? (mask << 16) : mask;
+    CPUStop &= ~bits;
+}
+
+
+
+// Offers start condition `mode` to the four DMA channels of `cpu`
+// (DMAs[cpu*4] .. DMAs[cpu*4 + 3]), in ascending channel order.
+void CheckDMAs(u32 cpu, u32 mode)
+{
+    const u32 first = cpu << 2;
+
+    for (u32 chan = 0; chan < 4; chan++)
+        DMAs[first + chan]->StartIfNeeded(mode);
+}
+
+
+
+// Timer prescaler table, indexed by the low two bits of a timer's Cnt.
+// The entries are shift amounts (log2 of the divisors 1, 64, 256, 1024 kept in
+// the commented-out line below); callers use them as `x << TimerPrescaler[n]`.
+//const s32 TimerPrescaler[4] = {1, 64, 256, 1024};
+const s32 TimerPrescaler[4] = {0, 6, 8, 10};
+
+// Returns the current 16-bit value of timer `timer` (0-3: ARM9, 4-7: ARM7).
+// Counter holds the value in its upper 16 bits. For a timer that is enabled
+// (Cnt bit 7) and not cascading (Cnt bit 2 clear), the cycles run so far by
+// the owning CPU are scaled by CycleShift and added before the result is taken.
+u16 TimerGetCounter(u32 timer)
+{
+    Timer* t = &Timers[timer];
+    u32 value = t->Counter;
+
+    const bool freerunning = ((t->Cnt & 0x84) == 0x80);
+    if (freerunning)
+    {
+        u32 elapsed;
+        if (timer & 0x4)
+            elapsed = ARM7->Cycles;
+        else
+            elapsed = (ARM9->Cycles>>1);
+
+        value += (elapsed << t->CycleShift);
+    }
+
+    return value >> 16;
+}
+
+// Scheduler callback run when timer `param` (0-7) overflows.
+// It re-arms the periodic overflow event for that timer, reloads its counter,
+// raises the timer IRQ when Cnt bit 6 is set, and then ticks the following
+// timer of the same CPU if that one is enabled in cascade mode (Cnt bits 7
+// and 2 both set). The loop repeats for every cascaded timer that wraps to 0.
+void TimerOverflow(u32 param)
+{
+ Timer* timer = &Timers[param];
+ timer->Counter = 0;
+
+ // tid: timer index within its CPU (0-3); cpu: param / 4
+ u32 tid = param & 0x3;
+ u32 cpu = param >> 2;
+
+ for (;;)
+ {
+ // only the timer that actually fired gets its event rescheduled;
+ // cascaded timers (tid already advanced) have no event of their own
+ if (tid == (param&0x3))
+ ScheduleEvent(Event_Timer9_0 + param, true, (0x10000 - timer->Reload) << TimerPrescaler[timer->Cnt & 0x03], TimerOverflow, param);
+ //timer->Event = ScheduleEvent(TimerPrescaler[timer->Control&0x3], TimerIncrement, param);
+
+ // Counter == 0 means this timer just wrapped
+ if (timer->Counter == 0)
+ {
+ // Counter keeps the 16-bit value in its upper half
+ timer->Counter = timer->Reload << 16;
+
+ if (timer->Cnt & (1<<6))
+ SetIRQ(cpu, IRQ_Timer0 + tid);
+
+ // cascade
+ // the last timer of a CPU has no successor
+ if (tid == 3)
+ break;
+ timer++;
+ // successor must be enabled (bit 7) and in cascade mode (bit 2)
+ if ((timer->Cnt & 0x84) != 0x84)
+ break;
+ // one tick of the cascaded timer; wraps to 0 on overflow
+ timer->Counter += 0x10000;
+ tid++;
+ continue;
+ }
+
+ break;
+ }
+}
+
+// Handles a write of `cnt` to the control register of timer `id`.
+// The new value is always stored. On a 0->1 transition of the enable bit
+// (bit 7) the counter is reloaded and CycleShift recomputed; a non-cascading
+// timer then gets its overflow event scheduled, a cascading one (bit 2) has any
+// pending event cancelled. On a 1->0 transition the event is cancelled.
+void TimerStart(u32 id, u16 cnt)
+{
+    Timer* timer = &Timers[id];
+    const bool wasenabled = (timer->Cnt & (1<<7)) != 0;
+    const bool nowenabled = (cnt & (1<<7)) != 0;
+
+    timer->Cnt = cnt;
+
+    // no edge on the enable bit: nothing else to do
+    if (wasenabled == nowenabled)
+        return;
+
+    if (wasenabled)
+    {
+        // timer is being stopped
+        CancelEvent(Event_Timer9_0 + id);
+        return;
+    }
+
+    // timer is being started
+    const s32 prescale = TimerPrescaler[cnt & 0x03];
+
+    timer->Counter = timer->Reload << 16;
+    timer->CycleShift = 16 - prescale;
+
+    // start the timer, if it's not a cascading timer
+    const bool cascading = (cnt & (1<<2)) != 0;
+    if (cascading)
+        CancelEvent(Event_Timer9_0 + id);
+    else
+        ScheduleEvent(Event_Timer9_0 + id, false, (0x10000 - timer->Reload) << prescale, TimerOverflow, id);
+}
+
+
+
+// Runs the hardware divider using the mode in DivCnt bits 0-1:
+//   0: 32-bit / 32-bit     1 and 3: 64-bit / 32-bit     2: 64-bit / 64-bit
+// Results go to DivQuotient and DivRemainder (each a 64-bit value stored as
+// two 32-bit words). Division by zero yields a quotient of +1 or -1 (opposite
+// sign of the numerator) and a remainder equal to the numerator. The
+// most-negative / -1 overflow case yields the most-negative value with a
+// remainder of 0.
+// DIVCNT bit 14 (0x4000) is the division-by-zero flag; it is derived from the
+// full 64-bit denominator regardless of the selected mode.
+void StartDiv()
+{
+    // TODO: division isn't instant!
+
+    DivCnt &= ~0x4000;
+
+    switch (DivCnt & 0x0003)
+    {
+    case 0x0000:
+        {
+            s32 num = (s32)DivNumerator[0];
+            s32 den = (s32)DivDenominator[0];
+            if (den == 0)
+            {
+                // in 32-bit mode the upper quotient word is the *inverted*
+                // sign extension of the +/-1 result: -1 next to +1, 0 next to -1
+                DivQuotient[0] = (num<0) ? 1:-1;
+                DivQuotient[1] = (num<0) ? -1:0;
+                *(s64*)&DivRemainder[0] = num;
+            }
+            else if (num == -0x80000000 && den == -1)
+            {
+                // overflow: the host division would trap; upper word is zero
+                *(s64*)&DivQuotient[0] = 0x80000000;
+                *(s64*)&DivRemainder[0] = 0;
+            }
+            else
+            {
+                *(s64*)&DivQuotient[0] = (s64)(num / den);
+                *(s64*)&DivRemainder[0] = (s64)(num % den);
+            }
+        }
+        break;
+
+    case 0x0001:
+    case 0x0003:
+        {
+            s64 num = *(s64*)&DivNumerator[0];
+            s32 den = (s32)DivDenominator[0];
+            if (den == 0)
+            {
+                *(s64*)&DivQuotient[0] = (num<0) ? 1:-1;
+                *(s64*)&DivRemainder[0] = num;
+            }
+            else if (num == -0x8000000000000000 && den == -1)
+            {
+                *(s64*)&DivQuotient[0] = 0x8000000000000000;
+                *(s64*)&DivRemainder[0] = 0;
+            }
+            else
+            {
+                *(s64*)&DivQuotient[0] = (s64)(num / den);
+                *(s64*)&DivRemainder[0] = (s64)(num % den);
+            }
+        }
+        break;
+
+    case 0x0002:
+        {
+            s64 num = *(s64*)&DivNumerator[0];
+            s64 den = *(s64*)&DivDenominator[0];
+            if (den == 0)
+            {
+                *(s64*)&DivQuotient[0] = (num<0) ? 1:-1;
+                *(s64*)&DivRemainder[0] = num;
+            }
+            else if (num == -0x8000000000000000 && den == -1)
+            {
+                *(s64*)&DivQuotient[0] = 0x8000000000000000;
+                *(s64*)&DivRemainder[0] = 0;
+            }
+            else
+            {
+                *(s64*)&DivQuotient[0] = (s64)(num / den);
+                *(s64*)&DivRemainder[0] = (s64)(num % den);
+            }
+        }
+        break;
+    }
+
+    // the flag looks at all 64 denominator bits, even in the 32-bit modes
+    if ((DivDenominator[0] | DivDenominator[1]) == 0)
+        DivCnt |= 0x4000;
+}
+
+// http://stackoverflow.com/questions/1100090/looking-for-an-efficient-integer-square-root-algorithm-for-arm-thumb2
+// Computes the integer square root of SqrtVal into SqrtRes with a bitwise
+// (two input bits per step) algorithm.
+// SqrtCnt bit 0 selects the input width: set = 64-bit input (32 result bits),
+// clear = low 32 bits of SqrtVal only (16 result bits).
+void StartSqrt()
+{
+    // TODO: sqrt isn't instant either. oh well
+
+    u64 val;
+    u32 res = 0;
+    u64 rem = 0;
+    u32 nbits, topshift;
+
+    if (SqrtCnt & 0x0001)
+    {
+        val = *(u64*)&SqrtVal[0];
+        nbits = 32;
+        topshift = 62;
+    }
+    else
+    {
+        val = (u64)SqrtVal[0]; // 32bit
+        nbits = 16;
+        topshift = 30;
+    }
+
+    for (u32 i = 0; i < nbits; i++)
+    {
+        // bring down the next two input bits
+        rem = (rem << 2) + ((val >> topshift) & 0x3);
+        val <<= 2;
+        res <<= 1;
+
+        // trial subtrahend 2*res+1. It needs 33 bits in the last step of the
+        // 64-bit mode, so it must be computed in 64 bits: a 32-bit value wraps
+        // and produces a root that is too large (e.g. for 2^62 + 1).
+        const u64 prod = ((u64)res << 1) + 1;
+        if (rem >= prod)
+        {
+            rem -= prod;
+            res++;
+        }
+    }
+
+    SqrtRes = res;
+}
+
+
+
+// Debug dump: prints PC, LR and R_IRQ[1] of both CPUs, followed by the nine
+// VRAMCNT values labelled A to I. `param` is unused.
+void debug(u32 param)
+{
+    printf("ARM9 PC=%08X LR=%08X %08X\n", ARM9->R[15], ARM9->R[14], ARM9->R_IRQ[1]);
+    printf("ARM7 PC=%08X LR=%08X %08X\n", ARM7->R[15], ARM7->R[14], ARM7->R_IRQ[1]);
+
+    int bank = 0;
+    while (bank < 9)
+    {
+        printf("VRAM %c: %02X\n", 'A'+bank, GPU::VRAMCNT[bank]);
+        bank++;
+    }
+}
+
+
+
+// 8-bit read from the ARM9 bus. The 4K page at 0xFFFF0000 maps to the ARM9
+// BIOS; otherwise the top address byte selects main RAM, shared WRAM, I/O,
+// palette, VRAM, OAM, or the 0x08/0x09 regions (which read as all ones).
+// Unmapped addresses are logged and read as 0.
+u8 ARM9Read8(u32 addr)
+{
+    if ((addr & 0xFFFFF000) == 0xFFFF0000)
+        return *(u8*)&ARM9BIOS[addr & 0xFFF];
+
+    const u32 region = addr & 0xFF000000;
+
+    if (region == 0x02000000)
+        return *(u8*)&MainRAM[addr & 0x3FFFFF];
+
+    if (region == 0x03000000)
+    {
+        // shared WRAM reads as 0 when none of it is mapped to the ARM9
+        if (!SWRAM_ARM9) return 0;
+        return *(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask];
+    }
+
+    if (region == 0x04000000)
+        return ARM9IORead8(addr);
+
+    if (region == 0x05000000)
+        return *(u8*)&GPU::Palette[addr & 0x7FF];
+
+    if (region == 0x06000000)
+    {
+        const u32 bank = addr & 0x00E00000;
+        if (bank == 0x00000000) return GPU::ReadVRAM_ABG<u8>(addr);
+        if (bank == 0x00200000) return GPU::ReadVRAM_BBG<u8>(addr);
+        if (bank == 0x00400000) return GPU::ReadVRAM_AOBJ<u8>(addr);
+        if (bank == 0x00600000) return GPU::ReadVRAM_BOBJ<u8>(addr);
+        return GPU::ReadVRAM_LCDC<u8>(addr);
+    }
+
+    if (region == 0x07000000)
+        return *(u8*)&GPU::OAM[addr & 0x7FF];
+
+    if (region == 0x08000000 || region == 0x09000000)
+        return 0xFF;
+
+    printf("unknown arm9 read8 %08X\n", addr);
+    return 0;
+}
+
+// 16-bit read from the ARM9 bus. Same region decoding as the 8-bit variant;
+// unmapped addresses silently read as 0 (the log line is disabled).
+u16 ARM9Read16(u32 addr)
+{
+    if ((addr & 0xFFFFF000) == 0xFFFF0000)
+        return *(u16*)&ARM9BIOS[addr & 0xFFF];
+
+    const u32 region = addr & 0xFF000000;
+
+    if (region == 0x02000000)
+        return *(u16*)&MainRAM[addr & 0x3FFFFF];
+
+    if (region == 0x03000000)
+    {
+        // shared WRAM reads as 0 when none of it is mapped to the ARM9
+        if (!SWRAM_ARM9) return 0;
+        return *(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask];
+    }
+
+    if (region == 0x04000000)
+        return ARM9IORead16(addr);
+
+    if (region == 0x05000000)
+        return *(u16*)&GPU::Palette[addr & 0x7FF];
+
+    if (region == 0x06000000)
+    {
+        const u32 bank = addr & 0x00E00000;
+        if (bank == 0x00000000) return GPU::ReadVRAM_ABG<u16>(addr);
+        if (bank == 0x00200000) return GPU::ReadVRAM_BBG<u16>(addr);
+        if (bank == 0x00400000) return GPU::ReadVRAM_AOBJ<u16>(addr);
+        if (bank == 0x00600000) return GPU::ReadVRAM_BOBJ<u16>(addr);
+        return GPU::ReadVRAM_LCDC<u16>(addr);
+    }
+
+    if (region == 0x07000000)
+        return *(u16*)&GPU::OAM[addr & 0x7FF];
+
+    if (region == 0x08000000 || region == 0x09000000)
+        return 0xFFFF;
+
+    //printf("unknown arm9 read16 %08X %08X %08X %08X\n", addr, ARM9->R[15], ARM9->R[1], ARM9->R[2]);
+    return 0;
+}
+
+// 32-bit read from the ARM9 bus. Same region decoding as the narrower
+// variants; unmapped addresses are logged (with some debug context) and
+// read as 0.
+u32 ARM9Read32(u32 addr)
+{
+    if ((addr & 0xFFFFF000) == 0xFFFF0000)
+        return *(u32*)&ARM9BIOS[addr & 0xFFF];
+
+    const u32 region = addr & 0xFF000000;
+
+    if (region == 0x02000000)
+        return *(u32*)&MainRAM[addr & 0x3FFFFF];
+
+    if (region == 0x03000000)
+    {
+        // shared WRAM reads as 0 when none of it is mapped to the ARM9
+        if (!SWRAM_ARM9) return 0;
+        return *(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask];
+    }
+
+    if (region == 0x04000000)
+        return ARM9IORead32(addr);
+
+    if (region == 0x05000000)
+        return *(u32*)&GPU::Palette[addr & 0x7FF];
+
+    if (region == 0x06000000)
+    {
+        const u32 bank = addr & 0x00E00000;
+        if (bank == 0x00000000) return GPU::ReadVRAM_ABG<u32>(addr);
+        if (bank == 0x00200000) return GPU::ReadVRAM_BBG<u32>(addr);
+        if (bank == 0x00400000) return GPU::ReadVRAM_AOBJ<u32>(addr);
+        if (bank == 0x00600000) return GPU::ReadVRAM_BOBJ<u32>(addr);
+        return GPU::ReadVRAM_LCDC<u32>(addr);
+    }
+
+    if (region == 0x07000000)
+        return *(u32*)&GPU::OAM[addr & 0x7FF];
+
+    if (region == 0x08000000 || region == 0x09000000)
+        return 0xFFFFFFFF;
+
+    printf("unknown arm9 read32 %08X | %08X %08X %08X\n", addr, ARM9->R[15], ARM9->R[12], ARM9Read32(0x027FF820));
+    return 0;
+}
+
+// 8-bit write on the ARM9 bus. Main RAM, shared WRAM (when mapped to the
+// ARM9) and I/O accept byte writes; byte writes to palette, VRAM and OAM
+// (0x05-0x07) are silently dropped. Anything else is logged.
+void ARM9Write8(u32 addr, u8 val)
+{
+    const u32 region = addr & 0xFF000000;
+
+    if (region == 0x02000000)
+    {
+        *(u8*)&MainRAM[addr & 0x3FFFFF] = val;
+        return;
+    }
+
+    if (region == 0x03000000)
+    {
+        if (SWRAM_ARM9) *(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
+        return;
+    }
+
+    if (region == 0x04000000)
+    {
+        ARM9IOWrite8(addr, val);
+        return;
+    }
+
+    if (region == 0x05000000 || region == 0x06000000 || region == 0x07000000)
+        return;
+
+    printf("unknown arm9 write8 %08X %02X\n", addr, val);
+}
+
+// 16-bit write on the ARM9 bus, dispatched by the top address byte to main
+// RAM, shared WRAM (when mapped to the ARM9), I/O, palette, VRAM or OAM.
+// Writes elsewhere are silently ignored (the log line is disabled).
+void ARM9Write16(u32 addr, u16 val)
+{
+    const u32 region = addr & 0xFF000000;
+
+    if (region == 0x02000000)
+    {
+        *(u16*)&MainRAM[addr & 0x3FFFFF] = val;
+    }
+    else if (region == 0x03000000)
+    {
+        if (SWRAM_ARM9) *(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
+    }
+    else if (region == 0x04000000)
+    {
+        ARM9IOWrite16(addr, val);
+    }
+    else if (region == 0x05000000)
+    {
+        *(u16*)&GPU::Palette[addr & 0x7FF] = val;
+    }
+    else if (region == 0x06000000)
+    {
+        const u32 bank = addr & 0x00E00000;
+        if      (bank == 0x00000000) GPU::WriteVRAM_ABG<u16>(addr, val);
+        else if (bank == 0x00200000) GPU::WriteVRAM_BBG<u16>(addr, val);
+        else if (bank == 0x00400000) GPU::WriteVRAM_AOBJ<u16>(addr, val);
+        else if (bank == 0x00600000) GPU::WriteVRAM_BOBJ<u16>(addr, val);
+        else                         GPU::WriteVRAM_LCDC<u16>(addr, val);
+    }
+    else if (region == 0x07000000)
+    {
+        *(u16*)&GPU::OAM[addr & 0x7FF] = val;
+    }
+
+    //printf("unknown arm9 write16 %08X %04X\n", addr, val);
+}
+
+// 32-bit write on the ARM9 bus, dispatched by the top address byte to main
+// RAM, shared WRAM (when mapped to the ARM9), I/O, palette, VRAM or OAM.
+// Writes elsewhere are logged together with the ARM9 PC.
+void ARM9Write32(u32 addr, u32 val)
+{
+    const u32 region = addr & 0xFF000000;
+
+    if (region == 0x02000000)
+    {
+        *(u32*)&MainRAM[addr & 0x3FFFFF] = val;
+        return;
+    }
+
+    if (region == 0x03000000)
+    {
+        if (SWRAM_ARM9) *(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
+        return;
+    }
+
+    if (region == 0x04000000)
+    {
+        ARM9IOWrite32(addr, val);
+        return;
+    }
+
+    if (region == 0x05000000)
+    {
+        *(u32*)&GPU::Palette[addr & 0x7FF] = val;
+        return;
+    }
+
+    if (region == 0x06000000)
+    {
+        const u32 bank = addr & 0x00E00000;
+        if      (bank == 0x00000000) GPU::WriteVRAM_ABG<u32>(addr, val);
+        else if (bank == 0x00200000) GPU::WriteVRAM_BBG<u32>(addr, val);
+        else if (bank == 0x00400000) GPU::WriteVRAM_AOBJ<u32>(addr, val);
+        else if (bank == 0x00600000) GPU::WriteVRAM_BOBJ<u32>(addr, val);
+        else                         GPU::WriteVRAM_LCDC<u32>(addr, val);
+        return;
+    }
+
+    if (region == 0x07000000)
+    {
+        *(u32*)&GPU::OAM[addr & 0x7FF] = val;
+        return;
+    }
+
+    printf("unknown arm9 write32 %08X %08X | %08X\n", addr, val, ARM9->R[15]);
+}
+
+
+
+// 8-bit read from the ARM7 bus.
+// Addresses below 0x4000 are the ARM7 BIOS. They read as all ones unless the
+// ARM7 PC is itself inside the BIOS, and additionally, for addresses below
+// ARM7BIOSProt, unless the PC is below ARM7BIOSProt too.
+// Other addresses are decoded on bits 23-31: main RAM, shared/ARM7 WRAM, I/O
+// and VRAM. Unmapped addresses are logged and read as 0.
+u8 ARM7Read8(u32 addr)
+{
+    if (addr < 0x00004000)
+    {
+        const bool outsidebios = (ARM7->R[15] >= 0x4000);
+        const bool protectedarea = (addr < ARM7BIOSProt && ARM7->R[15] >= ARM7BIOSProt);
+        if (outsidebios || protectedarea)
+            return 0xFF;
+
+        return *(u8*)&ARM7BIOS[addr];
+    }
+
+    const u32 region = addr & 0xFF800000;
+
+    if (region == 0x02000000 || region == 0x02800000)
+        return *(u8*)&MainRAM[addr & 0x3FFFFF];
+
+    // shared WRAM if the ARM7 has any mapped, ARM7-private WRAM otherwise
+    if (region == 0x03000000 && SWRAM_ARM7)
+        return *(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask];
+
+    if (region == 0x03000000 || region == 0x03800000)
+        return *(u8*)&ARM7WRAM[addr & 0xFFFF];
+
+    if (region == 0x04000000)
+        return ARM7IORead8(addr);
+
+    if (region == 0x06000000 || region == 0x06800000)
+        return GPU::ReadVRAM_ARM7<u8>(addr);
+
+    printf("unknown arm7 read8 %08X %08X %08X/%08X\n", addr, ARM7->R[15], ARM7->R[0], ARM7->R[1]);
+    return 0;
+}
+
+// 16-bit read from the ARM7 bus. Same BIOS protection and region decoding
+// as the 8-bit variant, plus the 0x04800000 region, which goes to Wifi.
+// Unmapped addresses are logged and read as 0.
+u16 ARM7Read16(u32 addr)
+{
+    if (addr < 0x00004000)
+    {
+        const bool outsidebios = (ARM7->R[15] >= 0x4000);
+        const bool protectedarea = (addr < ARM7BIOSProt && ARM7->R[15] >= ARM7BIOSProt);
+        if (outsidebios || protectedarea)
+            return 0xFFFF;
+
+        return *(u16*)&ARM7BIOS[addr];
+    }
+
+    const u32 region = addr & 0xFF800000;
+
+    if (region == 0x02000000 || region == 0x02800000)
+        return *(u16*)&MainRAM[addr & 0x3FFFFF];
+
+    // shared WRAM if the ARM7 has any mapped, ARM7-private WRAM otherwise
+    if (region == 0x03000000 && SWRAM_ARM7)
+        return *(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask];
+
+    if (region == 0x03000000 || region == 0x03800000)
+        return *(u16*)&ARM7WRAM[addr & 0xFFFF];
+
+    if (region == 0x04000000)
+        return ARM7IORead16(addr);
+
+    if (region == 0x04800000)
+        return Wifi::Read(addr);
+
+    if (region == 0x06000000 || region == 0x06800000)
+        return GPU::ReadVRAM_ARM7<u16>(addr);
+
+    printf("unknown arm7 read16 %08X %08X\n", addr, ARM7->R[15]);
+    return 0;
+}
+
+// 32-bit read from the ARM7 bus. Same BIOS protection and region decoding
+// as the 8-bit variant. Unmapped addresses are logged and read as 0.
+u32 ARM7Read32(u32 addr)
+{
+    if (addr < 0x00004000)
+    {
+        const bool outsidebios = (ARM7->R[15] >= 0x4000);
+        const bool protectedarea = (addr < ARM7BIOSProt && ARM7->R[15] >= ARM7BIOSProt);
+        if (outsidebios || protectedarea)
+            return 0xFFFFFFFF;
+
+        return *(u32*)&ARM7BIOS[addr];
+    }
+
+    const u32 region = addr & 0xFF800000;
+
+    if (region == 0x02000000 || region == 0x02800000)
+        return *(u32*)&MainRAM[addr & 0x3FFFFF];
+
+    // shared WRAM if the ARM7 has any mapped, ARM7-private WRAM otherwise
+    if (region == 0x03000000 && SWRAM_ARM7)
+        return *(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask];
+
+    if (region == 0x03000000 || region == 0x03800000)
+        return *(u32*)&ARM7WRAM[addr & 0xFFFF];
+
+    if (region == 0x04000000)
+        return ARM7IORead32(addr);
+
+    if (region == 0x06000000 || region == 0x06800000)
+        return GPU::ReadVRAM_ARM7<u32>(addr);
+
+    printf("unknown arm7 read32 %08X | %08X\n", addr, ARM7->R[15]);
+    return 0;
+}
+
+// 8-bit write on the ARM7 bus, decoded on bits 23-31: main RAM, shared WRAM
+// (falling back to ARM7-private WRAM when none is mapped), ARM7 WRAM, I/O
+// and VRAM. Writes elsewhere are logged with some debug context.
+void ARM7Write8(u32 addr, u8 val)
+{
+    const u32 region = addr & 0xFF800000;
+
+    if (region == 0x02000000 || region == 0x02800000)
+    {
+        *(u8*)&MainRAM[addr & 0x3FFFFF] = val;
+        return;
+    }
+
+    if (region == 0x03000000 && SWRAM_ARM7)
+    {
+        *(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
+        return;
+    }
+
+    if (region == 0x03000000 || region == 0x03800000)
+    {
+        *(u8*)&ARM7WRAM[addr & 0xFFFF] = val;
+        return;
+    }
+
+    if (region == 0x04000000)
+    {
+        ARM7IOWrite8(addr, val);
+        return;
+    }
+
+    if (region == 0x06000000 || region == 0x06800000)
+    {
+        GPU::WriteVRAM_ARM7<u8>(addr, val);
+        return;
+    }
+
+    printf("unknown arm7 write8 %08X %02X | %08X | %08X %08X %08X %08X\n", addr, val, ARM7->R[15], IME[1], IE[1], ARM7->R[0], ARM7->R[1]);
+}
+
+// 16-bit write on the ARM7 bus. Same decoding as the 8-bit variant, plus
+// the 0x04800000 region, which goes to Wifi. Writes elsewhere are logged.
+void ARM7Write16(u32 addr, u16 val)
+{
+    const u32 region = addr & 0xFF800000;
+
+    if (region == 0x02000000 || region == 0x02800000)
+    {
+        *(u16*)&MainRAM[addr & 0x3FFFFF] = val;
+        return;
+    }
+
+    if (region == 0x03000000 && SWRAM_ARM7)
+    {
+        *(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
+        return;
+    }
+
+    if (region == 0x03000000 || region == 0x03800000)
+    {
+        *(u16*)&ARM7WRAM[addr & 0xFFFF] = val;
+        return;
+    }
+
+    if (region == 0x04000000)
+    {
+        ARM7IOWrite16(addr, val);
+        return;
+    }
+
+    if (region == 0x04800000)
+    {
+        Wifi::Write(addr, val);
+        return;
+    }
+
+    if (region == 0x06000000 || region == 0x06800000)
+    {
+        GPU::WriteVRAM_ARM7<u16>(addr, val);
+        return;
+    }
+
+    printf("unknown arm7 write16 %08X %04X | %08X\n", addr, val, ARM7->R[15]);
+}
+
+// 32-bit write on the ARM7 bus. Same decoding as the 8-bit variant.
+// Writes elsewhere are logged with the ARM7 PC and current instruction.
+void ARM7Write32(u32 addr, u32 val)
+{
+    const u32 region = addr & 0xFF800000;
+
+    if (region == 0x02000000 || region == 0x02800000)
+    {
+        *(u32*)&MainRAM[addr & 0x3FFFFF] = val;
+        return;
+    }
+
+    if (region == 0x03000000 && SWRAM_ARM7)
+    {
+        *(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
+        return;
+    }
+
+    if (region == 0x03000000 || region == 0x03800000)
+    {
+        *(u32*)&ARM7WRAM[addr & 0xFFFF] = val;
+        return;
+    }
+
+    if (region == 0x04000000)
+    {
+        ARM7IOWrite32(addr, val);
+        return;
+    }
+
+    if (region == 0x06000000 || region == 0x06800000)
+    {
+        GPU::WriteVRAM_ARM7<u32>(addr, val);
+        return;
+    }
+
+    printf("unknown arm7 write32 %08X %08X | %08X %08X\n", addr, val, ARM7->R[15], ARM7->CurInstr);
+}
+
+
+
+
+// 8-bit read from an ARM9 I/O register.
+// Individually decoded registers come first; the remaining address ranges are
+// forwarded to the 2D engines (A at 0x04000000, B at 0x04001000) and to the
+// 3D engine. Unhandled addresses are logged and read as 0.
+u8 ARM9IORead8(u32 addr)
+{
+ switch (addr)
+ {
+ // cartridge SPI data
+ case 0x040001A2: return NDSCart::ReadSPIData();
+
+ case 0x04000208: return IME[0];
+
+ // VRAM bank controls; WRAMCnt sits in the middle of them at 0x04000247
+ case 0x04000240: return GPU::VRAMCNT[0];
+ case 0x04000241: return GPU::VRAMCNT[1];
+ case 0x04000242: return GPU::VRAMCNT[2];
+ case 0x04000243: return GPU::VRAMCNT[3];
+ case 0x04000244: return GPU::VRAMCNT[4];
+ case 0x04000245: return GPU::VRAMCNT[5];
+ case 0x04000246: return GPU::VRAMCNT[6];
+ case 0x04000247: return WRAMCnt;
+ case 0x04000248: return GPU::VRAMCNT[7];
+ case 0x04000249: return GPU::VRAMCNT[8];
+
+ case 0x04000300: return PostFlag9;
+ }
+
+ if (addr >= 0x04000000 && addr < 0x04000060)
+ {
+ return GPU::GPU2D_A->Read8(addr);
+ }
+ if (addr >= 0x04001000 && addr < 0x04001060)
+ {
+ return GPU::GPU2D_B->Read8(addr);
+ }
+ if (addr >= 0x04000320 && addr < 0x040006A4)
+ {
+ return GPU3D::Read8(addr);
+ }
+
+ printf("unknown ARM9 IO read8 %08X\n", addr);
+ return 0;
+}
+
+// 16-bit read from an ARM9 I/O register.
+// Individually decoded registers come first; the remaining address ranges are
+// forwarded to the 2D engines and the 3D engine. Unhandled addresses are
+// logged together with the ARM9 PC and read as 0.
+u16 ARM9IORead16(u32 addr)
+{
+ switch (addr)
+ {
+ // display status and scanline counter
+ case 0x04000004: return GPU::DispStat[0];
+ case 0x04000006: return GPU::VCount;
+
+ case 0x04000060: return GPU3D::Read16(addr);
+ case 0x04000064:
+ case 0x04000066: return GPU::GPU2D_A->Read16(addr);
+
+ // DMA channel 0-3 control words, low and high halves
+ case 0x040000B8: return DMAs[0]->Cnt & 0xFFFF;
+ case 0x040000BA: return DMAs[0]->Cnt >> 16;
+ case 0x040000C4: return DMAs[1]->Cnt & 0xFFFF;
+ case 0x040000C6: return DMAs[1]->Cnt >> 16;
+ case 0x040000D0: return DMAs[2]->Cnt & 0xFFFF;
+ case 0x040000D2: return DMAs[2]->Cnt >> 16;
+ case 0x040000DC: return DMAs[3]->Cnt & 0xFFFF;
+ case 0x040000DE: return DMAs[3]->Cnt >> 16;
+
+ // DMA fill data, viewed as eight 16-bit halves
+ case 0x040000E0: return ((u16*)DMA9Fill)[0];
+ case 0x040000E2: return ((u16*)DMA9Fill)[1];
+ case 0x040000E4: return ((u16*)DMA9Fill)[2];
+ case 0x040000E6: return ((u16*)DMA9Fill)[3];
+ case 0x040000E8: return ((u16*)DMA9Fill)[4];
+ case 0x040000EA: return ((u16*)DMA9Fill)[5];
+ case 0x040000EC: return ((u16*)DMA9Fill)[6];
+ case 0x040000EE: return ((u16*)DMA9Fill)[7];
+
+ // timers 0-3: current counter / control
+ case 0x04000100: return TimerGetCounter(0);
+ case 0x04000102: return Timers[0].Cnt;
+ case 0x04000104: return TimerGetCounter(1);
+ case 0x04000106: return Timers[1].Cnt;
+ case 0x04000108: return TimerGetCounter(2);
+ case 0x0400010A: return Timers[2].Cnt;
+ case 0x0400010C: return TimerGetCounter(3);
+ case 0x0400010E: return Timers[3].Cnt;
+
+ case 0x04000130: return KeyInput & 0xFFFF;
+
+ case 0x04000180: return IPCSync9;
+ case 0x04000184:
+ {
+ // IPC FIFO control: the stored bits plus live status bits.
+ // bits 0/1: own send FIFO (IPCFIFO9) empty/full,
+ // bits 8/9: receive FIFO (IPCFIFO7) empty/full
+ u16 val = IPCFIFOCnt9;
+ if (IPCFIFO9->IsEmpty()) val |= 0x0001;
+ else if (IPCFIFO9->IsFull()) val |= 0x0002;
+ if (IPCFIFO7->IsEmpty()) val |= 0x0100;
+ else if (IPCFIFO7->IsFull()) val |= 0x0200;
+ return val;
+ }
+
+ case 0x040001A0: return NDSCart::SPICnt;
+ case 0x040001A2: return NDSCart::ReadSPIData();
+
+ case 0x04000204: return ExMemCnt[0];
+ case 0x04000208: return IME[0];
+
+ // VRAM bank controls, two per halfword; WRAMCnt is the high byte at 0x04000246
+ case 0x04000240: return GPU::VRAMCNT[0] | (GPU::VRAMCNT[1] << 8);
+ case 0x04000242: return GPU::VRAMCNT[2] | (GPU::VRAMCNT[3] << 8);
+ case 0x04000244: return GPU::VRAMCNT[4] | (GPU::VRAMCNT[5] << 8);
+ case 0x04000246: return GPU::VRAMCNT[6] | (WRAMCnt << 8);
+ case 0x04000248: return GPU::VRAMCNT[7] | (GPU::VRAMCNT[8] << 8);
+
+ case 0x04000280: return DivCnt;
+
+ case 0x040002B0: return SqrtCnt;
+
+ case 0x04000300: return PostFlag9;
+ case 0x04000304: return PowerControl9;
+ }
+
+ if (addr >= 0x04000000 && addr < 0x04000060)
+ {
+ return GPU::GPU2D_A->Read16(addr);
+ }
+ if (addr >= 0x04001000 && addr < 0x04001060)
+ {
+ return GPU::GPU2D_B->Read16(addr);
+ }
+ if (addr >= 0x04000320 && addr < 0x040006A4)
+ {
+ return GPU3D::Read16(addr);
+ }
+
+ printf("unknown ARM9 IO read16 %08X %08X\n", addr, ARM9->R[15]);
+ return 0;
+}
+
+// 32-bit read from an ARM9 I/O register.
+// Individually decoded registers come first; the remaining address ranges are
+// forwarded to the 2D engines and the 3D engine. Unhandled addresses are
+// logged and read as 0. Note that reading 0x04100000 (IPC FIFO receive) has
+// side effects.
+u32 ARM9IORead32(u32 addr)
+{
+ switch (addr)
+ {
+ // display status in the low half, scanline counter in the high half
+ case 0x04000004: return GPU::DispStat[0] | (GPU::VCount << 16);
+
+ case 0x04000060: return GPU3D::Read32(addr);
+ case 0x04000064: return GPU::GPU2D_A->Read32(addr);
+
+ // DMA channels 0-3: source, destination, control
+ case 0x040000B0: return DMAs[0]->SrcAddr;
+ case 0x040000B4: return DMAs[0]->DstAddr;
+ case 0x040000B8: return DMAs[0]->Cnt;
+ case 0x040000BC: return DMAs[1]->SrcAddr;
+ case 0x040000C0: return DMAs[1]->DstAddr;
+ case 0x040000C4: return DMAs[1]->Cnt;
+ case 0x040000C8: return DMAs[2]->SrcAddr;
+ case 0x040000CC: return DMAs[2]->DstAddr;
+ case 0x040000D0: return DMAs[2]->Cnt;
+ case 0x040000D4: return DMAs[3]->SrcAddr;
+ case 0x040000D8: return DMAs[3]->DstAddr;
+ case 0x040000DC: return DMAs[3]->Cnt;
+
+ case 0x040000E0: return DMA9Fill[0];
+ case 0x040000E4: return DMA9Fill[1];
+ case 0x040000E8: return DMA9Fill[2];
+ case 0x040000EC: return DMA9Fill[3];
+
+ // timers 0-3: counter in the low half, control in the high half
+ case 0x04000100: return TimerGetCounter(0) | (Timers[0].Cnt << 16);
+ case 0x04000104: return TimerGetCounter(1) | (Timers[1].Cnt << 16);
+ case 0x04000108: return TimerGetCounter(2) | (Timers[2].Cnt << 16);
+ case 0x0400010C: return TimerGetCounter(3) | (Timers[3].Cnt << 16);
+
+ case 0x040001A0: return NDSCart::SPICnt | (NDSCart::ReadSPIData() << 16);
+ case 0x040001A4: return NDSCart::ROMCnt;
+
+ case 0x04000208: return IME[0];
+ case 0x04000210: return IE[0];
+ case 0x04000214: return IF[0];
+
+ case 0x04000240: return GPU::VRAMCNT[0] | (GPU::VRAMCNT[1] << 8) | (GPU::VRAMCNT[2] << 16) | (GPU::VRAMCNT[3] << 24);
+ case 0x04000244: return GPU::VRAMCNT[4] | (GPU::VRAMCNT[5] << 8) | (GPU::VRAMCNT[6] << 16) | (WRAMCnt << 24);
+ case 0x04000248: return GPU::VRAMCNT[7] | (GPU::VRAMCNT[8] << 8);
+
+ // divider operands and results, as 32-bit halves of 64-bit values
+ case 0x04000290: return DivNumerator[0];
+ case 0x04000294: return DivNumerator[1];
+ case 0x04000298: return DivDenominator[0];
+ case 0x0400029C: return DivDenominator[1];
+ case 0x040002A0: return DivQuotient[0];
+ case 0x040002A4: return DivQuotient[1];
+ case 0x040002A8: return DivRemainder[0];
+ case 0x040002AC: return DivRemainder[1];
+
+ case 0x040002B4: return SqrtRes;
+ case 0x040002B8: return SqrtVal[0];
+ case 0x040002BC: return SqrtVal[1];
+
+ // IPC FIFO receive: data sent by the ARM7 (IPCFIFO7)
+ case 0x04100000:
+ // bit 15 of IPCFIFOCnt9 gates the FIFO; when clear the read only peeks
+ if (IPCFIFOCnt9 & 0x8000)
+ {
+ u32 ret;
+ if (IPCFIFO7->IsEmpty())
+ {
+ // reading an empty FIFO sets error bit 14 and returns the peeked value
+ IPCFIFOCnt9 |= 0x4000;
+ ret = IPCFIFO7->Peek();
+ }
+ else
+ {
+ ret = IPCFIFO7->Read();
+
+ // the sender (ARM7) gets its send-done IRQ once its FIFO drains,
+ // if it enabled that IRQ (bit 2 of IPCFIFOCnt7)
+ if (IPCFIFO7->IsEmpty() && (IPCFIFOCnt7 & 0x0004))
+ SetIRQ(1, IRQ_IPCSendDone);
+ }
+ return ret;
+ }
+ else
+ return IPCFIFO7->Peek();
+
+ // cartridge ROM data; only served to the ARM9 while ExMemCnt bit 11 is clear
+ case 0x04100010:
+ if (!(ExMemCnt[0] & (1<<11))) return NDSCart::ReadROMData();
+ return 0;
+ }
+
+ if (addr >= 0x04000000 && addr < 0x04000060)
+ {
+ return GPU::GPU2D_A->Read32(addr);
+ }
+ if (addr >= 0x04001000 && addr < 0x04001060)
+ {
+ return GPU::GPU2D_B->Read32(addr);
+ }
+ if (addr >= 0x04000320 && addr < 0x040006A4)
+ {
+ return GPU3D::Read32(addr);
+ }
+
+ printf("unknown ARM9 IO read32 %08X\n", addr);
+ return 0;
+}
+
+// 8-bit write to an ARM9 I/O register.
+// Individually decoded registers come first; the remaining address ranges are
+// forwarded to the 2D engines and the 3D engine. Unhandled writes are logged.
+void ARM9IOWrite8(u32 addr, u8 val)
+{
+ switch (addr)
+ {
+ // cartridge SPI control, low and high byte; only accepted from the ARM9
+ // while ExMemCnt bit 11 is clear
+ case 0x040001A0:
+ if (!(ExMemCnt[0] & (1<<11)))
+ {
+ NDSCart::WriteSPICnt((NDSCart::SPICnt & 0xFF00) | val);
+ }
+ return;
+ case 0x040001A1:
+ if (!(ExMemCnt[0] & (1<<11)))
+ {
+ NDSCart::WriteSPICnt((NDSCart::SPICnt & 0x00FF) | (val << 8));
+ }
+ return;
+ case 0x040001A2:
+ NDSCart::WriteSPIData(val);
+ return;
+
+ // the eight cartridge ROM command bytes
+ case 0x040001A8: NDSCart::ROMCommand[0] = val; return;
+ case 0x040001A9: NDSCart::ROMCommand[1] = val; return;
+ case 0x040001AA: NDSCart::ROMCommand[2] = val; return;
+ case 0x040001AB: NDSCart::ROMCommand[3] = val; return;
+ case 0x040001AC: NDSCart::ROMCommand[4] = val; return;
+ case 0x040001AD: NDSCart::ROMCommand[5] = val; return;
+ case 0x040001AE: NDSCart::ROMCommand[6] = val; return;
+ case 0x040001AF: NDSCart::ROMCommand[7] = val; return;
+
+ case 0x04000208: IME[0] = val & 0x1; return;
+
+ // VRAM bank mapping (banks 0-8) and shared WRAM mapping at 0x04000247
+ case 0x04000240: GPU::MapVRAM_AB(0, val); return;
+ case 0x04000241: GPU::MapVRAM_AB(1, val); return;
+ case 0x04000242: GPU::MapVRAM_CD(2, val); return;
+ case 0x04000243: GPU::MapVRAM_CD(3, val); return;
+ case 0x04000244: GPU::MapVRAM_E(4, val); return;
+ case 0x04000245: GPU::MapVRAM_FG(5, val); return;
+ case 0x04000246: GPU::MapVRAM_FG(6, val); return;
+ case 0x04000247: MapSharedWRAM(val); return;
+ case 0x04000248: GPU::MapVRAM_H(7, val); return;
+ case 0x04000249: GPU::MapVRAM_I(8, val); return;
+
+ case 0x04000300:
+ // bit 0 of PostFlag9 is sticky: once set it cannot be cleared by a write
+ if (PostFlag9 & 0x01) val |= 0x01;
+ PostFlag9 = val & 0x03;
+ return;
+ }
+
+ if (addr >= 0x04000000 && addr < 0x04000060)
+ {
+ GPU::GPU2D_A->Write8(addr, val);
+ return;
+ }
+ if (addr >= 0x04001000 && addr < 0x04001060)
+ {
+ GPU::GPU2D_B->Write8(addr, val);
+ return;
+ }
+ if (addr >= 0x04000320 && addr < 0x040006A4)
+ {
+ GPU3D::Write8(addr, val);
+ return;
+ }
+
+ printf("unknown ARM9 IO write8 %08X %02X\n", addr, val);
+}
+
+// 16-bit write to an ARM9 I/O register.
+// Individually decoded registers come first; the remaining address ranges are
+// forwarded to the 2D engines (including their 0x6C register) and the 3D
+// engine. Unhandled writes are logged together with the ARM9 LR.
+void ARM9IOWrite16(u32 addr, u16 val)
+{
+    switch (addr)
+    {
+    case 0x04000004: GPU::SetDispStat(0, val); return;
+
+    case 0x04000060: GPU3D::Write16(addr, val); return;
+
+    // DMA channel 0-3 control words, low and high halves
+    case 0x040000B8: DMAs[0]->WriteCnt((DMAs[0]->Cnt & 0xFFFF0000) | val); return;
+    case 0x040000BA: DMAs[0]->WriteCnt((DMAs[0]->Cnt & 0x0000FFFF) | (val << 16)); return;
+    case 0x040000C4: DMAs[1]->WriteCnt((DMAs[1]->Cnt & 0xFFFF0000) | val); return;
+    case 0x040000C6: DMAs[1]->WriteCnt((DMAs[1]->Cnt & 0x0000FFFF) | (val << 16)); return;
+    case 0x040000D0: DMAs[2]->WriteCnt((DMAs[2]->Cnt & 0xFFFF0000) | val); return;
+    case 0x040000D2: DMAs[2]->WriteCnt((DMAs[2]->Cnt & 0x0000FFFF) | (val << 16)); return;
+    case 0x040000DC: DMAs[3]->WriteCnt((DMAs[3]->Cnt & 0xFFFF0000) | val); return;
+    case 0x040000DE: DMAs[3]->WriteCnt((DMAs[3]->Cnt & 0x0000FFFF) | (val << 16)); return;
+
+    // timers 0-3: reload value / control
+    case 0x04000100: Timers[0].Reload = val; return;
+    case 0x04000102: TimerStart(0, val); return;
+    case 0x04000104: Timers[1].Reload = val; return;
+    case 0x04000106: TimerStart(1, val); return;
+    case 0x04000108: Timers[2].Reload = val; return;
+    case 0x0400010A: TimerStart(2, val); return;
+    case 0x0400010C: Timers[3].Reload = val; return;
+    case 0x0400010E: TimerStart(3, val); return;
+
+    case 0x04000180:
+        // bits 8-11 written here show up as bits 0-3 on the ARM7 side
+        IPCSync7 &= 0xFFF0;
+        IPCSync7 |= ((val & 0x0F00) >> 8);
+        IPCSync9 &= 0xB0FF;
+        IPCSync9 |= (val & 0x4F00);
+        // bit 13 requests an IRQ on the ARM7, if the ARM7 enabled it (its bit 14)
+        if ((val & 0x2000) && (IPCSync7 & 0x4000))
+        {
+            SetIRQ(1, IRQ_IPCSync);
+        }
+        //CompensateARM7();
+        return;
+
+    case 0x04000184:
+        // bit 3: flush the send FIFO
+        if (val & 0x0008)
+            IPCFIFO9->Clear();
+        // enabling an IRQ whose condition already holds raises it right away
+        if ((val & 0x0004) && (!(IPCFIFOCnt9 & 0x0004)) && IPCFIFO9->IsEmpty())
+            SetIRQ(0, IRQ_IPCSendDone);
+        if ((val & 0x0400) && (!(IPCFIFOCnt9 & 0x0400)) && (!IPCFIFO7->IsEmpty()))
+            SetIRQ(0, IRQ_IPCRecv);
+        // bit 14 is the error flag: writing 1 acknowledges (clears) it
+        if (val & 0x4000)
+            IPCFIFOCnt9 &= ~0x4000;
+        // store the writable bits, but keep the error flag unless it was just
+        // acknowledged; a plain assignment would clear it on every write
+        IPCFIFOCnt9 = (val & 0x8404) | (IPCFIFOCnt9 & 0x4000);
+        return;
+
+    // cartridge SPI; control is only accepted while ExMemCnt bit 11 is clear
+    case 0x040001A0:
+        if (!(ExMemCnt[0] & (1<<11))) NDSCart::WriteSPICnt(val);
+        return;
+    case 0x040001A2:
+        NDSCart::WriteSPIData(val & 0xFF);
+        return;
+
+    case 0x040001B8: ROMSeed0[4] = val & 0x7F; return;
+    case 0x040001BA: ROMSeed1[4] = val & 0x7F; return;
+
+    case 0x04000204:
+        // bits 7-15 are mirrored into the ARM7's copy; its low 7 bits are kept
+        ExMemCnt[0] = val;
+        ExMemCnt[1] = (ExMemCnt[1] & 0x007F) | (val & 0xFF80);
+        return;
+
+    case 0x04000208: IME[0] = val & 0x1; return;
+
+    // VRAM bank mapping, two banks per halfword; the high byte at 0x04000246
+    // maps shared WRAM instead
+    case 0x04000240:
+        GPU::MapVRAM_AB(0, val & 0xFF);
+        GPU::MapVRAM_AB(1, val >> 8);
+        return;
+    case 0x04000242:
+        GPU::MapVRAM_CD(2, val & 0xFF);
+        GPU::MapVRAM_CD(3, val >> 8);
+        return;
+    case 0x04000244:
+        GPU::MapVRAM_E(4, val & 0xFF);
+        GPU::MapVRAM_FG(5, val >> 8);
+        return;
+    case 0x04000246:
+        GPU::MapVRAM_FG(6, val & 0xFF);
+        MapSharedWRAM(val >> 8);
+        return;
+    case 0x04000248:
+        GPU::MapVRAM_H(7, val & 0xFF);
+        GPU::MapVRAM_I(8, val >> 8);
+        return;
+
+    // writing a math control register reruns the operation immediately
+    case 0x04000280: DivCnt = val; StartDiv(); return;
+
+    case 0x040002B0: SqrtCnt = val; StartSqrt(); return;
+
+    case 0x04000300:
+        // bit 0 of PostFlag9 is sticky: once set it cannot be cleared by a write
+        if (PostFlag9 & 0x01) val |= 0x01;
+        PostFlag9 = val & 0x03;
+        return;
+
+    case 0x04000304:
+        PowerControl9 = val;
+        GPU::DisplaySwap(PowerControl9>>15);
+        return;
+    }
+
+    if ((addr >= 0x04000000 && addr < 0x04000060) || (addr == 0x0400006C))
+    {
+        GPU::GPU2D_A->Write16(addr, val);
+        return;
+    }
+    if ((addr >= 0x04001000 && addr < 0x04001060) || (addr == 0x0400106C))
+    {
+        GPU::GPU2D_B->Write16(addr, val);
+        return;
+    }
+    if (addr >= 0x04000320 && addr < 0x040006A4)
+    {
+        GPU3D::Write16(addr, val);
+        return;
+    }
+
+    printf("unknown ARM9 IO write16 %08X %04X %08X\n", addr, val, ARM9->R[14]);
+}
+
+// 32-bit write to an ARM9 I/O register.
+// Individually decoded registers come first; the remaining address ranges are
+// forwarded to the 2D engines and the 3D engine. Unhandled writes are logged.
+void ARM9IOWrite32(u32 addr, u32 val)
+{
+ switch (addr)
+ {
+ case 0x04000060: GPU3D::Write32(addr, val); return;
+ case 0x04000064: GPU::GPU2D_A->Write32(addr, val); return;
+
+ // DMA channels 0-3: source, destination, control
+ case 0x040000B0: DMAs[0]->SrcAddr = val; return;
+ case 0x040000B4: DMAs[0]->DstAddr = val; return;
+ case 0x040000B8: DMAs[0]->WriteCnt(val); return;
+ case 0x040000BC: DMAs[1]->SrcAddr = val; return;
+ case 0x040000C0: DMAs[1]->DstAddr = val; return;
+ case 0x040000C4: DMAs[1]->WriteCnt(val); return;
+ case 0x040000C8: DMAs[2]->SrcAddr = val; return;
+ case 0x040000CC: DMAs[2]->DstAddr = val; return;
+ case 0x040000D0: DMAs[2]->WriteCnt(val); return;
+ case 0x040000D4: DMAs[3]->SrcAddr = val; return;
+ case 0x040000D8: DMAs[3]->DstAddr = val; return;
+ case 0x040000DC: DMAs[3]->WriteCnt(val); return;
+
+ case 0x040000E0: DMA9Fill[0] = val; return;
+ case 0x040000E4: DMA9Fill[1] = val; return;
+ case 0x040000E8: DMA9Fill[2] = val; return;
+ case 0x040000EC: DMA9Fill[3] = val; return;
+
+ // timers 0-3: reload in the low half, control in the high half.
+ // The reload is stored first so that a start in the same write uses it.
+ case 0x04000100:
+ Timers[0].Reload = val & 0xFFFF;
+ TimerStart(0, val>>16);
+ return;
+ case 0x04000104:
+ Timers[1].Reload = val & 0xFFFF;
+ TimerStart(1, val>>16);
+ return;
+ case 0x04000108:
+ Timers[2].Reload = val & 0xFFFF;
+ TimerStart(2, val>>16);
+ return;
+ case 0x0400010C:
+ Timers[3].Reload = val & 0xFFFF;
+ TimerStart(3, val>>16);
+ return;
+
+ // IPC FIFO send: ignored unless the FIFO is enabled (bit 15 of IPCFIFOCnt9)
+ case 0x04000188:
+ if (IPCFIFOCnt9 & 0x8000)
+ {
+ // writing to a full FIFO sets error bit 14 and drops the value
+ if (IPCFIFO9->IsFull())
+ IPCFIFOCnt9 |= 0x4000;
+ else
+ {
+ bool wasempty = IPCFIFO9->IsEmpty();
+ IPCFIFO9->Write(val);
+ // the ARM7 gets its receive IRQ on the empty -> non-empty
+ // transition, if it enabled it (bit 10 of IPCFIFOCnt7)
+ if ((IPCFIFOCnt7 & 0x0400) && wasempty)
+ SetIRQ(1, IRQ_IPCRecv);
+ }
+ }
+ return;
+
+ // cartridge registers; only accepted from the ARM9 while ExMemCnt bit 11 is clear
+ case 0x040001A0:
+ if (!(ExMemCnt[0] & (1<<11)))
+ {
+ NDSCart::WriteSPICnt(val & 0xFFFF);
+ NDSCart::WriteSPIData((val >> 16) & 0xFF);
+ }
+ return;
+ case 0x040001A4:
+ if (!(ExMemCnt[0] & (1<<11))) NDSCart::WriteROMCnt(val);
+ return;
+
+ case 0x040001B0: *(u32*)&ROMSeed0[0] = val; return;
+ case 0x040001B4: *(u32*)&ROMSeed1[0] = val; return;
+
+ case 0x04000208: IME[0] = val & 0x1; return;
+ case 0x04000210: IE[0] = val; return;
+ // writing 1 bits acknowledges (clears) the matching IF bits; the 3D FIFO
+ // IRQ condition is then re-checked
+ case 0x04000214: IF[0] &= ~val; GPU3D::CheckFIFOIRQ(); return;
+
+ // VRAM bank mapping, one bank per byte; the top byte at 0x04000244 maps
+ // shared WRAM instead
+ case 0x04000240:
+ GPU::MapVRAM_AB(0, val & 0xFF);
+ GPU::MapVRAM_AB(1, (val >> 8) & 0xFF);
+ GPU::MapVRAM_CD(2, (val >> 16) & 0xFF);
+ GPU::MapVRAM_CD(3, val >> 24);
+ return;
+ case 0x04000244:
+ GPU::MapVRAM_E(4, val & 0xFF);
+ GPU::MapVRAM_FG(5, (val >> 8) & 0xFF);
+ GPU::MapVRAM_FG(6, (val >> 16) & 0xFF);
+ MapSharedWRAM(val >> 24);
+ return;
+ case 0x04000248:
+ GPU::MapVRAM_H(7, val & 0xFF);
+ GPU::MapVRAM_I(8, (val >> 8) & 0xFF);
+ return;
+
+ // writing any math operand reruns the operation immediately
+ case 0x04000290: DivNumerator[0] = val; StartDiv(); return;
+ case 0x04000294: DivNumerator[1] = val; StartDiv(); return;
+ case 0x04000298: DivDenominator[0] = val; StartDiv(); return;
+ case 0x0400029C: DivDenominator[1] = val; StartDiv(); return;
+
+ case 0x040002B8: SqrtVal[0] = val; StartSqrt(); return;
+ case 0x040002BC: SqrtVal[1] = val; StartSqrt(); return;
+ }
+
+ if (addr >= 0x04000000 && addr < 0x04000060)
+ {
+ GPU::GPU2D_A->Write32(addr, val);
+ return;
+ }
+ if (addr >= 0x04001000 && addr < 0x04001060)
+ {
+ GPU::GPU2D_B->Write32(addr, val);
+ return;
+ }
+ if (addr >= 0x04000320 && addr < 0x040006A4)
+ {
+ GPU3D::Write32(addr, val);
+ return;
+ }
+
+ printf("unknown ARM9 IO write32 %08X %08X\n", addr, val);
+}
+
+
+// 8-bit read from an ARM7 I/O register.
+// Reads in the sound register range (0x04000400-0x0400051F) return 0;
+// other unhandled addresses are logged and read as 0.
+u8 ARM7IORead8(u32 addr)
+{
+ switch (addr)
+ {
+ case 0x04000138: return RTC::Read() & 0xFF;
+
+ case 0x040001A2: return NDSCart::ReadSPIData();
+
+ case 0x040001C2: return SPI::ReadData();
+
+ case 0x04000208: return IME[1];
+
+ // ARM7-side view of the VRAM status and shared WRAM control
+ case 0x04000240: return GPU::VRAMSTAT;
+ case 0x04000241: return WRAMCnt;
+
+ case 0x04000300: return PostFlag7;
+ }
+
+ if (addr >= 0x04000400 && addr < 0x04000520)
+ {
+ // sound I/O
+ return 0;
+ }
+
+ printf("unknown ARM7 IO read8 %08X\n", addr);
+ return 0;
+}
+
+// 16-bit read from an ARM7 I/O register.
+// Reads in the sound register range (0x04000400-0x0400051F) return 0, except
+// 0x04000504 which is decoded explicitly. Other unhandled addresses are
+// logged together with the ARM7 PC and read as 0.
+u16 ARM7IORead16(u32 addr)
+{
+    switch (addr)
+    {
+    // display status (ARM7 copy) and scanline counter
+    case 0x04000004: return GPU::DispStat[1];
+    case 0x04000006: return GPU::VCount;
+
+    // ARM7 DMA channels are DMAs[4..7]; control words, low and high halves
+    case 0x040000B8: return DMAs[4]->Cnt & 0xFFFF;
+    case 0x040000BA: return DMAs[4]->Cnt >> 16;
+    case 0x040000C4: return DMAs[5]->Cnt & 0xFFFF;
+    case 0x040000C6: return DMAs[5]->Cnt >> 16;
+    case 0x040000D0: return DMAs[6]->Cnt & 0xFFFF;
+    case 0x040000D2: return DMAs[6]->Cnt >> 16;
+    case 0x040000DC: return DMAs[7]->Cnt & 0xFFFF;
+    case 0x040000DE: return DMAs[7]->Cnt >> 16;
+
+    // ARM7 timers are Timers[4..7]: current counter / control
+    case 0x04000100: return TimerGetCounter(4);
+    case 0x04000102: return Timers[4].Cnt;
+    case 0x04000104: return TimerGetCounter(5);
+    case 0x04000106: return Timers[5].Cnt;
+    case 0x04000108: return TimerGetCounter(6);
+    case 0x0400010A: return Timers[6].Cnt;
+    case 0x0400010C: return TimerGetCounter(7);
+    case 0x0400010E: return Timers[7].Cnt;
+
+    // key state: low half at 0x130, high half at 0x136
+    case 0x04000130: return KeyInput & 0xFFFF;
+    case 0x04000136: return KeyInput >> 16;
+
+    case 0x04000134: return 0x8000;
+    case 0x04000138: return RTC::Read();
+
+    case 0x04000180: return IPCSync7;
+    case 0x04000184:
+        {
+            // IPC FIFO control: the stored bits plus live status bits.
+            // bits 0/1: own send FIFO (IPCFIFO7) empty/full,
+            // bits 8/9: receive FIFO (IPCFIFO9) empty/full
+            u16 val = IPCFIFOCnt7;
+            if (IPCFIFO7->IsEmpty())     val |= 0x0001;
+            else if (IPCFIFO7->IsFull()) val |= 0x0002;
+            if (IPCFIFO9->IsEmpty())     val |= 0x0100;
+            else if (IPCFIFO9->IsFull()) val |= 0x0200;
+            return val;
+        }
+
+    case 0x040001A0: return NDSCart::SPICnt;
+    case 0x040001A2: return NDSCart::ReadSPIData();
+
+    case 0x040001C0: return SPI::Cnt;
+    case 0x040001C2: return SPI::ReadData();
+
+    case 0x04000204: return ExMemCnt[1];
+    case 0x04000208: return IME[1];
+
+    case 0x04000300: return PostFlag7;
+    case 0x04000304: return PowerControl7;
+    case 0x04000308: return ARM7BIOSProt;
+
+    case 0x04000504: return _soundbias;
+    }
+
+    if (addr >= 0x04000400 && addr < 0x04000520)
+    {
+        // sound I/O
+        return 0;
+    }
+
+    // report the PC of the CPU that performed the access (the ARM7)
+    printf("unknown ARM7 IO read16 %08X %08X\n", addr, ARM7->R[15]);
+    return 0;
+}
+
+// 32-bit read from an ARM7 I/O register.
+// Reads in the sound register range (0x04000400-0x0400051F) return 0; other
+// unhandled addresses are logged and read as 0. Note that reading 0x04100000
+// (IPC FIFO receive) has side effects.
+u32 ARM7IORead32(u32 addr)
+{
+ switch (addr)
+ {
+ // display status (ARM7 copy) in the low half, scanline counter in the high half
+ case 0x04000004: return GPU::DispStat[1] | (GPU::VCount << 16);
+
+ // ARM7 DMA channels are DMAs[4..7]: source, destination, control
+ case 0x040000B0: return DMAs[4]->SrcAddr;
+ case 0x040000B4: return DMAs[4]->DstAddr;
+ case 0x040000B8: return DMAs[4]->Cnt;
+ case 0x040000BC: return DMAs[5]->SrcAddr;
+ case 0x040000C0: return DMAs[5]->DstAddr;
+ case 0x040000C4: return DMAs[5]->Cnt;
+ case 0x040000C8: return DMAs[6]->SrcAddr;
+ case 0x040000CC: return DMAs[6]->DstAddr;
+ case 0x040000D0: return DMAs[6]->Cnt;
+ case 0x040000D4: return DMAs[7]->SrcAddr;
+ case 0x040000D8: return DMAs[7]->DstAddr;
+ case 0x040000DC: return DMAs[7]->Cnt;
+
+ // ARM7 timers are Timers[4..7]: counter in the low half, control in the high half
+ case 0x04000100: return TimerGetCounter(4) | (Timers[4].Cnt << 16);
+ case 0x04000104: return TimerGetCounter(5) | (Timers[5].Cnt << 16);
+ case 0x04000108: return TimerGetCounter(6) | (Timers[6].Cnt << 16);
+ case 0x0400010C: return TimerGetCounter(7) | (Timers[7].Cnt << 16);
+
+ case 0x040001A0: return NDSCart::SPICnt | (NDSCart::ReadSPIData() << 16);
+ case 0x040001A4: return NDSCart::ROMCnt;
+
+ case 0x040001C0:
+ return SPI::Cnt | (SPI::ReadData() << 16);
+
+ case 0x04000208: return IME[1];
+ case 0x04000210: return IE[1];
+ case 0x04000214: return IF[1];
+
+ // IPC FIFO receive: data sent by the ARM9 (IPCFIFO9)
+ case 0x04100000:
+ // bit 15 of IPCFIFOCnt7 gates the FIFO; when clear the read only peeks
+ if (IPCFIFOCnt7 & 0x8000)
+ {
+ u32 ret;
+ if (IPCFIFO9->IsEmpty())
+ {
+ // reading an empty FIFO sets error bit 14 and returns the peeked value
+ IPCFIFOCnt7 |= 0x4000;
+ ret = IPCFIFO9->Peek();
+ }
+ else
+ {
+ ret = IPCFIFO9->Read();
+
+ // the sender (ARM9) gets its send-done IRQ once its FIFO drains,
+ // if it enabled that IRQ (bit 2 of IPCFIFOCnt9)
+ if (IPCFIFO9->IsEmpty() && (IPCFIFOCnt9 & 0x0004))
+ SetIRQ(0, IRQ_IPCSendDone);
+ }
+ return ret;
+ }
+ else
+ return IPCFIFO9->Peek();
+
+ // cartridge ROM data; only served to the ARM7 while ExMemCnt bit 11 is set
+ case 0x04100010:
+ if (ExMemCnt[0] & (1<<11)) return NDSCart::ReadROMData();
+ return 0;
+ }
+
+ if (addr >= 0x04000400 && addr < 0x04000520)
+ {
+ // sound I/O
+ return 0;
+ }
+
+ printf("unknown ARM7 IO read32 %08X\n", addr);
+ return 0;
+}
+
+// 8-bit write to the ARM7 I/O register space.
+// Writes inside the sound range (0x04000400-0x0400051F) are silently dropped;
+// any other unhandled address is logged.
+void ARM7IOWrite8(u32 addr, u8 val)
+{
+ switch (addr)
+ {
+ // second argument is true for byte writes (the 16-bit handler passes false)
+ case 0x04000138: RTC::Write(val, true); return;
+
+ // cart SPI control, low byte; ignored unless bit 11 of ExMemCnt[0] is set
+ case 0x040001A0:
+ if (ExMemCnt[0] & (1<<11))
+ {
+ NDSCart::WriteSPICnt((NDSCart::SPICnt & 0xFF00) | val);
+ }
+ return;
+ // cart SPI control, high byte; same ExMemCnt[0] bit 11 gate
+ case 0x040001A1:
+ if (ExMemCnt[0] & (1<<11))
+ {
+ NDSCart::WriteSPICnt((NDSCart::SPICnt & 0x00FF) | (val << 8));
+ }
+ return;
+ // NOTE(review): unlike SPICnt above, the data write is not gated on ExMemCnt[0] here
+ case 0x040001A2:
+ NDSCart::WriteSPIData(val);
+ return;
+
+ // the eight cart ROM command bytes, written one at a time
+ case 0x040001A8: NDSCart::ROMCommand[0] = val; return;
+ case 0x040001A9: NDSCart::ROMCommand[1] = val; return;
+ case 0x040001AA: NDSCart::ROMCommand[2] = val; return;
+ case 0x040001AB: NDSCart::ROMCommand[3] = val; return;
+ case 0x040001AC: NDSCart::ROMCommand[4] = val; return;
+ case 0x040001AD: NDSCart::ROMCommand[5] = val; return;
+ case 0x040001AE: NDSCart::ROMCommand[6] = val; return;
+ case 0x040001AF: NDSCart::ROMCommand[7] = val; return;
+
+ case 0x040001C2:
+ SPI::WriteData(val);
+ return;
+
+ // only bit 0 is kept
+ case 0x04000208: IME[1] = val & 0x1; return;
+
+ // PostFlag7: ignored when ARM7->R[15] >= 0x4000; bit 0 can be set but,
+ // once set, further writes are ignored
+ case 0x04000300:
+ if (ARM7->R[15] >= 0x4000)
+ return;
+ if (!(PostFlag7 & 0x01))
+ PostFlag7 = val & 0x01;
+ return;
+
+ // writing exactly 0x80 calls ARM7->Halt(1); other values are ignored
+ case 0x04000301:
+ if (val == 0x80) ARM7->Halt(1);
+ return;
+ }
+
+ if (addr >= 0x04000400 && addr < 0x04000520)
+ {
+ // sound I/O
+ return;
+ }
+
+ printf("unknown ARM7 IO write8 %08X %02X\n", addr, val);
+}
+
+// 16-bit write to the ARM7 I/O register space.
+// Writes inside the sound range (0x04000400-0x0400051F) are silently dropped
+// (except 0x04000504, which is stored in _soundbias); any other unhandled
+// address is logged.
+void ARM7IOWrite16(u32 addr, u16 val)
+{
+    switch (addr)
+    {
+    case 0x04000004: GPU::SetDispStat(1, val); return;
+
+    // DMA control written one halfword at a time: merge with the other half of Cnt
+    case 0x040000B8: DMAs[4]->WriteCnt((DMAs[4]->Cnt & 0xFFFF0000) | val); return;
+    case 0x040000BA: DMAs[4]->WriteCnt((DMAs[4]->Cnt & 0x0000FFFF) | (val << 16)); return;
+    case 0x040000C4: DMAs[5]->WriteCnt((DMAs[5]->Cnt & 0xFFFF0000) | val); return;
+    case 0x040000C6: DMAs[5]->WriteCnt((DMAs[5]->Cnt & 0x0000FFFF) | (val << 16)); return;
+    case 0x040000D0: DMAs[6]->WriteCnt((DMAs[6]->Cnt & 0xFFFF0000) | val); return;
+    case 0x040000D2: DMAs[6]->WriteCnt((DMAs[6]->Cnt & 0x0000FFFF) | (val << 16)); return;
+    case 0x040000DC: DMAs[7]->WriteCnt((DMAs[7]->Cnt & 0xFFFF0000) | val); return;
+    case 0x040000DE: DMAs[7]->WriteCnt((DMAs[7]->Cnt & 0x0000FFFF) | (val << 16)); return;
+
+    // timers 4..7: even offsets set the reload value, odd halfwords the control
+    case 0x04000100: Timers[4].Reload = val; return;
+    case 0x04000102: TimerStart(4, val); return;
+    case 0x04000104: Timers[5].Reload = val; return;
+    case 0x04000106: TimerStart(5, val); return;
+    case 0x04000108: Timers[6].Reload = val; return;
+    case 0x0400010A: TimerStart(6, val); return;
+    case 0x0400010C: Timers[7].Reload = val; return;
+    case 0x0400010E: TimerStart(7, val); return;
+
+    // debug port: writes are accepted and ignored
+    case 0x04000134: return;
+
+    case 0x04000138: RTC::Write(val, false); return;
+
+    // IPC sync: bits 8-11 of val become bits 0-3 of IPCSync9; bits 8-11 and 14
+    // are stored in IPCSync7. Bit 13 raises IRQ_IPCSync on CPU 0 if bit 14 of
+    // IPCSync9 is set.
+    case 0x04000180:
+        IPCSync9 &= 0xFFF0;
+        IPCSync9 |= ((val & 0x0F00) >> 8);
+        IPCSync7 &= 0xB0FF;
+        IPCSync7 |= (val & 0x4F00);
+        if ((val & 0x2000) && (IPCSync9 & 0x4000))
+        {
+            SetIRQ(0, IRQ_IPCSync);
+        }
+        return;
+
+    // IPC FIFO control
+    case 0x04000184:
+        if (val & 0x0008)
+            IPCFIFO7->Clear();
+        // the IRQ conditions below compare against the OLD control value,
+        // so they must run before IPCFIFOCnt7 is updated
+        if ((val & 0x0004) && (!(IPCFIFOCnt7 & 0x0004)) && IPCFIFO7->IsEmpty())
+            SetIRQ(1, IRQ_IPCSendDone);
+        if ((val & 0x0400) && (!(IPCFIFOCnt7 & 0x0400)) && (!IPCFIFO9->IsEmpty()))
+            SetIRQ(1, IRQ_IPCRecv);
+        // bit 14 is the error flag: writing 1 acknowledges (clears) it,
+        // writing 0 must leave it as it was
+        if (val & 0x4000)
+            IPCFIFOCnt7 &= ~0x4000;
+        IPCFIFOCnt7 = (val & 0x8404) | (IPCFIFOCnt7 & 0x4000);
+        return;
+
+    // cart SPI control; ignored unless bit 11 of ExMemCnt[0] is set
+    case 0x040001A0:
+        if (ExMemCnt[0] & (1<<11))
+            NDSCart::WriteSPICnt(val);
+        return;
+    case 0x040001A2:
+        NDSCart::WriteSPIData(val & 0xFF);
+        return;
+
+    // top byte of each ROM seed (7 bits kept)
+    case 0x040001B8: ROMSeed0[12] = val & 0x7F; return;
+    case 0x040001BA: ROMSeed1[12] = val & 0x7F; return;
+
+    case 0x040001C0:
+        SPI::WriteCnt(val);
+        return;
+    case 0x040001C2:
+        SPI::WriteData(val & 0xFF);
+        return;
+
+    // only the low 7 bits of ExMemCnt[1] are writable from here
+    case 0x04000204:
+        ExMemCnt[1] = (ExMemCnt[1] & 0xFF80) | (val & 0x007F);
+        return;
+
+    case 0x04000208: IME[1] = val & 0x1; return;
+
+    // PostFlag7: ignored when ARM7->R[15] >= 0x4000; bit 0 can be set but,
+    // once set, further writes are ignored
+    case 0x04000300:
+        if (ARM7->R[15] >= 0x4000)
+            return;
+        if (!(PostFlag7 & 0x01))
+            PostFlag7 = val & 0x01;
+        return;
+
+    case 0x04000304: PowerControl7 = val; return;
+
+    // write-once: only takes effect while the stored value is still 0
+    case 0x04000308:
+        if (ARM7BIOSProt == 0)
+            ARM7BIOSProt = val;
+        return;
+
+    case 0x04000504: // removeme
+        _soundbias = val & 0x3FF;
+        return;
+    }
+
+    if (addr >= 0x04000400 && addr < 0x04000520)
+    {
+        // sound I/O
+        return;
+    }
+
+    printf("unknown ARM7 IO write16 %08X %04X\n", addr, val);
+}
+
+// 32-bit write to the ARM7 I/O register space.
+// Writes inside the sound range (0x04000400-0x0400051F) are silently dropped;
+// any other unhandled address is logged.
+void ARM7IOWrite32(u32 addr, u32 val)
+{
+ switch (addr)
+ {
+ // DMA source / destination / control; this handler uses DMAs[4..7]
+ case 0x040000B0: DMAs[4]->SrcAddr = val; return;
+ case 0x040000B4: DMAs[4]->DstAddr = val; return;
+ case 0x040000B8: DMAs[4]->WriteCnt(val); return;
+ case 0x040000BC: DMAs[5]->SrcAddr = val; return;
+ case 0x040000C0: DMAs[5]->DstAddr = val; return;
+ case 0x040000C4: DMAs[5]->WriteCnt(val); return;
+ case 0x040000C8: DMAs[6]->SrcAddr = val; return;
+ case 0x040000CC: DMAs[6]->DstAddr = val; return;
+ case 0x040000D0: DMAs[6]->WriteCnt(val); return;
+ case 0x040000D4: DMAs[7]->SrcAddr = val; return;
+ case 0x040000D8: DMAs[7]->DstAddr = val; return;
+ case 0x040000DC: DMAs[7]->WriteCnt(val); return;
+
+ // timers 4..7: low half is the reload value, high half goes to TimerStart().
+ // The reload value is stored first, then the timer is (re)started.
+ case 0x04000100:
+ Timers[4].Reload = val & 0xFFFF;
+ TimerStart(4, val>>16);
+ return;
+ case 0x04000104:
+ Timers[5].Reload = val & 0xFFFF;
+ TimerStart(5, val>>16);
+ return;
+ case 0x04000108:
+ Timers[6].Reload = val & 0xFFFF;
+ TimerStart(6, val>>16);
+ return;
+ case 0x0400010C:
+ Timers[7].Reload = val & 0xFFFF;
+ TimerStart(7, val>>16);
+ return;
+
+ // IPC FIFO send: pushes into IPCFIFO7, only while bit 15 of IPCFIFOCnt7 is set
+ case 0x04000188:
+ if (IPCFIFOCnt7 & 0x8000)
+ {
+ // sending while full: record it in bit 14 and drop the word
+ if (IPCFIFO7->IsFull())
+ IPCFIFOCnt7 |= 0x4000;
+ else
+ {
+ bool wasempty = IPCFIFO7->IsEmpty();
+ IPCFIFO7->Write(val);
+ // FIFO went from empty to non-empty: raise IRQ_IPCRecv on CPU 0
+ // if bit 10 of IPCFIFOCnt9 is set
+ if ((IPCFIFOCnt9 & 0x0400) && wasempty)
+ SetIRQ(0, IRQ_IPCRecv);
+ }
+ }
+ return;
+
+ // cart SPI control (low half) then data (bits 16-23); gated on ExMemCnt[0] bit 11
+ case 0x040001A0:
+ if (ExMemCnt[0] & (1<<11))
+ {
+ NDSCart::WriteSPICnt(val & 0xFFFF);
+ NDSCart::WriteSPIData((val >> 16) & 0xFF);
+ }
+ return;
+ case 0x040001A4:
+ if (ExMemCnt[0] & (1<<11)) NDSCart::WriteROMCnt(val);
+ return;
+
+ // low 32 bits of each ROM seed, stored at byte offset 8 of the seed arrays
+ case 0x040001B0: *(u32*)&ROMSeed0[8] = val; return;
+ case 0x040001B4: *(u32*)&ROMSeed1[8] = val; return;
+
+ case 0x04000208: IME[1] = val & 0x1; return;
+ case 0x04000210: IE[1] = val; return;
+ // write-1-to-clear: every bit set in val is cleared in IF[1]
+ case 0x04000214: IF[1] &= ~val; return;
+ }
+
+ if (addr >= 0x04000400 && addr < 0x04000520)
+ {
+ // sound I/O
+ return;
+ }
+
+ printf("unknown ARM7 IO write32 %08X %08X\n", addr, val);
+}
+
+}
diff --git a/src/NDS.h b/src/NDS.h
new file mode 100644
index 0000000..ed706af
--- /dev/null
+++ b/src/NDS.h
@@ -0,0 +1,181 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef NDS_H
+#define NDS_H
+
+#include "types.h"
+
+namespace NDS
+{
+
+/*#define SCHED_BUF_LEN 64
+
+typedef struct _SchedEvent
+{
+ u32 Delay;
+ void (*Func)(u32);
+ u32 Param;
+ struct _SchedEvent* PrevEvent;
+ struct _SchedEvent* NextEvent;
+
+} SchedEvent;*/
+
+// Scheduler event identifiers: one LCD event plus four timer events for each
+// of the two timer groups (Timer9_x and Timer7_x). Event_MAX is the number of
+// identifiers.
+// Presumably these are the `id` values taken by ScheduleEvent()/CancelEvent() -- confirm.
+enum
+{
+ Event_LCD = 0,
+
+ Event_Timer9_0,
+ Event_Timer9_1,
+ Event_Timer9_2,
+ Event_Timer9_3,
+ Event_Timer7_0,
+ Event_Timer7_1,
+ Event_Timer7_2,
+ Event_Timer7_3,
+
+ Event_MAX
+};
+
+// One scheduled event: a callback, a signed cycle count and the u32 argument
+// for the callback. The fields mirror the func/delay/param arguments of
+// ScheduleEvent(); how WaitCycles is counted down is not visible in this header.
+typedef struct
+{
+ void (*Func)(u32 param);
+ s32 WaitCycles;
+ u32 Param;
+
+} SchedEvent;
+
+// Interrupt source numbers, consecutive from IRQ_VBlank = 0 to IRQ_Wifi = 24.
+// These are the `irq` values passed to SetIRQ()/ClearIRQ().
+// Presumably each number is the bit position of that source in IE/IF -- confirm.
+enum
+{
+ IRQ_VBlank = 0,
+ IRQ_HBlank,
+ IRQ_VCount,
+ IRQ_Timer0,
+ IRQ_Timer1,
+ IRQ_Timer2,
+ IRQ_Timer3,
+ IRQ_RTC,
+ IRQ_DMA0,
+ IRQ_DMA1,
+ IRQ_DMA2,
+ IRQ_DMA3,
+ IRQ_Keypad,
+ IRQ_GBASlot,
+ IRQ_Unused14,
+ IRQ_Unused15,
+ IRQ_IPCSync,
+ IRQ_IPCSendDone,
+ IRQ_IPCRecv,
+ IRQ_CartSendDone,
+ IRQ_CartIREQMC,
+ IRQ_GXFIFO,
+ IRQ_LidOpen,
+ IRQ_SPI,
+ IRQ_Wifi
+};
+
+// State of one hardware timer (see `Timers[8]` below).
+typedef struct
+{
+ u16 Reload; // value written to the timer's data register by the I/O write handlers
+ u16 Cnt; // control value returned by the I/O read handlers
+ u32 Counter; // NOTE(review): presumably the running count -- confirm in NDS.cpp
+ u32 CycleShift; // NOTE(review): presumably derived from the prescaler setting -- confirm
+ //SchedEvent* Event;
+
+} Timer;
+
+// hax
+extern u32 IME[2];
+extern u32 IE[2];
+extern u32 IF[2];
+extern Timer Timers[8];
+
+extern u16 ExMemCnt[2];
+extern u8 ROMSeed0[2*8];
+extern u8 ROMSeed1[2*8];
+
+extern u8 ARM9BIOS[0x1000];
+extern u8 ARM7BIOS[0x4000];
+
+bool Init();
+void DeInit();
+void Reset();
+
+void SetupDirectBoot();
+
+void RunFrame();
+
+void PressKey(u32 key);
+void ReleaseKey(u32 key);
+void TouchScreen(u16 x, u16 y);
+void ReleaseScreen();
+
+/*SchedEvent* ScheduleEvent(s32 Delay, void (*Func)(u32), u32 Param);
+void CancelEvent(SchedEvent* event);
+void RunEvents(s32 cycles);*/
+void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 param);
+void CancelEvent(u32 id);
+
+// DO NOT CALL FROM ARM7!!
+void CompensateARM7();
+
+void debug(u32 p);
+
+void Halt();
+
+void MapSharedWRAM(u8 val);
+
+void SetIRQ(u32 cpu, u32 irq);
+void ClearIRQ(u32 cpu, u32 irq);
+bool HaltInterrupted(u32 cpu);
+void StopCPU(u32 cpu, u32 mask);
+void ResumeCPU(u32 cpu, u32 mask);
+
+void CheckDMAs(u32 cpu, u32 mode);
+
+u8 ARM9Read8(u32 addr);
+u16 ARM9Read16(u32 addr);
+u32 ARM9Read32(u32 addr);
+void ARM9Write8(u32 addr, u8 val);
+void ARM9Write16(u32 addr, u16 val);
+void ARM9Write32(u32 addr, u32 val);
+
+u8 ARM7Read8(u32 addr);
+u16 ARM7Read16(u32 addr);
+u32 ARM7Read32(u32 addr);
+void ARM7Write8(u32 addr, u8 val);
+void ARM7Write16(u32 addr, u16 val);
+void ARM7Write32(u32 addr, u32 val);
+
+u8 ARM9IORead8(u32 addr);
+u16 ARM9IORead16(u32 addr);
+u32 ARM9IORead32(u32 addr);
+void ARM9IOWrite8(u32 addr, u8 val);
+void ARM9IOWrite16(u32 addr, u16 val);
+void ARM9IOWrite32(u32 addr, u32 val);
+
+u8 ARM7IORead8(u32 addr);
+u16 ARM7IORead16(u32 addr);
+u32 ARM7IORead32(u32 addr);
+void ARM7IOWrite8(u32 addr, u8 val);
+void ARM7IOWrite16(u32 addr, u16 val);
+void ARM7IOWrite32(u32 addr, u32 val);
+
+}
+
+#endif // NDS_H
diff --git a/src/NDSCart.cpp b/src/NDSCart.cpp
new file mode 100644
index 0000000..416da26
--- /dev/null
+++ b/src/NDSCart.cpp
@@ -0,0 +1,939 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "NDS.h"
+#include "NDSCart.h"
+
+
+namespace NDSCart_SRAM
+{
+
+u8* SRAM;
+u32 SRAMLength;
+
+char SRAMPath[256];
+
+void (*WriteFunc)(u8 val, bool islast);
+
+u32 Discover_MemoryType;
+u32 Discover_Likeliness;
+u8* Discover_Buffer;
+u32 Discover_DataPos;
+
+u32 Hold;
+u8 CurCmd;
+u32 DataPos;
+u8 Data;
+
+u8 StatusReg;
+u32 Addr;
+
+
+void Write_Null(u8 val, bool islast);
+void Write_EEPROMTiny(u8 val, bool islast);
+void Write_EEPROM(u8 val, bool islast);
+void Write_Flash(u8 val, bool islast);
+void Write_Discover(u8 val, bool islast);
+
+
+// Marks both heap buffers as "not allocated". Always reports success.
+bool Init()
+{
+    Discover_Buffer = NULL;
+    SRAM = NULL;
+
+    return true;
+}
+
+// Releases the save image and the discovery log.
+// delete[] on a null pointer is a no-op, so no explicit null checks are needed.
+void DeInit()
+{
+    delete[] SRAM;
+    delete[] Discover_Buffer;
+}
+
+// Reset hook for the save-memory module. Currently a no-op: the SPI transfer
+// state is (re)initialised by LoadSave() instead.
+void Reset()
+{
+}
+
+// Loads the save file at `path` and selects the matching write handler.
+//
+// - File exists: its contents become the save image and the handler is chosen
+//   from the file length (unrecognised lengths get Write_Null).
+// - File missing: no image is allocated; writes are logged by Write_Discover
+//   until the memory type can be guessed.
+//
+// `path` is remembered (truncated to 255 chars) as the file to flush to.
+void LoadSave(char* path)
+{
+    // Drop any previous state. The pointers are nulled so that a later
+    // DeInit()/LoadSave() can never delete[] the same buffer twice.
+    delete[] SRAM;
+    SRAM = NULL;
+    delete[] Discover_Buffer;
+    Discover_Buffer = NULL;
+
+    strncpy(SRAMPath, path, 255);
+    SRAMPath[255] = '\0';
+
+    FILE* f = fopen(path, "rb");
+    if (f)
+    {
+        fseek(f, 0, SEEK_END);
+        // ftell() returns -1 on failure; treat that like an empty file rather
+        // than casting it to a ~4GB allocation
+        long filelen = ftell(f);
+        SRAMLength = (filelen > 0) ? (u32)filelen : 0;
+
+        SRAM = new u8[SRAMLength];
+        memset(SRAM, 0, SRAMLength); // deterministic contents even on a short read
+
+        fseek(f, 0, SEEK_SET);
+        if (SRAMLength && fread(SRAM, SRAMLength, 1, f) != 1)
+            printf("!! failed to read save file %s\n", path);
+
+        fclose(f);
+
+        switch (SRAMLength)
+        {
+        case 512: WriteFunc = Write_EEPROMTiny; break;
+        case 8192:
+        case 65536: WriteFunc = Write_EEPROM; break;
+        case 256*1024:
+        case 512*1024:
+        case 1024*1024:
+        case 8192*1024: WriteFunc = Write_Flash; break;
+        default:
+            printf("!! BAD SAVE LENGTH %d\n", SRAMLength);
+            WriteFunc = Write_Null;
+            break;
+        }
+    }
+    else
+    {
+        // no save yet: record writes until the type can be inferred
+        SRAMLength = 0;
+        WriteFunc = Write_Discover;
+        Discover_MemoryType = 2;
+        Discover_Likeliness = 0;
+
+        Discover_DataPos = 0;
+        Discover_Buffer = new u8[256*1024];
+        memset(Discover_Buffer, 0, 256*1024);
+    }
+
+    // fresh SPI transfer state
+    Hold = 0;
+    CurCmd = 0;
+    DataPos = 0;
+    Addr = 0;
+    Data = 0;
+    StatusReg = 0x00;
+}
+
+// Returns the current SPI output byte (`Data`), as last set by Write() or by
+// the active WriteFunc handler.
+u8 Read()
+{
+ return Data;
+}
+
+// Commits to the save-memory type guessed by Write_Discover: installs the
+// matching write handler, allocates the save image and replays every write
+// that was logged in Discover_Buffer while the type was still unknown.
+//
+// Log format (produced by Write_Discover): a sequence of records
+// [u32 length][command byte][length-1 payload bytes], ended by a zero length.
+//
+// If the guessed type has no known size (type 5, or an unknown type value) no
+// image is allocated and the log is left untouched.
+void SetMemoryType()
+{
+    switch (Discover_MemoryType)
+    {
+    case 1:
+        printf("Save memory type: EEPROM 4k\n");
+        WriteFunc = Write_EEPROMTiny;
+        SRAMLength = 512;
+        break;
+
+    case 2:
+        printf("Save memory type: EEPROM 64k\n");
+        WriteFunc = Write_EEPROM;
+        SRAMLength = 8192;
+        break;
+
+    case 3:
+        printf("Save memory type: EEPROM 512k\n");
+        WriteFunc = Write_EEPROM;
+        SRAMLength = 65536;
+        break;
+
+    case 4:
+        printf("Save memory type: Flash. Hope the size is 256K.\n");
+        WriteFunc = Write_Flash;
+        SRAMLength = 256*1024;
+        break;
+
+    case 5:
+        printf("Save memory type: ...something else\n");
+        WriteFunc = Write_Null;
+        SRAMLength = 0;
+        break;
+    }
+
+    if (!SRAMLength)
+        return;
+
+    SRAM = new u8[SRAMLength];
+    // Start from a defined image: bytes the game never wrote would otherwise
+    // be uninitialised heap contents that end up in the save file. Zero
+    // matches what reads returned while discovery was still in progress.
+    memset(SRAM, 0, SRAMLength);
+
+    // replay writes that occurred during discovery
+    const u32 buflen = 256*1024; // size of Discover_Buffer, see LoadSave()
+    u8 prev_cmd = CurCmd;
+    u32 pos = 0;
+    while (pos + 4 <= buflen)
+    {
+        // records are packed back to back, so the length field is not
+        // necessarily 4-byte aligned: copy it out instead of casting
+        u32 len;
+        memcpy(&len, &Discover_Buffer[pos], 4);
+        pos += 4;
+        if (len == 0) break;
+        if (len > buflen - pos) break; // record would run past the buffer
+
+        CurCmd = Discover_Buffer[pos++];
+        DataPos = 0;
+        Addr = 0;
+        Data = 0;
+        for (u32 i = 1; i < len; i++)
+        {
+            WriteFunc(Discover_Buffer[pos++], (i==(len-1)));
+            DataPos++;
+        }
+    }
+
+    CurCmd = prev_cmd;
+
+    // the log is no longer needed; null the pointer so DeInit()/LoadSave()
+    // do not delete[] it a second time
+    delete[] Discover_Buffer;
+    Discover_Buffer = NULL;
+}
+
+// Write handler used while the save-memory type is still unknown.
+//
+// Write commands (0x02 / 0x0A) are appended to Discover_Buffer as records of
+// [u32 length][command byte][payload...] and the length of the longest write
+// seen so far is used to guess the memory type. The first read command
+// (0x03 / 0x0B) after at least one write commits the guess via
+// SetMemoryType() and is then forwarded to the newly installed handler.
+//
+// val    - byte received over SPI
+// islast - true for the final byte of the current transfer
+void Write_Discover(u8 val, bool islast)
+{
+    // attempt at autodetecting the type of save memory.
+    // we basically hope the game will be nice and clear whole pages of memory.
+
+    const u32 buflen = 256*1024; // size of Discover_Buffer, see LoadSave()
+
+    if (CurCmd == 0x03 || CurCmd == 0x0B)
+    {
+        if (Discover_Likeliness)
+        {
+            // apply. and pray.
+            SetMemoryType();
+
+            DataPos = 0;
+            Addr = 0;
+            Data = 0;
+            return WriteFunc(val, islast);
+        }
+        else
+        {
+            Data = 0;
+            return;
+        }
+    }
+
+    if (CurCmd == 0x02 || CurCmd == 0x0A)
+    {
+        u32 len = DataPos+1;
+
+        // A record needs 4 (length) + 1 (command) + len bytes, and 4 more
+        // bytes must stay free behind it for the zero length that ends the
+        // log. Anything that does not fit is not stored.
+        bool fits = (Discover_DataPos <= buflen - 9) &&
+                    (len <= buflen - 9 - Discover_DataPos);
+
+        if (fits)
+        {
+            if (DataPos == 0)
+                Discover_Buffer[Discover_DataPos + 4] = CurCmd;
+
+            Discover_Buffer[Discover_DataPos + 5 + DataPos] = val;
+        }
+
+        if (islast)
+        {
+            if (fits)
+            {
+                // the record start is not necessarily 4-byte aligned
+                u32 reclen = len+1;
+                memcpy(&Discover_Buffer[Discover_DataPos], &reclen, 4);
+                Discover_DataPos += 5+len;
+            }
+            else
+            {
+                // wipe the partially stored bytes so they can never be
+                // mistaken for a record header during replay
+                if (Discover_DataPos < buflen)
+                    memset(&Discover_Buffer[Discover_DataPos], 0, buflen - Discover_DataPos);
+                printf("discover: write log full, dropping a %u byte write\n", len);
+            }
+
+            if (Discover_Likeliness <= len)
+            {
+                Discover_Likeliness = len;
+
+                if (len > 3+256) // bigger Flash, FRAM, whatever
+                {
+                    Discover_MemoryType = 5;
+                }
+                else if (len > 2+128) // Flash
+                {
+                    Discover_MemoryType = 4;
+                }
+                else if (len > 2+32) // EEPROM 512k
+                {
+                    Discover_MemoryType = 3;
+                }
+                else if (len > 1+16 || (len != 1+16 && CurCmd != 0x0A)) // EEPROM 64k
+                {
+                    Discover_MemoryType = 2;
+                }
+                else // EEPROM 4k
+                {
+                    Discover_MemoryType = 1;
+                }
+            }
+
+            printf("discover: type=%d likeliness=%d\n", Discover_MemoryType, Discover_Likeliness);
+        }
+    }
+}
+
+// No-op write handler: installed by LoadSave() for unrecognised save lengths
+// and by SetMemoryType() for memory type 5.
+void Write_Null(u8 val, bool islast) {}
+
+// Write handler selected for 512-byte saves. Not implemented yet: every byte
+// is ignored and `Data` is left unchanged.
+void Write_EEPROMTiny(u8 val, bool islast)
+{
+ // TODO
+}
+
+// Write handler for EEPROM saves with a 2-byte address.
+//
+// Commands handled (CurCmd):
+//   0x02 - write: two address bytes, then data bytes stored at Addr
+//   0x03 - read:  two address bytes, then each further byte clocks out SRAM[Addr]
+//   0x9F - always answers 0xFF
+// Addresses wrap around the image via a (SRAMLength-1) mask.
+void Write_EEPROM(u8 val, bool islast)
+{
+    if (CurCmd == 0x9F)
+    {
+        Data = 0xFF;
+        return;
+    }
+
+    if (CurCmd != 0x02 && CurCmd != 0x03)
+    {
+        // report an unsupported command once, on its first byte
+        if (DataPos==0)
+            printf("unknown EEPROM save command %02X\n", CurCmd);
+        return;
+    }
+
+    // address phase, shared by read and write: most significant byte first
+    if (DataPos < 2)
+    {
+        Addr = (Addr << 8) | val;
+        Data = 0;
+        return;
+    }
+
+    // data phase
+    u32 offset = Addr & (SRAMLength-1);
+    if (CurCmd == 0x02)
+        SRAM[offset] = val;
+    else
+        Data = SRAM[offset];
+
+    Addr++;
+}
+
+// Write handler for Flash saves with a 3-byte address.
+//
+// Commands handled (CurCmd):
+//   0x03 - read:  three address bytes, then each further byte clocks out SRAM[Addr]
+//   0x0A - write: three address bytes, then data bytes stored at Addr
+//   0x9F - always answers 0xFF
+// Out-of-range addresses read as 0 and ignore writes (no wraparound).
+void Write_Flash(u8 val, bool islast)
+{
+    if (CurCmd == 0x9F)
+    {
+        Data = 0xFF;
+        return;
+    }
+
+    if (CurCmd != 0x03 && CurCmd != 0x0A)
+    {
+        // report an unsupported command once, on its first byte
+        if (DataPos==0)
+            printf("unknown Flash save command %02X\n", CurCmd);
+        return;
+    }
+
+    // address phase, shared by read and write: most significant byte first
+    if (DataPos < 3)
+    {
+        Addr = (Addr << 8) | val;
+        Data = 0;
+        return;
+    }
+
+    // data phase
+    // CHECKME: does Flash also wraparound when the address is out of bounds?
+    bool inrange = (Addr < SRAMLength);
+    if (CurCmd == 0x03)
+        Data = inrange ? SRAM[Addr] : 0;
+    else if (inrange)
+        SRAM[Addr] = val;
+
+    Addr++;
+}
+
+// Feeds one SPI byte to the save memory.
+//
+// val  - byte sent by the game
+// hold - non-zero while chip select is being held, i.e. more bytes follow
+//
+// The first byte sent with `hold` set starts a transfer and is the command.
+// Following bytes are dispatched on that command; the byte sent with `hold`
+// cleared is the last one of the transfer. After the last byte of a write
+// command (0x02 / 0x0A) the whole save image is flushed to SRAMPath.
+void Write(u8 val, u32 hold)
+{
+    bool islast = false;
+
+    if (!hold)
+    {
+        if (Hold) islast = true;
+        Hold = 0;
+    }
+
+    if (hold && (!Hold))
+    {
+        // first byte of a new transfer: latch the command, reset the state
+        CurCmd = val;
+        Hold = 1;
+        Data = 0;
+        DataPos = 0;
+        Addr = 0;
+        //printf("save SPI command %02X\n", CurCmd);
+        return;
+    }
+
+    switch (CurCmd)
+    {
+    case 0x02:
+    case 0x03:
+    case 0x0A:
+    case 0x0B:
+    case 0x9F:
+        WriteFunc(val, islast);
+        DataPos++;
+        break;
+
+    case 0x04: // write disable
+        StatusReg &= ~(1<<1);
+        Data = 0;
+        break;
+
+    case 0x05: // read status reg
+        Data = StatusReg;
+        break;
+
+    case 0x06: // write enable
+        StatusReg |= (1<<1);
+        Data = 0;
+        break;
+
+    default:
+        if (DataPos==0)
+            printf("unknown save SPI command %02X\n", CurCmd);
+        break;
+    }
+
+    if (islast && (CurCmd == 0x02 || CurCmd == 0x0A))
+    {
+        // Only flush when a save image actually exists. While the memory
+        // type is still being discovered SRAMLength is 0, and opening the
+        // file with "wb" would leave an empty .sav behind that the next
+        // LoadSave() rejects as a bad save length.
+        if (SRAM && SRAMLength)
+        {
+            FILE* f = fopen(SRAMPath, "wb");
+            if (f)
+            {
+                fwrite(SRAM, SRAMLength, 1, f);
+                fclose(f);
+            }
+            else
+                printf("failed to open save file %s for writing\n", SRAMPath);
+        }
+    }
+}
+
+}
+
+
+namespace NDSCart
+{
+
+u16 SPICnt;
+u32 ROMCnt;
+
+u8 ROMCommand[8];
+u32 ROMDataOut;
+
+u8 DataOut[0x4000];
+u32 DataOutPos;
+u32 DataOutLen;
+
+bool CartInserted;
+u8* CartROM;
+u32 CartROMSize;
+u32 CartID;
+bool CartIsHomebrew;
+
+u32 CmdEncMode;
+u32 DataEncMode;
+
+u32 Key1_KeyBuf[0x412];
+
+u64 Key2_X;
+u64 Key2_Y;
+
+
+// Reverses the byte order of a 32-bit value (0xAABBCCDD -> 0xDDCCBBAA).
+u32 ByteSwap(u32 val)
+{
+    u32 b0 = val & 0xFF;         // lowest byte
+    u32 b1 = (val >> 8) & 0xFF;
+    u32 b2 = (val >> 16) & 0xFF;
+    u32 b3 = val >> 24;          // highest byte
+
+    return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
+}
+
+// Encrypts the 64-bit block data[0..1] in place with the current Key1_KeyBuf.
+// Sixteen rounds use key words 0x00..0x0F; words 0x10 and 0x11 are XORed
+// into the result. The four 256-entry lookup tables start at word 0x12.
+void Key1_Encrypt(u32* data)
+{
+    u32 left = data[0];
+    u32 right = data[1];
+
+    for (u32 round = 0; round < 0x10; round++)
+    {
+        u32 mixed = Key1_KeyBuf[round] ^ right;
+
+        // round function: one table lookup per byte of `mixed`
+        u32 f = Key1_KeyBuf[0x012 + (mixed >> 24)];
+        f += Key1_KeyBuf[0x112 + ((mixed >> 16) & 0xFF)];
+        f ^= Key1_KeyBuf[0x212 + ((mixed >> 8) & 0xFF)];
+        f += Key1_KeyBuf[0x312 + (mixed & 0xFF)];
+
+        right = f ^ left;
+        left = mixed;
+    }
+
+    data[0] = right ^ Key1_KeyBuf[0x10];
+    data[1] = left ^ Key1_KeyBuf[0x11];
+}
+
+// Decrypts the 64-bit block data[0..1] in place with the current Key1_KeyBuf.
+// Same round function as Key1_Encrypt, with the key words applied in reverse:
+// rounds use words 0x11 down to 0x02, and words 0x01 and 0x00 are XORed into
+// the result.
+void Key1_Decrypt(u32* data)
+{
+    u32 left = data[0];
+    u32 right = data[1];
+
+    for (u32 round = 0x11; round > 0x1; round--)
+    {
+        u32 mixed = Key1_KeyBuf[round] ^ right;
+
+        // round function: one table lookup per byte of `mixed`
+        u32 f = Key1_KeyBuf[0x012 + (mixed >> 24)];
+        f += Key1_KeyBuf[0x112 + ((mixed >> 16) & 0xFF)];
+        f ^= Key1_KeyBuf[0x212 + ((mixed >> 8) & 0xFF)];
+        f += Key1_KeyBuf[0x312 + (mixed & 0xFF)];
+
+        right = f ^ left;
+        left = mixed;
+    }
+
+    data[0] = right ^ Key1_KeyBuf[0x1];
+    data[1] = left ^ Key1_KeyBuf[0x0];
+}
+
+// Mixes `keycode` into Key1_KeyBuf.
+//
+// keycode - three words; words 1-2 and then words 0-1 are encrypted in place
+//           first, so the caller's array is modified
+// mod     - number of keycode words cycled through when XORing them into the
+//           first 0x12 key words
+//
+// Afterwards the whole key buffer (0x412 words) is regenerated pair by pair
+// by repeatedly encrypting a block that starts out as zero.
+void Key1_ApplyKeycode(u32* keycode, u32 mod)
+{
+    Key1_Encrypt(&keycode[1]);
+    Key1_Encrypt(&keycode[0]);
+
+    for (u32 word = 0; word < 0x12; word++)
+    {
+        Key1_KeyBuf[word] ^= ByteSwap(keycode[word % mod]);
+    }
+
+    u32 scratch[2] = {0,0};
+    for (u32 word = 0; word < 0x412; word += 2)
+    {
+        Key1_Encrypt(scratch);
+        // the two halves are stored swapped
+        Key1_KeyBuf[word  ] = scratch[1];
+        Key1_KeyBuf[word+1] = scratch[0];
+    }
+}
+
+// Rebuilds Key1_KeyBuf for the given id code.
+//
+// idcode - seed for the three keycode words
+// level  - how many times the keycode is applied (0 to 3); before the third
+//          application keycode[1] is doubled and keycode[2] is halved
+// mod    - passed through to Key1_ApplyKeycode
+//
+// The initial key buffer is copied from offset 0x30 of the ARM7 BIOS image.
+void Key1_InitKeycode(u32 idcode, u32 level, u32 mod)
+{
+    memcpy(Key1_KeyBuf, &NDS::ARM7BIOS[0x30], 0x1048); // hax
+
+    u32 keycode[3];
+    keycode[0] = idcode;
+    keycode[1] = idcode>>1;
+    keycode[2] = idcode<<1;
+
+    if (level < 1) return;
+    Key1_ApplyKeycode(keycode, mod);
+
+    if (level < 2) return;
+    Key1_ApplyKeycode(keycode, mod);
+
+    if (level < 3) return;
+    keycode[1] <<= 1;
+    keycode[2] >>= 1;
+    Key1_ApplyKeycode(keycode, mod);
+}
+
+
+// Advances the two 39-bit KEY2 shift registers (Key2_X, Key2_Y) by `len` steps.
+// Each step shifts a register left by 8 and inserts one new low byte built by
+// XORing four right-shifted copies of the register; the result is masked back
+// to 39 bits.
+// NOTE(review): `data` is never read or written here -- only the register
+// state changes. No caller is visible in this file.
+void Key2_Encrypt(u8* data, u32 len)
+{
+ for (u32 i = 0; i < len; i++)
+ {
+ // X taps: bits 5, 17, 18, 31
+ Key2_X = (((Key2_X >> 5) ^
+ (Key2_X >> 17) ^
+ (Key2_X >> 18) ^
+ (Key2_X >> 31)) & 0xFF)
+ + (Key2_X << 8);
+ // Y taps: bits 5, 23, 18, 31
+ Key2_Y = (((Key2_Y >> 5) ^
+ (Key2_Y >> 23) ^
+ (Key2_Y >> 18) ^
+ (Key2_Y >> 31)) & 0xFF)
+ + (Key2_Y << 8);
+
+ // keep both registers at 39 bits
+ Key2_X &= 0x0000007FFFFFFFFFULL;
+ Key2_Y &= 0x0000007FFFFFFFFFULL;
+ }
+}
+
+
+// Initialises the cart module; succeeds exactly when the save-memory
+// submodule does.
+bool Init()
+{
+    return NDSCart_SRAM::Init();
+}
+
+// Shuts the cart module down: releases the ROM image allocated by LoadROM()
+// (previously never freed) and then the save-memory submodule.
+void DeInit()
+{
+    // delete[] on a null pointer is a no-op, so this is safe with no ROM loaded
+    delete[] CartROM;
+    CartROM = NULL;
+
+    NDSCart_SRAM::DeInit();
+}
+
+// Returns the cart interface to its power-on state: registers cleared,
+// transfer buffer emptied, encryption state zeroed and no cart inserted.
+// NOTE(review): CartROM is only set to NULL here, not freed.
+void Reset()
+{
+    // cart presence and identity
+    CartInserted = false;
+    CartROM = NULL;
+    CartROMSize = 0;
+    CartID = 0;
+    CartIsHomebrew = false;
+
+    // I/O registers
+    SPICnt = 0;
+    ROMCnt = 0;
+    memset(ROMCommand, 0, sizeof(ROMCommand));
+    ROMDataOut = 0;
+
+    // pending transfer data
+    memset(DataOut, 0, sizeof(DataOut));
+    DataOutPos = 0;
+    DataOutLen = 0;
+
+    // encryption state
+    Key2_X = 0;
+    Key2_Y = 0;
+    CmdEncMode = 0;
+    DataEncMode = 0;
+
+    NDSCart_SRAM::Reset();
+}
+
+
+// Loads the ROM image at `path` into memory and inserts it as the cart.
+//
+// The image is padded with zeroes up to the next power of two (minimum 0x200
+// bytes). If the ARM9 binary starts inside the secure area and that area is
+// stored decrypted, it is re-encrypted in place. The matching save file
+// (same path with the last three characters replaced by "sav") is loaded
+// through NDSCart_SRAM::LoadSave().
+//
+// Returns false if the file cannot be opened or its size cannot be handled;
+// in that case the cart state is left untouched.
+bool LoadROM(char* path)
+{
+    // TODO: streaming mode? for really big ROMs or systems with limited RAM
+    // for now we're lazy
+
+    FILE* f = fopen(path, "rb");
+    if (!f)
+    {
+        printf("Failed to open ROM file %s\n", path);
+        return false;
+    }
+
+    fseek(f, 0, SEEK_END);
+    long filelen = ftell(f);
+    // ftell() returns -1 on failure. Anything above 2GB cannot be rounded up
+    // to a power of two in 32 bits: the size loop below would overflow to 0
+    // and never terminate.
+    if (filelen < 0 || (unsigned long)filelen > 0x80000000UL)
+    {
+        printf("Failed to determine size of ROM file %s\n", path);
+        fclose(f);
+        return false;
+    }
+    u32 len = (u32)filelen;
+
+    CartROMSize = 0x200;
+    while (CartROMSize < len)
+        CartROMSize <<= 1;
+
+    // drop a previously loaded image, if any
+    delete[] CartROM;
+
+    CartROM = new u8[CartROMSize];
+    memset(CartROM, 0, CartROMSize);
+    fseek(f, 0, SEEK_SET);
+    if (fread(CartROM, 1, len, f) != len)
+        printf("Warning: short read on ROM file %s\n", path);
+
+    fclose(f);
+    //CartROM = f;
+
+    // Game code, taken from the loaded header. The buffer is zero-padded and
+    // at least 0x200 bytes, so this is well defined even for truncated files.
+    u32 gamecode;
+    memcpy(&gamecode, &CartROM[0x0C], 4);
+
+    // temp. TODO: later make this user selectable
+    // calling this sets up shit for booting from the cart directly.
+    // normal behavior is booting from the BIOS.
+    NDS::SetupDirectBoot();
+
+    CartInserted = true;
+
+    // generate a ROM ID
+    // note: most games don't check the actual value
+    // it just has to stay the same throughout gameplay
+    CartID = 0x00001FC2;
+
+    u32 arm9base = *(u32*)&CartROM[0x20];
+    if (arm9base < 0x8000)
+    {
+        if (arm9base >= 0x4000)
+        {
+            // reencrypt secure area if needed
+            // (only if the 0x800 bytes to encrypt actually lie inside the image)
+            if (arm9base + 0x800 <= CartROMSize &&
+                *(u32*)&CartROM[arm9base] == 0xE7FFDEFF)
+            {
+                printf("Re-encrypting cart secure area\n");
+
+                strncpy((char*)&CartROM[arm9base], "encryObj", 8);
+
+                Key1_InitKeycode(gamecode, 3, 2);
+                for (u32 i = 0; i < 0x800; i += 8)
+                    Key1_Encrypt((u32*)&CartROM[arm9base + i]);
+
+                Key1_InitKeycode(gamecode, 2, 2);
+                Key1_Encrypt((u32*)&CartROM[arm9base]);
+            }
+        }
+        else
+            CartIsHomebrew = true;
+    }
+
+    // encryption
+    Key1_InitKeycode(gamecode, 2, 2);
+
+
+    // save
+    char savepath[256];
+    strncpy(savepath, path, 255);
+    savepath[255] = '\0';
+    // Work from the length of the (possibly truncated) copy, not of `path`:
+    // with a path longer than 255 characters the suffix would otherwise be
+    // written past the end of savepath.
+    size_t savelen = strlen(savepath);
+    if (savelen >= 3)
+        memcpy(savepath + savelen - 3, "sav", 3);
+    else
+        strcat(savepath, ".sav"); // too short to hold an extension: never alias the ROM itself
+    printf("Save file: %s\n", savepath);
+    NDSCart_SRAM::LoadSave(savepath);
+
+    return true;
+}
+
+// Copies up to `len` bytes of cart ROM starting at `addr` into DataOut at
+// `offset`. Does nothing without a cart or when `addr` lies outside the ROM;
+// the copy is shortened when it would run past the end of the ROM.
+void ReadROM(u32 addr, u32 len, u32 offset)
+{
+    if (!CartInserted || addr >= CartROMSize)
+        return;
+
+    u32 count = ((addr+len) > CartROMSize) ? (CartROMSize - addr) : len;
+
+    memcpy(DataOut+offset, CartROM+addr, count);
+}
+
+// ROM read used by cart command 0xB7: copies `len` bytes of cart ROM starting
+// at `addr` into DataOut at `offset`.
+//
+// `addr` wraps around the ROM size. On non-homebrew carts, addresses below
+// 0x8000 are redirected to 0x8000 + (addr & 0x1FF).
+// Like ReadROM(), this does nothing without a cart and never reads past the
+// end of the ROM image.
+void ReadROM_B7(u32 addr, u32 len, u32 offset)
+{
+    // without a cart CartROM is NULL and CartROMSize is 0
+    if (!CartInserted) return;
+
+    addr &= (CartROMSize-1);
+    if (!CartIsHomebrew)
+    {
+        if (addr < 0x8000)
+            addr = 0x8000 + (addr & 0x1FF);
+    }
+
+    // the redirect above can point past the end of a very small image
+    if (addr >= CartROMSize) return;
+    if (len > CartROMSize - addr)
+        len = CartROMSize - addr;
+
+    memcpy(DataOut+offset, CartROM+addr, len);
+}
+
+
+// Marks the current ROM transfer as finished: clears bits 23 and 31 of ROMCnt
+// and, if bit 14 of SPICnt is set, raises IRQ_CartSendDone on the CPU
+// selected by bit 11 of ExMemCnt[0].
+void EndTransfer()
+{
+    ROMCnt &= 0x7F7FFFFF; // everything except bits 23 and 31
+
+    if (!(SPICnt & (1<<14)))
+        return;
+
+    u32 cpu = (NDS::ExMemCnt[0]>>11)&0x1;
+    NDS::SetIRQ(cpu, NDS::IRQ_CartSendDone);
+}
+
+// Latches the next word of the transfer buffer into ROMDataOut (0 once the
+// buffer is exhausted), advances the read position, sets bit 23 of ROMCnt and
+// lets both CPUs' DMA channels react. `param` is unused.
+// The scheduler-driven re-arming that once followed is disabled.
+void ROMPrepareData(u32 param)
+{
+    ROMDataOut = (DataOutPos < DataOutLen) ? *(u32*)&DataOut[DataOutPos] : 0;
+    DataOutPos += 4;
+
+    ROMCnt |= (1<<23);
+
+    NDS::CheckDMAs(0, 0x06);
+    NDS::CheckDMAs(1, 0x12);
+}
+
+// Handles a write to the cart ROM control register.
+//
+// Stores `val` (minus bits 15 and 23) in ROMCnt. Nothing further happens
+// unless bit 15 of SPICnt is set. Bit 15 of `val` reloads the KEY2 registers
+// from the ROM seeds; bit 31 starts a transfer: the command in ROMCommand is
+// decoded (KEY1-decrypted first when CmdEncMode == 1) and the reply is
+// prepared in DataOut.
+void WriteROMCnt(u32 val)
+{
+ ROMCnt = val & 0xFF7F7FFF;
+
+ if (!(SPICnt & (1<<15))) return;
+
+ if (val & (1<<15))
+ {
+ // seed set is picked by bit 11 of ExMemCnt[0]: byte offset 0 or 8 in the seed arrays
+ u32 snum = (NDS::ExMemCnt[0]>>8)&0x8;
+ u64 seed0 = *(u32*)&NDS::ROMSeed0[snum] | ((u64)NDS::ROMSeed0[snum+4] << 32);
+ u64 seed1 = *(u32*)&NDS::ROMSeed1[snum] | ((u64)NDS::ROMSeed1[snum+4] << 32);
+
+ // the KEY2 registers are the 39-bit seeds with their bit order reversed
+ Key2_X = 0;
+ Key2_Y = 0;
+ for (u32 i = 0; i < 39; i++)
+ {
+ if (seed0 & (1ULL << i)) Key2_X |= (1ULL << (38-i));
+ if (seed1 & (1ULL << i)) Key2_Y |= (1ULL << (38-i));
+ }
+
+ printf("seed0: %02X%08X\n", (u32)(seed0>>32), (u32)seed0);
+ printf("seed1: %02X%08X\n", (u32)(seed1>>32), (u32)seed1);
+ printf("key2 X: %02X%08X\n", (u32)(Key2_X>>32), (u32)Key2_X);
+ printf("key2 Y: %02X%08X\n", (u32)(Key2_Y>>32), (u32)Key2_Y);
+ }
+
+ // bit 31 clear: no transfer requested
+ if (!(ROMCnt & (1<<31))) return;
+
+ // bits 24-26 select the reply size: 0 = none, 7 = 4 bytes, otherwise 0x100 << n
+ u32 datasize = (ROMCnt >> 24) & 0x7;
+ if (datasize == 7)
+ datasize = 4;
+ else if (datasize > 0)
+ datasize = 0x100 << datasize;
+
+ DataOutPos = 0;
+ DataOutLen = datasize;
+
+ // handle KEY1 encryption as needed.
+ // KEY2 encryption is implemented in hardware and doesn't need to be handled.
+ u8 cmd[8];
+ if (CmdEncMode == 1)
+ {
+ // the command is byte-reversed around the KEY1 decryption
+ *(u32*)&cmd[0] = ByteSwap(*(u32*)&ROMCommand[4]);
+ *(u32*)&cmd[4] = ByteSwap(*(u32*)&ROMCommand[0]);
+ Key1_Decrypt((u32*)cmd);
+ u32 tmp = ByteSwap(*(u32*)&cmd[4]);
+ *(u32*)&cmd[4] = ByteSwap(*(u32*)&cmd[0]);
+ *(u32*)&cmd[0] = tmp;
+ }
+ else
+ {
+ *(u32*)&cmd[0] = *(u32*)&ROMCommand[0];
+ *(u32*)&cmd[4] = *(u32*)&ROMCommand[4];
+ }
+
+ /*printf("ROM COMMAND %04X %08X %02X%02X%02X%02X%02X%02X%02X%02X SIZE %04X\n",
+ SPICnt, ROMCnt,
+ cmd[0], cmd[1], cmd[2], cmd[3],
+ cmd[4], cmd[5], cmd[6], cmd[7],
+ datasize);*/
+
+ switch (cmd[0])
+ {
+ // reply is all 0xFF
+ case 0x9F:
+ memset(DataOut, 0xFF, DataOutLen);
+ break;
+
+ // reply is the first 0x1000 bytes of the ROM, repeated to fill longer replies
+ case 0x00:
+ memset(DataOut, 0, DataOutLen);
+ if (DataOutLen > 0x1000)
+ {
+ ReadROM(0, 0x1000, 0);
+ for (u32 pos = 0x1000; pos < DataOutLen; pos += 0x1000)
+ memcpy(DataOut+pos, DataOut, 0x1000);
+ }
+ else
+ ReadROM(0, DataOutLen, 0);
+ break;
+
+ // reply is CartID repeated
+ case 0x90:
+ case 0xB8:
+ for (u32 pos = 0; pos < DataOutLen; pos += 4)
+ *(u32*)&DataOut[pos] = CartID;
+ break;
+
+ // following commands are KEY1-decrypted
+ case 0x3C:
+ CmdEncMode = 1;
+ break;
+
+ // ROM data read from the big-endian address in cmd[1..4]
+ case 0xB7:
+ {
+ u32 addr = (cmd[1]<<24) | (cmd[2]<<16) | (cmd[3]<<8) | cmd[4];
+ memset(DataOut, 0, DataOutLen);
+
+ // a read crossing a 0x1000 boundary is done in two parts, so each
+ // part gets its own address wrap/redirect in ReadROM_B7
+ if (((addr + DataOutLen - 1) >> 12) != (addr >> 12))
+ {
+ u32 len1 = 0x1000 - (addr & 0xFFF);
+ ReadROM_B7(addr, len1, 0);
+ ReadROM_B7(addr+len1, DataOutLen-len1, len1);
+ }
+ else
+ ReadROM_B7(addr, DataOutLen, 0);
+ }
+ break;
+
+ default:
+ // remaining commands are identified by their high nibble only
+ switch (cmd[0] & 0xF0)
+ {
+ case 0x40:
+ DataEncMode = 2;
+ break;
+
+ // reply is CartID repeated
+ case 0x10:
+ for (u32 pos = 0; pos < DataOutLen; pos += 4)
+ *(u32*)&DataOut[pos] = CartID;
+ break;
+
+ // 0x1000-byte block read; the block address comes from the high nibble of cmd[2]
+ case 0x20:
+ {
+ u32 addr = (cmd[2] & 0xF0) << 8;
+ ReadROM(addr, 0x1000, 0);
+ }
+ break;
+
+ case 0xA0:
+ CmdEncMode = 2;
+ break;
+ }
+ break;
+ }
+
+ //ROMCnt &= ~(1<<23);
+ ROMCnt |= (1<<23);
+
+ // no reply data: finish at once; otherwise let DMA pick the data up
+ if (datasize == 0)
+ EndTransfer();
+ else
+ {
+ NDS::CheckDMAs(0, 0x05);
+ NDS::CheckDMAs(1, 0x12);
+ }
+ //NDS::ScheduleEvent((ROMCnt & (1<<27)) ? 8:5, ROMPrepareData, 0);
+}
+
+// Returns the next 32-bit word of the current cart transfer, or 0 once the
+// transfer buffer is exhausted. The read position always advances by 4; the
+// read that consumes the final word ends the transfer.
+u32 ReadROMData()
+{
+    u32 word = 0;
+    if (DataOutPos < DataOutLen)
+        word = *(u32*)&DataOut[DataOutPos];
+
+    DataOutPos += 4;
+    if (DataOutPos == DataOutLen)
+        EndTransfer();
+
+    return word;
+}
+
+// Copies the whole transfer buffer to memory at `addr`, one word at a time,
+// through the bus of whichever CPU bit 11 of ExMemCnt[0] selects (set: ARM7,
+// clear: ARM9), then ends the transfer. The CPU is chosen once, before the
+// copy starts.
+void DMA(u32 addr)
+{
+    bool toarm7 = (NDS::ExMemCnt[0] & (1<<11)) != 0;
+
+    for (u32 offset = 0; offset < DataOutLen; offset += 4)
+    {
+        u32 word = *(u32*)&DataOut[offset];
+        if (toarm7)
+            NDS::ARM7Write32(addr+offset, word);
+        else
+            NDS::ARM9Write32(addr+offset, word);
+    }
+
+    EndTransfer();
+}
+
+
+// Updates the cart SPI control register: bit 7 keeps its current value, the
+// bits in mask 0xE043 are taken from `val`, everything else reads as zero.
+void WriteSPICnt(u16 val)
+{
+    u16 kept = SPICnt & 0x0080;
+    u16 written = val & 0xE043;
+
+    SPICnt = kept | written;
+}
+
+// Returns the save memory's current output byte, or 0 unless both bit 15 and
+// bit 13 of SPICnt are set.
+u8 ReadSPIData()
+{
+    const u16 required = (1<<15) | (1<<13);
+    if ((SPICnt & required) != required)
+        return 0;
+
+    return NDSCart_SRAM::Read();
+}
+
+// Sends one byte to the save memory; ignored unless both bit 15 and bit 13 of
+// SPICnt are set. Bit 6 of SPICnt is passed on as the "hold" state.
+void WriteSPIData(u8 val)
+{
+    const u16 required = (1<<15) | (1<<13);
+    if ((SPICnt & required) != required)
+        return;
+
+    // TODO: take delays into account
+
+    u32 hold = SPICnt & (1<<6);
+    NDSCart_SRAM::Write(val, hold);
+}
+
+}
diff --git a/src/NDSCart.h b/src/NDSCart.h
new file mode 100644
index 0000000..61dd11a
--- /dev/null
+++ b/src/NDSCart.h
@@ -0,0 +1,55 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef NDSCART_H
+#define NDSCART_H
+
+#include "types.h"
+
+namespace NDSCart
+{
+
+extern u16 SPICnt;
+extern u32 ROMCnt;
+
+extern u8 ROMCommand[8];
+extern u32 ROMDataOut;
+
+// NOTE(review): NDSCart.cpp defines neither EncSeed0 nor EncSeed1; the seed
+// bytes it actually reads are NDS::ROMSeed0/ROMSeed1. These declarations look
+// stale -- any use would fail to link. Confirm before relying on them.
+extern u8 EncSeed0[5];
+extern u8 EncSeed1[5];
+
+extern u8* CartROM;
+extern u32 CartROMSize;
+
+bool Init();
+void DeInit();
+void Reset();
+
+bool LoadROM(char* path);
+
+void WriteROMCnt(u32 val);
+u32 ReadROMData();
+void DMA(u32 addr);
+
+void WriteSPICnt(u16 val);
+u8 ReadSPIData();
+void WriteSPIData(u8 val);
+
+}
+
+#endif
diff --git a/src/RTC.cpp b/src/RTC.cpp
new file mode 100644
index 0000000..842fdae
--- /dev/null
+++ b/src/RTC.cpp
@@ -0,0 +1,255 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "RTC.h"
+
+
+// Emulation of the real-time clock, driven bit by bit through a single I/O register.
+namespace RTC
+{
+
+// Last value of the RTC I/O register. Write() tests bit 2 (0x0004) to detect the
+// start of a transfer, bit 1 (0x0002) for the clock level, bit 4 (0x0010) to pick
+// between writing and reading, and uses bit 0 (0x0001) as the data bit.
+u16 IO;
+
+// Input side of the serial transfer: the byte being assembled, the index of the
+// next bit to fill in, and the number of complete bytes received so far
+// (0 means the next byte is the command byte).
+u8 Input;
+u32 InputBit;
+u32 InputPos;
+
+// Output side: the reply bytes prepared by ByteIn() and the bit/byte cursor
+// Write() uses to shift them out.
+u8 Output[8];
+u32 OutputBit;
+u32 OutputPos;
+
+// Command byte of the transfer in progress, as decoded by ByteIn().
+u8 CurCmd;
+
+// Register file; the register is selected by bits 4-6 of the command byte.
+u8 StatusReg1;
+u8 StatusReg2;
+u8 Alarm1[3];
+u8 Alarm2[3];
+u8 ClockAdjust;
+u8 FreeReg;
+
+
+// Nothing to allocate; always succeeds.
+bool Init()
+{
+    return true;
+}
+
+// Nothing to release.
+void DeInit()
+{
+}
+
+// Clears the I/O register, the transfer state and every RTC register.
+void Reset()
+{
+    // a stale IO value with bit 2 set would make the first write after a reset
+    // look like the middle of a transfer
+    IO = 0;
+
+    Input = 0;
+    InputBit = 0;
+    InputPos = 0;
+
+    memset(Output, 0, sizeof(Output));
+    OutputBit = 0;
+    OutputPos = 0;
+
+    CurCmd = 0;
+
+    StatusReg1 = 0;
+    StatusReg2 = 0;
+    memset(Alarm1, 0, sizeof(Alarm1));
+    memset(Alarm2, 0, sizeof(Alarm2));
+    ClockAdjust = 0;
+    FreeReg = 0;
+}
+
+
+// Handles one complete byte received from the CPU.
+//
+// The first byte of a transfer (InputPos == 0) is the command: it selects the
+// register and, when bit 7 is set, fills Output with the data to be read back.
+// Every following byte is a parameter for the selected register.
+// This function advances InputPos itself; callers must not do it again.
+void ByteIn(u8 val)
+{
+    //printf("RTC IN: %02X\n", val);
+    if (InputPos == 0)
+    {
+        if ((val & 0xF0) == 0x60)
+        {
+            // the command was sent with its bits in reverse order:
+            // rev[n] is the bit-reversed form of (0x60 | n)
+            static const u8 rev[16] = {0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6};
+            CurCmd = rev[val & 0xF];
+        }
+        else
+            CurCmd = val;
+
+        if (CurCmd & 0x80)
+        {
+            // read command: prepare the reply
+            switch (CurCmd & 0x70)
+            {
+            case 0x00: Output[0] = StatusReg1; break;
+            case 0x40: Output[0] = StatusReg2; break;
+
+            case 0x20:
+                // TODO: get actual system time
+                Output[0] = 0x17;
+                Output[1] = 0x01;
+                Output[2] = 0x19;
+                Output[3] = 0x04; // day of week. checkme. apparently 04=Thursday
+                Output[4] = 0x06;
+                Output[5] = 0x30;
+                Output[6] = 0x30;
+                break;
+
+            case 0x60:
+                // TODO: get actual system time
+                Output[0] = 0x06;
+                Output[1] = 0x30;
+                Output[2] = 0x30;
+                break;
+
+            case 0x10:
+                if (StatusReg2 & 0x04)
+                {
+                    Output[0] = Alarm1[0];
+                    Output[1] = Alarm1[1];
+                    Output[2] = Alarm1[2];
+                }
+                else
+                    Output[0] = Alarm1[2];
+                break;
+
+            case 0x50:
+                Output[0] = Alarm2[0];
+                Output[1] = Alarm2[1];
+                Output[2] = Alarm2[2];
+                break;
+
+            case 0x30: Output[0] = ClockAdjust; break;
+            case 0x70: Output[0] = FreeReg; break;
+            }
+        }
+        InputPos++;
+        return;
+    }
+
+    // parameter byte: InputPos is 1 for the first one
+    switch (CurCmd & 0x70)
+    {
+    case 0x00:
+        if (InputPos == 1) StatusReg1 = val & 0x0E;
+        break;
+
+    case 0x40:
+        if (InputPos == 1) StatusReg2 = val;
+        if (StatusReg2 & 0x4F) printf("RTC INTERRUPT ON: %02X\n", StatusReg2);
+        break;
+
+    case 0x20:
+        // TODO: set time somehow??
+        break;
+
+    case 0x60:
+        // same as above: setting the time is not implemented
+        break;
+
+    case 0x10:
+        if (StatusReg2 & 0x04)
+        {
+            if (InputPos <= 3) Alarm1[InputPos-1] = val;
+        }
+        else
+        {
+            if (InputPos == 1) Alarm1[2] = val;
+        }
+        break;
+
+    case 0x50:
+        if (InputPos <= 3) Alarm2[InputPos-1] = val;
+        break;
+
+    case 0x30:
+        if (InputPos == 1) ClockAdjust = val;
+        break;
+
+    case 0x70:
+        if (InputPos == 1) FreeReg = val;
+        break;
+    }
+
+    InputPos++;
+}
+
+
+// Returns the current value of the RTC I/O register.
+u16 Read()
+{
+    //printf("RTC READ %04X\n", IO);
+    return IO;
+}
+
+// Handles a write to the RTC I/O register.
+//
+// val:  value written
+// byte: when true, the upper 8 bits of the current register value are merged
+//       into val before it is processed
+//
+// A write that sets bit 2 while it was previously clear starts a new transfer.
+// While bit 2 stays set, each write with the clock bit (bit 1) low moves one
+// bit: into Input when bit 4 is set, or out of Output into bit 0 of IO otherwise.
+void Write(u16 val, bool byte)
+{
+    if (byte) val |= (IO & 0xFF00);
+
+    //printf("RTC WRITE %04X\n", val);
+    if (val & 0x0004)
+    {
+        if (!(IO & 0x0004))
+        {
+            // start transfer
+            Input = 0;
+            InputBit = 0;
+            InputPos = 0;
+
+            memset(Output, 0, sizeof(Output));
+            OutputBit = 0;
+            OutputPos = 0;
+        }
+        else
+        {
+            if (!(val & 0x0002)) // clock low
+            {
+                if (val & 0x0010)
+                {
+                    // write
+                    if (val & 0x0001)
+                        Input |= (1<<InputBit);
+
+                    InputBit++;
+                    if (InputBit >= 8)
+                    {
+                        InputBit = 0;
+                        // ByteIn() advances InputPos; incrementing it here as
+                        // well would make parameter bytes skip every other
+                        // position, so none would ever be seen at InputPos == 1
+                        ByteIn(Input);
+                        Input = 0;
+                    }
+                }
+                else
+                {
+                    // read
+                    if (Output[OutputPos] & (1<<OutputBit))
+                        IO |= 0x0001;
+                    else
+                        IO &= 0xFFFE;
+
+                    OutputBit++;
+                    if (OutputBit >= 8)
+                    {
+                        OutputBit = 0;
+                        // stay on the last byte rather than run past Output[]
+                        if (OutputPos < 7)
+                            OutputPos++;
+                    }
+                }
+            }
+        }
+    }
+
+    // when reading (bit 4 clear), bit 0 is driven by the RTC and must be kept
+    if (val & 0x0010)
+        IO = val;
+    else
+        IO = (IO & 0x0001) | (val & 0xFFFE);
+}
+
+}
diff --git a/src/RTC.h b/src/RTC.h
new file mode 100644
index 0000000..6ada5c1
--- /dev/null
+++ b/src/RTC.h
@@ -0,0 +1,36 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef RTC_H
+#define RTC_H
+
+#include "types.h"
+
+// Public interface of the real-time clock emulation (implemented in RTC.cpp).
+namespace RTC
+{
+
+// Init() always returns true and DeInit() does nothing; Reset() clears the
+// transfer state and the RTC registers.
+bool Init();
+void DeInit();
+void Reset();
+
+// Read() returns the current value of the RTC I/O register.
+// Write() handles a write to that register; when byte is true, the upper
+// 8 bits of the current register value are merged into val first.
+u16 Read();
+void Write(u16 val, bool byte);
+
+}
+
+#endif
diff --git a/src/SPI.cpp b/src/SPI.cpp
new file mode 100644
index 0000000..13ab2ab
--- /dev/null
+++ b/src/SPI.cpp
@@ -0,0 +1,457 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "NDS.h"
+#include "SPI.h"
+
+
+// Emulation of the firmware flash chip on the SPI bus, backed by firmware.bin.
+namespace SPI_Firmware
+{
+
+// Smallest image Reset() accepts: it reads and patches data up to offset 0x3FF73.
+static const u32 MinFirmwareLength = 0x40000;
+
+// Firmware image (NULL when none is loaded) and its length in bytes.
+u8* Firmware;
+u32 FirmwareLength;
+
+// Transfer state: whether a command is in progress, the command byte, the
+// index of the next byte within the command, and the byte returned by Read().
+u32 Hold;
+u8 CurCmd;
+u32 DataPos;
+u8 Data;
+
+u8 StatusReg;
+u32 Addr;
+
+
+// Reads a 16-bit value from the firmware image in host byte order.
+// The caller guarantees that offset+1 lies inside the image.
+static u16 Read16(u32 offset)
+{
+    u16 ret;
+    memcpy(&ret, &Firmware[offset], sizeof(ret));
+    return ret;
+}
+
+// Stores a 16-bit value into the firmware image in host byte order.
+// The caller guarantees that offset+1 lies inside the image.
+static void Write16(u32 offset, u16 val)
+{
+    memcpy(&Firmware[offset], &val, sizeof(val));
+}
+
+// Computes the CRC16 of len bytes at data, starting from the initial value start.
+u16 CRC16(u8* data, u32 len, u32 start)
+{
+    static const u16 blarg[8] = {0xC0C1, 0xC181, 0xC301, 0xC601, 0xCC01, 0xD801, 0xF001, 0xA001};
+
+    for (u32 i = 0; i < len; i++)
+    {
+        start ^= data[i];
+
+        for (int j = 0; j < 8; j++)
+        {
+            if (start & 0x1)
+            {
+                start >>= 1;
+                start ^= (blarg[j] << (7-j));
+            }
+            else
+                start >>= 1;
+        }
+    }
+
+    return start & 0xFFFF;
+}
+
+// Checks the CRC16 stored at crcoffset against the one computed over len bytes
+// at offset, using start as the initial value. Returns false when no firmware
+// is loaded or when either region does not lie entirely inside the image
+// (len may come from the image itself, so it cannot be trusted).
+bool VerifyCRC16(u32 start, u32 offset, u32 len, u32 crcoffset)
+{
+    if (!Firmware) return false;
+    if (offset > FirmwareLength || len > FirmwareLength - offset) return false;
+    if (crcoffset > FirmwareLength || FirmwareLength - crcoffset < 2) return false;
+
+    u16 crc_stored = Read16(crcoffset);
+    u16 crc_calced = CRC16(&Firmware[offset], len, start);
+    //printf("%04X vs %04X\n", crc_stored, crc_calced);
+    return (crc_stored == crc_calced);
+}
+
+
+// Marks the firmware as not loaded; always succeeds.
+bool Init()
+{
+    Firmware = NULL;
+    return true;
+}
+
+// Releases the firmware image.
+void DeInit()
+{
+    delete[] Firmware;
+
+    // do not leave a dangling pointer behind for a later Reset() to free again
+    Firmware = NULL;
+    FirmwareLength = 0;
+}
+
+// Reloads firmware.bin from the current directory, patches the touchscreen
+// calibration in the active user settings block and resets the transfer state.
+// If the file is missing, too small or unreadable, no firmware is loaded and
+// reads through the SPI interface return 0.
+void Reset()
+{
+    delete[] Firmware;
+    Firmware = NULL;
+    FirmwareLength = 0;
+
+    // reset the protocol state first so that it is sane on every exit path
+    Hold = 0;
+    CurCmd = 0;
+    DataPos = 0;
+    Data = 0;
+    StatusReg = 0x00;
+    Addr = 0;
+
+    FILE* f = fopen("firmware.bin", "rb");
+    if (!f)
+    {
+        printf("firmware.bin not found\n");
+
+        // TODO: generate default firmware
+        return;
+    }
+
+    fseek(f, 0, SEEK_END);
+    long len = ftell(f);
+    if (len < (long)MinFirmwareLength)
+    {
+        // also covers ftell() failing (-1); the code below indexes up to 0x3FF73
+        printf("firmware.bin is invalid or too small\n");
+        fclose(f);
+        return;
+    }
+
+    FirmwareLength = (u32)len;
+    Firmware = new u8[FirmwareLength];
+
+    fseek(f, 0, SEEK_SET);
+    size_t nread = fread(Firmware, FirmwareLength, 1, f);
+
+    fclose(f);
+
+    if (nread != 1)
+    {
+        printf("failed to read firmware.bin\n");
+        delete[] Firmware;
+        Firmware = NULL;
+        FirmwareLength = 0;
+        return;
+    }
+
+    // pick the user settings block: the one at 0x3FF00 is used when its counter
+    // directly follows the other block's and its CRC is valid, else the one at 0x3FE00
+    u32 userdata = 0x3FE00;
+    if (Read16(0x3FF70) == ((Read16(0x3FE70) + 1) & 0x7F))
+    {
+        if (VerifyCRC16(0xFFFF, 0x3FF00, 0x70, 0x3FF72))
+            userdata = 0x3FF00;
+    }
+
+    // fix touchscreen coords
+    Write16(userdata+0x58, 0);
+    Write16(userdata+0x5A, 0);
+    Firmware[userdata+0x5C] = 1;
+    Firmware[userdata+0x5D] = 1;
+    Write16(userdata+0x5E, 254<<4);
+    Write16(userdata+0x60, 190<<4);
+    Firmware[userdata+0x62] = 255;
+    Firmware[userdata+0x63] = 191;
+
+    // disable autoboot
+    //Firmware[userdata+0x64] &= 0xBF;
+
+    // the block was modified, so its CRC has to be recomputed
+    Write16(userdata+0x72, CRC16(&Firmware[userdata], 0x70, 0xFFFF));
+
+    // report the state of every checksummed region
+    printf("FW: WIFI CRC16 = %s\n", VerifyCRC16(0x0000, 0x2C, Read16(0x2C), 0x2A)?"GOOD":"BAD");
+    printf("FW: AP1 CRC16 = %s\n", VerifyCRC16(0x0000, 0x3FA00, 0xFE, 0x3FAFE)?"GOOD":"BAD");
+    printf("FW: AP2 CRC16 = %s\n", VerifyCRC16(0x0000, 0x3FB00, 0xFE, 0x3FBFE)?"GOOD":"BAD");
+    printf("FW: AP3 CRC16 = %s\n", VerifyCRC16(0x0000, 0x3FC00, 0xFE, 0x3FCFE)?"GOOD":"BAD");
+    printf("FW: USER0 CRC16 = %s\n", VerifyCRC16(0xFFFF, 0x3FE00, 0x70, 0x3FE72)?"GOOD":"BAD");
+    printf("FW: USER1 CRC16 = %s\n", VerifyCRC16(0xFFFF, 0x3FF00, 0x70, 0x3FF72)?"GOOD":"BAD");
+}
+
+// Returns the byte produced by the last Write().
+u8 Read()
+{
+    return Data;
+}
+
+// Handles one byte sent to the firmware chip.
+//
+// val:  byte sent
+// hold: nonzero while the transfer continues after this byte
+//
+// The first byte sent with hold set starts a new command; the following bytes
+// are interpreted according to that command.
+void Write(u8 val, u32 hold)
+{
+    if (!hold)
+    {
+        Hold = 0;
+    }
+
+    if (hold && (!Hold))
+    {
+        CurCmd = val;
+        Hold = 1;
+        Data = 0;
+        DataPos = 1;
+        Addr = 0;
+        //printf("firmware SPI command %02X\n", CurCmd);
+        return;
+    }
+
+    switch (CurCmd)
+    {
+    case 0x03: // read
+        {
+            if (DataPos < 4)
+            {
+                // bytes 1-3 form the 24-bit address, most significant byte first
+                Addr <<= 8;
+                Addr |= val;
+                Data = 0;
+
+                //if (DataPos == 3) printf("firmware SPI read %08X\n", Addr);
+            }
+            else
+            {
+                // FirmwareLength is 0 when no firmware is loaded
+                if (Addr >= FirmwareLength)
+                    Data = 0;
+                else
+                    Data = Firmware[Addr];
+
+                Addr++;
+            }
+
+            DataPos++;
+        }
+        break;
+
+    case 0x04: // write disable
+        StatusReg &= ~(1<<1);
+        Data = 0;
+        break;
+
+    case 0x05: // read status reg
+        Data = StatusReg;
+        break;
+
+    case 0x06: // write enable
+        StatusReg |= (1<<1);
+        Data = 0;
+        break;
+
+    case 0x9F: // read JEDEC ID
+        {
+            switch (DataPos)
+            {
+            case 1: Data = 0x20; break;
+            case 2: Data = 0x40; break;
+            case 3: Data = 0x12; break;
+            default: Data = 0; break;
+            }
+            DataPos++;
+        }
+        break;
+
+    default:
+        printf("unknown firmware SPI command %02X\n", CurCmd);
+        break;
+    }
+}
+
+}
+
+// Emulation of the power management device on the SPI bus: eight 8-bit
+// registers, of which only the bits set in RegMasks can be written.
+namespace SPI_Powerman
+{
+
+// Transfer state: whether a transfer is in progress, the index of the next
+// byte within it, the index byte that started it, and the byte returned by Read().
+u32 Hold;
+u32 DataPos;
+u8 Index;
+u8 Data;
+
+u8 Registers[8];
+u8 RegMasks[8];
+
+
+// Nothing to allocate; always succeeds.
+bool Init()
+{
+    return true;
+}
+
+// Nothing to release.
+void DeInit()
+{
+}
+
+// Resets the transfer state and restores the register defaults and write masks.
+void Reset()
+{
+    Hold = 0;
+    // a stale DataPos of 1 would let a stray non-held write after a reset
+    // be taken as register data
+    DataPos = 0;
+    Index = 0;
+    Data = 0;
+
+    memset(Registers, 0, sizeof(Registers));
+    memset(RegMasks, 0, sizeof(RegMasks));
+
+    Registers[4] = 0x40;
+
+    // registers 5-7 keep a mask of 0 and are therefore read-only
+    RegMasks[0] = 0x7F;
+    RegMasks[1] = 0x01;
+    RegMasks[2] = 0x01;
+    RegMasks[3] = 0x03;
+    RegMasks[4] = 0x0F;
+}
+
+// Returns the byte produced by the last Write().
+u8 Read()
+{
+    return Data;
+}
+
+// Handles one byte sent to the power management device.
+//
+// val:  byte sent
+// hold: nonzero while the transfer continues after this byte
+//
+// The first byte sent with hold set is the index byte: bits 0-2 select the
+// register and bit 7 selects reading (set) or writing (clear). The next byte
+// either returns the register through Read() or is stored into it.
+void Write(u8 val, u32 hold)
+{
+    if (!hold)
+    {
+        Hold = 0;
+    }
+
+    if (hold && (!Hold))
+    {
+        Index = val;
+        Hold = 1;
+        Data = 0;
+        DataPos = 1;
+        return;
+    }
+
+    if (DataPos == 1)
+    {
+        u32 regid = Index & 0x07;
+
+        if (Index & 0x80)
+        {
+            Data = Registers[regid];
+        }
+        else
+        {
+            // only the bits allowed by the register's mask are replaced
+            Registers[regid] =
+                (Registers[regid] & ~RegMasks[regid]) |
+                (val & RegMasks[regid]);
+        }
+    }
+    else
+        Data = 0;
+}
+
+}
+
+
+// Emulation of the touchscreen controller on the SPI bus.
+namespace SPI_TSC
+{
+
+// Transfer state: index of the next reply byte, the last control byte
+// received, and the byte returned by Read().
+u32 DataPos;
+u8 ControlByte;
+u8 Data;
+
+// Conversion result selected by the last control byte.
+u16 ConvResult;
+
+// Current touch position as stored by SetTouchCoords().
+u16 TouchX, TouchY;
+
+
+// Nothing to allocate; always succeeds.
+bool Init()
+{
+    return true;
+}
+
+// Nothing to release.
+void DeInit()
+{
+}
+
+// Resets the transfer state. The touch position is left untouched.
+void Reset()
+{
+    // without this, a stale DataPos would make the first byte after a reset
+    // return part of an old conversion result
+    DataPos = 0;
+    ControlByte = 0;
+    Data = 0;
+
+    ConvResult = 0;
+}
+
+// Stores the touch position reported through the conversion results.
+// Coordinates are shifted left by 4 before being stored, except when y is
+// 0xFFF, in which case both values are stored unchanged.
+void SetTouchCoords(u16 x, u16 y)
+{
+    // scr.x = (adc.x-adc.x1) * (scr.x2-scr.x1) / (adc.x2-adc.x1) + (scr.x1-1)
+    // scr.y = (adc.y-adc.y1) * (scr.y2-scr.y1) / (adc.y2-adc.y1) + (scr.y1-1)
+    // adc.x = ((scr.x * ((adc.x2-adc.x1) + (scr.x1-1))) / (scr.x2-scr.x1)) + adc.x1
+    // adc.y = ((scr.y * ((adc.y2-adc.y1) + (scr.y1-1))) / (scr.y2-scr.y1)) + adc.y1
+    TouchX = x;
+    TouchY = y;
+
+    if (y == 0xFFF) return;
+
+    TouchX <<= 4;
+    TouchY <<= 4;
+}
+
+// Returns the byte produced by the last Write().
+u8 Read()
+{
+    return Data;
+}
+
+// Handles one byte sent to the touchscreen controller.
+//
+// val:  byte sent; a byte with bit 7 set is a control byte that selects a new
+//       conversion, any other byte only advances the reply
+// hold: unused by this device
+//
+// The reply to the previous control byte is produced first: ConvResult is
+// returned over the two bytes following the control byte.
+void Write(u8 val, u32 hold)
+{
+    if (DataPos == 1)
+        Data = (ConvResult >> 5) & 0xFF;
+    else if (DataPos == 2)
+        Data = (ConvResult << 3) & 0xFF;
+    else
+        Data = 0;
+
+    if (val & 0x80)
+    {
+        ControlByte = val;
+        DataPos = 1;
+
+        // bits 4-6 select the channel to convert
+        switch (ControlByte & 0x70)
+        {
+        case 0x10: ConvResult = TouchY; break;
+        case 0x50: ConvResult = TouchX; break;
+        default: ConvResult = 0xFFF; break;
+        }
+
+        if (ControlByte & 0x08)
+            ConvResult &= 0x0FF0; // checkme
+    }
+    else
+        DataPos++;
+}
+
+}
+
+
+// SPI bus front-end: owns the control register and routes data bytes to the
+// power management device, the firmware chip or the touchscreen controller.
+namespace SPI
+{
+
+// SPI control register. Bits used by this file: 8-9 select the device,
+// 11 is passed to the devices as the hold flag, 14 requests an IRQ after each
+// byte written, and 15 must be set for data accesses to do anything.
+u16 Cnt;
+
+u32 CurDevice;
+
+
+// Initializes every device on the bus; returns false as soon as one fails.
+bool Init()
+{
+    if (!SPI_Firmware::Init()) return false;
+    if (!SPI_Powerman::Init()) return false;
+    if (!SPI_TSC::Init()) return false;
+
+    return true;
+}
+
+// Shuts down every device on the bus.
+void DeInit()
+{
+    SPI_Firmware::DeInit();
+    SPI_Powerman::DeInit();
+    SPI_TSC::DeInit();
+}
+
+// Clears the control register and resets every device on the bus.
+void Reset()
+{
+    Cnt = 0;
+
+    SPI_Firmware::Reset();
+    SPI_Powerman::Reset();
+    // this must be Reset(), not Init(): Init() leaves the controller's state untouched
+    SPI_TSC::Reset();
+}
+
+
+// Returns the current value of the control register.
+u16 ReadCnt()
+{
+    return Cnt;
+}
+
+// Updates the control register. Bit 7 of the current value is kept as it is;
+// of the written value only the bits in mask 0xCF03 are taken.
+void WriteCnt(u16 val)
+{
+    Cnt = (Cnt & 0x0080) | (val & 0xCF03);
+    if (val & 0x0400) printf("!! CRAPOED 16BIT SPI MODE\n");
+}
+
+// Returns the reply byte of the selected device, or 0 if bit 15 of Cnt is
+// clear or no known device is selected.
+u8 ReadData()
+{
+    if (!(Cnt & (1<<15))) return 0;
+
+    switch (Cnt & 0x0300)
+    {
+    case 0x0000: return SPI_Powerman::Read();
+    case 0x0100: return SPI_Firmware::Read();
+    case 0x0200: return SPI_TSC::Read();
+    default: return 0;
+    }
+}
+
+// Sends one byte to the selected device, then raises the SPI IRQ if bit 14 of
+// Cnt is set. Does nothing if bit 15 of Cnt is clear.
+void WriteData(u8 val)
+{
+    if (!(Cnt & (1<<15))) return;
+
+    // TODO: take delays into account
+
+    switch (Cnt & 0x0300)
+    {
+    case 0x0000: SPI_Powerman::Write(val, Cnt&(1<<11)); break;
+    case 0x0100: SPI_Firmware::Write(val, Cnt&(1<<11)); break;
+    case 0x0200: SPI_TSC::Write(val, Cnt&(1<<11)); break;
+    default: printf("SPI to unknown device %04X %02X\n", Cnt, val); break;
+    }
+
+    if (Cnt & (1<<14))
+        NDS::SetIRQ(1, NDS::IRQ_SPI);
+}
+
+}
diff --git a/src/SPI.h b/src/SPI.h
new file mode 100644
index 0000000..73a4180
--- /dev/null
+++ b/src/SPI.h
@@ -0,0 +1,46 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef SPI_H
+#define SPI_H
+
+// u8/u16 are used below; include their definitions so that this header does
+// not depend on what the including file happened to include before it
+#include "types.h"
+
+// Touchscreen controller on the SPI bus.
+namespace SPI_TSC
+{
+
+// Stores the touch position reported by the touchscreen controller.
+void SetTouchCoords(u16 x, u16 y);
+
+}
+
+// SPI bus front-end.
+namespace SPI
+{
+
+// SPI control register.
+extern u16 Cnt;
+
+// Lifecycle of the bus and of every device attached to it.
+bool Init();
+void DeInit();
+void Reset();
+
+// Control register access.
+u16 ReadCnt();
+void WriteCnt(u16 val);
+
+// Data register access; routed to the device selected in the control register.
+u8 ReadData();
+void WriteData(u8 val);
+
+}
+
+#endif
diff --git a/src/Wifi.cpp b/src/Wifi.cpp
new file mode 100644
index 0000000..0f1c239
--- /dev/null
+++ b/src/Wifi.cpp
@@ -0,0 +1,120 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "NDS.h"
+#include "Wifi.h"
+
+
+// Minimal emulation of the wifi hardware: only the baseband (BB) register
+// interface is implemented.
+namespace Wifi
+{
+
+// Baseband access registers: BBCnt (offset 0x158) holds the register index in
+// its low byte and the access type in bits 12-15 (5 = write, 6 = read);
+// BBWrite (offset 0x15A) holds the byte to be written.
+u16 BBCnt;
+u8 BBWrite;
+
+// Baseband register values, and a flag per register telling whether it is
+// read-only.
+u8 BBRegs[0x100];
+u8 BBRegsRO[0x100];
+
+
+// Clears the baseband interface and sets up the read-only registers with
+// their fixed values.
+void Reset()
+{
+    BBCnt = 0;
+    BBWrite = 0;
+    memset(BBRegs, 0, 0x100);
+    memset(BBRegsRO, 0, 0x100);
+
+    #define BBREG_FIXED(id, val) do { BBRegs[id] = val; BBRegsRO[id] = 1; } while (0)
+    BBREG_FIXED(0x00, 0x6D);
+    BBREG_FIXED(0x0D, 0x00);
+    BBREG_FIXED(0x0E, 0x00);
+    BBREG_FIXED(0x0F, 0x00);
+    BBREG_FIXED(0x10, 0x00);
+    BBREG_FIXED(0x11, 0x00);
+    BBREG_FIXED(0x12, 0x00);
+    BBREG_FIXED(0x16, 0x00);
+    BBREG_FIXED(0x17, 0x00);
+    BBREG_FIXED(0x18, 0x00);
+    BBREG_FIXED(0x19, 0x00);
+    BBREG_FIXED(0x1A, 0x00);
+    BBREG_FIXED(0x27, 0x00);
+    BBREG_FIXED(0x4D, 0x00); // 00 or BF
+    BBREG_FIXED(0x5D, 0x01);
+    BBREG_FIXED(0x5E, 0x00);
+    BBREG_FIXED(0x5F, 0x00);
+    BBREG_FIXED(0x60, 0x00);
+    BBREG_FIXED(0x61, 0x00);
+    BBREG_FIXED(0x64, 0xFF); // FF or 3F
+    BBREG_FIXED(0x66, 0x00);
+    for (int i = 0x69; i < 0x100; i++)
+    {
+        BBREG_FIXED(i, 0x00);
+    }
+    #undef BBREG_FIXED
+}
+
+
+// Reads a 16-bit wifi I/O register. Only the low 15 bits of addr are decoded.
+// Unknown registers are logged and read as 0.
+u16 Read(u32 addr)
+{
+    addr &= 0x7FFF;
+
+    switch (addr)
+    {
+    case 0x158:
+        return BBCnt;
+
+    case 0x15C:
+        // baseband read data: only valid after a read access was requested
+        if ((BBCnt & 0xF000) != 0x6000)
+        {
+            printf("WIFI: bad BB read, CNT=%04X\n", BBCnt);
+            return 0;
+        }
+        return BBRegs[BBCnt & 0xFF];
+
+    case 0x15E:
+        return 0; // cheap
+    }
+
+    printf("WIFI: unknown read %08X\n", addr);
+    return 0;
+}
+
+// Writes a 16-bit wifi I/O register. Only the low 15 bits of addr are decoded.
+// Unknown registers are logged and ignored.
+void Write(u32 addr, u16 val)
+{
+    addr &= 0x7FFF;
+
+    switch (addr)
+    {
+    case 0x158:
+        BBCnt = val;
+        if ((BBCnt & 0xF000) == 0x5000)
+        {
+            // write access: the low byte of BBCnt is the register index, the
+            // data comes from the value previously written to BBWrite
+            u32 regid = BBCnt & 0xFF;
+            if (!BBRegsRO[regid])
+                BBRegs[regid] = BBWrite;
+        }
+        return;
+
+    case 0x15A:
+        BBWrite = val;
+        return;
+    }
+
+    printf("WIFI: unknown write %08X %04X\n", addr, val);
+}
+
+}
diff --git a/src/Wifi.h b/src/Wifi.h
new file mode 100644
index 0000000..a1755ea
--- /dev/null
+++ b/src/Wifi.h
@@ -0,0 +1,35 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef WIFI_H
+#define WIFI_H
+
+// u16/u32 are used below; include their definitions so that this header does
+// not depend on what the including file happened to include before it
+#include "types.h"
+
+// Wifi hardware emulation.
+namespace Wifi
+{
+
+// Resets the wifi state.
+void Reset();
+
+// 16-bit access to the wifi I/O registers; only the low 15 bits of addr are decoded.
+u16 Read(u32 addr);
+void Write(u32 addr, u16 val);
+
+}
+
+#endif
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..3e713da
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,272 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include <windows.h>
+#include "NDS.h"
+#include "GPU.h"
+
+
+// Version string shown in the window title.
+#define VERSION "0.1"
+
+
+// Module handle of the running program, used to register the window class and create the window.
+HINSTANCE instance;
+// Main window.
+HWND melon;
+// Bitmap header describing GPU::Framebuffer to SetDIBitsToDevice().
+BITMAPV4HEADER bmp;
+// Set by the main loop once WM_QUIT has been received.
+bool quit;
+
+// True while the left mouse button is held down on the touchscreen area.
+bool touching;
+
+
+// Writes the 32-bit words in [start, end) to the file at path, reading them
+// with NDS::ARM9Read32 when arm9 is true and with NDS::ARM7Read32 otherwise.
+// Does nothing if the file cannot be opened.
+static void DumpMemory(const char* path, u32 start, u32 end, bool arm9)
+{
+    FILE* f = fopen(path, "wb");
+    if (!f) return;
+
+    for (u32 i = start; i < end; i+=4)
+    {
+        u32 blarg = arm9 ? NDS::ARM9Read32(i) : NDS::ARM7Read32(i);
+        fwrite(&blarg, 4, 1, f);
+    }
+    fclose(f);
+}
+
+// Maps a virtual-key code to the key number passed to NDS::PressKey() and
+// NDS::ReleaseKey(), or returns -1 if the key is not bound.
+static int KeyFromVirtualKey(WPARAM vk)
+{
+    switch (vk)
+    {
+    case VK_RETURN: return 3;
+    case VK_SPACE: return 2;
+    case VK_UP: return 6;
+    case VK_DOWN: return 7;
+    case VK_LEFT: return 5;
+    case VK_RIGHT: return 4;
+    case 'A': return 0;
+    case 'B': return 1;
+    case 'X': return 16;
+    case 'Y': return 17;
+    case 'L': return 9;
+    case 'R': return 8;
+    }
+
+    return -1;
+}
+
+// Extracts the mouse position from a mouse message's lparam and converts it
+// to touchscreen coordinates (the touchscreen is the lower 256x192 half of the
+// client area). Returns true if the position lies on the touchscreen.
+static bool TouchPosFromLParam(LPARAM lparam, s16* x, s16* y)
+{
+    *x = (s16)(lparam & 0xFFFF);
+    *y = (s16)(lparam >> 16);
+
+    *y -= 192;
+    return (*x >= 0 && *x < 256 && *y >= 0 && *y < 192);
+}
+
+// Window procedure of the main window: forwards keyboard and mouse input to
+// the emulated console, paints the framebuffer, and on close dumps some
+// memory regions to the debug/ directory before quitting.
+LRESULT CALLBACK derpo(HWND window, UINT msg, WPARAM wparam, LPARAM lparam)
+{
+    switch (msg)
+    {
+    case WM_CLOSE:
+        printf("close\n");
+        DumpMemory("debug/wram.bin", 0x37F8000, 0x3808000, false);
+        DumpMemory("debug/arm7vram.bin", 0x6000000, 0x6040000, false);
+        DumpMemory("debug/mainram.bin", 0x2000000, 0x2400000, true);
+        PostQuitMessage(0);
+        return 0;
+
+    case WM_KEYDOWN:
+        {
+            int key = KeyFromVirtualKey(wparam);
+            if (key >= 0)
+                NDS::PressKey(key);
+            else if (wparam == 'D')
+                NDS::debug(0);
+        }
+        return 0;
+
+    case WM_KEYUP:
+        {
+            int key = KeyFromVirtualKey(wparam);
+            if (key >= 0)
+                NDS::ReleaseKey(key);
+        }
+        return 0;
+
+    case WM_LBUTTONDOWN:
+        if (!touching)
+        {
+            s16 x, y;
+            if (TouchPosFromLParam(lparam, &x, &y))
+            {
+                NDS::TouchScreen(x, y);
+                NDS::PressKey(16+6);
+                touching = true;
+            }
+        }
+        return 0;
+
+    case WM_LBUTTONUP:
+    case WM_NCLBUTTONUP:
+        if (touching)
+        {
+            NDS::ReleaseScreen();
+            NDS::ReleaseKey(16+6);
+            touching = false;
+        }
+        return 0;
+
+    case WM_MOUSEMOVE:
+        if (touching)
+        {
+            // positions outside the touchscreen are ignored, the touch stays
+            // at the last valid position
+            s16 x, y;
+            if (TouchPosFromLParam(lparam, &x, &y))
+                NDS::TouchScreen(x, y);
+        }
+        return 0;
+
+    case WM_PAINT:
+        {
+            PAINTSTRUCT partisocialiste;
+            HDC dc = BeginPaint(window, &partisocialiste);
+
+            SetDIBitsToDevice(dc, 0, 0, 256, 384, 0, 0, 0, 384, GPU::Framebuffer, (BITMAPINFO*)&bmp, DIB_RGB_COLORS);
+
+            EndPaint(window, &partisocialiste);
+        }
+        return 0;
+    }
+
+    return DefWindowProc(window, msg, wparam, lparam);
+}
+
+
+// Program entry point: creates the main window, initializes the emulator and
+// runs the frame loop until the window is closed. Returns 0 on a normal exit
+// and -1 if the main window could not be created.
+int main()
+{
+    printf("melonDS version uh... 0.1??\n");
+    printf("it's a DS emulator!!!\n");
+    printf("http://melonds.kuribo64.net/\n");
+    quit = false;
+    touching = false;
+
+    instance = GetModuleHandle(NULL);
+
+    //SetThreadAffinityMask(GetCurrentThread(), 0x8);
+
+    // window class of the main window
+    WNDCLASSEX shit;
+    shit.cbSize = sizeof(shit);
+    shit.style = CS_HREDRAW | CS_VREDRAW;
+    shit.lpfnWndProc = derpo;
+    shit.cbClsExtra = 0;
+    shit.cbWndExtra = 0;
+    shit.hInstance = instance;
+    shit.hIcon = NULL;
+    shit.hIconSm = NULL;
+    shit.hCursor = NULL;
+    shit.hbrBackground = (HBRUSH)(COLOR_WINDOWFRAME+1);
+    shit.lpszMenuName = NULL;
+    shit.lpszClassName = "v0ltmeters";
+    RegisterClassEx(&shit);
+
+    // size the window so that its client area holds both 256x192 screens
+    RECT rekt;
+    rekt.left = 0; rekt.top = 0;
+    rekt.right = 256; rekt.bottom = 384;
+    AdjustWindowRect(&rekt, WS_OVERLAPPEDWINDOW, FALSE);
+
+    melon = CreateWindow("v0ltmeters",
+                         "melonDS " VERSION,
+                         WS_OVERLAPPEDWINDOW,
+                         CW_USEDEFAULT, CW_USEDEFAULT,
+                         rekt.right-rekt.left, rekt.bottom-rekt.top,
+                         NULL,
+                         NULL,
+                         instance,
+                         NULL);
+    if (!melon)
+    {
+        // also reached when registering the window class failed
+        printf("failed to create the main window\n");
+        return -1;
+    }
+
+    ShowWindow(melon, SW_SHOW);
+
+    // describe the framebuffer: 256x384, top-down (negative height),
+    // 32 bits per pixel with red in the lowest byte
+    memset(&bmp, 0, sizeof(bmp));
+    bmp.bV4Size = sizeof(bmp);
+    bmp.bV4Width = 256;
+    bmp.bV4Height = -384;
+    bmp.bV4Planes = 1;
+    bmp.bV4BitCount = 32;
+    bmp.bV4V4Compression = BI_RGB|BI_BITFIELDS;
+    bmp.bV4RedMask = 0x000000FF;
+    bmp.bV4GreenMask = 0x0000FF00;
+    bmp.bV4BlueMask = 0x00FF0000;
+
+    NDS::Init();
+
+    u32 nframes = 0;
+    u32 lasttick = GetTickCount();
+
+    for (;;)
+    {
+        // drain the message queue before running the next frame
+        MSG msg;
+        while (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
+        {
+            if (msg.message == WM_QUIT)
+            {
+                quit = true;
+                break;
+            }
+
+            TranslateMessage(&msg);
+            DispatchMessage(&msg);
+        }
+        if (quit) break;
+
+        NDS::RunFrame();
+
+        //HDC dc = GetDC(melon);
+        //SetDIBitsToDevice(dc, 0, 0, 256, 384, 0, 0, 0, 384, GPU::Framebuffer, (BITMAPINFO*)&bmp, DIB_RGB_COLORS);
+        InvalidateRect(melon, NULL, false);
+        UpdateWindow(melon);
+
+        // update the FPS counter in the title bar every 30 frames
+        nframes++;
+        if (nframes >= 30)
+        {
+            u32 tick = GetTickCount();
+            u32 diff = tick - lasttick;
+
+            // the tick counter has a coarse resolution: if it has not advanced
+            // yet, keep counting frames instead of dividing by zero
+            if (diff > 0)
+            {
+                u32 fps = (nframes * 1000) / diff;
+                lasttick = tick;
+                nframes = 0;
+
+                char melontitle[100];
+                sprintf(melontitle, "melonDS " VERSION " | %u FPS", fps);
+                SetWindowText(melon, melontitle);
+            }
+        }
+    }
+    printf("deinit\n");
+    NDS::DeInit();
+
+    return 0;
+}
diff --git a/src/types.h b/src/types.h
new file mode 100644
index 0000000..8a6c7e3
--- /dev/null
+++ b/src/types.h
@@ -0,0 +1,31 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef TYPES_H
+#define TYPES_H
+
+// Short names for the integer types used throughout the emulator.
+// The number in each name is the width in bits the code expects; this relies
+// on the target's sizes for char/short/int/long long (8/16/32/64 bits).
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long int u64;
+// Signed counterparts.
+typedef signed char s8;
+typedef signed short s16;
+typedef signed int s32;
+typedef signed long long int s64;
+
+#endif // TYPES_H