diff options
Diffstat (limited to 'src')
35 files changed, 16540 insertions, 0 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp new file mode 100644 index 0000000..536c78c --- /dev/null +++ b/src/ARM.cpp @@ -0,0 +1,411 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#include <stdio.h> +#include "NDS.h" +#include "ARM.h" +#include "ARMInterpreter.h" +#include "GPU3D.h" + + +u32 ARM::ConditionTable[16] = +{ + 0xF0F0, // EQ + 0x0F0F, // NE + 0xCCCC, // CS + 0x3333, // CC + 0xFF00, // MI + 0x00FF, // PL + 0xAAAA, // VS + 0x5555, // VC + 0x0C0C, // HI + 0xF3F3, // LS + 0xAA55, // GE + 0x55AA, // LT + 0x0A05, // GT + 0xF5FA, // LE + 0xFFFF, // AL + 0x0000 // NE +}; + + +ARM::ARM(u32 num) +{ + // well uh + Num = num; + + for (int i = 0; i < 16; i++) + { + Waitstates[0][i] = 1; + Waitstates[1][i] = 1; + Waitstates[2][i] = 1; + Waitstates[3][i] = 1; + } + + if (!num) + { + // ARM9 + Waitstates[0][0x2] = 1; // main RAM timing, assuming cache hit + Waitstates[0][0x3] = 4; + Waitstates[0][0x4] = 4; + Waitstates[0][0x5] = 5; + Waitstates[0][0x6] = 5; + Waitstates[0][0x7] = 4; + Waitstates[0][0x8] = 19; + Waitstates[0][0x9] = 19; + Waitstates[0][0xF] = 4; + + Waitstates[1][0x2] = 1; + Waitstates[1][0x3] = 8; + Waitstates[1][0x4] = 8; + Waitstates[1][0x5] = 10; + Waitstates[1][0x6] = 10; + Waitstates[1][0x7] = 8; + Waitstates[1][0x8] = 38; + Waitstates[1][0x9] = 38; + Waitstates[1][0xF] = 8; + + Waitstates[2][0x2] = 1; + 
Waitstates[2][0x3] = 2; + Waitstates[2][0x4] = 2; + Waitstates[2][0x5] = 2; + Waitstates[2][0x6] = 2; + Waitstates[2][0x7] = 2; + Waitstates[2][0x8] = 12; + Waitstates[2][0x9] = 12; + Waitstates[2][0xA] = 20; + Waitstates[2][0xF] = 2; + + Waitstates[3][0x2] = 1; + Waitstates[3][0x3] = 2; + Waitstates[3][0x4] = 2; + Waitstates[3][0x5] = 4; + Waitstates[3][0x6] = 4; + Waitstates[3][0x7] = 2; + Waitstates[3][0x8] = 24; + Waitstates[3][0x9] = 24; + Waitstates[3][0xA] = 20; + Waitstates[3][0xF] = 2; + } + else + { + // ARM7 + Waitstates[0][0x0] = 1; + Waitstates[0][0x2] = 1; + Waitstates[0][0x3] = 1; + Waitstates[0][0x4] = 1; + Waitstates[0][0x6] = 1; + Waitstates[0][0x8] = 6; + Waitstates[0][0x9] = 6; + + Waitstates[1][0x0] = 1; + Waitstates[1][0x2] = 2; + Waitstates[1][0x3] = 1; + Waitstates[1][0x4] = 1; + Waitstates[1][0x6] = 2; + Waitstates[1][0x8] = 12; + Waitstates[1][0x9] = 12; + + Waitstates[2][0x0] = 1; + Waitstates[2][0x2] = 1; + Waitstates[2][0x3] = 1; + Waitstates[2][0x4] = 1; + Waitstates[2][0x6] = 1; + Waitstates[2][0x8] = 6; + Waitstates[2][0x9] = 6; + Waitstates[2][0xA] = 10; + + Waitstates[3][0x0] = 1; + Waitstates[3][0x2] = 2; + Waitstates[3][0x3] = 1; + Waitstates[3][0x4] = 1; + Waitstates[3][0x6] = 2; + Waitstates[3][0x8] = 12; + Waitstates[3][0x9] = 12; + Waitstates[3][0xA] = 10; + } +} + +ARM::~ARM() +{ + // dorp +} + +void ARM::Reset() +{ + Cycles = 0; + Halted = 0; + + for (int i = 0; i < 16; i++) + R[i] = 0; + + CPSR = 0x000000D3; + + ExceptionBase = Num ? 
0x00000000 : 0xFFFF0000; + + // zorp + JumpTo(ExceptionBase); +} + +void ARM::JumpTo(u32 addr, bool restorecpsr) +{ + if (restorecpsr) + { + RestoreCPSR(); + + if (CPSR & 0x20) addr |= 0x1; + else addr &= ~0x1; + } + + if (addr & 0x1) + { + addr &= ~0x1; + R[15] = addr+2; + NextInstr[0] = CodeRead16(addr); + NextInstr[1] = CodeRead16(addr+2); + CPSR |= 0x20; + } + else + { + addr &= ~0x3; + R[15] = addr+4; + NextInstr[0] = CodeRead32(addr); + NextInstr[1] = CodeRead32(addr+4); + CPSR &= ~0x20; + } +} + +void ARM::RestoreCPSR() +{ + u32 oldcpsr = CPSR; + + switch (CPSR & 0x1F) + { + case 0x11: + CPSR = R_FIQ[7]; + break; + + case 0x12: + CPSR = R_IRQ[2]; + break; + + case 0x13: + CPSR = R_SVC[2]; + break; + + case 0x17: + CPSR = R_ABT[2]; + break; + + case 0x1B: + CPSR = R_UND[2]; + break; + + default: + printf("!! attempt to restore CPSR under bad mode %02X, %08X\n", CPSR&0x1F, R[15]); + break; + } + + UpdateMode(oldcpsr, CPSR); +} + +void ARM::UpdateMode(u32 oldmode, u32 newmode) +{ + u32 temp; + #define SWAP(a, b) temp = a; a = b; b = temp; + + if ((oldmode & 0x1F) == (newmode & 0x1F)) return; + + switch (oldmode & 0x1F) + { + case 0x11: + SWAP(R[8], R_FIQ[0]); + SWAP(R[9], R_FIQ[1]); + SWAP(R[10], R_FIQ[2]); + SWAP(R[11], R_FIQ[3]); + SWAP(R[12], R_FIQ[4]); + SWAP(R[13], R_FIQ[5]); + SWAP(R[14], R_FIQ[6]); + break; + + case 0x12: + SWAP(R[13], R_IRQ[0]); + SWAP(R[14], R_IRQ[1]); + break; + + case 0x13: + SWAP(R[13], R_SVC[0]); + SWAP(R[14], R_SVC[1]); + break; + + case 0x17: + SWAP(R[13], R_ABT[0]); + SWAP(R[14], R_ABT[1]); + break; + + case 0x1B: + SWAP(R[13], R_UND[0]); + SWAP(R[14], R_UND[1]); + break; + } + + switch (newmode & 0x1F) + { + case 0x11: + SWAP(R[8], R_FIQ[0]); + SWAP(R[9], R_FIQ[1]); + SWAP(R[10], R_FIQ[2]); + SWAP(R[11], R_FIQ[3]); + SWAP(R[12], R_FIQ[4]); + SWAP(R[13], R_FIQ[5]); + SWAP(R[14], R_FIQ[6]); + break; + + case 0x12: + SWAP(R[13], R_IRQ[0]); + SWAP(R[14], R_IRQ[1]); + break; + + case 0x13: + SWAP(R[13], R_SVC[0]); + SWAP(R[14], 
R_SVC[1]); + break; + + case 0x17: + SWAP(R[13], R_ABT[0]); + SWAP(R[14], R_ABT[1]); + break; + + case 0x1B: + SWAP(R[13], R_UND[0]); + SWAP(R[14], R_UND[1]); + break; + } + + #undef SWAP +} + +void ARM::TriggerIRQ() +{ + if (CPSR & 0x80) + return; + + u32 oldcpsr = CPSR; + CPSR &= ~0xFF; + CPSR |= 0xD2; + UpdateMode(oldcpsr, CPSR); + + R_IRQ[2] = oldcpsr; + R[14] = R[15] + (oldcpsr & 0x20 ? 2 : 0); + JumpTo(ExceptionBase + 0x18); +} + +s32 ARM::Execute() +{ + if (Halted) + { + if (NDS::HaltInterrupted(Num)) + { + Halted = 0; + if (NDS::IME[Num]&1) + TriggerIRQ(); + } + else + { + Cycles = CyclesToRun; + GPU3D::Run(CyclesToRun >> 1); + return Cycles; + } + } + + Cycles = 0; + s32 lastcycles = 0; + u32 addr = R[15] - (CPSR&0x20 ? 4:8); + u32 cpsr = CPSR; + + while (Cycles < CyclesToRun) + { + //if(Num==1)printf("%08X %08X\n", R[15] - (CPSR&0x20 ? 4:8), NextInstr); + + if (CPSR & 0x20) // THUMB + { + // prefetch + R[15] += 2; + CurInstr = NextInstr[0]; + NextInstr[0] = NextInstr[1]; + NextInstr[1] = CodeRead16(R[15]); + + // actually execute + u32 icode = (CurInstr >> 6); + ARMInterpreter::THUMBInstrTable[icode](this); + } + else + { + // prefetch + R[15] += 4; + CurInstr = NextInstr[0]; + NextInstr[0] = NextInstr[1]; + NextInstr[1] = CodeRead32(R[15]); + + // actually execute + if (CheckCondition(CurInstr >> 28)) + { + u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0); + ARMInterpreter::ARMInstrTable[icode](this); + } + else if ((CurInstr & 0xFE000000) == 0xFA000000) + { + ARMInterpreter::A_BLX_IMM(this); + } + } + + //if (R[15]==0x037F9364) printf("R8=%08X R9=%08X\n", R[8], R[9]); + + // gross hack + // TODO, though: move timer code here too? + // quick testing shows that moving this to the NDS loop doesn't really slow things down + if (Num==0) + { + s32 diff = Cycles - lastcycles; + GPU3D::Run(diff >> 1); + lastcycles = Cycles - (diff&1); + } + + // TODO optimize this shit!!! 
+ if (Halted) + { + if (Halted == 1) + Cycles = CyclesToRun; + break; + } + if (NDS::HaltInterrupted(Num)) + { + if (NDS::IME[Num]&1) + TriggerIRQ(); + } + + // temp. debug cruft + addr = R[15] - (CPSR&0x20 ? 4:8); + cpsr = CPSR; + } + + if (Halted == 2) + Halted = 0; + + return Cycles; +} diff --git a/src/ARM.h b/src/ARM.h new file mode 100644 index 0000000..79c2bce --- /dev/null +++ b/src/ARM.h @@ -0,0 +1,234 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#ifndef ARM_H +#define ARM_H + +#include "types.h" +#include "NDS.h" +#include "CP15.h" + +// lame +#define C_S(x) x +#define C_N(x) x +#define C_I(x) x + +#define ROR(x, n) (((x) >> (n)) | ((x) << (32-(n)))) + +class ARM +{ +public: + ARM(u32 num); + ~ARM(); // destroy shit + + void Reset(); + + void JumpTo(u32 addr, bool restorecpsr = false); + void RestoreCPSR(); + + void Halt(u32 halt) + { + Halted = halt; + } + + s32 Execute(); + + bool CheckCondition(u32 code) + { + if (code == 0xE) return true; + if (ConditionTable[code] & (1 << (CPSR>>28))) return true; + return false; + } + + void SetC(bool c) + { + if (c) CPSR |= 0x20000000; + else CPSR &= ~0x20000000; + } + + void SetNZ(bool n, bool z) + { + CPSR &= ~0xC0000000; + if (n) CPSR |= 0x80000000; + if (z) CPSR |= 0x40000000; + } + + void SetNZCV(bool n, bool z, bool c, bool v) + { + CPSR &= ~0xF0000000; + if (n) CPSR |= 0x80000000; + if (z) CPSR |= 0x40000000; + if (c) CPSR |= 0x20000000; + if (v) CPSR |= 0x10000000; + } + + void UpdateMode(u32 oldmode, u32 newmode); + + void TriggerIRQ(); + + + u16 CodeRead16(u32 addr) + { + u16 val; + // TODO eventually: on ARM9, THUMB opcodes are prefetched with 32bit reads + if (!Num) + { + if (!CP15::HandleCodeRead16(addr, &val)) + val = NDS::ARM9Read16(addr); + } + else + val = NDS::ARM7Read16(addr); + + Cycles += Waitstates[0][(addr>>24)&0xF]; + return val; + } + + u32 CodeRead32(u32 addr) + { + u32 val; + if (!Num) + { + if (!CP15::HandleCodeRead32(addr, &val)) + val = NDS::ARM9Read32(addr); + } + else + val = NDS::ARM7Read32(addr); + + Cycles += Waitstates[1][(addr>>24)&0xF]; + return val; + } + + + u8 DataRead8(u32 addr, u32 forceuser=0) + { + u8 val; + if (!Num) + { + if (!CP15::HandleDataRead8(addr, &val, forceuser)) + val = NDS::ARM9Read8(addr); + } + else + val = NDS::ARM7Read8(addr); + + Cycles += Waitstates[2][(addr>>24)&0xF]; + return val; + } + + u16 DataRead16(u32 addr, u32 forceuser=0) + { + u16 val; + addr &= ~1; + if (!Num) + { + if 
(!CP15::HandleDataRead16(addr, &val, forceuser)) + val = NDS::ARM9Read16(addr); + } + else + val = NDS::ARM7Read16(addr); + + Cycles += Waitstates[2][(addr>>24)&0xF]; + return val; + } + + u32 DataRead32(u32 addr, u32 forceuser=0) + { + u32 val; + addr &= ~3; + if (!Num) + { + if (!CP15::HandleDataRead32(addr, &val, forceuser)) + val = NDS::ARM9Read32(addr); + } + else + val = NDS::ARM7Read32(addr); + + Cycles += Waitstates[3][(addr>>24)&0xF]; + return val; + } + + void DataWrite8(u32 addr, u8 val, u32 forceuser=0) + { + if (!Num) + { + if (!CP15::HandleDataWrite8(addr, val, forceuser)) + NDS::ARM9Write8(addr, val); + } + else + NDS::ARM7Write8(addr, val); + + Cycles += Waitstates[2][(addr>>24)&0xF]; + } + + void DataWrite16(u32 addr, u16 val, u32 forceuser=0) + { + addr &= ~1; + if (!Num) + { + if (!CP15::HandleDataWrite16(addr, val, forceuser)) + NDS::ARM9Write16(addr, val); + } + else + NDS::ARM7Write16(addr, val); + + Cycles += Waitstates[2][(addr>>24)&0xF]; + } + + void DataWrite32(u32 addr, u32 val, u32 forceuser=0) + { + addr &= ~3; + if (!Num) + { + if (!CP15::HandleDataWrite32(addr, val, forceuser)) + NDS::ARM9Write32(addr, val); + } + else + NDS::ARM7Write32(addr, val); + + Cycles += Waitstates[3][(addr>>24)&0xF]; + } + + + u32 Num; + + // waitstates: + // 0=code16 1=code32 2=data16 3=data32 + // TODO eventually: nonsequential waitstates + s32 Waitstates[4][16]; + + s32 Cycles; + s32 CyclesToRun; + u32 Halted; + + u32 R[16]; // heh + u32 CPSR; + u32 R_FIQ[8]; // holding SPSR too + u32 R_SVC[3]; + u32 R_ABT[3]; + u32 R_IRQ[3]; + u32 R_UND[3]; + u32 CurInstr; + u32 NextInstr[2]; + + u32 ExceptionBase; + + static u32 ConditionTable[16]; + + u32 debug; +}; + +#endif // ARM_H diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp new file mode 100644 index 0000000..32b3a00 --- /dev/null +++ b/src/ARMInterpreter.cpp @@ -0,0 +1,221 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. 
+ + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#include <stdio.h> +#include "NDS.h" +#include "CP15.h" +#include "ARMInterpreter.h" +#include "ARMInterpreter_ALU.h" +#include "ARMInterpreter_Branch.h" +#include "ARMInterpreter_LoadStore.h" + + +namespace ARMInterpreter +{ + + +void A_UNK(ARM* cpu) +{ + printf("undefined ARM%d instruction %08X @ %08X\n", cpu->Num?7:9, cpu->CurInstr, cpu->R[15]-8); + for (int i = 0; i < 16; i++) printf("R%d: %08X\n", i, cpu->R[i]); + NDS::Halt(); +} + +void T_UNK(ARM* cpu) +{ + printf("undefined THUMB%d instruction %04X @ %08X\n", cpu->Num?7:9, cpu->CurInstr, cpu->R[15]-4); + NDS::Halt(); +} + + + +void A_MSR_IMM(ARM* cpu) +{ + u32* psr; + if (cpu->CurInstr & (1<<22)) + { + switch (cpu->CPSR & 0x1F) + { + case 0x11: psr = &cpu->R_FIQ[7]; break; + case 0x12: psr = &cpu->R_IRQ[2]; break; + case 0x13: psr = &cpu->R_SVC[2]; break; + case 0x17: psr = &cpu->R_ABT[2]; break; + case 0x1B: psr = &cpu->R_UND[2]; break; + default: printf("bad CPU mode %08X\n", cpu->CPSR); return; + } + } + else + psr = &cpu->CPSR; + + u32 oldpsr = *psr; + + u32 mask = 0; + if (cpu->CurInstr & (1<<16)) mask |= 0x000000FF; + if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; + if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; + if (cpu->CurInstr & (1<<19)) mask |= 0xFF000000; + + if (!(cpu->CurInstr & (1<<22))) + mask &= 0xFFFFFFDF; + + if ((cpu->CPSR & 0x1F) == 0x10) mask &= 0xFFFFFF00; + + u32 val = 
ROR((cpu->CurInstr & 0xFF), ((cpu->CurInstr >> 7) & 0x1E)); + + *psr &= ~mask; + *psr |= (val & mask); + + if (!(cpu->CurInstr & (1<<22))) + cpu->UpdateMode(oldpsr, cpu->CPSR); +} + +void A_MSR_REG(ARM* cpu) +{ + u32* psr; + if (cpu->CurInstr & (1<<22)) + { + switch (cpu->CPSR & 0x1F) + { + case 0x11: psr = &cpu->R_FIQ[7]; break; + case 0x12: psr = &cpu->R_IRQ[2]; break; + case 0x13: psr = &cpu->R_SVC[2]; break; + case 0x17: psr = &cpu->R_ABT[2]; break; + case 0x1B: psr = &cpu->R_UND[2]; break; + default: printf("bad CPU mode %08X\n", cpu->CPSR); return; + } + } + else + psr = &cpu->CPSR; + + u32 oldpsr = *psr; + + u32 mask = 0; + if (cpu->CurInstr & (1<<16)) mask |= 0x000000FF; + if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; + if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; + if (cpu->CurInstr & (1<<19)) mask |= 0xFF000000; + + if (!(cpu->CurInstr & (1<<22))) + mask &= 0xFFFFFFDF; + + if ((cpu->CPSR & 0x1F) == 0x10) mask &= 0xFFFFFF00; + + u32 val = cpu->R[cpu->CurInstr & 0xF]; + + *psr &= ~mask; + *psr |= (val & mask); + + if (!(cpu->CurInstr & (1<<22))) + cpu->UpdateMode(oldpsr, cpu->CPSR); +} + +void A_MRS(ARM* cpu) +{ + u32 psr; + if (cpu->CurInstr & (1<<22)) + { + switch (cpu->CPSR & 0x1F) + { + case 0x11: psr = cpu->R_FIQ[7]; break; + case 0x12: psr = cpu->R_IRQ[2]; break; + case 0x13: psr = cpu->R_SVC[2]; break; + case 0x17: psr = cpu->R_ABT[2]; break; + case 0x1B: psr = cpu->R_UND[2]; break; + default: printf("bad CPU mode %08X\n", cpu->CPSR); return; + } + } + else + psr = cpu->CPSR; + + cpu->R[(cpu->CurInstr>>12) & 0xF] = psr; +} + + +void A_MCR(ARM* cpu) +{ + u32 cp = (cpu->CurInstr >> 8) & 0xF; + //u32 op = (cpu->CurInstr >> 21) & 0x7; + u32 cn = (cpu->CurInstr >> 16) & 0xF; + u32 cm = cpu->CurInstr & 0xF; + u32 cpinfo = (cpu->CurInstr >> 5) & 0x7; + + if (cpu->Num==0 && cp==15) + { + CP15::Write((cn<<8)|(cm<<4)|cpinfo, cpu->R[(cpu->CurInstr>>12)&0xF]); + } + else + { + printf("bad MCR opcode p%d,%d,%d,%d on ARM%d\n", cp, cn, cm, cpinfo, 
cpu->Num?7:9); + } + + cpu->Cycles += 2; // TODO: checkme +} + +void A_MRC(ARM* cpu) +{ + u32 cp = (cpu->CurInstr >> 8) & 0xF; + //u32 op = (cpu->CurInstr >> 21) & 0x7; + u32 cn = (cpu->CurInstr >> 16) & 0xF; + u32 cm = cpu->CurInstr & 0xF; + u32 cpinfo = (cpu->CurInstr >> 5) & 0x7; + + if (cpu->Num==0 && cp==15) + { + cpu->R[(cpu->CurInstr>>12)&0xF] = CP15::Read((cn<<8)|(cm<<4)|cpinfo); + } + else + { + printf("bad MRC opcode p%d,%d,%d,%d on ARM%d\n", cp, cn, cm, cpinfo, cpu->Num?7:9); + } + + cpu->Cycles += 3; // TODO: checkme +} + + + +void A_SVC(ARM* cpu) +{ + u32 oldcpsr = cpu->CPSR; + cpu->CPSR &= ~0xFF; + cpu->CPSR |= 0xD3; + cpu->UpdateMode(oldcpsr, cpu->CPSR); + + cpu->R_SVC[2] = oldcpsr; + cpu->R[14] = cpu->R[15] - 4; + cpu->JumpTo(cpu->ExceptionBase + 0x08); +} + +void T_SVC(ARM* cpu) +{ + u32 oldcpsr = cpu->CPSR; + cpu->CPSR &= ~0xFF; + cpu->CPSR |= 0xD3; + cpu->UpdateMode(oldcpsr, cpu->CPSR); + + cpu->R_SVC[2] = oldcpsr; + cpu->R[14] = cpu->R[15] - 2; + cpu->JumpTo(cpu->ExceptionBase + 0x08); +} + + + +#define INSTRFUNC_PROTO(x) void (*x)(ARM* cpu) +#include "ARM_InstrTable.h" +#undef INSTRFUNC_PROTO + +} diff --git a/src/ARMInterpreter.h b/src/ARMInterpreter.h new file mode 100644 index 0000000..2d4c1a8 --- /dev/null +++ b/src/ARMInterpreter.h @@ -0,0 +1,35 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
*/

#ifndef ARMINTERPRETER_H
#define ARMINTERPRETER_H

#include "types.h"
#include "ARM.h"

// Interpreter entry points: one handler function per decoded opcode slot.
namespace ARMInterpreter
{

// ARM handlers. ARM::Execute() indexes this table with a 12-bit value built
// from instruction bits 4-7 (low nibble) and bits 20-27 (upper 8 bits).
extern void (*ARMInstrTable[4096])(ARM* cpu);
// THUMB handlers. ARM::Execute() indexes this table with (opcode >> 6).
extern void (*THUMBInstrTable[1024])(ARM* cpu);

// BLX imm is declared separately because ARM::Execute() calls it directly,
// bypassing the table, when the condition check fails and the opcode
// matches (instr & 0xFE000000) == 0xFA000000.
void A_BLX_IMM(ARM* cpu);

}

#endif // ARMINTERPRETER_H
diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp new file mode 100644 index 0000000..d6c5abd --- /dev/null +++ b/src/ARMInterpreter_ALU.cpp @@ -0,0 +1,1461 @@
/*
    Copyright 2016-2017 StapleButter

    This file is part of melonDS.

    melonDS is free software: you can redistribute it and/or modify it under
    the terms of the GNU General Public License as published by the Free
    Software Foundation, either version 3 of the License, or (at your option)
    any later version.

    melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
    FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with melonDS. If not, see http://www.gnu.org/licenses/.
+*/ + +#include <stdio.h> +#include "ARM.h" + + +#define CARRY_ADD(a, b) ((0xFFFFFFFF-a) < b) +#define CARRY_SUB(a, b) (a >= b) + +#define OVERFLOW_ADD(a, b, res) ((!(((a) ^ (b)) & 0x80000000)) && (((a) ^ (res)) & 0x80000000)) +#define OVERFLOW_SUB(a, b, res) ((((a) ^ (b)) & 0x80000000) && (((a) ^ (res)) & 0x80000000)) + + +namespace ARMInterpreter +{ + + +#define LSL_IMM(x, s) \ + x <<= s; + +#define LSR_IMM(x, s) \ + if (s == 0) x = 0; \ + else x >>= s; + +#define ASR_IMM(x, s) \ + if (s == 0) x = ((s32)x) >> 31; \ + else x = ((s32)x) >> s; + +#define ROR_IMM(x, s) \ + if (s == 0) \ + { \ + x = (x >> 1) | ((cpu->CPSR & 0x20000000) << 2); \ + } \ + else \ + { \ + x = ROR(x, s); \ + } + +#define LSL_IMM_S(x, s) \ + if (s > 0) \ + { \ + cpu->SetC(x & (1<<(32-s))); \ + x <<= s; \ + } + +#define LSR_IMM_S(x, s) \ + if (s == 0) { \ + cpu->SetC(x & (1<<31)); \ + x = 0; \ + } else { \ + cpu->SetC(x & (1<<(s-1))); \ + x >>= s; \ + } + +#define ASR_IMM_S(x, s) \ + if (s == 0) { \ + cpu->SetC(x & (1<<31)); \ + x = ((s32)x) >> 31; \ + } else { \ + cpu->SetC(x & (1<<(s-1))); \ + x = ((s32)x) >> s; \ + } + +#define ROR_IMM_S(x, s) \ + if (s == 0) \ + { \ + u32 newc = (x & 1); \ + x = (x >> 1) | ((cpu->CPSR & 0x20000000) << 2); \ + cpu->SetC(newc); \ + } \ + else \ + { \ + cpu->SetC(x & (1<<(s-1))); \ + x = ROR(x, s); \ + } + +#define LSL_REG(x, s) \ + if (s > 31) x = 0; \ + else x <<= s; + +#define LSR_REG(x, s) \ + if (s > 31) x = 0; \ + else x >>= s; + +#define ASR_REG(x, s) \ + if (s > 31) x = ((s32)x) >> 31; \ + else x = ((s32)x) >> s; + +#define ROR_REG(x, s) \ + x = ROR(x, (s&0x1F)); + +#define LSL_REG_S(x, s) \ + if (s > 31) { cpu->SetC(x & (1<<0)); x = 0; } \ + else if (s > 0) { cpu->SetC(x & (1<<(32-s))); x <<= s; } + +#define LSR_REG_S(x, s) \ + if (s > 31) { cpu->SetC(x & (1<<31)); x = 0; } \ + else if (s > 0) { cpu->SetC(x & (1<<(s-1))); x >>= s; } + +#define ASR_REG_S(x, s) \ + if (s > 31) { cpu->SetC(x & (1<<31)); x = ((s32)x) >> 31; } \ + else if (s > 0) { 
cpu->SetC(x & (1<<(s-1))); x = ((s32)x) >> s; } + +#define ROR_REG_S(x, s) \ + if (s > 0) cpu->SetC(x & (1<<(s-1))); \ + x = ROR(x, (s&0x1F)); + + + +#define A_CALC_OP2_IMM \ + u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); + +#define A_CALC_OP2_REG_SHIFT_IMM(shiftop) \ + u32 b = cpu->R[cpu->CurInstr&0xF]; \ + u32 s = (cpu->CurInstr>>7)&0x1F; \ + shiftop(b, s); + +#define A_CALC_OP2_REG_SHIFT_REG(shiftop) \ + u32 b = cpu->R[cpu->CurInstr&0xF]; \ + if ((cpu->CurInstr&0xF)==15) b += 4; \ + shiftop(b, cpu->R[(cpu->CurInstr>>8)&0xF]); + + +#define A_IMPLEMENT_ALU_OP(x,s) \ +\ +void A_##x##_IMM(ARM* cpu) \ +{ \ + A_CALC_OP2_IMM \ + A_##x(0) \ +} \ +void A_##x##_REG_LSL_IMM(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_IMM(LSL_IMM) \ + A_##x(0) \ +} \ +void A_##x##_REG_LSR_IMM(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_IMM(LSR_IMM) \ + A_##x(0) \ +} \ +void A_##x##_REG_ASR_IMM(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_IMM(ASR_IMM) \ + A_##x(0) \ +} \ +void A_##x##_REG_ROR_IMM(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_IMM(ROR_IMM) \ + A_##x(0) \ +} \ +void A_##x##_REG_LSL_REG(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_REG(LSL_REG) \ + A_##x(1) \ +} \ +void A_##x##_REG_LSR_REG(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_REG(LSR_REG) \ + A_##x(1) \ +} \ +void A_##x##_REG_ASR_REG(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_REG(ASR_REG) \ + A_##x(1) \ +} \ +void A_##x##_REG_ROR_REG(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_REG(ROR_REG) \ + A_##x(1) \ +} \ +void A_##x##_IMM_S(ARM* cpu) \ +{ \ + A_CALC_OP2_IMM \ + A_##x##_S(0) \ +} \ +void A_##x##_REG_LSL_IMM_S(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_IMM(LSL_IMM##s) \ + A_##x##_S(0) \ +} \ +void A_##x##_REG_LSR_IMM_S(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_IMM(LSR_IMM##s) \ + A_##x##_S(0) \ +} \ +void A_##x##_REG_ASR_IMM_S(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_IMM(ASR_IMM##s) \ + A_##x##_S(0) \ +} \ +void A_##x##_REG_ROR_IMM_S(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_IMM(ROR_IMM##s) \ + A_##x##_S(0) \ +} \ +void A_##x##_REG_LSL_REG_S(ARM* 
cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_REG(LSL_REG##s) \ + A_##x##_S(1) \ +} \ +void A_##x##_REG_LSR_REG_S(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_REG(LSR_REG##s) \ + A_##x##_S(1) \ +} \ +void A_##x##_REG_ASR_REG_S(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_REG(ASR_REG##s) \ + A_##x##_S(1) \ +} \ +void A_##x##_REG_ROR_REG_S(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_REG(ROR_REG##s) \ + A_##x##_S(1) \ +} + +#define A_IMPLEMENT_ALU_TEST(x,s) \ +\ +void A_##x##_IMM(ARM* cpu) \ +{ \ + A_CALC_OP2_IMM \ + A_##x(0) \ +} \ +void A_##x##_REG_LSL_IMM(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_IMM(LSL_IMM##s) \ + A_##x(0) \ +} \ +void A_##x##_REG_LSR_IMM(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_IMM(LSR_IMM##s) \ + A_##x(0) \ +} \ +void A_##x##_REG_ASR_IMM(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_IMM(ASR_IMM##s) \ + A_##x(0) \ +} \ +void A_##x##_REG_ROR_IMM(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_IMM(ROR_IMM##s) \ + A_##x(0) \ +} \ +void A_##x##_REG_LSL_REG(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_REG(LSL_REG##s) \ + A_##x(1) \ +} \ +void A_##x##_REG_LSR_REG(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_REG(LSR_REG##s) \ + A_##x(1) \ +} \ +void A_##x##_REG_ASR_REG(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_REG(ASR_REG##s) \ + A_##x(1) \ +} \ +void A_##x##_REG_ROR_REG(ARM* cpu) \ +{ \ + A_CALC_OP2_REG_SHIFT_REG(ROR_REG##s) \ + A_##x(1) \ +} + + +#define A_AND(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a & b; \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +#define A_AND_S(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a & b; \ + cpu->SetNZ(res & 0x80000000, \ + !res); \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res, true); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +A_IMPLEMENT_ALU_OP(AND,_S) + + +#define A_EOR(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a 
^ b; \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +#define A_EOR_S(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a ^ b; \ + cpu->SetNZ(res & 0x80000000, \ + !res); \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res, true); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +A_IMPLEMENT_ALU_OP(EOR,_S) + + +#define A_SUB(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a - b; \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +#define A_SUB_S(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a - b; \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CARRY_SUB(a, b), \ + OVERFLOW_SUB(a, b, res)); \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res, true); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +A_IMPLEMENT_ALU_OP(SUB,) + + +#define A_RSB(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = b - a; \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +#define A_RSB_S(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = b - a; \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CARRY_SUB(b, a), \ + OVERFLOW_SUB(b, a, res)); \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res, true); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +A_IMPLEMENT_ALU_OP(RSB,) + + +#define A_ADD(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a + b; \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res); \ + } \ + else \ + { \ + 
cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +#define A_ADD_S(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a + b; \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CARRY_ADD(a, b), \ + OVERFLOW_ADD(a, b, res)); \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res, true); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +A_IMPLEMENT_ALU_OP(ADD,) + + +#define A_ADC(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a + b + (cpu->CPSR&0x20000000 ? 1:0); \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +#define A_ADC_S(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res_tmp = a + b; \ + u32 carry = (cpu->CPSR&0x20000000 ? 1:0); \ + u32 res = res_tmp + carry; \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CARRY_ADD(a, b) | CARRY_ADD(res_tmp, carry), \ + OVERFLOW_ADD(a, b, res_tmp) | OVERFLOW_ADD(res_tmp, carry, res)); \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res, true); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +A_IMPLEMENT_ALU_OP(ADC,) + + +#define A_SBC(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a - b - (cpu->CPSR&0x20000000 ? 0:1); \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +#define A_SBC_S(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res_tmp = a - b; \ + u32 carry = (cpu->CPSR&0x20000000 ? 
0:1); \ + u32 res = res_tmp - carry; \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CARRY_SUB(a, b) & CARRY_SUB(res_tmp, carry), \ + OVERFLOW_SUB(a, b, res_tmp) | OVERFLOW_SUB(res_tmp, carry, res)); \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res, true); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +A_IMPLEMENT_ALU_OP(SBC,) + + +#define A_RSC(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = b - a - (cpu->CPSR&0x20000000 ? 0:1); \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +#define A_RSC_S(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res_tmp = b - a; \ + u32 carry = (cpu->CPSR&0x20000000 ? 0:1); \ + u32 res = res_tmp - carry; \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CARRY_SUB(b, a) & CARRY_SUB(res_tmp, carry), \ + OVERFLOW_SUB(b, a, res_tmp) | OVERFLOW_SUB(res_tmp, carry, res)); \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res, true); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +A_IMPLEMENT_ALU_OP(RSC,) + + +#define A_TST(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a & b; \ + cpu->SetNZ(res & 0x80000000, \ + !res); \ + cpu->Cycles += c; + +A_IMPLEMENT_ALU_TEST(TST,_S) + + +#define A_TEQ(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a ^ b; \ + cpu->SetNZ(res & 0x80000000, \ + !res); \ + cpu->Cycles += c; + +A_IMPLEMENT_ALU_TEST(TEQ,_S) + + +#define A_CMP(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a - b; \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CARRY_SUB(a, b), \ + OVERFLOW_SUB(a, b, res)); \ + cpu->Cycles += c; + +A_IMPLEMENT_ALU_TEST(CMP,) + + +#define A_CMN(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a + b; \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CARRY_ADD(a, b), \ + 
OVERFLOW_ADD(a, b, res)); \ + cpu->Cycles += c; + +A_IMPLEMENT_ALU_TEST(CMN,) + + +#define A_ORR(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a | b; \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +#define A_ORR_S(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a | b; \ + cpu->SetNZ(res & 0x80000000, \ + !res); \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res, true); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +A_IMPLEMENT_ALU_OP(ORR,_S) + + +#define A_MOV(c) \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(b); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = b; \ + } + +#define A_MOV_S(c) \ + cpu->SetNZ(b & 0x80000000, \ + !b); \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(b, true); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = b; \ + } + +A_IMPLEMENT_ALU_OP(MOV,_S) + + +#define A_BIC(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a & ~b; \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +#define A_BIC_S(c) \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 res = a & ~b; \ + cpu->SetNZ(res & 0x80000000, \ + !res); \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(res, true); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = res; \ + } + +A_IMPLEMENT_ALU_OP(BIC,_S) + + +#define A_MVN(c) \ + b = ~b; \ + cpu->Cycles += c; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(b); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = b; \ + } + +#define A_MVN_S(c) \ + b = ~b; \ + cpu->SetNZ(b & 0x80000000, \ + !b); \ + cpu->Cycles += c; \ + if 
(((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + cpu->JumpTo(b, true); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = b; \ + } + +A_IMPLEMENT_ALU_OP(MVN,_S) + + + +void A_MUL(ARM* cpu) +{ + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + + u32 res = rm * rs; + + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ(res & 0x80000000, + !res); + if (cpu->Num==1) cpu->SetC(0); + } + + u32 cycles; + if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1; + else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 2; + else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 3; + else cycles = 4; + + cpu->Cycles += cycles; +} + +void A_MLA(ARM* cpu) +{ + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; + + u32 res = (rm * rs) + rn; + + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ(res & 0x80000000, + !res); + if (cpu->Num==1) cpu->SetC(0); + } + + u32 cycles; + if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; + else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; + else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; + else cycles = 5; + + cpu->Cycles += cycles; +} + +void A_UMULL(ARM* cpu) +{ + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + + u64 res = (u64)rm * (u64)rs; + + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } + + u32 cycles; + if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; + else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; + else if ((rs 
& 0xFF000000) == 0x00000000) cycles = 4; + else cycles = 5; + + cpu->Cycles += cycles; +} + +void A_UMLAL(ARM* cpu) +{ + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + + u64 res = (u64)rm * (u64)rs; + + u64 rd = (u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL); + res += rd; + + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } + + u32 cycles; + if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; + else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; + else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; + else cycles = 5; + + cpu->Cycles += cycles; +} + +void A_SMULL(ARM* cpu) +{ + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + + s64 res = (s64)(s32)rm * (s64)(s32)rs; + + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } + + u32 cycles; + if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; + else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; + else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; + else cycles = 5; + + cpu->Cycles += cycles; +} + +void A_SMLAL(ARM* cpu) +{ + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + + s64 res = (s64)(s32)rm * (s64)(s32)rs; + + s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL)); + res += rd; + + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 
63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } + + u32 cycles; + if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; + else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; + else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; + else cycles = 5; + + cpu->Cycles += cycles; +} + +void A_SMLAxy(ARM* cpu) +{ + // TODO: ARM9 only + + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; + + if (cpu->CurInstr & (1<<5)) rm >>= 16; + else rm &= 0xFFFF; + if (cpu->CurInstr & (1<<6)) rs >>= 16; + else rs &= 0xFFFF; + + u32 res_mul = ((s16)rm * (s16)rs); + u32 res = res_mul + rn; + + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + if (OVERFLOW_ADD(res_mul, rn, res)) + cpu->CPSR |= 0x08000000; +} + +void A_SMLAWy(ARM* cpu) +{ + // TODO: ARM9 only + + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; + + if (cpu->CurInstr & (1<<6)) rs >>= 16; + else rs &= 0xFFFF; + + u32 res_mul = ((s32)rm * (s16)rs) >> 16; // CHECKME + u32 res = res_mul + rn; + + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + if (OVERFLOW_ADD(res_mul, rn, res)) + cpu->CPSR |= 0x08000000; +} + +void A_SMULxy(ARM* cpu) +{ + // TODO: ARM9 only + + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + + if (cpu->CurInstr & (1<<5)) rm >>= 16; + else rm &= 0xFFFF; + if (cpu->CurInstr & (1<<6)) rs >>= 16; + else rs &= 0xFFFF; + + u32 res = ((s16)rm * (s16)rs); + + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; +} + +void A_SMULWy(ARM* cpu) +{ + // TODO: ARM9 only + + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + + if (cpu->CurInstr & (1<<6)) rs >>= 16; + else rs &= 0xFFFF; + + u32 res = ((s32)rm * (s16)rs) >> 16; // CHECKME + + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; +} + +void 
A_SMLALxy(ARM* cpu) +{ + // TODO: ARM9 only + + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + + if (cpu->CurInstr & (1<<5)) rm >>= 16; + else rm &= 0xFFFF; + if (cpu->CurInstr & (1<<6)) rs >>= 16; + else rs &= 0xFFFF; + + s64 res = (s64)(s16)rm * (s64)(s16)rs; + + s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL)); + res += rd; + + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + + cpu->Cycles += 1; +} + + + +void A_CLZ(ARM* cpu) +{ + // TODO: ARM9 only + + u32 val = cpu->R[cpu->CurInstr & 0xF]; + + u32 res = 0; + while ((val & 0xFF000000) == 0) + { + res += 8; + val <<= 8; + val |= 0xFF; + } + while ((val & 0x80000000) == 0) + { + res++; + val <<= 1; + val |= 0x1; + } + + cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; +} + +void A_QADD(ARM* cpu) +{ + // TODO: ARM9 only + + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + + u32 res = rm + rn; + if (OVERFLOW_ADD(rm, rn, res)) + { + res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000; + cpu->CPSR |= 0x08000000; + } + + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; +} + +void A_QSUB(ARM* cpu) +{ + // TODO: ARM9 only + + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + + u32 res = rm - rn; + if (OVERFLOW_SUB(rm, rn, res)) + { + res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000; + cpu->CPSR |= 0x08000000; + } + + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; +} + +void A_QDADD(ARM* cpu) +{ + // TODO: ARM9 only + + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + + if (rn & 0x40000000) + { + rn = (rn & 0x80000000) ? 0x80000000 : 0x7FFFFFFF; + cpu->CPSR |= 0x08000000; // CHECKME + } + else + rn <<= 1; + + u32 res = rm + rn; + if (OVERFLOW_ADD(rm, rn, res)) + { + res = (res & 0x80000000) ? 
0x7FFFFFFF : 0x80000000; + cpu->CPSR |= 0x08000000; + } + + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; +} + +void A_QDSUB(ARM* cpu) +{ + // TODO: ARM9 only + + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + + if (rn & 0x40000000) + { + rn = (rn & 0x80000000) ? 0x80000000 : 0x7FFFFFFF; + cpu->CPSR |= 0x08000000; // CHECKME + } + else + rn <<= 1; + + u32 res = rm - rn; + if (OVERFLOW_SUB(rm, rn, res)) + { + res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000; + cpu->CPSR |= 0x08000000; + } + + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; +} + + + +// ---- THUMB ---------------------------------- + + + +void T_LSL_IMM(ARM* cpu) +{ + u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 s = (cpu->CurInstr >> 6) & 0x1F; + LSL_IMM_S(op, s); + cpu->R[cpu->CurInstr & 0x7] = op; + cpu->SetNZ(op & 0x80000000, + !op); +} + +void T_LSR_IMM(ARM* cpu) +{ + u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 s = (cpu->CurInstr >> 6) & 0x1F; + LSR_IMM_S(op, s); + cpu->R[cpu->CurInstr & 0x7] = op; + cpu->SetNZ(op & 0x80000000, + !op); +} + +void T_ASR_IMM(ARM* cpu) +{ + u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 s = (cpu->CurInstr >> 6) & 0x1F; + ASR_IMM_S(op, s); + cpu->R[cpu->CurInstr & 0x7] = op; + cpu->SetNZ(op & 0x80000000, + !op); +} + +void T_ADD_REG_(ARM* cpu) +{ + u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 6) & 0x7]; + u32 res = a + b; + cpu->R[cpu->CurInstr & 0x7] = res; + cpu->SetNZCV(res & 0x80000000, + !res, + CARRY_ADD(a, b), + OVERFLOW_ADD(a, b, res)); +} + +void T_SUB_REG_(ARM* cpu) +{ + u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 6) & 0x7]; + u32 res = a - b; + cpu->R[cpu->CurInstr & 0x7] = res; + cpu->SetNZCV(res & 0x80000000, + !res, + CARRY_SUB(a, b), + OVERFLOW_SUB(a, b, res)); +} + +void T_ADD_IMM_(ARM* cpu) +{ + u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 b = (cpu->CurInstr >> 6) & 0x7; + u32 res = a + b; + cpu->R[cpu->CurInstr & 0x7] = res; 
+ cpu->SetNZCV(res & 0x80000000, + !res, + CARRY_ADD(a, b), + OVERFLOW_ADD(a, b, res)); +} + +void T_SUB_IMM_(ARM* cpu) +{ + u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 b = (cpu->CurInstr >> 6) & 0x7; + u32 res = a - b; + cpu->R[cpu->CurInstr & 0x7] = res; + cpu->SetNZCV(res & 0x80000000, + !res, + CARRY_SUB(a, b), + OVERFLOW_SUB(a, b, res)); +} + +void T_MOV_IMM(ARM* cpu) +{ + u32 b = cpu->CurInstr & 0xFF; + cpu->R[(cpu->CurInstr >> 8) & 0x7] = b; + cpu->SetNZ(0, + !b); +} + +void T_CMP_IMM(ARM* cpu) +{ + u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; + u32 b = cpu->CurInstr & 0xFF; + u32 res = a - b; + cpu->SetNZCV(res & 0x80000000, + !res, + CARRY_SUB(a, b), + OVERFLOW_SUB(a, b, res)); +} + +void T_ADD_IMM(ARM* cpu) +{ + u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; + u32 b = cpu->CurInstr & 0xFF; + u32 res = a + b; + cpu->R[(cpu->CurInstr >> 8) & 0x7] = res; + cpu->SetNZCV(res & 0x80000000, + !res, + CARRY_ADD(a, b), + OVERFLOW_ADD(a, b, res)); +} + +void T_SUB_IMM(ARM* cpu) +{ + u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; + u32 b = cpu->CurInstr & 0xFF; + u32 res = a - b; + cpu->R[(cpu->CurInstr >> 8) & 0x7] = res; + cpu->SetNZCV(res & 0x80000000, + !res, + CARRY_SUB(a, b), + OVERFLOW_SUB(a, b, res)); +} + + +void T_AND_REG(ARM* cpu) +{ + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 res = a & b; + cpu->R[cpu->CurInstr & 0x7] = res; + cpu->SetNZ(res & 0x80000000, + !res); +} + +void T_EOR_REG(ARM* cpu) +{ + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 res = a ^ b; + cpu->R[cpu->CurInstr & 0x7] = res; + cpu->SetNZ(res & 0x80000000, + !res); +} + +void T_LSL_REG(ARM* cpu) +{ + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; + LSL_REG_S(a, b); + cpu->R[cpu->CurInstr & 0x7] = a; + cpu->SetNZ(a & 0x80000000, + !a); + cpu->Cycles += 1; +} + +void T_LSR_REG(ARM* cpu) +{ + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr 
>> 3) & 0x7] & 0xFF; + LSR_REG_S(a, b); + cpu->R[cpu->CurInstr & 0x7] = a; + cpu->SetNZ(a & 0x80000000, + !a); + cpu->Cycles += 1; +} + +void T_ASR_REG(ARM* cpu) +{ + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; + ASR_REG_S(a, b); + cpu->R[cpu->CurInstr & 0x7] = a; + cpu->SetNZ(a & 0x80000000, + !a); + cpu->Cycles += 1; +} + +void T_ADC_REG(ARM* cpu) +{ + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 res_tmp = a + b; + u32 carry = (cpu->CPSR&0x20000000 ? 1:0); + u32 res = res_tmp + carry; + cpu->R[cpu->CurInstr & 0x7] = res; + cpu->SetNZCV(res & 0x80000000, + !res, + CARRY_ADD(a, b) | CARRY_ADD(res_tmp, carry), + OVERFLOW_ADD(a, b, res_tmp) | OVERFLOW_ADD(res_tmp, carry, res)); +} + +void T_SBC_REG(ARM* cpu) +{ + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 res_tmp = a - b; + u32 carry = (cpu->CPSR&0x20000000 ? 0:1); + u32 res = res_tmp - carry; + cpu->R[cpu->CurInstr & 0x7] = res; + cpu->SetNZCV(res & 0x80000000, + !res, + CARRY_SUB(a, b) & CARRY_SUB(res_tmp, carry), + OVERFLOW_SUB(a, b, res_tmp) | OVERFLOW_SUB(res_tmp, carry, res)); +} + +void T_ROR_REG(ARM* cpu) +{ + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; + ROR_REG_S(a, b); + cpu->R[cpu->CurInstr & 0x7] = a; + cpu->SetNZ(a & 0x80000000, + !a); + cpu->Cycles += 1; +} + +void T_TST_REG(ARM* cpu) +{ + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 res = a & b; + cpu->SetNZ(res & 0x80000000, + !res); +} + +void T_NEG_REG(ARM* cpu) +{ + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 res = -b; + cpu->R[cpu->CurInstr & 0x7] = res; + cpu->SetNZCV(res & 0x80000000, + !res, + CARRY_SUB(0, b), + OVERFLOW_SUB(0, b, res)); +} + +void T_CMP_REG(ARM* cpu) +{ + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 res = a - b; + cpu->SetNZCV(res & 0x80000000, + !res, + 
CARRY_SUB(a, b), + OVERFLOW_SUB(a, b, res)); +} + +void T_CMN_REG(ARM* cpu) +{ + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 res = a + b; + cpu->SetNZCV(res & 0x80000000, + !res, + CARRY_ADD(a, b), + OVERFLOW_ADD(a, b, res)); +} + +void T_ORR_REG(ARM* cpu) +{ + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 res = a | b; + cpu->R[cpu->CurInstr & 0x7] = res; + cpu->SetNZ(res & 0x80000000, + !res); +} + +void T_MUL_REG(ARM* cpu) +{ + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 res = a * b; + cpu->R[cpu->CurInstr & 0x7] = res; + cpu->SetNZ(res & 0x80000000, + !res); + + s32 cycles = 0; + if (cpu->Num == 0) + { + cycles += 3; + } + else + { + cpu->SetC(0); // carry flag destroyed, they say. whatever that means... + if (a & 0xFF000000) cycles += 4; + else if (a & 0x00FF0000) cycles += 3; + else if (a & 0x0000FF00) cycles += 2; + else cycles += 1; + } + cpu->Cycles += cycles; +} + +void T_BIC_REG(ARM* cpu) +{ + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 res = a & ~b; + cpu->R[cpu->CurInstr & 0x7] = res; + cpu->SetNZ(res & 0x80000000, + !res); +} + +void T_MVN_REG(ARM* cpu) +{ + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 res = ~b; + cpu->R[cpu->CurInstr & 0x7] = res; + cpu->SetNZ(res & 0x80000000, + !res); +} + + +void T_ADD_HIREG(ARM* cpu) +{ + u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); + u32 rs = (cpu->CurInstr >> 3) & 0xF; + + u32 a = cpu->R[rd]; + u32 b = cpu->R[rs]; + + if (rd == 15) + { + cpu->JumpTo((a + b) | 1); + } + else + { + cpu->R[rd] = a + b; + } +} + +void T_CMP_HIREG(ARM* cpu) +{ + u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); + u32 rs = (cpu->CurInstr >> 3) & 0xF; + + u32 a = cpu->R[rd]; + u32 b = cpu->R[rs]; + u32 res = a - b; + + cpu->SetNZCV(res & 0x80000000, + !res, + CARRY_SUB(a, b), + OVERFLOW_SUB(a, b, res)); +} + +void 
T_MOV_HIREG(ARM* cpu) +{ + u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); + u32 rs = (cpu->CurInstr >> 3) & 0xF; + + if (rd == 15) + { + cpu->JumpTo(cpu->R[rs] | 1); + } + else + { + cpu->R[rd] = cpu->R[rs]; + } +} + + +void T_ADD_PCREL(ARM* cpu) +{ + u32 val = cpu->R[15] & ~2; + val += ((cpu->CurInstr & 0xFF) << 2); + cpu->R[(cpu->CurInstr >> 8) & 0x7] = val; +} + +void T_ADD_SPREL(ARM* cpu) +{ + u32 val = cpu->R[13]; + val += ((cpu->CurInstr & 0xFF) << 2); + cpu->R[(cpu->CurInstr >> 8) & 0x7] = val; +} + +void T_ADD_SP(ARM* cpu) +{ + u32 val = cpu->R[13]; + if (cpu->CurInstr & (1<<7)) + val -= ((cpu->CurInstr & 0x7F) << 2); + else + val += ((cpu->CurInstr & 0x7F) << 2); + cpu->R[13] = val; +} + + +} diff --git a/src/ARMInterpreter_ALU.h b/src/ARMInterpreter_ALU.h new file mode 100644 index 0000000..4cc3760 --- /dev/null +++ b/src/ARMInterpreter_ALU.h @@ -0,0 +1,135 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#ifndef ARMINTERPRETER_ALU_H +#define ARMINTERPRETER_ALU_H + +namespace ARMInterpreter +{ + +#define A_PROTO_ALU_OP(x) \ +\ +void A_##x##_IMM(ARM* cpu); \ +void A_##x##_REG_LSL_IMM(ARM* cpu); \ +void A_##x##_REG_LSR_IMM(ARM* cpu); \ +void A_##x##_REG_ASR_IMM(ARM* cpu); \ +void A_##x##_REG_ROR_IMM(ARM* cpu); \ +void A_##x##_REG_LSL_REG(ARM* cpu); \ +void A_##x##_REG_LSR_REG(ARM* cpu); \ +void A_##x##_REG_ASR_REG(ARM* cpu); \ +void A_##x##_REG_ROR_REG(ARM* cpu); \ +void A_##x##_IMM_S(ARM* cpu); \ +void A_##x##_REG_LSL_IMM_S(ARM* cpu); \ +void A_##x##_REG_LSR_IMM_S(ARM* cpu); \ +void A_##x##_REG_ASR_IMM_S(ARM* cpu); \ +void A_##x##_REG_ROR_IMM_S(ARM* cpu); \ +void A_##x##_REG_LSL_REG_S(ARM* cpu); \ +void A_##x##_REG_LSR_REG_S(ARM* cpu); \ +void A_##x##_REG_ASR_REG_S(ARM* cpu); \ +void A_##x##_REG_ROR_REG_S(ARM* cpu); + +#define A_PROTO_ALU_TEST(x) \ +\ +void A_##x##_IMM(ARM* cpu); \ +void A_##x##_REG_LSL_IMM(ARM* cpu); \ +void A_##x##_REG_LSR_IMM(ARM* cpu); \ +void A_##x##_REG_ASR_IMM(ARM* cpu); \ +void A_##x##_REG_ROR_IMM(ARM* cpu); \ +void A_##x##_REG_LSL_REG(ARM* cpu); \ +void A_##x##_REG_LSR_REG(ARM* cpu); \ +void A_##x##_REG_ASR_REG(ARM* cpu); \ +void A_##x##_REG_ROR_REG(ARM* cpu); + +A_PROTO_ALU_OP(AND) +A_PROTO_ALU_OP(EOR) +A_PROTO_ALU_OP(SUB) +A_PROTO_ALU_OP(RSB) +A_PROTO_ALU_OP(ADD) +A_PROTO_ALU_OP(ADC) +A_PROTO_ALU_OP(SBC) +A_PROTO_ALU_OP(RSC) +A_PROTO_ALU_TEST(TST) +A_PROTO_ALU_TEST(TEQ) +A_PROTO_ALU_TEST(CMP) +A_PROTO_ALU_TEST(CMN) +A_PROTO_ALU_OP(ORR) +A_PROTO_ALU_OP(MOV) +A_PROTO_ALU_OP(BIC) +A_PROTO_ALU_OP(MVN) + +void A_MUL(ARM* cpu); +void A_MLA(ARM* cpu); +void A_UMULL(ARM* cpu); +void A_UMLAL(ARM* cpu); +void A_SMULL(ARM* cpu); +void A_SMLAL(ARM* cpu); +void A_SMLAxy(ARM* cpu); +void A_SMLAWy(ARM* cpu); +void A_SMULxy(ARM* cpu); +void A_SMULWy(ARM* cpu); +void A_SMLALxy(ARM* cpu); + +void A_CLZ(ARM* cpu); +void A_QADD(ARM* cpu); +void A_QSUB(ARM* cpu); +void A_QDADD(ARM* cpu); +void A_QDSUB(ARM* cpu); + + +void T_LSL_IMM(ARM* cpu); +void 
T_LSR_IMM(ARM* cpu); +void T_ASR_IMM(ARM* cpu); + +void T_ADD_REG_(ARM* cpu); +void T_SUB_REG_(ARM* cpu); +void T_ADD_IMM_(ARM* cpu); +void T_SUB_IMM_(ARM* cpu); + +void T_MOV_IMM(ARM* cpu); +void T_CMP_IMM(ARM* cpu); +void T_ADD_IMM(ARM* cpu); +void T_SUB_IMM(ARM* cpu); + +void T_AND_REG(ARM* cpu); +void T_EOR_REG(ARM* cpu); +void T_LSL_REG(ARM* cpu); +void T_LSR_REG(ARM* cpu); +void T_ASR_REG(ARM* cpu); +void T_ADC_REG(ARM* cpu); +void T_SBC_REG(ARM* cpu); +void T_ROR_REG(ARM* cpu); +void T_TST_REG(ARM* cpu); +void T_NEG_REG(ARM* cpu); +void T_CMP_REG(ARM* cpu); +void T_CMN_REG(ARM* cpu); +void T_ORR_REG(ARM* cpu); +void T_MUL_REG(ARM* cpu); +void T_BIC_REG(ARM* cpu); +void T_MVN_REG(ARM* cpu); + +void T_ADD_HIREG(ARM* cpu); +void T_CMP_HIREG(ARM* cpu); +void T_MOV_HIREG(ARM* cpu); + +void T_ADD_PCREL(ARM* cpu); +void T_ADD_SPREL(ARM* cpu); +void T_ADD_SP(ARM* cpu); + +} + +#endif diff --git a/src/ARMInterpreter_Branch.cpp b/src/ARMInterpreter_Branch.cpp new file mode 100644 index 0000000..88f316d --- /dev/null +++ b/src/ARMInterpreter_Branch.cpp @@ -0,0 +1,116 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#include <stdio.h> +#include "ARM.h" + + +namespace ARMInterpreter +{ + + +void A_B(ARM* cpu) +{ + s32 offset = (s32)(cpu->CurInstr << 8) >> 6; + cpu->JumpTo(cpu->R[15] + offset); +} + +void A_BL(ARM* cpu) +{ + s32 offset = (s32)(cpu->CurInstr << 8) >> 6; + cpu->R[14] = cpu->R[15] - 4; + cpu->JumpTo(cpu->R[15] + offset); +} + +void A_BLX_IMM(ARM* cpu) +{ + s32 offset = (s32)(cpu->CurInstr << 8) >> 6; + if (cpu->CurInstr & 0x01000000) offset += 2; + cpu->R[14] = cpu->R[15] - 4; + cpu->JumpTo(cpu->R[15] + offset + 1); +} + +void A_BX(ARM* cpu) +{ + cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]); +} + +void A_BLX_REG(ARM* cpu) +{ + u32 lr = cpu->R[15] - 4; + cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]); + cpu->R[14] = lr; +} + + + +void T_BCOND(ARM* cpu) +{ + if (cpu->CheckCondition((cpu->CurInstr >> 8) & 0xF)) + { + s32 offset = (s32)(cpu->CurInstr << 24) >> 23; + cpu->JumpTo(cpu->R[15] + offset + 1); + } +} + +void T_BX(ARM* cpu) +{ + cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]); +} + +void T_BLX_REG(ARM* cpu) +{ + if (cpu->Num==1) + { + printf("!! THUMB BLX_REG ON ARM7\n"); + return; + } + + u32 lr = cpu->R[15] - 1; + cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]); + cpu->R[14] = lr; +} + +void T_B(ARM* cpu) +{ + s32 offset = (s32)((cpu->CurInstr & 0x7FF) << 21) >> 20; + cpu->JumpTo(cpu->R[15] + offset + 1); +} + +void T_BL_LONG_1(ARM* cpu) +{ + s32 offset = (s32)((cpu->CurInstr & 0x7FF) << 21) >> 9; + cpu->R[14] = cpu->R[15] + offset; +} + +void T_BL_LONG_2(ARM* cpu) +{ + s32 offset = (cpu->CurInstr & 0x7FF) << 1; + u32 pc = cpu->R[14] + offset; + cpu->R[14] = (cpu->R[15] - 2) | 1; + + if ((cpu->Num==1) || (cpu->CurInstr & (1<<12))) + pc |= 1; + + cpu->JumpTo(pc); +} + + + +} + diff --git a/src/ARMInterpreter_Branch.h b/src/ARMInterpreter_Branch.h new file mode 100644 index 0000000..202f490 --- /dev/null +++ b/src/ARMInterpreter_Branch.h @@ -0,0 +1,39 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. 
+ + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef ARMINTERPRETER_BRANCH_H +#define ARMINTERPRETER_BRANCH_H + +namespace ARMInterpreter +{ + +void A_B(ARM* cpu); +void A_BL(ARM* cpu); +void A_BX(ARM* cpu); +void A_BLX_REG(ARM* cpu); + +void T_BCOND(ARM* cpu); +void T_BX(ARM* cpu); +void T_BLX_REG(ARM* cpu); +void T_B(ARM* cpu); +void T_BL_LONG_1(ARM* cpu); +void T_BL_LONG_2(ARM* cpu); + +} + +#endif diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp new file mode 100644 index 0000000..ccbee34 --- /dev/null +++ b/src/ARMInterpreter_LoadStore.cpp @@ -0,0 +1,729 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#include <stdio.h> +#include "ARM.h" + + +namespace ARMInterpreter +{ + + +// copypasta from ALU. 
bad +#define LSL_IMM(x, s) \ + x <<= s; + +#define LSR_IMM(x, s) \ + if (s == 0) x = 0; \ + else x >>= s; + +#define ASR_IMM(x, s) \ + if (s == 0) x = ((s32)x) >> 31; \ + else x = ((s32)x) >> s; + +#define ROR_IMM(x, s) \ + if (s == 0) \ + { \ + x = (x >> 1) | ((cpu->CPSR & 0x20000000) << 2); \ + } \ + else \ + { \ + x = ROR(x, s); \ + } + + + +#define A_WB_CALC_OFFSET_IMM \ + u32 offset = (cpu->CurInstr & 0xFFF); \ + if (!(cpu->CurInstr & (1<<23))) offset = -offset; + +#define A_WB_CALC_OFFSET_REG(shiftop) \ + u32 offset = cpu->R[cpu->CurInstr & 0xF]; \ + u32 shift = ((cpu->CurInstr>>7)&0x1F); \ + shiftop(offset, shift); \ + if (!(cpu->CurInstr & (1<<23))) offset = -offset; + + + +#define A_STR \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + cpu->DataWrite32(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; + +#define A_STR_POST \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + cpu->DataWrite32(addr, cpu->R[(cpu->CurInstr>>12) & 0xF], cpu->CurInstr & (1<<21)); \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; + +#define A_STRB \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + cpu->DataWrite8(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; + +#define A_STRB_POST \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + cpu->DataWrite8(addr, cpu->R[(cpu->CurInstr>>12) & 0xF], cpu->CurInstr & (1<<21)); \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; + +#define A_LDR \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 val = cpu->DataRead32(offset); val = ROR(val, ((offset&0x3)<<3)); \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ + cpu->Cycles += 1; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + if (cpu->Num==1) val &= ~0x1; \ + cpu->JumpTo(val); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + } + +#define A_LDR_POST \ + u32 addr = 
cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 val = cpu->DataRead32(addr, cpu->CurInstr & (1<<21)); val = ROR(val, ((addr&0x3)<<3)); \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ + cpu->Cycles += 1; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + { \ + if (cpu->Num==1) val &= ~0x1; \ + cpu->JumpTo(val); \ + } \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + } + +#define A_LDRB \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 val = cpu->DataRead8(offset); \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ + cpu->Cycles += 1; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRB PC %08X\n", cpu->R[15]); \ + +#define A_LDRB_POST \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 val = cpu->DataRead8(addr, cpu->CurInstr & (1<<21)); \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ + cpu->Cycles += 1; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! 
LDRB PC %08X\n", cpu->R[15]); \ + + + +#define A_IMPLEMENT_WB_LDRSTR(x) \ +\ +void A_##x##_IMM(ARM* cpu) \ +{ \ + A_WB_CALC_OFFSET_IMM \ + A_##x \ +} \ +\ +void A_##x##_REG_LSL(ARM* cpu) \ +{ \ + A_WB_CALC_OFFSET_REG(LSL_IMM) \ + A_##x \ +} \ +\ +void A_##x##_REG_LSR(ARM* cpu) \ +{ \ + A_WB_CALC_OFFSET_REG(LSR_IMM) \ + A_##x \ +} \ +\ +void A_##x##_REG_ASR(ARM* cpu) \ +{ \ + A_WB_CALC_OFFSET_REG(ASR_IMM) \ + A_##x \ +} \ +\ +void A_##x##_REG_ROR(ARM* cpu) \ +{ \ + A_WB_CALC_OFFSET_REG(ROR_IMM) \ + A_##x \ +} \ +\ +void A_##x##_POST_IMM(ARM* cpu) \ +{ \ + A_WB_CALC_OFFSET_IMM \ + A_##x##_POST \ +} \ +\ +void A_##x##_POST_REG_LSL(ARM* cpu) \ +{ \ + A_WB_CALC_OFFSET_REG(LSL_IMM) \ + A_##x##_POST \ +} \ +\ +void A_##x##_POST_REG_LSR(ARM* cpu) \ +{ \ + A_WB_CALC_OFFSET_REG(LSR_IMM) \ + A_##x##_POST \ +} \ +\ +void A_##x##_POST_REG_ASR(ARM* cpu) \ +{ \ + A_WB_CALC_OFFSET_REG(ASR_IMM) \ + A_##x##_POST \ +} \ +\ +void A_##x##_POST_REG_ROR(ARM* cpu) \ +{ \ + A_WB_CALC_OFFSET_REG(ROR_IMM) \ + A_##x##_POST \ +} + +A_IMPLEMENT_WB_LDRSTR(STR) +A_IMPLEMENT_WB_LDRSTR(STRB) +A_IMPLEMENT_WB_LDRSTR(LDR) +A_IMPLEMENT_WB_LDRSTR(LDRB) + + + +#define A_HD_CALC_OFFSET_IMM \ + u32 offset = (cpu->CurInstr & 0xF) | ((cpu->CurInstr >> 4) & 0xF0); \ + if (!(cpu->CurInstr & (1<<23))) offset = -offset; + +#define A_HD_CALC_OFFSET_REG \ + u32 offset = cpu->R[cpu->CurInstr & 0xF]; \ + if (!(cpu->CurInstr & (1<<23))) offset = -offset; + + + +#define A_STRH \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + cpu->DataWrite16(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ + +#define A_STRH_POST \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + cpu->DataWrite16(addr, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ + +// TODO: CHECK LDRD/STRD TIMINGS!! 
also, ARM9-only + +#define A_LDRD \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ + cpu->Cycles += 1; \ + u32 r = (cpu->CurInstr>>12) & 0xF; \ + cpu->R[r ] = cpu->DataRead32(offset ); \ + cpu->R[r+1] = cpu->DataRead32(offset+4); \ + +#define A_LDRD_POST \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ + cpu->Cycles += 1; \ + u32 r = (cpu->CurInstr>>12) & 0xF; \ + cpu->R[r ] = cpu->DataRead32(addr ); \ + cpu->R[r+1] = cpu->DataRead32(addr+4); \ + +#define A_STRD \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ + u32 r = (cpu->CurInstr>>12) & 0xF; \ + cpu->DataWrite32(offset , cpu->R[r ]); \ + cpu->DataWrite32(offset+4, cpu->R[r+1]); \ + +#define A_STRD_POST \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ + u32 r = (cpu->CurInstr>>12) & 0xF; \ + cpu->DataWrite32(offset , cpu->R[r ]); \ + cpu->DataWrite32(offset+4, cpu->R[r+1]); \ + +#define A_LDRH \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = cpu->DataRead16(offset); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRH PC %08X\n", cpu->R[15]); \ + +#define A_LDRH_POST \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = cpu->DataRead16(addr); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRH PC %08X\n", cpu->R[15]); \ + +#define A_LDRSB \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->DataRead8(offset); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! 
LDRSB PC %08X\n", cpu->R[15]); \ + +#define A_LDRSB_POST \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->DataRead8(addr); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSB PC %08X\n", cpu->R[15]); \ + +#define A_LDRSH \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->DataRead16(offset); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSH PC %08X\n", cpu->R[15]); \ + +#define A_LDRSH_POST \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->DataRead16(addr); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSH PC %08X\n", cpu->R[15]); \ + + +#define A_IMPLEMENT_HD_LDRSTR(x) \ +\ +void A_##x##_IMM(ARM* cpu) \ +{ \ + A_HD_CALC_OFFSET_IMM \ + A_##x \ +} \ +\ +void A_##x##_REG(ARM* cpu) \ +{ \ + A_HD_CALC_OFFSET_REG \ + A_##x \ +} \ +void A_##x##_POST_IMM(ARM* cpu) \ +{ \ + A_HD_CALC_OFFSET_IMM \ + A_##x##_POST \ +} \ +\ +void A_##x##_POST_REG(ARM* cpu) \ +{ \ + A_HD_CALC_OFFSET_REG \ + A_##x##_POST \ +} + +A_IMPLEMENT_HD_LDRSTR(STRH) +A_IMPLEMENT_HD_LDRSTR(LDRD) +A_IMPLEMENT_HD_LDRSTR(STRD) +A_IMPLEMENT_HD_LDRSTR(LDRH) +A_IMPLEMENT_HD_LDRSTR(LDRSB) +A_IMPLEMENT_HD_LDRSTR(LDRSH) + + + +void A_SWP(ARM* cpu) +{ + u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + + u32 val = cpu->DataRead32(base); + cpu->R[(cpu->CurInstr >> 12) & 0xF] = ROR(val, 8*(base&0x3)); + + cpu->DataWrite32(base, rm); + + cpu->Cycles += 1; +} + +void A_SWPB(ARM* cpu) +{ + u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + u32 rm = cpu->R[cpu->CurInstr & 0xF] & 0xFF; + + cpu->R[(cpu->CurInstr >> 12) & 0xF] = cpu->DataRead8(base); + + cpu->DataWrite8(base, rm); + + cpu->Cycles += 1; +} + + + +void A_LDM(ARM* 
cpu) +{ + u32 baseid = (cpu->CurInstr >> 16) & 0xF; + u32 base = cpu->R[baseid]; + u32 wbbase; + u32 preinc = (cpu->CurInstr & (1<<24)); + + if (!(cpu->CurInstr & (1<<23))) + { + for (int i = 0; i < 16; i++) + { + if (cpu->CurInstr & (1<<i)) + base -= 4; + } + + if (cpu->CurInstr & (1<<21)) + { + // pre writeback + wbbase = base; + } + + preinc = !preinc; + } + + cpu->Cycles += 1; + + if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) + cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10); + + for (int i = 0; i < 15; i++) + { + if (cpu->CurInstr & (1<<i)) + { + if (preinc) base += 4; + cpu->R[i] = cpu->DataRead32(base); + if (!preinc) base += 4; + } + } + + if (cpu->CurInstr & (1<<15)) + { + if (preinc) base += 4; + u32 pc = cpu->DataRead32(base); + if (!preinc) base += 4; + + if (cpu->Num == 1) + pc &= ~0x1; + + cpu->JumpTo(pc, cpu->CurInstr & (1<<22)); + } + + if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) + cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR); + + if (cpu->CurInstr & (1<<21)) + { + // post writeback + if (cpu->CurInstr & (1<<23)) + wbbase = base; + + if (cpu->CurInstr & (1 << baseid)) + { + if (cpu->Num == 0) + { + u32 rlist = cpu->CurInstr & 0xFFFF; + if ((!(rlist & ~(1 << baseid))) || (rlist & ~((2 << baseid) - 1))) + cpu->R[baseid] = wbbase; + } + } + else + cpu->R[baseid] = wbbase; + } +} + +void A_STM(ARM* cpu) +{ + u32 baseid = (cpu->CurInstr >> 16) & 0xF; + u32 base = cpu->R[baseid]; + u32 oldbase = base; + u32 preinc = (cpu->CurInstr & (1<<24)); + + if (!(cpu->CurInstr & (1<<23))) + { + for (u32 i = 0; i < 16; i++) + { + if (cpu->CurInstr & (1<<i)) + base -= 4; + } + + if (cpu->CurInstr & (1<<21)) + cpu->R[baseid] = base; + + preinc = !preinc; + } + + bool isbanked = false; + if (cpu->CurInstr & (1<<22)) + { + u32 mode = (cpu->CPSR & 0x1F); + if (mode == 0x11) + isbanked = (baseid >= 8 && baseid < 15); + else if (mode != 0x10 && mode != 0x1F) + isbanked = (baseid >= 13 && baseid < 15); + + cpu->UpdateMode(cpu->CPSR, 
(cpu->CPSR&~0x1F)|0x10); + } + + for (u32 i = 0; i < 16; i++) + { + if (cpu->CurInstr & (1<<i)) + { + if (preinc) base += 4; + + if (i == baseid && !isbanked) + { + if ((cpu->Num == 0) || (!(cpu->CurInstr & (i-1)))) + cpu->DataWrite32(base, oldbase); + else + cpu->DataWrite32(base, base); // checkme + } + else + cpu->DataWrite32(base, cpu->R[i]); + + if (!preinc) base += 4; + } + } + + if (cpu->CurInstr & (1<<22)) + cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR); + + if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21))) + cpu->R[baseid] = base; +} + + + + +// ---- THUMB ----------------------- + + + +void T_LDR_PCREL(ARM* cpu) +{ + u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); + cpu->R[(cpu->CurInstr >> 8) & 0x7] = cpu->DataRead32(addr); + + cpu->Cycles += 1; +} + + +void T_STR_REG(ARM* cpu) +{ + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + cpu->DataWrite32(addr, cpu->R[cpu->CurInstr & 0x7]); +} + +void T_STRB_REG(ARM* cpu) +{ + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + cpu->DataWrite8(addr, cpu->R[cpu->CurInstr & 0x7]); +} + +void T_LDR_REG(ARM* cpu) +{ + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + + u32 val = cpu->DataRead32(addr); + cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(addr&0x3)); + + cpu->Cycles += 1; +} + +void T_LDRB_REG(ARM* cpu) +{ + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + cpu->R[cpu->CurInstr & 0x7] = cpu->DataRead8(addr); + + cpu->Cycles += 1; +} + + +void T_STRH_REG(ARM* cpu) +{ + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + cpu->DataWrite16(addr, cpu->R[cpu->CurInstr & 0x7]); +} + +void T_LDRSB_REG(ARM* cpu) +{ + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + cpu->R[cpu->CurInstr & 0x7] = (s32)(s8)cpu->DataRead8(addr); + + cpu->Cycles += 1; +} + +void T_LDRH_REG(ARM* cpu) 
+{ + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + cpu->R[cpu->CurInstr & 0x7] = cpu->DataRead16(addr); + + cpu->Cycles += 1; +} + +void T_LDRSH_REG(ARM* cpu) +{ + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + cpu->R[cpu->CurInstr & 0x7] = (s32)(s16)cpu->DataRead16(addr); + + cpu->Cycles += 1; +} + + +void T_STR_IMM(ARM* cpu) +{ + u32 offset = (cpu->CurInstr >> 4) & 0x7C; + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; + + cpu->DataWrite32(offset, cpu->R[cpu->CurInstr & 0x7]); +} + +void T_LDR_IMM(ARM* cpu) +{ + u32 offset = (cpu->CurInstr >> 4) & 0x7C; + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; + + u32 val = cpu->DataRead32(offset); + cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(offset&0x3)); + cpu->Cycles += 1; +} + +void T_STRB_IMM(ARM* cpu) +{ + u32 offset = (cpu->CurInstr >> 6) & 0x1F; + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; + + cpu->DataWrite8(offset, cpu->R[cpu->CurInstr & 0x7]); +} + +void T_LDRB_IMM(ARM* cpu) +{ + u32 offset = (cpu->CurInstr >> 6) & 0x1F; + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; + + cpu->R[cpu->CurInstr & 0x7] = cpu->DataRead8(offset); + cpu->Cycles += 1; +} + + +void T_STRH_IMM(ARM* cpu) +{ + u32 offset = (cpu->CurInstr >> 5) & 0x3E; + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; + + cpu->DataWrite16(offset, cpu->R[cpu->CurInstr & 0x7]); +} + +void T_LDRH_IMM(ARM* cpu) +{ + u32 offset = (cpu->CurInstr >> 5) & 0x3E; + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; + + cpu->R[cpu->CurInstr & 0x7] = cpu->DataRead16(offset); + cpu->Cycles += 1; +} + + +void T_STR_SPREL(ARM* cpu) +{ + u32 offset = (cpu->CurInstr << 2) & 0x3FC; + offset += cpu->R[13]; + + cpu->DataWrite32(offset, cpu->R[(cpu->CurInstr >> 8) & 0x7]); +} + +void T_LDR_SPREL(ARM* cpu) +{ + u32 offset = (cpu->CurInstr << 2) & 0x3FC; + offset += cpu->R[13]; + + cpu->R[(cpu->CurInstr >> 8) & 0x7] = cpu->DataRead32(offset); + cpu->Cycles += 1; +} + + +void T_PUSH(ARM* cpu) +{ + int nregs = 
0; + + for (int i = 0; i < 8; i++) + { + if (cpu->CurInstr & (1<<i)) + nregs++; + } + + if (cpu->CurInstr & (1<<8)) + nregs++; + + u32 base = cpu->R[13]; + base -= (nregs<<2); + cpu->R[13] = base; + + for (int i = 0; i < 8; i++) + { + if (cpu->CurInstr & (1<<i)) + { + cpu->DataWrite32(base, cpu->R[i]); + base += 4; + } + } + + if (cpu->CurInstr & (1<<8)) + { + cpu->DataWrite32(base, cpu->R[14]); + } +} + +void T_POP(ARM* cpu) +{ + u32 base = cpu->R[13]; + + cpu->Cycles += 1; + + for (int i = 0; i < 8; i++) + { + if (cpu->CurInstr & (1<<i)) + { + cpu->R[i] = cpu->DataRead32(base); + base += 4; + } + } + + if (cpu->CurInstr & (1<<8)) + { + u32 pc = cpu->DataRead32(base); + if (cpu->Num==1) pc |= 0x1; + cpu->JumpTo(pc); + base += 4; + } + + cpu->R[13] = base; +} + +void T_STMIA(ARM* cpu) +{ + u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; + + for (int i = 0; i < 8; i++) + { + if (cpu->CurInstr & (1<<i)) + { + cpu->DataWrite32(base, cpu->R[i]); + base += 4; + } + } + + // TODO: check "Rb included in Rlist" case + cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; +} + +void T_LDMIA(ARM* cpu) +{ + u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; + + cpu->Cycles += 1; + + for (int i = 0; i < 8; i++) + { + if (cpu->CurInstr & (1<<i)) + { + cpu->R[i] = cpu->DataRead32(base); + base += 4; + } + } + + if (!(cpu->CurInstr & (1<<((cpu->CurInstr >> 8) & 0x7)))) + cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; +} + + +} + diff --git a/src/ARMInterpreter_LoadStore.h b/src/ARMInterpreter_LoadStore.h new file mode 100644 index 0000000..4ea0e54 --- /dev/null +++ b/src/ARMInterpreter_LoadStore.h @@ -0,0 +1,95 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. 
+ + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef ARMINTERPRETER_LOADSTORE_H +#define ARMINTERPRETER_LOADSTORE_H + +namespace ARMInterpreter +{ + +#define A_PROTO_WB_LDRSTR(x) \ +\ +void A_##x##_IMM(ARM* cpu); \ +void A_##x##_REG_LSL(ARM* cpu); \ +void A_##x##_REG_LSR(ARM* cpu); \ +void A_##x##_REG_ASR(ARM* cpu); \ +void A_##x##_REG_ROR(ARM* cpu); \ +void A_##x##_POST_IMM(ARM* cpu); \ +void A_##x##_POST_REG_LSL(ARM* cpu); \ +void A_##x##_POST_REG_LSR(ARM* cpu); \ +void A_##x##_POST_REG_ASR(ARM* cpu); \ +void A_##x##_POST_REG_ROR(ARM* cpu); + +A_PROTO_WB_LDRSTR(STR) +A_PROTO_WB_LDRSTR(STRB) +A_PROTO_WB_LDRSTR(LDR) +A_PROTO_WB_LDRSTR(LDRB) + +#define A_PROTO_HD_LDRSTR(x) \ +\ +void A_##x##_IMM(ARM* cpu); \ +void A_##x##_REG(ARM* cpu); \ +void A_##x##_POST_IMM(ARM* cpu); \ +void A_##x##_POST_REG(ARM* cpu); + +A_PROTO_HD_LDRSTR(STRH) +A_PROTO_HD_LDRSTR(LDRD) +A_PROTO_HD_LDRSTR(STRD) +A_PROTO_HD_LDRSTR(LDRH) +A_PROTO_HD_LDRSTR(LDRSB) +A_PROTO_HD_LDRSTR(LDRSH) + +void A_LDM(ARM* cpu); +void A_STM(ARM* cpu); + +void A_SWP(ARM* cpu); +void A_SWPB(ARM* cpu); + + +void T_LDR_PCREL(ARM* cpu); + +void T_STR_REG(ARM* cpu); +void T_STRB_REG(ARM* cpu); +void T_LDR_REG(ARM* cpu); +void T_LDRB_REG(ARM* cpu); + +void T_STRH_REG(ARM* cpu); +void T_LDRSB_REG(ARM* cpu); +void T_LDRH_REG(ARM* cpu); +void T_LDRSH_REG(ARM* cpu); + +void T_STR_IMM(ARM* cpu); +void T_LDR_IMM(ARM* cpu); +void T_STRB_IMM(ARM* cpu); +void T_LDRB_IMM(ARM* cpu); + +void T_STRH_IMM(ARM* cpu); +void T_LDRH_IMM(ARM* cpu); + +void T_STR_SPREL(ARM* cpu); +void T_LDR_SPREL(ARM* cpu); + +void T_PUSH(ARM* cpu); +void T_POP(ARM* cpu); +void T_STMIA(ARM* cpu); +void T_LDMIA(ARM* cpu); + +} + 
+#endif + diff --git a/src/ARM_InstrTable.h b/src/ARM_InstrTable.h new file mode 100644 index 0000000..830a2d0 --- /dev/null +++ b/src/ARM_InstrTable.h @@ -0,0 +1,1979 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +INSTRFUNC_PROTO(ARMInstrTable[4096]) = +{ + // 0000 0000 0000 + A_AND_REG_LSL_IMM, A_AND_REG_LSL_REG, A_AND_REG_LSR_IMM, A_AND_REG_LSR_REG, + A_AND_REG_ASR_IMM, A_AND_REG_ASR_REG, A_AND_REG_ROR_IMM, A_AND_REG_ROR_REG, + A_AND_REG_LSL_IMM, A_MUL, A_AND_REG_LSR_IMM, A_STRH_POST_REG, + A_AND_REG_ASR_IMM, A_LDRD_POST_REG, A_AND_REG_ROR_IMM, A_STRD_POST_REG, + + // 0000 0001 0000 + A_AND_REG_LSL_IMM_S, A_AND_REG_LSL_REG_S, A_AND_REG_LSR_IMM_S, A_AND_REG_LSR_REG_S, + A_AND_REG_ASR_IMM_S, A_AND_REG_ASR_REG_S, A_AND_REG_ROR_IMM_S, A_AND_REG_ROR_REG_S, + A_AND_REG_LSL_IMM_S, A_MUL, A_AND_REG_LSR_IMM_S, A_LDRH_POST_REG, + A_AND_REG_ASR_IMM_S, A_LDRSB_POST_REG, A_AND_REG_ROR_IMM_S, A_LDRSH_POST_REG, + + // 0000 0010 0000 + A_EOR_REG_LSL_IMM, A_EOR_REG_LSL_REG, A_EOR_REG_LSR_IMM, A_EOR_REG_LSR_REG, + A_EOR_REG_ASR_IMM, A_EOR_REG_ASR_REG, A_EOR_REG_ROR_IMM, A_EOR_REG_ROR_REG, + A_EOR_REG_LSL_IMM, A_MLA, A_EOR_REG_LSR_IMM, A_UNK, + A_EOR_REG_ASR_IMM, A_UNK, A_EOR_REG_ROR_IMM, A_UNK, + + // 0000 0011 0000 + A_EOR_REG_LSL_IMM_S, A_EOR_REG_LSL_REG_S, A_EOR_REG_LSR_IMM_S, A_EOR_REG_LSR_REG_S, + A_EOR_REG_ASR_IMM_S, 
A_EOR_REG_ASR_REG_S, A_EOR_REG_ROR_IMM_S, A_EOR_REG_ROR_REG_S,
    // Slot 0xA (opcode bits 7-4 = 1010) is the LSR-by-immediate form, as in
    // every other data-processing group; it previously pointed at the ROR
    // handler.
    A_EOR_REG_LSL_IMM_S, A_MLA, A_EOR_REG_LSR_IMM_S, A_UNK,
    A_EOR_REG_ASR_IMM_S, A_UNK, A_EOR_REG_ROR_IMM_S, A_UNK,

    // 0000 0100 0000
    A_SUB_REG_LSL_IMM, A_SUB_REG_LSL_REG, A_SUB_REG_LSR_IMM, A_SUB_REG_LSR_REG,
    A_SUB_REG_ASR_IMM, A_SUB_REG_ASR_REG, A_SUB_REG_ROR_IMM, A_SUB_REG_ROR_REG,
    A_SUB_REG_LSL_IMM, A_UNK, A_SUB_REG_LSR_IMM, A_STRH_POST_IMM,
    A_SUB_REG_ASR_IMM, A_LDRD_POST_IMM, A_SUB_REG_ROR_IMM, A_STRD_POST_IMM,

    // 0000 0101 0000
    A_SUB_REG_LSL_IMM_S, A_SUB_REG_LSL_REG_S, A_SUB_REG_LSR_IMM_S, A_SUB_REG_LSR_REG_S,
    A_SUB_REG_ASR_IMM_S, A_SUB_REG_ASR_REG_S, A_SUB_REG_ROR_IMM_S, A_SUB_REG_ROR_REG_S,
    A_SUB_REG_LSL_IMM_S, A_UNK, A_SUB_REG_LSR_IMM_S, A_LDRH_POST_IMM,
    A_SUB_REG_ASR_IMM_S, A_LDRSB_POST_IMM, A_SUB_REG_ROR_IMM_S, A_LDRSH_POST_IMM,

    // 0000 0110 0000
    A_RSB_REG_LSL_IMM, A_RSB_REG_LSL_REG, A_RSB_REG_LSR_IMM, A_RSB_REG_LSR_REG,
    A_RSB_REG_ASR_IMM, A_RSB_REG_ASR_REG, A_RSB_REG_ROR_IMM, A_RSB_REG_ROR_REG,
    A_RSB_REG_LSL_IMM, A_UNK, A_RSB_REG_LSR_IMM, A_UNK,
    A_RSB_REG_ASR_IMM, A_UNK, A_RSB_REG_ROR_IMM, A_UNK,

    // 0000 0111 0000
    A_RSB_REG_LSL_IMM_S, A_RSB_REG_LSL_REG_S, A_RSB_REG_LSR_IMM_S, A_RSB_REG_LSR_REG_S,
    A_RSB_REG_ASR_IMM_S, A_RSB_REG_ASR_REG_S, A_RSB_REG_ROR_IMM_S, A_RSB_REG_ROR_REG_S,
    A_RSB_REG_LSL_IMM_S, A_UNK, A_RSB_REG_LSR_IMM_S, A_UNK,
    A_RSB_REG_ASR_IMM_S, A_UNK, A_RSB_REG_ROR_IMM_S, A_UNK,

    // 0000 1000 0000
    A_ADD_REG_LSL_IMM, A_ADD_REG_LSL_REG, A_ADD_REG_LSR_IMM, A_ADD_REG_LSR_REG,
    A_ADD_REG_ASR_IMM, A_ADD_REG_ASR_REG, A_ADD_REG_ROR_IMM, A_ADD_REG_ROR_REG,
    A_ADD_REG_LSL_IMM, A_UMULL, A_ADD_REG_LSR_IMM, A_STRH_POST_REG,
    A_ADD_REG_ASR_IMM, A_LDRD_POST_REG, A_ADD_REG_ROR_IMM, A_STRD_POST_REG,

    // 0000 1001 0000
    A_ADD_REG_LSL_IMM_S, A_ADD_REG_LSL_REG_S, A_ADD_REG_LSR_IMM_S, A_ADD_REG_LSR_REG_S,
    A_ADD_REG_ASR_IMM_S, A_ADD_REG_ASR_REG_S, A_ADD_REG_ROR_IMM_S, A_ADD_REG_ROR_REG_S,
    A_ADD_REG_LSL_IMM_S, A_UMULL, A_ADD_REG_LSR_IMM_S,
A_LDRH_POST_REG, + A_ADD_REG_ASR_IMM_S, A_LDRSB_POST_REG, A_ADD_REG_ROR_IMM_S, A_LDRSH_POST_REG, + + // 0000 1010 0000 + A_ADC_REG_LSL_IMM, A_ADC_REG_LSL_REG, A_ADC_REG_LSR_IMM, A_ADC_REG_LSR_REG, + A_ADC_REG_ASR_IMM, A_ADC_REG_ASR_REG, A_ADC_REG_ROR_IMM, A_ADC_REG_ROR_REG, + A_ADC_REG_LSL_IMM, A_UMLAL, A_ADC_REG_LSR_IMM, A_UNK, + A_ADC_REG_ASR_IMM, A_UNK, A_ADC_REG_ROR_IMM, A_UNK, + + // 0000 1011 0000 + A_ADC_REG_LSL_IMM_S, A_ADC_REG_LSL_REG_S, A_ADC_REG_LSR_IMM_S, A_ADC_REG_LSR_REG_S, + A_ADC_REG_ASR_IMM_S, A_ADC_REG_ASR_REG_S, A_ADC_REG_ROR_IMM_S, A_ADC_REG_ROR_REG_S, + A_ADC_REG_LSL_IMM_S, A_UMLAL, A_ADC_REG_LSR_IMM_S, A_UNK, + A_ADC_REG_ASR_IMM_S, A_UNK, A_ADC_REG_ROR_IMM_S, A_UNK, + + // 0000 1100 0000 + A_SBC_REG_LSL_IMM, A_SBC_REG_LSL_REG, A_SBC_REG_LSR_IMM, A_SBC_REG_LSR_REG, + A_SBC_REG_ASR_IMM, A_SBC_REG_ASR_REG, A_SBC_REG_ROR_IMM, A_SBC_REG_ROR_REG, + A_SBC_REG_LSL_IMM, A_SMULL, A_SBC_REG_LSR_IMM, A_STRH_POST_IMM, + A_SBC_REG_ASR_IMM, A_LDRD_POST_IMM, A_SBC_REG_ROR_IMM, A_STRD_POST_IMM, + + // 0000 1101 0000 + A_SBC_REG_LSL_IMM_S, A_SBC_REG_LSL_REG_S, A_SBC_REG_LSR_IMM_S, A_SBC_REG_LSR_REG_S, + A_SBC_REG_ASR_IMM_S, A_SBC_REG_ASR_REG_S, A_SBC_REG_ROR_IMM_S, A_SBC_REG_ROR_REG_S, + A_SBC_REG_LSL_IMM_S, A_SMULL, A_SBC_REG_LSR_IMM_S, A_LDRH_POST_IMM, + A_SBC_REG_ASR_IMM_S, A_LDRSB_POST_IMM, A_SBC_REG_ROR_IMM_S, A_LDRSH_POST_IMM, + + // 0000 1110 0000 + A_RSC_REG_LSL_IMM, A_RSC_REG_LSL_REG, A_RSC_REG_LSR_IMM, A_RSC_REG_LSR_REG, + A_RSC_REG_ASR_IMM, A_RSC_REG_ASR_REG, A_RSC_REG_ROR_IMM, A_RSC_REG_ROR_REG, + A_RSC_REG_LSL_IMM, A_SMLAL, A_RSC_REG_LSR_IMM, A_UNK, + A_RSC_REG_ASR_IMM, A_UNK, A_RSC_REG_ROR_IMM, A_UNK, + + // 0000 1111 0000 + A_RSC_REG_LSL_IMM_S, A_RSC_REG_LSL_REG_S, A_RSC_REG_LSR_IMM_S, A_RSC_REG_LSR_REG_S, + A_RSC_REG_ASR_IMM_S, A_RSC_REG_ASR_REG_S, A_RSC_REG_ROR_IMM_S, A_RSC_REG_ROR_REG_S, + A_RSC_REG_LSL_IMM_S, A_SMLAL, A_RSC_REG_LSR_IMM_S, A_UNK, + A_RSC_REG_ASR_IMM_S, A_UNK, A_RSC_REG_ROR_IMM_S, A_UNK, + + + + // 0001 0000 0000 + A_MRS, 
A_UNK, A_UNK, A_UNK, + A_UNK, A_QADD, A_UNK, A_UNK, + A_SMLAxy, A_SWP, A_SMLAxy, A_STRH_REG, + A_SMLAxy, A_LDRD_REG, A_SMLAxy, A_STRD_REG, + + // 0001 0001 0000 + A_TST_REG_LSL_IMM, A_TST_REG_LSL_REG, A_TST_REG_LSR_IMM, A_TST_REG_LSR_REG, + A_TST_REG_ASR_IMM, A_TST_REG_ASR_REG, A_TST_REG_ROR_IMM, A_TST_REG_ROR_REG, + A_TST_REG_LSL_IMM, A_UNK, A_TST_REG_LSR_IMM, A_LDRH_REG, + A_TST_REG_ASR_IMM, A_LDRSB_REG, A_TST_REG_ROR_IMM, A_LDRSH_REG, + + // 0001 0010 0000 + A_MSR_REG, A_BX, A_UNK, A_BLX_REG, + A_UNK, A_QSUB, A_UNK, A_UNK, + A_SMLAWy, A_UNK, A_SMULWy, A_STRH_REG, + A_SMLAWy, A_LDRD_REG, A_SMULWy, A_STRD_REG, + + // 0001 0011 0000 + A_TEQ_REG_LSL_IMM, A_TEQ_REG_LSL_REG, A_TEQ_REG_LSR_IMM, A_TEQ_REG_LSR_REG, + A_TEQ_REG_ASR_IMM, A_TEQ_REG_ASR_REG, A_TEQ_REG_ROR_IMM, A_TEQ_REG_ROR_REG, + A_TEQ_REG_LSL_IMM, A_UNK, A_TEQ_REG_LSR_IMM, A_LDRH_REG, + A_TEQ_REG_ASR_IMM, A_LDRSB_REG, A_TEQ_REG_ROR_IMM, A_LDRSH_REG, + + // 0001 0100 0000 + A_MRS, A_UNK, A_UNK, A_UNK, + A_UNK, A_QDADD, A_UNK, A_UNK, + A_SMLALxy, A_SWPB, A_SMLALxy, A_STRH_IMM, + A_SMLALxy, A_LDRD_IMM, A_SMLALxy, A_STRD_IMM, + + // 0001 0101 0000 + A_CMP_REG_LSL_IMM, A_CMP_REG_LSL_REG, A_CMP_REG_LSR_IMM, A_CMP_REG_LSR_REG, + A_CMP_REG_ASR_IMM, A_CMP_REG_ASR_REG, A_CMP_REG_ROR_IMM, A_CMP_REG_ROR_REG, + A_CMP_REG_LSL_IMM, A_UNK, A_CMP_REG_LSR_IMM, A_LDRH_IMM, + A_CMP_REG_ASR_IMM, A_LDRSB_IMM, A_CMP_REG_ROR_IMM, A_LDRSH_IMM, + + // 0001 0110 0000 + A_MSR_REG, A_CLZ, A_UNK, A_UNK, + A_UNK, A_QDSUB, A_UNK, A_UNK, + A_SMULxy, A_UNK, A_SMULxy, A_STRH_IMM, + A_SMULxy, A_LDRD_IMM, A_SMULxy, A_STRD_IMM, + + // 0001 0111 0000 + A_CMN_REG_LSL_IMM, A_CMN_REG_LSL_REG, A_CMN_REG_LSR_IMM, A_CMN_REG_LSR_REG, + A_CMN_REG_ASR_IMM, A_CMN_REG_ASR_REG, A_CMN_REG_ROR_IMM, A_CMN_REG_ROR_REG, + A_CMN_REG_LSL_IMM, A_UNK, A_CMN_REG_LSR_IMM, A_LDRH_IMM, + A_CMN_REG_ASR_IMM, A_LDRSB_IMM, A_CMN_REG_ROR_IMM, A_LDRSH_IMM, + + // 0001 1000 0000 + A_ORR_REG_LSL_IMM, A_ORR_REG_LSL_REG, A_ORR_REG_LSR_IMM, A_ORR_REG_LSR_REG, + A_ORR_REG_ASR_IMM, 
A_ORR_REG_ASR_REG, A_ORR_REG_ROR_IMM, A_ORR_REG_ROR_REG, + A_ORR_REG_LSL_IMM, A_UNK, A_ORR_REG_LSR_IMM, A_STRH_REG, + A_ORR_REG_ASR_IMM, A_LDRD_REG, A_ORR_REG_ROR_IMM, A_STRD_REG, + + // 0001 1001 0000 + A_ORR_REG_LSL_IMM_S, A_ORR_REG_LSL_REG_S, A_ORR_REG_LSR_IMM_S, A_ORR_REG_LSR_REG_S, + A_ORR_REG_ASR_IMM_S, A_ORR_REG_ASR_REG_S, A_ORR_REG_ROR_IMM_S, A_ORR_REG_ROR_REG_S, + A_ORR_REG_LSL_IMM_S, A_UNK, A_ORR_REG_LSR_IMM_S, A_LDRH_REG, + A_ORR_REG_ASR_IMM_S, A_LDRSB_REG, A_ORR_REG_ROR_IMM_S, A_LDRSH_REG, + + // 0001 1010 0000 + A_MOV_REG_LSL_IMM, A_MOV_REG_LSL_REG, A_MOV_REG_LSR_IMM, A_MOV_REG_LSR_REG, + A_MOV_REG_ASR_IMM, A_MOV_REG_ASR_REG, A_MOV_REG_ROR_IMM, A_MOV_REG_ROR_REG, + A_MOV_REG_LSL_IMM, A_UNK, A_MOV_REG_LSR_IMM, A_STRH_REG, + A_MOV_REG_ASR_IMM, A_LDRD_REG, A_MOV_REG_ROR_IMM, A_STRD_REG, + + // 0001 1011 0000 + A_MOV_REG_LSL_IMM_S, A_MOV_REG_LSL_REG_S, A_MOV_REG_LSR_IMM_S, A_MOV_REG_LSR_REG_S, + A_MOV_REG_ASR_IMM_S, A_MOV_REG_ASR_REG_S, A_MOV_REG_ROR_IMM_S, A_MOV_REG_ROR_REG_S, + A_MOV_REG_LSL_IMM_S, A_UNK, A_MOV_REG_LSR_IMM_S, A_LDRH_REG, + A_MOV_REG_ASR_IMM_S, A_LDRSB_REG, A_MOV_REG_ROR_IMM_S, A_LDRSH_REG, + + // 0001 1100 0000 + A_BIC_REG_LSL_IMM, A_BIC_REG_LSL_REG, A_BIC_REG_LSR_IMM, A_BIC_REG_LSR_REG, + A_BIC_REG_ASR_IMM, A_BIC_REG_ASR_REG, A_BIC_REG_ROR_IMM, A_BIC_REG_ROR_REG, + A_BIC_REG_LSL_IMM, A_UNK, A_BIC_REG_LSR_IMM, A_STRH_IMM, + A_BIC_REG_ASR_IMM, A_LDRD_IMM, A_BIC_REG_ROR_IMM, A_STRD_IMM, + + // 0001 1101 0000 + A_BIC_REG_LSL_IMM_S, A_BIC_REG_LSL_REG_S, A_BIC_REG_LSR_IMM_S, A_BIC_REG_LSR_REG_S, + A_BIC_REG_ASR_IMM_S, A_BIC_REG_ASR_REG_S, A_BIC_REG_ROR_IMM_S, A_BIC_REG_ROR_REG_S, + A_BIC_REG_LSL_IMM_S, A_UNK, A_BIC_REG_LSR_IMM_S, A_LDRH_IMM, + A_BIC_REG_ASR_IMM_S, A_LDRSB_IMM, A_BIC_REG_ROR_IMM_S, A_LDRSH_IMM, + + // 0001 1110 0000 + A_MVN_REG_LSL_IMM, A_MVN_REG_LSL_REG, A_MVN_REG_LSR_IMM, A_MVN_REG_LSR_REG, + A_MVN_REG_ASR_IMM, A_MVN_REG_ASR_REG, A_MVN_REG_ROR_IMM, A_MVN_REG_ROR_REG, + A_MVN_REG_LSL_IMM, A_UNK, A_MVN_REG_LSR_IMM, A_STRH_IMM, 
+ A_MVN_REG_ASR_IMM, A_LDRD_IMM, A_MVN_REG_ROR_IMM, A_STRD_IMM, + + // 0001 1111 0000 + A_MVN_REG_LSL_IMM_S, A_MVN_REG_LSL_REG_S, A_MVN_REG_LSR_IMM_S, A_MVN_REG_LSR_REG_S, + A_MVN_REG_ASR_IMM_S, A_MVN_REG_ASR_REG_S, A_MVN_REG_ROR_IMM_S, A_MVN_REG_ROR_REG_S, + A_MVN_REG_LSL_IMM_S, A_UNK, A_MVN_REG_LSR_IMM_S, A_LDRH_IMM, + A_MVN_REG_ASR_IMM_S, A_LDRSB_IMM, A_MVN_REG_ROR_IMM_S, A_LDRSH_IMM, + + + + // 0010 0000 0000 + A_AND_IMM, A_AND_IMM, A_AND_IMM, A_AND_IMM, + A_AND_IMM, A_AND_IMM, A_AND_IMM, A_AND_IMM, + A_AND_IMM, A_AND_IMM, A_AND_IMM, A_AND_IMM, + A_AND_IMM, A_AND_IMM, A_AND_IMM, A_AND_IMM, + + // 0010 0001 0000 + A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S, + A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S, + A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S, + A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S, A_AND_IMM_S, + + // 0010 0010 0000 + A_EOR_IMM, A_EOR_IMM, A_EOR_IMM, A_EOR_IMM, + A_EOR_IMM, A_EOR_IMM, A_EOR_IMM, A_EOR_IMM, + A_EOR_IMM, A_EOR_IMM, A_EOR_IMM, A_EOR_IMM, + A_EOR_IMM, A_EOR_IMM, A_EOR_IMM, A_EOR_IMM, + + // 0010 0011 0000 + A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S, + A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S, + A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S, + A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S, A_EOR_IMM_S, + + // 0010 0100 0000 + A_SUB_IMM, A_SUB_IMM, A_SUB_IMM, A_SUB_IMM, + A_SUB_IMM, A_SUB_IMM, A_SUB_IMM, A_SUB_IMM, + A_SUB_IMM, A_SUB_IMM, A_SUB_IMM, A_SUB_IMM, + A_SUB_IMM, A_SUB_IMM, A_SUB_IMM, A_SUB_IMM, + + // 0010 0101 0000 + A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S, + A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S, + A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S, + A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S, A_SUB_IMM_S, + + // 0010 0110 0000 + A_RSB_IMM, A_RSB_IMM, A_RSB_IMM, A_RSB_IMM, + A_RSB_IMM, A_RSB_IMM, A_RSB_IMM, A_RSB_IMM, + A_RSB_IMM, A_RSB_IMM, A_RSB_IMM, A_RSB_IMM, + A_RSB_IMM, A_RSB_IMM, A_RSB_IMM, A_RSB_IMM, + + // 0010 0111 0000 + A_RSB_IMM_S, A_RSB_IMM_S, A_RSB_IMM_S, 
A_RSB_IMM_S, + A_RSB_IMM_S, A_RSB_IMM_S, A_RSB_IMM_S, A_RSB_IMM_S, + A_RSB_IMM_S, A_RSB_IMM_S, A_RSB_IMM_S, A_RSB_IMM_S, + A_RSB_IMM_S, A_RSB_IMM_S, A_RSB_IMM_S, A_RSB_IMM_S, + + // 0010 1000 0000 + A_ADD_IMM, A_ADD_IMM, A_ADD_IMM, A_ADD_IMM, + A_ADD_IMM, A_ADD_IMM, A_ADD_IMM, A_ADD_IMM, + A_ADD_IMM, A_ADD_IMM, A_ADD_IMM, A_ADD_IMM, + A_ADD_IMM, A_ADD_IMM, A_ADD_IMM, A_ADD_IMM, + + // 0010 1001 0000 + A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S, + A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S, + A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S, + A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S, A_ADD_IMM_S, + + // 0010 1010 0000 + A_ADC_IMM, A_ADC_IMM, A_ADC_IMM, A_ADC_IMM, + A_ADC_IMM, A_ADC_IMM, A_ADC_IMM, A_ADC_IMM, + A_ADC_IMM, A_ADC_IMM, A_ADC_IMM, A_ADC_IMM, + A_ADC_IMM, A_ADC_IMM, A_ADC_IMM, A_ADC_IMM, + + // 0010 1011 0000 + A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S, + A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S, + A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S, + A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S, A_ADC_IMM_S, + + // 0010 1100 0000 + A_SBC_IMM, A_SBC_IMM, A_SBC_IMM, A_SBC_IMM, + A_SBC_IMM, A_SBC_IMM, A_SBC_IMM, A_SBC_IMM, + A_SBC_IMM, A_SBC_IMM, A_SBC_IMM, A_SBC_IMM, + A_SBC_IMM, A_SBC_IMM, A_SBC_IMM, A_SBC_IMM, + + // 0010 1101 0000 + A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S, + A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S, + A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S, + A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S, A_SBC_IMM_S, + + // 0010 1110 0000 + A_RSC_IMM, A_RSC_IMM, A_RSC_IMM, A_RSC_IMM, + A_RSC_IMM, A_RSC_IMM, A_RSC_IMM, A_RSC_IMM, + A_RSC_IMM, A_RSC_IMM, A_RSC_IMM, A_RSC_IMM, + A_RSC_IMM, A_RSC_IMM, A_RSC_IMM, A_RSC_IMM, + + // 0010 1111 0000 + A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S, + A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S, + A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S, + A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S, A_RSC_IMM_S, + + + + // 0011 0000 0000 + A_UNK, A_UNK, A_UNK, 
A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 0011 0001 0000 + A_TST_IMM, A_TST_IMM, A_TST_IMM, A_TST_IMM, + A_TST_IMM, A_TST_IMM, A_TST_IMM, A_TST_IMM, + A_TST_IMM, A_TST_IMM, A_TST_IMM, A_TST_IMM, + A_TST_IMM, A_TST_IMM, A_TST_IMM, A_TST_IMM, + + // 0011 0010 0000 + A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, + A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, + A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, + A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, + + // 0011 0011 0000 + A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM, + A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM, + A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM, + A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM, A_TEQ_IMM, + + // 0011 0100 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 0011 0101 0000 + A_CMP_IMM, A_CMP_IMM, A_CMP_IMM, A_CMP_IMM, + A_CMP_IMM, A_CMP_IMM, A_CMP_IMM, A_CMP_IMM, + A_CMP_IMM, A_CMP_IMM, A_CMP_IMM, A_CMP_IMM, + A_CMP_IMM, A_CMP_IMM, A_CMP_IMM, A_CMP_IMM, + + // 0011 0110 0000 + A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, + A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, + A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, + A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, A_MSR_IMM, + + // 0011 0111 0000 + A_CMN_IMM, A_CMN_IMM, A_CMN_IMM, A_CMN_IMM, + A_CMN_IMM, A_CMN_IMM, A_CMN_IMM, A_CMN_IMM, + A_CMN_IMM, A_CMN_IMM, A_CMN_IMM, A_CMN_IMM, + A_CMN_IMM, A_CMN_IMM, A_CMN_IMM, A_CMN_IMM, + + // 0011 1000 0000 + A_ORR_IMM, A_ORR_IMM, A_ORR_IMM, A_ORR_IMM, + A_ORR_IMM, A_ORR_IMM, A_ORR_IMM, A_ORR_IMM, + A_ORR_IMM, A_ORR_IMM, A_ORR_IMM, A_ORR_IMM, + A_ORR_IMM, A_ORR_IMM, A_ORR_IMM, A_ORR_IMM, + + // 0011 1001 0000 + A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S, + A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S, + A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S, + A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S, A_ORR_IMM_S, + + // 0011 1010 0000 + A_MOV_IMM, A_MOV_IMM, A_MOV_IMM, A_MOV_IMM, + A_MOV_IMM, 
A_MOV_IMM, A_MOV_IMM, A_MOV_IMM, + A_MOV_IMM, A_MOV_IMM, A_MOV_IMM, A_MOV_IMM, + A_MOV_IMM, A_MOV_IMM, A_MOV_IMM, A_MOV_IMM, + + // 0011 1011 0000 + A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S, + A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S, + A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S, + A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S, A_MOV_IMM_S, + + // 0011 1100 0000 + A_BIC_IMM, A_BIC_IMM, A_BIC_IMM, A_BIC_IMM, + A_BIC_IMM, A_BIC_IMM, A_BIC_IMM, A_BIC_IMM, + A_BIC_IMM, A_BIC_IMM, A_BIC_IMM, A_BIC_IMM, + A_BIC_IMM, A_BIC_IMM, A_BIC_IMM, A_BIC_IMM, + + // 0011 1101 0000 + A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S, + A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S, + A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S, + A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S, A_BIC_IMM_S, + + // 0011 1110 0000 + A_MVN_IMM, A_MVN_IMM, A_MVN_IMM, A_MVN_IMM, + A_MVN_IMM, A_MVN_IMM, A_MVN_IMM, A_MVN_IMM, + A_MVN_IMM, A_MVN_IMM, A_MVN_IMM, A_MVN_IMM, + A_MVN_IMM, A_MVN_IMM, A_MVN_IMM, A_MVN_IMM, + + // 0011 1111 0000 + A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S, + A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S, + A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S, + A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S, A_MVN_IMM_S, + + + + // 0100 0000 0000 + A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + + // 0100 0001 0000 + A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + + // 0100 0010 0000 + A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + A_STR_POST_IMM, 
A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + + // 0100 0011 0000 + A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + + // 0100 0100 0000 + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + + // 0100 0101 0000 + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + + // 0100 0110 0000 + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + + // 0100 0111 0000 + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + + // 0100 1000 0000 + A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + + // 0100 1001 0000 + A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + A_LDR_POST_IMM, 
A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + + // 0100 1010 0000 + A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, A_STR_POST_IMM, + + // 0100 1011 0000 + A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, A_LDR_POST_IMM, + + // 0100 1100 0000 + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + + // 0100 1101 0000 + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + + // 0100 1110 0000 + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, A_STRB_POST_IMM, + + // 0100 1111 0000 + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, A_LDRB_POST_IMM, + + + + // 0101 0000 0000 + A_STR_IMM, 
A_STR_IMM, A_STR_IMM, A_STR_IMM, + A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM, + A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM, + A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM, + + // 0101 0001 0000 + A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + + // 0101 0010 0000 + A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM, + A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM, + A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM, + A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM, + + // 0101 0011 0000 + A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + + // 0101 0100 0000 + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + + // 0101 0101 0000 + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + + // 0101 0110 0000 + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + + // 0101 0111 0000 + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + + // 0101 1000 0000 + A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM, + A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM, + A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM, + A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM, + + // 0101 1001 0000 + A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + 
A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + + // 0101 1010 0000 + A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM, + A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM, + A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM, + A_STR_IMM, A_STR_IMM, A_STR_IMM, A_STR_IMM, + + // 0101 1011 0000 + A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, A_LDR_IMM, + + // 0101 1100 0000 + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + + // 0101 1101 0000 + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + + // 0101 1110 0000 + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, A_STRB_IMM, + + // 0101 1111 0000 + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, A_LDRB_IMM, + + + + // 0110 0000 0000 + A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK, + A_STR_POST_REG_ASR, A_UNK, A_STR_POST_REG_ROR, A_UNK, + A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK, + A_STR_POST_REG_ASR, A_UNK, A_STR_POST_REG_ROR, A_UNK, + + // 0110 0001 0000 + A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK, + A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK, + A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK, + A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK, + + // 0110 0010 0000 + A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK, + A_STR_POST_REG_ASR, A_UNK, 
A_STR_POST_REG_ROR, A_UNK, + A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK, + A_STR_POST_REG_ASR, A_UNK, A_STR_POST_REG_ROR, A_UNK, + + // 0110 0011 0000 + A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK, + A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK, + A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK, + A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK, + + // 0110 0100 0000 + A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK, + A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK, + A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK, + A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK, + + // 0110 0101 0000 + A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK, + A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK, + A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK, + A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK, + + // 0110 0110 0000 + A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK, + A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK, + A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK, + A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK, + + // 0110 0111 0000 + A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK, + A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK, + A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK, + A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK, + + // 0110 1000 0000 + A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK, + A_STR_POST_REG_ASR, A_UNK, A_STR_POST_REG_ROR, A_UNK, + A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK, + A_STR_POST_REG_ASR, A_UNK, A_STR_POST_REG_ROR, A_UNK, + + // 0110 1001 0000 + A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK, + A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK, + A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK, + A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK, + + // 0110 1010 0000 + A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK, + A_STR_POST_REG_ASR, A_UNK, 
A_STR_POST_REG_ROR, A_UNK, + A_STR_POST_REG_LSL, A_UNK, A_STR_POST_REG_LSR, A_UNK, + A_STR_POST_REG_ASR, A_UNK, A_STR_POST_REG_ROR, A_UNK, + + // 0110 1011 0000 + A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK, + A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK, + A_LDR_POST_REG_LSL, A_UNK, A_LDR_POST_REG_LSR, A_UNK, + A_LDR_POST_REG_ASR, A_UNK, A_LDR_POST_REG_ROR, A_UNK, + + // 0110 1100 0000 + A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK, + A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK, + A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK, + A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK, + + // 0110 1101 0000 + A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK, + A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK, + A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK, + A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK, + + // 0110 1110 0000 + A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK, + A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK, + A_STRB_POST_REG_LSL, A_UNK, A_STRB_POST_REG_LSR, A_UNK, + A_STRB_POST_REG_ASR, A_UNK, A_STRB_POST_REG_ROR, A_UNK, + + // 0110 1111 0000 + A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK, + A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK, + A_LDRB_POST_REG_LSL, A_UNK, A_LDRB_POST_REG_LSR, A_UNK, + A_LDRB_POST_REG_ASR, A_UNK, A_LDRB_POST_REG_ROR, A_UNK, + + + + // 0111 0000 0000 + A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK, + A_STR_REG_ASR, A_UNK, A_STR_REG_ROR, A_UNK, + A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK, + A_STR_REG_ASR, A_UNK, A_STR_REG_ROR, A_UNK, + + // 0111 0001 0000 + A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK, + A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK, + A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK, + A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK, + + // 0111 0010 0000 + A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK, + A_STR_REG_ASR, A_UNK, A_STR_REG_ROR, A_UNK, + A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK, + A_STR_REG_ASR, 
A_UNK, A_STR_REG_ROR, A_UNK, + + // 0111 0011 0000 + A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK, + A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK, + A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK, + A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK, + + // 0111 0100 0000 + A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK, + A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK, + A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK, + A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK, + + // 0111 0101 0000 + A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK, + A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK, + A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK, + A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK, + + // 0111 0110 0000 + A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK, + A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK, + A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK, + A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK, + + // 0111 0111 0000 + A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK, + A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK, + A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK, + A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK, + + // 0111 1000 0000 + A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK, + A_STR_REG_ASR, A_UNK, A_STR_REG_ROR, A_UNK, + A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK, + A_STR_REG_ASR, A_UNK, A_STR_REG_ROR, A_UNK, + + // 0111 1001 0000 + A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK, + A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK, + A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK, + A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK, + + // 0111 1010 0000 + A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK, + A_STR_REG_ASR, A_UNK, A_STR_REG_ROR, A_UNK, + A_STR_REG_LSL, A_UNK, A_STR_REG_LSR, A_UNK, + A_STR_REG_ASR, A_UNK, A_STR_REG_ROR, A_UNK, + + // 0111 1011 0000 + A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK, + A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK, + A_LDR_REG_LSL, A_UNK, A_LDR_REG_LSR, A_UNK, + A_LDR_REG_ASR, A_UNK, A_LDR_REG_ROR, A_UNK, + + // 0111 1100 0000 + A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK, + 
A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK, + A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK, + A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK, + + // 0111 1101 0000 + A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK, + A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK, + A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK, + A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK, + + // 0111 1110 0000 + A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK, + A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK, + A_STRB_REG_LSL, A_UNK, A_STRB_REG_LSR, A_UNK, + A_STRB_REG_ASR, A_UNK, A_STRB_REG_ROR, A_UNK, + + // 0111 1111 0000 + A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK, + A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK, + A_LDRB_REG_LSL, A_UNK, A_LDRB_REG_LSR, A_UNK, + A_LDRB_REG_ASR, A_UNK, A_LDRB_REG_ROR, A_UNK, + + + + // 1000 0000 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1000 0001 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + // 1000 0010 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1000 0011 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + // 1000 0100 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1000 0101 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + // 1000 0110 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1000 0111 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + // 1000 1000 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, 
A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1000 1001 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + // 1000 1010 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1000 1011 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + // 1000 1100 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1000 1101 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + // 1000 1110 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1000 1111 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + + + // 1001 0000 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1001 0001 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + // 1001 0010 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1001 0011 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + // 1001 0100 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1001 0101 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + // 1001 0110 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, 
+ A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1001 0111 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + // 1001 1000 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1001 1001 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + // 1001 1010 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1001 1011 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + // 1001 1100 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1001 1101 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + // 1001 1110 0000 + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + A_STM, A_STM, A_STM, A_STM, + + // 1001 1111 0000 + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + A_LDM, A_LDM, A_LDM, A_LDM, + + + + // 1010 0000 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + + // 1010 0001 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + + // 1010 0010 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + + // 1010 0011 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + + // 1010 0100 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + + // 1010 0101 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, 
A_B, + + // 1010 0110 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + + // 1010 0111 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + + // 1010 1000 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + + // 1010 1001 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + + // 1010 1010 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + + // 1010 1011 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + + // 1010 1100 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + + // 1010 1101 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + + // 1010 1110 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + + // 1010 1111 0000 + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + A_B, A_B, A_B, A_B, + + + + // 1011 0000 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + // 1011 0001 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + // 1011 0010 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + // 1011 0011 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + // 1011 0100 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + // 1011 0101 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + // 1011 0110 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + // 
1011 0111 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + // 1011 1000 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + // 1011 1001 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + // 1011 1010 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + // 1011 1011 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + // 1011 1100 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + // 1011 1101 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + // 1011 1110 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + // 1011 1111 0000 + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + A_BL, A_BL, A_BL, A_BL, + + + + // 1100 0000 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1100 0001 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1100 0010 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1100 0011 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1100 0100 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1100 0101 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1100 0110 
0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1100 0111 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1100 1000 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1100 1001 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1100 1010 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1100 1011 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1100 1100 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1100 1101 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1100 1110 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1100 1111 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + + + // 1101 0000 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1101 0001 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1101 0010 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1101 0011 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1101 0100 0000 + 
A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1101 0101 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1101 0110 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1101 0111 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1101 1000 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1101 1001 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1101 1010 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1101 1011 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1101 1100 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1101 1101 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1101 1110 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + // 1101 1111 0000 + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + A_UNK, A_UNK, A_UNK, A_UNK, + + + + // 1110 0000 0000 + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + + // 1110 0001 0000 + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + + // 1110 0010 0000 + A_UNK, 
A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + + // 1110 0011 0000 + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + + // 1110 0100 0000 + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + + // 1110 0101 0000 + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + + // 1110 0110 0000 + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + + // 1110 0111 0000 + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + + // 1110 1000 0000 + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + + // 1110 1001 0000 + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + + // 1110 1010 0000 + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + + // 1110 1011 0000 + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + + // 1110 1100 0000 + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + + // 1110 1101 0000 + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + + // 1110 1110 0000 + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + A_UNK, A_MCR, A_UNK, A_MCR, + + // 1110 1111 0000 + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + A_UNK, A_MRC, A_UNK, A_MRC, + + + + // 1111 0000 0000 + A_SVC, A_SVC, 
A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + + // 1111 0001 0000 + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + + // 1111 0010 0000 + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + + // 1111 0011 0000 + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + + // 1111 0100 0000 + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + + // 1111 0101 0000 + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + + // 1111 0110 0000 + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + + // 1111 0111 0000 + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + + // 1111 1000 0000 + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + + // 1111 1001 0000 + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + + // 1111 1010 0000 + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + + // 1111 1011 0000 + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + + // 1111 1100 0000 + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + + // 1111 1101 0000 + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + + // 1111 1110 0000 + A_SVC, A_SVC, A_SVC, 
A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + + // 1111 1111 0000 + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC, + A_SVC, A_SVC, A_SVC, A_SVC +}; + +INSTRFUNC_PROTO(THUMBInstrTable[1024]) = +{ + // 0000 0000 00 + T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, + T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, + T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, + T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, + + // 0000 0100 00 + T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, + T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, + T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, + T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, T_LSL_IMM, + + // 0000 1000 00 + T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, + T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, + T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, + T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, + + // 0000 1100 00 + T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, + T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, + T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, + T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, T_LSR_IMM, + + // 0001 0000 00 + T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, + T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, + T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, + T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, + + // 0001 0100 00 + T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, + T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, + T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, + T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, T_ASR_IMM, + + // 0001 1000 00 + T_ADD_REG_, T_ADD_REG_, T_ADD_REG_, T_ADD_REG_, + T_ADD_REG_, T_ADD_REG_, T_ADD_REG_, T_ADD_REG_, + T_SUB_REG_, T_SUB_REG_, T_SUB_REG_, T_SUB_REG_, + T_SUB_REG_, T_SUB_REG_, T_SUB_REG_, T_SUB_REG_, + + // 0001 1100 00 + T_ADD_IMM_, T_ADD_IMM_, T_ADD_IMM_, T_ADD_IMM_, + T_ADD_IMM_, T_ADD_IMM_, T_ADD_IMM_, T_ADD_IMM_, + T_SUB_IMM_, T_SUB_IMM_, T_SUB_IMM_, T_SUB_IMM_, + T_SUB_IMM_, T_SUB_IMM_, T_SUB_IMM_, T_SUB_IMM_, + + // 0010 0000 00 + T_MOV_IMM, T_MOV_IMM, 
T_MOV_IMM, T_MOV_IMM, + T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, + T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, + T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, + + // 0010 0100 00 + T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, + T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, + T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, + T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, T_MOV_IMM, + + // 0010 1000 00 + T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, + T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, + T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, + T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, + + // 0010 1100 00 + T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, + T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, + T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, + T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, T_CMP_IMM, + + // 0011 0000 00 + T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, + T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, + T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, + T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, + + // 0011 0100 00 + T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, + T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, + T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, + T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, T_ADD_IMM, + + // 0011 1000 00 + T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, + T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, + T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, + T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, + + // 0011 1100 00 + T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, + T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, + T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, + T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, T_SUB_IMM, + + + + // 0100 0000 00 + T_AND_REG, T_EOR_REG, T_LSL_REG, T_LSR_REG, + T_ASR_REG, T_ADC_REG, T_SBC_REG, T_ROR_REG, + T_TST_REG, T_NEG_REG, T_CMP_REG, T_CMN_REG, + T_ORR_REG, T_MUL_REG, T_BIC_REG, T_MVN_REG, + + // 0100 0100 00 + T_UNK, T_ADD_HIREG, T_ADD_HIREG, T_ADD_HIREG, + T_UNK, T_CMP_HIREG, T_CMP_HIREG, T_CMP_HIREG, + T_UNK, T_MOV_HIREG, T_MOV_HIREG, T_MOV_HIREG, + T_BX, T_BX, T_BLX_REG, T_BLX_REG, + + 
// 0100 1000 00 + T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, + T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, + T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, + T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, + + // 0100 1100 00 + T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, + T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, + T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, + T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, T_LDR_PCREL, + + // 0101 0000 00 + T_STR_REG, T_STR_REG, T_STR_REG, T_STR_REG, + T_STR_REG, T_STR_REG, T_STR_REG, T_STR_REG, + T_STRH_REG, T_STRH_REG, T_STRH_REG, T_STRH_REG, + T_STRH_REG, T_STRH_REG, T_STRH_REG, T_STRH_REG, + + // 0101 0100 00 + T_STRB_REG, T_STRB_REG, T_STRB_REG, T_STRB_REG, + T_STRB_REG, T_STRB_REG, T_STRB_REG, T_STRB_REG, + T_LDRSB_REG, T_LDRSB_REG, T_LDRSB_REG, T_LDRSB_REG, + T_LDRSB_REG, T_LDRSB_REG, T_LDRSB_REG, T_LDRSB_REG, + + // 0101 1000 00 + T_LDR_REG, T_LDR_REG, T_LDR_REG, T_LDR_REG, + T_LDR_REG, T_LDR_REG, T_LDR_REG, T_LDR_REG, + T_LDRH_REG, T_LDRH_REG, T_LDRH_REG, T_LDRH_REG, + T_LDRH_REG, T_LDRH_REG, T_LDRH_REG, T_LDRH_REG, + + // 0101 1100 00 + T_LDRB_REG, T_LDRB_REG, T_LDRB_REG, T_LDRB_REG, + T_LDRB_REG, T_LDRB_REG, T_LDRB_REG, T_LDRB_REG, + T_LDRSH_REG, T_LDRSH_REG, T_LDRSH_REG, T_LDRSH_REG, + T_LDRSH_REG, T_LDRSH_REG, T_LDRSH_REG, T_LDRSH_REG, + + // 0110 0000 00 + T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM, + T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM, + T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM, + T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM, + + // 0110 0100 00 + T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM, + T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM, + T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM, + T_STR_IMM, T_STR_IMM, T_STR_IMM, T_STR_IMM, + + // 0110 1000 00 + T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, + T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, + T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, + T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, + + // 0110 1100 00 + T_LDR_IMM, 
T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, + T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, + T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, + T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, T_LDR_IMM, + + // 0111 0000 00 + T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, + T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, + T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, + T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, + + // 0111 0100 00 + T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, + T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, + T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, + T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, T_STRB_IMM, + + // 0111 1000 00 + T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, + T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, + T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, + T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, + + // 0111 1100 00 + T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, + T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, + T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, + T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, T_LDRB_IMM, + + + + // 1000 0000 00 + T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, + T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, + T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, + T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, + + // 1000 0100 00 + T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, + T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, + T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, + T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, T_STRH_IMM, + + // 1000 1000 00 + T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, + T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, + T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, + T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, + + // 1000 1100 00 + T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, + T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, + T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, + T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, T_LDRH_IMM, + + // 1001 0000 00 + T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, 
T_STR_SPREL, + T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, + T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, + T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, + + // 1001 0100 00 + T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, + T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, + T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, + T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, T_STR_SPREL, + + // 1001 1000 00 + T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, + T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, + T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, + T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, + + // 1001 1100 00 + T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, + T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, + T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, + T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, T_LDR_SPREL, + + // 1010 0000 00 + T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, + T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, + T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, + T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, + + // 1010 0100 00 + T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, + T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, + T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, + T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, T_ADD_PCREL, + + // 1010 1000 00 + T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, + T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, + T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, + T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, + + // 1010 1100 00 + T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, + T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, + T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, + T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, T_ADD_SPREL, + + // 1011 0000 00 + T_ADD_SP, T_ADD_SP, T_ADD_SP, T_ADD_SP, + T_UNK, T_UNK, T_UNK, T_UNK, + T_UNK, T_UNK, T_UNK, T_UNK, + T_UNK, T_UNK, T_UNK, T_UNK, + + // 1011 0100 00 
+ T_PUSH, T_PUSH, T_PUSH, T_PUSH, + T_PUSH, T_PUSH, T_PUSH, T_PUSH, + T_UNK, T_UNK, T_UNK, T_UNK, + T_UNK, T_UNK, T_UNK, T_UNK, + + // 1011 1000 00 + T_UNK, T_UNK, T_UNK, T_UNK, + T_UNK, T_UNK, T_UNK, T_UNK, + T_UNK, T_UNK, T_UNK, T_UNK, + T_UNK, T_UNK, T_UNK, T_UNK, + + // 1011 1100 00 + T_POP, T_POP, T_POP, T_POP, + T_POP, T_POP, T_POP, T_POP, + T_UNK, T_UNK, T_UNK, T_UNK, + T_UNK, T_UNK, T_UNK, T_UNK, + + + + // 1100 0000 00 + T_STMIA, T_STMIA, T_STMIA, T_STMIA, + T_STMIA, T_STMIA, T_STMIA, T_STMIA, + T_STMIA, T_STMIA, T_STMIA, T_STMIA, + T_STMIA, T_STMIA, T_STMIA, T_STMIA, + + // 1100 0100 00 + T_STMIA, T_STMIA, T_STMIA, T_STMIA, + T_STMIA, T_STMIA, T_STMIA, T_STMIA, + T_STMIA, T_STMIA, T_STMIA, T_STMIA, + T_STMIA, T_STMIA, T_STMIA, T_STMIA, + + // 1100 1000 00 + T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA, + T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA, + T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA, + T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA, + + // 1100 1100 00 + T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA, + T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA, + T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA, + T_LDMIA, T_LDMIA, T_LDMIA, T_LDMIA, + + // 1101 0000 00 + T_BCOND, T_BCOND, T_BCOND, T_BCOND, + T_BCOND, T_BCOND, T_BCOND, T_BCOND, + T_BCOND, T_BCOND, T_BCOND, T_BCOND, + T_BCOND, T_BCOND, T_BCOND, T_BCOND, + + // 1101 0100 00 + T_BCOND, T_BCOND, T_BCOND, T_BCOND, + T_BCOND, T_BCOND, T_BCOND, T_BCOND, + T_BCOND, T_BCOND, T_BCOND, T_BCOND, + T_BCOND, T_BCOND, T_BCOND, T_BCOND, + + // 1101 1000 00 + T_BCOND, T_BCOND, T_BCOND, T_BCOND, + T_BCOND, T_BCOND, T_BCOND, T_BCOND, + T_BCOND, T_BCOND, T_BCOND, T_BCOND, + T_BCOND, T_BCOND, T_BCOND, T_BCOND, + + // 1101 1100 00 + T_BCOND, T_BCOND, T_BCOND, T_BCOND, + T_BCOND, T_BCOND, T_BCOND, T_BCOND, + T_UNK, T_UNK, T_UNK, T_UNK, + T_SVC, T_SVC, T_SVC, T_SVC, + + // 1110 0000 00 + T_B, T_B, T_B, T_B, + T_B, T_B, T_B, T_B, + T_B, T_B, T_B, T_B, + T_B, T_B, T_B, T_B, + + // 1110 0100 00 + T_B, T_B, T_B, T_B, + T_B, T_B, T_B, T_B, + T_B, T_B, T_B, T_B, + T_B, T_B, T_B, T_B, + + // 1110 
1000 00 + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, + + // 1110 1100 00 + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, + + // 1111 0000 00 + T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, + T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, + T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, + T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, + + // 1111 0100 00 + T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, + T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, + T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, + T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, T_BL_LONG_1, + + // 1111 1000 00 + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, + + // 1111 1100 00 + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, + T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2, T_BL_LONG_2 +}; diff --git a/src/CP15.cpp b/src/CP15.cpp new file mode 100644 index 0000000..4d1fee6 --- /dev/null +++ b/src/CP15.cpp @@ -0,0 +1,300 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. 
+ + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#include <stdio.h> +#include <string.h> +#include "NDS.h" +#include "ARM.h" +#include "CP15.h" + + +// derp +namespace NDS +{ +extern ARM* ARM9; +} + +namespace CP15 +{ + +u32 Control; + +u32 DTCMSetting, ITCMSetting; + +u8 ITCM[0x8000]; +u32 ITCMSize; +u8 DTCM[0x4000]; +u32 DTCMBase, DTCMSize; + + +void Reset() +{ + Control = 0x78; // dunno + + DTCMSetting = 0; + ITCMSetting = 0; + + memset(ITCM, 0, 0x8000); + memset(DTCM, 0, 0x4000); + + ITCMSize = 0; + DTCMBase = 0xFFFFFFFF; + DTCMSize = 0; +} + + +void UpdateDTCMSetting() +{ + if (Control & (1<<16)) + { + DTCMBase = DTCMSetting & 0xFFFFF000; + DTCMSize = 0x200 << ((DTCMSetting >> 1) & 0x1F); + printf("DTCM [%08X] enabled at %08X, size %X\n", DTCMSetting, DTCMBase, DTCMSize); + } + else + { + DTCMBase = 0xFFFFFFFF; + DTCMSize = 0; + printf("DTCM disabled\n"); + } +} + +void UpdateITCMSetting() +{ + if (Control & (1<<18)) + { + ITCMSize = 0x200 << ((ITCMSetting >> 1) & 0x1F); + printf("ITCM [%08X] enabled at %08X, size %X\n", ITCMSetting, 0, ITCMSize); + } + else + { + ITCMSize = 0; + printf("ITCM disabled\n"); + } +} + + +void Write(u32 id, u32 val) +{ + //printf("CP15 write op %03X %08X %08X\n", id, val, NDS::ARM9->R[15]); + + switch (id) + { + case 0x100: + val &= 0x000FF085; + Control &= ~0x000FF085; + Control |= val; + UpdateDTCMSetting(); + UpdateITCMSetting(); + return; + + + case 0x704: + case 0x782: + NDS::ARM9->Halt(1); + return; + + + case 0x761: + //printf("inval data cache %08X\n", val); + return; + case 0x762: + //printf("inval data cache SI\n"); + return; + + case 0x7A1: + //printf("flush data cache %08X\n", val); + return; + 
case 0x7A2: + //printf("flush data cache SI\n"); + return; + + + case 0x910: + DTCMSetting = val; + UpdateDTCMSetting(); + return; + case 0x911: + ITCMSetting = val; + UpdateITCMSetting(); + return; + } + + if ((id&0xF00)!=0x700) + printf("unknown CP15 write op %03X %08X\n", id, val); +} + +u32 Read(u32 id) +{ + //printf("CP15 read op %03X %08X\n", id, NDS::ARM9->R[15]); + + switch (id) + { + case 0x000: // CPU ID + case 0x003: + case 0x004: + case 0x005: + case 0x006: + case 0x007: + return 0x41059461; + + case 0x001: // cache type + return 0x0F0D2112; + + case 0x002: // TCM size + return (6 << 6) | (5 << 18); + + + case 0x100: // control reg + return Control; + + + case 0x910: + return DTCMSetting; + case 0x911: + return ITCMSetting; + } + + printf("unknown CP15 read op %03X\n", id); + return 0; +} + + +// TCM are handled here. +// TODO: later on, handle PU, and maybe caches + +bool HandleCodeRead16(u32 addr, u16* val) +{ + if (addr < ITCMSize) + { + *val = *(u16*)&ITCM[addr & 0x7FFF]; + return true; + } + + return false; +} + +bool HandleCodeRead32(u32 addr, u32* val) +{ + if (addr < ITCMSize) + { + *val = *(u32*)&ITCM[addr & 0x7FFF]; + return true; + } + + return false; +} + + +bool HandleDataRead8(u32 addr, u8* val, u32 forceuser) +{ + if (addr < ITCMSize) + { + *val = *(u8*)&ITCM[addr & 0x7FFF]; + return true; + } + if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) + { + *val = *(u8*)&DTCM[(addr - DTCMBase) & 0x3FFF]; + return true; + } + + return false; +} + +bool HandleDataRead16(u32 addr, u16* val, u32 forceuser) +{ + if (addr < ITCMSize) + { + *val = *(u16*)&ITCM[addr & 0x7FFF]; + return true; + } + if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) + { + *val = *(u16*)&DTCM[(addr - DTCMBase) & 0x3FFF]; + return true; + } + + return false; +} + +bool HandleDataRead32(u32 addr, u32* val, u32 forceuser) +{ + if (addr < ITCMSize) + { + *val = *(u32*)&ITCM[addr & 0x7FFF]; + return true; + } + if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) + { + 
*val = *(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF]; + return true; + } + + return false; +} + +bool HandleDataWrite8(u32 addr, u8 val, u32 forceuser) +{ + if (addr < ITCMSize) + { + *(u8*)&ITCM[addr & 0x7FFF] = val; + return true; + } + if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) + { + *(u8*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val; + return true; + } + + return false; +} + +bool HandleDataWrite16(u32 addr, u16 val, u32 forceuser) +{ + if (addr < ITCMSize) + { + *(u16*)&ITCM[addr & 0x7FFF] = val; + return true; + } + if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) + { + *(u16*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val; + return true; + } + + return false; +} + +bool HandleDataWrite32(u32 addr, u32 val, u32 forceuser) +{ + if (addr < ITCMSize) + { + *(u32*)&ITCM[addr & 0x7FFF] = val; + return true; + } + if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) + { + *(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val; + return true; + } + + return false; +} + +} diff --git a/src/CP15.h b/src/CP15.h new file mode 100644 index 0000000..eedea10 --- /dev/null +++ b/src/CP15.h @@ -0,0 +1,44 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#ifndef CP15_H +#define CP15_H + +namespace CP15 +{ + +void Reset(); + +void UpdateDTCMSetting(); +void UpdateITCMSetting(); + +void Write(u32 id, u32 val); +u32 Read(u32 id); + +bool HandleCodeRead16(u32 addr, u16* val); +bool HandleCodeRead32(u32 addr, u32* val); +bool HandleDataRead8(u32 addr, u8* val, u32 forceuser=0); +bool HandleDataRead16(u32 addr, u16* val, u32 forceuser=0); +bool HandleDataRead32(u32 addr, u32* val, u32 forceuser=0); +bool HandleDataWrite8(u32 addr, u8 val, u32 forceuser=0); +bool HandleDataWrite16(u32 addr, u16 val, u32 forceuser=0); +bool HandleDataWrite32(u32 addr, u32 val, u32 forceuser=0); + +} + +#endif diff --git a/src/DMA.cpp b/src/DMA.cpp new file mode 100644 index 0000000..b3e4f2f --- /dev/null +++ b/src/DMA.cpp @@ -0,0 +1,269 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#include <stdio.h> +#include "NDS.h" +#include "DMA.h" +#include "NDSCart.h" +#include "GPU3D.h" + + +// NOTES ON DMA SHIT +// +// * could use optimized code paths for common types of DMA transfers. for example, VRAM +// * needs to eventually be made more accurate anyway. DMA isn't instant. + + +DMA::DMA(u32 cpu, u32 num) +{ + CPU = cpu; + Num = num; + + if (cpu == 0) + CountMask = 0x001FFFFF; + else + CountMask = (num==3 ? 
0x0000FFFF : 0x00003FFF); + + // TODO: merge with the one in ARM.cpp, somewhere + for (int i = 0; i < 16; i++) + { + Waitstates[0][i] = 1; + Waitstates[1][i] = 1; + } + + if (!cpu) + { + // ARM9 + // note: 33MHz cycles + Waitstates[0][0x2] = 1; + Waitstates[0][0x3] = 1; + Waitstates[0][0x4] = 1; + Waitstates[0][0x5] = 1; + Waitstates[0][0x6] = 1; + Waitstates[0][0x7] = 1; + Waitstates[0][0x8] = 6; + Waitstates[0][0x9] = 6; + Waitstates[0][0xA] = 10; + Waitstates[0][0xF] = 1; + + Waitstates[1][0x2] = 2; + Waitstates[1][0x3] = 1; + Waitstates[1][0x4] = 1; + Waitstates[1][0x5] = 2; + Waitstates[1][0x6] = 2; + Waitstates[1][0x7] = 1; + Waitstates[1][0x8] = 12; + Waitstates[1][0x9] = 12; + Waitstates[1][0xA] = 10; + Waitstates[1][0xF] = 1; + } + else + { + // ARM7 + Waitstates[0][0x0] = 1; + Waitstates[0][0x2] = 1; + Waitstates[0][0x3] = 1; + Waitstates[0][0x4] = 1; + Waitstates[0][0x6] = 1; + Waitstates[0][0x8] = 6; + Waitstates[0][0x9] = 6; + Waitstates[0][0xA] = 10; + + Waitstates[1][0x0] = 1; + Waitstates[1][0x2] = 2; + Waitstates[1][0x3] = 1; + Waitstates[1][0x4] = 1; + Waitstates[1][0x6] = 2; + Waitstates[1][0x8] = 12; + Waitstates[1][0x9] = 12; + Waitstates[1][0xA] = 10; + } + + Reset(); +} + +DMA::~DMA() +{ +} + +void DMA::Reset() +{ + SrcAddr = 0; + DstAddr = 0; + Cnt = 0; + + StartMode = 0; + CurSrcAddr = 0; + CurDstAddr = 0; + RemCount = 0; + IterCount = 0; + SrcAddrInc = 0; + DstAddrInc = 0; + + Running = false; +} + +void DMA::WriteCnt(u32 val) +{ + u32 oldcnt = Cnt; + Cnt = val; + + if ((!(oldcnt & 0x80000000)) && (val & 0x80000000)) + { + CurSrcAddr = SrcAddr; + CurDstAddr = DstAddr; + + switch (Cnt & 0x00600000) + { + case 0x00000000: DstAddrInc = 1; break; + case 0x00200000: DstAddrInc = -1; break; + case 0x00400000: DstAddrInc = 0; break; + case 0x00600000: DstAddrInc = 1; break; + } + + switch (Cnt & 0x01800000) + { + case 0x00000000: SrcAddrInc = 1; break; + case 0x00800000: SrcAddrInc = -1; break; + case 0x01000000: SrcAddrInc = 0; break; + case 
0x01800000: SrcAddrInc = 1; printf("BAD DMA SRC INC MODE 3\n"); break; + } + + if (CPU == 0) + StartMode = (Cnt >> 27) & 0x7; + else + StartMode = ((Cnt >> 28) & 0x3) | 0x10; + + if ((StartMode & 0x7) == 0) + Start(); + else if (StartMode == 0x07) + GPU3D::CheckFIFODMA(); + + if ((StartMode&7)!=0x00 && (StartMode&7)!=0x1 && StartMode!=2 && StartMode!=0x05 && StartMode!=0x12 && StartMode!=0x07) + printf("UNIMPLEMENTED ARM%d DMA%d START MODE %02X\n", CPU?7:9, Num, StartMode); + } +} + +void DMA::Start() +{ + if (Running) return; + + u32 countmask; + if (CPU == 0) + countmask = 0x001FFFFF; + else + countmask = (Num==3 ? 0x0000FFFF : 0x00003FFF); + + RemCount = Cnt & countmask; + if (!RemCount) + RemCount = countmask+1; + + if (StartMode == 0x07 && RemCount > 112) + IterCount = 112; + else + IterCount = RemCount; + + if ((Cnt & 0x00600000) == 0x00600000) + CurDstAddr = DstAddr; + + //printf("ARM%d DMA%d %08X %02X %08X->%08X %d bytes %dbit\n", CPU?7:9, Num, Cnt, StartMode, CurSrcAddr, CurDstAddr, RemCount*((Cnt&0x04000000)?4:2), (Cnt&0x04000000)?32:16); + + // special path for cart DMA. this is a gross hack. + // emulating it properly requires emulating cart transfer delays, so uh... TODO + if (CurSrcAddr==0x04100010 && RemCount==1 && (Cnt & 0x07E00000)==0x07000000 && + StartMode==0x05 || StartMode==0x12) + { + NDSCart::DMA(CurDstAddr); + Cnt &= ~0x80000000; + if (Cnt & 0x40000000) + NDS::SetIRQ(CPU, NDS::IRQ_DMA0 + Num); + return; + } + + // TODO eventually: not stop if we're running code in ITCM + + Running = true; + NDS::StopCPU(CPU, 1<<Num); +} + +s32 DMA::Run(s32 cycles) +{ + if (!Running) + return cycles; + + if (!(Cnt & 0x04000000)) + { + u16 (*readfn)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16; + void (*writefn)(u32,u16) = CPU ? 
NDS::ARM7Write16 : NDS::ARM9Write16; + + while (IterCount > 0 && cycles > 0) + { + writefn(CurDstAddr, readfn(CurSrcAddr)); + + cycles -= (Waitstates[0][(CurSrcAddr >> 24) & 0xF] + Waitstates[0][(CurDstAddr >> 24) & 0xF]); + CurSrcAddr += SrcAddrInc<<1; + CurDstAddr += DstAddrInc<<1; + IterCount--; + RemCount--; + } + } + else + { + u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32; + void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32; + + while (IterCount > 0 && cycles > 0) + { + writefn(CurDstAddr, readfn(CurSrcAddr)); + + cycles -= (Waitstates[1][(CurSrcAddr >> 24) & 0xF] + Waitstates[1][(CurDstAddr >> 24) & 0xF]); + CurSrcAddr += SrcAddrInc<<2; + CurDstAddr += DstAddrInc<<2; + IterCount--; + RemCount--; + } + } + + if (RemCount) + { + Cnt &= ~CountMask; + Cnt |= RemCount; + + if (IterCount == 0) + { + Running = false; + NDS::ResumeCPU(CPU, 1<<Num); + + if (StartMode == 0x07) + GPU3D::CheckFIFODMA(); + } + + return cycles; + } + + if (!(Cnt & 0x02000000)) + Cnt &= ~0x80000000; + + if (Cnt & 0x40000000) + NDS::SetIRQ(CPU, NDS::IRQ_DMA0 + Num); + + Running = false; + NDS::ResumeCPU(CPU, 1<<Num); + + return cycles - 2; +} diff --git a/src/DMA.h b/src/DMA.h new file mode 100644 index 0000000..59a7f03 --- /dev/null +++ b/src/DMA.h @@ -0,0 +1,64 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#ifndef DMA_H +#define DMA_H + +#include "types.h" + +class DMA +{ +public: + DMA(u32 cpu, u32 num); + ~DMA(); + + void Reset(); + + void WriteCnt(u32 val); + void Start(); + + s32 Run(s32 cycles); + + void StartIfNeeded(u32 mode) + { + if ((mode == StartMode) && (Cnt & 0x80000000)) + Start(); + } + + u32 SrcAddr; + u32 DstAddr; + u32 Cnt; + +private: + u32 CPU, Num; + + s32 Waitstates[2][16]; + + u32 StartMode; + u32 CurSrcAddr; + u32 CurDstAddr; + u32 RemCount; + u32 IterCount; + u32 SrcAddrInc; + u32 DstAddrInc; + u32 CountMask; + + bool Running; +}; + +#endif diff --git a/src/FIFO.h b/src/FIFO.h new file mode 100644 index 0000000..4130b85 --- /dev/null +++ b/src/FIFO.h @@ -0,0 +1,93 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#ifndef FIFO_H +#define FIFO_H + +#include "types.h" + +template<typename T> +class FIFO +{ +public: + FIFO(u32 num) + { + NumEntries = num; + Entries = new T[num]; + Clear(); + } + + ~FIFO() + { + delete[] Entries; + } + + + void Clear() + { + NumOccupied = 0; + ReadPos = 0; + WritePos = 0; + memset(&Entries[ReadPos], 0, sizeof(T)); + } + + + void Write(T val) + { + if (IsFull()) return; + + Entries[WritePos] = val; + + WritePos++; + if (WritePos >= NumEntries) + WritePos = 0; + + NumOccupied++; + } + + T Read() + { + T ret = Entries[ReadPos]; + if (IsEmpty()) + return ret; + + ReadPos++; + if (ReadPos >= NumEntries) + ReadPos = 0; + + NumOccupied--; + return ret; + } + + T Peek() + { + return Entries[ReadPos]; + } + + u32 Level() { return NumOccupied; } + bool IsEmpty() { return NumOccupied == 0; } + bool IsFull() { return NumOccupied >= NumEntries; } + +private: + u32 NumEntries; + T* Entries; + u32 NumOccupied; + u32 ReadPos, WritePos; +}; + +#endif diff --git a/src/GPU.cpp b/src/GPU.cpp new file mode 100644 index 0000000..28c5d24 --- /dev/null +++ b/src/GPU.cpp @@ -0,0 +1,732 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#include <stdio.h> +#include <string.h> +#include "NDS.h" +#include "GPU.h" + + +namespace GPU +{ + +#define LINE_CYCLES (355*6) +#define HBLANK_CYCLES (256*6) +#define FRAME_CYCLES (LINE_CYCLES * 263) + +u16 VCount; + +u16 DispStat[2], VMatch[2]; + +u8 Palette[2*1024]; +u8 OAM[2*1024]; + +u8 VRAM_A[128*1024]; +u8 VRAM_B[128*1024]; +u8 VRAM_C[128*1024]; +u8 VRAM_D[128*1024]; +u8 VRAM_E[ 64*1024]; +u8 VRAM_F[ 16*1024]; +u8 VRAM_G[ 16*1024]; +u8 VRAM_H[ 32*1024]; +u8 VRAM_I[ 16*1024]; +u8* VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I}; + +u8 VRAMCNT[9]; +u8 VRAMSTAT; + +//u32 VRAM_Base[9]; +//u32 VRAM_Mask[9]; + +u32 VRAMMap_LCDC; + +u32 VRAMMap_ABG[0x20]; +u32 VRAMMap_AOBJ[0x10]; +u32 VRAMMap_BBG[0x8]; +u32 VRAMMap_BOBJ[0x8]; + +u32 VRAMMap_ABGExtPal[4]; +u32 VRAMMap_AOBJExtPal; +u32 VRAMMap_BBGExtPal[4]; +u32 VRAMMap_BOBJExtPal; + +u32 VRAMMap_Texture[4]; +u32 VRAMMap_TexPal[6]; + +u32 VRAMMap_ARM7[2]; + +/*u8* VRAM_ABG[128]; +u8* VRAM_AOBJ[128]; +u8* VRAM_BBG[128]; +u8* VRAM_BOBJ[128]; +u8* VRAM_LCD[128];*/ +/*u8* VRAM_ARM7[2]; + +u8* VRAM_ABGExtPal[4]; +u8* VRAM_AOBJExtPal; +u8* VRAM_BBGExtPal[4]; +u8* VRAM_BOBJExtPal;*/ + +u32 Framebuffer[256*192*2]; + +GPU2D* GPU2D_A; +GPU2D* GPU2D_B; + + +bool Init() +{ + GPU2D_A = new GPU2D(0); + GPU2D_B = new GPU2D(1); + if (!GPU3D::Init()) return false; + + return true; +} + +void DeInit() +{ + delete GPU2D_A; + delete GPU2D_B; + GPU3D::DeInit(); +} + +void Reset() +{ + VCount = 0; + + DispStat[0] = 0; + DispStat[1] = 0; + VMatch[0] = 0; + VMatch[1] = 0; + + memset(Palette, 0, 2*1024); + memset(OAM, 0, 2*1024); + + memset(VRAM_A, 0, 128*1024); + memset(VRAM_B, 0, 128*1024); + memset(VRAM_C, 0, 128*1024); + memset(VRAM_D, 0, 128*1024); + memset(VRAM_E, 0, 64*1024); + memset(VRAM_F, 0, 16*1024); + memset(VRAM_G, 0, 16*1024); + memset(VRAM_H, 0, 32*1024); + memset(VRAM_I, 0, 16*1024); + + memset(VRAMCNT, 0, 9); + VRAMSTAT = 0; + + VRAMMap_LCDC = 0; + + memset(VRAMMap_ABG, 0, 
sizeof(VRAMMap_ABG)); + memset(VRAMMap_AOBJ, 0, sizeof(VRAMMap_AOBJ)); + memset(VRAMMap_BBG, 0, sizeof(VRAMMap_BBG)); + memset(VRAMMap_BOBJ, 0, sizeof(VRAMMap_BOBJ)); + + memset(VRAMMap_ABGExtPal, 0, sizeof(VRAMMap_ABGExtPal)); + VRAMMap_AOBJExtPal = 0; + memset(VRAMMap_BBGExtPal, 0, sizeof(VRAMMap_BBGExtPal)); + VRAMMap_BOBJExtPal = 0; + + memset(VRAMMap_Texture, 0, sizeof(VRAMMap_Texture)); + memset(VRAMMap_TexPal, 0, sizeof(VRAMMap_TexPal)); + + VRAMMap_ARM7[0] = 0; + VRAMMap_ARM7[1] = 0; + + //memset(VRAM_Base, 0, sizeof(VRAM_Base)); + //memset(VRAM_Mask, 0, sizeof(VRAM_Mask)); + + /*memset(VRAM_ABG, 0, sizeof(u8*)*128); + memset(VRAM_AOBJ, 0, sizeof(u8*)*128); + memset(VRAM_BBG, 0, sizeof(u8*)*128); + memset(VRAM_BOBJ, 0, sizeof(u8*)*128); + memset(VRAM_LCD, 0, sizeof(u8*)*128);*/ + /*memset(VRAM_ARM7, 0, sizeof(u8*)*2); + + memset(VRAM_ABGExtPal, 0, sizeof(u8*)*4); + VRAM_AOBJExtPal = NULL; + memset(VRAM_BBGExtPal, 0, sizeof(u8*)*4); + VRAM_BOBJExtPal = NULL;*/ + + for (int i = 0; i < 256*192*2; i++) + { + Framebuffer[i] = 0xFFFFFFFF; + } + + GPU2D_A->Reset(); + GPU2D_B->Reset(); + GPU3D::Reset(); + + GPU2D_A->SetFramebuffer(&Framebuffer[256*192]); + GPU2D_B->SetFramebuffer(&Framebuffer[256*0]); +} + + +// VRAM mapping notes +// +// mirroring: +// unmapped range reads zero +// LCD is mirrored every 0x100000 bytes, the gap between each mirror reads zero +// ABG: +// bank A,B,C,D,E mirror every 0x80000 bytes +// bank F,G mirror at base+0x8000, mirror every 0x80000 bytes +// AOBJ: +// bank A,B,E mirror every 0x40000 bytes +// bank F,G mirror at base+0x8000, mirror every 0x40000 bytes +// BBG: +// bank C mirrors every 0x20000 bytes +// bank H mirrors every 0x10000 bytes +// bank I mirrors at base+0x4000, mirrors every 0x10000 bytes +// BOBJ: +// bank D mirrors every 0x20000 bytes +// bank I mirrors every 0x4000 bytes +// +// untested: +// ARM7 (TODO) +// extended palette (mirroring doesn't apply) +// texture/texpal (does mirroring apply?) 
+// -> trying to use extpal/texture/texpal with no VRAM mapped. +// would likely read all black, but has to be tested. +// +// overlap: +// when reading: values are read from each bank and ORed together +// when writing: value is written to each bank + +#define MAP_RANGE(map, base, n) for (int i = 0; i < n; i++) map[(base)+i] |= bankmask; +#define UNMAP_RANGE(map, base, n) for (int i = 0; i < n; i++) map[(base)+i] &= ~bankmask; + +void MapVRAM_AB(u32 bank, u8 cnt) +{ + u8 oldcnt = VRAMCNT[bank]; + VRAMCNT[bank] = cnt; + + if (oldcnt == cnt) return; + + u8 oldofs = (oldcnt >> 3) & 0x3; + u8 ofs = (cnt >> 3) & 0x3; + u32 bankmask = 1 << bank; + + if (oldcnt & (1<<7)) + { + switch (oldcnt & 0x3) + { + case 0: // LCDC + VRAMMap_LCDC &= ~bankmask; + break; + + case 1: // ABG + UNMAP_RANGE(VRAMMap_ABG, oldofs<<3, 8); + break; + + case 2: // AOBJ + oldofs &= 0x1; + UNMAP_RANGE(VRAMMap_AOBJ, oldofs<<3, 8); + break; + + case 3: // texture + VRAMMap_Texture[oldofs] &= ~bankmask; + break; + } + } + + if (cnt & (1<<7)) + { + switch (cnt & 0x3) + { + case 0: // LCDC + VRAMMap_LCDC |= bankmask; + break; + + case 1: // ABG + MAP_RANGE(VRAMMap_ABG, ofs<<3, 8); + break; + + case 2: // AOBJ + ofs &= 0x1; + MAP_RANGE(VRAMMap_AOBJ, ofs<<3, 8); + break; + + case 3: // texture + VRAMMap_Texture[ofs] |= bankmask; + break; + } + } +} + +void MapVRAM_CD(u32 bank, u8 cnt) +{ + u8 oldcnt = VRAMCNT[bank]; + VRAMCNT[bank] = cnt; + + VRAMSTAT &= ~(1 << (bank-2)); + + if (oldcnt == cnt) return; + + u8 oldofs = (oldcnt >> 3) & 0x7; + u8 ofs = (cnt >> 3) & 0x7; + u32 bankmask = 1 << bank; + + if (oldcnt & (1<<7)) + { + switch (oldcnt & 0x7) + { + case 0: // LCDC + VRAMMap_LCDC &= ~bankmask; + break; + + case 1: // ABG + UNMAP_RANGE(VRAMMap_ABG, oldofs<<3, 8); + break; + + case 2: // ARM7 VRAM + oldofs &= 0x1; + VRAMMap_ARM7[oldofs] &= ~bankmask; + break; + + case 3: // texture + VRAMMap_Texture[oldofs] &= ~bankmask; + break; + + case 4: // BBG/BOBJ + if (bank == 2) + { + UNMAP_RANGE(VRAMMap_BBG, 
0, 8); + } + else + { + UNMAP_RANGE(VRAMMap_BOBJ, 0, 8); + } + break; + } + } + + if (cnt & (1<<7)) + { + switch (cnt & 0x7) + { + case 0: // LCDC + VRAMMap_LCDC |= bankmask; + break; + + case 1: // ABG + MAP_RANGE(VRAMMap_ABG, ofs<<3, 8); + break; + + case 2: // ARM7 VRAM + ofs &= 0x1; + VRAMMap_ARM7[ofs] |= bankmask; + VRAMSTAT |= (1 << (bank-2)); + break; + + case 3: // texture + VRAMMap_Texture[ofs] |= bankmask; + break; + + case 4: // BBG/BOBJ + if (bank == 2) + { + MAP_RANGE(VRAMMap_BBG, 0, 8); + } + else + { + MAP_RANGE(VRAMMap_BOBJ, 0, 8); + } + break; + } + } +} + +void MapVRAM_E(u32 bank, u8 cnt) +{ + u8 oldcnt = VRAMCNT[bank]; + VRAMCNT[bank] = cnt; + + if (oldcnt == cnt) return; + + u32 bankmask = 1 << bank; + + if (oldcnt & (1<<7)) + { + switch (oldcnt & 0x7) + { + case 0: // LCDC + VRAMMap_LCDC &= ~bankmask; + break; + + case 1: // ABG + UNMAP_RANGE(VRAMMap_ABG, 0, 4); + break; + + case 2: // AOBJ + UNMAP_RANGE(VRAMMap_AOBJ, 0, 4); + break; + + case 3: // texture palette + UNMAP_RANGE(VRAMMap_TexPal, 0, 4); + break; + + case 4: // ABG ext palette + UNMAP_RANGE(VRAMMap_ABGExtPal, 0, 4); + GPU2D_A->BGExtPalDirty(0); + GPU2D_A->BGExtPalDirty(2); + break; + } + } + + if (cnt & (1<<7)) + { + switch (cnt & 0x7) + { + case 0: // LCDC + VRAMMap_LCDC |= bankmask; + break; + + case 1: // ABG + MAP_RANGE(VRAMMap_ABG, 0, 4); + break; + + case 2: // AOBJ + MAP_RANGE(VRAMMap_AOBJ, 0, 4); + break; + + case 3: // texture palette + MAP_RANGE(VRAMMap_TexPal, 0, 4); + break; + + case 4: // ABG ext palette + MAP_RANGE(VRAMMap_ABGExtPal, 0, 4); + GPU2D_A->BGExtPalDirty(0); + GPU2D_A->BGExtPalDirty(2); + break; + } + } +} + +void MapVRAM_FG(u32 bank, u8 cnt) +{ + u8 oldcnt = VRAMCNT[bank]; + VRAMCNT[bank] = cnt; + + if (oldcnt == cnt) return; + + u8 oldofs = (oldcnt >> 3) & 0x7; + u8 ofs = (cnt >> 3) & 0x7; + u32 bankmask = 1 << bank; + + if (oldcnt & (1<<7)) + { + switch (oldcnt & 0x7) + { + case 0: // LCDC + VRAMMap_LCDC &= ~bankmask; + break; + + case 1: // ABG + 
VRAMMap_ABG[(oldofs & 0x1) + ((oldofs & 0x2) << 1)] &= ~bankmask; + VRAMMap_ABG[(oldofs & 0x1) + ((oldofs & 0x2) << 1) + 2] &= ~bankmask; + break; + + case 2: // AOBJ + VRAMMap_AOBJ[(oldofs & 0x1) + ((oldofs & 0x2) << 1)] &= ~bankmask; + VRAMMap_AOBJ[(oldofs & 0x1) + ((oldofs & 0x2) << 1) + 2] &= ~bankmask; + break; + + case 3: // texture palette + VRAMMap_TexPal[(oldofs & 0x1) + ((oldofs & 0x2) << 1)] &= ~bankmask; + break; + + case 4: // ABG ext palette + VRAMMap_ABGExtPal[((oldofs & 0x1) << 1)] &= ~bankmask; + VRAMMap_ABGExtPal[((oldofs & 0x1) << 1) + 1] &= ~bankmask; + GPU2D_A->BGExtPalDirty(0); + GPU2D_A->BGExtPalDirty(2); + break; + + case 5: // AOBJ ext palette + VRAMMap_AOBJExtPal &= ~bankmask; + GPU2D_A->OBJExtPalDirty(); + break; + } + } + + if (cnt & (1<<7)) + { + switch (cnt & 0x7) + { + case 0: // LCDC + VRAMMap_LCDC |= bankmask; + break; + + case 1: // ABG + VRAMMap_ABG[(ofs & 0x1) + ((ofs & 0x2) << 1)] |= bankmask; + VRAMMap_ABG[(ofs & 0x1) + ((ofs & 0x2) << 1) + 2] |= bankmask; + break; + + case 2: // AOBJ + VRAMMap_AOBJ[(ofs & 0x1) + ((ofs & 0x2) << 1)] |= bankmask; + VRAMMap_AOBJ[(ofs & 0x1) + ((ofs & 0x2) << 1) + 2] |= bankmask; + break; + + case 3: // texture palette + VRAMMap_TexPal[(ofs & 0x1) + ((ofs & 0x2) << 1)] |= bankmask; + break; + + case 4: // ABG ext palette + VRAMMap_ABGExtPal[((ofs & 0x1) << 1)] |= bankmask; + VRAMMap_ABGExtPal[((ofs & 0x1) << 1) + 1] |= bankmask; + GPU2D_A->BGExtPalDirty(0); + GPU2D_A->BGExtPalDirty(2); + break; + + case 5: // AOBJ ext palette + VRAMMap_AOBJExtPal |= bankmask; + GPU2D_A->OBJExtPalDirty(); + break; + } + } +} + +void MapVRAM_H(u32 bank, u8 cnt) +{ + u8 oldcnt = VRAMCNT[bank]; + VRAMCNT[bank] = cnt; + + if (oldcnt == cnt) return; + + u32 bankmask = 1 << bank; + + if (oldcnt & (1<<7)) + { + switch (oldcnt & 0x3) + { + case 0: // LCDC + VRAMMap_LCDC &= ~bankmask; + break; + + case 1: // BBG + VRAMMap_BBG[0] &= ~bankmask; + VRAMMap_BBG[1] &= ~bankmask; + VRAMMap_BBG[4] &= ~bankmask; + VRAMMap_BBG[5] &= 
~bankmask; + break; + + case 2: // BBG ext palette + UNMAP_RANGE(VRAMMap_BBGExtPal, 0, 4); + GPU2D_B->BGExtPalDirty(0); + GPU2D_B->BGExtPalDirty(2); + break; + } + } + + if (cnt & (1<<7)) + { + switch (cnt & 0x3) + { + case 0: // LCDC + VRAMMap_LCDC |= bankmask; + break; + + case 1: // BBG + VRAMMap_BBG[0] |= bankmask; + VRAMMap_BBG[1] |= bankmask; + VRAMMap_BBG[4] |= bankmask; + VRAMMap_BBG[5] |= bankmask; + break; + + case 2: // BBG ext palette + MAP_RANGE(VRAMMap_BBGExtPal, 0, 4); + GPU2D_B->BGExtPalDirty(0); + GPU2D_B->BGExtPalDirty(2); + break; + } + } +} + +void MapVRAM_I(u32 bank, u8 cnt) +{ + u8 oldcnt = VRAMCNT[bank]; + VRAMCNT[bank] = cnt; + + if (oldcnt == cnt) return; + + u32 bankmask = 1 << bank; + + if (oldcnt & (1<<7)) + { + switch (oldcnt & 0x3) + { + case 0: // LCDC + VRAMMap_LCDC &= ~bankmask; + break; + + case 1: // BBG + VRAMMap_BBG[2] &= ~bankmask; + VRAMMap_BBG[3] &= ~bankmask; + VRAMMap_BBG[6] &= ~bankmask; + VRAMMap_BBG[7] &= ~bankmask; + break; + + case 2: // BOBJ + UNMAP_RANGE(VRAMMap_BOBJ, 0, 8); + break; + + case 3: // BOBJ ext palette + VRAMMap_BOBJExtPal &= ~bankmask; + GPU2D_B->OBJExtPalDirty(); + break; + } + } + + if (cnt & (1<<7)) + { + switch (cnt & 0x3) + { + case 0: // LCDC + VRAMMap_LCDC |= bankmask; + break; + + case 1: // BBG + VRAMMap_BBG[2] |= bankmask; + VRAMMap_BBG[3] |= bankmask; + VRAMMap_BBG[6] |= bankmask; + VRAMMap_BBG[7] |= bankmask; + break; + + case 2: // BOBJ + MAP_RANGE(VRAMMap_BOBJ, 0, 8); + break; + + case 3: // BOBJ ext palette + VRAMMap_BOBJExtPal |= bankmask; + GPU2D_B->OBJExtPalDirty(); + break; + } + } +} + + +void DisplaySwap(u32 val) +{ + if (val) + { + GPU2D_A->SetFramebuffer(&Framebuffer[256*0]); + GPU2D_B->SetFramebuffer(&Framebuffer[256*192]); + } + else + { + GPU2D_A->SetFramebuffer(&Framebuffer[256*192]); + GPU2D_B->SetFramebuffer(&Framebuffer[256*0]); + } +} + + +void StartFrame() +{ + StartScanline(0); +} + +void StartHBlank(u32 line) +{ + DispStat[0] |= (1<<1); + DispStat[1] |= (1<<1); + + if 
(line < 192) NDS::CheckDMAs(0, 0x02); + + if (DispStat[0] & (1<<4)) NDS::SetIRQ(0, NDS::IRQ_HBlank); + if (DispStat[1] & (1<<4)) NDS::SetIRQ(1, NDS::IRQ_HBlank); + + if (line < 262) + NDS::ScheduleEvent(NDS::Event_LCD, true, (LINE_CYCLES - HBLANK_CYCLES), StartScanline, line+1); +} + +void StartScanline(u32 line) +{ + VCount = line; + + DispStat[0] &= ~(1<<1); + DispStat[1] &= ~(1<<1); + + if (line == VMatch[0]) + { + DispStat[0] |= (1<<2); + + if (DispStat[0] & (1<<5)) NDS::SetIRQ(0, NDS::IRQ_VCount); + } + else + DispStat[0] &= ~(1<<2); + + if (line == VMatch[1]) + { + DispStat[1] |= (1<<2); + + if (DispStat[1] & (1<<5)) NDS::SetIRQ(1, NDS::IRQ_VCount); + } + else + DispStat[1] &= ~(1<<2); + + if (line < 192) + { + // draw + GPU2D_A->DrawScanline(line); + GPU2D_B->DrawScanline(line); + + //NDS::ScheduleEvent(LINE_CYCLES, StartScanline, line+1); + } + else if (line == 262) + { + // frame end + + DispStat[0] &= ~(1<<0); + DispStat[1] &= ~(1<<0); + } + else + { + if (line == 192) + { + // VBlank + DispStat[0] |= (1<<0); + DispStat[1] |= (1<<0); + + NDS::CheckDMAs(0, 0x01); + NDS::CheckDMAs(1, 0x11); + + if (DispStat[0] & (1<<3)) NDS::SetIRQ(0, NDS::IRQ_VBlank); + if (DispStat[1] & (1<<3)) NDS::SetIRQ(1, NDS::IRQ_VBlank); + + GPU2D_A->VBlank(); + GPU2D_B->VBlank(); + GPU3D::VBlank(); + } + + //NDS::ScheduleEvent(LINE_CYCLES, StartScanline, line+1); + //NDS::ScheduleEvent(NDS::Event_LCD, true, LINE_CYCLES, StartScanline, line+1); + } + + NDS::ScheduleEvent(NDS::Event_LCD, true, HBLANK_CYCLES, StartHBlank, line); +} + + +void SetDispStat(u32 cpu, u16 val) +{ + val &= 0xFFB8; + DispStat[cpu] &= 0x0047; + DispStat[cpu] |= val; + + VMatch[cpu] = (val >> 8) | ((val & 0x80) << 1); +} + +} diff --git a/src/GPU.h b/src/GPU.h new file mode 100644 index 0000000..a77f6c0 --- /dev/null +++ b/src/GPU.h @@ -0,0 +1,395 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. 
+ + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef GPU_H +#define GPU_H + +#include "GPU2D.h" +#include "GPU3D.h" + +namespace GPU +{ + +extern u16 VCount; + +extern u16 DispStat[2]; + +extern u8 VRAMCNT[9]; +extern u8 VRAMSTAT; + +extern u8 Palette[2*1024]; +extern u8 OAM[2*1024]; + +extern u8 VRAM_A[128*1024]; +extern u8 VRAM_B[128*1024]; +extern u8 VRAM_C[128*1024]; +extern u8 VRAM_D[128*1024]; +extern u8 VRAM_E[ 64*1024]; +extern u8 VRAM_F[ 16*1024]; +extern u8 VRAM_G[ 16*1024]; +extern u8 VRAM_H[ 32*1024]; +extern u8 VRAM_I[ 16*1024]; + +extern u8* VRAM[9]; + +extern u32 VRAMMap_LCDC; +extern u32 VRAMMap_ABG[0x20]; +extern u32 VRAMMap_AOBJ[0x10]; +extern u32 VRAMMap_BBG[0x8]; +extern u32 VRAMMap_BOBJ[0x8]; +extern u32 VRAMMap_ABGExtPal[4]; +extern u32 VRAMMap_AOBJExtPal; +extern u32 VRAMMap_BBGExtPal[4]; +extern u32 VRAMMap_BOBJExtPal; +extern u32 VRAMMap_Texture[4]; +extern u32 VRAMMap_TexPal[6]; +extern u32 VRAMMap_ARM7[2]; + +extern u32 Framebuffer[256*192*2]; + +extern GPU2D* GPU2D_A; +extern GPU2D* GPU2D_B; + + +bool Init(); +void DeInit(); +void Reset(); + +void MapVRAM_AB(u32 bank, u8 cnt); +void MapVRAM_CD(u32 bank, u8 cnt); +void MapVRAM_E(u32 bank, u8 cnt); +void MapVRAM_FG(u32 bank, u8 cnt); +void MapVRAM_H(u32 bank, u8 cnt); +void MapVRAM_I(u32 bank, u8 cnt); + + +template<typename T> +T ReadVRAM_LCDC(u32 addr) +{ + int bank; + + switch (addr & 0xFF8FC000) + { + case 0x06800000: 
case 0x06804000: case 0x06808000: case 0x0680C000: + case 0x06810000: case 0x06814000: case 0x06818000: case 0x0681C000: + bank = 0; + addr &= 0x1FFFF; + break; + + case 0x06820000: case 0x06824000: case 0x06828000: case 0x0682C000: + case 0x06830000: case 0x06834000: case 0x06838000: case 0x0683C000: + bank = 1; + addr &= 0x1FFFF; + break; + + case 0x06840000: case 0x06844000: case 0x06848000: case 0x0684C000: + case 0x06850000: case 0x06854000: case 0x06858000: case 0x0685C000: + bank = 2; + addr &= 0x1FFFF; + break; + + case 0x06860000: case 0x06864000: case 0x06868000: case 0x0686C000: + case 0x06870000: case 0x06874000: case 0x06878000: case 0x0687C000: + bank = 3; + addr &= 0x1FFFF; + break; + + case 0x06880000: case 0x06884000: case 0x06888000: case 0x0688C000: + bank = 4; + addr &= 0xFFFF; + break; + + case 0x06890000: + bank = 5; + addr &= 0x3FFF; + break; + + case 0x06894000: + bank = 6; + addr &= 0x3FFF; + break; + + case 0x06898000: + case 0x0689C000: + bank = 7; + addr &= 0x7FFF; + break; + + case 0x068A0000: + bank = 8; + addr &= 0x3FFF; + break; + + default: return 0; + } + + if (VRAMMap_LCDC & (1<<bank)) return *(T*)&VRAM[bank][addr]; + + return 0; +} + +template<typename T> +void WriteVRAM_LCDC(u32 addr, T val) +{ + int bank; + + switch (addr & 0xFF8FC000) + { + case 0x06800000: case 0x06804000: case 0x06808000: case 0x0680C000: + case 0x06810000: case 0x06814000: case 0x06818000: case 0x0681C000: + bank = 0; + addr &= 0x1FFFF; + break; + + case 0x06820000: case 0x06824000: case 0x06828000: case 0x0682C000: + case 0x06830000: case 0x06834000: case 0x06838000: case 0x0683C000: + bank = 1; + addr &= 0x1FFFF; + break; + + case 0x06840000: case 0x06844000: case 0x06848000: case 0x0684C000: + case 0x06850000: case 0x06854000: case 0x06858000: case 0x0685C000: + bank = 2; + addr &= 0x1FFFF; + break; + + case 0x06860000: case 0x06864000: case 0x06868000: case 0x0686C000: + case 0x06870000: case 0x06874000: case 0x06878000: case 0x0687C000: + bank = 3; + 
addr &= 0x1FFFF; + break; + + case 0x06880000: case 0x06884000: case 0x06888000: case 0x0688C000: + bank = 4; + addr &= 0xFFFF; + break; + + case 0x06890000: + bank = 5; + addr &= 0x3FFF; + break; + + case 0x06894000: + bank = 6; + addr &= 0x3FFF; + break; + + case 0x06898000: + case 0x0689C000: + bank = 7; + addr &= 0x7FFF; + break; + + case 0x068A0000: + bank = 8; + addr &= 0x3FFF; + break; + + default: return; + } + + if (VRAMMap_LCDC & (1<<bank)) *(T*)&VRAM[bank][addr] = val; +} + + +template<typename T> +T ReadVRAM_ABG(u32 addr) +{ + u32 ret = 0; + u32 mask = VRAMMap_ABG[(addr >> 14) & 0x1F]; + + if (mask & (1<<0)) ret |= *(T*)&VRAM_A[addr & 0x1FFFF]; + if (mask & (1<<1)) ret |= *(T*)&VRAM_B[addr & 0x1FFFF]; + if (mask & (1<<2)) ret |= *(T*)&VRAM_C[addr & 0x1FFFF]; + if (mask & (1<<3)) ret |= *(T*)&VRAM_D[addr & 0x1FFFF]; + if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0xFFFF]; + if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF]; + if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF]; + + return ret; +} + +template<typename T> +void WriteVRAM_ABG(u32 addr, T val) +{ + u32 mask = VRAMMap_ABG[(addr >> 14) & 0x1F]; + + if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val; + if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val; + if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; + if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val; + if (mask & (1<<4)) *(T*)&VRAM_E[addr & 0xFFFF] = val; + if (mask & (1<<5)) *(T*)&VRAM_F[addr & 0x3FFF] = val; + if (mask & (1<<6)) *(T*)&VRAM_G[addr & 0x3FFF] = val; +} + + +template<typename T> +T ReadVRAM_AOBJ(u32 addr) +{ + u32 ret = 0; + u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF]; + + if (mask & (1<<0)) ret |= *(T*)&VRAM_A[addr & 0x1FFFF]; + if (mask & (1<<1)) ret |= *(T*)&VRAM_B[addr & 0x1FFFF]; + if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0xFFFF]; + if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF]; + if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF]; + + return ret; +} + +template<typename T> +void 
WriteVRAM_AOBJ(u32 addr, T val) +{ + u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF]; + + if (mask & (1<<0)) *(T*)&VRAM_A[addr & 0x1FFFF] = val; + if (mask & (1<<1)) *(T*)&VRAM_B[addr & 0x1FFFF] = val; + if (mask & (1<<4)) *(T*)&VRAM_E[addr & 0xFFFF] = val; + if (mask & (1<<5)) *(T*)&VRAM_F[addr & 0x3FFF] = val; + if (mask & (1<<6)) *(T*)&VRAM_G[addr & 0x3FFF] = val; +} + + +template<typename T> +T ReadVRAM_BBG(u32 addr) +{ + u32 ret = 0; + u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7]; + + if (mask & (1<<2)) ret |= *(T*)&VRAM_C[addr & 0x1FFFF]; + if (mask & (1<<7)) ret |= *(T*)&VRAM_H[addr & 0x7FFF]; + if (mask & (1<<8)) ret |= *(T*)&VRAM_I[addr & 0x3FFF]; + + return ret; +} + +template<typename T> +void WriteVRAM_BBG(u32 addr, T val) +{ + u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7]; + + if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; + if (mask & (1<<7)) *(T*)&VRAM_H[addr & 0x7FFF] = val; + if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val; +} + + +template<typename T> +T ReadVRAM_BOBJ(u32 addr) +{ + u32 ret = 0; + u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7]; + + if (mask & (1<<3)) ret |= *(T*)&VRAM_D[addr & 0x1FFFF]; + if (mask & (1<<8)) ret |= *(T*)&VRAM_I[addr & 0x3FFF]; + + return ret; +} + +template<typename T> +void WriteVRAM_BOBJ(u32 addr, T val) +{ + u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7]; + + if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val; + if (mask & (1<<8)) *(T*)&VRAM_I[addr & 0x3FFF] = val; +} + + +template<typename T> +T ReadVRAM_ARM7(u32 addr) +{ + u32 ret = 0; + u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1]; + + if (mask & (1<<2)) ret |= *(T*)&VRAM_C[addr & 0x1FFFF]; + if (mask & (1<<3)) ret |= *(T*)&VRAM_D[addr & 0x1FFFF]; + + return ret; +} + +template<typename T> +void WriteVRAM_ARM7(u32 addr, T val) +{ + u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1]; + + if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; + if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val; +} + + +template<typename T> +T ReadVRAM_BG(u32 addr) +{ + if ((addr & 
0xFFE00000) == 0x06000000) + return ReadVRAM_ABG<T>(addr); + else + return ReadVRAM_BBG<T>(addr); +} + +template<typename T> +T ReadVRAM_OBJ(u32 addr) +{ + if ((addr & 0xFFE00000) == 0x06400000) + return ReadVRAM_AOBJ<T>(addr); + else + return ReadVRAM_BOBJ<T>(addr); +} + + +template<typename T> +T ReadVRAM_Texture(u32 addr) +{ + u32 ret = 0; + u32 mask = VRAMMap_Texture[(addr >> 17) & 0x3]; + + if (mask & (1<<0)) ret |= *(T*)&VRAM_A[addr & 0x1FFFF]; + if (mask & (1<<1)) ret |= *(T*)&VRAM_B[addr & 0x1FFFF]; + if (mask & (1<<2)) ret |= *(T*)&VRAM_C[addr & 0x1FFFF]; + if (mask & (1<<3)) ret |= *(T*)&VRAM_D[addr & 0x1FFFF]; + + return ret; +} + +template<typename T> +T ReadVRAM_TexPal(u32 addr) +{ + u32 ret = 0; + if (addr >= 0x18000) return 0; + u32 mask = VRAMMap_TexPal[(addr >> 14) & 0x7]; + + if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0xFFFF]; + if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF]; + if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF]; + + return ret; +} + + +void DisplaySwap(u32 val); + +void StartFrame(); +void StartScanline(u32 line); + +void SetDispStat(u32 cpu, u16 val); + +} + +#endif diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp new file mode 100644 index 0000000..cedfe1e --- /dev/null +++ b/src/GPU2D.cpp @@ -0,0 +1,1604 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#include <stdio.h> +#include <string.h> +#include "NDS.h" +#include "GPU.h" + + +// notes on color conversion +// +// * BLDCNT special effects are applied on 18bit colors +// -> layers are converted to 18bit before being composited +// -> 'brightness up' effect does: x = x + (63-x)*factor +// * colors are converted as follows: 18bit = 15bit * 2 +// -> white comes out as 62,62,62 and not 63,63,63 +// * VRAM/FIFO display modes convert colors the same way +// * 3D engine converts colors differently (18bit = 15bit * 2 + 1, except 0 = 0) +// * 'screen disabled' white is 63,63,63 +// +// oh also, changing DISPCNT bit16-17 midframe doesn't work (ignored? applied for next frame?) +// TODO, eventually: check whether other DISPCNT bits can be changed midframe +// +// for VRAM display mode, VRAM must be mapped to LCDC +// +// sprite blending rules +// * destination must be selected as 2nd target +// * sprite must be semitransparent or bitmap sprite +// * blending is applied instead of the selected color effect, even if it is 'none'. +// * for bitmap sprites: EVA = alpha+1, EVB = 16-EVA +// * for bitmap sprites: alpha=0 is always transparent, even if blending doesn't apply +// +// 3D blending rules +// +// 3D/3D blending seems to follow these equations: +// dstColor = srcColor*srcAlpha + dstColor*(1-srcAlpha) +// dstAlpha = max(srcAlpha, dstAlpha) +// blending isn't applied if dstAlpha is zero. +// +// 3D/2D blending rules +// * if destination selected as 2nd target: +// blending is applied instead of the selected color effect, using full 5bit alpha from 3D layer +// this even if the selected color effect is 'none'. +// apparently this works even if BG0 isn't selected as 1st target +// * if BG0 is selected as 1st target, destination not selected as 2nd target: +// brightness up/down effect is applied if selected. if blending is selected, it doesn't apply. +// * 3D layer pixels with alpha=0 are always transparent. 
+ + +GPU2D::GPU2D(u32 num) +{ + Num = num; +} + +GPU2D::~GPU2D() +{ +} + +void GPU2D::Reset() +{ + DispCnt = 0; + memset(BGCnt, 0, 4*2); + memset(BGXPos, 0, 4*2); + memset(BGYPos, 0, 4*2); + memset(BGXRef, 0, 2*4); + memset(BGYRef, 0, 2*4); + memset(BGXRefInternal, 0, 2*4); + memset(BGYRefInternal, 0, 2*4); + memset(BGRotA, 0, 2*2); + memset(BGRotB, 0, 2*2); + memset(BGRotC, 0, 2*2); + memset(BGRotD, 0, 2*2); + + BlendCnt = 0; + EVA = 16; + EVB = 0; + EVY = 0; + + CaptureCnt = 0; + + MasterBrightness = 0; + + BGExtPalStatus[0] = 0; + BGExtPalStatus[1] = 0; + BGExtPalStatus[2] = 0; + BGExtPalStatus[3] = 0; + OBJExtPalStatus = 0; +} + +void GPU2D::SetFramebuffer(u32* buf) +{ + Framebuffer = buf; +} + + +u8 GPU2D::Read8(u32 addr) +{ + printf("!! GPU2D READ8 %08X\n", addr); + return 0; +} + +u16 GPU2D::Read16(u32 addr) +{ + switch (addr & 0x00000FFF) + { + case 0x000: return DispCnt&0xFFFF; + case 0x002: return DispCnt>>16; + + case 0x008: return BGCnt[0]; + case 0x00A: return BGCnt[1]; + case 0x00C: return BGCnt[2]; + case 0x00E: return BGCnt[3]; + + case 0x050: return BlendCnt; + + case 0x064: return CaptureCnt & 0xFFFF; + case 0x066: return CaptureCnt >> 16; + } + + printf("unknown GPU read16 %08X\n", addr); + return 0; +} + +u32 GPU2D::Read32(u32 addr) +{ + switch (addr & 0x00000FFF) + { + case 0x000: return DispCnt; + + case 0x064: return CaptureCnt; + } + + return Read16(addr) | (Read16(addr+2) << 16); +} + +void GPU2D::Write8(u32 addr, u8 val) +{ + printf("!! 
GPU2D WRITE8 %08X %02X\n", addr, val); +} + +void GPU2D::Write16(u32 addr, u16 val) +{ + switch (addr & 0x00000FFF) + { + case 0x000: + DispCnt = (DispCnt & 0xFFFF0000) | val; + //printf("[L] DISPCNT=%08X\n", DispCnt); + return; + case 0x002: + DispCnt = (DispCnt & 0x0000FFFF) | (val << 16); + //printf("[H] DISPCNT=%08X\n", DispCnt); + return; + + case 0x008: BGCnt[0] = val; return; + case 0x00A: BGCnt[1] = val; return; + case 0x00C: BGCnt[2] = val; return; + case 0x00E: BGCnt[3] = val; return; + + case 0x010: BGXPos[0] = val; return; + case 0x012: BGYPos[0] = val; return; + case 0x014: BGXPos[1] = val; return; + case 0x016: BGYPos[1] = val; return; + case 0x018: BGXPos[2] = val; return; + case 0x01A: BGYPos[2] = val; return; + case 0x01C: BGXPos[3] = val; return; + case 0x01E: BGYPos[3] = val; return; + + case 0x020: BGRotA[0] = val; return; + case 0x022: BGRotB[0] = val; return; + case 0x024: BGRotC[0] = val; return; + case 0x026: BGRotD[0] = val; return; + case 0x028: + BGXRef[0] = (BGXRef[0] & 0xFFFF0000) | val; + if (GPU::VCount < 192) BGXRefInternal[0] = BGXRef[0]; + return; + case 0x02A: + if (val & 0x0800) val |= 0xF000; + BGXRef[0] = (BGXRef[0] & 0xFFFF) | (val << 16); + if (GPU::VCount < 192) BGXRefInternal[0] = BGXRef[0]; + return; + case 0x02C: + BGYRef[0] = (BGYRef[0] & 0xFFFF0000) | val; + if (GPU::VCount < 192) BGYRefInternal[0] = BGYRef[0]; + return; + case 0x02E: + if (val & 0x0800) val |= 0xF000; + BGYRef[0] = (BGYRef[0] & 0xFFFF) | (val << 16); + if (GPU::VCount < 192) BGYRefInternal[0] = BGYRef[0]; + return; + + case 0x030: BGRotA[1] = val; return; + case 0x032: BGRotB[1] = val; return; + case 0x034: BGRotC[1] = val; return; + case 0x036: BGRotD[1] = val; return; + case 0x038: + BGXRef[1] = (BGXRef[1] & 0xFFFF0000) | val; + if (GPU::VCount < 192) BGXRefInternal[1] = BGXRef[1]; + return; + case 0x03A: + if (val & 0x0800) val |= 0xF000; + BGXRef[1] = (BGXRef[1] & 0xFFFF) | (val << 16); + if (GPU::VCount < 192) BGXRefInternal[1] = BGXRef[1]; + 
return; + case 0x03C: + BGYRef[1] = (BGYRef[1] & 0xFFFF0000) | val; + if (GPU::VCount < 192) BGYRefInternal[1] = BGYRef[1]; + return; + case 0x03E: + if (val & 0x0800) val |= 0xF000; + BGYRef[1] = (BGYRef[1] & 0xFFFF) | (val << 16); + if (GPU::VCount < 192) BGYRefInternal[1] = BGYRef[1]; + return; + + case 0x050: BlendCnt = val; return; + case 0x052: + EVA = val & 0x1F; + if (EVA > 16) EVA = 16; + EVB = (val >> 8) & 0x1F; + if (EVB > 16) EVB = 16; + return; + case 0x054: + EVY = val & 0x1F; + if (EVY > 16) EVY = 16; + return; + + case 0x06C: MasterBrightness = val; return; + } + + //printf("unknown GPU write16 %08X %04X\n", addr, val); +} + +void GPU2D::Write32(u32 addr, u32 val) +{ + switch (addr & 0x00000FFF) + { + case 0x000: + //printf("DISPCNT=%08X\n", val); + DispCnt = val; + return; + + case 0x028: + if (val & 0x08000000) val |= 0xF0000000; + BGXRef[0] = val; + if (GPU::VCount < 192) BGXRefInternal[0] = BGXRef[0]; + return; + case 0x02C: + if (val & 0x08000000) val |= 0xF0000000; + BGYRef[0] = val; + if (GPU::VCount < 192) BGYRefInternal[0] = BGYRef[0]; + return; + + case 0x038: + if (val & 0x08000000) val |= 0xF0000000; + BGXRef[1] = val; + if (GPU::VCount < 192) BGXRefInternal[1] = BGXRef[1]; + return; + case 0x03C: + if (val & 0x08000000) val |= 0xF0000000; + BGYRef[1] = val; + if (GPU::VCount < 192) BGYRefInternal[1] = BGYRef[1]; + return; + + case 0x064: + // TODO: check what happens when writing to it during display + // esp. if a capture is happening + CaptureCnt = val & 0xEF3F1F1F; + return; + } + + Write16(addr, val&0xFFFF); + Write16(addr+2, val>>16); +} + + +void GPU2D::DrawScanline(u32 line) +{ + u32* dst = &Framebuffer[256*line]; + + u32 dispmode = DispCnt >> 16; + dispmode &= (Num ? 
0x1 : 0x3); + + switch (dispmode) + { + case 0: // screen off + { + for (int i = 0; i < 256; i++) + dst[i] = 0xFF3F3F3F; + } + break; + + case 1: // regular display + { + DrawScanline_Mode1(line, dst); + } + break; + + case 2: // VRAM display + { + u32 vrambank = (DispCnt >> 18) & 0x3; + if (GPU::VRAMMap_LCDC & (1<<vrambank)) + { + u16* vram = (u16*)GPU::VRAM[vrambank]; + vram = &vram[line * 256]; + + for (int i = 0; i < 256; i++) + { + u16 color = vram[i]; + u8 r = (color & 0x001F) << 1; + u8 g = (color & 0x03E0) >> 4; + u8 b = (color & 0x7C00) >> 9; + + dst[i] = r | (g << 8) | (b << 16); + } + } + else + { + for (int i = 0; i < 256; i++) + { + dst[i] = 0; + } + } + } + break; + + case 3: // FIFO display + { + // TODO + } + break; + } + + // capture + if ((!Num) && (CaptureCnt & (1<<31))) + { + u32 capwidth, capheight; + switch ((CaptureCnt >> 20) & 0x3) + { + case 0: capwidth = 128; capheight = 128; break; + case 1: capwidth = 256; capheight = 64; break; + case 2: capwidth = 256; capheight = 128; break; + case 3: capwidth = 256; capheight = 192; break; + } + + if (line < capheight) + DoCapture(line, capwidth, dst); + } + + // master brightness + if (dispmode != 0) + { + if ((MasterBrightness >> 14) == 1) + { + // up + u32 factor = MasterBrightness & 0x1F; + if (factor > 16) factor = 16; + + for (int i = 0; i < 256; i++) + { + u32 val = dst[i]; + + u32 r = val & 0x00003F; + u32 g = val & 0x003F00; + u32 b = val & 0x3F0000; + + r += (((0x00003F - r) * factor) >> 4); + g += ((((0x003F00 - g) * factor) >> 4) & 0x003F00); + b += ((((0x3F0000 - b) * factor) >> 4) & 0x3F0000); + + dst[i] = r | g | b; + } + } + else if ((MasterBrightness >> 14) == 2) + { + // down + u32 factor = MasterBrightness & 0x1F; + if (factor > 16) factor = 16; + + for (int i = 0; i < 256; i++) + { + u32 val = dst[i]; + + u32 r = val & 0x00003F; + u32 g = val & 0x003F00; + u32 b = val & 0x3F0000; + + r -= ((r * factor) >> 4); + g -= (((g * factor) >> 4) & 0x003F00); + b -= (((b * factor) >> 4) & 
0x3F0000); + + dst[i] = r | g | b; + } + } + } + + // convert to 32-bit RGBA + for (int i = 0; i < 256; i++) + dst[i] = ((dst[i] & 0x003F3F3F) << 2) | + ((dst[i] & 0x00303030) >> 4) | + 0xFF000000; +} + +void GPU2D::VBlank() +{ + BGXRefInternal[0] = BGXRef[0]; + BGXRefInternal[1] = BGXRef[1]; + BGYRefInternal[0] = BGYRef[0]; + BGYRefInternal[1] = BGYRef[1]; + + CaptureCnt &= ~(1<<31); +} + + +void GPU2D::DoCapture(u32 line, u32 width, u32* src) +{ + u32 dstvram = (CaptureCnt >> 16) & 0x3; + + // TODO: confirm this + // it should work like VRAM display mode, which requires VRAM to be mapped to LCDC + if (!(GPU::VRAMMap_LCDC & (1<<dstvram))) + return; + + u16* dst = (u16*)GPU::VRAM[dstvram]; + u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width); + + if (CaptureCnt & (1<<24)) + src = (u32*)GPU3D::GetLine(line); + + u16* srcB = NULL; + u32 srcBaddr = line * 256; + + if (CaptureCnt & (1<<25)) + { + // TODO: FIFO mode + } + else + { + u32 srcvram = (DispCnt >> 18) & 0x3; + if (GPU::VRAMMap_LCDC & (1<<srcvram)) + srcB = (u16*)GPU::VRAM[srcvram]; + + if (((DispCnt >> 16) & 0x3) != 2) + srcBaddr += ((CaptureCnt >> 26) & 0x3) << 14; + } + + dstaddr &= 0xFFFF; + srcBaddr &= 0xFFFF; + + switch ((DispCnt >> 29) & 0x3) + { + case 0: // source A + { + for (u32 i = 0; i < width; i++) + { + u32 val = src[i]; + + // TODO: check what happens when alpha=0 + + u32 r = (val >> 1) & 0x1F; + u32 g = (val >> 9) & 0x1F; + u32 b = (val >> 17) & 0x1F; + u32 a = ((val >> 24) != 0) ? 
0x8000 : 0; + + dst[dstaddr] = r | (g << 5) | (b << 10) | a; + dstaddr = (dstaddr + 1) & 0xFFFF; + } + } + break; + + case 1: // source B + { + if (srcB) + { + for (u32 i = 0; i < width; i++) + { + dst[dstaddr] = srcB[srcBaddr]; + srcBaddr = (srcBaddr + 1) & 0xFFFF; + dstaddr = (dstaddr + 1) & 0xFFFF; + } + } + else + { + for (u32 i = 0; i < width; i++) + { + dst[dstaddr] = 0; + dstaddr = (dstaddr + 1) & 0xFFFF; + } + } + } + break; + + case 2: // sources A+B + case 3: + { + u32 eva = DispCnt & 0x1F; + u32 evb = (DispCnt >> 8) & 0x1F; + + // checkme + if (eva > 16) eva = 16; + if (evb > 16) evb = 16; + + if (srcB) + { + for (u32 i = 0; i < width; i++) + { + u32 val = src[i]; + + // TODO: check what happens when alpha=0 + + u32 rA = (val >> 1) & 0x1F; + u32 gA = (val >> 9) & 0x1F; + u32 bA = (val >> 17) & 0x1F; + u32 aA = ((val >> 24) != 0) ? 1 : 0; + + val = srcB[srcBaddr]; + + u32 rB = val & 0x1F; + u32 gB = (val >> 5) & 0x1F; + u32 bB = (val >> 10) & 0x1F; + u32 aB = val >> 15; + + u32 rD = ((rA * aA * eva) + (rB * aB * evb)) >> 4; + u32 gD = ((gA * aA * eva) + (gB * aB * evb)) >> 4; + u32 bD = ((bA * aA * eva) + (bB * aB * evb)) >> 4; + u32 aD = (eva>0 ? aA : 0) | (evb>0 ? aB : 0); + + dst[dstaddr] = rD | (gD << 5) | (bD << 10) | (aD << 15); + srcBaddr = (srcBaddr + 1) & 0xFFFF; + dstaddr = (dstaddr + 1) & 0xFFFF; + } + } + else + { + for (u32 i = 0; i < width; i++) + { + u32 val = src[i]; + + // TODO: check what happens when alpha=0 + + u32 rA = (val >> 1) & 0x1F; + u32 gA = (val >> 9) & 0x1F; + u32 bA = (val >> 17) & 0x1F; + u32 aA = ((val >> 24) != 0) ? 1 : 0; + + u32 rD = (rA * aA * eva) >> 4; + u32 gD = (gA * aA * eva) >> 4; + u32 bD = (bA * aA * eva) >> 4; + u32 aD = (eva>0 ? 
aA : 0); + + dst[dstaddr] = rD | (gD << 5) | (bD << 10) | (aD << 15); + dstaddr = (dstaddr + 1) & 0xFFFF; + } + } + } + break; + } +} + + +void GPU2D::BGExtPalDirty(u32 base) +{ + BGExtPalStatus[base] = 0; + BGExtPalStatus[base+1] = 0; +} + +void GPU2D::OBJExtPalDirty() +{ + OBJExtPalStatus = 0; +} + + +u16* GPU2D::GetBGExtPal(u32 slot, u32 pal) +{ + u16* dst = &BGExtPalCache[slot][pal << 8]; + + if (!(BGExtPalStatus[slot] & (1<<pal))) + { + if (Num) + { + if (GPU::VRAMMap_BBGExtPal[slot] & (1<<7)) + memcpy(dst, &GPU::VRAM_H[(slot << 13) + (pal << 9)], 256*2); + else + memset(dst, 0, 256*2); + } + else + { + memset(dst, 0, 256*2); + + if (GPU::VRAMMap_ABGExtPal[slot] & (1<<4)) + for (int i = 0; i < 256; i+=2) + *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_E[(slot << 13) + (pal << 9) + (i << 1)]; + + if (GPU::VRAMMap_ABGExtPal[slot] & (1<<5)) + for (int i = 0; i < 256; i+=2) + *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_F[((slot&1) << 13) + (pal << 9) + (i << 1)]; + + if (GPU::VRAMMap_ABGExtPal[slot] & (1<<6)) + for (int i = 0; i < 256; i+=2) + *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_G[((slot&1) << 13) + (pal << 9) + (i << 1)]; + } + + BGExtPalStatus[slot] |= (1<<pal); + } + + return dst; +} + +u16* GPU2D::GetOBJExtPal(u32 pal) +{ + u16* dst = &OBJExtPalCache[pal << 8]; + + if (!(OBJExtPalStatus & (1<<pal))) + { + if (Num) + { + if (GPU::VRAMMap_BOBJExtPal & (1<<8)) + memcpy(dst, &GPU::VRAM_I[(pal << 9)], 256*2); + else + memset(dst, 0, 256*2); + } + else + { + memset(dst, 0, 256*2); + + if (GPU::VRAMMap_AOBJExtPal & (1<<5)) + for (int i = 0; i < 256; i+=2) + *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_F[(pal << 9) + (i << 1)]; + + if (GPU::VRAMMap_AOBJExtPal & (1<<6)) + for (int i = 0; i < 256; i+=2) + *(u32*)&dst[i] |= *(u32*)&GPU::VRAM_G[(pal << 9) + (i << 1)]; + } + + OBJExtPalStatus |= (1<<pal); + } + + return dst; +} + + +template<u32 bgmode> +void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u32* dst) +{ + for (int i = 3; i >= 0; i--) + { + if ((BGCnt[3] & 0x3) == i) + { + if 
(DispCnt & 0x0800) + { + if (bgmode >= 3) + DrawBG_Extended(line, dst, 3); + else if (bgmode >= 1) + {} // todo: rotscale + else + DrawBG_Text(line, dst, 3); + } + } + if ((BGCnt[2] & 0x3) == i) + { + if (DispCnt & 0x0400) + { + if (bgmode == 5) + DrawBG_Extended(line, dst, 2); + else if (bgmode == 4 || bgmode == 2) + {} // todo: rotscale + else + DrawBG_Text(line, dst, 2); + } + } + if ((BGCnt[1] & 0x3) == i) + { + if (DispCnt & 0x0200) + { + DrawBG_Text(line, dst, 1); + } + } + if ((BGCnt[0] & 0x3) == i) + { + if (DispCnt & 0x0100) + { + if ((!Num) && (DispCnt & 0x8)) + DrawBG_3D(line, dst); + else + DrawBG_Text(line, dst, 0); + } + } + if (DispCnt & 0x1000) + InterleaveSprites(spritebuf, 0x8000 | (i<<16), dst); + } +} + +void GPU2D::DrawScanline_Mode1(u32 line, u32* dst) +{ + u32 linebuf[256*2]; + + u32 backdrop; + if (Num) backdrop = *(u16*)&GPU::Palette[0x400]; + else backdrop = *(u16*)&GPU::Palette[0]; + + { + u8 r = (backdrop & 0x001F) << 1; + u8 g = (backdrop & 0x03E0) >> 4; + u8 b = (backdrop & 0x7C00) >> 9; + + backdrop = r | (g << 8) | (b << 16) | 0x20000000; + + for (int i = 0; i < 256; i++) + linebuf[i] = backdrop; + } + + // prerender sprites + u32 spritebuf[256]; + memset(spritebuf, 0, 256*4); + if (DispCnt & 0x1000) DrawSprites(line, spritebuf); + + switch (DispCnt & 0x7) + { + case 0: DrawScanlineBGMode<0>(line, spritebuf, linebuf); break; + case 1: DrawScanlineBGMode<1>(line, spritebuf, linebuf); break; + case 2: DrawScanlineBGMode<2>(line, spritebuf, linebuf); break; + case 3: DrawScanlineBGMode<3>(line, spritebuf, linebuf); break; + case 4: DrawScanlineBGMode<4>(line, spritebuf, linebuf); break; + case 5: DrawScanlineBGMode<5>(line, spritebuf, linebuf); break; + } + + // color special effects + // can likely be optimized + + u32 bldcnteffect = (BlendCnt >> 6) & 0x3; + + for (int i = 0; i < 256; i++) + { + u32 val1 = linebuf[i]; + u32 val2 = linebuf[256+i]; + + u32 coloreffect, eva, evb; + + u32 flag1 = val1 >> 24; + if ((flag1 & 0x80) && 
(BlendCnt & ((val2 >> 16) & 0xFF00))) + { + // sprite blending + + coloreffect = 1; + + if (flag1 & 0x40) + { + eva = flag1 & 0x1F; + evb = 16 - eva; + } + else + { + eva = EVA; + evb = EVB; + } + } + else if ((flag1 & 0x40) && (BlendCnt & ((val2 >> 16) & 0xFF00))) + { + // 3D layer blending + + eva = (flag1 & 0x1F) + 1; + evb = 32 - eva; + + u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 5; + u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb)) >> 5) & 0x007F00; + u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb)) >> 5) & 0x7F0000; + + if (eva <= 16) + { + r += 0x000001; + g += 0x000100; + b += 0x010000; + } + + if (r > 0x00003F) r = 0x00003F; + if (g > 0x003F00) g = 0x003F00; + if (b > 0x3F0000) b = 0x3F0000; + + dst[i] = r | g | b | 0xFF000000; + + continue; + } + else if (BlendCnt & flag1) + { + if ((bldcnteffect == 1) && (BlendCnt & ((val2 >> 16) & 0xFF00))) + { + coloreffect = 1; + eva = EVA; + evb = EVB; + } + else if (bldcnteffect >= 2) + coloreffect = bldcnteffect; + else + coloreffect = 0; + } + else + coloreffect = 0; + + switch (coloreffect) + { + case 0: + dst[i] = val1; + break; + + case 1: + { + u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 4; + u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb)) >> 4) & 0x007F00; + u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb)) >> 4) & 0x7F0000; + + if (r > 0x00003F) r = 0x00003F; + if (g > 0x003F00) g = 0x003F00; + if (b > 0x3F0000) b = 0x3F0000; + + dst[i] = r | g | b | 0xFF000000; + } + break; + + case 2: + { + u32 r = val1 & 0x00003F; + u32 g = val1 & 0x003F00; + u32 b = val1 & 0x3F0000; + + r += ((0x00003F - r) * EVY) >> 4; + g += (((0x003F00 - g) * EVY) >> 4) & 0x003F00; + b += (((0x3F0000 - b) * EVY) >> 4) & 0x3F0000; + + dst[i] = r | g | b | 0xFF000000; + } + break; + + case 3: + { + u32 r = val1 & 0x00003F; + u32 g = val1 & 0x003F00; + u32 b = val1 & 0x3F0000; + + r -= (r * EVY) >> 4; + g -= ((g * EVY) >> 4) & 
0x003F00; + b -= ((b * EVY) >> 4) & 0x3F0000; + + dst[i] = r | g | b | 0xFF000000; + } + break; + } + } +} + + +void GPU2D::DrawPixel(u32* dst, u16 color, u32 flag) +{ + u8 r = (color & 0x001F) << 1; + u8 g = (color & 0x03E0) >> 4; + u8 b = (color & 0x7C00) >> 9; + + *(dst+256) = *dst; + *dst = r | (g << 8) | (b << 16) | flag; +} + +void GPU2D::DrawBG_3D(u32 line, u32* dst) +{ + // TODO: window, as for everything + // also check if window can prevent blending from happening + + u32* src = GPU3D::GetLine(line); + + u16 xoff = BGXPos[0]; + int i = 0; + int iend = 256; + + if (xoff & 0x100) + { + i = (0x100 - (xoff & 0xFF)); + xoff += i; + } + if ((xoff - i + iend - 1) & 0x100) + { + iend -= (xoff & 0xFF); + } + + for (; i < iend; i++) + { + u32 c = src[xoff]; + xoff++; + + if ((c >> 24) == 0) continue; + + dst[i+256] = dst[i]; + dst[i] = c | 0x40000000; + } +} + +void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum) +{ + u16 bgcnt = BGCnt[bgnum]; + + u32 tilesetaddr, tilemapaddr; + u16* pal; + u32 extpal, extpalslot; + + u16 xoff = BGXPos[bgnum]; + u16 yoff = BGYPos[bgnum] + line; + + u32 widexmask = (bgcnt & 0x4000) ? 0x100 : 0; + + extpal = (DispCnt & 0x40000000); + if (extpal) extpalslot = ((bgnum<2) && (bgcnt&0x2000)) ? 
(2+bgnum) : bgnum; + + if (Num) + { + tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12); + tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3); + + pal = (u16*)&GPU::Palette[0x400]; + } + else + { + tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + + pal = (u16*)&GPU::Palette[0]; + } + + // adjust Y position in tilemap + if (bgcnt & 0x8000) + { + tilemapaddr += ((yoff & 0x1F8) << 3); + if (bgcnt & 0x4000) + tilemapaddr += ((yoff & 0x100) << 3); + } + else + tilemapaddr += ((yoff & 0xF8) << 3); + + u16 curtile; + u16* curpal; + u32 pixelsaddr; + + if (bgcnt & 0x0080) + { + // 256-color + + // preload shit as needed + if (xoff & 0x7) + { + // load a new tile + curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)); + + if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12); + else curpal = pal; + + pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 6) + + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 3); + } + + for (int i = 0; i < 256; i++) + { + if (!(xoff & 0x7)) + { + // load a new tile + curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)); + + if (extpal) curpal = GetBGExtPal(extpalslot, curtile>>12); + else curpal = pal; + + pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 6) + + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 3); + } + + // draw pixel + u8 color; + u32 tilexoff = (curtile & 0x0400) ? 
(7-(xoff&0x7)) : (xoff&0x7); + color = GPU::ReadVRAM_BG<u8>(pixelsaddr + tilexoff); + + if (color) + DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum); + + xoff++; + } + } + else + { + // 16-color + + // preload shit as needed + if (xoff & 0x7) + { + // load a new tile + curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)); + curpal = pal + ((curtile & 0xF000) >> 8); + pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5) + + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); + } + + for (int i = 0; i < 256; i++) + { + if (!(xoff & 0x7)) + { + // load a new tile + curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)); + curpal = pal + ((curtile & 0xF000) >> 8); + pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5) + + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); + } + + // draw pixel + // TODO: optimize VRAM access + u8 color; + u32 tilexoff = (curtile & 0x0400) ? (7-(xoff&0x7)) : (xoff&0x7); + if (tilexoff & 0x1) + { + color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) >> 4; + } + else + { + color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) & 0x0F; + } + + if (color) + DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum); + + xoff++; + } + } +} + +void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) +{ + u16 bgcnt = BGCnt[bgnum]; + + u32 tilesetaddr, tilemapaddr; + u16* pal; + u32 extpal; + + u32 coordmask; + u32 yshift; + switch (bgcnt & 0xC000) + { + case 0x0000: coordmask = 0x07800; yshift = 7; break; + case 0x4000: coordmask = 0x0F800; yshift = 8; break; + case 0x8000: coordmask = 0x1F800; yshift = 9; break; + case 0xC000: coordmask = 0x3F800; yshift = 10; break; + } + + u32 overflowmask; + if (bgcnt & 0x2000) overflowmask = 0; + else overflowmask = ~(coordmask | 0x7FF); + + extpal = (DispCnt & 0x40000000); + + s16 rotA = BGRotA[bgnum-2]; + s16 rotB = BGRotB[bgnum-2]; + s16 rotC = BGRotC[bgnum-2]; + s16 rotD = BGRotD[bgnum-2]; + 
+ s32 rotX = BGXRefInternal[bgnum-2]; + s32 rotY = BGYRefInternal[bgnum-2]; + + if (bgcnt & 0x0080) + { + // bitmap modes + + if (Num) tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 6); + else tilemapaddr = 0x06000000 + ((bgcnt & 0x1F00) << 6); + + coordmask |= 0x7FF; + + if (bgcnt & 0x0004) + { + // direct color bitmap + + for (int i = 0; i < 256; i++) + { + if (!((rotX|rotY) & overflowmask)) + { + u16 color = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((rotY & coordmask) >> 8) << yshift) + ((rotX & coordmask) >> 8)) << 1)); + + if (color & 0x8000) + DrawPixel(&dst[i], color, 0x01000000<<bgnum); + } + + rotX += rotA; + rotY += rotC; + } + } + else + { + // 256-color bitmap + + if (Num) pal = (u16*)&GPU::Palette[0x400]; + else pal = (u16*)&GPU::Palette[0]; + + for (int i = 0; i < 256; i++) + { + if (!((rotX|rotY) & overflowmask)) + { + u8 color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((rotY & coordmask) >> 8) << yshift) + ((rotX & coordmask) >> 8)); + + if (color) + DrawPixel(&dst[i], pal[color], 0x01000000<<bgnum); + } + + rotX += rotA; + rotY += rotC; + } + } + } + else + { + // mixed affine/text mode + + if (Num) + { + tilesetaddr = 0x06200000 + ((bgcnt & 0x003C) << 12); + tilemapaddr = 0x06200000 + ((bgcnt & 0x1F00) << 3); + + pal = (u16*)&GPU::Palette[0x400]; + } + else + { + tilesetaddr = 0x06000000 + ((DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = 0x06000000 + ((DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + + pal = (u16*)&GPU::Palette[0]; + } + + u16 curtile; + u16* curpal; + + yshift -= 3; + + for (int i = 0; i < 256; i++) + { + if (!((rotX|rotY) & overflowmask)) + { + curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((rotY & coordmask) >> 11) << yshift) + ((rotX & coordmask) >> 11)) << 1)); + + if (extpal) curpal = GetBGExtPal(bgnum, curtile>>12); + else curpal = pal; + + // draw pixel + u8 color; + u32 tilexoff = (rotX >> 8) & 0x7; + u32 tileyoff = (rotY >> 8) & 0x7; + + if (curtile & 0x0400) tilexoff = 7-tilexoff; + if 
(curtile & 0x0800) tileyoff = 7-tileyoff; + + color = GPU::ReadVRAM_BG<u8>(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff); + + if (color) + DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum); + } + + rotX += rotA; + rotY += rotC; + } + } + + BGXRefInternal[bgnum-2] += rotB; + BGYRefInternal[bgnum-2] += rotD; +} + +void GPU2D::InterleaveSprites(u32* buf, u32 prio, u32* dst) +{ + for (u32 i = 0; i < 256; i++) + { + if ((buf[i] & 0xF8000) == prio) + { + u32 blendfunc = 0; + DrawPixel(&dst[i], buf[i] & 0x7FFF, buf[i] & 0xFF000000); + } + } +} + +void GPU2D::DrawSprites(u32 line, u32* dst) +{ + u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0]; + + const s32 spritewidth[16] = + { + 8, 16, 8, 0, + 16, 32, 8, 0, + 32, 32, 16, 0, + 64, 64, 32, 0 + }; + const s32 spriteheight[16] = + { + 8, 8, 16, 0, + 16, 8, 32, 0, + 32, 16, 32, 0, + 64, 32, 64, 0 + }; + + for (int bgnum = 0x0C00; bgnum >= 0x0000; bgnum -= 0x0400) + { + for (int sprnum = 127; sprnum >= 0; sprnum--) + { + u16* attrib = &oam[sprnum*4]; + + if ((attrib[2] & 0x0C00) != bgnum) + continue; + + if (attrib[0] & 0x0100) + { + u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); + s32 width = spritewidth[sizeparam]; + s32 height = spriteheight[sizeparam]; + s32 boundwidth = width; + s32 boundheight = height; + + if (attrib[0] & 0x0200) + { + boundwidth <<= 1; + boundheight <<= 1; + } + + u32 ypos = attrib[0] & 0xFF; + ypos = (line - ypos) & 0xFF; + if (ypos >= (u32)boundheight) + continue; + + s32 xpos = (s32)(attrib[1] << 23) >> 23; + if (xpos <= -boundwidth) + continue; + + u32 rotparamgroup = (attrib[1] >> 9) & 0x1F; + + DrawSprite_Rotscale(attrib, &oam[(rotparamgroup*16) + 3], boundwidth, boundheight, width, height, xpos, ypos, dst); + } + else + { + if (attrib[0] & 0x0200) + continue; + + u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); + s32 width = spritewidth[sizeparam]; + s32 height = spriteheight[sizeparam]; + + u32 ypos = attrib[0] & 0xFF; + ypos = (line - 
// Renders one scanline of a rotated/scaled sprite into the sprite line buffer.
//
// attrib:      the sprite's OAM attribute words (attrib[0..2] are read)
// rotparams:   points at the first of four affine parameters; A, B, C and D
//              are read from rotparams[0], [4], [8] and [12]
// boundwidth,
// boundheight: size of the on-screen bounding box (the caller doubles these
//              when attrib[0] bit 9 is set)
// width,
// height:      size of the sprite's own pixel data
// xpos:        signed X position of the bounding box on screen
// ypos:        row inside the bounding box that falls on this scanline
// dst:         256-entry sprite line buffer; each pixel drawn is stored as
//              color | priority/flag bits
void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, u32 ypos, u32* dst)
{
    // attrib[2] bits 10-11 end up in bits 16-17; 0x8000 marks the pixel as
    // present (InterleaveSprites matches on these bits)
    u32 prio = ((attrib[2] & 0x0C00) << 6) | 0x8000;
    u32 tilenum = attrib[2] & 0x03FF;
    u32 spritemode = (attrib[0] >> 10) & 0x3;

    // ytilefactor = number of tile units to skip per row of tiles
    u32 ytilefactor;
    if (DispCnt & 0x10)
    {
        // tile number scaled by DispCnt bits 20-21; one row of tiles is as
        // wide as the sprite (doubled for 256-color sprites)
        tilenum <<= ((DispCnt >> 20) & 0x3);
        ytilefactor = (width >> 3) << ((attrib[0] & 0x2000) ? 1:0);
    }
    else
    {
        // fixed stride of 0x20 tiles per row
        ytilefactor = 0x20;
    }

    s32 centerX = boundwidth >> 1;
    s32 centerY = boundheight >> 1;

    // xoff = first column of the bounding box that is actually on screen
    u32 xoff;
    if (xpos >= 0)
    {
        xoff = 0;
        // clip the right edge against the 256-pixel line
        if ((xpos+boundwidth) > 256)
            boundwidth = 256-xpos;
    }
    else
    {
        // partially off the left edge: skip the hidden columns
        xoff = -xpos;
        xpos = 0;
    }

    s16 rotA = (s16)rotparams[0];
    s16 rotB = (s16)rotparams[4];
    s16 rotC = (s16)rotparams[8];
    s16 rotD = (s16)rotparams[12];

    // texture coordinates with 8 fractional bits, measured from the sprite's
    // center (width << 7 is half the width in that format)
    s32 rotX = ((xoff-centerX) * rotA) + ((ypos-centerY) * rotB) + (width << 7);
    s32 rotY = ((xoff-centerX) * rotC) + ((ypos-centerY) * rotD) + (height << 7);

    // bring the size into the same 8-bit fixed-point format for the bounds
    // checks below
    width <<= 8;
    height <<= 8;

    if (spritemode == 3)
    {
        // TODO: this mode is not drawn yet; only the flag bits are computed

        u32 alpha = attrib[2] >> 12;
        if (!alpha) return;
        alpha++;

        prio |= (0xC0000000 | (alpha << 24));

        // TODO
    }
    else
    {
        // mode 1 sets flag bit 31 (checked by the "sprite blending" path of
        // DrawScanline_Mode1); every other mode sets bit 28
        if (spritemode == 1) prio |= 0x80000000;
        else                 prio |= 0x10000000;

        if (attrib[0] & 0x2000)
        {
            // 256-color
            tilenum <<= 5;
            ytilefactor <<= 5;
            u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;

            u32 extpal = (DispCnt & 0x80000000);

            u16* pal;
            if (extpal) pal = GetOBJExtPal(attrib[2] >> 12);
            else        pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];

            for (; xoff < boundwidth;)
            {
                // unsigned compare also rejects negative coordinates
                if ((u32)rotX < width && (u32)rotY < height)
                {
                    u8 color;

                    // tile row * stride + row in tile * 8 + tile column * 64
                    // + column in tile (one byte per pixel)
                    color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8));

                    // color 0 is not drawn
                    if (color)
                        dst[xpos] = pal[color] | prio;
                }

                rotX += rotA;
                rotY += rotC;
                xoff++;
                xpos++;
            }
        }
        else
        {
            // 16-color
            tilenum <<= 5;
            ytilefactor <<= 5;
            u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;

            // attrib[2] bits 12-15 select a 16-entry palette
            u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
            pal += (attrib[2] & 0xF000) >> 8;

            for (; xoff < boundwidth;)
            {
                // unsigned compare also rejects negative coordinates
                if ((u32)rotX < width && (u32)rotY < height)
                {
                    u8 color;

                    // tile row * stride + row in tile * 4 + tile column * 32
                    // + byte in row (two pixels per byte)
                    color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9));

                    // odd pixels live in the high nibble, even ones in the low
                    if (rotX & 0x100)
                        color >>= 4;
                    else
                        color &= 0x0F;

                    // color 0 is not drawn
                    if (color)
                        dst[xpos] = pal[color] | prio;
                }

                rotX += rotA;
                rotY += rotC;
                xoff++;
                xpos++;
            }
        }
    }
}
+ + prio |= (0xC0000000 | (alpha << 24)); + + u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; + pixelsaddr += (xoff << 1); + + for (; xoff < xend;) + { + u16 color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr); + pixelsaddr += 2; + + if (color & 0x8000) + dst[xpos] = color | prio; + + xoff++; + xpos++; + } + } + else + { + if (DispCnt & 0x10) + { + tilenum <<= ((DispCnt >> 20) & 0x3); + tilenum += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0); + } + else + { + tilenum += ((ypos >> 3) * 0x20); + } + + if (spritemode == 1) prio |= 0x80000000; + else prio |= 0x10000000; + + if (attrib[0] & 0x2000) + { + // 256-color + tilenum <<= 5; + u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; + pixelsaddr += ((ypos & 0x7) << 3); + + u32 extpal = (DispCnt & 0x80000000); + + u16* pal; + if (extpal) pal = GetOBJExtPal(attrib[2] >> 12); + else pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; + + if (attrib[1] & 0x1000) // xflip. TODO: do better? oh well for now this works + { + pixelsaddr += (((width-1 - xoff) & wmask) << 3); + pixelsaddr += ((width-1 - xoff) & 0x7); + + for (; xoff < xend;) + { + u8 color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr); + pixelsaddr--; + + if (color) + dst[xpos] = pal[color] | prio; + + xoff++; + xpos++; + if (!(xoff & 0x7)) pixelsaddr -= 56; + } + } + else + { + pixelsaddr += ((xoff & wmask) << 3); + pixelsaddr += (xoff & 0x7); + + for (; xoff < xend;) + { + u8 color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr); + pixelsaddr++; + + if (color) + dst[xpos] = pal[color] | prio; + + xoff++; + xpos++; + if (!(xoff & 0x7)) pixelsaddr += 56; + } + } + } + else + { + // 16-color + tilenum <<= 5; + u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; + pixelsaddr += ((ypos & 0x7) << 2); + + u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; + pal += (attrib[2] & 0xF000) >> 8; + + if (attrib[1] & 0x1000) // xflip. TODO: do better? 
oh well for now this works + { + pixelsaddr += (((width-1 - xoff) & wmask) << 2); + pixelsaddr += (((width-1 - xoff) & 0x7) >> 1); + + for (; xoff < xend;) + { + u8 color; + if (xoff & 0x1) + { + color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) & 0x0F; + pixelsaddr--; + } + else + { + color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) >> 4; + } + + if (color) + dst[xpos] = pal[color] | prio; + + xoff++; + xpos++; + if (!(xoff & 0x7)) pixelsaddr -= 28; + } + } + else + { + pixelsaddr += ((xoff & wmask) << 2); + pixelsaddr += ((xoff & 0x7) >> 1); + + for (; xoff < xend;) + { + u8 color; + if (xoff & 0x1) + { + color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) >> 4; + pixelsaddr++; + } + else + { + color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr) & 0x0F; + } + + if (color) + dst[xpos] = pal[color] | prio; + + xoff++; + xpos++; + if (!(xoff & 0x7)) pixelsaddr += 28; + } + } + } + } +} diff --git a/src/GPU2D.h b/src/GPU2D.h new file mode 100644 index 0000000..4136440 --- /dev/null +++ b/src/GPU2D.h @@ -0,0 +1,97 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
// One 2D graphics engine. The implementation in GPU2D.cpp branches on Num to
// pick which VRAM, palette and OAM regions an instance works with.
class GPU2D
{
public:
    GPU2D(u32 num);
    ~GPU2D();

    void Reset();

    void SetFramebuffer(u32* buf);

    // register access, by access width
    // (presumably `addr` is the I/O register address - confirm in GPU2D.cpp)
    u8 Read8(u32 addr);
    u16 Read16(u32 addr);
    u32 Read32(u32 addr);
    void Write8(u32 addr, u8 val);
    void Write16(u32 addr, u16 val);
    void Write32(u32 addr, u32 val);

    void DrawScanline(u32 line);
    void VBlank();

    // Invalidate the extended palette caches.
    // BGExtPalDirty clears BGExtPalStatus[base] and BGExtPalStatus[base+1],
    // so `base` must be at most 2.
    void BGExtPalDirty(u32 base);
    void OBJExtPalDirty();

    // Return a pointer to a cached 256-entry extended palette, refreshing the
    // cache entry from VRAM first if it was marked dirty.
    u16* GetBGExtPal(u32 slot, u32 pal);
    u16* GetOBJExtPal(u32 pal);

private:
    // zero / nonzero selects between the two sets of VRAM addresses
    // (0x060xxxxx/0x064xxxxx vs 0x062xxxxx/0x066xxxxx) and palette/OAM halves
    u32 Num;
    u32* Framebuffer;

    // Display control. Bits used by the renderer: 0-2 BG mode, 3 BG0 shows
    // the 3D layer (Num == 0 only), 8-11 BG0-BG3 enable, 12 sprite enable,
    // 30 BG extended palettes, 31 OBJ extended palettes.
    u32 DispCnt;
    // per-background control; the low two bits are the priority
    u16 BGCnt[4];

    // per-background scroll offsets
    u16 BGXPos[4];
    u16 BGYPos[4];

    // Affine state for BG2/BG3 (index = bgnum-2). The *Internal copies are
    // the running reference point: DrawBG_Extended adds BGRotB/BGRotD to them
    // after each scanline.
    s32 BGXRef[2];
    s32 BGYRef[2];
    s32 BGXRefInternal[2];
    s32 BGYRefInternal[2];
    s16 BGRotA[2];
    s16 BGRotB[2];
    s16 BGRotC[2];
    s16 BGRotD[2];

    // color special effects: control word and blend/brightness coefficients
    u16 BlendCnt;
    u8 EVA, EVB;
    u8 EVY;

    u32 CaptureCnt;

    u16 MasterBrightness;

    // Extended palette caches: 16 palettes of 256 colors per BG slot / for
    // sprites. Each status word holds one "cached" bit per palette.
    u16 BGExtPalCache[4][16*256];
    u16 OBJExtPalCache[16*256];
    u32 BGExtPalStatus[4];
    u32 OBJExtPalStatus;

    // Draws all enabled layers of one scanline for the given BG mode, lowest
    // priority first, interleaving sprites of each priority level.
    template<u32 bgmode> void DrawScanlineBGMode(u32 line, u32* spritebuf, u32* dst);
    void DrawScanline_Mode1(u32 line, u32* dst);

    // Writes one pixel to the two-level line buffer: the previous top pixel
    // moves to dst[256], the new one (with `flag` in its top byte) to dst[0].
    void DrawPixel(u32* dst, u16 color, u32 flag);

    void DrawBG_3D(u32 line, u32* dst);
    void DrawBG_Text(u32 line, u32* dst, u32 num);
    void DrawBG_Extended(u32 line, u32* dst, u32 bgnum);

    // Sprites are rendered into a separate 256-entry buffer by DrawSprites
    // and merged into the line buffer per priority by InterleaveSprites.
    void InterleaveSprites(u32* buf, u32 prio, u32* dst);
    void DrawSprites(u32 line, u32* dst);
    void DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, u32 ypos, u32* dst);
    void DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* dst);

    void DoCapture(u32 line, u32 width, u32* src);
};
+ + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#include <stdio.h> +#include <string.h> +#include "NDS.h" +#include "GPU.h" +#include "FIFO.h" + + +// 3D engine notes +// +// vertex/polygon RAM is filled when a complete polygon is defined, after it's been culled and clipped +// 04000604 reads from bank used by renderer +// bank used by renderer is emptied at scanline ~192 +// banks are swapped at scanline ~194 +// TODO: needs more investigation. it's weird. +// +// clipping rules: +// * if a shared vertex in a strip is clipped, affected polygons are converted into single polygons +// strip is resumed at the first eligible polygon +// +// clipping exhibits oddities on the real thing. bad precision? fancy algorithm? TODO: investigate. +// +// vertex color precision: +// * vertex colors are kept at 5-bit during clipping. makes for shitty results. +// * vertex colors are converted to 9-bit before drawing, as such: +// if (x > 0) x = (x << 4) + 0xF +// the added bias affects interpolation. 
// Parameter count for each of the 256 possible command IDs.
// The table is laid out in rows of 16 entries; each "// 0xN0" marker starts
// the row for commands 0xN0-0xNF. IDs from 0x80 up take no parameters.
// The largest entry is 32 (command 0x34), which matches the size of the
// ExecParams buffer declared below.
// NOTE(review): presumably counted in 32-bit words as queued in
// CmdFIFOEntry::Param - the code consuming this table is not visible here.
const u32 CmdNumParams[256] =
{
    // 0x00
    0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x10
    1, 0, 1, 1, 1, 0, 16, 12, 16, 12, 9, 3, 3,
    0, 0, 0,
    // 0x20
    1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0,
    // 0x30
    1, 1, 1, 1, 32,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x40
    1, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x50
    1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x60
    1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x70
    3, 2, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x80+
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +typedef struct +{ + u8 Command; + u32 Param; + +} CmdFIFOEntry; + +FIFO<CmdFIFOEntry>* CmdFIFO; +FIFO<CmdFIFOEntry>* CmdPIPE; + +u32 NumCommands, CurCommand, ParamCount, TotalParams; + +u32 DispCnt; +u32 AlphaRef; + +u16 ToonTable[32]; +u16 EdgeTable[8]; + +u32 FogColor; +u32 FogOffset; +u8 FogDensityTable[32]; + +u32 GXStat; + +u32 ExecParams[32]; +u32 ExecParamCount; +s32 CycleCount; + + +u32 MatrixMode; + +s32 ProjMatrix[16]; +s32 PosMatrix[16]; +s32 VecMatrix[16]; +s32 TexMatrix[16]; + +s32 ClipMatrix[16]; +bool ClipMatrixDirty; + +s32 Viewport[4]; + +s32 ProjMatrixStack[16]; +s32 PosMatrixStack[31][16]; +s32 VecMatrixStack[31][16]; +s32 TexMatrixStack[16]; +s32 ProjMatrixStackPointer; +s32 PosMatrixStackPointer; +s32 TexMatrixStackPointer; + +void MatrixLoadIdentity(s32* m); +void UpdateClipMatrix(); + + +u32 PolygonMode; +s16 CurVertex[3]; +u8 VertexColor[3]; +s16 TexCoords[2]; +s16 RawTexCoords[2]; +s16 Normal[3]; + +s16 LightDirection[4][3]; +u8 LightColor[4][3]; +u8 MatDiffuse[3]; +u8 MatAmbient[3]; +u8 MatSpecular[3]; +u8 MatEmission[3]; + +bool UseShininessTable; +u8 ShininessTable[128]; + +u32 PolygonAttr; +u32 CurPolygonAttr; + +u32 TexParam; +u32 TexPalette; + +Vertex TempVertexBuffer[4]; +u32 VertexNum; +u32 VertexNumInPoly; +u32 NumConsecutivePolygons; +Polygon* LastStripPolygon; + +Vertex VertexRAM[6144 * 2]; +Polygon PolygonRAM[2048 * 2]; + +Vertex* CurVertexRAM; +Polygon* CurPolygonRAM; +u32 NumVertices, NumPolygons; +u32 CurRAMBank; + +u32 ClearAttr1, ClearAttr2; + +u32 FlushRequest; +u32 FlushAttributes; + + + +bool Init() +{ + CmdFIFO = new FIFO<CmdFIFOEntry>(256); + CmdPIPE = new FIFO<CmdFIFOEntry>(4); + + if (!SoftRenderer::Init()) return false; + + return true; +} + +void DeInit() +{ + SoftRenderer::DeInit(); 
+ + delete CmdFIFO; + delete CmdPIPE; +} + +void Reset() +{ + CmdFIFO->Clear(); + CmdPIPE->Clear(); + + NumCommands = 0; + CurCommand = 0; + ParamCount = 0; + TotalParams = 0; + + DispCnt = 0; + AlphaRef = 0; + + GXStat = 0; + + memset(ExecParams, 0, 32*4); + ExecParamCount = 0; + CycleCount = 0; + + + MatrixMode = 0; + + MatrixLoadIdentity(ProjMatrix); + MatrixLoadIdentity(PosMatrix); + MatrixLoadIdentity(VecMatrix); + MatrixLoadIdentity(TexMatrix); + + ClipMatrixDirty = true; + UpdateClipMatrix(); + + memset(Viewport, 0, sizeof(Viewport)); + + memset(ProjMatrixStack, 0, 16*4); + memset(PosMatrixStack, 0, 31 * 16*4); + memset(VecMatrixStack, 0, 31 * 16*4); + memset(TexMatrixStack, 0, 16*4); + ProjMatrixStackPointer = 0; + PosMatrixStackPointer = 0; + TexMatrixStackPointer = 0; + + VertexNum = 0; + VertexNumInPoly = 0; + + CurRAMBank = 0; + CurVertexRAM = &VertexRAM[0]; + CurPolygonRAM = &PolygonRAM[0]; + NumVertices = 0; + NumPolygons = 0; + + ClearAttr1 = 0; + ClearAttr2 = 0; + + FlushRequest = 0; + FlushAttributes = 0; + + SoftRenderer::Reset(); +} + + + +void MatrixLoadIdentity(s32* m) +{ + m[0] = 0x1000; m[1] = 0; m[2] = 0; m[3] = 0; + m[4] = 0; m[5] = 0x1000; m[6] = 0; m[7] = 0; + m[8] = 0; m[9] = 0; m[10] = 0x1000; m[11] = 0; + m[12] = 0; m[13] = 0; m[14] = 0; m[15] = 0x1000; +} + +void MatrixLoad4x4(s32* m, s32* s) +{ + memcpy(m, s, 16*4); +} + +void MatrixLoad4x3(s32* m, s32* s) +{ + m[0] = s[0]; m[1] = s[1]; m[2] = s[2]; m[3] = 0; + m[4] = s[3]; m[5] = s[4]; m[6] = s[5]; m[7] = 0; + m[8] = s[6]; m[9] = s[7]; m[10] = s[8]; m[11] = 0; + m[12] = s[9]; m[13] = s[10]; m[14] = s[11]; m[15] = 0x1000; +} + +void MatrixMult4x4(s32* m, s32* s) +{ + s32 tmp[16]; + memcpy(tmp, m, 16*4); + + // m = s*m + m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8] + (s64)s[3]*tmp[12]) >> 12; + m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9] + (s64)s[3]*tmp[13]) >> 12; + m[2] = ((s64)s[0]*tmp[2] + (s64)s[1]*tmp[6] + (s64)s[2]*tmp[10] + 
(s64)s[3]*tmp[14]) >> 12; + m[3] = ((s64)s[0]*tmp[3] + (s64)s[1]*tmp[7] + (s64)s[2]*tmp[11] + (s64)s[3]*tmp[15]) >> 12; + + m[4] = ((s64)s[4]*tmp[0] + (s64)s[5]*tmp[4] + (s64)s[6]*tmp[8] + (s64)s[7]*tmp[12]) >> 12; + m[5] = ((s64)s[4]*tmp[1] + (s64)s[5]*tmp[5] + (s64)s[6]*tmp[9] + (s64)s[7]*tmp[13]) >> 12; + m[6] = ((s64)s[4]*tmp[2] + (s64)s[5]*tmp[6] + (s64)s[6]*tmp[10] + (s64)s[7]*tmp[14]) >> 12; + m[7] = ((s64)s[4]*tmp[3] + (s64)s[5]*tmp[7] + (s64)s[6]*tmp[11] + (s64)s[7]*tmp[15]) >> 12; + + m[8] = ((s64)s[8]*tmp[0] + (s64)s[9]*tmp[4] + (s64)s[10]*tmp[8] + (s64)s[11]*tmp[12]) >> 12; + m[9] = ((s64)s[8]*tmp[1] + (s64)s[9]*tmp[5] + (s64)s[10]*tmp[9] + (s64)s[11]*tmp[13]) >> 12; + m[10] = ((s64)s[8]*tmp[2] + (s64)s[9]*tmp[6] + (s64)s[10]*tmp[10] + (s64)s[11]*tmp[14]) >> 12; + m[11] = ((s64)s[8]*tmp[3] + (s64)s[9]*tmp[7] + (s64)s[10]*tmp[11] + (s64)s[11]*tmp[15]) >> 12; + + m[12] = ((s64)s[12]*tmp[0] + (s64)s[13]*tmp[4] + (s64)s[14]*tmp[8] + (s64)s[15]*tmp[12]) >> 12; + m[13] = ((s64)s[12]*tmp[1] + (s64)s[13]*tmp[5] + (s64)s[14]*tmp[9] + (s64)s[15]*tmp[13]) >> 12; + m[14] = ((s64)s[12]*tmp[2] + (s64)s[13]*tmp[6] + (s64)s[14]*tmp[10] + (s64)s[15]*tmp[14]) >> 12; + m[15] = ((s64)s[12]*tmp[3] + (s64)s[13]*tmp[7] + (s64)s[14]*tmp[11] + (s64)s[15]*tmp[15]) >> 12; +} + +void MatrixMult4x3(s32* m, s32* s) +{ + s32 tmp[16]; + memcpy(tmp, m, 16*4); + + // m = s*m + m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8]) >> 12; + m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9]) >> 12; + m[2] = ((s64)s[0]*tmp[2] + (s64)s[1]*tmp[6] + (s64)s[2]*tmp[10]) >> 12; + m[3] = ((s64)s[0]*tmp[3] + (s64)s[1]*tmp[7] + (s64)s[2]*tmp[11]) >> 12; + + m[4] = ((s64)s[3]*tmp[0] + (s64)s[4]*tmp[4] + (s64)s[5]*tmp[8]) >> 12; + m[5] = ((s64)s[3]*tmp[1] + (s64)s[4]*tmp[5] + (s64)s[5]*tmp[9]) >> 12; + m[6] = ((s64)s[3]*tmp[2] + (s64)s[4]*tmp[6] + (s64)s[5]*tmp[10]) >> 12; + m[7] = ((s64)s[3]*tmp[3] + (s64)s[4]*tmp[7] + (s64)s[5]*tmp[11]) >> 12; + + m[8] = ((s64)s[6]*tmp[0] + 
(s64)s[7]*tmp[4] + (s64)s[8]*tmp[8]) >> 12; + m[9] = ((s64)s[6]*tmp[1] + (s64)s[7]*tmp[5] + (s64)s[8]*tmp[9]) >> 12; + m[10] = ((s64)s[6]*tmp[2] + (s64)s[7]*tmp[6] + (s64)s[8]*tmp[10]) >> 12; + m[11] = ((s64)s[6]*tmp[3] + (s64)s[7]*tmp[7] + (s64)s[8]*tmp[11]) >> 12; + + m[12] = ((s64)s[9]*tmp[0] + (s64)s[10]*tmp[4] + (s64)s[11]*tmp[8] + (s64)0x1000*tmp[12]) >> 12; + m[13] = ((s64)s[9]*tmp[1] + (s64)s[10]*tmp[5] + (s64)s[11]*tmp[9] + (s64)0x1000*tmp[13]) >> 12; + m[14] = ((s64)s[9]*tmp[2] + (s64)s[10]*tmp[6] + (s64)s[11]*tmp[10] + (s64)0x1000*tmp[14]) >> 12; + m[15] = ((s64)s[9]*tmp[3] + (s64)s[10]*tmp[7] + (s64)s[11]*tmp[11] + (s64)0x1000*tmp[15]) >> 12; +} + +void MatrixMult3x3(s32* m, s32* s) +{ + s32 tmp[12]; + memcpy(tmp, m, 12*4); + + // m = s*m + m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8]) >> 12; + m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9]) >> 12; + m[2] = ((s64)s[0]*tmp[2] + (s64)s[1]*tmp[6] + (s64)s[2]*tmp[10]) >> 12; + m[3] = ((s64)s[0]*tmp[3] + (s64)s[1]*tmp[7] + (s64)s[2]*tmp[11]) >> 12; + + m[4] = ((s64)s[3]*tmp[0] + (s64)s[4]*tmp[4] + (s64)s[5]*tmp[8]) >> 12; + m[5] = ((s64)s[3]*tmp[1] + (s64)s[4]*tmp[5] + (s64)s[5]*tmp[9]) >> 12; + m[6] = ((s64)s[3]*tmp[2] + (s64)s[4]*tmp[6] + (s64)s[5]*tmp[10]) >> 12; + m[7] = ((s64)s[3]*tmp[3] + (s64)s[4]*tmp[7] + (s64)s[5]*tmp[11]) >> 12; + + m[8] = ((s64)s[6]*tmp[0] + (s64)s[7]*tmp[4] + (s64)s[8]*tmp[8]) >> 12; + m[9] = ((s64)s[6]*tmp[1] + (s64)s[7]*tmp[5] + (s64)s[8]*tmp[9]) >> 12; + m[10] = ((s64)s[6]*tmp[2] + (s64)s[7]*tmp[6] + (s64)s[8]*tmp[10]) >> 12; + m[11] = ((s64)s[6]*tmp[3] + (s64)s[7]*tmp[7] + (s64)s[8]*tmp[11]) >> 12; +} + +void MatrixScale(s32* m, s32* s) +{ + m[0] = ((s64)s[0]*m[0]) >> 12; + m[1] = ((s64)s[0]*m[1]) >> 12; + m[2] = ((s64)s[0]*m[2]) >> 12; + m[3] = ((s64)s[0]*m[3]) >> 12; + + m[4] = ((s64)s[1]*m[4]) >> 12; + m[5] = ((s64)s[1]*m[5]) >> 12; + m[6] = ((s64)s[1]*m[6]) >> 12; + m[7] = ((s64)s[1]*m[7]) >> 12; + + m[8] = ((s64)s[2]*m[8]) >> 12; + m[9] 
= ((s64)s[2]*m[9]) >> 12; + m[10] = ((s64)s[2]*m[10]) >> 12; + m[11] = ((s64)s[2]*m[11]) >> 12; +} + +void MatrixTranslate(s32* m, s32* s) +{ + m[12] += ((s64)s[0]*m[0] + (s64)s[1]*m[4] + (s64)s[2]*m[8]) >> 12; + m[13] += ((s64)s[0]*m[1] + (s64)s[1]*m[5] + (s64)s[2]*m[9]) >> 12; + m[14] += ((s64)s[0]*m[2] + (s64)s[1]*m[6] + (s64)s[2]*m[10]) >> 12; +} + +void UpdateClipMatrix() +{ + if (!ClipMatrixDirty) return; + ClipMatrixDirty = false; + + memcpy(ClipMatrix, ProjMatrix, 16*4); + MatrixMult4x4(ClipMatrix, PosMatrix); +} + + + +template<int comp, s32 plane> +void ClipSegment(Vertex* outbuf, Vertex* vout, Vertex* vin) +{ + s64 factor_num = vin->Position[3] - (plane*vin->Position[comp]); + s32 factor_den = factor_num - (vout->Position[3] - (plane*vout->Position[comp])); + + Vertex mid; +#define INTERPOLATE(var) { mid.var = (vin->var + ((vout->var - vin->var) * factor_num) / factor_den); } + + if (comp != 0) INTERPOLATE(Position[0]); + if (comp != 1) INTERPOLATE(Position[1]); + if (comp != 2) INTERPOLATE(Position[2]); + INTERPOLATE(Position[3]); + mid.Position[comp] = plane*mid.Position[3]; + + INTERPOLATE(Color[0]); + INTERPOLATE(Color[1]); + INTERPOLATE(Color[2]); + + INTERPOLATE(TexCoords[0]); + INTERPOLATE(TexCoords[1]); + + mid.Clipped = true; + +#undef INTERPOLATE + *outbuf = mid; +} + +void SubmitPolygon() +{ + Vertex clippedvertices[2][10]; + Vertex* reusedvertices[2]; + int clipstart = 0; + int lastpolyverts = 0; + + int nverts = PolygonMode & 0x1 ? 
4:3; + int prev, next; + int c; + + // culling + + Vertex *v0, *v1, *v2; + s64 normalX, normalY, normalZ; + s64 dot; + + v0 = &TempVertexBuffer[0]; + v1 = &TempVertexBuffer[1]; + v2 = &TempVertexBuffer[2]; + normalX = (((s64)v0->Position[1] * v2->Position[3]) - ((s64)v0->Position[3] * v2->Position[1])) >> 12; + normalY = (((s64)v0->Position[3] * v2->Position[0]) - ((s64)v0->Position[0] * v2->Position[3])) >> 12; + normalZ = (((s64)v0->Position[0] * v2->Position[1]) - ((s64)v0->Position[1] * v2->Position[0])) >> 12; + dot = ((s64)(v1->Position[0] >> 0) * normalX) + ((s64)(v1->Position[1] >> 0) * normalY) + ((s64)(v1->Position[3] >> 0) * normalZ); + + bool facingview = (dot < 0); + + if (facingview) + { + if (!(CurPolygonAttr & (1<<7))) + { + LastStripPolygon = NULL; + return; + } + } + else if (dot > 0) + { + if (!(CurPolygonAttr & (1<<6))) + { + LastStripPolygon = NULL; + return; + } + } + + // for strips, check whether we can attach to the previous polygon + // this requires two vertices shared with the previous polygon, and that + // the two polygons be of the same type + + if (PolygonMode >= 2 && LastStripPolygon) + { + int id0, id1; + if (PolygonMode == 2) + { + if (NumConsecutivePolygons & 1) + { + id0 = 2; + id1 = 1; + } + else + { + id0 = 0; + id1 = 2; + } + + lastpolyverts = 3; + } + else + { + id0 = 3; + id1 = 2; + + lastpolyverts = 4; + } + + if (LastStripPolygon->NumVertices == lastpolyverts && + !LastStripPolygon->Vertices[id0]->Clipped && + !LastStripPolygon->Vertices[id1]->Clipped) + { + reusedvertices[0] = LastStripPolygon->Vertices[id0]; + reusedvertices[1] = LastStripPolygon->Vertices[id1]; + + clippedvertices[0][0] = *reusedvertices[0]; + clippedvertices[0][1] = *reusedvertices[1]; + clippedvertices[1][0] = *reusedvertices[0]; + clippedvertices[1][1] = *reusedvertices[1]; + + clipstart = 2; + } + } + + // clip. 
+ // for each vertex: + // if it's outside, check if the previous and next vertices are inside + // if so, place a new vertex at the edge of the view volume + + // X clipping + + c = clipstart; + for (int i = clipstart; i < nverts; i++) + { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + + Vertex vtx = TempVertexBuffer[i]; + if (vtx.Position[0] > vtx.Position[3]) + { + Vertex* vprev = &TempVertexBuffer[prev]; + if (vprev->Position[0] <= vprev->Position[3]) + { + ClipSegment<0, 1>(&clippedvertices[0][c], &vtx, vprev); + c++; + } + + Vertex* vnext = &TempVertexBuffer[next]; + if (vnext->Position[0] <= vnext->Position[3]) + { + ClipSegment<0, 1>(&clippedvertices[0][c], &vtx, vnext); + c++; + } + } + else + clippedvertices[0][c++] = vtx; + } + + nverts = c; c = clipstart; + for (int i = clipstart; i < nverts; i++) + { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + + Vertex vtx = clippedvertices[0][i]; + if (vtx.Position[0] < -vtx.Position[3]) + { + Vertex* vprev = &clippedvertices[0][prev]; + if (vprev->Position[0] >= -vprev->Position[3]) + { + ClipSegment<0, -1>(&clippedvertices[1][c], &vtx, vprev); + c++; + } + + Vertex* vnext = &clippedvertices[0][next]; + if (vnext->Position[0] >= -vnext->Position[3]) + { + ClipSegment<0, -1>(&clippedvertices[1][c], &vtx, vnext); + c++; + } + } + else + clippedvertices[1][c++] = vtx; + } + + for (int i = 0; i < c; i++) + { + Vertex* vtx = &clippedvertices[1][i]; + + vtx->Color[0] &= ~0xFFF; vtx->Color[0] += 0xFFF; + vtx->Color[1] &= ~0xFFF; vtx->Color[1] += 0xFFF; + vtx->Color[2] &= ~0xFFF; vtx->Color[2] += 0xFFF; + } + + // Y clipping + + nverts = c; c = clipstart; + for (int i = clipstart; i < nverts; i++) + { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + + Vertex vtx = clippedvertices[1][i]; + if (vtx.Position[1] > vtx.Position[3]) + { + Vertex* vprev = &clippedvertices[1][prev]; + if 
(vprev->Position[1] <= vprev->Position[3]) + { + ClipSegment<1, 1>(&clippedvertices[0][c], &vtx, vprev); + c++; + } + + Vertex* vnext = &clippedvertices[1][next]; + if (vnext->Position[1] <= vnext->Position[3]) + { + ClipSegment<1, 1>(&clippedvertices[0][c], &vtx, vnext); + c++; + } + } + else + clippedvertices[0][c++] = vtx; + } + + nverts = c; c = clipstart; + for (int i = clipstart; i < nverts; i++) + { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + + Vertex vtx = clippedvertices[0][i]; + if (vtx.Position[1] < -vtx.Position[3]) + { + Vertex* vprev = &clippedvertices[0][prev]; + if (vprev->Position[1] >= -vprev->Position[3]) + { + ClipSegment<1, -1>(&clippedvertices[1][c], &vtx, vprev); + c++; + } + + Vertex* vnext = &clippedvertices[0][next]; + if (vnext->Position[1] >= -vnext->Position[3]) + { + ClipSegment<1, -1>(&clippedvertices[1][c], &vtx, vnext); + c++; + } + } + else + clippedvertices[1][c++] = vtx; + } + + for (int i = 0; i < c; i++) + { + Vertex* vtx = &clippedvertices[1][i]; + + vtx->Color[0] &= ~0xFFF; vtx->Color[0] += 0xFFF; + vtx->Color[1] &= ~0xFFF; vtx->Color[1] += 0xFFF; + vtx->Color[2] &= ~0xFFF; vtx->Color[2] += 0xFFF; + } + + // Z clipping + + bool farplaneclip = false; + nverts = c; c = clipstart; + for (int i = clipstart; i < nverts; i++) + { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + + Vertex vtx = clippedvertices[1][i]; + if (vtx.Position[2] > vtx.Position[3]) + { + farplaneclip = true; + + Vertex* vprev = &clippedvertices[1][prev]; + if (vprev->Position[2] <= vprev->Position[3]) + { + ClipSegment<2, 1>(&clippedvertices[0][c], &vtx, vprev); + c++; + } + + Vertex* vnext = &clippedvertices[1][next]; + if (vnext->Position[2] <= vnext->Position[3]) + { + ClipSegment<2, 1>(&clippedvertices[0][c], &vtx, vnext); + c++; + } + } + else + clippedvertices[0][c++] = vtx; + } + + if (farplaneclip && (!(CurPolygonAttr & (1<<12)))) + return; + + nverts = c; c = 
clipstart; + for (int i = clipstart; i < nverts; i++) + { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + + Vertex vtx = clippedvertices[0][i]; + if (vtx.Position[2] < -vtx.Position[3]) + { + Vertex* vprev = &clippedvertices[0][prev]; + if (vprev->Position[2] >= -vprev->Position[3]) + { + ClipSegment<2, -1>(&clippedvertices[1][c], &vtx, vprev); + c++; + } + + Vertex* vnext = &clippedvertices[0][next]; + if (vnext->Position[2] >= -vnext->Position[3]) + { + ClipSegment<2, -1>(&clippedvertices[1][c], &vtx, vnext); + c++; + } + } + else + clippedvertices[1][c++] = vtx; + } + + for (int i = 0; i < c; i++) + { + Vertex* vtx = &clippedvertices[1][i]; + + vtx->Color[0] &= ~0xFFF; vtx->Color[0] += 0xFFF; + vtx->Color[1] &= ~0xFFF; vtx->Color[1] += 0xFFF; + vtx->Color[2] &= ~0xFFF; vtx->Color[2] += 0xFFF; + } + + if (c == 0) + { + LastStripPolygon = NULL; + return; + } + + // build the actual polygon + + if (NumPolygons >= 2048 || NumVertices+c > 6144) + { + LastStripPolygon = NULL; + // TODO: set DISP3DCNT overflow flag + return; + } + + Polygon* poly = &CurPolygonRAM[NumPolygons++]; + poly->NumVertices = 0; + + poly->Attr = CurPolygonAttr; + poly->TexParam = TexParam; + poly->TexPalette = TexPalette; + + poly->FacingView = facingview; + + u32 texfmt = (TexParam >> 26) & 0x7; + u32 polyalpha = (CurPolygonAttr >> 16) & 0x1F; + poly->Translucent = (texfmt == 1 || texfmt == 6 || (polyalpha > 0 && polyalpha < 31)); + + if (LastStripPolygon && clipstart > 0) + { + if (c == lastpolyverts) + { + poly->Vertices[0] = reusedvertices[0]; + poly->Vertices[1] = reusedvertices[1]; + } + else + { + Vertex v0 = *reusedvertices[0]; + Vertex v1 = *reusedvertices[1]; + + CurVertexRAM[NumVertices] = v0; + poly->Vertices[0] = &CurVertexRAM[NumVertices]; + CurVertexRAM[NumVertices+1] = v1; + poly->Vertices[1] = &CurVertexRAM[NumVertices+1]; + NumVertices += 2; + } + + poly->NumVertices += 2; + } + + for (int i = clipstart; i < c; i++) + { + Vertex* vtx 
= &CurVertexRAM[NumVertices]; + *vtx = clippedvertices[1][i]; + poly->Vertices[i] = vtx; + + NumVertices++; + poly->NumVertices++; + + // viewport transform + s32 posX, posY, posZ; + s32 w = vtx->Position[3]; + if (w == 0) + { + posX = 0; + posY = 0; + posZ = 0; + w = 0x1000; + } + else + { + posX = (((s64)(vtx->Position[0] + w) * Viewport[2]) / (((s64)w) << 1)) + Viewport[0]; + posY = (((s64)(-vtx->Position[1] + w) * Viewport[3]) / (((s64)w) << 1)) + Viewport[1]; + + if (FlushAttributes & 0x2) posZ = w; + else posZ = (((s64)vtx->Position[2] * 0x800000) / w) + 0x7FFEFF; + } + + if (posX < 0) posX = 0; + else if (posX > 256) posX = 256; + if (posY < 0) posY = 0; + else if (posY > 192) posY = 192; + if (posZ < 0) posZ = 0; + else if (posZ > 0xFFFFFF) posZ = 0xFFFFFF; + + vtx->FinalPosition[0] = posX; + vtx->FinalPosition[1] = posY; + vtx->FinalPosition[2] = posZ; + vtx->FinalPosition[3] = w; + + vtx->FinalColor[0] = vtx->Color[0] >> 12; + if (vtx->FinalColor[0]) vtx->FinalColor[0] = ((vtx->FinalColor[0] << 4) + 0xF); + vtx->FinalColor[1] = vtx->Color[1] >> 12; + if (vtx->FinalColor[1]) vtx->FinalColor[1] = ((vtx->FinalColor[1] << 4) + 0xF); + vtx->FinalColor[2] = vtx->Color[2] >> 12; + if (vtx->FinalColor[2]) vtx->FinalColor[2] = ((vtx->FinalColor[2] << 4) + 0xF); + } + + // determine bounds of the polygon + u32 vtop = 0, vbot = 0; + s32 ytop = 192, ybot = 0; + s32 xtop = 256, xbot = 0; + + for (int i = 0; i < c; i++) + { + Vertex* vtx = poly->Vertices[i]; + + if (vtx->FinalPosition[1] < ytop || (vtx->FinalPosition[1] == ytop && vtx->FinalPosition[0] < xtop)) + { + xtop = vtx->FinalPosition[0]; + ytop = vtx->FinalPosition[1]; + vtop = i; + } + if (vtx->FinalPosition[1] > ybot || (vtx->FinalPosition[1] == ybot && vtx->FinalPosition[0] > xbot)) + { + xbot = vtx->FinalPosition[0]; + ybot = vtx->FinalPosition[1]; + vbot = i; + } + } + + poly->VTop = vtop; poly->VBottom = vbot; + poly->YTop = ytop; poly->YBottom = ybot; + poly->XTop = xtop; poly->XBottom = xbot; + + if 
(PolygonMode >= 2) + LastStripPolygon = poly; + else + LastStripPolygon = NULL; +} + +void SubmitVertex() +{ + s64 vertex[4] = {(s64)CurVertex[0], (s64)CurVertex[1], (s64)CurVertex[2], 0x1000}; + Vertex* vertextrans = &TempVertexBuffer[VertexNumInPoly]; + + UpdateClipMatrix(); + vertextrans->Position[0] = (vertex[0]*ClipMatrix[0] + vertex[1]*ClipMatrix[4] + vertex[2]*ClipMatrix[8] + vertex[3]*ClipMatrix[12]) >> 12; + vertextrans->Position[1] = (vertex[0]*ClipMatrix[1] + vertex[1]*ClipMatrix[5] + vertex[2]*ClipMatrix[9] + vertex[3]*ClipMatrix[13]) >> 12; + vertextrans->Position[2] = (vertex[0]*ClipMatrix[2] + vertex[1]*ClipMatrix[6] + vertex[2]*ClipMatrix[10] + vertex[3]*ClipMatrix[14]) >> 12; + vertextrans->Position[3] = (vertex[0]*ClipMatrix[3] + vertex[1]*ClipMatrix[7] + vertex[2]*ClipMatrix[11] + vertex[3]*ClipMatrix[15]) >> 12; + + vertextrans->Color[0] = (VertexColor[0] << 12) + 0xFFF; + vertextrans->Color[1] = (VertexColor[1] << 12) + 0xFFF; + vertextrans->Color[2] = (VertexColor[2] << 12) + 0xFFF; + + if ((TexParam >> 30) == 3) + { + vertextrans->TexCoords[0] = (vertex[0]*TexMatrix[0] + vertex[1]*TexMatrix[4] + vertex[2]*TexMatrix[8] + vertex[3]*(RawTexCoords[0]<<8)) >> 20; + vertextrans->TexCoords[1] = (vertex[0]*TexMatrix[1] + vertex[1]*TexMatrix[5] + vertex[2]*TexMatrix[9] + vertex[3]*(RawTexCoords[1]<<8)) >> 20; + } + else + { + vertextrans->TexCoords[0] = TexCoords[0]; + vertextrans->TexCoords[1] = TexCoords[1]; + } + + vertextrans->Clipped = false; + + VertexNum++; + VertexNumInPoly++; + + switch (PolygonMode) + { + case 0: // triangle + if (VertexNumInPoly == 3) + { + VertexNumInPoly = 0; + SubmitPolygon(); + NumConsecutivePolygons++; + } + break; + + case 1: // quad + if (VertexNumInPoly == 4) + { + VertexNumInPoly = 0; + SubmitPolygon(); + NumConsecutivePolygons++; + } + break; + + case 2: // triangle strip + if (NumConsecutivePolygons & 1) + { + Vertex tmp = TempVertexBuffer[1]; + TempVertexBuffer[1] = TempVertexBuffer[0]; + TempVertexBuffer[0] = 
tmp; + + VertexNumInPoly = 2; + SubmitPolygon(); + NumConsecutivePolygons++; + + TempVertexBuffer[1] = TempVertexBuffer[2]; + } + else if (VertexNumInPoly == 3) + { + VertexNumInPoly = 2; + SubmitPolygon(); + NumConsecutivePolygons++; + + TempVertexBuffer[0] = TempVertexBuffer[1]; + TempVertexBuffer[1] = TempVertexBuffer[2]; + } + break; + + case 3: // quad strip + if (VertexNumInPoly == 4) + { + Vertex tmp = TempVertexBuffer[3]; + TempVertexBuffer[3] = TempVertexBuffer[2]; + TempVertexBuffer[2] = tmp; + + VertexNumInPoly = 2; + SubmitPolygon(); + NumConsecutivePolygons++; + + TempVertexBuffer[0] = TempVertexBuffer[3]; + TempVertexBuffer[1] = TempVertexBuffer[2]; + } + break; + } +} + +s32 CalculateLighting() +{ + if ((TexParam >> 30) == 2) + { + TexCoords[0] = RawTexCoords[0] + (((s64)Normal[0]*TexMatrix[0] + (s64)Normal[1]*TexMatrix[4] + (s64)Normal[2]*TexMatrix[8]) >> 21); + TexCoords[1] = RawTexCoords[1] + (((s64)Normal[0]*TexMatrix[1] + (s64)Normal[1]*TexMatrix[5] + (s64)Normal[2]*TexMatrix[9]) >> 21); + } + + s32 normaltrans[3]; + normaltrans[0] = (Normal[0]*VecMatrix[0] + Normal[1]*VecMatrix[4] + Normal[2]*VecMatrix[8]) >> 12; + normaltrans[1] = (Normal[0]*VecMatrix[1] + Normal[1]*VecMatrix[5] + Normal[2]*VecMatrix[9]) >> 12; + normaltrans[2] = (Normal[0]*VecMatrix[2] + Normal[1]*VecMatrix[6] + Normal[2]*VecMatrix[10]) >> 12; + + VertexColor[0] = MatEmission[0]; + VertexColor[1] = MatEmission[1]; + VertexColor[2] = MatEmission[2]; + + s32 c = 0; + for (int i = 0; i < 4; i++) + { + if (!(CurPolygonAttr & (1<<i))) + continue; + + s32 difflevel = (-(LightDirection[i][0]*normaltrans[0] + + LightDirection[i][1]*normaltrans[1] + + LightDirection[i][2]*normaltrans[2])) >> 10; + if (difflevel < 0) difflevel = 0; + else if (difflevel > 255) difflevel = 255; + + s32 shinelevel = -(((LightDirection[i][0]>>1)*normaltrans[0] + + (LightDirection[i][1]>>1)*normaltrans[1] + + ((LightDirection[i][2]-0x200)>>1)*normaltrans[2]) >> 10); + if (shinelevel < 0) shinelevel = 0; + 
shinelevel = ((shinelevel * shinelevel) >> 7) - 0x100; // really (2*shinelevel*shinelevel)-1 + if (shinelevel < 0) shinelevel = 0; + else if (shinelevel > 255) shinelevel = 255; + + if (UseShininessTable) + { + // checkme + shinelevel >>= 1; + shinelevel = ShininessTable[shinelevel]; + } + + VertexColor[0] += ((MatSpecular[0] * LightColor[i][0] * shinelevel) >> 13); + VertexColor[0] += ((MatDiffuse[0] * LightColor[i][0] * difflevel) >> 13); + VertexColor[0] += ((MatAmbient[0] * LightColor[i][0]) >> 5); + + VertexColor[1] += ((MatSpecular[1] * LightColor[i][1] * shinelevel) >> 13); + VertexColor[1] += ((MatDiffuse[1] * LightColor[i][1] * difflevel) >> 13); + VertexColor[1] += ((MatAmbient[1] * LightColor[i][1]) >> 5); + + VertexColor[2] += ((MatSpecular[2] * LightColor[i][2] * shinelevel) >> 13); + VertexColor[2] += ((MatDiffuse[2] * LightColor[i][2] * difflevel) >> 13); + VertexColor[2] += ((MatAmbient[2] * LightColor[i][2]) >> 5); + + if (VertexColor[0] > 31) VertexColor[0] = 31; + if (VertexColor[1] > 31) VertexColor[1] = 31; + if (VertexColor[2] > 31) VertexColor[2] = 31; + + c++; + } + + // checkme: cycle count + return c; +} + + + +void CmdFIFOWrite(CmdFIFOEntry& entry) +{ + if (CmdFIFO->IsEmpty() && !CmdPIPE->IsFull()) + { + CmdPIPE->Write(entry); + } + else + { + if (CmdFIFO->IsFull()) + { + //printf("!!! GX FIFO FULL\n"); + //return; + + // temp. hack + // SM64DS seems to overflow the FIFO occasionally + // either leftover bugs in our implementation, or the game accidentally doing that + // TODO: investigate. 
+ // TODO: implement this behavior properly (freezes the bus until the FIFO isn't full anymore) + + while (CmdFIFO->IsFull()) + ExecuteCommand(); + } + + CmdFIFO->Write(entry); + } +} + +CmdFIFOEntry CmdFIFORead() +{ + CmdFIFOEntry ret = CmdPIPE->Read(); + + if (CmdPIPE->Level() <= 2) + { + if (!CmdFIFO->IsEmpty()) + CmdPIPE->Write(CmdFIFO->Read()); + if (!CmdFIFO->IsEmpty()) + CmdPIPE->Write(CmdFIFO->Read()); + + CheckFIFODMA(); + CheckFIFOIRQ(); + } + + return ret; +} + + + +void ExecuteCommand() +{ + CmdFIFOEntry entry = CmdFIFORead(); + + //printf("FIFO: processing %02X %08X. Levels: FIFO=%d, PIPE=%d\n", entry.Command, entry.Param, CmdFIFO->Level(), CmdPIPE->Level()); + + ExecParams[ExecParamCount] = entry.Param; + ExecParamCount++; + + if (ExecParamCount >= CmdNumParams[entry.Command]) + { + CycleCount += CmdNumCycles[entry.Command]; + ExecParamCount = 0; + + GXStat &= ~(1<<14); + if (CycleCount > 0) + GXStat |= (1<<27); + + switch (entry.Command) + { + case 0x10: // matrix mode + MatrixMode = ExecParams[0] & 0x3; + break; + + case 0x11: // push matrix + if (MatrixMode == 0) + { + if (ProjMatrixStackPointer > 0) + { + printf("!! PROJ MATRIX STACK OVERFLOW\n"); + GXStat |= (1<<15); + break; + } + + memcpy(ProjMatrixStack, ProjMatrix, 16*4); + ProjMatrixStackPointer++; + GXStat |= (1<<14); + } + else if (MatrixMode == 3) + { + if (TexMatrixStackPointer > 0) + { + printf("!! TEX MATRIX STACK OVERFLOW\n"); + GXStat |= (1<<15); + break; + } + + memcpy(TexMatrixStack, TexMatrix, 16*4); + TexMatrixStackPointer++; + GXStat |= (1<<14); + } + else + { + if (PosMatrixStackPointer > 30) + { + printf("!! 
POS MATRIX STACK OVERFLOW\n"); + GXStat |= (1<<15); + break; + } + + memcpy(PosMatrixStack[PosMatrixStackPointer], PosMatrix, 16*4); + if (MatrixMode == 2) + memcpy(VecMatrixStack[PosMatrixStackPointer], VecMatrix, 16*4); + PosMatrixStackPointer++; + GXStat |= (1<<14); + } + break; + + case 0x12: // pop matrix + if (MatrixMode == 0) + { + if (ProjMatrixStackPointer <= 0) + { + printf("!! PROJ MATRIX STACK UNDERFLOW\n"); + GXStat |= (1<<15); + break; + } + + ProjMatrixStackPointer--; + memcpy(ProjMatrix, ProjMatrixStack, 16*4); + GXStat |= (1<<14); + ClipMatrixDirty = true; + } + else if (MatrixMode == 3) + { + if (TexMatrixStackPointer <= 0) + { + printf("!! TEX MATRIX STACK UNDERFLOW\n"); + GXStat |= (1<<15); + break; + } + + TexMatrixStackPointer--; + memcpy(TexMatrix, TexMatrixStack, 16*4); + GXStat |= (1<<14); + } + else + { + s32 offset = (s32)(ExecParams[0] << 26) >> 26; + PosMatrixStackPointer -= offset; + + if (PosMatrixStackPointer < 0 || PosMatrixStackPointer > 30) + { + printf("!! POS MATRIX STACK UNDER/OVERFLOW %d\n", PosMatrixStackPointer); + PosMatrixStackPointer += offset; + GXStat |= (1<<15); + break; + } + + memcpy(PosMatrix, PosMatrixStack[PosMatrixStackPointer], 16*4); + if (MatrixMode == 2) + memcpy(VecMatrix, VecMatrixStack[PosMatrixStackPointer], 16*4); + GXStat |= (1<<14); + ClipMatrixDirty = true; + } + break; + + case 0x13: // store matrix + if (MatrixMode == 0) + { + memcpy(ProjMatrixStack, ProjMatrix, 16*4); + } + else if (MatrixMode == 3) + { + memcpy(TexMatrixStack, TexMatrix, 16*4); + } + else + { + u32 addr = ExecParams[0] & 0x1F; + if (addr > 30) + { + printf("!! 
POS MATRIX STORE ADDR 31\n"); + GXStat |= (1<<15); + break; + } + + memcpy(PosMatrixStack[addr], PosMatrix, 16*4); + if (MatrixMode == 2) + memcpy(VecMatrixStack[addr], VecMatrix, 16*4); + } + break; + + case 0x14: // restore matrix + if (MatrixMode == 0) + { + memcpy(ProjMatrix, ProjMatrixStack, 16*4); + ClipMatrixDirty = true; + } + else if (MatrixMode == 3) + { + memcpy(TexMatrix, TexMatrixStack, 16*4); + } + else + { + u32 addr = ExecParams[0] & 0x1F; + if (addr > 30) + { + printf("!! POS MATRIX STORE ADDR 31\n"); + GXStat |= (1<<15); + break; + } + + memcpy(PosMatrix, PosMatrixStack[addr], 16*4); + if (MatrixMode == 2) + memcpy(VecMatrix, VecMatrixStack[addr], 16*4); + ClipMatrixDirty = true; + } + break; + + case 0x15: // identity + if (MatrixMode == 0) + { + MatrixLoadIdentity(ProjMatrix); + ClipMatrixDirty = true; + } + else if (MatrixMode == 3) + MatrixLoadIdentity(TexMatrix); + else + { + MatrixLoadIdentity(PosMatrix); + if (MatrixMode == 2) + MatrixLoadIdentity(VecMatrix); + ClipMatrixDirty = true; + } + break; + + case 0x16: // load 4x4 + if (MatrixMode == 0) + { + MatrixLoad4x4(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } + else if (MatrixMode == 3) + MatrixLoad4x4(TexMatrix, (s32*)ExecParams); + else + { + MatrixLoad4x4(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + MatrixLoad4x4(VecMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } + break; + + case 0x17: // load 4x3 + if (MatrixMode == 0) + { + MatrixLoad4x3(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } + else if (MatrixMode == 3) + MatrixLoad4x3(TexMatrix, (s32*)ExecParams); + else + { + MatrixLoad4x3(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + MatrixLoad4x3(VecMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } + break; + + case 0x18: // mult 4x4 + if (MatrixMode == 0) + { + MatrixMult4x4(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } + else if (MatrixMode == 3) + MatrixMult4x4(TexMatrix, (s32*)ExecParams); + else + { + 
MatrixMult4x4(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + { + MatrixMult4x4(VecMatrix, (s32*)ExecParams); + CycleCount += 30; + } + ClipMatrixDirty = true; + } + break; + + case 0x19: // mult 4x3 + if (MatrixMode == 0) + { + MatrixMult4x3(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } + else if (MatrixMode == 3) + MatrixMult4x3(TexMatrix, (s32*)ExecParams); + else + { + MatrixMult4x3(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + { + MatrixMult4x3(VecMatrix, (s32*)ExecParams); + CycleCount += 30; + } + ClipMatrixDirty = true; + } + break; + + case 0x1A: // mult 3x3 + if (MatrixMode == 0) + { + MatrixMult3x3(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } + else if (MatrixMode == 3) + MatrixMult3x3(TexMatrix, (s32*)ExecParams); + else + { + MatrixMult3x3(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + { + MatrixMult3x3(VecMatrix, (s32*)ExecParams); + CycleCount += 30; + } + ClipMatrixDirty = true; + } + break; + + case 0x1B: // scale + if (MatrixMode == 0) + { + MatrixScale(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } + else if (MatrixMode == 3) + MatrixScale(TexMatrix, (s32*)ExecParams); + else + { + MatrixScale(PosMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } + break; + + case 0x1C: // translate + if (MatrixMode == 0) + { + MatrixTranslate(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } + else if (MatrixMode == 3) + MatrixTranslate(TexMatrix, (s32*)ExecParams); + else + { + MatrixTranslate(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + MatrixTranslate(VecMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + } + break; + + case 0x20: // vertex color + { + u32 c = ExecParams[0]; + u32 r = c & 0x1F; + u32 g = (c >> 5) & 0x1F; + u32 b = (c >> 10) & 0x1F; + VertexColor[0] = r; + VertexColor[1] = g; + VertexColor[2] = b; + } + break; + + case 0x21: // normal + Normal[0] = (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6; + Normal[1] = (s16)((ExecParams[0] & 0x000FFC00) >> 
4) >> 6; + Normal[2] = (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6; + CycleCount += CalculateLighting(); + break; + + case 0x22: // texcoord + RawTexCoords[0] = ExecParams[0] & 0xFFFF; + RawTexCoords[1] = ExecParams[0] >> 16; + if ((TexParam >> 30) == 1) + { + TexCoords[0] = (RawTexCoords[0]*TexMatrix[0] + RawTexCoords[1]*TexMatrix[4] + TexMatrix[8] + TexMatrix[12]) >> 12; + TexCoords[1] = (RawTexCoords[0]*TexMatrix[1] + RawTexCoords[1]*TexMatrix[5] + TexMatrix[9] + TexMatrix[13]) >> 12; + } + else + { + TexCoords[0] = RawTexCoords[0]; + TexCoords[1] = RawTexCoords[1]; + } + break; + + case 0x23: // full vertex + CurVertex[0] = ExecParams[0] & 0xFFFF; + CurVertex[1] = ExecParams[0] >> 16; + CurVertex[2] = ExecParams[1] & 0xFFFF; + SubmitVertex(); + break; + + case 0x24: // 10-bit vertex + CurVertex[0] = (ExecParams[0] & 0x000003FF) << 6; + CurVertex[1] = (ExecParams[0] & 0x000FFC00) >> 4; + CurVertex[2] = (ExecParams[0] & 0x3FF00000) >> 14; + SubmitVertex(); + break; + + case 0x25: // vertex XY + CurVertex[0] = ExecParams[0] & 0xFFFF; + CurVertex[1] = ExecParams[0] >> 16; + SubmitVertex(); + break; + + case 0x26: // vertex XZ + CurVertex[0] = ExecParams[0] & 0xFFFF; + CurVertex[2] = ExecParams[0] >> 16; + SubmitVertex(); + break; + + case 0x27: // vertex YZ + CurVertex[1] = ExecParams[0] & 0xFFFF; + CurVertex[2] = ExecParams[0] >> 16; + SubmitVertex(); + break; + + case 0x28: // 10-bit delta vertex + CurVertex[0] += (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6; + CurVertex[1] += (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6; + CurVertex[2] += (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6; + SubmitVertex(); + break; + + case 0x29: // polygon attributes + PolygonAttr = ExecParams[0]; + break; + + case 0x2A: // texture param + TexParam = ExecParams[0]; + break; + + case 0x2B: // texture palette + TexPalette = ExecParams[0] & 0x1FFF; + break; + + case 0x30: // diffuse/ambient material + MatDiffuse[0] = ExecParams[0] & 0x1F; + MatDiffuse[1] = (ExecParams[0] >> 5) & 
0x1F; + MatDiffuse[2] = (ExecParams[0] >> 10) & 0x1F; + MatAmbient[0] = (ExecParams[0] >> 16) & 0x1F; + MatAmbient[1] = (ExecParams[0] >> 21) & 0x1F; + MatAmbient[2] = (ExecParams[0] >> 26) & 0x1F; + if (ExecParams[0] & 0x8000) + { + VertexColor[0] = MatDiffuse[0]; + VertexColor[1] = MatDiffuse[1]; + VertexColor[2] = MatDiffuse[2]; + } + break; + + case 0x31: // specular/emission material + MatSpecular[0] = ExecParams[0] & 0x1F; + MatSpecular[1] = (ExecParams[0] >> 5) & 0x1F; + MatSpecular[2] = (ExecParams[0] >> 10) & 0x1F; + MatEmission[0] = (ExecParams[0] >> 16) & 0x1F; + MatEmission[1] = (ExecParams[0] >> 21) & 0x1F; + MatEmission[2] = (ExecParams[0] >> 26) & 0x1F; + UseShininessTable = (ExecParams[0] & 0x8000) != 0; + break; + + case 0x32: // light direction + { + u32 l = ExecParams[0] >> 30; + s16 dir[3]; + dir[0] = (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6; + dir[1] = (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6; + dir[2] = (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6; + LightDirection[l][0] = (dir[0]*VecMatrix[0] + dir[1]*VecMatrix[4] + dir[2]*VecMatrix[8]) >> 12; + LightDirection[l][1] = (dir[0]*VecMatrix[1] + dir[1]*VecMatrix[5] + dir[2]*VecMatrix[9]) >> 12; + LightDirection[l][2] = (dir[0]*VecMatrix[2] + dir[1]*VecMatrix[6] + dir[2]*VecMatrix[10]) >> 12; + } + break; + + case 0x33: // light color + { + u32 l = ExecParams[0] >> 30; + LightColor[l][0] = ExecParams[0] & 0x1F; + LightColor[l][1] = (ExecParams[0] >> 5) & 0x1F; + LightColor[l][2] = (ExecParams[0] >> 10) & 0x1F; + } + break; + + case 0x34: // shininess table + { + for (int i = 0; i < 128; i += 4) + { + u32 val = ExecParams[i >> 2]; + ShininessTable[i + 0] = val & 0xFF; + ShininessTable[i + 1] = (val >> 8) & 0xFF; + ShininessTable[i + 2] = (val >> 16) & 0xFF; + ShininessTable[i + 3] = val >> 24; + } + } + break; + + case 0x40: // begin polygons + PolygonMode = ExecParams[0] & 0x3; + VertexNum = 0; + VertexNumInPoly = 0; + NumConsecutivePolygons = 0; + LastStripPolygon = NULL; + 
CurPolygonAttr = PolygonAttr; + break; + + case 0x50: // flush + FlushRequest = 1; + FlushAttributes = ExecParams[0] & 0x3; + CycleCount = 392; + break; + + case 0x60: // viewport x1,y1,x2,y2 + Viewport[0] = ExecParams[0] & 0xFF; + Viewport[1] = (ExecParams[0] >> 8) & 0xFF; + Viewport[2] = ((ExecParams[0] >> 16) & 0xFF) - Viewport[0] + 1; + Viewport[3] = (ExecParams[0] >> 24) - Viewport[1] + 1; + break; + + default: + //if (entry.Command != 0x41) + //printf("!! UNKNOWN GX COMMAND %02X %08X\n", entry.Command, entry.Param); + break; + } + } +} + +void Run(s32 cycles) +{ + if (FlushRequest) + return; + if (CycleCount <= 0 && CmdPIPE->IsEmpty()) + return; + + CycleCount -= cycles; + + if (CycleCount <= 0) + { + while (CycleCount <= 0 && !CmdPIPE->IsEmpty()) + ExecuteCommand(); + } + + if (CycleCount <= 0 && CmdPIPE->IsEmpty()) + { + CycleCount = 0; + GXStat &= ~((1<<27)|(1<<14)); + } +} + + +void CheckFIFOIRQ() +{ + bool irq = false; + switch (GXStat >> 30) + { + case 1: irq = (CmdFIFO->Level() < 128); break; + case 2: irq = CmdFIFO->IsEmpty(); break; + } + + if (irq) NDS::SetIRQ(0, NDS::IRQ_GXFIFO); + else NDS::ClearIRQ(0, NDS::IRQ_GXFIFO); +} + +void CheckFIFODMA() +{ + if (CmdFIFO->Level() < 128) + NDS::CheckDMAs(0, 0x07); +} + + +void VBlank() +{ + if (FlushRequest) + { + SoftRenderer::RenderFrame(CurVertexRAM, CurPolygonRAM, NumPolygons); + + CurRAMBank = CurRAMBank?0:1; + CurVertexRAM = &VertexRAM[CurRAMBank ? 6144 : 0]; + CurPolygonRAM = &PolygonRAM[CurRAMBank ? 
2048 : 0]; + + NumVertices = 0; + NumPolygons = 0; + + FlushRequest = 0; + } +} + +u32* GetLine(int line) +{ + return SoftRenderer::GetLine(line); +} + + +u8 Read8(u32 addr) +{ + printf("unknown GPU3D read8 %08X\n", addr); + return 0; +} + +u16 Read16(u32 addr) +{ + switch (addr) + { + case 0x04000060: + return DispCnt; + } + + printf("unknown GPU3D read16 %08X\n", addr); + return 0; +} + +u32 Read32(u32 addr) +{ + switch (addr) + { + case 0x04000060: + return DispCnt; + + case 0x04000320: + return 46; // TODO, eventually + + case 0x04000600: + { + u32 fifolevel = CmdFIFO->Level(); + + return GXStat | + ((PosMatrixStackPointer & 0x1F) << 8) | + ((ProjMatrixStackPointer & 0x1) << 13) | + (fifolevel << 16) | + (fifolevel < 128 ? (1<<25) : 0) | + (fifolevel == 0 ? (1<<26) : 0); + } + + case 0x04000680: return VecMatrix[0]; + case 0x04000684: return VecMatrix[1]; + case 0x04000688: return VecMatrix[2]; + case 0x0400068C: return VecMatrix[4]; + case 0x04000690: return VecMatrix[5]; + case 0x04000694: return VecMatrix[6]; + case 0x04000698: return VecMatrix[8]; + case 0x0400069C: return VecMatrix[9]; + case 0x040006A0: return VecMatrix[10]; + } + + if (addr >= 0x04000640 && addr < 0x04000680) + { + UpdateClipMatrix(); + return ClipMatrix[(addr & 0x3C) >> 2]; + } + + //printf("unknown GPU3D read32 %08X\n", addr); + return 0; +} + +void Write8(u32 addr, u8 val) +{ + switch (addr) + { + case 0x04000340: + AlphaRef = val & 0x1F; + return; + } + + if (addr >= 0x04000360 && addr < 0x04000380) + { + FogDensityTable[addr - 0x04000360] = val; + return; + } + + printf("unknown GPU3D write8 %08X %02X\n", addr, val); +} + +void Write16(u32 addr, u16 val) +{ + switch (addr) + { + case 0x04000060: + DispCnt = val; + return; + + case 0x04000340: + AlphaRef = val & 0x1F; + return; + + case 0x04000350: + ClearAttr1 = (ClearAttr1 & 0xFFFF0000) | val; + return; + case 0x04000352: + ClearAttr1 = (ClearAttr1 & 0xFFFF) | (val << 16); + return; + case 0x04000354: + ClearAttr2 = (ClearAttr2 & 
0xFFFF0000) | val; + return; + case 0x04000356: + ClearAttr2 = (ClearAttr2 & 0xFFFF) | (val << 16); + return; + + case 0x04000358: + FogColor = (FogColor & 0xFFFF0000) | val; + return; + case 0x0400035A: + FogColor = (FogColor & 0xFFFF) | (val << 16); + return; + case 0x0400035C: + FogOffset = val; + return; + } + + if (addr >= 0x04000330 && addr < 0x04000340) + { + EdgeTable[(addr - 0x04000330) >> 1] = val; + return; + } + + if (addr >= 0x04000360 && addr < 0x04000380) + { + addr -= 0x04000360; + FogDensityTable[addr] = val & 0xFF; + FogDensityTable[addr+1] = val >> 8; + return; + } + + if (addr >= 0x04000380 && addr < 0x040003C0) + { + ToonTable[(addr - 0x04000380) >> 1] = val; + return; + } + + printf("unknown GPU3D write16 %08X %04X\n", addr, val); +} + +void Write32(u32 addr, u32 val) +{ + switch (addr) + { + case 0x04000060: + DispCnt = val & 0xFFFF; + return; + + case 0x04000340: + AlphaRef = val & 0x1F; + return; + + case 0x04000350: + ClearAttr1 = val; + return; + case 0x04000354: + ClearAttr2 = val; + return; + + case 0x04000358: + FogColor = val; + return; + case 0x0400035C: + FogOffset = val; + return; + + case 0x04000600: + if (val & 0x8000) + { + GXStat &= ~0x8000; + ProjMatrixStackPointer = 0; + //PosMatrixStackPointer = 0; + TexMatrixStackPointer = 0; + } + val &= 0xC0000000; + GXStat &= 0x3FFFFFFF; + GXStat |= val; + CheckFIFOIRQ(); + return; + } + + if (addr >= 0x04000400 && addr < 0x04000440) + { + if (NumCommands == 0) + { + NumCommands = 4; + CurCommand = val; + ParamCount = 0; + TotalParams = CmdNumParams[CurCommand & 0xFF]; + + if (TotalParams > 0) return; + } + else + ParamCount++; + + for (;;) + { + if ((CurCommand & 0xFF) || (NumCommands == 4 && CurCommand == 0)) + { + CmdFIFOEntry entry; + entry.Command = CurCommand & 0xFF; + entry.Param = val; + CmdFIFOWrite(entry); + } + + if (ParamCount >= TotalParams) + { + CurCommand >>= 8; + NumCommands--; + if (NumCommands == 0) break; + + ParamCount = 0; + TotalParams = CmdNumParams[CurCommand & 
0xFF]; + } + if (ParamCount < TotalParams) + break; + } + + return; + } + + if (addr >= 0x04000440 && addr < 0x040005CC) + { + CmdFIFOEntry entry; + entry.Command = (addr & 0x1FC) >> 2; + entry.Param = val; + CmdFIFOWrite(entry); + return; + } + + if (addr >= 0x04000330 && addr < 0x04000340) + { + addr = (addr - 0x04000330) >> 1; + EdgeTable[addr] = val & 0xFFFF; + EdgeTable[addr+1] = val >> 16; + return; + } + + if (addr >= 0x04000360 && addr < 0x04000380) + { + addr -= 0x04000360; + FogDensityTable[addr] = val & 0xFF; + FogDensityTable[addr+1] = (val >> 8) & 0xFF; + FogDensityTable[addr+2] = (val >> 16) & 0xFF; + FogDensityTable[addr+3] = val >> 24; + return; + } + + if (addr >= 0x04000380 && addr < 0x040003C0) + { + addr = (addr - 0x04000380) >> 1; + ToonTable[addr] = val & 0xFFFF; + ToonTable[addr+1] = val >> 16; + return; + } + + printf("unknown GPU3D write32 %08X %08X\n", addr, val); +} + +} + diff --git a/src/GPU3D.h b/src/GPU3D.h new file mode 100644 index 0000000..c1adc2f --- /dev/null +++ b/src/GPU3D.h @@ -0,0 +1,98 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef GPU3D_H +#define GPU3D_H + +namespace GPU3D +{ + +typedef struct +{ + s32 Position[4]; + s32 Color[3]; + s16 TexCoords[2]; + + bool Clipped; + + // final vertex attributes. + // allows them to be reused in polygon strips. 
+ + s32 FinalPosition[4]; + s32 FinalColor[3]; + +} Vertex; + +typedef struct +{ + Vertex* Vertices[10]; + u32 NumVertices; + + u32 Attr; + u32 TexParam; + u32 TexPalette; + + bool FacingView; + bool Translucent; + + u32 VTop, VBottom; // vertex indices + s32 YTop, YBottom; // Y coords + s32 XTop, XBottom; // associated X coords + +} Polygon; + +extern u32 DispCnt; +extern u32 AlphaRef; +extern s32 Viewport[4]; +extern u32 ClearAttr1, ClearAttr2; + +bool Init(); +void DeInit(); +void Reset(); + +void ExecuteCommand(); + +void Run(s32 cycles); +void CheckFIFOIRQ(); +void CheckFIFODMA(); + +void VBlank(); +u32* GetLine(int line); + +u8 Read8(u32 addr); +u16 Read16(u32 addr); +u32 Read32(u32 addr); +void Write8(u32 addr, u8 val); +void Write16(u32 addr, u16 val); +void Write32(u32 addr, u32 val); + +namespace SoftRenderer +{ + +bool Init(); +void DeInit(); +void Reset(); + +void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys); +u32* GetLine(int line); + +} + +} + +#endif diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp new file mode 100644 index 0000000..5c9dc8e --- /dev/null +++ b/src/GPU3D_Soft.cpp @@ -0,0 +1,853 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#include <stdio.h> +#include <string.h> +#include "NDS.h" +#include "GPU.h" + + +namespace GPU3D +{ +namespace SoftRenderer +{ + +u32 ColorBuffer[256*192]; +u32 DepthBuffer[256*192]; +u32 AttrBuffer[256*192]; + +// attribute buffer: +// bit0-5: polygon ID +// bit8: fog enable + + +bool Init() +{ + return true; +} + +void DeInit() +{ +} + +void Reset() +{ + memset(ColorBuffer, 0, 256*192 * 4); + memset(DepthBuffer, 0, 256*192 * 4); + memset(AttrBuffer, 0, 256*192 * 4); +} + + +void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) +{ + u32 vramaddr = (texparam & 0xFFFF) << 3; + + u32 width = 8 << ((texparam >> 20) & 0x7); + u32 height = 8 << ((texparam >> 23) & 0x7); + + s >>= 4; + t >>= 4; + + // texture wrapping + // TODO: optimize this somehow + + if (texparam & (1<<16)) + { + if (texparam & (1<<18)) + { + if (s & width) s = (width-1) - (s & (width-1)); + else s = (s & (width-1)); + } + else + s &= width-1; + } + else + { + if (s < 0) s = 0; + else if (s >= width) s = width-1; + } + + if (texparam & (1<<17)) + { + if (texparam & (1<<19)) + { + if (t & height) t = (height-1) - (t & (height-1)); + else t = (t & (height-1)); + } + else + t &= height-1; + } + else + { + if (t < 0) t = 0; + else if (t >= height) t = height-1; + } + + u8 alpha0; + if (texparam & (1<<29)) alpha0 = 0; + else alpha0 = 31; + + switch ((texparam >> 26) & 0x7) + { + case 1: // A3I5 + { + vramaddr += ((t * width) + s); + u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); + + texpal <<= 4; + *color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1)); + *alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6); + } + break; + + case 2: // 4-color + { + vramaddr += (((t * width) + s) >> 2); + u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); + pixel >>= ((s & 0x3) << 1); + pixel &= 0x3; + + texpal <<= 3; + *color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1)); + *alpha = (pixel==0) ? 
alpha0 : 31; + } + break; + + case 3: // 16-color + { + vramaddr += (((t * width) + s) >> 1); + u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); + if (s & 0x1) pixel >>= 4; + else pixel &= 0xF; + + texpal <<= 4; + *color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1)); + *alpha = (pixel==0) ? alpha0 : 31; + } + break; + + case 4: // 256-color + { + vramaddr += ((t * width) + s); + u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); + + texpal <<= 4; + *color = GPU::ReadVRAM_TexPal<u16>(texpal + (pixel<<1)); + *alpha = (pixel==0) ? alpha0 : 31; + } + break; + + case 5: // compressed + { + vramaddr += ((t & 0x3FC) * (width>>2)) + (s & 0x3FC); + vramaddr += (t & 0x3); + + u32 slot1addr = 0x20000 + ((vramaddr & 0x1FFFC) >> 1); + if (vramaddr >= 0x40000) + slot1addr += 0x10000; + + u8 val = GPU::ReadVRAM_Texture<u8>(vramaddr); + val >>= (2 * (s & 0x3)); + + u16 palinfo = GPU::ReadVRAM_Texture<u16>(slot1addr); + u32 paloffset = (palinfo & 0x3FFF) << 2; + texpal <<= 4; + + switch (val & 0x3) + { + case 0: + *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); + *alpha = 31; + break; + + case 1: + *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); + *alpha = 31; + break; + + case 2: + if ((palinfo >> 14) == 1) + { + u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); + u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); + + u32 r0 = color0 & 0x001F; + u32 g0 = color0 & 0x03E0; + u32 b0 = color0 & 0x7C00; + u32 r1 = color1 & 0x001F; + u32 g1 = color1 & 0x03E0; + u32 b1 = color1 & 0x7C00; + + u32 r = (r0 + r1) >> 1; + u32 g = ((g0 + g1) >> 1) & 0x03E0; + u32 b = ((b0 + b1) >> 1) & 0x7C00; + + *color = r | g | b; + } + else if ((palinfo >> 14) == 3) + { + u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); + u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); + + u32 r0 = color0 & 0x001F; + u32 g0 = color0 & 0x03E0; + u32 b0 = color0 & 0x7C00; + u32 r1 = color1 & 0x001F; + u32 g1 = color1 & 0x03E0; + u32 b1 = color1 & 0x7C00; + 
+ u32 r = (r0*5 + r1*3) >> 3; + u32 g = ((g0*5 + g1*3) >> 3) & 0x03E0; + u32 b = ((b0*5 + b1*3) >> 3) & 0x7C00; + + *color = r | g | b; + } + else + *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 4); + *alpha = 31; + break; + + case 3: + if ((palinfo >> 14) == 2) + { + *color = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 6); + *alpha = 31; + } + else if ((palinfo >> 14) == 3) + { + u16 color0 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset); + u16 color1 = GPU::ReadVRAM_TexPal<u16>(texpal + paloffset + 2); + + u32 r0 = color0 & 0x001F; + u32 g0 = color0 & 0x03E0; + u32 b0 = color0 & 0x7C00; + u32 r1 = color1 & 0x001F; + u32 g1 = color1 & 0x03E0; + u32 b1 = color1 & 0x7C00; + + u32 r = (r0*3 + r1*5) >> 3; + u32 g = ((g0*3 + g1*5) >> 3) & 0x03E0; + u32 b = ((b0*3 + b1*5) >> 3) & 0x7C00; + + *color = r | g | b; + *alpha = 31; + } + else + { + *color = 0; + *alpha = 0; + } + break; + } + } + break; + + case 6: // A5I3 + { + vramaddr += ((t * width) + s); + u8 pixel = GPU::ReadVRAM_Texture<u8>(vramaddr); + + texpal <<= 4; + *color = GPU::ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1)); + *alpha = (pixel >> 3); + } + break; + + case 7: // direct color + { + vramaddr += (((t * width) + s) << 1); + *color = GPU::ReadVRAM_Texture<u16>(vramaddr); + *alpha = (*color & 0x8000) ? 
31 : 0; + } + break; + } +} + +bool DepthTest(Polygon* polygon, s32 x, s32 y, s32 z) +{ + u32 oldz = DepthBuffer[(256*y) + x]; + + if (polygon->Attr & (1<<14)) + { + s32 diff = oldz - z; + if ((u32)(diff + 0x200) <= 0x400) + return true; + } + else + if (z < oldz) + return true; + + return false; +} + +u32 RenderPixel(Polygon* polygon, s32 x, s32 y, s32 z, u8 vr, u8 vg, u8 vb, s16 s, s16 t) +{ + u32 attr = polygon->Attr; + u8 r, g, b, a; + + u32 polyalpha = (polygon->Attr >> 16) & 0x1F; + bool wireframe = (polyalpha == 0); + + if ((DispCnt & (1<<0)) && (((polygon->TexParam >> 26) & 0x7) != 0)) + { + u8 tr, tg, tb; + + u16 tcolor; u8 talpha; + TextureLookup(polygon->TexParam, polygon->TexPalette, s, t, &tcolor, &talpha); + + tr = (tcolor << 1) & 0x3E; if (tr) tr++; + tg = (tcolor >> 4) & 0x3E; if (tg) tg++; + tb = (tcolor >> 9) & 0x3E; if (tb) tb++; + + // TODO: other blending modes + r = ((tr+1) * (vr+1) - 1) >> 6; + g = ((tg+1) * (vg+1) - 1) >> 6; + b = ((tb+1) * (vb+1) - 1) >> 6; + a = ((talpha+1) * (polyalpha+1) - 1) >> 5; + } + else + { + r = vr; + g = vg; + b = vb; + a = polyalpha; + } + + if (wireframe) a = 31; + + return r | (g << 8) | (b << 16) | (a << 24); +} + +void RenderPolygon(Polygon* polygon) +{ + int nverts = polygon->NumVertices; + bool isline = false; + + int vtop = polygon->VTop, vbot = polygon->VBottom; + s32 ytop = polygon->YTop, ybot = polygon->YBottom; + s32 xtop = polygon->XTop, xbot = polygon->XBottom; + + if (ytop > 191) return; + + // draw, line per line + + u32 polyalpha = (polygon->Attr >> 16) & 0x1F; + bool wireframe = (polyalpha == 0); + + int lcur = vtop, rcur = vtop; + int lnext, rnext; + + s32 dxl, dxr; + s32 lslope, rslope; + bool l_xmajor, r_xmajor; + + if (ybot == ytop) + { + ybot++; + isline = true; + + vtop = 0; vbot = 0; + xtop = 256; xbot = 0; + int i; + + i = 1; + if (polygon->Vertices[i]->FinalPosition[0] < polygon->Vertices[vtop]->FinalPosition[0]) vtop = i; + if (polygon->Vertices[i]->FinalPosition[0] > 
polygon->Vertices[vbot]->FinalPosition[0]) vbot = i; + + i = nverts - 1; + if (polygon->Vertices[i]->FinalPosition[0] < polygon->Vertices[vtop]->FinalPosition[0]) vtop = i; + if (polygon->Vertices[i]->FinalPosition[0] > polygon->Vertices[vbot]->FinalPosition[0]) vbot = i; + + lcur = vtop; lnext = vtop; + rcur = vbot; rnext = vbot; + + lslope = 0; l_xmajor = false; + rslope = 0; r_xmajor = false; + } + else + { + //while (polygon->Vertices[lnext]->FinalPosition[1] ) + if (polygon->FacingView) + { + lnext = lcur + 1; + if (lnext >= nverts) lnext = 0; + rnext = rcur - 1; + if (rnext < 0) rnext = nverts - 1; + } + else + { + lnext = lcur - 1; + if (lnext < 0) lnext = nverts - 1; + rnext = rcur + 1; + if (rnext >= nverts) rnext = 0; + } + + if (polygon->Vertices[lnext]->FinalPosition[1] == polygon->Vertices[lcur]->FinalPosition[1]) + lslope = 0; + else + lslope = ((polygon->Vertices[lnext]->FinalPosition[0] - polygon->Vertices[lcur]->FinalPosition[0]) << 12) / + (polygon->Vertices[lnext]->FinalPosition[1] - polygon->Vertices[lcur]->FinalPosition[1]); + + if (polygon->Vertices[rnext]->FinalPosition[1] == polygon->Vertices[rcur]->FinalPosition[1]) + rslope = 0; + else + rslope = ((polygon->Vertices[rnext]->FinalPosition[0] - polygon->Vertices[rcur]->FinalPosition[0]) << 12) / + (polygon->Vertices[rnext]->FinalPosition[1] - polygon->Vertices[rcur]->FinalPosition[1]); + + l_xmajor = (lslope < -0x1000) || (lslope > 0x1000); + r_xmajor = (rslope < -0x1000) || (rslope > 0x1000); + } + + if (l_xmajor) dxl = (lslope > 0) ? 0x800 : (-lslope-0x800)+0x1000; + else if (lslope) dxl = (lslope > 0) ? 0 : 0x1000; + else dxl = 0; + + if (r_xmajor) dxr = (rslope > 0) ? rslope-0x800 : 0x800+0x1000; + else if (rslope) dxr = (rslope > 0) ? 
0 : 0x1000; + else dxr = 0x1000; + + if (ybot > 192) ybot = 192; + for (s32 y = ytop; y < ybot; y++) + { + if (!isline) + { + if (y >= polygon->Vertices[lnext]->FinalPosition[1] && lcur != vbot) + { + while (y >= polygon->Vertices[lnext]->FinalPosition[1] && lcur != vbot) + { + lcur = lnext; + + if (polygon->FacingView) + { + lnext = lcur + 1; + if (lnext >= nverts) lnext = 0; + } + else + { + lnext = lcur - 1; + if (lnext < 0) lnext = nverts - 1; + } + } + + if (polygon->Vertices[lnext]->FinalPosition[1] == polygon->Vertices[lcur]->FinalPosition[1]) + lslope = 0; + else + lslope = ((polygon->Vertices[lnext]->FinalPosition[0] - polygon->Vertices[lcur]->FinalPosition[0]) << 12) / + (polygon->Vertices[lnext]->FinalPosition[1] - polygon->Vertices[lcur]->FinalPosition[1]); + + l_xmajor = (lslope < -0x1000) || (lslope > 0x1000); + + if (l_xmajor) dxl = (lslope > 0) ? 0x800 : (-lslope-0x800)+0x1000; + else if (lslope) dxl = (lslope > 0) ? 0 : 0x1000; + else dxl = 0; + } + + if (y >= polygon->Vertices[rnext]->FinalPosition[1] && rcur != vbot) + { + while (y >= polygon->Vertices[rnext]->FinalPosition[1] && rcur != vbot) + { + rcur = rnext; + + if (polygon->FacingView) + { + rnext = rcur - 1; + if (rnext < 0) rnext = nverts - 1; + } + else + { + rnext = rcur + 1; + if (rnext >= nverts) rnext = 0; + } + } + + if (polygon->Vertices[rnext]->FinalPosition[1] == polygon->Vertices[rcur]->FinalPosition[1]) + rslope = 0; + else + rslope = ((polygon->Vertices[rnext]->FinalPosition[0] - polygon->Vertices[rcur]->FinalPosition[0]) << 12) / + (polygon->Vertices[rnext]->FinalPosition[1] - polygon->Vertices[rcur]->FinalPosition[1]); + + r_xmajor = (rslope < -0x1000) || (rslope > 0x1000); + + if (r_xmajor) dxr = (rslope > 0) ? rslope-0x800 : 0x800+0x1000; + else if (rslope) dxr = (rslope > 0) ? 
0 : 0x1000; + else dxr = 0x1000; + } + } + + Vertex *vlcur, *vlnext, *vrcur, *vrnext; + s32 xstart, xend; + s32 xstart_int, xend_int; + s32 slope_start, slope_end; + + if (lslope == 0 && rslope == 0 && + polygon->Vertices[lcur]->FinalPosition[0] == polygon->Vertices[rcur]->FinalPosition[0]) + { + xstart = polygon->Vertices[lcur]->FinalPosition[0]; + xend = xstart; + } + else + { + if (lslope > 0) + { + xstart = polygon->Vertices[lcur]->FinalPosition[0] + (dxl >> 12); + if (xstart < polygon->Vertices[lcur]->FinalPosition[0]) + xstart = polygon->Vertices[lcur]->FinalPosition[0]; + else if (xstart > polygon->Vertices[lnext]->FinalPosition[0]-1) + xstart = polygon->Vertices[lnext]->FinalPosition[0]-1; + } + else if (lslope < 0) + { + xstart = polygon->Vertices[lcur]->FinalPosition[0] - (dxl >> 12); + if (xstart < polygon->Vertices[lnext]->FinalPosition[0]) + xstart = polygon->Vertices[lnext]->FinalPosition[0]; + else if (xstart > polygon->Vertices[lcur]->FinalPosition[0]-1) + xstart = polygon->Vertices[lcur]->FinalPosition[0]-1; + } + else + xstart = polygon->Vertices[lcur]->FinalPosition[0]; + + if (rslope > 0) + { + xend = polygon->Vertices[rcur]->FinalPosition[0] + (dxr >> 12); + if (xend < polygon->Vertices[rcur]->FinalPosition[0]) + xend = polygon->Vertices[rcur]->FinalPosition[0]; + else if (xend > polygon->Vertices[rnext]->FinalPosition[0]-1) + xend = polygon->Vertices[rnext]->FinalPosition[0]-1; + } + else if (rslope < 0) + { + xend = polygon->Vertices[rcur]->FinalPosition[0] - (dxr >> 12); + if (xend < polygon->Vertices[rnext]->FinalPosition[0]) + xend = polygon->Vertices[rnext]->FinalPosition[0]; + else if (xend > polygon->Vertices[rcur]->FinalPosition[0]-1) + xend = polygon->Vertices[rcur]->FinalPosition[0]-1; + } + else + xend = polygon->Vertices[rcur]->FinalPosition[0] - 1; + } + + // if the left and right edges are swapped, render backwards. 
+ // note: we 'forget' to swap the xmajor flags, on purpose + // the hardware has the same bug + if (xstart > xend) + { + vlcur = polygon->Vertices[rcur]; + vlnext = polygon->Vertices[rnext]; + vrcur = polygon->Vertices[lcur]; + vrnext = polygon->Vertices[lnext]; + + slope_start = rslope; + slope_end = lslope; + + s32 tmp = xstart; xstart = xend; xend = tmp; + } + else + { + vlcur = polygon->Vertices[lcur]; + vlnext = polygon->Vertices[lnext]; + vrcur = polygon->Vertices[rcur]; + vrnext = polygon->Vertices[rnext]; + + slope_start = lslope; + slope_end = rslope; + } + + // interpolate attributes along Y + s64 lfactor1, lfactor2; + s64 rfactor1, rfactor2; + + if (l_xmajor) + { + lfactor1 = (vlnext->FinalPosition[0] - xstart) * vlnext->FinalPosition[3]; + lfactor2 = (xstart - vlcur->FinalPosition[0]) * vlcur->FinalPosition[3]; + } + else + { + lfactor1 = (vlnext->FinalPosition[1] - y) * vlnext->FinalPosition[3]; + lfactor2 = (y - vlcur->FinalPosition[1]) * vlcur->FinalPosition[3]; + } + + s64 ldenom = lfactor1 + lfactor2; + if (ldenom == 0) + { + lfactor1 = 0x1000; + lfactor2 = 0; + ldenom = 0x1000; + } + + if (r_xmajor) + { + rfactor1 = (vrnext->FinalPosition[0] - xend+1) * vrnext->FinalPosition[3]; + rfactor2 = (xend+1 - vrcur->FinalPosition[0]) * vrcur->FinalPosition[3]; + } + else + { + rfactor1 = (vrnext->FinalPosition[1] - y) * vrnext->FinalPosition[3]; + rfactor2 = (y - vrcur->FinalPosition[1]) * vrcur->FinalPosition[3]; + } + + s64 rdenom = rfactor1 + rfactor2; + if (rdenom == 0) + { + rfactor1 = 0x1000; + rfactor2 = 0; + rdenom = 0x1000; + } + + s32 zl = ((lfactor1 * vlcur->FinalPosition[2]) + (lfactor2 * vlnext->FinalPosition[2])) / ldenom; + s32 zr = ((rfactor1 * vrcur->FinalPosition[2]) + (rfactor2 * vrnext->FinalPosition[2])) / rdenom; + + s32 wl = ((lfactor1 * vlcur->FinalPosition[3]) + (lfactor2 * vlnext->FinalPosition[3])) / ldenom; + s32 wr = ((rfactor1 * vrcur->FinalPosition[3]) + (rfactor2 * vrnext->FinalPosition[3])) / rdenom; + + s32 rl = 
((lfactor1 * vlcur->FinalColor[0]) + (lfactor2 * vlnext->FinalColor[0])) / ldenom; + s32 gl = ((lfactor1 * vlcur->FinalColor[1]) + (lfactor2 * vlnext->FinalColor[1])) / ldenom; + s32 bl = ((lfactor1 * vlcur->FinalColor[2]) + (lfactor2 * vlnext->FinalColor[2])) / ldenom; + + s32 sl = ((lfactor1 * vlcur->TexCoords[0]) + (lfactor2 * vlnext->TexCoords[0])) / ldenom; + s32 tl = ((lfactor1 * vlcur->TexCoords[1]) + (lfactor2 * vlnext->TexCoords[1])) / ldenom; + + s32 rr = ((rfactor1 * vrcur->FinalColor[0]) + (rfactor2 * vrnext->FinalColor[0])) / rdenom; + s32 gr = ((rfactor1 * vrcur->FinalColor[1]) + (rfactor2 * vrnext->FinalColor[1])) / rdenom; + s32 br = ((rfactor1 * vrcur->FinalColor[2]) + (rfactor2 * vrnext->FinalColor[2])) / rdenom; + + s32 sr = ((rfactor1 * vrcur->TexCoords[0]) + (rfactor2 * vrnext->TexCoords[0])) / rdenom; + s32 tr = ((rfactor1 * vrcur->TexCoords[1]) + (rfactor2 * vrnext->TexCoords[1])) / rdenom; + + // calculate edges + s32 l_edgeend, r_edgestart; + + if (l_xmajor) + { + if (slope_start > 0) l_edgeend = vlcur->FinalPosition[0] + ((dxl + slope_start) >> 12); + else l_edgeend = vlcur->FinalPosition[0] - ((dxl - slope_start) >> 12); + + if (l_edgeend == xstart) l_edgeend++; + } + else + l_edgeend = xstart + 1; + + if (r_xmajor) + { + if (slope_end > 0) r_edgestart = vrcur->FinalPosition[0] + ((dxr + slope_end) >> 12); + else r_edgestart = vrcur->FinalPosition[0] - ((dxr - slope_end) >> 12); + + if (r_edgestart == xend_int) r_edgestart--; + } + else + r_edgestart = xend - 1; + + // edge fill rules for opaque pixels: + // * right edge is filled if slope > 1 + // * left edge is filled if slope <= 1 + // * edges with slope = 0 are always filled + // edges are always filled if the pixels are translucent + // in wireframe mode, there are special rules for equal Z (TODO) + + for (s32 x = xstart; x <= xend; x++) + { + if (x < 0) continue; + if (x > 255) break; + + int edge = 0; + if (y == ytop) edge |= 0x4; + else if (y == ybot-1) edge |= 0x8; + if (x < 
l_edgeend) edge |= 0x1; + else if (x > r_edgestart) edge |= 0x2; + + // wireframe polygons. really ugly, but works + if (wireframe && edge==0) continue; + + s64 factor1 = (xend+1 - x) * wr; + s64 factor2 = (x - xstart) * wl; + s64 denom = factor1 + factor2; + if (denom == 0) + { + factor1 = 0x1000; + factor2 = 0; + denom = 0x1000; + } + + s32 z = ((factor1 * zl) + (factor2 * zr)) / denom; + if (!DepthTest(polygon, x, y, z)) continue; + + u32 vr = ((factor1 * rl) + (factor2 * rr)) / denom; + u32 vg = ((factor1 * gl) + (factor2 * gr)) / denom; + u32 vb = ((factor1 * bl) + (factor2 * br)) / denom; + + s16 s = ((factor1 * sl) + (factor2 * sr)) / denom; + s16 t = ((factor1 * tl) + (factor2 * tr)) / denom; + + u32 color = RenderPixel(polygon, x, y, z, vr>>3, vg>>3, vb>>3, s, t); + u32 attr = 0; + u32 pixeladdr = (y*256) + x; + + u8 alpha = color >> 24; + + // alpha test + if (DispCnt & (1<<2)) + { + if (alpha <= AlphaRef) continue; + } + else + { + if (alpha == 0) continue; + } + + // alpha blending disable + // TODO: check alpha test when blending is disabled + if (!(DispCnt & (1<<3))) + alpha = 31; + + u32 dstcolor = ColorBuffer[pixeladdr]; + u32 dstalpha = dstcolor >> 24; + + if (alpha == 31) + { + // edge fill rules for opaque pixels + // TODO, eventually: antialiasing + if (!wireframe) + { + if ((edge & 0x1) && slope_start > 0x1000) + continue; + if ((edge & 0x2) && (slope_end != 0 && slope_end <= 0x1000)) + continue; + } + + DepthBuffer[pixeladdr] = z; + } + else if (dstalpha == 0) + { + // TODO: conditional Z-buffer update + DepthBuffer[pixeladdr] = z; + } + else + { + u32 srcR = color & 0x3F; + u32 srcG = (color >> 8) & 0x3F; + u32 srcB = (color >> 16) & 0x3F; + + u32 dstR = dstcolor & 0x3F; + u32 dstG = (dstcolor >> 8) & 0x3F; + u32 dstB = (dstcolor >> 16) & 0x3F; + + alpha++; + dstR = ((srcR * alpha) + (dstR * (32-alpha))) >> 5; + dstG = ((srcG * alpha) + (dstG * (32-alpha))) >> 5; + dstB = ((srcB * alpha) + (dstB * (32-alpha))) >> 5; + + alpha--; + if (alpha > 
dstalpha) dstalpha = alpha; + + color = dstR | (dstG << 8) | (dstB << 16) | (dstalpha << 24); + + // TODO: conditional Z-buffer update + DepthBuffer[pixeladdr] = z; + } + + ColorBuffer[pixeladdr] = color; + AttrBuffer[pixeladdr] = attr; + } + + if (lslope > 0) dxl += lslope; + else dxl -= lslope; + if (rslope > 0) dxr += rslope; + else dxr -= rslope; + } +} + +void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys) +{ + u32 polyid = (ClearAttr1 >> 24) & 0x3F; + + if (DispCnt & (1<<14)) + { + u8 xoff = (ClearAttr2 >> 16) & 0xFF; + u8 yoff = (ClearAttr2 >> 24) & 0xFF; + + for (int y = 0; y < 256*192; y += 256) + { + for (int x = 0; x < 256; x++) + { + u16 val2 = GPU::ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1)); + u16 val3 = GPU::ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1)); + + // TODO: confirm color conversion + u32 r = (val2 << 1) & 0x3E; if (r) r++; + u32 g = (val2 >> 4) & 0x3E; if (g) g++; + u32 b = (val2 >> 9) & 0x3E; if (b) b++; + u32 a = (val2 & 0x8000) ? 
0x1F000000 : 0; + u32 color = r | (g << 8) | (b << 16) | a; + + u32 z = ((val3 & 0x7FFF) * 0x200) + 0x1FF; + if (z >= 0x10000 && z < 0xFFFFFF) z++; + + ColorBuffer[y+x] = color; + DepthBuffer[y+x] = z; + AttrBuffer[y+x] = polyid | ((val3 & 0x8000) >> 7); + + xoff++; + } + + yoff++; + } + } + else + { + // TODO: confirm color conversion + u32 r = (ClearAttr1 << 1) & 0x3E; if (r) r++; + u32 g = (ClearAttr1 >> 4) & 0x3E; if (g) g++; + u32 b = (ClearAttr1 >> 9) & 0x3E; if (b) b++; + u32 a = (ClearAttr1 >> 16) & 0x1F; + u32 color = r | (g << 8) | (b << 16) | (a << 24); + + u32 z = ((ClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; + if (z >= 0x10000 && z < 0xFFFFFF) z++; + + polyid |= ((ClearAttr1 & 0x8000) >> 7); + + for (int i = 0; i < 256*192; i++) + { + ColorBuffer[i] = color; + DepthBuffer[i] = z; + AttrBuffer[i] = polyid; + } + } + + // TODO: Y-sorting of translucent polygons + + for (int i = 0; i < npolys; i++) + { + if (polygons[i].Translucent) continue; + RenderPolygon(&polygons[i]); + } + + for (int i = 0; i < npolys; i++) + { + if (!polygons[i].Translucent) continue; + RenderPolygon(&polygons[i]); + } +} + +u32* GetLine(int line) +{ + return &ColorBuffer[line * 256]; +} + +} +} diff --git a/src/NDS.cpp b/src/NDS.cpp new file mode 100644 index 0000000..574f557 --- /dev/null +++ b/src/NDS.cpp @@ -0,0 +1,2192 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. 
If not, see http://www.gnu.org/licenses/. +*/ + +#include <stdio.h> +#include <string.h> +#include "NDS.h" +#include "ARM.h" +#include "CP15.h" +#include "NDSCart.h" +#include "DMA.h" +#include "FIFO.h" +#include "GPU.h" +#include "SPI.h" +#include "RTC.h" +#include "Wifi.h" + + +namespace NDS +{ + +// TODO LIST +// * stick all the variables in a big structure? +// would make it easier to deal with savestates + +/*SchedEvent SchedBuffer[SCHED_BUF_LEN]; +SchedEvent* SchedQueue; + +bool NeedReschedule;*/ + +ARM* ARM9; +ARM* ARM7; + +/*s32 ARM9Cycles, ARM7Cycles; +s32 CompensatedCycles; +s32 SchedCycles;*/ +s32 CurIterationCycles; +s32 ARM7Offset; + +SchedEvent SchedList[Event_MAX]; +u32 SchedListMask; + +u32 CPUStop; + +u8 ARM9BIOS[0x1000]; +u8 ARM7BIOS[0x4000]; + +u8 MainRAM[0x400000]; + +u8 SharedWRAM[0x8000]; +u8 WRAMCnt; +u8* SWRAM_ARM9; +u8* SWRAM_ARM7; +u32 SWRAM_ARM9Mask; +u32 SWRAM_ARM7Mask; + +u8 ARM7WRAM[0x10000]; + +u16 ExMemCnt[2]; + +u8 ROMSeed0[2*8]; +u8 ROMSeed1[2*8]; + +// IO shit +u32 IME[2]; +u32 IE[2], IF[2]; + +u8 PostFlag9; +u8 PostFlag7; +u16 PowerControl9; +u16 PowerControl7; + +u16 ARM7BIOSProt; + +Timer Timers[8]; + +DMA* DMAs[8]; +u32 DMA9Fill[4]; + +u16 IPCSync9, IPCSync7; +u16 IPCFIFOCnt9, IPCFIFOCnt7; +FIFO<u32>* IPCFIFO9; // FIFO in which the ARM9 writes +FIFO<u32>* IPCFIFO7; + +u16 DivCnt; +u32 DivNumerator[2]; +u32 DivDenominator[2]; +u32 DivQuotient[2]; +u32 DivRemainder[2]; + +u16 SqrtCnt; +u32 SqrtVal[2]; +u32 SqrtRes; + +u32 KeyInput; + +u16 _soundbias; // temp + +bool Running; + + +bool Init() +{ + ARM9 = new ARM(0); + ARM7 = new ARM(1); + + DMAs[0] = new DMA(0, 0); + DMAs[1] = new DMA(0, 1); + DMAs[2] = new DMA(0, 2); + DMAs[3] = new DMA(0, 3); + DMAs[4] = new DMA(1, 0); + DMAs[5] = new DMA(1, 1); + DMAs[6] = new DMA(1, 2); + DMAs[7] = new DMA(1, 3); + + IPCFIFO9 = new FIFO<u32>(16); + IPCFIFO7 = new FIFO<u32>(16); + + if (!NDSCart::Init()) return false; + if (!GPU::Init()) return false; + if (!SPI::Init()) return false; + if 
(!RTC::Init()) return false; + + Reset(); + return true; +} + +void DeInit() +{ + delete ARM9; + delete ARM7; + + for (int i = 0; i < 8; i++) + delete DMAs[i]; + + delete IPCFIFO9; + delete IPCFIFO7; + + NDSCart::DeInit(); + GPU::DeInit(); + SPI::DeInit(); + RTC::DeInit(); +} + + +void SetupDirectBoot() +{ + u32 bootparams[8]; + memcpy(bootparams, &NDSCart::CartROM[0x20], 8*4); + + printf("ARM9: offset=%08X entry=%08X RAM=%08X size=%08X\n", + bootparams[0], bootparams[1], bootparams[2], bootparams[3]); + printf("ARM7: offset=%08X entry=%08X RAM=%08X size=%08X\n", + bootparams[4], bootparams[5], bootparams[6], bootparams[7]); + + MapSharedWRAM(3); + + for (u32 i = 0; i < bootparams[3]; i+=4) + { + u32 tmp = *(u32*)&NDSCart::CartROM[bootparams[0]+i]; + ARM9Write32(bootparams[2]+i, tmp); + } + + for (u32 i = 0; i < bootparams[7]; i+=4) + { + u32 tmp = *(u32*)&NDSCart::CartROM[bootparams[4]+i]; + ARM7Write32(bootparams[6]+i, tmp); + } + + for (u32 i = 0; i < 0x170; i+=4) + { + u32 tmp = *(u32*)&NDSCart::CartROM[i]; + ARM9Write32(0x027FFE00+i, tmp); + } + + ARM9Write32(0x027FF800, 0x00001FC2); + ARM9Write32(0x027FF804, 0x00001FC2); + ARM9Write16(0x027FF808, *(u16*)&NDSCart::CartROM[0x15E]); + ARM9Write16(0x027FF80A, *(u16*)&NDSCart::CartROM[0x6C]); + + ARM9Write16(0x027FF850, 0x5835); + + ARM9Write32(0x027FFC00, 0x00001FC2); + ARM9Write32(0x027FFC04, 0x00001FC2); + ARM9Write16(0x027FFC08, *(u16*)&NDSCart::CartROM[0x15E]); + ARM9Write16(0x027FFC0A, *(u16*)&NDSCart::CartROM[0x6C]); + + ARM9Write16(0x027FFC10, 0x5835); + ARM9Write16(0x027FFC30, 0xFFFF); + ARM9Write16(0x027FFC40, 0x0001); + + CP15::Write(0x910, 0x0300000A); + CP15::Write(0x911, 0x00000020); + CP15::Write(0x100, 0x00050000); + + ARM9->JumpTo(bootparams[1]); + ARM7->JumpTo(bootparams[5]); + + PowerControl9 = 0x820F; + GPU::DisplaySwap(PowerControl9); + + ARM7BIOSProt = 0x1204; +} + +void Reset() +{ + FILE* f; + u32 i; + + f = fopen("bios9.bin", "rb"); + if (!f) + printf("ARM9 BIOS not found\n"); + else + { + 
fseek(f, 0, SEEK_SET); + fread(ARM9BIOS, 0x1000, 1, f); + + printf("ARM9 BIOS loaded\n"); + fclose(f); + } + + f = fopen("bios7.bin", "rb"); + if (!f) + printf("ARM7 BIOS not found\n"); + else + { + fseek(f, 0, SEEK_SET); + fread(ARM7BIOS, 0x4000, 1, f); + + printf("ARM7 BIOS loaded\n"); + fclose(f); + } + + memset(MainRAM, 0, 0x400000); + memset(SharedWRAM, 0, 0x8000); + memset(ARM7WRAM, 0, 0x10000); + + MapSharedWRAM(0); + + ExMemCnt[0] = 0; + ExMemCnt[1] = 0; + memset(ROMSeed0, 0, 2*8); + memset(ROMSeed1, 0, 2*8); + + IME[0] = 0; + IME[1] = 0; + + PostFlag9 = 0x00; + PostFlag7 = 0x00; + PowerControl9 = 0x0001; + PowerControl7 = 0x0001; + + ARM7BIOSProt = 0; + + IPCSync9 = 0; + IPCSync7 = 0; + IPCFIFOCnt9 = 0; + IPCFIFOCnt7 = 0; + IPCFIFO9->Clear(); + IPCFIFO7->Clear(); + + DivCnt = 0; + SqrtCnt = 0; + + ARM9->Reset(); + ARM7->Reset(); + CP15::Reset(); + + CPUStop = 0; + + memset(Timers, 0, 8*sizeof(Timer)); + + for (i = 0; i < 8; i++) DMAs[i]->Reset(); + memset(DMA9Fill, 0, 4*4); + + NDSCart::Reset(); + GPU::Reset(); + SPI::Reset(); + RTC::Reset(); + Wifi::Reset(); + + // memset(SchedBuffer, 0, sizeof(SchedEvent)*SCHED_BUF_LEN); + // SchedQueue = NULL; + memset(SchedList, 0, sizeof(SchedList)); + SchedListMask = 0; + + /*ARM9Cycles = 0; + ARM7Cycles = 0; + SchedCycles = 0;*/ + CurIterationCycles = 0; + ARM7Offset = 0; + + KeyInput = 0x007F03FF; + + _soundbias = 0; + + // test + //LoadROM(); + //LoadFirmware(); + // a_interp2.nds a_rounding (10) (11) a_slope (5) + if (NDSCart::LoadROM("rom/nsmb.nds")) + Running = true; // hax +} + + +void CalcIterationCycles() +{ + CurIterationCycles = 16; + + for (int i = 0; i < Event_MAX; i++) + { + if (!(SchedListMask & (1<<i))) + continue; + + if (SchedList[i].WaitCycles < CurIterationCycles) + CurIterationCycles = SchedList[i].WaitCycles; + } +} + +void RunSystem(s32 cycles) +{ + for (int i = 0; i < 8; i++) + { + if ((Timers[i].Cnt & 0x84) == 0x80) + Timers[i].Counter += (ARM9->Cycles >> 1) << Timers[i].CycleShift; + } + for 
(int i = 4; i < 8; i++) + { + if ((Timers[i].Cnt & 0x84) == 0x80) + Timers[i].Counter += ARM7->Cycles << Timers[i].CycleShift; + } + + for (int i = 0; i < Event_MAX; i++) + { + if (!(SchedListMask & (1<<i))) + continue; + + SchedList[i].WaitCycles -= cycles; + if (SchedList[i].WaitCycles < 1) + { + SchedListMask &= ~(1<<i); + SchedList[i].Func(SchedList[i].Param); + } + } +} + +void RunFrame() +{ + s32 framecycles = 560190; + + if (!Running) return; // dorp + + + GPU::StartFrame(); + + while (Running && framecycles>0) + { + s32 ndscyclestorun; + s32 ndscycles = 0; + + CalcIterationCycles(); + + if (CPUStop & 0xFFFF) + { + s32 cycles = CurIterationCycles; + cycles = DMAs[0]->Run(cycles); + if (cycles > 0) cycles = DMAs[1]->Run(cycles); + if (cycles > 0) cycles = DMAs[2]->Run(cycles); + if (cycles > 0) cycles = DMAs[3]->Run(cycles); + ndscyclestorun = CurIterationCycles - cycles; + + // TODO: run other timing critical shit, like timers + GPU3D::Run(ndscyclestorun); + } + else + { + ARM9->CyclesToRun = CurIterationCycles << 1; + ARM9->Execute(); + ndscyclestorun = ARM9->Cycles >> 1; + } + + if (CPUStop & 0xFFFF0000) + { + s32 cycles = ndscyclestorun - ARM7Offset; + cycles = DMAs[4]->Run(cycles); + if (cycles > 0) cycles = DMAs[5]->Run(cycles); + if (cycles > 0) cycles = DMAs[6]->Run(cycles); + if (cycles > 0) cycles = DMAs[7]->Run(cycles); + ARM7Offset = cycles; + } + else + { + ARM7->CyclesToRun = ndscyclestorun - ARM7Offset; + ARM7->Execute(); + ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun; + } + + RunSystem(ndscyclestorun); + //GPU3D::Run(ndscyclestorun); + + /*while (ndscycles < ndscyclestorun) + { + ARM7->CyclesToRun = ndscyclestorun - ndscycles - ARM7Offset; + ARM7->Execute(); + ARM7Offset = 0; + + RunEvents(ARM7->Cycles); + ndscycles += ARM7->Cycles; + } + + ARM7Offset = ndscycles - ndscyclestorun;*/ + + framecycles -= ndscyclestorun; + } +} + +void Reschedule() +{ + CalcIterationCycles(); + + ARM9->CyclesToRun = CurIterationCycles << 1; + //ARM7->CyclesToRun 
= CurIterationCycles - ARM7Offset; + //ARM7->CyclesToRun = (ARM9->Cycles >> 1) - ARM7->Cycles - ARM7Offset; +} + +void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 param) +{ + if (SchedListMask & (1<<id)) + { + printf("!! EVENT %d ALREADY SCHEDULED\n", id); + return; + } + + SchedEvent* evt = &SchedList[id]; + + if (periodic) evt->WaitCycles += delay; + else evt->WaitCycles = delay + (ARM9->Cycles >> 1); + + evt->Func = func; + evt->Param = param; + + SchedListMask |= (1<<id); + + Reschedule(); +} + +void CancelEvent(u32 id) +{ + SchedListMask &= ~(1<<id); +} + + +void PressKey(u32 key) +{ + KeyInput &= ~(1 << key); +} + +void ReleaseKey(u32 key) +{ + KeyInput |= (1 << key); +} + +void TouchScreen(u16 x, u16 y) +{ + SPI_TSC::SetTouchCoords(x, y); +} + +void ReleaseScreen() +{ + SPI_TSC::SetTouchCoords(0x000, 0xFFF); +} + + +void Halt() +{ + printf("Halt()\n"); + Running = false; +} + + +void MapSharedWRAM(u8 val) +{ + WRAMCnt = val; + + switch (WRAMCnt & 0x3) + { + case 0: + SWRAM_ARM9 = &SharedWRAM[0]; + SWRAM_ARM9Mask = 0x7FFF; + SWRAM_ARM7 = NULL; + SWRAM_ARM7Mask = 0; + break; + + case 1: + SWRAM_ARM9 = &SharedWRAM[0x4000]; + SWRAM_ARM9Mask = 0x3FFF; + SWRAM_ARM7 = &SharedWRAM[0]; + SWRAM_ARM7Mask = 0x3FFF; + break; + + case 2: + SWRAM_ARM9 = &SharedWRAM[0]; + SWRAM_ARM9Mask = 0x3FFF; + SWRAM_ARM7 = &SharedWRAM[0x4000]; + SWRAM_ARM7Mask = 0x3FFF; + break; + + case 3: + SWRAM_ARM9 = NULL; + SWRAM_ARM9Mask = 0; + SWRAM_ARM7 = &SharedWRAM[0]; + SWRAM_ARM7Mask = 0x7FFF; + break; + } +} + + +void SetIRQ(u32 cpu, u32 irq) +{ + IF[cpu] |= (1 << irq); +} + +void ClearIRQ(u32 cpu, u32 irq) +{ + IF[cpu] &= ~(1 << irq); +} + +bool HaltInterrupted(u32 cpu) +{ + if (cpu == 0) + { + if (!(IME[0] & 0x1)) + return false; + } + + if (IF[cpu] & IE[cpu]) + return true; + + return false; +} + +void StopCPU(u32 cpu, u32 mask) +{ + if (cpu) mask <<= 16; + CPUStop |= mask; +} + +void ResumeCPU(u32 cpu, u32 mask) +{ + if (cpu) mask <<= 16; + CPUStop &= ~mask; 
+} + + + +void CheckDMAs(u32 cpu, u32 mode) +{ + cpu <<= 2; + DMAs[cpu+0]->StartIfNeeded(mode); + DMAs[cpu+1]->StartIfNeeded(mode); + DMAs[cpu+2]->StartIfNeeded(mode); + DMAs[cpu+3]->StartIfNeeded(mode); +} + + + +//const s32 TimerPrescaler[4] = {1, 64, 256, 1024}; +const s32 TimerPrescaler[4] = {0, 6, 8, 10}; + +u16 TimerGetCounter(u32 timer) +{ + u32 ret = Timers[timer].Counter; + + if ((Timers[timer].Cnt & 0x84) == 0x80) + { + u32 c = (timer & 0x4) ? ARM7->Cycles : (ARM9->Cycles>>1); + ret += (c << Timers[timer].CycleShift); + } + + return ret >> 16; +} + +void TimerOverflow(u32 param) +{ + Timer* timer = &Timers[param]; + timer->Counter = 0; + + u32 tid = param & 0x3; + u32 cpu = param >> 2; + + for (;;) + { + if (tid == (param&0x3)) + ScheduleEvent(Event_Timer9_0 + param, true, (0x10000 - timer->Reload) << TimerPrescaler[timer->Cnt & 0x03], TimerOverflow, param); + //timer->Event = ScheduleEvent(TimerPrescaler[timer->Control&0x3], TimerIncrement, param); + + if (timer->Counter == 0) + { + timer->Counter = timer->Reload << 16; + + if (timer->Cnt & (1<<6)) + SetIRQ(cpu, IRQ_Timer0 + tid); + + // cascade + if (tid == 3) + break; + timer++; + if ((timer->Cnt & 0x84) != 0x84) + break; + timer->Counter += 0x10000; + tid++; + continue; + } + + break; + } +} + +void TimerStart(u32 id, u16 cnt) +{ + Timer* timer = &Timers[id]; + u16 curstart = timer->Cnt & (1<<7); + u16 newstart = cnt & (1<<7); + + timer->Cnt = cnt; + + if ((!curstart) && newstart) + { + timer->Counter = timer->Reload << 16; + timer->CycleShift = 16 - TimerPrescaler[cnt & 0x03]; + + // start the timer, if it's not a cascading timer + if (!(cnt & (1<<2))) + ScheduleEvent(Event_Timer9_0 + id, false, (0x10000 - timer->Reload) << TimerPrescaler[cnt & 0x03], TimerOverflow, id); + else + CancelEvent(Event_Timer9_0 + id); + } + else if (curstart && (!newstart)) + { + CancelEvent(Event_Timer9_0 + id); + } +} + + + +void StartDiv() +{ + // TODO: division isn't instant! 
+ + DivCnt &= ~0x2000; + + switch (DivCnt & 0x0003) + { + case 0x0000: + { + s32 num = (s32)DivNumerator[0]; + s32 den = (s32)DivDenominator[0]; + if (den == 0) + { + DivQuotient[0] = (num<0) ? 1:-1; + DivQuotient[1] = (num<0) ? -1:1; + *(s64*)&DivRemainder[0] = num; + } + else if (num == -0x80000000 && den == -1) + { + *(s64*)&DivQuotient[0] = 0x80000000; + } + else + { + *(s64*)&DivQuotient[0] = (s64)(num / den); + *(s64*)&DivRemainder[0] = (s64)(num % den); + } + } + break; + + case 0x0001: + case 0x0003: + { + s64 num = *(s64*)&DivNumerator[0]; + s32 den = (s32)DivDenominator[0]; + if (den == 0) + { + *(s64*)&DivQuotient[0] = (num<0) ? 1:-1; + *(s64*)&DivRemainder[0] = num; + } + else if (num == -0x8000000000000000 && den == -1) + { + *(s64*)&DivQuotient[0] = 0x8000000000000000; + } + else + { + *(s64*)&DivQuotient[0] = (s64)(num / den); + *(s64*)&DivRemainder[0] = (s64)(num % den); + } + } + break; + + case 0x0002: + { + s64 num = *(s64*)&DivNumerator[0]; + s64 den = *(s64*)&DivDenominator[0]; + if (den == 0) + { + *(s64*)&DivQuotient[0] = (num<0) ? 1:-1; + *(s64*)&DivRemainder[0] = num; + } + else if (num == -0x8000000000000000 && den == -1) + { + *(s64*)&DivQuotient[0] = 0x8000000000000000; + } + else + { + *(s64*)&DivQuotient[0] = (s64)(num / den); + *(s64*)&DivRemainder[0] = (s64)(num % den); + } + } + break; + } + + if ((DivDenominator[0] | DivDenominator[1]) == 0) + DivCnt |= 0x2000; +} + +// http://stackoverflow.com/questions/1100090/looking-for-an-efficient-integer-square-root-algorithm-for-arm-thumb2 +void StartSqrt() +{ + // TODO: sqrt isn't instant either. 
oh well + + u64 val; + u32 res = 0; + u64 rem = 0; + u32 prod = 0; + u32 nbits, topshift; + + if (SqrtCnt & 0x0001) + { + val = *(u64*)&SqrtVal[0]; + nbits = 32; + topshift = 62; + } + else + { + val = (u64)SqrtVal[0]; // 32bit + nbits = 16; + topshift = 30; + } + + for (u32 i = 0; i < nbits; i++) + { + rem = (rem << 2) + ((val >> topshift) & 0x3); + val <<= 2; + res <<= 1; + + prod = (res << 1) + 1; + if (rem >= prod) + { + rem -= prod; + res++; + } + } + + SqrtRes = res; +} + + + +void debug(u32 param) +{ + printf("ARM9 PC=%08X LR=%08X %08X\n", ARM9->R[15], ARM9->R[14], ARM9->R_IRQ[1]); + printf("ARM7 PC=%08X LR=%08X %08X\n", ARM7->R[15], ARM7->R[14], ARM7->R_IRQ[1]); + + for (int i = 0; i < 9; i++) + printf("VRAM %c: %02X\n", 'A'+i, GPU::VRAMCNT[i]); +} + + + +u8 ARM9Read8(u32 addr) +{ + if ((addr & 0xFFFFF000) == 0xFFFF0000) + { + return *(u8*)&ARM9BIOS[addr & 0xFFF]; + } + + switch (addr & 0xFF000000) + { + case 0x02000000: + return *(u8*)&MainRAM[addr & 0x3FFFFF]; + + case 0x03000000: + if (SWRAM_ARM9) return *(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask]; + else return 0; + + case 0x04000000: + return ARM9IORead8(addr); + + case 0x05000000: + return *(u8*)&GPU::Palette[addr & 0x7FF]; + + case 0x06000000: + { + switch (addr & 0x00E00000) + { + case 0x00000000: return GPU::ReadVRAM_ABG<u8>(addr); + case 0x00200000: return GPU::ReadVRAM_BBG<u8>(addr); + case 0x00400000: return GPU::ReadVRAM_AOBJ<u8>(addr); + case 0x00600000: return GPU::ReadVRAM_BOBJ<u8>(addr); + default: return GPU::ReadVRAM_LCDC<u8>(addr); + } + } + return 0; + + case 0x07000000: + return *(u8*)&GPU::OAM[addr & 0x7FF]; + + case 0x08000000: + case 0x09000000: + return 0xFF; + } + + printf("unknown arm9 read8 %08X\n", addr); + return 0; +} + +u16 ARM9Read16(u32 addr) +{ + if ((addr & 0xFFFFF000) == 0xFFFF0000) + { + return *(u16*)&ARM9BIOS[addr & 0xFFF]; + } + + switch (addr & 0xFF000000) + { + case 0x02000000: + return *(u16*)&MainRAM[addr & 0x3FFFFF]; + + case 0x03000000: + if (SWRAM_ARM9) return 
*(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask]; + else return 0; + + case 0x04000000: + return ARM9IORead16(addr); + + case 0x05000000: + return *(u16*)&GPU::Palette[addr & 0x7FF]; + + case 0x06000000: + { + switch (addr & 0x00E00000) + { + case 0x00000000: return GPU::ReadVRAM_ABG<u16>(addr); + case 0x00200000: return GPU::ReadVRAM_BBG<u16>(addr); + case 0x00400000: return GPU::ReadVRAM_AOBJ<u16>(addr); + case 0x00600000: return GPU::ReadVRAM_BOBJ<u16>(addr); + default: return GPU::ReadVRAM_LCDC<u16>(addr); + } + } + return 0; + + case 0x07000000: + return *(u16*)&GPU::OAM[addr & 0x7FF]; + + case 0x08000000: + case 0x09000000: + return 0xFFFF; + } + + //printf("unknown arm9 read16 %08X %08X %08X %08X\n", addr, ARM9->R[15], ARM9->R[1], ARM9->R[2]); + return 0; +} + +u32 ARM9Read32(u32 addr) +{ + if ((addr & 0xFFFFF000) == 0xFFFF0000) + { + return *(u32*)&ARM9BIOS[addr & 0xFFF]; + } + + switch (addr & 0xFF000000) + { + case 0x02000000: + return *(u32*)&MainRAM[addr & 0x3FFFFF]; + + case 0x03000000: + if (SWRAM_ARM9) return *(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask]; + else return 0; + + case 0x04000000: + return ARM9IORead32(addr); + + case 0x05000000: + return *(u32*)&GPU::Palette[addr & 0x7FF]; + + case 0x06000000: + { + switch (addr & 0x00E00000) + { + case 0x00000000: return GPU::ReadVRAM_ABG<u32>(addr); + case 0x00200000: return GPU::ReadVRAM_BBG<u32>(addr); + case 0x00400000: return GPU::ReadVRAM_AOBJ<u32>(addr); + case 0x00600000: return GPU::ReadVRAM_BOBJ<u32>(addr); + default: return GPU::ReadVRAM_LCDC<u32>(addr); + } + } + return 0; + + case 0x07000000: + return *(u32*)&GPU::OAM[addr & 0x7FF]; + + case 0x08000000: + case 0x09000000: + return 0xFFFFFFFF; + } + + printf("unknown arm9 read32 %08X | %08X %08X %08X\n", addr, ARM9->R[15], ARM9->R[12], ARM9Read32(0x027FF820)); + return 0; +} + +void ARM9Write8(u32 addr, u8 val) +{ + switch (addr & 0xFF000000) + { + case 0x02000000: + *(u8*)&MainRAM[addr & 0x3FFFFF] = val; + return; + + case 0x03000000: + if 
(SWRAM_ARM9) *(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val; + return; + + case 0x04000000: + ARM9IOWrite8(addr, val); + return; + + case 0x05000000: + case 0x06000000: + case 0x07000000: + return; + } + + printf("unknown arm9 write8 %08X %02X\n", addr, val); +} + +void ARM9Write16(u32 addr, u16 val) +{ + switch (addr & 0xFF000000) + { + case 0x02000000: + *(u16*)&MainRAM[addr & 0x3FFFFF] = val; + return; + + case 0x03000000: + if (SWRAM_ARM9) *(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val; + return; + + case 0x04000000: + ARM9IOWrite16(addr, val); + return; + + case 0x05000000: + *(u16*)&GPU::Palette[addr & 0x7FF] = val; + return; + + case 0x06000000: + switch (addr & 0x00E00000) + { + case 0x00000000: GPU::WriteVRAM_ABG<u16>(addr, val); break; + case 0x00200000: GPU::WriteVRAM_BBG<u16>(addr, val); break; + case 0x00400000: GPU::WriteVRAM_AOBJ<u16>(addr, val); break; + case 0x00600000: GPU::WriteVRAM_BOBJ<u16>(addr, val); break; + default: GPU::WriteVRAM_LCDC<u16>(addr, val); break; + } + return; + + case 0x07000000: + *(u16*)&GPU::OAM[addr & 0x7FF] = val; + return; + } + + //printf("unknown arm9 write16 %08X %04X\n", addr, val); +} + +void ARM9Write32(u32 addr, u32 val) +{ + switch (addr & 0xFF000000) + { + case 0x02000000: + *(u32*)&MainRAM[addr & 0x3FFFFF] = val; + return; + + case 0x03000000: + if (SWRAM_ARM9) *(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val; + return; + + case 0x04000000: + ARM9IOWrite32(addr, val); + return; + + case 0x05000000: + *(u32*)&GPU::Palette[addr & 0x7FF] = val; + return; + + case 0x06000000: + switch (addr & 0x00E00000) + { + case 0x00000000: GPU::WriteVRAM_ABG<u32>(addr, val); break; + case 0x00200000: GPU::WriteVRAM_BBG<u32>(addr, val); break; + case 0x00400000: GPU::WriteVRAM_AOBJ<u32>(addr, val); break; + case 0x00600000: GPU::WriteVRAM_BOBJ<u32>(addr, val); break; + default: GPU::WriteVRAM_LCDC<u32>(addr, val); break; + } + return; + + case 0x07000000: + *(u32*)&GPU::OAM[addr & 0x7FF] = val; + return; + } + + printf("unknown 
arm9 write32 %08X %08X | %08X\n", addr, val, ARM9->R[15]); +} + + + +u8 ARM7Read8(u32 addr) +{ + if (addr < 0x00004000) + { + if (ARM7->R[15] >= 0x4000) + return 0xFF; + if (addr < ARM7BIOSProt && ARM7->R[15] >= ARM7BIOSProt) + return 0xFF; + + return *(u8*)&ARM7BIOS[addr]; + } + + switch (addr & 0xFF800000) + { + case 0x02000000: + case 0x02800000: + return *(u8*)&MainRAM[addr & 0x3FFFFF]; + + case 0x03000000: + if (SWRAM_ARM7) return *(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask]; + else return *(u8*)&ARM7WRAM[addr & 0xFFFF]; + + case 0x03800000: + return *(u8*)&ARM7WRAM[addr & 0xFFFF]; + + case 0x04000000: + return ARM7IORead8(addr); + + case 0x06000000: + case 0x06800000: + return GPU::ReadVRAM_ARM7<u8>(addr); + } + + printf("unknown arm7 read8 %08X %08X %08X/%08X\n", addr, ARM7->R[15], ARM7->R[0], ARM7->R[1]); + return 0; +} + +u16 ARM7Read16(u32 addr) +{ + if (addr < 0x00004000) + { + if (ARM7->R[15] >= 0x4000) + return 0xFFFF; + if (addr < ARM7BIOSProt && ARM7->R[15] >= ARM7BIOSProt) + return 0xFFFF; + + return *(u16*)&ARM7BIOS[addr]; + } + + switch (addr & 0xFF800000) + { + case 0x02000000: + case 0x02800000: + return *(u16*)&MainRAM[addr & 0x3FFFFF]; + + case 0x03000000: + if (SWRAM_ARM7) return *(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask]; + else return *(u16*)&ARM7WRAM[addr & 0xFFFF]; + + case 0x03800000: + return *(u16*)&ARM7WRAM[addr & 0xFFFF]; + + case 0x04000000: + return ARM7IORead16(addr); + + case 0x04800000: + return Wifi::Read(addr); + + case 0x06000000: + case 0x06800000: + return GPU::ReadVRAM_ARM7<u16>(addr); + } + + printf("unknown arm7 read16 %08X %08X\n", addr, ARM7->R[15]); + return 0; +} + +u32 ARM7Read32(u32 addr) +{ + if (addr < 0x00004000) + { + if (ARM7->R[15] >= 0x4000) + return 0xFFFFFFFF; + if (addr < ARM7BIOSProt && ARM7->R[15] >= ARM7BIOSProt) + return 0xFFFFFFFF; + + return *(u32*)&ARM7BIOS[addr]; + } + + switch (addr & 0xFF800000) + { + case 0x02000000: + case 0x02800000: + return *(u32*)&MainRAM[addr & 0x3FFFFF]; + + case 0x03000000: 
+ if (SWRAM_ARM7) return *(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask]; + else return *(u32*)&ARM7WRAM[addr & 0xFFFF]; + + case 0x03800000: + return *(u32*)&ARM7WRAM[addr & 0xFFFF]; + + case 0x04000000: + return ARM7IORead32(addr); + + case 0x06000000: + case 0x06800000: + return GPU::ReadVRAM_ARM7<u32>(addr); + } + + printf("unknown arm7 read32 %08X | %08X\n", addr, ARM7->R[15]); + return 0; +} + +void ARM7Write8(u32 addr, u8 val) +{ + switch (addr & 0xFF800000) + { + case 0x02000000: + case 0x02800000: + *(u8*)&MainRAM[addr & 0x3FFFFF] = val; + return; + + case 0x03000000: + if (SWRAM_ARM7) *(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val; + else *(u8*)&ARM7WRAM[addr & 0xFFFF] = val; + return; + + case 0x03800000: + *(u8*)&ARM7WRAM[addr & 0xFFFF] = val; + return; + + case 0x04000000: + ARM7IOWrite8(addr, val); + return; + + case 0x06000000: + case 0x06800000: + GPU::WriteVRAM_ARM7<u8>(addr, val); + return; + } + + printf("unknown arm7 write8 %08X %02X | %08X | %08X %08X %08X %08X\n", addr, val, ARM7->R[15], IME[1], IE[1], ARM7->R[0], ARM7->R[1]); +} + +void ARM7Write16(u32 addr, u16 val) +{ + switch (addr & 0xFF800000) + { + case 0x02000000: + case 0x02800000: + *(u16*)&MainRAM[addr & 0x3FFFFF] = val; + return; + + case 0x03000000: + if (SWRAM_ARM7) *(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val; + else *(u16*)&ARM7WRAM[addr & 0xFFFF] = val; + return; + + case 0x03800000: + *(u16*)&ARM7WRAM[addr & 0xFFFF] = val; + return; + + case 0x04000000: + ARM7IOWrite16(addr, val); + return; + + case 0x04800000: + Wifi::Write(addr, val); + return; + + case 0x06000000: + case 0x06800000: + GPU::WriteVRAM_ARM7<u16>(addr, val); + return; + } + + printf("unknown arm7 write16 %08X %04X | %08X\n", addr, val, ARM7->R[15]); +} + +void ARM7Write32(u32 addr, u32 val) +{ + switch (addr & 0xFF800000) + { + case 0x02000000: + case 0x02800000: + *(u32*)&MainRAM[addr & 0x3FFFFF] = val; + return; + + case 0x03000000: + if (SWRAM_ARM7) *(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val; + else 
*(u32*)&ARM7WRAM[addr & 0xFFFF] = val; + return; + + case 0x03800000: + *(u32*)&ARM7WRAM[addr & 0xFFFF] = val; + return; + + case 0x04000000: + ARM7IOWrite32(addr, val); + return; + + case 0x06000000: + case 0x06800000: + GPU::WriteVRAM_ARM7<u32>(addr, val); + return; + } + + printf("unknown arm7 write32 %08X %08X | %08X %08X\n", addr, val, ARM7->R[15], ARM7->CurInstr); +} + + + + +u8 ARM9IORead8(u32 addr) +{ + switch (addr) + { + case 0x040001A2: return NDSCart::ReadSPIData(); + + case 0x04000208: return IME[0]; + + case 0x04000240: return GPU::VRAMCNT[0]; + case 0x04000241: return GPU::VRAMCNT[1]; + case 0x04000242: return GPU::VRAMCNT[2]; + case 0x04000243: return GPU::VRAMCNT[3]; + case 0x04000244: return GPU::VRAMCNT[4]; + case 0x04000245: return GPU::VRAMCNT[5]; + case 0x04000246: return GPU::VRAMCNT[6]; + case 0x04000247: return WRAMCnt; + case 0x04000248: return GPU::VRAMCNT[7]; + case 0x04000249: return GPU::VRAMCNT[8]; + + case 0x04000300: return PostFlag9; + } + + if (addr >= 0x04000000 && addr < 0x04000060) + { + return GPU::GPU2D_A->Read8(addr); + } + if (addr >= 0x04001000 && addr < 0x04001060) + { + return GPU::GPU2D_B->Read8(addr); + } + if (addr >= 0x04000320 && addr < 0x040006A4) + { + return GPU3D::Read8(addr); + } + + printf("unknown ARM9 IO read8 %08X\n", addr); + return 0; +} + +u16 ARM9IORead16(u32 addr) +{ + switch (addr) + { + case 0x04000004: return GPU::DispStat[0]; + case 0x04000006: return GPU::VCount; + + case 0x04000060: return GPU3D::Read16(addr); + case 0x04000064: + case 0x04000066: return GPU::GPU2D_A->Read16(addr); + + case 0x040000B8: return DMAs[0]->Cnt & 0xFFFF; + case 0x040000BA: return DMAs[0]->Cnt >> 16; + case 0x040000C4: return DMAs[1]->Cnt & 0xFFFF; + case 0x040000C6: return DMAs[1]->Cnt >> 16; + case 0x040000D0: return DMAs[2]->Cnt & 0xFFFF; + case 0x040000D2: return DMAs[2]->Cnt >> 16; + case 0x040000DC: return DMAs[3]->Cnt & 0xFFFF; + case 0x040000DE: return DMAs[3]->Cnt >> 16; + + case 0x040000E0: return 
((u16*)DMA9Fill)[0]; + case 0x040000E2: return ((u16*)DMA9Fill)[1]; + case 0x040000E4: return ((u16*)DMA9Fill)[2]; + case 0x040000E6: return ((u16*)DMA9Fill)[3]; + case 0x040000E8: return ((u16*)DMA9Fill)[4]; + case 0x040000EA: return ((u16*)DMA9Fill)[5]; + case 0x040000EC: return ((u16*)DMA9Fill)[6]; + case 0x040000EE: return ((u16*)DMA9Fill)[7]; + + case 0x04000100: return TimerGetCounter(0); + case 0x04000102: return Timers[0].Cnt; + case 0x04000104: return TimerGetCounter(1); + case 0x04000106: return Timers[1].Cnt; + case 0x04000108: return TimerGetCounter(2); + case 0x0400010A: return Timers[2].Cnt; + case 0x0400010C: return TimerGetCounter(3); + case 0x0400010E: return Timers[3].Cnt; + + case 0x04000130: return KeyInput & 0xFFFF; + + case 0x04000180: return IPCSync9; + case 0x04000184: + { + u16 val = IPCFIFOCnt9; + if (IPCFIFO9->IsEmpty()) val |= 0x0001; + else if (IPCFIFO9->IsFull()) val |= 0x0002; + if (IPCFIFO7->IsEmpty()) val |= 0x0100; + else if (IPCFIFO7->IsFull()) val |= 0x0200; + return val; + } + + case 0x040001A0: return NDSCart::SPICnt; + case 0x040001A2: return NDSCart::ReadSPIData(); + + case 0x04000204: return ExMemCnt[0]; + case 0x04000208: return IME[0]; + + case 0x04000240: return GPU::VRAMCNT[0] | (GPU::VRAMCNT[1] << 8); + case 0x04000242: return GPU::VRAMCNT[2] | (GPU::VRAMCNT[3] << 8); + case 0x04000244: return GPU::VRAMCNT[4] | (GPU::VRAMCNT[5] << 8); + case 0x04000246: return GPU::VRAMCNT[6] | (WRAMCnt << 8); + case 0x04000248: return GPU::VRAMCNT[7] | (GPU::VRAMCNT[8] << 8); + + case 0x04000280: return DivCnt; + + case 0x040002B0: return SqrtCnt; + + case 0x04000300: return PostFlag9; + case 0x04000304: return PowerControl9; + } + + if (addr >= 0x04000000 && addr < 0x04000060) + { + return GPU::GPU2D_A->Read16(addr); + } + if (addr >= 0x04001000 && addr < 0x04001060) + { + return GPU::GPU2D_B->Read16(addr); + } + if (addr >= 0x04000320 && addr < 0x040006A4) + { + return GPU3D::Read16(addr); + } + + printf("unknown ARM9 IO read16 %08X 
%08X\n", addr, ARM9->R[15]); + return 0; +} + +u32 ARM9IORead32(u32 addr) +{ + switch (addr) + { + case 0x04000004: return GPU::DispStat[0] | (GPU::VCount << 16); + + case 0x04000060: return GPU3D::Read32(addr); + case 0x04000064: return GPU::GPU2D_A->Read32(addr); + + case 0x040000B0: return DMAs[0]->SrcAddr; + case 0x040000B4: return DMAs[0]->DstAddr; + case 0x040000B8: return DMAs[0]->Cnt; + case 0x040000BC: return DMAs[1]->SrcAddr; + case 0x040000C0: return DMAs[1]->DstAddr; + case 0x040000C4: return DMAs[1]->Cnt; + case 0x040000C8: return DMAs[2]->SrcAddr; + case 0x040000CC: return DMAs[2]->DstAddr; + case 0x040000D0: return DMAs[2]->Cnt; + case 0x040000D4: return DMAs[3]->SrcAddr; + case 0x040000D8: return DMAs[3]->DstAddr; + case 0x040000DC: return DMAs[3]->Cnt; + + case 0x040000E0: return DMA9Fill[0]; + case 0x040000E4: return DMA9Fill[1]; + case 0x040000E8: return DMA9Fill[2]; + case 0x040000EC: return DMA9Fill[3]; + + case 0x04000100: return TimerGetCounter(0) | (Timers[0].Cnt << 16); + case 0x04000104: return TimerGetCounter(1) | (Timers[1].Cnt << 16); + case 0x04000108: return TimerGetCounter(2) | (Timers[2].Cnt << 16); + case 0x0400010C: return TimerGetCounter(3) | (Timers[3].Cnt << 16); + + case 0x040001A0: return NDSCart::SPICnt | (NDSCart::ReadSPIData() << 16); + case 0x040001A4: return NDSCart::ROMCnt; + + case 0x04000208: return IME[0]; + case 0x04000210: return IE[0]; + case 0x04000214: return IF[0]; + + case 0x04000240: return GPU::VRAMCNT[0] | (GPU::VRAMCNT[1] << 8) | (GPU::VRAMCNT[2] << 16) | (GPU::VRAMCNT[3] << 24); + case 0x04000244: return GPU::VRAMCNT[4] | (GPU::VRAMCNT[5] << 8) | (GPU::VRAMCNT[6] << 16) | (WRAMCnt << 24); + case 0x04000248: return GPU::VRAMCNT[7] | (GPU::VRAMCNT[8] << 8); + + case 0x04000290: return DivNumerator[0]; + case 0x04000294: return DivNumerator[1]; + case 0x04000298: return DivDenominator[0]; + case 0x0400029C: return DivDenominator[1]; + case 0x040002A0: return DivQuotient[0]; + case 0x040002A4: return 
DivQuotient[1]; + case 0x040002A8: return DivRemainder[0]; + case 0x040002AC: return DivRemainder[1]; + + case 0x040002B4: return SqrtRes; + case 0x040002B8: return SqrtVal[0]; + case 0x040002BC: return SqrtVal[1]; + + case 0x04100000: + if (IPCFIFOCnt9 & 0x8000) + { + u32 ret; + if (IPCFIFO7->IsEmpty()) + { + IPCFIFOCnt9 |= 0x4000; + ret = IPCFIFO7->Peek(); + } + else + { + ret = IPCFIFO7->Read(); + + if (IPCFIFO7->IsEmpty() && (IPCFIFOCnt7 & 0x0004)) + SetIRQ(1, IRQ_IPCSendDone); + } + return ret; + } + else + return IPCFIFO7->Peek(); + + case 0x04100010: + if (!(ExMemCnt[0] & (1<<11))) return NDSCart::ReadROMData(); + return 0; + } + + if (addr >= 0x04000000 && addr < 0x04000060) + { + return GPU::GPU2D_A->Read32(addr); + } + if (addr >= 0x04001000 && addr < 0x04001060) + { + return GPU::GPU2D_B->Read32(addr); + } + if (addr >= 0x04000320 && addr < 0x040006A4) + { + return GPU3D::Read32(addr); + } + + printf("unknown ARM9 IO read32 %08X\n", addr); + return 0; +} + +void ARM9IOWrite8(u32 addr, u8 val) +{ + switch (addr) + { + case 0x040001A0: + if (!(ExMemCnt[0] & (1<<11))) + { + NDSCart::WriteSPICnt((NDSCart::SPICnt & 0xFF00) | val); + } + return; + case 0x040001A1: + if (!(ExMemCnt[0] & (1<<11))) + { + NDSCart::WriteSPICnt((NDSCart::SPICnt & 0x00FF) | (val << 8)); + } + return; + case 0x040001A2: + NDSCart::WriteSPIData(val); + return; + + case 0x040001A8: NDSCart::ROMCommand[0] = val; return; + case 0x040001A9: NDSCart::ROMCommand[1] = val; return; + case 0x040001AA: NDSCart::ROMCommand[2] = val; return; + case 0x040001AB: NDSCart::ROMCommand[3] = val; return; + case 0x040001AC: NDSCart::ROMCommand[4] = val; return; + case 0x040001AD: NDSCart::ROMCommand[5] = val; return; + case 0x040001AE: NDSCart::ROMCommand[6] = val; return; + case 0x040001AF: NDSCart::ROMCommand[7] = val; return; + + case 0x04000208: IME[0] = val & 0x1; return; + + case 0x04000240: GPU::MapVRAM_AB(0, val); return; + case 0x04000241: GPU::MapVRAM_AB(1, val); return; + case 0x04000242: 
GPU::MapVRAM_CD(2, val); return; + case 0x04000243: GPU::MapVRAM_CD(3, val); return; + case 0x04000244: GPU::MapVRAM_E(4, val); return; + case 0x04000245: GPU::MapVRAM_FG(5, val); return; + case 0x04000246: GPU::MapVRAM_FG(6, val); return; + case 0x04000247: MapSharedWRAM(val); return; + case 0x04000248: GPU::MapVRAM_H(7, val); return; + case 0x04000249: GPU::MapVRAM_I(8, val); return; + + case 0x04000300: + if (PostFlag9 & 0x01) val |= 0x01; + PostFlag9 = val & 0x03; + return; + } + + if (addr >= 0x04000000 && addr < 0x04000060) + { + GPU::GPU2D_A->Write8(addr, val); + return; + } + if (addr >= 0x04001000 && addr < 0x04001060) + { + GPU::GPU2D_B->Write8(addr, val); + return; + } + if (addr >= 0x04000320 && addr < 0x040006A4) + { + GPU3D::Write8(addr, val); + return; + } + + printf("unknown ARM9 IO write8 %08X %02X\n", addr, val); +} + +void ARM9IOWrite16(u32 addr, u16 val) +{ + switch (addr) + { + case 0x04000004: GPU::SetDispStat(0, val); return; + + case 0x04000060: GPU3D::Write16(addr, val); return; + + case 0x040000B8: DMAs[0]->WriteCnt((DMAs[0]->Cnt & 0xFFFF0000) | val); return; + case 0x040000BA: DMAs[0]->WriteCnt((DMAs[0]->Cnt & 0x0000FFFF) | (val << 16)); return; + case 0x040000C4: DMAs[1]->WriteCnt((DMAs[1]->Cnt & 0xFFFF0000) | val); return; + case 0x040000C6: DMAs[1]->WriteCnt((DMAs[1]->Cnt & 0x0000FFFF) | (val << 16)); return; + case 0x040000D0: DMAs[2]->WriteCnt((DMAs[2]->Cnt & 0xFFFF0000) | val); return; + case 0x040000D2: DMAs[2]->WriteCnt((DMAs[2]->Cnt & 0x0000FFFF) | (val << 16)); return; + case 0x040000DC: DMAs[3]->WriteCnt((DMAs[3]->Cnt & 0xFFFF0000) | val); return; + case 0x040000DE: DMAs[3]->WriteCnt((DMAs[3]->Cnt & 0x0000FFFF) | (val << 16)); return; + + case 0x04000100: Timers[0].Reload = val; return; + case 0x04000102: TimerStart(0, val); return; + case 0x04000104: Timers[1].Reload = val; return; + case 0x04000106: TimerStart(1, val); return; + case 0x04000108: Timers[2].Reload = val; return; + case 0x0400010A: TimerStart(2, val); return; + 
case 0x0400010C: Timers[3].Reload = val; return; + case 0x0400010E: TimerStart(3, val); return; + + case 0x04000180: + IPCSync7 &= 0xFFF0; + IPCSync7 |= ((val & 0x0F00) >> 8); + IPCSync9 &= 0xB0FF; + IPCSync9 |= (val & 0x4F00); + if ((val & 0x2000) && (IPCSync7 & 0x4000)) + { + SetIRQ(1, IRQ_IPCSync); + } + //CompensateARM7(); + return; + + case 0x04000184: + if (val & 0x0008) + IPCFIFO9->Clear(); + if ((val & 0x0004) && (!(IPCFIFOCnt9 & 0x0004)) && IPCFIFO9->IsEmpty()) + SetIRQ(0, IRQ_IPCSendDone); + if ((val & 0x0400) && (!(IPCFIFOCnt9 & 0x0400)) && (!IPCFIFO7->IsEmpty())) + SetIRQ(0, IRQ_IPCRecv); + if (val & 0x4000) + IPCFIFOCnt9 &= ~0x4000; + IPCFIFOCnt9 = val & 0x8404; + return; + + case 0x040001A0: + if (!(ExMemCnt[0] & (1<<11))) NDSCart::WriteSPICnt(val); + return; + case 0x040001A2: + NDSCart::WriteSPIData(val & 0xFF); + return; + + case 0x040001B8: ROMSeed0[4] = val & 0x7F; return; + case 0x040001BA: ROMSeed1[4] = val & 0x7F; return; + + case 0x04000204: + ExMemCnt[0] = val; + ExMemCnt[1] = (ExMemCnt[1] & 0x007F) | (val & 0xFF80); + return; + + case 0x04000208: IME[0] = val & 0x1; return; + + case 0x04000240: + GPU::MapVRAM_AB(0, val & 0xFF); + GPU::MapVRAM_AB(1, val >> 8); + return; + case 0x04000242: + GPU::MapVRAM_CD(2, val & 0xFF); + GPU::MapVRAM_CD(3, val >> 8); + return; + case 0x04000244: + GPU::MapVRAM_E(4, val & 0xFF); + GPU::MapVRAM_FG(5, val >> 8); + return; + case 0x04000246: + GPU::MapVRAM_FG(6, val & 0xFF); + MapSharedWRAM(val >> 8); + return; + case 0x04000248: + GPU::MapVRAM_H(7, val & 0xFF); + GPU::MapVRAM_I(8, val >> 8); + return; + + case 0x04000280: DivCnt = val; StartDiv(); return; + + case 0x040002B0: SqrtCnt = val; StartSqrt(); return; + + case 0x04000300: + if (PostFlag9 & 0x01) val |= 0x01; + PostFlag9 = val & 0x03; + return; + + case 0x04000304: + PowerControl9 = val; + GPU::DisplaySwap(PowerControl9>>15); + return; + } + + if ((addr >= 0x04000000 && addr < 0x04000060) || (addr == 0x0400006C)) + { + GPU::GPU2D_A->Write16(addr, 
val); + return; + } + if ((addr >= 0x04001000 && addr < 0x04001060) || (addr == 0x0400106C)) + { + GPU::GPU2D_B->Write16(addr, val); + return; + } + if (addr >= 0x04000320 && addr < 0x040006A4) + { + GPU3D::Write16(addr, val); + return; + } + + printf("unknown ARM9 IO write16 %08X %04X %08X\n", addr, val, ARM9->R[14]); +} + +void ARM9IOWrite32(u32 addr, u32 val) +{ + switch (addr) + { + case 0x04000060: GPU3D::Write32(addr, val); return; + case 0x04000064: GPU::GPU2D_A->Write32(addr, val); return; + + case 0x040000B0: DMAs[0]->SrcAddr = val; return; + case 0x040000B4: DMAs[0]->DstAddr = val; return; + case 0x040000B8: DMAs[0]->WriteCnt(val); return; + case 0x040000BC: DMAs[1]->SrcAddr = val; return; + case 0x040000C0: DMAs[1]->DstAddr = val; return; + case 0x040000C4: DMAs[1]->WriteCnt(val); return; + case 0x040000C8: DMAs[2]->SrcAddr = val; return; + case 0x040000CC: DMAs[2]->DstAddr = val; return; + case 0x040000D0: DMAs[2]->WriteCnt(val); return; + case 0x040000D4: DMAs[3]->SrcAddr = val; return; + case 0x040000D8: DMAs[3]->DstAddr = val; return; + case 0x040000DC: DMAs[3]->WriteCnt(val); return; + + case 0x040000E0: DMA9Fill[0] = val; return; + case 0x040000E4: DMA9Fill[1] = val; return; + case 0x040000E8: DMA9Fill[2] = val; return; + case 0x040000EC: DMA9Fill[3] = val; return; + + case 0x04000100: + Timers[0].Reload = val & 0xFFFF; + TimerStart(0, val>>16); + return; + case 0x04000104: + Timers[1].Reload = val & 0xFFFF; + TimerStart(1, val>>16); + return; + case 0x04000108: + Timers[2].Reload = val & 0xFFFF; + TimerStart(2, val>>16); + return; + case 0x0400010C: + Timers[3].Reload = val & 0xFFFF; + TimerStart(3, val>>16); + return; + + case 0x04000188: + if (IPCFIFOCnt9 & 0x8000) + { + if (IPCFIFO9->IsFull()) + IPCFIFOCnt9 |= 0x4000; + else + { + bool wasempty = IPCFIFO9->IsEmpty(); + IPCFIFO9->Write(val); + if ((IPCFIFOCnt7 & 0x0400) && wasempty) + SetIRQ(1, IRQ_IPCRecv); + } + } + return; + + case 0x040001A0: + if (!(ExMemCnt[0] & (1<<11))) + { + 
NDSCart::WriteSPICnt(val & 0xFFFF); + NDSCart::WriteSPIData((val >> 16) & 0xFF); + } + return; + case 0x040001A4: + if (!(ExMemCnt[0] & (1<<11))) NDSCart::WriteROMCnt(val); + return; + + case 0x040001B0: *(u32*)&ROMSeed0[0] = val; return; + case 0x040001B4: *(u32*)&ROMSeed1[0] = val; return; + + case 0x04000208: IME[0] = val & 0x1; return; + case 0x04000210: IE[0] = val; return; + case 0x04000214: IF[0] &= ~val; GPU3D::CheckFIFOIRQ(); return; + + case 0x04000240: + GPU::MapVRAM_AB(0, val & 0xFF); + GPU::MapVRAM_AB(1, (val >> 8) & 0xFF); + GPU::MapVRAM_CD(2, (val >> 16) & 0xFF); + GPU::MapVRAM_CD(3, val >> 24); + return; + case 0x04000244: + GPU::MapVRAM_E(4, val & 0xFF); + GPU::MapVRAM_FG(5, (val >> 8) & 0xFF); + GPU::MapVRAM_FG(6, (val >> 16) & 0xFF); + MapSharedWRAM(val >> 24); + return; + case 0x04000248: + GPU::MapVRAM_H(7, val & 0xFF); + GPU::MapVRAM_I(8, (val >> 8) & 0xFF); + return; + + case 0x04000290: DivNumerator[0] = val; StartDiv(); return; + case 0x04000294: DivNumerator[1] = val; StartDiv(); return; + case 0x04000298: DivDenominator[0] = val; StartDiv(); return; + case 0x0400029C: DivDenominator[1] = val; StartDiv(); return; + + case 0x040002B8: SqrtVal[0] = val; StartSqrt(); return; + case 0x040002BC: SqrtVal[1] = val; StartSqrt(); return; + } + + if (addr >= 0x04000000 && addr < 0x04000060) + { + GPU::GPU2D_A->Write32(addr, val); + return; + } + if (addr >= 0x04001000 && addr < 0x04001060) + { + GPU::GPU2D_B->Write32(addr, val); + return; + } + if (addr >= 0x04000320 && addr < 0x040006A4) + { + GPU3D::Write32(addr, val); + return; + } + + printf("unknown ARM9 IO write32 %08X %08X\n", addr, val); +} + + +u8 ARM7IORead8(u32 addr) +{ + switch (addr) + { + case 0x04000138: return RTC::Read() & 0xFF; + + case 0x040001A2: return NDSCart::ReadSPIData(); + + case 0x040001C2: return SPI::ReadData(); + + case 0x04000208: return IME[1]; + + case 0x04000240: return GPU::VRAMSTAT; + case 0x04000241: return WRAMCnt; + + case 0x04000300: return PostFlag7; + } + + 
if (addr >= 0x04000400 && addr < 0x04000520) + { + // sound I/O + return 0; + } + + printf("unknown ARM7 IO read8 %08X\n", addr); + return 0; +} + +u16 ARM7IORead16(u32 addr) +{ + switch (addr) + { + case 0x04000004: return GPU::DispStat[1]; + case 0x04000006: return GPU::VCount; + + case 0x040000B8: return DMAs[4]->Cnt & 0xFFFF; + case 0x040000BA: return DMAs[4]->Cnt >> 16; + case 0x040000C4: return DMAs[5]->Cnt & 0xFFFF; + case 0x040000C6: return DMAs[5]->Cnt >> 16; + case 0x040000D0: return DMAs[6]->Cnt & 0xFFFF; + case 0x040000D2: return DMAs[6]->Cnt >> 16; + case 0x040000DC: return DMAs[7]->Cnt & 0xFFFF; + case 0x040000DE: return DMAs[7]->Cnt >> 16; + + case 0x04000100: return TimerGetCounter(4); + case 0x04000102: return Timers[4].Cnt; + case 0x04000104: return TimerGetCounter(5); + case 0x04000106: return Timers[5].Cnt; + case 0x04000108: return TimerGetCounter(6); + case 0x0400010A: return Timers[6].Cnt; + case 0x0400010C: return TimerGetCounter(7); + case 0x0400010E: return Timers[7].Cnt; + + case 0x04000130: return KeyInput & 0xFFFF; + case 0x04000136: return KeyInput >> 16; + + case 0x04000134: return 0x8000; + case 0x04000138: return RTC::Read(); + + case 0x04000180: return IPCSync7; + case 0x04000184: + { + u16 val = IPCFIFOCnt7; + if (IPCFIFO7->IsEmpty()) val |= 0x0001; + else if (IPCFIFO7->IsFull()) val |= 0x0002; + if (IPCFIFO9->IsEmpty()) val |= 0x0100; + else if (IPCFIFO9->IsFull()) val |= 0x0200; + return val; + } + + case 0x040001A0: return NDSCart::SPICnt; + case 0x040001A2: return NDSCart::ReadSPIData(); + + case 0x040001C0: return SPI::Cnt; + case 0x040001C2: return SPI::ReadData(); + + case 0x04000204: return ExMemCnt[1]; + case 0x04000208: return IME[1]; + + case 0x04000300: return PostFlag7; + case 0x04000304: return PowerControl7; + case 0x04000308: return ARM7BIOSProt; + + case 0x04000504: return _soundbias; + } + + if (addr >= 0x04000400 && addr < 0x04000520) + { + // sound I/O + return 0; + } + + printf("unknown ARM7 IO read16 %08X 
%08X\n", addr, ARM9->R[15]); + return 0; +} + +u32 ARM7IORead32(u32 addr) +{ + switch (addr) + { + case 0x04000004: return GPU::DispStat[1] | (GPU::VCount << 16); + + case 0x040000B0: return DMAs[4]->SrcAddr; + case 0x040000B4: return DMAs[4]->DstAddr; + case 0x040000B8: return DMAs[4]->Cnt; + case 0x040000BC: return DMAs[5]->SrcAddr; + case 0x040000C0: return DMAs[5]->DstAddr; + case 0x040000C4: return DMAs[5]->Cnt; + case 0x040000C8: return DMAs[6]->SrcAddr; + case 0x040000CC: return DMAs[6]->DstAddr; + case 0x040000D0: return DMAs[6]->Cnt; + case 0x040000D4: return DMAs[7]->SrcAddr; + case 0x040000D8: return DMAs[7]->DstAddr; + case 0x040000DC: return DMAs[7]->Cnt; + + case 0x04000100: return TimerGetCounter(4) | (Timers[4].Cnt << 16); + case 0x04000104: return TimerGetCounter(5) | (Timers[5].Cnt << 16); + case 0x04000108: return TimerGetCounter(6) | (Timers[6].Cnt << 16); + case 0x0400010C: return TimerGetCounter(7) | (Timers[7].Cnt << 16); + + case 0x040001A0: return NDSCart::SPICnt | (NDSCart::ReadSPIData() << 16); + case 0x040001A4: return NDSCart::ROMCnt; + + case 0x040001C0: + return SPI::Cnt | (SPI::ReadData() << 16); + + case 0x04000208: return IME[1]; + case 0x04000210: return IE[1]; + case 0x04000214: return IF[1]; + + case 0x04100000: + if (IPCFIFOCnt7 & 0x8000) + { + u32 ret; + if (IPCFIFO9->IsEmpty()) + { + IPCFIFOCnt7 |= 0x4000; + ret = IPCFIFO9->Peek(); + } + else + { + ret = IPCFIFO9->Read(); + + if (IPCFIFO9->IsEmpty() && (IPCFIFOCnt9 & 0x0004)) + SetIRQ(0, IRQ_IPCSendDone); + } + return ret; + } + else + return IPCFIFO9->Peek(); + + case 0x04100010: + if (ExMemCnt[0] & (1<<11)) return NDSCart::ReadROMData(); + return 0; + } + + if (addr >= 0x04000400 && addr < 0x04000520) + { + // sound I/O + return 0; + } + + printf("unknown ARM7 IO read32 %08X\n", addr); + return 0; +} + +void ARM7IOWrite8(u32 addr, u8 val) +{ + switch (addr) + { + case 0x04000138: RTC::Write(val, true); return; + + case 0x040001A0: + if (ExMemCnt[0] & (1<<11)) + { + 
NDSCart::WriteSPICnt((NDSCart::SPICnt & 0xFF00) | val); + } + return; + case 0x040001A1: + if (ExMemCnt[0] & (1<<11)) + { + NDSCart::WriteSPICnt((NDSCart::SPICnt & 0x00FF) | (val << 8)); + } + return; + case 0x040001A2: + NDSCart::WriteSPIData(val); + return; + + case 0x040001A8: NDSCart::ROMCommand[0] = val; return; + case 0x040001A9: NDSCart::ROMCommand[1] = val; return; + case 0x040001AA: NDSCart::ROMCommand[2] = val; return; + case 0x040001AB: NDSCart::ROMCommand[3] = val; return; + case 0x040001AC: NDSCart::ROMCommand[4] = val; return; + case 0x040001AD: NDSCart::ROMCommand[5] = val; return; + case 0x040001AE: NDSCart::ROMCommand[6] = val; return; + case 0x040001AF: NDSCart::ROMCommand[7] = val; return; + + case 0x040001C2: + SPI::WriteData(val); + return; + + case 0x04000208: IME[1] = val & 0x1; return; + + case 0x04000300: + if (ARM7->R[15] >= 0x4000) + return; + if (!(PostFlag7 & 0x01)) + PostFlag7 = val & 0x01; + return; + + case 0x04000301: + if (val == 0x80) ARM7->Halt(1); + return; + } + + if (addr >= 0x04000400 && addr < 0x04000520) + { + // sound I/O + return; + } + + printf("unknown ARM7 IO write8 %08X %02X\n", addr, val); +} + +void ARM7IOWrite16(u32 addr, u16 val) +{ + switch (addr) + { + case 0x04000004: GPU::SetDispStat(1, val); return; + + case 0x040000B8: DMAs[4]->WriteCnt((DMAs[4]->Cnt & 0xFFFF0000) | val); return; + case 0x040000BA: DMAs[4]->WriteCnt((DMAs[4]->Cnt & 0x0000FFFF) | (val << 16)); return; + case 0x040000C4: DMAs[5]->WriteCnt((DMAs[5]->Cnt & 0xFFFF0000) | val); return; + case 0x040000C6: DMAs[5]->WriteCnt((DMAs[5]->Cnt & 0x0000FFFF) | (val << 16)); return; + case 0x040000D0: DMAs[6]->WriteCnt((DMAs[6]->Cnt & 0xFFFF0000) | val); return; + case 0x040000D2: DMAs[6]->WriteCnt((DMAs[6]->Cnt & 0x0000FFFF) | (val << 16)); return; + case 0x040000DC: DMAs[7]->WriteCnt((DMAs[7]->Cnt & 0xFFFF0000) | val); return; + case 0x040000DE: DMAs[7]->WriteCnt((DMAs[7]->Cnt & 0x0000FFFF) | (val << 16)); return; + + case 0x04000100: Timers[4].Reload = 
val; return; + case 0x04000102: TimerStart(4, val); return; + case 0x04000104: Timers[5].Reload = val; return; + case 0x04000106: TimerStart(5, val); return; + case 0x04000108: Timers[6].Reload = val; return; + case 0x0400010A: TimerStart(6, val); return; + case 0x0400010C: Timers[7].Reload = val; return; + case 0x0400010E: TimerStart(7, val); return; + + case 0x04000134: return;printf("set debug port %04X %08X\n", val, ARM7Read32(ARM7->R[13]+4)); return; + + case 0x04000138: RTC::Write(val, false); return; + + case 0x04000180: + IPCSync9 &= 0xFFF0; + IPCSync9 |= ((val & 0x0F00) >> 8); + IPCSync7 &= 0xB0FF; + IPCSync7 |= (val & 0x4F00); + if ((val & 0x2000) && (IPCSync9 & 0x4000)) + { + SetIRQ(0, IRQ_IPCSync); + } + return; + + case 0x04000184: + if (val & 0x0008) + IPCFIFO7->Clear(); + if ((val & 0x0004) && (!(IPCFIFOCnt7 & 0x0004)) && IPCFIFO7->IsEmpty()) + SetIRQ(1, IRQ_IPCSendDone); + if ((val & 0x0400) && (!(IPCFIFOCnt7 & 0x0400)) && (!IPCFIFO9->IsEmpty())) + SetIRQ(1, IRQ_IPCRecv); + if (val & 0x4000) + IPCFIFOCnt7 &= ~0x4000; + IPCFIFOCnt7 = val & 0x8404; + return; + + case 0x040001A0: + if (ExMemCnt[0] & (1<<11)) + NDSCart::WriteSPICnt(val); + return; + case 0x040001A2: + NDSCart::WriteSPIData(val & 0xFF); + return; + + case 0x040001B8: ROMSeed0[12] = val & 0x7F; return; + case 0x040001BA: ROMSeed1[12] = val & 0x7F; return; + + case 0x040001C0: + SPI::WriteCnt(val); + return; + case 0x040001C2: + SPI::WriteData(val & 0xFF); + return; + + case 0x04000204: + ExMemCnt[1] = (ExMemCnt[1] & 0xFF80) | (val & 0x007F); + return; + + case 0x04000208: IME[1] = val & 0x1; return; + + case 0x04000300: + if (ARM7->R[15] >= 0x4000) + return; + if (!(PostFlag7 & 0x01)) + PostFlag7 = val & 0x01; + return; + + case 0x04000304: PowerControl7 = val; return; + + case 0x04000308: + if (ARM7BIOSProt == 0) + ARM7BIOSProt = val; + return; + + case 0x04000504: // removeme + _soundbias = val & 0x3FF; + return; + } + + if (addr >= 0x04000400 && addr < 0x04000520) + { + // sound I/O + 
return; + } + + printf("unknown ARM7 IO write16 %08X %04X\n", addr, val); +} + +void ARM7IOWrite32(u32 addr, u32 val) +{ + switch (addr) + { + case 0x040000B0: DMAs[4]->SrcAddr = val; return; + case 0x040000B4: DMAs[4]->DstAddr = val; return; + case 0x040000B8: DMAs[4]->WriteCnt(val); return; + case 0x040000BC: DMAs[5]->SrcAddr = val; return; + case 0x040000C0: DMAs[5]->DstAddr = val; return; + case 0x040000C4: DMAs[5]->WriteCnt(val); return; + case 0x040000C8: DMAs[6]->SrcAddr = val; return; + case 0x040000CC: DMAs[6]->DstAddr = val; return; + case 0x040000D0: DMAs[6]->WriteCnt(val); return; + case 0x040000D4: DMAs[7]->SrcAddr = val; return; + case 0x040000D8: DMAs[7]->DstAddr = val; return; + case 0x040000DC: DMAs[7]->WriteCnt(val); return; + + case 0x04000100: + Timers[4].Reload = val & 0xFFFF; + TimerStart(4, val>>16); + return; + case 0x04000104: + Timers[5].Reload = val & 0xFFFF; + TimerStart(5, val>>16); + return; + case 0x04000108: + Timers[6].Reload = val & 0xFFFF; + TimerStart(6, val>>16); + return; + case 0x0400010C: + Timers[7].Reload = val & 0xFFFF; + TimerStart(7, val>>16); + return; + + case 0x04000188: + if (IPCFIFOCnt7 & 0x8000) + { + if (IPCFIFO7->IsFull()) + IPCFIFOCnt7 |= 0x4000; + else + { + bool wasempty = IPCFIFO7->IsEmpty(); + IPCFIFO7->Write(val); + if ((IPCFIFOCnt9 & 0x0400) && wasempty) + SetIRQ(0, IRQ_IPCRecv); + } + } + return; + + case 0x040001A0: + if (ExMemCnt[0] & (1<<11)) + { + NDSCart::WriteSPICnt(val & 0xFFFF); + NDSCart::WriteSPIData((val >> 16) & 0xFF); + } + return; + case 0x040001A4: + if (ExMemCnt[0] & (1<<11)) NDSCart::WriteROMCnt(val); + return; + + case 0x040001B0: *(u32*)&ROMSeed0[8] = val; return; + case 0x040001B4: *(u32*)&ROMSeed1[8] = val; return; + + case 0x04000208: IME[1] = val & 0x1; return; + case 0x04000210: IE[1] = val; return; + case 0x04000214: IF[1] &= ~val; return; + } + + if (addr >= 0x04000400 && addr < 0x04000520) + { + // sound I/O + return; + } + + printf("unknown ARM7 IO write32 %08X %08X\n", addr, 
val); +} + +} diff --git a/src/NDS.h b/src/NDS.h new file mode 100644 index 0000000..ed706af --- /dev/null +++ b/src/NDS.h @@ -0,0 +1,181 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef NDS_H +#define NDS_H + +#include "types.h" + +namespace NDS +{ + +/*#define SCHED_BUF_LEN 64 + +typedef struct _SchedEvent +{ + u32 Delay; + void (*Func)(u32); + u32 Param; + struct _SchedEvent* PrevEvent; + struct _SchedEvent* NextEvent; + +} SchedEvent;*/ + +enum +{ + Event_LCD = 0, + + Event_Timer9_0, + Event_Timer9_1, + Event_Timer9_2, + Event_Timer9_3, + Event_Timer7_0, + Event_Timer7_1, + Event_Timer7_2, + Event_Timer7_3, + + Event_MAX +}; + +typedef struct +{ + void (*Func)(u32 param); + s32 WaitCycles; + u32 Param; + +} SchedEvent; + +enum +{ + IRQ_VBlank = 0, + IRQ_HBlank, + IRQ_VCount, + IRQ_Timer0, + IRQ_Timer1, + IRQ_Timer2, + IRQ_Timer3, + IRQ_RTC, + IRQ_DMA0, + IRQ_DMA1, + IRQ_DMA2, + IRQ_DMA3, + IRQ_Keypad, + IRQ_GBASlot, + IRQ_Unused14, + IRQ_Unused15, + IRQ_IPCSync, + IRQ_IPCSendDone, + IRQ_IPCRecv, + IRQ_CartSendDone, + IRQ_CartIREQMC, + IRQ_GXFIFO, + IRQ_LidOpen, + IRQ_SPI, + IRQ_Wifi +}; + +typedef struct +{ + u16 Reload; + u16 Cnt; + u32 Counter; + u32 CycleShift; + //SchedEvent* Event; + +} Timer; + +// hax +extern u32 IME[2]; +extern u32 IE[2]; +extern u32 IF[2]; +extern Timer Timers[8]; + +extern u16 ExMemCnt[2]; 
+extern u8 ROMSeed0[2*8]; +extern u8 ROMSeed1[2*8]; + +extern u8 ARM9BIOS[0x1000]; +extern u8 ARM7BIOS[0x4000]; + +bool Init(); +void DeInit(); +void Reset(); + +void SetupDirectBoot(); + +void RunFrame(); + +void PressKey(u32 key); +void ReleaseKey(u32 key); +void TouchScreen(u16 x, u16 y); +void ReleaseScreen(); + +/*SchedEvent* ScheduleEvent(s32 Delay, void (*Func)(u32), u32 Param); +void CancelEvent(SchedEvent* event); +void RunEvents(s32 cycles);*/ +void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 param); +void CancelEvent(u32 id); + +// DO NOT CALL FROM ARM7!! +void CompensateARM7(); + +void debug(u32 p); + +void Halt(); + +void MapSharedWRAM(u8 val); + +void SetIRQ(u32 cpu, u32 irq); +void ClearIRQ(u32 cpu, u32 irq); +bool HaltInterrupted(u32 cpu); +void StopCPU(u32 cpu, u32 mask); +void ResumeCPU(u32 cpu, u32 mask); + +void CheckDMAs(u32 cpu, u32 mode); + +u8 ARM9Read8(u32 addr); +u16 ARM9Read16(u32 addr); +u32 ARM9Read32(u32 addr); +void ARM9Write8(u32 addr, u8 val); +void ARM9Write16(u32 addr, u16 val); +void ARM9Write32(u32 addr, u32 val); + +u8 ARM7Read8(u32 addr); +u16 ARM7Read16(u32 addr); +u32 ARM7Read32(u32 addr); +void ARM7Write8(u32 addr, u8 val); +void ARM7Write16(u32 addr, u16 val); +void ARM7Write32(u32 addr, u32 val); + +u8 ARM9IORead8(u32 addr); +u16 ARM9IORead16(u32 addr); +u32 ARM9IORead32(u32 addr); +void ARM9IOWrite8(u32 addr, u8 val); +void ARM9IOWrite16(u32 addr, u16 val); +void ARM9IOWrite32(u32 addr, u32 val); + +u8 ARM7IORead8(u32 addr); +u16 ARM7IORead16(u32 addr); +u32 ARM7IORead32(u32 addr); +void ARM7IOWrite8(u32 addr, u8 val); +void ARM7IOWrite16(u32 addr, u16 val); +void ARM7IOWrite32(u32 addr, u32 val); + +} + +#endif // NDS_H diff --git a/src/NDSCart.cpp b/src/NDSCart.cpp new file mode 100644 index 0000000..416da26 --- /dev/null +++ b/src/NDSCart.cpp @@ -0,0 +1,939 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. 
+ + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#include <stdio.h> +#include <string.h> +#include "NDS.h" +#include "NDSCart.h" + + +namespace NDSCart_SRAM +{ + +u8* SRAM; +u32 SRAMLength; + +char SRAMPath[256]; + +void (*WriteFunc)(u8 val, bool islast); + +u32 Discover_MemoryType; +u32 Discover_Likeliness; +u8* Discover_Buffer; +u32 Discover_DataPos; + +u32 Hold; +u8 CurCmd; +u32 DataPos; +u8 Data; + +u8 StatusReg; +u32 Addr; + + +void Write_Null(u8 val, bool islast); +void Write_EEPROMTiny(u8 val, bool islast); +void Write_EEPROM(u8 val, bool islast); +void Write_Flash(u8 val, bool islast); +void Write_Discover(u8 val, bool islast); + + +bool Init() +{ + SRAM = NULL; + Discover_Buffer = NULL; + return true; +} + +void DeInit() +{ + if (SRAM) delete[] SRAM; + if (Discover_Buffer) delete[] Discover_Buffer; +} + +void Reset() +{ +} + +void LoadSave(char* path) +{ + if (SRAM) delete[] SRAM; + if (Discover_Buffer) delete[] Discover_Buffer; + + Discover_Buffer = NULL; + + strncpy(SRAMPath, path, 255); + SRAMPath[255] = '\0'; + + FILE* f = fopen(path, "rb"); + if (f) + { + fseek(f, 0, SEEK_END); + SRAMLength = (u32)ftell(f); + SRAM = new u8[SRAMLength]; + + fseek(f, 0, SEEK_SET); + fread(SRAM, SRAMLength, 1, f); + + fclose(f); + + switch (SRAMLength) + { + case 512: WriteFunc = Write_EEPROMTiny; break; + case 8192: + case 65536: WriteFunc = Write_EEPROM; break; + case 256*1024: + case 512*1024: + case 
1024*1024: + case 8192*1024: WriteFunc = Write_Flash; break; + default: + printf("!! BAD SAVE LENGTH %d\n", SRAMLength); + WriteFunc = Write_Null; + break; + } + } + else + { + SRAMLength = 0; + WriteFunc = Write_Discover; + Discover_MemoryType = 2; + Discover_Likeliness = 0; + + Discover_DataPos = 0; + Discover_Buffer = new u8[256*1024]; + memset(Discover_Buffer, 0, 256*1024); + } + + Hold = 0; + CurCmd = 0; + Data = 0; + StatusReg = 0x00; +} + +u8 Read() +{ + return Data; +} + +void SetMemoryType() +{ + switch (Discover_MemoryType) + { + case 1: + printf("Save memory type: EEPROM 4k\n"); + WriteFunc = Write_EEPROMTiny; + SRAMLength = 512; + break; + + case 2: + printf("Save memory type: EEPROM 64k\n"); + WriteFunc = Write_EEPROM; + SRAMLength = 8192; + break; + + case 3: + printf("Save memory type: EEPROM 512k\n"); + WriteFunc = Write_EEPROM; + SRAMLength = 65536; + break; + + case 4: + printf("Save memory type: Flash. Hope the size is 256K.\n"); + WriteFunc = Write_Flash; + SRAMLength = 256*1024; + break; + + case 5: + printf("Save memory type: ...something else\n"); + WriteFunc = Write_Null; + SRAMLength = 0; + break; + } + + if (!SRAMLength) + return; + + SRAM = new u8[SRAMLength]; + + // replay writes that occured during discovery + u8 prev_cmd = CurCmd; + u32 pos = 0; + while (pos < 256*1024) + { + u32 len = *(u32*)&Discover_Buffer[pos]; + pos += 4; + if (len == 0) break; + + CurCmd = Discover_Buffer[pos++]; + DataPos = 0; + Addr = 0; + Data = 0; + for (u32 i = 1; i < len; i++) + { + WriteFunc(Discover_Buffer[pos++], (i==(len-1))); + DataPos++; + } + } + + CurCmd = prev_cmd; + + delete[] Discover_Buffer; +} + +void Write_Discover(u8 val, bool islast) +{ + // attempt at autodetecting the type of save memory. + // we basically hope the game will be nice and clear whole pages of memory. + + if (CurCmd == 0x03 || CurCmd == 0x0B) + { + if (Discover_Likeliness) + { + // apply. and pray. 
+ SetMemoryType(); + + DataPos = 0; + Addr = 0; + Data = 0; + return WriteFunc(val, islast); + } + else + { + Data = 0; + return; + } + } + + if (CurCmd == 0x02 || CurCmd == 0x0A) + { + if (DataPos == 0) + Discover_Buffer[Discover_DataPos + 4] = CurCmd; + + Discover_Buffer[Discover_DataPos + 5 + DataPos] = val; + + if (islast) + { + u32 len = DataPos+1; + + *(u32*)&Discover_Buffer[Discover_DataPos] = len+1; + Discover_DataPos += 5+len; + + if (Discover_Likeliness <= len) + { + Discover_Likeliness = len; + + if (len > 3+256) // bigger Flash, FRAM, whatever + { + Discover_MemoryType = 5; + } + else if (len > 2+128) // Flash + { + Discover_MemoryType = 4; + } + else if (len > 2+32) // EEPROM 512k + { + Discover_MemoryType = 3; + } + else if (len > 1+16 || (len != 1+16 && CurCmd != 0x0A)) // EEPROM 64k + { + Discover_MemoryType = 2; + } + else // EEPROM 4k + { + Discover_MemoryType = 1; + } + } + + printf("discover: type=%d likeliness=%d\n", Discover_MemoryType, Discover_Likeliness); + } + } +} + +void Write_Null(u8 val, bool islast) {} + +void Write_EEPROMTiny(u8 val, bool islast) +{ + // TODO +} + +void Write_EEPROM(u8 val, bool islast) +{ + switch (CurCmd) + { + case 0x02: + if (DataPos < 2) + { + Addr <<= 8; + Addr |= val; + Data = 0; + } + else + { + SRAM[Addr & (SRAMLength-1)] = val; + Addr++; + } + break; + + case 0x03: + if (DataPos < 2) + { + Addr <<= 8; + Addr |= val; + Data = 0; + } + else + { + Data = SRAM[Addr & (SRAMLength-1)]; + Addr++; + } + break; + + case 0x9F: + Data = 0xFF; + break; + + default: + if (DataPos==0) + printf("unknown EEPROM save command %02X\n", CurCmd); + break; + } +} + +void Write_Flash(u8 val, bool islast) +{ + switch (CurCmd) + { + case 0x03: + if (DataPos < 3) + { + Addr <<= 8; + Addr |= val; + Data = 0; + } + else + { + // CHECKME: does Flash also wraparound when the address is out of bounds? 
+ if (Addr >= SRAMLength) + Data = 0; + else + Data = SRAM[Addr]; + + Addr++; + } + break; + + case 0x0A: + if (DataPos < 3) + { + Addr <<= 8; + Addr |= val; + Data = 0; + } + else + { + if (Addr < SRAMLength) + SRAM[Addr] = val; + + Addr++; + } + break; + + case 0x9F: + Data = 0xFF; + break; + + default: + if (DataPos==0) + printf("unknown Flash save command %02X\n", CurCmd); + break; + } +} + +void Write(u8 val, u32 hold) +{ + bool islast = false; + + if (!hold) + { + if (Hold) islast = true; + Hold = 0; + } + + if (hold && (!Hold)) + { + CurCmd = val; + Hold = 1; + Data = 0; + DataPos = 0; + Addr = 0; + //printf("save SPI command %02X\n", CurCmd); + return; + } + + switch (CurCmd) + { + case 0x02: + case 0x03: + case 0x0A: + case 0x0B: + case 0x9F: + WriteFunc(val, islast); + DataPos++; + break; + + case 0x04: // write disable + StatusReg &= ~(1<<1); + Data = 0; + break; + + case 0x05: // read status reg + Data = StatusReg; + break; + + case 0x06: // write enable + StatusReg |= (1<<1); + Data = 0; + break; + + default: + if (DataPos==0) + printf("unknown save SPI command %02X\n", CurCmd); + break; + } + + if (islast && (CurCmd == 0x02 || CurCmd == 0x0A)) + { + FILE* f = fopen(SRAMPath, "wb"); + if (f) + { + fwrite(SRAM, SRAMLength, 1, f); + fclose(f); + } + } +} + +} + + +namespace NDSCart +{ + +u16 SPICnt; +u32 ROMCnt; + +u8 ROMCommand[8]; +u32 ROMDataOut; + +u8 DataOut[0x4000]; +u32 DataOutPos; +u32 DataOutLen; + +bool CartInserted; +u8* CartROM; +u32 CartROMSize; +u32 CartID; +bool CartIsHomebrew; + +u32 CmdEncMode; +u32 DataEncMode; + +u32 Key1_KeyBuf[0x412]; + +u64 Key2_X; +u64 Key2_Y; + + +u32 ByteSwap(u32 val) +{ + return (val >> 24) | ((val >> 8) & 0xFF00) | ((val << 8) & 0xFF0000) | (val << 24); +} + +void Key1_Encrypt(u32* data) +{ + u32 y = data[0]; + u32 x = data[1]; + u32 z; + + for (u32 i = 0x0; i <= 0xF; i++) + { + z = Key1_KeyBuf[i] ^ x; + x = Key1_KeyBuf[0x012 + (z >> 24) ]; + x += Key1_KeyBuf[0x112 + ((z >> 16) & 0xFF)]; + x ^= 
Key1_KeyBuf[0x212 + ((z >> 8) & 0xFF)]; + x += Key1_KeyBuf[0x312 + (z & 0xFF)]; + x ^= y; + y = z; + } + + data[0] = x ^ Key1_KeyBuf[0x10]; + data[1] = y ^ Key1_KeyBuf[0x11]; +} + +void Key1_Decrypt(u32* data) +{ + u32 y = data[0]; + u32 x = data[1]; + u32 z; + + for (u32 i = 0x11; i >= 0x2; i--) + { + z = Key1_KeyBuf[i] ^ x; + x = Key1_KeyBuf[0x012 + (z >> 24) ]; + x += Key1_KeyBuf[0x112 + ((z >> 16) & 0xFF)]; + x ^= Key1_KeyBuf[0x212 + ((z >> 8) & 0xFF)]; + x += Key1_KeyBuf[0x312 + (z & 0xFF)]; + x ^= y; + y = z; + } + + data[0] = x ^ Key1_KeyBuf[0x1]; + data[1] = y ^ Key1_KeyBuf[0x0]; +} + +void Key1_ApplyKeycode(u32* keycode, u32 mod) +{ + Key1_Encrypt(&keycode[1]); + Key1_Encrypt(&keycode[0]); + + u32 temp[2] = {0,0}; + + for (u32 i = 0; i <= 0x11; i++) + { + Key1_KeyBuf[i] ^= ByteSwap(keycode[i % mod]); + } + for (u32 i = 0; i <= 0x410; i+=2) + { + Key1_Encrypt(temp); + Key1_KeyBuf[i ] = temp[1]; + Key1_KeyBuf[i+1] = temp[0]; + } +} + +void Key1_InitKeycode(u32 idcode, u32 level, u32 mod) +{ + memcpy(Key1_KeyBuf, &NDS::ARM7BIOS[0x30], 0x1048); // hax + + u32 keycode[3] = {idcode, idcode>>1, idcode<<1}; + if (level >= 1) Key1_ApplyKeycode(keycode, mod); + if (level >= 2) Key1_ApplyKeycode(keycode, mod); + if (level >= 3) + { + keycode[1] <<= 1; + keycode[2] >>= 1; + Key1_ApplyKeycode(keycode, mod); + } +} + + +void Key2_Encrypt(u8* data, u32 len) +{ + for (u32 i = 0; i < len; i++) + { + Key2_X = (((Key2_X >> 5) ^ + (Key2_X >> 17) ^ + (Key2_X >> 18) ^ + (Key2_X >> 31)) & 0xFF) + + (Key2_X << 8); + Key2_Y = (((Key2_Y >> 5) ^ + (Key2_Y >> 23) ^ + (Key2_Y >> 18) ^ + (Key2_Y >> 31)) & 0xFF) + + (Key2_Y << 8); + + Key2_X &= 0x0000007FFFFFFFFFULL; + Key2_Y &= 0x0000007FFFFFFFFFULL; + } +} + + +bool Init() +{ + if (!NDSCart_SRAM::Init()) return false; + + return true; +} + +void DeInit() +{ + NDSCart_SRAM::DeInit(); +} + +void Reset() +{ + SPICnt = 0; + ROMCnt = 0; + + memset(ROMCommand, 0, 8); + ROMDataOut = 0; + + Key2_X = 0; + Key2_Y = 0; + + memset(DataOut, 0, 
0x4000); + DataOutPos = 0; + DataOutLen = 0; + + CartInserted = false; + CartROM = NULL; + CartROMSize = 0; + CartID = 0; + CartIsHomebrew = false; + + CmdEncMode = 0; + DataEncMode = 0; + + NDSCart_SRAM::Reset(); +} + + +bool LoadROM(char* path) +{ + // TODO: streaming mode? for really big ROMs or systems with limited RAM + // for now we're lazy + + FILE* f = fopen(path, "rb"); + if (!f) + { + printf("Failed to open ROM file %s\n", path); + return false; + } + + fseek(f, 0, SEEK_END); + u32 len = (u32)ftell(f); + + CartROMSize = 0x200; + while (CartROMSize < len) + CartROMSize <<= 1; + + u32 gamecode; + fseek(f, 0x0C, SEEK_SET); + fread(&gamecode, 4, 1, f); + + CartROM = new u8[CartROMSize]; + memset(CartROM, 0, CartROMSize); + fseek(f, 0, SEEK_SET); + fread(CartROM, 1, len, f); + + fclose(f); + //CartROM = f; + + // temp. TODO: later make this user selectable + // calling this sets up shit for booting from the cart directly. + // normal behavior is booting from the BIOS. + NDS::SetupDirectBoot(); + + CartInserted = true; + + // generate a ROM ID + // note: most games don't check the actual value + // it just has to stay the same throughout gameplay + CartID = 0x00001FC2; + + u32 arm9base = *(u32*)&CartROM[0x20]; + if (arm9base < 0x8000) + { + if (arm9base >= 0x4000) + { + // reencrypt secure area if needed + if (*(u32*)&CartROM[arm9base] == 0xE7FFDEFF) + { + printf("Re-encrypting cart secure area\n"); + + strncpy((char*)&CartROM[arm9base], "encryObj", 8); + + Key1_InitKeycode(gamecode, 3, 2); + for (u32 i = 0; i < 0x800; i += 8) + Key1_Encrypt((u32*)&CartROM[arm9base + i]); + + Key1_InitKeycode(gamecode, 2, 2); + Key1_Encrypt((u32*)&CartROM[arm9base]); + } + } + else + CartIsHomebrew = true; + } + + // encryption + Key1_InitKeycode(gamecode, 2, 2); + + + // save + char savepath[256]; + strncpy(savepath, path, 255); + savepath[255] = '\0'; + strncpy(savepath + strlen(path) - 3, "sav", 3); + printf("Save file: %s\n", savepath); + NDSCart_SRAM::LoadSave(savepath); + 
+ return true; +} + +void ReadROM(u32 addr, u32 len, u32 offset) +{ + if (!CartInserted) return; + + if (addr >= CartROMSize) return; + if ((addr+len) > CartROMSize) + len = CartROMSize - addr; + + memcpy(DataOut+offset, CartROM+addr, len); +} + +void ReadROM_B7(u32 addr, u32 len, u32 offset) +{ + addr &= (CartROMSize-1); + if (!CartIsHomebrew) + { + if (addr < 0x8000) + addr = 0x8000 + (addr & 0x1FF); + } + + memcpy(DataOut+offset, CartROM+addr, len); +} + + +void EndTransfer() +{ + ROMCnt &= ~(1<<23); + ROMCnt &= ~(1<<31); + + if (SPICnt & (1<<14)) + NDS::SetIRQ((NDS::ExMemCnt[0]>>11)&0x1, NDS::IRQ_CartSendDone); +} + +void ROMPrepareData(u32 param) +{ + if (DataOutPos >= DataOutLen) + ROMDataOut = 0; + else + ROMDataOut = *(u32*)&DataOut[DataOutPos]; + + DataOutPos += 4; + + ROMCnt |= (1<<23); + NDS::CheckDMAs(0, 0x06); + NDS::CheckDMAs(1, 0x12); + + //if (DataOutPos < DataOutLen) + // NDS::ScheduleEvent((ROMCnt & (1<<27)) ? 8:5, ROMPrepareData, 0); +} + +void WriteROMCnt(u32 val) +{ + ROMCnt = val & 0xFF7F7FFF; + + if (!(SPICnt & (1<<15))) return; + + if (val & (1<<15)) + { + u32 snum = (NDS::ExMemCnt[0]>>8)&0x8; + u64 seed0 = *(u32*)&NDS::ROMSeed0[snum] | ((u64)NDS::ROMSeed0[snum+4] << 32); + u64 seed1 = *(u32*)&NDS::ROMSeed1[snum] | ((u64)NDS::ROMSeed1[snum+4] << 32); + + Key2_X = 0; + Key2_Y = 0; + for (u32 i = 0; i < 39; i++) + { + if (seed0 & (1ULL << i)) Key2_X |= (1ULL << (38-i)); + if (seed1 & (1ULL << i)) Key2_Y |= (1ULL << (38-i)); + } + + printf("seed0: %02X%08X\n", (u32)(seed0>>32), (u32)seed0); + printf("seed1: %02X%08X\n", (u32)(seed1>>32), (u32)seed1); + printf("key2 X: %02X%08X\n", (u32)(Key2_X>>32), (u32)Key2_X); + printf("key2 Y: %02X%08X\n", (u32)(Key2_Y>>32), (u32)Key2_Y); + } + + if (!(ROMCnt & (1<<31))) return; + + u32 datasize = (ROMCnt >> 24) & 0x7; + if (datasize == 7) + datasize = 4; + else if (datasize > 0) + datasize = 0x100 << datasize; + + DataOutPos = 0; + DataOutLen = datasize; + + // handle KEY1 encryption as needed. 
+ // KEY2 encryption is implemented in hardware and doesn't need to be handled. + u8 cmd[8]; + if (CmdEncMode == 1) + { + *(u32*)&cmd[0] = ByteSwap(*(u32*)&ROMCommand[4]); + *(u32*)&cmd[4] = ByteSwap(*(u32*)&ROMCommand[0]); + Key1_Decrypt((u32*)cmd); + u32 tmp = ByteSwap(*(u32*)&cmd[4]); + *(u32*)&cmd[4] = ByteSwap(*(u32*)&cmd[0]); + *(u32*)&cmd[0] = tmp; + } + else + { + *(u32*)&cmd[0] = *(u32*)&ROMCommand[0]; + *(u32*)&cmd[4] = *(u32*)&ROMCommand[4]; + } + + /*printf("ROM COMMAND %04X %08X %02X%02X%02X%02X%02X%02X%02X%02X SIZE %04X\n", + SPICnt, ROMCnt, + cmd[0], cmd[1], cmd[2], cmd[3], + cmd[4], cmd[5], cmd[6], cmd[7], + datasize);*/ + + switch (cmd[0]) + { + case 0x9F: + memset(DataOut, 0xFF, DataOutLen); + break; + + case 0x00: + memset(DataOut, 0, DataOutLen); + if (DataOutLen > 0x1000) + { + ReadROM(0, 0x1000, 0); + for (u32 pos = 0x1000; pos < DataOutLen; pos += 0x1000) + memcpy(DataOut+pos, DataOut, 0x1000); + } + else + ReadROM(0, DataOutLen, 0); + break; + + case 0x90: + case 0xB8: + for (u32 pos = 0; pos < DataOutLen; pos += 4) + *(u32*)&DataOut[pos] = CartID; + break; + + case 0x3C: + CmdEncMode = 1; + break; + + case 0xB7: + { + u32 addr = (cmd[1]<<24) | (cmd[2]<<16) | (cmd[3]<<8) | cmd[4]; + memset(DataOut, 0, DataOutLen); + + if (((addr + DataOutLen - 1) >> 12) != (addr >> 12)) + { + u32 len1 = 0x1000 - (addr & 0xFFF); + ReadROM_B7(addr, len1, 0); + ReadROM_B7(addr+len1, DataOutLen-len1, len1); + } + else + ReadROM_B7(addr, DataOutLen, 0); + } + break; + + default: + switch (cmd[0] & 0xF0) + { + case 0x40: + DataEncMode = 2; + break; + + case 0x10: + for (u32 pos = 0; pos < DataOutLen; pos += 4) + *(u32*)&DataOut[pos] = CartID; + break; + + case 0x20: + { + u32 addr = (cmd[2] & 0xF0) << 8; + ReadROM(addr, 0x1000, 0); + } + break; + + case 0xA0: + CmdEncMode = 2; + break; + } + break; + } + + //ROMCnt &= ~(1<<23); + ROMCnt |= (1<<23); + + if (datasize == 0) + EndTransfer(); + else + { + NDS::CheckDMAs(0, 0x05); + NDS::CheckDMAs(1, 0x12); + } + 
//NDS::ScheduleEvent((ROMCnt & (1<<27)) ? 8:5, ROMPrepareData, 0); +} + +u32 ReadROMData() +{ + /*if (ROMCnt & (1<<23)) + { + ROMCnt &= ~(1<<23); + if (DataOutPos >= DataOutLen) + EndTransfer(); + } + + return ROMDataOut;*/ + u32 ret; + if (DataOutPos >= DataOutLen) + ret = 0; + else + ret = *(u32*)&DataOut[DataOutPos]; + + DataOutPos += 4; + + if (DataOutPos == DataOutLen) + EndTransfer(); + + return ret; +} + +void DMA(u32 addr) +{ + void (*writefn)(u32,u32) = (NDS::ExMemCnt[0] & (1<<11)) ? NDS::ARM7Write32 : NDS::ARM9Write32; + for (u32 i = 0; i < DataOutLen; i+=4) + { + writefn(addr+i, *(u32*)&DataOut[i]); + } + + EndTransfer(); +} + + +void WriteSPICnt(u16 val) +{ + SPICnt = (SPICnt & 0x0080) | (val & 0xE043); +} + +u8 ReadSPIData() +{ + if (!(SPICnt & (1<<15))) return 0; + if (!(SPICnt & (1<<13))) return 0; + + return NDSCart_SRAM::Read(); +} + +void WriteSPIData(u8 val) +{ + if (!(SPICnt & (1<<15))) return; + if (!(SPICnt & (1<<13))) return; + + // TODO: take delays into account + + NDSCart_SRAM::Write(val, SPICnt&(1<<6)); +} + +} diff --git a/src/NDSCart.h b/src/NDSCart.h new file mode 100644 index 0000000..61dd11a --- /dev/null +++ b/src/NDSCart.h @@ -0,0 +1,55 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#ifndef NDSCART_H +#define NDSCART_H + +#include "types.h" + +namespace NDSCart +{ + +extern u16 SPICnt; +extern u32 ROMCnt; + +extern u8 ROMCommand[8]; +extern u32 ROMDataOut; + +extern u8 EncSeed0[5]; +extern u8 EncSeed1[5]; + +extern u8* CartROM; +extern u32 CartROMSize; + +bool Init(); +void DeInit(); +void Reset(); + +bool LoadROM(char* path); + +void WriteROMCnt(u32 val); +u32 ReadROMData(); +void DMA(u32 addr); + +void WriteSPICnt(u16 val); +u8 ReadSPIData(); +void WriteSPIData(u8 val); + +} + +#endif diff --git a/src/RTC.cpp b/src/RTC.cpp new file mode 100644 index 0000000..842fdae --- /dev/null +++ b/src/RTC.cpp @@ -0,0 +1,255 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#include <stdio.h> +#include <string.h> +#include "RTC.h" + + +namespace RTC +{ + +u16 IO; + +u8 Input; +u32 InputBit; +u32 InputPos; + +u8 Output[8]; +u32 OutputBit; +u32 OutputPos; + +u8 CurCmd; + +u8 StatusReg1; +u8 StatusReg2; +u8 Alarm1[3]; +u8 Alarm2[3]; +u8 ClockAdjust; +u8 FreeReg; + + +bool Init() +{ + return true; +} + +void DeInit() +{ +} + +void Reset() +{ + Input = 0; + InputBit = 0; + InputPos = 0; + + memset(Output, 0, sizeof(Output)); + OutputPos = 0; + + CurCmd = 0; + + StatusReg1 = 0; + StatusReg2 = 0; + memset(Alarm1, 0, sizeof(Alarm1)); + memset(Alarm2, 0, sizeof(Alarm2)); + ClockAdjust = 0; + FreeReg = 0; +} + + +void ByteIn(u8 val) +{ + //printf("RTC IN: %02X\n", val); + if (InputPos == 0) + { + if ((val & 0xF0) == 0x60) + { + u8 rev[16] = {0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6}; + CurCmd = rev[val & 0xF]; + } + else + CurCmd = val; + + if (CurCmd & 0x80) + { + switch (CurCmd & 0x70) + { + case 0x00: Output[0] = StatusReg1; break; + case 0x40: Output[0] = StatusReg2; break; + + case 0x20: + // TODO: get actual system time + Output[0] = 0x17; + Output[1] = 0x01; + Output[2] = 0x19; + Output[3] = 0x04; // day of week. checkme. 
apparently 04=Thursday + Output[4] = 0x06; + Output[5] = 0x30; + Output[6] = 0x30; + break; + + case 0x60: + // TODO: get actual system time + Output[0] = 0x06; + Output[1] = 0x30; + Output[2] = 0x30; + break; + + case 0x10: + if (StatusReg2 & 0x04) + { + Output[0] = Alarm1[0]; + Output[1] = Alarm1[1]; + Output[2] = Alarm1[2]; + } + else + Output[0] = Alarm1[2]; + break; + + case 0x50: + Output[0] = Alarm2[0]; + Output[1] = Alarm2[1]; + Output[2] = Alarm2[2]; + break; + + case 0x30: Output[0] = ClockAdjust; break; + case 0x70: Output[0] = FreeReg; break; + } + } + InputPos++; + return; + } + + switch (CurCmd & 0x70) + { + case 0x00: + if (InputPos == 1) StatusReg1 = val & 0x0E; + break; + + case 0x40: + if (InputPos == 1) StatusReg2 = val; + if (StatusReg2 & 0x4F) printf("RTC INTERRUPT ON: %02X\n", StatusReg2); + break; + + case 0x20: + // TODO: set time somehow?? + break; + + case 0x60: + // same shit + break; + + case 0x10: + if (StatusReg2 & 0x04) + { + if (InputPos <= 3) Alarm1[InputPos-1] = val; + } + else + { + if (InputPos == 1) Alarm1[2] = val; + } + break; + + case 0x50: + if (InputPos <= 3) Alarm2[InputPos-1] = val; + break; + + case 0x30: + if (InputPos == 1) ClockAdjust = val; + break; + + case 0x70: + if (InputPos == 1) FreeReg = val; + break; + } + + InputPos++; +} + + +u16 Read() +{ + //printf("RTC READ %04X\n", IO); + return IO; +} + +void Write(u16 val, bool byte) +{ + if (byte) val |= (IO & 0xFF00); + + //printf("RTC WRITE %04X\n", val); + if (val & 0x0004) + { + if (!(IO & 0x0004)) + { + // start transfer + Input = 0; + InputBit = 0; + InputPos = 0; + + memset(Output, 0, sizeof(Output)); + OutputBit = 0; + OutputPos = 0; + } + else + { + if (!(val & 0x0002)) // clock low + { + if (val & 0x0010) + { + // write + if (val & 0x0001) + Input |= (1<<InputBit); + + InputBit++; + if (InputBit >= 8) + { + InputBit = 0; + ByteIn(Input); + Input = 0; + InputPos++; + } + } + else + { + // read + if (Output[OutputPos] & (1<<OutputBit)) + IO |= 0x0001; + else 
+ IO &= 0xFFFE; + + OutputBit++; + if (OutputBit >= 8) + { + OutputBit = 0; + if (OutputPos < 7) + OutputPos++; + } + } + } + } + } + + if (val & 0x0010) + IO = val; + else + IO = (IO & 0x0001) | (val & 0xFFFE); +} + +} diff --git a/src/RTC.h b/src/RTC.h new file mode 100644 index 0000000..6ada5c1 --- /dev/null +++ b/src/RTC.h @@ -0,0 +1,36 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef RTC_H +#define RTC_H + +#include "types.h" + +namespace RTC +{ + +bool Init(); +void DeInit(); +void Reset(); + +u16 Read(); +void Write(u16 val, bool byte); + +} + +#endif diff --git a/src/SPI.cpp b/src/SPI.cpp new file mode 100644 index 0000000..13ab2ab --- /dev/null +++ b/src/SPI.cpp @@ -0,0 +1,457 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#include <stdio.h> +#include <string.h> +#include "NDS.h" +#include "SPI.h" + + +namespace SPI_Firmware +{ + +u8* Firmware; +u32 FirmwareLength; + +u32 Hold; +u8 CurCmd; +u32 DataPos; +u8 Data; + +u8 StatusReg; +u32 Addr; + + +u16 CRC16(u8* data, u32 len, u32 start) +{ + u16 blarg[8] = {0xC0C1, 0xC181, 0xC301, 0xC601, 0xCC01, 0xD801, 0xF001, 0xA001}; + + for (u32 i = 0; i < len; i++) + { + start ^= data[i]; + + for (int j = 0; j < 8; j++) + { + if (start & 0x1) + { + start >>= 1; + start ^= (blarg[j] << (7-j)); + } + else + start >>= 1; + } + } + + return start & 0xFFFF; +} + +bool VerifyCRC16(u32 start, u32 offset, u32 len, u32 crcoffset) +{ + u16 crc_stored = *(u16*)&Firmware[crcoffset]; + u16 crc_calced = CRC16(&Firmware[offset], len, start); + //printf("%04X vs %04X\n", crc_stored, crc_calced); + return (crc_stored == crc_calced); +} + + +bool Init() +{ + Firmware = NULL; + return true; +} + +void DeInit() +{ + if (Firmware) delete[] Firmware; +} + +void Reset() +{ + if (Firmware) delete[] Firmware; + Firmware = NULL; + + FILE* f = fopen("firmware.bin", "rb"); + if (!f) + { + printf("firmware.bin not found\n"); + + // TODO: generate default firmware + return; + } + + fseek(f, 0, SEEK_END); + FirmwareLength = (u32)ftell(f); + Firmware = new u8[FirmwareLength]; + + fseek(f, 0, SEEK_SET); + fread(Firmware, FirmwareLength, 1, f); + + fclose(f); + + u32 userdata = 0x3FE00; + if (*(u16*)&Firmware[0x3FF70] == ((*(u16*)&Firmware[0x3FE70] + 1) & 0x7F)) + { + if (VerifyCRC16(0xFFFF, 0x3FF00, 0x70, 0x3FF72)) + userdata = 0x3FF00; + } + + // fix touchscreen coords + *(u16*)&Firmware[userdata+0x58] = 0; + *(u16*)&Firmware[userdata+0x5A] = 0; + Firmware[userdata+0x5C] = 1; + Firmware[userdata+0x5D] = 1; + *(u16*)&Firmware[userdata+0x5E] = 254<<4; + *(u16*)&Firmware[userdata+0x60] = 190<<4; + Firmware[userdata+0x62] = 255; + 
Firmware[userdata+0x63] = 191; + + // disable autoboot + //Firmware[userdata+0x64] &= 0xBF; + + *(u16*)&Firmware[userdata+0x72] = CRC16(&Firmware[userdata], 0x70, 0xFFFF); + + // verify shit + printf("FW: WIFI CRC16 = %s\n", VerifyCRC16(0x0000, 0x2C, *(u16*)&Firmware[0x2C], 0x2A)?"GOOD":"BAD"); + printf("FW: AP1 CRC16 = %s\n", VerifyCRC16(0x0000, 0x3FA00, 0xFE, 0x3FAFE)?"GOOD":"BAD"); + printf("FW: AP2 CRC16 = %s\n", VerifyCRC16(0x0000, 0x3FB00, 0xFE, 0x3FBFE)?"GOOD":"BAD"); + printf("FW: AP3 CRC16 = %s\n", VerifyCRC16(0x0000, 0x3FC00, 0xFE, 0x3FCFE)?"GOOD":"BAD"); + printf("FW: USER0 CRC16 = %s\n", VerifyCRC16(0xFFFF, 0x3FE00, 0x70, 0x3FE72)?"GOOD":"BAD"); + printf("FW: USER1 CRC16 = %s\n", VerifyCRC16(0xFFFF, 0x3FF00, 0x70, 0x3FF72)?"GOOD":"BAD"); + + Hold = 0; + CurCmd = 0; + Data = 0; + StatusReg = 0x00; +} + +u8 Read() +{ + return Data; +} + +void Write(u8 val, u32 hold) +{ + if (!hold) + { + Hold = 0; + } + + if (hold && (!Hold)) + { + CurCmd = val; + Hold = 1; + Data = 0; + DataPos = 1; + Addr = 0; + //printf("firmware SPI command %02X\n", CurCmd); + return; + } + + switch (CurCmd) + { + case 0x03: // read + { + if (DataPos < 4) + { + Addr <<= 8; + Addr |= val; + Data = 0; + + //if (DataPos == 3) printf("firmware SPI read %08X\n", Addr); + } + else + { + if (Addr >= FirmwareLength) + Data = 0; + else + Data = Firmware[Addr]; + + Addr++; + } + + DataPos++; + } + break; + + case 0x04: // write disable + StatusReg &= ~(1<<1); + Data = 0; + break; + + case 0x05: // read status reg + Data = StatusReg; + break; + + case 0x06: // write enable + StatusReg |= (1<<1); + Data = 0; + break; + + case 0x9F: // read JEDEC ID + { + switch (DataPos) + { + case 1: Data = 0x20; break; + case 2: Data = 0x40; break; + case 3: Data = 0x12; break; + default: Data = 0; break; + } + DataPos++; + } + break; + + default: + printf("unknown firmware SPI command %02X\n", CurCmd); + break; + } +} + +} + +namespace SPI_Powerman +{ + +u32 Hold; +u32 DataPos; +u8 Index; +u8 Data; + +u8 
Registers[8]; +u8 RegMasks[8]; + + +bool Init() +{ + return true; +} + +void DeInit() +{ +} + +void Reset() +{ + Hold = 0; + Index = 0; + Data = 0; + + memset(Registers, 0, sizeof(Registers)); + memset(RegMasks, 0, sizeof(RegMasks)); + + Registers[4] = 0x40; + + RegMasks[0] = 0x7F; + RegMasks[1] = 0x01; + RegMasks[2] = 0x01; + RegMasks[3] = 0x03; + RegMasks[4] = 0x0F; +} + +u8 Read() +{ + return Data; +} + +void Write(u8 val, u32 hold) +{ + if (!hold) + { + Hold = 0; + } + + if (hold && (!Hold)) + { + Index = val; + Hold = 1; + Data = 0; + DataPos = 1; + return; + } + + if (DataPos == 1) + { + if (Index & 0x80) + { + Data = Registers[Index & 0x07]; + } + else + { + Registers[Index & 0x07] = + (Registers[Index & 0x07] & ~RegMasks[Index & 0x07]) | + (val & RegMasks[Index & 0x07]); + } + } + else + Data = 0; +} + +} + + +namespace SPI_TSC +{ + +u32 DataPos; +u8 ControlByte; +u8 Data; + +u16 ConvResult; + +u16 TouchX, TouchY; + + +bool Init() +{ + return true; +} + +void DeInit() +{ +} + +void Reset() +{ + ControlByte = 0; + Data = 0; + + ConvResult = 0; +} + +void SetTouchCoords(u16 x, u16 y) +{ + // scr.x = (adc.x-adc.x1) * (scr.x2-scr.x1) / (adc.x2-adc.x1) + (scr.x1-1) + // scr.y = (adc.y-adc.y1) * (scr.y2-scr.y1) / (adc.y2-adc.y1) + (scr.y1-1) + // adc.x = ((scr.x * ((adc.x2-adc.x1) + (scr.x1-1))) / (scr.x2-scr.x1)) + adc.x1 + // adc.y = ((scr.y * ((adc.y2-adc.y1) + (scr.y1-1))) / (scr.y2-scr.y1)) + adc.y1 + TouchX = x; + TouchY = y; + + if (y == 0xFFF) return; + + TouchX <<= 4; + TouchY <<= 4; +} + +u8 Read() +{ + return Data; +} + +void Write(u8 val, u32 hold) +{ + if (DataPos == 1) + Data = (ConvResult >> 5) & 0xFF; + else if (DataPos == 2) + Data = (ConvResult << 3) & 0xFF; + else + Data = 0; + + if (val & 0x80) + { + ControlByte = val; + DataPos = 1; + + switch (ControlByte & 0x70) + { + case 0x10: ConvResult = TouchY; break; + case 0x50: ConvResult = TouchX; break; + default: ConvResult = 0xFFF; break; + } + + if (ControlByte & 0x08) + ConvResult &= 0x0FF0; 
// checkme + } + else + DataPos++; +} + +} + + +namespace SPI +{ + +u16 Cnt; + +u32 CurDevice; + + +bool Init() +{ + if (!SPI_Firmware::Init()) return false; + if (!SPI_Powerman::Init()) return false; + if (!SPI_TSC::Init()) return false; + + return true; +} + +void DeInit() +{ + SPI_Firmware::DeInit(); + SPI_Powerman::DeInit(); + SPI_TSC::DeInit(); +} + +void Reset() +{ + Cnt = 0; + + SPI_Firmware::Reset(); + SPI_Powerman::Reset(); + SPI_TSC::Init(); +} + + +void WriteCnt(u16 val) +{ + Cnt = (Cnt & 0x0080) | (val & 0xCF03); + if (val & 0x0400) printf("!! CRAPOED 16BIT SPI MODE\n"); +} + +u8 ReadData() +{ + if (!(Cnt & (1<<15))) return 0; + + switch (Cnt & 0x0300) + { + case 0x0000: return SPI_Powerman::Read(); + case 0x0100: return SPI_Firmware::Read(); + case 0x0200: return SPI_TSC::Read(); + default: return 0; + } +} + +void WriteData(u8 val) +{ + if (!(Cnt & (1<<15))) return; + + // TODO: take delays into account + + switch (Cnt & 0x0300) + { + case 0x0000: SPI_Powerman::Write(val, Cnt&(1<<11)); break; + case 0x0100: SPI_Firmware::Write(val, Cnt&(1<<11)); break; + case 0x0200: SPI_TSC::Write(val, Cnt&(1<<11)); break; + default: printf("SPI to unknown device %04X %02X\n", Cnt, val); break; + } + + if (Cnt & (1<<14)) + NDS::SetIRQ(1, NDS::IRQ_SPI); +} + +} diff --git a/src/SPI.h b/src/SPI.h new file mode 100644 index 0000000..73a4180 --- /dev/null +++ b/src/SPI.h @@ -0,0 +1,46 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef SPI_H +#define SPI_H + +namespace SPI_TSC +{ + +void SetTouchCoords(u16 x, u16 y); + +} + +namespace SPI +{ + +extern u16 Cnt; + +bool Init(); +void DeInit(); +void Reset(); + +u16 ReadCnt(); +void WriteCnt(u16 val); + +u8 ReadData(); +void WriteData(u8 val); + +} + +#endif diff --git a/src/Wifi.cpp b/src/Wifi.cpp new file mode 100644 index 0000000..0f1c239 --- /dev/null +++ b/src/Wifi.cpp @@ -0,0 +1,120 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#include <stdio.h> +#include <string.h> +#include "NDS.h" +#include "Wifi.h" + + +namespace Wifi +{ + +u16 BBCnt; +u8 BBWrite; +u8 BBRegs[0x100]; +u8 BBRegsRO[0x100]; + + +void Reset() +{ + BBCnt = 0; + BBWrite = 0; + memset(BBRegs, 0, 0x100); + memset(BBRegsRO, 0, 0x100); + + #define BBREG_FIXED(id, val) BBRegs[id] = val; BBRegsRO[id] = 1; + BBREG_FIXED(0x00, 0x6D); + BBREG_FIXED(0x0D, 0x00); + BBREG_FIXED(0x0E, 0x00); + BBREG_FIXED(0x0F, 0x00); + BBREG_FIXED(0x10, 0x00); + BBREG_FIXED(0x11, 0x00); + BBREG_FIXED(0x12, 0x00); + BBREG_FIXED(0x16, 0x00); + BBREG_FIXED(0x17, 0x00); + BBREG_FIXED(0x18, 0x00); + BBREG_FIXED(0x19, 0x00); + BBREG_FIXED(0x1A, 0x00); + BBREG_FIXED(0x27, 0x00); + BBREG_FIXED(0x4D, 0x00); // 00 or BF + BBREG_FIXED(0x5D, 0x01); + BBREG_FIXED(0x5E, 0x00); + BBREG_FIXED(0x5F, 0x00); + BBREG_FIXED(0x60, 0x00); + BBREG_FIXED(0x61, 0x00); + BBREG_FIXED(0x64, 0xFF); // FF or 3F + BBREG_FIXED(0x66, 0x00); + for (int i = 0x69; i < 0x100; i++) + { + BBREG_FIXED(i, 0x00); + } + #undef BBREG_FIXED +} + + +u16 Read(u32 addr) +{ + addr &= 0x7FFF; + + switch (addr) + { + case 0x158: + return BBCnt; + + case 0x15C: + if ((BBCnt & 0xF000) != 0x6000) + { + printf("WIFI: bad BB read, CNT=%04X\n", BBCnt); + return 0; + } + return BBRegs[BBCnt & 0xFF]; + + case 0x15E: + return 0; // cheap + } + + printf("WIFI: unknown read %08X\n", addr); + return 0; +} + +void Write(u32 addr, u16 val) +{ + addr &= 0x7FFF; + + switch (addr) + { + case 0x158: + BBCnt = val; + if ((BBCnt & 0xF000) == 0x5000) + { + u32 regid = BBCnt & 0xFF; + if (!BBRegsRO[regid]) + BBRegs[regid] = val & 0xFF; + } + return; + + case 0x15A: + BBWrite = val; + return; + } + + printf("WIFI: unknown write %08X %04X\n", addr, val); +} + +} diff --git a/src/Wifi.h b/src/Wifi.h new file mode 100644 index 0000000..a1755ea --- /dev/null +++ b/src/Wifi.h @@ -0,0 +1,35 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. 
+ + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef WIFI_H +#define WIFI_H + +namespace Wifi +{ + +// + + +void Reset(); + +u16 Read(u32 addr); +void Write(u32 addr, u16 val); + +} + +#endif diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..3e713da --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,272 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#include <stdio.h> +#include <windows.h> +#include "NDS.h" +#include "GPU.h" + + +#define VERSION "0.1" + + +HINSTANCE instance; +HWND melon; +BITMAPV4HEADER bmp; +bool quit; + +bool touching; + + +LRESULT CALLBACK derpo(HWND window, UINT msg, WPARAM wparam, LPARAM lparam) +{ + switch (msg) + { + case WM_CLOSE: + printf("close\n"); + { + FILE* f = fopen("debug/wram.bin", "wb"); + if (f) + { + for (u32 i = 0x37F8000; i < 0x3808000; i+=4) + { + u32 blarg = NDS::ARM7Read32(i); + fwrite(&blarg, 4, 1, f); + } + fclose(f); + } + f = fopen("debug/arm7vram.bin", "wb"); + if (f) + { + for (u32 i = 0x6000000; i < 0x6040000; i+=4) + { + u32 blarg = NDS::ARM7Read32(i); + fwrite(&blarg, 4, 1, f); + } + fclose(f); + } + f = fopen("debug/mainram.bin", "wb"); + if (f) + { + for (u32 i = 0x2000000; i < 0x2400000; i+=4) + { + u32 blarg = NDS::ARM9Read32(i); + fwrite(&blarg, 4, 1, f); + } + fclose(f); + } + } + PostQuitMessage(0); + return 0; + + case WM_KEYDOWN: + switch (wparam) + { + case VK_RETURN: NDS::PressKey(3); break; + case VK_SPACE: NDS::PressKey(2); break; + case VK_UP: NDS::PressKey(6); break; + case VK_DOWN: NDS::PressKey(7); break; + case VK_LEFT: NDS::PressKey(5); break; + case VK_RIGHT: NDS::PressKey(4); break; + case 'A': NDS::PressKey(0); break; + case 'B': NDS::PressKey(1); break; + case 'X': NDS::PressKey(16); break; + case 'Y': NDS::PressKey(17); break; + case 'L': NDS::PressKey(9); break; + case 'R': NDS::PressKey(8); break; + case 'D': NDS::debug(0); break; + } + return 0; + + case WM_KEYUP: + switch (wparam) + { + case VK_RETURN: NDS::ReleaseKey(3); break; + case VK_SPACE: NDS::ReleaseKey(2); break; + case VK_UP: NDS::ReleaseKey(6); break; + case VK_DOWN: NDS::ReleaseKey(7); break; + case VK_LEFT: NDS::ReleaseKey(5); break; + case VK_RIGHT: NDS::ReleaseKey(4); break; + case 'A': NDS::ReleaseKey(0); break; + case 'B': NDS::ReleaseKey(1); break; + case 'X': NDS::ReleaseKey(16); break; + case 'Y': NDS::ReleaseKey(17); break; + case 'L': 
NDS::ReleaseKey(9); break; + case 'R': NDS::ReleaseKey(8); break; + } + return 0; + + case WM_LBUTTONDOWN: + if (!touching) + { + s16 x = (s16)(lparam & 0xFFFF); + s16 y = (s16)(lparam >> 16); + + y -= 192; + if (x >= 0 && x < 256 && y >= 0 && y < 192) + { + NDS::TouchScreen(x, y); + NDS::PressKey(16+6); + touching = true; + } + } + return 0; + + case WM_LBUTTONUP: + case WM_NCLBUTTONUP: + if (touching) + { + NDS::ReleaseScreen(); + NDS::ReleaseKey(16+6); + touching = false; + } + return 0; + + case WM_MOUSEMOVE: + if (touching) + { + s16 x = (s16)(lparam & 0xFFFF); + s16 y = (s16)(lparam >> 16); + + y -= 192; + if (x >= 0 && x < 256 && y >= 0 && y < 192) + NDS::TouchScreen(x, y); + } + return 0; + + case WM_PAINT: + { + PAINTSTRUCT partisocialiste; + HDC dc = BeginPaint(window, &partisocialiste); + + SetDIBitsToDevice(dc, 0, 0, 256, 384, 0, 0, 0, 384, GPU::Framebuffer, (BITMAPINFO*)&bmp, DIB_RGB_COLORS); + + EndPaint(window, &partisocialiste); + } + return 0; + } + + return DefWindowProc(window, msg, wparam, lparam); +} + + +int main() +{ + printf("melonDS version uh... 
0.1??\n"); + printf("it's a DS emulator!!!\n"); + printf("http://melonds.kuribo64.net/\n"); + quit = false; + touching = false; + + instance = GetModuleHandle(NULL); + + //SetThreadAffinityMask(GetCurrentThread(), 0x8); + + // god this shit sucks + WNDCLASSEX shit; + shit.cbSize = sizeof(shit); + shit.style = CS_HREDRAW | CS_VREDRAW; + shit.lpfnWndProc = derpo; + shit.cbClsExtra = 0; + shit.cbWndExtra = 0; + shit.hInstance = instance; + shit.hIcon = NULL; + shit.hIconSm = NULL; + shit.hCursor = NULL; + shit.hbrBackground = (HBRUSH)(COLOR_WINDOWFRAME+1); + shit.lpszMenuName = NULL; + shit.lpszClassName = "v0ltmeters"; + RegisterClassEx(&shit); + + RECT rekt; + rekt.left = 0; rekt.top = 0; + rekt.right = 256; rekt.bottom = 384; + AdjustWindowRect(&rekt, WS_OVERLAPPEDWINDOW, FALSE); + + melon = CreateWindow("v0ltmeters", + "melonDS " VERSION, + WS_OVERLAPPEDWINDOW, + CW_USEDEFAULT, CW_USEDEFAULT, + rekt.right-rekt.left, rekt.bottom-rekt.top, + NULL, + NULL, + instance, + NULL); + + ShowWindow(melon, SW_SHOW); + + // more sucky shit! 
+ memset(&bmp, 0, sizeof(bmp)); + bmp.bV4Size = sizeof(bmp); + bmp.bV4Width = 256; + bmp.bV4Height = -384; + bmp.bV4Planes = 1; + bmp.bV4BitCount = 32; + bmp.bV4V4Compression = BI_RGB|BI_BITFIELDS; + bmp.bV4RedMask = 0x000000FF; + bmp.bV4GreenMask = 0x0000FF00; + bmp.bV4BlueMask = 0x00FF0000; + + NDS::Init(); + + u32 nframes = 0; + u32 lasttick = GetTickCount(); + + for (;;) + { + MSG msg; + while (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) + { + if (msg.message == WM_QUIT) + { + quit = true; + break; + } + + TranslateMessage(&msg); + DispatchMessage(&msg); + } + if (quit) break; + + NDS::RunFrame(); + + //HDC dc = GetDC(melon); + //SetDIBitsToDevice(dc, 0, 0, 256, 384, 0, 0, 0, 384, GPU::Framebuffer, (BITMAPINFO*)&bmp, DIB_RGB_COLORS); + InvalidateRect(melon, NULL, false); + UpdateWindow(melon); + + nframes++; + if (nframes >= 30) + { + u32 tick = GetTickCount(); + u32 diff = tick - lasttick; + lasttick = tick; + + u32 fps = (nframes * 1000) / diff; + nframes = 0; + + char melontitle[100]; + sprintf(melontitle, "melonDS " VERSION " | %d FPS", fps); + SetWindowText(melon, melontitle); + } + } + printf("deinit\n"); + NDS::DeInit(); + + return 0; +} diff --git a/src/types.h b/src/types.h new file mode 100644 index 0000000..8a6c7e3 --- /dev/null +++ b/src/types.h @@ -0,0 +1,31 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#ifndef TYPES_H +#define TYPES_H + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long int u64; +typedef signed char s8; +typedef signed short s16; +typedef signed int s32; +typedef signed long long int s64; + +#endif // TYPES_H |