diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/DSi.cpp | 36 | ||||
-rw-r--r-- | src/DSi_AES.cpp | 134 | ||||
-rw-r--r-- | src/DSi_NDMA.cpp | 3 | ||||
-rw-r--r-- | src/DSi_SD.cpp | 23 | ||||
-rw-r--r-- | src/DSi_SD.h | 1 | ||||
-rw-r--r-- | src/SPU.cpp | 7 |
6 files changed, 180 insertions, 24 deletions
diff --git a/src/DSi.cpp b/src/DSi.cpp index 8f35d91..e601da9 100644 --- a/src/DSi.cpp +++ b/src/DSi.cpp @@ -44,6 +44,8 @@ namespace DSi u32 BootAddr[2]; +u16 SCFG_Clock9; +u16 SCFG_Clock7; u32 SCFG_MC; u32 MBK[2][9]; @@ -123,11 +125,19 @@ void Reset() SDMMC->Reset(); SDIO->Reset(); + SCFG_Clock9 = 0x0187; // CHECKME + SCFG_Clock7 = 0x0187; SCFG_MC = 0x0011; // LCD init flag GPU::DispStat[0] |= (1<<6); GPU::DispStat[1] |= (1<<6); + + NDS::MapSharedWRAM(3); + + // TEST + u8 derp[16] = {0xE5, 0xCC, 0x5A, 0x8B, 0x56, 0xD0, 0xC9, 0x72, 0x9C, 0x17, 0xE8, 0xDC, 0x39, 0x12, 0x36, 0xA9}; + for (int i = 0; i < 16; i+=4) ARM7Write32(0x03FFC580+i, *(u32*)&derp[i]); } bool LoadBIOS() @@ -1007,7 +1017,7 @@ u16 ARM9IORead16(u32 addr) { switch (addr) { - case 0x04004004: return 0; // TODO + case 0x04004004: return SCFG_Clock9; case 0x04004010: return SCFG_MC & 0xFFFF; CASE_READ16_32BIT(0x04004040, MBK[0][0]) @@ -1079,6 +1089,16 @@ void ARM9IOWrite8(u32 addr, u8 val) { switch (addr) { + case 0x04000301: + // TODO: OPTIONAL PERFORMANCE HACK + // the DSi ARM9 BIOS has a bug where the IRQ wait function attempts to use (ARM7-only) HALTCNT + // effectively causing it to wait in a busy loop. + // for better DSi performance, we can implement an actual IRQ wait here. + // in practice this would only matter when running DS software in DSi mode (ie already a hack). + // DSi software does not use the BIOS IRQ wait function. + //if (val == 0x80 && NDS::ARM9->R[15] == 0xFFFF0268) NDS::ARM9->Halt(1); + return; + case 0x04004040: MapNWRAM_A(0, val); return; case 0x04004041: MapNWRAM_A(1, val); return; case 0x04004042: MapNWRAM_A(2, val); return; @@ -1108,6 +1128,12 @@ void ARM9IOWrite16(u32 addr, u16 val) { switch (addr) { + case 0x04004004: + // TODO: actually change clock! + printf("CLOCK9=%04X\n", val); + SCFG_Clock9 = val & 0x0187; + return; + case 0x04004040: MapNWRAM_A(0, val & 0xFF); MapNWRAM_A(1, val >> 8); @@ -1267,7 +1293,7 @@ u16 ARM7IORead16(u32 addr) case 0x04000218: return NDS::IE2; case 0x0400021C: return NDS::IF2; - case 0x04004004: return 0x0187; + case 0x04004004: return SCFG_Clock7; case 0x04004006: return 0; // JTAG register case 0x04004010: return SCFG_MC & 0xFFFF; @@ -1390,6 +1416,10 @@ void ARM7IOWrite16(u32 addr, u16 val) case 0x04000218: NDS::IE2 = (val & 0x7FF7); NDS::UpdateIRQ(1); return; case 0x0400021C: NDS::IF2 &= ~(val & 0x7FF7); NDS::UpdateIRQ(1); return; + case 0x04004004: + SCFG_Clock7 = val & 0x0187; + return; + case 0x04004010: val &= 0x800C; if ((val & 0xC) == 0xC) val &= ~0xC; // hax @@ -1482,7 +1512,7 @@ void ARM7IOWrite32(u32 addr, u32 val) { addr -= 0x04004440; int n = 0; - while (addr > 0x30) { addr -= 0x30; n++; } + while (addr >= 0x30) { addr -= 0x30; n++; } switch (addr >> 4) { diff --git a/src/DSi_AES.cpp b/src/DSi_AES.cpp index 7f962fd..8ae9082 100644 --- a/src/DSi_AES.cpp +++ b/src/DSi_AES.cpp @@ -42,11 +42,14 @@ FIFO<u32>* OutputFIFO; u8 IV[16]; +u8 MAC[16]; + u8 KeyNormal[4][16]; u8 KeyX[4][16]; u8 KeyY[4][16]; u8 CurKey[16]; +u8 CurMAC[16]; AES_ctx Ctx; @@ -77,6 +80,9 @@ void ROL16(u8* val, u32 n) #define _printhex(str, size) { for (int z = 0; z < (size); z++) printf("%02X", (str)[z]); printf("\n"); } #define _printhex2(str, size) { for (int z = 0; z < (size); z++) printf("%02X", (str)[z]); } +#define _printhexR(str, size) { for (int z = 0; z < (size); z++) printf("%02X", (str)[((size)-1)-z]); printf("\n"); } +#define _printhex2R(str, size) { for (int z = 0; z < (size); z++) printf("%02X", (str)[((size)-1)-z]); } + bool Init() { @@ -111,11 +117,16 @@ void Reset() InputFIFO->Clear(); OutputFIFO->Clear(); + memset(IV, 0, sizeof(IV)); + + memset(MAC, 0, sizeof(MAC)); + memset(KeyNormal, 0, sizeof(KeyNormal)); memset(KeyX, 0, sizeof(KeyX)); memset(KeyY, 0, sizeof(KeyY)); memset(CurKey, 0, sizeof(CurKey)); + memset(CurMAC, 0, sizeof(CurMAC)); // initialize keys, as per GBAtek @@ -123,6 +134,12 @@ void Reset() *(u32*)&KeyX[0][0] = 0x746E694E; *(u32*)&KeyX[0][4] = 0x6F646E65; + // slot 1: 'Tad'/dev.kp + *(u32*)&KeyX[1][0] = 0x4E00004A; + *(u32*)&KeyX[1][4] = 0x4A00004E; + *(u32*)&KeyX[1][8] = (u32)(DSi::ConsoleID >> 32) ^ 0xC80C4B72; + *(u32*)&KeyX[1][12] = (u32)DSi::ConsoleID; + // slot 3: console-unique eMMC crypto *(u32*)&KeyX[3][0] = (u32)DSi::ConsoleID; *(u32*)&KeyX[3][4] = (u32)DSi::ConsoleID ^ 0x24EE6906; @@ -134,6 +151,34 @@ void Reset() } +void ProcessBlock_CCM_Decrypt() +{ + u8 data[16]; + u8 data_rev[16]; + + *(u32*)&data[0] = InputFIFO->Read(); + *(u32*)&data[4] = InputFIFO->Read(); + *(u32*)&data[8] = InputFIFO->Read(); + *(u32*)&data[12] = InputFIFO->Read(); + + //printf("AES-CCM: "); _printhex2(data, 16); + + Swap16(data_rev, data); + AES_CTR_xcrypt_buffer(&Ctx, data_rev, 16); + + for (int i = 0; i < 16; i++) CurMAC[i] ^= data_rev[i]; + AES_ECB_encrypt(&Ctx, CurMAC); + + Swap16(data, data_rev); + + //printf(" -> "); _printhex2(data, 16); + + OutputFIFO->Write(*(u32*)&data[0]); + OutputFIFO->Write(*(u32*)&data[4]); + OutputFIFO->Write(*(u32*)&data[8]); + OutputFIFO->Write(*(u32*)&data[12]); +} + void ProcessBlock_CTR() { u8 data[16]; @@ -186,18 +231,12 @@ void WriteCnt(u32 val) OutputDMASize = dmasize_out[(val >> 14) & 0x3]; AESMode = (val >> 28) & 0x3; - if (AESMode < 2) printf("AES-CCM TODO\n"); + if (AESMode == 1) printf("AES-CCM TODO\n"); if (val & (1<<24)) { u32 slot = (val >> 26) & 0x3; memcpy(CurKey, KeyNormal[slot], 16); - - //printf("AES: key(%d): ", slot); _printhex(CurKey, 16); - - u8 tmp[16]; - Swap16(tmp, CurKey); - AES_init_ctx(&Ctx, tmp); } if (!(oldcnt & (1<<31)) && (val & (1<<31))) @@ -205,11 +244,45 @@ void WriteCnt(u32 val) // transfer start (checkme) RemBlocks = BlkCnt >> 16; + u8 key[16]; + u8 iv[16]; + + Swap16(key, CurKey); + Swap16(iv, IV); + + if (AESMode < 2) + { + if (BlkCnt & 0xFFFF) printf("AES: CCM EXTRA LEN TODO\n"); + + u32 maclen = (val >> 16) & 0x7; + if (maclen < 1) maclen = 1; + + iv[0] = 0x02; + for (int i = 0; i < 12; i++) iv[1+i] = iv[4+i]; + iv[13] = 0x00; + iv[14] = 0x00; + iv[15] = 0x01; + + AES_init_ctx_iv(&Ctx, key, iv); + + iv[0] |= (maclen << 3) | ((BlkCnt & 0xFFFF) ? (1<<6) : 0); + iv[13] = RemBlocks >> 12; + iv[14] = RemBlocks >> 4; + iv[15] = RemBlocks << 4; + + memcpy(CurMAC, iv, 16); + AES_ECB_encrypt(&Ctx, CurMAC); + } + else + { + AES_init_ctx_iv(&Ctx, key, iv); + } + DSi::CheckNDMAs(1, 0x2A); } - printf("AES CNT: %08X / mode=%d inDMA=%d outDMA=%d blocks=%d\n", - val, AESMode, InputDMASize, OutputDMASize, RemBlocks); + printf("AES CNT: %08X / mode=%d key=%d inDMA=%d outDMA=%d blocks=%d\n", + val, AESMode, (val >> 26) & 0x3, InputDMASize, OutputDMASize, RemBlocks); } void WriteBlkCnt(u32 val) @@ -219,6 +292,8 @@ void WriteBlkCnt(u32 val) u32 ReadOutputFIFO() { + if (OutputFIFO->IsEmpty()) printf("!!! AES OUTPUT FIFO EMPTY\n"); + u32 ret = OutputFIFO->Read(); if (Cnt & (1<<31)) @@ -241,6 +316,8 @@ void WriteInputFIFO(u32 val) { // TODO: add some delay to processing + if (InputFIFO->IsFull()) printf("!!! AES INPUT FIFO FULL\n"); + InputFIFO->Write(val); if (!(Cnt & (1<<31))) return; @@ -276,6 +353,7 @@ void Update() { switch (AESMode) { + case 0: ProcessBlock_CCM_Decrypt(); break; case 2: case 3: ProcessBlock_CTR(); break; default: @@ -293,6 +371,28 @@ void Update() if (RemBlocks == 0) { + if (AESMode == 0) + { + Ctx.Iv[13] = 0x00; + Ctx.Iv[14] = 0x00; + Ctx.Iv[15] = 0x00;_printhex(Ctx.Iv, 16); + AES_CTR_xcrypt_buffer(&Ctx, CurMAC, 16); + + //printf("FINAL MAC: "); _printhexR(CurMAC, 16); + //printf("INPUT MAC: "); _printhex(MAC, 16); + + Cnt |= (1<<21); + for (int i = 0; i < 16; i++) + { + if (CurMAC[15-i] != MAC[i]) Cnt &= ~(1<<21); + } + } + else + { + // CHECKME + Cnt &= ~(1<<21); + } + Cnt &= ~(1<<31); if (Cnt & (1<<30)) NDS::SetIRQ2(NDS::IRQ2_DSi_AES); DSi::StopNDMAs(1, 0x2A); @@ -313,15 +413,15 @@ void WriteIV(u32 offset, u32 val, u32 mask) *(u32*)&IV[offset] = (old & ~mask) | (val & mask); //printf("AES: IV: "); _printhex(IV, 16); - - u8 tmp[16]; - Swap16(tmp, IV); - AES_ctx_set_iv(&Ctx, tmp); } void WriteMAC(u32 offset, u32 val, u32 mask) { - // + u32 old = *(u32*)&MAC[offset]; + + *(u32*)&MAC[offset] = (old & ~mask) | (val & mask); + + //printf("AES: MAC: "); _printhex(MAC, 16); } void DeriveNormalKey(u32 slot) @@ -355,6 +455,8 @@ void WriteKeyNormal(u32 slot, u32 offset, u32 val, u32 mask) u32 old = *(u32*)&KeyNormal[slot][offset]; *(u32*)&KeyNormal[slot][offset] = (old & ~mask) | (val & mask); + + //printf("KeyNormal(%d): ", slot); _printhex(KeyNormal[slot], 16); } void WriteKeyX(u32 slot, u32 offset, u32 val, u32 mask) @@ -362,6 +464,8 @@ void WriteKeyX(u32 slot, u32 offset, u32 val, u32 mask) u32 old = *(u32*)&KeyX[slot][offset]; *(u32*)&KeyX[slot][offset] = (old & ~mask) | (val & mask); + + //printf("KeyX(%d): ", slot); _printhex(KeyX[slot], 16); } void WriteKeyY(u32 slot, u32 offset, u32 val, u32 mask) @@ -370,6 +474,8 @@ void WriteKeyY(u32 slot, u32 offset, u32 val, u32 mask) *(u32*)&KeyY[slot][offset] = (old & ~mask) | (val & mask); + //printf("[%08X] KeyY(%d): ", NDS::GetPC(1), slot); _printhex(KeyY[slot], 16); + if (offset >= 0xC) { DeriveNormalKey(slot); diff --git a/src/DSi_NDMA.cpp b/src/DSi_NDMA.cpp index 37eb687..e7fc7ab 100644 --- a/src/DSi_NDMA.cpp +++ b/src/DSi_NDMA.cpp @@ -123,7 +123,10 @@ void DSi_NDMA::Start() if (((StartMode & 0x1F) != 0x10) && !(Cnt & (1<<29))) { if (IterCount > TotalRemCount) + { IterCount = TotalRemCount; + RemCount = IterCount; + } } if (Cnt & (1<<12)) CurDstAddr = DstAddr; diff --git a/src/DSi_SD.cpp b/src/DSi_SD.cpp index 7367eea..93431b5 100644 --- a/src/DSi_SD.cpp +++ b/src/DSi_SD.cpp @@ -185,6 +185,17 @@ void DSi_SDHost::SendData(u8* data, u32 len) NDS::ScheduleEvent(NDS::Event_DSi_SDTransfer, false, 512, FinishSend, param); } +void DSi_SDHost::FinishReceive(u32 param) +{ + DSi_SDHost* host = (param & 0x1) ? DSi::SDIO : DSi::SDMMC; + DSi_SDDevice* dev = host->Ports[host->PortSelect & 0x1]; + + host->ClearIRQ(24); + host->SetIRQ(25); + + if (dev) dev->ContinueTransfer(); +} + void DSi_SDHost::ReceiveData(u8* data, u32 len) { printf("%s: data TX, len=%d, blkcnt=%d (%d) blklen=%d, irq=%08X\n", SD_DESC, len, BlockCount16, BlockCountInternal, BlockLen16, IRQMask); @@ -481,10 +492,12 @@ void DSi_SDHost::WriteFIFO32(u32 val) // we completed one block, send it to the SD card - ClearIRQ(24); - SetIRQ(25); + //ClearIRQ(24); + //SetIRQ(25); - if (dev) dev->ContinueTransfer(); + //if (dev) dev->ContinueTransfer(); + // TODO measure the actual delay!! + NDS::ScheduleEvent(NDS::Event_DSi_SDTransfer, false, 2048, FinishReceive, Num); } @@ -622,10 +635,10 @@ void DSi_MMCStorage::SendCMD(u8 cmd, u32 param) Host->SendResponse(CSR, true); WriteBlock(RWAddress); RWAddress += BlockSize; - SetState(0x06); + SetState(0x04); return; - case 55: // ?? + case 55: // appcmd prefix CSR |= (1<<5); Host->SendResponse(CSR, true); return; diff --git a/src/DSi_SD.h b/src/DSi_SD.h index 007a8a8..855dd5e 100644 --- a/src/DSi_SD.h +++ b/src/DSi_SD.h @@ -37,6 +37,7 @@ public: void DoSavestate(Savestate* file); static void FinishSend(u32 param); + static void FinishReceive(u32 param); void SendResponse(u32 val, bool last); void SendData(u8* data, u32 len); void ReceiveData(u8* data, u32 len); diff --git a/src/SPU.cpp b/src/SPU.cpp index ee9237f..d31a371 100644 --- a/src/SPU.cpp +++ b/src/SPU.cpp @@ -19,6 +19,7 @@ #include <stdio.h> #include <string.h> #include "NDS.h" +#include "DSi.h" #include "SPU.h" @@ -216,7 +217,8 @@ void Channel::FIFO_BufferData() for (u32 i = 0; i < burstlen; i += 4) { - FIFO[FIFOWritePos] = NDS::ARM7Read32(SrcAddr + FIFOReadOffset); + //FIFO[FIFOWritePos] = NDS::ARM7Read32(SrcAddr + FIFOReadOffset); + FIFO[FIFOWritePos] = DSi::ARM7Read32(SrcAddr + FIFOReadOffset); FIFOReadOffset += 4; FIFOWritePos++; FIFOWritePos &= 0x7; @@ -499,7 +501,8 @@ void CaptureUnit::FIFO_FlushData() { for (u32 i = 0; i < 4; i++) { - NDS::ARM7Write32(DstAddr + FIFOWriteOffset, FIFO[FIFOReadPos]); + //NDS::ARM7Write32(DstAddr + FIFOWriteOffset, FIFO[FIFOReadPos]); + DSi::ARM7Write32(DstAddr + FIFOWriteOffset, FIFO[FIFOReadPos]); FIFOReadPos++; FIFOReadPos &= 0x3; |