diff options
Diffstat (limited to 'src/DMA.cpp')
| -rw-r--r-- | src/DMA.cpp | 241 | 
1 files changed, 159 insertions, 82 deletions
| diff --git a/src/DMA.cpp b/src/DMA.cpp index f0c22b5..7bbf980 100644 --- a/src/DMA.cpp +++ b/src/DMA.cpp @@ -26,6 +26,25 @@  // NOTES ON DMA SHIT  //  // * could use optimized code paths for common types of DMA transfers. for example, VRAM +//   have to profile it to see if it's actually worth doing + + +// DMA TIMINGS +// +// sequential timing: +// * 1 cycle per read or write +// * in 32bit mode, accessing a 16bit bus (mainRAM, palette, VRAM) incurs 1 cycle of penalty +// * in 32bit mode, transferring from mainRAM to another bank is 1 cycle faster +// * if source and destination are the same memory bank, there is a 1 cycle penalty +// * transferring from mainRAM to mainRAM is a trainwreck (all accesses are made nonsequential) +// +// nonsequential timing: +// * nonseq penalty is applied to the first read and write +// * I also figure it gets nonseq penalty again when resuming, after having been interrupted by +//   another DMA (TODO: check) +// * applied to all accesses for mainRAM->mainRAM, resulting in timings of 16-18 cycles per unit +// +// TODO: GBA slot  DMA::DMA(u32 cpu, u32 num) @@ -38,61 +57,6 @@ DMA::DMA(u32 cpu, u32 num)      else          CountMask = (num==3 ? 0x0000FFFF : 0x00003FFF); -    // TODO: merge with the one in ARM.cpp, somewhere -    for (int i = 0; i < 16; i++) -    { -        Waitstates[0][i] = 1; -        Waitstates[1][i] = 1; -    } - -    if (!cpu) -    { -        // ARM9 -        // note: 33MHz cycles -        Waitstates[0][0x2] = 1; -        Waitstates[0][0x3] = 1; -        Waitstates[0][0x4] = 1; -        Waitstates[0][0x5] = 1; -        Waitstates[0][0x6] = 1; -        Waitstates[0][0x7] = 1; -        Waitstates[0][0x8] = 6; -        Waitstates[0][0x9] = 6; -        Waitstates[0][0xA] = 10; -        Waitstates[0][0xF] = 1; - -        Waitstates[1][0x2] = 2; -        Waitstates[1][0x3] = 1; -        Waitstates[1][0x4] = 1; -        Waitstates[1][0x5] = 2; -        Waitstates[1][0x6] = 2; -        Waitstates[1][0x7] = 1; -        Waitstates[1][0x8] = 12; -        Waitstates[1][0x9] = 12; -        Waitstates[1][0xA] = 10; -        Waitstates[1][0xF] = 1; -    } -    else -    { -        // ARM7 -        Waitstates[0][0x0] = 1; -        Waitstates[0][0x2] = 1; -        Waitstates[0][0x3] = 1; -        Waitstates[0][0x4] = 1; -        Waitstates[0][0x6] = 1; -        Waitstates[0][0x8] = 6; -        Waitstates[0][0x9] = 6; -        Waitstates[0][0xA] = 10; - -        Waitstates[1][0x0] = 1; -        Waitstates[1][0x2] = 2; -        Waitstates[1][0x3] = 1; -        Waitstates[1][0x4] = 1; -        Waitstates[1][0x6] = 2; -        Waitstates[1][0x8] = 12; -        Waitstates[1][0x9] = 12; -        Waitstates[1][0xA] = 10; -    } -      Reset();  } @@ -136,7 +100,7 @@ void DMA::DoSavestate(Savestate* file)      file->Var32(&SrcAddrInc);      file->Var32(&DstAddrInc); -    file->Var32((u32*)&Running); +    file->Var32(&Running);      file->Var32((u32*)&InProgress);      file->Var32((u32*)&IsGXFIFODMA);  } @@ -213,7 +177,11 @@ void DMA::Start()      // TODO eventually: not stop if we're running code in ITCM -    Running = true; +    if (NDS::DMAsRunning(CPU)) +        Running = 1; +    else +        Running = 2; +      InProgress = true;      NDS::StopCPU(CPU, 1<<Num);  } @@ -223,73 +191,177 @@ s32 DMA::Run(s32 cycles)      if (!Running)          return cycles; +#ifdef DEBUG_CHECK_DESYNC +    s32 startc = cycles; +#endif // DEBUG_CHECK_DESYNC + +    Executing = true; + +    // add NS penalty for first accesses in burst +    // note: this seems to only apply when starting DMA 'in the void' +    // for example, the aging cart DMA PRIORITY test: +    // starts a big DMA immediately, and a small DMA upon HBlank +    // each pulling from a timer incrementing once per cycle +    // it expects that the values be increasing linearly (2c/unit) +    // even as the small DMA starts and ends +    bool burststart = (Running == 2); +    Running = 1; + +    s32 unitcycles; +    s32 lastcycles = cycles; +      if (!(Cnt & 0x04000000))      { +        if (CPU == 0) +        { +            if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02) +            { +                unitcycles = NDS::ARM9MemTimings[CurSrcAddr >> 14][0] + NDS::ARM9MemTimings[CurDstAddr >> 14][0]; +            } +            else +            { +                unitcycles = NDS::ARM9MemTimings[CurSrcAddr >> 14][1] + NDS::ARM9MemTimings[CurDstAddr >> 14][1]; +                if ((CurSrcAddr >> 24) == (CurDstAddr >> 24)) +                    unitcycles++; + +                if (burststart) +                { +                    cycles -= 2; +                    cycles -= (NDS::ARM9MemTimings[CurSrcAddr >> 14][0] + NDS::ARM9MemTimings[CurDstAddr >> 14][0]); +                    cycles += unitcycles; +                } +            } +        } +        else +        { +            if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02) +            { +                unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][0] + NDS::ARM7MemTimings[CurDstAddr >> 15][0]; +            } +            else +            { +                unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][1] + NDS::ARM7MemTimings[CurDstAddr >> 15][1]; +                if ((CurSrcAddr >> 23) == (CurDstAddr >> 23)) +                    unitcycles++; + +                if (burststart) +                { +                    cycles -= 2; +                    cycles -= (NDS::ARM7MemTimings[CurSrcAddr >> 15][0] + NDS::ARM7MemTimings[CurDstAddr >> 15][0]); +                    cycles += unitcycles; +                } +            } +        } +          u16 (*readfn)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16;          void (*writefn)(u32,u16) = CPU ? NDS::ARM7Write16 : NDS::ARM9Write16; -        while (IterCount > 0 && cycles > 0) +        while (IterCount > 0 && !Stall)          { -            writefn(CurDstAddr, readfn(CurSrcAddr)); +            cycles -= unitcycles; + +            NDS::RunTightTimers(CPU, lastcycles-cycles); -            s32 c = (Waitstates[0][(CurSrcAddr >> 24) & 0xF] + Waitstates[0][(CurDstAddr >> 24) & 0xF]); -            cycles -= c; -            NDS::RunTimingCriticalDevices(CPU, c); +            lastcycles = cycles; + +            writefn(CurDstAddr, readfn(CurSrcAddr));              CurSrcAddr += SrcAddrInc<<1;              CurDstAddr += DstAddrInc<<1;              IterCount--;              RemCount--; + +            if (cycles <= 0) break;          }      }      else      { -        // optimized path for typical GXFIFO DMA -        if (IsGXFIFODMA) +        if (CPU == 0)          { -            while (IterCount > 0 && cycles > 0) +            if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02)              { -                GPU3D::WriteToGXFIFO(*(u32*)&NDS::MainRAM[CurSrcAddr&0x3FFFFF]); - -                s32 c = (Waitstates[1][0x2] + Waitstates[1][0x4]); -                cycles -= c; -                NDS::RunTimingCriticalDevices(0, c); - -                CurSrcAddr += SrcAddrInc<<2; -                IterCount--; -                RemCount--; +                unitcycles = NDS::ARM9MemTimings[CurSrcAddr >> 14][2] + NDS::ARM9MemTimings[CurDstAddr >> 14][2]; +            } +            else +            { +                unitcycles = NDS::ARM9MemTimings[CurSrcAddr >> 14][3] + NDS::ARM9MemTimings[CurDstAddr >> 14][3]; +                if ((CurSrcAddr >> 24) == (CurDstAddr >> 24)) +                    unitcycles++; +                else if ((CurSrcAddr >> 24) == 0x02) +                    unitcycles--; + +                if (burststart) +                { +                    cycles -= 2; +                    cycles -= (NDS::ARM9MemTimings[CurSrcAddr >> 14][2] + NDS::ARM9MemTimings[CurDstAddr >> 14][2]); +                    cycles += unitcycles; +                } +            } +        } +        else +        { +            if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02) +            { +                unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][2] + NDS::ARM7MemTimings[CurDstAddr >> 15][2]; +            } +            else +            { +                unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][3] + NDS::ARM7MemTimings[CurDstAddr >> 15][3]; +                if ((CurSrcAddr >> 23) == (CurDstAddr >> 23)) +                    unitcycles++; +                else if ((CurSrcAddr >> 24) == 0x02) +                    unitcycles--; + +                if (burststart) +                { +                    cycles -= 2; +                    cycles -= (NDS::ARM7MemTimings[CurSrcAddr >> 15][2] + NDS::ARM7MemTimings[CurDstAddr >> 15][2]); +                    cycles += unitcycles; +                }              }          }          u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32;          void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32; -        while (IterCount > 0 && cycles > 0) +        while (IterCount > 0 && !Stall)          { -            writefn(CurDstAddr, readfn(CurSrcAddr)); +            cycles -= unitcycles; + +            NDS::RunTightTimers(CPU, lastcycles-cycles); -            s32 c = (Waitstates[1][(CurSrcAddr >> 24) & 0xF] + Waitstates[1][(CurDstAddr >> 24) & 0xF]); -            cycles -= c; -            NDS::RunTimingCriticalDevices(CPU, c); +            lastcycles = cycles; + +            writefn(CurDstAddr, readfn(CurSrcAddr));              CurSrcAddr += SrcAddrInc<<2;              CurDstAddr += DstAddrInc<<2;              IterCount--;              RemCount--; + +            if (cycles <= 0) break;          }      } +    Executing = false; +    Stall = false; +      if (RemCount)      {          if (IterCount == 0)          { -            Running = false; +            Running = 0;              NDS::ResumeCPU(CPU, 1<<Num);              if (StartMode == 0x07)                  GPU3D::CheckFIFODMA();          } +#ifdef DEBUG_CHECK_DESYNC +        if (CPU) NDS::dbg_CyclesARM7 += (startc-cycles); +        else     NDS::dbg_CyclesARM9 += (startc-cycles); +#endif // DEBUG_CHECK_DESYNC +          return cycles;      } @@ -299,9 +371,14 @@ s32 DMA::Run(s32 cycles)      if (Cnt & 0x40000000)          NDS::SetIRQ(CPU, NDS::IRQ_DMA0 + Num); -    Running = false; +    Running = 0;      InProgress = false;      NDS::ResumeCPU(CPU, 1<<Num); -    return cycles - 2; +#ifdef DEBUG_CHECK_DESYNC +    if (CPU) NDS::dbg_CyclesARM7 += (startc-cycles); +    else     NDS::dbg_CyclesARM9 += (startc-cycles); +#endif // DEBUG_CHECK_DESYNC + +    return cycles;  } |