From e7fbeb1a6071db070c4c2b21c4c5ed70d53e269b Mon Sep 17 00:00:00 2001 From: lonkaars Date: Thu, 23 Feb 2023 13:21:31 +0100 Subject: ppu foreground sprite working and ~50% tested --- basys3/basys3.srcs/ppu.vhd | 4 +- basys3/basys3.srcs/ppu_sprite_fg.vhd | 67 +++++++++-------- basys3/basys3.srcs/ppu_sprite_fg_tb.vhd | 125 ++++++++++++++++++++++++++++++++ basys3/basys3.xpr | 4 +- 4 files changed, 164 insertions(+), 36 deletions(-) diff --git a/basys3/basys3.srcs/ppu.vhd b/basys3/basys3.srcs/ppu.vhd index 61c22aa..9cf1bc0 100644 --- a/basys3/basys3.srcs/ppu.vhd +++ b/basys3/basys3.srcs/ppu.vhd @@ -104,12 +104,12 @@ architecture Behavioral of ppu is IDX : natural := 0); port( -- inputs - CLK : in std_logic; -- pipeline clock + CLK : in std_logic; -- system clock RESET : in std_logic; -- reset internal memory and clock counters OE : in std_logic; -- output enable (of CIDX) X : in std_logic_vector(PPU_POS_H_WIDTH-1 downto 0); -- current screen pixel x Y : in std_logic_vector(PPU_POS_V_WIDTH-1 downto 0); -- current screen pixel y - FETCH : in std_logic; -- fetch sprite data from TMM (TODO : generic map, set foreground sprite component index) + FETCH : in std_logic; -- fetch sprite data from TMM VBLANK : in std_logic; -- fetch during vblank -- internal memory block (FAM) diff --git a/basys3/basys3.srcs/ppu_sprite_fg.vhd b/basys3/basys3.srcs/ppu_sprite_fg.vhd index 7b39b1d..af7cfa3 100644 --- a/basys3/basys3.srcs/ppu_sprite_fg.vhd +++ b/basys3/basys3.srcs/ppu_sprite_fg.vhd @@ -13,7 +13,6 @@ entity ppu_sprite_fg is -- foreground sprite port( -- inputs CLK : in std_logic; -- system clock - PL_CLK : in std_logic; -- pipeline clock RESET : in std_logic; -- reset internal memory and clock counters OE : in std_logic; -- output enable (of CIDX) X : in std_logic_vector(PPU_POS_H_WIDTH-1 downto 0); -- current screen pixel x @@ -58,13 +57,9 @@ architecture Behavioral of ppu_sprite_fg is REG : out std_logic_vector((ADDR_RANGE*DATA_W)-1 downto 0)); -- exposed register output end component; - -- FAM and TMM in/out temp + registers - signal T_TMM_ADDR, R_TMM_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0'); - signal T_TMM_DATA, R_TMM_DATA : std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0) := (others => '0'); - - -- state machine for synchronizing pipeline stages - type states is (PL_TMM_ADDR, PL_TMM_DATA); - signal state : states := PL_TMM_ADDR; + -- FAM and TMM in/out lines + signal T_TMM_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0'); + signal T_TMM_DATA : std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0) := (others => '0'); -- auxiliary signals (temp variables) signal T_CIDX : std_logic_vector(PPU_PALETTE_CIDX_WIDTH-1 downto 0) := (others => '0'); -- output color buffer/register @@ -86,16 +81,14 @@ architecture Behavioral of ppu_sprite_fg is signal TILEMAP_WORD_OFFSET : integer := 0; -- word offset from tile start address in TMM signal TMM_DATA_PAL_IDX : std_logic_vector(PPU_PALETTE_COLOR_WIDTH-1 downto 0); -- color of palette - -- TMM cache - signal TMM_CACHE_WEN : std_logic := '0'; + -- TMM cache lines + signal TMM_CACHE_WEN, TMM_CACHE_UPDATE_TURN : std_logic := '0'; signal TMM_CACHE_DATA : std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0) := (others => '0'); signal TMM_CACHE_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0'); signal TMM_CACHE : std_logic_vector((PPU_SPRITE_WORD_COUNT * PPU_TMM_DATA_WIDTH)-1 downto 0); begin -- output drivers CIDX <= T_CIDX when OE = '1' else (others => 'Z'); - TMM_ADDR <= R_TMM_ADDR; - T_TMM_DATA <= TMM_DATA; -- CIDX combination T_CIDX <= FAM_REG_COL_IDX & TMM_DATA_PAL_IDX; @@ -141,6 +134,8 @@ begin HIT <= SPRITE_ACTIVE and (nor TMM_DATA_PAL_IDX); -- FETCH LOGIC BELOW + TMM_ADDR <= T_TMM_ADDR when TMM_CACHE_UPDATE_TURN else (others => 'Z'); + T_TMM_DATA <= TMM_DATA; -- TTM cache ttm_cache : component er_ram @@ -157,32 +152,40 @@ begin DATA => TMM_CACHE_DATA, REG => TMM_CACHE); - TILEMAP_WORD_OFFSET <= TRANS_TILE_PIXEL_IDX / PPU_PIXELS_PER_TILE_WORD; -- word offset from starting word of sprite - T_TMM_ADDR <= std_logic_vector(to_unsigned(PPU_SPRITE_WORD_COUNT * to_integer(unsigned(FAM_REG_TILE_IDX)) + TILEMAP_WORD_OFFSET, PPU_TMM_ADDR_WIDTH)); -- TMM address - - - -- state machine (pipeline stage counter) + sync r/w - process(CLK, RESET) - constant TMM_FETCH_CLK_RANGE_BEGIN : natural := PPU_TMM_CACHE_FETCH_C_COUNT * IDX; - variable TMM_FETCH_CTR : unsigned(PPU_TMM_CACHE_FETCH_A_WIDTH-1 downto 0) := (others => '0'); - variable TMM_FETCH_CTR_REL : unsigned(PPU_TMM_CACHE_FETCH_A_WIDTH-1 downto 0) := (others => '0'); + -- fetch machine, should do the following (offset data read by one clock -> propagation/lookup delay): + -- CLK[53 * IDX + 0] (addr = 0) + -- CLK[53 * IDX + 1] (addr = 1, read data[0]) + -- CLK[53 * IDX + 2] (addr = 2, read data[1]), etc + -- a full tile is 52 words, but since the offset is 1 clock, a total copy takes 53 clock cycles + process(CLK, RESET, FETCH) + constant TMM_FETCH_CLK_RANGE_BEGIN : natural := PPU_TMM_CACHE_FETCH_C_COUNT * IDX; -- fetch CLK count for copying this module's sprite from TMM + variable TMM_FETCH_CTR : unsigned(PPU_TMM_CACHE_FETCH_A_WIDTH-1 downto 0) := (others => '0'); -- CLK counter while FETCH=1 + variable TMM_FETCH_CTR_REL : unsigned(PPU_TMM_CACHE_FETCH_A_WIDTH-1 downto 0) := (others => '0'); -- CLK counter relative for sprite[IDX] begin - if RESET = '1' then - -- reset state - state <= PL_TMM_ADDR; - -- reset internal pipeline registers - R_TMM_ADDR <= (others => '0'); - R_TMM_DATA <= (others => '0'); + if RESET = '1' or FETCH = '0' then + TMM_FETCH_CTR := (others => '0'); + TMM_FETCH_CTR_REL := (others => '0'); + TMM_CACHE_WEN <= '0'; + TMM_CACHE_UPDATE_TURN <= '0'; elsif rising_edge(CLK) then - TMM_FETCH_CTR := (others => '0') when FETCH = '0' else TMM_FETCH_CTR + 1; + TMM_FETCH_CTR := TMM_FETCH_CTR + 1; TMM_FETCH_CTR_REL := TMM_FETCH_CTR - TMM_FETCH_CLK_RANGE_BEGIN; - if FETCH = '1' and TMM_FETCH_CTR >= TMM_FETCH_CLK_RANGE_BEGIN and TMM_FETCH_CTR < (TMM_FETCH_CLK_RANGE_BEGIN + PPU_TMM_CACHE_FETCH_C_COUNT) then - TMM_CACHE_WEN <= '1'; - R_TMM_DATA <= T_TMM_DATA; - T_TMM_ADDR <= R_TMM_ADDR; + if TMM_FETCH_CTR >= TMM_FETCH_CLK_RANGE_BEGIN and + TMM_FETCH_CTR < (TMM_FETCH_CLK_RANGE_BEGIN + PPU_TMM_CACHE_FETCH_C_COUNT) then + TMM_CACHE_UPDATE_TURN <= '1'; + if TMM_FETCH_CTR_REL < PPU_TMM_CACHE_FETCH_C_COUNT - 1 then -- calculate address until second to last clock + T_TMM_ADDR <= std_logic_vector(resize(TMM_FETCH_CTR - IDX, T_TMM_ADDR'length)); + TMM_CACHE_ADDR <= std_logic_vector(resize(TMM_FETCH_CTR_REL - 1, TMM_CACHE_ADDR'length)); + end if; + + if TMM_FETCH_CTR_REL > 0 then -- read offset + TMM_CACHE_DATA <= T_TMM_DATA; + TMM_CACHE_WEN <= '1'; + end if; else TMM_CACHE_WEN <= '0'; + TMM_CACHE_UPDATE_TURN <= '0'; end if; end if; end process; diff --git a/basys3/basys3.srcs/ppu_sprite_fg_tb.vhd b/basys3/basys3.srcs/ppu_sprite_fg_tb.vhd index 87c4f6e..19b9f35 100644 --- a/basys3/basys3.srcs/ppu_sprite_fg_tb.vhd +++ b/basys3/basys3.srcs/ppu_sprite_fg_tb.vhd @@ -11,5 +11,130 @@ entity ppu_sprite_fg_tb is end ppu_sprite_fg_tb; architecture Behavioral of ppu_sprite_fg_tb is + component ppu_sprite_fg -- foreground sprite + generic ( + IDX : natural := 0); -- sprite index number + port( + -- inputs + CLK : in std_logic; -- system clock + RESET : in std_logic; -- reset internal memory and clock counters + OE : in std_logic; -- output enable (of CIDX) + X : in std_logic_vector(PPU_POS_H_WIDTH-1 downto 0); -- current screen pixel x + Y : in std_logic_vector(PPU_POS_V_WIDTH-1 downto 0); -- current screen pixel y + FETCH : in std_logic; -- fetch sprite data from TMM + VBLANK : in std_logic; -- fetch during vblank + + -- internal memory block (FAM) + FAM_WEN : in std_logic; -- VRAM FAM write enable + FAM_ADDR : in std_logic_vector(PPU_FAM_ADDR_WIDTH-1 downto 0); -- VRAM fam address + FAM_DATA : in std_logic_vector(PPU_FAM_DATA_WIDTH-1 downto 0); -- VRAM fam data + + -- used memory blocks + TMM_ADDR : out std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0); + TMM_DATA : in std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0); + + -- outputs + CIDX : out std_logic_vector(PPU_PALETTE_CIDX_WIDTH-1 downto 0); -- output color + HIT : out std_logic); -- current pixel is not transparent + end component; + signal CLK : std_logic := '0'; + signal RESET : std_logic := '0'; + signal OE : std_logic := '0'; + signal X : std_logic_vector(PPU_POS_H_WIDTH-1 downto 0) := (others => '0'); + signal Y : std_logic_vector(PPU_POS_V_WIDTH-1 downto 0) := (others => '0'); + signal FETCH : std_logic := '0'; + signal VBLANK : std_logic := '0'; + signal FAM_WEN : std_logic := '0'; + signal FAM_ADDR : std_logic_vector(PPU_FAM_ADDR_WIDTH-1 downto 0) := (others => '0'); + signal FAM_DATA : std_logic_vector(PPU_FAM_DATA_WIDTH-1 downto 0) := (others => '0'); + signal TMM_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0); + signal TMM_DATA : std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0) := (others => '0'); + signal CIDX : std_logic_vector(PPU_PALETTE_CIDX_WIDTH-1 downto 0) := (others => '0'); + signal HIT : std_logic; + + signal CLK_I : unsigned(31 downto 0) := (others => '0'); begin + uut: component ppu_sprite_fg + generic map( IDX => 2 ) + port map( + CLK => CLK, + RESET => RESET, + OE => OE, + X => X, + Y => Y, + FETCH => FETCH, + VBLANK => VBLANK, + FAM_WEN => FAM_WEN, + FAM_ADDR => FAM_ADDR, + FAM_DATA => FAM_DATA, + TMM_ADDR => TMM_ADDR, + TMM_DATA => TMM_DATA, + CIDX => CIDX, + HIT => HIT); + + tb : process + begin + -- initialize TMM cache + RESET <= '1'; wait for 1 ns; RESET <= '0'; wait for 1 ns; + + OE <= '1'; + VBLANK <= '0'; + FETCH <= '0'; + + -- FAM contents: + -- flip horizontally + -- xy -> (42-16, 8-16) = (26, -8) + -- palette index 2 + -- tilemap index 460 (0x1cc) + -- = hex((1 << 31) | (0 << 30) | (42 << 21) | (8 << 13) | (2 << 10) | (460 << 0)) + FAM_WEN <= '1'; + FAM_ADDR <= std_logic_vector(to_unsigned(16#0005#, FAM_ADDR'length)); + FAM_DATA <= std_logic_vector(to_unsigned(16#8541#, FAM_DATA'length)); + CLK <= '1'; wait for 1 ns; CLK <= '0'; wait for 1 ns; + FAM_ADDR <= std_logic_vector(to_unsigned(16#0004#, FAM_ADDR'length)); + FAM_DATA <= std_logic_vector(to_unsigned(16#09cc#, FAM_DATA'length)); + CLK <= '1'; wait for 1 ns; CLK <= '0'; wait for 1 ns; + FAM_WEN <= '0'; + + for i in 0 to 32 loop + if i = 0 then + X <= std_logic_vector(to_unsigned(25, X'length)); + Y <= std_logic_vector(to_unsigned(60, Y'length)); + end if; + + if i = 3 then + X <= std_logic_vector(to_unsigned(29, X'length)); + Y <= std_logic_vector(to_unsigned(4, Y'length)); + end if; + + wait for 5 ns; CLK <= '1'; wait for 5 ns; CLK <= '0'; + end loop; + + wait for 1 ns; + RESET <= '1'; + wait for 10 ns; + RESET <= '0'; + VBLANK <= '1'; + FETCH <= '1'; + wait for 1 ns; + + -- FETCH check + for i in 0 to 500 loop + CLK_I <= to_unsigned(i, 32); + -- if i > 10 then + -- OE <= '1'; + -- end if; + -- if i > 20 then + -- RESET <= '1'; + -- end if; + + TMM_DATA <= std_logic_vector(to_unsigned(i - 106, TMM_DATA'length)); + + CLK <= '1'; + wait for 1 ns; + CLK <= '0'; + wait for 1 ns; + end loop; + wait; -- stop for simulator + end process; end Behavioral; diff --git a/basys3/basys3.xpr b/basys3/basys3.xpr index 813b3e2..0c452cf 100644 --- a/basys3/basys3.xpr +++ b/basys3/basys3.xpr @@ -61,7 +61,7 @@