diff options
Diffstat (limited to 'basys3/basys3.srcs/ppu_sprite_fg.vhd')
-rw-r--r-- | basys3/basys3.srcs/ppu_sprite_fg.vhd | 201 |
1 files changed, 127 insertions, 74 deletions
diff --git a/basys3/basys3.srcs/ppu_sprite_fg.vhd b/basys3/basys3.srcs/ppu_sprite_fg.vhd index af7cfa3..3b4d2c6 100644 --- a/basys3/basys3.srcs/ppu_sprite_fg.vhd +++ b/basys3/basys3.srcs/ppu_sprite_fg.vhd @@ -14,6 +14,8 @@ entity ppu_sprite_fg is -- foreground sprite -- inputs CLK : in std_logic; -- system clock RESET : in std_logic; -- reset internal memory and clock counters + PL_CLK : in std_logic; -- pipeline clock + PL_RESET : in std_logic; -- reset pipeline clock counters OE : in std_logic; -- output enable (of CIDX) X : in std_logic_vector(PPU_POS_H_WIDTH-1 downto 0); -- current screen pixel x Y : in std_logic_vector(PPU_POS_V_WIDTH-1 downto 0); -- current screen pixel y @@ -57,9 +59,9 @@ architecture Behavioral of ppu_sprite_fg is REG : out std_logic_vector((ADDR_RANGE*DATA_W)-1 downto 0)); -- exposed register output end component; - -- FAM and TMM in/out lines - signal T_TMM_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0'); - signal T_TMM_DATA : std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0) := (others => '0'); + -- TMM in/out temp + registers + signal T_TMM_ADDR, R_TMM_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0'); + signal T_TMM_DATA, R_TMM_DATA : std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0) := (others => '0'); -- auxiliary signals (temp variables) signal T_CIDX : std_logic_vector(PPU_PALETTE_CIDX_WIDTH-1 downto 0) := (others => '0'); -- output color buffer/register @@ -75,23 +77,15 @@ architecture Behavioral of ppu_sprite_fg is signal SPRITE_ACTIVE : std_logic := '0'; -- is pixel in bounding box of sprite signal PIXEL_ABS_X, PIXEL_ABS_Y : integer := 0; -- absolute pixel position (relative to FG canvas instead of viewport) + signal PIXEL_BIT_OFFSET : integer := 0; -- pixel index within word of TMM signal TILE_PIDX_X, TRANS_TILE_PIDX_X : unsigned(PPU_SPRITE_POS_H_WIDTH-1 downto 0) := (others => '0'); -- xy position of pixel within tile (local tile coords) signal TILE_PIDX_Y, TRANS_TILE_PIDX_Y : unsigned(PPU_SPRITE_POS_V_WIDTH-1 downto 0) := (others => '0'); -- xy position of pixel within tile (local tile coords) - signal TRANS_TILE_PIXEL_IDX : integer := 0; -- index of pixel within tile (reading order) + signal TRANS_TILE_PIDX : integer := 0; -- index of pixel within tile (reading order) + signal TILEMAP_WORD : unsigned(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0'); signal TILEMAP_WORD_OFFSET : integer := 0; -- word offset from tile start address in TMM signal TMM_DATA_PAL_IDX : std_logic_vector(PPU_PALETTE_COLOR_WIDTH-1 downto 0); -- color of palette - -- TMM cache lines - signal TMM_CACHE_WEN, TMM_CACHE_UPDATE_TURN : std_logic := '0'; - signal TMM_CACHE_DATA : std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0) := (others => '0'); - signal TMM_CACHE_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0'); - signal TMM_CACHE : std_logic_vector((PPU_SPRITE_WORD_COUNT * PPU_TMM_DATA_WIDTH)-1 downto 0); begin - -- output drivers - CIDX <= T_CIDX when OE = '1' else (others => 'Z'); - -- CIDX combination - T_CIDX <= FAM_REG_COL_IDX & TMM_DATA_PAL_IDX; - -- FAM memory FAM : component er_ram generic map( @@ -107,11 +101,18 @@ begin DATA => FAM_DATA, REG => INT_FAM); + -- CIDX combination + T_CIDX <= FAM_REG_COL_IDX & TMM_DATA_PAL_IDX; + -- output drivers + CIDX <= T_CIDX when OE = '1' else (others => 'Z'); + -- TMM memory + T_TMM_DATA <= TMM_DATA; + -- pixel position within bounding box of sprite SPRITE_ACTIVE <= '1' when ((unsigned(X) + 16) >= unsigned(FAM_REG_POS_H)) and - ((unsigned(X) + 16) < (unsigned(FAM_REG_POS_H) + to_unsigned(PPU_SPRITE_WIDTH, PPU_POS_H_WIDTH))) and - ((unsigned(Y) + 16) >= unsigned(FAM_REG_POS_V)) and - ((unsigned(Y) + 16) < (unsigned(FAM_REG_POS_V) + to_unsigned(PPU_SPRITE_HEIGHT, PPU_POS_V_WIDTH))) else '0'; + ((unsigned(X) + 16) < (unsigned(FAM_REG_POS_H) + to_unsigned(PPU_SPRITE_WIDTH, PPU_POS_H_WIDTH))) and + ((unsigned(Y) + 16) >= unsigned(FAM_REG_POS_V)) and + ((unsigned(Y) + 16) < (unsigned(FAM_REG_POS_V) + to_unsigned(PPU_SPRITE_HEIGHT, PPU_POS_V_WIDTH))) else '0'; -- (sprite local) pixel coords TILE_PIDX_X <= resize(unsigned(X) + 16 - resize(unsigned(FAM_REG_POS_H), TILE_PIDX_X'length), TILE_PIDX_X'length); @@ -127,66 +128,118 @@ begin YO => TRANS_TILE_PIDX_Y); -- pixel index - TRANS_TILE_PIXEL_IDX <= integer(PPU_SPRITE_WIDTH) * to_integer(TRANS_TILE_PIDX_Y) + to_integer(TRANS_TILE_PIDX_X); - -- palette color at pixel - TMM_DATA_PAL_IDX <= TMM_CACHE(TRANS_TILE_PIXEL_IDX * integer(PPU_PALETTE_COLOR_WIDTH) + integer(PPU_PALETTE_COLOR_WIDTH)-1 downto TRANS_TILE_PIXEL_IDX * integer(PPU_PALETTE_COLOR_WIDTH)); - -- if pixel in sprite hitbox and TMM_DATA_PAL_IDX > 0 - HIT <= SPRITE_ACTIVE and (nor TMM_DATA_PAL_IDX); - - -- FETCH LOGIC BELOW - TMM_ADDR <= T_TMM_ADDR when TMM_CACHE_UPDATE_TURN else (others => 'Z'); - T_TMM_DATA <= TMM_DATA; - - -- TTM cache - ttm_cache : component er_ram - generic map( - ADDR_W => PPU_TMM_ADDR_WIDTH, - DATA_W => PPU_TMM_DATA_WIDTH, - ADDR_LOW => 0, - ADDR_RANGE => PPU_SPRITE_WORD_COUNT) - port map( - CLK => CLK, - RST => RESET, - WEN => TMM_CACHE_WEN, - ADDR => TMM_CACHE_ADDR, - DATA => TMM_CACHE_DATA, - REG => TMM_CACHE); - - -- fetch machine, should do the following (offset data read by one clock -> propagation/lookup delay): - -- CLK[53 * IDX + 0] (addr = 0) - -- CLK[53 * IDX + 1] (addr = 1, read data[0]) - -- CLK[53 * IDX + 2] (addr = 2, read data[1]), etc - -- a full tile is 52 words, but since the offset is 1 clock, a total copy takes 53 clock cycles - process(CLK, RESET, FETCH) - constant TMM_FETCH_CLK_RANGE_BEGIN : natural := PPU_TMM_CACHE_FETCH_C_COUNT * IDX; -- fetch CLK count for copying this module's sprite from TMM - variable TMM_FETCH_CTR : unsigned(PPU_TMM_CACHE_FETCH_A_WIDTH-1 downto 0) := (others => '0'); -- CLK counter while FETCH=1 - variable TMM_FETCH_CTR_REL : unsigned(PPU_TMM_CACHE_FETCH_A_WIDTH-1 downto 0) := (others => '0'); -- CLK counter relative for sprite[IDX] + TRANS_TILE_PIDX <= integer(PPU_SPRITE_WIDTH) * to_integer(TRANS_TILE_PIDX_Y) + to_integer(TRANS_TILE_PIDX_X); + TILEMAP_WORD <= resize(unsigned(FAM_REG_TILE_IDX) * PPU_SPRITE_WORD_COUNT, TILEMAP_WORD'length); -- TMM sprite starting word + TILEMAP_WORD_OFFSET <= TRANS_TILE_PIDX / PPU_PIXELS_PER_TILE_WORD; -- word offset from starting word of sprite + PIXEL_BIT_OFFSET <= TRANS_TILE_PIDX mod PPU_PIXELS_PER_TILE_WORD; -- pixel bit offset + + inaccurate_occlusion_shims: if IDX >= PPU_ACCURATE_FG_SPRITE_COUNT generate + -- state machine for synchronizing pipeline stages + type states is (PL_TMM_ADDR, PL_TMM_DATA); + signal state : states := PL_TMM_ADDR; begin - if RESET = '1' or FETCH = '0' then - TMM_FETCH_CTR := (others => '0'); - TMM_FETCH_CTR_REL := (others => '0'); - TMM_CACHE_WEN <= '0'; - TMM_CACHE_UPDATE_TURN <= '0'; - elsif rising_edge(CLK) then - TMM_FETCH_CTR := TMM_FETCH_CTR + 1; - TMM_FETCH_CTR_REL := TMM_FETCH_CTR - TMM_FETCH_CLK_RANGE_BEGIN; - - if TMM_FETCH_CTR >= TMM_FETCH_CLK_RANGE_BEGIN and - TMM_FETCH_CTR < (TMM_FETCH_CLK_RANGE_BEGIN + PPU_TMM_CACHE_FETCH_C_COUNT) then - TMM_CACHE_UPDATE_TURN <= '1'; - if TMM_FETCH_CTR_REL < PPU_TMM_CACHE_FETCH_C_COUNT - 1 then -- calculate address until second to last clock - T_TMM_ADDR <= std_logic_vector(resize(TMM_FETCH_CTR - IDX, T_TMM_ADDR'length)); - TMM_CACHE_ADDR <= std_logic_vector(resize(TMM_FETCH_CTR_REL - 1, TMM_CACHE_ADDR'length)); + HIT <= SPRITE_ACTIVE; + -- only fetch if OE is high, and during the second pipeline stage + TMM_ADDR <= R_TMM_ADDR when OE = '1' and state = PL_TMM_ADDR else (others => 'Z'); + T_TMM_ADDR <= std_logic_vector(TILEMAP_WORD + to_unsigned(TILEMAP_WORD_OFFSET, PPU_TMM_ADDR_WIDTH)); -- TMM address + + -- TMM DATA + with PIXEL_BIT_OFFSET select + TMM_DATA_PAL_IDX <= R_TMM_DATA(2 downto 0) when 0, + R_TMM_DATA(5 downto 3) when 1, + R_TMM_DATA(8 downto 6) when 2, + R_TMM_DATA(11 downto 9) when 3, + R_TMM_DATA(14 downto 12) when 4, + (others => '0') when others; + + process(PL_CLK, RESET, PL_RESET) + begin + if RESET = '1' or PL_RESET = '1' then + -- reset state + state <= PL_TMM_ADDR; + if RESET = '1' then + -- reset internal pipeline registers + R_TMM_ADDR <= (others => '0'); + R_TMM_DATA <= (others => '0'); end if; - - if TMM_FETCH_CTR_REL > 0 then -- read offset - TMM_CACHE_DATA <= T_TMM_DATA; - TMM_CACHE_WEN <= '1'; - end if; - else + elsif rising_edge(CLK) then + case state is + when PL_TMM_ADDR => + state <= PL_TMM_DATA; + R_TMM_ADDR <= T_TMM_ADDR; + when PL_TMM_DATA => + state <= PL_TMM_ADDR; + R_TMM_DATA <= T_TMM_DATA; + end case; + end if; + end process; + end generate; + + accurate_occlusion_logic: if IDX < PPU_ACCURATE_FG_SPRITE_COUNT generate + -- TMM cache lines + signal TMM_CACHE_WEN, TMM_CACHE_UPDATE_TURN : std_logic := '0'; + signal TMM_CACHE_DATA : std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0) := (others => '0'); + signal TMM_CACHE_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0'); + signal TMM_CACHE : std_logic_vector((PPU_SPRITE_WORD_COUNT * PPU_TMM_DATA_WIDTH)-1 downto 0); + begin + HIT <= SPRITE_ACTIVE and (nor TMM_DATA_PAL_IDX); + + -- palette color at pixel + TMM_DATA_PAL_IDX <= TMM_CACHE(TRANS_TILE_PIDX * integer(PPU_PALETTE_COLOR_WIDTH) + integer(PPU_PALETTE_COLOR_WIDTH)-1 downto TRANS_TILE_PIDX * integer(PPU_PALETTE_COLOR_WIDTH)); + + TMM_ADDR <= T_TMM_ADDR when TMM_CACHE_UPDATE_TURN else (others => 'Z'); + + -- TTM cache + ttm_cache : component er_ram + generic map( + ADDR_W => PPU_TMM_ADDR_WIDTH, + DATA_W => PPU_TMM_DATA_WIDTH, + ADDR_LOW => 0, + ADDR_RANGE => PPU_SPRITE_WORD_COUNT) + port map( + CLK => CLK, + RST => RESET, + WEN => TMM_CACHE_WEN, + ADDR => TMM_CACHE_ADDR, + DATA => TMM_CACHE_DATA, + REG => TMM_CACHE); + + -- fetch machine, should do the following (offset data read by one clock -> propagation/lookup delay): + -- CLK[53 * IDX + 0] (addr = 0) + -- CLK[53 * IDX + 1] (addr = 1, read data[0]) + -- CLK[53 * IDX + 2] (addr = 2, read data[1]), etc + -- a full tile is 52 words, but since the offset is 1 clock, a total copy takes 53 clock cycles + process(CLK, RESET, FETCH) + constant TMM_FETCH_CLK_RANGE_BEGIN : natural := PPU_TMM_CACHE_FETCH_C_COUNT * IDX; -- fetch CLK count for copying this module's sprite from TMM + variable TMM_FETCH_CTR : unsigned(PPU_TMM_CACHE_FETCH_A_WIDTH-1 downto 0) := (others => '0'); -- CLK counter while FETCH=1 + variable TMM_FETCH_CTR_REL : unsigned(PPU_TMM_CACHE_FETCH_A_WIDTH-1 downto 0) := (others => '0'); -- CLK counter relative for sprite[IDX] + begin + if RESET = '1' or FETCH = '0' then + TMM_FETCH_CTR := (others => '0'); + TMM_FETCH_CTR_REL := (others => '0'); TMM_CACHE_WEN <= '0'; TMM_CACHE_UPDATE_TURN <= '0'; + elsif rising_edge(CLK) then + TMM_FETCH_CTR := TMM_FETCH_CTR + 1; + TMM_FETCH_CTR_REL := TMM_FETCH_CTR - TMM_FETCH_CLK_RANGE_BEGIN; + + if TMM_FETCH_CTR >= TMM_FETCH_CLK_RANGE_BEGIN and + TMM_FETCH_CTR < (TMM_FETCH_CLK_RANGE_BEGIN + PPU_TMM_CACHE_FETCH_C_COUNT) then + TMM_CACHE_UPDATE_TURN <= '1'; + if TMM_FETCH_CTR_REL < PPU_TMM_CACHE_FETCH_C_COUNT - 1 then -- calculate address until second to last clock + T_TMM_ADDR <= std_logic_vector(resize(TMM_FETCH_CTR - IDX, T_TMM_ADDR'length)); -- -IDX to correct for each fetch cycle taking 1 extra clock cycle + TMM_CACHE_ADDR <= std_logic_vector(resize(TMM_FETCH_CTR_REL - 1, TMM_CACHE_ADDR'length)); + end if; + + if TMM_FETCH_CTR_REL > 0 then -- read offset + TMM_CACHE_DATA <= T_TMM_DATA; + TMM_CACHE_WEN <= '1'; + end if; + else + TMM_CACHE_WEN <= '0'; + TMM_CACHE_UPDATE_TURN <= '0'; + end if; end if; - end if; - end process; + end process; + end generate; end Behavioral; |