aboutsummaryrefslogtreecommitdiff
path: root/basys3/basys3.srcs/ppu_sprite_fg.vhd
diff options
context:
space:
mode:
Diffstat (limited to 'basys3/basys3.srcs/ppu_sprite_fg.vhd')
-rw-r--r--basys3/basys3.srcs/ppu_sprite_fg.vhd201
1 files changed, 127 insertions, 74 deletions
diff --git a/basys3/basys3.srcs/ppu_sprite_fg.vhd b/basys3/basys3.srcs/ppu_sprite_fg.vhd
index af7cfa3..3b4d2c6 100644
--- a/basys3/basys3.srcs/ppu_sprite_fg.vhd
+++ b/basys3/basys3.srcs/ppu_sprite_fg.vhd
@@ -14,6 +14,8 @@ entity ppu_sprite_fg is -- foreground sprite
-- inputs
CLK : in std_logic; -- system clock
RESET : in std_logic; -- reset internal memory and clock counters
+ PL_CLK : in std_logic; -- pipeline clock
+ PL_RESET : in std_logic; -- reset pipeline clock counters
OE : in std_logic; -- output enable (of CIDX)
X : in std_logic_vector(PPU_POS_H_WIDTH-1 downto 0); -- current screen pixel x
Y : in std_logic_vector(PPU_POS_V_WIDTH-1 downto 0); -- current screen pixel y
@@ -57,9 +59,9 @@ architecture Behavioral of ppu_sprite_fg is
REG : out std_logic_vector((ADDR_RANGE*DATA_W)-1 downto 0)); -- exposed register output
end component;
- -- FAM and TMM in/out lines
- signal T_TMM_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0');
- signal T_TMM_DATA : std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0) := (others => '0');
+ -- TMM in/out temp + registers
+ signal T_TMM_ADDR, R_TMM_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0');
+ signal T_TMM_DATA, R_TMM_DATA : std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0) := (others => '0');
-- auxiliary signals (temp variables)
signal T_CIDX : std_logic_vector(PPU_PALETTE_CIDX_WIDTH-1 downto 0) := (others => '0'); -- output color buffer/register
@@ -75,23 +77,15 @@ architecture Behavioral of ppu_sprite_fg is
signal SPRITE_ACTIVE : std_logic := '0'; -- is pixel in bounding box of sprite
signal PIXEL_ABS_X, PIXEL_ABS_Y : integer := 0; -- absolute pixel position (relative to FG canvas instead of viewport)
+ signal PIXEL_BIT_OFFSET : integer := 0; -- pixel index within word of TMM
signal TILE_PIDX_X, TRANS_TILE_PIDX_X : unsigned(PPU_SPRITE_POS_H_WIDTH-1 downto 0) := (others => '0'); -- xy position of pixel within tile (local tile coords)
signal TILE_PIDX_Y, TRANS_TILE_PIDX_Y : unsigned(PPU_SPRITE_POS_V_WIDTH-1 downto 0) := (others => '0'); -- xy position of pixel within tile (local tile coords)
- signal TRANS_TILE_PIXEL_IDX : integer := 0; -- index of pixel within tile (reading order)
+ signal TRANS_TILE_PIDX : integer := 0; -- index of pixel within tile (reading order)
+ signal TILEMAP_WORD : unsigned(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0');
signal TILEMAP_WORD_OFFSET : integer := 0; -- word offset from tile start address in TMM
signal TMM_DATA_PAL_IDX : std_logic_vector(PPU_PALETTE_COLOR_WIDTH-1 downto 0); -- color of palette
- -- TMM cache lines
- signal TMM_CACHE_WEN, TMM_CACHE_UPDATE_TURN : std_logic := '0';
- signal TMM_CACHE_DATA : std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0) := (others => '0');
- signal TMM_CACHE_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0');
- signal TMM_CACHE : std_logic_vector((PPU_SPRITE_WORD_COUNT * PPU_TMM_DATA_WIDTH)-1 downto 0);
begin
- -- output drivers
- CIDX <= T_CIDX when OE = '1' else (others => 'Z');
- -- CIDX combination
- T_CIDX <= FAM_REG_COL_IDX & TMM_DATA_PAL_IDX;
-
-- FAM memory
FAM : component er_ram
generic map(
@@ -107,11 +101,18 @@ begin
DATA => FAM_DATA,
REG => INT_FAM);
+ -- CIDX combination
+ T_CIDX <= FAM_REG_COL_IDX & TMM_DATA_PAL_IDX;
+ -- output drivers
+ CIDX <= T_CIDX when OE = '1' else (others => 'Z');
+ -- TMM memory
+ T_TMM_DATA <= TMM_DATA;
+
-- pixel position within bounding box of sprite
SPRITE_ACTIVE <= '1' when ((unsigned(X) + 16) >= unsigned(FAM_REG_POS_H)) and
- ((unsigned(X) + 16) < (unsigned(FAM_REG_POS_H) + to_unsigned(PPU_SPRITE_WIDTH, PPU_POS_H_WIDTH))) and
- ((unsigned(Y) + 16) >= unsigned(FAM_REG_POS_V)) and
- ((unsigned(Y) + 16) < (unsigned(FAM_REG_POS_V) + to_unsigned(PPU_SPRITE_HEIGHT, PPU_POS_V_WIDTH))) else '0';
+ ((unsigned(X) + 16) < (unsigned(FAM_REG_POS_H) + to_unsigned(PPU_SPRITE_WIDTH, PPU_POS_H_WIDTH))) and
+ ((unsigned(Y) + 16) >= unsigned(FAM_REG_POS_V)) and
+ ((unsigned(Y) + 16) < (unsigned(FAM_REG_POS_V) + to_unsigned(PPU_SPRITE_HEIGHT, PPU_POS_V_WIDTH))) else '0';
-- (sprite local) pixel coords
TILE_PIDX_X <= resize(unsigned(X) + 16 - resize(unsigned(FAM_REG_POS_H), TILE_PIDX_X'length), TILE_PIDX_X'length);
@@ -127,66 +128,118 @@ begin
YO => TRANS_TILE_PIDX_Y);
-- pixel index
- TRANS_TILE_PIXEL_IDX <= integer(PPU_SPRITE_WIDTH) * to_integer(TRANS_TILE_PIDX_Y) + to_integer(TRANS_TILE_PIDX_X);
- -- palette color at pixel
- TMM_DATA_PAL_IDX <= TMM_CACHE(TRANS_TILE_PIXEL_IDX * integer(PPU_PALETTE_COLOR_WIDTH) + integer(PPU_PALETTE_COLOR_WIDTH)-1 downto TRANS_TILE_PIXEL_IDX * integer(PPU_PALETTE_COLOR_WIDTH));
- -- if pixel in sprite hitbox and TMM_DATA_PAL_IDX > 0
- HIT <= SPRITE_ACTIVE and (nor TMM_DATA_PAL_IDX);
-
- -- FETCH LOGIC BELOW
- TMM_ADDR <= T_TMM_ADDR when TMM_CACHE_UPDATE_TURN else (others => 'Z');
- T_TMM_DATA <= TMM_DATA;
-
- -- TTM cache
- ttm_cache : component er_ram
- generic map(
- ADDR_W => PPU_TMM_ADDR_WIDTH,
- DATA_W => PPU_TMM_DATA_WIDTH,
- ADDR_LOW => 0,
- ADDR_RANGE => PPU_SPRITE_WORD_COUNT)
- port map(
- CLK => CLK,
- RST => RESET,
- WEN => TMM_CACHE_WEN,
- ADDR => TMM_CACHE_ADDR,
- DATA => TMM_CACHE_DATA,
- REG => TMM_CACHE);
-
- -- fetch machine, should do the following (offset data read by one clock -> propagation/lookup delay):
- -- CLK[53 * IDX + 0] (addr = 0)
- -- CLK[53 * IDX + 1] (addr = 1, read data[0])
- -- CLK[53 * IDX + 2] (addr = 2, read data[1]), etc
- -- a full tile is 52 words, but since the offset is 1 clock, a total copy takes 53 clock cycles
- process(CLK, RESET, FETCH)
- constant TMM_FETCH_CLK_RANGE_BEGIN : natural := PPU_TMM_CACHE_FETCH_C_COUNT * IDX; -- fetch CLK count for copying this module's sprite from TMM
- variable TMM_FETCH_CTR : unsigned(PPU_TMM_CACHE_FETCH_A_WIDTH-1 downto 0) := (others => '0'); -- CLK counter while FETCH=1
- variable TMM_FETCH_CTR_REL : unsigned(PPU_TMM_CACHE_FETCH_A_WIDTH-1 downto 0) := (others => '0'); -- CLK counter relative for sprite[IDX]
+ TRANS_TILE_PIDX <= integer(PPU_SPRITE_WIDTH) * to_integer(TRANS_TILE_PIDX_Y) + to_integer(TRANS_TILE_PIDX_X);
+ TILEMAP_WORD <= resize(unsigned(FAM_REG_TILE_IDX) * PPU_SPRITE_WORD_COUNT, TILEMAP_WORD'length); -- TMM sprite starting word
+ TILEMAP_WORD_OFFSET <= TRANS_TILE_PIDX / PPU_PIXELS_PER_TILE_WORD; -- word offset from starting word of sprite
+ PIXEL_BIT_OFFSET <= TRANS_TILE_PIDX mod PPU_PIXELS_PER_TILE_WORD; -- pixel bit offset
+
+ inaccurate_occlusion_shims: if IDX >= PPU_ACCURATE_FG_SPRITE_COUNT generate
+ -- state machine for synchronizing pipeline stages
+ type states is (PL_TMM_ADDR, PL_TMM_DATA);
+ signal state : states := PL_TMM_ADDR;
begin
- if RESET = '1' or FETCH = '0' then
- TMM_FETCH_CTR := (others => '0');
- TMM_FETCH_CTR_REL := (others => '0');
- TMM_CACHE_WEN <= '0';
- TMM_CACHE_UPDATE_TURN <= '0';
- elsif rising_edge(CLK) then
- TMM_FETCH_CTR := TMM_FETCH_CTR + 1;
- TMM_FETCH_CTR_REL := TMM_FETCH_CTR - TMM_FETCH_CLK_RANGE_BEGIN;
-
- if TMM_FETCH_CTR >= TMM_FETCH_CLK_RANGE_BEGIN and
- TMM_FETCH_CTR < (TMM_FETCH_CLK_RANGE_BEGIN + PPU_TMM_CACHE_FETCH_C_COUNT) then
- TMM_CACHE_UPDATE_TURN <= '1';
- if TMM_FETCH_CTR_REL < PPU_TMM_CACHE_FETCH_C_COUNT - 1 then -- calculate address until second to last clock
- T_TMM_ADDR <= std_logic_vector(resize(TMM_FETCH_CTR - IDX, T_TMM_ADDR'length));
- TMM_CACHE_ADDR <= std_logic_vector(resize(TMM_FETCH_CTR_REL - 1, TMM_CACHE_ADDR'length));
+ HIT <= SPRITE_ACTIVE;
+ -- only fetch if OE is high, and during the second pipeline stage
+ TMM_ADDR <= R_TMM_ADDR when OE = '1' and state = PL_TMM_ADDR else (others => 'Z');
+ T_TMM_ADDR <= std_logic_vector(TILEMAP_WORD + to_unsigned(TILEMAP_WORD_OFFSET, PPU_TMM_ADDR_WIDTH)); -- TMM address
+
+ -- TMM DATA
+ with PIXEL_BIT_OFFSET select
+ TMM_DATA_PAL_IDX <= R_TMM_DATA(2 downto 0) when 0,
+ R_TMM_DATA(5 downto 3) when 1,
+ R_TMM_DATA(8 downto 6) when 2,
+ R_TMM_DATA(11 downto 9) when 3,
+ R_TMM_DATA(14 downto 12) when 4,
+ (others => '0') when others;
+
+ process(CLK, RESET, PL_RESET) -- list the clock tested by rising_edge below; NOTE(review): if PL_CLK was the intended clock, use PL_CLK in both places
+ begin
+ if RESET = '1' or PL_RESET = '1' then
+ -- asynchronous reset (RESET or PL_RESET): return the state machine to the address stage
+ state <= PL_TMM_ADDR;
+ if RESET = '1' then
+ -- only a full RESET clears the pipeline registers; PL_RESET alone keeps their contents
+ R_TMM_ADDR <= (others => '0');
+ R_TMM_DATA <= (others => '0');
end if;
-
- if TMM_FETCH_CTR_REL > 0 then -- read offset
- TMM_CACHE_DATA <= T_TMM_DATA;
- TMM_CACHE_WEN <= '1';
- end if;
- else
+ elsif rising_edge(CLK) then -- the two states alternate on every rising clock edge
+ case state is
+ when PL_TMM_ADDR => -- address stage: latch the computed TMM address
+ state <= PL_TMM_DATA;
+ R_TMM_ADDR <= T_TMM_ADDR;
+ when PL_TMM_DATA => -- data stage: latch the word currently on TMM_DATA
+ state <= PL_TMM_ADDR;
+ R_TMM_DATA <= T_TMM_DATA;
+ end case;
+ end if;
+ end process;
+ end generate;
+
+ accurate_occlusion_logic: if IDX < PPU_ACCURATE_FG_SPRITE_COUNT generate -- variant that keeps a local copy of the sprite's TMM words
+ -- TMM cache lines (write port of the local cache plus its exposed register contents)
+ signal TMM_CACHE_WEN, TMM_CACHE_UPDATE_TURN : std_logic := '0';
+ signal TMM_CACHE_DATA : std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0) := (others => '0');
+ signal TMM_CACHE_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0');
+ signal TMM_CACHE : std_logic_vector((PPU_SPRITE_WORD_COUNT * PPU_TMM_DATA_WIDTH)-1 downto 0);
+ begin
+ HIT <= SPRITE_ACTIVE and (nor TMM_DATA_PAL_IDX); -- NOTE(review): unary 'nor' is '1' only when every bit of TMM_DATA_PAL_IDX is '0' -- confirm this polarity is intended
+
+ -- palette color at pixel (slice of the cached tile selected by the transformed pixel index)
+ TMM_DATA_PAL_IDX <= TMM_CACHE(TRANS_TILE_PIDX * integer(PPU_PALETTE_COLOR_WIDTH) + integer(PPU_PALETTE_COLOR_WIDTH)-1 downto TRANS_TILE_PIDX * integer(PPU_PALETTE_COLOR_WIDTH));
+
+ TMM_ADDR <= T_TMM_ADDR when TMM_CACHE_UPDATE_TURN else (others => 'Z'); -- high-Z outside this sprite's fetch window
+
+ -- TMM cache
+ ttm_cache : component er_ram
+ generic map(
+ ADDR_W => PPU_TMM_ADDR_WIDTH,
+ DATA_W => PPU_TMM_DATA_WIDTH,
+ ADDR_LOW => 0,
+ ADDR_RANGE => PPU_SPRITE_WORD_COUNT)
+ port map(
+ CLK => CLK,
+ RST => RESET,
+ WEN => TMM_CACHE_WEN,
+ ADDR => TMM_CACHE_ADDR,
+ DATA => TMM_CACHE_DATA,
+ REG => TMM_CACHE);
+
+ -- fetch machine, should do the following (offset data read by one clock -> propagation/lookup delay):
+ -- CLK[53 * IDX + 0] (addr = 0)
+ -- CLK[53 * IDX + 1] (addr = 1, read data[0])
+ -- CLK[53 * IDX + 2] (addr = 2, read data[1]), etc
+ -- a full tile is 52 words, but since the offset is 1 clock, a total copy takes 53 clock cycles
+ process(CLK, RESET, FETCH)
+ constant TMM_FETCH_CLK_RANGE_BEGIN : natural := PPU_TMM_CACHE_FETCH_C_COUNT * IDX; -- fetch CLK count for copying this module's sprite from TMM
+ variable TMM_FETCH_CTR : unsigned(PPU_TMM_CACHE_FETCH_A_WIDTH-1 downto 0) := (others => '0'); -- CLK counter while FETCH=1
+ variable TMM_FETCH_CTR_REL : unsigned(PPU_TMM_CACHE_FETCH_A_WIDTH-1 downto 0) := (others => '0'); -- CLK counter relative for sprite[IDX]
+ begin
+ if RESET = '1' or FETCH = '0' then
+ TMM_FETCH_CTR := (others => '0');
+ TMM_FETCH_CTR_REL := (others => '0');
TMM_CACHE_WEN <= '0';
TMM_CACHE_UPDATE_TURN <= '0';
+ elsif rising_edge(CLK) then
+ TMM_FETCH_CTR := TMM_FETCH_CTR + 1;
+ TMM_FETCH_CTR_REL := TMM_FETCH_CTR - TMM_FETCH_CLK_RANGE_BEGIN;
+
+ if TMM_FETCH_CTR >= TMM_FETCH_CLK_RANGE_BEGIN and
+ TMM_FETCH_CTR < (TMM_FETCH_CLK_RANGE_BEGIN + PPU_TMM_CACHE_FETCH_C_COUNT) then
+ TMM_CACHE_UPDATE_TURN <= '1';
+ if TMM_FETCH_CTR_REL < PPU_TMM_CACHE_FETCH_C_COUNT - 1 then -- calculate address until second to last clock
+ T_TMM_ADDR <= std_logic_vector(resize(TMM_FETCH_CTR - IDX, T_TMM_ADDR'length)); -- -IDX to correct for each fetch cycle taking 1 extra clock cycle
+ TMM_CACHE_ADDR <= std_logic_vector(resize(TMM_FETCH_CTR_REL - 1, TMM_CACHE_ADDR'length));
+ end if;
+
+ if TMM_FETCH_CTR_REL > 0 then -- read offset
+ TMM_CACHE_DATA <= T_TMM_DATA;
+ TMM_CACHE_WEN <= '1';
+ end if;
+ else
+ TMM_CACHE_WEN <= '0';
+ TMM_CACHE_UPDATE_TURN <= '0';
+ end if;
end if;
- end if;
- end process;
+ end process;
+ end generate;
end Behavioral;