From c09e9faf7e325184e435539d3e06d8340c67acd9 Mon Sep 17 00:00:00 2001 From: lonkaars Date: Wed, 22 Feb 2023 16:13:12 +0100 Subject: WIP ppu fg sprite fetch logic --- basys3/basys3.srcs/ppu.vhd | 25 ++++++-- basys3/basys3.srcs/ppu_consts.vhd | 23 ++++++- basys3/basys3.srcs/ppu_sprite_bg.vhd | 2 +- basys3/basys3.srcs/ppu_sprite_fg.vhd | 120 ++++++++++++++++++++++------------- basys3/basys3.xpr | 16 +++-- 5 files changed, 126 insertions(+), 60 deletions(-) (limited to 'basys3') diff --git a/basys3/basys3.srcs/ppu.vhd b/basys3/basys3.srcs/ppu.vhd index 22ee210..61c22aa 100644 --- a/basys3/basys3.srcs/ppu.vhd +++ b/basys3/basys3.srcs/ppu.vhd @@ -110,6 +110,7 @@ architecture Behavioral of ppu is X : in std_logic_vector(PPU_POS_H_WIDTH-1 downto 0); -- current screen pixel x Y : in std_logic_vector(PPU_POS_V_WIDTH-1 downto 0); -- current screen pixel y FETCH : in std_logic; -- fetch sprite data from TMM (TODO : generic map, set foreground sprite component index) + VBLANK : in std_logic; -- fetch during vblank -- internal memory block (FAM) FAM_WEN : in std_logic; -- VRAM FAM write enable @@ -188,6 +189,8 @@ architecture Behavioral of ppu is signal BG_SHIFT_X : std_logic_vector(PPU_POS_H_WIDTH-1 downto 0); signal BG_SHIFT_Y : std_logic_vector(PPU_POS_V_WIDTH-1 downto 0); signal FG_FETCH : std_logic; + signal TINY_VBLANK, TINY_VSYNC, TINY_HBLANK, TINY_HSYNC, + NATIVE_VSYNC, NATIVE_HSYNC : std_logic; begin SYSCLK <= CLK100; SYSRST <= RESET; @@ -201,6 +204,13 @@ begin FAM_AI <= (others => '0'); PAL_AI <= (others => '0'); + TVBLANK <= TINY_VBLANK; + TVSYNC <= TINY_VSYNC; + THBLANK <= TINY_HBLANK; + THSYNC <= TINY_HSYNC; + NVSYNC <= NATIVE_VSYNC; + NHSYNC <= NATIVE_HSYNC; + pipeline_clock_edge_generator : component ppu_pceg port map( CLK => SYSCLK, RESET => SYSRST, @@ -273,12 +283,13 @@ begin foreground_sprite : component ppu_sprite_fg generic map( IDX => FG_IDX ) port map( - CLK => PL_SPRITE, + CLK => SYSCLK, RESET => SYSRST, OE => FG_EN(FG_IDX), X => X, Y => Y, FETCH => FG_FETCH, + VBLANK => TINY_VBLANK, FAM_WEN => FAM_WEN, FAM_ADDR => FAM_AO, FAM_DATA => DATA(PPU_FAM_DATA_WIDTH-1 downto 0), @@ -323,10 +334,10 @@ begin RESET => SYSRST, X => X, Y => Y, - VSYNC => TVSYNC, - VBLANK => TVBLANK, - HSYNC => THSYNC, - HBLANK => THBLANK); + VSYNC => TINY_VSYNC, + VBLANK => TINY_VBLANK, + HSYNC => TINY_HSYNC, + HBLANK => TINY_HBLANK); native_vga_signal_generator : component ppu_vga_native port map( -- native vga signal generator (upscaler) CLK => SYSCLK, @@ -340,6 +351,6 @@ begin RO => R, GO => G, BO => B, - VSYNC => NVSYNC, - HSYNC => NHSYNC); + VSYNC => NATIVE_VSYNC, + HSYNC => NATIVE_HSYNC); end Behavioral; diff --git a/basys3/basys3.srcs/ppu_consts.vhd b/basys3/basys3.srcs/ppu_consts.vhd index 722954d..75b6168 100644 --- a/basys3/basys3.srcs/ppu_consts.vhd +++ b/basys3/basys3.srcs/ppu_consts.vhd @@ -1,12 +1,18 @@ +library ieee; +use ieee.math_real.all; + -- https://docs.google.com/spreadsheets/d/1MU6K4c4PtMR_JXIpc3I0ZJdLZNnoFO7G2P3olCz6LSc package ppu_consts is + -- utility functions + function ceil_log2(n : natural) return natural; + constant PPU_RAM_BUS_ADDR_WIDTH : natural := 16; -- RAM bus address width constant PPU_RAM_BUS_DATA_WIDTH : natural := 16; -- RAM bus data width constant PPU_FG_SPRITE_COUNT : natural := 128; -- foreground sprites constant PPU_COLOR_OUTPUT_DEPTH : natural := 4; -- VGA output channel depth constant PPU_PALETTE_COLOR_WIDTH : natural := 3; -- palette index width (within sprite) constant PPU_PALETTE_INDEX_WIDTH : natural := 3; -- palette index width (palette table) - constant PPU_PALETTE_CIDX_WIDTH : natural := PPU_PALETTE_COLOR_WIDTH + PPU_PALETTE_INDEX_WIDTH; -- global palette index width + constant PPU_PALETTE_CIDX_WIDTH : natural := (PPU_PALETTE_COLOR_WIDTH + PPU_PALETTE_INDEX_WIDTH); -- global palette index width constant PPU_TMM_ADDR_WIDTH : natural := 16; -- tilemap memory ram bus address width constant PPU_TMM_DATA_WIDTH : natural := 15; -- tilemap memory ram bus data width constant PPU_BAM_ADDR_WIDTH : natural := 11; -- background attribute memory ram bus address width @@ -32,7 +38,18 @@ package ppu_consts is constant PPU_BG_CANVAS_TILE_V_WIDTH : natural := 5; -- bits needed to describe vertical bg tile index (grid coordinates) constant PPU_TILE_INDEX_WIDTH : natural := 10; -- bits needed to index a tile from TMM memory constant PPU_PIXELS_PER_TILE_WORD : natural := 5; -- pixels defined in one word in TMM memory - constant PPU_SPRITE_PIXELS_PER_WORD : natural := 52; -- words needed for a single sprite + constant PPU_SPRITE_WORD_COUNT : natural := 52; -- words needed for a single sprite constant PPU_PIXEL_BIT_WIDTH : natural := 3; -- bits needed to identify pixel in TMM word + constant PPU_TILE_BIT_WIDTH : natural := (PPU_SPRITE_WIDTH * PPU_SPRITE_HEIGHT * PPU_PALETTE_COLOR_WIDTH); -- bits in single tile + constant PPU_TMM_CACHE_FETCH_C_COUNT : natural := PPU_SPRITE_WORD_COUNT + 1; + constant PPU_TMM_CACHE_FETCH_A_COUNT : natural := PPU_TMM_CACHE_FETCH_C_COUNT * PPU_FG_SPRITE_COUNT; -- amount of clocks to fetch new TMM cache + constant PPU_TMM_CACHE_FETCH_A_WIDTH : natural := ceil_log2(PPU_TMM_CACHE_FETCH_A_COUNT); end package ppu_consts; - +package body ppu_consts is + -- https://stackoverflow.com/questions/21783280/number-of-bits-to-represent-an-integer-in-vhdl + -- Returns number of bits required to represent val in binary vector + function ceil_log2(n : natural) return natural is + begin + return natural(integer(ceil(log2(real(n - 1))))); + end function; +end package body ppu_consts; diff --git a/basys3/basys3.srcs/ppu_sprite_bg.vhd b/basys3/basys3.srcs/ppu_sprite_bg.vhd index 243fd93..dba5b8e 100644 --- a/basys3/basys3.srcs/ppu_sprite_bg.vhd +++ b/basys3/basys3.srcs/ppu_sprite_bg.vhd @@ -96,7 +96,7 @@ begin TRANS_TILE_PIDX <= integer(PPU_SPRITE_WIDTH) * to_integer(TRANS_TILE_PIDX_Y) + to_integer(TRANS_TILE_PIDX_X); -- pixel index of sprite TILEMAP_WORD_OFFSET <= TRANS_TILE_PIDX / PPU_PIXELS_PER_TILE_WORD; -- word offset from starting word of sprite PIXEL_BIT_OFFSET <= TRANS_TILE_PIDX mod PPU_PIXELS_PER_TILE_WORD; -- pixel bit offset - T_TMM_ADDR <= std_logic_vector(to_unsigned(PPU_SPRITE_PIXELS_PER_WORD * to_integer(unsigned(BAM_DATA_TILE_IDX)) + TILEMAP_WORD_OFFSET, PPU_TMM_ADDR_WIDTH)); -- TMM address + T_TMM_ADDR <= std_logic_vector(to_unsigned(PPU_SPRITE_WORD_COUNT * to_integer(unsigned(BAM_DATA_TILE_IDX)) + TILEMAP_WORD_OFFSET, PPU_TMM_ADDR_WIDTH)); -- TMM address -- TMM DATA with PIXEL_BIT_OFFSET select diff --git a/basys3/basys3.srcs/ppu_sprite_fg.vhd b/basys3/basys3.srcs/ppu_sprite_fg.vhd index c3cb59a..7b39b1d 100644 --- a/basys3/basys3.srcs/ppu_sprite_fg.vhd +++ b/basys3/basys3.srcs/ppu_sprite_fg.vhd @@ -19,6 +19,7 @@ entity ppu_sprite_fg is -- foreground sprite X : in std_logic_vector(PPU_POS_H_WIDTH-1 downto 0); -- current screen pixel x Y : in std_logic_vector(PPU_POS_V_WIDTH-1 downto 0); -- current screen pixel y FETCH : in std_logic; -- fetch sprite data from TMM + VBLANK : in std_logic; -- fetch during vblank -- internal memory block (FAM) FAM_WEN : in std_logic; -- VRAM FAM write enable @@ -44,10 +45,10 @@ architecture Behavioral of ppu_sprite_fg is end component; component er_ram -- exposed register RAM generic( - ADDR_W : natural := PPU_FAM_ADDR_WIDTH; -- ADDR line width - DATA_W : natural := PPU_FAM_DATA_WIDTH; -- DATA line width - ADDR_LOW : natural := IDX*2; -- starting address - ADDR_RANGE : natural := 2); -- amount of valid addresses after ADDR_LOW + ADDR_W : natural := 2; -- ADDR line width + DATA_W : natural := 2; -- DATA line width + ADDR_LOW : natural := 16#0000#; -- starting address + ADDR_RANGE : natural := 16#0002#); -- amount of valid addresses after ADDR_LOW port( CLK : in std_logic; -- clock RST : in std_logic; -- async memory clear @@ -83,8 +84,13 @@ architecture Behavioral of ppu_sprite_fg is signal TILE_PIDX_Y, TRANS_TILE_PIDX_Y : unsigned(PPU_SPRITE_POS_V_WIDTH-1 downto 0) := (others => '0'); -- xy position of pixel within tile (local tile coords) signal TRANS_TILE_PIXEL_IDX : integer := 0; -- index of pixel within tile (reading order) signal TILEMAP_WORD_OFFSET : integer := 0; -- word offset from tile start address in TMM - signal PIXEL_BIT_OFFSET : integer := 0; -- pixel index within word of TMM signal TMM_DATA_PAL_IDX : std_logic_vector(PPU_PALETTE_COLOR_WIDTH-1 downto 0); -- color of palette + + -- TMM cache + signal TMM_CACHE_WEN : std_logic := '0'; + signal TMM_CACHE_DATA : std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0) := (others => '0'); + signal TMM_CACHE_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0'); + signal TMM_CACHE : std_logic_vector((PPU_SPRITE_WORD_COUNT * PPU_TMM_DATA_WIDTH)-1 downto 0); begin -- output drivers CIDX <= T_CIDX when OE = '1' else (others => 'Z'); @@ -94,25 +100,31 @@ begin T_CIDX <= FAM_REG_COL_IDX & TMM_DATA_PAL_IDX; -- FAM memory - FAM : component er_ram port map( - CLK => CLK, - RST => RESET, - WEN => FAM_WEN, - ADDR => FAM_ADDR, - DATA => FAM_DATA, - REG => INT_FAM); - - SPRITE_ACTIVE <= ((unsigned(X) + 16) >= unsigned(FAM_REG_POS_H)) and - ((unsigned(X) + 16) < (unsigned(FAM_REG_POS_H) + to_unsigned(PPU_SPRITE_WIDTH, PPU_POS_H_WIDTH))) and - ((unsigned(Y) + 16) >= unsigned(FAM_REG_POS_V)) and - ((unsigned(Y) + 16) < (unsigned(FAM_REG_POS_V) + to_unsigned(PPU_SPRITE_HEIGHT, PPU_POS_V_WIDTH))); - - HIT <= SPRITE_ACTIVE and (nor TMM_DATA_PAL_IDX); -- if pixel in sprite hitbox and TMM_DATA_PAL_IDX > 0 - - TILE_PIDX_X <= to_unsigned(unsigned(X) + 16 - to_unsigned(FAM_REG_POS_H, TILE_PIDX_X'length), TILE_PIDX_X'length); -- (sprite local) pixel coords - TILE_PIDX_Y <= to_unsigned(unsigned(Y) + 16 - to_unsigned(FAM_REG_POS_V, TILE_PIDX_Y'length), TILE_PIDX_Y'length); -- (sprite local) pixel coords - - -- FAM data dependant calculations + FAM : component er_ram + generic map( + ADDR_W => PPU_FAM_ADDR_WIDTH, + DATA_W => PPU_FAM_DATA_WIDTH, + ADDR_LOW => IDX*2, + ADDR_RANGE => 2) + port map( + CLK => CLK, + RST => RESET, + WEN => FAM_WEN, + ADDR => FAM_ADDR, + DATA => FAM_DATA, + REG => INT_FAM); + + -- pixel position within bounding box of sprite + SPRITE_ACTIVE <= '1' when ((unsigned(X) + 16) >= unsigned(FAM_REG_POS_H)) and + ((unsigned(X) + 16) < (unsigned(FAM_REG_POS_H) + to_unsigned(PPU_SPRITE_WIDTH, PPU_POS_H_WIDTH))) and + ((unsigned(Y) + 16) >= unsigned(FAM_REG_POS_V)) and + ((unsigned(Y) + 16) < (unsigned(FAM_REG_POS_V) + to_unsigned(PPU_SPRITE_HEIGHT, PPU_POS_V_WIDTH))) else '0'; + + -- (sprite local) pixel coords + TILE_PIDX_X <= resize(unsigned(X) + 16 - resize(unsigned(FAM_REG_POS_H), TILE_PIDX_X'length), TILE_PIDX_X'length); + TILE_PIDX_Y <= resize(unsigned(Y) + 16 - resize(unsigned(FAM_REG_POS_V), TILE_PIDX_Y'length), TILE_PIDX_Y'length); + + -- transform local coords transform: component ppu_sprite_transform port map( XI => TILE_PIDX_X, YI => TILE_PIDX_Y, @@ -121,23 +133,39 @@ begin XO => TRANS_TILE_PIDX_X, YO => TRANS_TILE_PIDX_Y); - -- TMM address calculations (sprite word start, word offset, and pixel offset) - TRANS_TILE_PIXEL_IDX <= integer(PPU_SPRITE_WIDTH) * to_integer(TRANS_TILE_PIDX_Y) + to_integer(TRANS_TILE_PIDX_X); -- pixel index of sprite + -- pixel index + TRANS_TILE_PIXEL_IDX <= integer(PPU_SPRITE_WIDTH) * to_integer(TRANS_TILE_PIDX_Y) + to_integer(TRANS_TILE_PIDX_X); + -- palette color at pixel + TMM_DATA_PAL_IDX <= TMM_CACHE(TRANS_TILE_PIXEL_IDX * integer(PPU_PALETTE_COLOR_WIDTH) + integer(PPU_PALETTE_COLOR_WIDTH)-1 downto TRANS_TILE_PIXEL_IDX * integer(PPU_PALETTE_COLOR_WIDTH)); + -- if pixel in sprite hitbox and TMM_DATA_PAL_IDX > 0 + HIT <= SPRITE_ACTIVE and (nor TMM_DATA_PAL_IDX); + + -- FETCH LOGIC BELOW + + -- TTM cache + ttm_cache : component er_ram + generic map( + ADDR_W => PPU_TMM_ADDR_WIDTH, + DATA_W => PPU_TMM_DATA_WIDTH, + ADDR_LOW => 0, + ADDR_RANGE => PPU_SPRITE_WORD_COUNT) + port map( + CLK => CLK, + RST => RESET, + WEN => TMM_CACHE_WEN, + ADDR => TMM_CACHE_ADDR, + DATA => TMM_CACHE_DATA, + REG => TMM_CACHE); + TILEMAP_WORD_OFFSET <= TRANS_TILE_PIXEL_IDX / PPU_PIXELS_PER_TILE_WORD; -- word offset from starting word of sprite - PIXEL_BIT_OFFSET <= TRANS_TILE_PIXEL_IDX mod PPU_PIXELS_PER_TILE_WORD; -- pixel bit offset - T_TMM_ADDR <= std_logic_vector(to_unsigned(PPU_SPRITE_PIXELS_PER_WORD * to_integer(unsigned(FAM_REG_TILE_IDX)) + TILEMAP_WORD_OFFSET, PPU_TMM_ADDR_WIDTH)); -- TMM address - - -- TMM DATA - with PIXEL_BIT_OFFSET select - TMM_DATA_PAL_IDX <= R_TMM_DATA(2 downto 0) when 0, - R_TMM_DATA(5 downto 3) when 1, - R_TMM_DATA(8 downto 6) when 2, - R_TMM_DATA(11 downto 9) when 3, - R_TMM_DATA(14 downto 12) when 4, - (others => '0') when others; + T_TMM_ADDR <= std_logic_vector(to_unsigned(PPU_SPRITE_WORD_COUNT * to_integer(unsigned(FAM_REG_TILE_IDX)) + TILEMAP_WORD_OFFSET, PPU_TMM_ADDR_WIDTH)); -- TMM address + -- state machine (pipeline stage counter) + sync r/w process(CLK, RESET) + constant TMM_FETCH_CLK_RANGE_BEGIN : natural := PPU_TMM_CACHE_FETCH_C_COUNT * IDX; + variable TMM_FETCH_CTR : unsigned(PPU_TMM_CACHE_FETCH_A_WIDTH-1 downto 0) := (others => '0'); + variable TMM_FETCH_CTR_REL : unsigned(PPU_TMM_CACHE_FETCH_A_WIDTH-1 downto 0) := (others => '0'); begin if RESET = '1' then -- reset state @@ -146,14 +174,16 @@ begin R_TMM_ADDR <= (others => '0'); R_TMM_DATA <= (others => '0'); elsif rising_edge(CLK) then - case state is - when PL_TMM_ADDR => - state <= PL_TMM_DATA; - R_TMM_ADDR <= T_TMM_ADDR; - when PL_TMM_DATA => - state <= PL_TMM_ADDR; - R_TMM_DATA <= T_TMM_DATA; - end case; + TMM_FETCH_CTR := (others => '0') when FETCH = '0' else TMM_FETCH_CTR + 1; + TMM_FETCH_CTR_REL := TMM_FETCH_CTR - TMM_FETCH_CLK_RANGE_BEGIN; + + if FETCH = '1' and TMM_FETCH_CTR >= TMM_FETCH_CLK_RANGE_BEGIN and TMM_FETCH_CTR < (TMM_FETCH_CLK_RANGE_BEGIN + PPU_TMM_CACHE_FETCH_C_COUNT) then + TMM_CACHE_WEN <= '1'; + R_TMM_DATA <= T_TMM_DATA; + T_TMM_ADDR <= R_TMM_ADDR; + else + TMM_CACHE_WEN <= '0'; + end if; end if; end process; end Behavioral; diff --git a/basys3/basys3.xpr b/basys3/basys3.xpr index d828223..813b3e2 100644 --- a/basys3/basys3.xpr +++ b/basys3/basys3.xpr @@ -61,7 +61,7 @@