aboutsummaryrefslogtreecommitdiff
path: root/basys3
diff options
context:
space:
mode:
author lonkaars <loek@pipeframe.xyz> 2023-03-04 14:09:08 +0100
committer lonkaars <loek@pipeframe.xyz> 2023-03-04 14:09:08 +0100
commit 7d316cce9af0e724c6f95fa997cd32a680fdede7 (patch)
tree 260a9d790f7a5948388c12331789cd0713f15a7c /basys3
parent df8902fba3a6e97ca3c5fdedb70999faac713815 (diff)
foreground sprite optimization (untested) done
Diffstat (limited to 'basys3')
-rw-r--r-- basys3/basys3.srcs/ppu.vhd 12
-rw-r--r-- basys3/basys3.srcs/ppu_pceg.vhd 30
-rw-r--r-- basys3/basys3.srcs/ppu_pceg_tb.vhd 25
-rw-r--r-- basys3/basys3.srcs/ppu_sprite_fg.vhd 73
-rw-r--r-- basys3/basys3.xpr 2
5 files changed, 100 insertions, 42 deletions
diff --git a/basys3/basys3.srcs/ppu.vhd b/basys3/basys3.srcs/ppu.vhd
index c6dfe60..9e869d5 100644
--- a/basys3/basys3.srcs/ppu.vhd
+++ b/basys3/basys3.srcs/ppu.vhd
@@ -20,7 +20,8 @@ architecture Behavioral of ppu is
component ppu_pceg port( -- pipeline clock edge generator
CLK : in std_logic; -- system clock
RESET : in std_logic; -- async reset
- SPRITE : out std_logic; -- sprite info fetch + sprite pixel fetch
+ SPRITE_BG : out std_logic; -- sprite info fetch + sprite pixel fetch
+ SPRITE_FG : out std_logic; -- sprite pixel fetch
DONE : out std_logic; -- last pipeline stage done
READY : out std_logic); -- rgb buffer propagation ready
end component;
@@ -105,6 +106,7 @@ architecture Behavioral of ppu is
-- inputs
CLK : in std_logic; -- system clock
RESET : in std_logic; -- reset internal memory and clock counters
+ PL_CLK : in std_logic; -- pipeline clock
PL_RESET : in std_logic; -- reset pipeline clock counters
OE : in std_logic; -- output enable (of CIDX)
X : in std_logic_vector(PPU_POS_H_WIDTH-1 downto 0); -- current screen pixel x
@@ -167,7 +169,7 @@ architecture Behavioral of ppu is
-- signals
signal SYSCLK, SYSRST : std_logic; -- system clock and reset
- signal PL_SPRITE, PL_DONE, PL_READY : std_logic; -- pipeline stages
+ signal PL_SPRITE_FG, PL_SPRITE_BG, PL_DONE, PL_READY : std_logic; -- pipeline stages
signal TMM_WEN, BAM_WEN, FAM_WEN, PAL_WEN, AUX_WEN : std_logic;
signal TMM_W_ADDR, TMM_R_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0); -- read/write TMM addr (dual port)
signal BAM_W_ADDR, BAM_R_ADDR : std_logic_vector(PPU_BAM_ADDR_WIDTH-1 downto 0); -- read/write BAM addr (dual port)
@@ -202,7 +204,8 @@ begin
pipeline_clock_edge_generator : component ppu_pceg port map(
CLK => SYSCLK,
RESET => SYSRST,
- SPRITE => PL_SPRITE,
+ SPRITE_FG => PL_SPRITE_FG,
+ SPRITE_BG => PL_SPRITE_BG,
DONE => PL_DONE,
READY => PL_READY);
@@ -254,7 +257,7 @@ begin
FG_FETCH => FG_FETCH);
background_sprite : component ppu_sprite_bg port map(
- CLK => PL_SPRITE,
+ CLK => PL_SPRITE_BG,
RESET => SYSRST,
PL_RESET => PL_READY,
OE => BG_EN,
@@ -274,6 +277,7 @@ begin
port map(
CLK => SYSCLK,
RESET => SYSRST,
+ PL_CLK => PL_SPRITE_FG,
PL_RESET => PL_READY,
OE => FG_EN(FG_IDX),
X => X,
diff --git a/basys3/basys3.srcs/ppu_pceg.vhd b/basys3/basys3.srcs/ppu_pceg.vhd
index 5d9f4d6..d53d86a 100644
--- a/basys3/basys3.srcs/ppu_pceg.vhd
+++ b/basys3/basys3.srcs/ppu_pceg.vhd
@@ -5,34 +5,36 @@ use work.ppu_consts.all;
entity ppu_pceg is port(
CLK : in std_logic; -- system clock
RESET : in std_logic; -- async reset
- SPRITE : out std_logic; -- sprite info fetch + sprite pixel fetch
+ SPRITE_BG : out std_logic; -- sprite info fetch + sprite pixel fetch
+ SPRITE_FG : out std_logic; -- sprite pixel fetch
DONE : out std_logic; -- last pipeline stage done
READY : out std_logic); -- rgb buffer propagation ready
end ppu_pceg;
architecture Behavioral of ppu_pceg is
- type states is (PL_SPRITE, PL_DONE, PL_READY);
- signal state : states := PL_SPRITE;
+ signal PL_SPRITE_BG, PL_SPRITE_FG, PL_DONE, PL_READY : boolean := false;
begin
-- output drivers
- SPRITE <= CLK when RESET = '0' and state = PL_SPRITE else '0';
- DONE <= CLK when RESET = '0' and state = PL_DONE else '0';
- READY <= '1' when RESET = '0' and state = PL_READY else '0';
+ SPRITE_BG <= CLK when RESET = '0' and PL_SPRITE_BG else '0';
+ SPRITE_FG <= CLK when RESET = '0' and PL_SPRITE_FG else '0';
+ DONE <= CLK when RESET = '0' and PL_DONE else '0';
+ READY <= '1' when RESET = '0' and PL_READY else '0';
process(CLK, RESET)
variable CLK_IDX : natural range 0 to PPU_PL_TOTAL_STAGES+1 := 0;
begin
if RESET = '1' then
- state <= PL_SPRITE;
+ CLK_IDX := 0;
+ PL_SPRITE_BG <= false;
+ PL_SPRITE_FG <= false;
+ PL_DONE <= false;
+ PL_READY <= false;
elsif rising_edge(CLK) then
-- clock counter ranges
- if CLK_IDX < 4 then
- state <= PL_SPRITE;
- elsif CLK_IDX < 5 then
- state <= PL_DONE;
- else
- state <= PL_READY;
- end if;
+ PL_SPRITE_BG <= true when CLK_IDX >= 0 and CLK_IDX <= 3 else false;
+ PL_SPRITE_FG <= true when CLK_IDX >= 1 and CLK_IDX <= 2 else false;
+ PL_DONE <= true when CLK_IDX = 4 else false;
+ PL_READY <= true when CLK_IDX >= 5 else false;
-- increment clock counter
CLK_IDX := CLK_IDX + 1;
diff --git a/basys3/basys3.srcs/ppu_pceg_tb.vhd b/basys3/basys3.srcs/ppu_pceg_tb.vhd
index 86061a0..1c2c855 100644
--- a/basys3/basys3.srcs/ppu_pceg_tb.vhd
+++ b/basys3/basys3.srcs/ppu_pceg_tb.vhd
@@ -12,13 +12,15 @@ architecture behavioral of ppu_pceg_tb is
component ppu_pceg port(
CLK : in std_logic; -- system clock
RESET : in std_logic; -- async reset
- SPRITE : out std_logic; -- sprite info fetch + sprite pixel fetch
+ SPRITE_BG : out std_logic; -- sprite info fetch + sprite pixel fetch
+ SPRITE_FG : out std_logic; -- sprite pixel fetch
DONE : out std_logic; -- last pipeline stage done
READY : out std_logic); -- rgb buffer propagation ready
end component;
signal CLK : std_logic := '0';
signal RESET : std_logic := '0';
- signal SPRITE : std_logic;
+ signal SPRITE_BG : std_logic;
+ signal SPRITE_FG : std_logic;
signal DONE : std_logic;
signal READY : std_logic;
@@ -26,17 +28,14 @@ begin
uut : ppu_pceg port map(
CLK => CLK,
RESET => RESET,
- SPRITE => SPRITE,
+ SPRITE_BG => SPRITE_BG,
+ SPRITE_FG => SPRITE_FG,
DONE => DONE,
READY => READY);
tb : process
begin
for i in 0 to 32 loop
- if i > 20 then
- RESET <= '1';
- end if;
-
wait for 5 ns;
CLK <= '1';
wait for 5 ns;
@@ -44,4 +43,16 @@ begin
end loop;
wait; -- stop for simulator
end process;
+
+ gert : process -- reset stimulus: drives RESET independently of the clock loop in `tb`
+ begin
+ RESET <= '1'; -- start with reset asserted
+ wait for 1 ns;
+ RESET <= '0'; -- release reset at t = 1 ns
+ wait for 100 ns;
+ RESET <= '1'; -- assert reset again at t = 101 ns
+ wait for 5 ns;
+ RESET <= '0'; -- release reset at t = 106 ns
+ wait; -- suspend forever; RESET stays '0' for the rest of the run
+ end process;
end;
diff --git a/basys3/basys3.srcs/ppu_sprite_fg.vhd b/basys3/basys3.srcs/ppu_sprite_fg.vhd
index dd315d8..3b4d2c6 100644
--- a/basys3/basys3.srcs/ppu_sprite_fg.vhd
+++ b/basys3/basys3.srcs/ppu_sprite_fg.vhd
@@ -14,6 +14,7 @@ entity ppu_sprite_fg is -- foreground sprite
-- inputs
CLK : in std_logic; -- system clock
RESET : in std_logic; -- reset internal memory and clock counters
+ PL_CLK : in std_logic; -- pipeline clock
PL_RESET : in std_logic; -- reset pipeline clock counters
OE : in std_logic; -- output enable (of CIDX)
X : in std_logic_vector(PPU_POS_H_WIDTH-1 downto 0); -- current screen pixel x
@@ -58,9 +59,9 @@ architecture Behavioral of ppu_sprite_fg is
REG : out std_logic_vector((ADDR_RANGE*DATA_W)-1 downto 0)); -- exposed register output
end component;
- -- FAM and TMM in/out lines
- signal T_TMM_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0');
- signal T_TMM_DATA : std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0) := (others => '0');
+ -- TMM in/out temp + registers
+ signal T_TMM_ADDR, R_TMM_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0');
+ signal T_TMM_DATA, R_TMM_DATA : std_logic_vector(PPU_TMM_DATA_WIDTH-1 downto 0) := (others => '0');
-- auxiliary signals (temp variables)
signal T_CIDX : std_logic_vector(PPU_PALETTE_CIDX_WIDTH-1 downto 0) := (others => '0'); -- output color buffer/register
@@ -76,11 +77,14 @@ architecture Behavioral of ppu_sprite_fg is
signal SPRITE_ACTIVE : std_logic := '0'; -- is pixel in bounding box of sprite
signal PIXEL_ABS_X, PIXEL_ABS_Y : integer := 0; -- absolute pixel position (relative to FG canvas instead of viewport)
+ signal PIXEL_BIT_OFFSET : integer := 0; -- pixel index within word of TMM
signal TILE_PIDX_X, TRANS_TILE_PIDX_X : unsigned(PPU_SPRITE_POS_H_WIDTH-1 downto 0) := (others => '0'); -- xy position of pixel within tile (local tile coords)
signal TILE_PIDX_Y, TRANS_TILE_PIDX_Y : unsigned(PPU_SPRITE_POS_V_WIDTH-1 downto 0) := (others => '0'); -- xy position of pixel within tile (local tile coords)
- signal TRANS_TILE_PIXEL_IDX : integer := 0; -- index of pixel within tile (reading order)
+ signal TRANS_TILE_PIDX : integer := 0; -- index of pixel within tile (reading order)
+ signal TILEMAP_WORD : unsigned(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0');
signal TILEMAP_WORD_OFFSET : integer := 0; -- word offset from tile start address in TMM
signal TMM_DATA_PAL_IDX : std_logic_vector(PPU_PALETTE_COLOR_WIDTH-1 downto 0); -- color of palette
+
begin
-- FAM memory
FAM : component er_ram
@@ -97,11 +101,11 @@ begin
DATA => FAM_DATA,
REG => INT_FAM);
- -- output drivers
- CIDX <= T_CIDX when OE = '1' else (others => 'Z');
-- CIDX combination
T_CIDX <= FAM_REG_COL_IDX & TMM_DATA_PAL_IDX;
-
+ -- output drivers
+ CIDX <= T_CIDX when OE = '1' else (others => 'Z');
+ -- TMM memory
T_TMM_DATA <= TMM_DATA;
-- pixel position within bounding box of sprite
@@ -124,16 +128,51 @@ begin
YO => TRANS_TILE_PIDX_Y);
-- pixel index
- TRANS_TILE_PIXEL_IDX <= integer(PPU_SPRITE_WIDTH) * to_integer(TRANS_TILE_PIDX_Y) + to_integer(TRANS_TILE_PIDX_X);
- -- if pixel in sprite hitbox and TMM_DATA_PAL_IDX > 0
- HIT <= SPRITE_ACTIVE and (nor TMM_DATA_PAL_IDX);
+ TRANS_TILE_PIDX <= integer(PPU_SPRITE_WIDTH) * to_integer(TRANS_TILE_PIDX_Y) + to_integer(TRANS_TILE_PIDX_X);
+ TILEMAP_WORD <= resize(unsigned(FAM_REG_TILE_IDX) * PPU_SPRITE_WORD_COUNT, TILEMAP_WORD'length); -- TMM sprite starting word
+ TILEMAP_WORD_OFFSET <= TRANS_TILE_PIDX / PPU_PIXELS_PER_TILE_WORD; -- word offset from starting word of sprite
+ PIXEL_BIT_OFFSET <= TRANS_TILE_PIDX mod PPU_PIXELS_PER_TILE_WORD; -- pixel bit offset
inaccurate_occlusion_shims: if IDX >= PPU_ACCURATE_FG_SPRITE_COUNT generate
+ -- state machine for synchronizing pipeline stages
+ type states is (PL_TMM_ADDR, PL_TMM_DATA);
+ signal state : states := PL_TMM_ADDR;
begin
- -- palette color at pixel
- TMM_DATA_PAL_IDX <= (others => '0');
-
- TMM_ADDR <= (others => 'Z');
+ HIT <= SPRITE_ACTIVE;
+ -- only fetch if OE is high, and during the second pipeline stage
+ TMM_ADDR <= R_TMM_ADDR when OE = '1' and state = PL_TMM_ADDR else (others => 'Z');
+ T_TMM_ADDR <= std_logic_vector(TILEMAP_WORD + to_unsigned(TILEMAP_WORD_OFFSET, PPU_TMM_ADDR_WIDTH)); -- TMM address
+
+ -- TMM DATA
+ with PIXEL_BIT_OFFSET select
+ TMM_DATA_PAL_IDX <= R_TMM_DATA(2 downto 0) when 0,
+ R_TMM_DATA(5 downto 3) when 1,
+ R_TMM_DATA(8 downto 6) when 2,
+ R_TMM_DATA(11 downto 9) when 3,
+ R_TMM_DATA(14 downto 12) when 4,
+ (others => '0') when others;
+
+ process(PL_CLK, RESET, PL_RESET) -- two-state TMM fetch sequencer: latch address, then latch data
+ begin
+ if RESET = '1' or PL_RESET = '1' then
+ -- async reset (system or pipeline): restart at the address stage
+ state <= PL_TMM_ADDR;
+ if RESET = '1' then
+ -- only a full system reset clears the internal pipeline registers
+ R_TMM_ADDR <= (others => '0');
+ R_TMM_DATA <= (others => '0');
+ end if;
+ elsif rising_edge(PL_CLK) then -- clock on PL_CLK (the signal in the sensitivity list), not the system CLK
+ case state is
+ when PL_TMM_ADDR => -- first edge: register the computed TMM address
+ state <= PL_TMM_DATA;
+ R_TMM_ADDR <= T_TMM_ADDR;
+ when PL_TMM_DATA => -- second edge: register the word read from TMM
+ state <= PL_TMM_ADDR;
+ R_TMM_DATA <= T_TMM_DATA;
+ end case;
+ end if;
+ end process;
end generate;
accurate_occlusion_logic: if IDX < PPU_ACCURATE_FG_SPRITE_COUNT generate
@@ -143,8 +182,10 @@ begin
signal TMM_CACHE_ADDR : std_logic_vector(PPU_TMM_ADDR_WIDTH-1 downto 0) := (others => '0');
signal TMM_CACHE : std_logic_vector((PPU_SPRITE_WORD_COUNT * PPU_TMM_DATA_WIDTH)-1 downto 0);
begin
+ HIT <= SPRITE_ACTIVE and (nor TMM_DATA_PAL_IDX);
+
-- palette color at pixel
- TMM_DATA_PAL_IDX <= TMM_CACHE(TRANS_TILE_PIXEL_IDX * integer(PPU_PALETTE_COLOR_WIDTH) + integer(PPU_PALETTE_COLOR_WIDTH)-1 downto TRANS_TILE_PIXEL_IDX * integer(PPU_PALETTE_COLOR_WIDTH));
+ TMM_DATA_PAL_IDX <= TMM_CACHE(TRANS_TILE_PIDX * integer(PPU_PALETTE_COLOR_WIDTH) + integer(PPU_PALETTE_COLOR_WIDTH)-1 downto TRANS_TILE_PIDX * integer(PPU_PALETTE_COLOR_WIDTH));
TMM_ADDR <= T_TMM_ADDR when TMM_CACHE_UPDATE_TURN else (others => 'Z');
@@ -186,7 +227,7 @@ begin
TMM_FETCH_CTR < (TMM_FETCH_CLK_RANGE_BEGIN + PPU_TMM_CACHE_FETCH_C_COUNT) then
TMM_CACHE_UPDATE_TURN <= '1';
if TMM_FETCH_CTR_REL < PPU_TMM_CACHE_FETCH_C_COUNT - 1 then -- calculate address until second to last clock
- T_TMM_ADDR <= std_logic_vector(resize(TMM_FETCH_CTR - IDX, T_TMM_ADDR'length));
+ T_TMM_ADDR <= std_logic_vector(resize(TMM_FETCH_CTR - IDX, T_TMM_ADDR'length)); -- -IDX to correct for each fetch cycle taking 1 extra clock cycle
TMM_CACHE_ADDR <= std_logic_vector(resize(TMM_FETCH_CTR_REL - 1, TMM_CACHE_ADDR'length));
end if;
diff --git a/basys3/basys3.xpr b/basys3/basys3.xpr
index 22b1d66..a253b15 100644
--- a/basys3/basys3.xpr
+++ b/basys3/basys3.xpr
@@ -61,7 +61,7 @@
<Option Name="IPStaticSourceDir" Val="$PIPUSERFILESDIR/ipstatic"/>
<Option Name="EnableBDX" Val="FALSE"/>
<Option Name="DSABoardId" Val="basys3"/>
- <Option Name="WTXSimLaunchSim" Val="121"/>
+ <Option Name="WTXSimLaunchSim" Val="126"/>
<Option Name="WTModelSimLaunchSim" Val="0"/>
<Option Name="WTQuestaLaunchSim" Val="0"/>
<Option Name="WTIesLaunchSim" Val="0"/>