ec82764bef
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2899 lines
161 KiB
Systemverilog
2899 lines
161 KiB
Systemverilog
// retroDE_ps2 — de25_nano_psmct32_raster_demo_top (banner current as of Ch166; later chapters are annotated inline)
|
||
//
|
||
// DE25-Nano (Terasic, Agilex 5) board-shaped wrapper around the
|
||
// Ch157+Ch158 hardware top (`top_psmct32_raster_demo_bram`). The
|
||
// Ch149 / Ch151 banner originally targeted the Ch146 legacy wrapper
|
||
// (`top_psmct32_raster_demo`, vram_stub-backed) — Ch159 swaps the
|
||
// instance to the BRAM-backed wrapper so the Agilex 5 fitter has
|
||
// real M20K storage for VRAM instead of the 65,536-flip-flop array
|
||
// that drove the Ch152 fit failure (155k ALMs / 6 RAM blocks /
|
||
// 121k registers). External port shape is unchanged so no board-
|
||
// level signal rewiring is required. Wraps the board-agnostic
|
||
// top with:
|
||
//
|
||
// - DE25-Nano signal names (CLOCK0_50 / CLOCK1_50 / CLOCK2_50 /
|
||
// KEY[1:0] / SW[3:0] / LED[7:0]) per the resource-CD
|
||
// `Golden_top.v` and the established `retroDE_nes` pattern.
|
||
// - PLL: CLOCK2_50 → de25_nano_pll_stub (sim default, pass-
|
||
// through) / Quartus IOPLL via `\`ifdef USE_PLL_IP` (synth) →
|
||
// `design_clk`. The stub asserts `locked` after a 32-cycle
|
||
// warm-up, mirroring the real IP's lock-acquire shape. The
|
||
// PLL is reset by `ninit_done`, so lock can only acquire after
|
||
// FPGA initialization completes.
|
||
// - Reset bridge: async-asserts on (ninit_done | ~pll_locked),
|
||
// synchronously deasserts on `design_clk` through a 2-stage
|
||
// shift register (KEY[0] is sampled synchronously through the
|
||
// same register; only the FPGA-init / PLL-lock signals can
|
||
// async-assert). The design can only leave reset once both
|
||
// FPGA init AND PLL lock are complete. Mirrors retroDE_nes:
|
||
// 121-177 with the PLL-lock gate added.
|
||
// - `core_go` sequencer: 16-cycle delay after reset deasserts
|
||
// then a one-cycle pulse on `design_clk`. The EE core's `go_i`
|
||
// is sampled level-sensitively in S_IDLE
|
||
// (`rtl/ee/ee_core_stub.sv:812-813`), so a single pulse is
|
||
// sufficient.
|
||
// - LED[2:0] driving the three sticky status outputs of the
|
||
// Ch146 wrapper (core_halt / dma_done_seen / frame_seen),
|
||
// respecting the DE25-Nano's active-LOW LED polarity.
|
||
// (Ch165/Ch166 added LED[3] = hdmi_init_done and LED[4] =
|
||
// hdmi_i2c_error; LED[7:5] tied HIGH = OFF.)
|
||
// - Top-level video outputs (VIDEO_R/G/B/HSYNC/VSYNC/DE). The
|
||
// Ch150 QSF marks these as `VIRTUAL_PIN ON` so Quartus does
|
||
// NOT auto-place them on arbitrary package pins; the PHY shim
|
||
// chapter (Ch152+) will replace those VIRTUAL_PIN lines with
|
||
// real `set_location_assignment` directives plus a PHY shim
|
||
// module.
|
||
//
|
||
// Ch161-Ch163 status — synth path is hardware-real at 50 MHz:
|
||
//
|
||
// - Ch161 introduced a real Quartus Agilex 5 IOPLL `.ip`
|
||
// (`synth/.../ip/pll.ip` → `pll.qip`) and defined
|
||
// `USE_PLL_IP=1` in the QSF so the wrapper's
|
||
// `\`ifdef USE_PLL_IP` instantiates the IP-generated `pll`
|
||
// module in place of `de25_nano_pll_stub`. The IP was
|
||
// initially configured for 50 MHz refclk → 30 MHz outclk_0
|
||
// (Ch161's first .sof ran the chip at 30 MHz).
|
||
// - Ch162 added `STRIP_HW_DIVIDER=1'b1` on the
|
||
// `top_psmct32_raster_demo_bram` instantiation below — the
|
||
// EE-core auto-inferred 32-bit DIVU divider is gated out
|
||
// (bootlet doesn't execute DIVU; behavior-neutral).
|
||
// - Ch163 added `STRIP_PCRTC_MAG_DIV=1'b1` (PCRTC
|
||
// magnification divider gated out — demo locks MAGH=MAGV=0)
|
||
// and **retuned the IOPLL to 50 MHz outclk_0**. With both
|
||
// strips in place, STA closes at the 50 MHz constraint
|
||
// with +7.500 ns setup slack / Fmax 80.0 MHz, so the
|
||
// .sof produced by build_quartus.sh genuinely runs at
|
||
// 50 MHz on the DE25-Nano.
|
||
//
|
||
// Sim still uses the stub (no `+define+USE_PLL_IP` on the
|
||
// iverilog Makefile).
|
||
//
|
||
// Ch164 — first video-PHY shim move ("get pixels off-chip"):
|
||
// the wrapper now drives real HDMI_TX_* outputs that map onto
|
||
// the DE25-Nano's ADV7513 HDMI transmitter pins (mirrors
|
||
// `retroDE_nes` HDMI pinout for the same board). The
|
||
// abstract `VIDEO_R/G/B/HSYNC/VSYNC/DE` ports are kept as
|
||
// VIRTUAL_PIN ON for sim/TB compat (the Ch149 board TB
|
||
// references them); the real bitstream output pins are
|
||
// HDMI_TX_CLK / HDMI_TX_D[23:0] / HDMI_TX_HS / HDMI_TX_VS /
|
||
// HDMI_TX_DE. HDMI_TX_D is `{VIDEO_R, VIDEO_G, VIDEO_B}`
|
||
// (R in MSBs, B in LSBs — ADV7513 default 24-bit RGB pinout).
|
||
// HDMI_TX_CLK is `design_clk` (the 50 MHz pixel clock).
|
||
//
|
||
// Ch165 — ADV7513 wake-up FSM is now LIVE. Four control pins
|
||
// landed (`HDMI_I2C_SCL/SDA`, `HDMI_TX_INT`, `HDMI_MCLK`) plus
|
||
// `I2C_HDMI_Config u_hdmi_i2c` (Terasic-derived, ported from
|
||
// `retroDE_splash/rtl/platform/`). Walking a 38-entry LUT
|
||
// powers the chip up + configures 24-bit RGB input + AVI
|
||
// InfoFrame for full-range RGB; `READY` (= `hdmi_init_done`)
|
||
// surfaces on `LED[3]` (active-LOW; lit = chip configured).
|
||
// HDMI_TX_INT going low retriggers the LUT walk for late
|
||
// hot-plug. The .sof produced by build_quartus.sh should
|
||
// now drive a real HDMI monitor.
|
||
//
|
||
// Ch166 — HDMI bring-up observability + operator playbook.
|
||
// `I2C_HDMI_Config` gains a sticky NACK watchdog (`ERROR`
|
||
// output): asserts after NACK_LIMIT (default 16) consecutive
|
||
// retries on the same LUT entry — a stuck-bus signature for
|
||
// "chip absent / wrong address / SDA short". Surfaces on
|
||
// `LED[4]` (active-LOW; lit = error latched). Ch165's
|
||
// `tb_hdmi_i2c_wake_smoke` is extended to cover both the
|
||
// happy path (error stays 0) and a forced-NACK path
|
||
// (error rises). A bring-up runbook lives at
|
||
// `docs/hardware/de25_nano_bringup.md` covering build/.sof
|
||
// path, LED ledger, expected behavior, and triage steps.
|
||
//
|
||
// What's still NOT in this RTL (deferred to Ch167+):
|
||
// - Proper `set_output_delay` on HDMI_TX_* once the ADV7513
|
||
// setup/hold window is locked from the bring-up datasheet
|
||
// pass (replaces Ch164's `set_false_path -to`).
|
||
// - VGA DAC / PMOD video-output alternates (HDMI is the
|
||
// primary on-board output target).
|
||
// - LPDDR4 / SDRAM / HPS / FAN_ALERT_n / CAM / UART / GPIO —
|
||
// unused by the PSMCT32 raster demo.
|
||
//
|
||
// Codex framing for Ch149: "make the design board-shaped, not yet
|
||
// board-pinned."
|
||
// Codex framing for Ch151: "PLL/reset-release integration ... the
|
||
// most conservative hardware bring-up step before touching video PHY."
|
||
// Codex framing for Ch161: "real PLL commit ... so the .sof
|
||
// actually runs at 30 MHz on the DE25-Nano instead of merely
|
||
// being constrained as if it did."
|
||
// Codex framing for Ch162: "lower-blast timing move first:
|
||
// add a STRIP_GOURAUD_TRI [actually DIVU] hardware-demo
|
||
// parameter ... should tell us immediately how much of the
|
||
// 30 MHz ceiling is just dead divider logic."
|
||
// Codex framing for Ch163: "default-off hardware-demo
|
||
// parameter on gs_pcrtc_stub to bypass hwin_rel / hmag_factor
|
||
// and vwin_rel / vmag_factor when the demo locks MAGH=MAGV=0,
|
||
// then forward it through the BRAM wrapper and board top."
|
||
// Codex framing for Ch164: "small PHY shim chapter, not a
|
||
// full display-stack leap. Get pixels off-chip before making
|
||
// them pretty." Replace VIRTUAL_PIN for the chosen output
|
||
// (HDMI here), wire VIDEO_* through, leave ADV7513 wake-up
|
||
// FSM for the next chapter.
|
||
// Codex framing for Ch165: "wake the ADV7513, don't broaden
|
||
// the video system yet ... Port/adapt the known-good ADV7513
|
||
// init sequence from retroDE_nes ... that should turn 'FPGA
|
||
// pins toggling' into 'monitor has a fighting chance of
|
||
// showing the tiny frame.'"
|
||
//
|
||
|
||
`timescale 1ns/1ps
|
||
|
||
module de25_nano_psmct32_raster_demo_top (
|
||
// ---------- Clock ----------
|
||
input CLOCK0_50,
|
||
input CLOCK1_50,
|
||
input CLOCK2_50, // system clock (used)
|
||
|
||
// ---------- Buttons + switches ----------
|
||
input [1:0] KEY, // active-LOW; KEY[0] = soft reset
|
||
input [3:0] SW, // unused (placeholder)
|
||
|
||
// ---------- LEDs ----------
|
||
output [7:0] LED, // active-LOW; LED[2:0] = status
|
||
|
||
// ---------- Raw video (abstract VIDEO_*; kept as VIRTUAL_PIN for sim/TB compat since Ch164) ----------
|
||
output [7:0] VIDEO_R,
|
||
output [7:0] VIDEO_G,
|
||
output [7:0] VIDEO_B,
|
||
output VIDEO_HSYNC,
|
||
output VIDEO_VSYNC,
|
||
output VIDEO_DE,
|
||
|
||
// ---------- HDMI data path (Ch164) ----------
|
||
// ADV7513 24-bit RGB + sync interface. HDMI_TX_CLK is the
|
||
// pixel clock (= design_clk, the PLL output: 50 MHz as of Ch164, 25.175 MHz per the Ch169 VGA-timing note); HDMI_TX_D
|
||
// packs `{VIDEO_R[7:0], VIDEO_G[7:0], VIDEO_B[7:0]}` (R in
|
||
// MSBs). HDMI_TX_HS/VS/DE mirror the abstract VIDEO_* signals.
|
||
output HDMI_TX_CLK,
|
||
output [23:0] HDMI_TX_D,
|
||
output HDMI_TX_HS,
|
||
output HDMI_TX_VS,
|
||
output HDMI_TX_DE,
|
||
|
||
// ---------- HDMI control / wake-up (Ch165) ----------
|
||
// ADV7513 I²C control bus + interrupt + audio MCLK. The I²C
|
||
// bus is two-wire open-drain (SCL master-driven; SDA driven
|
||
// low or released to high-Z by master OR slave). HDMI_TX_INT
|
||
// is the chip's open-drain interrupt output (active-low,
|
||
// monitored by the wake-up FSM to retrigger init on hot-plug).
|
||
// HDMI_MCLK is the audio sample-rate reference; for the
|
||
// video-only PSMCT32 demo we drive it with CLOCK2_50 — the
|
||
// chip's audio config goes through the init LUT but isn't
|
||
// sourced from real audio data.
|
||
inout HDMI_I2C_SCL,
|
||
inout HDMI_I2C_SDA,
|
||
input HDMI_TX_INT,
|
||
output HDMI_MCLK,
|
||
|
||
// ---------- DS2 wired-controller (Ch248) ----------
|
||
// PSX-style SPI on the DE25-Nano GPIO header. Driven by the
|
||
// shared platform `ds2_controller` (referenced from
|
||
// ../retroDE_splash/rtl/platform/ds2_controller.sv). Pin map
|
||
// (PIN_H16/Y1/C2/P1, 3.3-V LVCMOS, DATA has a weak pull-up)
|
||
// matches NES/Atari2600/splash exactly.
|
||
output GPIO_0_DS2_CLK,
|
||
output GPIO_0_DS2_CMD,
|
||
input GPIO_0_DS2_DATA,
|
||
output GPIO_0_DS2_ATTN,
|
||
|
||
// ---------- LPDDR4A — HPS hard-memory side (Ch170) ----------
|
||
// Pin shape and assignments mirror retroDE_Atari2600 / retroDE_nes
|
||
// verbatim so the produced .core.rbf is the same class of artifact
|
||
// every other retroDE core ships. Driven entirely by the qsys_top
|
||
// EMIF block — the ps2 fabric never touches these wires.
|
||
input LPDDR4A_REFCLK_p,
|
||
output LPDDR4A_CS_n,
|
||
output [5:0] LPDDR4A_CA,
|
||
output LPDDR4A_CK,
|
||
output LPDDR4A_CKE,
|
||
output LPDDR4A_CK_n,
|
||
inout [3:0] LPDDR4A_DM,
|
||
inout [31:0] LPDDR4A_DQ,
|
||
inout [3:0] LPDDR4A_DQS,
|
||
inout [3:0] LPDDR4A_DQS_n,
|
||
output LPDDR4A_RESET_n,
|
||
input LPDDR4A_RZQ,
|
||
|
||
`ifdef GS_LPDDR4B_FB
|
||
// ---------- FPGA-private LPDDR4B (Ch319 — GS framebuffer memory) ----------
|
||
// Driven by EMIF_Qsys (cloned from de25_lpddr4_bw/ao486, same device).
|
||
// Widths mirror LPDDR4A. Gen-guarded: absent unless GS_LPDDR4B_FB is set,
|
||
// so the default flat build + sim regression are byte-identical.
|
||
input LPDDR4B_REFCLK_p,
|
||
output LPDDR4B_CS_n,
|
||
output [5:0] LPDDR4B_CA,
|
||
output LPDDR4B_CK,
|
||
output LPDDR4B_CKE,
|
||
output LPDDR4B_CK_n,
|
||
inout [3:0] LPDDR4B_DM,
|
||
inout [31:0] LPDDR4B_DQ,
|
||
inout [3:0] LPDDR4B_DQS,
|
||
inout [3:0] LPDDR4B_DQS_n,
|
||
output LPDDR4B_RESET_n,
|
||
input LPDDR4B_RZQ,
|
||
`endif
|
||
|
||
// ---------- HPS hard-IP side (Ch170) ----------
|
||
// 60+ pins routed straight through qsys_top.hps_io_* to the HPS
|
||
// hard IP. ps2 fabric does not consume any of these; they exist
|
||
// so the runtime fabric image is "shaped like" the other retroDE
|
||
// cores and Linux/HPS continues to see its expected device shape
|
||
// after `core_loader.sh load`.
|
||
input HPS_CLK_25,
|
||
output HPS_ENET_MDC,
|
||
inout HPS_ENET_MDIO,
|
||
input HPS_ENET_RX_CLK,
|
||
input HPS_ENET_RX_CTL,
|
||
input [3:0] HPS_ENET_RX_DATA,
|
||
output HPS_ENET_TX_CLK,
|
||
output HPS_ENET_TX_CTL,
|
||
output [3:0] HPS_ENET_TX_DATA,
|
||
inout HPS_GSENSOR_I2C_EN,
|
||
inout HPS_GSENSOR_INT,
|
||
inout HPS_I2C_SCL,
|
||
inout HPS_I2C_SDA,
|
||
inout HPS_KEY,
|
||
inout HPS_LED,
|
||
output HPS_SD_CLK,
|
||
inout HPS_SD_CMD,
|
||
inout [3:0] HPS_SD_DATA,
|
||
input HPS_UART_RX,
|
||
output HPS_UART_TX,
|
||
input HPS_USB_CLK,
|
||
inout [7:0] HPS_USB_DATA,
|
||
input HPS_USB_DIR,
|
||
input HPS_USB_NXT,
|
||
output HPS_USB_STP
|
||
);
|
||
|
||
// -------------------------------------------------------------
|
||
// ninit_done — high until FPGA initialization completes.
|
||
//
|
||
// Three driver options, in priority order:
|
||
// `USE_QSYS_TOP` (Ch170 — Quartus synth) — qsys_top.ninit_done_ninit_done
|
||
// drives the wire directly. The qsys instance below ties
|
||
// this output into `ninit_done`, so the inline stub MUST
|
||
// be disabled or we get a multiple-driver conflict.
|
||
// `USE_TERASIC_RESET_RELEASE_IP` (legacy path) — uses Terasic's
|
||
// reset_release IP from the DE25-Nano resource CD.
|
||
// default (sim) — inline counter stub.
|
||
// -------------------------------------------------------------
|
||
wire ninit_done;
|
||
`ifdef USE_QSYS_TOP
|
||
// ninit_done driven by qsys_top.ninit_done_ninit_done below.
|
||
`elsif USE_TERASIC_RESET_RELEASE_IP
|
||
reset_release u_reset_release (
|
||
.ninit_done(ninit_done)
|
||
);
|
||
`else
|
||
// Inline sim-friendly stub: ninit_done is high for the first
|
||
// ~16 cycles after power-up, then drops permanently. Mirrors
|
||
// the real IP's behavior (high until BRAM init completes).
|
||
// Sim-only stand-in for the reset-release IP. `init_cnt` powers up at 0
// and advances on every CLOCK2_50 rising edge until it saturates at 4'hF.
// `ninit_done` is high while the counter is below 4'hF and stays low once
// the counter has saturated (the counter never moves again after that).
logic [3:0] init_cnt = 4'd0;

// All-ones detect: &init_cnt is 1 only when init_cnt == 4'hF.
assign ninit_done = ~(&init_cnt);

always_ff @(posedge CLOCK2_50) begin
    if (~(&init_cnt))
        init_cnt <= init_cnt + 4'd1;
end
|
||
`endif
|
||
|
||
// -------------------------------------------------------------
|
||
// PLL — Ch151. CLOCK2_50 → design clock, with `pll_locked`
|
||
// gating the reset bridge. On synthesis, define `USE_PLL_IP`
|
||
// and add Quartus's IOPLL IP (sibling cores instantiate the
|
||
// same `pll` module signature; see retroDE_nes/ip/pll/pll_bb.v
|
||
// and retroDE_splash/ip/sys_pll/sys_pll_bb.v). Default-off
|
||
// uses an inline pass-through stub (de25_nano_pll_stub) that
|
||
// forwards refclk and asserts `locked` after a 32-cycle warm-
|
||
// up. Stub keeps the module sim-friendly without the IP
|
||
// blackbox; pass-through means CLOCK2_50 itself is the design
|
||
// clock until a real PLL chapter retunes the frequency.
|
||
//
|
||
// The PLL is reset by ninit_done — when FPGA initialization
|
||
// is still in progress, the PLL is held in reset; lock can
|
||
// only acquire after ninit_done deasserts.
|
||
// -------------------------------------------------------------
|
||
wire design_clk;
|
||
wire pll_locked;
|
||
`ifdef USE_PLL_IP
|
||
pll u_pll (
|
||
.refclk (CLOCK2_50),
|
||
.rst (ninit_done),
|
||
.outclk_0(design_clk),
|
||
.locked (pll_locked)
|
||
);
|
||
`else
|
||
de25_nano_pll_stub u_pll (
|
||
.refclk (CLOCK2_50),
|
||
.rst (ninit_done),
|
||
.outclk_0(design_clk),
|
||
.locked (pll_locked)
|
||
);
|
||
`endif
|
||
|
||
// -------------------------------------------------------------
|
||
// Reset synchronizer — async-asserts on (ninit_done | ~pll_locked):
|
||
// FPGA init pending OR the PLL hasn't acquired lock keeps the
|
||
// shift register cleared. KEY[0] is sampled synchronously
|
||
// through a 2-stage register on the design clock. Mirrors
|
||
// retroDE_nes:170-177 with the PLL-lock gate added (Ch151).
|
||
// The shift register's deasserting edge is automatically
|
||
// synchronous to design_clk, so downstream consumers see a
|
||
// clean transition. Until pll_locked is high, no design logic
|
||
// can leave reset — even if the user releases KEY[0] early,
|
||
// the bridge holds reset asserted until the PLL is stable.
|
||
// -------------------------------------------------------------
|
||
// Reset bridge.
//   button_n         : KEY[0], active-LOW push button (0 = pressed).
//   async_rst_assert : high while FPGA init is pending (ninit_done) or the
//                      PLL has not reported lock (~pll_locked).
//   rst_sync         : 2-stage register on design_clk. Cleared
//                      asynchronously by async_rst_assert; otherwise stage 0
//                      samples button_n and stage 1 takes the previous
//                      stage-0 value.
wire button_n         = KEY[0];
wire async_rst_assert = ninit_done | ~pll_locked;

reg [1:0] rst_sync;

always_ff @(posedge design_clk or posedge async_rst_assert) begin
    if (async_rst_assert) begin
        rst_sync <= '0;
    end else begin
        rst_sync[0] <= button_n;       // first stage: raw button sample
        rst_sync[1] <= rst_sync[0];    // second stage: previous first-stage value
    end
end
|
||
|
||
// Ch176 — fold the HPS-driven CORE_CTRL[0] RESET bit into the
|
||
// design-side reset. `bridge_core_reset_req` is active-high and
|
||
// lives in the CLOCK2_50 domain; sync it into design_clk through
|
||
// a 2-FF chain before gating the reset. This is a *latched*
|
||
// signal in the bridge so the standard CDC-for-levels treatment
|
||
// (no toggle trick needed) is sufficient. The bridge runs on
|
||
// CLOCK2_50 and `~ninit_done` so the HPS itself cannot reset the
|
||
// bridge into an unreadable state — only the design domain
|
||
// downstream of `core_rst_n` is held.
|
||
wire bridge_core_reset_req;
|
||
|
||
// ---- Ch229 tile broadcast (bridge CLOCK2_50 → tile_ram_cdc) ----
|
||
// Driven by `u_hps_bridge` inside USE_QSYS_TOP; tied safe (zero +
|
||
// static toggle) on the sim path so tile_ram_cdc receives no
|
||
// write events and `shadow_mem` stays at its sim `initial` zero.
|
||
wire bridge_tile_wr_toggle;
|
||
wire [9:0] bridge_tile_wr_index;
|
||
wire [31:0] bridge_tile_wr_data;
|
||
|
||
// ---- Ch229 design-domain tile shadow (read by Ch245 platform OSD adapter) ----
|
||
wire [9:0] overlay_tile_rd_index;
|
||
wire [31:0] overlay_tile_rd_data;
|
||
wire [15:0] tile_wr_too_close_count; // Ch230 diagnostic (unconnected sink)
|
||
|
||
// ---- Ch235 bridge → PS2-fabric input latches (no consumer yet) ----
|
||
// Surfaced from `ps2_hps_bridge.input_p1_o/p2_o`; when an IOP-core
|
||
// integration lands on the synth top, these wires feed into
|
||
// `iop_memory_map_stub.input_p1/p2` for the Ch234 sio2_input_stub.
|
||
wire [31:0] bridge_input_p1;
|
||
wire [31:0] bridge_input_p2;
|
||
// Ch245 — un-remapped retroDE-bitmap copy for the platform OSD
|
||
// menu FSM. INPUT_P1 may be per-game-remapped by retrodesd; the
|
||
// OSD reads the raw form so navigation buttons land in their
|
||
// documented bit positions.
|
||
wire [31:0] bridge_input_p1_raw;
|
||
|
||
// ---- Ch330 Brick 4 — runtime command-list feeder bridge<->core wires ----
|
||
// The bridge (CLOCK2_50) emits TOGGLES for staging-commit + retrigger with
|
||
// stable level addr/data; the feeder lives in design_clk, so we sync the two
|
||
// toggles into design_clk and edge-detect them into 1-cycle pulses. Tied 0
|
||
// outside USE_QSYS_TOP (no bridge), so non-QSYS builds are unaffected.
|
||
wire feeder_stg_we_tgl_w, feeder_go_tgl_w; // bridge -> core (CLOCK2_50 toggles)
|
||
wire [11:0] feeder_stg_waddr_w; // bridge -> core (level)
|
||
wire [63:0] feeder_stg_wdata_w; // bridge -> core (level)
|
||
wire feeder_ready_w; // core -> bridge (design_clk)
|
||
wire [15:0] feeder_records_w; // core -> bridge
|
||
wire [31:0] feeder_waits_w; // core -> bridge
|
||
// Feeder toggle -> pulse CDC (design_clk domain).
//
// Each toggle input (feeder_stg_we_tgl_w / feeder_go_tgl_w) is shifted
// through a 3-bit register; a pulse is the XOR of the two oldest stages
// ([2] ^ [1]), i.e. one design_clk cycle per toggle edge.
//
// The chains are cleared to 0 by core_rst_n, but the toggle sources are
// not: they are driven from outside this reset domain. If a toggle is
// idling at 1 when core_rst_n releases, refilling the chain from 000
// yields [2]^[1] = 1 on the second clock after release — a phantom
// staging-commit / retrigger pulse that no toggle edge caused.
//
// `fdr_sync_primed` shifts in 1s after reset; bit [2] goes high on the
// third clock, which is the first cycle in which both [2] and [1] hold
// real post-reset samples. Gating the pulses with it suppresses the
// phantom edge while leaving steady-state behaviour unchanged.
//
// NOTE(review): core_rst_n is declared further down in this module; some
// tools require nets to be declared before use — confirm the build flow
// accepts the forward reference (this was already the case before this
// change).
reg [2:0] fdr_we_sync, fdr_go_sync;
reg [2:0] fdr_sync_primed;

always_ff @(posedge design_clk or negedge core_rst_n) begin
    if (!core_rst_n) begin
        fdr_we_sync     <= 3'd0;
        fdr_go_sync     <= 3'd0;
        fdr_sync_primed <= 3'd0;
    end else begin
        fdr_we_sync     <= {fdr_we_sync[1:0], feeder_stg_we_tgl_w};
        fdr_go_sync     <= {fdr_go_sync[1:0], feeder_go_tgl_w};
        fdr_sync_primed <= {fdr_sync_primed[1:0], 1'b1};
    end
end

// 1 design_clk pulse per staging commit (suppressed until the chain is primed).
wire feeder_stg_we_pulse_w = fdr_sync_primed[2] & (fdr_we_sync[2] ^ fdr_we_sync[1]);
// 1 design_clk pulse per retrigger (suppressed until the chain is primed).
wire feeder_go_pulse_w     = fdr_sync_primed[2] & (fdr_go_sync[2] ^ fdr_go_sync[1]);
|
||
|
||
// ---- Ch248 platform DS2 wired controller --------------------------
|
||
// The shared `ds2_controller` runs on sys_clk (= CLOCK2_50 = 50 MHz),
|
||
// drives CLK/CMD/ATTN to the controller, samples DATA, and surfaces
|
||
// {ds2_connected, ds2_error, ds2_buttons[31:0]} to the bridge so
|
||
// retrodesd's ds2_poll_thread sees real wired-controller state.
|
||
// Analog + debug outputs are tied to dead nets — the bridge only
|
||
// consumes digital + connected/error today.
|
||
wire [31:0] ds2_buttons_w;
|
||
wire ds2_connected_w;
|
||
wire ds2_error_w;
|
||
/* verilator lint_off UNUSEDSIGNAL */
|
||
wire [31:0] ds2_analog_w;
|
||
wire [31:0] ds2_rx_dbg0_w, ds2_rx_dbg1_w;
|
||
wire [7:0] ds2_rx_dbg2_w;
|
||
wire [7:0] ds2_id_w;
|
||
wire [2:0] ds2_cmd_phase_dbg_w;
|
||
wire [3:0] ds2_init_retry_dbg_w;
|
||
wire [1:0] ds2_init_variant_dbg_w;
|
||
wire _unused_ds2 = &{1'b0, ds2_analog_w, ds2_rx_dbg0_w, ds2_rx_dbg1_w,
|
||
ds2_rx_dbg2_w, ds2_id_w, ds2_cmd_phase_dbg_w,
|
||
ds2_init_retry_dbg_w, ds2_init_variant_dbg_w, 1'b0};
|
||
/* verilator lint_on UNUSEDSIGNAL */
|
||
ds2_controller #(
|
||
.CLK_HZ (50_000_000)
|
||
) u_ds2 (
|
||
.clk (CLOCK2_50),
|
||
.reset_n (~ninit_done),
|
||
.ds2_clk (GPIO_0_DS2_CLK),
|
||
.ds2_cmd (GPIO_0_DS2_CMD),
|
||
.ds2_data (GPIO_0_DS2_DATA),
|
||
.ds2_attn (GPIO_0_DS2_ATTN),
|
||
.ds2_buttons (ds2_buttons_w),
|
||
.ds2_analog (ds2_analog_w),
|
||
.ds2_connected (ds2_connected_w),
|
||
.ds2_error (ds2_error_w),
|
||
.ds2_rx_dbg0 (ds2_rx_dbg0_w),
|
||
.ds2_rx_dbg1 (ds2_rx_dbg1_w),
|
||
.ds2_rx_dbg2 (ds2_rx_dbg2_w),
|
||
.ds2_id (ds2_id_w),
|
||
.ds2_cmd_phase_dbg (ds2_cmd_phase_dbg_w),
|
||
.ds2_init_retry_dbg (ds2_init_retry_dbg_w),
|
||
.ds2_init_variant_dbg (ds2_init_variant_dbg_w)
|
||
);
|
||
|
||
// ---- Ch245 platform OSD wiring (bridge ↔ menu FSM ↔ overlay) ----
|
||
// CLOCK2_50 domain: bridge register outputs, menu FSM inputs/outputs.
|
||
wire [31:0] bridge_osd_ctrl;
|
||
wire [31:0] bridge_osd_cfg0;
|
||
wire [31:0] bridge_osd_cfg1;
|
||
wire menu_osd_active;
|
||
wire [4:0] menu_cursor_row;
|
||
wire menu_act_select;
|
||
wire menu_act_back;
|
||
wire menu_act_scroll_down;
|
||
wire menu_act_scroll_up;
|
||
wire menu_act_open;
|
||
|
||
// Ch249 — Ch230 osd_ctrl_enable 3-FF sync removed. The platform
|
||
// osd_overlay's `osd_enable` is now driven directly by the menu
|
||
// FSM's `osd_active` (already 2-FF synced into design_clk just
|
||
// above), so the bridge's single-bit Ch230 enable broadcast is no
|
||
// longer needed.
|
||
|
||
// ---- Ch229 inner-demo raw video (before overlay composition) ----
|
||
wire [7:0] demo_video_r;
|
||
wire [7:0] demo_video_g;
|
||
wire [7:0] demo_video_b;
|
||
wire demo_video_de;
|
||
wire demo_video_hsync;
|
||
wire demo_video_vsync;
|
||
// Ch320 — scanout source-select (bridge out) + LPDDR4B scanout pixels/status.
|
||
// Declared here (before the video mux) so the mux can reference them; driven by
|
||
// the scanout reader under GS_LPDDR4B_FB, tied 0 in the other branches.
|
||
wire video_src_w;
|
||
wire scanout_lb_w; // Ch321 — 1 = line-buffer scanout, 0 = frame-cache
|
||
wire [7:0] scan_r_w, scan_g_w, scan_b_w;
|
||
wire scan_cache_valid_w, scan_err_w;
|
||
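// Ch320/Ch321/Ch325 — LPDDR scanout frame-cache size: 256 beats (8 KiB, 64x64) by default,
// 1024 beats (32 KiB, 128x128) for the Ch321 larger-frame demo (GS_TILE_LPDDR128_DEMO), or
// 2048 beats (64 KiB, 128x128 PSMCT32) under GS_TILE_SPILL. GS_TILE_LPDDR128_DEMO wins if both are set.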
|
||
`ifdef GS_TILE_LPDDR128_DEMO
|
||
localparam int SCANOUT_CACHE_BEATS = 1024;
|
||
`elsif GS_TILE_SPILL
|
||
localparam int SCANOUT_CACHE_BEATS = 2048; // Ch325 — 128x128 PSMCT32 = 64 KiB
|
||
`else
|
||
localparam int SCANOUT_CACHE_BEATS = 256;
|
||
`endif
|
||
// Ch327a/Ch327b — line-buffer scanout geometry (the SCALABLE display path: O(width) BRAM, not
// O(width*height) like the frame-cache). Under GS_TILE_SPILL (PSMCT32 spill FB): 256 px * 4 B =
// 1024 B/row = 32 beats, 256 rows. (Default = the Ch321 128x128 PSMCT16 demo: 256 B/row = 8 beats, 128 rows.)
|
||
`ifdef GS_TILE_SPILL
|
||
localparam int SCANOUT_LB_STRIDE = 1024; // Ch327b — 256 px * 4 B PSMCT32
|
||
localparam int SCANOUT_LB_ROW_BEATS = 32; // 1024 / 32
|
||
localparam int SCANOUT_LB_NROWS = 256;
|
||
`else
|
||
localparam int SCANOUT_LB_STRIDE = 256;
|
||
localparam int SCANOUT_LB_ROW_BEATS = 8;
|
||
localparam int SCANOUT_LB_NROWS = 128;
|
||
`endif
|
||
// Initialize at declaration so iverilog doesn't bring up the
|
||
// sim with an X on hps_reset_sync[1] — that X would AND into
|
||
// `core_rst_n` and propagate forward, making downstream rising-
|
||
// edge detectors miss the first 0→1 transition. (`rst_sync` got
|
||
// away with not being initialized pre-Ch176 because it's the
|
||
// only term of core_rst_n; once we AND in a second source, the
|
||
// X-glitch becomes visible to consumers like
|
||
// tb_de25_nano_psmct32_raster_demo_top:247 which guards
|
||
// `t_core_rst_n_rise` against duplicate captures.)
|
||
// 2-stage register bringing `bridge_core_reset_req` onto design_clk.
// Declared with a 2'b00 initial value and also cleared asynchronously by
// async_rst_assert; otherwise stage 0 samples the request and stage 1
// takes the previous stage-0 value.
reg [1:0] hps_reset_sync = 2'b00;

always_ff @(posedge design_clk or posedge async_rst_assert) begin
    if (async_rst_assert) begin
        hps_reset_sync <= '0;
    end else begin
        hps_reset_sync[0] <= bridge_core_reset_req;
        hps_reset_sync[1] <= hps_reset_sync[0];
    end
end

// Active-high core reset request as seen in the design_clk domain.
wire hps_core_reset = hps_reset_sync[1];

// Active-low core reset: released only when the reset bridge's second
// stage is high AND no HPS reset request is pending. This is a
// combinational AND of two design_clk registers.
wire core_rst_n = rst_sync[1] & ~hps_core_reset;
|
||
|
||
// -------------------------------------------------------------
|
||
// core_go pulse — wait GO_DELAY cycles after core_rst_n
|
||
// deasserts (the PLL is already locked by this point because
|
||
// pll_locked gates rst_sync above), then pulse core_go high
|
||
// for one cycle. The EE core's go_i is sampled level-
|
||
// sensitively in S_IDLE (rtl/ee/ee_core_stub.sv:812-813) so a
|
||
// single-cycle pulse is sufficient. Clocked on design_clk so
|
||
// the pulse stays in the same domain as the EE.
|
||
// -------------------------------------------------------------
|
||
// GO_DELAY : value of go_cnt at which core_go is requested.
// go_cnt   : cleared while core_rst_n is low; afterwards increments each
//            design_clk edge while go_cnt <= GO_DELAY, so it comes to rest
//            at GO_DELAY + 1.
// core_go  : registered copy of (go_cnt == GO_DELAY); high for exactly one
//            design_clk cycle per release of core_rst_n, low during reset.
localparam int GO_DELAY = 16;

logic [7:0] go_cnt;
logic       core_go;

always_ff @(posedge design_clk) begin
    if (!core_rst_n) begin
        go_cnt  <= 8'd0;
        core_go <= 1'b0;
    end else begin
        // Pulse is taken from the pre-increment count.
        core_go <= (go_cnt == GO_DELAY[7:0]);

        // Count up through GO_DELAY inclusive, then hold.
        if (go_cnt <= GO_DELAY[7:0])
            go_cnt <= go_cnt + 8'd1;
    end
end
|
||
|
||
// -------------------------------------------------------------
|
||
// Ch146 wrapper — clocked on design_clk (the PLL output).
|
||
// -------------------------------------------------------------
|
||
logic core_halt;
|
||
logic dma_done_seen;
|
||
logic frame_seen;
|
||
// Ch173 — gs_stub raster_overflow surfaced via the inner wrapper
|
||
// so the HPS bridge can mirror it into a status register. Under
|
||
// Ch172 backpressure this stays LOW forever; non-zero on HPS
|
||
// reads = the backpressure path broke at some point.
|
||
logic raster_overflow;
|
||
// Ch174 — event toggles flipped on each end-of-frame / DMAC-done
|
||
// pulse in the design_clk domain. The HPS bridge 2-FF syncs each
|
||
// toggle to CLOCK2_50 and counts every edge, replacing Ch173's
|
||
// edge-of-sticky source (which could only fire once per signal
|
||
// lifetime).
|
||
logic frame_toggle;
|
||
logic dma_done_toggle;
|
||
|
||
// Ch159 — board top now instantiates `top_psmct32_raster_demo_bram`
|
||
// (the BRAM-backed wrapper). External port shape is identical to
|
||
// the Ch146 legacy wrapper, so this swap is drop-in at the board
|
||
// level. The wrapper internally uses `vram_bram_stub` (Ch154) +
|
||
// `vram_normalize_pkg::normalize_write` (Ch155+) + the Ch157
|
||
// PSMT4 RMW pipe + Ch158 PCRTC sync-read alignment so all four
|
||
// writer PSMs and PCRTC scanout work end-to-end against the
|
||
// Agilex 5 M20K-friendly BRAM contract. The Ch152 fit failure
|
||
// (155k ALMs / 6 RAM blocks / 121k registers needed for the
|
||
// legacy vram_stub flip-flop array) is the baseline this swap
|
||
// should now retire.
|
||
// Ch318 — PSMCT16 tile-flush stream tap (declared before u_demo so the port
|
||
// widths are correct; consumed by the LPDDR AXI writer farther down).
|
||
wire demo_flush_emit;
|
||
wire [31:0] demo_flush_addr;
|
||
wire [15:0] demo_flush_pix16;
|
||
wire [5:0] demo_flush_psm;
|
||
wire [31:0] demo_vram_raddr; // Ch320 — PCRTC scanout VRAM byte address
|
||
wire demo_pix_window; // Ch320 — high inside the displayed frame window
|
||
// Ch322 — texel-fetch tap for the prefilled LPDDR texture cache. u_demo exposes the
|
||
// sampler's read request; the cache (under GS_LPDDR_TEX, in the EMIF block) returns the
|
||
// texel. Tied inert when GS_LPDDR_TEX is not set (the bram-top mux constant-folds to BRAM).
|
||
wire demo_gs_tex_rd_en;
|
||
wire [31:0] demo_gs_tex_rd_addr;
|
||
wire [31:0] demo_tex_cache_data;
|
||
wire demo_tex_cache_ready;
|
||
wire [31:0] demo_tex_cache_hits; // Ch322 — texels served from LPDDR cache (per render)
|
||
wire [31:0] demo_tex_bram_hits; // Ch322 — texels served from BRAM (fallback)
|
||
// Ch323 — tile COLOR+Z spill/reload bridge wires. u_demo (bram-top) emits the color/Z
|
||
// flush streams + the reload staging requests; the LPDDR spill writers + gs_tile_reload
|
||
// inside the EMIF block (under GS_TILE_SPILL) consume them and return the reload color/Z.
|
||
// All inert (0) when TILE_SPILL_ENABLE is 0, so connecting them is byte-identical otherwise.
|
||
wire [31:0] demo_flush_color32; // full 32-bit flushed color (color spill capture)
|
||
wire demo_z_flush_emit; // tile Z-flush strobe (TP_ZFLUSH sweep)
|
||
wire [31:0] demo_z_flush_addr; // Z-backing-relative byte offset (pixel_index*4)
|
||
wire [31:0] demo_z_flush_data; // 32-bit Z for this pixel
|
||
wire demo_cflush_emit; // DEDICATED color-flush spill strobe (TP_FLUSH only)
|
||
wire [31:0] demo_cflush_addr; // spill-region-relative byte offset (pixel_index*4)
|
||
wire [31:0] demo_cflush_data; // 32-bit flushed tile color
|
||
wire demo_reload_start; // reload-staging arm (toggle, gs domain)
|
||
wire [7:0] demo_tile_reload_raddr; // tile index sweep during TP_RELOAD
|
||
wire [29:0] demo_reload_base; // Ch324 — current tile's raster-FB byte offset
|
||
wire demo_tile_reload_ready; // reload engine warm (reload_ready synced to gs)
|
||
wire [31:0] demo_tile_reload_color; // reloaded color for raddr (1-cyc)
|
||
wire [31:0] demo_tile_reload_z; // reloaded Z for raddr (1-cyc)
|
||
wire [2:0] demo_tile_phase; // Ch323 diag — current tile phase (event-counter source)
|
||
// Ch352 (Codex guardrail #1) — LPDDR texture-cache geometry as PROFILE PARAMETERS, not hardcoded over the
|
||
// Ch322 tritex demo. SH3 real-draw crop: full 512x512 PSMT8 (262 KiB) in LPDDR, sampled at TBP=1024
|
||
// (0x40000). Every other GS_LPDDR_TEX profile keeps the tritex 8x8 (2 KiB) geometry.
|
||
`ifdef GS_SH3_REAL_DRAW_DEMO
|
||
localparam int TEXC_VRAM_BASE = 32'h0004_0000; // crop TEX_VRAM_BASE = NEW_TBP(1024)*256
|
||
localparam int TEXC_BYTES = 262144; // 512x512 PSMT8
|
||
localparam int TEXC_NBEATS = 8192; // 262144 / 32 (single-beat 32 B reads)
|
||
`else
|
||
localparam int TEXC_VRAM_BASE = 2048; // Ch322 tritex 8x8 PSMCT32
|
||
localparam int TEXC_BYTES = 2048;
|
||
localparam int TEXC_NBEATS = 64;
|
||
`endif
|
||
top_psmct32_raster_demo_bram #(
|
||
// Ch162 — strip the EE-core 32-bit hardware divider on the
|
||
// PSMCT32 SPRITE-only hardware build. Quartus infers the
|
||
// divider from `rs_val / rt_val` (DIVU); the bootlet
|
||
// doesn't execute DIVU, so the divider is dead code on
|
||
// this build path. Removing it retires the Ch159+ critical
|
||
// path and frees the timing budget for a faster clock.
|
||
.STRIP_HW_DIVIDER (1'b1),
|
||
// Ch352 — SEQUENTIAL gradient divider (registered iterations, no combinational cone). This REPLACES the
|
||
// combinational divide + the multicycle/false_path SDC approaches, which all made the Quartus fitter
|
||
// grind on the divider cone for hours. The sequential divider closes timing with NO SDC exception.
|
||
// Bit-exact to `/` (tb_gs_grad_divider) so the SH3 oracle stays 96.2%. GRAD_DIV_CYCLES below is ignored.
|
||
.GRAD_SEQ_DIVIDER (1'b1),
|
||
.GRAD_DIV_CYCLES (4),
|
||
// Ch163 — strip the PCRTC magnification divider. The demo
|
||
// locks MAGH=MAGV=0 so the divisor is constant 1 and the
|
||
// `hwin_rel / hmag_factor` / `vwin_rel / vmag_factor` math
|
||
// collapses to a passthrough. Removing it retires the
|
||
// Ch162-onwards STA worst path on
|
||
// `u_demo|u_pcrtc|div_1_rtl_0|...`.
|
||
.STRIP_PCRTC_MAG_DIV(1'b1),
|
||
// Ch296 fitter-rescue — strip the ~33k-FF useg_shadow_mem array
|
||
// from the board build. The PSMCT32 SPRITE-only bootlet runs
|
||
// entirely from BIOS (0xBFC0_0000) + EE-RAM (GIF payload via
|
||
// DMAC) and never issues useg traffic, so the shadow backing is
|
||
// dead on this path. Sim TBs keep the default 1. See
|
||
// ee_memory_map_stub.USEG_SHADOW_ENABLE.
|
||
.USEG_SHADOW_ENABLE (1'b0),
|
||
// Ch169 — drive VGA 640x480 @ 60 Hz at the IP-retuned
|
||
// 25.175 MHz pixel clock. The 16x8 PCRTC frame still
|
||
// paints in the upper-left of the 640x480 active region
|
||
// (PCRTC active area is governed by DISPLAY1 register,
|
||
// not these timing params; H_/V_ACTIVE here are the
|
||
// raster window the scanout walks each frame). Sync
|
||
// polarities are HSYNC/VSYNC active-LOW (the default in
|
||
// platform_video_stub / gs_pcrtc_stub), matching the
|
||
// VGA spec.
|
||
.H_ACTIVE (640),
|
||
.H_FRONT (16),
|
||
.H_SYNC (96),
|
||
.H_BACK (48),
|
||
.V_ACTIVE (480),
|
||
.V_FRONT (10),
|
||
.V_SYNC (2),
|
||
.V_BACK (33),
|
||
// Ch171 — bump VRAM from 8 KiB (Ch159 default, only large enough
|
||
// for the Ch123 16x8 test sprite) to 512 KiB so the 320x240 test
|
||
// card fits at PSMCT32 (320*240*4 = 307,200 bytes; the BRAM
|
||
// stub requires a power-of-two byte count). At ~57% of the
|
||
// Agilex 5's 893 KiB total M20K BRAM this is the headline
|
||
// resource jump for Ch171 but still leaves plenty of margin
|
||
// for future work.
|
||
`ifdef GS_RMW_DEMO
|
||
// GS read-modify-write feature-demo profile (opt-in via the
|
||
// GS_RMW_DEMO define). The texture, alpha-blend, and (future)
|
||
// depth demos all READ vram during raster (texel fetch / dest-fb
|
||
// read / z read), so the vram_bram_stub read2 port must be LIVE
|
||
// (ENABLE_READ2=1). Re-enabling read2 at the 512 KiB flat-build
|
||
// VRAM size would trip vram_bram_stub's M20K replication tripwire
|
||
// (BYTES >= 256 KiB), so this profile shrinks VRAM to 8 KiB —
|
||
// ample for the 16x8 demo framebuffer (+ a small texture for the
|
||
// textured fixture). PSMCT32_SWIZZLE=0 so the linear texel/dest
|
||
// addresses and any BITBLT upload share one VRAM layout. The
|
||
// matching *.mem fixture (textured or alpha) is selected by the
|
||
// QSF VERILOG_MACRO block under the same GS_RMW_DEMO define.
|
||
`ifdef GS_TILE_BIN4X4_DEMO
|
||
// Ch312 — 4x4 GRID (16 tiles, 64x64). Same bin-buffer mechanism scaled from
|
||
// 2x2 to 4x4. Three prims: P0 blue top-left (4 tiles), P1 red mid crossing
|
||
// seams (6 tiles), P2 white corner (1 tile), plus empty tiles. 64x64 PSMCT32
|
||
// FB fills 16 KiB -> VRAM 32 KiB, textures @ 0x4000. TILE_COLS=TILE_ROWS=4,
|
||
// TILE_MULTIPRIM=1, TILE_PRIM_COUNT=3, BIN_BUFFER_ENABLE=1. read2 LIVE.
|
||
// Fixture: bios_tile_bin4x4/payload_tile_bin4x4 (DISPLAY1 = 64x64).
|
||
.VRAM_BYTES (32 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (4),
|
||
.TILE_ROWS (4),
|
||
.TILE_MULTIPRIM (1'b1),
|
||
.TILE_PRIM_COUNT (3),
|
||
.BIN_BUFFER_ENABLE (1'b1)
|
||
`elsif GS_TILE_CAP_DEMO
|
||
// Ch315 — PRIMITIVE/BIN CAPACITY SCALING. The 4x4 grid (16 tiles, 64x64) but
|
||
// with TILE_FIFO_DEPTH=8 (was the hardcoded 4) and TILE_PRIM_COUNT=7: SEVEN
|
||
// overlapping prims buffered + binned + rendered, exceeding the old 4-deep
|
||
// limit. Stresses overlap, draw order, per-tile bin counts, full-ish + empty
|
||
// bins. The fit (vs Ch312's depth-4 4x4) gives the RESOURCE SLOPE per added
|
||
// capacity slot — the architectural answer to "where do register bins stop
|
||
// being reasonable" (the per-prim attribute FIFO dominates; bins are indices).
|
||
// read2 LIVE. 32 KiB VRAM. Fixture: bios_tile_cap/payload_tile_cap.
|
||
.VRAM_BYTES (32 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (4),
|
||
.TILE_ROWS (4),
|
||
.TILE_MULTIPRIM (1'b1),
|
||
.TILE_PRIM_COUNT (7),
|
||
.TILE_FIFO_DEPTH (8),
|
||
.BIN_BUFFER_ENABLE (1'b1)
|
||
`elsif GS_FEEDER_DEMO
|
||
// Ch330 — RUNTIME COMMAND-LIST FEEDER. The HPS streams a normalized combined-TAZ
|
||
// triangle list into the feeder's staging RAM over the bridge (0x0D8/0x0DC/0x0E4)
|
||
// and pulses retrigger (0x0E8); the feeder expands each record into the gif_reg_*
|
||
// stream gs_stub already consumes (the feeder OWNS gif_reg_* post-setup). 4x4 grid,
|
||
// TILE_PRIM_COUNT=4, FIFO depth 8. FEEDER_ENABLE=1; HEARTBEAT_SPLICE_ENABLE=0 (this
|
||
// profile ONLY — the heartbeat read-splicer would corrupt the staged list payload).
|
||
// read2 LIVE. 32 KiB VRAM. Fixture: a setup-only bootlet uploads the texture
|
||
// (bios/payload_feeder_setup), the feeder draws list A, then the HPS swaps list B
|
||
// over the bridge and retriggers — image changes with no RBF rebuild / reset.
|
||
.VRAM_BYTES (32 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (4),
|
||
.TILE_ROWS (4),
|
||
.TILE_MULTIPRIM (1'b1),
|
||
.TILE_PRIM_COUNT (4),
|
||
.TILE_FIFO_DEPTH (8),
|
||
.BIN_BUFFER_ENABLE (1'b1),
|
||
.HEARTBEAT_SPLICE_ENABLE (1'b0),
|
||
.FEEDER_ENABLE (1'b1),
|
||
.FEEDER_STG_WORDS (256)
|
||
`elsif GS_TEXALPHA_DEMO
|
||
// Ch344 — TEXTURED + source-over ALPHA SPRITE on silicon (boot payload; no feeder/tile/combined).
|
||
// SPRITE_TEX_ALPHA=1 enables the ISOLATED half-rate texel-read/dest-read/blend sprite path (the
|
||
// board's registered read2). Fixture bios/payload_texalpha: upload an 8x8 alpha-checker texture +
|
||
// opaque blue BG sprite + a textured-alpha overlay sprite over (16,16)-(48,48). 64x64 display.
|
||
// On HDMI: a checkerboard of light-gray (opaque texels) and blue (transparent texels reveal the BG).
|
||
.VRAM_BYTES (32 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.SPRITE_TEX_ALPHA (1'b1),
|
||
.COMBINED_TAZ (1'b0),
|
||
.TILE_LOCAL (1'b0),
|
||
.TILE_COLS (1),
|
||
.TILE_ROWS (1),
|
||
.TILE_MULTIPRIM (1'b0),
|
||
.TILE_PRIM_COUNT (1),
|
||
.TILE_FIFO_DEPTH (8),
|
||
.BIN_BUFFER_ENABLE (1'b0),
|
||
.HEARTBEAT_SPLICE_ENABLE (1'b0),
|
||
.FEEDER_ENABLE (1'b0)
|
||
`elsif GS_FEEDER_SPRITE_DEMO
|
||
// Ch345a — RUNTIME FEEDER textured-alpha SPRITEs (sprite_mode staging word0[33]). SPRITE_TEX_ALPHA=1
|
||
// + FEEDER_ENABLE=1. Setup bootlet (bios/payload_sprite_setup) uploads an 8x8 alpha-checker texture +
|
||
// a blue BG; FEEDER_STG_INIT=feeder_sprite streams 3 textured-alpha sprites over it at boot. On HDMI:
|
||
// 3 gray/blue alpha-checker squares on a blue field — runtime sprite ingestion of the Ch344 subset.
|
||
.VRAM_BYTES (32 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.SPRITE_TEX_ALPHA (1'b1),
|
||
.COMBINED_TAZ (1'b0),
|
||
.TILE_LOCAL (1'b0),
|
||
.TILE_COLS (1),
|
||
.TILE_ROWS (1),
|
||
.TILE_MULTIPRIM (1'b0),
|
||
.TILE_PRIM_COUNT (1),
|
||
.TILE_FIFO_DEPTH (8),
|
||
.BIN_BUFFER_ENABLE (1'b0),
|
||
.HEARTBEAT_SPLICE_ENABLE (1'b0),
|
||
.FEEDER_ENABLE (1'b1),
|
||
.FEEDER_STG_WORDS (256)
|
||
`elsif GS_SH3_CLUT_DEMO
|
||
// Ch347 — AUTHENTIC Silent Hill 3 64x64-crop PSMT8 texture + real CLUT on silicon (boot payload, DECAL).
|
||
// Bootlet (bios/payload_sh3_clut): BITBLT-upload the 256-entry CLUT + a 64x64 PSMT8 index texture, then
|
||
// TEX0(PSM=PSMT8, CLD=1, CSM2-linear) fires the clut_loader VRAM->CLUT load, then ONE 64x64 DECAL sprite.
|
||
// 64x64 display. HEARTBEAT_SPLICE OFF — the Ch251/255 heartbeat splice patches EE-RAM qword 115, which
|
||
// collides with the texture payload (proven by tb_top_psmct32_sh3_clut_demo: 4096/4096 only with it off).
|
||
// On HDMI: a dark SH3 surface with cyan glowing particles. Label: authentic SH3 palettized art on
|
||
// silicon via chosen sprite geometry (DECAL/opaque; real CLUT RGB authentic, alpha preserved-not-blended).
|
||
.VRAM_BYTES (32 * 1024),
|
||
.RAM_SIZE_BYTES (8 * 1024), // 512-qword bootlet payload (256-CLUT + 64x64 tex + draw)
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b0),
|
||
.TILE_LOCAL (1'b0),
|
||
.TILE_COLS (1),
|
||
.TILE_ROWS (1),
|
||
.TILE_MULTIPRIM (1'b0),
|
||
.TILE_PRIM_COUNT (1),
|
||
.TILE_FIFO_DEPTH (8),
|
||
.BIN_BUFFER_ENABLE (1'b0),
|
||
.HEARTBEAT_SPLICE_ENABLE (1'b0),
|
||
.FEEDER_ENABLE (1'b0)
|
||
`elsif GS_FEEDER_PERSP_DEMO
|
||
// Ch342 — PERSPECTIVE-CORRECT textured triangles through the FEEDER on the S1 path. The
|
||
// authentic cube prims are TME=1 ABE=0 FST=0 -> NON-combined -> the S1/legacy perspective
|
||
// path (ST + RGBAQ.Q -> u=S/Q via gs_reciprocal_stub + gs_persp_uv). The feeder emits
|
||
// RGBAQ/ST/XYZ2 (perspective staging word0[32]=1). Tiling/combined are OFF (the combined-TAZ
|
||
// perspective integration is a separate follow-on bug). PERSPECTIVE_CORRECT=1. read2 LIVE.
|
||
// 64 KiB VRAM (Ch343 sizing, see VRAM_BYTES below). Fixture: bios/payload_persp_feeder_setup uploads a 16x16 checkerboard @ TBP=100
|
||
// (fits the 4 KiB EE RAM); FEEDER_STG_INIT=feeder_persp renders the perspective checkerboard
|
||
// floor at boot. (The authentic 64x64 cube texture is a LOCAL extension: a bigger payload +
|
||
// 32 KiB EE RAM; the HPS then streams cube_persp.scene over the bridge.)
|
||
.VRAM_BYTES (64 * 1024), // Ch343 — FB(0..4095)+Z(4096..8191)+64x64 tex(8192..12287): 3 distinct 64x64 surfaces
|
||
.RAM_SIZE_BYTES (32 * 1024), // Ch343 — fits the 64x64 cube texture boot payload (QWC=1030)
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b0),
|
||
.TILE_LOCAL (1'b0),
|
||
.TILE_COLS (1),
|
||
.TILE_ROWS (1),
|
||
.TILE_MULTIPRIM (1'b0),
|
||
.TILE_PRIM_COUNT (1),
|
||
.TILE_FIFO_DEPTH (8),
|
||
.BIN_BUFFER_ENABLE (1'b0),
|
||
.HEARTBEAT_SPLICE_ENABLE (1'b0),
|
||
.PERSPECTIVE_CORRECT(1'b1),
|
||
.FEEDER_ENABLE (1'b1),
|
||
.FEEDER_STG_WORDS (256)
|
||
`elsif GS_SH3_PERSP_DEMO
|
||
// Ch348 — AUTHENTIC SH3 PSMT8 texture + real CLUT through the PERSPECTIVE-triangle FEEDER path
|
||
// (composes Ch342 perspective ST/Q + Ch347 SH3 PSMT8->CLUT). The feeder staging (feeder_sh3_persp)
|
||
// carries a perspective quad + TEX0(PSM=PSMT8, CLD=1, CSM2-linear) -> the feeder's TEX0 commit fires
|
||
// the clut_loader VRAM->CLUT load. Setup bootlet (bios/payload_sh3_persp) BITBLT-uploads the 64x64
|
||
// PSMT8 texture (as PSMCT32 words) + the 256-entry CLUT. PERSPECTIVE_CORRECT=1, FEEDER on, HEARTBEAT
|
||
// OFF (qword-115 collision, the Ch347 lesson). Sim-verified by tb_top_psmct32_sh3_persp_demo (95.6%
|
||
// ±1-texel neighborhood match, clut_bad=0, persp!=affine=1856). Label: authentic SH3 PSMT8 texture +
|
||
// real CLUT through the perspective triangle path, chosen perspective geometry (DECAL/opaque).
|
||
.VRAM_BYTES (32 * 1024),
|
||
.RAM_SIZE_BYTES (8 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b0),
|
||
.TILE_LOCAL (1'b0),
|
||
.TILE_COLS (1),
|
||
.TILE_ROWS (1),
|
||
.TILE_MULTIPRIM (1'b0),
|
||
.TILE_PRIM_COUNT (1),
|
||
.TILE_FIFO_DEPTH (8),
|
||
.BIN_BUFFER_ENABLE (1'b0),
|
||
.HEARTBEAT_SPLICE_ENABLE (1'b0),
|
||
.PERSPECTIVE_CORRECT(1'b1),
|
||
.FEEDER_ENABLE (1'b1),
|
||
.FEEDER_STG_WORDS (256)
|
||
`elsif GS_SH3_REAL_DRAW_DEMO
|
||
// Ch350/351/352 — the ACTUAL Ch349 SH3 draw CROP on silicon (Codex board gate): real draw geometry
|
||
// (idx89761 lamppost/bench, host-clipped to a 256x120 viewport) + the reconstructed 512x512 PSMT8
|
||
// texture in LPDDR (GS_LPDDR_TEX cache) + the REAL CSM1 CLUT via the Ch350 CSM1-grid clut_loader +
|
||
// Ch351 WIDENED perspective precision (PERSP_RECIP_IDX_BITS=11) for the far-W surface. VRAM is the
|
||
// 128 KiB CROP window (256x120 FB + CLUT). The CSM1 loader borrows read0 during its boot-time load,
|
||
// so read2 stays disabled and the framebuffer is not duplicated. The 256 KiB full texture lives in
|
||
// LPDDR, not BRAM. Golden oracle: tb_top_psmct32_sh3_real_draw_demo (ALL 96.2% <=1 texel).
|
||
// GS_LPDDR_TEX must ALSO be defined (gates the texture-cache wiring + TEX_VRAM_BASE/TEX_BYTES/N_BEATS).
|
||
// Fixtures SH3-derived -> LOCAL/gitignored: python3 tools/gs_make_sh3_real_draw_fixture.py.
|
||
.VRAM_BYTES (128 * 1024), // Ch352 — crop FB(256x120)+CLUT, under the read2 tripwire
|
||
.RAM_SIZE_BYTES (32 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b0), // CSM1 load reuses read0; avoid duplicating the 128 KiB framebuffer
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b0),
|
||
.TILE_LOCAL (1'b0),
|
||
.TILE_COLS (1),
|
||
.TILE_ROWS (1),
|
||
.TILE_MULTIPRIM (1'b0),
|
||
.TILE_PRIM_COUNT (1),
|
||
.TILE_FIFO_DEPTH (8),
|
||
.BIN_BUFFER_ENABLE (1'b0),
|
||
.HEARTBEAT_SPLICE_ENABLE (1'b0),
|
||
.PERSPECTIVE_CORRECT(1'b1),
|
||
.PERSP_RECIP_IDX_BITS(11), // Ch351 — widened far-W reciprocal precision
|
||
.CLUT_CSM1_ENABLE (1'b1),
|
||
.FEEDER_ENABLE (1'b1),
|
||
.FEEDER_STG_WORDS (768) // 68 clipped tris -> ~655 staging words
|
||
`elsif GS_TILE_LATE_DEMO
|
||
// Ch316 — LEADING-EMPTY-TILE traversal fix. ONE prim, ONLY in the LAST tile
|
||
// t15 of the 4x4 grid; tiles t0..t14 are empty and precede it. Proves the
|
||
// fix: empty leading tiles flush the GREEN clear (not black) and the renderer
|
||
// still reaches t15. read2 LIVE. 32 KiB VRAM. Fixture: bios_tile_late.
|
||
.VRAM_BYTES (32 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (4),
|
||
.TILE_ROWS (4),
|
||
.TILE_MULTIPRIM (1'b1),
|
||
.TILE_PRIM_COUNT (1),
|
||
.BIN_BUFFER_ENABLE (1'b1)
|
||
`elsif GS_TILE_PSMCT16FB_DEMO
|
||
// Ch313 — FULL PSMCT16 FRAMEBUFFER. The same Ch312 4x4 (64x64) bin-buffer
|
||
// scene, but FRAME.PSM=PSMCT16: the relaxed close_combined gate accepts a
|
||
// PSMCT16 dest when TILE_COLOR_PSMCT16=1, so the whole render/flush/scanout
|
||
// path is 16-bit RGB5A1. A 64x64 PSMCT16 FB is 8 KiB (HALF the 16 KiB
|
||
// PSMCT32 FB) -> this runs in 16 KiB VRAM vs Ch312's 32 KiB, textures @
|
||
// 0x2000. Same TILE_COLS=TILE_ROWS=4 / BIN_BUFFER_ENABLE=1. read2 LIVE.
|
||
// Fixture: bios_tile_psmct16fb/payload_tile_psmct16fb (DISPLAY1 = 64x64 PSMCT16).
|
||
.VRAM_BYTES (16 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (4),
|
||
.TILE_ROWS (4),
|
||
.TILE_MULTIPRIM (1'b1),
|
||
.TILE_PRIM_COUNT (3),
|
||
.BIN_BUFFER_ENABLE (1'b1),
|
||
.TILE_COLOR_PSMCT16 (1'b1)
|
||
`elsif GS_TILE_LPDDR128_DEMO
|
||
// Ch321 — 128x128 PSMCT16 framebuffer (32 KiB) in 64 KiB VRAM, 8x8 grid of
|
||
// 16x16 tiles. Same scaled scene as the 64x64 psmct16fb demo; the larger frame
|
||
// is the LPDDR4B scanout target (retires the 8 KiB special case for scanout).
|
||
// Fixture: bios_tile_lpddr128/payload_tile_lpddr128 (DISPLAY1 = 128x128 PSMCT16).
|
||
.VRAM_BYTES (64 * 1024),
|
||
.LPDDR_FB_BYTES (32 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (8),
|
||
.TILE_ROWS (8),
|
||
.TILE_MULTIPRIM (1'b1),
|
||
.TILE_PRIM_COUNT (3),
|
||
.BIN_BUFFER_ENABLE (1'b1),
|
||
.TILE_COLOR_PSMCT16 (1'b1)
|
||
`elsif GS_TILE_SPILL_DEMO
|
||
// Ch324 — 4x4 MULTI-TILE COLOR+Z spill/reload two-batch depth proof. A 64x64
|
||
// PSMCT32 raster framebuffer = a 4x4 grid of 16x16 tiles (FBW=1, stride=256).
|
||
// BATCH 1 (P1, near Z=0x8000, color1) + BATCH 2 (P2, mid Z=0x6000, color2),
|
||
// cross-seam triangles. Each touched tile spills color+Z to LPDDR scratch; the
|
||
// 9 two-batch tiles reload (per-tile reload_base) so the evicted near-Z survives
|
||
// and the overlap KEEPS color1 across multiple tiles; region B accepts color2;
|
||
// empty tiles stay clear. Single-prim tile mode (NO bin/multiprim — the FSM
|
||
// re-tests each prim against each tile), TILE_LOCAL + COMBINED_TAZ, clean-Z
|
||
// bootstrap. VRAM 32 KiB: 64x64x4 = 16 KiB FB (0..0x3FFF) + texture @ 0x4000.
|
||
// Ch325 — 8x8 grid = 128x128 PSMCT32 raster FB (FBW=2, stride=512), DISPLAY1=128x128.
|
||
// Ch326 (retry) — LPDDR-ONLY FB: FB_LPDDR_ONLY gates the 64 KiB BRAM FB mirror (flush
|
||
// spills only to LPDDR), texture @ vram 0x0 (fixture TBP0=0), VRAM 128->8 KiB (~120 KiB
|
||
// reclaimed), LPDDR scanout forced below. The first attempt blanked because the rd-arb
|
||
// watchdog (was 2^10 ~3.3us) DROPPED slow reads under the always-on-scanout traffic,
|
||
// hanging the scanout/probe — root-caused via tb_gs_lpddr_scanout_concurrency, fixed in
|
||
// gs_lpddr_rd_arb (watchdog 2^10 -> 2^21).
|
||
// Ch327b — scaled to a 16x16 grid = 256x256 PSMCT32 raster FB (FBW=4, stride=1024),
|
||
// displayed via the line-buffer scanout (the frame-cache would be 256 KiB; line-buffer is
|
||
// O(width)). Same renderer/features as Ch325/326 — only pixel count + LPDDR bandwidth grow.
|
||
.VRAM_BYTES (8 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (16),
|
||
.TILE_ROWS (16),
|
||
.FB_LPDDR_ONLY (1'b1)
|
||
`elsif GS_TILE_BIN_DEMO
|
||
// Ch311 — per-tile BIN BUFFER. Three primitives with distinct tile coverage
|
||
// (P0 blue all-tiles / P1 red 2-tiles / P2 white 1-tile); a binning pass
|
||
// precomputes per-tile primitive lists and each tile renders only its bin.
|
||
// Same image as the Ch305 re-test path; proves the routing machinery.
|
||
// TILE_MULTIPRIM=1, TILE_PRIM_COUNT=3, BIN_BUFFER_ENABLE=1. read2 LIVE.
|
||
// 16 KiB VRAM. Fixture: bios_tile_bin/payload_tile_bin.
|
||
.VRAM_BYTES (16 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (2),
|
||
.TILE_ROWS (2),
|
||
.TILE_MULTIPRIM (1'b1),
|
||
.TILE_PRIM_COUNT (3),
|
||
.BIN_BUFFER_ENABLE (1'b1)
|
||
`elsif GS_TILE_BILINEAR_DEMO
|
||
// Ch310 — BILINEAR filtering in the combined tile path. Two triangles
|
||
// sampling the SAME magnified 4x4 blue/white CHECKER: LEFT TEX1.MMAG=0
|
||
// (NEAREST, blocky), RIGHT TEX1.MMAG=1 (LINEAR, smoothed midtones). The
|
||
// 4-tap sampler stalls the combined per-pixel FSM ~9 cyc on the bilinear
|
||
// prim. TILE_MULTIPRIM=1, TILE_PRIM_COUNT=2, BILINEAR_ENABLE=1. read2 LIVE.
|
||
// 16 KiB VRAM. Fixture: bios_tile_bilinear/payload_tile_bilinear.
|
||
.VRAM_BYTES (16 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (2),
|
||
.TILE_ROWS (2),
|
||
.TILE_MULTIPRIM (1'b1),
|
||
.TILE_PRIM_COUNT (2),
|
||
.BILINEAR_ENABLE (1'b1)
|
||
`elsif GS_TILE_PALBILINEAR_DEMO
|
||
// Ch314 — BILINEAR filtering for a PALETTIZED (PSMT8 indexed) texture in
|
||
// the combined tile path. Two triangles sampling the SAME magnified 4x4
|
||
// INDEXED checker (palette: blue/white): LEFT TEX1.MMAG=0 (NEAREST, blocky),
|
||
// RIGHT TEX1.MMAG=1 (LINEAR). The 4-tap sampler CLUTs each tap to a color
|
||
// THEN interpolates colors (CLUT-before-interp) — so the right triangle
|
||
// shows interpolated blue<->white midtones, not blocky index steps.
|
||
// TILE_MULTIPRIM=1, TILE_PRIM_COUNT=2, BILINEAR_ENABLE=1, PALETTE_BILINEAR=1.
|
||
// read2 LIVE. 16 KiB VRAM. Fixture: bios_tile_palbilinear/payload_tile_palbilinear.
|
||
.VRAM_BYTES (16 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (2),
|
||
.TILE_ROWS (2),
|
||
.TILE_MULTIPRIM (1'b1),
|
||
.TILE_PRIM_COUNT (2),
|
||
.BILINEAR_ENABLE (1'b1),
|
||
.PALETTE_BILINEAR (1'b1)
|
||
`elsif GS_TILE_ALPHA_DEMO
|
||
// Ch309 — GS ALPHA mode expansion. The Ch305 3-primitive scene, but P1 is
|
||
// ADDITIVE (Cv=Cs+Cd via A=Cs,B=0,C=FIX=0x80,D=Cd) so it BRIGHTENS the blue
|
||
// bg to magenta (a glow/particle add), while P0/P2 stay source-over. Two
|
||
// visibly-different blend modes coexist. TILE_MULTIPRIM=1, TILE_PRIM_COUNT=3,
|
||
// ALPHA_MODES_ENABLE=1. read2 LIVE. 16 KiB VRAM. Fixture: bios_tile_alpha.
|
||
.VRAM_BYTES (16 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (2),
|
||
.TILE_ROWS (2),
|
||
.TILE_MULTIPRIM (1'b1),
|
||
.TILE_PRIM_COUNT (3),
|
||
.ALPHA_MODES_ENABLE (1'b1)
|
||
`elsif GS_TILE_PSMCT16_DEMO
|
||
// Ch308 — PSMCT16 tile color buffer. The Ch305 3-primitive scene, but the
|
||
// on-chip tile color RAM + the framebuffer are PSMCT16 (RGB5A1, 16-bit):
|
||
// tile color RAM halves, the flush writes 16-bit lanes, and scanout unpacks
|
||
// RGB5A1->ABGR (5-bit quantized vs the PSMCT32 reference). FRAME/DISPFB are
|
||
// PSMCT16 via the fixture bootlet. TILE_MULTIPRIM=1, TILE_PRIM_COUNT=3,
|
||
// TILE_COLOR_PSMCT16=1. read2 LIVE. 16 KiB VRAM. Fixture: bios_tile_psmct16.
|
||
.VRAM_BYTES (16 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (2),
|
||
.TILE_ROWS (2),
|
||
.TILE_MULTIPRIM (1'b1),
|
||
.TILE_PRIM_COUNT (3),
|
||
.TILE_COLOR_PSMCT16 (1'b1)
|
||
`elsif GS_TILE_WRAP_DEMO
|
||
// Ch307 — GS texture WRAP MODES (REPEAT + CLAMP). Two textured combined
|
||
// prims sampling the same striped 4x4 texture with UV 0..8 (2x width):
|
||
// REPEAT prim (top) shows TWO white stripes (texture tiles); CLAMP prim
|
||
// (bottom) shows ONE white stripe + clamped blue edge. Both cross the
|
||
// x=16 seam. TILE_MULTIPRIM=1, TILE_PRIM_COUNT=2, TEX_WRAP_ENABLE=1.
|
||
// read2 LIVE. 16 KiB VRAM. Fixture: bios_tile_wrap/payload_tile_wrap.
|
||
.VRAM_BYTES (16 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (2),
|
||
.TILE_ROWS (2),
|
||
.TILE_MULTIPRIM (1'b1),
|
||
.TILE_PRIM_COUNT (2),
|
||
.TEX_WRAP_ENABLE (1'b1)
|
||
`elsif GS_TILE_SCISSOR_DEMO
|
||
// Ch306 — GS SCISSOR clipping in the tiled renderer. Same 3-primitive
|
||
// scene as GS_TILE_MULTIPRIM_DEMO, PLUS a SCISSOR_1 rectangle [6..25]x
|
||
// [4..20] (crossing both tile seams) that clips the whole scene: pixels
|
||
// outside the rect stay the CLEAR color. Effective raster bounds =
|
||
// primitive bbox ∩ tile bbox ∩ scissor rect, baked into the tile walker
|
||
// (no per-pixel test). TILE_MULTIPRIM=1 + SCISSOR_ENABLE=1. read2 LIVE.
|
||
// 16 KiB VRAM. Fixture: bios_tile_scissor/payload_tile_scissor.
|
||
.VRAM_BYTES (16 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (2),
|
||
.TILE_ROWS (2),
|
||
.TILE_MULTIPRIM (1'b1),
|
||
.TILE_PRIM_COUNT (3),
|
||
.SCISSOR_ENABLE (1'b1)
|
||
`elsif GS_TILE_MULTIPRIM_DEMO
|
||
// Ch305 — MULTI-PRIMITIVE tiled scene over the 2x2 grid. A fixed list of
|
||
// 3 combined TME+ABE+ZTE primitives (opaque BLUE background tri @ far Z,
|
||
// depth-tested RED textured tri @ mid Z, translucent WHITE tri @ near Z),
|
||
// all spanning the 32x32 region, is re-rendered per tile IN ORDER so
|
||
// later primitives depth-test/alpha-blend over earlier ones. Proves draw
|
||
// order + depth + alpha interactions across tile seams. TILE_LOCAL=1,
|
||
// TILE_COLS=2, TILE_ROWS=2, TILE_MULTIPRIM=1, TILE_PRIM_COUNT=3. read2 LIVE.
|
||
// 16 KiB VRAM (32-row FB 0..0x1FFF + 3 textures @ 0x2000/0x2100/0x2200;
|
||
// Z on-chip). Fixture: bios_tile_multiprim/payload_tile_multiprim.
|
||
.VRAM_BYTES (16 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (2),
|
||
.TILE_ROWS (2),
|
||
.TILE_MULTIPRIM (1'b1),
|
||
.TILE_PRIM_COUNT (3)
|
||
`elsif GS_TILE2X2_DEMO
|
||
// Ch304 — 2x2 MULTI-TILE renderer. One combined TME+ABE+ZTE triangle
|
||
// spanning a 32x32 region (a 2x2 grid of 16x16 on-chip tiles, crossing
|
||
// x=16 & y=16 seams) is re-tested against each tile and rendered
|
||
// CLEAR->RENDER(clipped)->FLUSH per tile; seams are continuous (screen-
|
||
// space attributes). TILE_LOCAL=1, TILE_COLS=2, TILE_ROWS=2. read2 LIVE
|
||
// (texture). 16 KiB VRAM (32-row FB 0..0x1FFF + texture 0x2000; Z on-chip).
|
||
// Fixture: bios_tile2x2/payload_tile2x2.
|
||
.VRAM_BYTES (16 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1),
|
||
.TILE_COLS (2),
|
||
.TILE_ROWS (2)
|
||
`elsif GS_TILE_DEMO
|
||
// Ch303 — TILE-LOCAL combined renderer. The combined TME+ABE+ZTE triangle
|
||
// renders into an ON-CHIP 16x16 color+Z tile (CLEAR->RENDER->FLUSH); only
|
||
// color/Z move on-chip, texture still from VRAM. Same visual as the Ch302
|
||
// combined demo (green clear; triangle top blended / bottom occluded) but
|
||
// the color/Z RMW is resolved on-chip and flushed to the framebuffer.
|
||
// TILE_LOCAL=1 (implies COMBINED_TAZ=1). read2 LIVE (texture). 8 KiB VRAM
|
||
// (16-row FB 0..0xFFF + texture 0x1000; Z is on-chip, not in VRAM).
|
||
// Fixture: bios_tile/payload_tile.
|
||
.VRAM_BYTES (8 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1),
|
||
.TILE_LOCAL (1'b1)
|
||
`elsif GS_COMBINED_DEMO
|
||
// Ch302 — COMBINED textured+alpha+depth demo profile. One TME+ABE+ZTE
|
||
// triangle runs the multi-beat per-pixel FSM (Zread->Ztest->texel->dest->
|
||
// colorwrite->Zwrite) over a green Z-writing background: top half passes
|
||
// depth (translucent texture blended over green), bottom half fails
|
||
// (occluded, green shows through). COMBINED_TAZ=1. read2 LIVE. 16 KiB VRAM
|
||
// (FBW=1 -> 256 B/row -> 16-row FB 0..0xFFF, Z 0x1000..0x1FFF, texture
|
||
// 0x2000). PSMCT32_SWIZZLE=0. Fixture: bios_combined/payload_combined.
|
||
.VRAM_BYTES (16 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.COMBINED_TAZ (1'b1)
|
||
`elsif GS_PERSP_DEMO
|
||
// Ch301 — PERSPECTIVE-CORRECT textured-triangle demo profile. A receding
|
||
// "floor" quad (2 TME TRIANGLEs) textured with a checkerboard; coords via
|
||
// ST (S=u/w,T=v/w) + RGBAQ.Q (=1/w). PERSPECTIVE_CORRECT=1 enables the
|
||
// pipelined reciprocal-LUT divide (gs_persp_uv) so the checkerboard
|
||
// compresses toward the far (top) edge. Texture is LINEAR PSMCT32
|
||
// (PSMCT32_SWIZZLE=0) — perspective isolated from the swizzle family.
|
||
// read2 LIVE (texel fetch). 8 KiB VRAM (16x24 FB + 16x16 texture at
|
||
// 0x800). Fixture: bios_persp/payload_persp.
|
||
.VRAM_BYTES (8 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.PERSPECTIVE_CORRECT(1'b1)
|
||
`elsif GS_SWZ32_DEMO
|
||
// Ch300 — SWIZZLED PSMCT32 (direct-color) texture demo profile, the
|
||
// CLOSURE rung of the swizzle layout family. PSMCT32_SWIZZLE is a single
|
||
// per-format gate, so setting it 1 swizzles BOTH the framebuffer AND the
|
||
// PSMCT32 texture (upload + sample) — the most faithful PS2 behavior
|
||
// (PSMCT32 is always block-swizzled on real hardware). The 22x40 texture
|
||
// (only 16x40 sampled) lands swizzled at 0x3000..; VRAM is 32 KiB (still
|
||
// under the read2 M20K-replication tripwire). Fixture: bios_swz32/
|
||
// payload_swz32 (selected by the matching QSF VERILOG_MACRO block).
|
||
.VRAM_BYTES (32 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b1)
|
||
`elsif GS_SWZ8_DEMO
|
||
// Ch299 — SWIZZLED PSMT8 texture demo profile (sibling of the Ch298
|
||
// PSMT4 swizzle below). The PSMT8 index texture lands swizzled at
|
||
// byte 0x3400.. with its CLUT at 0x3000, so VRAM is bumped to 32 KiB
|
||
// (still well under the read2 M20K-replication tripwire at 256 KiB).
|
||
// PSMT8_SWIZZLE=1 routes BOTH the texture UPLOAD (gif_image_xfer) and
|
||
// the SAMPLE (gs_texture_unit) through the real PS2 block layout;
|
||
// PSMCT32_SWIZZLE stays 0 (the framebuffer is linear). Fixture:
|
||
// bios_swz8/payload_swz8 (selected by the matching QSF VERILOG_MACRO
|
||
// block under GS_SWZ8_DEMO).
|
||
.VRAM_BYTES (32 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.PSMT8_SWIZZLE (1'b1)
|
||
`elsif GS_SWZ4_DEMO
|
||
// Ch298 — SWIZZLED PSMT4 texture demo profile. The 64x32 PSMT4
|
||
// index texture lands swizzled at byte 0x2400..0x27FF, so VRAM is
|
||
// bumped to 16 KiB (still well under the read2 M20K-replication
|
||
// tripwire at 256 KiB). PSMT4_SWIZZLE=1 routes BOTH the texture
|
||
// UPLOAD (gif_image_xfer) and the SAMPLE (gs_texture_unit) through
|
||
// the real PS2 block layout; PSMCT32_SWIZZLE stays 0 (the PSMCT32
|
||
// framebuffer is linear). Fixture: bios_swz4/payload_swz4 (selected
|
||
// by the matching QSF VERILOG_MACRO block under GS_SWZ4_DEMO).
|
||
.VRAM_BYTES (16 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0),
|
||
.PSMT4_SWIZZLE (1'b1)
|
||
`elsif GS_LPDDR_TEX_DEMO
|
||
// Ch322 — LPDDR-backed TEXTURE demo (tritex scene). An 8x8 PSMCT32 texture at
|
||
// TBP0=8 + one textured triangle — IDENTICAL to the BRAM tritex path except the
|
||
// texel SOURCE: paired with the GS_LPDDR_TEX feature macro, the sampler reads from
|
||
// the prefilled LPDDR texture cache (warmed by the HPS write-probe) instead of VRAM.
|
||
// 8 KiB VRAM covers the FB + the texture's addressed range [0x800,0x1000).
|
||
// Fixture: bios_tritex/payload_tritex (selected by the matching QSF macro block).
|
||
.VRAM_BYTES (8 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0)
|
||
`else
|
||
.VRAM_BYTES (8 * 1024),
|
||
.VRAM_ENABLE_READ2 (1'b1),
|
||
.PSMCT32_SWIZZLE (1'b0)
|
||
`endif
|
||
`else
|
||
.VRAM_BYTES (512 * 1024),
|
||
// Ch251.4 fit rescue — PSMCT32-only build, so the PSMT4 RMW
|
||
// read2 port is dead. Setting ENABLE_READ2=0 collapses the
|
||
// vram_bram_stub from two replicated 1W+1R simple-dual-port
|
||
// M20K banks (~410 M20Ks at 512 KiB) to one (~205 M20Ks),
|
||
// bringing total RAM Blocks back under the Agilex 5 budget
|
||
// of 358. Sim TBs leave this at the default `1` to keep
|
||
// PSMT4 paths exercised. See
|
||
// docs/decisions/0006-vram-roadmap.md for the longer-term
|
||
// arbitrated-VRAM follow-up.
|
||
.VRAM_ENABLE_READ2 (1'b0)
|
||
// PSMCT32_SWIZZLE defaults to 1'b1 — the swizzled flat
|
||
// production demo, byte-identical to the pre-Ch295 build.
|
||
`endif
|
||
`ifdef GS_LPDDR_TEX
|
||
// Ch322 — route the texel-fetch port to the prefilled LPDDR texture cache.
|
||
// (leading comma attaches to the selected profile's last param above.)
|
||
, .GS_LPDDR_TEX(1'b1), .TEX_VRAM_BASE(TEXC_VRAM_BASE), .TEX_CACHE_BYTES(TEXC_BYTES)
|
||
`endif
|
||
`ifdef GS_TILE_SPILL
|
||
// Ch323 — enable the tile color+Z spill/reload phases (TP_ZFLUSH/TP_RELOAD) so the
|
||
// LPDDR spill writers + gs_tile_reload below are actually exercised.
|
||
, .TILE_SPILL_ENABLE(1'b1)
|
||
`endif
|
||
) u_demo (
|
||
.clk (design_clk),
|
||
.rst_n (core_rst_n),
|
||
.core_go (core_go),
|
||
.r (demo_video_r),
|
||
.g (demo_video_g),
|
||
.b (demo_video_b),
|
||
.hsync (demo_video_hsync),
|
||
.vsync (demo_video_vsync),
|
||
.de (demo_video_de),
|
||
.core_halt (core_halt),
|
||
.dma_done_seen (dma_done_seen),
|
||
.frame_seen (frame_seen),
|
||
.raster_overflow(raster_overflow),
|
||
.frame_toggle (frame_toggle),
|
||
.dma_done_toggle(dma_done_toggle),
|
||
// Ch255 — heartbeat color override from the controller. Tap
|
||
// INPUT_P1_RAW[9] (Sony ○ / JOY_A) and [7] (Sony × / JOY_B)
|
||
// directly off the bridge's already-design_clk-domain output;
|
||
// the bootlet keeps animating cyan↔red and the wrapper-side
|
||
// mux only changes what the GS paints on the next drain.
|
||
// See top_psmct32_raster_demo_bram.sv for the priority table.
|
||
.joy_a_pressed_i(bridge_input_p1_raw[9]),
|
||
.joy_b_pressed_i(bridge_input_p1_raw[7]),
|
||
// Ch318 — PSMCT16 tile-flush stream tap for the LPDDR AXI writer below.
|
||
.flush_emit_o (demo_flush_emit),
|
||
.flush_addr_o (demo_flush_addr),
|
||
.flush_pix16_o(demo_flush_pix16),
|
||
.flush_psm_o (demo_flush_psm),
|
||
.vram_read_addr_o(demo_vram_raddr), // Ch320 — PCRTC scanout addr for LPDDR4B scanout
|
||
.pix_window_o (demo_pix_window), // Ch320 — displayed-frame window gate
|
||
// Ch322 — texel-fetch tap for the prefilled LPDDR texture cache (open/inert
|
||
// unless GS_LPDDR_TEX; the bram-top mux constant-folds to BRAM when the param is 0).
|
||
.gs_tex_rd_en_o (demo_gs_tex_rd_en),
|
||
.gs_tex_rd_addr_o(demo_gs_tex_rd_addr),
|
||
.tex_cache_data_i(demo_tex_cache_data),
|
||
.tex_cache_ready_i(demo_tex_cache_ready),
|
||
.tex_cache_hits_o(demo_tex_cache_hits),
|
||
.tex_bram_hits_o (demo_tex_bram_hits),
|
||
// Ch323 — tile color+Z spill/reload (inert outputs when TILE_SPILL_ENABLE=0).
|
||
.flush_color32_o (demo_flush_color32),
|
||
.z_flush_emit_o (demo_z_flush_emit),
|
||
.z_flush_addr_o (demo_z_flush_addr),
|
||
.z_flush_data_o (demo_z_flush_data),
|
||
.tile_color_flush_emit_o(demo_cflush_emit),
|
||
.tile_color_flush_addr_o(demo_cflush_addr),
|
||
.tile_color_flush_data_o(demo_cflush_data),
|
||
.reload_start_o (demo_reload_start),
|
||
.tile_reload_raddr_o(demo_tile_reload_raddr),
|
||
.reload_base_o (demo_reload_base),
|
||
.tile_reload_ready_i(demo_tile_reload_ready),
|
||
.tile_reload_color_i(demo_tile_reload_color),
|
||
.tile_reload_z_i (demo_tile_reload_z),
|
||
.tile_phase_o (demo_tile_phase),
|
||
// Ch330 Brick 4 — runtime command-list feeder (driven by the bridge in
|
||
// USE_QSYS_TOP via the CDC pulses above; tied 0 otherwise). FEEDER_ENABLE=0
|
||
// profiles constant-fold these to g_no_feeder, so the ports are inert.
|
||
.feeder_stg_we_i (feeder_stg_we_pulse_w),
|
||
.feeder_stg_waddr_i(feeder_stg_waddr_w),
|
||
.feeder_stg_wdata_i(feeder_stg_wdata_w),
|
||
.feeder_go_i (feeder_go_pulse_w),
|
||
.feeder_ready_o (feeder_ready_w),
|
||
.feeder_records_o (feeder_records_w),
|
||
.feeder_waits_o (feeder_waits_w)
|
||
);
|
||
`ifndef GS_LPDDR_TEX
// Ch322 — this profile has no LPDDR texture cache. Park the cache-reply inputs of
// u_demo: ready is held low and the data word is zero. (The texel tap returns BRAM;
// the bram-top mux constant-folds to vram_read2 when its GS_LPDDR_TEX param is 0.)
assign demo_tex_cache_ready = 1'b0;
assign demo_tex_cache_data  = 32'd0;
`endif
`ifndef GS_TILE_SPILL
// Ch323 — this profile has no spill/reload. Park the tile-reload reply inputs of
// u_demo (the bram-top ignores them when TILE_SPILL_ENABLE=0). Under GS_TILE_SPILL
// these three nets are driven by gs_tile_reload instead.
assign demo_tile_reload_z     = 32'd0;
assign demo_tile_reload_color = 32'd0;
assign demo_tile_reload_ready = 1'b0;
`endif

// -------------------------------------------------------------
// Ch323 diag — UPSTREAM tile-spill/reload EVENT counters.
// Clocked by design_clk and cleared while core_rst_n is low (per render), so each
// value reads as "events this render". Codex's bring-up order: prove the rasterizer
// actually ENTERED the spill/reload phases and EMITTED data BEFORE diagnosing LPDDR
// commit. Surfaced via the HPS bridge (0x0A0..).
// -------------------------------------------------------------
// Tile-phase encodings that get a counter (compared against demo_tile_phase).
localparam [2:0] EVP_RENDER = 3'd2;
localparam [2:0] EVP_FLUSH  = 3'd3;
localparam [2:0] EVP_RELOAD = 3'd5;
localparam [2:0] EVP_ZFLUSH = 3'd6;

// Previous-cycle samples used by the change / rising-edge detectors.
reg [2:0]  ev_phase_d;
reg        ev_rstart_d;
reg        ev_rready_d;
// Phase-entry counters.
reg [31:0] ev_tp_flush_q;
reg [31:0] ev_tp_zflush_q;
reg [31:0] ev_tp_reload_q;
reg [31:0] ev_tp_render_q;
// Emit / reload handshake counters.
reg [31:0] ev_flush_emit_q;
reg [31:0] ev_zflush_emit_q;
reg [31:0] ev_reload_start_q;
reg [31:0] ev_reload_ready_q;

always_ff @(posedge design_clk or negedge core_rst_n) begin
    if (!core_rst_n) begin
        ev_phase_d        <= 3'd0;
        ev_rstart_d       <= 1'b0;
        ev_rready_d       <= 1'b0;
        ev_tp_flush_q     <= 32'd0;
        ev_tp_zflush_q    <= 32'd0;
        ev_tp_reload_q    <= 32'd0;
        ev_tp_render_q    <= 32'd0;
        ev_flush_emit_q   <= 32'd0;
        ev_zflush_emit_q  <= 32'd0;
        ev_reload_start_q <= 32'd0;
        ev_reload_ready_q <= 32'd0;
    end else begin
        // One-cycle history.
        ev_phase_d  <= demo_tile_phase;
        ev_rstart_d <= demo_reload_start;
        ev_rready_d <= demo_tile_reload_ready;

        // Phase ENTRY detect: the phase value differs from last cycle's value.
        // Only the four encodings above are counted; any other phase is ignored.
        if (demo_tile_phase != ev_phase_d) begin
            if      (demo_tile_phase == EVP_FLUSH)  ev_tp_flush_q  <= ev_tp_flush_q  + 32'd1;
            else if (demo_tile_phase == EVP_ZFLUSH) ev_tp_zflush_q <= ev_tp_zflush_q + 32'd1;
            else if (demo_tile_phase == EVP_RELOAD) ev_tp_reload_q <= ev_tp_reload_q + 32'd1;
            else if (demo_tile_phase == EVP_RENDER) ev_tp_render_q <= ev_tp_render_q + 32'd1;
        end

        // Emit counters advance on every cycle the strobe is high.
        if (demo_flush_emit)   ev_flush_emit_q  <= ev_flush_emit_q  + 32'd1;
        if (demo_z_flush_emit) ev_zflush_emit_q <= ev_zflush_emit_q + 32'd1;

        // Reload start / ready are counted on their rising edges only.
        if (demo_reload_start && !ev_rstart_d)
            ev_reload_start_q <= ev_reload_start_q + 32'd1;
        if (demo_tile_reload_ready && !ev_rready_d)
            ev_reload_ready_q <= ev_reload_ready_q + 32'd1;
    end
end

// -------------------------------------------------------------
// Ch229 — design-domain tile RAM shadow.
// The bridge broadcasts tile writes (0x1000..0x1FFF window, 32-bit words) into
// the design clock domain via a toggle-based CDC. The shadow's combinational
// read port feeds the Ch245 platform-OSD char-BRAM adapter below: `osd_overlay`
// reads 11-bit cell addresses and the adapter selects the low/high 16-bit cell
// from each 32-bit word. The Ch228..Ch244 PS2-local `osd_overlay_stub` that
// this shadow used to feed was retired in Ch249.
// -------------------------------------------------------------
tile_ram_cdc u_tile_cdc (
    // Bridge side (CLOCK2_50): toggle-announced write index + data.
    .bclk                    (CLOCK2_50),
    .breset_n                (~ninit_done),
    .bclk_wr_toggle          (bridge_tile_wr_toggle),
    .bclk_wr_index           (bridge_tile_wr_index),
    .bclk_wr_data            (bridge_tile_wr_data),
    // Design side (design_clk): shadow read port used by the OSD adapter.
    .dclk                    (design_clk),
    .dreset_n                (core_rst_n),
    .dclk_rd_index           (overlay_tile_rd_index),
    .dclk_rd_data            (overlay_tile_rd_data),
    // Diagnostic count exported by the CDC block.
    .tile_wr_too_close_count (tile_wr_too_close_count)
);

// -------------------------------------------------------------
// Ch245 — Platform OSD migration. Replaces the PS2-local stub
// (`osd_overlay_stub`, retired in Ch249) with the shared
// `retroDE_splash/rtl/platform/osd_overlay.sv`
// and its menu navigation FSM. Every sibling core wires this the
// same way, so retrodesd's menu chrome (border, cursor highlight,
// CGA palette, CP437 line-draw glyphs) renders identically here.
//
// Wiring summary:
//   - osd_x/y/cols/rows decoded from the bridge's OSD_CFG0 fields
//   - cursor_attr decoded from OSD_CFG1[23:16]
//   - menu FSM runs on CLOCK2_50 (= sys_clk for siblings)
//   - char BRAM is true dual-port (bridge writes on CLOCK2_50,
//     overlay reads on design_clk). We keep the Ch229 32-bit-word
//     packed storage in tile_ram_cdc and add a 32→16 cell-select
//     adapter on the read side.
//   - osd_global_transparent_bg hardwired to 0 (matches NES).
//   - osd_scale hardwired to 3'd2 (matches NES; platform clamps
//     2..4 internally).
// -------------------------------------------------------------
// Pixel-coordinate counters derived from demo_video_de / demo_video_vsync.
// NES uses video_timing.sv to generate these; we stay local to keep the
// existing video pipeline untouched.
//   pixel_x : held at 0 while de is low, so the first active pixel of a line
//             sees 0; increments on every design_clk cycle with de high.
//   pixel_y : cleared on the falling edge of vsync (frame start) and bumped
//             on the falling edge of de (end of each active line).
logic [11:0] pixel_x, pixel_y;
logic        demo_video_de_d, demo_video_vsync_d;   // previous-cycle samples for edge detect
always_ff @(posedge design_clk or negedge core_rst_n) begin
    if (!core_rst_n) begin
        pixel_x            <= 12'd0;
        pixel_y            <= 12'd0;
        demo_video_de_d    <= 1'b0;
        // Resets to 1: a low vsync on the first cycle out of reset is therefore
        // seen as a falling edge and clears both counters.
        demo_video_vsync_d <= 1'b1;
    end else begin
        demo_video_de_d    <= demo_video_de;
        demo_video_vsync_d <= demo_video_vsync;
        // Priority: frame start > end of line > active pixel > blanking.
        if (demo_video_vsync_d && !demo_video_vsync) begin
            // Falling edge of vsync (active-low) → new frame.
            pixel_y <= 12'd0;
            pixel_x <= 12'd0;
        end else if (demo_video_de_d && !demo_video_de) begin
            // Falling edge of de → end of active line: bump y, rewind x.
            pixel_y <= pixel_y + 12'd1;
            pixel_x <= 12'd0;
        end else if (demo_video_de) begin
            // Inside the active line: advance x.
            pixel_x <= pixel_x + 12'd1;
        end else begin
            // Blanking: keep x parked at 0 for the next line.
            pixel_x <= 12'd0;
        end
    end
end

// ---- CDC: CLOCK2_50 bridge registers + menu FSM outputs → design_clk.
// Two-flop synchronizers per the sibling-core pattern; the sources are treated
// as frame-stable. Index [0] is the first capture stage, index [1] the output.
// NOTE(review): the multi-bit values are resynchronized bit-by-bit, so a value
// sampled while it is changing could be torn for a cycle — confirm that is
// acceptable for every consumer.
(* dont_merge, preserve *) logic [31:0] osd_cfg0_sync   [0:1];
(* dont_merge, preserve *) logic [31:0] osd_cfg1_sync   [0:1];
(* dont_merge, preserve *) logic [1:0]  osd_active_sync;
(* dont_merge, preserve *) logic [4:0]  cursor_row_sync [0:1];
always_ff @(posedge design_clk) begin
    // Stage 1: capture the CLOCK2_50-domain sources.
    osd_cfg0_sync[0]   <= bridge_osd_cfg0;
    osd_cfg1_sync[0]   <= bridge_osd_cfg1;
    osd_active_sync[0] <= menu_osd_active;
    cursor_row_sync[0] <= menu_cursor_row;
    // Stage 2: re-register for the design_clk consumers.
    osd_cfg0_sync[1]   <= osd_cfg0_sync[0];
    osd_cfg1_sync[1]   <= osd_cfg1_sync[0];
    osd_active_sync[1] <= osd_active_sync[0];
    cursor_row_sync[1] <= cursor_row_sync[0];
end
// design_clk-domain views (second synchronizer stage).
wire [31:0] osd_cfg0_pix;
wire [31:0] osd_cfg1_pix;
wire        osd_active_pix;
wire [4:0]  cursor_row_pix;
assign osd_cfg0_pix   = osd_cfg0_sync[1];
assign osd_cfg1_pix   = osd_cfg1_sync[1];
assign osd_active_pix = osd_active_sync[1];
assign cursor_row_pix = cursor_row_sync[1];

// Field decode of the synchronized OSD config words (matches the sibling-ABI
// bit layout used by NES).
//   CFG0[31:24] → osd_y, CFG0[23:16] → osd_x, both in units of 16 pixels
//                 (chars × 16 at 2x scale): four zero LSBs are appended.
//   CFG0[12:8]  → row count, CFG0[5:0] → column count.
//   CFG1[23:16] → cursor attribute byte.
wire [11:0] osd_x_pix;
wire [11:0] osd_y_pix;
wire [5:0]  osd_cols_pix;
wire [4:0]  osd_rows_pix;
wire [7:0]  cursor_attr;
assign osd_x_pix    = {osd_cfg0_pix[23:16], 4'b0000};
assign osd_y_pix    = {osd_cfg0_pix[31:24], 4'b0000};
assign osd_cols_pix = osd_cfg0_pix[5:0];
assign osd_rows_pix = osd_cfg0_pix[12:8];
assign cursor_attr  = osd_cfg1_pix[23:16];

// ---- Char BRAM read-side adapter (design_clk domain).
// The platform overlay addresses 16-bit cells with an 11-bit char_rd_addr; the
// tile_ram_cdc shadow stores 32-bit words behind a 10-bit index. Address bits
// [10:1] pick the word and bit [0] picks the half (0 = low 16 bits, 1 = high
// 16 bits). The selected cell is registered once to match the platform
// pipeline's 1-cycle BRAM latency.
wire [10:0] osd_char_rd_addr_w;
wire [9:0]  shadow_word_idx;
wire        cell_half_sel;
wire [15:0] cell_data_w;
logic [15:0] osd_char_rd_data_q;

assign shadow_word_idx       = osd_char_rd_addr_w[10:1];
assign cell_half_sel         = osd_char_rd_addr_w[0];
assign overlay_tile_rd_index = shadow_word_idx;
assign cell_data_w           = cell_half_sel ? overlay_tile_rd_data[31:16]
                                             : overlay_tile_rd_data[15:0];

always_ff @(posedge design_clk) begin
    osd_char_rd_data_q <= cell_data_w;
end

// ---- Platform font ROM (1-cycle latency, design_clk domain).
// Address comes from the overlay's font_rd_addr port; the data byte is
// returned to its font_rd_data port.
wire [7:0]  osd_font_rd_data_w;
wire [10:0] osd_font_rd_addr_w;
osd_font_rom u_osd_font (
    .clk  (design_clk),
    .addr (osd_font_rd_addr_w),
    .data (osd_font_rd_data_w)
);

// ---- Platform menu navigation FSM (sys_clk = CLOCK2_50).
// CLK_FREQ_HZ tells the FSM how to derive its hold/cooldown timers — our
// sys_clk is the unaltered 50 MHz CLOCK2_50.
osd_menu_fsm #(
    .CLK_FREQ_HZ (50_000_000)
) u_osd_menu (
    .clk             (CLOCK2_50),
    .reset_n         (~ninit_done),

    // Controller input: INPUT_P1_RAW (un-remapped retroDE bitmap) per
    // retroDE_nes.sv:1235. retrodesd may remap INPUT_P1 per-game; the OSD
    // uses the raw form so Select/Start/etc land in the FSM's expected bit
    // positions.
    .joy0_buttons    (bridge_input_p1_raw),

    // OSD_CTRL bit layout matches nes_hps_bridge: bit 0 = enable, bit 2 =
    // force_open, bit 3 = force_close (the same bit Ch245's bridge
    // self-clears as the "request" bit).
    .osd_enable      (bridge_osd_ctrl[0]),
    .force_open      (bridge_osd_ctrl[2]),
    .force_close     (bridge_osd_ctrl[3]),

    // Cursor row range, taken straight from OSD_CFG1 in the bridge domain.
    .menu_first_row  (bridge_osd_cfg1[4:0]),
    .menu_last_row   (bridge_osd_cfg1[12:8]),

    // FSM outputs (CLOCK2_50 domain).
    .osd_active      (menu_osd_active),
    .cursor_row      (menu_cursor_row),
    .act_select      (menu_act_select),
    .act_back        (menu_act_back),
    .act_scroll_down (menu_act_scroll_down),
    .act_scroll_up   (menu_act_scroll_up),
    .act_open        (menu_act_open)
);

// Ch320 — video-source mux: BRAM scanout (demo_video_*, DEFAULT) vs LPDDR4B
// scanout, selected at runtime by the bridge video_src bit. video_src_w
// defaults 0 (BRAM), so the default build is byte-identical. de/hsync/vsync
// stay from the PCRTC.
// The LPDDR scanout pixels are gated by demo_pix_window (the PCRTC's
// displayed-frame window) so they show ONE frame and blank outside it —
// exactly like BRAM scanout — instead of tiling the frame across the whole
// active line.
wire [7:0] scan_r_win;
wire [7:0] scan_g_win;
wire [7:0] scan_b_win;
assign scan_r_win = demo_pix_window ? scan_r_w : 8'd0;
assign scan_g_win = demo_pix_window ? scan_g_w : 8'd0;
assign scan_b_win = demo_pix_window ? scan_b_w : 8'd0;
// Ch326 — the GS_TILE_SPILL profile has FB_LPDDR_ONLY (no BRAM FB mirror), so it MUST
// display from LPDDR scanout. Force it; other profiles keep the runtime bridge bit.
// Every branch below defines video_src_eff, frame_ready_r and frame_ready_emif.
`ifdef GS_TILE_SPILL
wire video_src_eff  = 1'b1;
wire video_src_emif = 1'b1;   // Ch352 — forced LPDDR scanout; no bridge CDC on this profile
// Ch328 — BOOT DISPLAY GATE. With FB_LPDDR_ONLY there is no stable BRAM fallback while
// LPDDR is still being rendered, and the LPDDR scanout is forced on from reset — so
// power-on (RBF-only, no script) would display a HALF-WRITTEN / stale framebuffer (the
// corrupt boot image seen at 256x256, where the 4x-longer render lost the race). The gate
// is keyed off the WRITER drain, not the render FSM: arm on dma_done_seen (render DMA
// consumed), then latch frame_ready on the 2nd vsync edge after — by which point the
// color spill writer has drained the whole frame to LPDDR (>> a frame of margin) AND we
// engage on a clean frame boundary.
logic        vsync_q;        // previous-cycle demo_video_vsync (rising-edge detect)
logic        fr_armed;       // sticky: render DMA seen, or boot timeout reached
logic        frame_ready_r;  // sticky: display gate open
logic [1:0]  fr_vcnt;        // vsync rising edges counted while armed; stops at 2
// Boot timeout, ~5.4 s @ 25 MHz — insurance: if dma_done_seen never pulses, the gate
// must NOT wedge the display black forever; force-arm by then (the render is long-since
// complete) so the worst case is a late frame. The counter saturates at all-ones.
// NOTE(review): bit 27 sets after 2^27 design_clk cycles, which is ~5.4 s only at 25 MHz;
// confirm the design_clk frequency this figure assumes.
logic [27:0] fr_boot_to;
always_ff @(posedge design_clk or posedge async_rst_assert) begin
    if (async_rst_assert) begin
        vsync_q       <= 1'b0;
        fr_armed      <= 1'b0;
        fr_vcnt       <= 2'd0;
        frame_ready_r <= 1'b0;
        fr_boot_to    <= 28'd0;
    end else begin
        vsync_q <= demo_video_vsync;
        if (fr_boot_to != 28'hFFFFFFF) fr_boot_to <= fr_boot_to + 28'd1;
        if (dma_done_seen || fr_boot_to[27]) fr_armed <= 1'b1;   // render DMA done OR timeout
        if (fr_armed && demo_video_vsync && !vsync_q && fr_vcnt != 2'd2)
            fr_vcnt <= fr_vcnt + 1'b1;
        if (fr_vcnt == 2'd2) frame_ready_r <= 1'b1;   // latched (re-renders never re-blank)
    end
end
// Sync the design-domain latch into the emif domain for the line-buffer prefetch enable.
// Attributes follow the file's synchronizer convention (see the OSD CDC registers).
(* dont_merge, preserve *) logic [1:0] fr_emif_sync;
always_ff @(posedge emif_clk or negedge emif_reset_n) begin
    if (!emif_reset_n) fr_emif_sync <= 2'b00;
    else               fr_emif_sync <= {fr_emif_sync[0], frame_ready_r};
end
wire frame_ready_emif = fr_emif_sync[1];
`else
`ifdef GS_LPDDR4B_FB
// Ch352 CDC (Codex, option B) — video_src/scanout_lb cross from the bridge (CLOCK2_50)
// into BOTH design_clk (HDMI source mux) and emif_clk (LPDDR scanout enable + reader
// routing). Capture them COHERENTLY on the bridge commit TOGGLE (bundled, never
// independent per-bit sampling) in EACH consuming domain.
//
// REGISTER CONTRACT (enforced by HOST TOOLING, not live-switch hardware): set scanout_lb
// while video_src=0, THEN raise video_src. NO live owner switching — flipping scanout_lb
// while LPDDR scanout is active (video_src=1, reads in flight) is OUT OF CONTRACT. The
// transaction-drain owner-handoff that would make live switching safe is DEFERRED: no
// shipping profile live-switches, and SH3 keeps video_src=0 throughout (BRAM scanout).
// This is a PER-PROFILE statement, NOT a platform-wide guarantee.
//
// commit_*_sync[1:0] synchronize the commit toggle; [2] is the delayed copy used to
// detect a toggle ([2] != [1]).
(* dont_merge, preserve *) logic [2:0] commit_d_sync;
logic vsrc_cap_d;       // video_src as captured on the most recent commit toggle
logic vsync_d_q;        // previous-cycle demo_video_vsync (rising-edge detect)
logic video_src_dclk;   // value actually driving the HDMI source mux
always_ff @(posedge design_clk or posedge async_rst_assert) begin
    if (async_rst_assert) begin
        commit_d_sync  <= 3'd0;
        vsrc_cap_d     <= 1'b0;
        vsync_d_q      <= 1'b0;
        video_src_dclk <= 1'b0;
    end else begin
        commit_d_sync <= {commit_d_sync[1:0], lpddr_ctrl_commit_w};
        if (commit_d_sync[2] != commit_d_sync[1]) vsrc_cap_d <= video_src_w;   // coherent capture on commit
        vsync_d_q <= demo_video_vsync;
        if (demo_video_vsync && !vsync_d_q) video_src_dclk <= vsrc_cap_d;      // apply at frame boundary
    end
end
wire video_src_eff    = video_src_dclk;
wire frame_ready_r    = 1'b1;   // other profiles: BRAM mirror is a stable fallback, no gate
wire frame_ready_emif = 1'b1;
// emif-domain coherent bundled capture (LPDDR scanout enable + reader routing below).
(* dont_merge, preserve *) logic [2:0] commit_e_sync;
logic video_src_emif, scanout_lb_emif;
always_ff @(posedge emif_clk or negedge emif_reset_n) begin
    if (!emif_reset_n) begin
        commit_e_sync   <= 3'd0;
        video_src_emif  <= 1'b0;
        scanout_lb_emif <= 1'b0;
    end else begin
        commit_e_sync <= {commit_e_sync[1:0], lpddr_ctrl_commit_w};
        if (commit_e_sync[2] != commit_e_sync[1]) begin   // {video_src,scanout_lb} captured together
            video_src_emif  <= video_src_w;
            scanout_lb_emif <= scanout_lb_w;
        end
    end
end
`else
// Profiles without the private-LPDDR EMIF have no emif_clk domain or
// scanout reader to synchronize. Preserve the legacy design-clock mux
// behavior; the non-LPDDR branches below tie scanout pixels/status inert.
wire video_src_eff    = video_src_w;
wire frame_ready_r    = 1'b1;
wire frame_ready_emif = 1'b1;
`endif
`endif
// Boot gate + source mux feeding the OSD overlay. While frame_ready_r is low
// the pixel is forced to black (blank until the first LPDDR frame is
// committed); afterwards video_src_eff picks the windowed LPDDR scanout (1)
// or the BRAM scanout from u_demo (0).
wire [7:0] vid_src_r;
wire [7:0] vid_src_g;
wire [7:0] vid_src_b;
assign vid_src_r = frame_ready_r ? (video_src_eff ? scan_r_win : demo_video_r) : 8'd0;
assign vid_src_g = frame_ready_r ? (video_src_eff ? scan_g_win : demo_video_g) : 8'd0;
assign vid_src_b = frame_ready_r ? (video_src_eff ? scan_b_win : demo_video_b) : 8'd0;

// ---- Platform OSD overlay (design_clk domain).
// Composites the menu over the selected video source and drives the
// board-level VIDEO_* signals.
osd_overlay u_osd_platform (
    .clk                       (design_clk),
    .reset_n                   (core_rst_n),

    // Raster position from the local pixel counters.
    .pixel_x                   (pixel_x),
    .pixel_y                   (pixel_y),

    // Incoming video: boot-gated / source-muxed RGB, PCRTC timing.
    .vid_r                     (vid_src_r),
    .vid_g                     (vid_src_g),
    .vid_b                     (vid_src_b),
    .vid_de                    (demo_video_de),
    .vid_hsync                 (demo_video_hsync),
    .vid_vsync                 (demo_video_vsync),

    // OSD geometry and enables (design_clk-synchronized copies).
    .osd_enable                (osd_active_pix),
    .osd_global_transparent_bg (1'b0),
    .osd_x                     (osd_x_pix),
    .osd_y                     (osd_y_pix),
    .osd_cols                  (osd_cols_pix),
    .osd_rows                  (osd_rows_pix),
    .osd_scale                 (3'd2),

    // Character cell and font ROM read ports.
    .char_rd_addr              (osd_char_rd_addr_w),
    .char_rd_data              (osd_char_rd_data_q),
    .font_rd_addr              (osd_font_rd_addr_w),
    .font_rd_data              (osd_font_rd_data_w),

    // Cursor highlight; enabled whenever the OSD is active.
    .cursor_row                (cursor_row_pix),
    .cursor_attr               (cursor_attr),
    .cursor_enable             (osd_active_pix),

    // Composited output.
    .out_r                     (VIDEO_R),
    .out_g                     (VIDEO_G),
    .out_b                     (VIDEO_B),
    .out_de                    (VIDEO_DE),
    .out_hsync                 (VIDEO_HSYNC),
    .out_vsync                 (VIDEO_VSYNC)
);

// -------------------------------------------------------------
// Ch250 — PS2-fabric input consumer on silicon.
// sio2_input_stub takes the bridge's `bridge_input_p1_raw` (un-remapped
// retroDE bitmap that retrodesd writes from keyboard + DS2 merge), does its
// Sony-byte translation, and surfaces the post-translation 16-bit pad word
// for downstream consumers. The IOP-side read / write ports are tied to
// zero — there's no IOP execution path on silicon yet, so the only consumer
// of this instantiation is the Ch250 LED ledger below.
//
// Both P1 and P2 are wired from the bridge for completeness, but the P2
// word is unused for LED display (retrodesd doesn't drive a P2 gamepad
// meaningfully yet).
// -------------------------------------------------------------
wire [15:0] p1_sony_word_w;
// The outputs below are intentionally left unconsumed; they are folded into
// _unused_sio2 so lint stays quiet.
/* verilator lint_off UNUSEDSIGNAL */
wire [15:0] p2_sony_word_w;
wire [31:0] sio2_rd_data_w;
wire        sio2_rd_valid_w;
wire        _unused_sio2 = &{1'b0, p2_sony_word_w, sio2_rd_data_w,
                             sio2_rd_valid_w, 1'b0};
/* verilator lint_on UNUSEDSIGNAL */
sio2_input_stub u_sio2_input (
    .clk            (CLOCK2_50),
    .rst_n          (~ninit_done),
    // Pad bitmaps from the bridge.
    .input_p1       (bridge_input_p1_raw),
    .input_p2       (bridge_input_p2),
    // Register read port — never enabled.
    .rd_en          (1'b0),
    .rd_addr        (4'd0),
    .rd_data        (sio2_rd_data_w),
    .rd_valid       (sio2_rd_valid_w),
    // Register write port — never enabled.
    .wr_en          (1'b0),
    .wr_addr        (4'd0),
    .wr_data        (32'd0),
    // Translated Sony-format pad words.
    .p1_sony_word_o (p1_sony_word_w),
    .p2_sony_word_o (p2_sony_word_w)
);

`ifdef GS_LPDDR4B_FB
// =============================================================
// Ch319 Brick 1 — FPGA-private LPDDR4B EMIF (EMIF_Qsys).
// Port map cloned verbatim from de25_lpddr4_bw/ao486 (same device).
// Brick 1 scope was: bring the EMIF up and prove CALIBRATION only, with
// the AXI4 user port + AXI4-Lite tied off (no transactions); Brick 2
// re-clocks gs_lpddr_axi_master onto emif_clk and drives it.
// As wired below, the AXI4 write channel is on emif_axi_*, the read
// channel on emif_ar_* / emif_r_*, and only AXI4-Lite remains tied off.
// =============================================================
wire emif_clk;          // ~310 MHz EMIF user clock
wire emif_reset_n;      // calibration-ready (low during cal, high when done)
wire emif_pll_locked;

// Brick 2 — writer→EMIF AXI write channel (gs_lpddr_axi_master drives these;
// the read channel is unused — the writer is write-only).
wire [29:0]  emif_axi_awaddr;
wire [7:0]   emif_axi_awlen;
wire [2:0]   emif_axi_awsize;
wire [1:0]   emif_axi_awburst;
wire [6:0]   emif_axi_awid;
wire         emif_axi_awvalid;
wire         emif_axi_awready;
wire [255:0] emif_axi_wdata;
wire [31:0]  emif_axi_wstrb;
wire         emif_axi_wlast;
wire         emif_axi_wvalid;
wire         emif_axi_wready;
wire [6:0]   emif_axi_bid;
wire [1:0]   emif_axi_bresp;
wire         emif_axi_bvalid;
wire         emif_axi_bready;

// Brick 3 — EMIF READ channel (gs_lpddr_rd_probe drives AR, consumes R).
wire [29:0]  emif_ar_araddr;
wire [1:0]   emif_ar_arburst;
wire [6:0]   emif_ar_arid;
wire [7:0]   emif_ar_arlen;
wire [2:0]   emif_ar_arsize;
wire         emif_ar_arvalid;
wire         emif_ar_arready;
wire [255:0] emif_r_rdata;
wire [1:0]   emif_r_rresp;
wire         emif_r_rlast;
wire         emif_r_rvalid;
wire         emif_r_rready;

// Ch320 — two read masters arbitrated onto emif_ar_*/emif_r_*: s0 = scanout, s1 = probe.
wire [29:0]  scan_ar_araddr;
wire [1:0]   scan_ar_arburst;
wire [6:0]   scan_ar_arid;
wire [7:0]   scan_ar_arlen;
wire [2:0]   scan_ar_arsize;
wire         scan_ar_arvalid;
wire         scan_ar_arready;
wire [255:0] scan_r_rdata;
wire [1:0]   scan_r_rresp;
wire         scan_r_rlast;
wire         scan_r_rvalid;
wire         scan_r_rready;

wire [29:0]  probe_ar_araddr;
wire [1:0]   probe_ar_arburst;
wire [6:0]   probe_ar_arid;
wire [7:0]   probe_ar_arlen;
wire [2:0]   probe_ar_arsize;
wire         probe_ar_arvalid;
wire         probe_ar_arready;
wire [255:0] probe_r_rdata;
wire [1:0]   probe_r_rresp;
wire         probe_r_rlast;
wire         probe_r_rvalid;
wire         probe_r_rready;

// Ch322 — read-arbiter port 2: texture-cache fill (lowest priority). Driven by the
// gs_texture_cache under GS_LPDDR_TEX; tied inert (arvalid=0) otherwise.
wire [29:0]  texf_ar_araddr;
wire [1:0]   texf_ar_arburst;
wire [6:0]   texf_ar_arid;
wire [7:0]   texf_ar_arlen;
wire [2:0]   texf_ar_arsize;
wire         texf_ar_arvalid;
wire         texf_ar_arready;
wire [255:0] texf_r_rdata;
wire [1:0]   texf_r_rresp;
wire         texf_r_rlast;
wire         texf_r_rvalid;
wire         texf_r_rready;

EMIF_Qsys u_emif_lpddr4b (
    // Clocks, resets and PLL status.
    .iopll_refclk_clk                         (CLOCK2_50),
    .iopll_reset_reset                        (ninit_done),
    .iopll_locked_export                      (emif_pll_locked),
    .iopll_outclk_axi4_lite_clk               (),
    .reset_reset_n                            (~ninit_done),
    .clock_310m_out_clk_clk                   (emif_clk),
    .emif_lpddr4b_s0_axi4_ctrl_ready_reset_n  (emif_reset_n),

    // AXI4 user port, write side — Brick 2: driven by gs_lpddr_axi_master (write-only).
    .emif_lpddr4b_s0_axi4_awaddr              (emif_axi_awaddr),
    .emif_lpddr4b_s0_axi4_awburst             (emif_axi_awburst),
    .emif_lpddr4b_s0_axi4_awid                (emif_axi_awid),
    .emif_lpddr4b_s0_axi4_awlen               (emif_axi_awlen),
    .emif_lpddr4b_s0_axi4_awsize              (emif_axi_awsize),
    .emif_lpddr4b_s0_axi4_awvalid             (emif_axi_awvalid),
    .emif_lpddr4b_s0_axi4_awready             (emif_axi_awready),
    .emif_lpddr4b_s0_axi4_awuser              (14'd0),   // private DDR — no NoC metadata needed
    .emif_lpddr4b_s0_axi4_awprot              (3'd0),
    .emif_lpddr4b_s0_axi4_awlock              (1'b0),
    .emif_lpddr4b_s0_axi4_awqos               (4'd0),
    .emif_lpddr4b_s0_axi4_wdata               (emif_axi_wdata),
    .emif_lpddr4b_s0_axi4_wstrb               (emif_axi_wstrb),
    .emif_lpddr4b_s0_axi4_wlast               (emif_axi_wlast),
    .emif_lpddr4b_s0_axi4_wvalid              (emif_axi_wvalid),
    .emif_lpddr4b_s0_axi4_wready              (emif_axi_wready),
    .emif_lpddr4b_s0_axi4_bid                 (emif_axi_bid),
    .emif_lpddr4b_s0_axi4_bresp               (emif_axi_bresp),
    .emif_lpddr4b_s0_axi4_bvalid              (emif_axi_bvalid),
    .emif_lpddr4b_s0_axi4_bready              (emif_axi_bready),

    // AXI4 user port, read side — Brick 3: driven by gs_lpddr_rd_probe (HPS readback).
    // rid is left unconnected.
    .emif_lpddr4b_s0_axi4_araddr              (emif_ar_araddr),
    .emif_lpddr4b_s0_axi4_arburst             (emif_ar_arburst),
    .emif_lpddr4b_s0_axi4_arid                (emif_ar_arid),
    .emif_lpddr4b_s0_axi4_arlen               (emif_ar_arlen),
    .emif_lpddr4b_s0_axi4_arsize              (emif_ar_arsize),
    .emif_lpddr4b_s0_axi4_arvalid             (emif_ar_arvalid),
    .emif_lpddr4b_s0_axi4_arready             (emif_ar_arready),
    .emif_lpddr4b_s0_axi4_aruser              (14'd0),
    .emif_lpddr4b_s0_axi4_arprot              (3'd0),
    .emif_lpddr4b_s0_axi4_arlock              (1'b0),
    .emif_lpddr4b_s0_axi4_arqos               (4'd0),
    .emif_lpddr4b_s0_axi4_rid                 (),
    .emif_lpddr4b_s0_axi4_rdata               (emif_r_rdata),
    .emif_lpddr4b_s0_axi4_rresp               (emif_r_rresp),
    .emif_lpddr4b_s0_axi4_rlast               (emif_r_rlast),
    .emif_lpddr4b_s0_axi4_rvalid              (emif_r_rvalid),
    .emif_lpddr4b_s0_axi4_rready              (emif_r_rready),

    // AXI4-Lite control — tied off (no runtime reconfig): valids low,
    // response-side readys high, outputs unconnected.
    .emif_lpddr4b_s0_axi4lite_clock_clk       (emif_clk),
    .emif_lpddr4b_s0_axi4lite_reset_n_reset_n (emif_reset_n),
    .emif_lpddr4b_s0_axi4lite_awaddr          (27'd0),
    .emif_lpddr4b_s0_axi4lite_awprot          (3'd0),
    .emif_lpddr4b_s0_axi4lite_awvalid         (1'b0),
    .emif_lpddr4b_s0_axi4lite_awready         (),
    .emif_lpddr4b_s0_axi4lite_araddr          (27'd0),
    .emif_lpddr4b_s0_axi4lite_arprot          (3'd0),
    .emif_lpddr4b_s0_axi4lite_arvalid         (1'b0),
    .emif_lpddr4b_s0_axi4lite_arready         (),
    .emif_lpddr4b_s0_axi4lite_wdata           (32'd0),
    .emif_lpddr4b_s0_axi4lite_wstrb           (4'd0),
    .emif_lpddr4b_s0_axi4lite_wvalid          (1'b0),
    .emif_lpddr4b_s0_axi4lite_wready          (),
    .emif_lpddr4b_s0_axi4lite_bready          (1'b1),
    .emif_lpddr4b_s0_axi4lite_bresp           (),
    .emif_lpddr4b_s0_axi4lite_bvalid          (),
    .emif_lpddr4b_s0_axi4lite_rready          (1'b1),
    .emif_lpddr4b_s0_axi4lite_rdata           (),
    .emif_lpddr4b_s0_axi4lite_rresp           (),
    .emif_lpddr4b_s0_axi4lite_rvalid          (),

    // Physical LPDDR4B pins.
    .emif_lpddr4b_mem_mem_cs                  (LPDDR4B_CS_n),
    .emif_lpddr4b_mem_mem_ca                  (LPDDR4B_CA),
    .emif_lpddr4b_mem_mem_cke                 (LPDDR4B_CKE),
    .emif_lpddr4b_mem_mem_dq                  (LPDDR4B_DQ),
    .emif_lpddr4b_mem_mem_dqs_t               (LPDDR4B_DQS),
    .emif_lpddr4b_mem_mem_dqs_c               (LPDDR4B_DQS_n),
    .emif_lpddr4b_mem_mem_dmi                 (LPDDR4B_DM),
    .emif_lpddr4b_mem_ck_mem_ck_t             (LPDDR4B_CK),
    .emif_lpddr4b_mem_ck_mem_ck_c             (LPDDR4B_CK_n),
    .emif_lpddr4b_mem_reset_n_mem_reset_n     (LPDDR4B_RESET_n),
    .emif_lpddr4b_oct_oct_rzqin               (LPDDR4B_RZQ),
    .emif_lpddr4b_ref_clk_clk                 (LPDDR4B_REFCLK_p)
);

// Calibration-done indicator. emif_reset_n is the EMIF cal-ready strobe
// (low during calibration, high when DRAM is usable) and is async to
// design_clk, so it is passed through a 2-FF synchronizer before it drives
// the status LED. The chain carries the same dont_merge/preserve attributes
// as the file's other synchronizer registers so synthesis keeps both flops.
(* dont_merge, preserve *) reg [1:0] lpddr4b_cal_sync = 2'b00;
always_ff @(posedge design_clk) begin
    lpddr4b_cal_sync <= {lpddr4b_cal_sync[0], emif_reset_n};
end
wire lpddr4b_cal_done = lpddr4b_cal_sync[1];   // second stage = design_clk-safe copy
`endif

// -------------------------------------------------------------
// LED outputs — DE25-Nano LEDs are active-LOW (LED HIGH = OFF).
// Status sources are inverted before driving the pins so a
// status-asserted signal lights its LED. Ch250 reclaims LED[7:5]
// (previously tied HIGH = OFF) as a hardware proof of the PS2-
// fabric input consumer landing on silicon.
// -------------------------------------------------------------
// LED[2:0] — core status: halt, DMA-done seen, frame seen.
assign LED[0] = ~core_halt;
assign LED[1] = ~dma_done_seen;
assign LED[2] = ~frame_seen;

// Ch165: LED[3] surfaces the ADV7513 init-done status (from the Ch165 I²C
// wake-up FSM). Lit LED[3] = HDMI is configured and ready to transmit;
// unlit means the chip is still in standby (or HDMI_TX_INT re-fired and
// the FSM is re-initing).
assign LED[3] = ~hdmi_init_done;

// Ch166: LED[4] surfaces the sticky NACK watchdog from the same FSM.
// Lit LED[4] = bus error latched (chip absent, wrong I²C address, bus
// shorted, monitor unplugged after power-on with a chip that won't ACK).
// Same driver polarity as the other status LEDs (lit means signal
// asserted) — but unlike LED[3] which is "lit = good", LED[4] is
// "lit = bad". Documented in the bring-up runbook.
assign LED[4] = ~hdmi_i2c_error;

// Ch250 — Sony-format pad bits from sio2_input_stub. The Sony wire format is
// active-LOW (bit=0 when pressed) and so are the LEDs (pin=0 = lit), hence
// pass-through with no inversion:
//   LED[5] = p1_sony_word[3]   START     (byte3 bit 3)
//   LED[6] = p1_sony_word[14]  CROSS×    (byte4 bit 6 → bit [8+6]=14)
//   LED[7] = p1_sony_word[4]   D-pad UP  (byte3 bit 4) — non-LPDDR builds only
assign LED[6] = p1_sony_word_w[14];
assign LED[5] = p1_sony_word_w[3];
`ifdef GS_LPDDR4B_FB
assign LED[7] = ~lpddr4b_cal_done;   // Ch319 Brick 1: lit = LPDDR4B EMIF calibrated
`else
assign LED[7] = p1_sony_word_w[4];
`endif

// -------------------------------------------------------------
// HDMI data path (Ch164) — drive the ADV7513 24-bit RGB + sync
// interface directly from the abstract VIDEO_* signals.
// HDMI_TX_CLK is the pixel clock (= design_clk = 50 MHz post-PLL);
// HDMI_TX_D packs RGB with R in the MSBs: {R, G, B}.
// -------------------------------------------------------------
assign HDMI_TX_CLK = design_clk;
assign HDMI_TX_DE  = VIDEO_DE;
assign HDMI_TX_HS  = VIDEO_HSYNC;
assign HDMI_TX_VS  = VIDEO_VSYNC;
assign HDMI_TX_D   = {VIDEO_R, VIDEO_G, VIDEO_B};

// -------------------------------------------------------------
|
||
// HDMI ADV7513 wake-up (Ch165) — Terasic-derived I²C config
|
||
// FSM that walks a 38-entry LUT of ADV7513 register writes,
|
||
// turning the chip from standby into "transmitting RGB on
|
||
// the HDMI port". `READY` (= hdmi_init_done) goes high once
|
||
// the LUT walk completes; if HDMI_TX_INT goes low (HPD or
|
||
// monitor-sense event), the FSM re-initiates the LUT walk so
|
||
// the monitor can be plugged in after the FPGA boots.
|
||
//
|
||
// Clocked on CLOCK2_50 (the physical 50 MHz oscillator) — NOT
|
||
// `design_clk` — so the wake-up runs even before the PLL
|
||
// locks. Reset is `~ninit_done` (raw async reset) so the I²C
|
||
// bus stays held in a clean state until FPGA init completes.
|
||
//
|
||
// HDMI_MCLK is driven by CLOCK2_50 as a reasonable audio-clock
|
||
// reference for the chip's PLL (the demo doesn't generate
|
||
// audio data; the I²C config still touches audio registers
|
||
// but the chip's video path is independent of audio MCLK
|
||
// correctness).
|
||
// -------------------------------------------------------------
|
||
// Status outputs of the ADV7513 I2C configuration FSM.
wire hdmi_init_done;   // READY: register-LUT walk has completed
wire hdmi_i2c_error;   // ERROR output of the config FSM

// ADV7513 wake-up sequencer: clocked from the raw board oscillator and
// reset through iRST_N, which is driven with the inverse of ninit_done.
I2C_HDMI_Config u_hdmi_i2c (
    // clock / reset
    .iCLK        (CLOCK2_50),
    .iRST_N      (~ninit_done),
    // I2C bus to the transmitter
    .I2C_SCLK    (HDMI_I2C_SCL),
    .I2C_SDAT    (HDMI_I2C_SDA),
    // interrupt from the transmitter
    .HDMI_TX_INT (HDMI_TX_INT),
    // status
    .READY       (hdmi_init_done),
    .ERROR       (hdmi_i2c_error)
);

// Audio master-clock pin is fed from the same oscillator.
assign HDMI_MCLK = CLOCK2_50;
|
||
|
||
// -------------------------------------------------------------
|
||
// Ch170 — HPS / qsys platform shell
|
||
// -------------------------------------------------------------
|
||
//
|
||
// retroDE_ps2 is loaded at runtime onto a DE25-Nano whose HPS is
|
||
// already booted from QSPI-flashed retroDE_splash. For the
|
||
// fpga-manager runtime fabric swap to be SAFE — i.e. SSH stays
|
||
// alive, kernel doesn't hang on bridge transactions — the
|
||
// produced .core.rbf must be the same "shape" of artifact every
|
||
// other retroDE core ships: HPS region + AXI bridge endpoints
|
||
// + LPDDR4 EMIF + identity register window.
|
||
//
|
||
// The HPS hard IP itself, the AXI bridges, and the LPDDR4 EMIF
|
||
// controller live entirely inside `qsys_top` (Platform Designer).
|
||
// We copy that subsystem verbatim from retroDE_Atari2600 (the
|
||
// qsys files are byte-identical across nes/a2600/coco2/gb) and
|
||
// instantiate it here.
|
||
//
|
||
// ps2 fabric does NOT consume the hps2fpga AXI bridge as a real
|
||
// control surface in Ch170 — that's a Ch171+ concern. For now
|
||
// a minimal null-AXI slave (`ps2_hps_bridge_null`) provides
|
||
// proper handshake on every transaction so HPS-side reads/writes
|
||
// can't stall the bus, plus a 4-word identity register window at
|
||
// bridge offset 0x000-0x00F so retrodesd/userspace tools can
|
||
// probe "who's loaded right now."
|
||
//
|
||
// Clock for qsys (`clk_100_clk`): fed from raw CLOCK2_50 (the
|
||
// physical 50 MHz oscillator), per Codex's Ch170 call. Keeps
|
||
// qsys's bridge clock domain fully independent from the ps2
|
||
// pixel clock (now 25.175 MHz post-PLL), so retuning the IOPLL
|
||
// for video timing never disturbs the HPS bridges. The
|
||
// `clk_100` name is a retroDE-ism inherited from older cores
|
||
// (the rate doesn't have to be 100 MHz — splash itself feeds it
|
||
// 50 MHz too).
|
||
// -------------------------------------------------------------
|
||
|
||
// The entire qsys+null-bridge block below is gated on
|
||
// `USE_QSYS_TOP` (Quartus synth). In sim the qsys_top
|
||
// blackbox stub (qsys_top_bb.v) wouldn't drive anything
|
||
// useful, and the AXI handshake from a black-hole master
|
||
// could leave the bridge state machine in unexpected states.
|
||
// Skipping the block entirely in sim keeps the existing
|
||
// 144 PASS regression unchanged.
|
||
`ifdef USE_QSYS_TOP
|
||
|
||
// qsys-side PIO surfaces — tied off in Ch170 (HPS reads zeros /
|
||
// boards-button registers don't echo ps2 state). Wire up in
|
||
// Ch171+ if/when we want HPS to mirror core_halt / frame_seen /
|
||
// etc. via a status register instead of an AXI poll.
|
||
// qsys-side PIO nets: the LED word is held at zero, the DIP-switch word
// carries SW, and the button word carries KEY with its two upper bits
// padded with 1s.
wire [2:0] qsys_led_pio;
wire [3:0] qsys_dipsw_pio;
wire [3:0] qsys_button_pio;
assign qsys_led_pio    = 3'b000;
assign qsys_dipsw_pio  = SW;
assign qsys_button_pio = {2'b11, KEY};
|
||
|
||
// h2f reset from qsys (HPS-driven fabric reset). Routed to the
|
||
// null bridge; not used to gate the ps2 fabric in Ch170.
|
||
wire h2f_reset_reset;
|
||
|
||
// f2sdram bridge outputs — declared so each output of the
|
||
// Sundancemesa MPFE primitive has a real wire to drive (open
|
||
// ports `()` cause Quartus to reject the IP). The wires are
|
||
// unused (nothing reads them); Quartus prunes them as dangling
|
||
// outputs, no real fabric cost.
|
||
// Sink nets for the f2sdram bridge outputs, grouped by AXI channel.
// Address channels (ready handshakes)
wire         f2sdram_arready_open;
wire         f2sdram_awready_open;
// Write-data channel
wire         f2sdram_wready_open;
// Write-response channel
wire [4:0]   f2sdram_bid_open;
wire [1:0]   f2sdram_bresp_open;
wire [7:0]   f2sdram_buser_open;
wire         f2sdram_bvalid_open;
// Read-data channel
wire [255:0] f2sdram_rdata_open;
wire [4:0]   f2sdram_rid_open;
wire         f2sdram_rlast_open;
wire [1:0]   f2sdram_rresp_open;
wire [7:0]   f2sdram_ruser_open;
wire         f2sdram_rvalid_open;
|
||
|
||
// ------------------------------------------------------------------
|
||
// Ch318 — LPDDR framebuffer AXI WRITE master (drives the f2sdram write channel).
|
||
// DEFAULT (no GS_LPDDR_FB macro): the write channel is the legacy inert tie-off
|
||
// (awvalid=0) — byte-identical to before. With GS_LPDDR_FB: gs_lpddr_axi_master
|
||
// streams the PSMCT16 tile flush to LPDDR over f2sdram, BUT its hard
|
||
// write_enable defaults to 0, so the fitted core boots inert (no LPDDR writes)
|
||
// until (a) FB_BASE is set to a Linux-SAFE reserved region (from /proc/iomem)
|
||
// and (b) write_enable is raised. The board write/readback run is gated on that.
|
||
// GS clock = design_clk (PLL); f2sdram clock = CLOCK2_50 — genuinely async, so
|
||
// the master's internal gray-code async FIFO does the CDC.
|
||
// ------------------------------------------------------------------
|
||
// f2sdram write-channel drive (muxed: master outputs vs inert constants)
|
||
// AW channel
wire [31:0]  f2s_awaddr_w;
wire [7:0]   f2s_awlen_w;
wire [2:0]   f2s_awsize_w;
wire [1:0]   f2s_awburst_w;
wire [4:0]   f2s_awid_w;
wire         f2s_awvalid_w;
// W channel
wire [255:0] f2s_wdata_w;
wire [31:0]  f2s_wstrb_w;
wire         f2s_wlast_w;
wire         f2s_wvalid_w;
// B channel
wire         f2s_bready_w;
|
||
// Runtime control/status, driven by the HPS bridge registers (CLOCK2_50). All
|
||
// defaults SAFE at the bridge: arm=0, canary=1, fb_base=0x8000_0000. The HPS arms
|
||
// and selects canary/full at runtime — ONE bitstream, no rebuild to change mode.
|
||
// f2sdram AWADDR is the HPS PHYSICAL address (qsys f2sdram slave maps a flat 4 GiB
|
||
// = the full HPS map; 0x8000_0000 is the /proc/iomem reserved region).
|
||
// Control nets
wire        lpddr_arm_w;
wire        lpddr_canary_w;
wire        lpddr_ctrl_commit_w;   // Ch352 — control-commit toggle (bridge -> gs_lpddr_axi_master snapshot)
wire [31:0] lpddr_fb_base_w;
// Status / counter nets
wire [31:0] lpddr_bytes_w;
wire [31:0] lpddr_bursts_w;
wire [31:0] lpddr_bresp_err_w;
wire [31:0] lpddr_fifo_ovf_w;
wire        lpddr_idle_w;
|
||
// Ch319 Brick 3 — bridge <-> LPDDR4B read-probe (declared unconditionally; the
|
||
// bridge always connects them, the probe drives data/done only under GS_LPDDR4B_FB).
|
||
// bridge -> probe
wire [31:0] lpddr_rd_addr_w;
wire        lpddr_rd_pulse_w;
// probe -> bridge
wire [31:0] lpddr_rd_data_w;
wire        lpddr_rd_done_w;
|
||
// Ch322 — bridge <-> LPDDR write-probe (HPS stages texture words into LPDDR4B) and
|
||
// <-> texture-cache fill. Declared unconditionally so the bridge always connects;
|
||
// driven by the probe/cache only under GS_LPDDR_TEX (tied inert otherwise).
|
||
// bridge -> wr-probe
wire [31:0] lpddr_wr_addr_w;
wire [31:0] lpddr_wr_data_w;
wire        lpddr_wr_pulse_w;
// wr-probe -> bridge
wire        lpddr_wr_busy_w;
wire        lpddr_wr_done_w;
wire [31:0] lpddr_wr_bresp_err_w;
// bridge -> cache (fill arm)
wire        tex_fill_start_w;
// cache -> bridge
wire        tex_fill_done_w;
wire [31:0] tex_fill_beats_w;
wire [31:0] tex_fill_bytes_w;
wire [31:0] tex_rd_errs_w;
wire [31:0] tex_fill_crc_w;        // Ch352 — cache tex_mem integrity sum32 -> bridge 0x070
|
||
// Ch323 — tile COLOR+Z spill/reload counters -> bridge (declared unconditionally so the
|
||
// bridge always connects; driven by the spill writers / reload engine under GS_TILE_SPILL,
|
||
// tied 0 in every other build below). Six Codex-named counters + per-writer errs/overflow.
|
||
// COLOR flush-write
wire [31:0] color_spill_beats_w;
wire [31:0] color_spill_errs_w;
wire        color_spill_ovf_w;
// Z flush-write
wire [31:0] z_spill_beats_w;
wire [31:0] z_spill_errs_w;
wire        z_spill_ovf_w;
// reload read-back
wire [31:0] reload_color_beats_w;
wire [31:0] reload_z_beats_w;
wire [31:0] reload_rd_errs_w;
// [6] = trace_clear (resets per-render spill pipeline counters)
wire [7:0]  bridge_diag_ctrl;
// color-writer beats (B handshakes) since trace_clear
wire [31:0] dbg_c_beats_w;
// color-writer pipeline-split counters
wire [31:0] dbg_c_emit_w;
wire [31:0] dbg_c_push_w;
wire [31:0] dbg_c_pop_w;
wire [31:0] dbg_c_aw_w;
wire [31:0] dbg_c_w_w;
// Ch324 — Z-writer pipeline-split counters
wire [31:0] dbg_z_beats_w;
wire [31:0] dbg_z_emit_w;
wire [31:0] dbg_z_push_w;
wire [31:0] dbg_z_pop_w;
|
||
// (Ch320 scanout source-select + pixels/status wires are declared earlier,
|
||
// near demo_video_*, so the video mux ahead of this block can reference them.)
|
||
`ifdef GS_LPDDR4B_FB
|
||
// ----- Brick 2: writer targets FPGA-PRIVATE LPDDR4B (emif_clk domain). -----
|
||
// The gray-code async FIFO inside the master does the design_clk -> emif_clk CDC.
|
||
// Master is 32b-addr / 5b-id; the EMIF user port is 30b / 7b — adapted below.
|
||
// f2sdram (HPS DRAM) write channel is permanently inert (retired: platform-blocked,
|
||
// BRESP 256/256 — see Ch318 closure). axi_rst_n = emif_reset_n (EMIF cal-ready).
|
||
// Ch322 — where the staged texture lives in FPGA-private LPDDR4B (well clear of the
|
||
// framebuffer at base 0). The HPS write-probe loads it here; the texture cache fills from it.
|
||
localparam [29:0] TEX_LPDDR_BASE = 30'h0020_0000; // 2 MiB offset
|
||
// Ch323 — FPGA-private LPDDR4B region map (named bases, far apart, per Codex):
|
||
// FB base (PSMCT16 scanout framebuffer) ............ 0x0000_0000 (gs_lpddr_axi_master)
|
||
// tile COLOR spill scratch (32-bit/px, screen-strided) 0x0040_0000 (4 MiB) — gs_z_flush_writer
|
||
// tile Z spill scratch (32-bit/px, screen-strided) . 0x0050_0000 (5 MiB) — gs_z_flush_writer
|
||
// texture region (Ch322) .......................... 0x0020_0000 (2 MiB)
|
||
// The COLOR/Z spill scratch regions are 32-bit-per-pixel, screen-relative
|
||
// (byte = pixel_index*4, row stride = 64px*4 = 256 B), matching gs_tile_reload's
|
||
// STRIDE_BYTES=256 / ROW_BEATS=2 read layout for a 64-px-wide (ras_fbw=1) FB.
|
||
// Scratch-region bases for the tile COLOR and Z spill writers.
localparam [29:0] COLOR_SPILL_BASE = 30'h0040_0000,
                  Z_SPILL_BASE     = 30'h0050_0000;
|
||
// Ch324 Brick 3 — LPDDR scanout config: the spill profile scans the 64x64 PSMCT32 color
|
||
// FB straight out of LPDDR at COLOR_SPILL_BASE (PCRTC addresses it BRAM-relative so
|
||
// VRAM_BASE stays 0). Other LPDDR profiles keep the FB-at-0 PSMCT16 mirror.
|
||
`ifndef GS_TILE_SPILL
// Non-spill LPDDR profiles: scan out from base 0 with PSMCT32 off.
localparam [29:0] SCANOUT_FB_BASE = 30'd0;
localparam bit    SCANOUT_PSMCT32 = 1'b0;
`else
// Spill profile: scan out from the COLOR spill region with PSMCT32 on.
localparam [29:0] SCANOUT_FB_BASE = COLOR_SPILL_BASE;
localparam bit    SCANOUT_PSMCT32 = 1'b1;
`endif
|
||
// gs_tile_reload read port -> read-arbiter port 3 (driven by the reload engine under
|
||
// GS_TILE_SPILL; tied inert otherwise so the rd_arb s3 input never floats).
|
||
// AR channel
wire [29:0]  reload_ar_araddr;
wire [1:0]   reload_ar_arburst;
wire [6:0]   reload_ar_arid;
wire [7:0]   reload_ar_arlen;
wire [2:0]   reload_ar_arsize;
wire         reload_ar_arvalid;
wire         reload_ar_arready;
// R channel
wire [255:0] reload_r_rdata;
wire [1:0]   reload_r_rresp;
wire         reload_r_rlast;
wire         reload_r_rvalid;
wire         reload_r_rready;
|
||
|
||
// Raw master outputs (32-bit address / 5-bit id) before width adaptation.
wire [31:0]  emif_m_awaddr;
wire [4:0]   emif_m_awid;

// EMIF-facing side of the FB writer. It feeds port 0 of the write arbiter
// in the GS_LPDDR_TEX and GS_TILE_SPILL builds and the EMIF write channel
// directly otherwise.
wire [29:0]  fbw_awaddr;
wire [1:0]   fbw_awburst;
wire [6:0]   fbw_awid;
wire [7:0]   fbw_awlen;
wire [2:0]   fbw_awsize;
wire         fbw_awvalid;
wire         fbw_awready;
wire [255:0] fbw_wdata;
wire [31:0]  fbw_wstrb;
wire         fbw_wlast;
wire         fbw_wvalid;
wire         fbw_wready;
wire [1:0]   fbw_bresp;
wire         fbw_bvalid;
wire         fbw_bready;

gs_lpddr_axi_master #(
    .FIFO_DEPTH (16)
) u_lpddr_axi (
    // GS side (design_clk)
    .gs_clk              (design_clk),
    .gs_rst_n            (core_rst_n),
    .enable              (1'b1),
    // runtime control from the bridge
    .arm                 (lpddr_arm_w),
    .canary              (lpddr_canary_w),
    .fb_base             (lpddr_fb_base_w),
    .ctrl_commit         (lpddr_ctrl_commit_w),
    // pixel stream: only flush emits whose PSM field equals 6'h02 are taken
    .px_emit             (demo_flush_emit && (demo_flush_psm == 6'h02)),
    .px_addr             (demo_flush_addr),
    .px_pix16            (demo_flush_pix16),
    // AXI side (emif_clk)
    .axi_clk             (emif_clk),
    .axi_rst_n           (emif_reset_n),
    // AW
    .awaddr              (emif_m_awaddr),
    .awlen               (fbw_awlen),
    .awsize              (fbw_awsize),
    .awburst             (fbw_awburst),
    .awid                (emif_m_awid),
    .awvalid             (fbw_awvalid),
    .awready             (fbw_awready),
    // W
    .wdata               (fbw_wdata),
    .wstrb               (fbw_wstrb),
    .wlast               (fbw_wlast),
    .wvalid              (fbw_wvalid),
    .wready              (fbw_wready),
    // B
    .bvalid              (fbw_bvalid),
    .bready              (fbw_bready),
    .bresp               (fbw_bresp),
    // status / counters
    .beats_written       (lpddr_bytes_w),
    .bursts_issued       (lpddr_bursts_w),
    .bresp_err_count     (lpddr_bresp_err_w),
    .fifo_overflow_count (lpddr_fifo_ovf_w),
    .idle                (lpddr_idle_w)
);

// Width adaptation to the EMIF user port.
assign fbw_awid   = {2'b00, emif_m_awid};   // EMIF awid is 7-bit
assign fbw_awaddr = emif_m_awaddr[29:0];    // EMIF addr is 30-bit (base 0)
|
||
`ifdef GS_LPDDR_TEX
|
||
// Ch322 — HPS write-probe (stage texture words) + 2:1 write arbiter (FB writer = priority).
|
||
// Write-probe AXI nets (arbiter port 1 in this build).
wire [29:0]  wp_awaddr;
wire [1:0]   wp_awburst;
wire [6:0]   wp_awid;
wire [7:0]   wp_awlen;
wire [2:0]   wp_awsize;
wire         wp_awvalid;
wire         wp_awready;
wire [255:0] wp_wdata;
wire [31:0]  wp_wstrb;
wire         wp_wlast;
wire         wp_wvalid;
wire         wp_wready;
wire [1:0]   wp_bresp;
wire         wp_bvalid;
wire         wp_bready;

gs_lpddr_wr_probe u_lpddr_wr (
    .emif_clk    (emif_clk),
    .emif_rst_n  (emif_reset_n),
    // request from the bridge (only the low 30 address bits are used)
    .wr_pulse    (lpddr_wr_pulse_w),
    .wr_addr     (lpddr_wr_addr_w[29:0]),
    .wr_data     (lpddr_wr_data_w),
    .full_beat   (1'b0),
    // status back to the bridge
    .busy        (lpddr_wr_busy_w),
    .done_toggle (lpddr_wr_done_w),
    .bresp_errs  (lpddr_wr_bresp_err_w),
    // AW
    .awaddr      (wp_awaddr),
    .awburst     (wp_awburst),
    .awid        (wp_awid),
    .awlen       (wp_awlen),
    .awsize      (wp_awsize),
    .awvalid     (wp_awvalid),
    .awready     (wp_awready),
    // W
    .wdata       (wp_wdata),
    .wstrb       (wp_wstrb),
    .wlast       (wp_wlast),
    .wvalid      (wp_wvalid),
    .wready      (wp_wready),
    // B
    .bresp       (wp_bresp),
    .bvalid      (wp_bvalid),
    .bready      (wp_bready)
);
|
||
gs_lpddr_wr_arb u_lpddr_wr_arb (
    .clk        (emif_clk),
    .rst_n      (emif_reset_n),
    // s0 = FB writer
    .s0_awaddr  (fbw_awaddr),
    .s0_awburst (fbw_awburst),
    .s0_awid    (fbw_awid),
    .s0_awlen   (fbw_awlen),
    .s0_awsize  (fbw_awsize),
    .s0_awvalid (fbw_awvalid),
    .s0_awready (fbw_awready),
    .s0_wdata   (fbw_wdata),
    .s0_wstrb   (fbw_wstrb),
    .s0_wlast   (fbw_wlast),
    .s0_wvalid  (fbw_wvalid),
    .s0_wready  (fbw_wready),
    .s0_bresp   (fbw_bresp),
    .s0_bvalid  (fbw_bvalid),
    .s0_bready  (fbw_bready),
    // s1 = HPS write-probe
    .s1_awaddr  (wp_awaddr),
    .s1_awburst (wp_awburst),
    .s1_awid    (wp_awid),
    .s1_awlen   (wp_awlen),
    .s1_awsize  (wp_awsize),
    .s1_awvalid (wp_awvalid),
    .s1_awready (wp_awready),
    .s1_wdata   (wp_wdata),
    .s1_wstrb   (wp_wstrb),
    .s1_wlast   (wp_wlast),
    .s1_wvalid  (wp_wvalid),
    .s1_wready  (wp_wready),
    .s1_bresp   (wp_bresp),
    .s1_bvalid  (wp_bvalid),
    .s1_bready  (wp_bready),
    // Ch323 — s2/s3 (Z spill / HPS write-probe in the tile-spill build) are
    // unused in the GS_LPDDR_TEX build: requests held low, outputs left open.
    .s2_awaddr  (30'd0),
    .s2_awburst (2'b01),
    .s2_awid    (7'd6),
    .s2_awlen   (8'd0),
    .s2_awsize  (3'b101),
    .s2_awvalid (1'b0),
    .s2_awready (),
    .s2_wdata   (256'd0),
    .s2_wstrb   (32'd0),
    .s2_wlast   (1'b0),
    .s2_wvalid  (1'b0),
    .s2_wready  (),
    .s2_bresp   (),
    .s2_bvalid  (),
    .s2_bready  (1'b1),
    .s3_awaddr  (30'd0),
    .s3_awburst (2'b01),
    .s3_awid    (7'd5),
    .s3_awlen   (8'd0),
    .s3_awsize  (3'b101),
    .s3_awvalid (1'b0),
    .s3_awready (),
    .s3_wdata   (256'd0),
    .s3_wstrb   (32'd0),
    .s3_wlast   (1'b0),
    .s3_wvalid  (1'b0),
    .s3_wready  (),
    .s3_bresp   (),
    .s3_bvalid  (),
    .s3_bready  (1'b1),
    // m = EMIF write channel
    .m_awaddr   (emif_axi_awaddr),
    .m_awburst  (emif_axi_awburst),
    .m_awid     (emif_axi_awid),
    .m_awlen    (emif_axi_awlen),
    .m_awsize   (emif_axi_awsize),
    .m_awvalid  (emif_axi_awvalid),
    .m_awready  (emif_axi_awready),
    .m_wdata    (emif_axi_wdata),
    .m_wstrb    (emif_axi_wstrb),
    .m_wlast    (emif_axi_wlast),
    .m_wvalid   (emif_axi_wvalid),
    .m_wready   (emif_axi_wready),
    .m_bresp    (emif_axi_bresp),
    .m_bvalid   (emif_axi_bvalid),
    .m_bready   (emif_axi_bready)
);
|
||
`elsif GS_TILE_SPILL
|
||
// Ch323 — tile color+Z spill writers share the EMIF write channel with the FB scanout
// writer (and the HPS diagnostic write-probe) through the 4-port write arbiter:
// s0 = FB scanout (priority), s1 = COLOR spill, s2 = Z spill, s3 = HPS write-probe.
|
||
// Color spill fires during TP_FLUSH (same cycles as the FB writer; the arb serializes them);
|
||
// Z spill fires during TP_ZFLUSH (a later, disjoint phase). Both writers are generic
|
||
// single-32-bit-lane LPDDR writers (gs_z_flush_writer) pointed at their own scratch base.
|
||
//
|
||
// COLOR spill: write the full 32-bit flushed color at the tile-relative byte offset
|
||
// (= pixel_index*4; demo_flush_addr carries this when the demo's color FBP=0) into
|
||
// COLOR_SPILL_BASE. The low 16 bits are the PSMCT16 texel the reload returns.
|
||
// DIAG_CTRL[6] = trace_clear: pulse to reset the per-render spill PIPELINE counters (beats/pop).
|
||
// (Ch323 hunt knobs full-beat/lane0/probe-port/awid were removed once the spill path was proven.)
|
||
// Two register stages re-sample bridge_diag_ctrl on emif_clk; bit 6 of the
// second stage is the trace-clear request handed to the spill writers.
reg [7:0] diag_s1 = 8'd0;
reg [7:0] diag_s2 = 8'd0;
always_ff @(posedge emif_clk) begin
    diag_s2 <= diag_s1;
    diag_s1 <= bridge_diag_ctrl;
end
wire diag_trace_clear;
assign diag_trace_clear = diag_s2[6];
|
||
// COLOR spill writer AXI nets.
wire [29:0]  csp_awaddr;
wire [1:0]   csp_awburst;
wire [6:0]   csp_awid;
wire [7:0]   csp_awlen;
wire [2:0]   csp_awsize;
wire         csp_awvalid;
wire         csp_awready;
wire [255:0] csp_wdata;
wire [31:0]  csp_wstrb;
wire         csp_wlast;
wire         csp_wvalid;
wire         csp_wready;
wire [1:0]   csp_bresp;
wire         csp_bvalid;
wire         csp_bready;

gs_z_flush_writer #(
    .Z_BASE     (COLOR_SPILL_BASE),
    .FB_BASE    (30'd0),
    .FB_BYTES   (32'h0010_0000),      // canary: clear of FB
    .TEX_BASE   (Z_SPILL_BASE),
    .TEX_BYTES  (32'h0010_0000),      // canary: clear of Z scratch
    .FIFO_DEPTH (16)
) u_color_spill (
    // GS side (design_clk)
    .gs_clk         (design_clk),
    .gs_rst_n       (core_rst_n),
    .enable         (1'b1),
    // Fed from the dedicated TP_FLUSH-only color stream, NOT from
    // demo_flush_emit/raster_pixel_emit: that stream also carries RENDER-phase
    // pixel writes, which over-fed the writer (the 108-beats+ovf board bug).
    .z_flush_emit   (demo_cflush_emit),
    .z_flush_addr   (demo_cflush_addr),
    .z_flush_data   (demo_cflush_data),
    // counters to the bridge
    .z_write_beats  (color_spill_beats_w),
    .z_wr_errs      (color_spill_errs_w),
    .fifo_overflow  (color_spill_ovf_w),
    // per-render debug counters
    .trace_clear    (diag_trace_clear),
    .dbg_beat_count (dbg_c_beats_w),
    .dbg_emit_count (dbg_c_emit_w),
    .dbg_push_count (dbg_c_push_w),
    .dbg_pop_count  (dbg_c_pop_w),
    .dbg_aw_count   (dbg_c_aw_w),
    .dbg_w_count    (dbg_c_w_w),
    // AXI side (emif_clk)
    .axi_clk        (emif_clk),
    .axi_rst_n      (emif_reset_n),
    .awaddr         (csp_awaddr),
    .awburst        (csp_awburst),
    .awid           (csp_awid),
    .awlen          (csp_awlen),
    .awsize         (csp_awsize),
    .awvalid        (csp_awvalid),
    .awready        (csp_awready),
    .wdata          (csp_wdata),
    .wstrb          (csp_wstrb),
    .wlast          (csp_wlast),
    .wvalid         (csp_wvalid),
    .wready         (csp_wready),
    .bresp          (csp_bresp),
    .bvalid         (csp_bvalid),
    .bready         (csp_bready)
);
|
||
// Z spill: write the 32-bit Z at z_flush_addr (= pixel_index*4) into Z_SPILL_BASE.
|
||
// Z spill writer AXI nets.
wire [29:0]  zsp_awaddr;
wire [1:0]   zsp_awburst;
wire [6:0]   zsp_awid;
wire [7:0]   zsp_awlen;
wire [2:0]   zsp_awsize;
wire         zsp_awvalid;
wire         zsp_awready;
wire [255:0] zsp_wdata;
wire [31:0]  zsp_wstrb;
wire         zsp_wlast;
wire         zsp_wvalid;
wire         zsp_wready;
wire [1:0]   zsp_bresp;
wire         zsp_bvalid;
wire         zsp_bready;

gs_z_flush_writer #(
    .Z_BASE     (Z_SPILL_BASE),
    .FB_BASE    (30'd0),
    .FB_BYTES   (32'h0010_0000),      // canary: clear of FB
    .TEX_BASE   (COLOR_SPILL_BASE),
    .TEX_BYTES  (32'h0010_0000),      // canary: clear of COLOR scratch
    .FIFO_DEPTH (16)
) u_z_spill (
    // GS side (design_clk)
    .gs_clk         (design_clk),
    .gs_rst_n       (core_rst_n),
    .enable         (1'b1),
    // Z flush stream
    .z_flush_emit   (demo_z_flush_emit),
    .z_flush_addr   (demo_z_flush_addr),
    .z_flush_data   (demo_z_flush_data),
    // counters to the bridge
    .z_write_beats  (z_spill_beats_w),
    .z_wr_errs      (z_spill_errs_w),
    .fifo_overflow  (z_spill_ovf_w),
    // per-render debug counters (AW/W counts are left unconnected here)
    .trace_clear    (diag_trace_clear),
    .dbg_beat_count (dbg_z_beats_w),
    .dbg_emit_count (dbg_z_emit_w),
    .dbg_push_count (dbg_z_push_w),
    .dbg_pop_count  (dbg_z_pop_w),
    .dbg_aw_count   (),
    .dbg_w_count    (),
    // AXI side (emif_clk)
    .axi_clk        (emif_clk),
    .axi_rst_n      (emif_reset_n),
    .awaddr         (zsp_awaddr),
    .awburst        (zsp_awburst),
    .awid           (zsp_awid),
    .awlen          (zsp_awlen),
    .awsize         (zsp_awsize),
    .awvalid        (zsp_awvalid),
    .awready        (zsp_awready),
    .wdata          (zsp_wdata),
    .wstrb          (zsp_wstrb),
    .wlast          (zsp_wlast),
    .wvalid         (zsp_wvalid),
    .wready         (zsp_wready),
    .bresp          (zsp_bresp),
    .bvalid         (zsp_bvalid),
    .bready         (zsp_bready)
);
|
||
// Ch323 diag — HPS write-probe (write-arb port 3, lowest priority). Lets the HPS stage a
|
||
// KNOWN pattern into COLOR_SPILL_BASE / Z_SPILL_BASE and read it back via the read-probe,
|
||
// proving the LPDDR region + read/write probe path independent of the GS spill writers.
|
||
// Write-probe AXI nets (arbiter port 3 in this build).
wire [29:0]  wp_awaddr;
wire [1:0]   wp_awburst;
wire [6:0]   wp_awid;
wire [7:0]   wp_awlen;
wire [2:0]   wp_awsize;
wire         wp_awvalid;
wire         wp_awready;
wire [255:0] wp_wdata;
wire [31:0]  wp_wstrb;
wire         wp_wlast;
wire         wp_wvalid;
wire         wp_wready;
wire [1:0]   wp_bresp;
wire         wp_bvalid;
wire         wp_bready;

gs_lpddr_wr_probe u_lpddr_wr (
    .emif_clk    (emif_clk),
    .emif_rst_n  (emif_reset_n),
    // request from the bridge (only the low 30 address bits are used)
    .wr_pulse    (lpddr_wr_pulse_w),
    .wr_addr     (lpddr_wr_addr_w[29:0]),
    .wr_data     (lpddr_wr_data_w),
    .full_beat   (1'b0),               // generic single-lane probe (the Ch323 full-beat hunt mode was retired)
    // status back to the bridge
    .busy        (lpddr_wr_busy_w),
    .done_toggle (lpddr_wr_done_w),
    .bresp_errs  (lpddr_wr_bresp_err_w),
    // AW
    .awaddr      (wp_awaddr),
    .awburst     (wp_awburst),
    .awid        (wp_awid),
    .awlen       (wp_awlen),
    .awsize      (wp_awsize),
    .awvalid     (wp_awvalid),
    .awready     (wp_awready),
    // W
    .wdata       (wp_wdata),
    .wstrb       (wp_wstrb),
    .wlast       (wp_wlast),
    .wvalid      (wp_wvalid),
    .wready      (wp_wready),
    // B
    .bresp       (wp_bresp),
    .bvalid      (wp_bvalid),
    .bready      (wp_bready)
);
|
||
// Fixed write-arb routing: s1=color-spill, s2=Z-spill, s3=HPS probe (lowest priority).
|
||
// Arbiter-side response nets, one set per slave port.
// arb s1 (color) responses
wire       a1_awready;
wire       a1_wready;
wire       a1_bvalid;
wire [1:0] a1_bresp;
// arb s2 (Z) responses
wire       a2_awready;
wire       a2_wready;
wire       a2_bvalid;
wire [1:0] a2_bresp;
// arb s3 (probe) responses
wire       a3_awready;
wire       a3_wready;
wire       a3_bvalid;
wire [1:0] a3_bresp;

// Hand each response set to the writer attached to that port.
assign csp_awready = a1_awready;
assign csp_wready  = a1_wready;
assign csp_bvalid  = a1_bvalid;
assign csp_bresp   = a1_bresp;

assign zsp_awready = a2_awready;
assign zsp_wready  = a2_wready;
assign zsp_bvalid  = a2_bvalid;
assign zsp_bresp   = a2_bresp;

assign wp_awready  = a3_awready;
assign wp_wready   = a3_wready;
assign wp_bvalid   = a3_bvalid;
assign wp_bresp    = a3_bresp;
|
||
gs_lpddr_wr_arb u_lpddr_wr_arb (
    .clk        (emif_clk),
    .rst_n      (emif_reset_n),
    // s0 = FB writer
    .s0_awaddr  (fbw_awaddr),
    .s0_awburst (fbw_awburst),
    .s0_awid    (fbw_awid),
    .s0_awlen   (fbw_awlen),
    .s0_awsize  (fbw_awsize),
    .s0_awvalid (fbw_awvalid),
    .s0_awready (fbw_awready),
    .s0_wdata   (fbw_wdata),
    .s0_wstrb   (fbw_wstrb),
    .s0_wlast   (fbw_wlast),
    .s0_wvalid  (fbw_wvalid),
    .s0_wready  (fbw_wready),
    .s0_bresp   (fbw_bresp),
    .s0_bvalid  (fbw_bvalid),
    .s0_bready  (fbw_bready),
    // s1 = color-spill
    .s1_awaddr  (csp_awaddr),
    .s1_awburst (csp_awburst),
    .s1_awid    (csp_awid),
    .s1_awlen   (csp_awlen),
    .s1_awsize  (csp_awsize),
    .s1_awvalid (csp_awvalid),
    .s1_awready (a1_awready),
    .s1_wdata   (csp_wdata),
    .s1_wstrb   (csp_wstrb),
    .s1_wlast   (csp_wlast),
    .s1_wvalid  (csp_wvalid),
    .s1_wready  (a1_wready),
    .s1_bresp   (a1_bresp),
    .s1_bvalid  (a1_bvalid),
    .s1_bready  (csp_bready),
    // s2 = Z-spill
    .s2_awaddr  (zsp_awaddr),
    .s2_awburst (zsp_awburst),
    .s2_awid    (zsp_awid),
    .s2_awlen   (zsp_awlen),
    .s2_awsize  (zsp_awsize),
    .s2_awvalid (zsp_awvalid),
    .s2_awready (a2_awready),
    .s2_wdata   (zsp_wdata),
    .s2_wstrb   (zsp_wstrb),
    .s2_wlast   (zsp_wlast),
    .s2_wvalid  (zsp_wvalid),
    .s2_wready  (a2_wready),
    .s2_bresp   (a2_bresp),
    .s2_bvalid  (a2_bvalid),
    .s2_bready  (zsp_bready),
    // s3 = HPS write-probe
    .s3_awaddr  (wp_awaddr),
    .s3_awburst (wp_awburst),
    .s3_awid    (wp_awid),
    .s3_awlen   (wp_awlen),
    .s3_awsize  (wp_awsize),
    .s3_awvalid (wp_awvalid),
    .s3_awready (a3_awready),
    .s3_wdata   (wp_wdata),
    .s3_wstrb   (wp_wstrb),
    .s3_wlast   (wp_wlast),
    .s3_wvalid  (wp_wvalid),
    .s3_wready  (a3_wready),
    .s3_bresp   (a3_bresp),
    .s3_bvalid  (a3_bvalid),
    .s3_bready  (wp_bready),
    // m = EMIF write channel
    .m_awaddr   (emif_axi_awaddr),
    .m_awburst  (emif_axi_awburst),
    .m_awid     (emif_axi_awid),
    .m_awlen    (emif_axi_awlen),
    .m_awsize   (emif_axi_awsize),
    .m_awvalid  (emif_axi_awvalid),
    .m_awready  (emif_axi_awready),
    .m_wdata    (emif_axi_wdata),
    .m_wstrb    (emif_axi_wstrb),
    .m_wlast    (emif_axi_wlast),
    .m_wvalid   (emif_axi_wvalid),
    .m_wready   (emif_axi_wready),
    .m_bresp    (emif_axi_bresp),
    .m_bvalid   (emif_axi_bvalid),
    .m_bready   (emif_axi_bready)
);
|
||
`else
|
||
// no write-probe: FB writer drives the EMIF write channel directly (byte-identical).
|
||
// AW: FB writer -> EMIF
assign emif_axi_awaddr  = fbw_awaddr;
assign emif_axi_awburst = fbw_awburst;
assign emif_axi_awid    = fbw_awid;
assign emif_axi_awlen   = fbw_awlen;
assign emif_axi_awsize  = fbw_awsize;
assign emif_axi_awvalid = fbw_awvalid;
// W: FB writer -> EMIF
assign emif_axi_wdata   = fbw_wdata;
assign emif_axi_wstrb   = fbw_wstrb;
assign emif_axi_wlast   = fbw_wlast;
assign emif_axi_wvalid  = fbw_wvalid;
// B ready: FB writer -> EMIF
assign emif_axi_bready  = fbw_bready;
// Handshake / response returns: EMIF -> FB writer
assign fbw_awready      = emif_axi_awready;
assign fbw_wready       = emif_axi_wready;
assign fbw_bvalid       = emif_axi_bvalid;
assign fbw_bresp        = emif_axi_bresp;
|
||
`endif
|
||
// f2sdram write channel — inert.
|
||
// AW: never valid
assign f2s_awvalid_w = 1'b0;
assign f2s_awaddr_w  = 32'd0;
assign f2s_awlen_w   = 8'd0;
assign f2s_awsize_w  = 3'd0;
assign f2s_awburst_w = 2'b01;
assign f2s_awid_w    = 5'd0;
// W: never valid
assign f2s_wvalid_w  = 1'b0;
assign f2s_wdata_w   = 256'd0;
assign f2s_wstrb_w   = 32'd0;
assign f2s_wlast_w   = 1'b0;
// B: always ready
assign f2s_bready_w  = 1'b1;
|
||
// Brick 3 — HPS read-probe (port 1 of the read arbiter).
|
||
gs_lpddr_rd_probe #(
    .ADDR_W (30)
) u_lpddr_rd (
    .axi_clk   (emif_clk),
    .axi_rst_n (emif_reset_n),
    // request from / result to the bridge (rd_busy is left unconnected)
    .rd_pulse  (lpddr_rd_pulse_w),
    .rd_addr   (lpddr_rd_addr_w),
    .rd_done   (lpddr_rd_done_w),
    .rd_data   (lpddr_rd_data_w),
    .rd_busy   (),
    // AR
    .araddr    (probe_ar_araddr),
    .arburst   (probe_ar_arburst),
    .arid      (probe_ar_arid),
    .arlen     (probe_ar_arlen),
    .arsize    (probe_ar_arsize),
    .arvalid   (probe_ar_arvalid),
    .arready   (probe_ar_arready),
    // R
    .rdata     (probe_r_rdata),
    .rresp     (probe_r_rresp),
    .rlast     (probe_r_rlast),
    .rvalid    (probe_r_rvalid),
    .rready    (probe_r_rready)
);
|
||
|
||
// Ch320 Brick 2b — LPDDR4B scanout reader (port 0, priority). Fills a frame cache
|
||
// on each PCRTC vsync; serves pixels indexed by the PCRTC vram_read_addr.
|
||
// Each reader's enable = video_src_emif & frame_ready_emif & its side of the scanout_lb_eff select (only refill while displaying from LPDDR).
|
||
// TWO LPDDR scanout readers share arbiter port 0, selected by scanout_lb_w:
|
||
// frame-cache (gs_lpddr_scanout, reference/fallback) vs line-buffer
|
||
// (gs_lpddr_scanout_lb, Ch321 architectural path). Only the selected one is
|
||
// enabled, so only it issues reads; the mux routes its AR + the port-0 R back.
|
||
// Frame-cache reader (fc_*)
wire [31:0] fc_rd_errs_w;
wire [7:0]  fc_r_w;
wire [7:0]  fc_g_w;
wire [7:0]  fc_b_w;
wire        fc_valid_w;
wire [29:0] fc_araddr;
wire [1:0]  fc_arburst;
wire [6:0]  fc_arid;
wire [7:0]  fc_arlen;
wire [2:0]  fc_arsize;
wire        fc_arvalid;
wire        fc_rready;
// Line-buffer reader (lb_*)
wire [31:0] lb_rd_errs_w;
wire [7:0]  lb_r_w;
wire [7:0]  lb_g_w;
wire [7:0]  lb_b_w;
wire        lb_valid_w;
wire        lb_underflow_w;
wire [29:0] lb_araddr;
wire [1:0]  lb_arburst;
wire [6:0]  lb_arid;
wire [7:0]  lb_arlen;
wire [2:0]  lb_arsize;
wire        lb_arvalid;
wire        lb_rready;
|
||
|
||
// Ch327a — the GS_TILE_SPILL profile uses the SCALABLE line-buffer scanout (the frame-cache
|
||
// doesn't scale past ~128x128). Force the select so the spill profile always displays via the
|
||
// line-buffer; other profiles keep the runtime bridge bit (default frame-cache).
|
||
`ifndef GS_TILE_SPILL
// Ch352 — runtime select, taken from the emif-domain coherent capture (not the raw bridge bit).
wire scanout_lb_eff = scanout_lb_emif;
`else
// Spill profile: line-buffer reader is always selected.
wire scanout_lb_eff = 1'b1;
`endif
|
||
|
||
gs_lpddr_scanout #(
    .FB_BASE     (SCANOUT_FB_BASE),
    .VRAM_BASE   (30'd0),
    .PSMCT32     (SCANOUT_PSMCT32),
    .CACHE_BEATS (SCANOUT_CACHE_BEATS)
) u_lpddr_scan (
    // AXI side (emif_clk)
    .axi_clk        (emif_clk),
    .axi_rst_n      (emif_reset_n),
    // enabled only when LPDDR video is selected, the line-buffer reader is
    // NOT selected, and frame_ready_emif is set
    .enable         (video_src_emif & ~scanout_lb_eff & frame_ready_emif),
    .frame_start    (demo_video_vsync),
    // video side (design_clk)
    .video_clk      (design_clk),
    .vram_read_addr (demo_vram_raddr),
    .r              (fc_r_w),
    .g              (fc_g_w),
    .b              (fc_b_w),
    // status (rd_beats is left unconnected)
    .cache_valid    (fc_valid_w),
    .rd_beats       (),
    .rd_errs        (fc_rd_errs_w),
    // AR: arready is masked while the line-buffer reader owns port 0
    .araddr         (fc_araddr),
    .arburst        (fc_arburst),
    .arid           (fc_arid),
    .arlen          (fc_arlen),
    .arsize         (fc_arsize),
    .arvalid        (fc_arvalid),
    .arready        (scan_ar_arready & ~scanout_lb_eff),
    // R: rvalid is masked the same way
    .rdata          (scan_r_rdata),
    .rresp          (scan_r_rresp),
    .rlast          (scan_r_rlast),
    .rvalid         (scan_r_rvalid & ~scanout_lb_eff),
    .rready         (fc_rready)
);
|
||
gs_lpddr_scanout_lb #(
    .FB_BASE      (SCANOUT_FB_BASE),
    .STRIDE_BYTES (SCANOUT_LB_STRIDE),
    .ROW_BEATS    (SCANOUT_LB_ROW_BEATS),
    .N_ROWS       (SCANOUT_LB_NROWS),
    .PSMCT32      (SCANOUT_PSMCT32)
) u_lpddr_scan_lb (
    // AXI side (emif_clk)
    .axi_clk     (emif_clk),
    .axi_rst_n   (emif_reset_n),
    // enabled only when LPDDR video is selected, the line-buffer reader IS
    // selected, and frame_ready_emif is set
    .enable      (video_src_emif & scanout_lb_eff & frame_ready_emif),
    // video side (design_clk)
    .video_clk   (design_clk),
    .frame_start (demo_video_vsync),
    .pixel_x     (pixel_x),
    .pixel_y     (pixel_y),
    .in_window   (demo_pix_window),
    .r           (lb_r_w),
    .g           (lb_g_w),
    .b           (lb_b_w),
    // status
    .line_valid  (lb_valid_w),
    .underflow   (lb_underflow_w),
    .rd_errs     (lb_rd_errs_w),
    // AR: arready is passed through only while this reader owns port 0
    .araddr      (lb_araddr),
    .arburst     (lb_arburst),
    .arid        (lb_arid),
    .arlen       (lb_arlen),
    .arsize      (lb_arsize),
    .arvalid     (lb_arvalid),
    .arready     (scan_ar_arready & scanout_lb_eff),
    // R: rvalid is gated the same way
    .rdata       (scan_r_rdata),
    .rresp       (scan_r_rresp),
    .rlast       (scan_r_rlast),
    .rvalid      (scan_r_rvalid & scanout_lb_eff),
    .rready      (lb_rready)
);
|
||
// mux the active reader onto arbiter port 0 + onto the video output / status.
|
||
// Read-request side: the selected reader drives arbiter port 0.
assign scan_ar_arvalid    = scanout_lb_eff ? lb_arvalid : fc_arvalid;
assign scan_ar_araddr     = scanout_lb_eff ? lb_araddr  : fc_araddr;
assign scan_ar_arburst    = scanout_lb_eff ? lb_arburst : fc_arburst;
assign scan_ar_arid       = scanout_lb_eff ? lb_arid    : fc_arid;
assign scan_ar_arlen      = scanout_lb_eff ? lb_arlen   : fc_arlen;
assign scan_ar_arsize     = scanout_lb_eff ? lb_arsize  : fc_arsize;
assign scan_r_rready      = scanout_lb_eff ? lb_rready  : fc_rready;

// Video side: pixel colour and validity from the selected reader.
assign scan_r_w           = scanout_lb_eff ? lb_r_w     : fc_r_w;
assign scan_g_w           = scanout_lb_eff ? lb_g_w     : fc_g_w;
assign scan_b_w           = scanout_lb_eff ? lb_b_w     : fc_b_w;
assign scan_cache_valid_w = scanout_lb_eff ? lb_valid_w : fc_valid_w;

// Error flag: a non-zero read-error count on the selected reader, plus
// underflow when the line-buffer reader is selected.
assign scan_err_w         = scanout_lb_eff
                          ? (lb_underflow_w | (lb_rd_errs_w != 32'd0))
                          : (fc_rd_errs_w != 32'd0);
|
||
|
||
// 4-port read arbiter — scanout (s0, priority), probe (s1), texture-cache fill (s2) and tile reload (s3) onto the EMIF read channel.
|
||
gs_lpddr_rd_arb u_lpddr_rd_arb (
    .clk        (emif_clk),
    .rst_n      (emif_reset_n),
    // s0 = scanout reader (muxed frame-cache / line-buffer)
    .s0_araddr  (scan_ar_araddr),
    .s0_arburst (scan_ar_arburst),
    .s0_arid    (scan_ar_arid),
    .s0_arlen   (scan_ar_arlen),
    .s0_arsize  (scan_ar_arsize),
    .s0_arvalid (scan_ar_arvalid),
    .s0_arready (scan_ar_arready),
    .s0_rdata   (scan_r_rdata),
    .s0_rresp   (scan_r_rresp),
    .s0_rlast   (scan_r_rlast),
    .s0_rvalid  (scan_r_rvalid),
    .s0_rready  (scan_r_rready),
    // s1 = HPS read-probe
    .s1_araddr  (probe_ar_araddr),
    .s1_arburst (probe_ar_arburst),
    .s1_arid    (probe_ar_arid),
    .s1_arlen   (probe_ar_arlen),
    .s1_arsize  (probe_ar_arsize),
    .s1_arvalid (probe_ar_arvalid),
    .s1_arready (probe_ar_arready),
    .s1_rdata   (probe_r_rdata),
    .s1_rresp   (probe_r_rresp),
    .s1_rlast   (probe_r_rlast),
    .s1_rvalid  (probe_r_rvalid),
    .s1_rready  (probe_r_rready),
    // s2 = texture-cache fill
    .s2_araddr  (texf_ar_araddr),
    .s2_arburst (texf_ar_arburst),
    .s2_arid    (texf_ar_arid),
    .s2_arlen   (texf_ar_arlen),
    .s2_arsize  (texf_ar_arsize),
    .s2_arvalid (texf_ar_arvalid),
    .s2_arready (texf_ar_arready),
    .s2_rdata   (texf_r_rdata),
    .s2_rresp   (texf_r_rresp),
    .s2_rlast   (texf_r_rlast),
    .s2_rvalid  (texf_r_rvalid),
    .s2_rready  (texf_r_rready),
    // s3 = tile reload
    .s3_araddr  (reload_ar_araddr),
    .s3_arburst (reload_ar_arburst),
    .s3_arid    (reload_ar_arid),
    .s3_arlen   (reload_ar_arlen),
    .s3_arsize  (reload_ar_arsize),
    .s3_arvalid (reload_ar_arvalid),
    .s3_arready (reload_ar_arready),
    .s3_rdata   (reload_r_rdata),
    .s3_rresp   (reload_r_rresp),
    .s3_rlast   (reload_r_rlast),
    .s3_rvalid  (reload_r_rvalid),
    .s3_rready  (reload_r_rready),
    // m = EMIF read channel
    .m_araddr   (emif_ar_araddr),
    .m_arburst  (emif_ar_arburst),
    .m_arid     (emif_ar_arid),
    .m_arlen    (emif_ar_arlen),
    .m_arsize   (emif_ar_arsize),
    .m_arvalid  (emif_ar_arvalid),
    .m_arready  (emif_ar_arready),
    .m_rdata    (emif_r_rdata),
    .m_rresp    (emif_r_rresp),
    .m_rlast    (emif_r_rlast),
    .m_rvalid   (emif_r_rvalid),
    .m_rready   (emif_r_rready)
);
|
||
|
||
// Ch322 — LPDDR-backed texture cache, prefilled ahead of rasterisation and attached to
// read-arbiter port 2 (the texf_* AXI read channel).
//   fill side   : emif_clk domain; one-shot fill, armed by the bridge (tex_fill_start_w).
//   sample side : design_clk domain; taps u_demo's texel-fetch request and returns the
//                 texel at the existing 1-cycle latency.
`ifdef GS_LPDDR_TEX
gs_texture_cache #(
    .LPDDR_TEX_BASE (TEX_LPDDR_BASE),
    .TEX_VRAM_BASE  (TEXC_VRAM_BASE),
    .TEX_BYTES      (TEXC_BYTES),
    .N_BEATS        (TEXC_NBEATS)
) u_texcache (
    // AXI-side clock / reset
    .axi_clk     (emif_clk),
    .axi_rst_n   (emif_reset_n),

    // fill control + fill statistics
    .fill_start  (tex_fill_start_w),
    .fill_done   (tex_fill_done_w),
    .fill_beats  (tex_fill_beats_w),
    .fill_bytes  (tex_fill_bytes_w),
    .fill_crc    (tex_fill_crc_w),
    .rd_errs     (tex_rd_errs_w),

    // AXI read-address channel (arbiter port s2)
    .araddr      (texf_ar_araddr),
    .arburst     (texf_ar_arburst),
    .arid        (texf_ar_arid),
    .arlen       (texf_ar_arlen),
    .arsize      (texf_ar_arsize),
    .arvalid     (texf_ar_arvalid),
    .arready     (texf_ar_arready),

    // AXI read-data channel (arbiter port s2)
    .rdata       (texf_r_rdata),
    .rresp       (texf_r_rresp),
    .rlast       (texf_r_rlast),
    .rvalid      (texf_r_rvalid),
    .rready      (texf_r_rready),

    // texel sample port
    .sample_clk  (design_clk),
    .tex_rd_en   (demo_gs_tex_rd_en),
    .tex_rd_addr (demo_gs_tex_rd_addr),
    .tex_rd_data (demo_tex_cache_data),
    .tex_ready   (demo_tex_cache_ready)
);
`else
// No texture cache: park read-port 2. arvalid=0 means no request is issued and
// rready=1 drains anything that might arrive.
assign texf_ar_araddr  = 30'd0,
       texf_ar_arburst = 2'b01,
       texf_ar_arid    = 7'd4,
       texf_ar_arlen   = 8'd0,
       texf_ar_arsize  = 3'b101,
       texf_ar_arvalid = 1'b0;
assign texf_r_rready   = 1'b1;

// Fill status / statistics reported to the bridge read as zero.
assign tex_fill_done_w  = 1'b0,
       tex_fill_beats_w = 32'd0,
       tex_fill_bytes_w = 32'd0,
       tex_rd_errs_w    = 32'd0,
       tex_fill_crc_w   = 32'd0;

`ifndef GS_TILE_SPILL
// GS_TILE_SPILL drives these from its own HPS write-probe (diag); tie only when absent.
assign lpddr_wr_busy_w      = 1'b0,
       lpddr_wr_done_w      = 1'b0,
       lpddr_wr_bresp_err_w = 32'd0;
`endif
`endif
|
||
|
||
// Ch323 — tile COLOR+Z reload engine on read-arbiter port 3 (reload_* AXI read channel;
// per the original note its priority sits below scanout and above probe/texfill).
//   fill side  (emif_clk)   : per row, reads the tile's color row from COLOR_SPILL_BASE,
//                             then its Z row from Z_SPILL_BASE.
//   serve side (design_clk) : returns (color, Z) per tile index to gs_stub's TP_RELOAD
//                             sweep at 1-cycle latency.
// Armed by demo_reload_start (a toggle).
`ifdef GS_TILE_SPILL
gs_tile_reload #(
    .COLOR_BASE   (COLOR_SPILL_BASE),
    .Z_BASE       (Z_SPILL_BASE),
    .TILE_W       (16),
    .TILE_H       (16),
    .STRIDE_BYTES (1024),   // Ch327b 256px*4B
    .ROW_BEATS    (2),
    .COLOR_W      (32)
) u_tile_reload (
    // AXI-side clock / reset
    .axi_clk      (emif_clk),
    .axi_rst_n    (emif_reset_n),

    // reload control; reload_done is deliberately left unconnected
    .reload_start (demo_reload_start),
    .reload_base  (demo_reload_base),
    .reload_done  (),

    // statistics surfaced to the bridge
    .color_beats  (reload_color_beats_w),
    .z_beats      (reload_z_beats_w),
    .rd_errs      (reload_rd_errs_w),

    // AXI read-address channel (arbiter port s3)
    .araddr       (reload_ar_araddr),
    .arburst      (reload_ar_arburst),
    .arid         (reload_ar_arid),
    .arlen        (reload_ar_arlen),
    .arsize       (reload_ar_arsize),
    .arvalid      (reload_ar_arvalid),
    .arready      (reload_ar_arready),

    // AXI read-data channel (arbiter port s3)
    .rdata        (reload_r_rdata),
    .rresp        (reload_r_rresp),
    .rlast        (reload_r_rlast),
    .rvalid       (reload_r_rvalid),
    .rready       (reload_r_rready),

    // serve port
    .serve_clk    (design_clk),
    .raddr        (demo_tile_reload_raddr),
    .color_o      (demo_tile_reload_color),
    .z_o          (demo_tile_reload_z),
    .reload_ready (demo_tile_reload_ready)
);
`else
// No reload engine: park read-port 3 (arvalid=0 issues nothing, rready=1 drains)
// and report zeroed counters.
assign reload_ar_araddr  = 30'd0,
       reload_ar_arburst = 2'b01,
       reload_ar_arid    = 7'd6,
       reload_ar_arlen   = 8'd0,
       reload_ar_arsize  = 3'b101,
       reload_ar_arvalid = 1'b0;
assign reload_r_rready   = 1'b1;

assign reload_color_beats_w = 32'd0,
       reload_z_beats_w     = 32'd0,
       reload_rd_errs_w     = 32'd0;
`endif
|
||
`ifndef GS_TILE_SPILL
// Spill writers absent: zero every counter/flag they would otherwise drive
// (the GS_TILE_SPILL write branch is the real driver when it is compiled in).

// color spill path
assign color_spill_beats_w = 32'd0,
       color_spill_errs_w  = 32'd0;
assign color_spill_ovf_w   = 1'b0;

// Z spill path
assign z_spill_beats_w = 32'd0,
       z_spill_errs_w  = 32'd0;
assign z_spill_ovf_w   = 1'b0;

// color-side debug counters
assign dbg_c_beats_w = 32'd0,
       dbg_c_emit_w  = 32'd0,
       dbg_c_push_w  = 32'd0,
       dbg_c_pop_w   = 32'd0,
       dbg_c_aw_w    = 32'd0,
       dbg_c_w_w     = 32'd0;

// Z-side debug counters
assign dbg_z_beats_w = 32'd0,
       dbg_z_emit_w  = 32'd0,
       dbg_z_push_w  = 32'd0,
       dbg_z_pop_w   = 32'd0;
`endif
|
||
`elsif GS_LPDDR_FB
|
||
// Legacy f2sdram→HPS-DRAM writer (RETIRED — platform-blocked; kept for reference).
// Pixel side runs on design_clk; the AXI write channel runs on CLOCK2_50 and is wired
// to the f2s_*_w / f2sdram_*_open nets that connect to soc_inst's f2sdram port.
gs_lpddr_axi_master #(
    .FIFO_DEPTH (16)
) u_lpddr_axi (
    // GS / pixel domain
    .gs_clk              (design_clk),
    .gs_rst_n            (core_rst_n),
    .enable              (1'b1),

    // runtime control from the HPS bridge
    .arm                 (lpddr_arm_w),
    .canary              (lpddr_canary_w),
    .fb_base             (lpddr_fb_base_w),
    .ctrl_commit         (lpddr_ctrl_commit_w),

    // pixel stream: only flush emits whose PSM equals 6'h02 are forwarded
    .px_emit             (demo_flush_emit && (demo_flush_psm == 6'h02)),
    .px_addr             (demo_flush_addr),
    .px_pix16            (demo_flush_pix16),

    // AXI domain
    .axi_clk             (CLOCK2_50),
    .axi_rst_n           (~ninit_done),

    // AXI write-address channel
    .awaddr              (f2s_awaddr_w),
    .awlen               (f2s_awlen_w),
    .awsize              (f2s_awsize_w),
    .awburst             (f2s_awburst_w),
    .awid                (f2s_awid_w),
    .awvalid             (f2s_awvalid_w),
    .awready             (f2sdram_awready_open),

    // AXI write-data channel
    .wdata               (f2s_wdata_w),
    .wstrb               (f2s_wstrb_w),
    .wlast               (f2s_wlast_w),
    .wvalid              (f2s_wvalid_w),
    .wready              (f2sdram_wready_open),

    // AXI write-response channel
    .bvalid              (f2sdram_bvalid_open),
    .bready              (f2s_bready_w),
    .bresp               (f2sdram_bresp_open),

    // status / counters reported to the bridge
    // NOTE(review): `beats_written` lands on a net named lpddr_*bytes*_w — confirm units.
    .beats_written       (lpddr_bytes_w),
    .bursts_issued       (lpddr_bursts_w),
    .bresp_err_count     (lpddr_bresp_err_w),
    .fifo_overflow_count (lpddr_fifo_ovf_w),
    .idle                (lpddr_idle_w)
);

// no read-probe in the f2sdram path
assign lpddr_rd_data_w = 32'd0,
       lpddr_rd_done_w = 1'b0;

// no LPDDR scanout
assign scan_r_w = 8'd0,
       scan_g_w = 8'd0,
       scan_b_w = 8'd0;
assign scan_cache_valid_w = 1'b0,
       scan_err_w         = 1'b0;

// no texture-cache fill
assign tex_fill_done_w  = 1'b0,
       tex_fill_beats_w = 32'd0,
       tex_fill_bytes_w = 32'd0,
       tex_rd_errs_w    = 32'd0,
       tex_fill_crc_w   = 32'd0;

// no LPDDR write-probe
assign lpddr_wr_busy_w      = 1'b0,
       lpddr_wr_done_w      = 1'b0,
       lpddr_wr_bresp_err_w = 32'd0;

// no tile spill: color / Z counters and overflow flags
assign color_spill_beats_w = 32'd0,
       color_spill_errs_w  = 32'd0,
       z_spill_beats_w     = 32'd0,
       z_spill_errs_w      = 32'd0;
assign color_spill_ovf_w   = 1'b0,
       z_spill_ovf_w       = 1'b0;

// no tile spill: debug counters
assign dbg_c_beats_w = 32'd0,
       dbg_c_emit_w  = 32'd0,
       dbg_c_push_w  = 32'd0,
       dbg_c_pop_w   = 32'd0,
       dbg_c_aw_w    = 32'd0,
       dbg_c_w_w     = 32'd0;
assign dbg_z_beats_w = 32'd0,
       dbg_z_emit_w  = 32'd0,
       dbg_z_push_w  = 32'd0,
       dbg_z_pop_w   = 32'd0;

// no tile reload
assign reload_color_beats_w = 32'd0,
       reload_z_beats_w     = 32'd0,
       reload_rd_errs_w     = 32'd0;
|
||
`else
|
||
// No LPDDR master: f2sdram write channel inert, status tied to a safe idle.

// writer status seen by the bridge: zero counters, idle asserted
assign lpddr_bytes_w     = 32'd0,
       lpddr_bursts_w    = 32'd0,
       lpddr_bresp_err_w = 32'd0,
       lpddr_fifo_ovf_w  = 32'd0;
assign lpddr_idle_w      = 1'b1;

// f2sdram write-address channel: never valid
assign f2s_awaddr_w  = 32'd0,
       f2s_awlen_w   = 8'd0,
       f2s_awsize_w  = 3'd0,
       f2s_awburst_w = 2'b01,
       f2s_awid_w    = 5'd0,
       f2s_awvalid_w = 1'b0;

// f2sdram write-data channel: never valid
assign f2s_wdata_w  = 256'd0,
       f2s_wstrb_w  = 32'd0,
       f2s_wlast_w  = 1'b0,
       f2s_wvalid_w = 1'b0;

// f2sdram write-response channel: always ready to accept
assign f2s_bready_w = 1'b1;

// no read-probe (inert path)
assign lpddr_rd_data_w = 32'd0,
       lpddr_rd_done_w = 1'b0;

// no LPDDR scanout
assign scan_r_w = 8'd0,
       scan_g_w = 8'd0,
       scan_b_w = 8'd0;
assign scan_cache_valid_w = 1'b0,
       scan_err_w         = 1'b0;

// no texture-cache fill
assign tex_fill_done_w  = 1'b0,
       tex_fill_beats_w = 32'd0,
       tex_fill_bytes_w = 32'd0,
       tex_rd_errs_w    = 32'd0,
       tex_fill_crc_w   = 32'd0;

// no LPDDR write-probe
assign lpddr_wr_busy_w      = 1'b0,
       lpddr_wr_done_w      = 1'b0,
       lpddr_wr_bresp_err_w = 32'd0;

// no tile spill: color / Z counters and overflow flags
assign color_spill_beats_w = 32'd0,
       color_spill_errs_w  = 32'd0,
       z_spill_beats_w     = 32'd0,
       z_spill_errs_w      = 32'd0;
assign color_spill_ovf_w   = 1'b0,
       z_spill_ovf_w       = 1'b0;

// no tile spill: debug counters
assign dbg_c_beats_w = 32'd0,
       dbg_c_emit_w  = 32'd0,
       dbg_c_push_w  = 32'd0,
       dbg_c_pop_w   = 32'd0,
       dbg_c_aw_w    = 32'd0,
       dbg_c_w_w     = 32'd0;
assign dbg_z_beats_w = 32'd0,
       dbg_z_emit_w  = 32'd0,
       dbg_z_push_w  = 32'd0,
       dbg_z_pop_w   = 32'd0;

// no tile reload
assign reload_color_beats_w = 32'd0,
       reload_z_beats_w     = 32'd0,
       reload_rd_errs_w     = 32'd0;
|
||
`endif
|
||
|
||
// hps2fpga AXI4 link: the qsys system is the master and the fabric-side HPS bridge is
// the slave. Widths follow the splash bridge ABI — 4-bit ID, 38-bit address, 128-bit
// data (hence 16 write strobes).

// write-address channel
wire [3:0]   h2f_axi_awid, h2f_axi_awcache;
wire [37:0]  h2f_axi_awaddr;
wire [7:0]   h2f_axi_awlen;
wire [2:0]   h2f_axi_awsize, h2f_axi_awprot;
wire [1:0]   h2f_axi_awburst;
wire         h2f_axi_awlock, h2f_axi_awvalid, h2f_axi_awready;

// write-data channel
wire [127:0] h2f_axi_wdata;
wire [15:0]  h2f_axi_wstrb;
wire         h2f_axi_wlast, h2f_axi_wvalid, h2f_axi_wready;

// write-response channel
wire [3:0]   h2f_axi_bid;
wire [1:0]   h2f_axi_bresp;
wire         h2f_axi_bvalid, h2f_axi_bready;

// read-address channel
wire [3:0]   h2f_axi_arid, h2f_axi_arcache;
wire [37:0]  h2f_axi_araddr;
wire [7:0]   h2f_axi_arlen;
wire [2:0]   h2f_axi_arsize, h2f_axi_arprot;
wire [1:0]   h2f_axi_arburst;
wire         h2f_axi_arlock, h2f_axi_arvalid, h2f_axi_arready;

// read-data channel
wire [3:0]   h2f_axi_rid;
wire [127:0] h2f_axi_rdata;
wire [1:0]   h2f_axi_rresp;
wire         h2f_axi_rlast, h2f_axi_rvalid, h2f_axi_rready;
|
||
|
||
// `qsys_top` system instance: HPS hard-IO pass-through, the hps2fpga AXI4 master that
// feeds the fabric-side bridge, the FPGA-to-SDRAM (f2sdram) slave port, and the LPDDR4
// EMIF pins. Named connections are grouped by function; order carries no meaning.
qsys_top soc_inst (
    // ---- clock / reset / init ----
    .clk_100_clk                            (CLOCK2_50),
    .reset_reset_n                          (~ninit_done),
    .ninit_done_ninit_done                  (ninit_done),

    // ---- PIOs (LED out_port intentionally unconnected) ----
    .led_pio_external_connection_in_port    (qsys_led_pio),
    .led_pio_external_connection_out_port   (),
    .dipsw_pio_external_connection_export   (qsys_dipsw_pio),
    .button_pio_external_connection_export  (qsys_button_pio),

    // ---- HPS hard-IO pins — pass-through from top-level ports ----
    .hps_io_hps_osc_clk                     (HPS_CLK_25),

    // Ethernet (EMAC0 + MDIO)
    .hps_io_emac0_tx_clk                    (HPS_ENET_TX_CLK),
    .hps_io_emac0_tx_ctl                    (HPS_ENET_TX_CTL),
    .hps_io_emac0_txd0                      (HPS_ENET_TX_DATA[0]),
    .hps_io_emac0_txd1                      (HPS_ENET_TX_DATA[1]),
    .hps_io_emac0_txd2                      (HPS_ENET_TX_DATA[2]),
    .hps_io_emac0_txd3                      (HPS_ENET_TX_DATA[3]),
    .hps_io_emac0_rx_clk                    (HPS_ENET_RX_CLK),
    .hps_io_emac0_rx_ctl                    (HPS_ENET_RX_CTL),
    .hps_io_emac0_rxd0                      (HPS_ENET_RX_DATA[0]),
    .hps_io_emac0_rxd1                      (HPS_ENET_RX_DATA[1]),
    .hps_io_emac0_rxd2                      (HPS_ENET_RX_DATA[2]),
    .hps_io_emac0_rxd3                      (HPS_ENET_RX_DATA[3]),
    .hps_io_mdio0_mdio                      (HPS_ENET_MDIO),
    .hps_io_mdio0_mdc                       (HPS_ENET_MDC),

    // SD/MMC
    .hps_io_sdmmc_cclk                      (HPS_SD_CLK),
    .hps_io_sdmmc_cmd                       (HPS_SD_CMD),
    .hps_io_sdmmc_data0                     (HPS_SD_DATA[0]),
    .hps_io_sdmmc_data1                     (HPS_SD_DATA[1]),
    .hps_io_sdmmc_data2                     (HPS_SD_DATA[2]),
    .hps_io_sdmmc_data3                     (HPS_SD_DATA[3]),

    // UART1
    .hps_io_uart1_rx                        (HPS_UART_RX),
    .hps_io_uart1_tx                        (HPS_UART_TX),

    // USB0
    .hps_io_usb0_clk                        (HPS_USB_CLK),
    .hps_io_usb0_stp                        (HPS_USB_STP),
    .hps_io_usb0_dir                        (HPS_USB_DIR),
    .hps_io_usb0_nxt                        (HPS_USB_NXT),
    .hps_io_usb0_data0                      (HPS_USB_DATA[0]),
    .hps_io_usb0_data1                      (HPS_USB_DATA[1]),
    .hps_io_usb0_data2                      (HPS_USB_DATA[2]),
    .hps_io_usb0_data3                      (HPS_USB_DATA[3]),
    .hps_io_usb0_data4                      (HPS_USB_DATA[4]),
    .hps_io_usb0_data5                      (HPS_USB_DATA[5]),
    .hps_io_usb0_data6                      (HPS_USB_DATA[6]),
    .hps_io_usb0_data7                      (HPS_USB_DATA[7]),

    // I2C1 + GPIOs
    .hps_io_i2c1_sda                        (HPS_I2C_SDA),
    .hps_io_i2c1_scl                        (HPS_I2C_SCL),
    .hps_io_gpio28                          (HPS_GSENSOR_INT),
    .hps_io_gpio34                          (HPS_GSENSOR_I2C_EN),
    .hps_io_gpio40                          (HPS_KEY),
    .hps_io_gpio41                          (HPS_LED),

    // ---- h2f reset + hps2fpga AXI4 master → fabric-side HPS bridge slave ----
    .h2f_reset_reset                        (h2f_reset_reset),

    // write address
    .subsys_hps_hps2fpga_awid               (h2f_axi_awid),
    .subsys_hps_hps2fpga_awaddr             (h2f_axi_awaddr),
    .subsys_hps_hps2fpga_awlen              (h2f_axi_awlen),
    .subsys_hps_hps2fpga_awsize             (h2f_axi_awsize),
    .subsys_hps_hps2fpga_awburst            (h2f_axi_awburst),
    .subsys_hps_hps2fpga_awlock             (h2f_axi_awlock),
    .subsys_hps_hps2fpga_awcache            (h2f_axi_awcache),
    .subsys_hps_hps2fpga_awprot             (h2f_axi_awprot),
    .subsys_hps_hps2fpga_awvalid            (h2f_axi_awvalid),
    .subsys_hps_hps2fpga_awready            (h2f_axi_awready),
    // write data
    .subsys_hps_hps2fpga_wdata              (h2f_axi_wdata),
    .subsys_hps_hps2fpga_wstrb              (h2f_axi_wstrb),
    .subsys_hps_hps2fpga_wlast              (h2f_axi_wlast),
    .subsys_hps_hps2fpga_wvalid             (h2f_axi_wvalid),
    .subsys_hps_hps2fpga_wready             (h2f_axi_wready),
    // write response
    .subsys_hps_hps2fpga_bid                (h2f_axi_bid),
    .subsys_hps_hps2fpga_bresp              (h2f_axi_bresp),
    .subsys_hps_hps2fpga_bvalid             (h2f_axi_bvalid),
    .subsys_hps_hps2fpga_bready             (h2f_axi_bready),
    // read address
    .subsys_hps_hps2fpga_arid               (h2f_axi_arid),
    .subsys_hps_hps2fpga_araddr             (h2f_axi_araddr),
    .subsys_hps_hps2fpga_arlen              (h2f_axi_arlen),
    .subsys_hps_hps2fpga_arsize             (h2f_axi_arsize),
    .subsys_hps_hps2fpga_arburst            (h2f_axi_arburst),
    .subsys_hps_hps2fpga_arlock             (h2f_axi_arlock),
    .subsys_hps_hps2fpga_arcache            (h2f_axi_arcache),
    .subsys_hps_hps2fpga_arprot             (h2f_axi_arprot),
    .subsys_hps_hps2fpga_arvalid            (h2f_axi_arvalid),
    .subsys_hps_hps2fpga_arready            (h2f_axi_arready),
    // read data
    .subsys_hps_hps2fpga_rid                (h2f_axi_rid),
    .subsys_hps_hps2fpga_rdata              (h2f_axi_rdata),
    .subsys_hps_hps2fpga_rresp              (h2f_axi_rresp),
    .subsys_hps_hps2fpga_rlast              (h2f_axi_rlast),
    .subsys_hps_hps2fpga_rvalid             (h2f_axi_rvalid),
    .subsys_hps_hps2fpga_rready             (h2f_axi_rready),

    // ---- FPGA-to-HPS interrupts: none raised ----
    .f2h_irq1_in_irq                        (32'd0),

    // ---- HPS warm reset handshake — idle ----
    .subsys_hps_h2f_warm_reset_handshake_reset_req (),
    .subsys_hps_h2f_warm_reset_handshake_reset_ack (1'b0),

    // ---- FPGA-to-SDRAM AXI4 bridge ----
    // The read side is tied idle here. The write side is driven by the f2s_*_w nets,
    // which are tied inert unless the legacy GS_LPDDR_FB writer is compiled in.
    // Bridge outputs land on named *_open wires rather than `()`: some Agilex 5
    // hard-IP primitives (notably `tennm_sm_soc_mpfe`, the Sundancemesa MPFE inside
    // the HPS LPDDR4 controller) reject fully-open outputs with "not legally connected
    // and/or configured" during synthesis, even though a dangling wire is semantically
    // the same thing.
    // Cache/prot/user constants are the DE25 GHRD f2sdram_adapter values for the HPS
    // NoC slave (awcache=4'h2, awprot=3'b011, awuser=8'hE0) — the metadata the MPFE
    // firewall checks. They are NOT ao486's EMIF values, which target a different slave.

    // read address (idle: arvalid held low)
    .f2sdram_araddr                         (32'd0),
    .f2sdram_arburst                        (2'b01),
    .f2sdram_arcache                        (4'h2),
    .f2sdram_arid                           (5'd0),
    .f2sdram_arlen                          (8'd0),
    .f2sdram_arlock                         (1'b0),
    .f2sdram_arprot                         (3'b011),
    .f2sdram_arqos                          (4'd0),
    .f2sdram_arregion                       (4'd0),
    .f2sdram_arsize                         (3'd0),
    .f2sdram_aruser                         (8'hE0),   // GHRD-forced NoC user metadata
    .f2sdram_arvalid                        (1'b0),
    .f2sdram_arready                        (f2sdram_arready_open),
    // read data (always ready; returned beats are ignored)
    .f2sdram_rdata                          (f2sdram_rdata_open),
    .f2sdram_rid                            (f2sdram_rid_open),
    .f2sdram_rlast                          (f2sdram_rlast_open),
    .f2sdram_rresp                          (f2sdram_rresp_open),
    .f2sdram_ruser                          (f2sdram_ruser_open),
    .f2sdram_rvalid                         (f2sdram_rvalid_open),
    .f2sdram_rready                         (1'b1),
    // write address
    .f2sdram_awaddr                         (f2s_awaddr_w),
    .f2sdram_awburst                        (f2s_awburst_w),
    .f2sdram_awcache                        (4'h2),
    .f2sdram_awid                           (f2s_awid_w),
    .f2sdram_awlen                          (f2s_awlen_w),
    .f2sdram_awlock                         (1'b0),
    .f2sdram_awprot                         (3'b011),
    .f2sdram_awqos                          (4'd0),
    .f2sdram_awregion                       (4'd0),
    .f2sdram_awsize                         (f2s_awsize_w),
    .f2sdram_awuser                         (8'hE0),   // GHRD-forced NoC user metadata — the bit we were missing
    .f2sdram_awvalid                        (f2s_awvalid_w),
    .f2sdram_awready                        (f2sdram_awready_open),
    // write data
    .f2sdram_wdata                          (f2s_wdata_w),
    .f2sdram_wstrb                          (f2s_wstrb_w),
    .f2sdram_wlast                          (f2s_wlast_w),
    .f2sdram_wuser                          (8'd0),    // GHRD man_wuser = 8'h00
    .f2sdram_wvalid                         (f2s_wvalid_w),
    .f2sdram_wready                         (f2sdram_wready_open),
    // write response
    .f2sdram_bid                            (f2sdram_bid_open),
    .f2sdram_bresp                          (f2sdram_bresp_open),
    .f2sdram_buser                          (f2sdram_buser_open),
    .f2sdram_bvalid                         (f2sdram_bvalid_open),
    .f2sdram_bready                         (f2s_bready_w),

    // ---- LPDDR4 EMIF — passes through to the LPDDR4A_* board pins ----
    .emif_hps_emif_mem_0_mem_cs             (LPDDR4A_CS_n),
    .emif_hps_emif_mem_0_mem_ca             (LPDDR4A_CA),
    .emif_hps_emif_mem_0_mem_cke            (LPDDR4A_CKE),
    .emif_hps_emif_mem_0_mem_dq             (LPDDR4A_DQ),
    .emif_hps_emif_mem_0_mem_dqs_t          (LPDDR4A_DQS),
    .emif_hps_emif_mem_0_mem_dqs_c          (LPDDR4A_DQS_n),
    .emif_hps_emif_mem_0_mem_dmi            (LPDDR4A_DM),
    .emif_hps_emif_mem_ck_0_mem_ck_t        (LPDDR4A_CK),
    .emif_hps_emif_mem_ck_0_mem_ck_c        (LPDDR4A_CK_n),
    .emif_hps_emif_mem_reset_n_mem_reset_n  (LPDDR4A_RESET_n),
    .emif_hps_emif_oct_0_oct_rzqin          (LPDDR4A_RZQ),
    .emif_hps_emif_ref_clk_0_clk            (LPDDR4A_REFCLK_p)
);
|
||
|
||
// Fabric-side AXI4 slave terminating soc_inst's hps2fpga master, clocked by CLOCK2_50.
// Per the original note, its port signature follows `splash_hps_bridge` so a future
// "real" ps2 bridge can be dropped in without re-plumbing the top.
// NOTE(review): older comments call this the "minimal null" slave, but the module
// instantiated is `ps2_hps_bridge` and the port list carries many status/control
// surfaces beyond the AXI channels — that description looks stale; confirm.
// NOTE(review): dbg_c_aw_w / dbg_c_w_w are tied off elsewhere in this file yet have no
// matching port here — confirm they are consumed elsewhere or are dead nets.
ps2_hps_bridge u_hps_bridge (
    // ---- clock / reset ----
    .clk                       (CLOCK2_50),
    .reset_n                   (~ninit_done),
    .h2f_reset                 (h2f_reset_reset),

    // ---- hps2fpga AXI4 slave port ----
    // write address
    .s_axi_awid                (h2f_axi_awid),
    .s_axi_awaddr              (h2f_axi_awaddr),
    .s_axi_awlen               (h2f_axi_awlen),
    .s_axi_awsize              (h2f_axi_awsize),
    .s_axi_awburst             (h2f_axi_awburst),
    .s_axi_awlock              (h2f_axi_awlock),
    .s_axi_awcache             (h2f_axi_awcache),
    .s_axi_awprot              (h2f_axi_awprot),
    .s_axi_awvalid             (h2f_axi_awvalid),
    .s_axi_awready             (h2f_axi_awready),
    // write data
    .s_axi_wdata               (h2f_axi_wdata),
    .s_axi_wstrb               (h2f_axi_wstrb),
    .s_axi_wlast               (h2f_axi_wlast),
    .s_axi_wvalid              (h2f_axi_wvalid),
    .s_axi_wready              (h2f_axi_wready),
    // write response
    .s_axi_bid                 (h2f_axi_bid),
    .s_axi_bresp               (h2f_axi_bresp),
    .s_axi_bvalid              (h2f_axi_bvalid),
    .s_axi_bready              (h2f_axi_bready),
    // read address
    .s_axi_arid                (h2f_axi_arid),
    .s_axi_araddr              (h2f_axi_araddr),
    .s_axi_arlen               (h2f_axi_arlen),
    .s_axi_arsize              (h2f_axi_arsize),
    .s_axi_arburst             (h2f_axi_arburst),
    .s_axi_arlock              (h2f_axi_arlock),
    .s_axi_arcache             (h2f_axi_arcache),
    .s_axi_arprot              (h2f_axi_arprot),
    .s_axi_arvalid             (h2f_axi_arvalid),
    .s_axi_arready             (h2f_axi_arready),
    // read data
    .s_axi_rid                 (h2f_axi_rid),
    .s_axi_rdata               (h2f_axi_rdata),
    .s_axi_rresp               (h2f_axi_rresp),
    .s_axi_rlast               (h2f_axi_rlast),
    .s_axi_rvalid              (h2f_axi_rvalid),
    .s_axi_rready              (h2f_axi_rready),

    // ---- Ch173 — live status surfaces ----
    // Asynchronous to CLOCK2_50; the bridge synchronizes each through a 2-FF chain.
    .core_halt                 (core_halt),
    .dma_done_seen             (dma_done_seen),
    .frame_seen                (frame_seen),
    .hdmi_init_done            (hdmi_init_done),
    .hdmi_i2c_error            (hdmi_i2c_error),
    .raster_overflow           (raster_overflow),

    // ---- Ch174 — event toggles for FRAME_COUNT / DMA_DONE_COUNT ----
    // Toggles, not pulses (see ps2_hps_bridge header comment).
    .frame_toggle              (frame_toggle),
    .dma_done_toggle           (dma_done_toggle),

    // ---- Ch176 — core reset request ----
    // CORE_CTRL[0] writes drive this latched line; per the original note a sync chain
    // elsewhere in this top lifts it into design_clk and folds it into core_rst_n.
    .core_reset_req            (bridge_core_reset_req),

    // ---- Ch318 — LPDDR framebuffer test control/status (runtime, HPS-armed) ----
    // control out
    .lpddr_arm_o               (lpddr_arm_w),
    .lpddr_canary_o            (lpddr_canary_w),
    .lpddr_fb_base_o           (lpddr_fb_base_w),
    .lpddr_ctrl_commit_o       (lpddr_ctrl_commit_w),   // Ch352 — drives the EMIF-domain control snapshot
    // writer status in
    .lpddr_bytes_i             (lpddr_bytes_w),
    .lpddr_bursts_i            (lpddr_bursts_w),
    .lpddr_bresp_err_i         (lpddr_bresp_err_w),
    .lpddr_fifo_ovf_i          (lpddr_fifo_ovf_w),
    .lpddr_idle_i              (lpddr_idle_w),
    // read-probe
    .lpddr_rd_addr_o           (lpddr_rd_addr_w),
    .lpddr_rd_pulse_o          (lpddr_rd_pulse_w),
    .lpddr_rd_data_i           (lpddr_rd_data_w),
    .lpddr_rd_done_i           (lpddr_rd_done_w),
    // scanout select / status
    .lpddr_video_src_o         (video_src_w),
    .lpddr_scanout_lb_o        (scanout_lb_w),
    .lpddr_scan_valid_i        (scan_cache_valid_w),
    .lpddr_scan_err_i          (scan_err_w),

    // ---- Ch322 — LPDDR write-probe (HPS stages texture words) + texture-cache fill ----
    .lpddr_wr_addr_o           (lpddr_wr_addr_w),
    .lpddr_wr_data_o           (lpddr_wr_data_w),
    .lpddr_wr_pulse_o          (lpddr_wr_pulse_w),
    .lpddr_wr_busy_i           (lpddr_wr_busy_w),
    .lpddr_wr_done_i           (lpddr_wr_done_w),
    .lpddr_wr_bresp_err_i      (lpddr_wr_bresp_err_w),
    .tex_fill_start_o          (tex_fill_start_w),
    .tex_fill_done_i           (tex_fill_done_w),
    .tex_fill_beats_i          (tex_fill_beats_w),
    .tex_fill_bytes_i          (tex_fill_bytes_w),
    .tex_fill_crc_i            (tex_fill_crc_w),
    .tex_rd_errs_i             (tex_rd_errs_w),
    .tex_cache_hits_i          (demo_tex_cache_hits),
    .tex_bram_hits_i           (demo_tex_bram_hits),

    // ---- Ch323 — tile COLOR+Z spill/reload counters (0 unless GS_TILE_SPILL) ----
    .spill_color_beats_i       (color_spill_beats_w),
    .spill_color_errs_i        (color_spill_errs_w),
    .spill_color_ovf_i         (color_spill_ovf_w),
    .spill_z_beats_i           (z_spill_beats_w),
    .spill_z_errs_i            (z_spill_errs_w),
    .spill_z_ovf_i             (z_spill_ovf_w),
    .reload_color_beats_i      (reload_color_beats_w),
    .reload_z_beats_i          (reload_z_beats_w),
    .reload_rd_errs_i          (reload_rd_errs_w),

    // ---- Ch323 diag — upstream event counters (design_clk, reset per-render) ----
    .ev_tp_flush_i             (ev_tp_flush_q),
    .ev_tp_zflush_i            (ev_tp_zflush_q),
    .ev_tp_reload_i            (ev_tp_reload_q),
    .ev_tp_render_i            (ev_tp_render_q),
    .ev_flush_emit_i           (ev_flush_emit_q),
    .ev_zflush_emit_i          (ev_zflush_emit_q),
    .ev_reload_start_i         (ev_reload_start_q),
    .ev_reload_ready_i         (ev_reload_ready_q),
    .dbg_c_beat_count_i        (dbg_c_beats_w),
    .dbg_c_emit_count_i        (dbg_c_emit_w),
    .dbg_c_push_count_i        (dbg_c_push_w),
    .dbg_c_pop_count_i         (dbg_c_pop_w),
    .dbg_z_beat_count_i        (dbg_z_beats_w),
    .dbg_z_emit_count_i        (dbg_z_emit_w),
    .dbg_z_push_count_i        (dbg_z_push_w),
    .dbg_z_pop_count_i         (dbg_z_pop_w),
    .diag_ctrl_o               (bridge_diag_ctrl),

    // ---- Ch229 — tile-write broadcast → design-domain shadow RAM via `u_tile_cdc` ----
    // Toggle + index + data update at the same bridge clock edge on every AXI write
    // into 0x1000..0x1FFF.
    .tile_wr_toggle            (bridge_tile_wr_toggle),
    .tile_wr_index             (bridge_tile_wr_index),
    .tile_wr_data              (bridge_tile_wr_data),

    // ---- Ch235 — INPUT_P1/P2 surfaced for downstream PS2-fabric consumers ----
    // The synth top doesn't instantiate the IOP core yet, so these leave the bridge as
    // exposed wires; once IOP integration lands, sio2_input_stub (via
    // iop_memory_map_stub) consumes them directly.
    .input_p1_o                (bridge_input_p1),
    .input_p2_o                (bridge_input_p2),
    .input_p1_raw_o            (bridge_input_p1_raw),

    // ---- Ch245 — platform OSD register surface + menu FSM glue ----
    .osd_ctrl_o                (bridge_osd_ctrl),
    .osd_cfg0_o                (bridge_osd_cfg0),
    .osd_cfg1_o                (bridge_osd_cfg1),
    .osd_active_i              (menu_osd_active),
    .osd_cursor_row_i          (menu_cursor_row),
    .osd_trigger_row_i         (menu_cursor_row),       // same net as osd_cursor_row_i
    .osd_open_trigger_i        (menu_act_open),
    .osd_set_trigger_i         (menu_act_select),
    .osd_back_trigger_i        (menu_act_back),
    .osd_scroll_up_trigger_i   (menu_act_scroll_up),
    .osd_scroll_down_trigger_i (menu_act_scroll_down),

    // ---- Ch248 — real DS2 wired-controller readback (replaces Ch226 stub) ----
    .ds2_buttons_i             (ds2_buttons_w),
    .ds2_connected_i           (ds2_connected_w),
    .ds2_error_i               (ds2_error_w),

    // ---- Ch330 Brick 4 — runtime command-list feeder control ----
    .feeder_stg_we_tgl_o       (feeder_stg_we_tgl_w),
    .feeder_stg_waddr_o        (feeder_stg_waddr_w),
    .feeder_stg_wdata_o        (feeder_stg_wdata_w),
    .feeder_go_tgl_o           (feeder_go_tgl_w),
    .feeder_ready_i            (feeder_ready_w),
    .feeder_records_i          (feeder_records_w),
    .feeder_waits_i            (feeder_waits_w)
);
|
||
|
||
`else
|
||
// Sim path (USE_QSYS_TOP not defined): qsys_top is a synth-only black box, so the HPS
// bridge is not instantiated and the nets it would drive need safe constants here.
// NOTE(review): other bridge outputs (e.g. bridge_diag_ctrl, video_src_w, scanout_lb_w,
// the lpddr_*_o nets, tex_fill_start_w) are not tied in this span — confirm they are
// either unused in the sim build or defaulted elsewhere.

// Ch176 — keep X out of core_rst_n: no reset request without a bridge.
assign bridge_core_reset_req = 1'b0;

// Ch229 — tile-broadcast tie-offs: no bridge → no tile writes → static toggle and zero
// index/data. The tile_ram_cdc receiver sees no edges and shadow_mem stays at its
// sim-`initial` zero, so the overlay is transparent.
assign bridge_tile_wr_toggle = 1'b0,
       bridge_tile_wr_index  = 10'd0,
       bridge_tile_wr_data   = 32'd0;

// Ch245 — platform-OSD register tie-offs. With no bridge the OSD is configured-zero
// (cols=0, rows=0, scale=0). The platform overlay clamps scale to 2 internally; the
// menu FSM sees osd_enable=0 and stays inactive.
assign bridge_osd_ctrl = 32'd0,
       bridge_osd_cfg0 = 32'd0,
       bridge_osd_cfg1 = 32'd0;

// Ch235 / Ch248 — INPUT_P* tie-offs. Pre-Ch250 these wires terminated at unconnected
// nets, so sim never needed them grounded. Ch250 added a fabric consumer
// (sio2_input_stub feeds LED[5:7]); without these ties the top TB sees X on LED[7:5].
// The Ch234 sio2_input_stub TBs still drive the input wires directly in their own
// integration setups; this branch only covers the unit TB of the top.
assign bridge_input_p1     = 32'd0,
       bridge_input_p2     = 32'd0,
       bridge_input_p1_raw = 32'd0;

// Ch330 Brick 4 — no bridge in the non-QSYS path: feeder stays idle.
assign feeder_stg_we_tgl_w = 1'b0,
       feeder_go_tgl_w     = 1'b0,
       feeder_stg_waddr_w  = 12'd0,
       feeder_stg_wdata_w  = 64'd0;
|
||
`endif // USE_QSYS_TOP
|
||
|
||
// Lint sink for top-level inputs this wrapper does not consume. Per the original note:
// CLOCK0_50 / CLOCK1_50 are reserved for the future PLL chapter; KEY[1] and SW are
// available for board-level configuration but unused here. The reduction-AND over a
// concatenation that contains constant zeros gives every listed input a reader while
// always evaluating to 0.
/* verilator lint_off UNUSEDSIGNAL */
wire _unused;
assign _unused = &{1'b0, CLOCK0_50, CLOCK1_50, KEY[1], SW, 1'b0};
/* verilator lint_on UNUSEDSIGNAL */
|
||
|
||
endmodule : de25_nano_psmct32_raster_demo_top
|