Files
retroDE_ps2/rtl/gif_gs/gs_lpddr_axi_master.sv
thejayman77 ec82764bef Initial commit: retroDE_ps2 — first-of-its-kind PS2 GS FPGA core (DE25-Nano / Agilex 5)
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression
(272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps,
and all dump-derived textures/traces) is excluded via .gitignore and stays local.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 20:10:50 -04:00

249 lines
12 KiB
Systemverilog

// retroDE_ps2 — gs_lpddr_axi_master (Ch318)
//
// HARDWARE-facing wrapper that takes the PSMCT16 tile-FLUSH pixel stream (GS clock)
// and writes it to real LPDDR over the qsys f2sdram AXI4 port (f2sdram clock). It
// does NOT modify the proven gs_lpddr_fb_writer (the Ch317 sim model) — it is a
// sibling hardware path with the same input stream.
//
// Pipeline (per the Ch318 directive):
// GS clock : PACKER — accumulate 16 PSMCT16 pixels of a tile-row into one 256-bit
// (32-byte) beat {addr, data, strb}. A tile-row is exactly 16 px on a
// 32-byte-aligned line, so a beat completes naturally on its 16th px
// (no dangling partial beat). On completion, push to the async FIFO.
// async FIFO: gray-code CDC, carries {addr[31:0], data[255:0], strb[31:0]} (320b).
// f2sdram : AXI burst FSM — pop a beat and issue a single-beat INCR write
// (AWSIZE=5 = 32 B, AWLEN=0, AWBURST=INCR, full per-byte WSTRB, never
// crossing a 4 KiB boundary since each beat is one 32-byte line). AW
// then W then B, all with backpressure (await ready/valid).
//
// Address: awaddr = fb_base + packet_addr, where fb_base is the runtime base-address
// input and packet_addr is the FB-relative byte address from raster_pixel_fb_addr_q.
// fb_base must point at a LINUX-SAFE reserved LPDDR region before any board run — the
// qsys aperture proves the fabric CAN address SDRAM, not which physical range is safe
// to scribble on (Ch318 board gate).
//
// Counters (TB/status readable): beats, bursts, bresp_err and the done-ish idle flag
// (FSM idle && fifo empty) are f2sdram-domain registers; the fifo-overflow counter is
// kept by the packer in the GS clock domain. enable=0 → fully inert.
module gs_lpddr_axi_master #(
    // Depth of the GS->AXI async FIFO, forwarded unchanged to gs_async_fifo's DEPTH.
    parameter int FIFO_DEPTH = 16
) (
    // ---------------- GS clock domain — flush pixel stream ----------------
    input  logic         gs_clk,
    input  logic         gs_rst_n,     // active-low async reset: packer, arm synchronizer, overflow counter
    input  logic         enable,       // packer gate (sampled in gs_clk): pixels are ignored while low

    // ---------------- RUNTIME controls (RAW from the HPS bridge) ----------------
    // {arm, canary, fb_base} are NOT synchronous to axi_clk or gs_clk. They are only ever
    // sampled on a ctrl_commit edge (see below); nothing in this module reads them directly.
    //
    // arm: HARD SAFETY GATE — no beat is admitted to the AXI FSM unless the latched copy
    //      is high. The latched copy resets to 0, so the core is inert until it is armed.
    //      The latched copy is also 2-FF synchronized into gs_clk to gate the packer.
    input  logic         arm,
    // canary: while the latched copy is high, ONLY the beat whose frame-relative address
    //      is 0 (the 32-byte top-of-frame line) is written; every other beat is popped
    //      and discarded. The latched copy resets to 1.
    input  logic         canary,
    // fb_base: LPDDR byte base address of the framebuffer (e.g. 0x8000_0000). Runtime so a
    //      wrong base is re-targetable without a rebuild. The latched copy resets to
    //      0x8000_0000. Its low 5 bits are ignored: the base is rounded DOWN to a 32-byte
    //      boundary when the AW address is formed, because every beat is a full 32-byte line.
    input  logic [31:0]  fb_base,
    // ctrl_commit: TOGGLE flipped by the bridge on any control write. It is synchronized
    //      into axi_clk and {arm, canary, fb_base} are latched on its edge, so the
    //      multi-bit fb_base crosses coherently. The buses must be stable around the toggle.
    input  logic         ctrl_commit,

    input  logic         px_emit,      // one pixel is presented on px_addr/px_pix16 this gs_clk cycle
    input  logic [31:0]  px_addr,      // FB-relative byte address (raster_pixel_fb_addr_q); bit 0 is ignored
    input  logic [15:0]  px_pix16,     // PSMCT16 pixel value

    // ---------------- f2sdram (LPDDR AXI) clock domain ----------------
    input  logic         axi_clk,
    input  logic         axi_rst_n,    // active-low async reset: control snapshot, FSM, and both FIFO sides
    // AXI4 write-address
    output logic [31:0]  awaddr,
    output logic [7:0]   awlen,
    output logic [2:0]   awsize,
    output logic [1:0]   awburst,
    output logic [4:0]   awid,
    output logic         awvalid,
    input  logic         awready,
    // AXI4 write-data
    output logic [255:0] wdata,
    output logic [31:0]  wstrb,
    output logic         wlast,
    output logic         wvalid,
    input  logic         wready,
    // AXI4 write-response
    input  logic         bvalid,
    output logic         bready,
    input  logic [1:0]   bresp,

    // ---------------- status / counters ----------------
    output logic [31:0]  beats_written,       // axi_clk: W handshakes completed
    output logic [31:0]  bursts_issued,       // axi_clk: AW handshakes completed
    output logic [31:0]  bresp_err_count,     // axi_clk: B responses with bresp != OKAY
    output logic [31:0]  fifo_overflow_count, // gs_clk (NOT axi_clk), cleared by gs_rst_n: pushes dropped on a full FIFO
    output logic         idle                 // axi_clk: FSM in S_IDLE and FIFO read side empty
);

    // FIFO word layout: {addr[31:0], data[255:0], strb[31:0]}
    localparam int PW = 320;

    // ====================== control snapshot (axi_clk) ======================
    // Ch352 — sync the bridge commit toggle and latch {arm, canary, fb_base} on its edge.
    // Reset values are the SAFE defaults (arm=0, canary=1, fb_base=0x8000_0000), so the
    // core is inert before the first commit. Every later use reads these latched copies.
    logic [2:0]  commit_sync;
    logic        arm_axi, canary_axi;
    logic [31:0] fb_base_axi;

    // High for the one axi_clk cycle in which the snapshot updates. Beat admission is
    // blocked in that cycle so the FSM never pops a beat straddling a config change.
    wire commit_edge = (commit_sync[2] != commit_sync[1]);

    always_ff @(posedge axi_clk or negedge axi_rst_n) begin
        if (!axi_rst_n) begin
            commit_sync <= 3'd0;
            arm_axi     <= 1'b0;
            canary_axi  <= 1'b1;
            fb_base_axi <= 32'h8000_0000;
        end else begin
            commit_sync <= {commit_sync[1:0], ctrl_commit};
            if (commit_edge) begin
                // commit edge: the bridge buses are expected to be stable here
                arm_axi     <= arm;
                canary_axi  <= canary;
                fb_base_axi <= fb_base;
            end
        end
    end

    // arm crosses from axi_clk into gs_clk through a 2-FF synchronizer fed by the latched copy.
    logic arm_s1, arm_gs;
    always_ff @(posedge gs_clk or negedge gs_rst_n) begin
        if (!gs_rst_n) begin
            arm_s1 <= 1'b0;
            arm_gs <= 1'b0;
        end else begin
            arm_s1 <= arm_axi;
            arm_gs <= arm_s1;
        end
    end

    // ============================ GS-clock PACKER ============================
    // Collects 16-bit pixels into one 32-byte beat keyed by px_addr[31:5]. A beat is
    // pushed (one cycle later, via the registered fifo_wr/fifo_wdata) either when all
    // 16 lanes have been written or when a pixel for a different line arrives.
    logic [31:0]   cur_addr;   // 32-byte-aligned address of the beat being assembled
    logic [255:0]  cur_data;   // lanes written so far
    logic [31:0]   cur_strb;   // byte strobes of the lanes written so far
    logic          has_data;   // cur_* hold a partially assembled beat
    logic          fifo_wr;    // registered push request
    logic [PW-1:0] fifo_wdata; // registered push payload
    logic          fifo_wfull;

    always_ff @(posedge gs_clk or negedge gs_rst_n) begin
        if (!gs_rst_n) begin
            cur_addr            <= '0;
            cur_data            <= '0;
            cur_strb            <= '0;
            has_data            <= 1'b0;
            fifo_wr             <= 1'b0;
            fifo_wdata          <= '0;
            fifo_overflow_count <= '0;
        end else begin
            fifo_wr <= 1'b0; // push request is a single-cycle pulse

            // Gate: nothing accumulates until enabled AND armed.
            // NOTE(review): a partial beat already held in cur_* when enable/arm_gs drops
            // is kept; the next accepted pixel merges into it or flushes it. Confirm that
            // carrying a partial beat across a disarm/re-arm is intended.
            if (enable && arm_gs && px_emit) begin
                logic [31:0]  abeat; // 32-byte-aligned line address of this pixel
                logic [3:0]   lane;  // 0..15 (which 16-bit lane)
                logic [255:0] nd;    // beat data with this pixel merged in
                logic [31:0]  ns;    // beat strobes with this pixel merged in

                abeat = {px_addr[31:5], 5'd0};
                lane  = px_addr[4:1];

                if (has_data && (abeat != cur_addr)) begin
                    // Line changed before the previous beat filled: flush the partial
                    // beat with its partial strobes and restart with this pixel alone.
                    fifo_wdata <= {cur_addr, cur_data, cur_strb};
                    fifo_wr    <= 1'b1;
                    cur_addr   <= abeat;
                    cur_data   <= (256'(px_pix16) << ({28'd0, lane} * 16));
                    cur_strb   <= (32'd3 << ({28'd0, lane} * 2));
                    has_data   <= 1'b1;
                end else begin
                    // Same line (or nothing pending): merge this pixel into its lane.
                    nd = has_data ? cur_data : 256'd0;
                    ns = has_data ? cur_strb : 32'd0;
                    nd[ ({28'd0, lane} * 16) +: 16 ] = px_pix16;
                    ns[ ({28'd0, lane} * 2)  +: 2  ] = 2'b11;

                    if (&ns) begin
                        // All 16 lanes written: push the beat; nothing remains pending.
                        fifo_wdata <= {abeat, nd, ns};
                        fifo_wr    <= 1'b1;
                        has_data   <= 1'b0;
                    end else begin
                        cur_addr <= abeat;
                        cur_data <= nd;
                        cur_strb <= ns;
                        has_data <= 1'b1;
                    end
                end
            end

            // Overflow witness (must stay 0): a push was requested while the FIFO was
            // full. The FIFO write enable is masked by !fifo_wfull, so that beat is dropped.
            if (fifo_wr && fifo_wfull)
                fifo_overflow_count <= fifo_overflow_count + 32'd1;
        end
    end

    // ============================ async FIFO (CDC) ============================
    logic [PW-1:0] fifo_rdata;
    logic          fifo_rempty;
    logic          fifo_rd;

    // Ch323 — reset BOTH FIFO pointers from the STABLE axi_rst_n (assert async, deassert
    // synced into gs_clk). gs_rst_n (= core reset) toggles on every CORE_CTRL re-render; if
    // the write pointer reset followed it while the read pointer stayed, the gray pointers
    // would desync → FIFO corruption (phantom beats, no commit). Same fix as gs_z_flush_writer.
    reg [1:0] wrst_sync;
    always_ff @(posedge gs_clk or negedge axi_rst_n) begin
        if (!axi_rst_n) wrst_sync <= 2'b00;
        else            wrst_sync <= {wrst_sync[0], 1'b1};
    end
    wire fifo_wrst_n = wrst_sync[1];

    // NOTE(review): the FSM samples fifo_rdata in the same cycle it asserts fifo_rd, so
    // gs_async_fifo is presumably a show-ahead (first-word-fall-through) FIFO — confirm.
    gs_async_fifo #(.WIDTH(PW), .DEPTH(FIFO_DEPTH)) u_fifo (
        .wclk  (gs_clk),
        .wrst_n(fifo_wrst_n),
        .wr    (fifo_wr && !fifo_wfull),
        .wdata (fifo_wdata),
        .wfull (fifo_wfull),
        .rclk  (axi_clk),
        .rrst_n(axi_rst_n),
        .rd    (fifo_rd),
        .rdata (fifo_rdata),
        .rempty(fifo_rempty)
    );

    // ========================= f2sdram-clock AXI FSM =========================
    // One single-beat write per FIFO word: S_IDLE (pop) -> S_AW -> S_W -> S_B -> S_IDLE.
    localparam logic [1:0] S_IDLE = 2'd0, S_AW = 2'd1, S_W = 2'd2, S_B = 2'd3;

    logic [1:0]   state;
    logic [31:0]  beat_addr; // frame-relative address of the popped beat (drives no output; awaddr uses awaddr_q)
    logic [255:0] beat_data;
    logic [31:0]  beat_strb;
    logic [31:0]  awaddr_q;  // Ch352 — full AW address latched at admission, held stable AW->W->B

    // Frame-relative address field of the word at the FIFO head.
    wire [31:0] pop_addr = fifo_rdata[319:288];

    // Every beat is a full 32-byte line with a 32-byte-aligned frame offset, issued with
    // AWSIZE=5. An unaligned base would therefore produce an unaligned AWADDR whose WSTRB
    // still covers byte lanes below the start address, which AXI does not allow. Round
    // the runtime base down to the beat size so AWADDR is always 32-byte aligned.
    wire [31:0] fb_base_aligned = {fb_base_axi[31:5], 5'd0};

    // Ch352 — AXI transaction stability: arm_axi/commit_edge gate ADMISSION ONLY (the
    // S_IDLE pop). Once a beat is admitted, awvalid/wvalid are functions of state alone
    // and run to completion, so a later disarm or fb_base commit can neither drop VALID
    // mid-handshake nor move awaddr while AWVALID && !AWREADY.
    wire admit = (state == S_IDLE) && !fifo_rempty && arm_axi && !commit_edge;

    assign awsize  = 3'd5;     // 32 bytes/beat (256-bit)
    assign awburst = 2'b01;    // INCR
    assign awid    = 5'd0;
    assign awlen   = 8'd0;     // single beat per line (tile-rows aren't contiguous)
    assign awaddr  = awaddr_q;
    assign wdata   = beat_data;
    assign wstrb   = beat_strb;
    assign wlast   = 1'b1;     // 1-beat burst

    assign awvalid = (state == S_AW);
    assign wvalid  = (state == S_W);
    assign bready  = (state == S_B);
    assign fifo_rd = admit;
    assign idle    = (state == S_IDLE) && fifo_rempty;

    always_ff @(posedge axi_clk or negedge axi_rst_n) begin
        if (!axi_rst_n) begin
            state           <= S_IDLE;
            beat_addr       <= '0;
            beat_data       <= '0;
            beat_strb       <= '0;
            awaddr_q        <= '0;
            beats_written   <= '0;
            bursts_issued   <= '0;
            bresp_err_count <= '0;
        end else begin
            unique case (state)
                S_IDLE: if (admit) begin
                    beat_addr <= pop_addr;              // {addr, data, strb}
                    beat_data <= fifo_rdata[287:32];
                    beat_strb <= fifo_rdata[31:0];
                    awaddr_q  <= fb_base_aligned + pop_addr;
                    // canary: write ONLY the offset-0 (top-of-frame) 32-byte line;
                    // discard every other beat (fifo_rd still pops it this cycle).
                    if (canary_axi && (pop_addr != 32'd0))
                        state <= S_IDLE;
                    else
                        state <= S_AW;
                end
                S_AW: if (awready) begin
                    bursts_issued <= bursts_issued + 32'd1;
                    state         <= S_W;
                end
                S_W: if (wready) begin
                    beats_written <= beats_written + 32'd1;
                    state         <= S_B;
                end
                default: if (bvalid) begin // S_B
                    if (bresp != 2'b00) bresp_err_count <= bresp_err_count + 32'd1;
                    state <= S_IDLE;
                end
            endcase
        end
    end
endmodule : gs_lpddr_axi_master