// Repository-view metadata captured with this file (not part of the RTL):
//   commit : ec82764bef
//   message: RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs),
//            docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived
//            textures/traces) is excluded via .gitignore and stays local.
//            Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
//   size   : 249 lines, 12 KiB, SystemVerilog
// retroDE_ps2 — gs_lpddr_axi_master (Ch318)
//
// HARDWARE-facing wrapper that takes the PSMCT16 tile-FLUSH pixel stream (GS clock)
// and writes it to real LPDDR over the qsys f2sdram AXI4 port (f2sdram clock). It
// does NOT modify the proven gs_lpddr_fb_writer (the Ch317 sim model) — it is a
// sibling hardware path with the same input stream.
//
// Pipeline (per the Ch318 directive):
//   GS clock   : PACKER — accumulate 16 PSMCT16 pixels of a tile-row into one 256-bit
//                (32-byte) beat {addr, data, strb}. A tile-row is exactly 16 px on a
//                32-byte-aligned line, so a beat completes naturally on its 16th px
//                (no dangling partial beat). On completion, push to the async FIFO.
//                If the line address changes before a beat fills, the partial beat is
//                pushed as-is with only its written byte strobes set.
//   async FIFO : gray-code CDC, carries {addr[31:0], data[255:0], strb[31:0]} (320b).
//   f2sdram    : AXI write FSM — pop a beat and issue a single-beat INCR write
//                (AWSIZE=5 = 32 B, AWLEN=0, AWBURST=INCR, per-byte WSTRB taken from the
//                beat, never crossing a 4 KiB boundary since each beat is one 32-byte
//                line). AW, then W, then B, each waiting on its ready/valid handshake.
//
// Address: awaddr = fb_base + packet_addr (packet_addr is the FB-relative byte addr
// from raster_pixel_fb_addr_q). fb_base is a runtime input and must point at a
// LINUX-SAFE reserved LPDDR region before any board run — the qsys aperture proves
// fabric CAN address SDRAM, not which physical range is safe to scribble on
// (Ch318 board gate).
//
// Counters (TB/status readable): beats, bursts and bresp_err are kept in the f2sdram
// domain; the FIFO-overflow counter is kept in the GS-clock domain, where pushes
// happen. idle is "done-ish": FSM idle && FIFO empty. enable=0 stops the packer from
// accepting pixels; beats already queued in the FIFO still drain while armed.
module gs_lpddr_axi_master #(
    parameter int FIFO_DEPTH = 16            // forwarded to gs_async_fifo.DEPTH
) (
    // ---------------- GS clock domain: flush pixel stream ----------------
    input  logic         gs_clk,
    input  logic         gs_rst_n,           // resets the packer and fifo_overflow_count
    input  logic         enable,             // packer accepts pixels only while high

    // ---------------- RUNTIME controls (HPS bridge register) ----------------
    // {arm, canary, fb_base} arrive RAW from the HPS bridge clock domain (CLOCK2_50
    // per Ch352) — they are NOT synchronous to axi_clk. They are only ever sampled
    // on a synchronized ctrl_commit edge (see "control snapshot" below).
    //
    // arm: HARD SAFETY GATE — no beat is admitted to the AXI FSM and no pixel is
    // packed unless the latched copy is high. The latched copy resets LOW.
    input  logic         arm,
    // canary: when high, write ONLY the beat whose frame-relative address is 0 (the
    // 32-byte top-of-frame line) and discard all others — a deterministic,
    // blast-radius-limited first test. The latched copy resets HIGH.
    input  logic         canary,
    // fb_base: LPDDR byte base address of the framebuffer; awaddr = fb_base +
    // frame-relative offset. Runtime so a wrong base is re-targetable without a
    // rebuild. The latched copy resets to 0x8000_0000.
    input  logic [31:0]  fb_base,
    // ctrl_commit: TOGGLE the bridge flips on any control write. It is synchronized
    // into axi_clk and the controls above are latched on its edge, so the multi-bit
    // fb_base crosses coherently.
    input  logic         ctrl_commit,

    input  logic         px_emit,            // one pixel valid this gs_clk cycle
    input  logic [31:0]  px_addr,            // FB-relative byte address (raster_pixel_fb_addr_q)
    input  logic [15:0]  px_pix16,           // PSMCT16 pixel value

    // ---------------- f2sdram (LPDDR AXI) clock domain ----------------
    input  logic         axi_clk,
    input  logic         axi_rst_n,
    // AXI4 write-address
    output logic [31:0]  awaddr,
    output logic [7:0]   awlen,
    output logic [2:0]   awsize,
    output logic [1:0]   awburst,
    output logic [4:0]   awid,
    output logic         awvalid,
    input  logic         awready,
    // AXI4 write-data
    output logic [255:0] wdata,
    output logic [31:0]  wstrb,
    output logic         wlast,
    output logic         wvalid,
    input  logic         wready,
    // AXI4 write-response
    input  logic         bvalid,
    output logic         bready,
    input  logic [1:0]   bresp,

    // ---------------- status / counters ----------------
    // beats_written, bursts_issued, bresp_err_count and idle are axi_clk-domain.
    // fifo_overflow_count is updated on gs_clk and reset by gs_rst_n.
    // NOTE(review): a reader in another clock domain sees fifo_overflow_count as an
    // unsynchronized 32-bit bus — confirm the consumer tolerates that.
    output logic [31:0]  beats_written,      // +1 per accepted W beat (wvalid && wready)
    output logic [31:0]  bursts_issued,      // +1 per accepted AW (awvalid && awready)
    output logic [31:0]  bresp_err_count,    // +1 per B response with bresp != OKAY (2'b00)
    output logic [31:0]  fifo_overflow_count,// +1 per push attempted while the FIFO was full
    output logic         idle                // FSM in S_IDLE and FIFO read side empty
);

    // FIFO word layout: {addr[31:0], data[255:0], strb[31:0]}
    localparam int PW = 320;

    // ======================================================================
    // axi_clk: control snapshot
    // ======================================================================
    // Synchronize the bridge commit toggle through three flops and latch
    // {arm, canary, fb_base} on the cycle the last two stages differ. Reset values
    // are the safe defaults (arm=0, canary=1, fb_base=0x8000_0000), so nothing can
    // be written before the first commit. Every axi_clk use, and the gs_clk arm
    // synchronizer, read these latched copies rather than the raw bridge buses.
    logic [2:0]  commit_sync;
    logic        arm_axi;
    logic        canary_axi;
    logic [31:0] fb_base_axi;

    // High for the single axi_clk cycle in which the snapshot registers update.
    // Beat admission is blocked in that cycle so the FSM never pops a beat that
    // straddles a configuration change.
    wire commit_edge = (commit_sync[2] != commit_sync[1]);

    always_ff @(posedge axi_clk or negedge axi_rst_n) begin
        if (!axi_rst_n) begin
            commit_sync <= 3'd0;
            arm_axi     <= 1'b0;
            canary_axi  <= 1'b1;
            fb_base_axi <= 32'h8000_0000;
        end else begin
            commit_sync <= {commit_sync[1:0], ctrl_commit};
            if (commit_edge) begin
                // NOTE(review): relies on the bridge holding the buses stable from
                // the toggle until this edge is seen — confirm at the bridge.
                arm_axi     <= arm;
                canary_axi  <= canary;
                fb_base_axi <= fb_base;
            end
        end
    end

    // ======================================================================
    // gs_clk: arm synchronizer (from the latched axi_clk copy)
    // ======================================================================
    logic arm_s1;
    logic arm_gs;

    always_ff @(posedge gs_clk or negedge gs_rst_n) begin
        if (!gs_rst_n) begin
            arm_s1 <= 1'b0;
            arm_gs <= 1'b0;
        end else begin
            arm_s1 <= arm_axi;
            arm_gs <= arm_s1;
        end
    end

    // ======================================================================
    // gs_clk: PACKER — 16 x 16-bit pixels -> one 32-byte beat
    // ======================================================================
    logic [31:0]   cur_addr;     // 32-byte-aligned line address of the beat being built
    logic [255:0]  cur_data;     // pixel lanes accumulated so far
    logic [31:0]   cur_strb;     // byte strobes for the lanes written so far
    logic          has_data;     // cur_* hold a partially built beat
    logic          fifo_wr;      // registered push request (one gs_clk pulse)
    logic [PW-1:0] fifo_wdata;   // registered push payload
    logic          fifo_wfull;

    // Decode of the incoming pixel address.
    wire [31:0] px_line = {px_addr[31:5], 5'd0};   // 32-byte line containing the pixel
    wire [3:0]  px_lane = px_addr[4:1];            // 16-bit lane within the line, 0..15

    // A pixel is accepted only when the packer is enabled and armed.
    wire px_take     = enable && arm_gs && px_emit;
    // The accepted pixel belongs to a different line than the beat in progress.
    wire line_switch = has_data && (px_line != cur_addr);

    // Beat in progress (or an empty beat) with the incoming pixel merged in.
    logic [255:0] merged_data;
    logic [31:0]  merged_strb;

    always_comb begin
        merged_data = has_data ? cur_data : 256'd0;
        merged_strb = has_data ? cur_strb : 32'd0;
        merged_data[({28'd0, px_lane} * 16) +: 16] = px_pix16;
        merged_strb[({28'd0, px_lane} * 2)  +: 2]  = 2'b11;
    end

    always_ff @(posedge gs_clk or negedge gs_rst_n) begin
        if (!gs_rst_n) begin
            cur_addr            <= '0;
            cur_data            <= '0;
            cur_strb            <= '0;
            has_data            <= 1'b0;
            fifo_wr             <= 1'b0;
            fifo_wdata          <= '0;
            fifo_overflow_count <= '0;
        end else begin
            fifo_wr <= 1'b0;                       // default: no push this cycle

            if (px_take) begin
                if (line_switch) begin
                    // Line changed before the previous beat filled: push the
                    // partial beat and start a new one holding just this pixel.
                    fifo_wdata <= {cur_addr, cur_data, cur_strb};
                    fifo_wr    <= 1'b1;
                    cur_addr   <= px_line;
                    cur_data   <= (256'(px_pix16) << ({28'd0, px_lane} * 16));
                    cur_strb   <= (32'd3 << ({28'd0, px_lane} * 2));
                    has_data   <= 1'b1;
                end else if (&merged_strb) begin
                    // All 16 lanes written: push the full beat, accumulator empties.
                    fifo_wdata <= {px_line, merged_data, merged_strb};
                    fifo_wr    <= 1'b1;
                    has_data   <= 1'b0;
                end else begin
                    // Still filling: keep accumulating.
                    cur_addr   <= px_line;
                    cur_data   <= merged_data;
                    cur_strb   <= merged_strb;
                    has_data   <= 1'b1;
                end
            end

            // Overflow witness (must stay 0): the registered push request coincides
            // with a full FIFO. That push is masked at the FIFO port below, so the
            // beat is dropped rather than corrupting the FIFO.
            if (fifo_wr && fifo_wfull)
                fifo_overflow_count <= fifo_overflow_count + 32'd1;
        end
    end

    // ======================================================================
    // async FIFO (gs_clk -> axi_clk)
    // ======================================================================
    logic [PW-1:0] fifo_rdata;
    logic          fifo_rempty;
    logic          fifo_rd;

    // Ch323 — BOTH FIFO pointers are reset from the STABLE axi_rst_n: asserted
    // asynchronously, released through a 2-flop synchronizer in gs_clk for the write
    // side. gs_rst_n (core reset) toggles on every CORE_CTRL re-render; resetting
    // only the write pointer from it would desynchronize the two pointers and
    // corrupt the FIFO. Same fix as gs_z_flush_writer.
    reg [1:0] wrst_sync;

    always_ff @(posedge gs_clk or negedge axi_rst_n) begin
        if (!axi_rst_n) wrst_sync <= 2'b00;
        else            wrst_sync <= {wrst_sync[0], 1'b1};
    end

    wire fifo_wrst_n = wrst_sync[1];

    gs_async_fifo #(.WIDTH(PW), .DEPTH(FIFO_DEPTH)) u_fifo (
        .wclk   (gs_clk),
        .wrst_n (fifo_wrst_n),
        .wr     (fifo_wr && !fifo_wfull),
        .wdata  (fifo_wdata),
        .wfull  (fifo_wfull),
        .rclk   (axi_clk),
        .rrst_n (axi_rst_n),
        .rd     (fifo_rd),
        .rdata  (fifo_rdata),
        .rempty (fifo_rempty)
    );

    // Fields of the FIFO head entry.
    // NOTE(review): the FSM samples these in the same cycle it asserts fifo_rd, so
    // gs_async_fifo is presumed to present the head word on rdata whenever !rempty
    // (show-ahead) — confirm against gs_async_fifo.
    wire [31:0]  pop_addr = fifo_rdata[319:288];
    wire [255:0] pop_data = fifo_rdata[287:32];
    wire [31:0]  pop_strb = fifo_rdata[31:0];

    // ======================================================================
    // axi_clk: AXI write FSM — one single-beat write per FIFO entry
    // ======================================================================
    localparam logic [1:0] S_IDLE = 2'd0,
                           S_AW   = 2'd1,
                           S_W    = 2'd2,
                           S_B    = 2'd3;

    logic [1:0]   state;
    logic [31:0]  beat_addr;   // frame-relative address of the admitted beat (not read in this module)
    logic [255:0] beat_data;
    logic [31:0]  beat_strb;
    logic [31:0]  awaddr_q;    // Ch352 — full AW address latched at admission, held through AW->W->B

    // Fixed burst shape: one 32-byte beat per transaction.
    assign awsize  = 3'd5;     // 2^5 = 32 bytes per beat (256-bit bus)
    assign awburst = 2'b01;    // INCR
    assign awid    = 5'd0;
    assign awlen   = 8'd0;     // single beat per line (tile-rows aren't contiguous)
    assign wlast   = 1'b1;     // the only beat is also the last

    assign awaddr  = awaddr_q; // changes only at admission, never while AWVALID is high
    assign wdata   = beat_data;
    assign wstrb   = beat_strb;

    // Ch352 — transaction stability: arm_axi and commit_edge gate ADMISSION ONLY
    // (the S_IDLE pop). Once a beat is admitted the VALID/READY outputs depend on
    // state alone, so a later disarm or fb_base commit cannot drop VALID
    // mid-handshake or move awaddr while AWVALID && !AWREADY.
    assign awvalid = (state == S_AW);
    assign wvalid  = (state == S_W);
    assign bready  = (state == S_B);

    // Pop (and admit) a beat: FSM idle, FIFO non-empty, armed, snapshot not updating.
    assign fifo_rd = (state == S_IDLE) && !fifo_rempty && arm_axi && !commit_edge;

    // "Done-ish": nothing in flight on AXI and nothing queued on the read side. A
    // partially packed beat in the gs_clk packer is not reflected here.
    assign idle    = (state == S_IDLE) && fifo_rempty;

    always_ff @(posedge axi_clk or negedge axi_rst_n) begin
        if (!axi_rst_n) begin
            state           <= S_IDLE;
            beat_addr       <= '0;
            beat_data       <= '0;
            beat_strb       <= '0;
            awaddr_q        <= '0;
            beats_written   <= '0;
            bursts_issued   <= '0;
            bresp_err_count <= '0;
        end else begin
            unique case (state)
                S_IDLE: if (fifo_rd) begin
                    beat_addr <= pop_addr;
                    beat_data <= pop_data;
                    beat_strb <= pop_strb;
                    awaddr_q  <= fb_base_axi + pop_addr;   // full AW address from the latched base
                    // canary: only the offset-0 beat proceeds to AW; any other beat
                    // is still popped this cycle but is discarded.
                    if (canary_axi && (pop_addr != 32'd0))
                        state <= S_IDLE;
                    else
                        state <= S_AW;
                end

                S_AW: if (awready) begin
                    bursts_issued <= bursts_issued + 32'd1;
                    state         <= S_W;
                end

                S_W: if (wready) begin
                    beats_written <= beats_written + 32'd1;
                    state         <= S_B;
                end

                default: if (bvalid) begin                 // S_B
                    if (bresp != 2'b00)
                        bresp_err_count <= bresp_err_count + 32'd1;
                    state <= S_IDLE;
                end
            endcase
        end
    end

endmodule : gs_lpddr_axi_master