Files
retroDE_ps2/rtl/gif_gs/gs_lpddr_fb_writer.sv
thejayman77 ec82764bef Initial commit: retroDE_ps2 — first-of-its-kind PS2 GS FPGA core (DE25-Nano / Agilex 5)
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression
(272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps,
and all dump-derived textures/traces) is excluded via .gitignore and stays local.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 20:10:50 -04:00

134 lines
6.7 KiB
Systemverilog

// retroDE_ps2 — gs_lpddr_fb_writer (Ch317)
//
// FIRST LPDDR-backed-framebuffer step: a write sink that takes the GS tile-FLUSH
// pixel stream (PSMCT16, one pixel per emit) and commits it to an LPDDR-style
// framebuffer, modelling the real EMIF AXI4 write path so the addressing / data /
// stride / burst behaviour can be proven in sim before wiring the hard EMIF.
//
// SCOPE (Ch317, deliberately tight — see doc 0010 Ch317):
// * Tile color/Z stay ON-CHIP; texture stays local. ONLY the framebuffer FLUSH
// is redirected here.
// * Address gen is the simple linear `fb_base + (screen_y*pitch + screen_x)*bpp`
// — which the GS already produces on `raster_pixel_fb_addr_q` for PSMCT16
// ((fbp<<11) + (pixel_index<<1)), so we consume that byte address directly.
// * PSMCT16 (2 bytes/pixel) — lower bandwidth, already-proven format.
// * BURSTS: the flush emits a tile-row's 16 pixels at contiguous +2 byte
// addresses, then jumps by `pitch` to the next row. The burst engine COALESCES
// a contiguous +2 run into one burst, capped at MAX_BURST_BYTES (the doc 0008
// 4 KiB-boundary AXI rule). Real per-tile-row burst = 16 beats = 32 bytes.
// * A staging FIFO decouples the 1-pixel/cycle emit from the burst engine and
// surfaces under/overflow — the realistic shape a hard EMIF AXI master needs.
// * Backing memory `fbmem` is byte-addressed and TB-readable for the
// write/readback PROOF (a later rung swaps it for the EMIF AXI master +
// LPDDR scanout). While enable=0 the whole module is inert: no pushes, pops or
// fbmem writes, and every counter holds its value (0 if never enabled since reset).
//
// COUNTERS (Codex acceptance — bandwidth/diag): bytes_written, burst_count,
// busy_cycles (engine draining), fifo_overflow_count/fifo_underflow_count, and
// fifo_occ (peak staging-FIFO occupancy — a running maximum, not the live level).
// The TB computes effective GB/s off bytes_written / (busy_cycles * clk_period).
// gs_lpddr_fb_writer
//
// Write sink for the GS tile-flush pixel stream (PSMCT16, 2 bytes per pixel).
// Each emitted {px_addr, px_pix16} pair is staged in a small FIFO, popped one
// entry per cycle, committed little-endian into the byte-addressed `fbmem`
// model, and accounted for by a burst-coalescing model.
//
// Parameters
//   FB_BYTES        size of the backing `fbmem` array in bytes.
//   FIFO_DEPTH      staging FIFO depth; MUST be a power of 2 and >= 2 (the full
//                   flag and pointer wrap rely on it — checked in simulation).
//   MAX_BURST_BYTES maximum number of bytes coalesced into one modelled burst.
//
// Ports
//   clk, rst_n      clock and asynchronous active-low reset.
//   enable          when 0, no state changes at all (registers hold).
//   px_emit         1 = {px_addr, px_pix16} is valid this cycle.
//   px_addr         byte address of the pixel inside the framebuffer model.
//   px_pix16        16-bit pixel; [7:0] goes to px_addr, [15:8] to px_addr+1.
//   bytes_written   +2 per popped pixel (counted even if the address falls
//                   outside fbmem and the bytes are dropped).
//   burst_count     number of modelled bursts started.
//   busy_cycles     number of cycles in which an entry was popped.
//   fifo_overflow_count  pixels dropped because the FIFO was full.
//   fifo_underflow_count never incremented by this module; reset value only.
//   fifo_occ        running maximum of the FIFO occupancy.
//
// Burst model: a popped pixel extends the current burst only if ALL hold:
//   * a burst is open (the FIFO has not run empty since the previous pop),
//   * its address is exactly last_addr + 2,
//   * it lies in the same 4 KiB page as the previous pixel (an AXI burst must
//     not cross a 4 KiB address boundary),
//   * the burst would not exceed MAX_BURST_BYTES.
// Otherwise a new burst is started and burst_count increments.
module gs_lpddr_fb_writer #(
    parameter int FB_BYTES        = 8192,  // backing FB size (64x64 PSMCT16 = 8 KiB)
    parameter int FIFO_DEPTH      = 32,    // pixel staging FIFO depth (power-of-2, >= 2)
    parameter int MAX_BURST_BYTES = 4096   // per-burst byte cap
) (
    input  logic        clk,
    input  logic        rst_n,
    input  logic        enable,            // LPDDR_FB_ENABLE; 0 → fully inert
    // GS tile-flush pixel stream (PSMCT16, one pixel per emit)
    input  logic        px_emit,
    input  logic [31:0] px_addr,           // linear FB byte address
    input  logic [15:0] px_pix16,          // 16-bit pixel value
    // diagnostics / proof (read by the TB; no functional consumers)
    output logic [31:0] bytes_written,
    output logic [31:0] burst_count,
    output logic [31:0] busy_cycles,
    output logic [31:0] fifo_overflow_count,
    output logic [31:0] fifo_underflow_count,
    output logic [15:0] fifo_occ
);
    localparam int ADDR_W = $clog2(FB_BYTES);
    localparam int PTR_W  = (FIFO_DEPTH > 1) ? $clog2(FIFO_DEPTH) : 1;
    // log2 of the AXI burst boundary (4 KiB): address bits [31:12] select the page.
    localparam int AXI_PAGE_LSB = 12;

    // Simulation-time parameter sanity check. `count[PTR_W]` is only a valid
    // full flag, and wptr/rptr only wrap correctly, when FIFO_DEPTH == 1<<PTR_W.
    // synthesis translate_off
    initial begin
        if ((FIFO_DEPTH < 2) || ((FIFO_DEPTH & (FIFO_DEPTH - 1)) != 0))
            $fatal(1, "gs_lpddr_fb_writer: FIFO_DEPTH=%0d must be a power of 2 and >= 2",
                   FIFO_DEPTH);
    end
    // synthesis translate_on

    // ---- byte-addressed backing framebuffer (the LPDDR model) ----
    logic [7:0] fbmem [0:FB_BYTES-1];

    // ---- staging FIFO of {addr, pix16} ----
    logic [31:0]      fifo_addr [0:FIFO_DEPTH-1];
    logic [15:0]      fifo_pix  [0:FIFO_DEPTH-1];
    logic [PTR_W-1:0] wptr, rptr;
    logic [PTR_W:0]   count;               // 0..FIFO_DEPTH (PTR_W+1 bits)

    // count==FIFO_DEPTH sets the top bit (FIFO_DEPTH is a power of 2 == 1<<PTR_W),
    // so count[PTR_W] alone is the full flag. (Do NOT compare against a PTR_W-wide
    // literal — PTR_W'(FIFO_DEPTH) truncates FIFO_DEPTH to 0 and reads empty as full.)
    wire fifo_full  = count[PTR_W];
    wire fifo_empty = (count == '0);

    // ---- burst engine state (coalesce contiguous +2 runs) ----
    logic        in_burst;                 // currently extending a burst
    logic [31:0] last_addr;                // byte address of the last popped pixel
    logic [31:0] burst_bytes;              // bytes in the current burst so far

    logic        do_push, do_pop;
    logic [31:0] a;                        // popped byte address (low byte)
    logic [32:0] a_hi;                     // a + 1 with a carry bit, so it cannot wrap
    logic [15:0] p;                        // popped pixel
    logic        same_page;                // a and last_addr share a 4 KiB page
    logic        contig;                   // popped pixel extends the open burst

    always_comb begin
        do_push   = px_emit && !fifo_full;
        do_pop    = !fifo_empty;           // drain one entry/cycle when available
        a         = fifo_addr[rptr];
        p         = fifo_pix [rptr];
        // 33-bit sum: a == 32'hFFFF_FFFF must yield 2^32 (out of range), not 0.
        a_hi      = {1'b0, a} + 33'd1;
        same_page = (a[31:AXI_PAGE_LSB] == last_addr[31:AXI_PAGE_LSB]);
        contig    = in_burst
                    && (a == last_addr + 32'd2)
                    && same_page
                    && (burst_bytes + 32'd2 <= 32'(MAX_BURST_BYTES));
    end

    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            wptr <= '0; rptr <= '0; count <= '0;
            in_burst <= 1'b0; last_addr <= '0; burst_bytes <= '0;
            bytes_written <= '0; burst_count <= '0; busy_cycles <= '0;
            fifo_overflow_count <= '0; fifo_underflow_count <= '0; fifo_occ <= '0;
        end else if (enable) begin
            // ---- push side: one flushed pixel per emit ----
            if (px_emit && fifo_full)
                fifo_overflow_count <= fifo_overflow_count + 32'd1; // pixel dropped
            if (do_push) begin
                fifo_addr[wptr] <= px_addr;
                fifo_pix [wptr] <= px_pix16;
                wptr            <= wptr + PTR_W'(1);
            end

            // ---- drain side: pop one entry/cycle, commit fbmem, coalesce bursts ----
            if (do_pop) begin
                // Commit the two pixel bytes, little-endian. Each byte is bounds
                // checked on its own; out-of-range bytes are silently dropped.
                if (a < 32'(FB_BYTES))
                    fbmem[a[ADDR_W-1:0]] <= p[7:0];
                if (a_hi < 33'(FB_BYTES))
                    fbmem[a_hi[ADDR_W-1:0]] <= p[15:8];

                bytes_written <= bytes_written + 32'd2;
                busy_cycles   <= busy_cycles + 32'd1;
                rptr          <= rptr + PTR_W'(1);

                if (contig)
                    burst_bytes <= burst_bytes + 32'd2;     // extend current burst
                else begin
                    burst_count <= burst_count + 32'd1;     // start a NEW burst
                    burst_bytes <= 32'd2;
                end
                in_burst  <= 1'b1;
                last_addr <= a;
            end else if (in_burst) begin
                in_burst <= 1'b0;                           // FIFO drained → close burst
            end

            // single count update (push and pop net correctly)
            if (do_push && !do_pop)      count <= count + 1'b1;
            else if (!do_push && do_pop) count <= count - 1'b1;
            // (both or neither → unchanged)

            // Peak occupancy, sampled from the registered count (i.e. before this
            // cycle's push/pop takes effect).
            if (16'(count) > fifo_occ) fifo_occ <= 16'(count);

            // fifo_underflow_count: the engine never pops empty (do_pop is gated
            // on !fifo_empty), so nothing here ever increments it.
        end
    end
endmodule : gs_lpddr_fb_writer