ec82764bef
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
134 lines
6.7 KiB
Systemverilog
134 lines
6.7 KiB
Systemverilog
// retroDE_ps2 — gs_lpddr_fb_writer (Ch317)
|
|
//
|
|
// FIRST LPDDR-backed-framebuffer step: a write sink that takes the GS tile-FLUSH
|
|
// pixel stream (PSMCT16, one pixel per emit) and commits it to an LPDDR-style
|
|
// framebuffer, modelling the real EMIF AXI4 write path so the addressing / data /
|
|
// stride / burst behaviour can be proven in sim before wiring the hard EMIF.
|
|
//
|
|
// SCOPE (Ch317, deliberately tight — see doc 0010 Ch317):
|
|
// * Tile color/Z stay ON-CHIP; texture stays local. ONLY the framebuffer FLUSH
|
|
// is redirected here.
|
|
// * Address gen is the simple linear `fb_base + (screen_y*pitch + screen_x)*bpp`
|
|
// — which the GS already produces on `raster_pixel_fb_addr_q` for PSMCT16
|
|
// ((fbp<<11) + (pixel_index<<1)), so we consume that byte address directly.
|
|
// * PSMCT16 (2 bytes/pixel) — lower bandwidth, already-proven format.
|
|
// * BURSTS: the flush emits a tile-row's 16 pixels at contiguous +2 byte
|
|
// addresses, then jumps by `pitch` to the next row. The burst engine COALESCES
|
|
// a contiguous +2 run into one burst, capped at MAX_BURST_BYTES (the doc 0008
|
|
// 4 KiB-boundary AXI rule). Real per-tile-row burst = 16 beats = 32 bytes.
|
|
// * A staging FIFO decouples the 1-pixel/cycle emit from the burst engine and
|
|
// surfaces under/overflow — the realistic shape a hard EMIF AXI master needs.
|
|
// * Backing memory `fbmem` is byte-addressed and TB-readable for the
|
|
// write/readback PROOF (a later rung swaps it for the EMIF AXI master +
|
|
// LPDDR scanout). At enable=0 the whole module is inert: no writes, and all
// state/counters hold their values (they read 0 if never enabled since reset).
|
|
//
|
|
// COUNTERS (Codex acceptance — bandwidth/diag): bytes_written, burst_count,
|
|
// busy_cycles (engine draining), fifo_overflow/underflow counts, fifo_occ (peak FIFO occupancy). The TB
|
|
// computes effective GB/s off bytes_written / (busy_cycles * clk_period).
|
|
|
|
// gs_lpddr_fb_writer — staged write sink for the GS tile-flush pixel stream.
//
// Each accepted emit pushes {px_addr, px_pix16} into a staging FIFO. Whenever
// the FIFO is non-empty one entry is popped per clock and its two bytes are
// committed little-endian (low byte at `a`, high byte at `a+1`) into the
// byte-addressed `fbmem` model. Bytes that fall outside [0, FB_BYTES) are
// silently skipped. Consecutive pops whose address is exactly last_addr+2 are
// counted as one burst, up to MAX_BURST_BYTES bytes per burst.
//
// Parameters:
//   FB_BYTES        — size of the backing byte array.
//   FIFO_DEPTH      — staging FIFO depth; the full flag relies on this being a
//                     power of two (count[PTR_W] is set exactly at FIFO_DEPTH).
//   MAX_BURST_BYTES — maximum bytes coalesced into one counted burst.
//                     NOTE(review): this caps burst LENGTH only; a contiguous
//                     run is not split where it crosses a 4 KiB address
//                     boundary — confirm against doc 0008 whether the model
//                     should also split there.
//
// While enable=0 nothing is pushed, popped, written or counted; all state holds.
module gs_lpddr_fb_writer #(
    parameter int FB_BYTES        = 8192,  // backing FB size (64x64 PSMCT16 = 8 KiB)
    parameter int FIFO_DEPTH      = 32,    // pixel staging FIFO depth (power-of-2)
    parameter int MAX_BURST_BYTES = 4096   // AXI4 4 KiB-boundary cap (doc 0008 lesson)
) (
    input  logic        clk,
    input  logic        rst_n,
    input  logic        enable,            // LPDDR_FB_ENABLE; 0 → fully inert

    // GS tile-flush pixel stream (PSMCT16, one pixel per emit)
    input  logic        px_emit,
    input  logic [31:0] px_addr,           // linear FB byte address (raster_pixel_fb_addr_q)
    input  logic [15:0] px_pix16,          // raster_pixel_color_q[15:0]

    // diagnostics / proof (read hierarchically by the TB; no functional consumers)
    output logic [31:0] bytes_written,         // +2 per popped pixel (even if out of range)
    output logic [31:0] burst_count,           // +1 each time a pop starts a new burst
    output logic [31:0] busy_cycles,           // +1 per cycle in which an entry is popped
    output logic [31:0] fifo_overflow_count,   // +1 per emit dropped because the FIFO was full
    output logic [31:0] fifo_underflow_count,  // only ever reset here (see note at the end)
    output logic [15:0] fifo_occ               // running maximum of the FIFO occupancy
);

    localparam int ADDR_W = $clog2(FB_BYTES);
    localparam int PTR_W  = (FIFO_DEPTH > 1) ? $clog2(FIFO_DEPTH) : 1;

    // ---- byte-addressed backing framebuffer (the LPDDR model) ----
    logic [7:0] fbmem [0:FB_BYTES-1];

    // ---- staging FIFO of {addr, pix16} ----
    logic [31:0]      fifo_addr [0:FIFO_DEPTH-1];
    logic [15:0]      fifo_pix  [0:FIFO_DEPTH-1];
    logic [PTR_W-1:0] wptr, rptr;
    logic [PTR_W:0]   count;               // 0..FIFO_DEPTH (PTR_W+1 bits)

    // count==FIFO_DEPTH sets the top bit (FIFO_DEPTH is a power of 2 == 1<<PTR_W),
    // so count[PTR_W] alone is the full flag. (Do NOT compare against a PTR_W-wide
    // literal — PTR_W'(FIFO_DEPTH) truncates FIFO_DEPTH to 0 and reads empty as full.)
    wire fifo_full  = count[PTR_W];
    wire fifo_empty = (count == '0);

    // ---- burst engine state (coalesce contiguous +2 runs) ----
    logic        in_burst;                 // currently extending a burst
    logic [31:0] last_addr;                // byte address of the last popped pixel
    logic [31:0] burst_bytes;              // bytes in the current burst so far

    logic        do_push, do_pop;
    logic [31:0] a;                        // popped byte address (FIFO head)
    logic [15:0] p;                        // popped pixel (FIFO head)
    logic        contig;
    logic        lo_in_fb;                 // byte `a`   lies inside fbmem
    logic        hi_in_fb;                 // byte `a+1` lies inside fbmem

    always_comb begin
        do_push = px_emit && !fifo_full;
        do_pop  = !fifo_empty;             // drain one entry/cycle when available
        a       = fifo_addr[rptr];
        p       = fifo_pix [rptr];

        // The head extends the open burst only if it directly follows the
        // previous pixel and the burst would stay within the length cap.
        contig  = in_burst && (a == last_addr + 32'd2)
                           && (burst_bytes + 32'd2 <= 32'(MAX_BURST_BYTES));

        lo_in_fb = (a < 32'(FB_BYTES));
        // Written as `a < FB_BYTES-1` rather than `a+1 < FB_BYTES`: the latter
        // wraps to 0 in 32 bits when a == 32'hFFFF_FFFF, passes the guard, and
        // (with the index wrapping too) lands the high byte on a low fbmem cell.
        hi_in_fb = (a < 32'(FB_BYTES - 1));
    end

    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            wptr                 <= '0;
            rptr                 <= '0;
            count                <= '0;
            in_burst             <= 1'b0;
            last_addr            <= '0;
            burst_bytes          <= '0;
            bytes_written        <= '0;
            burst_count          <= '0;
            busy_cycles          <= '0;
            fifo_overflow_count  <= '0;
            fifo_underflow_count <= '0;
            fifo_occ             <= '0;
        end else if (enable) begin
            // ---- push side: one flushed pixel per emit ----
            if (px_emit && fifo_full)
                fifo_overflow_count <= fifo_overflow_count + 32'd1; // dropped — proof must show 0
            if (do_push) begin
                fifo_addr[wptr] <= px_addr;
                fifo_pix [wptr] <= px_pix16;
                wptr            <= wptr + PTR_W'(1);
            end

            // ---- drain side: pop one entry/cycle, commit fbmem, coalesce bursts ----
            if (do_pop) begin
                // commit the 2 PSMCT16 bytes at the linear address (little-endian)
                if (lo_in_fb) fbmem[a[ADDR_W-1:0]]        <= p[7:0];
                if (hi_in_fb) fbmem[a[ADDR_W-1:0] + 1'b1] <= p[15:8];

                bytes_written <= bytes_written + 32'd2;
                busy_cycles   <= busy_cycles   + 32'd1;
                rptr          <= rptr + PTR_W'(1);

                if (contig)
                    burst_bytes <= burst_bytes + 32'd2;   // extend current burst
                else begin
                    burst_count <= burst_count + 32'd1;   // start a NEW burst
                    burst_bytes <= 32'd2;
                end
                in_burst  <= 1'b1;
                last_addr <= a;
            end else if (in_burst) begin
                in_burst <= 1'b0;                         // FIFO drained → close burst
            end

            // single count update (push and pop net correctly)
            if      ( do_push && !do_pop) count <= count + 1'b1;
            else if (!do_push &&  do_pop) count <= count - 1'b1;
            // (both or neither → unchanged)

            // Peak tracking compares the occupancy as it was BEFORE this
            // cycle's push/pop took effect.
            if (16'(count) > fifo_occ) fifo_occ <= 16'(count);

            // fifo_underflow_count: the engine never pops empty (do_pop gated on
            // !fifo_empty), so it stays 0 here — surfaced for the future EMIF rung
            // where an external AXI master could request beyond the staged data.
        end
    end

endmodule : gs_lpddr_fb_writer
|