Files
thejayman77 ec82764bef Initial commit: retroDE_ps2 — first-of-its-kind PS2 GS FPGA core (DE25-Nano / Agilex 5)
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression
(272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps,
and all dump-derived textures/traces) is excluded via .gitignore and stays local.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 20:10:50 -04:00

90 lines
4.0 KiB
Systemverilog

// retroDE_ps2 — gs_async_fifo (Ch318)
//
// Generic dual-clock (asynchronous) FIFO with gray-code pointers and 2-FF pointer
// synchronizers — the standard CDC-safe ring buffer. Used by gs_lpddr_axi_master to
// cross 256-bit framebuffer-row packets {addr,data,strb} from the GS clock domain to
// the f2sdram (LPDDR AXI) clock domain. Both domains are treated as GENUINELY async
// even when nominally the same frequency (GS = PLL design_clk; f2sdram = raw board
// clock), per the Ch318 directive.
//
// Parameters
//   WIDTH : payload width in bits.
//   DEPTH : number of entries. Must be a power of two and at least 2 (the pointers
//           carry one wrap bit above the $clog2(DEPTH) address bits). Checked at
//           time 0 in simulation.
//
// Handshake
//   `wr`/`rd` are single-cycle strobes. Internally they are qualified with
//   !wfull / !rempty, so a write while full or a read while empty moves no pointer
//   and stores nothing — the offered word is simply dropped. Callers should still
//   gate wr/rd themselves (the wrapper does).
//
// CDC structure
//   A single gray-coded pointer crosses in each direction through one 2-FF
//   synchronizer — adequate for the modest packet rate (one 32-byte beat per
//   16 flushed pixels).
module gs_async_fifo #(
    parameter int WIDTH = 320,  // {addr[31:0], data[255:0], strb[31:0]}
    parameter int DEPTH = 16    // power of two, >= 2
) (
    // write domain
    input  logic             wclk,
    input  logic             wrst_n,
    input  logic             wr,
    input  logic [WIDTH-1:0] wdata,
    output logic             wfull,
    // read domain
    input  logic             rclk,
    input  logic             rrst_n,
    input  logic             rd,
    output logic [WIDTH-1:0] rdata,
    output logic             rempty
);

    localparam int AW = $clog2(DEPTH);

    // A non-power-of-two DEPTH elaborates without complaint but breaks pointer wrap and
    // the gray-code full/empty comparisons, so reject it loudly in simulation.
    // synthesis translate_off
    initial begin
        if (DEPTH < 2 || (DEPTH & (DEPTH - 1)) != 0)
            $fatal(1, "gs_async_fifo: DEPTH (%0d) must be a power of two >= 2", DEPTH);
    end
    // synthesis translate_on

    logic [WIDTH-1:0] mem [0:DEPTH-1];

    // ---- binary + gray pointers (one extra MSB for full/empty disambiguation) ----
    logic [AW:0] wbin, wgray, wbin_nxt, wgray_nxt;
    logic        wfull_nxt;  // Ch352 — combinational next-value for the now-REGISTERED wfull
    logic [AW:0] rbin, rgray, rbin_nxt, rgray_nxt;

    // synchronized opposite-domain gray pointers (2-FF)
    logic [AW:0] rgray_s1, rgray_s2;  // read gray  -> write domain
    logic [AW:0] wgray_s1, wgray_s2;  // write gray -> read domain

    // Qualified strobes: an access only takes effect when the FIFO can accept it.
    logic wr_en, rd_en;
    assign wr_en = wr && !wfull;
    assign rd_en = rd && !rempty;

    // Binary -> gray conversion.
    function automatic logic [AW:0] bin2gray(input logic [AW:0] b);
        bin2gray = b ^ (b >> 1);
    endfunction

    // Returns g with its two most-significant bits inverted. In gray code this is the
    // pointer value exactly DEPTH entries ahead of g, i.e. the write pointer at "full".
    // Written with single-bit selects (rather than a [AW-2:0] part-select) so that it is
    // also legal for DEPTH == 2, where AW == 1 and there are no bits below the top two.
    function automatic logic [AW:0] flip_top2(input logic [AW:0] g);
        flip_top2       = g;
        flip_top2[AW]   = ~g[AW];
        flip_top2[AW-1] = ~g[AW-1];
    endfunction

    // ---------------- write domain ----------------
    assign wbin_nxt  = wbin + wr_en;
    assign wgray_nxt = bin2gray(wbin_nxt);

    // full: next write gray == synchronized read gray with the top two bits inverted.
    // Ch352 — wfull is a REGISTERED flag (Cummings canonical). The previous combinational
    // `assign wfull = (wgray_nxt == ...)` formed the loop
    // wbin_nxt -> wgray_nxt -> wfull -> wbin_nxt, which Quartus reports and which made
    // Place churn. Registering breaks the loop with no overflow-behavior change: wfull is
    // computed from wgray_nxt (the pointer AFTER the current write), so it asserts on the
    // clock edge that commits the filling write and the (DEPTH+1)th write is still blocked.
    // rempty is intentionally left combinational (it depends only on registered values).
    assign wfull_nxt = (wgray_nxt == flip_top2(rgray_s2));

    always_ff @(posedge wclk or negedge wrst_n) begin
        if (!wrst_n) begin
            wbin     <= '0;
            wgray    <= '0;
            wfull    <= 1'b0;
            rgray_s1 <= '0;
            rgray_s2 <= '0;
        end else begin
            wbin     <= wbin_nxt;
            wgray    <= wgray_nxt;
            wfull    <= wfull_nxt;
            rgray_s1 <= rgray;     // sync read gray into write domain
            rgray_s2 <= rgray_s1;
        end
    end

    // Storage write: no reset, contents are only meaningful between the pointers.
    always_ff @(posedge wclk) begin
        if (wr_en) mem[wbin[AW-1:0]] <= wdata;
    end

    // ---------------- read domain ----------------
    assign rbin_nxt  = rbin + rd_en;
    assign rgray_nxt = bin2gray(rbin_nxt);

    always_ff @(posedge rclk or negedge rrst_n) begin
        if (!rrst_n) begin
            rbin     <= '0;
            rgray    <= '0;
            wgray_s1 <= '0;
            wgray_s2 <= '0;
        end else begin
            rbin     <= rbin_nxt;
            rgray    <= rgray_nxt;
            wgray_s1 <= wgray;     // sync write gray into read domain
            wgray_s2 <= wgray_s1;
        end
    end

    // Combinational (unregistered) read: rdata always shows the entry at the read pointer.
    assign rdata  = mem[rbin[AW-1:0]];
    // empty: read gray has caught up with the synchronized write gray.
    assign rempty = (rgray == wgray_s2);

endmodule : gs_async_fifo