Files
retroDE_ps2/rtl/gif_gs/gs_lpddr_wr_probe.sv
thejayman77 ec82764bef Initial commit: retroDE_ps2 — first-of-its-kind PS2 GS FPGA core (DE25-Nano / Agilex 5)
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression
(272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps,
and all dump-derived textures/traces) is excluded via .gitignore and stays local.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 20:10:50 -04:00

137 lines
6.2 KiB
Systemverilog

// ============================================================================
// gs_lpddr_wr_probe.sv (Ch322 Brick 3)
//
// HPS-bridge-driven LPDDR4B WRITE probe — the missing PS2-side LPDDR loader,
// cloned from retroDE_ao486/rtl/ao486/lpddr4b_loader.sv (write half). The PS2
// core already has the READ half (gs_lpddr_rd_probe); this is its symmetric
// twin so the HPS can STAGE arbitrary words into FPGA-private LPDDR4B from Linux
// (e.g. a known texture for Ch322), then read them back / hash via the existing
// read-probe before the texture cache fills from them.
//
// This is HPS -> bridge registers -> FPGA EMIF write. NOT HPS direct memory
// access, and NOT the retired f2sdram path. The EMIF write channel is shared
// with the GS framebuffer writer through gs_lpddr_wr_arb (FB writer = priority,
// this probe writes only when the writer is idle).
//
// Runs on emif_clk. The bridge pulse/addr/data come from CLOCK2_50 and are
// toggle-synchronized internally (same CDC as lpddr4b_loader / gs_lpddr_rd_probe).
//
// Each wr_pulse triggers ONE single-beat 32-bit write: the 32-bit lane within
// the 256-bit EMIF word is selected by addr[4:2] with the matching WSTRB nibble.
// ============================================================================
`timescale 1ns/1ps
module gs_lpddr_wr_probe (
    input  logic         emif_clk,
    input  logic         emif_rst_n,
    // ---- control (from HPS bridge, CLOCK2_50 domain) ----
    input  logic         wr_pulse,     // toggles when the HPS writes a data word
    input  logic [29:0]  wr_addr,      // EMIF byte address (stable when pulse flips)
    input  logic [31:0]  wr_data,      // data word (stable when pulse flips)
    input  logic         full_beat,    // Ch323 diag: write ALL 8 lanes (wstrb=0xFFFFFFFF) — tests
                                       // full-width commit through THIS arbiter/profile path
    // ---- status (emif_clk domain; bridge syncs) ----
    output logic         busy,
    output logic         done_toggle,  // toggles on each completed write
    output logic [31:0]  bresp_errs,   // count of non-OKAY write responses (saturating)
    // ---- AXI4 write channel to the EMIF user port (emif_clk, 256-bit) ----
    output logic [29:0]  awaddr,
    output logic [1:0]   awburst,
    output logic [6:0]   awid,
    output logic [7:0]   awlen,
    output logic [2:0]   awsize,
    output logic         awvalid,
    input  logic         awready,
    output logic [255:0] wdata,
    output logic [31:0]  wstrb,
    output logic         wlast,
    output logic         wvalid,
    input  logic         wready,
    input  logic [1:0]   bresp,
    input  logic         bvalid,
    output logic         bready
);

    // ------------------------------------------------------------------------
    // Constant AXI attributes: every transaction is one 32-byte INCR beat.
    // ------------------------------------------------------------------------
    localparam logic [1:0]  AXI_BURST_INCR = 2'b01;
    localparam logic [2:0]  AXI_SIZE_32B   = 3'b101;          // 2^5 = 32 bytes
    localparam logic [1:0]  AXI_RESP_OKAY  = 2'b00;
    localparam logic [31:0] ERR_CNT_MAX    = 32'hFFFF_FFFF;

    assign awburst = AXI_BURST_INCR;
    assign awid    = 7'd5;            // distinct id: fb-writer/probe ids elsewhere; 5 = wr-probe
    assign awlen   = 8'd0;            // single beat
    assign awsize  = AXI_SIZE_32B;    // full 256-bit bus
    assign bready  = 1'b1;            // responses are always accepted

    // ------------------------------------------------------------------------
    // CDC: toggle sync CLOCK2_50 -> emif_clk (same as lpddr4b_loader).
    //
    // wr_sync resets to 0, so if wr_pulse is already high when emif_rst_n is
    // released, the synchronizer filling up would look like a toggle and
    // launch a write nobody requested. sync_primed masks edge detection until
    // three real samples have shifted in, i.e. until both compared taps hold
    // sampled values rather than reset values.
    // ------------------------------------------------------------------------
    reg [2:0] wr_sync;
    reg [2:0] sync_primed;
    wire      wr_edge = sync_primed[2] && (wr_sync[2] != wr_sync[1]);

    // full_beat comes from the same foreign clock domain and steers every
    // wdata/wstrb flop, so it goes through its own 2-flop synchronizer instead
    // of being sampled raw. Two flops settle no later than the 3-stage toggle
    // path, so a full_beat value set up no later than the wr_pulse toggle is
    // visible before S_AW can sample it.
    reg [1:0] full_beat_sync;
    wire      full_beat_s = full_beat_sync[1];

    // Values captured on the accepted toggle edge.
    reg [29:0] lat_addr;
    reg [31:0] lat_wdata;

    // Bit offsets of the selected 32-bit lane inside the 256-bit beat and of
    // its 4-bit strobe group inside WSTRB (lane index = lat_addr[4:2]).
    wire [7:0] lane_data_lo = {lat_addr[4:2], 5'd0};
    wire [4:0] lane_strb_lo = {lat_addr[4:2], 2'd0};

    typedef enum logic [1:0] { S_IDLE, S_AW, S_W, S_B } state_t;
    state_t state;

    // ------------------------------------------------------------------------
    // Write FSM: IDLE -> AW (address handshake) -> W (data handshake)
    //            -> B (response) -> IDLE.
    // Toggle edges that arrive while a write is in flight are not queued:
    // wr_edge is only examined in S_IDLE.
    // ------------------------------------------------------------------------
    always_ff @(posedge emif_clk or negedge emif_rst_n) begin
        if (!emif_rst_n) begin
            wr_sync        <= 3'd0;
            sync_primed    <= 3'd0;
            full_beat_sync <= 2'd0;
            lat_addr       <= 30'd0;
            lat_wdata      <= 32'd0;
            state          <= S_IDLE;
            awaddr         <= 30'd0;
            awvalid        <= 1'b0;
            wdata          <= 256'd0;
            wstrb          <= 32'd0;
            wlast          <= 1'b0;
            wvalid         <= 1'b0;
            busy           <= 1'b0;
            done_toggle    <= 1'b0;
            bresp_errs     <= 32'd0;
        end else begin
            wr_sync        <= {wr_sync[1:0], wr_pulse};
            sync_primed    <= {sync_primed[1:0], 1'b1};
            full_beat_sync <= {full_beat_sync[0], full_beat};

            case (state)
                S_IDLE: begin
                    busy <= 1'b0;
                    if (wr_edge) begin
                        // Bridge guarantees addr/data are stable around the toggle.
                        lat_addr  <= wr_addr;
                        lat_wdata <= wr_data;
                        busy      <= 1'b1;
                        awaddr    <= {wr_addr[29:5], 5'd0};   // 32-byte aligned beat
                        awvalid   <= 1'b1;
                        state     <= S_AW;
                    end
                end

                S_AW: begin
                    if (awready) begin
                        awvalid <= 1'b0;
                        // Default: nothing strobed; the branch below overrides
                        // the bits it owns (last non-blocking write wins).
                        wdata   <= 256'd0;
                        wstrb   <= 32'd0;
                        if (full_beat_s) begin
                            // diag: replicate the word across all 8 lanes, full WSTRB.
                            wdata <= {8{lat_wdata}};
                            wstrb <= 32'hFFFF_FFFF;
                        end else begin
                            // place the 32-bit lane + its WSTRB nibble
                            wdata[lane_data_lo +: 32] <= lat_wdata;
                            wstrb[lane_strb_lo +: 4]  <= 4'hF;
                        end
                        wlast  <= 1'b1;
                        wvalid <= 1'b1;
                        state  <= S_W;
                    end
                end

                S_W: begin
                    if (wready) begin
                        wvalid <= 1'b0;
                        wlast  <= 1'b0;
                        state  <= S_B;
                    end
                end

                S_B: begin
                    if (bvalid) begin
                        // Saturate instead of wrapping so a large error count
                        // can never read back as zero.
                        if ((bresp != AXI_RESP_OKAY) && (bresp_errs != ERR_CNT_MAX))
                            bresp_errs <= bresp_errs + 32'd1;
                        busy        <= 1'b0;
                        done_toggle <= ~done_toggle;
                        state       <= S_IDLE;
                    end
                end

                default: state <= S_IDLE;
            endcase
        end
    end

endmodule