Files
thejayman77 ec82764bef Initial commit: retroDE_ps2 — first-of-its-kind PS2 GS FPGA core (DE25-Nano / Agilex 5)
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression
(272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps,
and all dump-derived textures/traces) is excluded via .gitignore and stays local.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 20:10:50 -04:00

166 lines
6.3 KiB
Systemverilog

// retroDE_ps2 — ee_ram_stub
//
// Tiny addressable EE-RAM block for Wave 2.5. Provides the first real memory
// source for DMAC-backed transfers. Not the final 32 MiB main-RAM model —
// see docs/wave25_memory_backed_dma_plan.md for explicit scope.
//
// Contract refs:
// docs/wave25_memory_backed_dma_plan.md (ee_ram_stub scope)
// docs/contracts/memory.md (memory subsystem ownership)
//
// Interface:
// - 128-bit wide data path, qword-aligned addressing (low 4 bits ignored).
// - One-cycle read latency: rd_en on cycle N → rd_data / rd_valid on N+1.
// - Write port has per-byte enables (wr_be[15:0]).
// - Optional `$readmemh` preload via IMAGE_FILE parameter.
//
// Trace:
// Emits MEM READ / MEM WRITE events one cycle after the request, matching
// the existing MEM schema. master_id is a caller-provided input (8 bits);
// the integration TB tags reads as 1 (DMAC) while TB-initiated writes are
// tagged 0. Any downstream master can drive its own id without RAM-side
// changes.
//
// Trace payload:
// MEM READ arg0=addr arg1=data_lo arg2=master_id arg3=region_id
// MEM WRITE arg0=addr arg1=data_lo arg2=master_id arg3=region_id
// master_id : caller-provided (e.g. 0 = TB direct, 1 = DMAC)
// region_id : 1 = EE_RAM (constant for this module)
// flags bit 0: 1 = write, 0 = read
`timescale 1ns/1ps
// ee_ram_stub: small synchronous RAM, 128 bits wide, with one read port, one
// byte-enabled write port, and a registered trace-event output.
//
// Parameters:
//   SIZE_BYTES : capacity in bytes. Must be a multiple of 16 and at least 32;
//                a power of two is expected so that every representable
//                address maps to a real qword (checked at time 0 below).
//   IMAGE_FILE : when non-empty, the memory is preloaded with $readmemh;
//                otherwise it is zero-filled.
//
// Reset is synchronous and active-low. It clears rd_data, rd_valid and the
// trace outputs; memory contents are NOT cleared, and writes presented while
// rst_n is low are ignored.
module ee_ram_stub
    import trace_pkg::*;
#(
    parameter int    SIZE_BYTES = 16 * 1024,  // 16 KiB default
    parameter string IMAGE_FILE = ""
) (
    input  logic                          clk,
    input  logic                          rst_n,

    // Read port. Address bits [3:0] are ignored (qword granularity).
    // rd_data / rd_valid are registered: they update on the clock edge that
    // samples rd_en.
    input  logic                          rd_en,
    input  logic [$clog2(SIZE_BYTES)-1:0] rd_addr,
    output logic [127:0]                  rd_data,
    output logic                          rd_valid,

    // Write port. Address bits [3:0] are ignored; wr_be[b] enables byte lane
    // b, i.e. wr_data[b*8 +: 8].
    input  logic                          wr_en,
    input  logic [$clog2(SIZE_BYTES)-1:0] wr_addr,
    input  logic [127:0]                  wr_data,
    input  logic [15:0]                   wr_be,

    // Master id copied into ev_arg2 of every trace event. The port has no
    // default value, so the instantiating module must drive it (file-header
    // convention: 8'd0 = TB direct, 8'd1 = DMAC).
    input  logic [7:0]                    master_id,

    // Trace event outputs (all registered).
    output logic                          ev_valid,
    output subsys_e                       ev_subsys,
    output event_e                        ev_event,
    output logic [63:0]                   ev_arg0,
    output logic [63:0]                   ev_arg1,
    output logic [63:0]                   ev_arg2,
    output logic [63:0]                   ev_arg3,
    output logic [31:0]                   ev_flags
);

    localparam int          ADDR_WIDTH     = $clog2(SIZE_BYTES);
    localparam int          QWORD_COUNT    = SIZE_BYTES / 16;
    localparam int          QW_INDEX_WIDTH = $clog2(QWORD_COUNT);
    localparam logic [63:0] REGION_EE_RAM  = 64'd1;

    // ------------------------------------------------------------------
    // Parameter sanity check.
    //   * SIZE_BYTES not a multiple of 16 would silently truncate
    //     QWORD_COUNT.
    //   * SIZE_BYTES == 16 gives QWORD_COUNT == 1 and $clog2(1) == 0, i.e.
    //     a zero-width qword index.
    //   * A non-power-of-two size still works for in-range addresses, but
    //     the address ports can then express qword indices beyond the end
    //     of the array, so it is reported as a warning.
    // ------------------------------------------------------------------
    initial begin
        if ((SIZE_BYTES < 32) || ((SIZE_BYTES % 16) != 0)) begin
            $fatal(1,
                   "[ee_ram_stub] SIZE_BYTES=%0d is unsupported: must be a multiple of 16 and at least 32",
                   SIZE_BYTES);
        end else if ((SIZE_BYTES & (SIZE_BYTES - 1)) != 0) begin
            $warning("[ee_ram_stub] SIZE_BYTES=%0d is not a power of two: addresses at or above SIZE_BYTES fall outside the array",
                     SIZE_BYTES);
        end
    end

    // Storage: one 128-bit entry per qword.
    logic [127:0] mem [0:QWORD_COUNT-1];

    // Initial contents: image preload when IMAGE_FILE is set, zeros otherwise.
    initial begin
        if (IMAGE_FILE != "") begin
            $display("[ee_ram_stub] loading image: %0s", IMAGE_FILE);
            $readmemh(IMAGE_FILE, mem);
        end else begin
            for (int i = 0; i < QWORD_COUNT; i++) mem[i] = 128'd0;
            $display("[ee_ram_stub] zero-initialised (%0d qwords / %0d bytes)",
                     QWORD_COUNT, SIZE_BYTES);
        end
    end

    // Byte address -> qword index (drop the low 4 bits).
    logic [QW_INDEX_WIDTH-1:0] rd_qw_idx;
    logic [QW_INDEX_WIDTH-1:0] wr_qw_idx;
    assign rd_qw_idx = rd_addr[ADDR_WIDTH-1:4];
    assign wr_qw_idx = wr_addr[ADDR_WIDTH-1:4];

    // ------------------------------------------------------------------
    // Read + write (one-cycle latency). Reads and writes to the same
    // address in the same cycle are not expected in Wave 2.5; if they
    // occur, the read sees pre-write data because both the read capture
    // and the memory update are nonblocking assignments on the same edge.
    // ------------------------------------------------------------------
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            rd_data  <= 128'd0;
            rd_valid <= 1'b0;
        end else begin
            rd_valid <= rd_en;
            // rd_data holds its previous value when rd_en is low.
            if (rd_en) rd_data <= mem[rd_qw_idx];
            if (wr_en) begin
                // Per-byte write: only lanes with wr_be set are updated.
                for (int b = 0; b < 16; b++) begin
                    if (wr_be[b]) mem[wr_qw_idx][b*8 +: 8] <= wr_data[b*8 +: 8];
                end
            end
        end
    end

    // ------------------------------------------------------------------
    // Trace emission: at most one event per cycle. When rd_en and wr_en
    // are asserted together only the READ event is emitted (the write is
    // still performed above, it is just not traced). Outputs are
    // registered, so ev_valid asserts on the same clock edge as rd_valid.
    //
    //   ev_arg0  : request address as presented (low 4 bits NOT masked)
    //   ev_arg1  : low 64 bits of the data (read: memory contents sampled
    //              on the same edge as rd_data; write: wr_data[63:0]
    //              regardless of wr_be)
    //   ev_arg2  : master_id, zero-extended
    //   ev_arg3  : REGION_EE_RAM
    //   ev_flags : bit 0 = 1 for write, 0 for read
    //
    // When no request is present only ev_valid is cleared; the other
    // ev_* registers keep the payload of the last event.
    // ------------------------------------------------------------------
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            ev_valid  <= 1'b0;
            ev_subsys <= SUBSYS_MEM;
            ev_event  <= EV_READ;
            ev_arg0   <= 64'd0;
            ev_arg1   <= 64'd0;
            ev_arg2   <= 64'd0;
            ev_arg3   <= 64'd0;
            ev_flags  <= 32'd0;
        end else begin
            if (rd_en) begin
                ev_valid  <= 1'b1;
                ev_subsys <= SUBSYS_MEM;
                ev_event  <= EV_READ;
                ev_arg0   <= {{(64-ADDR_WIDTH){1'b0}}, rd_addr};
                ev_arg1   <= mem[rd_qw_idx][63:0];
                ev_arg2   <= {56'd0, master_id};
                ev_arg3   <= REGION_EE_RAM;
                ev_flags  <= 32'd0;
            end else if (wr_en) begin
                ev_valid  <= 1'b1;
                ev_subsys <= SUBSYS_MEM;
                ev_event  <= EV_WRITE;
                ev_arg0   <= {{(64-ADDR_WIDTH){1'b0}}, wr_addr};
                ev_arg1   <= wr_data[63:0];
                ev_arg2   <= {56'd0, master_id};
                ev_arg3   <= REGION_EE_RAM;
                ev_flags  <= 32'h0000_0001;  // bit 0 = write
            end else begin
                ev_valid  <= 1'b0;
            end
        end
    end

endmodule : ee_ram_stub