Files
thejayman77 ec82764bef Initial commit: retroDE_ps2 — first-of-its-kind PS2 GS FPGA core (DE25-Nano / Agilex 5)
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression
(272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps,
and all dump-derived textures/traces) is excluded via .gitignore and stays local.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 20:10:50 -04:00

356 lines
14 KiB
Systemverilog

// retroDE_ps2 — dmac_reg_stub
//
// EE DMAC stub. Channel-agnostic: the module's behaviour is generic across
// PS2 DMA channels and downstream endpoints. The specific channel and path
// id are set via parameters; the downstream endpoint wires (ep_*) are
// valid/data/last/ready regardless of what consumer is connected. Current
// uses: CHANNEL=2 (GIF path), CHANNEL=5 (SIF0 path).
//
// Payload source: memory-backed via the `mem_rd_*` master port, typically
// routed through `ee_memory_map_stub` to `ee_ram_stub`. MADR is the real
// fetch source address.
//
// Contract refs:
// docs/stub_module_plan.md (Wave 2, item 8)
// docs/wave2_dma_gif_plan.md (Wave 2 scope)
// docs/wave25_memory_backed_dma_plan.md (Wave 2.5 scope — THIS REVISION)
// docs/contracts/dmac.md
//
// Register surface (single channel, selected by CHANNEL parameter):
// offset 0x00 CHCR — start bit at [0], other bits recorded
// offset 0x10 MADR — real fetch source address (Wave 2.5)
// offset 0x20 QWC — transfer length in 128-bit qwords (first sign-off
// path requires QWC == 1; state machine is QWC-
// generic for a future Wave 2.6 extension)
// offset 0x30 TADR — recorded for future chain-mode use
// offset 0x40 DONE_COUNT — monotonic completion counter (read-only;
// writes are accepted but ignored). Software reads
// this to distinguish "nth completion" without
// counting interrupts externally. EE-core chapter 4
// addition; mirrors iop_dmac_reg_stub's DONE_COUNT
// but at a new slot (0x0C is occupied on the IOP
// stub; EE stub's 16-byte register spacing puts
// DONE_COUNT at 0x40).
//
// Register reads (EE-core chapter 4, added alongside the original write
// surface): reg_rd_en / reg_rd_data / reg_rd_valid with 1-cycle latency,
// matching the rest of the stub ecosystem. All four config registers plus
// DONE_COUNT are readable; all other offsets return 0.
//
// Memory master interface (to ee_ram_stub in Wave 2.5):
// mem_rd_en / mem_rd_addr drive the request
// mem_rd_valid / mem_rd_data return data one cycle later
//
// Downstream endpoint: ep_{valid,data,last,ready}. The port names are
// channel-agnostic because the DMAC's behaviour is generic across PS2
// channels (ch2 = GIF, ch5 = SIF0, etc.). Connect the endpoint side to
// whichever consumer matches the instantiated CHANNEL/PATH_ID.
//
// State machine:
// IDLE → FETCH_WAIT on CHCR start
// FETCH_WAIT → ACTIVE_SEND on mem_rd_valid (data latched)
// ACTIVE_SEND → FETCH_WAIT on endpoint accept with more beats pending
// → DONE on endpoint accept for the final beat
// DONE → IDLE next cycle (clears CHCR.start)
//
// Trace payload schemas (per wave25_memory_backed_dma_plan.md):
// DMAC DMA_CFG arg0=channel arg1=chcr arg2=madr arg3=qwc
// flags=reg_offset (which reg was written)
// DMAC DMA_START arg0=channel arg1=qwc arg2=madr arg3=path_id
// DMAC DMA_BEAT arg0=channel arg1=beat arg2=src_addr arg3=remaining
// DMAC DMA_DONE arg0=channel arg1=beats arg2=completion arg3=path_id
// completion code: 0 = OK
`timescale 1ns/1ps
module dmac_reg_stub
    import trace_pkg::*;
#(
    // DMA channel number reported in trace arg0.
    parameter logic [3:0] CHANNEL = 4'd2,
    // Path identifier reported in DMA_START / DMA_DONE trace arg3.
    parameter logic [3:0] PATH_ID = 4'd2
) (
    input  logic         clk,
    input  logic         rst_n,        // synchronous, active-low

    // CPU / testbench register write port (single-channel, see CHANNEL).
    // reg_offset is shared by read and write; callers must not assert both
    // enables in the same cycle (the map ensures this because the EE CPU
    // emits either rd or wr per transaction, never both).
    input  logic         reg_wr_en,
    input  logic [7:0]   reg_offset,
    input  logic [31:0]  reg_wr_data,

    // Register read port (EE-core chapter 4). 1-cycle latency.
    input  logic         reg_rd_en,
    output logic [31:0]  reg_rd_data,
    output logic         reg_rd_valid,

    // Memory read master. mem_rd_en pulses for one cycle on each entry to
    // S_FETCH_WAIT with mem_rd_addr = MADR + beat * 16; the FSM then waits
    // for mem_rd_valid and latches mem_rd_data.
    output logic         mem_rd_en,
    output logic [31:0]  mem_rd_addr,
    input  logic [127:0] mem_rd_data,
    input  logic         mem_rd_valid,

    // Downstream endpoint (valid/data/last/ready). Channel-agnostic: the
    // consumer is whatever matches the instantiated CHANNEL / PATH_ID.
    output logic         ep_valid,
    output logic [127:0] ep_data,
    output logic         ep_last,
    input  logic         ep_ready,

    // Completion pulse — one cycle high while the FSM is in S_DONE.
    // Intended as an INTC source; level-held bit latching happens in the
    // interrupt controller, not here.
    output logic         irq_completion_o,

    // Trace
    output logic         ev_valid,
    output subsys_e      ev_subsys,
    output event_e       ev_event,
    output logic [63:0]  ev_arg0,
    output logic [63:0]  ev_arg1,
    output logic [63:0]  ev_arg2,
    output logic [63:0]  ev_arg3,
    output logic [31:0]  ev_flags
);

    localparam logic [7:0] CHCR_OFFSET       = 8'h00;
    localparam logic [7:0] MADR_OFFSET       = 8'h10;
    localparam logic [7:0] QWC_OFFSET        = 8'h20;
    localparam logic [7:0] TADR_OFFSET       = 8'h30;
    localparam logic [7:0] DONE_COUNT_OFFSET = 8'h40;

    // ------------------------------------------------------------------
    // Transfer FSM state. Declared ahead of the register file because the
    // register-ownership and DONE_COUNT processes below reference `state`
    // and `S_DONE`; a variable must be declared before it is referenced.
    // ------------------------------------------------------------------
    typedef enum logic [1:0] {
        S_IDLE        = 2'd0,
        S_FETCH_WAIT  = 2'd1,
        S_ACTIVE_SEND = 2'd2,
        S_DONE        = 2'd3
    } state_e;

    state_e state;

    // ------------------------------------------------------------------
    // Register file (one channel, selected by the CHANNEL parameter)
    // ------------------------------------------------------------------
    logic [31:0] chcr;
    logic [31:0] madr;
    logic [31:0] qwc;
    logic [31:0] tadr;
    logic [31:0] done_count;

    // A start is a CHCR write that sets bit 0 while bit 0 is currently
    // clear (0 -> 1 edge as seen through the write port).
    logic start_pulse;
    assign start_pulse = reg_wr_en && (reg_offset == CHCR_OFFSET) &&
                         reg_wr_data[0] && !chcr[0];

    // Single owner for the config regs: software writes win over the
    // S_DONE auto-clear on CHCR[0] in the unlikely same-cycle case
    // (the NBA queue lets the case-statement full-width assign
    // override the partial bit-0 clear). Software writing CHCR while
    // the DMA is completing is not part of any sane flow, so this
    // ordering is defensive — the point is: chcr has one procedural
    // driver, not two.
    //
    // NOTE(review): if such a same-cycle CHCR write carries bit 0 = 1,
    // chcr[0] stays set while the FSM returns to S_IDLE. start_pulse
    // requires !chcr[0], so software must then write bit 0 = 0 before it
    // can start another transfer.
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            chcr <= 32'd0;
            madr <= 32'd0;
            qwc  <= 32'd0;
            tadr <= 32'd0;
        end else begin
            if (state == S_DONE) chcr[0] <= 1'b0;
            if (reg_wr_en) begin
                case (reg_offset)
                    CHCR_OFFSET: chcr <= reg_wr_data;
                    MADR_OFFSET: madr <= reg_wr_data;
                    QWC_OFFSET:  qwc  <= reg_wr_data;
                    TADR_OFFSET: tadr <= reg_wr_data;
                    default: ;   // includes DONE_COUNT: write ignored
                endcase
            end
        end
    end

    // DONE_COUNT: monotonic completion counter. The FSM spends exactly one
    // cycle in S_DONE per transfer, so this increments once per
    // completion. Reset is the only clear path; writes at the DONE_COUNT
    // offset fall into the default arm of the write case above.
    always_ff @(posedge clk) begin
        if (!rst_n)               done_count <= 32'd0;
        else if (state == S_DONE) done_count <= done_count + 32'd1;
    end

    // Register read (1-cycle latency). reg_rd_valid is reg_rd_en delayed
    // by one cycle; reg_rd_data holds its previous value when no read is
    // requested. Unmapped offsets read as 0.
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            reg_rd_data  <= 32'd0;
            reg_rd_valid <= 1'b0;
        end else begin
            reg_rd_valid <= reg_rd_en;
            if (reg_rd_en) begin
                case (reg_offset)
                    CHCR_OFFSET:       reg_rd_data <= chcr;
                    MADR_OFFSET:       reg_rd_data <= madr;
                    QWC_OFFSET:        reg_rd_data <= qwc;
                    TADR_OFFSET:       reg_rd_data <= tadr;
                    DONE_COUNT_OFFSET: reg_rd_data <= done_count;
                    default:           reg_rd_data <= 32'd0;
                endcase
            end
        end
    end

    // ------------------------------------------------------------------
    // Transfer state machine
    //   IDLE        -> FETCH_WAIT  on start with QWC != 0
    //   IDLE        -> DONE        on start with QWC == 0 (no beats)
    //   FETCH_WAIT  -> ACTIVE_SEND on mem_rd_valid (data latched)
    //   ACTIVE_SEND -> FETCH_WAIT  on endpoint accept, more beats pending
    //               -> DONE        on endpoint accept of the final beat
    //   DONE        -> IDLE        next cycle
    // ------------------------------------------------------------------
    logic [31:0]  madr_latched;   // MADR snapshot taken at start
    logic [31:0]  qwc_latched;    // QWC snapshot taken at start
    logic [31:0]  beat_index;     // zero-based index of the current beat
    logic [127:0] beat_payload;   // qword fetched for the current beat

    logic [31:0] src_addr;
    assign src_addr = madr_latched + (beat_index << 4);  // beat * 16 bytes

    logic beat_accepted;
    assign beat_accepted = ep_valid && ep_ready;

    // Pulse mem_rd_en for one cycle whenever we first enter FETCH_WAIT.
    logic prev_state_fw;
    always_ff @(posedge clk) begin
        if (!rst_n) prev_state_fw <= 1'b0;
        else        prev_state_fw <= (state == S_FETCH_WAIT);
    end

    logic entering_fw;
    assign entering_fw = (state == S_FETCH_WAIT) && !prev_state_fw;

    assign mem_rd_en   = entering_fw;
    assign mem_rd_addr = src_addr;

    // Drive endpoint only in ACTIVE_SEND with the latched payload.
    assign ep_valid = (state == S_ACTIVE_SEND);
    assign ep_data  = beat_payload;
    assign ep_last  = (state == S_ACTIVE_SEND) &&
                      (beat_index + 32'd1 == qwc_latched);

    assign irq_completion_o = (state == S_DONE);

    always_ff @(posedge clk) begin
        if (!rst_n) begin
            state        <= S_IDLE;
            madr_latched <= 32'd0;
            qwc_latched  <= 32'd0;
            beat_index   <= 32'd0;
            beat_payload <= 128'd0;
        end else begin
            unique case (state)
                S_IDLE: begin
                    if (start_pulse) begin
                        // start_pulse is gated by reg_wr_en && reg_offset ==
                        // CHCR_OFFSET, so a same-cycle QWC write is
                        // structurally impossible through this interface.
                        // Latch the currently-visible register state.
                        madr_latched <= madr;
                        qwc_latched  <= qwc;
                        beat_index   <= 32'd0;
                        if (qwc == 32'd0) begin
                            // Zero-length transfer: nothing to fetch or
                            // send. Without this guard the terminal test
                            // (beat_index + 1 == qwc_latched) would not
                            // match until beat_index wrapped after 2^32
                            // beats. Complete immediately instead.
                            state <= S_DONE;
                        end else begin
                            state <= S_FETCH_WAIT;
                        end
                    end
                end

                S_FETCH_WAIT: begin
                    if (mem_rd_valid) begin
                        beat_payload <= mem_rd_data;
                        state        <= S_ACTIVE_SEND;
                    end
                end

                S_ACTIVE_SEND: begin
                    if (beat_accepted) begin
                        if (beat_index + 32'd1 == qwc_latched) begin
                            state <= S_DONE;
                        end else begin
                            beat_index <= beat_index + 32'd1;
                            state      <= S_FETCH_WAIT;
                        end
                    end
                end

                S_DONE: begin
                    state <= S_IDLE;
                    // chcr[0] auto-clear on S_DONE lives in the
                    // register-ownership always_ff above (single
                    // procedural driver for chcr).
                end

                default: state <= S_IDLE;
            endcase
        end
    end

    // ------------------------------------------------------------------
    // Trace emission — one event per cycle; priority:
    //   DONE pulse > BEAT accept > START on transition > CFG on write
    // A lower-priority event that coincides with a higher-priority one is
    // not emitted. A zero-length (QWC == 0) transfer never passes through
    // FETCH_WAIT, so it produces DMA_DONE with beats = 0 and no DMA_START.
    // ------------------------------------------------------------------
    logic prev_state_fetch_or_later;
    always_ff @(posedge clk) begin
        if (!rst_n) prev_state_fetch_or_later <= 1'b0;
        else        prev_state_fetch_or_later <= (state != S_IDLE);
    end

    logic enter_start;  // first FETCH_WAIT cycle after leaving IDLE
    assign enter_start = (state == S_FETCH_WAIT) && !prev_state_fetch_or_later;

    logic enter_done;
    assign enter_done = (state == S_DONE);

    always_ff @(posedge clk) begin
        if (!rst_n) begin
            ev_valid  <= 1'b0;
            ev_subsys <= SUBSYS_DMAC;
            ev_event  <= EV_DMA_CFG;
            ev_arg0   <= 64'd0;
            ev_arg1   <= 64'd0;
            ev_arg2   <= 64'd0;
            ev_arg3   <= 64'd0;
            ev_flags  <= 32'd0;
        end else if (enter_done) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_DMAC;
            ev_event  <= EV_DMA_DONE;
            ev_arg0   <= {60'd0, CHANNEL};
            // Beats completed. On the normal path S_DONE is only reached
            // when beat_index + 1 == qwc_latched, so this equals the
            // number of accepted beats; on the zero-length path it is 0.
            ev_arg1   <= {32'd0, qwc_latched};
            ev_arg2   <= 64'd0;                       // completion: OK
            ev_arg3   <= {60'd0, PATH_ID};
            ev_flags  <= 32'd0;
        end else if (beat_accepted) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_DMAC;
            ev_event  <= EV_DMA_BEAT;
            ev_arg0   <= {60'd0, CHANNEL};
            ev_arg1   <= {32'd0, beat_index};
            ev_arg2   <= {32'd0, src_addr};           // this beat's source
            ev_arg3   <= {32'd0, qwc_latched - beat_index - 32'd1};
            ev_flags  <= 32'd0;
        end else if (enter_start) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_DMAC;
            ev_event  <= EV_DMA_START;
            ev_arg0   <= {60'd0, CHANNEL};
            ev_arg1   <= {32'd0, qwc_latched};
            ev_arg2   <= {32'd0, madr_latched};       // MADR is the source
            ev_arg3   <= {60'd0, PATH_ID};
            ev_flags  <= 32'd0;
        end else if (reg_wr_en) begin
            // Report the post-write view: the written register shows the
            // incoming data, the others show their current contents.
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_DMAC;
            ev_event  <= EV_DMA_CFG;
            ev_arg0   <= {60'd0, CHANNEL};
            ev_arg1   <= {32'd0, (reg_offset == CHCR_OFFSET) ? reg_wr_data : chcr};
            ev_arg2   <= {32'd0, (reg_offset == MADR_OFFSET) ? reg_wr_data : madr};
            ev_arg3   <= {32'd0, (reg_offset == QWC_OFFSET)  ? reg_wr_data : qwc};
            ev_flags  <= {24'd0, reg_offset};
        end else begin
            ev_valid <= 1'b0;
        end
    end

endmodule : dmac_reg_stub