ec82764bef
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
356 lines
14 KiB
Systemverilog
356 lines
14 KiB
Systemverilog
// retroDE_ps2 — dmac_reg_stub
//
// EE DMAC stub. Channel-agnostic: the module's behaviour is generic across
// PS2 DMA channels and downstream endpoints. The specific channel and path
// id are set via parameters; the downstream endpoint wires (ep_*) are
// valid/data/last/ready regardless of what consumer is connected. Current
// uses: CHANNEL=2 (GIF path), CHANNEL=5 (SIF0 path).
//
// Payload source: memory-backed via the `mem_rd_*` master port, typically
// routed through `ee_memory_map_stub` to `ee_ram_stub`. MADR is the real
// fetch source address.
//
// Contract refs:
//   docs/stub_module_plan.md (Wave 2, item 8)
//   docs/wave2_dma_gif_plan.md (Wave 2 scope)
//   docs/wave25_memory_backed_dma_plan.md (Wave 2.5 scope — THIS REVISION)
//   docs/contracts/dmac.md
//
// Register surface (single channel, selected by CHANNEL parameter):
//   offset 0x00  CHCR       — start bit at [0], other bits recorded
//   offset 0x10  MADR       — real fetch source address (Wave 2.5)
//   offset 0x20  QWC        — transfer length in 128-bit qwords (first sign-off
//                             path requires QWC == 1; state machine is QWC-
//                             generic for a future Wave 2.6 extension)
//   offset 0x30  TADR       — recorded for future chain-mode use
//   offset 0x40  DONE_COUNT — monotonic completion counter (read-only;
//                             writes are accepted but ignored). Software reads
//                             this to distinguish "nth completion" without
//                             counting interrupts externally. EE-core chapter 4
//                             addition; mirrors iop_dmac_reg_stub's DONE_COUNT
//                             but at a new slot (0x0C is occupied on the IOP
//                             stub; EE stub's 16-byte register spacing puts
//                             DONE_COUNT at 0x40).
//
// Register reads (EE-core chapter 4, added alongside the original write
// surface): reg_rd_en / reg_rd_data / reg_rd_valid with 1-cycle latency,
// matching the rest of the stub ecosystem. All four config registers plus
// DONE_COUNT are readable; all other offsets return 0.
//
// Memory master interface (to ee_ram_stub in Wave 2.5):
//   mem_rd_en / mem_rd_addr     drive the request
//   mem_rd_valid / mem_rd_data  return data one cycle later
//
// Downstream endpoint: ep_{valid,data,last,ready}. The port names are
// channel-agnostic because the DMAC's behaviour is generic across PS2
// channels (ch2 = GIF, ch5 = SIF0, etc.). Connect the endpoint side to
// whichever consumer matches the instantiated CHANNEL/PATH_ID.
//
// State machine:
//   IDLE        → FETCH_WAIT  on CHCR start
//   FETCH_WAIT  → ACTIVE_SEND on mem_rd_valid (data latched)
//   ACTIVE_SEND → FETCH_WAIT  on endpoint accept with more beats pending
//               → DONE        on endpoint accept for the final beat
//   DONE        → IDLE        next cycle (clears CHCR.start)
//
// Trace payload schemas (per wave25_memory_backed_dma_plan.md):
//   DMAC DMA_CFG    arg0=channel arg1=chcr arg2=madr       arg3=qwc
//                   flags=reg_offset (which reg was written)
//   DMAC DMA_START  arg0=channel arg1=qwc  arg2=MADR       arg3=path_id
//   DMAC DMA_BEAT   arg0=channel arg1=beat arg2=src_addr   arg3=remaining
//   DMAC DMA_DONE   arg0=channel arg1=beats arg2=completion arg3=path_id
//   completion code: 0 = OK
|
|
`timescale 1ns/1ps
|
|
|
|
module dmac_reg_stub
    import trace_pkg::*;
#(
    // Channel number reported in trace arg0 for every event.
    parameter logic [3:0] CHANNEL = 4'd2,
    // Path id reported in trace arg3 of DMA_START / DMA_DONE.
    parameter logic [3:0] PATH_ID = 4'd2
) (
    input  logic         clk,
    // Active-low reset, sampled synchronously on posedge clk by every
    // always_ff in this module.
    input  logic         rst_n,

    // CPU / testbench register write port (single-channel, see CHANNEL).
    // reg_offset is shared by read and write; callers must not assert both
    // enables in the same cycle (the map ensures this because the EE CPU
    // emits either rd or wr per transaction, never both).
    input  logic         reg_wr_en,
    input  logic [7:0]   reg_offset,
    input  logic [31:0]  reg_wr_data,

    // Register read port (EE-core chapter 4). 1-cycle latency:
    // reg_rd_valid is reg_rd_en delayed by one clock, and reg_rd_data is
    // registered alongside it.
    input  logic         reg_rd_en,
    output logic [31:0]  reg_rd_data,
    output logic         reg_rd_valid,

    // Memory master (Wave 2.5) — direct link to ee_ram_stub in this phase.
    // Future waves will route this through ee_memory_map_stub.
    // mem_rd_en is a single-cycle request pulse; mem_rd_addr is the byte
    // address of the current beat; the 128-bit response is captured on
    // mem_rd_valid.
    output logic         mem_rd_en,
    output logic [31:0]  mem_rd_addr,
    input  logic [127:0] mem_rd_data,
    input  logic         mem_rd_valid,

    // Downstream endpoint (valid/data/last/ready). Channel-agnostic: the
    // consumer is whatever the integrator wires up for this CHANNEL/PATH_ID.
    output logic         ep_valid,
    output logic [127:0] ep_data,
    output logic         ep_last,
    input  logic         ep_ready,

    // Completion pulse — one cycle high when the transfer reaches S_DONE.
    // Intended as an INTC source; level-held bit latching happens in the
    // interrupt controller, not here.
    output logic         irq_completion_o,

    // Trace event port. At most one event is registered per cycle; see the
    // priority order documented on the trace always_ff below.
    output logic         ev_valid,
    output subsys_e      ev_subsys,
    output event_e       ev_event,
    output logic [63:0]  ev_arg0,
    output logic [63:0]  ev_arg1,
    output logic [63:0]  ev_arg2,
    output logic [63:0]  ev_arg3,
    output logic [31:0]  ev_flags
);

    // Register byte offsets within this channel's window (16-byte spacing).
    localparam logic [7:0] CHCR_OFFSET       = 8'h00;
    localparam logic [7:0] MADR_OFFSET       = 8'h10;
    localparam logic [7:0] QWC_OFFSET        = 8'h20;
    localparam logic [7:0] TADR_OFFSET       = 8'h30;
    localparam logic [7:0] DONE_COUNT_OFFSET = 8'h40;

    // ------------------------------------------------------------------
    // Transfer state type and state register
    //
    // Declared ahead of the register file because the register-ownership
    // and DONE_COUNT processes below both test `state == S_DONE`.
    // SystemVerilog requires simple identifiers to be declared before
    // they are referenced; keeping the declaration here avoids relying on
    // tool-specific leniency for forward references.
    // ------------------------------------------------------------------

    typedef enum logic [1:0] {
        S_IDLE        = 2'd0,   // waiting for a CHCR start write
        S_FETCH_WAIT  = 2'd1,   // read issued, waiting for mem_rd_valid
        S_ACTIVE_SEND = 2'd2,   // presenting the latched beat on ep_*
        S_DONE        = 2'd3    // single-cycle completion state
    } state_e;

    state_e state;

    // ------------------------------------------------------------------
    // Register file (single channel, identified by the CHANNEL parameter)
    // ------------------------------------------------------------------

    logic [31:0] chcr;
    logic [31:0] madr;
    logic [31:0] qwc;
    logic [31:0] tadr;
    logic [31:0] done_count;

    // Rising-edge detect on CHCR[0]: a CHCR write whose bit 0 is set while
    // the stored bit 0 is still clear. The pulse is only consumed by the
    // FSM in S_IDLE, so a start written while a transfer is in flight does
    // not queue a second transfer.
    logic start_pulse;
    assign start_pulse = reg_wr_en && (reg_offset == CHCR_OFFSET) &&
                         reg_wr_data[0] && !chcr[0];

    // Single owner for the config regs: software writes win over the
    // S_DONE auto-clear on CHCR[0] in the unlikely same-cycle case
    // (the NBA queue lets the case-statement full-width assign
    // override the partial bit-0 clear). Software writing CHCR while
    // the DMA is completing is not part of any sane flow, so this
    // ordering is defensive — the point is: chcr has one procedural
    // driver, not two.
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            chcr <= 32'd0;
            madr <= 32'd0;
            qwc  <= 32'd0;
            tadr <= 32'd0;
        end else begin
            // Must stay textually before the write case: the later
            // nonblocking assignment to chcr takes precedence.
            if (state == S_DONE) chcr[0] <= 1'b0;
            if (reg_wr_en) begin
                case (reg_offset)
                    CHCR_OFFSET: chcr <= reg_wr_data;
                    MADR_OFFSET: madr <= reg_wr_data;
                    QWC_OFFSET:  qwc  <= reg_wr_data;
                    TADR_OFFSET: tadr <= reg_wr_data;
                    default: ;  // includes DONE_COUNT_OFFSET: write dropped
                endcase
            end
        end
    end

    // DONE_COUNT: monotonic completion counter. Increments on S_DONE
    // entry. Reset-only clear path; writes at the DONE_COUNT offset are
    // silently dropped by the write always_ff above (read-only register).
    // S_DONE always lasts exactly one cycle (the FSM leaves it
    // unconditionally), so this adds one per completed transfer.
    always_ff @(posedge clk) begin
        if (!rst_n)                 done_count <= 32'd0;
        else if (state == S_DONE)   done_count <= done_count + 32'd1;
    end

    // Register read (1-cycle latency, matches rest of stub ecosystem).
    // reg_rd_data only updates on a read request and otherwise holds its
    // previous value; unmapped offsets read back as zero.
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            reg_rd_data  <= 32'd0;
            reg_rd_valid <= 1'b0;
        end else begin
            reg_rd_valid <= reg_rd_en;
            if (reg_rd_en) begin
                case (reg_offset)
                    CHCR_OFFSET:       reg_rd_data <= chcr;
                    MADR_OFFSET:       reg_rd_data <= madr;
                    QWC_OFFSET:        reg_rd_data <= qwc;
                    TADR_OFFSET:       reg_rd_data <= tadr;
                    DONE_COUNT_OFFSET: reg_rd_data <= done_count;
                    default:           reg_rd_data <= 32'd0;
                endcase
            end
        end
    end

    // ------------------------------------------------------------------
    // Transfer state machine
    // ------------------------------------------------------------------

    // Snapshot of MADR/QWC taken at start, so software writes during a
    // transfer do not disturb the transfer in flight.
    logic [31:0]  madr_latched;
    logic [31:0]  qwc_latched;
    logic [31:0]  beat_index;    // zero-based index of the current beat
    logic [127:0] beat_payload;  // qword captured from memory for this beat

    logic [31:0] src_addr;
    assign src_addr = madr_latched + (beat_index << 4);  // beat * 16 bytes

    logic beat_accepted;
    assign beat_accepted = ep_valid && ep_ready;

    // Pulse mem_rd_en for one cycle whenever we first enter FETCH_WAIT.
    // The request is not re-issued while waiting: if mem_rd_valid never
    // arrives the FSM stays in S_FETCH_WAIT.
    logic prev_state_fw;
    always_ff @(posedge clk) begin
        if (!rst_n) prev_state_fw <= 1'b0;
        else        prev_state_fw <= (state == S_FETCH_WAIT);
    end

    logic entering_fw;
    assign entering_fw = (state == S_FETCH_WAIT) && !prev_state_fw;

    assign mem_rd_en   = entering_fw;
    assign mem_rd_addr = src_addr;

    // Drive endpoint only in ACTIVE_SEND with the latched payload.
    assign ep_valid = (state == S_ACTIVE_SEND);
    assign ep_data  = beat_payload;
    assign ep_last  = (state == S_ACTIVE_SEND) &&
                      (beat_index + 32'd1 == qwc_latched);

    assign irq_completion_o = (state == S_DONE);

    always_ff @(posedge clk) begin
        if (!rst_n) begin
            state        <= S_IDLE;
            madr_latched <= 32'd0;
            qwc_latched  <= 32'd0;
            beat_index   <= 32'd0;
            beat_payload <= 128'd0;
        end else begin
            unique case (state)
                S_IDLE: begin
                    if (start_pulse) begin
                        // start_pulse is gated by reg_wr_en && reg_offset ==
                        // CHCR_OFFSET, so a same-cycle QWC write is
                        // structurally impossible through this interface.
                        // Latch the currently-visible register state.
                        state        <= S_FETCH_WAIT;
                        madr_latched <= madr;
                        qwc_latched  <= qwc;
                        beat_index   <= 32'd0;
                    end
                end

                S_FETCH_WAIT: begin
                    if (mem_rd_valid) begin
                        beat_payload <= mem_rd_data;
                        state        <= S_ACTIVE_SEND;
                    end
                end

                S_ACTIVE_SEND: begin
                    if (beat_accepted) begin
                        // NOTE(review): with QWC == 0 latched, this
                        // terminal compare does not match until beat_index
                        // wraps, so the transfer keeps fetching. Confirm
                        // against docs/contracts/dmac.md whether QWC == 0
                        // must be rejected or completed immediately.
                        if (beat_index + 32'd1 == qwc_latched) begin
                            state <= S_DONE;
                        end else begin
                            beat_index <= beat_index + 32'd1;
                            state      <= S_FETCH_WAIT;
                        end
                    end
                end

                S_DONE: begin
                    state <= S_IDLE;
                    // chcr[0] auto-clear on S_DONE lives in the
                    // register-ownership always_ff above (single
                    // procedural driver for chcr).
                end

                default: state <= S_IDLE;
            endcase
        end
    end

    // ------------------------------------------------------------------
    // Trace emission — one event per cycle; priority:
    //   DONE pulse > BEAT accept > START on transition > CFG on write
    // A lower-priority event that coincides with a higher-priority one in
    // the same cycle is not emitted.
    // ------------------------------------------------------------------

    logic prev_state_fetch_or_later;
    always_ff @(posedge clk) begin
        if (!rst_n) prev_state_fetch_or_later <= 1'b0;
        else        prev_state_fetch_or_later <= (state != S_IDLE);
    end

    // First cycle in FETCH_WAIT after IDLE (not the re-entries between
    // beats, because the previous state was non-IDLE for those).
    logic enter_start;
    assign enter_start = (state == S_FETCH_WAIT) && !prev_state_fetch_or_later;

    logic enter_done;
    assign enter_done = (state == S_DONE);

    always_ff @(posedge clk) begin
        if (!rst_n) begin
            ev_valid  <= 1'b0;
            ev_subsys <= SUBSYS_DMAC;
            ev_event  <= EV_DMA_CFG;
            ev_arg0   <= 64'd0;
            ev_arg1   <= 64'd0;
            ev_arg2   <= 64'd0;
            ev_arg3   <= 64'd0;
            ev_flags  <= 32'd0;
        end else if (enter_done) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_DMAC;
            ev_event  <= EV_DMA_DONE;
            ev_arg0   <= {60'd0, CHANNEL};
            // beat_index still holds the final beat's index here (it is
            // not incremented on the last accept), hence the +1.
            ev_arg1   <= {32'd0, beat_index + 32'd1};   // beats completed
            ev_arg2   <= 64'd0;                          // completion: OK
            ev_arg3   <= {60'd0, PATH_ID};
            ev_flags  <= 32'd0;
        end else if (beat_accepted) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_DMAC;
            ev_event  <= EV_DMA_BEAT;
            ev_arg0   <= {60'd0, CHANNEL};
            ev_arg1   <= {32'd0, beat_index};
            ev_arg2   <= {32'd0, src_addr};              // this beat's source
            ev_arg3   <= {32'd0, qwc_latched - beat_index - 32'd1};  // remaining
            ev_flags  <= 32'd0;
        end else if (enter_start) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_DMAC;
            ev_event  <= EV_DMA_START;
            ev_arg0   <= {60'd0, CHANNEL};
            ev_arg1   <= {32'd0, qwc_latched};
            ev_arg2   <= {32'd0, madr_latched};          // MADR is the source
            ev_arg3   <= {60'd0, PATH_ID};
            ev_flags  <= 32'd0;
        end else if (reg_wr_en) begin
            // Report the post-write view: the register being written shows
            // the incoming data, the others show their stored values.
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_DMAC;
            ev_event  <= EV_DMA_CFG;
            ev_arg0   <= {60'd0, CHANNEL};
            ev_arg1   <= {32'd0, (reg_offset == CHCR_OFFSET) ? reg_wr_data : chcr};
            ev_arg2   <= {32'd0, (reg_offset == MADR_OFFSET) ? reg_wr_data : madr};
            ev_arg3   <= {32'd0, (reg_offset == QWC_OFFSET)  ? reg_wr_data : qwc};
            ev_flags  <= {24'd0, reg_offset};
        end else begin
            // No event this cycle; payload fields keep their last values.
            ev_valid  <= 1'b0;
        end
    end

endmodule : dmac_reg_stub
|