Files
retroDE_ps2/rtl/iop/iop_dmac_reg_stub.sv
thejayman77 ec82764bef Initial commit: retroDE_ps2 — first-of-its-kind PS2 GS FPGA core (DE25-Nano / Agilex 5)
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression
(272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps,
and all dump-derived textures/traces) is excluded via .gitignore and stays local.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 20:10:50 -04:00

320 lines
12 KiB
Systemverilog

// retroDE_ps2 — iop_dmac_reg_stub
//
// IOP DMAC channel 9 (SIF0 IOP→EE) with a real, bounded data path.
// Upgraded from the earlier register+lifecycle shell: MADR is a real
// source pointer into IOP RAM, BCR is a real word count, and the
// state machine pulls 32-bit beats out of IOP RAM through the IOP map
// and emits them on a word-granularity endpoint with ready/valid/last
// handshake. Mirrors the EE DMAC shape (dmac_reg_stub) at 32-bit width.
//
// Contract refs:
// docs/contracts/iop.md (IOP DMAC ownership)
//
// Register surface (per-channel, low-byte offset):
// 0x00 MADR — real source address in IOP physical space
// 0x04 BCR — transfer length in 32-bit beats
// 0x08 CHCR — channel control; bit[0] is the start bit
// 0x0C DONE_COUNT — monotonic completion counter (read-only; writes
// are accepted but ignored). Software reads this
// to distinguish "nth completion" without needing
// to count interrupts externally.
// Other offsets: writes accepted but ignored; reads return 0.
//
// Memory master interface (to iop_memory_map_stub's dma_rd_* port):
// mem_rd_en / mem_rd_addr issue the request (one cycle)
// mem_rd_valid / mem_rd_data return the word one cycle later
// mem_master_id drives the map trace attribution (convention: 4)
//
// Endpoint (to sif_dma_ee_ram_bridge_stub or similar 32-bit sink):
// ep_valid / ep_data[31:0] / ep_last
// ep_ready is the backpressure signal — when low, the state machine
// holds in ACTIVE_SEND with the current beat. No false completion.
//
// State machine:
// IDLE → FETCH_WAIT on CHCR start
// FETCH_WAIT → ACTIVE_SEND on mem_rd_valid (word latched)
// ACTIVE_SEND → FETCH_WAIT on endpoint accept with more beats left
// → DONE on endpoint accept for the final beat
// DONE → IDLE next cycle (clears CHCR.start)
//
// Source stepping: src_addr = madr_latched + (beat_index * 4).
//
// Trace payload schema (SUBSYS_DMAC):
// DMA_CFG arg0=channel arg1=chcr arg2=madr arg3=bcr flags=reg_offset
// DMA_START arg0=channel arg1=bcr arg2=madr arg3=path_id
// DMA_BEAT arg0=channel arg1=beat_index arg2=src_addr arg3=remaining
// DMA_DONE arg0=channel arg1=beats arg2=completion_code arg3=path_id
// completion_code 0 = OK.
`timescale 1ns/1ps
// iop_dmac_reg_stub — single-channel IOP DMAC stub with a word-granularity
// read data path.
//
// Software programs MADR/BCR, then writes CHCR with bit[0] set. The module
// latches MADR/BCR, reads `BCR` 32-bit words from consecutive addresses
// through the mem_rd_* master port, and forwards each word on the ep_*
// ready/valid stream. On completion it pulses irq_completion_o for one
// cycle, increments done_count_o and clears CHCR bit[0].
//
// Zero-length transfers (BCR == 0 at start) complete without issuing any
// memory read or endpoint beat: the channel spends one cycle in
// S_FETCH_WAIT (so DMA_START is still traced), then one cycle in S_DONE.
// Without this guard the terminal compare `beat_index + 1 == bcr_latched`
// could not match until beat_index wrapped around 32 bits.
//
// Reset is synchronous, active-low (rst_n sampled on posedge clk).
module iop_dmac_reg_stub
    import trace_pkg::*;
#(
    parameter logic [3:0] CHANNEL   = 4'd9,  // SIF0 (IOP → EE)
    parameter logic [3:0] PATH_ID   = 4'd9,
    parameter logic [7:0] MASTER_ID = 8'd4   // for dma_rd trace attribution
) (
    input  logic        clk,
    input  logic        rst_n,

    // IOP-side register access (from the memory map's iop_dmac_* port)
    input  logic        reg_wr_en,
    input  logic        reg_rd_en,
    input  logic [3:0]  reg_offset,
    input  logic [31:0] reg_wr_data,
    output logic [31:0] reg_rd_data,
    output logic        reg_rd_valid,

    // Memory read master (to iop_memory_map_stub dma_rd_* port)
    output logic        mem_rd_en,
    output logic [31:0] mem_rd_addr,
    output logic [7:0]  mem_master_id,
    input  logic [31:0] mem_rd_data,
    input  logic        mem_rd_valid,

    // Endpoint (word-granularity stream to SIF egress bridge)
    output logic        ep_valid,
    output logic [31:0] ep_data,
    output logic        ep_last,
    input  logic        ep_ready,

    // Completion pulse — one cycle high when the channel reaches S_DONE.
    // Intended as an IOP INTC source; latching is the interrupt
    // controller's responsibility.
    output logic        irq_completion_o,

    // Status
    output logic        busy_o,
    output logic [31:0] done_count_o,

    // Trace
    output logic        ev_valid,
    output subsys_e     ev_subsys,
    output event_e      ev_event,
    output logic [63:0] ev_arg0,
    output logic [63:0] ev_arg1,
    output logic [63:0] ev_arg2,
    output logic [63:0] ev_arg3,
    output logic [31:0] ev_flags
);

    // Register offsets decoded from reg_offset.
    localparam logic [3:0] MADR_OFFSET       = 4'h0;
    localparam logic [3:0] BCR_OFFSET        = 4'h4;
    localparam logic [3:0] CHCR_OFFSET       = 4'h8;
    localparam logic [3:0] DONE_COUNT_OFFSET = 4'hC;

    typedef enum logic [1:0] {
        S_IDLE        = 2'd0,
        S_FETCH_WAIT  = 2'd1,
        S_ACTIVE_SEND = 2'd2,
        S_DONE        = 2'd3
    } state_e;

    // Software-visible registers.
    logic [31:0] madr;
    logic [31:0] bcr;
    logic [31:0] chcr;

    // Transfer context, captured when a transfer starts.
    state_e      state;
    logic [31:0] madr_latched;
    logic [31:0] bcr_latched;
    logic [31:0] beat_index;    // index of the beat being fetched/sent
    logic [31:0] beat_payload;  // word returned by memory, held for the endpoint

    // A start is a CHCR write that raises bit[0] from 0 to 1. It is only
    // consumed while the state machine is in S_IDLE.
    logic start_pulse;
    assign start_pulse = reg_wr_en && (reg_offset == CHCR_OFFSET)
                      && reg_wr_data[0] && !chcr[0];

    // ------------------------------------------------------------------
    // Register file
    // ------------------------------------------------------------------
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            madr <= 32'd0;
            bcr  <= 32'd0;
            chcr <= 32'd0;
        end else begin
            if (reg_wr_en) begin
                case (reg_offset)
                    MADR_OFFSET: madr <= reg_wr_data;
                    BCR_OFFSET:  bcr  <= reg_wr_data;
                    CHCR_OFFSET: chcr <= reg_wr_data;
                    default: ;  // DONE_COUNT and unmapped offsets: write ignored
                endcase
            end
            // Placed after the write so the hardware clear of the start bit
            // wins over a CHCR write landing in the S_DONE cycle.
            if (state == S_DONE) chcr[0] <= 1'b0;
        end
    end

    // ------------------------------------------------------------------
    // Register read (1-cycle latency, matches rest of stub ecosystem)
    // ------------------------------------------------------------------
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            reg_rd_data  <= 32'd0;
            reg_rd_valid <= 1'b0;
        end else begin
            reg_rd_valid <= reg_rd_en;
            if (reg_rd_en) begin
                case (reg_offset)
                    MADR_OFFSET:       reg_rd_data <= madr;
                    BCR_OFFSET:        reg_rd_data <= bcr;
                    CHCR_OFFSET:       reg_rd_data <= chcr;
                    DONE_COUNT_OFFSET: reg_rd_data <= done_count_o;
                    default:           reg_rd_data <= 32'd0;
                endcase
            end
        end
    end

    // ------------------------------------------------------------------
    // Transfer state machine
    // ------------------------------------------------------------------
    logic [31:0] src_addr;
    assign src_addr = madr_latched + (beat_index << 2);  // 4 bytes/beat

    logic beat_accepted;
    assign beat_accepted = ep_valid && ep_ready;

    // True for a transfer that was started with BCR == 0.
    logic zero_length;
    assign zero_length = (bcr_latched == 32'd0);

    // True while the beat being presented is the final one of the transfer.
    logic final_beat;
    assign final_beat = (beat_index + 32'd1 == bcr_latched);

    // Pulse mem_rd_en for one cycle whenever we first enter FETCH_WAIT.
    logic prev_state_fw;
    always_ff @(posedge clk) begin
        if (!rst_n) prev_state_fw <= 1'b0;
        else        prev_state_fw <= (state == S_FETCH_WAIT);
    end

    logic entering_fw;
    assign entering_fw = (state == S_FETCH_WAIT) && !prev_state_fw;

    // A zero-length transfer passes through FETCH_WAIT without reading.
    assign mem_rd_en     = entering_fw && !zero_length;
    assign mem_rd_addr   = src_addr;
    assign mem_master_id = MASTER_ID;

    // Drive endpoint only in ACTIVE_SEND with the latched payload.
    assign ep_valid = (state == S_ACTIVE_SEND);
    assign ep_data  = beat_payload;
    assign ep_last  = (state == S_ACTIVE_SEND) && final_beat;

    always_ff @(posedge clk) begin
        if (!rst_n) begin
            state        <= S_IDLE;
            madr_latched <= 32'd0;
            bcr_latched  <= 32'd0;
            beat_index   <= 32'd0;
            beat_payload <= 32'd0;
        end else begin
            unique case (state)
                S_IDLE: begin
                    if (start_pulse) begin
                        // Snapshot the programmed address/length so later
                        // register writes cannot disturb the running transfer.
                        state        <= S_FETCH_WAIT;
                        madr_latched <= madr;
                        bcr_latched  <= bcr;
                        beat_index   <= 32'd0;
                    end
                end

                S_FETCH_WAIT: begin
                    if (zero_length) begin
                        // Nothing to move: complete without touching memory
                        // or the endpoint.
                        state <= S_DONE;
                    end else if (mem_rd_valid) begin
                        beat_payload <= mem_rd_data;
                        state        <= S_ACTIVE_SEND;
                    end
                end

                S_ACTIVE_SEND: begin
                    // Hold the current beat until the endpoint accepts it.
                    if (beat_accepted) begin
                        if (final_beat) begin
                            state <= S_DONE;
                        end else begin
                            beat_index <= beat_index + 32'd1;
                            state      <= S_FETCH_WAIT;
                        end
                    end
                end

                S_DONE: begin
                    state <= S_IDLE;
                end

                default: state <= S_IDLE;
            endcase
        end
    end

    assign busy_o           = (state != S_IDLE);
    assign irq_completion_o = (state == S_DONE);

    // ------------------------------------------------------------------
    // Trace emission — one event per cycle. Priority:
    //   DONE > BEAT > START > CFG (register write)
    // The completion counter lives here because it advances on the same
    // condition that emits DMA_DONE.
    // ------------------------------------------------------------------
    logic prev_in_transfer;
    always_ff @(posedge clk) begin
        if (!rst_n) prev_in_transfer <= 1'b0;
        else        prev_in_transfer <= (state != S_IDLE);
    end

    // First cycle of a transfer: in FETCH_WAIT having been idle the cycle before.
    logic enter_start;
    assign enter_start = (state == S_FETCH_WAIT) && !prev_in_transfer;

    // S_DONE lasts exactly one cycle, so the level doubles as the entry pulse.
    logic enter_done;
    assign enter_done = (state == S_DONE);

    always_ff @(posedge clk) begin
        if (!rst_n) begin
            ev_valid     <= 1'b0;
            ev_subsys    <= SUBSYS_DMAC;
            ev_event     <= EV_DMA_CFG;
            ev_arg0      <= 64'd0;
            ev_arg1      <= 64'd0;
            ev_arg2      <= 64'd0;
            ev_arg3      <= 64'd0;
            ev_flags     <= 32'd0;
            done_count_o <= 32'd0;
        end else if (enter_done) begin
            ev_valid     <= 1'b1;
            ev_subsys    <= SUBSYS_DMAC;
            ev_event     <= EV_DMA_DONE;
            ev_arg0      <= {60'd0, CHANNEL};
            // Beats completed. S_DONE is entered either from the final beat
            // (beat_index + 1 == bcr_latched) or from a zero-length start,
            // so bcr_latched is the exact count in both cases.
            ev_arg1      <= {32'd0, bcr_latched};
            ev_arg2      <= 64'd0;               // completion OK
            ev_arg3      <= {60'd0, PATH_ID};
            ev_flags     <= 32'd0;
            done_count_o <= done_count_o + 32'd1;
        end else if (beat_accepted) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_DMAC;
            ev_event  <= EV_DMA_BEAT;
            ev_arg0   <= {60'd0, CHANNEL};
            ev_arg1   <= {32'd0, beat_index};
            ev_arg2   <= {32'd0, src_addr};
            ev_arg3   <= {32'd0, bcr_latched - beat_index - 32'd1};  // remaining
            ev_flags  <= 32'd0;
        end else if (enter_start) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_DMAC;
            ev_event  <= EV_DMA_START;
            ev_arg0   <= {60'd0, CHANNEL};
            ev_arg1   <= {32'd0, bcr_latched};
            ev_arg2   <= {32'd0, madr_latched};
            ev_arg3   <= {60'd0, PATH_ID};
            ev_flags  <= 32'd0;
        end else if (reg_wr_en) begin
            // Report the post-write view: the register being written shows
            // the incoming data, the others show their current contents.
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_DMAC;
            ev_event  <= EV_DMA_CFG;
            ev_arg0   <= {60'd0, CHANNEL};
            ev_arg1   <= {32'd0, (reg_offset == CHCR_OFFSET) ? reg_wr_data : chcr};
            ev_arg2   <= {32'd0, (reg_offset == MADR_OFFSET) ? reg_wr_data : madr};
            ev_arg3   <= {32'd0, (reg_offset == BCR_OFFSET)  ? reg_wr_data : bcr};
            ev_flags  <= {28'd0, reg_offset};
        end else begin
            ev_valid <= 1'b0;
        end
    end

endmodule : iop_dmac_reg_stub