Files
thejayman77 ec82764bef Initial commit: retroDE_ps2 — first-of-its-kind PS2 GS FPGA core (DE25-Nano / Agilex 5)
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression
(272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps,
and all dump-derived textures/traces) is excluded via .gitignore and stays local.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 20:10:50 -04:00

321 lines
11 KiB
Systemverilog

// retroDE_ps2 — iop_exec_stub
//
// First RAM-backed IOP execution primitive. Micro-op fetch now comes
// from IOP RAM through the real `iop_memory_map_stub` CPU-side port —
// the same way a future MIPS-class CPU would fetch instructions. The
// control program is no longer RTL-resident; it lives as data in RAM
// that someone (a TB, eventually a BIOS / loader path) preloads before
// pulsing `go_i`.
//
// NOT a MIPS core, NOT an ISA decoder. A tiny FSM sequencer over a
// five-opcode micro-op ISA, designed as the bridge between "testbench
// choreographs everything" and a real instruction-fetching CPU. When
// the real CPU arrives, it replaces this module but keeps the same
// map / DMA / INTC hookup verbatim.
//
// Contract refs:
// docs/contracts/iop.md (IOP-local programming model)
//
// Opcodes (encoded in word 0 low nibble):
// OP_HALT 0x0 — terminal; halt_o rises, no further accesses.
// OP_WRITE 0x1 — pulse map CPU write with (addr, data). pc++
// OP_READ 0x2 — pulse map CPU read; latch into last_read_data.
// pc++
// OP_WAIT_IRQ 0x3 — block until cpu_irq==1. pc++
// OP_BNE 0x4 — if last_read_data != expected, pc <= target;
// else pc++.
// target is in word1[7:0]; expected is in word2.
//
// Micro-op layout in RAM (16 bytes per op, little-endian word order):
// +0 word 0: {28'd0, opcode[3:0]}
// +4 word 1: addr (for WRITE/READ) or target_pc in low 8 bits (for BNE)
// +8 word 2: data (for WRITE) or expected value (for BNE); unused for
// READ/WAIT_IRQ/HALT
// +12 word 3: reserved for future opcodes
//
// Fetch sequence: three map reads per op (words 0/1/2). Word 3 is
// skipped to save a cycle. Each read has one-cycle latency via the
// map — so a full fetch is ~6 cycles, after which dispatch takes one
// more cycle. Negligible in the current scope; swap the engine for a
// real CPU later and the instruction width stops mattering.
//
// Trace payload (SUBSYS_IOP, EV_IFETCH, emitted on each op completion and on HALT entry):
// arg0 = pc value of the op that just completed
// arg1 = opcode
// arg2 = addr (0 for WAIT_IRQ/HALT)
// arg3 = data written, data read back, expected (for BNE), or 0
// flags bit 0 = 1 → write-flavour op
// flags bit 1 = 1 → WAIT_IRQ just exited (IRQ observed)
// flags bit 2 = 1 → HALT entered
// flags bit 3 = 1 → BNE branch taken (pc changed to target, not +1)
`timescale 1ns/1ps
// iop_exec_stub — RAM-backed micro-op sequencer.
//
// After `go_i` is seen in S_IDLE the FSM repeatedly fetches three 32-bit
// words (opcode / addr-or-target / data-or-expected) from
// SCRIPT_BASE + pc*16 through the map read port, dispatches on the low
// nibble of word 0, and emits one trace event per completed op plus one
// on HALT entry.
//
// Parameters:
//   SCRIPT_BASE — byte address of micro-op 0.
//
// Ports:
//   clk, rst_n        — clock and synchronous active-low reset.
//   go_i              — sampled only in S_IDLE; starts execution at pc 0.
//   map_rd_* / map_wr_* — read/write port used for both instruction fetch
//                       and the script's own READ/WRITE ops. Writes always
//                       assert all four byte enables.
//   cpu_irq           — level sampled while in S_WAIT_IRQ.
//   halt_o            — high while the FSM sits in S_HALT.
//   pc_o              — current micro-op index.
//   ev_*              — registered trace event; ev_valid is a one-cycle
//                       pulse, the payload registers hold their last value
//                       in between events.
module iop_exec_stub
    import trace_pkg::*;
#(
    parameter logic [31:0] SCRIPT_BASE = 32'h0000_0400
) (
    input  logic        clk,
    input  logic        rst_n,
    input  logic        go_i,
    // Drive the IOP memory map's CPU-side port. Both ifetch reads and
    // the script's own WRITE/READ ops flow through here.
    output logic        map_rd_en,
    output logic [31:0] map_rd_addr,
    input  logic [31:0] map_rd_data,
    input  logic        map_rd_valid,
    output logic        map_wr_en,
    output logic [31:0] map_wr_addr,
    output logic [31:0] map_wr_data,
    output logic [3:0]  map_wr_be,
    input  logic        cpu_irq,
    output logic        halt_o,
    output logic [7:0]  pc_o,
    output logic        ev_valid,
    output subsys_e     ev_subsys,
    output event_e      ev_event,
    output logic [63:0] ev_arg0,
    output logic [63:0] ev_arg1,
    output logic [63:0] ev_arg2,
    output logic [63:0] ev_arg3,
    output logic [31:0] ev_flags
);

    // Micro-op opcodes (low nibble of fetch word 0).
    localparam logic [3:0] OP_HALT     = 4'h0;
    localparam logic [3:0] OP_WRITE    = 4'h1;
    localparam logic [3:0] OP_READ     = 4'h2;
    localparam logic [3:0] OP_WAIT_IRQ = 4'h3;
    localparam logic [3:0] OP_BNE      = 4'h4;

    typedef enum logic [3:0] {
        S_IDLE      = 4'd0,
        S_IF0_REQ   = 4'd1,
        S_IF0_WAIT  = 4'd2,
        S_IF1_REQ   = 4'd3,
        S_IF1_WAIT  = 4'd4,
        S_IF2_REQ   = 4'd5,
        S_IF2_WAIT  = 4'd6,
        S_DECODE    = 4'd7,
        S_WRITE     = 4'd8,
        S_READ_REQ  = 4'd9,
        S_READ_WAIT = 4'd10,
        S_WAIT_IRQ  = 4'd11,
        S_BNE       = 4'd12,
        S_HALT      = 4'd13
    } state_e;

    state_e      state;
    logic [7:0]  pc;
    logic [3:0]  cur_opcode;      // word 0 [3:0] of the op being executed
    logic [31:0] cur_addr;        // word 1: addr (WRITE/READ) or BNE target
    logic [31:0] cur_data;        // word 2: data (WRITE) or BNE expected
    logic [31:0] last_read_data;  // result of the most recent OP_READ

    // Op-completion event triggers (one-cycle pulses)
    logic        ev_op_done;
    logic        ev_wait_irq_exit;
    logic        ev_enter_halt;
    logic        ev_bne_taken;

    // pc delayed by one clock. The completion pulses above are registered
    // on the same edge that advances `pc`, so in the cycle a pulse is
    // visible `pc` already points at the NEXT op; `done_pc` still holds the
    // pc of the op that produced the pulse.
    logic [7:0]  done_pc;

    // Address for the next ifetch word: SCRIPT_BASE + pc*16 + word_offset
    logic [31:0] ifetch_base;
    assign ifetch_base = SCRIPT_BASE + {20'd0, pc, 4'd0}; // pc << 4

    // ------------------------------------------------------------------
    // Map-port drive (combinational on state)
    // ------------------------------------------------------------------
    always_comb begin
        // Idle defaults: no request on either port.
        map_wr_en   = 1'b0;
        map_wr_addr = 32'd0;
        map_wr_data = 32'd0;
        map_wr_be   = 4'd0;
        map_rd_en   = 1'b0;
        map_rd_addr = 32'd0;
        case (state)
            S_IF0_REQ: begin
                map_rd_en   = 1'b1;
                map_rd_addr = ifetch_base + 32'd0;
            end
            S_IF1_REQ: begin
                map_rd_en   = 1'b1;
                map_rd_addr = ifetch_base + 32'd4;
            end
            S_IF2_REQ: begin
                map_rd_en   = 1'b1;
                map_rd_addr = ifetch_base + 32'd8;
            end
            S_WRITE: begin
                map_wr_en   = 1'b1;
                map_wr_addr = cur_addr;
                map_wr_data = cur_data;
                map_wr_be   = 4'b1111;
            end
            S_READ_REQ: begin
                map_rd_en   = 1'b1;
                map_rd_addr = cur_addr;
            end
            default: ;
        endcase
    end

    // ------------------------------------------------------------------
    // State machine
    // ------------------------------------------------------------------
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            state            <= S_IDLE;
            pc               <= 8'd0;
            done_pc          <= 8'd0;
            cur_opcode       <= 4'd0;
            cur_addr         <= 32'd0;
            cur_data         <= 32'd0;
            last_read_data   <= 32'd0;
            ev_op_done       <= 1'b0;
            ev_wait_irq_exit <= 1'b0;
            ev_enter_halt    <= 1'b0;
            ev_bne_taken     <= 1'b0;
        end else begin
            // Pulses default low; a state below may override for one cycle.
            ev_op_done       <= 1'b0;
            ev_wait_irq_exit <= 1'b0;
            ev_enter_halt    <= 1'b0;
            ev_bne_taken     <= 1'b0;
            // Snapshot the pre-update pc for the trace block.
            done_pc          <= pc;

            case (state)
                S_IDLE: begin
                    if (go_i) begin
                        pc    <= 8'd0;
                        state <= S_IF0_REQ;
                    end
                end

                // --- three-word fetch: request, then wait for valid ---
                S_IF0_REQ: state <= S_IF0_WAIT;
                S_IF0_WAIT: if (map_rd_valid) begin
                    cur_opcode <= map_rd_data[3:0];
                    state      <= S_IF1_REQ;
                end
                S_IF1_REQ: state <= S_IF1_WAIT;
                S_IF1_WAIT: if (map_rd_valid) begin
                    cur_addr <= map_rd_data;
                    state    <= S_IF2_REQ;
                end
                S_IF2_REQ: state <= S_IF2_WAIT;
                S_IF2_WAIT: if (map_rd_valid) begin
                    cur_data <= map_rd_data;
                    state    <= S_DECODE;
                end

                S_DECODE: begin
                    case (cur_opcode)
                        OP_HALT: begin
                            state         <= S_HALT;
                            ev_enter_halt <= 1'b1;
                        end
                        OP_WRITE:    state <= S_WRITE;
                        OP_READ:     state <= S_READ_REQ;
                        OP_WAIT_IRQ: state <= S_WAIT_IRQ;
                        OP_BNE:      state <= S_BNE;
                        // Unknown opcode → safe stop. ev_enter_halt is not
                        // pulsed here, so this path emits no trace event.
                        default:     state <= S_HALT;
                    endcase
                end

                S_WRITE: begin
                    pc         <= pc + 8'd1;
                    state      <= S_IF0_REQ;
                    ev_op_done <= 1'b1;
                end

                S_READ_REQ: state <= S_READ_WAIT;
                S_READ_WAIT: if (map_rd_valid) begin
                    last_read_data <= map_rd_data;
                    pc             <= pc + 8'd1;
                    state          <= S_IF0_REQ;
                    ev_op_done     <= 1'b1;
                end

                S_WAIT_IRQ: begin
                    if (cpu_irq) begin
                        pc               <= pc + 8'd1;
                        state            <= S_IF0_REQ;
                        ev_op_done       <= 1'b1;
                        ev_wait_irq_exit <= 1'b1;
                    end
                end

                S_BNE: begin
                    // target_pc = cur_addr[7:0], expected = cur_data
                    if (last_read_data != cur_data) begin
                        pc           <= cur_addr[7:0];
                        ev_bne_taken <= 1'b1;
                    end else begin
                        pc <= pc + 8'd1;
                    end
                    state      <= S_IF0_REQ;
                    ev_op_done <= 1'b1;
                end

                // Terminal: only reset leaves S_HALT.
                S_HALT:  state <= S_HALT;
                default: state <= S_IDLE;
            endcase
        end
    end

    assign halt_o = (state == S_HALT);
    assign pc_o   = pc;

    // ------------------------------------------------------------------
    // Trace emission. One event per op completion + one on HALT entry.
    //
    // This block runs one cycle after the FSM raised a completion pulse.
    // By then `pc` has advanced and the FSM is already issuing the next
    // fetch, so the payload is built only from values that are still
    // stable for the completed op: done_pc, cur_opcode, cur_addr,
    // cur_data and last_read_data.
    // ------------------------------------------------------------------
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            ev_valid  <= 1'b0;
            ev_subsys <= SUBSYS_IOP;
            ev_event  <= EV_IFETCH;
            ev_arg0   <= 64'd0;
            ev_arg1   <= 64'd0;
            ev_arg2   <= 64'd0;
            ev_arg3   <= 64'd0;
            ev_flags  <= 32'd0;
        end else if (ev_enter_halt) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_IOP;
            ev_event  <= EV_IFETCH;
            ev_arg0   <= {56'd0, done_pc};
            ev_arg1   <= {60'd0, cur_opcode};
            ev_arg2   <= 64'd0;
            ev_arg3   <= 64'd0;
            ev_flags  <= 32'h0000_0004; // halt marker
        end else if (ev_op_done) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_IOP;
            ev_event  <= EV_IFETCH;
            // pc of the op that just completed, not the already-updated pc.
            ev_arg0   <= {56'd0, done_pc};
            ev_arg1   <= {60'd0, cur_opcode};
            // WAIT_IRQ has no address operand; report 0 rather than
            // whatever word 1 happened to contain.
            ev_arg2   <= (cur_opcode == OP_WAIT_IRQ) ? 64'd0
                                                     : {32'd0, cur_addr};
            case (cur_opcode)
                // The read data was latched into last_read_data in the
                // map_rd_valid cycle; map_rd_data itself is sampled a
                // cycle too late here to be relied on.
                OP_READ:          ev_arg3 <= {32'd0, last_read_data};
                OP_WRITE, OP_BNE: ev_arg3 <= {32'd0, cur_data};
                default:          ev_arg3 <= 64'd0;
            endcase
            ev_flags  <= {28'd0,
                          ev_bne_taken,
                          1'b0, // (was halt; halt has its own path above)
                          ev_wait_irq_exit,
                          (cur_opcode == OP_WRITE)};
        end else begin
            // No event this cycle; payload registers hold their values.
            ev_valid <= 1'b0;
        end
    end

endmodule : iop_exec_stub