ec82764bef
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
321 lines
11 KiB
Systemverilog
321 lines
11 KiB
Systemverilog
// retroDE_ps2 — iop_exec_stub
|
|
//
|
|
// First RAM-backed IOP execution primitive. Micro-op fetch now comes
|
|
// from IOP RAM through the real `iop_memory_map_stub` CPU-side port —
|
|
// the same way a future MIPS-class CPU would fetch instructions. The
|
|
// control program is no longer RTL-resident; it lives as data in RAM
|
|
// that someone (a TB, eventually a BIOS / loader path) preloads before
|
|
// pulsing `go_i`.
|
|
//
|
|
// NOT a MIPS core, NOT an ISA decoder. A tiny FSM sequencer over a
|
|
// five-opcode micro-op ISA, designed as the bridge between "testbench
|
|
// choreographs everything" and a real instruction-fetching CPU. When
|
|
// the real CPU arrives, it replaces this module but keeps the same
|
|
// map / DMA / INTC hookup verbatim.
|
|
//
|
|
// Contract refs:
|
|
// docs/contracts/iop.md (IOP-local programming model)
|
|
//
|
|
// Opcodes (encoded in word 0 low nibble):
|
|
// OP_HALT 0x0 — terminal; halt_o rises, no further accesses.
|
|
// OP_WRITE 0x1 — pulse map CPU write with (addr, data). pc++
|
|
// OP_READ 0x2 — pulse map CPU read; latch into last_read_data.
|
|
// pc++
|
|
// OP_WAIT_IRQ 0x3 — block until cpu_irq==1. pc++
|
|
// OP_BNE 0x4 — if last_read_data != expected, pc <= target;
|
|
// else pc++.
|
|
// target is in word1[7:0]; expected is in word2.
|
|
//
|
|
// Micro-op layout in RAM (16 bytes per op, little-endian word order):
|
|
// +0 word 0: {28'd0, opcode[3:0]}
|
|
// +4 word 1: addr (for WRITE/READ) or target_pc in low 8 bits (for BNE)
|
|
// +8 word 2: data (for WRITE) or expected value (for BNE); unused for
|
|
// READ/WAIT_IRQ/HALT
|
|
// +12 word 3: reserved for future opcodes
|
|
//
|
|
// Fetch sequence: three map reads per op (words 0/1/2). Word 3 is
|
|
// skipped to save a cycle. Each read has one-cycle latency via the
|
|
// map — so a full fetch is ~6 cycles, after which dispatch takes one
|
|
// more cycle. Negligible in the current scope; swap the engine for a
|
|
// real CPU later and the instruction width stops mattering.
|
|
//
|
|
// Trace payload (SUBSYS_IOP, EV_IFETCH, emitted on each op completion and on HALT entry):
|
|
// arg0 = pc value of the op that just completed
|
|
// arg1 = opcode
|
|
// arg2 = addr (0 for WAIT_IRQ/HALT)
|
|
// arg3 = data written, data read back, expected (for BNE), or 0
|
|
// flags bit 0 = 1 → write-flavour op
|
|
// flags bit 1 = 1 → WAIT_IRQ just exited (IRQ observed)
|
|
// flags bit 2 = 1 → HALT entered
|
|
// flags bit 3 = 1 → BNE branch taken (pc changed to target, not +1)
|
|
|
|
`timescale 1ns/1ps
|
|
|
|
// iop_exec_stub — RAM-backed micro-op sequencer.
//
// After `go_i` is seen in S_IDLE, the FSM repeatedly fetches three 32-bit
// words (opcode / addr-or-target / data-or-expected) from
// SCRIPT_BASE + pc*16 through the map read port, then executes the op.
// All registers use a synchronous, active-low reset (`rst_n` is sampled
// on the rising edge of `clk`).
//
// Parameters:
//   SCRIPT_BASE  byte address of micro-op 0.
//
// Ports:
//   go_i                      start request; only sampled while in S_IDLE.
//   map_rd_* / map_wr_*       CPU-side map port; carries both the ifetch
//                             reads and the script's own READ/WRITE ops.
//   cpu_irq                   level input that releases OP_WAIT_IRQ.
//   halt_o                    high while the FSM sits in S_HALT (left only
//                             by reset).
//   pc_o                      current micro-op index.
//   ev_*                      registered trace event; ev_valid is a
//                             one-cycle strobe.
module iop_exec_stub
    import trace_pkg::*;
#(
    parameter logic [31:0] SCRIPT_BASE = 32'h0000_0400
) (
    input  logic        clk,
    input  logic        rst_n,

    input  logic        go_i,

    // Drive the IOP memory map's CPU-side port. Both ifetch reads and
    // the script's own WRITE/READ ops flow through here.
    output logic        map_rd_en,
    output logic [31:0] map_rd_addr,
    input  logic [31:0] map_rd_data,
    input  logic        map_rd_valid,

    output logic        map_wr_en,
    output logic [31:0] map_wr_addr,
    output logic [31:0] map_wr_data,
    output logic [3:0]  map_wr_be,

    input  logic        cpu_irq,

    output logic        halt_o,
    output logic [7:0]  pc_o,

    output logic        ev_valid,
    output subsys_e     ev_subsys,
    output event_e      ev_event,
    output logic [63:0] ev_arg0,
    output logic [63:0] ev_arg1,
    output logic [63:0] ev_arg2,
    output logic [63:0] ev_arg3,
    output logic [31:0] ev_flags
);

    // Micro-op encodings (low nibble of script word 0).
    localparam logic [3:0] OP_HALT     = 4'h0;
    localparam logic [3:0] OP_WRITE    = 4'h1;
    localparam logic [3:0] OP_READ     = 4'h2;
    localparam logic [3:0] OP_WAIT_IRQ = 4'h3;
    localparam logic [3:0] OP_BNE      = 4'h4;

    typedef enum logic [3:0] {
        S_IDLE      = 4'd0,
        S_IF0_REQ   = 4'd1,
        S_IF0_WAIT  = 4'd2,
        S_IF1_REQ   = 4'd3,
        S_IF1_WAIT  = 4'd4,
        S_IF2_REQ   = 4'd5,
        S_IF2_WAIT  = 4'd6,
        S_DECODE    = 4'd7,
        S_WRITE     = 4'd8,
        S_READ_REQ  = 4'd9,
        S_READ_WAIT = 4'd10,
        S_WAIT_IRQ  = 4'd11,
        S_BNE       = 4'd12,
        S_HALT      = 4'd13
    } state_e;

    state_e      state;
    logic [7:0]  pc;
    logic [3:0]  cur_opcode;      // word 0 [3:0] of the op being executed
    logic [31:0] cur_addr;        // word 1: address, or BNE target in [7:0]
    logic [31:0] cur_data;        // word 2: write data, or BNE expected value
    logic [31:0] last_read_data;  // result of the most recent OP_READ

    // pc of the op currently being executed, captured in S_DECODE. The
    // trace block runs one cycle after `pc` has already been advanced (or
    // redirected by a taken BNE), so it must not sample `pc` directly.
    logic [7:0]  op_pc;

    // Op-completion event triggers (one-cycle pulses)
    logic ev_op_done;
    logic ev_wait_irq_exit;
    logic ev_enter_halt;
    logic ev_bne_taken;

    // Address for the next ifetch word: SCRIPT_BASE + pc*16 + word_offset
    logic [31:0] ifetch_base;
    assign ifetch_base = SCRIPT_BASE + {20'd0, pc, 4'd0};  // pc << 4

    // ------------------------------------------------------------------
    // Map-port drive (combinational on state)
    // ------------------------------------------------------------------
    always_comb begin
        // Idle defaults: no request on either channel.
        map_wr_en   = 1'b0;
        map_wr_addr = 32'd0;
        map_wr_data = 32'd0;
        map_wr_be   = 4'd0;
        map_rd_en   = 1'b0;
        map_rd_addr = 32'd0;

        case (state)
            S_IF0_REQ: begin
                map_rd_en   = 1'b1;
                map_rd_addr = ifetch_base + 32'd0;
            end
            S_IF1_REQ: begin
                map_rd_en   = 1'b1;
                map_rd_addr = ifetch_base + 32'd4;
            end
            S_IF2_REQ: begin
                map_rd_en   = 1'b1;
                map_rd_addr = ifetch_base + 32'd8;
            end
            S_WRITE: begin
                // Full-word write, asserted for the single S_WRITE cycle.
                map_wr_en   = 1'b1;
                map_wr_addr = cur_addr;
                map_wr_data = cur_data;
                map_wr_be   = 4'b1111;
            end
            S_READ_REQ: begin
                map_rd_en   = 1'b1;
                map_rd_addr = cur_addr;
            end
            default: ;
        endcase
    end

    // ------------------------------------------------------------------
    // State machine
    // ------------------------------------------------------------------
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            state            <= S_IDLE;
            pc               <= 8'd0;
            op_pc            <= 8'd0;
            cur_opcode       <= 4'd0;
            cur_addr         <= 32'd0;
            cur_data         <= 32'd0;
            last_read_data   <= 32'd0;
            ev_op_done       <= 1'b0;
            ev_wait_irq_exit <= 1'b0;
            ev_enter_halt    <= 1'b0;
            ev_bne_taken     <= 1'b0;
        end else begin
            // Event triggers default low so each one is a one-cycle pulse.
            ev_op_done       <= 1'b0;
            ev_wait_irq_exit <= 1'b0;
            ev_enter_halt    <= 1'b0;
            ev_bne_taken     <= 1'b0;

            case (state)
                S_IDLE: begin
                    if (go_i) begin
                        pc    <= 8'd0;
                        state <= S_IF0_REQ;
                    end
                end

                // --- fetch word 0: opcode ---
                S_IF0_REQ:  state <= S_IF0_WAIT;
                S_IF0_WAIT: if (map_rd_valid) begin
                    cur_opcode <= map_rd_data[3:0];
                    state      <= S_IF1_REQ;
                end

                // --- fetch word 1: addr / branch target ---
                S_IF1_REQ:  state <= S_IF1_WAIT;
                S_IF1_WAIT: if (map_rd_valid) begin
                    cur_addr <= map_rd_data;
                    state    <= S_IF2_REQ;
                end

                // --- fetch word 2: data / expected value ---
                S_IF2_REQ:  state <= S_IF2_WAIT;
                S_IF2_WAIT: if (map_rd_valid) begin
                    cur_data <= map_rd_data;
                    state    <= S_DECODE;
                end

                S_DECODE: begin
                    // `pc` still indexes the op being decoded here; keep a
                    // copy for the trace event emitted after pc moves on.
                    op_pc <= pc;
                    case (cur_opcode)
                        OP_HALT: begin
                            state         <= S_HALT;
                            ev_enter_halt <= 1'b1;
                        end
                        OP_WRITE:    state <= S_WRITE;
                        OP_READ:     state <= S_READ_REQ;
                        OP_WAIT_IRQ: state <= S_WAIT_IRQ;
                        OP_BNE:      state <= S_BNE;
                        default: begin
                            // Unknown opcode → safe stop. Raise the HALT
                            // trigger as well so the stop shows up in the
                            // trace (arg1 carries the offending opcode).
                            state         <= S_HALT;
                            ev_enter_halt <= 1'b1;
                        end
                    endcase
                end

                S_WRITE: begin
                    pc         <= pc + 8'd1;
                    state      <= S_IF0_REQ;
                    ev_op_done <= 1'b1;
                end

                S_READ_REQ:  state <= S_READ_WAIT;
                S_READ_WAIT: if (map_rd_valid) begin
                    last_read_data <= map_rd_data;
                    pc             <= pc + 8'd1;
                    state          <= S_IF0_REQ;
                    ev_op_done     <= 1'b1;
                end

                S_WAIT_IRQ: begin
                    // Stall here until cpu_irq is sampled high.
                    if (cpu_irq) begin
                        pc               <= pc + 8'd1;
                        state            <= S_IF0_REQ;
                        ev_op_done       <= 1'b1;
                        ev_wait_irq_exit <= 1'b1;
                    end
                end

                S_BNE: begin
                    // target_pc = cur_addr[7:0], expected = cur_data
                    if (last_read_data != cur_data) begin
                        pc           <= cur_addr[7:0];
                        ev_bne_taken <= 1'b1;
                    end else begin
                        pc <= pc + 8'd1;
                    end
                    state      <= S_IF0_REQ;
                    ev_op_done <= 1'b1;
                end

                // Terminal: only reset leaves S_HALT.
                S_HALT: state <= S_HALT;

                default: state <= S_IDLE;
            endcase
        end
    end

    assign halt_o = (state == S_HALT);
    assign pc_o   = pc;

    // ------------------------------------------------------------------
    // Trace emission. One event per op completion + one on HALT entry.
    // The triggers are registered, so this block runs the cycle after the
    // FSM completed the op: cur_opcode / cur_addr / cur_data are still
    // those of the completed op (they are next rewritten in the IFx_WAIT
    // states), but `pc` has already moved — hence `op_pc`.
    // ------------------------------------------------------------------
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            ev_valid  <= 1'b0;
            ev_subsys <= SUBSYS_IOP;
            ev_event  <= EV_IFETCH;
            ev_arg0   <= 64'd0;
            ev_arg1   <= 64'd0;
            ev_arg2   <= 64'd0;
            ev_arg3   <= 64'd0;
            ev_flags  <= 32'd0;
        end else if (ev_enter_halt) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_IOP;
            ev_event  <= EV_IFETCH;
            ev_arg0   <= {56'd0, op_pc};
            ev_arg1   <= {60'd0, cur_opcode};
            ev_arg2   <= 64'd0;
            ev_arg3   <= 64'd0;
            ev_flags  <= 32'h0000_0004;  // halt marker
        end else if (ev_op_done) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_IOP;
            ev_event  <= EV_IFETCH;
            ev_arg0   <= {56'd0, op_pc};
            ev_arg1   <= {60'd0, cur_opcode};
            case (cur_opcode)
                OP_READ: begin
                    ev_arg2 <= {32'd0, cur_addr};
                    // Use the latched copy: the FSM is already issuing the
                    // next ifetch, so the live map_rd_data bus is not
                    // guaranteed to still carry the READ result.
                    ev_arg3 <= {32'd0, last_read_data};
                end
                OP_WAIT_IRQ: begin
                    // No operands; report zeros rather than whatever the
                    // unused script words happened to contain.
                    ev_arg2 <= 64'd0;
                    ev_arg3 <= 64'd0;
                end
                default: begin
                    // WRITE: addr / data written. BNE: word 1 / expected.
                    ev_arg2 <= {32'd0, cur_addr};
                    ev_arg3 <= {32'd0, cur_data};
                end
            endcase
            ev_flags  <= {28'd0,
                          ev_bne_taken,
                          1'b0,  // (was halt; halt has its own path above)
                          ev_wait_irq_exit,
                          (cur_opcode == OP_WRITE)};
        end else begin
            ev_valid <= 1'b0;
        end
    end

endmodule : iop_exec_stub
|