Files
thejayman77 ec82764bef Initial commit: retroDE_ps2 — first-of-its-kind PS2 GS FPGA core (DE25-Nano / Agilex 5)
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression
(272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps,
and all dump-derived textures/traces) is excluded via .gitignore and stays local.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 20:10:50 -04:00

429 lines
19 KiB
Systemverilog
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// retroDE_ps2 — gif_packed_stub (Ch72 + Ch73)
//
// Real-format GIF parser. Sits in the same upstream/downstream slot
// as gif_path_stub but accepts a real PS2 GIFtag in front of the
// data, instead of the project-local single-qword register-write
// format that gif_path_stub uses for Wave 2.
//
// Scope:
// - PACKED (FLG=0): NLOOP×NREG PACKED entries, one entry per qword.
// A+D (REGS nibble 0xE) emits a GS register write; other nibbles
// are traced EV_MODE no-ops. (Ch72.)
// - REGLIST (FLG=1): NLOOP×NREG REGLIST entries, two entries per
// qword (low 64 = entry #0, high 64 = entry #1). REGLIST data
// bytes are register values keyed by REGS nibbles in order; only
// A+D (0xE) gets a GS write here, since real REGLIST treats each
// nibble as the register *number* (not A+D), and we don't yet
// have a reg# → gs-offset map. Other nibbles consume the entry
// and trace EV_MODE. (Ch73.)
// - FLG=2 (IMAGE) and FLG=3 (DISABLE): payload is NLOOP qwords,
// consumed with EV_MODE traces and no GS register write. (Ch73 —
// prevents the "next data qword is mistaken for a new GIFtag"
// desync flagged in the Ch73 audit.) FLG=2 qwords are additionally
// forwarded on image_data_* (Ch110); FLG=3 qwords are not.
// - NLOOP up to 15 bits, NREG up to 16 registers. PS2 docs: a
// NREG field of 0 means 16; we use a 5-bit effective count to
// represent 16 correctly (Ch73 audit-medium fix — was clamped
// to 4'd15, which mis-counted PACKED 16-reg packets by one).
// - EOP carries no behavioral difference here (always-ready
// sink); preserved as trace metadata.
//
// PACKED A+D data qword layout — selectable via REAL_AD_REG_MAP:
//
// REAL_AD_REG_MAP=0 (default, project-local Ch72/Ch73 back-compat):
// bits[ 63: 0] = 64-bit register data
// bits[ 79: 64] = 16-bit project-local GS privileged offset
// (drives gs_stub.reg_wr_*)
// bits[127: 80] = reserved
//
// REAL_AD_REG_MAP=1 (real PS2 layout, Ch75):
// bits[ 63: 0] = 64-bit register data
// bits[ 71: 64] = 8-bit GIF A+D register number per PCSX2 GSRegs.h
// (drives gs_stub.gif_reg_*; gs_stub owns the
// decode into PRIM/RGBAQ/XYZF2/XYZ2/FRAME_1/ZBUF_1)
// bits[127: 72] = reserved
//
// The two namespaces are architecturally distinct. Do NOT add a
// reg# → privileged-offset LUT here — that conflation is the Ch74
// mistake Ch75 corrected. New GIF-context registers belong inside
// gs_stub, keyed by reg#.
//
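// in_ready defaults high (same one-shot contract as gif_path_stub)
// but is no longer unconditional: while an FLG=2 IMAGE payload is in
// flight it follows image_data_ready (Ch110), and in every other
// state it is deasserted while raster_fifo_full is high (Ch172).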
//
// Trace schema:
// On tag accept: EV_GIFTAG arg0={flg,path_id} arg1={eop,flg,nreg,nloop[8:0]}
// arg2=regs_64 arg3=0 flags={in_last,1}
// On PACKED data (A+D): EV_WRITE arg0=path_id arg1=regnib
// arg2={offset16} arg3=data64
// flags={in_last,0}
// On PACKED data (other): EV_MODE same layout, no GS write.
// On REGLIST entry: EV_MODE arg0=path_id arg1=regnib
// arg2=0 arg3=entry64
// flags={in_last,0}
// On IMAGE/DISABLE qword: EV_MODE arg0={flg,path_id}
// arg2=0 arg3=in_data[63:0]
// flags={in_last,0}
`timescale 1ns/1ps
module gif_packed_stub
    import trace_pkg::*;
#(
    // Path identifier reported in the low nibble of ev_arg0.
    parameter logic [3:0] PATH_ID = 4'd2,
    // Ch75 (was Ch74, corrected): switch the PACKED A+D address-source
    // from the project-local 16-bit-offset layout (default,
    // bits[79:64]=gs_offset) to the real PS2 A+D layout where
    // bits[71:64] is the 8-bit GS A+D register *number*. Per PCSX2's
    // GSRegs.h, the GIF A+D register namespace is distinct from the
    // GS privileged-MMIO offset namespace — Ch74's LUT mistakenly
    // mapped one to the other. The corrected design hands the 8-bit
    // reg# to gs_stub via its gif_reg_* port and lets gs_stub own the
    // GIF-context register file decode. When this parameter is 0, the
    // legacy gs_wr_* port (16-bit privileged-style offset) is driven
    // for back-compat with Ch72/Ch73 PACKED-A+D TBs and
    // tb_bgcolor_via_dma.
    parameter bit         REAL_AD_REG_MAP = 1'b0
) (
    input  logic         clk,
    input  logic         rst_n,      // active-low, sampled synchronously

    // Upstream from DMAC
    input  logic         in_valid,
    input  logic [127:0] in_data,
    input  logic         in_last,
    output logic         in_ready,

    // Downstream — legacy 16-bit-offset port (REAL_AD_REG_MAP=0).
    // Drives gs_stub's privileged-style reg_wr_* port.
    output logic         gs_wr_en,
    output logic [15:0]  gs_wr_addr,
    output logic [63:0]  gs_wr_data,

    // Ch110 — IMAGE-mode (FLG=2) data passthrough. `image_data_valid`
    // is high for every accepted FLG=2 IMAGE qword. `image_data` is
    // the raw 128-bit qword; `image_data_last` mirrors the upstream
    // `in_last`. Both are driven combinationally from the input bus
    // and are only meaningful while `image_data_valid` is high.
    // Downstream `gif_image_xfer_stub` captures the qword and writes
    // the unpacked pixels into vram_stub at the
    // BITBLTBUF/TRXPOS/TRXREG-described destination. TBs that don't
    // model image transfers may leave these outputs unconnected
    // (named-port instantiation).
    output logic         image_data_valid,
    output logic [127:0] image_data,
    output logic         image_data_last,

    // Ch110 — backpressure from the IMAGE consumer. While an FLG=2
    // IMAGE payload is in flight, in_ready follows this input so the
    // upstream DMA stalls while gif_image_xfer_stub is busy. In every
    // other state this input has no effect. TBs that don't model
    // image transfers tie this to 1'b1.
    input  logic         image_data_ready,

    // Ch172 — backpressure from the raster command FIFO inside
    // gs_stub. in_ready is deasserted while this input is high in
    // every state EXCEPT an in-flight FLG=2 IMAGE payload (so it also
    // stalls tag fetch and FLG=3 DISABLE payloads — intentionally
    // conservative). The stall point is the qword-acceptance
    // handshake: once a qword is accepted the parser fully processes
    // it, so there is never a "consumed but not committed" state.
    // TBs that don't model the raster FIFO tie this to 1'b0.
    input  logic         raster_fifo_full,

    // Ch75 — real-PS2 GIF A+D register-number port (REAL_AD_REG_MAP=1).
    // Drives gs_stub's GIF-context gif_reg_* port. Only one of
    // {gs_wr_*, gif_reg_*} is active per accept depending on the
    // parameter.
    output logic         gif_reg_wr_en,
    output logic [7:0]   gif_reg_num,
    output logic [63:0]  gif_reg_data,

    // Trace
    output logic         ev_valid,
    output subsys_e      ev_subsys,
    output event_e       ev_event,
    output logic [63:0]  ev_arg0,
    output logic [63:0]  ev_arg1,
    output logic [63:0]  ev_arg2,
    output logic [63:0]  ev_arg3,
    output logic [31:0]  ev_flags
);

    // ------------------------------------------------------------------
    // State and latched tag context
    // ------------------------------------------------------------------
    // Ch73: state covers REGLIST and IMAGE/DISABLE payloads so
    // unsupported FLG packets don't desync onto the next qword.
    typedef enum logic [1:0] {
        S_TAG     = 2'd0,
        S_PACKED  = 2'd1,
        S_REGLIST = 2'd2,
        S_IMAGE   = 2'd3   // also used for FLG=3 DISABLE
    } state_e;

    state_e state;

    // Latched tag context (valid in S_PACKED / S_REGLIST / S_IMAGE).
    // Declared ahead of the handshake logic below because flg_q feeds
    // image_active; SystemVerilog requires a variable to be declared
    // before it is referenced.
    logic [14:0] nloop_q;
    logic        eop_q;        // latched only; not read elsewhere in this module
    logic [1:0]  flg_q;
    // Ch73: nreg_eff is 5 bits. The NREG field is 4 bits (0..15) and a
    // value of 0 means 16; encoding 16 as 5'b10000 lets the
    // reg_idx + 1 == nreg_eff_q compare terminate a 16-register loop.
    logic [4:0]  nreg_eff_q;
    logic [63:0] regs_q;
    logic [4:0]  reg_idx;
    // Ch73: REGLIST and IMAGE/DISABLE consume opaque qwords. This is
    // the number of payload qwords still to come, loaded at tag accept:
    //   REGLIST count = ceil(NLOOP * NREG / 2)   (2 entries / qword)
    //   IMAGE   count = NLOOP                    (1 qword / loop)
    // PACKED keeps its per-entry reg_idx scheme instead.
    logic [19:0] payload_qwords_left;

    // ------------------------------------------------------------------
    // Handshake
    // ------------------------------------------------------------------
    // Ch110 — S_IMAGE is shared by FLG=2 IMAGE and FLG=3 DISABLE.
    // Only a real FLG=2 payload may route to gif_image_xfer_stub or
    // take its backpressure, so qualify the state with the latched FLG.
    logic image_active;
    assign image_active = (state == S_IMAGE) && (flg_q == 2'd2);

    // Ch172 — in_ready policy:
    //   * FLG=2 IMAGE payload in flight → follow image_data_ready.
    //   * anything else (tag fetch, PACKED, REGLIST, DISABLE) → stall
    //     while the raster command FIFO is full. Over-stalling on
    //     qwords that would not push is intentionally conservative.
    //   * TBs without the raster path tie raster_fifo_full to 1'b0,
    //     which makes the non-IMAGE case always ready.
    assign in_ready = image_active ? image_data_ready
                                   : !raster_fifo_full;

    logic accept;
    assign accept = in_valid && in_ready;

    // Ch110 — IMAGE-mode data passthrough (combinational). The valid
    // strobe is gated on image_active so FLG=3 DISABLE qwords are NOT
    // forwarded to gif_image_xfer_stub.
    assign image_data       = in_data;
    assign image_data_last  = in_last;
    assign image_data_valid = accept && image_active;

    // ------------------------------------------------------------------
    // Combinational GIFtag field decode of the qword on the wire.
    // Only meaningful while state == S_TAG.
    // ------------------------------------------------------------------
    logic [14:0] tag_nloop;
    logic        tag_eop;
    logic [1:0]  tag_flg;
    logic [3:0]  tag_nreg_field;
    logic [4:0]  tag_nreg_eff;
    logic [63:0] tag_regs;

    assign tag_nloop      = in_data[14:0];
    assign tag_eop        = in_data[15];
    assign tag_flg        = in_data[59:58];
    assign tag_nreg_field = in_data[63:60];
    // NREG field 0 encodes 16 registers.
    assign tag_nreg_eff   = (tag_nreg_field == 4'd0) ? 5'd16
                                                     : {1'b0, tag_nreg_field};
    assign tag_regs       = in_data[127:64];

    // Ch73 audit-low: select the current REGS nibble with shift/mask
    // rather than an indexed part-select (which triggered iverilog
    // "sorry" warnings). The shift amount reg_idx*4 is formed by
    // concatenation instead of `*` so it cannot be truncated to the
    // operand width and alias large reg_idx values onto small shifts.
    logic [6:0] cur_regnib_shift;
    logic [3:0] cur_regnib;

    assign cur_regnib_shift = {reg_idx, 2'b00};   // reg_idx*4 in 7 bits
    assign cur_regnib       = (regs_q >> cur_regnib_shift) & 64'hF;

    // ------------------------------------------------------------------
    // FSM
    // ------------------------------------------------------------------
    logic packed_last_in_loop;   // current entry is the last register of this loop
    logic packet_loop_last;      // current loop is the last loop of the packet

    assign packed_last_in_loop = (reg_idx + 5'd1 == nreg_eff_q);
    assign packet_loop_last    = (nloop_q == 15'd1);

    // Ch73: REGLIST payload-qword count = ceil(NLOOP * NREG / 2),
    // computed from the tag on the wire so the FSM only needs an
    // opaque countdown afterwards. 20 bits hold the maximum product
    // (32767 * 16).
    logic [19:0] reglist_total_entries;
    logic [19:0] reglist_total_qwords;

    assign reglist_total_entries = tag_nloop * tag_nreg_eff;
    assign reglist_total_qwords  = (reglist_total_entries + 20'd1) >> 1;

    always_ff @(posedge clk) begin
        if (!rst_n) begin
            state               <= S_TAG;
            nloop_q             <= 15'd0;
            eop_q               <= 1'b0;
            flg_q               <= 2'd0;
            nreg_eff_q          <= 5'd0;
            regs_q              <= 64'd0;
            reg_idx             <= 5'd0;
            payload_qwords_left <= 20'd0;
        end else if (accept) begin
            unique case (state)
                S_TAG: begin
                    // Latch the tag fields and pick the payload state.
                    nloop_q             <= tag_nloop;
                    eop_q               <= tag_eop;
                    flg_q               <= tag_flg;
                    nreg_eff_q          <= tag_nreg_eff;
                    regs_q              <= tag_regs;
                    reg_idx             <= 5'd0;
                    payload_qwords_left <= 20'd0;
                    if (tag_nloop == 15'd0) begin
                        state <= S_TAG;   // empty tag: no payload follows
                    end else begin
                        unique case (tag_flg)
                            2'd0: state <= S_PACKED;
                            2'd1: begin
                                state               <= S_REGLIST;
                                payload_qwords_left <= reglist_total_qwords;
                            end
                            default: begin
                                state               <= S_IMAGE;   // FLG=2/3
                                payload_qwords_left <= {5'd0, tag_nloop};
                            end
                        endcase
                    end
                end

                S_PACKED: begin
                    // One entry per qword: walk reg_idx through the
                    // loop, then step to the next loop or back to
                    // tag fetch.
                    if (packed_last_in_loop) begin
                        reg_idx <= 5'd0;
                        if (packet_loop_last) state   <= S_TAG;
                        else                  nloop_q <= nloop_q - 15'd1;
                    end else begin
                        reg_idx <= reg_idx + 5'd1;
                    end
                end

                S_REGLIST, S_IMAGE: begin
                    // Both consume opaque qwords against the
                    // precomputed count; individual REGLIST entries
                    // are not decoded here. The point (Ch73) is to
                    // avoid desyncing onto the next GIFtag.
                    if (payload_qwords_left == 20'd1) state <= S_TAG;
                    else payload_qwords_left <= payload_qwords_left - 20'd1;
                end

                default: state <= S_TAG;
            endcase
        end
    end

    // ------------------------------------------------------------------
    // GS write — fires only on PACKED A+D (REGS nibble 0xE) data
    // accepts. REGLIST and IMAGE/DISABLE never generate GS writes
    // here.
    //
    // Ch75: REAL_AD_REG_MAP selects which output port carries the
    // write. Only one enable is raised per accept; both enables are
    // single-cycle strobes that drop on the next clock without a new
    // A+D accept. Address/data registers hold their last value.
    // ------------------------------------------------------------------
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            gs_wr_en      <= 1'b0;
            gs_wr_addr    <= 16'd0;
            gs_wr_data    <= 64'd0;
            gif_reg_wr_en <= 1'b0;
            gif_reg_num   <= 8'd0;
            gif_reg_data  <= 64'd0;
        end else if (accept && state == S_PACKED && cur_regnib == 4'hE) begin
            if (REAL_AD_REG_MAP) begin
                // Real PS2 layout: bits[71:64] = A+D register number.
                gs_wr_en      <= 1'b0;
                gif_reg_wr_en <= 1'b1;
                gif_reg_num   <= in_data[71:64];
                gif_reg_data  <= in_data[63:0];
            end else begin
                // Project-local layout: bits[79:64] = 16-bit offset.
                gs_wr_en      <= 1'b1;
                gs_wr_addr    <= in_data[79:64];
                gs_wr_data    <= in_data[63:0];
                gif_reg_wr_en <= 1'b0;
            end
        end else begin
            gs_wr_en      <= 1'b0;
            gif_reg_wr_en <= 1'b0;
        end
    end

    // ------------------------------------------------------------------
    // Trace — one registered event per accepted qword; ev_valid is low
    // on any cycle that does not follow an accept.
    // ------------------------------------------------------------------
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            ev_valid  <= 1'b0;
            ev_subsys <= SUBSYS_GIF;
            ev_event  <= EV_GIFTAG;
            ev_arg0   <= 64'd0;
            ev_arg1   <= 64'd0;
            ev_arg2   <= 64'd0;
            ev_arg3   <= 64'd0;
            ev_flags  <= 32'd0;
        end else if (accept && state == S_TAG) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_GIF;
            ev_event  <= EV_GIFTAG;
            // arg0[3:0]=path_id, arg0[5:4]=flg → callers can grep by FLG
            ev_arg0   <= {58'd0, tag_flg, PATH_ID};
            // Compact tag summary:
            //   {eop[15], flg[14:13], nreg[12:9], nloop[8:0]}
            // 48 pad bits + 16 payload bits = exactly 64 (the previous
            // 49-bit pad produced a 65-bit concat that was silently
            // truncated; the resulting value is unchanged).
            ev_arg1   <= {48'd0, tag_eop, tag_flg, tag_nreg_field,
                          tag_nloop[8:0]};
            ev_arg2   <= tag_regs;
            ev_arg3   <= 64'd0;
            ev_flags  <= {30'd0, in_last, 1'b1};   // bit0=is_tag
        end else if (accept && state == S_PACKED) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_GIF;
            // A+D entries are reported as writes, everything else as
            // a no-op mode event.
            ev_event  <= (cur_regnib == 4'hE) ? EV_WRITE : EV_MODE;
            ev_arg0   <= {60'd0, PATH_ID};
            ev_arg1   <= {60'd0, cur_regnib};
            ev_arg2   <= {48'd0, in_data[79:64]};
            ev_arg3   <= in_data[63:0];
            ev_flags  <= {30'd0, in_last, 1'b0};   // bit0=is_data
        end else if (accept && state == S_REGLIST) begin
            // One event per accepted REGLIST qword. A qword carries
            // two 64-bit entries, but only the low entry
            // (in_data[63:0]) is reported; the high entry is not
            // traced in this minimal Ch73 path. reg_idx is not
            // advanced in S_REGLIST, so arg1 always carries REGS
            // nibble 0 of the current tag.
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_GIF;
            ev_event  <= EV_MODE;
            ev_arg0   <= {60'd0, PATH_ID};
            ev_arg1   <= {60'd0, cur_regnib};
            ev_arg2   <= 64'd0;
            ev_arg3   <= in_data[63:0];            // low-half entry data
            ev_flags  <= {30'd0, in_last, 1'b0};
        end else if (accept && state == S_IMAGE) begin
            // Covers both FLG=2 IMAGE and FLG=3 DISABLE payload
            // qwords; flg_q in arg0 tells them apart.
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_GIF;
            ev_event  <= EV_MODE;
            ev_arg0   <= {58'd0, flg_q, PATH_ID};
            ev_arg1   <= 64'd0;
            ev_arg2   <= 64'd0;
            ev_arg3   <= in_data[63:0];
            ev_flags  <= {30'd0, in_last, 1'b0};
        end else begin
            ev_valid  <= 1'b0;
        end
    end

endmodule : gif_packed_stub