ec82764bef
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
429 lines
19 KiB
Systemverilog
429 lines
19 KiB
Systemverilog
// retroDE_ps2 — gif_packed_stub (Ch72 + Ch73)
|
||
//
|
||
// Real-format GIF parser. Sits in the same upstream/downstream slot
|
||
// as gif_path_stub but accepts a real PS2 GIFtag in front of the
|
||
// data, instead of the project-local single-qword register-write
|
||
// format that gif_path_stub uses for Wave 2.
|
||
//
|
||
// Scope:
|
||
// - PACKED (FLG=0): NLOOP×NREG PACKED entries, one entry per qword.
|
||
// A+D (REGS nibble 0xE) emits a GS register write; other nibbles
|
||
// are traced EV_MODE no-ops. (Ch72.)
|
||
// - REGLIST (FLG=1): NLOOP×NREG REGLIST entries, two entries per
// qword (low 64 = entry #0, high 64 = entry #1). REGLIST data
// bytes are register values keyed by REGS nibbles in order. No
// GS write is emitted for any REGLIST entry: real REGLIST treats
// each nibble as the register *number* (not A+D), and we don't yet
// have a reg# → gs-offset map. Every REGLIST qword is consumed
// and traced as EV_MODE. (Ch73.)
|
||
// - FLG=2 (IMAGE) and FLG=3 (DISABLE): payload is NLOOP qwords,
|
||
// consumed silently with EV_MODE traces, no GS effect. (Ch73 —
|
||
// prevents the "next data qword is mistaken for a new GIFtag"
|
||
// desync flagged in the Ch73 audit.)
|
||
// - NLOOP up to 15 bits, NREG up to 16 registers. PS2 docs: a
|
||
// NREG field of 0 means 16; we use a 5-bit effective count to
|
||
// represent 16 correctly (Ch73 audit-medium fix — was clamped
|
||
// to 4'd15, which mis-counted PACKED 16-reg packets by one).
|
||
// - EOP carries no behavioral difference here; it is latched and
// preserved as trace metadata only.
|
||
//
|
||
// PACKED A+D data qword layout — selectable via REAL_AD_REG_MAP:
|
||
//
|
||
// REAL_AD_REG_MAP=0 (default, project-local Ch72/Ch73 back-compat):
|
||
// bits[ 63: 0] = 64-bit register data
|
||
// bits[ 79: 64] = 16-bit project-local GS privileged offset
|
||
// (drives gs_stub.reg_wr_*)
|
||
// bits[127: 80] = reserved
|
||
//
|
||
// REAL_AD_REG_MAP=1 (real PS2 layout, Ch75):
|
||
// bits[ 63: 0] = 64-bit register data
|
||
// bits[ 71: 64] = 8-bit GIF A+D register number per PCSX2 GSRegs.h
|
||
// (drives gs_stub.gif_reg_*; gs_stub owns the
|
||
// decode into PRIM/RGBAQ/XYZF2/XYZ2/FRAME_1/ZBUF_1)
|
||
// bits[127: 72] = reserved
|
||
//
|
||
// The two namespaces are architecturally distinct. Do NOT add a
|
||
// reg# → privileged-offset LUT here — that conflation is the Ch74
|
||
// mistake Ch75 corrected. New GIF-context registers belong inside
|
||
// gs_stub, keyed by reg#.
|
||
//
|
||
// in_ready is high by default; it is deasserted by !image_data_ready while a FLG=2 IMAGE payload is in flight, and by raster_fifo_full in every other state.
|
||
//
|
||
// Trace schema:
|
||
// On tag accept: EV_GIFTAG arg0={flg,path_id} arg1={eop,flg,nreg,nloop[8:0]}
|
||
// arg2=regs_64 arg3=0 flags={in_last,1}
|
||
// On PACKED data (A+D): EV_WRITE arg0=path_id arg1=regnib
|
||
// arg2={offset16} arg3=data64
|
||
// flags={in_last,0}
|
||
// On PACKED data (other): EV_MODE same layout, no GS write.
|
||
// On REGLIST entry: EV_MODE arg0=path_id arg1=regnib
|
||
// arg2=0 arg3=entry64
|
||
// flags={in_last,0}
|
||
// On IMAGE/DISABLE qword: EV_MODE arg0={flg,path_id}
|
||
// arg2=0 arg3=in_data[63:0]
|
||
// flags={in_last,0}
|
||
|
||
`timescale 1ns/1ps
|
||
|
||
module gif_packed_stub
    import trace_pkg::*;
#(
    // Path identifier reported in the low nibble of trace arg0.
    parameter logic [3:0] PATH_ID = 4'd2,
    // Ch75 (was Ch74, corrected): switch the PACKED A+D address-source
    // from the project-local 16-bit-offset layout (default,
    // bits[79:64]=gs_offset) to the real PS2 A+D layout where
    // bits[71:64] is the 8-bit GS A+D register *number*. Per PCSX2's
    // GSRegs.h, the GIF A+D register namespace is distinct from the
    // GS privileged-MMIO offset namespace — Ch74's LUT mistakenly
    // mapped one to the other. The corrected design hands the 8-bit
    // reg# to gs_stub via its gif_reg_* port and lets gs_stub own the
    // GIF-context register file decode. When this parameter is 0, the
    // legacy gs_wr_* port (16-bit privileged-style offset) is driven
    // for back-compat with Ch72/Ch73 PACKED-A+D TBs and
    // tb_bgcolor_via_dma.
    parameter bit REAL_AD_REG_MAP = 1'b0
) (
    input  logic         clk,
    input  logic         rst_n,      // synchronous, active-low

    // Upstream from DMAC
    input  logic         in_valid,
    input  logic [127:0] in_data,
    input  logic         in_last,
    output logic         in_ready,

    // Downstream — legacy 16-bit-offset port (REAL_AD_REG_MAP=0).
    // Drives gs_stub's privileged-style reg_wr_* port.
    output logic         gs_wr_en,
    output logic [15:0]  gs_wr_addr,
    output logic [63:0]  gs_wr_data,

    // Ch110 — IMAGE-mode (FLG=2) data passthrough. `image_data_valid`
    // is high on every accepted FLG=2 IMAGE qword (state == S_IMAGE,
    // flg_q == 2 and the qword is accepted). `image_data` is the raw
    // 128-bit qword payload; `image_data_last` mirrors the upstream
    // `in_last`. Downstream `gif_image_xfer_stub` captures the qword
    // and writes the unpacked pixels into vram_stub at the
    // BITBLTBUF/TRXPOS/TRXREG-described destination. NOT wired by TBs
    // that don't model image transfers — leaving these outputs
    // unconnected is fine (named-port instantiation).
    output logic         image_data_valid,
    output logic [127:0] image_data,
    output logic         image_data_last,
    // Ch110 — backpressure from the IMAGE consumer. While a FLG=2
    // IMAGE payload is in flight, in_ready follows image_data_ready so
    // the upstream DMA stalls while gif_image_xfer_stub is busy
    // emitting the previous qword's pixel writes. In every other
    // state this input has no effect. TBs that don't model image
    // transfers tie this to 1'b1 (the always-ready default).
    input  logic         image_data_ready,
    // Ch172 — backpressure from the raster command FIFO inside
    // gs_stub. When the raster FIFO is full, in_ready is deasserted
    // so the DMAC pauses BEFORE the next qword is consumed (which
    // might trigger prim_complete → push_drop). The stall point is
    // the qword-acceptance handshake — once a qword is `accept`-ed,
    // the parser fully processes it; we never have a "consumed but
    // not committed" state. This input gates in_ready in every state
    // except an in-flight FLG=2 IMAGE payload (i.e. it applies to TAG
    // fetch, PACKED, REGLIST and FLG=3 DISABLE payload qwords). TBs
    // that don't model the raster FIFO tie this to 1'b0
    // (always-have-space).
    input  logic         raster_fifo_full,

    // Ch75 — real-PS2 GIF A+D register-number port (REAL_AD_REG_MAP=1).
    // Drives gs_stub's GIF-context gif_reg_* port. Only one of
    // {gs_wr_*, gif_reg_*} is active per accept depending on the
    // parameter.
    output logic         gif_reg_wr_en,
    output logic [7:0]   gif_reg_num,
    output logic [63:0]  gif_reg_data,

    // Trace
    output logic         ev_valid,
    output subsys_e      ev_subsys,
    output event_e       ev_event,
    output logic [63:0]  ev_arg0,
    output logic [63:0]  ev_arg1,
    output logic [63:0]  ev_arg2,
    output logic [63:0]  ev_arg3,
    output logic [31:0]  ev_flags
);

    // Ch73: state widened to cover REGLIST and IMAGE/DISABLE payloads
    // so unsupported FLG packets don't desync onto the next qword.
    typedef enum logic [1:0] {
        S_TAG     = 2'd0,
        S_PACKED  = 2'd1,
        S_REGLIST = 2'd2,
        S_IMAGE   = 2'd3   // also used for FLG=3 DISABLE
    } state_e;
    state_e state;

    // ------------------------------------------------------------------
    // Latched tag context (valid in S_PACKED / S_REGLIST / S_IMAGE).
    // Declared ahead of the in_ready logic so that flg_q is declared
    // before its first use (strict tools reject forward references).
    // ------------------------------------------------------------------
    logic [14:0] nloop_q;
    logic        eop_q;
    logic [1:0]  flg_q;
    // Ch73: nreg_eff widened to 5 bits. NREG field is 4 bits (0..15);
    // PS2 docs say a value of 0 means 16. Encoding 16 as 5'b10000
    // lets the per-loop terminal compare correctly terminate a
    // 16-register packet. (Old 4-bit clamp made NREG=0 consume only
    // 15 entries.)
    logic [4:0]  nreg_eff_q;
    logic [63:0] regs_q;
    logic [4:0]  reg_idx;

    // Ch73: REGLIST and IMAGE/DISABLE consume opaque qwords. We track
    // how many qwords are still left in the payload, computed at
    // S_TAG accept. PACKED keeps its per-entry reg_idx scheme.
    //   REGLIST count = ceil(NLOOP * NREG / 2)   (2 entries / qword)
    //   IMAGE   count = NLOOP                    (1 qword / loop)
    logic [19:0] payload_qwords_left;

    // ------------------------------------------------------------------
    // Handshake
    // ------------------------------------------------------------------
    // Ch110 — S_IMAGE is shared by FLG=2 IMAGE and FLG=3 DISABLE (per
    // the Ch73 desync fix). DISABLE qwords are opaque consume-only and
    // must NOT route to gif_image_xfer_stub or take its backpressure.
    // flg_q is the currently-running tag's FLG; gating on flg_q==2'd2
    // keeps DISABLE payloads silent.
    logic image_active;
    assign image_active = (state == S_IMAGE) && (flg_q == 2'd2);

    // Ch172 — in_ready policy:
    //   1) FLG=2 IMAGE payload in flight → follow image_data_ready.
    //   2) Every other state → stall when the downstream raster
    //      command FIFO is full so the next register write can't
    //      trigger an unrecoverable prim_complete push_drop.
    //      Over-stalling on qwords that aren't going to cause a push
    //      is intentionally conservative — it has no functional
    //      impact and keeps the gate simple.
    //   3) Sim TBs without the raster path tie raster_fifo_full to
    //      1'b0, which makes case 2 always-ready.
    assign in_ready = image_active ? image_data_ready
                                   : !raster_fifo_full;

    logic accept;
    assign accept = in_valid && in_ready;

    // Ch110 — IMAGE-mode data passthrough (combinational). Only the
    // valid strobe is gated on image_active, so FLG=3 DISABLE qwords
    // are NOT forwarded to gif_image_xfer_stub.
    assign image_data       = in_data;
    assign image_data_last  = in_last;
    assign image_data_valid = accept && image_active;

    // ------------------------------------------------------------------
    // Combinational tag-field decode for the qword on the wire in S_TAG.
    // ------------------------------------------------------------------
    logic [14:0] tag_nloop;
    logic        tag_eop;
    logic [1:0]  tag_flg;
    logic [3:0]  tag_nreg_field;
    logic [4:0]  tag_nreg_eff;
    logic [63:0] tag_regs;

    assign tag_nloop      = in_data[14:0];
    assign tag_eop        = in_data[15];
    assign tag_flg        = in_data[59:58];
    assign tag_nreg_field = in_data[63:60];
    // NREG field of 0 encodes 16 registers.
    assign tag_nreg_eff   = (tag_nreg_field == 4'd0) ? 5'd16
                                                     : {1'b0, tag_nreg_field};
    assign tag_regs       = in_data[127:64];

    // Ch73 audit-low: replace indexed bit-select with shift/mask. The
    // big case statement triggered iverilog's "constant selects in
    // always_*" "sorry" warnings repeatedly. Use concat-pad to form
    // the shift amount (reg_idx * 4) without going through `*`, which
    // iverilog truncates to operand width and would alias high
    // reg_idx values back to small shifts (e.g., reg_idx=8 wrapping
    // to shift=0 — the bug found in Ch73 bring-up).
    logic [6:0] cur_regnib_shift;
    logic [3:0] cur_regnib;
    assign cur_regnib_shift = {reg_idx, 2'b00};   // reg_idx*4 in 7 bits
    assign cur_regnib       = (regs_q >> cur_regnib_shift) & 64'hF;

    // ------------------------------------------------------------------
    // FSM
    // ------------------------------------------------------------------
    logic packed_last_in_loop;   // current PACKED entry is the last of its loop
    logic packet_loop_last;      // current loop is the last of the packet
    assign packed_last_in_loop = (reg_idx + 5'd1 == nreg_eff_q);
    assign packet_loop_last    = (nloop_q == 15'd1);

    // Ch73: pre-compute REGLIST payload-qword count = ceil(NLOOP *
    // NREG / 2). Done at S_TAG accept so the FSM only needs an
    // opaque countdown afterwards.
    logic [19:0] reglist_total_entries;
    logic [19:0] reglist_total_qwords;
    assign reglist_total_entries = tag_nloop * tag_nreg_eff;
    assign reglist_total_qwords  = (reglist_total_entries + 20'd1) >> 1;

    always_ff @(posedge clk) begin
        if (!rst_n) begin
            state               <= S_TAG;
            nloop_q             <= 15'd0;
            eop_q               <= 1'b0;
            flg_q               <= 2'd0;
            nreg_eff_q          <= 5'd0;
            regs_q              <= 64'd0;
            reg_idx             <= 5'd0;
            payload_qwords_left <= 20'd0;
        end else if (accept) begin
            unique case (state)
                S_TAG: begin
                    // Latch the tag fields for the payload that follows.
                    nloop_q             <= tag_nloop;
                    eop_q               <= tag_eop;
                    flg_q               <= tag_flg;
                    nreg_eff_q          <= tag_nreg_eff;
                    regs_q              <= tag_regs;
                    reg_idx             <= 5'd0;
                    payload_qwords_left <= 20'd0;
                    if (tag_nloop == 15'd0) begin
                        state <= S_TAG;   // empty tag: no payload follows
                    end else begin
                        unique case (tag_flg)
                            2'd0: state <= S_PACKED;
                            2'd1: begin
                                state               <= S_REGLIST;
                                payload_qwords_left <= reglist_total_qwords;
                            end
                            default: begin
                                state               <= S_IMAGE;   // FLG=2/3
                                payload_qwords_left <= {5'd0, tag_nloop};
                            end
                        endcase
                    end
                end

                S_PACKED: begin
                    // One entry per qword: walk reg_idx through the loop,
                    // then count the loop down (or return to S_TAG after
                    // the final loop).
                    if (packed_last_in_loop) begin
                        reg_idx <= 5'd0;
                        if (packet_loop_last) state   <= S_TAG;
                        else                  nloop_q <= nloop_q - 15'd1;
                    end else begin
                        reg_idx <= reg_idx + 5'd1;
                    end
                end

                S_REGLIST, S_IMAGE: begin
                    // Both branches consume opaque qwords. Trace fires
                    // per accept (see trace block below); decode of
                    // individual REGLIST entries is left to a future
                    // chapter once gs_stub gains the matching reg
                    // surface. The point of Ch73 here is just: don't
                    // desync onto the next GIFtag.
                    if (payload_qwords_left == 20'd1) state <= S_TAG;
                    else payload_qwords_left <= payload_qwords_left - 20'd1;
                end

                default: state <= S_TAG;
            endcase
        end
    end

    // ------------------------------------------------------------------
    // GS write — fires only on PACKED A+D data accepts. REGLIST
    // entries don't generate GS writes here (real REGLIST treats each
    // nibble as a register *number*, not A+D, and we don't have GS
    // routing for that path yet). IMAGE/DISABLE never generates GS
    // writes.
    //
    // Ch75: split into two output ports based on REAL_AD_REG_MAP.
    // Only one fires per accept; the other stays low. gs_stub's
    // privileged-side `reg_wr_*` and GIF-A+D-side `gif_reg_*` ports
    // are architecturally distinct. The enables are registered
    // one-cycle pulses; address/number and data hold their last value
    // between writes.
    // ------------------------------------------------------------------
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            gs_wr_en      <= 1'b0;
            gs_wr_addr    <= 16'd0;
            gs_wr_data    <= 64'd0;
            gif_reg_wr_en <= 1'b0;
            gif_reg_num   <= 8'd0;
            gif_reg_data  <= 64'd0;
        end else if (accept && state == S_PACKED && cur_regnib == 4'hE) begin
            if (REAL_AD_REG_MAP) begin
                gs_wr_en      <= 1'b0;
                gif_reg_wr_en <= 1'b1;
                gif_reg_num   <= in_data[71:64];
                gif_reg_data  <= in_data[63:0];
            end else begin
                gs_wr_en      <= 1'b1;
                gs_wr_addr    <= in_data[79:64];
                gs_wr_data    <= in_data[63:0];
                gif_reg_wr_en <= 1'b0;
            end
        end else begin
            gs_wr_en      <= 1'b0;
            gif_reg_wr_en <= 1'b0;
        end
    end

    // ------------------------------------------------------------------
    // Trace — one registered event per accepted qword.
    // ------------------------------------------------------------------
    always_ff @(posedge clk) begin
        if (!rst_n) begin
            ev_valid  <= 1'b0;
            ev_subsys <= SUBSYS_GIF;
            ev_event  <= EV_GIFTAG;
            ev_arg0   <= 64'd0;
            ev_arg1   <= 64'd0;
            ev_arg2   <= 64'd0;
            ev_arg3   <= 64'd0;
            ev_flags  <= 32'd0;
        end else if (accept && state == S_TAG) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_GIF;
            ev_event  <= EV_GIFTAG;
            // arg0[3:0]=path_id, arg0[5:4]=flg → callers can grep by FLG
            ev_arg0   <= {58'd0, tag_flg, PATH_ID};
            // Compact tag summary:
            //   {eop[15], flg[14:13], nreg[12:9], nloop[8:0]}
            // 48 pad bits + 16 payload bits = exactly 64 bits (the
            // previous 49-bit pad built a 65-bit concat whose MSB was
            // silently truncated; the traced value is unchanged).
            ev_arg1   <= {48'd0, tag_eop, tag_flg, tag_nreg_field,
                          tag_nloop[8:0]};
            ev_arg2   <= tag_regs;
            ev_arg3   <= 64'd0;
            ev_flags  <= {30'd0, in_last, 1'b1};   // bit0=is_tag
        end else if (accept && state == S_PACKED) begin
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_GIF;
            // A+D entries are traced as writes; every other nibble is a
            // traced no-op.
            ev_event  <= (cur_regnib == 4'hE) ? EV_WRITE : EV_MODE;
            ev_arg0   <= {60'd0, PATH_ID};
            ev_arg1   <= {60'd0, cur_regnib};
            ev_arg2   <= {48'd0, in_data[79:64]};
            ev_arg3   <= in_data[63:0];
            ev_flags  <= {30'd0, in_last, 1'b0};   // bit0=is_data
        end else if (accept && state == S_REGLIST) begin
            // One EV_MODE event per accepted REGLIST qword (no GS
            // write). A qword carries two entries (low half
            // bits[63:0], high half bits[127:64]); only the low-half
            // entry is traced — the high-half entry's trace is omitted
            // in this minimal Ch73 path. reg_idx is not advanced in
            // S_REGLIST, so arg1 always reports REGS nibble 0.
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_GIF;
            ev_event  <= EV_MODE;
            ev_arg0   <= {60'd0, PATH_ID};
            ev_arg1   <= {60'd0, cur_regnib};
            ev_arg2   <= 64'd0;
            ev_arg3   <= in_data[63:0];            // low-half entry data
            ev_flags  <= {30'd0, in_last, 1'b0};
        end else if (accept && state == S_IMAGE) begin
            // IMAGE (FLG=2) and DISABLE (FLG=3) payload qwords; flg_q in
            // arg0[5:4] tells them apart.
            ev_valid  <= 1'b1;
            ev_subsys <= SUBSYS_GIF;
            ev_event  <= EV_MODE;
            ev_arg0   <= {58'd0, flg_q, PATH_ID};
            ev_arg1   <= 64'd0;
            ev_arg2   <= 64'd0;
            ev_arg3   <= in_data[63:0];
            ev_flags  <= {30'd0, in_last, 1'b0};
        end else begin
            ev_valid  <= 1'b0;
        end
    end

endmodule : gif_packed_stub
|