ec82764bef
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
848 lines
40 KiB
Systemverilog
848 lines
40 KiB
Systemverilog
// retroDE_ps2 — gs_pcrtc_stub (Ch90)
|
||
//
|
||
// Minimal PCRTC (Programmable CRT Controller) scanout engine.
|
||
// Real PS2 PCRTC reads VRAM via a DISPFB (display framebuffer)
|
||
// configuration register and feeds the analog video DAC. This
|
||
// stub is the SCANOUT side of the GS pipeline — its dual is
|
||
// gs_stub, which is the WRITE side. Together they close the loop
|
||
// from `raster_pixel_emit` (Ch88) → vram_stub (Ch89) → visible
|
||
// pixels (Ch90).
|
||
//
|
||
// Architectural note. `platform_video_stub` is a flood-fill video
|
||
// adapter that always paints BGCOLOR within its active area —
|
||
// it predates VRAM persistence and stays as-is for back-compat.
|
||
// `gs_pcrtc_stub` is the SCANOUT-AWARE alternative, used by TBs
|
||
// that want to verify the round trip "gs_stub writes a pixel →
|
||
// vram_stub stores it → pcrtc reads it back as video." We did
|
||
// not extend platform_video_stub (which would have rippled
|
||
// through 6 existing TBs); pcrtc is a parallel module that owns
|
||
// its own raster timing AND vram read addressing, so a TB picks
|
||
// the one that fits.
|
||
//
|
||
// Scope:
|
||
// - Single DISPFB context: pcrtc consumes `pmode_q` and
|
||
// `dispfb1_q` directly from gs_stub's privileged CPU MMIO
|
||
// latches (Ch91). The Ch90 sideband ports
|
||
// (scanout_enable / dispfb_fbp / dispfb_fbw) are gone — TBs
|
||
// drive scanout configuration the way a real driver would,
|
||
// by writing PMODE and DISPFB1 through the gs_stub.reg_wr_*
|
||
// port. This means `wait (raster_done); write PMODE.EN1=1`
|
||
// is the canonical sequence, not a sideband poke.
|
||
// - Addressing: linear by DEFAULT — fb_addr math mirrors
|
||
// gs_stub's pixel fb_addr math byte-exactly so a pixel
|
||
// written at (x,y) reads back at (x,y) without swizzle
|
||
// reconciliation. Four OPTIONAL per-PSM swizzle paths gated
|
||
// by parameters: `PSMCT32_SWIZZLE=1` (Ch120) routes PSMCT32
|
||
// reads through gs_swizzle_psmct32_stub; `PSMCT16_SWIZZLE=1`
|
||
// (Ch126) routes PSMCT16 reads through gs_swizzle_psmct16_stub;
|
||
// `PSMT8_SWIZZLE=1` (Ch132) routes PSMT8 reads through
|
||
// gs_swizzle_psmt8_stub (page=128×64 px, bw_pg=FBW>>1 — FBW
|
||
// must be even for PSMT8); `PSMT4_SWIZZLE=1` (Ch138) routes
|
||
// PSMT4 reads through gs_swizzle_psmt4_stub (page=128×128 px,
|
||
// bw_pg=FBW>>1 — FBW must be even for PSMT4; module also
|
||
// outputs nibble_hi selector since PSMT4 packs 2 pixels/byte).
|
||
// The four parameters are independent. All four defaults are
|
||
// 0 → existing TBs see legacy linear behavior.
|
||
// - PSMCT32 (PSM=0), PSMCT16 (PSM=2), PSMT8 (PSM=0x13), and
|
||
// PSMT4 (PSM=0x14) are honored at this scope. Any other
|
||
// PSM forces scanout off rather than mis-decoding the byte
|
||
// layout. PSMCT16 reads 2 bytes/pixel and unpacks RGB5A1 →
|
||
// RGB888 via bit-replicate. PSMT8 reads 1 byte/pixel and
|
||
// PSMT4 reads 4 bits/pixel (2 pixels/byte, low nibble =
|
||
// even pixel). For PSMT8 / PSMT4, with `clut_enable=1` the
|
||
// index is looked up in clut_stub for real RGB; with
|
||
// `clut_enable=0`, the index/nibble surfaces as grayscale.
|
||
// gs_stub's raster channel emits PSMCT32 + PSMCT16 (Ch95) +
|
||
// PSMT8 (Ch105) + PSMT4 (Ch106). CLUT contents come from a
|
||
// TB-direct write OR from a VRAM→CLUT load triggered by
|
||
// TEX0_1.CLD via clut_loader_stub (Ch99..Ch102).
|
||
// - Single CRTC: one display, one DISPFB context. Real PS2 has
|
||
// two (DISPFB1/DISPLAY1 and DISPFB2/DISPLAY2) for interlace/
|
||
// merge. The PMODE.EN2 + DISPFB2/DISPLAY2 path is deferred.
|
||
// - DISPLAY1 DX/DY/DW/DH ARE honored (Ch92): they define the
|
||
// display window inside the active area. Outside the window,
|
||
// pcrtc emits 0 for r/g/b even with scanout_enable=1.
|
||
// MAGH/MAGV ARE honored (Ch93): each VRAM column shows for
|
||
// (MAGH+1) consecutive VCK pulses before advancing, and each
|
||
// VRAM line shows for (MAGV+1) raster lines. Practically,
|
||
// a 4-pixel-wide VRAM sprite with MAGH=1 (2×) appears 8
|
||
// pixels wide on screen. The H/V totals still come from
|
||
// module parameters at instantiation. Real PS2 driver-
|
||
// equivalent bring-up is now "configure DISPFB1 → configure
|
||
// DISPLAY1 → render → set PMODE.EN1=1." Note: DISPLAY1=0
|
||
// (post-reset default) means a 1×1 window at (0,0); a TB
|
||
// MUST configure DISPLAY1 for anything visible to scan out.
|
||
// - When scanout_enable
|
||
// (= PMODE.EN1 & (PSMCT32 || PSMCT16 || PSMT8 || PSMT4))
|
||
// is 0, r/g/b output is forced to 0 across the active area.
|
||
// There's no BGCOLOR fallback in this module — that lives in
|
||
// platform_video_stub.
|
||
//
|
||
// Trace payload: one EV_MODE pulse per completed frame, mirroring
|
||
// platform_video_stub's schema (arg0=frame_count, arg1=H*V).
|
||
// PLAT MODE arg0=frame_number arg1=pixels_per_frame arg2=- arg3=-
|
||
|
||
`timescale 1ns/1ps
|
||
|
||
module gs_pcrtc_stub
|
||
import trace_pkg::*;
|
||
#(
|
||
// Horizontal timing (in pixel clocks). Defaults match
|
||
// platform_video_stub's tiny-TB convention.
|
||
parameter int H_ACTIVE = 16,
|
||
parameter int H_FRONT = 2,
|
||
parameter int H_SYNC = 4,
|
||
parameter int H_BACK = 2,
|
||
|
||
// Vertical timing (in lines)
|
||
parameter int V_ACTIVE = 8,
|
||
parameter int V_FRONT = 1,
|
||
parameter int V_SYNC = 1,
|
||
parameter int V_BACK = 1,
|
||
|
||
parameter bit HSYNC_ACTIVE_LOW = 1'b1,
|
||
parameter bit VSYNC_ACTIVE_LOW = 1'b1,
|
||
|
||
// Ch120 — when set, PSMCT32 scanout reads VRAM via the real PS2
|
||
// GS page/block swizzle (gs_swizzle_psmct32_stub) instead of the
|
||
// legacy linear `FBW*64*y + x*4` formula. PSMCT16 / PSMT8 / PSMT4
|
||
// are governed by their own gates (PSMCT16_SWIZZLE Ch126,
|
||
// PSMT8_SWIZZLE Ch132, PSMT4_SWIZZLE Ch138 — see below).
|
||
// Default 0 keeps every existing PSMCT32 scanout TB on the
|
||
// original linear addressing.
|
||
parameter bit PSMCT32_SWIZZLE = 1'b0,
|
||
|
||
// Ch126 — when set, PSMCT16 scanout reads VRAM via the real PS2
|
||
// GS page/block/column swizzle (gs_swizzle_psmct16_stub) instead
|
||
// of the legacy linear `FBW*64*y + x*2` formula. PSMCT32 / PSMT8
|
||
// / PSMT4 are governed by their own gates (PSMCT32_SWIZZLE /
|
||
// PSMT8_SWIZZLE / PSMT4_SWIZZLE). Default 0 keeps every existing
|
||
// PSMCT16 scanout TB (Ch94 PSM-aware, Ch95 raster, Ch103 PSMT4-
|
||
// via-CT16-CLUT, etc.) on the original linear addressing.
|
||
parameter bit PSMCT16_SWIZZLE = 1'b0,
|
||
|
||
// Ch132 — when set, PSMT8 scanout reads VRAM via the real PS2 GS
|
||
// page/block/column swizzle (gs_swizzle_psmt8_stub) instead of
|
||
// the legacy linear `FBW*64*y + x` formula. PSMT8 pages are 128
|
||
// px wide (vs 64 px for CT32/CT16) so the swizzle internally uses
|
||
// bw_pg = FBW>>1 — PCSX2 asserts FBW must be even for PSMT8.
|
||
// Default 0 keeps every existing PSMT8 scanout TB (Ch96, Ch97,
|
||
// Ch103 PSMT4-via-CT16-CLUT, Ch107 PSMT4-e2e palette path, etc.)
|
||
// on the original linear addressing. PSMCT32 / PSMCT16 / PSMT4
|
||
// are governed by their own gates or stay linear.
|
||
parameter bit PSMT8_SWIZZLE = 1'b0,
|
||
|
||
// Ch138 — when set, PSMT4 scanout reads VRAM via the real PS2 GS
|
||
// page/block/column swizzle (gs_swizzle_psmt4_stub) instead of
|
||
// the legacy linear `byte_offset = pixel_index >> 1` formula.
|
||
// PSMT4 pixels are 4 bits each (2 pixels per byte); the swizzle
|
||
// module outputs both an absolute byte address AND a `nibble_hi`
|
||
// selector that picks the high or low nibble of the byte at
|
||
// that address. PSMT4 pages are 128 px wide (same as PSMT8) so
|
||
// the swizzle internally uses bw_pg = FBW>>1 — PCSX2 asserts
|
||
// FBW must be even for PSMT4. The grayscale + CLUT lookup paths
|
||
// BOTH use the same swizzle output: the byte at `addr` is read
|
||
// from VRAM, and `nibble_hi` (instead of pixel_index[0]) picks
|
||
// which nibble. Default 0 keeps every existing PSMT4 scanout TB
|
||
// (Ch103 PSMT4+CLUT, Ch104 PSMT4 round-trip, Ch107 PSMT4 e2e,
|
||
// etc.) on the original linear addressing. PSMCT32 / PSMCT16 /
|
||
// PSMT8 are governed by their own gates.
|
||
parameter bit PSMT4_SWIZZLE = 1'b0,
|
||
|
||
// Ch158 — when set, the data-decode + sync-output pipeline is
|
||
// delayed by 1 cycle so it aligns with a sync-read VRAM (e.g.
|
||
// `vram_bram_stub`, Ch154) whose `read_data` is registered.
|
||
// The address-driving stage (`vram_read_addr`) keeps using the
|
||
// current `(hcnt, vcnt)` so the read is issued one pixel
|
||
// "ahead"; the registered `vram_read_data` returns a cycle
|
||
// later, and the decode comb consumes the matching delayed
|
||
// counter view via the `*_dec` signals.
|
||
//
|
||
// Default 0 preserves the legacy combinational-read behavior
|
||
// every existing PCRTC TB (Ch90+ scanout TBs) is written
|
||
// against — those TBs drive `vram_read_data` via legacy
|
||
// `vram_stub` (comb read) and consume r/g/b on the same
|
||
// cycle as the addr drive. Set to 1 in the BRAM wrapper /
|
||
// board top once `vram_bram_stub` is the storage.
|
||
parameter bit VRAM_SYNC_READ = 1'b0,
|
||
|
||
// Ch163 — bypass the magnification dividers
|
||
// `vram_x_unshift = hwin_rel / hmag_factor` and the matching y
|
||
// form when the demo locks `MAGH = MAGV = 0`. Quartus infers a
|
||
// 32-bit hardware divider from the `/` operators above (the
|
||
// Ch162 STA worst path after STRIP_HW_DIVIDER closed the EE-
|
||
// core divider). For demos that never write MAGH/MAGV non-zero
|
||
// — which includes the PSMCT32 raster demo and every other
|
||
// hardware-target wrapper today — the divisors are constant 1
|
||
// and the math collapses to a passthrough.
|
||
//
|
||
// Default 0 keeps the existing divider math live so every
|
||
// Ch93-era scanout MAG TB stays green (the TBs that drive
|
||
// MAGH != 0 / MAGV != 0 such as `tb_gs_scanout_magh_magv`
|
||
// continue to use the default).
|
||
//
|
||
// When 1, `vram_x_unshift = hwin_rel` / `vram_y_unshift =
|
||
// vwin_rel` — equivalent to the MAGH=MAGV=0 case but without
|
||
// the divider. The hardware-demo path forwards this parameter
|
||
// through `top_psmct32_raster_demo_bram` and the DE25-Nano
|
||
// board top sets it to 1'b1.
|
||
parameter bit STRIP_PCRTC_MAG_DIV = 1'b0
|
||
) (
|
||
input logic clk,
|
||
input logic rst_n,
|
||
|
||
// Ch91/Ch92/Ch93/Ch94/Ch96/Ch103 — PMODE + DISPFB1 + DISPLAY1
|
||
// latches from gs_stub's privileged CPU MMIO port.
|
||
// EN1 (PMODE bit 0) gates scanout. DISPFB1 carries the
|
||
// framebuffer base / width / PSM the PCRTC reads from
|
||
// (PSMCT32, PSMCT16, PSMT8, and PSMT4 honored at this scope;
|
||
// any other PSM forces scanout off). DISPLAY1 carries the
|
||
// display window: DX/DY = origin within the active area;
|
||
// DW/DH = width/height MINUS one (real PS2 semantics).
|
||
// MAGH/MAGV (Ch93) scale the window-relative coordinate so
|
||
// each VRAM column/line repeats for (MAGH+1)/(MAGV+1)
|
||
// displayed pulses/lines; pcrtc still takes H/V TOTALS from
|
||
// module parameters at instantiation, not from registers.
|
||
input logic [63:0] pmode_q,
|
||
input logic [63:0] dispfb1_q,
|
||
input logic [63:0] display1_q,
|
||
|
||
// VRAM read port: combinational read (vram_stub) by default; registered 1-cycle read (e.g. vram_bram_stub) when VRAM_SYNC_READ=1.
|
||
output logic [31:0] vram_read_addr,
|
||
input logic [31:0] vram_read_data,
|
||
|
||
// Ch97 — CLUT (palette) read port for indexed-color scanout.
|
||
// When `clut_enable` is high AND the active PSM is PSMT8,
|
||
// pcrtc presents `clut_read_idx = vram_read_data[7:0] +
|
||
// (clut_csa << 4)` and decodes the returned PSMCT32 RGB
|
||
// entry instead of the grayscale fallback. CSM is implicitly
|
||
// CSM2 (linear). CSA shifts the lookup window in 16-entry
|
||
// increments and wraps mod 256. When `clut_enable` is low,
|
||
// the CLUT is bypassed and PSMT8 still scans out as
|
||
// grayscale (Ch96 default).
|
||
input logic clut_enable,
|
||
input logic [4:0] clut_csa,
|
||
output logic [7:0] clut_read_idx,
|
||
input logic [31:0] clut_read_data,
|
||
|
||
// Video out
|
||
output logic hsync,
|
||
output logic vsync,
|
||
output logic de,
|
||
output logic [7:0] r,
|
||
output logic [7:0] g,
|
||
output logic [7:0] b,
|
||
// Ch320 — high exactly when this scanout pixel is inside the displayed frame
|
||
// (scanout enabled AND within the DX/DY/DW/DH display window). Aligned to r/g/b.
|
||
// An LPDDR4B scanout reader gates its pixels by this so it shows ONE frame, not
|
||
// a tiled fill of the whole active line.
|
||
output logic pix_window_o,
|
||
|
||
// Trace
|
||
output logic ev_valid,
|
||
output subsys_e ev_subsys,
|
||
output event_e ev_event,
|
||
output logic [63:0] ev_arg0,
|
||
output logic [63:0] ev_arg1,
|
||
output logic [63:0] ev_arg2,
|
||
output logic [63:0] ev_arg3,
|
||
output logic [31:0] ev_flags
|
||
);
|
||
|
||
localparam int H_TOTAL = H_ACTIVE + H_FRONT + H_SYNC + H_BACK;
|
||
localparam int V_TOTAL = V_ACTIVE + V_FRONT + V_SYNC + V_BACK;
|
||
|
||
localparam int H_SYNC_START = H_ACTIVE + H_FRONT;
|
||
localparam int H_SYNC_END = H_SYNC_START + H_SYNC;
|
||
localparam int V_SYNC_START = V_ACTIVE + V_FRONT;
|
||
localparam int V_SYNC_END = V_SYNC_START + V_SYNC;
|
||
|
||
localparam int HCNT_W = $clog2(H_TOTAL);
|
||
localparam int VCNT_W = $clog2(V_TOTAL);
|
||
|
||
logic [HCNT_W-1:0] hcnt;
|
||
logic [VCNT_W-1:0] vcnt;
|
||
|
||
// Raster position bookkeeping.
//   end_of_line  : hcnt is on the last pixel clock of the line
//                  (hcnt == H_TOTAL-1).
//   end_of_frame : that same clock, on the last line of the frame
//                  (vcnt == V_TOTAL-1).
logic end_of_line;
logic end_of_frame;

assign end_of_frame = end_of_line && (vcnt == VCNT_W'(V_TOTAL - 1));
assign end_of_line  = (hcnt == HCNT_W'(H_TOTAL - 1));

// Free-running H/V counters, synchronous active-low reset.
// hcnt returns to zero on every end_of_line; vcnt only moves on that
// same clock, stepping by one or returning to zero at end_of_frame.
always_ff @(posedge clk) begin
    if (!rst_n) begin
        vcnt <= '0;
        hcnt <= '0;
    end
    else if (end_of_line) begin
        vcnt <= end_of_frame ? '0 : (vcnt + VCNT_W'(1));
        hcnt <= '0;
    end
    else begin
        hcnt <= hcnt + HCNT_W'(1);
    end
end
|
||
|
||
// Raster-region decode from the live (hcnt, vcnt) position:
//   active_h / active_v : inside the visible area on that axis
//   in_hsync / in_vsync : inside the sync pulse on that axis
//
// The region bounds are compared at 32-bit width rather than being
// size-cast down to the counter width. A bound equal to 2**HCNT_W
// (or 2**VCNT_W) — for example H_SYNC_END when H_BACK == 0 and H_TOTAL
// is a power of two — would truncate to 0 under `HCNT_W'(...)` and make
// `cnt < bound` permanently false, silently removing the sync pulse.
// For every bound that fits in the counter width the result is the
// same as the narrow compare. hcnt / vcnt are unsigned, so the 32-bit
// casts zero-extend and the comparisons are evaluated unsigned.
logic active_h;
logic active_v;
logic in_hsync;
logic in_vsync;

assign active_h = (32'(hcnt) < 32'(H_ACTIVE));
assign active_v = (32'(vcnt) < 32'(V_ACTIVE));
assign in_hsync = (32'(hcnt) >= 32'(H_SYNC_START)) && (32'(hcnt) < 32'(H_SYNC_END));
assign in_vsync = (32'(vcnt) >= 32'(V_SYNC_START)) && (32'(vcnt) < 32'(V_SYNC_END));
|
||
|
||
// ------------------------------------------------------------------
|
||
// Ch158 — decode-stage pipeline. When `VRAM_SYNC_READ=1`, every
|
||
// hcnt/vcnt-derived signal that the data-decode stage consumes
|
||
// is delayed by 1 cycle so it lines up with `vram_bram_stub`'s
|
||
// 1-cycle-late `vram_read_data`. The address-side
|
||
// (`vram_read_addr`) keeps using the current `hcnt`/`vcnt` so the
|
||
// read is issued one pixel "ahead".
|
||
//
|
||
// The registers below always exist (zero-cost in sim, optimized
|
||
// away when unreached in synthesis); the `*_dec` muxes select
|
||
// between the registered view (sync) and the live signal
|
||
// (legacy comb-read passthrough).
|
||
// ------------------------------------------------------------------
|
||
logic in_hsync_q, in_vsync_q;
|
||
logic active_h_q, active_v_q;
|
||
logic in_display_window_q, scanout_enable_q;
|
||
logic dispfb_psm_ct32_q, dispfb_psm_ct16_q, dispfb_psm_t8_q, dispfb_psm_t4_q;
|
||
logic psm4_nibble_select_q;
|
||
logic end_of_frame_q;
|
||
|
||
logic in_hsync_dec, in_vsync_dec;
|
||
logic active_h_dec, active_v_dec;
|
||
logic in_display_window_dec, scanout_enable_dec;
|
||
logic dispfb_psm_ct32_dec, dispfb_psm_ct16_dec, dispfb_psm_t8_dec, dispfb_psm_t4_dec;
|
||
logic psm4_nibble_select_dec;
|
||
logic end_of_frame_dec;
|
||
|
||
// psm4_nibble_select / dispfb_psm_* / scanout_enable /
|
||
// in_display_window are forward-referenced — they are declared
|
||
// and assigned later in the file (after the address/decode
|
||
// logic that produces them). SystemVerilog allows module-level
|
||
// forward references inside always_ff/always_comb blocks; the
|
||
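// registers below capture them at every posedge. NOTE(review): the `*_dec` continuous assigns further down also read these names ahead of their declarations, so this relies on the tools accepting use-before-declaration — confirm for every simulator/synthesizer in the flow.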
|
||
// Decode-stage delay bank. On each rising clk edge every `*_q` flop
// captures the live signal of the same name, producing a view that is
// exactly one cycle old. Synchronous active-low reset clears the bank.
always_ff @(posedge clk) begin
    if (!rst_n) begin
        // Raster timing view
        active_h_q           <= 1'b0;
        active_v_q           <= 1'b0;
        in_hsync_q           <= 1'b0;
        in_vsync_q           <= 1'b0;
        end_of_frame_q       <= 1'b0;
        // Scanout gating view
        scanout_enable_q     <= 1'b0;
        in_display_window_q  <= 1'b0;
        // Pixel-format view
        dispfb_psm_ct32_q    <= 1'b0;
        dispfb_psm_ct16_q    <= 1'b0;
        dispfb_psm_t8_q      <= 1'b0;
        dispfb_psm_t4_q      <= 1'b0;
        psm4_nibble_select_q <= 1'b0;
    end
    else begin
        // Raster timing view
        active_h_q           <= active_h;
        active_v_q           <= active_v;
        in_hsync_q           <= in_hsync;
        in_vsync_q           <= in_vsync;
        end_of_frame_q       <= end_of_frame;
        // Scanout gating view
        scanout_enable_q     <= scanout_enable;
        in_display_window_q  <= in_display_window;
        // Pixel-format view
        dispfb_psm_ct32_q    <= dispfb_psm_ct32;
        dispfb_psm_ct16_q    <= dispfb_psm_ct16;
        dispfb_psm_t8_q      <= dispfb_psm_t8;
        dispfb_psm_t4_q      <= dispfb_psm_t4;
        psm4_nibble_select_q <= psm4_nibble_select;
    end
end
|
||
|
||
assign in_hsync_dec = VRAM_SYNC_READ ? in_hsync_q : in_hsync;
|
||
assign in_vsync_dec = VRAM_SYNC_READ ? in_vsync_q : in_vsync;
|
||
assign active_h_dec = VRAM_SYNC_READ ? active_h_q : active_h;
|
||
assign active_v_dec = VRAM_SYNC_READ ? active_v_q : active_v;
|
||
assign in_display_window_dec = VRAM_SYNC_READ ? in_display_window_q : in_display_window;
|
||
assign scanout_enable_dec = VRAM_SYNC_READ ? scanout_enable_q : scanout_enable;
|
||
// Ch320 — same gate the r/g/b output uses (line ~"if (de && scanout_enable_dec &&
|
||
// in_display_window_dec)"), minus de (the HDMI path applies de). Lets an external
|
||
// LPDDR4B scanout reader blank outside the displayed frame, matching BRAM scanout.
|
||
assign pix_window_o = scanout_enable_dec && in_display_window_dec;
|
||
assign dispfb_psm_ct32_dec = VRAM_SYNC_READ ? dispfb_psm_ct32_q : dispfb_psm_ct32;
|
||
assign dispfb_psm_ct16_dec = VRAM_SYNC_READ ? dispfb_psm_ct16_q : dispfb_psm_ct16;
|
||
assign dispfb_psm_t8_dec = VRAM_SYNC_READ ? dispfb_psm_t8_q : dispfb_psm_t8;
|
||
assign dispfb_psm_t4_dec = VRAM_SYNC_READ ? dispfb_psm_t4_q : dispfb_psm_t4;
|
||
assign psm4_nibble_select_dec = VRAM_SYNC_READ ? psm4_nibble_select_q : psm4_nibble_select;
|
||
assign end_of_frame_dec = VRAM_SYNC_READ ? end_of_frame_q : end_of_frame;
|
||
|
||
assign hsync = HSYNC_ACTIVE_LOW ? ~in_hsync_dec : in_hsync_dec;
|
||
assign vsync = VSYNC_ACTIVE_LOW ? ~in_vsync_dec : in_vsync_dec;
|
||
assign de = active_h_dec && active_v_dec;
|
||
|
||
// ------------------------------------------------------------------
|
||
// VRAM addressing. Mirror gs_stub's fb_addr math byte-exactly
|
||
// so written-then-scanned pixels round-trip without
|
||
// reconciliation:
|
||
// fbp_bytes = dispfb_fbp << 11 (FBP * 2048)
|
||
// pixels_per_row = dispfb_fbw << 6 (FBW * 64)
|
||
// effective_x = (hcnt - DX) / (MAGH+1) + DBX (Ch92/Ch93)
|
||
// effective_y = (vcnt - DY) / (MAGV+1) + DBY
|
||
// pixel_index = effective_y * pixels_per_row + effective_x
|
||
// byte_offset = pixel_index << dispfb_bpp_shift
|
||
// fb_addr = fbp_bytes + byte_offset
|
||
// dispfb_bpp_shift is now PSM-aware (Ch94/Ch96): 2 for
|
||
// PSMCT32, 1 for PSMCT16, 0 for PSMT8; PSMT4 bypasses the shift and uses pixel_index >> 1. Other PSMs force
|
||
// scanout off rather than mis-decoding bytes.
|
||
// ------------------------------------------------------------------
|
||
|
||
// Decode DISPFB1 sub-fields per real PS2 GS register layout
|
||
// (PCSX2 GSRegs.h — DISPFB structure):
|
||
// FBP : [8:0] base address in 2048-byte units
|
||
// FBW : [14:9] width in 64-pixel units
|
||
// PSM : [19:15] pixel storage mode (PSMCT32=0x00, PSMCT16=0x02, PSMT8=0x13, PSMT4=0x14 honored)
|
||
// DBX : [42:32] display-buffer X origin (Ch91-audit fix)
|
||
// DBY : [53:43] display-buffer Y origin (Ch91-audit fix)
|
||
//
|
||
// DBX/DBY shift the scanout's VRAM origin: the pixel that
|
||
// appears at (hcnt=0, vcnt=0) is VRAM (DBX, DBY), not (0, 0).
|
||
// Useful for double-buffered framebuffers and offset display
|
||
// windows.
|
||
logic [8:0] dispfb_fbp;
|
||
logic [5:0] dispfb_fbw;
|
||
logic [4:0] dispfb_psm;
|
||
logic [10:0] dispfb_dbx;
|
||
logic [10:0] dispfb_dby;
|
||
logic dispfb_psm_ok;
|
||
logic pmode_en1;
|
||
logic scanout_enable;
|
||
|
||
assign dispfb_fbp = dispfb1_q[8:0];
|
||
assign dispfb_fbw = dispfb1_q[14:9];
|
||
assign dispfb_psm = dispfb1_q[19:15];
|
||
assign dispfb_dbx = dispfb1_q[42:32];
|
||
assign dispfb_dby = dispfb1_q[53:43];
|
||
|
||
// Ch94/Ch96/Ch97/Ch103 — scanout PSM awareness. Four formats:
|
||
// PSMCT32 (5'h00) — 4 bytes/pixel, byte order {A,B,G,R}.
|
||
// PSMCT16 (5'h02) — 2 bytes/pixel, RGB5A1 packed:
|
||
// R[4:0] G[9:5] B[14:10] A[15].
|
||
// PSMT8 (5'h13) — 1 byte/pixel, 8-bit index.
|
||
// PSMT4 (5'h14) — 4 bits/pixel = 2 pixels/byte. Byte
|
||
// offset = pixel_index >> 1; nibble
|
||
// selector = pixel_index[0] (low =
|
||
// even, high = odd). The 4-bit nibble
|
||
// zero-extends to an 8-bit CLUT index;
|
||
// CSA picks the 16-entry palette window.
|
||
// For PSMT8/PSMT4, with `clut_enable=1` pcrtc looks up
|
||
// CLUT[idx + (CSA << 4)] in the external clut_stub for real
|
||
// RGB. With `clut_enable=0`, the index/nibble surfaces as
|
||
// grayscale (8-bit replication for PSMT8, 4→8 bit-replicate
|
||
// for PSMT4) so the storage lane stays visually verifiable
|
||
// without programming a palette.
|
||
// 5→8 expansion (PSMCT16) uses bit-replicate ({r5, r5[4:2]}),
|
||
// matching PCSX2. Other PSMs still disable scanout rather
|
||
// than mis-decode bytes; PSMCT24/PSMCT16S/PSMZ32/etc. force
|
||
// scanout off here.
|
||
logic dispfb_psm_ct32;
|
||
logic dispfb_psm_ct16;
|
||
logic dispfb_psm_t8;
|
||
logic dispfb_psm_t4;
|
||
logic [1:0] dispfb_bpp_shift;
|
||
|
||
assign dispfb_psm_ct32 = (dispfb_psm == 5'h00);
|
||
assign dispfb_psm_ct16 = (dispfb_psm == 5'h02);
|
||
assign dispfb_psm_t8 = (dispfb_psm == 5'h13);
|
||
assign dispfb_psm_t4 = (dispfb_psm == 5'h14);
|
||
assign dispfb_psm_ok = dispfb_psm_ct32 | dispfb_psm_ct16
|
||
| dispfb_psm_t8 | dispfb_psm_t4;
|
||
assign dispfb_bpp_shift = dispfb_psm_ct32 ? 2'd2 : // 4 bytes/pixel
|
||
dispfb_psm_ct16 ? 2'd1 : // 2 bytes/pixel
|
||
dispfb_psm_t8 ? 2'd0 : // 1 byte/pixel
|
||
2'd2; // PSMT4 uses byte_offset right-shift, not bpp_shift
|
||
assign pmode_en1 = pmode_q[0];
|
||
assign scanout_enable = pmode_en1 & dispfb_psm_ok;
|
||
|
||
// Ch92/Ch93 — DISPLAY1 sub-fields per real PS2 GS register
|
||
// layout (PCSX2 GSRegs.h — DISPLAY structure):
|
||
// DX : [11:0] display window X start (in VCK pulses)
|
||
// DY : [22:12] display window Y start (in raster lines)
|
||
// MAGH : [26:23] horizontal magnification - 1 (Ch93)
|
||
// MAGV : [28:27] vertical magnification - 1 (Ch93)
|
||
// DW : [43:32] display width - 1 (in VCK pulses)
|
||
// DH : [54:44] display height - 1 (in raster lines)
|
||
//
|
||
// The display window is the sub-rect (DX..DX+DW, DY..DY+DH)
|
||
// inside the active area. Outside the window, r/g/b is 0
|
||
// even when scanout_enable is 1. Inside, the VRAM index is
|
||
// measured RELATIVE to the window origin, scaled DOWN by the
|
||
// magnification factors (MAGH+1 / MAGV+1), then shifted by
|
||
// DBX/DBY. This means the pixel at displayed (DX, DY)
|
||
// corresponds to VRAM (DBX, DBY); successive displayed
|
||
// pixels along H map to the SAME VRAM column for (MAGH+1)
|
||
// VCK pulses before advancing.
|
||
logic [11:0] display_dx;
|
||
logic [10:0] display_dy;
|
||
logic [3:0] display_magh;
|
||
logic [1:0] display_magv;
|
||
logic [11:0] display_dw;
|
||
logic [10:0] display_dh;
|
||
|
||
assign display_dx = display1_q[11:0];
|
||
assign display_dy = display1_q[22:12];
|
||
assign display_magh = display1_q[26:23];
|
||
assign display_magv = display1_q[28:27];
|
||
assign display_dw = display1_q[43:32];
|
||
assign display_dh = display1_q[54:44];
|
||
|
||
// Window inside-test: (hcnt - DX) in [0, DW] AND (vcnt - DY)
|
||
// in [0, DH]. We do the lower-bound check by comparing >=
|
||
// and the upper-bound by computing the relative coord.
|
||
// Display-window hit test.
// hwin_rel / vwin_rel hold the raster position relative to the DISPLAY1
// origin (DX, DY), computed at 12 bits. The subtraction wraps when the
// raster is left of / above the origin, so the explicit `>= origin`
// comparisons are part of the test rather than redundant. A position is
// inside the window when it is at or past the origin on both axes and
// its relative offset does not exceed DW / DH.
// The counters are zero-extended with a sized replication, which keeps
// the build failing if HCNT_W or VCNT_W ever exceeds 12.
logic [11:0] hwin_rel;
logic [11:0] vwin_rel;
logic        in_display_window;

assign hwin_rel = {{(12-HCNT_W){1'b0}}, hcnt} - display_dx;
assign vwin_rel = {{(12-VCNT_W){1'b0}}, vcnt} - {1'b0, display_dy};

assign in_display_window =
       ({{(12-HCNT_W){1'b0}}, hcnt} >= display_dx)
    && (hwin_rel <= display_dw)
    && ({{(12-VCNT_W){1'b0}}, vcnt} >= {1'b0, display_dy})
    && (vwin_rel <= {1'b0, display_dh});
|
||
|
||
logic [31:0] fbp_bytes;
|
||
logic [31:0] pixels_per_row;
|
||
logic [31:0] hmag_factor; // MAGH + 1, range 1..16
|
||
logic [31:0] vmag_factor; // MAGV + 1, range 1..4
|
||
logic [31:0] vram_x_unshift;
|
||
logic [31:0] vram_y_unshift;
|
||
logic [31:0] effective_x;
|
||
logic [31:0] effective_y;
|
||
logic [31:0] pixel_index;
|
||
logic [31:0] byte_offset;
|
||
|
||
// VRAM index is measured from inside the display window and
|
||
// SCALED DOWN by the magnification factors:
|
||
// effective_x = ((hcnt - DX) / (MAGH+1)) + DBX
|
||
// effective_y = ((vcnt - DY) / (MAGV+1)) + DBY
|
||
// MAGH=MAGV=0 → factors=1×, math collapses to the pre-Ch93
|
||
// form (and the pre-Ch92 form when DISPLAY1 covers the full
|
||
// active area). MAGH=N>0 means each VRAM column shows for
|
||
// (N+1) consecutive VCK pulses before the next column. SystemVerilog
|
||
// `/` truncates toward zero on unsigned 32-bit operands —
|
||
// matches PS2 PCRTC behavior since (hcnt-DX) is always
|
||
// non-negative inside the window (the window check guards
|
||
// hcnt >= DX before VRAM is read).
|
||
assign fbp_bytes = {23'd0, dispfb_fbp} << 11;
|
||
assign pixels_per_row = {26'd0, dispfb_fbw} << 6;
|
||
assign hmag_factor = {28'd0, display_magh} + 32'd1;
|
||
assign vmag_factor = {30'd0, display_magv} + 32'd1;
|
||
// Ch163 — when STRIP_PCRTC_MAG_DIV is 1, bypass the divisions
|
||
// and use the window-relative coords directly. Quartus then has
|
||
// nothing to infer for the magnification divider (the Ch162-onwards
|
||
// STA worst path on `u_demo|u_pcrtc|div_1_rtl_0|...`). The
|
||
// hardware-demo path locks MAGH=MAGV=0 so the divisors are
|
||
// constant 1 and this is behavior-neutral. The default 0 keeps
|
||
// the live divider math for the existing Ch93 magnification
|
||
// scanout TBs (`tb_gs_scanout_magh_magv` etc.).
|
||
assign vram_x_unshift = STRIP_PCRTC_MAG_DIV
|
||
? {20'd0, hwin_rel}
|
||
: ({20'd0, hwin_rel} / hmag_factor);
|
||
assign vram_y_unshift = STRIP_PCRTC_MAG_DIV
|
||
? {20'd0, vwin_rel}
|
||
: ({20'd0, vwin_rel} / vmag_factor);
|
||
assign effective_x = vram_x_unshift + {21'd0, dispfb_dbx};
|
||
assign effective_y = vram_y_unshift + {21'd0, dispfb_dby};
|
||
assign pixel_index = (effective_y * pixels_per_row) + effective_x;
|
||
// PSMT4 packs 2 pixels per byte → byte_offset = pixel_index/2;
|
||
// all other supported PSMs are integer-bytes-per-pixel and
|
||
// use the standard left-shift by bpp_shift.
|
||
assign byte_offset = dispfb_psm_t4 ? (pixel_index >> 1)
|
||
: (pixel_index << dispfb_bpp_shift);
|
||
logic [31:0] vram_linear_addr;
|
||
assign vram_linear_addr = fbp_bytes + byte_offset;
|
||
|
||
// Ch120 — optional PSMCT32 swizzled scanout. The swizzle module
|
||
// is purely combinational and reuses dispfb_fbp / dispfb_fbw +
|
||
// the per-cycle effective_x / effective_y (already magnification-
|
||
// aware via Ch93). When PSMCT32_SWIZZLE=1 AND the active PSM is
|
||
// PSMCT32, mux its output into vram_read_addr. Other PSMs (CT16,
|
||
// T8, T4) and PSMCT32_SWIZZLE=0 keep the legacy linear address.
|
||
logic [31:0] vram_swizzled_addr;
|
||
gs_swizzle_psmct32_stub u_swizzle (
|
||
.fbp (dispfb_fbp),
|
||
.fbw (dispfb_fbw),
|
||
.x (effective_x[11:0]),
|
||
.y (effective_y[11:0]),
|
||
.addr(vram_swizzled_addr)
|
||
);
|
||
|
||
// Ch126 — optional PSMCT16 swizzled scanout. Same wiring shape
|
||
// as Ch120 but uses gs_swizzle_psmct16_stub. The PSMCT16 module
|
||
// bakes its own page-shape (64×64 vs CT32's 64×32), block grid
|
||
// (4 cols × 8 rows vs CT32's 8×4), and within-block column-table
|
||
// permutation in. Default PSMCT16_SWIZZLE=0 preserves linear
|
||
// PSMCT16 scanout for the legacy TBs (Ch94/Ch95/Ch103/etc.).
|
||
logic [31:0] vram_swizzled16_addr;
|
||
gs_swizzle_psmct16_stub u_swizzle16 (
|
||
.fbp (dispfb_fbp),
|
||
.fbw (dispfb_fbw),
|
||
.x (effective_x[11:0]),
|
||
.y (effective_y[11:0]),
|
||
.addr(vram_swizzled16_addr)
|
||
);
|
||
|
||
// Ch132 — optional PSMT8 swizzled scanout. Same wiring shape as
|
||
// Ch120/Ch126. PSMT8 pages are 128 px wide so the swizzle
|
||
// internally divides FBW by 2 (PCSX2 asserts FBW must be even
|
||
// for PSMT8). Default PSMT8_SWIZZLE=0 preserves linear PSMT8
|
||
// scanout for the legacy TBs (Ch96, Ch97, Ch103, Ch107, etc.).
|
||
logic [31:0] vram_swizzled8_addr;
|
||
gs_swizzle_psmt8_stub u_swizzle8 (
|
||
.fbp (dispfb_fbp),
|
||
.fbw (dispfb_fbw),
|
||
.x (effective_x[11:0]),
|
||
.y (effective_y[11:0]),
|
||
.addr(vram_swizzled8_addr)
|
||
);
|
||
|
||
// Ch138 — optional PSMT4 swizzled scanout. Same wiring shape as
|
||
// Ch120/Ch126/Ch132 but uses gs_swizzle_psmt4_stub. PSMT4 is
|
||
// 4 bits/pixel, so the module outputs both an absolute byte
|
||
// address AND a `nibble_hi` selector. Default PSMT4_SWIZZLE=0
|
||
// preserves linear PSMT4 scanout for the legacy TBs (Ch103,
|
||
// Ch104, Ch107, etc.) — the linear path uses pixel_index[0] as
|
||
// the nibble selector; the swizzled path uses the swizzle
|
||
// module's nibble_hi output instead.
|
||
logic [31:0] vram_swizzled4_addr;
|
||
logic swizzle4_nibble_hi;
|
||
gs_swizzle_psmt4_stub u_swizzle4 (
|
||
.fbp (dispfb_fbp),
|
||
.fbw (dispfb_fbw),
|
||
.x (effective_x[11:0]),
|
||
.y (effective_y[11:0]),
|
||
.addr (vram_swizzled4_addr),
|
||
.nibble_hi(swizzle4_nibble_hi)
|
||
);
|
||
|
||
assign vram_read_addr = (PSMCT32_SWIZZLE && dispfb_psm_ct32) ? vram_swizzled_addr :
|
||
(PSMCT16_SWIZZLE && dispfb_psm_ct16) ? vram_swizzled16_addr :
|
||
(PSMT8_SWIZZLE && dispfb_psm_t8) ? vram_swizzled8_addr :
|
||
(PSMT4_SWIZZLE && dispfb_psm_t4) ? vram_swizzled4_addr :
|
||
vram_linear_addr;
|
||
|
||
// PSMCT32 layout in vram_stub: little-endian write of
|
||
// raster_pixel_color_q[31:0] = {A, B, G, R}. Read back as:
|
||
// data[7:0] = R
|
||
// data[15:8] = G
|
||
// data[23:16] = B
|
||
// data[31:24] = A (alpha, not exposed at the video DAC)
|
||
|
||
// Ch94/Ch96/Ch97 — PSM-aware color decode.
|
||
// PSMCT32: lower 24 bits = {B, G, R}; alpha at [31:24]
|
||
// dropped.
|
||
// PSMCT16: RGB5A1 in lower 16 bits, 5→8 bit-replicate.
|
||
// PSMT8 : index in vram_read_data[7:0]. With clut_enable
|
||
// (Ch97), CLUT[idx + (CSA << 4)] is looked up for
|
||
// real RGB; without it, the index is emitted as
|
||
// grayscale (Ch96 fallback). The vram_stub read
|
||
// returns 4 bytes starting at the byte address,
|
||
// so [7:0] is the byte at the addressed PSMT8
|
||
// pixel regardless of 4-byte alignment.
|
||
logic [15:0] psm16_pixel;
|
||
logic [4:0] psm16_r5, psm16_g5, psm16_b5;
|
||
logic [7:0] psm16_r8, psm16_g8, psm16_b8;
|
||
logic [7:0] psm8_idx;
|
||
logic [3:0] psm4_nibble;
|
||
logic [7:0] psm4_idx;
|
||
logic [7:0] psm4_gray;
|
||
|
||
// Ch158 (audit Medium fix) — lane selection for sub-word pixel formats.
//
// The two VRAM models place a sub-word pixel differently inside the 32 bits
// they return:
//   * `vram_stub` (legacy comb read) returns the 4 bytes STARTING at
//     `byte_addr`, so the pixel is always in the LOW lane of
//     `vram_read_data` (CT16 -> [15:0], T8 -> [7:0], T4 byte -> [7:0]).
//   * `vram_bram_stub` is word-addressable (returns mem[byte_addr >> 2]),
//     so the pixel sits at lane `byte_addr[1:0]` of the returned word.
//     A CT16 halfword with byte_addr[1]==1 is at [31:16] and would be
//     missed by a fixed low-lane extract.
//
// `vram_addr_lane_q` is `vram_read_addr[1:0]` delayed by one clock, which
// matches the `_dec` decode-stage view of the registered `vram_read_data`.
// `data_lane` uses those registered LSBs in sync mode (VRAM_SYNC_READ set)
// and is forced to 0 in legacy mode, so vram_stub's byte-addressable
// semantics keep working.
logic [1:0] vram_addr_lane_q;     // address LSBs, one clock late
logic [1:0] vram_addr_lane_dec;   // LSBs as seen by the decode stage
logic [1:0] data_lane;            // lane actually applied to vram_read_data

always_ff @(posedge clk) begin
    if (!rst_n) begin
        vram_addr_lane_q <= 2'b00;
    end else begin
        vram_addr_lane_q <= vram_read_addr[1:0];
    end
end

// Decode-stage view of the address LSBs: registered in sync mode,
// straight through otherwise.
assign vram_addr_lane_dec = VRAM_SYNC_READ ? vram_addr_lane_q : vram_read_addr[1:0];

// Legacy mode always reads the low lane.
assign data_lane = VRAM_SYNC_READ ? vram_addr_lane_dec : 2'b00;
|
||
|
||
// PSMCT16 pixel extract. data_lane[1] chooses the halfword of the 32-bit
// read word: 0 -> [15:0], 1 -> [31:16]. data_lane[0] is not consulted; an
// odd byte address would be misuse for CT16, since the address-stage
// formula always yields even byte addresses.
assign psm16_pixel = (data_lane[1] == 1'b0) ? vram_read_data[15:0]
                                            : vram_read_data[31:16];
|
||
|
||
// Byte-lane pick for PSMT8 / PSMT4: data_lane[1:0] indexes one of the four
// bytes of vram_read_data. The selected byte is used directly as
// `psm8_idx` and is also the source byte for the PSMT4 nibble extract.
logic [7:0] vram_byte_lane;

always_comb begin
    case (data_lane)
        2'd3: begin vram_byte_lane = vram_read_data[31:24]; end
        2'd2: begin vram_byte_lane = vram_read_data[23:16]; end
        2'd1: begin vram_byte_lane = vram_read_data[15: 8]; end
        2'd0: begin vram_byte_lane = vram_read_data[ 7: 0]; end
    endcase
end
|
||
|
||
// PSMCT16 channel split: R = bits [4:0], G = [9:5], B = [14:10].
// Bit 15 of psm16_pixel is not used by this decode.
assign {psm16_b5, psm16_g5, psm16_r5} = psm16_pixel[14:0];

// 5 -> 8 bit expansion: the top three bits are repeated in the low three
// positions, so 5'h00 -> 8'h00 and 5'h1F -> 8'hFF.
assign psm16_r8 = {psm16_r5, psm16_r5[4:2]};
assign psm16_g8 = {psm16_g5, psm16_g5[4:2]};
assign psm16_b8 = {psm16_b5, psm16_b5[4:2]};

// PSMT8 index is the lane-selected byte as-is.
assign psm8_idx = vram_byte_lane;
|
||
|
||
// Ch103 / Ch138 — PSMT4 nibble selector.
//
// A PSMT4 byte carries two pixels: the low nibble is the even pixel and the
// high nibble is the odd pixel. Under linear addressing pixel_index[0]
// says which of the two this scanout cycle is reading.
//
// When PSMT4_SWIZZLE=1 AND the active PSM is PSMT4, the selector is taken
// from the swizzle module's `nibble_hi` output instead (which is
// `columnTable4[yb][xb] & 1` — the canonical PCSX2 selector under the
// swizzled layout). The linear bit cannot be reused there because the
// swizzle reorders pixels within a block.
//
// Downstream, the selected 4-bit nibble is zero-extended to form an 8-bit
// CLUT index, and the grayscale fallback replicates it into both halves of
// an 8-bit channel value (4'hF -> 8'hFF, 4'h5 -> 8'h55, etc.).
logic psm4_nibble_select;

assign psm4_nibble_select =
    (dispfb_psm_t4 && PSMT4_SWIZZLE) ? swizzle4_nibble_hi   // swizzled layout
                                     : pixel_index[0];      // linear layout
|
||
// Ch158 — PSMT4 nibble extract at the decode stage.
//
// The nibble selector has to be paired with vram_read_data. In legacy
// comb-read mode both are same-cycle; in sync-read mode the selector is
// registered so that it lines up with the registered VRAM data.
// `psm4_nibble_select_dec` is that aligned view (the choice between the
// two is made via `VRAM_SYNC_READ`). The byte holding the nibble comes
// from `vram_byte_lane`: the byte_addr[1:0]-keyed lane in sync mode, the
// low lane in legacy mode.
assign psm4_nibble = psm4_nibble_select_dec
                   ? vram_byte_lane[7:4]    // odd pixel  (high nibble)
                   : vram_byte_lane[3:0];   // even pixel (low nibble)

// Zero-extended nibble: base CLUT index for PSMT4.
assign psm4_idx  = {4'h0, psm4_nibble};

// Nibble copied into both halves: grayscale fallback (4'hF -> 8'hFF).
assign psm4_gray = {2{psm4_nibble}};
|
||
|
||
// Ch97 / Ch103 — effective CLUT index.
//
// Base index: the zero-extended PSMT4 nibble when dispfb_psm_t4_dec is set,
// otherwise the PSMT8 byte index. For the remaining formats the value is
// unused (pcrtc just doesn't consume the CLUT output). `clut_csa` then
// shifts the lookup window in 16-entry units through {clut_csa, 4'd0};
// the 8-bit add wraps mod 256, matching the size of the staging area.
//
// Ch158 — both terms are already aligned with the data-decode stage (they
// derive from vram_read_data and from the registered/passthrough
// `dispfb_psm_t4_dec` flag), so the CLUT lookup happens on the same cycle
// as the pixel-emit decode comb.
logic [7:0] clut_idx_base;

assign clut_idx_base = dispfb_psm_t4_dec ? psm4_idx
                                         : psm8_idx;

assign clut_read_idx = {clut_csa, 4'b0000} + clut_idx_base;
|
||
|
||
// Pixel colour decode: drives r/g/b for the current decode-stage pixel.
//
// The output is black unless `de`, `scanout_enable_dec` and
// `in_display_window_dec` are all set. When they are, the first asserted
// flag in the order CT16, T8, T4 picks the decode (a priority case that
// mirrors an if / else-if chain); if none is asserted the read word is
// treated as PSMCT32.
always_comb begin
    if (de && scanout_enable_dec && in_display_window_dec) begin
        case (1'b1)
            dispfb_psm_ct16_dec: begin
                // PSMCT16: 5-bit channels already expanded to 8 bits.
                r = psm16_r8;
                g = psm16_g8;
                b = psm16_b8;
            end

            dispfb_psm_t8_dec: begin
                if (clut_enable) begin
                    // CLUT lookup. Entries are PSMCT32 and use the same
                    // byte order as PSMCT32 framebuffer reads:
                    //   [7:0]=R, [15:8]=G, [23:16]=B, [31:24]=A
                    r = clut_read_data[7:0];
                    g = clut_read_data[15:8];
                    b = clut_read_data[23:16];
                end else begin
                    // Ch96 fallback: show the raw index as grayscale.
                    r = psm8_idx;
                    g = psm8_idx;
                    b = psm8_idx;
                end
            end

            dispfb_psm_t4_dec: begin
                if (clut_enable) begin
                    // Ch103 — PSMT4 + CLUT. The nibble has already been
                    // folded into clut_read_idx (clut_idx_base plus
                    // CSA<<4); the entry returned is PSMCT32 ABGR.
                    r = clut_read_data[7:0];
                    g = clut_read_data[15:8];
                    b = clut_read_data[23:16];
                end else begin
                    // Grayscale fallback: nibble replicated across the
                    // 8-bit DAC value, so 4'hF -> 8'hFF.
                    r = psm4_gray;
                    g = psm4_gray;
                    b = psm4_gray;
                end
            end

            default: begin
                // PSMCT32 — the only remaining format that dispfb_psm_ok
                // admits at this scope. Alpha at [31:24] is not used.
                r = vram_read_data[7:0];
                g = vram_read_data[15:8];
                b = vram_read_data[23:16];
            end
        endcase
    end else begin
        // Blanking, scanout disabled, or outside the display window.
        r = 8'h00;
        g = 8'h00;
        b = 8'h00;
    end
end
|
||
|
||
// ------------------------------------------------------------------
// Trace: one EV_MODE per completed frame.
// ------------------------------------------------------------------
//
// `frame_count` counts end_of_frame_dec pulses since reset. Each pulse
// loads one trace event: ev_valid=1, subsystem SUBSYS_PLAT, event EV_MODE,
// arg0 = frame_count before the increment (zero-extended), arg1 =
// H_ACTIVE * V_ACTIVE cast to 32 bits (zero-extended), and arg2 / arg3 /
// flags = 0. On any other non-reset clock only ev_valid is cleared; the
// other ev_* fields keep their last values.

logic [31:0] frame_count;

always_ff @(posedge clk) begin
    if (!rst_n) begin
        // Synchronous reset: no event pending, all payload fields cleared.
        ev_valid    <= 1'b0;
        ev_subsys   <= SUBSYS_PLAT;
        ev_event    <= EV_MODE;
        ev_arg0     <= '0;
        ev_arg1     <= '0;
        ev_arg2     <= '0;
        ev_arg3     <= '0;
        ev_flags    <= '0;

        frame_count <= '0;
    end else if (end_of_frame_dec) begin
        // Ch158: when VRAM_SYNC_READ=1, end_of_frame_dec lags the
        // counter-side end_of_frame by 1 cycle, so it fires when the LAST
        // visible pixel actually emits (1 cycle after the address stage
        // hits the last cell). With the legacy comb-read passthrough,
        // end_of_frame_dec == end_of_frame and existing TBs are unaffected.
        ev_valid    <= 1'b1;
        ev_subsys   <= SUBSYS_PLAT;
        ev_event    <= EV_MODE;
        ev_arg0     <= 64'(frame_count);                      // index of the frame just finished
        ev_arg1     <= {32'd0, 32'(H_ACTIVE * V_ACTIVE)};     // active pixels per frame
        ev_arg2     <= '0;
        ev_arg3     <= '0;
        ev_flags    <= '0;

        frame_count <= frame_count + 32'd1;
    end else begin
        // No frame boundary this clock: drop valid, leave the payload.
        ev_valid    <= 1'b0;
    end
end
|
||
|
||
endmodule : gs_pcrtc_stub
|