Files
retroDE_ps2/sim/tb/top/tb_gs_raster_backpressure_stress.sv
T
thejayman77 ec82764bef Initial commit: retroDE_ps2 — first-of-its-kind PS2 GS FPGA core (DE25-Nano / Agilex 5)
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression
(272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps,
and all dump-derived textures/traces) is excluded via .gitignore and stays local.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 20:10:50 -04:00

363 lines
17 KiB
Systemverilog
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// retroDE_ps2 — tb_gs_raster_backpressure_stress
// =============================================================================
// Ch172 backpressure acceptance TB. Demonstrates that:
//
// 1. A GIF stream with more sprites than the raster command FIFO depth
// now completes WITHOUT dropping any primitive (in Ch171 this would
// have been a `push_drop` silent loss).
// 2. While the FIFO is full, `gif_in_ready` (= `dmac_gif_ready`) goes
// LOW for at least one cycle — the upstream stall actually happens.
// 3. The final VRAM contents match the per-sprite expected colors at
// every probed coordinate.
// 4. `raster_overflow` stays LOW the entire run.
// 5. The DMA still asserts done after backpressure clears.
//
// Strategy: build an 8-SPRITE payload procedurally inside the TB (rather
// than `$readmemh` from baked .mem files) and shove it through the same
// `top_psmct32_raster_demo_bram` instance the board uses, with backpressure
// hooked up. With 8 sprites queued against FIFO_DEPTH=2, 6 of the 8 sprites
// would have been dropped under Ch171's no-backpressure regime — under Ch172
// backpressure all 8 complete.
//
// We use a small per-sprite raster footprint (40×30 pixels each, one sprite in
// the upper-left corner of each 80×60 tile of a 4×2 grid, i.e. the top 320×120
// of the 320×240 paint area) so the run finishes well inside the sim watchdog.
// =============================================================================
`timescale 1ns/1ps
module tb_gs_raster_backpressure_stress;
// Video timing and VRAM size: same values as the board-wrapper
// Ch169 + Ch171 + Ch172 overrides.
localparam int H_ACTIVE   = 640,
               H_FRONT    = 16,
               H_SYNC_W   = 96,
               H_BACK     = 48;
localparam int V_ACTIVE   = 480,
               V_FRONT    = 10,
               V_SYNC_W   = 2,
               V_BACK     = 33;
localparam int VRAM_BYTES = 512 * 1024;
// Stress-payload shape: number of sprites and the pixel size of each one.
localparam int N_SPRITES  = 8,
               SPRITE_W   = 40,
               SPRITE_H   = 30;
// Sprite placement grid: sprites are laid out 4 per row on an 80x60 tile
// pitch. Each sprite is anchored at the upper-left corner of its tile and is
// smaller than the tile (40x30), so neighbouring sprites never overlap.
//
// sprite_x0: left edge (pixels) of sprite `s`.
function automatic int sprite_x0(input int s);
    int col;
    col = s % 4;
    return col * 80;
endfunction
// sprite_y0: top edge (pixels) of sprite `s`.
function automatic int sprite_y0(input int s);
    int row;
    row = s / 4;
    return row * 60;
endfunction
// sprite_x1: right-most pixel column of sprite `s` (left edge plus width,
// expressed as an inclusive coordinate).
function automatic int sprite_x1(input int s);
    int left;
    left = sprite_x0(s);
    return left + (SPRITE_W - 1);
endfunction
// sprite_y1: bottom-most pixel row of sprite `s` (top edge plus height,
// expressed as an inclusive coordinate).
function automatic int sprite_y1(input int s);
    int top;
    top = sprite_y0(s);
    return top + (SPRITE_H - 1);
endfunction
// Per-sprite colour table (red channel). Together with sprite_g/sprite_b this
// gives each of the 8 sprites a unique RGB triple, so a VRAM probe can tell
// which sprite painted a pixel. Indices outside 0..7 return 0.
function automatic logic [7:0] sprite_r(input int s);
    case (s)
        0, 3, 4, 6: return 8'hFF;
        7:          return 8'h80;
        default:    return 8'h00;   // sprites 1, 2, 5 and any out-of-range index
    endcase
endfunction
// Per-sprite colour table (green channel). Indices outside 0..7 return 0.
function automatic logic [7:0] sprite_g(input int s);
    case (s)
        1, 3, 5: return 8'hFF;
        6, 7:    return 8'h80;
        default: return 8'h00;   // sprites 0, 2, 4 and any out-of-range index
    endcase
endfunction
// Per-sprite colour table (blue channel). Indices outside 0..7 return 0.
function automatic logic [7:0] sprite_b(input int s);
    case (s)
        2, 4, 5: return 8'hFF;
        7:       return 8'h80;
        default: return 8'h00;   // sprites 0, 1, 3, 6 and any out-of-range index
    endcase
endfunction
// =========================================================
// DUT
// =========================================================
// Clock and reset driven by the testbench.
logic clk;
logic rst_n;
// Free-running clock: starts low and toggles every 5 time units
// (10 ns period with the 1 ns timescale).
initial begin
    clk = 1'b0;
    forever #5 clk = ~clk;
end
// Start strobe driven by the testbench.
logic core_go;
// DUT outputs: video bus ...
logic [7:0] r;
logic [7:0] g;
logic [7:0] b;
logic hsync;
logic vsync;
logic de;
// ... and status flags checked at the end of the run.
logic core_halt;
logic dma_done_seen;
logic frame_seen;
top_psmct32_raster_demo_bram #(
    // Horizontal timing
    .H_ACTIVE          (H_ACTIVE),
    .H_FRONT           (H_FRONT),
    .H_SYNC            (H_SYNC_W),
    .H_BACK            (H_BACK),
    // Vertical timing
    .V_ACTIVE          (V_ACTIVE),
    .V_FRONT           (V_FRONT),
    .V_SYNC            (V_SYNC_W),
    .V_BACK            (V_BACK),
    // Memory
    .VRAM_BYTES        (VRAM_BYTES),
    // Ch252 — same setting as the hardware build profile (PSMCT32 only,
    // no PSMT4 RMW); needed to clear the replication-tripwire $fatal.
    .VRAM_ENABLE_READ2 (1'b0)
) dut (
    // Clock / reset / start
    .clk             (clk),
    .rst_n           (rst_n),
    .core_go         (core_go),
    // Ch255 — the heartbeat override inputs are held inactive in this
    // stress TB; the override path itself is covered by
    // tb_top_psmct32_raster_demo_bram_ch171.
    .joy_a_pressed_i (1'b0),
    .joy_b_pressed_i (1'b0),
    // Video out
    .r               (r),
    .g               (g),
    .b               (b),
    .hsync           (hsync),
    .vsync           (vsync),
    .de              (de),
    // Status
    .core_halt       (core_halt),
    .dma_done_seen   (dma_done_seen),
    .frame_seen      (frame_seen)
);
// =========================================================
// Observers
// =========================================================
// Sticky flags / counter sampled on every rising clock edge while reset is
// released (rst_n high):
//   - saw_in_ready_low / in_ready_low_cycles track cycles where the GIF's
//     in_ready is low (the upstream stall).
//   - saw_raster_overflow latches if the GS ever raises raster_overflow.
//
// These variables are also cleared by the stimulus `initial` block, i.e. they
// have two writers. IEEE 1800 forbids any other process from writing a
// variable assigned in an always_ff, so a plain clocked `always` is used here;
// simulation behaviour is otherwise identical.
bit saw_in_ready_low;     // backpressure asserted at least once
int in_ready_low_cycles;  // total cycles in_ready was LOW
bit saw_raster_overflow;  // should NEVER fire under Ch172
always @(posedge clk) begin
    if (rst_n) begin
        if (!dut.u_gif.in_ready) begin
            saw_in_ready_low    <= 1'b1;
            in_ready_low_cycles <= in_ready_low_cycles + 1;
        end
        if (dut.u_gs.raster_overflow)
            saw_raster_overflow <= 1'b1;
    end
end
// =========================================================
// PSMCT32 page/block address reference (FBW=5 for 320-wide
// framebuffer, matches Ch171 bake.py + board wrapper).
// =========================================================
// Block number (0..31) inside a page for block row `by` and block column
// `bx`. Only by[1:0] and bx[2:0] are used. The block number is the bit
// interleave {bx[2], by[1], bx[1], by[0], bx[0]}, which produces this layout
// (rows = by, columns = bx):
//    0  1  4  5 16 17 20 21
//    2  3  6  7 18 19 22 23
//    8  9 12 13 24 25 28 29
//   10 11 14 15 26 27 30 31
function automatic int ref_block_idx_ct32(input int by, input int bx);
    logic [4:0] blk;
    blk = {bx[2], by[1], bx[1], by[0], bx[0]};
    return blk;   // zero-extended to int
endfunction
// Reference byte address of pixel (x_v, y_v), computed as
//   page base (64x32-pixel pages, 5 pages per row, 8192 bytes per page)
// + block base (8x8-pixel blocks, 256 bytes each, numbered by
//               ref_block_idx_ct32)
// + offset inside the block (32 bytes per row, 4 bytes per pixel).
function automatic int ref_addr_psmct32(input int x_v, input int y_v);
    int page_no;
    int blk_row, blk_col;
    int in_blk_off;
    page_no    = (y_v / 32) * 5 + (x_v / 64);   // fbw=5
    blk_row    = (y_v % 32) / 8;
    blk_col    = (x_v % 64) / 8;
    in_blk_off = (y_v % 8) * 32 + (x_v % 8) * 4;
    return page_no * 8192
         + ref_block_idx_ct32(blk_row, blk_col) * 256
         + in_blk_off;
endfunction
// =========================================================
// Procedural bootlet + payload preload.
//
// The board path loads its memories with $readmemh. This TB
// instead writes the ee_ram and bios_rom backings directly
// through hierarchical references at time 0, before reset is
// released, so the 8-sprite GIF payload is generated in
// SystemVerilog and no baked .mem file is needed.
//
// The loop/scratch variables below live at module scope on
// purpose: iverilog 12 silently zeroes declarations-with-
// initializers placed inside loop bodies (Ch171 hit this in
// tb_top_..._bram_ch171).
// =========================================================
// Total DMA length in qwords: 6 per sprite (1 GIFtag + 5 A+D).
localparam int QWC_TOTAL = 6 * N_SPRITES;
// Payload / bootlet build scratch.
int build_i;
int build_qw_base;
int build_s;
int build_eop;
// VRAM probe scratch.
int probe_x_v;
int probe_y_v;
int probe_addr;
logic [31:0] probe_got;
logic [31:0] probe_expected;
// Running count of failed acceptance checks.
int errors;
// Build a 128-bit GIFtag qword.
//   bits [14:0]   NLOOP = nloop[14:0]
//   bit  [15]     EOP   = eop[0]
//   bits [57:16]  zero
//   bits [59:58]  FLG   = 0 (PACKED)
//   bits [63:60]  NREG  = 5
//   bits [127:64] REGS  = five 0xE nibbles (0x000E_EEEE)
function automatic logic [127:0] mk_giftag(input int nloop, input int eop);
    logic [63:0] tag_hi;
    logic [63:0] tag_lo;
    tag_hi = 64'h0000_0000_000E_EEEE;
    tag_lo = {4'd5, 2'd0, 42'd0, eop[0], nloop[14:0]};
    return {tag_hi, tag_lo};
endfunction
// Build a 128-bit A+D qword: 64-bit data in bits [63:0], the low 8 bits of
// the register number in bits [71:64], everything above that zero.
function automatic logic [127:0] mk_aplusd(input int reg_num, input logic [63:0] data64);
    logic [127:0] qw;
    qw         = '0;
    qw[63:0]   = data64;
    qw[71:64]  = reg_num[7:0];
    return qw;
endfunction
// Build the 64-bit XYZ2 data word: x[11:0] goes to bits [15:4], y[11:0] to
// bits [31:20]; bits [3:0], [19:16] and [63:32] are zero.
function automatic logic [63:0] mk_xyz2(input int x, input int y);
    return {32'd0, y[11:0], 4'd0, x[11:0], 4'd0};
endfunction
// Build the 64-bit RGBAQ data word: R in bits [7:0], G in [15:8], B in
// [23:16], a constant 0xFF in [31:24], upper 32 bits zero.
function automatic logic [63:0] mk_rgbaq(input logic [7:0] rr, input logic [7:0] gg, input logic [7:0] bb);
    logic [63:0] word;
    word        = '0;
    word[7:0]   = rr;
    word[15:8]  = gg;
    word[23:16] = bb;
    word[31:24] = 8'hFF;
    return word;
endfunction
// GIF register numbers used in the A+D qwords (same values as bake.py).
localparam int RN_PRIM    = 'h00;
localparam int RN_RGBAQ   = 'h01;
localparam int RN_XYZ2    = 'h05;
localparam int RN_FRAME_1 = 'h4C;
// FRAME_1 data word: the value 5 placed in bits [21:16] (FBW=5), all other bits 0.
localparam logic [63:0] FRAME_1_PSMCT32_FBW5 = 64'd5 << 16;
// PRIM data word selecting the sprite primitive.
localparam logic [63:0] PRIM_SPRITE = 64'h6;
// EE bootlet encoders. The bootlet has the same shape as bake.py's Ch171
// bootlet (DISPLAY1_hi for 320×240, DISPFB1 FBW=5, then kick DMAC ch2 with
// QWC = N_SPRITES * 6); each instruction is a 32-bit MIPS word.
//
// enc_lui: opcode 0x0F in [31:26], zero in [25:21], rt[4:0] in [20:16],
// imm[15:0] in [15:0].
function automatic logic [31:0] enc_lui(input int rt, input int imm);
    return {6'h0F, 5'd0, rt[4:0], imm[15:0]};
endfunction
// enc_ori: opcode 0x0D in [31:26], rs[4:0] in [25:21], rt[4:0] in [20:16],
// imm[15:0] in [15:0].
function automatic logic [31:0] enc_ori(input int rt, input int rs, input int imm);
    return {6'h0D, rs[4:0], rt[4:0], imm[15:0]};
endfunction
// enc_sw: opcode 0x2B in [31:26], rs[4:0] (base register) in [25:21],
// rt[4:0] (register being stored) in [20:16], imm[15:0] in [15:0].
function automatic logic [31:0] enc_sw(input int rt, input int rs, input int imm);
    return {6'h2B, rs[4:0], rt[4:0], imm[15:0]};
endfunction
// Main stimulus + acceptance checks.
//   1. Preload the sprite payload into dut.u_ram.mem and the EE bootlet into
//      dut.u_bios.mem (time 0, before reset is released).
//   2. Reset, pulse core_go for one clock, then wait for core_halt,
//      dma_done_seen, and for xfer_busy / raster_active to drop.
//   3. Check backpressure, overflow, VRAM contents and status latches, print
//      a one-line summary plus PASS/FAIL, and finish.
initial begin
    // ---- Build the 8-sprite payload directly into ee_ram_stub ----
    // PAYLOAD_MADR = 0x100 = qword index 16 (16-byte qwords).
    // Each sprite is 6 qwords: GIFtag, PRIM, FRAME_1, RGBAQ, XYZ2 (corner 0),
    // XYZ2 (corner 1). Only the last sprite's GIFtag carries EOP.
    build_qw_base = 16;
    for (build_s = 0; build_s < N_SPRITES; build_s++) begin
        build_eop = (build_s == N_SPRITES - 1) ? 1 : 0;
        dut.u_ram.mem[build_qw_base + build_s*6 + 0] = mk_giftag(1, build_eop);
        dut.u_ram.mem[build_qw_base + build_s*6 + 1] = mk_aplusd(RN_PRIM, PRIM_SPRITE);
        dut.u_ram.mem[build_qw_base + build_s*6 + 2] = mk_aplusd(RN_FRAME_1, FRAME_1_PSMCT32_FBW5);
        dut.u_ram.mem[build_qw_base + build_s*6 + 3] = mk_aplusd(RN_RGBAQ,
            mk_rgbaq(sprite_r(build_s), sprite_g(build_s), sprite_b(build_s)));
        dut.u_ram.mem[build_qw_base + build_s*6 + 4] = mk_aplusd(RN_XYZ2,
            mk_xyz2(sprite_x0(build_s), sprite_y0(build_s)));
        dut.u_ram.mem[build_qw_base + build_s*6 + 5] = mk_aplusd(RN_XYZ2,
            mk_xyz2(sprite_x1(build_s), sprite_y1(build_s)));
    end

    // ---- Build the EE bootlet directly into bios_rom_stub ----
    // (Mirrors bake.py's bootlet_for_display1_hi() with FBW=5 +
    // DW=319/DH=239 and QWC=N_SPRITES*6.)
    dut.u_bios.mem[0]  = enc_lui( 1, 16'h1200);            // r1 = 0x1200_0000
    dut.u_bios.mem[1]  = enc_lui( 2, 16'h0000);            // r2 = 0
    dut.u_bios.mem[2]  = enc_ori( 2,  2, 16'h0A00);        // DISPFB1 = FBW=5, PSM=PSMCT32
    dut.u_bios.mem[3]  = enc_sw ( 2,  1, 16'h0070);        // *DISPFB1 = r2
    dut.u_bios.mem[4]  = enc_sw ( 0,  1, 16'h0080);        // *DISPLAY1_lo = 0
    dut.u_bios.mem[5]  = enc_lui( 2, 16'h000E);            // DISPLAY1_hi upper
    dut.u_bios.mem[6]  = enc_ori( 2,  2, 16'hF13F);        // DISPLAY1_hi lower (DW=319/DH=239)
    dut.u_bios.mem[7]  = enc_sw ( 2,  1, 16'h0084);        // *DISPLAY1_hi = r2
    dut.u_bios.mem[8]  = enc_ori( 2,  0, 16'h0001);        // r2 = PMODE.EN1
    dut.u_bios.mem[9]  = enc_sw ( 2,  1, 16'h0000);        // *PMODE = r2
    dut.u_bios.mem[10] = enc_lui(10, 16'h1000);            // r10 = 0x1000_0000
    dut.u_bios.mem[11] = enc_ori(10, 10, 16'hA000);        // r10 = DMAC ch2 base
    dut.u_bios.mem[12] = enc_ori(11,  0, 16'h0100);        // r11 = PAYLOAD_MADR
    dut.u_bios.mem[13] = enc_sw (11, 10, 16'h0010);        // *MADR = r11
    dut.u_bios.mem[14] = enc_ori(11,  0, QWC_TOTAL[15:0]); // r11 = QWC = N_SPRITES*6
    dut.u_bios.mem[15] = enc_sw (11, 10, 16'h0020);        // *QWC = r11
    dut.u_bios.mem[16] = enc_ori(11,  0, 16'h0001);        // r11 = CHCR.start
    dut.u_bios.mem[17] = enc_sw (11, 10, 16'h0000);        // *CHCR = r11
    dut.u_bios.mem[18] = 32'h0000_000C;                    // syscall (halt)
    // Zero pad the rest so undefined memory reads stay quiet.
    for (build_i = 19; build_i < 1024; build_i++)
        dut.u_bios.mem[build_i] = 32'h0000_0000;

    // ---- Clear the scoreboard, then reset and start the core ----
    errors              = 0;
    saw_in_ready_low    = 1'b0;
    in_ready_low_cycles = 0;
    saw_raster_overflow = 1'b0;
    rst_n   = 1'b0;
    core_go = 1'b0;
    repeat (10) @(posedge clk);
    rst_n = 1'b1;
    repeat (4) @(posedge clk);
    // One-clock core_go pulse, driven on falling edges.
    @(negedge clk); core_go = 1'b1;
    @(negedge clk); core_go = 1'b0;

    // Wait for the EE bootlet to finish + DMAC/GIF/raster to drain.
    wait (core_halt == 1'b1);
    repeat (4) @(posedge clk);
    wait (dma_done_seen == 1'b1);
    repeat (10) @(posedge clk);
    if (dut.xfer_busy == 1'b1) wait (dut.xfer_busy == 1'b0);
    if (dut.u_gs.raster_active == 1'b1) wait (dut.u_gs.raster_active == 1'b0);
    repeat (40) @(posedge clk);

    // -----------------------------------------------------
    // Acceptance check 1 — backpressure was observed.
    // The sticky flag and the cycle counter are set together by the
    // observer, so they are tested as ONE condition: a missing stall is
    // reported once, with a message, and counted once.
    // -----------------------------------------------------
    if (!saw_in_ready_low || in_ready_low_cycles == 0) begin
        $error("[bp] in_ready never went low — backpressure did not assert despite 8 sprites in flight");
        errors = errors + 1;
    end

    // -----------------------------------------------------
    // Acceptance check 2 — no raster_overflow.
    // -----------------------------------------------------
    if (saw_raster_overflow) begin
        $error("[bp] raster_overflow latched during stress — at least one primitive was dropped");
        errors = errors + 1;
    end

    // -----------------------------------------------------
    // Acceptance check 3 — every sprite's center pixel
    // landed in VRAM with the right color.
    // -----------------------------------------------------
    for (build_s = 0; build_s < N_SPRITES; build_s++) begin
        probe_x_v      = sprite_x0(build_s) + SPRITE_W/2;
        probe_y_v      = sprite_y0(build_s) + SPRITE_H/2;
        probe_addr     = ref_addr_psmct32(probe_x_v, probe_y_v);
        probe_expected = {8'hFF, sprite_b(build_s), sprite_g(build_s), sprite_r(build_s)};
        // probe_addr is a byte address; the VRAM array is indexed per 32-bit word.
        probe_got      = dut.u_vram.mem[probe_addr >> 2];
        if (probe_got !== probe_expected) begin
            $error("[bp] sprite %0d center (%0d,%0d) got 0x%08x expected 0x%08x",
                   build_s, probe_x_v, probe_y_v, probe_got, probe_expected);
            errors = errors + 1;
        end
    end

    // -----------------------------------------------------
    // Status latches.
    // -----------------------------------------------------
    if (!core_halt)     begin $error("[bp] core_halt low at end");        errors = errors + 1; end
    if (!dma_done_seen) begin $error("[bp] dma_done_seen never latched"); errors = errors + 1; end
    if (!frame_seen)    begin $error("[bp] frame_seen never latched");    errors = errors + 1; end

    // Summary line (includes the measured stall duration) and verdict.
    $display("[tb_gs_raster_backpressure_stress] sprites=%0d FIFO_DEPTH=%0d in_ready_low_cycles=%0d errors=%0d",
             N_SPRITES, dut.u_gs.FIFO_DEPTH, in_ready_low_cycles, errors);
    if (errors == 0) $display("[tb_gs_raster_backpressure_stress] PASS");
    else             $display("[tb_gs_raster_backpressure_stress] FAIL");
    $finish;
end
// Watchdog — 8 sprites × 1200 raster cycles each + scanout + slack.
// Ends the simulation after 30,000,000 time units (30 ms with the 1 ns
// timescale) if the main stimulus block has not already called $finish.
// A timeout prints the same FAIL verdict line the main block uses, so a log
// scan for the verdict does not miss a hung run.
initial begin
    #30_000_000;
    $error("[tb_gs_raster_backpressure_stress] TIMEOUT");
    $display("[tb_gs_raster_backpressure_stress] FAIL");
    $finish;
end
endmodule : tb_gs_raster_backpressure_stress