ec82764bef
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
363 lines
17 KiB
Systemverilog
363 lines
17 KiB
Systemverilog
// retroDE_ps2 — tb_gs_raster_backpressure_stress
|
||
// =============================================================================
|
||
// Ch172 backpressure acceptance TB. Demonstrates that:
|
||
//
|
||
// 1. A GIF stream with more sprites than the raster command FIFO depth
|
||
// now completes WITHOUT dropping any primitive (in Ch171 this would
|
||
// have been a `push_drop` silent loss).
|
||
// 2. While the FIFO is full, `gif_in_ready` (= `dmac_gif_ready`) goes
|
||
// LOW for at least one cycle — the upstream stall actually happens.
|
||
// 3. The final VRAM contents match the per-sprite expected colors at
|
||
// every probed coordinate.
|
||
// 4. `raster_overflow` stays LOW the entire run.
|
||
// 5. The DMA still asserts done after backpressure clears.
|
||
//
|
||
// Strategy: build an 8-SPRITE payload procedurally inside the TB (rather
|
||
// than `$readmemh` from baked .mem files) and shove it through the same
|
||
// `top_psmct32_raster_demo_bram` instance the board uses, with backpressure
|
||
// hooked up. 8 sprites against FIFO_DEPTH=2 means 6 of the 8 sprites would have
|
||
// dropped under Ch171's no-backpressure regime — under Ch172 backpressure
|
||
// all 8 complete.
|
||
//
|
||
// We use a smaller per-sprite raster footprint (40×30 pixels each, 8 sprites
|
||
// tiled across the 320×240 paint area) so the sim watchdog is comfortable.
|
||
// =============================================================================
|
||
|
||
`timescale 1ns/1ps
|
||
|
||
module tb_gs_raster_backpressure_stress;
|
||
|
||
// Display timing and VRAM size, kept identical to the board-wrapper
// Ch169 + Ch171 + Ch172 overrides.
localparam int H_ACTIVE   = 640,
               H_FRONT    = 16,
               H_SYNC_W   = 96,
               H_BACK     = 48;
localparam int V_ACTIVE   = 480,
               V_FRONT    = 10,
               V_SYNC_W   = 2,
               V_BACK     = 33;
localparam int VRAM_BYTES = 512 * 1024;

// Stress payload geometry: sprite count and per-sprite size in pixels.
localparam int N_SPRITES  = 8,
               SPRITE_W   = 40,
               SPRITE_H   = 30;
|
||
|
||
// Tile layout: sprites sit on a grid 4 tiles wide x 2 tiles high, each
// tile being 80x60 of the 320x240 paint region. Only the upper-left
// 40x30 of a tile is filled (the sprite), so adjacent tiles never overlap.
//
// sprite_x0: X coordinate of the left edge of sprite `s`
// (column = s % 4, 80-pixel column pitch).
function automatic int sprite_x0(input int s);
    sprite_x0 = (s % 4) * 80;
endfunction
|
||
// sprite_y0: Y coordinate of the top edge of sprite `s`
// (row = s / 4, 60-pixel row pitch).
function automatic int sprite_y0(input int s);
    sprite_y0 = (s / 4) * 60;
endfunction
|
||
// sprite_x1: X coordinate of the right edge of sprite `s`, i.e. the left
// edge plus SPRITE_W - 1.
function automatic int sprite_x1(input int s);
    sprite_x1 = sprite_x0(s) + (SPRITE_W - 1);
endfunction
|
||
// sprite_y1: Y coordinate of the bottom edge of sprite `s`, i.e. the top
// edge plus SPRITE_H - 1.
function automatic int sprite_y1(input int s);
    sprite_y1 = sprite_y0(s) + (SPRITE_H - 1);
endfunction
|
||
|
||
// Each of the 8 sprites gets a distinct (R, G, B) triple so that a VRAM
// probe can tell which sprite produced a pixel.
//
// sprite_r: red component for sprite `s`; 8'h00 for any index outside 0..7.
function automatic logic [7:0] sprite_r(input int s);
    case (s)
        0, 3, 4, 6: return 8'hFF;
        7:          return 8'h80;
        default:    return 8'h00;   // sprites 1, 2, 5 and out-of-range
    endcase
endfunction
|
||
// sprite_g: green component for sprite `s`; 8'h00 for any index outside 0..7.
function automatic logic [7:0] sprite_g(input int s);
    case (s)
        1, 3, 5: return 8'hFF;
        6, 7:    return 8'h80;
        default: return 8'h00;   // sprites 0, 2, 4 and out-of-range
    endcase
endfunction
|
||
// sprite_b: blue component for sprite `s`; 8'h00 for any index outside 0..7.
function automatic logic [7:0] sprite_b(input int s);
    case (s)
        2, 4, 5: return 8'hFF;
        7:       return 8'h80;
        default: return 8'h00;   // sprites 0, 1, 3, 6 and out-of-range
    endcase
endfunction
|
||
|
||
// =========================================================
// DUT clock, reset and pin-level signals
// =========================================================
logic clk, rst_n;

// Free-running clock: starts low and inverts every 5 time units.
initial begin
    clk = 1'b0;
    forever #5 clk = ~clk;
end

// Control/status nets wired to the DUT. `core_go` is pulsed by the
// stimulus process; the three status flags are only read by the TB.
logic       core_go;
logic       core_halt, dma_done_seen, frame_seen;

// Video output nets wired to the DUT.
logic [7:0] r, g, b;
logic       hsync, vsync, de;
|
||
|
||
// Device under test: the same top-level the board uses, with the video
// timing and VRAM size taken from the localparams of this testbench.
top_psmct32_raster_demo_bram #(
    // Horizontal timing
    .H_ACTIVE          (H_ACTIVE),
    .H_FRONT           (H_FRONT),
    .H_SYNC            (H_SYNC_W),
    .H_BACK            (H_BACK),
    // Vertical timing
    .V_ACTIVE          (V_ACTIVE),
    .V_FRONT           (V_FRONT),
    .V_SYNC            (V_SYNC_W),
    .V_BACK            (V_BACK),
    // Memory
    .VRAM_BYTES        (VRAM_BYTES),
    // Ch252 — mirror the hardware build profile (PSMCT32 only,
    // no PSMT4 RMW) to clear the replication-tripwire $fatal.
    .VRAM_ENABLE_READ2 (1'b0)
) dut (
    .clk             (clk),
    .rst_n           (rst_n),
    .core_go         (core_go),
    // Ch255 — heartbeat override inputs are dormant in this
    // stress TB; coverage of the override path itself lives in
    // tb_top_psmct32_raster_demo_bram_ch171.
    .joy_a_pressed_i (1'b0),
    .joy_b_pressed_i (1'b0),
    // Video out
    .r               (r),
    .g               (g),
    .b               (b),
    .hsync           (hsync),
    .vsync           (vsync),
    .de              (de),
    // Status flags checked at the end of the run
    .core_halt       (core_halt),
    .dma_done_seen   (dma_done_seen),
    .frame_seen      (frame_seen)
);
|
||
|
||
// =========================================================
// Observers
// =========================================================
bit saw_in_ready_low;     // backpressure asserted at least once
int in_ready_low_cycles;  // total cycles in_ready was LOW
bit saw_raster_overflow;  // should NEVER fire under Ch172

// Sticky/counting monitors, sampled on every rising clock edge once reset
// has been released.
//
// This is deliberately a plain `always` rather than `always_ff`: the
// stimulus `initial` block also zeroes these three variables before the
// run, and IEEE 1800 does not allow a variable assigned inside an
// always_ff procedure to be written by any other process. Strict
// simulators and linters reject that combination.
always @(posedge clk) begin
    if (rst_n) begin
        // GIF input not ready: the upstream stall is in effect this cycle.
        if (!dut.u_gif.in_ready) begin
            saw_in_ready_low    <= 1'b1;
            in_ready_low_cycles <= in_ready_low_cycles + 1;
        end
        // Latch any overflow indication; it is never cleared afterwards.
        if (dut.u_gs.raster_overflow)
            saw_raster_overflow <= 1'b1;
    end
end
|
||
|
||
// =========================================================
// PSMCT32 page/block address reference (FBW=5 for 320-wide
// framebuffer, matches Ch171 bake.py + board wrapper).
// =========================================================
//
// ref_block_idx_ct32: block index (0..31) inside a page for block row
// `by` (only bits [1:0] are used) and block column `bx` (only bits [2:0]
// are used).
//
// The 4x8 lookup table
//      0  1  4  5 16 17 20 21
//      2  3  6  7 18 19 22 23
//      8  9 12 13 24 25 28 29
//     10 11 14 15 26 27 30 31
// is exactly the bit interleave {bx[2], by[1], bx[1], by[0], bx[0]}.
function automatic int ref_block_idx_ct32(input int by, input int bx);
    logic [4:0] interleaved;
    interleaved = {bx[2], by[1], bx[1], by[0], bx[0]};
    return interleaved;
endfunction
|
||
// ref_addr_psmct32: reference byte address of pixel (x_v, y_v).
//
// The address is the sum of three parts:
//   * page  : pages are 64x32 pixels and 8192 bytes each, laid out 5 per
//             row (fbw=5);
//   * block : blocks are 8x8 pixels and 256 bytes each, ordered inside the
//             page by ref_block_idx_ct32;
//   * pixel : 32 bytes per pixel row and 4 bytes per pixel inside a block.
function automatic int ref_addr_psmct32(input int x_v, input int y_v);
    int page_off, block_off, pixel_off;
    page_off  = ((y_v / 32) * 5 + (x_v / 64)) * 8192;   // fbw=5
    block_off = ref_block_idx_ct32((y_v % 32) / 8, (x_v % 64) / 8) * 256;
    pixel_off = (y_v % 8) * 32 + (x_v % 8) * 4;
    return page_off + block_off + pixel_off;
endfunction
|
||
|
||
// =========================================================
|
||
// Procedural bootlet + payload preload. The board path uses
|
||
// $readmemh; here we bypass that by writing directly into the
|
||
// ee_ram and bios_rom backings via hierarchical refs at time 0
|
||
// (before reset deasserts). This keeps the TB self-contained:
|
||
// 8-sprite GIF payload built in SystemVerilog without a baked
|
||
// .mem.
|
||
//
|
||
// Module-scope ints for the build loop (iverilog 12 quirk —
|
||
// declarations-with-initializers inside loop bodies silently
|
||
// zero-out; Ch171 caught this in tb_top_..._bram_ch171).
|
||
// =========================================================
|
||
// Total DMA transfer length in qwords: every sprite contributes 6 qwords
// (1 GIF tag + 5 A+D register writes).
localparam int QWC_TOTAL = N_SPRITES * 6;

// Module-scope scratch variables for the payload/bootlet build loops.
int build_i, build_qw_base, build_s, build_eop;

// Module-scope scratch variables for the VRAM probe loop.
int          probe_x_v, probe_y_v, probe_addr;
logic [31:0] probe_got, probe_expected;

// Running count of failed acceptance checks.
int errors;
|
||
|
||
// mk_giftag: assemble a 128-bit GIF tag qword via concatenation.
//
// Lower 64 bits, MSB first:
//   [63:60] NREG  = 5
//   [59:58] FLG   = 0 (PACKED)
//   [57:16] zero
//   [15]    EOP   = eop[0]
//   [14:0]  NLOOP = nloop[14:0]
// Upper 64 bits = REGS = five nibbles of 0xE (64'h0000_0000_000E_EEEE).
function automatic logic [127:0] mk_giftag(input int nloop, input int eop);
    return { 64'h0000_0000_000E_EEEE,   // REGS
             4'd5,                      // NREG
             2'd0,                      // FLG
             42'd0,
             eop[0],                    // EOP
             nloop[14:0] };             // NLOOP
endfunction
|
||
// mk_aplusd: assemble a 128-bit A+D qword. The 64-bit payload goes in
// bits [63:0], the low byte of `reg_num` in bits [71:64], and bits
// [127:72] are zero.
function automatic logic [127:0] mk_aplusd(input int reg_num, input logic [63:0] data64);
    logic [127:0] qw;
    qw        = '0;
    qw[63:0]  = data64;
    qw[71:64] = reg_num[7:0];
    return qw;
endfunction
|
||
// mk_xyz2: build the 64-bit XYZ2 payload for integer pixel coordinates.
// x[11:0] lands in bits [15:4] and y[11:0] in bits [31:20]; bits [3:0],
// [19:16] and [63:32] are all zero.
function automatic logic [63:0] mk_xyz2(input int x, input int y);
    return { 32'd0, y[11:0], 4'd0, x[11:0], 4'd0 };
endfunction
|
||
// mk_rgbaq: build the 64-bit RGBAQ payload. Byte lanes from the LSB up are
// red, green, blue, then a fixed 8'hFF; the upper 32 bits are zero.
function automatic logic [63:0] mk_rgbaq(input logic [7:0] rr, input logic [7:0] gg, input logic [7:0] bb);
    logic [63:0] payload;
    payload        = '0;
    payload[7:0]   = rr;
    payload[15:8]  = gg;
    payload[23:16] = bb;
    payload[31:24] = 8'hFF;
    return payload;
endfunction
|
||
|
||
// GIF register numbers used in the A+D qwords (match bake.py).
localparam int RN_PRIM    = 8'h00,
               RN_RGBAQ   = 8'h01,
               RN_XYZ2    = 8'h05,
               RN_FRAME_1 = 8'h4C;

// Register payloads shared by every sprite packet.
localparam logic [63:0] FRAME_1_PSMCT32_FBW5 = 64'd5 << 16;  // FBW=5 in bits[21:16]
localparam logic [63:0] PRIM_SPRITE          = 64'd6;
|
||
|
||
// EE bootlet: same shape as bake.py's Ch171 bootlet (with
// DISPLAY1_hi for 320×240, DISPFB1 FBW=5, kick DMAC ch2 with
// QWC = N_SPRITES * 6). Encoded as 32-bit MIPS words by the
// enc_* helpers.
//
// enc_lui: 32-bit word with 6'h0F in bits [31:26], zero in [25:21],
// rt[4:0] in [20:16] and imm[15:0] in [15:0].
function automatic logic [31:0] enc_lui(input int rt, input int imm);
    return { 6'h0F, 5'd0, rt[4:0], imm[15:0] };
endfunction
|
||
// enc_ori: 32-bit word with 6'h0D in bits [31:26], rs[4:0] in [25:21],
// rt[4:0] in [20:16] and imm[15:0] in [15:0].
function automatic logic [31:0] enc_ori(input int rt, input int rs, input int imm);
    return { 6'h0D, rs[4:0], rt[4:0], imm[15:0] };
endfunction
|
||
// enc_sw: 32-bit word with 6'h2B in bits [31:26], rs[4:0] (base register)
// in [25:21], rt[4:0] (stored register) in [20:16] and imm[15:0] in [15:0].
function automatic logic [31:0] enc_sw(input int rt, input int rs, input int imm);
    return { 6'h2B, rs[4:0], rt[4:0], imm[15:0] };
endfunction
|
||
|
||
// Main stimulus + checker process.
//
//   1. Preloads the GIF payload into dut.u_ram.mem and the EE bootlet into
//      dut.u_bios.mem through hierarchical references, before reset is
//      released.
//   2. Applies reset, pulses core_go for one clock, then waits for the
//      core to halt and for the DMA / transfer / raster activity to drain.
//   3. Runs the acceptance checks, prints a summary plus PASS/FAIL, and
//      ends the simulation.
initial begin
    // ---- Build the 8-sprite payload directly into ee_ram_stub ----
    // PAYLOAD_MADR = 0x100 = qword index 16 (16-byte qwords).
    build_qw_base = 16;
    for (build_s = 0; build_s < N_SPRITES; build_s++) begin
        // Only the last packet carries EOP.
        build_eop = (build_s == N_SPRITES - 1) ? 1 : 0;
        // Per sprite: GIF tag, PRIM, FRAME_1, RGBAQ, then the two corner
        // vertices (upper-left, lower-right).
        dut.u_ram.mem[build_qw_base + build_s*6 + 0] = mk_giftag(1, build_eop);
        dut.u_ram.mem[build_qw_base + build_s*6 + 1] = mk_aplusd(RN_PRIM, PRIM_SPRITE);
        dut.u_ram.mem[build_qw_base + build_s*6 + 2] = mk_aplusd(RN_FRAME_1, FRAME_1_PSMCT32_FBW5);
        dut.u_ram.mem[build_qw_base + build_s*6 + 3] = mk_aplusd(RN_RGBAQ,
            mk_rgbaq(sprite_r(build_s), sprite_g(build_s), sprite_b(build_s)));
        dut.u_ram.mem[build_qw_base + build_s*6 + 4] = mk_aplusd(RN_XYZ2,
            mk_xyz2(sprite_x0(build_s), sprite_y0(build_s)));
        dut.u_ram.mem[build_qw_base + build_s*6 + 5] = mk_aplusd(RN_XYZ2,
            mk_xyz2(sprite_x1(build_s), sprite_y1(build_s)));
    end

    // ---- Build the EE bootlet directly into bios_rom_stub ----
    // (Mirrors bake.py's bootlet_for_display1_hi() with FBW=5 +
    // DW=319/DH=239 and QWC=N_SPRITES*6.)
    dut.u_bios.mem[0]  = enc_lui( 1, 16'h1200);             // r1 = 0x1200_0000
    dut.u_bios.mem[1]  = enc_lui( 2, 16'h0000);             // r2 = 0
    dut.u_bios.mem[2]  = enc_ori( 2,  2, 16'h0A00);         // DISPFB1 = FBW=5, PSM=PSMCT32
    dut.u_bios.mem[3]  = enc_sw ( 2,  1, 16'h0070);         // *DISPFB1 = r2
    dut.u_bios.mem[4]  = enc_sw ( 0,  1, 16'h0080);         // *DISPLAY1_lo = 0
    dut.u_bios.mem[5]  = enc_lui( 2, 16'h000E);             // DISPLAY1_hi upper
    dut.u_bios.mem[6]  = enc_ori( 2,  2, 16'hF13F);         // DISPLAY1_hi lower (DW=319/DH=239)
    dut.u_bios.mem[7]  = enc_sw ( 2,  1, 16'h0084);         // *DISPLAY1_hi = r2
    dut.u_bios.mem[8]  = enc_ori( 2,  0, 16'h0001);         // r2 = PMODE.EN1
    dut.u_bios.mem[9]  = enc_sw ( 2,  1, 16'h0000);         // *PMODE = r2
    dut.u_bios.mem[10] = enc_lui(10, 16'h1000);             // r10 = 0x1000_0000
    dut.u_bios.mem[11] = enc_ori(10, 10, 16'hA000);         // r10 = DMAC ch2 base
    dut.u_bios.mem[12] = enc_ori(11,  0, 16'h0100);         // r11 = PAYLOAD_MADR
    dut.u_bios.mem[13] = enc_sw (11, 10, 16'h0010);         // *MADR = r11
    dut.u_bios.mem[14] = enc_ori(11,  0, QWC_TOTAL[15:0]);  // r11 = QWC = N_SPRITES*6
    dut.u_bios.mem[15] = enc_sw (11, 10, 16'h0020);         // *QWC = r11
    dut.u_bios.mem[16] = enc_ori(11,  0, 16'h0001);         // r11 = CHCR.start
    dut.u_bios.mem[17] = enc_sw (11, 10, 16'h0000);         // *CHCR = r11
    dut.u_bios.mem[18] = 32'h0000_000C;                     // syscall (halt)
    // Zero pad the rest so undefined memory reads stay quiet.
    for (build_i = 19; build_i < 1024; build_i++)
        dut.u_bios.mem[build_i] = 32'h0000_0000;

    // ---- Clear the scoreboard and hold the DUT in reset ----
    errors              = 0;
    saw_in_ready_low    = 1'b0;
    in_ready_low_cycles = 0;
    saw_raster_overflow = 1'b0;
    rst_n   = 1'b0;
    core_go = 1'b0;
    repeat (10) @(posedge clk);
    rst_n = 1'b1;
    repeat (4) @(posedge clk);

    // One-clock core_go pulse, changed on the falling edge so it is
    // stable around the rising edge.
    @(negedge clk); core_go = 1'b1;
    @(negedge clk); core_go = 1'b0;

    // Wait for the EE bootlet to finish + DMAC/GIF/raster to drain.
    wait (core_halt == 1'b1);
    repeat (4) @(posedge clk);
    wait (dma_done_seen == 1'b1);
    repeat (10) @(posedge clk);
    if (dut.xfer_busy == 1'b1) wait (dut.xfer_busy == 1'b0);
    if (dut.u_gs.raster_active == 1'b1) wait (dut.u_gs.raster_active == 1'b0);
    repeat (40) @(posedge clk);

    // -----------------------------------------------------
    // Acceptance check 1 — backpressure was observed.
    // The sticky flag and the cycle counter are set together by the
    // observer, so they are checked as ONE condition: a missing stall is
    // reported once, always with a message, and counted as one error.
    // -----------------------------------------------------
    if (!saw_in_ready_low || in_ready_low_cycles == 0) begin
        $error("[bp] in_ready never went low — backpressure did not assert despite 8 sprites in flight");
        errors = errors + 1;
    end

    // -----------------------------------------------------
    // Acceptance check 2 — no raster_overflow.
    // -----------------------------------------------------
    if (saw_raster_overflow) begin
        $error("[bp] raster_overflow latched during stress — at least one primitive was dropped");
        errors = errors + 1;
    end

    // -----------------------------------------------------
    // Acceptance check 3 — every sprite's center pixel
    // landed in VRAM with the right color.
    // -----------------------------------------------------
    for (build_s = 0; build_s < N_SPRITES; build_s++) begin
        probe_x_v      = sprite_x0(build_s) + SPRITE_W/2;
        probe_y_v      = sprite_y0(build_s) + SPRITE_H/2;
        probe_addr     = ref_addr_psmct32(probe_x_v, probe_y_v);
        probe_expected = {8'hFF, sprite_b(build_s), sprite_g(build_s), sprite_r(build_s)};
        // The reference address is in bytes; the VRAM array is indexed
        // here in 32-bit words.
        probe_got      = dut.u_vram.mem[probe_addr >> 2];
        if (probe_got !== probe_expected) begin
            $error("[bp] sprite %0d center (%0d,%0d) got 0x%08x expected 0x%08x",
                   build_s, probe_x_v, probe_y_v, probe_got, probe_expected);
            errors = errors + 1;
        end
    end

    // -----------------------------------------------------
    // Status latches.
    // NOTE(review): nothing above explicitly waits for frame_seen; this
    // check relies on it having latched by the time the drain waits
    // complete — confirm against the DUT's definition of frame_seen.
    // -----------------------------------------------------
    if (!core_halt)     begin $error("[bp] core_halt low at end");         errors = errors + 1; end
    if (!dma_done_seen) begin $error("[bp] dma_done_seen never latched");  errors = errors + 1; end
    if (!frame_seen)    begin $error("[bp] frame_seen never latched");     errors = errors + 1; end

    $display("[tb_gs_raster_backpressure_stress] sprites=%0d FIFO_DEPTH=%0d in_ready_low_cycles=%0d errors=%0d",
             N_SPRITES, dut.u_gs.FIFO_DEPTH, in_ready_low_cycles, errors);
    if (errors == 0) $display("[tb_gs_raster_backpressure_stress] PASS");
    else             $display("[tb_gs_raster_backpressure_stress] FAIL");
    $finish;
end
|
||
|
||
// Watchdog — 8 sprites × 1200 raster cycles each + scanout + slack.
// If the main process has not ended the simulation within 30_000_000
// time units, report a timeout and stop.
initial begin : watchdog
    #(30_000_000);
    $error("[tb_gs_raster_backpressure_stress] TIMEOUT");
    $finish;
end
|
||
|
||
endmodule : tb_gs_raster_backpressure_stress
|