ec82764bef
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
232 lines
12 KiB
Systemverilog
232 lines
12 KiB
Systemverilog
// retroDE_ps2 — gs_swizzle_psmct16_stub (Ch125)
//
// Pure-combinational PSMCT16 page/block/column swizzle: maps a
// pixel coordinate (x, y) within a framebuffer at (FBP, FBW) to
// its physical VRAM byte address using the real PS2 GS PSMCT16
// layout.  Mirrors Ch119's `gs_swizzle_psmct32_stub` shape but
// with PSMCT16's 4-cols × 8-rows page block grid (represented
// as `blockTable16[8][4]` indexed `[block_y][block_x]`) and the
// within-block column table that PSMCT32 didn't need (CT32
// within-block IS row-major halfwords by accident; CT16 is not).
//
// THIS MODULE IS NOT YET WIRED INTO gs_pcrtc_stub /
// gif_image_xfer_stub / gs_stub.  Future chapters will wire it
// behind a `PSMCT16_SWIZZLE`-style parameter gate, mirroring the
// PSMCT32 progression (Ch120 read side → Ch121 image-xfer write
// side → Ch122 raster write side → Ch123/Ch124 e2e demos).
// Default-off keeps the legacy linear PSMCT16 TBs (Ch94, Ch95,
// Ch103, Ch116) on the linear path.
//
// SOURCE-TABLE PROVENANCE (per Codex's Ch125 guidance):
//   blockTable16  — pcsx2/GS/GSTables.cpp lines 29–39, master
//                   HEAD commit 3d71e310 (file-touch commit
//                   d983b2b0, 2026-01-12).  8 rows × 4 cols,
//                   indexed [block_y][block_x].
//   columnTable16 — pcsx2/GS/GSTables.cpp lines 91–109, same
//                   commit.  8 rows × 16 cols, indexed [yb][xb],
//                   values are halfword-within-block (0..127).
//   Cross-check   — older GSdx (Debian pcsx2 1.5.0~gfc1d9aef0)
//                   PixelAddressOrg16(x, y, bp, bw) =
//                     (BlockNumber16(x, y, bp, bw) << 7) +
//                     columnTable16[y & 7][x & 15], with
//                   BlockNumber16 = bp + ((y>>1) & ~0x1f)*bw +
//                                   ((x>>1) & ~0x1f) +
//                                   blockTable16[(y>>3)&7][(x>>4)&3].
//                   The `<< 7` confirms columnTable16 is in
//                   halfword units (block = 128 halfwords).
//                   Multiply the final value by 2 for a byte address.
//                   PCSX2's `bp` is in 256-byte block-pointer
//                   units; in our FBP (2048-byte) units,
//                   bp = FBP * 8, so bp*256 = FBP*2048.
//
// NOTE on PCSX2 license: the PCSX2 project is GPL-3.0+.  This
// stub re-expresses the same PSMCT16 swizzle math in
// SystemVerilog as a hardware contract — the values in the
// blockTable16 / columnTable16 case statements come from PCSX2
// source and represent the PS2 hardware layout itself (not
// PCSX2-original creative content).  The retroDE_ps2 project
// authors should consider whether this provenance affects
// licensing for downstream consumers; from an engineering
// correctness standpoint, locking against the canonical source
// is the only way to be byte-accurate to real PS2 VRAM.
//
// Real PS2 PSMCT16 layout:
//   - VRAM is 4 MiB total, organized in 8 KiB pages.
//   - Each page is 64×64 PSMCT16 pixels (= 64*64*2 = 8192 bytes).
//     2× as many pixels per page as PSMCT32 (which has 64×32 px)
//     because each PSMCT16 pixel is 2 bytes vs CT32's 4.
//   - Each page is divided into a 4×8 grid of blocks (4 cols of
//     blocks across, 8 rows down).  Each block is 16×8 PSMCT16
//     pixels (= 16*8*2 = 256 bytes).  4×8 = 32 blocks/page.
//   - Block ordering within a page follows blockTable16, which
//     differs from blockTable32 because the grid shape is
//     different (4 cols × 8 rows for CT16 vs 8 cols × 4 rows
//     for CT32).
//   - Within a block, halfword placement follows columnTable16:
//     a 16×8 → 128-entry permutation that organizes the 4
//     internal 16×2-pixel sub-columns and interleaves the two
//     pixel rows per sub-column.
//
// Address formula (FBP in 2048-byte units; FBW in 64-pixel
// units; addr in bytes):
//   page_x     = x / 64
//   page_y     = y / 64
//   page_index = page_y * FBW + page_x
//   page_base  = FBP*2048 + page_index*8192
//
//   block_x_in_page = (x % 64) / 16      // 0..3
//   block_y_in_page = (y % 64) / 8       // 0..7
//   block_idx  = blockTable16[block_y_in_page][block_x_in_page]
//   block_base = page_base + block_idx*256
//
//   xb     = x % 16
//   yb     = y % 8
//   hw_idx = columnTable16[yb][xb]       // 0..127
//   addr   = block_base + hw_idx*2

`timescale 1ns/1ps
|
||
|
||
module gs_swizzle_psmct16_stub
(
    // Frame base pointer.  This stub scales it by 2048 bytes (fbp << 11).
    // NOTE(review): the GS FRAME.FBP register field is specified in units
    // of 2048 *words* (8192 bytes); confirm that callers hand this port a
    // value already expressed in the 2048-byte units used here.
    input  logic [8:0]  fbp,
    input  logic [5:0]  fbw,    // FBW — frame width, in 64-pixel units
    input  logic [11:0] x,      // pixel column within the framebuffer
    input  logic [11:0] y,      // pixel row within the framebuffer
    output logic [31:0] addr    // swizzled byte address of pixel (x, y)
);

    // --------------------------------------------------------------
    // blockTable16 lookup: block position inside a page -> block
    // number inside that page (0..31).
    //   by : block row within the page    (0..7)
    //   bx : block column within the page (0..3)
    // Every one of the 32 positions has its own case item.  `default`
    // is therefore reachable only when `by`/`bx` carry X/Z, and it
    // returns X so an unknown coordinate cannot masquerade as the
    // (7,3) entry in 4-state simulation.
    // --------------------------------------------------------------
    function automatic logic [4:0] swizzle_psmct16(
        input logic [2:0] by,
        input logic [1:0] bx);
        case ({by, bx})
            5'd0:    return 5'd0;    // (0,0)
            5'd1:    return 5'd2;    // (0,1)
            5'd2:    return 5'd8;    // (0,2)
            5'd3:    return 5'd10;   // (0,3)
            5'd4:    return 5'd1;    // (1,0)
            5'd5:    return 5'd3;    // (1,1)
            5'd6:    return 5'd9;    // (1,2)
            5'd7:    return 5'd11;   // (1,3)
            5'd8:    return 5'd4;    // (2,0)
            5'd9:    return 5'd6;    // (2,1)
            5'd10:   return 5'd12;   // (2,2)
            5'd11:   return 5'd14;   // (2,3)
            5'd12:   return 5'd5;    // (3,0)
            5'd13:   return 5'd7;    // (3,1)
            5'd14:   return 5'd13;   // (3,2)
            5'd15:   return 5'd15;   // (3,3)
            5'd16:   return 5'd16;   // (4,0)
            5'd17:   return 5'd18;   // (4,1)
            5'd18:   return 5'd24;   // (4,2)
            5'd19:   return 5'd26;   // (4,3)
            5'd20:   return 5'd17;   // (5,0)
            5'd21:   return 5'd19;   // (5,1)
            5'd22:   return 5'd25;   // (5,2)
            5'd23:   return 5'd27;   // (5,3)
            5'd24:   return 5'd20;   // (6,0)
            5'd25:   return 5'd22;   // (6,1)
            5'd26:   return 5'd28;   // (6,2)
            5'd27:   return 5'd30;   // (6,3)
            5'd28:   return 5'd21;   // (7,0)
            5'd29:   return 5'd23;   // (7,1)
            5'd30:   return 5'd29;   // (7,2)
            5'd31:   return 5'd31;   // (7,3)
            default: return 5'bx;    // X/Z selector: propagate unknown
        endcase
    endfunction

    // --------------------------------------------------------------
    // columnTable16 lookup: pixel position inside a block -> halfword
    // index inside that block (0..127).
    //   yb : pixel row within the block    (0..7)
    //   xb : pixel column within the block (0..15)
    //   yb=0:   0   2   8  10  16  18  24  26   1   3   9  11  17  19  25  27
    //   yb=1:   4   6  12  14  20  22  28  30   5   7  13  15  21  23  29  31
    //   yb=2:  32  34  40  42  48  50  56  58  33  35  41  43  49  51  57  59
    //   yb=3:  36  38  44  46  52  54  60  62  37  39  45  47  53  55  61  63
    //   yb=4:  64  66  72  74  80  82  88  90  65  67  73  75  81  83  89  91
    //   yb=5:  68  70  76  78  84  86  92  94  69  71  77  79  85  87  93  95
    //   yb=6:  96  98 104 106 112 114 120 122  97  99 105 107 113 115 121 123
    //   yb=7: 100 102 108 110 116 118 124 126 101 103 109 111 117 119 125 127
    // As in the block lookup, all 128 positions are listed explicitly
    // and `default` returns X so unknown inputs stay visible.
    // --------------------------------------------------------------
    function automatic logic [6:0] col_idx_psmct16(
        input logic [2:0] yb,
        input logic [3:0] xb);
        case ({yb, xb})
            // yb=0
            7'd0:   return 7'd0;    7'd1:   return 7'd2;    7'd2:   return 7'd8;    7'd3:   return 7'd10;
            7'd4:   return 7'd16;   7'd5:   return 7'd18;   7'd6:   return 7'd24;   7'd7:   return 7'd26;
            7'd8:   return 7'd1;    7'd9:   return 7'd3;    7'd10:  return 7'd9;    7'd11:  return 7'd11;
            7'd12:  return 7'd17;   7'd13:  return 7'd19;   7'd14:  return 7'd25;   7'd15:  return 7'd27;
            // yb=1
            7'd16:  return 7'd4;    7'd17:  return 7'd6;    7'd18:  return 7'd12;   7'd19:  return 7'd14;
            7'd20:  return 7'd20;   7'd21:  return 7'd22;   7'd22:  return 7'd28;   7'd23:  return 7'd30;
            7'd24:  return 7'd5;    7'd25:  return 7'd7;    7'd26:  return 7'd13;   7'd27:  return 7'd15;
            7'd28:  return 7'd21;   7'd29:  return 7'd23;   7'd30:  return 7'd29;   7'd31:  return 7'd31;
            // yb=2
            7'd32:  return 7'd32;   7'd33:  return 7'd34;   7'd34:  return 7'd40;   7'd35:  return 7'd42;
            7'd36:  return 7'd48;   7'd37:  return 7'd50;   7'd38:  return 7'd56;   7'd39:  return 7'd58;
            7'd40:  return 7'd33;   7'd41:  return 7'd35;   7'd42:  return 7'd41;   7'd43:  return 7'd43;
            7'd44:  return 7'd49;   7'd45:  return 7'd51;   7'd46:  return 7'd57;   7'd47:  return 7'd59;
            // yb=3
            7'd48:  return 7'd36;   7'd49:  return 7'd38;   7'd50:  return 7'd44;   7'd51:  return 7'd46;
            7'd52:  return 7'd52;   7'd53:  return 7'd54;   7'd54:  return 7'd60;   7'd55:  return 7'd62;
            7'd56:  return 7'd37;   7'd57:  return 7'd39;   7'd58:  return 7'd45;   7'd59:  return 7'd47;
            7'd60:  return 7'd53;   7'd61:  return 7'd55;   7'd62:  return 7'd61;   7'd63:  return 7'd63;
            // yb=4
            7'd64:  return 7'd64;   7'd65:  return 7'd66;   7'd66:  return 7'd72;   7'd67:  return 7'd74;
            7'd68:  return 7'd80;   7'd69:  return 7'd82;   7'd70:  return 7'd88;   7'd71:  return 7'd90;
            7'd72:  return 7'd65;   7'd73:  return 7'd67;   7'd74:  return 7'd73;   7'd75:  return 7'd75;
            7'd76:  return 7'd81;   7'd77:  return 7'd83;   7'd78:  return 7'd89;   7'd79:  return 7'd91;
            // yb=5
            7'd80:  return 7'd68;   7'd81:  return 7'd70;   7'd82:  return 7'd76;   7'd83:  return 7'd78;
            7'd84:  return 7'd84;   7'd85:  return 7'd86;   7'd86:  return 7'd92;   7'd87:  return 7'd94;
            7'd88:  return 7'd69;   7'd89:  return 7'd71;   7'd90:  return 7'd77;   7'd91:  return 7'd79;
            7'd92:  return 7'd85;   7'd93:  return 7'd87;   7'd94:  return 7'd93;   7'd95:  return 7'd95;
            // yb=6
            7'd96:  return 7'd96;   7'd97:  return 7'd98;   7'd98:  return 7'd104;  7'd99:  return 7'd106;
            7'd100: return 7'd112;  7'd101: return 7'd114;  7'd102: return 7'd120;  7'd103: return 7'd122;
            7'd104: return 7'd97;   7'd105: return 7'd99;   7'd106: return 7'd105;  7'd107: return 7'd107;
            7'd108: return 7'd113;  7'd109: return 7'd115;  7'd110: return 7'd121;  7'd111: return 7'd123;
            // yb=7
            7'd112: return 7'd100;  7'd113: return 7'd102;  7'd114: return 7'd108;  7'd115: return 7'd110;
            7'd116: return 7'd116;  7'd117: return 7'd118;  7'd118: return 7'd124;  7'd119: return 7'd126;
            7'd120: return 7'd101;  7'd121: return 7'd103;  7'd122: return 7'd109;  7'd123: return 7'd111;
            7'd124: return 7'd117;  7'd125: return 7'd119;  7'd126: return 7'd125;  7'd127: return 7'd127;
            default: return 7'bx;   // X/Z selector: propagate unknown
        endcase
    endfunction

    // Split the pixel coordinate into page / block-in-page /
    // pixel-in-block fields.  All of these are plain bit slices of x/y.
    logic [11:0] page_x;
    logic [11:0] page_y;
    logic [2:0]  by;
    logic [1:0]  bx;
    logic [3:0]  xb;
    logic [2:0]  yb;

    assign page_x = x >> 6;     // x / 64   (page column)
    assign page_y = y >> 6;     // y / 64   (page row)
    assign by     = y[5:3];     // (y % 64) / 8   -> block row    0..7
    assign bx     = x[5:4];     // (x % 64) / 16  -> block column 0..3
    assign xb     = x[3:0];     // x % 16         -> pixel column in block
    assign yb     = y[2:0];     // y % 8          -> pixel row in block

    // Table lookups.
    logic [4:0] block_idx;      // block number within the page
    assign block_idx = swizzle_psmct16(by, bx);

    logic [6:0] hw_idx;         // halfword number within the block
    assign hw_idx = col_idx_psmct16(yb, xb);

    // Address assembly.  Operands are zero-extended to 32 bits before
    // the multiply/shift so no intermediate result is truncated.
    logic [31:0] page_base;
    logic [31:0] block_base;
    logic [31:0] byte_in_block;
    logic [31:0] page_index;

    assign page_index    = ({20'd0, page_y} * {26'd0, fbw}) + {20'd0, page_x};
    assign page_base     = ({23'd0, fbp} << 11) + (page_index << 13);   // fbp*2048 + page_index*8192
    assign block_base    = page_base + ({27'd0, block_idx} << 8);       // + block_idx*256
    assign byte_in_block = {24'd0, hw_idx, 1'b0};                       // hw_idx*2 (2 bytes per pixel)

    assign addr = block_base + byte_in_block;

endmodule : gs_swizzle_psmct16_stub
|