Files
retroDE_ps2/rtl/gif_gs/gs_swizzle_psmct16_stub.sv
thejayman77 ec82764bef Initial commit: retroDE_ps2 — first-of-its-kind PS2 GS FPGA core (DE25-Nano / Agilex 5)
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression
(272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps,
and all dump-derived textures/traces) is excluded via .gitignore and stays local.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 20:10:50 -04:00

232 lines
12 KiB
Systemverilog
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// retroDE_ps2 — gs_swizzle_psmct16_stub (Ch125)
//
// Pure-combinational PSMCT16 page/block/column swizzle: maps a
// pixel coordinate (x, y) within a framebuffer at (FBP, FBW) to
// its physical VRAM byte address using the real PS2 GS PSMCT16
// layout. Mirrors Ch119's `gs_swizzle_psmct32_stub` shape but
// with PSMCT16's 4-cols × 8-rows page block grid (represented
// as `blockTable16[8][4]` indexed `[block_y][block_x]`) and the
// within-block column table that PSMCT32 didn't need (CT32
// within-block IS row-major halfwords by accident; CT16 is not).
//
// THIS MODULE IS NOT YET WIRED INTO gs_pcrtc_stub /
// gif_image_xfer_stub / gs_stub. Future chapters will wire it
// behind a `PSMCT16_SWIZZLE`-style parameter gate, mirroring the
// PSMCT32 progression (Ch120 read-side → Ch121 image-xfer write
// side → Ch122 raster write side → Ch123/Ch124 e2e demos).
// Default-off keeps the legacy linear PSMCT16 TBs (Ch94, Ch95,
// Ch103, Ch116) on the linear path.
//
// SOURCE-TABLE PROVENANCE (per Codex's Ch125 guidance):
// blockTable16 — pcsx2/GS/GSTables.cpp lines 29-39, master
// HEAD commit 3d71e310 (file-touch commit
// d983b2b0, 2026-01-12). 8 rows × 4 cols,
// indexed [block_y][block_x].
// columnTable16 — pcsx2/GS/GSTables.cpp lines 91-109, same
// commit. 8 rows × 16 cols, indexed [yb][xb],
// values are halfword-within-block (0..127).
// Cross-check — older GSdx (Debian pcsx2 1.5.0~gfc1d9aef0)
// PixelAddressOrg16(x, y, bp, bw) =
// (BlockNumber16(x, y, bp, bw) << 7) +
// columnTable16[y & 7][x & 15], with
// BlockNumber16 = bp + ((y>>1) & ~0x1f)*bw +
// ((x>>1) & ~0x1f) +
// blockTable16[(y>>3)&7][(x>>4)&3].
// The `<< 7` confirms columnTable16 is in
// halfword units (block = 128 halfwords).
// Multiply final value by 2 for byte address.
// PCSX2's `bp` is in 256-byte block-pointer
// units; in our FBP (2048-byte) units,
// bp = FBP * 8, so bp*256 = FBP*2048.
//
// NOTE on PCSX2 license: the PCSX2 project is GPL-3.0+. This
// stub re-expresses the same PSMCT16 swizzle math in
// SystemVerilog as a hardware contract — the values in the
// blockTable16 / columnTable16 case statements come from PCSX2
// source and represent the PS2 hardware layout itself (not
// PCSX2-original creative content). The retroDE_ps2 project
// authors should consider whether this provenance affects
// licensing for downstream consumers; from an engineering
// correctness standpoint, locking against the canonical source
// is the only way to be byte-accurate to real PS2 VRAM.
//
// Real PS2 PSMCT16 layout:
// - VRAM is 4 MiB total, organized in 8 KiB pages.
// - Each page is 64×64 PSMCT16 pixels (= 64*64*2 = 8192 bytes).
// 2× as many pixels per page as PSMCT32 (which has 64×32 px)
// because each PSMCT16 pixel is 2 bytes vs CT32's 4.
// - Each page is divided into a 4×8 grid of blocks (4 cols of
// blocks across, 8 rows down). Each block is 16×8 PSMCT16
// pixels (= 16*8*2 = 256 bytes). 4×8 = 32 blocks/page.
// - Block ordering within a page follows blockTable16, which
// differs from blockTable32 because the grid shape is
// different (8×4 vs 4×8).
// - Within a block, halfword placement follows columnTable16:
// a 16×8 → 128-entry permutation that organizes the 4
// internal 16×2-pixel sub-columns and interleaves the two
// pixel rows per sub-column.
//
// Address formula (FBP in 2048-byte units; FBW in 64-pixel
// units; addr in bytes):
// page_x = x / 64
// page_y = y / 64
// page_index = page_y * FBW + page_x
// page_base = FBP*2048 + page_index*8192
//
// block_x_in_page = (x % 64) / 16 // 0..3
// block_y_in_page = (y % 64) / 8 // 0..7
// block_idx = blockTable16[block_y_in_page][block_x_in_page]
// block_base = page_base + block_idx*256
//
// xb = x % 16
// yb = y % 8
// hw_idx = columnTable16[yb][xb] // 0..127
// addr = block_base + hw_idx*2
`timescale 1ns/1ps
module gs_swizzle_psmct16_stub
(
    // NOTE(review): the GS register documentation describes FRAME.FBP as
    // word-address/2048 (i.e. 8192-byte units; PCSX2 uses FBP << 5 blocks).
    // This module scales FBP by 2048 *bytes*, as stated in the file header.
    // Confirm that project-wide convention is intended before wiring in.
    input  logic [8:0]  fbp,   // frame base pointer, scaled below as FBP * 2048 bytes
    input  logic [5:0]  fbw,   // frame width in 64-pixel units (= pages per page row)
    input  logic [11:0] x,     // pixel column
    input  logic [11:0] y,     // pixel row
    output logic [31:0] addr   // swizzled byte address (combinational, not wrapped to VRAM size)
);

    // ------------------------------------------------------------------
    // Block-within-page lookup (blockTable16, pcsx2/GS/GSTables.cpp
    // lines 29-39).  Selector is {by, bx}:
    //   by = block row    within the page (0..7)
    //   bx = block column within the page (0..3)
    // Returns the block's position (0..31) inside the 32-block page.
    //
    //            bx=0 bx=1 bx=2 bx=3
    //   by=0:      0    2    8   10
    //   by=1:      1    3    9   11
    //   by=2:      4    6   12   14
    //   by=3:      5    7   13   15
    //   by=4:     16   18   24   26
    //   by=5:     17   19   25   27
    //   by=6:     20   22   28   30
    //   by=7:     21   23   29   31
    //
    // The (7,3) entry is carried by `default`, so a selector containing
    // X/Z also yields 31.
    // ------------------------------------------------------------------
    function automatic logic [4:0] swizzle_psmct16(
        input logic [2:0] by,
        input logic [1:0] bx);
        logic [4:0] blk;
        case ({by, bx})
            // by = 0
            5'b000_00: blk = 5'd0;
            5'b000_01: blk = 5'd2;
            5'b000_10: blk = 5'd8;
            5'b000_11: blk = 5'd10;
            // by = 1
            5'b001_00: blk = 5'd1;
            5'b001_01: blk = 5'd3;
            5'b001_10: blk = 5'd9;
            5'b001_11: blk = 5'd11;
            // by = 2
            5'b010_00: blk = 5'd4;
            5'b010_01: blk = 5'd6;
            5'b010_10: blk = 5'd12;
            5'b010_11: blk = 5'd14;
            // by = 3
            5'b011_00: blk = 5'd5;
            5'b011_01: blk = 5'd7;
            5'b011_10: blk = 5'd13;
            5'b011_11: blk = 5'd15;
            // by = 4
            5'b100_00: blk = 5'd16;
            5'b100_01: blk = 5'd18;
            5'b100_10: blk = 5'd24;
            5'b100_11: blk = 5'd26;
            // by = 5
            5'b101_00: blk = 5'd17;
            5'b101_01: blk = 5'd19;
            5'b101_10: blk = 5'd25;
            5'b101_11: blk = 5'd27;
            // by = 6
            5'b110_00: blk = 5'd20;
            5'b110_01: blk = 5'd22;
            5'b110_10: blk = 5'd28;
            5'b110_11: blk = 5'd30;
            // by = 7
            5'b111_00: blk = 5'd21;
            5'b111_01: blk = 5'd23;
            5'b111_10: blk = 5'd29;
            default:   blk = 5'd31;   // (7,3)
        endcase
        return blk;
    endfunction

    // ------------------------------------------------------------------
    // Halfword-within-block lookup (columnTable16, pcsx2/GS/GSTables.cpp
    // lines 91-109).  Selector is {yb, xb}; written in hex below so the
    // high digit is yb (0..7) and the low digit is xb (0..F).
    // Returns the halfword slot (0..127) inside the 256-byte block.
    //
    //   yb=0:   0   2   8  10  16  18  24  26   1   3   9  11  17  19  25  27
    //   yb=1:   4   6  12  14  20  22  28  30   5   7  13  15  21  23  29  31
    //   yb=2:  32  34  40  42  48  50  56  58  33  35  41  43  49  51  57  59
    //   yb=3:  36  38  44  46  52  54  60  62  37  39  45  47  53  55  61  63
    //   yb=4:  64  66  72  74  80  82  88  90  65  67  73  75  81  83  89  91
    //   yb=5:  68  70  76  78  84  86  92  94  69  71  77  79  85  87  93  95
    //   yb=6:  96  98 104 106 112 114 120 122  97  99 105 107 113 115 121 123
    //   yb=7: 100 102 108 110 116 118 124 126 101 103 109 111 117 119 125 127
    //
    // The (7,15) entry is carried by `default`, so a selector containing
    // X/Z also yields 127.
    // ------------------------------------------------------------------
    function automatic logic [6:0] col_idx_psmct16(
        input logic [2:0] yb,
        input logic [3:0] xb);
        logic [6:0] hw;
        case ({yb, xb})
            // yb = 0
            7'h00: hw = 7'd0;    7'h01: hw = 7'd2;    7'h02: hw = 7'd8;    7'h03: hw = 7'd10;
            7'h04: hw = 7'd16;   7'h05: hw = 7'd18;   7'h06: hw = 7'd24;   7'h07: hw = 7'd26;
            7'h08: hw = 7'd1;    7'h09: hw = 7'd3;    7'h0A: hw = 7'd9;    7'h0B: hw = 7'd11;
            7'h0C: hw = 7'd17;   7'h0D: hw = 7'd19;   7'h0E: hw = 7'd25;   7'h0F: hw = 7'd27;
            // yb = 1
            7'h10: hw = 7'd4;    7'h11: hw = 7'd6;    7'h12: hw = 7'd12;   7'h13: hw = 7'd14;
            7'h14: hw = 7'd20;   7'h15: hw = 7'd22;   7'h16: hw = 7'd28;   7'h17: hw = 7'd30;
            7'h18: hw = 7'd5;    7'h19: hw = 7'd7;    7'h1A: hw = 7'd13;   7'h1B: hw = 7'd15;
            7'h1C: hw = 7'd21;   7'h1D: hw = 7'd23;   7'h1E: hw = 7'd29;   7'h1F: hw = 7'd31;
            // yb = 2
            7'h20: hw = 7'd32;   7'h21: hw = 7'd34;   7'h22: hw = 7'd40;   7'h23: hw = 7'd42;
            7'h24: hw = 7'd48;   7'h25: hw = 7'd50;   7'h26: hw = 7'd56;   7'h27: hw = 7'd58;
            7'h28: hw = 7'd33;   7'h29: hw = 7'd35;   7'h2A: hw = 7'd41;   7'h2B: hw = 7'd43;
            7'h2C: hw = 7'd49;   7'h2D: hw = 7'd51;   7'h2E: hw = 7'd57;   7'h2F: hw = 7'd59;
            // yb = 3
            7'h30: hw = 7'd36;   7'h31: hw = 7'd38;   7'h32: hw = 7'd44;   7'h33: hw = 7'd46;
            7'h34: hw = 7'd52;   7'h35: hw = 7'd54;   7'h36: hw = 7'd60;   7'h37: hw = 7'd62;
            7'h38: hw = 7'd37;   7'h39: hw = 7'd39;   7'h3A: hw = 7'd45;   7'h3B: hw = 7'd47;
            7'h3C: hw = 7'd53;   7'h3D: hw = 7'd55;   7'h3E: hw = 7'd61;   7'h3F: hw = 7'd63;
            // yb = 4
            7'h40: hw = 7'd64;   7'h41: hw = 7'd66;   7'h42: hw = 7'd72;   7'h43: hw = 7'd74;
            7'h44: hw = 7'd80;   7'h45: hw = 7'd82;   7'h46: hw = 7'd88;   7'h47: hw = 7'd90;
            7'h48: hw = 7'd65;   7'h49: hw = 7'd67;   7'h4A: hw = 7'd73;   7'h4B: hw = 7'd75;
            7'h4C: hw = 7'd81;   7'h4D: hw = 7'd83;   7'h4E: hw = 7'd89;   7'h4F: hw = 7'd91;
            // yb = 5
            7'h50: hw = 7'd68;   7'h51: hw = 7'd70;   7'h52: hw = 7'd76;   7'h53: hw = 7'd78;
            7'h54: hw = 7'd84;   7'h55: hw = 7'd86;   7'h56: hw = 7'd92;   7'h57: hw = 7'd94;
            7'h58: hw = 7'd69;   7'h59: hw = 7'd71;   7'h5A: hw = 7'd77;   7'h5B: hw = 7'd79;
            7'h5C: hw = 7'd85;   7'h5D: hw = 7'd87;   7'h5E: hw = 7'd93;   7'h5F: hw = 7'd95;
            // yb = 6
            7'h60: hw = 7'd96;   7'h61: hw = 7'd98;   7'h62: hw = 7'd104;  7'h63: hw = 7'd106;
            7'h64: hw = 7'd112;  7'h65: hw = 7'd114;  7'h66: hw = 7'd120;  7'h67: hw = 7'd122;
            7'h68: hw = 7'd97;   7'h69: hw = 7'd99;   7'h6A: hw = 7'd105;  7'h6B: hw = 7'd107;
            7'h6C: hw = 7'd113;  7'h6D: hw = 7'd115;  7'h6E: hw = 7'd121;  7'h6F: hw = 7'd123;
            // yb = 7
            7'h70: hw = 7'd100;  7'h71: hw = 7'd102;  7'h72: hw = 7'd108;  7'h73: hw = 7'd110;
            7'h74: hw = 7'd116;  7'h75: hw = 7'd118;  7'h76: hw = 7'd124;  7'h77: hw = 7'd126;
            7'h78: hw = 7'd101;  7'h79: hw = 7'd103;  7'h7A: hw = 7'd109;  7'h7B: hw = 7'd111;
            7'h7C: hw = 7'd117;  7'h7D: hw = 7'd119;  7'h7E: hw = 7'd125;
            default: hw = 7'd127;   // (7,15)
        endcase
        return hw;
    endfunction

    // ------------------------------------------------------------------
    // Coordinate split.  Pages are 64x64 pixels, blocks are 16x8 pixels:
    //   x[11:6] -> page column      y[11:6] -> page row
    //   x[5:4]  -> bx (block col)   y[5:3]  -> by (block row)
    //   x[3:0]  -> xb (pixel col)   y[2:0]  -> yb (pixel row)
    // ------------------------------------------------------------------
    logic [11:0] page_x;
    logic [11:0] page_y;
    logic [2:0]  by;
    logic [1:0]  bx;
    logic [3:0]  xb;
    logic [2:0]  yb;

    assign page_x   = {6'd0, x[11:6]};   // x / 64
    assign page_y   = {6'd0, y[11:6]};   // y / 64
    assign {bx, xb} = x[5:0];            // (x % 64) / 16 , x % 16
    assign {by, yb} = y[5:0];            // (y % 64) / 8  , y % 8

    // ------------------------------------------------------------------
    // Address assembly (all terms zero-extended to 32 bits):
    //   page_index    = page_y * FBW + page_x
    //   page_base     = FBP * 2048 + page_index * 8192
    //   block_base    = page_base + block_idx * 256
    //   byte_in_block = hw_idx * 2
    //   addr          = block_base + byte_in_block
    // Constant multiplies are expressed as bit placement (zero padding
    // on the right) rather than shift operators.
    // ------------------------------------------------------------------
    logic [4:0]  block_idx;
    logic [6:0]  hw_idx;
    logic [31:0] page_index;
    logic [31:0] page_base;
    logic [31:0] block_base;
    logic [31:0] byte_in_block;

    always_comb begin
        block_idx     = swizzle_psmct16(by, bx);
        hw_idx        = col_idx_psmct16(yb, xb);

        page_index    = (32'(page_y) * 32'(fbw)) + 32'(page_x);
        page_base     = {12'd0, fbp, 11'd0} + {page_index[18:0], 13'd0};
        block_base    = page_base + {19'd0, block_idx, 8'd0};
        byte_in_block = {24'd0, hw_idx, 1'b0};

        addr          = block_base + byte_in_block;
    end

endmodule : gs_swizzle_psmct16_stub