ec82764bef
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
110 lines
4.2 KiB
Systemverilog
110 lines
4.2 KiB
Systemverilog
// retroDE_ps2 — gs_persp_uv (Ch301)
//
// Per-pixel PERSPECTIVE-CORRECT texture-coordinate divide. Given the three
// affinely-interpolated perspective attributes at a pixel —
//
//     uq = (u/w) * 2**FRAC      (u-over-w,   fixed-point)
//     vq = (v/w) * 2**FRAC      (v-over-w,   fixed-point)
//     q  = (1/w) * 2**FRAC      (one-over-w, fixed-point)
//
// — this recovers the integer texel coordinates:
//
//     w_recip = 1/q                       (= w, via the pipelined gs_reciprocal_stub LUT, NO divider)
//     u_texel = (uq * w_recip) >> SCALE   (= (u/w) * w = u)
//     v_texel = (vq * w_recip) >> SCALE   (= (v/w) * w = v)
//
// gs_reciprocal_stub returns recip = floor(2**SCALE / q). With q = (1/w)<<FRAC
// that is recip = w << (SCALE-FRAC). Then uq*recip = ((u/w)<<FRAC)*(w<<(SCALE-FRAC))
// = u << SCALE, so (uq*recip) >> SCALE = u. (The FRAC scaling cancels.)
//
// Pipeline (NO divider, ~1 result/cycle):
//     recip:  RLAT cycles (gs_reciprocal_stub, 3).
//     uq/vq:  delayed RLAT cycles to align with recip.
//     mul:    1 cycle (uq*recip, vq*recip) + shift + clamp.
//     total latency = RLAT + 1.
//
// Output texel coords are clamped to [0, TEXEL_MAX] (saturating), matching the
// integer-coord clamp the affine path already applies.

`timescale 1ns/1ps
|
|
|
|
// gs_persp_uv
//
// Turns the per-pixel perspective attributes (uq, vq, q) into saturated
// integer texel coordinates (u, v). The reciprocal of q comes from the
// pipelined gs_reciprocal_stub instance; uq and vq are held in a matching
// delay line, multiplied by that reciprocal, shifted right by SCALE and
// clamped to TEXEL_MAX in one registered stage.
//
// Timing: out_valid is recip_valid delayed by one clock, and u/v are
// registered on that same clock edge.
module gs_persp_uv #(
    parameter int ATTR_W         = 24,    // bit width of uq / vq ((u/w) << FRAC)
    parameter int Q_W            = 24,    // bit width of q ((1/w) << FRAC)
    parameter int FRAC           = 12,    // fraction bits of the attributes (documentation only; not used below)
    parameter int SCALE          = 24,    // reciprocal scale, also the post-multiply right shift
    parameter int RECIP_W        = 25,    // bit width of the reciprocal (OUT_W of gs_reciprocal_stub)
    parameter int TEXEL_W        = 11,    // bit width of the u / v outputs
    parameter int TEXEL_MAX      = 2047,  // saturation ceiling applied to u / v
    // Ch351 — reciprocal LUT mantissa width, forwarded as IDX_BITS.
    // Default 8 (256-entry) is byte-identical to Ch301/342/348.
    // Far-W perspective draws (small Q at high PERSP_FRAC) want more:
    // 11 (2048-entry) ~ 0.05% rel error.
    parameter int RECIP_IDX_BITS = 8
) (
    input  logic               clk,
    input  logic               rst_n,      // asynchronous, active-low
    input  logic               in_valid,
    input  logic [ATTR_W-1:0]  uq,
    input  logic [ATTR_W-1:0]  vq,
    input  logic [Q_W-1:0]     q,
    output logic               out_valid,
    output logic [TEXEL_W-1:0] u,
    output logic [TEXEL_W-1:0] v
);

    // Depth of the uq/vq delay line; must equal the gs_reciprocal_stub latency.
    localparam int RLAT   = 3;
    // Full-precision width of attribute * reciprocal (no bits dropped before the shift).
    localparam int PROD_W = ATTR_W + RECIP_W;

    // ------------------------------------------------------------------
    // Reciprocal of q: pipelined LUT, no divider.
    // ------------------------------------------------------------------
    logic               recip_valid;
    logic [RECIP_W-1:0] w_recip;

    gs_reciprocal_stub #(
        .Q_W      (Q_W),
        .IDX_BITS (RECIP_IDX_BITS),
        .SCALE    (SCALE),
        .OUT_W    (RECIP_W)
    ) u_recip (
        .clk       (clk),
        .rst_n     (rst_n),
        .in_valid  (in_valid),
        .q         (q),
        .out_valid (recip_valid),
        .recip     (w_recip)
    );

    // ------------------------------------------------------------------
    // RLAT-deep delay line so uq/vq reach the multiplier together with
    // w_recip. It shifts every clock; validity is tracked by the
    // reciprocal block's out_valid, not here.
    // ------------------------------------------------------------------
    logic [ATTR_W-1:0] uq_pipe [0:RLAT-1];
    logic [ATTR_W-1:0] vq_pipe [0:RLAT-1];

    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            for (int stage = 0; stage < RLAT; stage++) begin
                uq_pipe[stage] <= '0;
                vq_pipe[stage] <= '0;
            end
        end else begin
            // Non-blocking assignments: every stage samples its predecessor's
            // pre-edge value, so statement order does not matter.
            for (int stage = RLAT-1; stage > 0; stage--) begin
                uq_pipe[stage] <= uq_pipe[stage-1];
                vq_pipe[stage] <= vq_pipe[stage-1];
            end
            uq_pipe[0] <= uq;
            vq_pipe[0] <= vq;
        end
    end

    // ------------------------------------------------------------------
    // Multiply + shift + clamp (one register stage).
    // ------------------------------------------------------------------

    // Full-width products of the aligned attributes and the reciprocal.
    logic [PROD_W-1:0] u_prod;
    logic [PROD_W-1:0] v_prod;

    assign u_prod = uq_pipe[RLAT-1] * w_recip;
    assign v_prod = vq_pipe[RLAT-1] * w_recip;

    // Drops the SCALE fraction bits from a product and saturates at TEXEL_MAX.
    // The comparison is made at full product width, before truncating to
    // TEXEL_W bits, so large values saturate instead of wrapping.
    function automatic logic [TEXEL_W-1:0] clamp_texel(input logic [PROD_W-1:0] prod);
        logic [PROD_W-1:0] texel_full;
        texel_full = prod >> SCALE;
        if (texel_full > PROD_W'(TEXEL_MAX)) begin
            return TEXEL_W'(TEXEL_MAX);
        end else begin
            return texel_full[TEXEL_W-1:0];
        end
    endfunction

    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            out_valid <= 1'b0;
            u         <= '0;
            v         <= '0;
        end else begin
            out_valid <= recip_valid;
            u         <= clamp_texel(u_prod);
            v         <= clamp_texel(v_prod);
        end
    end

endmodule : gs_persp_uv
|