ec82764bef
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
128 lines
5.2 KiB
Systemverilog
128 lines
5.2 KiB
Systemverilog
// retroDE_ps2 — gs_reciprocal_stub (Ch301)
|
|
//
|
|
// Pipelined fixed-point reciprocal unit for PERSPECTIVE-CORRECT texture
|
|
// interpolation. Computes recip = floor(2**SCALE / q) for an unsigned input
|
|
// q, with NO divider in the datapath — a serialized per-pixel divide would
|
|
// stall the ~1-pixel/cycle rasterizer (the architect's explicit constraint).
|
|
//
|
|
// Method — range-reduced table lookup (classic LUT reciprocal):
|
|
// 1. e = position of q's most-significant set bit (0..Q_W-1).
|
|
// 2. M = q normalized to an IDX_BITS-wide mantissa with its MSB at the top
|
|
// (M in [2**(IDX_BITS-1) .. 2**IDX_BITS-1)), i.e. q ~= M * 2**(e-(IDX_BITS-1)).
|
|
// 3. recip = LUT[M] >> e, where LUT[M] = floor(2**(SCALE+IDX_BITS-1) / M).
|
|
// Proof: LUT[M] >> e ~= 2**(SCALE+IDX_BITS-1)/(M * 2**e)
|
|
// = 2**SCALE / (M * 2**(e-(IDX_BITS-1)))
|
|
// = 2**SCALE / q. ✓ (uniform for all e)
|
|
//
|
|
// Accuracy is ~1 part in 2**IDX_BITS (relative). For the first perspective
|
|
// rung (texel coords <= 63) an 8-bit mantissa gives sub-texel error; bump
|
|
// IDX_BITS for tighter precision later if real traces demand it.
|
|
//
|
|
// Pipeline: 3 stages (LAT=3), one result per cycle.
|
|
//   S0: input q + in_valid, consumed live (NOT registered; S1 is the first register stage).
|
|
// S1: e = msb(q); M = normalize(q).
|
|
// S2: lut_out = LUT[M]; carry e.
|
|
// S3: recip = lut_out >> e; out_valid.
|
|
//
|
|
// q==0 saturates to all-ones (1/0 -> +inf), which is harmless for the demo
|
|
// (q = 1/w with w finite positive is always > 0).
|
|
//
|
|
// LUT init is a computed `initial` for-loop (Quartus infers ROM from it). If a
|
|
// future synth flow rejects it, switch to $readmemh of a generated .mem.
|
|
|
|
`timescale 1ns/1ps
|
|
|
|
module gs_reciprocal_stub #(
    parameter int Q_W      = 24,  // input width (q in [1, 2**Q_W))
    parameter int IDX_BITS = 8,   // mantissa / LUT-index width (2**IDX_BITS entries)
    parameter int SCALE    = 24,  // output approximates floor(2**SCALE / q)
    parameter int OUT_W    = 25   // output width (recip <= 2**SCALE for q>=1)
) (
    input  logic             clk,
    input  logic             rst_n,      // asynchronous, active-low reset
    input  logic             in_valid,   // qualifies q on this cycle
    input  logic [Q_W-1:0]   q,          // unsigned operand; 0 saturates the result
    output logic             out_valid,  // in_valid delayed by 3 clocks
    output logic [OUT_W-1:0] recip       // LUT[M] >> e, clamped to OUT_W bits
);

    // Parameter assumptions (not checked here):
    //   * IDX_BITS <= Q_W, so the mantissa slice norm[IDX_BITS-1:0] is in range.
    //   * SCALE + IDX_BITS - 1 < 64, so the LUT numerator fits in 64 bits.

    localparam int LUT_N   = (1 << IDX_BITS);
    localparam int TOP_BIT = IDX_BITS - 1;      // mantissa MSB position
    // LUT entries: floor(2**(SCALE+TOP_BIT) / M). Only M in [2**TOP_BIT .. LUT_N-1]
    // are ever addressed for q != 0 (M always has its MSB set after normalization).
    localparam int LUT_W   = SCALE + 1;         // wide enough for M = 2**TOP_BIT
    localparam int E_W     = $clog2(Q_W) + 1;   // width of the carried exponent

    // Saturation value for recip, written as an explicitly sized replication.
    // The previous form OUT_W'('1) is ambiguous: an unsized '1 in a
    // self-determined context is a single bit, so a tool that treats the cast
    // operand as self-determined evaluates it to OUT_W'd1 and the clamp below
    // would saturate every result greater than 1.
    localparam logic [OUT_W-1:0] RECIP_MAX = {OUT_W{1'b1}};

    logic [LUT_W-1:0] lut [0:LUT_N-1];

    // ROM contents, computed at elaboration/time-0. Entry 0 is a placeholder
    // (only reached for q == 0, which is overridden by the zero flag). Entries
    // below 2**TOP_BIT are truncated by the cast and never addressed for q != 0.
    initial begin
        longint unsigned num;
        num = (64'd1 << (SCALE + TOP_BIT));     // 2**(SCALE+TOP_BIT)
        for (int m = 0; m < LUT_N; m++) begin
            if (m == 0) lut[m] = '0;
            else        lut[m] = LUT_W'(num / m);
        end
    end

    // Index of the most-significant set bit of v; returns 0 when v == 0
    // (the caller distinguishes that case with a separate zero flag).
    function automatic int unsigned msb_index(input logic [Q_W-1:0] v);
        msb_index = 0;
        for (int i = 0; i < Q_W; i++)
            if (v[i]) msb_index = i;            // last hit wins -> highest set bit
    endfunction

    // ---------------- combinational front end (on the LIVE input) ----------------
    // e    = msb(q)
    // norm = q shifted so that its MSB lands on bit TOP_BIT; the low IDX_BITS
    //        bits of norm form the LUT index M.
    int unsigned    in_e;
    logic [Q_W-1:0] in_norm;

    always_comb begin
        in_e = msb_index(q);
        if (in_e >= TOP_BIT) in_norm = q >> (in_e - TOP_BIT);
        else                 in_norm = q << (TOP_BIT - in_e);
    end

    // ---------------- S1: register e + mantissa ----------------
    // First of exactly three register stages (S1/S2/S3) -> LAT = 3.
    logic                s1_valid;
    logic [E_W-1:0]      s1_e;
    logic [IDX_BITS-1:0] s1_m;
    logic                s1_zero;

    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            s1_valid <= 1'b0;
            s1_e     <= '0;
            s1_m     <= '0;
            s1_zero  <= 1'b0;
        end else begin
            s1_valid <= in_valid;
            s1_zero  <= (q == '0);
            s1_e     <= E_W'(in_e);
            s1_m     <= in_norm[IDX_BITS-1:0];
        end
    end

    // ---------------- S2: LUT read; carry e and the zero flag ----------------
    logic             s2_valid;
    logic [E_W-1:0]   s2_e;
    logic [LUT_W-1:0] s2_lut;
    logic             s2_zero;

    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            s2_valid <= 1'b0;
            s2_e     <= '0;
            s2_lut   <= '0;
            s2_zero  <= 1'b0;
        end else begin
            s2_valid <= s1_valid;
            s2_e     <= s1_e;
            s2_lut   <= lut[s1_m];
            s2_zero  <= s1_zero;
        end
    end

    // ---------------- S3: shift back, saturate, register the result ----------------
    logic [LUT_W-1:0] s3_shifted;

    always_comb s3_shifted = s2_lut >> s2_e;

    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            out_valid <= 1'b0;
            recip     <= '0;
        end else begin
            out_valid <= s2_valid;
            if (s2_zero)                     recip <= RECIP_MAX;         // 1/0 -> saturate
            else if (s3_shifted > RECIP_MAX) recip <= RECIP_MAX;         // clamp to OUT_W
            else                             recip <= OUT_W'(s3_shifted);
        end
    end

endmodule : gs_reciprocal_stub
|