ec82764bef
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
89 lines
4.4 KiB
Systemverilog
89 lines
4.4 KiB
Systemverilog
// ============================================================================
// gs_grad_divider.sv (Ch352 — sequential signed divider for the triangle-setup gradient solve)
//
// Replaces the single combinational `grad_num_q[grad_step] / grad_det_q` in gs_stub. That combinational
// divider is a ~6700-cell, ~100ns cone at the 25MHz design clock — the worst setup path, and (the real lesson)
// it CANNOT be covered by any SDC timing exception: both a multicycle and a false_path made the Quartus fitter
// grind on its cone indefinitely (Place stuck <1% for hours). A sequential divider has REGISTERED iterations and
// no combinational cone, so every internal path is an ordinary single-cycle path that closes timing normally —
// no exception needed, no grind.
//
// BIT-EXACT to SystemVerilog signed `/`:
//   * truncation toward zero (divide magnitudes, then apply the XOR-of-signs);
//   * den == 0 -> quotient 0 (matches the gs_stub `if (grad_det_q==0) grad_quo=0` guard).
// Restoring division of the W-bit magnitudes (W iterations), one iteration per clock.
//
// Handshake: pulse `start` with num/den stable -> `busy` high for the solve -> `done` pulses for one cycle
// with `quo` valid (and stays valid until the next start). The gs_stub gradient FSM waits on `done`.
// ============================================================================
`timescale 1ns/1ps

// Sequential signed divider: quo = num / den, truncated toward zero, one restoring-division
// step per clock on the operand magnitudes, with the sign applied on the final step.
//
// Handshake (as implemented by the FSM below):
//   * `start` is only honoured while `busy` is low; a `start` seen while `busy` is high is ignored.
//   * den != 0 : `busy` rises on the accepting clock edge, W iteration edges follow, and on the
//                last of them `busy` falls, `quo` is loaded and `done` is set for one cycle.
//   * den == 0 : `quo` is forced to 0 and `done` is set on the accepting edge itself; `busy`
//                never rises.
//   * `quo` keeps its value until a later divide completes (or reset clears it).
module gs_grad_divider #(
    parameter int W = 56              // operand width (gs_stub: grad_num_q / sign-extended grad_det)
)(
    input  logic                clk,
    input  logic                rst_n,  // asynchronous, active-low reset
    input  logic                start,  // pulse: begin a divide (num/den sampled this cycle)
    input  logic signed [W-1:0] num,    // dividend
    input  logic signed [W-1:0] den,    // divisor
    output logic signed [W-1:0] quo,    // truncate-toward-zero quotient (== $signed(num)/$signed(den))
    output logic                busy,   // high while the iterations are running
    output logic                done    // 1-cycle pulse when quo is valid
);

    // Width of the iteration counter: it has to hold the value W itself.
    localparam int CW = $clog2(W+1);

    // Magnitude of a signed W-bit value, returned as unsigned W bits.
    // The most-negative input wraps to 2^(W-1), which still fits in unsigned W bits.
    function automatic logic [W-1:0] absval(input logic signed [W-1:0] v);
        absval = v[W-1] ? (~v + 1'b1) : v;
    endfunction

    logic [W:0]    rem;      // partial remainder, W+1 bits for the compare/subtract
    logic [W-1:0]  qbuild;   // starts as |num|; dividend bits leave at the top, quotient bits enter at bit 0
    logic [W-1:0]  den_mag;  // |den|
    logic          qsign;    // result sign = num_sign ^ den_sign
    logic [CW-1:0] iter;     // iterations still to run (counts W down to 1)
    logic          run;      // iteration phase active (set and cleared together with busy)

    // One restoring step: shift the next dividend bit into the remainder, then subtract |den|
    // only if the subtraction would not go negative.
    wire [W:0] rem_sh  = {rem[W-1:0], qbuild[W-1]};
    wire       sub_ok  = (rem_sh >= {1'b0, den_mag});
    wire [W:0] rem_nxt = sub_ok ? (rem_sh - {1'b0, den_mag}) : rem_sh;

    // Next value of the shared dividend/quotient register: drop the consumed MSB and append the
    // new quotient bit. Written as a truncating shift-or rather than a [W-2:0] part-select so the
    // expression is also legal for W == 1; for W >= 2 it equals {qbuild[W-2:0], sub_ok}.
    wire [W-1:0] q_nxt = (qbuild << 1) | W'(sub_ok);

    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            rem     <= '0;
            qbuild  <= '0;
            den_mag <= '0;
            qsign   <= 1'b0;
            iter    <= '0;
            run     <= 1'b0;
            busy    <= 1'b0;
            done    <= 1'b0;
            quo     <= '0;
        end else begin
            // done defaults low every cycle, so each completion yields a single-cycle pulse.
            done <= 1'b0;

            if (start && !busy) begin
                if (den == '0) begin
                    // den == 0 -> quotient 0 (matches the gs_stub guard), available next cycle
                    quo  <= '0;
                    done <= 1'b1;
                    busy <= 1'b0;
                    run  <= 1'b0;
                end else begin
                    // Capture magnitudes and result sign, then run W iterations.
                    rem     <= '0;
                    qbuild  <= absval(num);
                    den_mag <= absval(den);
                    qsign   <= num[W-1] ^ den[W-1];
                    iter    <= CW'(W);
                    run     <= 1'b1;
                    busy    <= 1'b1;
                end
            end else if (run) begin
                rem    <= rem_nxt;
                qbuild <= q_nxt;           // shift dividend out, shift quotient bit in
                iter   <= iter - 1'b1;

                if (iter == CW'(1)) begin
                    // Final iteration: q_nxt is the complete W-bit magnitude quotient.
                    // Negate it (two's complement) when the operand signs differ.
                    run  <= 1'b0;
                    busy <= 1'b0;
                    done <= 1'b1;
                    quo  <= qsign ? (~q_nxt + 1'b1) : q_nxt;
                end
            end
        end
    end

endmodule
|