ec82764bef
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
158 lines
6.8 KiB
Systemverilog
158 lines
6.8 KiB
Systemverilog
// retroDE_ps2 — gs_alpha_blend
|
|
//
|
|
// Brick 2a — GS ALPHA blending (transparency), the source-over case.
|
|
//
|
|
// Computes, per RGB channel:
|
|
// Cv = (((Cs - Cd) * As) >> 7) + Cd (clamped to [0,255])
|
|
// where
|
|
// Cs = source color (the sprite's RGBAQ color channel)
|
|
// Cd = destination (the framebuffer pixel READ back at the write addr)
|
|
// As = source alpha (RGBAQ.A; PS2 0..128 scale where 0x80 == 1.0)
|
|
//
|
|
// This is the standard PS2 GS ALPHA register config A=0(Cs) B=1(Cd)
|
|
// C=0(As) D=1(Cd) — i.e. the canonical alpha-over blend. The brick-2a
|
|
// scope implements ONLY this config; selecting any other (A,B,C,D)
|
|
// tuple is handled by the caller (gs_stub) which falls back to an
|
|
// opaque write for unsupported configs, so this unit is always asked
|
|
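// for the source-over result. (This holds for the default ALPHA_MODES=0
// build; with ALPHA_MODES=1 the generic A/B/C/D selector datapath drives cv_*.)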
|
|
//
|
|
// Purely combinational: a subtract, a multiply by As (<= 8 bits), an
|
|
// arithmetic shift right by 7, an add, and a clamp. No divide. Fully
|
|
// synthesizable — there is NO `// synthesis translate_off` on this
|
|
// path. The >>7 is a wire shift; the *As is a single small multiply.
|
|
//
|
|
// The (Cs - Cd) term is signed (can be negative when the dest is
|
|
// brighter than the source), so the multiply and the shift are done
|
|
// in signed arithmetic and the final sum is clamped back into the
|
|
// unsigned [0,255] byte range. As is treated as unsigned 0..128; the
|
|
// caller passes RGBAQ.A[7:0] (real GS uses A[6:0]*2 internally for
|
|
// the 0..128 mapping, but A[7:0] already encodes 0x80=1.0 for the
|
|
// values our demo programs, and clamping As at 128 keeps a stray
|
|
// A>0x80 from over-shooting).
|
|
//
|
|
// Alpha (the A channel of the output) follows real-GS behavior for a
|
|
// framebuffer write: the SOURCE alpha is written through. brick-2a
|
|
// keeps the existing emit-lane A byte (= source A) unchanged; only
|
|
// R/G/B are blended. The 'a_out' port forwards the source A so the
|
|
// caller can repack the 32-bit ABGR.
|
|
|
|
`timescale 1ns/1ps
|
|
|
|
module gs_alpha_blend #(
    // Brick-2c blend-mode switch.
    //   0 (default): cv_* always carry the fixed source-over blend; the
    //                selector, ad and fix inputs have no effect on any output.
    //   1          : cv_* come from the generic ((A - B) * C >>> 7) + D
    //                datapath steered by a_sel / b_sel / c_sel / d_sel.
    parameter bit ALPHA_MODES = 1'b0
) (
    // Source color (one byte per channel) and source alpha.
    input  logic [7:0] cs_r,
    input  logic [7:0] cs_g,
    input  logic [7:0] cs_b,
    input  logic [7:0] as,      // 0x80 == 1.0; values above 0x80 blend as 0x80

    // Destination color (one byte per channel).
    input  logic [7:0] cd_r,
    input  logic [7:0] cd_g,
    input  logic [7:0] cd_b,

    // Generic-mode controls. They only influence cv_* when ALPHA_MODES=1.
    //   a_sel / b_sel / d_sel : 0 -> Cs, 1 -> Cd, any other value -> 0
    //   c_sel                 : 0 -> As (after the 0x80 clamp), 1 -> ad,
    //                           any other value -> fix
    input  logic [1:0] a_sel,
    input  logic [1:0] b_sel,
    input  logic [1:0] c_sel,
    input  logic [1:0] d_sel,
    input  logic [7:0] ad,      // destination alpha coefficient
    input  logic [7:0] fix,     // fixed alpha coefficient

    // Blended color channels, plus the raw (unclamped) source alpha.
    output logic [7:0] cv_r,
    output logic [7:0] cv_g,
    output logic [7:0] cv_b,
    output logic [7:0] a_out
);

    // Source alpha limited to 0x80, so a value above 1.0 behaves as 1.0.
    logic [7:0] as_eff;
    assign as_eff = (as <= 8'h80) ? as : 8'h80;

    // Saturate a signed intermediate result into the unsigned byte range.
    function automatic logic [7:0] sat_u8(input logic signed [31:0] v);
        if (v < 32'sd0)
            return 8'h00;
        else if (v > 32'sd255)
            return 8'hFF;
        else
            return v[7:0];
    endfunction

    // Decode a 2-bit A/B/D operand selector: 0 -> Cs, 1 -> Cd, else zero.
    function automatic logic [7:0] pick_operand(input logic [1:0] sel,
                                                input logic [7:0] cs,
                                                input logic [7:0] cd);
        return (sel == 2'd0) ? cs : (sel == 2'd1) ? cd : 8'd0;
    endfunction

    // Source-over blend of a single channel:
    //   clamp( (((cs - cd) * alpha) >>> 7) + cd )
    // The difference can be negative, so every step is signed; the product
    // fits 18 bits for alpha up to 128, which is what the module passes in.
    function automatic logic [7:0] blend_ch(input logic [7:0] cs,
                                            input logic [7:0] cd,
                                            input logic [7:0] alpha);
        logic signed [9:0]  delta;   // cs - cd
        logic signed [17:0] scaled;  // delta * alpha
        logic signed [17:0] mixed;   // (scaled >>> 7) + cd
        delta  = $signed({2'b00, cs}) - $signed({2'b00, cd});
        scaled = delta * $signed({1'b0, alpha});
        mixed  = (scaled >>> 7) + $signed({10'd0, cd});
        return sat_u8(mixed);
    endfunction

    // Generic blend of a single channel:
    //   clamp( (((A - B) * coef) >>> 7) + D )
    // A, B and D are chosen from {cs, cd, 0} by asel / bsel / dsel. coef is
    // zero-extended before the signed multiply, so it always acts as a
    // non-negative 8-bit scale factor.
    function automatic logic [7:0] blend_generic(
            input logic [7:0] cs,   input logic [7:0] cd,
            input logic [1:0] asel, input logic [1:0] bsel,
            input logic [1:0] dsel, input logic [7:0] coef);
        logic [7:0]         term_a;
        logic [7:0]         term_b;
        logic [7:0]         term_d;
        logic signed [31:0] delta;   // A - B
        logic signed [31:0] scaled;  // delta * coef
        logic signed [31:0] mixed;   // (scaled >>> 7) + D
        term_a = pick_operand(asel, cs, cd);
        term_b = pick_operand(bsel, cs, cd);
        term_d = pick_operand(dsel, cs, cd);
        delta  = $signed({1'b0, term_a}) - $signed({1'b0, term_b});
        scaled = delta * $signed({24'd0, coef});
        mixed  = (scaled >>> 7) + $signed({24'd0, term_d});
        return sat_u8(mixed);
    endfunction

    // C coefficient, shared by the three channels in generic mode.
    logic [7:0] coef_c;
    assign coef_c = (c_sel == 2'd0) ? as_eff
                  : (c_sel == 2'd1) ? ad
                  :                   fix;

    generate
        if (ALPHA_MODES) begin : g_generic
            // Selector-driven datapath, one instance of the math per channel.
            assign cv_r = blend_generic(cs_r, cd_r, a_sel, b_sel, d_sel, coef_c);
            assign cv_g = blend_generic(cs_g, cd_g, a_sel, b_sel, d_sel, coef_c);
            assign cv_b = blend_generic(cs_b, cd_b, a_sel, b_sel, d_sel, coef_c);
        end else begin : g_source_over
            // Fixed source-over blend; the selector inputs are not consulted.
            assign cv_r = blend_ch(cs_r, cd_r, as_eff);
            assign cv_g = blend_ch(cs_g, cd_g, as_eff);
            assign cv_b = blend_ch(cs_b, cd_b, as_eff);
        end
    endgenerate

    // The alpha output is the source alpha as received (not the clamped copy).
    assign a_out = as;

endmodule : gs_alpha_blend
|