Files
thejayman77 ec82764bef Initial commit: retroDE_ps2 — first-of-its-kind PS2 GS FPGA core (DE25-Nano / Agilex 5)
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression
(272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps,
and all dump-derived textures/traces) is excluded via .gitignore and stays local.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 20:10:50 -04:00

158 lines
6.8 KiB
Systemverilog

// retroDE_ps2 — gs_alpha_blend
//
// Brick 2a — GS ALPHA blending (transparency), the source-over case.
//
// Computes, per RGB channel:
// Cv = (((Cs - Cd) * As) >> 7) + Cd (clamped to [0,255])
// where
// Cs = source color (the sprite's RGBAQ color channel)
// Cd = destination (the framebuffer pixel READ back at the write addr)
// As = source alpha (RGBAQ.A; PS2 0..128 scale where 0x80 == 1.0)
//
// This is the standard PS2 GS ALPHA register config A=0(Cs) B=1(Cd)
// C=0(As) D=1(Cd) — i.e. the canonical alpha-over blend. With the
// default ALPHA_MODES=0 (the brick-2a scope) the unit implements ONLY
// this config; selecting any other (A,B,C,D) tuple is handled by the
// caller (gs_stub), which falls back to an opaque write for unsupported
// configs, so this unit is always asked for the source-over result.
// With ALPHA_MODES=1 (brick-2c) the a_sel/b_sel/c_sel/d_sel inputs
// choose the blend operands instead.
//
// Purely combinational: a subtract, a multiply by As (<= 8 bits), an
// arithmetic shift right by 7, an add, and a clamp. No divide. Fully
// synthesizable — there is NO `// synthesis translate_off` on this
// path. The >>7 is a wire shift; the *As is a single small multiply.
//
// The (Cs - Cd) term is signed (can be negative when the dest is
// brighter than the source), so the multiply and the shift are done
// in signed arithmetic and the final sum is clamped back into the
// unsigned [0,255] byte range. As is treated as unsigned 0..128; the
// caller passes RGBAQ.A[7:0] (real GS uses A[6:0]*2 internally for
// the 0..128 mapping, but A[7:0] already encodes 0x80=1.0 for the
// values our demo programs, and clamping As at 128 keeps a stray
// A>0x80 from over-shooting).
//
// Alpha (the A channel of the output) follows real-GS behavior for a
// framebuffer write: the SOURCE alpha is written through. brick-2a
// keeps the existing emit-lane A byte (= source A) unchanged; only
// R/G/B are blended. The 'a_out' port forwards the source A so the
// caller can repack the 32-bit ABGR.
`timescale 1ns/1ps
module gs_alpha_blend #(
    // Brick-2c blend-mode switch.
    //   0 (default) : source-over only. cv_* are a function of cs_*, cd_*
    //                 and as alone; a_sel/b_sel/c_sel/d_sel, ad and fix do
    //                 not influence any output in this configuration.
    //   1           : generic GS ALPHA datapath, steered by the A/B/C/D
    //                 selectors plus ad / fix.
    parameter bit ALPHA_MODES = 1'b0
) (
    // Source (sprite) colour, one byte per channel, and source alpha.
    input  logic [7:0] cs_r,
    input  logic [7:0] cs_g,
    input  logic [7:0] cs_b,
    input  logic [7:0] as,      // 0x80 == 1.0; values above 0x80 are limited to 0x80
    // Destination (framebuffer) colour, one byte per channel.
    input  logic [7:0] cd_r,
    input  logic [7:0] cd_g,
    input  logic [7:0] cd_b,
    // Generic blend controls — only consumed when ALPHA_MODES == 1.
    //   a_sel / b_sel / d_sel : 0 = Cs, 1 = Cd, anything else = 0
    //   c_sel                 : 0 = As (limited to 0x80), 1 = ad, anything else = fix
    //   ad                    : destination alpha coefficient (c_sel == 1)
    //   fix                   : fixed alpha coefficient       (c_sel == 2)
    input  logic [1:0] a_sel,
    input  logic [1:0] b_sel,
    input  logic [1:0] c_sel,
    input  logic [1:0] d_sel,
    input  logic [7:0] ad,
    input  logic [7:0] fix,
    // Blended colour, saturated to [0,255] per channel.
    output logic [7:0] cv_r,
    output logic [7:0] cv_g,
    output logic [7:0] cv_b,
    output logic [7:0] a_out    // copy of 'as' (not limited, not blended)
);

    // Source alpha limited to 0x80: anything above 1.0 is treated as 1.0.
    logic [7:0] as_eff;
    assign as_eff = (as <= 8'd128) ? as : 8'd128;

    // Saturate a signed intermediate result into an unsigned byte.
    // Narrower signed arguments are sign-extended on the way in.
    function automatic logic [7:0] sat_u8(input logic signed [31:0] v);
        if (v > 32'sd255)
            return 8'd255;
        if (v < 32'sd0)
            return 8'd0;
        return v[7:0];
    endfunction

    // Operand mux shared by the A, B and D selectors:
    // 0 picks the source byte, 1 picks the destination byte, else zero.
    function automatic logic [7:0] pick_operand(input logic [1:0] sel,
                                                input logic [7:0] cs,
                                                input logic [7:0] cd);
        return (sel == 2'd0) ? cs : ((sel == 2'd1) ? cd : 8'd0);
    endfunction

    // Source-over blend of one channel:
    //   clamp( (((cs - cd) * alpha) >>> 7) + cd )
    // The difference can be negative, so every intermediate is signed and
    // the right shift is arithmetic. 18 bits hold the product of a 10-bit
    // signed difference and a 9-bit non-negative coefficient.
    function automatic logic [7:0] blend_ch(input logic [7:0] cs,
                                            input logic [7:0] cd,
                                            input logic [7:0] alpha);
        logic signed [9:0]  src_s;      // cs, zero-extended into a signed value
        logic signed [9:0]  dst_s;      // cd, zero-extended into a signed value
        logic signed [8:0]  alpha_s;    // alpha, zero-extended into a signed value
        logic signed [9:0]  delta;      // cs - cd, -255..+255
        logic signed [17:0] weighted;   // delta * alpha
        logic signed [17:0] scaled;     // weighted >>> 7
        logic signed [17:0] blended;    // scaled + cd, before saturation
        src_s    = {2'b00, cs};
        dst_s    = {2'b00, cd};
        alpha_s  = {1'b0, alpha};
        delta    = src_s - dst_s;
        weighted = delta * alpha_s;
        scaled   = weighted >>> 7;
        blended  = scaled + dst_s;
        return sat_u8(blended);
    endfunction

    // Generic GS blend of one channel (brick-2c):
    //   clamp( (((A - B) * C) >>> 7) + D )
    // A, B and D are chosen from {cs, cd, 0} by asel/bsel/dsel; C is the
    // 8-bit coefficient 'coef' (0x80 == 1.0), used here without limiting.
    // (A - B) is signed, so the multiply and shift are signed/arithmetic.
    function automatic logic [7:0] blend_generic(
        input logic [7:0] cs,   input logic [7:0] cd,
        input logic [1:0] asel, input logic [1:0] bsel,
        input logic [1:0] dsel, input logic [7:0] coef);
        logic signed [8:0]  a_s;        // operand A as a non-negative signed value
        logic signed [8:0]  b_s;        // operand B as a non-negative signed value
        logic signed [8:0]  d_s;        // operand D as a non-negative signed value
        logic signed [8:0]  coef_s;     // coefficient C as a non-negative signed value
        logic signed [31:0] delta;      // A - B
        logic signed [31:0] weighted;   // delta * C
        logic signed [31:0] scaled;     // weighted >>> 7
        logic signed [31:0] blended;    // scaled + D, before saturation
        a_s      = {1'b0, pick_operand(asel, cs, cd)};
        b_s      = {1'b0, pick_operand(bsel, cs, cd)};
        d_s      = {1'b0, pick_operand(dsel, cs, cd)};
        coef_s   = {1'b0, coef};
        delta    = a_s - b_s;
        weighted = delta * coef_s;
        scaled   = weighted >>> 7;
        blended  = scaled + d_s;
        return sat_u8(blended);
    endfunction

    // C coefficient, common to all three colour channels.
    // NOTE(review): the As choice goes through the 0x80 limit (as_eff) while
    // ad and fix are forwarded unlimited (0..255) — confirm this asymmetry
    // is intended for the generic datapath.
    logic [7:0] coef_c;
    assign coef_c = (c_sel == 2'd0) ? as_eff
                  : (c_sel == 2'd1) ? ad
                  :                   fix;

    generate
        if (!ALPHA_MODES) begin : g_source_over
            // Source-over only: the selector inputs are not consulted.
            assign cv_r = blend_ch(cs_r, cd_r, as_eff);
            assign cv_g = blend_ch(cs_g, cd_g, as_eff);
            assign cv_b = blend_ch(cs_b, cd_b, as_eff);
        end else begin : g_generic
            // Selector-driven blend; the same selectors and coefficient
            // apply to every channel.
            assign cv_r = blend_generic(cs_r, cd_r, a_sel, b_sel, d_sel, coef_c);
            assign cv_g = blend_generic(cs_g, cd_g, a_sel, b_sel, d_sel, coef_c);
            assign cv_b = blend_generic(cs_b, cd_b, a_sel, b_sel, d_sel, coef_c);
        end
    endgenerate

    // The alpha written back is the source alpha, forwarded untouched.
    assign a_out = as;

endmodule : gs_alpha_blend