ec82764bef
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
174 lines
8.2 KiB
Systemverilog
174 lines
8.2 KiB
Systemverilog
// SPDX-License-Identifier: GPL-3.0-or-later
|
||
// Copyright (c) 2025-2026 retroDE contributors
|
||
// ============================================================================
|
||
// tile_ram_cdc — Ch229 bridge-clock → design-clock tile-RAM shadow
|
||
// ============================================================================
|
||
// Implements the design-domain side of the Ch229 tile-RAM CDC. Owns a
|
||
// 1024 × 32-bit shadow memory in the design clock domain. Bridge-side
|
||
// writes arrive as a toggle-based "event" signal plus latched index +
|
||
// data; a 2-FF synchronizer + XOR edge detector turns each toggle edge
|
||
// into a 1-cycle write pulse against the shadow RAM. Read port is
|
||
// purely combinational (the consumer is the Ch245 platform-OSD
|
||
// char-BRAM read adapter in the top, which selects high/low 16-bit
|
||
// cells from each 32-bit shadow word and feeds them to the platform
|
||
// `osd_overlay`. Pre-Ch245 the consumer was the now-retired
|
||
// PS2-local `osd_overlay_stub`). No back-pressure — the bridge is assumed
|
||
// to space tile writes far enough apart for the sync chain to keep up.
|
||
//
|
||
// **CDC contract (read carefully before refactoring):**
|
||
// - The bridge updates `bclk_wr_toggle`, `bclk_wr_index`, `bclk_wr_data`
|
||
// at the same `bclk` edge (one bridge clock cycle).
|
||
// - The receiver sees the toggle through a 2-FF synchronizer; the
|
||
// edge-detection wire `wr_pulse` fires on the dclk cycle where the
|
||
// synchronized toggle has FULLY settled. That guarantees ≥ 2 dclk
|
||
// periods of stability on `bclk_wr_index/data` before they're
|
||
// sampled into the shadow memory.
|
||
// - Multiple bridge writes faster than ~3 dclk periods apart will
|
||
// race and may drop or merge events. For the Ch229 use case
|
||
// (retrodesd OSD updates at ≤ 1 kHz, design_clk at 25–50 MHz),
|
||
// this is many orders of magnitude of slack. **Do not** wire a
|
||
// fast-cycling source (e.g. a counter) into the bridge's tile
|
||
// write path without first replacing this CDC with an async FIFO.
|
||
//
|
||
// **Reset behavior:**
|
||
// - While `breset_n` is asserted (low): bridge clears `bclk_wr_toggle` to 0
|
||
// (matching the receiver's post-reset state). When both domains
|
||
// reset together (the normal case on FPGA configure), no spurious
|
||
// edge fires after release.
|
||
// - While `dreset_n` is asserted (low): synchronizer chain clears to 0;
|
||
// shadow memory contents are NOT cleared (matches Ch227 retention
|
||
// semantics — sim `initial` block zeroes for determinism, hardware
|
||
// power-up is undefined). The Ch229 contract is "tile RAM survives
|
||
// warm reset"; rebooting both sides is a power-cycle scenario and
|
||
// the bridge will re-broadcast any written tiles via the next set
|
||
// of AXI writes from HPS.
|
||
// ============================================================================
|
||
|
||
`timescale 1ns/1ps
|
||
|
||
module tile_ram_cdc (
    // ---- Bridge clock domain (write port) ----
    // bclk / breset_n are not used by any logic in this module; they are
    // only referenced by the lint placeholder at the bottom.
    input  logic        bclk,
    input  logic        breset_n,
    // Toggle "event": every change of level requests one shadow write.
    input  logic        bclk_wr_toggle,
    // Word address and payload of the requested write. Sampled directly
    // in the dclk domain on the cycle wr_pulse is high, so the source
    // must hold them stable until then.
    input  logic [9:0]  bclk_wr_index,
    input  logic [31:0] bclk_wr_data,

    // ---- Design clock domain (read port) ----
    input  logic        dclk,
    input  logic        dreset_n,       // active-low, asynchronous
    input  logic [9:0]  dclk_rd_index,
    output logic [31:0] dclk_rd_data,   // combinational: shadow_mem[dclk_rd_index]

    // ---- Ch230 design-domain diagnostic counter ----
    // Saturating count of "tile writes too close" events — successive
    // wr_pulse events fewer than MIN_DCLK_GAP dclk cycles apart.
    // Exposed as an output so the top can route it to a reverse-CDC +
    // bridge-readable diagnostic register in a future chapter (Ch231+).
    // For Ch230 the top leaves it unconnected; the counter still exists
    // in the design domain as a synthesis artifact ready for hookup.
    output logic [15:0] tile_wr_too_close_count
);

    // ------------------------------------------------------------------
    // Shadow RAM (design clock domain), 1024 x 32-bit. Matched-size with
    // the bridge-side `ps2_hps_bridge.tile_mem`. The `ramstyle = "M20K"`
    // attribute (added in the Ch232 hardware bring-up hotfix) requests
    // block-RAM implementation from Quartus.
    //
    // NOTE(review): the read port below is combinational. Confirm in the
    // Quartus fit report that the M20K request is actually honored for
    // this read style — TODO confirm.
    // ------------------------------------------------------------------
    (* ramstyle = "M20K" *) logic [31:0] shadow_mem [0:1023];

    // Zero the array at time 0 so simulation reads are deterministic.
    // A reset does not clear the array: nothing below writes shadow_mem
    // under dreset_n.
    initial begin
        for (int i = 0; i < 1024; i++)
            shadow_mem[i] = 32'd0;
    end

    // ------------------------------------------------------------------
    // Toggle synchronizer + edge detector.
    //   toggle_sync[0], toggle_sync[1] : 2-FF synchronizer for the toggle
    //   toggle_sync[2]                 : one-cycle-delayed copy of [1]
    // wr_pulse is the XOR of the two already-synchronized stages, so it
    // is high for exactly one dclk cycle per toggle edge that makes it
    // through the chain.
    // ------------------------------------------------------------------
    logic [2:0] toggle_sync;

    always_ff @(posedge dclk or negedge dreset_n) begin
        if (!dreset_n)
            toggle_sync <= 3'b000;
        else
            toggle_sync <= {toggle_sync[1:0], bclk_wr_toggle};
    end

    wire wr_pulse = toggle_sync[2] ^ toggle_sync[1];

    // ------------------------------------------------------------------
    // Shadow write port. On the dclk edge where wr_pulse is high, capture
    // bclk_wr_index + bclk_wr_data; by then the toggle that announced
    // them has already been clocked through two synchronizer stages.
    //
    // Deliberately a plain `always`, not `always_ff`: shadow_mem is also
    // written by the `initial` block above, and IEEE 1800 does not allow
    // a variable assigned inside an always_ff procedure to be written by
    // any other process. The clocked behavior is identical.
    // ------------------------------------------------------------------
    always @(posedge dclk) begin
        if (wr_pulse)
            shadow_mem[bclk_wr_index] <= bclk_wr_data;
    end

    // Read port: combinational lookup into the shadow array.
    assign dclk_rd_data = shadow_mem[dclk_rd_index];

    // ------------------------------------------------------------------
    // Ch229 / Ch230 tile-write rate watchdog.
    //
    // Bridge writes that flip the toggle too quickly can merge into one
    // transition at the first synchronizer stage, or overwrite
    // bclk_wr_index / bclk_wr_data before the receiver has sampled them,
    // silently losing a write. This block does not prevent that; it only
    // counts how often two wr_pulse events land suspiciously close.
    //
    // dclk_since_last_pulse is cleared on the edge that sees wr_pulse and
    // then increments (saturating) on every following edge, so two pulses
    // sampled k dclk edges apart observe the value k-1 on the second one.
    // With the `<` comparison below, pulses k <= MIN_DCLK_GAP edges apart
    // are flagged. The reset value is all-ones, so the first pulse after
    // reset is never flagged.
    //
    // tile_wr_too_close_count saturates at 16'hFFFF. A simulation-only
    // $display (excluded when SYNTHESIS is defined) reports each event.
    // ------------------------------------------------------------------
    localparam int unsigned MIN_DCLK_GAP = 3;

    logic [31:0] dclk_since_last_pulse;

    wire too_close = wr_pulse && (dclk_since_last_pulse < MIN_DCLK_GAP);

    always_ff @(posedge dclk or negedge dreset_n) begin
        if (!dreset_n) begin
            dclk_since_last_pulse   <= 32'hFFFF_FFFF;
            tile_wr_too_close_count <= 16'd0;
        end else begin
            // Gap tracker: restart on every pulse, otherwise count up
            // until saturated.
            if (wr_pulse)
                dclk_since_last_pulse <= 32'd0;
            else if (dclk_since_last_pulse != 32'hFFFF_FFFF)
                dclk_since_last_pulse <= dclk_since_last_pulse + 32'd1;

            // Saturating violation counter.
            if (too_close && (tile_wr_too_close_count != 16'hFFFF))
                tile_wr_too_close_count <= tile_wr_too_close_count + 16'd1;
        end
    end

`ifndef SYNTHESIS
    // Simulation-only log line for each too-close event seen out of reset.
    always_ff @(posedge dclk) begin
        if (dreset_n && too_close) begin
            $display(
                "[tile_ram_cdc] WARN time=%0t: tile writes too close - %0d dclk cycles between toggle edges (CDC needs >= %0d for safe sample).",
                $time, dclk_since_last_pulse, MIN_DCLK_GAP);
        end
    end
`endif

    // ---- Lint: bclk + breset_n are intentionally referenced ONLY
    //      via the bclk_wr_toggle path. Tie a placeholder reference
    //      to silence "unused" warnings on tools that don't trace
    //      through the upstream toggle source.
    // verilator lint_off UNUSED
    wire _unused_ok = &{1'b0, bclk, breset_n, 1'b0};
    // verilator lint_on UNUSED

endmodule : tile_ram_cdc
|