ec82764bef
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1345 lines
78 KiB
Systemverilog
1345 lines
78 KiB
Systemverilog
// SPDX-License-Identifier: GPL-3.0-or-later
|
||
// Copyright (c) 2025-2026 retroDE contributors
|
||
// ============================================================================
|
||
// ps2_hps_bridge — Ch173/Ch174/Ch176 AXI4 register surface for the PS2 core
|
||
// ============================================================================
|
||
//
|
||
// Replaces ps2_hps_bridge_null (Ch170 placeholder). Adds live status
|
||
// registers the HPS-side Linux userspace can read to confirm what the
|
||
// fabric is doing without watching the HDMI output. All identity +
|
||
// status registers are read-only; writes ACK with OKAY and discard
|
||
// (matches the Codex Ch173 caution to keep Ch173 read-only unless
|
||
// the write path is trivial).
|
||
//
|
||
// Port signature is a strict superset of ps2_hps_bridge_null so the
|
||
// top wrapper can swap the instantiation without re-plumbing AXI.
|
||
//
|
||
// Register map (read-only unless marked writable/RW below; single-beat AXI4, 128-bit data with 32-bit
|
||
// lane selection via {awaddr[3:2] / araddr[3:2]}). 0x000-0x01F is the
|
||
// retroDE common ABI v1.0 window (matches splash/nes/coco2 layout per
|
||
// retroDE_splash/software/input_common.h); the PS2-specific counters
|
||
// land at 0x020+ to avoid colliding with shared-ABI semantics.
|
||
//
|
||
// 0x000 CORE_ID 32'h50533200 ("PS2\0")
|
||
// 0x004 ABI_VERSION 32'h00000100 (v1.0)
|
||
// 0x008 CORE_STATUS
|
||
// bit 0: loaded (always 1 after configuration)
|
||
// bit 1: core_halt (EE core syscall'd / halted)
|
||
// bit 2: dma_done_seen (DMAC channel 2 reached done)
|
||
// bit 3: frame_seen (PCRTC produced end-of-frame)
|
||
// bit 4: hdmi_init_done (ADV7513 I²C config FSM complete)
|
||
// bit 5: hdmi_i2c_error (Ch166 NACK watchdog latched)
|
||
// bit 6: raster_overflow (Ch87 FIFO push_drop ever fired
|
||
// — should stay 0 under Ch172
|
||
// backpressure)
|
||
// bits[31:7]: reserved-zero
|
||
// 0x00C CORE_CAPS 32'h00000000 (no caps advertised
|
||
// yet; ROM/savestate/etc capability
|
||
// bits land here when wired)
|
||
// 0x010 CORE_CTRL Ch176 writable + readable. Bit layout
|
||
// matches shared retroDE ABI:
|
||
// [0] RESET — functional. Held
|
||
// high = PS2 design
|
||
// domain in reset.
|
||
// Exposed on the
|
||
// `core_reset_req`
|
||
// port; the board
|
||
// top 2-FF syncs it
|
||
// into design_clk
|
||
// and gates the
|
||
// existing reset.
|
||
// [1] ROM_LOADED — latch only. PS2
|
||
// has no ROM
|
||
// lifecycle today
|
||
// (we ship as
|
||
// backend=splash).
|
||
// Written/read for
|
||
// ABI shape only.
|
||
// [2] PAUSE — latch only. No
|
||
// clean stall point
|
||
// in this design
|
||
// yet; later chapter
|
||
// wires the gate.
|
||
// Unused bits readback as 0.
|
||
// 0x014 CORE_PULSE Ch176 write-only side-effect register.
|
||
// Self-clearing (always reads 0).
|
||
// Bit layout matches shared ABI:
|
||
// [3] HDMI_CLR — clears FRAME_COUNT,
|
||
// DMA_DONE_COUNT,
|
||
// RASTER_OVERFLOW_COUNT.
|
||
// PS2 widens the
|
||
// shared "HDMI
|
||
// diagnostic clear"
|
||
// to "video/bridge
|
||
// diagnostic clear"
|
||
// per Codex Ch176
|
||
// naming note.
|
||
// [0,1,2] DIRTY_CLR / SS_DONE_CLR /
|
||
// VIDEO_REC — ACK'd
|
||
// and ignored (PS2
|
||
// has no savestate
|
||
// or HDMI re-init
|
||
// path yet).
|
||
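// 0x018 LPDDR test ctrl Ch318/Ch320/Ch321 (per the lpddr_* port comments):
// [0] arm (default 0), [1] canary (default 1),
// [2] video_src (default 0 = BRAM),
// [3] scanout_lb (default 0 = frame-cache).
// 0x01C LPDDR fb_base Ch318: LPDDR byte base (default 0x8000_0000).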
|
||
//
|
||
// PS2-specific diagnostics (Ch173, retroDE-ps2 extension to the shared
|
||
// ABI; safe because the shared map only reserves 0x000-0x01F):
|
||
//
|
||
// 0x020 FRAME_COUNT Ch174: increments on each edge of
|
||
// `frame_toggle` (the event-toggle
|
||
// primitive — flips on every PCRTC
|
||
// end-of-frame pulse in design_clk).
|
||
// Replaces Ch173's edge-of-sticky
|
||
// source so the counter actually
|
||
// tracks frames in hardware.
|
||
// 0x024 DMA_DONE_COUNT Ch174: increments on each edge of
|
||
// `dma_done_toggle` (event-toggle —
|
||
// flips on every DMAC EV_DMA_DONE).
|
||
// 0x028 RASTER_OVERFLOW_COUNT counts raster_overflow cycles
|
||
// (cycle-by-cycle, not edge — a
|
||
// non-zero value = the backpressure
|
||
// path missed somewhere)
|
||
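// 0x02C LPDDR status (per the lpddr_* port comments): [0] writer
// idle/done, [4] scanout frame cache valid, [5] scanout read
// errors seen; bresp_err / fifo_ovf are also folded in here.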
|
||
//
|
||
// Ch222 — input latch block. HPS-visible write/read registers at the
|
||
// shared-ABI input offsets. No SIO2 / DualShock wiring yet; retrodesd
|
||
// writes here so future PS2-side controller emulation has a stable
|
||
// read port. Reset clears all three to 0.
|
||
//
|
||
// 0x040 INPUT_P1 Ch222: 32-bit RW latch. retrodesd's
|
||
// `input_thread` writes the remapped
|
||
// gamepad bitmap (JOY_* bits per
|
||
// input_common.h).
|
||
// 0x044 INPUT_P2 Ch222: 32-bit RW latch (player 2).
|
||
// 0x048 INPUT_P1_RAW Ch222: 32-bit RW latch. Un-remapped
|
||
// mirror of the player-1 buttons,
|
||
// used by the OSD navigation FSM in
|
||
// other cores. PS2 stores it without
|
||
// consumers today.
|
||
// 0x04C reserved 32'h00000000
|
||
//
|
||
// Ch225 — VIDEO_STATUS / HDMI_DIAG diagnostic read surface. Both are
|
||
// pure-read views into the synchronized status signals the bridge
|
||
// already tracks; writes are ack'd-and-ignored. retrodesd / operator
|
||
// tooling can poll these to answer "is the display path alive?"
|
||
// without depending on LEDs or `ps2_status.sh`.
|
||
//
|
||
// 0x060 VIDEO_STATUS Ch225: read-only diagnostic bits.
|
||
// [0] frame_seen (live sticky from
|
||
// PCRTC end-of-frame)
|
||
// [1] scanout_alive (= frame_count!=0,
|
||
// i.e. ≥ 1 frame
|
||
// completed since
|
||
// last HDMI_CLR)
|
||
// [2] raster_overflow (live — drops if
|
||
// the input dies)
|
||
// [3] dma_done_seen (live sticky)
|
||
// [31:4] reserved-zero.
|
||
// 0x064 HDMI_DIAG Ch225: read-only diagnostic bits.
|
||
// [0] hdmi_init_done (ADV7513 LUT walk
|
||
// complete)
|
||
// [1] hdmi_i2c_error (Ch166 NACK
|
||
// watchdog latched)
|
||
// [31:2] reserved-zero.
|
||
//
|
||
// Ch226 — DS2 wired-controller stub at the shared-ABI offsets. PS2
|
||
// has no DS2 platform path today, so the stub honors the sibling-ABI
|
||
// contract consumed by retrodesd's `ds2_poll_thread.c`:
|
||
// `connected = status & 0x1` and `error = (status >> 1) & 0x1`. With
|
||
// connected = 0, retrodesd's poll thread treats DS2 as absent and
|
||
// never calls `osd_input_update_ds2(buttons)` — the right behavior.
|
||
// Bit [2] is a PS2-local diagnostic (`input_latches_valid`) confirming
|
||
// that the Ch222 latch surface exists; it deliberately sits above the
|
||
// shared bits to avoid colliding with the sibling-consumed
|
||
// `[0]=connected / [1]=error` semantics. DS2_BUTTONS mirrors
|
||
// INPUT_P1 so HPS can confirm the Ch222 store landed via the same
|
||
// bridge offset the sibling cores expose.
|
||
//
|
||
// 0x0F0 DS2_STATUS Ch226: read-only sibling-ABI stub.
|
||
// [0] connected = 1'b0
|
||
// (no DS2 wired path)
|
||
// [1] error = 1'b0
|
||
// (not an error state)
|
||
// [2] input_latches_valid = 1'b1
|
||
// (PS2-local: Ch222 OK)
|
||
// [31:3] reserved-zero.
|
||
// Reads = 32'h00000004.
|
||
// 0x0F4 DS2_BUTTONS Ch226: read-only mirror of INPUT_P1
|
||
// (Ch222). Reads return whatever
|
||
// retrodesd last wrote to 0x040.
|
||
// With `connected=0` retrodesd
|
||
// never consumes this, but operator
|
||
// tooling can use it to confirm the
|
||
// Ch222 store via the DS2 path.
|
||
//
|
||
// Ch227 — tile RAM storage surface at 0x1000..0x1FFF (4 KB window).
|
||
// The shared-ABI software view is "2048 × 16-bit cells packed 2/word"
|
||
// (see input_common.h's `osd_setup` + `osd_putchar`), but at the bus
|
||
// level the bridge stores plain 32-bit words and returns exactly
|
||
// what HPS wrote. Sibling cores forward tile writes to an external
|
||
// overlay engine via `tile_wr_addr/data/en` output ports; PS2 has
|
||
// no overlay engine yet (Ch228 will wire one), so for Ch227 the
|
||
// bridge owns the storage internally as a 1024 × 32-bit memory
|
||
// array. No PCRTC overlay composition; reads simply mirror the
|
||
// last write. Reset semantics: contents are *retained* across warm
|
||
// reset (no sibling precedent for clearing — siblings don't even
|
||
// own the storage). Sim initializes to 0 for determinism; power-up
|
||
// value on hardware is undefined until the overlay-engine chapter
|
||
// pins a reset contract.
|
||
//
|
||
// 0x1000..0x1FFF Tile RAM Ch227: 1024 × 32-bit RW. Decode
|
||
// guard `addr[37:12] == 26'h1`;
|
||
// word index = addr[11:2]. WSTRB
|
||
// ignored (full-word writes only,
|
||
// same contract as the rest of
|
||
// the bridge).
|
||
//
|
||
// Ch223 — OSD compatibility sink. retrodesd's splash backend writes
|
||
// OSD_CTRL / OSD_CFG0 / OSD_CFG1 at these offsets during start(),
|
||
// followed by tile RAM writes at 0x1000+. PS2 has no overlay path
|
||
// yet, so these three latches just store-and-readback — they exist
|
||
// so retrodesd's writes land in real registers instead of being
|
||
// silently dropped on the side-effect map (Ch221 boundary).
|
||
//
|
||
// 0x100 OSD_CTRL Ch223: 32-bit RW latch. Splash sets
|
||
// ENABLE | INPUT_LOCK | FORCE_OPEN
|
||
// here; the bridge doesn't act on
|
||
// those bits yet but the value is
|
||
// stored and readable for software
|
||
// introspection.
|
||
// 0x104 OSD_STATUS Ch224: always-zero source. Reads
|
||
// return 0; writes accepted with
|
||
// BRESP=OKAY and ignored. The FSM
|
||
// that drives [0]=active and
|
||
// [12:8]=cursor_row exists in other
|
||
// cores' bridges but PS2 has no
|
||
// overlay engine yet, so the status
|
||
// bits stay 0.
|
||
// 0x108 OSD_TRIGGER Ch224: W1C-shape sink. Reads
|
||
// return 0; writes accepted with
|
||
// BRESP=OKAY and ignored. In other
|
||
// cores the FSM SETs the action /
|
||
// back / scroll bits, retrodesd
|
||
// reads, processes, and writes-1
|
||
// to clear. PS2 has no FSM source
|
||
// today so the bits stay 0 and the
|
||
// W1C semantics collapse to a sink.
|
||
// Behavior matches the ABI v1.0
|
||
// contract — retrodesd's poll loop
|
||
// sees `trig=0` and never invokes
|
||
// `osd_action`.
|
||
// 0x10C OSD_INPUT reserved — reads 0. Sibling cores
|
||
// expose this as a "debug override"
|
||
// for injecting simulated button
|
||
// events; PS2 has no compelling use
|
||
// for it until an overlay engine
|
||
// exists.
|
||
// 0x110 OSD_CFG0 Ch223: 32-bit RW latch. cols/rows/
|
||
// x_chars/y_chars layout fields per
|
||
// input_common.h's `osd_setup`.
|
||
// 0x114 OSD_CFG1 Ch223: 32-bit RW latch. first_row/
|
||
// last_row + highlight/normal attrs.
|
||
// 0x118..0x11F reserved 32'h00000000
|
||
//
|
||
// CDC: status signals arrive on the design_clk (Ch169 IOPLL @ 25 MHz)
|
||
// domain or the CLOCK2_50 domain (HDMI I²C FSM). The bridge runs on
|
||
// clk (= CLOCK2_50, the qsys bridge clock per Ch170). Every signal
|
||
// is passed through a 2-stage synchronizer before being consumed by
|
||
// register logic.
|
||
//
|
||
// Ch174 — event-counter CDC contract: a 1-cycle pulse in design_clk
|
||
// (40 ns @ 25 MHz) is borderline against a 50 MHz 2-FF synchronizer
|
||
// and would occasionally miss frames on hardware. Counters here are
|
||
// fed by *event toggles* (`frame_toggle` / `dma_done_toggle`), which
|
||
// flip on every event in the design domain and stay at the new value
|
||
// until the next event (~16.7 ms for frames). The synchronizer has
|
||
// megacycles of slack and cannot miss an edge; the counter increments
|
||
// on each XOR between sync_d[1] and sync_d[2]. The sticky-latch
|
||
// signals `frame_seen` / `dma_done_seen` remain the *status-bit*
|
||
// sources (CORE_STATUS[3] / [2]) — do not "simplify" this back into
|
||
// raw pulse synchronizers.
|
||
//
|
||
// Write contract: the bridge accepts every AXI write addressed inside
|
||
// its 38-bit window (`s_axi_awready` rises in W_IDLE regardless of
|
||
// address) and always returns `BRESP=OKAY`. The 32-bit register slot
|
||
// is picked out of `s_axi_wdata[127:0]` by `aw_addr_q[3:2]` (the lane
|
||
// select). **`s_axi_wstrb` is intentionally ignored** — every accepted
|
||
// write is treated as a full 32-bit-word write into the targeted
|
||
// register. Partial-byte writes are not supported; the corresponding
|
||
// register field will be overwritten in full. This matches the AXI4
|
||
// usage that retroDE userspace produces (`devmem2 ... w` / native
|
||
// 32-bit stores from retrodesd's bridge mmap) and the sibling-core
|
||
// bridges, all of which also ignore WSTRB. Writes to undecoded
|
||
// offsets inside or outside the side-effect window are still ACK'd
|
||
// but have no register effect.
|
||
// ============================================================================
|
||
|
||
`timescale 1ns/1ps
|
||
|
||
module ps2_hps_bridge (
|
||
input logic clk, // qsys clk_100_clk (=CLOCK2_50)
|
||
input logic reset_n,
|
||
input logic h2f_reset, // HPS-driven fabric reset (unused; reserved)
|
||
|
||
// ---- Ch173 status inputs (asynchronous to clk; sync'd internally) ----
|
||
input logic core_halt,
|
||
input logic dma_done_seen,
|
||
input logic frame_seen,
|
||
input logic hdmi_init_done,
|
||
input logic hdmi_i2c_error,
|
||
input logic raster_overflow,
|
||
|
||
// ---- Ch174 event-toggle counter inputs (async; sync'd internally) ----
|
||
input logic frame_toggle,
|
||
input logic dma_done_toggle,
|
||
|
||
// ---- Ch318 LPDDR-framebuffer test control/status (CLOCK2_50 — SAME domain as
|
||
// the gs_lpddr_axi_master AXI side, so no synchronizers). HPS-armed runtime
|
||
// controls + status counters; defaults safe (arm=0, canary=1, base=0x80000000).
|
||
output logic lpddr_arm_o, // 0x018[0] — permit AXI writes (default 0)
|
||
output logic lpddr_canary_o, // 0x018[1] — write only the 32-byte top line (default 1)
|
||
output logic [31:0] lpddr_fb_base_o, // 0x01C — LPDDR byte base (default 0x8000_0000)
|
||
output logic lpddr_ctrl_commit_o,// Ch352 — toggles on any arm/canary/fb_base write. The EMIF
|
||
// domain syncs this and latches {arm,canary,fb_base} on its edge,
|
||
// so the multi-bit control crosses COHERENTLY (no raw fb_base bus).
|
||
input logic [31:0] lpddr_bytes_i, // 0x030 — bytes written
|
||
input logic [31:0] lpddr_bursts_i, // 0x034 — bursts issued
|
||
input logic [31:0] lpddr_bresp_err_i, // (folded into 0x02C status)
|
||
input logic [31:0] lpddr_fifo_ovf_i, // (folded into 0x02C status)
|
||
input logic lpddr_idle_i, // 0x02C[0] — writer idle/done
|
||
|
||
// ---- Ch319 Brick 3: LPDDR4B read-probe (HPS reads FB content back through the
|
||
// bridge — no /dev/mem). 0x03C: WRITE sets the byte address + triggers a read;
|
||
// READ returns the 32-bit word. rd_done/rd_data arrive on emif_clk → synced here.
|
||
output logic [31:0] lpddr_rd_addr_o, // 0x03C write — read byte address (write triggers a read)
|
||
output logic lpddr_rd_pulse_o, // toggles on each 0x03C write (the read trigger)
|
||
input logic [31:0] lpddr_rd_data_i, // read-probe result word (emif_clk; latched here)
|
||
input logic lpddr_rd_done_i, // read-probe done toggle (emif_clk; synced here)
|
||
|
||
// ---- Ch320: LPDDR4B scanout source-select + status ----
|
||
output logic lpddr_video_src_o, // 0x018[2] — 1 = HDMI sourced from LPDDR4B scanout (default 0 = BRAM)
|
||
output logic lpddr_scanout_lb_o, // 0x018[3] — 1 = line-buffer scanout, 0 = frame-cache (Ch321; default 0)
|
||
input logic lpddr_scan_valid_i, // scanout frame cache loaded (emif_clk; synced -> 0x02C[4])
|
||
input logic lpddr_scan_err_i, // scanout read errors seen (emif_clk; synced -> 0x02C[5])
|
||
|
||
// ---- Ch322: LPDDR write-probe (HPS stages texture words) + texture-cache fill ----
|
||
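// NOTE(review): 0x040 / 0x044 / 0x048 are also documented in the file header and in the
// Ch235 port comments as INPUT_P1 / INPUT_P2 / INPUT_P1_RAW — confirm which decode is live.
// 0x040 LPDDR_WRADDR (W): set the LPDDR byte address (auto-increments +4 per data write).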
|
||
// 0x044 LPDDR_WRDATA (W): a 32-bit word -> latches data + toggles wr_pulse (one write).
|
||
// 0x048 TEX_FILL_CTRL (W bit0): arm the texture-cache fill. 0x048 (R): tex status.
|
||
output logic [31:0] lpddr_wr_addr_o, // 0x040 — write byte address (auto-inc per 0x044 write)
|
||
output logic [31:0] lpddr_wr_data_o, // 0x044 — write data word (stable when wr_pulse flips)
|
||
output logic lpddr_wr_pulse_o, // toggles on each 0x044 write (write trigger)
|
||
input logic lpddr_wr_busy_i, // write-probe busy (emif_clk; synced)
|
||
input logic lpddr_wr_done_i, // write-probe done toggle (emif_clk; synced)
|
||
input logic [31:0] lpddr_wr_bresp_err_i,// 0x058 — write-probe non-OKAY responses
|
||
output logic [7:0] diag_ctrl_o, // 0x0C0 — Ch323 diag: [0]wr-probe full-beat [1]spill force-lane0
|
||
// [3:2]probe-port(0=s3,1=s1,2=s2) [5:4]spill-awid(0=6,1=5,2=0)
|
||
output logic tex_fill_start_o, // 0x048[0] — arm the prefilled texture-cache fill
|
||
input logic tex_fill_done_i, // texture fully resident (emif_clk; synced -> 0x048[0])
|
||
input logic [31:0] tex_fill_beats_i, // 0x058 — fill beats completed
|
||
input logic [31:0] tex_fill_bytes_i, // 0x05C — fill bytes completed
|
||
input logic [31:0] tex_rd_errs_i, // 0x068 — texture-fill non-OKAY read responses
|
||
input logic [31:0] tex_fill_crc_i, // 0x070 — Ch352 sum32 of words written to tex_mem (integrity proof)
|
||
// Ch322 — texel-source proof counters (design_clk; reset per render). After a render,
|
||
// tex_cache_hits_i > 0 proves the triangle's texels came from the LPDDR cache.
|
||
input logic [31:0] tex_cache_hits_i, // 0x078 — texel reads served from the LPDDR cache
|
||
input logic [31:0] tex_bram_hits_i, // 0x07C — texel reads served from BRAM (fallback)
|
||
|
||
// Ch323 — tile COLOR+Z spill/reload status counters (R-only; 0 when GS_TILE_SPILL off).
|
||
// The HPS board script reads these to prove the spill/reload round-trip on silicon:
|
||
// both spill writers advance, the reload reads them back, no write/read errors.
|
||
input logic [31:0] spill_color_beats_i, // 0x080 — color flush words written to LPDDR
|
||
input logic [31:0] spill_z_beats_i, // 0x084 — Z flush words written to LPDDR
|
||
input logic [31:0] reload_color_beats_i,// 0x088 — color beats read back per reload
|
||
input logic [31:0] reload_z_beats_i, // 0x08C — Z beats read back per reload
|
||
input logic [31:0] reload_rd_errs_i, // 0x090 — reload non-OKAY read responses
|
||
input logic [31:0] spill_color_errs_i, // 0x094 — color flush non-OKAY write responses
|
||
input logic [31:0] spill_z_errs_i, // 0x098 — Z flush non-OKAY write responses
|
||
input logic spill_color_ovf_i, // 0x09C[0] — color spill FIFO overflow (sticky)
|
||
input logic spill_z_ovf_i, // 0x09C[1] — Z spill FIFO overflow (sticky)
|
||
// Ch323 diag — upstream tile-spill/reload event counters (reset per-render; 0 off-spill).
|
||
input logic [31:0] ev_tp_flush_i, // 0x0A0 — entries into TP_FLUSH
|
||
input logic [31:0] ev_tp_zflush_i, // 0x0A4 — entries into TP_ZFLUSH
|
||
input logic [31:0] ev_tp_reload_i, // 0x0A8 — entries into TP_RELOAD
|
||
input logic [31:0] ev_tp_render_i, // 0x0AC — entries into TP_RENDER
|
||
input logic [31:0] ev_flush_emit_i, // 0x0B0 — color flush emit pulses
|
||
input logic [31:0] ev_zflush_emit_i, // 0x0B4 — Z flush emit pulses
|
||
input logic [31:0] ev_reload_start_i, // 0x0B8 — reload-arm pulses
|
||
input logic [31:0] ev_reload_ready_i, // 0x0BC — reload-ready rising edges
|
||
// Color-writer spill PIPELINE counters (localize any spill divergence): beats=B handshakes,
|
||
// emit/push=GS side, pop=EMIF side. Healthy two-batch render = 512/64/64/64.
|
||
input logic [31:0] dbg_c_beat_count_i, // 0x0E0 — beats committed since trace_clear
|
||
input logic [31:0] dbg_c_emit_count_i, // 0x0CC — GS: emits accepted (expect 512)
|
||
input logic [31:0] dbg_c_push_count_i, // 0x0F8 — GS: beats pushed to FIFO (expect 64)
|
||
input logic [31:0] dbg_c_pop_count_i, // 0x0FC — EMIF: beats popped from FIFO (expect == beats)
|
||
// Ch324 Z-writer pipeline counters (same split): compare against color in one run to localize
|
||
// the grid spill_z over-production. emit too high=TP_ZFLUSH/iteration; push high=packer; pop/beats high=FIFO/CDC.
|
||
input logic [31:0] dbg_z_beat_count_i, // 0x0C4 — Z beats committed since trace_clear
|
||
input logic [31:0] dbg_z_emit_count_i, // 0x0C8 — Z GS emits accepted
|
||
input logic [31:0] dbg_z_push_count_i, // 0x0D0 — Z GS beats pushed to FIFO
|
||
input logic [31:0] dbg_z_pop_count_i, // 0x0D4 — Z EMIF beats popped from FIFO
|
||
|
||
// ---- Ch330 Brick 4 — runtime command-list feeder control ----
|
||
// The HPS writes a staging RAM entry as two 32-bit halves (0x0DC LO, 0x0E4 HI);
|
||
// writing HI commits {hi,lo} -> staging[addr] and auto-increments addr. 0x0E8 GO
|
||
// retriggers. The we/go outputs are TOGGLES (the feeder lives in design_clk; the
|
||
// de25 top syncs+edge-detects them into pulses) with stable level addr/data.
|
||
output logic feeder_stg_we_tgl_o, // toggles on each staging commit (HI write)
|
||
output logic [11:0] feeder_stg_waddr_o, // staging word index for the committed word (level)
|
||
output logic [63:0] feeder_stg_wdata_o, // {hi,lo} committed word (level)
|
||
output logic feeder_go_tgl_o, // toggles on each 0x0E8 GO write (bit0=1)
|
||
input logic feeder_ready_i, // 0x0D8[0] feeder control FSM in C_READY (async; synced)
|
||
input logic [15:0] feeder_records_i, // 0x0E4 records_emitted (async counter)
|
||
input logic [31:0] feeder_waits_i, // 0x0E8 fifo_wait_cycles (async counter)
|
||
|
||
// ---- AXI4 slave (signature matches ps2_hps_bridge_null) ----
|
||
input logic [3:0] s_axi_awid,
|
||
input logic [37:0] s_axi_awaddr,
|
||
input logic [7:0] s_axi_awlen,
|
||
input logic [2:0] s_axi_awsize,
|
||
input logic [1:0] s_axi_awburst,
|
||
input logic s_axi_awlock,
|
||
input logic [3:0] s_axi_awcache,
|
||
input logic [2:0] s_axi_awprot,
|
||
input logic s_axi_awvalid,
|
||
output logic s_axi_awready,
|
||
|
||
input logic [127:0] s_axi_wdata,
|
||
input logic [15:0] s_axi_wstrb,
|
||
input logic s_axi_wlast,
|
||
input logic s_axi_wvalid,
|
||
output logic s_axi_wready,
|
||
|
||
output logic [3:0] s_axi_bid,
|
||
output logic [1:0] s_axi_bresp,
|
||
output logic s_axi_bvalid,
|
||
input logic s_axi_bready,
|
||
|
||
input logic [3:0] s_axi_arid,
|
||
input logic [37:0] s_axi_araddr,
|
||
input logic [7:0] s_axi_arlen,
|
||
input logic [2:0] s_axi_arsize,
|
||
input logic [1:0] s_axi_arburst,
|
||
input logic s_axi_arlock,
|
||
input logic [3:0] s_axi_arcache,
|
||
input logic [2:0] s_axi_arprot,
|
||
input logic s_axi_arvalid,
|
||
output logic s_axi_arready,
|
||
|
||
output logic [3:0] s_axi_rid,
|
||
output logic [127:0] s_axi_rdata,
|
||
output logic [1:0] s_axi_rresp,
|
||
output logic s_axi_rlast,
|
||
output logic s_axi_rvalid,
|
||
input logic s_axi_rready,
|
||
|
||
// ---- Ch176 outputs ---------------------------------------------------
|
||
// core_reset_req: latched CORE_CTRL[0]. Active-high. Asynchronous to
|
||
// the design domain — the board top is responsible for 2-FF syncing
|
||
// this into design_clk and gating the design's reset.
|
||
output logic core_reset_req,
|
||
|
||
// ---- Ch229 tile write-event broadcast --------------------------------
|
||
// On every accepted AXI write into the Ch227 tile RAM window, latch
|
||
// the index + data and toggle a 1-bit "event" signal. A design-clock
|
||
// CDC (`tile_ram_cdc`) detects edges on the synchronized toggle to
|
||
// gate writes into its shadow RAM. The bridge still owns the
|
||
// HPS-side `tile_mem` for AXI readback — Ch229 only adds the
|
||
// broadcast outputs; it does not change the Ch227 storage path.
|
||
// Toggle starts at 0 after reset (matched on the receiver side, so
|
||
// no spurious post-reset edge if both domains reset together).
|
||
output logic tile_wr_toggle, // toggles per tile write
|
||
output logic [9:0] tile_wr_index, // word index (= addr[11:2])
|
||
output logic [31:0] tile_wr_data, // 32-bit word data
|
||
|
||
// ---- Ch235 input latch broadcast (bridge clock domain) ---------------
|
||
// Ch222 `INPUT_P1`/`INPUT_P2` latches (@ 0x040/0x044) surfaced for
|
||
// downstream PS2-fabric consumers. The Ch234 `sio2_input_stub`
|
||
// (instantiated inside `iop_memory_map_stub`) is the first
|
||
// consumer; it owns the bridge-clk → design-clk CDC via its
|
||
// internal 2-FF synchronizer, so this port is plain combinational
|
||
// out of the bridge — no extra sync stage here.
|
||
output logic [31:0] input_p1_o, // = input_p1_q
|
||
output logic [31:0] input_p2_o, // = input_p2_q
|
||
// Ch245 — INPUT_P1_RAW (un-remapped retroDE bitmap) surfaced for
|
||
// the platform osd_menu_fsm. retrodesd may per-game-remap
|
||
// INPUT_P1 (A↔B swap etc.) — the OSD navigation uses the raw
|
||
// bitmap directly so D-pad / Start / Select / A / B map to the
|
||
// FSM's expected bit positions regardless of per-game remaps.
|
||
// Matches NES at retroDE_nes.sv:1235 (".joy0_buttons(bridge_joy0_osd)").
|
||
output logic [31:0] input_p1_raw_o, // = input_p1_raw_q
|
||
|
||
// ---- Ch245 platform OSD register surface -----------------------------
|
||
// Mirror sibling-ABI bridge ports so the shared
|
||
// retroDE_splash osd_overlay + osd_menu_fsm slot in with no
|
||
// PS2-local cell-format assumptions. The whole 32-bit register
|
||
// value is exposed; the top extracts fields per the sibling
|
||
// layout (cols=bits[5:0], rows=bits[12:8], origin_x_chars=
|
||
// bits[23:16], origin_y_chars=bits[31:24]).
|
||
output logic [31:0] osd_ctrl_o, // = osd_ctrl_q
|
||
output logic [31:0] osd_cfg0_o, // = osd_cfg0_q
|
||
output logic [31:0] osd_cfg1_o, // = osd_cfg1_q
|
||
|
||
// Menu-FSM outputs (sys_clk = CLOCK2_50 domain). Compose
|
||
// OSD_STATUS read + OSD_TRIGGER set-bits.
|
||
input logic osd_active_i,
|
||
input logic [4:0] osd_cursor_row_i,
|
||
input logic osd_set_trigger_i, // A button (one-cycle pulse)
|
||
input logic osd_back_trigger_i, // B button (one-cycle pulse)
|
||
input logic osd_scroll_down_trigger_i,// cursor down past end
|
||
input logic osd_scroll_up_trigger_i, // cursor up past start
|
||
input logic osd_open_trigger_i, // OSD just became visible
|
||
input logic [4:0] osd_trigger_row_i, // active row at trigger
|
||
|
||
// ---- Ch248 DS2 wired-controller inputs (from the platform
|
||
// `ds2_controller` instantiated in the top, polling the
|
||
// DE25-Nano GPIO header at the PSX SPI-like protocol rate).
|
||
// These replace the Ch226 hardcoded stub that always reported
|
||
// "no controller plugged in." Same clock domain as the
|
||
// bridge (both ride sys_clk = CLOCK2_50), so no synchronizer
|
||
// needed at this end.
|
||
input logic [31:0] ds2_buttons_i,
|
||
input logic ds2_connected_i,
|
||
input logic ds2_error_i
|
||
);
|
||
|
||
// ----------------------------------------------------------------
|
||
// 2-stage synchronizers for the async status signals.
|
||
// ----------------------------------------------------------------
|
||
// Two-flop synchronizer chains, one per asynchronous status input.
// Bit [0] is the capture flop fed directly from the input port; bit [1]
// re-registers bit [0] and is the copy the rest of the bridge consumes.
logic [1:0] core_halt_sync, dma_done_sync, frame_seen_sync;
logic [1:0] hdmi_init_sync, hdmi_err_sync, raster_of_sync;
logic [1:0] frame_tgl_sync, dma_tgl_sync;   // Ch174 event toggles

always_ff @(posedge clk or negedge reset_n) begin
    if (!reset_n) begin
        // Asynchronous active-low reset: every chain restarts at zero.
        core_halt_sync  <= '0;
        dma_done_sync   <= '0;
        frame_seen_sync <= '0;
        hdmi_init_sync  <= '0;
        hdmi_err_sync   <= '0;
        raster_of_sync  <= '0;
        frame_tgl_sync  <= '0;
        dma_tgl_sync    <= '0;
    end else begin
        // Second stage: re-register the previously captured sample.
        core_halt_sync[1]  <= core_halt_sync[0];
        dma_done_sync[1]   <= dma_done_sync[0];
        frame_seen_sync[1] <= frame_seen_sync[0];
        hdmi_init_sync[1]  <= hdmi_init_sync[0];
        hdmi_err_sync[1]   <= hdmi_err_sync[0];
        raster_of_sync[1]  <= raster_of_sync[0];
        frame_tgl_sync[1]  <= frame_tgl_sync[0];
        dma_tgl_sync[1]    <= dma_tgl_sync[0];

        // First stage: sample the raw asynchronous inputs.
        core_halt_sync[0]  <= core_halt;
        dma_done_sync[0]   <= dma_done_seen;
        frame_seen_sync[0] <= frame_seen;
        hdmi_init_sync[0]  <= hdmi_init_done;
        hdmi_err_sync[0]   <= hdmi_i2c_error;
        raster_of_sync[0]  <= raster_overflow;
        frame_tgl_sync[0]  <= frame_toggle;
        dma_tgl_sync[0]    <= dma_done_toggle;
    end
end
|
||
// Bridge-clock views of the status inputs: each *_q net is the second
// (output) stage of the matching two-flop synchronizer chain.
wire core_halt_q, dma_done_q, frame_seen_q;
wire hdmi_init_q, hdmi_err_q, raster_of_q;
wire frame_tgl_q, dma_tgl_q;

assign core_halt_q  = core_halt_sync[1];
assign dma_done_q   = dma_done_sync[1];
assign frame_seen_q = frame_seen_sync[1];
assign hdmi_init_q  = hdmi_init_sync[1];
assign hdmi_err_q   = hdmi_err_sync[1];
assign raster_of_q  = raster_of_sync[1];
assign frame_tgl_q  = frame_tgl_sync[1];
assign dma_tgl_q    = dma_tgl_sync[1];
|
||
|
||
// ----------------------------------------------------------------
|
||
// Ch176 — writable control register at 0x010.
|
||
// bit 0: RESET (functional, drives core_reset_req)
|
||
// bit 1: ROM_LOADED (latch only)
|
||
// bit 2: PAUSE (latch only)
|
||
// bits[31:3]: writable but ignored (readback as 0 — only the
|
||
// three ABI-defined bits survive a write/read
|
||
// round-trip).
|
||
// Write path: the write FSM signals `core_ctrl_we` for one clk
|
||
// cycle when AXI accepts a write addressed to 0x010 and pulls
|
||
// the new value out of the 32-bit lane selected by aw_addr_q[3:2] (WSTRB is ignored).
|
||
// ----------------------------------------------------------------
|
||
// CORE_CTRL storage. Only the three ABI-defined bits are kept:
//   [0] RESET, [1] ROM_LOADED, [2] PAUSE.
// core_ctrl_we / core_ctrl_d are the write strobe and next value; they are
// driven by logic outside this block.
logic [2:0] core_ctrl_q, core_ctrl_d;
logic       core_ctrl_we;

always_ff @(posedge clk or negedge reset_n) begin
    if (!reset_n) begin
        core_ctrl_q <= '0;              // all control bits deasserted out of reset
    end else begin
        if (core_ctrl_we) begin
            core_ctrl_q <= core_ctrl_d; // latch on a CORE_CTRL write, otherwise hold
        end
    end
end

// Bit 0 (RESET) is exported as the active-high core reset request.
assign core_reset_req = core_ctrl_q[0];
|
||
|
||
// ----------------------------------------------------------------
|
||
// Ch176 — CORE_PULSE write decode. Self-clearing pulses live in
|
||
// CLOCK2_50 (bridge clock) only; HDMI_CLR (bit 3) zeros the
|
||
// diagnostic counters. Other bits ACK and discard.
|
||
// ----------------------------------------------------------------
|
||
// CORE_PULSE strobes, declared here and driven elsewhere in the module:
//   core_pulse_we  — write strobe for CORE_PULSE
//   hdmi_clr_pulse — HDMI_CLR (bit 3) diagnostic-counter clear
logic core_pulse_we, hdmi_clr_pulse;
|
||
|
||
// ----------------------------------------------------------------
|
||
// Ch222 — INPUT_P1 / INPUT_P2 / INPUT_P1_RAW write/read latches
|
||
// at 0x040 / 0x044 / 0x048. HPS-visible state only — retrodesd's
|
||
// `input_thread.c` writes the active controller bitmap here and
|
||
// the supervisor menu reads INPUT_P1_RAW. Per the Ch235 / Ch245 port
// comments the latched values are also surfaced on input_p1_o /
// input_p2_o / input_p1_raw_o for downstream consumers (the Ch234
// sio2_input_stub and the platform osd_menu_fsm). Reset clears
// unconditionally. Matches the shared
|
||
// ABI v1.0 input block used by retroDE_{nes,gb,Atari2600,coco2}.
|
||
// ----------------------------------------------------------------
|
||
// 32-bit storage for the INPUT_P1 / INPUT_P2 / INPUT_P1_RAW latches.
logic [31:0] input_p1_q, input_p2_q, input_p1_raw_q;
|
||
|
||
// ----------------------------------------------------------------
|
||
// Ch223 — OSD compatibility sink at 0x100 / 0x110 / 0x114. Same
|
||
// "store-and-readback" pattern as Ch222: retrodesd's splash
|
||
// backend writes these latches during `osd_setup`; the bridge
|
||
// doesn't render an overlay yet, but the writes now land in real
|
||
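// registers instead of being silently dropped. Readback returns whatever
// userspace wrote, not a confirmation that an overlay engine
// observed it. Since Ch245 the port list also exposes osd_ctrl_o /
// osd_cfg0_o / osd_cfg1_o (documented there as the *_q latch values)
// for the platform OSD, so the earlier "nothing in the PS2 fabric
// consumes these bits" note no longer holds at the port level.
// Reset clears unconditionally.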
|
||
// ----------------------------------------------------------------
|
||
// OSD_CTRL latch storage.
logic [31:0] osd_ctrl_q;

// Ch318 — LPDDR test control registers. Only the storage is declared at
// this point; the reset and write logic appear further down the module.
logic        lpddr_arm_q, lpddr_canary_q;
logic [31:0] lpddr_fb_base_q;
|
||
// Ch319 Brick 3 — LPDDR4B read-probe state plus the CDC for its return path.
logic [31:0] lpddr_rd_addr_q;       // byte address presented to the read probe
logic [31:0] lpddr_rd_data_lat;     // result word latched in the bridge domain
logic        lpddr_rd_pulse_q;      // request toggle
logic [1:0]  lpddr_rd_done_sync;    // synchronizer for the probe's done toggle

// A read is outstanding while the request toggle and the synchronized done
// toggle disagree.
wire lpddr_rd_pending;
assign lpddr_rd_pending = lpddr_rd_pulse_q ^ lpddr_rd_done_sync[1];

// Export the request toggle and address to the probe.
assign lpddr_rd_pulse_o = lpddr_rd_pulse_q;
assign lpddr_rd_addr_o  = lpddr_rd_addr_q;
|
||
// Ch323 diag — write-path commit experiment control (0x0C0).
|
||
logic [7:0] diag_ctrl_q;
|
||
assign diag_ctrl_o = diag_ctrl_q;
|
||
// Ch320 — scanout source-select + synced status.
|
||
logic lpddr_video_src_q;
|
||
logic lpddr_scanout_lb_q;
|
||
logic [1:0] lpddr_scan_valid_sync, lpddr_scan_err_sync;
|
||
assign lpddr_video_src_o = lpddr_video_src_q;
|
||
assign lpddr_scanout_lb_o = lpddr_scanout_lb_q;
|
||
// Ch322 — LPDDR write-probe + texture-cache fill registers + return-path CDC.
|
||
logic [31:0] lpddr_wr_addr_q; // "next write" pointer (auto-increments)
|
||
logic [31:0] lpddr_wr_addr_present_q;// the address that goes WITH the current data word
|
||
logic [31:0] lpddr_wr_data_q;
|
||
logic lpddr_wr_pulse_q;
|
||
logic tex_fill_start_q;
|
||
logic [1:0] tex_fill_done_sync;
|
||
logic [1:0] lpddr_wr_busy_sync;
|
||
// Ch352 CDC hardening (Codex) — the EMIF-domain status no longer crosses raw into reg_read:
|
||
// * lpddr_wr_done_sync : 3-FF sync of the write-probe done TOGGLE -> a STABLE write-pending bit
|
||
// (write_pending = issued_pulse ^ done_synced). Stop polling the transient wr_busy level.
|
||
// * tex_fill_*_snap_q : the multi-bit fill counters latched into THIS (clk) domain on the synced
|
||
// fill_done rising edge — they are stable then (fill complete), so the sample is coherent. reg_read
|
||
// returns these snapshots, never the raw emif counters.
|
||
logic [2:0] lpddr_wr_done_sync;
|
||
logic [31:0] tex_fill_beats_snap_q;
|
||
logic [31:0] tex_fill_bytes_snap_q;
|
||
logic [31:0] tex_rd_errs_snap_q;
|
||
logic [31:0] tex_fill_crc_snap_q;
|
||
logic lpddr_ctrl_commit_q; // toggles on any arm/canary/fb_base write (-> EMIF control snapshot)
|
||
wire write_pending = lpddr_wr_pulse_q ^ lpddr_wr_done_sync[2];
|
||
assign lpddr_ctrl_commit_o = lpddr_ctrl_commit_q;
|
||
// Ch352 (Codex) — the remaining raw emif->bridge status signals. lpddr_idle is single-bit (2-FF sync).
|
||
// The FB-writer counters (bytes/bursts/bresp_err/fifo_ovf) are multi-bit and only stable while the writer
|
||
// is IDLE, so latch them into clk-domain snapshots WHILE the synced idle is high (data stable -> coherent).
|
||
// The write-probe error counter latches on the synced write-done edge (count stable when a write completes).
|
||
logic [1:0] lpddr_idle_sync;
|
||
logic [31:0] lpddr_bytes_snap_q;
|
||
logic [31:0] lpddr_bursts_snap_q;
|
||
logic [31:0] lpddr_bresp_err_snap_q;
|
||
logic [31:0] lpddr_fifo_ovf_snap_q;
|
||
logic [31:0] lpddr_wr_bresp_err_snap_q;
|
||
// Present the SNAPSHOT taken when the data word was written — NOT lpddr_wr_addr_q,
|
||
// which auto-increments the same cycle the pulse toggles. The write-probe samples
|
||
// wr_addr asynchronously across the CDC, so it must stay stable between data writes
|
||
// (like wr_data). Presenting the live (incrementing) addr made every texel land +4B
|
||
// past its slot (Ch322 board bug).
|
||
assign lpddr_wr_addr_o = lpddr_wr_addr_present_q;
|
||
assign lpddr_wr_data_o = lpddr_wr_data_q;
|
||
assign lpddr_wr_pulse_o = lpddr_wr_pulse_q;
|
||
assign tex_fill_start_o = tex_fill_start_q;
|
||
logic [31:0] osd_cfg0_q;
|
||
logic [31:0] osd_cfg1_q;
|
||
// Ch245 — OSD_TRIGGER is a R/W register: HPS clears bits with W1C,
|
||
// the menu FSM (sys_clk domain) sets bits on action edges.
|
||
logic [31:0] osd_trigger_q;
|
||
|
||
// ----------------------------------------------------------------
|
||
// Ch227 — tile RAM storage at 0x1000..0x1FFF (4 KB window).
|
||
// 1024 × 32-bit words; HPS writes land here, reads return the
|
||
// last value written. No overlay engine yet (Ch228). The sim
|
||
// initial block zero-fills for deterministic TBs; on hardware
|
||
// the power-up value is undefined until the overlay chapter
|
||
// pins a reset contract (BRAM blocks on Agilex 5 don't typically
|
||
// sync-reset every word, and there is no sibling precedent for
|
||
// bridge-side tile-RAM clearing).
|
||
//
|
||
// Ch232 hotfix: explicit `ramstyle = "M20K"` attribute forces
|
||
// Quartus to put the 32 Kbit storage into a single M20K block
|
||
// instead of distributed LABs. Without the hint the AXI read
|
||
// path (combinational lookup via a function call) confused the
|
||
// inferencer.
|
||
// ----------------------------------------------------------------
|
||
// Tile RAM backing store: 1024 words of 32 bits, pinned to M20K block RAM.
(* ramstyle = "M20K" *) logic [31:0] tile_mem [0:1023];

// Zero-fill every word at time 0 so testbenches start from a known image.
// There is no reset-time clear of this array anywhere in this section.
initial begin
    for (int word = 0; word < 1024; word++) begin
        tile_mem[word] = '0;
    end
end
|
||
|
||
// ----------------------------------------------------------------
|
||
// Counters. Ch174: FRAME_COUNT / DMA_DONE_COUNT increment on each
|
||
// edge (rising OR falling) of the synchronized event-toggle —
|
||
// because the toggle flips per event in the design domain, every
|
||
// edge in the bridge domain is exactly one event. See the
|
||
// event-counter CDC contract in the header comment.
|
||
//
|
||
// RASTER_OVERFLOW_COUNT counts every clk cycle the synced
|
||
// raster_overflow stays high (cheap stickyness + approximate
|
||
// severity in one register).
|
||
//
|
||
// Ch176: a CORE_PULSE write with HDMI_CLR set takes priority over
|
||
// the increment in the same cycle and zeros all three counters
|
||
// synchronously in CLOCK2_50.
|
||
// ----------------------------------------------------------------
|
||
// Previous-cycle copies of the synchronized event toggles; comparing each
// toggle with its delayed copy yields one pulse per event (either edge).
logic        frame_tgl_q1;
logic        dma_tgl_q1;
// HPS-visible counters (read back through reg_read at 0x020 / 0x024 / 0x028).
logic [31:0] frame_count;
logic [31:0] dma_done_count;
logic [31:0] raster_overflow_count;

// Counter update, one clk-domain process:
//   * reset            : everything cleared.
//   * hdmi_clr_pulse   : clears all three counters and wins over any
//                        increment that would land in the same cycle.
//   * frame/dma toggle : +1 per toggle edge (these counters wrap modulo 2^32,
//                        which is fine for delta-based polling).
//   * raster_of_q      : +1 per clk cycle it is high, SATURATING at
//                        32'hFFFF_FFFF. This register doubles as a sticky
//                        "overflow ever happened" indicator, so it must never
//                        wrap back to 0 on a long-lived overflow; only reset
//                        or hdmi_clr_pulse may return it to 0.
always_ff @(posedge clk or negedge reset_n) begin
    if (!reset_n) begin
        frame_tgl_q1          <= 1'b0;
        dma_tgl_q1            <= 1'b0;
        frame_count           <= 32'd0;
        dma_done_count        <= 32'd0;
        raster_overflow_count <= 32'd0;
    end else begin
        // Edge-detect history is updated every cycle, clear or not.
        frame_tgl_q1 <= frame_tgl_q;
        dma_tgl_q1   <= dma_tgl_q;

        if (hdmi_clr_pulse) begin
            frame_count           <= 32'd0;
            dma_done_count        <= 32'd0;
            raster_overflow_count <= 32'd0;
        end else begin
            if (frame_tgl_q ^ frame_tgl_q1)
                frame_count <= frame_count + 32'd1;
            if (dma_tgl_q ^ dma_tgl_q1)
                dma_done_count <= dma_done_count + 32'd1;
            // Saturate instead of wrapping so the sticky property survives.
            if (raster_of_q && (raster_overflow_count != 32'hFFFF_FFFF))
                raster_overflow_count <= raster_overflow_count + 32'd1;
        end
    end
end
|
||
|
||
// ----------------------------------------------------------------
// Read-side register decode. reg_read() maps a byte address to the
// 32-bit register value at that offset; any address outside the
// decoded windows reads as 0.
// ----------------------------------------------------------------
localparam logic [31:0] CORE_ID     = 32'h50533200; // "PS2\0"
localparam logic [31:0] ABI_VERSION = 32'h00000100;
// Ch246 — CORE_CAPS advertises the OSD geometry using the sibling-ABI
// bit layout (matches nes_hps_bridge.sv:1024-1031):
//   bit 0     has_save_ram        = 0 (PS2 memory card not yet wired)
//   bit 1     has_savestates      = 0 (not yet implemented)
//   bit 2     two_player          = 0 (P2 input wiring is bringup-only)
//   bit 3     analog_input        = 0 (DS2 analog sticks not yet plumbed)
//   [7:4]     max_savestate_slots = 0
//   [15:8]    osd_columns         = 40
//   [20:16]   osd_rows            = 16
//   value     = (16<<16) | (40<<8) = 0x00102800
// This is forward-looking metadata for a CORE_CAPS-aware retrodesd; the
// runtime fix for the current 640×480 picture lives in retrodesd's backend
// (which still derives the origin from a hardcoded 1280×720 assumption).
localparam logic [31:0] CORE_CAPS   = 32'h00102800;

// reg_read — combinational read decode.
//   addr    : 38-bit AXI byte address; bits [1:0] are ignored.
//   returns : the 32-bit register value, or 0 for undecoded addresses.
// Three windows are decoded:
//   1. 0x000..0x0FF  (addr[37:8] == 0)     main register file, word index addr[7:2]
//   2. 0x100..0x11F  (addr[37:5] == 0x08)  OSD registers, word index addr[4:2]
//   3. 0x1000..0x1FFF (addr[37:12] == 1)   tile RAM, word index addr[11:2]
function automatic logic [31:0] reg_read(input logic [37:0] addr);
    logic [31:0] core_status_w;
    logic [31:0] video_status_w;
    logic [31:0] hdmi_diag_w;

    // CORE_STATUS (0x008): bit 0 is hard-wired to 1 ("loaded").
    core_status_w    = '0;
    core_status_w[0] = 1'b1;
    core_status_w[1] = core_halt_q;
    core_status_w[2] = dma_done_q;
    core_status_w[3] = frame_seen_q;
    core_status_w[4] = hdmi_init_q;
    core_status_w[5] = hdmi_err_q;
    core_status_w[6] = raster_of_q;

    // VIDEO_STATUS (0x060, Ch225): alternate view of the same status bits;
    // bit 1 ("scanout_alive") is set once frame_count is non-zero.
    video_status_w    = '0;
    video_status_w[0] = frame_seen_q;
    video_status_w[1] = (frame_count != 32'd0);
    video_status_w[2] = raster_of_q;
    video_status_w[3] = dma_done_q;

    // HDMI_DIAG (0x064, Ch225).
    hdmi_diag_w    = '0;
    hdmi_diag_w[0] = hdmi_init_q;
    hdmi_diag_w[1] = hdmi_err_q;

    // Default for every undecoded address.
    reg_read = 32'd0;

    if (addr[37:8] == '0) begin
        // Window 1 — items listed in ascending offset order.
        case (addr[7:2])
            6'h00: reg_read = CORE_ID;                   // 0x000
            6'h01: reg_read = ABI_VERSION;               // 0x004
            6'h02: reg_read = core_status_w;             // 0x008 CORE_STATUS
            6'h03: reg_read = CORE_CAPS;                 // 0x00C
            6'h04: reg_read = {29'd0, core_ctrl_q};      // 0x010 CORE_CTRL (Ch176)
            6'h05: reg_read = 32'd0;                     // 0x014 CORE_PULSE — self-clearing, reads 0
            // 0x018 LPDDR_CTRL (RW): [0]arm [1]canary [2]video_src [3]scanout_lb
            6'h06: reg_read = {28'd0, lpddr_scanout_lb_q, lpddr_video_src_q, lpddr_canary_q, lpddr_arm_q};
            6'h07: reg_read = lpddr_fb_base_q;           // 0x01C LPDDR_FB_BASE (RW)
            6'h08: reg_read = frame_count;               // 0x020
            6'h09: reg_read = dma_done_count;            // 0x024
            6'h0A: reg_read = raster_overflow_count;     // 0x028
            // 0x02C LPDDR_STATUS (R): [0]idle [1]bresp_err [2]fifo_ovf
            //   [3]rd_pending [4]scan_cache_valid [5]scan_rd_err.
            //   [1]/[2] are OR-reductions of the clk-domain snapshots.
            6'h0B: reg_read = {26'd0, lpddr_scan_err_sync[1], lpddr_scan_valid_sync[1], lpddr_rd_pending, (|lpddr_fifo_ovf_snap_q), (|lpddr_bresp_err_snap_q), lpddr_idle_sync[1]};
            6'h0C: reg_read = lpddr_bytes_snap_q;        // 0x030 LPDDR_BYTES (snapshot)
            6'h0D: reg_read = lpddr_bursts_snap_q;       // 0x034 LPDDR_BURSTS (snapshot)
            6'h0E: reg_read = lpddr_bresp_err_snap_q;    // 0x038 LPDDR_BRESP_ERRS (snapshot)
            6'h0F: reg_read = lpddr_rd_data_lat;         // 0x03C LPDDR_RDATA: last read-probe word (a WRITE here sets addr + triggers a read)
            6'h10: reg_read = input_p1_q;                // 0x040 INPUT_P1 (Ch222)
            6'h11: reg_read = input_p2_q;                // 0x044 INPUT_P2 (Ch222)
            6'h12: reg_read = input_p1_raw_q;            // 0x048 INPUT_P1_RAW (Ch222)
            6'h13: reg_read = lpddr_wr_addr_q;           // 0x04C LPDDR_WRADDR (RW; auto-inc on 0x050 write)
            6'h14: reg_read = 32'd0;                     // 0x050 LPDDR_WRDATA (write-only; reads 0)
            // 0x054 TEX_STATUS (R): [0]fill_done [1]wr_busy [2]write_pending.
            //   Poll [2]==0 between probe writes; [1] is a transient level.
            6'h15: reg_read = {29'd0, write_pending, lpddr_wr_busy_sync[1], tex_fill_done_sync[1]};
            6'h16: reg_read = tex_fill_beats_snap_q;     // 0x058 TEX_FILL_BEATS (snapshot)
            6'h17: reg_read = tex_fill_bytes_snap_q;     // 0x05C TEX_FILL_BYTES (snapshot)
            6'h18: reg_read = video_status_w;            // 0x060 VIDEO_STATUS (Ch225)
            6'h19: reg_read = hdmi_diag_w;               // 0x064 HDMI_DIAG (Ch225)
            6'h1A: reg_read = tex_rd_errs_snap_q;        // 0x068 TEX_RD_ERRS (snapshot)
            6'h1B: reg_read = lpddr_wr_bresp_err_snap_q; // 0x06C LPDDR_WR_BRESP_ERRS (snapshot)
            6'h1C: reg_read = tex_fill_crc_snap_q;       // 0x070 TEX_FILL_CRC (snapshot)
            6'h1E: reg_read = tex_cache_hits_i;          // 0x078 TEX_CACHE_HITS
            6'h1F: reg_read = tex_bram_hits_i;           // 0x07C TEX_BRAM_HITS
            // Ch323 — tile COLOR+Z spill/reload counters (read-only).
            6'h20: reg_read = spill_color_beats_i;       // 0x080 SPILL_COLOR_WR_BEATS
            6'h21: reg_read = spill_z_beats_i;           // 0x084 SPILL_Z_WR_BEATS
            6'h22: reg_read = reload_color_beats_i;      // 0x088 RELOAD_COLOR_BEATS
            6'h23: reg_read = reload_z_beats_i;          // 0x08C RELOAD_Z_BEATS
            6'h24: reg_read = reload_rd_errs_i;          // 0x090 RELOAD_RD_ERRS
            6'h25: reg_read = spill_color_errs_i;        // 0x094 SPILL_COLOR_WR_ERRS
            6'h26: reg_read = spill_z_errs_i;            // 0x098 SPILL_Z_WR_ERRS
            6'h27: reg_read = {30'd0, spill_z_ovf_i, spill_color_ovf_i}; // 0x09C SPILL_OVF: [0]color [1]Z
            // Ch323 diag — upstream tile-spill/reload event counters.
            6'h28: reg_read = ev_tp_flush_i;             // 0x0A0 EV_TP_FLUSH
            6'h29: reg_read = ev_tp_zflush_i;            // 0x0A4 EV_TP_ZFLUSH
            6'h2A: reg_read = ev_tp_reload_i;            // 0x0A8 EV_TP_RELOAD
            6'h2B: reg_read = ev_tp_render_i;            // 0x0AC EV_TP_RENDER
            6'h2C: reg_read = ev_flush_emit_i;           // 0x0B0 EV_FLUSH_EMIT
            6'h2D: reg_read = ev_zflush_emit_i;          // 0x0B4 EV_ZFLUSH_EMIT
            6'h2E: reg_read = ev_reload_start_i;         // 0x0B8 EV_RELOAD_START
            6'h2F: reg_read = ev_reload_ready_i;         // 0x0BC EV_RELOAD_READY
            6'h30: reg_read = {24'd0, diag_ctrl_q};      // 0x0C0 DIAG_CTRL (RW)
            6'h31: reg_read = dbg_z_beat_count_i;        // 0x0C4 Z writer beats
            6'h32: reg_read = dbg_z_emit_count_i;        // 0x0C8 Z writer emits accepted
            6'h33: reg_read = dbg_c_emit_count_i;        // 0x0CC color writer emits accepted
            6'h34: reg_read = dbg_z_push_count_i;        // 0x0D0 Z writer beats pushed to FIFO
            6'h35: reg_read = dbg_z_pop_count_i;         // 0x0D4 Z writer beats popped from FIFO
            // Ch330 Brick 4 — feeder status. Note the read meaning of
            // 0x0D8/0x0DC/0x0E4/0x0E8 differs from the write meaning.
            6'h36: reg_read = {31'd0, feeder_ready_sync[1]}; // 0x0D8 FEEDER_STATUS [0]=ready (synced)
            6'h37: reg_read = {20'd0, feeder_addr_q};    // 0x0DC FEEDER_STG_ADDR (current write index)
            6'h38: reg_read = dbg_c_beat_count_i;        // 0x0E0 color writer beats
            6'h39: reg_read = {16'd0, feeder_records_i}; // 0x0E4 FEEDER_RECORDS
            6'h3A: reg_read = feeder_waits_i;            // 0x0E8 FEEDER_WAITS
            // Ch248 — DS2 controller status: [0]connected [1]error,
            // [2] hard-wired to 1 (kept from the Ch226 stub layout).
            6'h3C: reg_read = {29'd0, 1'b1, ds2_error_i, ds2_connected_i}; // 0x0F0 DS2_STATUS
            6'h3D: reg_read = ds2_buttons_i;             // 0x0F4 DS2_BUTTONS
            6'h3E: reg_read = dbg_c_push_count_i;        // 0x0F8 color writer beats pushed to FIFO
            6'h3F: reg_read = dbg_c_pop_count_i;         // 0x0FC color writer beats popped
            default: reg_read = 32'd0;                   // 0x074, 0x0EC: unassigned
        endcase
    end else if (addr[37:5] == 33'h08) begin
        // Window 2 — OSD registers.
        case (addr[4:2])
            3'h0: reg_read = osd_ctrl_q;                 // 0x100 OSD_CTRL
            // 0x104 OSD_STATUS: [0]=osd_active, [12:8]=cursor_row, both
            // taken directly from the menu-FSM inputs (no synchronizer).
            3'h1: reg_read = {19'd0, osd_cursor_row_i, 7'd0, osd_active_i};
            3'h2: reg_read = osd_trigger_q;              // 0x108 OSD_TRIGGER (Ch245 R/W)
            3'h3: reg_read = 32'd0;                      // 0x10C OSD_INPUT (reserved)
            3'h4: reg_read = osd_cfg0_q;                 // 0x110 OSD_CFG0
            3'h5: reg_read = osd_cfg1_q;                 // 0x114 OSD_CFG1
            default: reg_read = 32'd0;                   // 0x118 / 0x11C reserved-zero
        endcase
    end else if (addr[37:12] == 26'h1) begin
        // Window 3 — Ch227 tile RAM, 1024 × 32-bit.
        reg_read = tile_mem[addr[11:2]];
    end
endfunction
|
||
|
||
// ----------------------------------------------------------------
// Write channel. Single-beat: the address is captured first, then one
// data beat is accepted, then the response is issued. Every write is
// answered with OKAY; a write only has a side effect when its address
// matches one of the write_is_* / write_in_* decodes, otherwise the
// data is discarded.
// ----------------------------------------------------------------
typedef enum logic [1:0] { W_IDLE, W_DATA, W_RESP } w_state_t;
w_state_t    w_state;    // current write-FSM state
logic [3:0]  aw_id_q;    // AWID captured at address accept (echoed on BID)
logic [37:0] aw_addr_q;  // AWADDR captured at address accept

// 32-bit register lane of the 128-bit write data. aw_addr_q[3:2] picks
// the lane: 0 -> [31:0], 1 -> [63:32], 2 -> [95:64], 3 -> [127:96].
logic [31:0] wdata_lane;
always_comb begin
    wdata_lane = s_axi_wdata[{aw_addr_q[3:2], 5'd0} +: 32];
end
|
||
|
||
// Write-side address decode, evaluated on the captured aw_addr_q.
// Wires are listed in ascending offset order. Offsets in the main
// window with no write_is_* wire (e.g. the DS2 registers at
// 0x0F0/0x0F4) have no write side effect.

// --- Main window: 0x000..0x0FF, word index = aw_addr_q[7:2] ---------
wire write_in_window       = (aw_addr_q[37:8] == '0);
wire write_is_ctrl         = write_in_window && (aw_addr_q[7:2] == 6'h04); // 0x010 CORE_CTRL
wire write_is_pulse        = write_in_window && (aw_addr_q[7:2] == 6'h05); // 0x014 CORE_PULSE
wire write_is_lpddr_ctrl   = write_in_window && (aw_addr_q[7:2] == 6'h06); // 0x018 LPDDR_CTRL (Ch318)
wire write_is_lpddr_base   = write_in_window && (aw_addr_q[7:2] == 6'h07); // 0x01C LPDDR_FB_BASE (Ch318)
wire write_is_lpddr_rdaddr = write_in_window && (aw_addr_q[7:2] == 6'h0F); // 0x03C read-probe addr + trigger (Ch319)
wire write_is_p1           = write_in_window && (aw_addr_q[7:2] == 6'h10); // 0x040 INPUT_P1 (Ch222)
wire write_is_p2           = write_in_window && (aw_addr_q[7:2] == 6'h11); // 0x044 INPUT_P2 (Ch222)
wire write_is_p1_raw       = write_in_window && (aw_addr_q[7:2] == 6'h12); // 0x048 INPUT_P1_RAW (Ch222)
wire write_is_lpddr_wraddr = write_in_window && (aw_addr_q[7:2] == 6'h13); // 0x04C set write addr (Ch322)
wire write_is_lpddr_wrdata = write_in_window && (aw_addr_q[7:2] == 6'h14); // 0x050 data -> write + pulse (Ch322)
wire write_is_tex_fill     = write_in_window && (aw_addr_q[7:2] == 6'h15); // 0x054 bit0 arm fill (Ch322)
wire write_is_diag_ctrl    = write_in_window && (aw_addr_q[7:2] == 6'h30); // 0x0C0 DIAG_CTRL (Ch323)
wire write_is_feeder_addr  = write_in_window && (aw_addr_q[7:2] == 6'h36); // 0x0D8 set staging word index (Ch330)
wire write_is_feeder_lo    = write_in_window && (aw_addr_q[7:2] == 6'h37); // 0x0DC low 32 bits (latched)
wire write_is_feeder_hi    = write_in_window && (aw_addr_q[7:2] == 6'h39); // 0x0E4 high 32 -> commit {hi,lo}, addr++
wire write_is_feeder_go    = write_in_window && (aw_addr_q[7:2] == 6'h3A); // 0x0E8 bit0 -> retrigger toggle

// --- OSD window (Ch223): 0x100..0x11F, word index = aw_addr_q[4:2] --
// This 32-byte block sits just above the 256-byte main window, so it
// has its own guard: aw_addr_q[37:5] == 0x08  <=>  addr in [0x100, 0x120).
wire write_in_osd          = (aw_addr_q[37:5] == 33'h08);
wire write_is_osd_ctrl     = write_in_osd && (aw_addr_q[4:2] == 3'h0); // 0x100
wire write_is_osd_trigger  = write_in_osd && (aw_addr_q[4:2] == 3'h2); // 0x108 (Ch245 W1C)
wire write_is_osd_cfg0     = write_in_osd && (aw_addr_q[4:2] == 3'h4); // 0x110
wire write_is_osd_cfg1     = write_in_osd && (aw_addr_q[4:2] == 3'h5); // 0x114

// --- Tile RAM window (Ch227): 0x1000..0x1FFF, slot = aw_addr_q[11:2] -
wire write_in_tile         = (aw_addr_q[37:12] == 26'h1);
|
||
|
||
// Write-channel FSM.
//   W_IDLE : awready high; on awvalid capture id + address  -> W_DATA
//   W_DATA : wready high;  on wvalid raise bvalid           -> W_RESP
//   W_RESP : hold bvalid until bready                        -> W_IDLE
// awready/wready are pure functions of the state (see the assigns below),
// so inside a given state the matching ready is known to be 1 and the
// handshake reduces to the valid signal alone.
always_ff @(posedge clk or negedge reset_n) begin
    if (!reset_n) begin
        s_axi_bvalid <= 1'b0;
        w_state      <= W_IDLE;
        aw_addr_q    <= '0;
        aw_id_q      <= '0;
    end else begin
        case (w_state)
            W_RESP: begin
                if (s_axi_bready) begin
                    w_state      <= W_IDLE;
                    s_axi_bvalid <= 1'b0;
                end
            end

            W_DATA: begin
                if (s_axi_wvalid) begin       // wready == 1 in this state
                    w_state      <= W_RESP;
                    s_axi_bvalid <= 1'b1;
                end
            end

            W_IDLE: begin
                s_axi_bvalid <= 1'b0;
                if (s_axi_awvalid) begin      // awready == 1 in this state
                    w_state   <= W_DATA;
                    aw_addr_q <= s_axi_awaddr;
                    aw_id_q   <= s_axi_awid;
                end
            end

            // Unused 2'b11 encoding: recover to idle.
            default: w_state <= W_IDLE;
        endcase
    end
end

// Channel outputs derived from the FSM.
assign s_axi_awready = (w_state == W_IDLE);
assign s_axi_wready  = (w_state == W_DATA);
assign s_axi_bid     = aw_id_q;
assign s_axi_bresp   = 2'b00; // OKAY, unconditionally
|
||
|
||
// write_accept is high for exactly the one cycle in which the data beat
// is taken, so every register side effect gated by it happens once per
// AXI write. s_axi_wready is driven as (w_state == W_DATA), so the plain
// W-channel handshake already implies the FSM is in W_DATA.
wire write_accept = s_axi_wvalid && s_axi_wready;

// Ch176 — CORE_CTRL (0x010) write strobe + data, CORE_PULSE (0x014)
// write strobe, and the counter-clear pulse taken from CORE_PULSE bit 3.
assign core_ctrl_d    = wdata_lane[2:0];
assign core_ctrl_we   = write_is_ctrl  && write_accept;
assign core_pulse_we  = write_is_pulse && write_accept;
assign hdmi_clr_pulse = wdata_lane[3]  && core_pulse_we;
|
||
|
||
// Ch222 — INPUT_P1 / INPUT_P2 / INPUT_P1_RAW latches (0x040/0x044/0x048).
// Plain store-and-read-back registers: an accepted write to the matching
// offset replaces the whole 32-bit value with wdata_lane; any other write
// leaves them unchanged; reset clears all three.
always_ff @(posedge clk or negedge reset_n) begin
    if (!reset_n) begin
        input_p1_raw_q <= 32'd0;
        input_p2_q     <= 32'd0;
        input_p1_q     <= 32'd0;
    end else begin
        if (write_accept && write_is_p1)     input_p1_q     <= wdata_lane;
        if (write_accept && write_is_p2)     input_p2_q     <= wdata_lane;
        if (write_accept && write_is_p1_raw) input_p1_raw_q <= wdata_lane;
    end
end
|
||
|
||
// LPDDR control/status register block (Ch318/319/320/321/322/352).
// Reset defaults: arm OFF, canary ON, fb_base = 0x8000_0000; everything
// else (probe registers, synchronizers, snapshots) clears to 0.
assign lpddr_arm_o     = lpddr_arm_q;
assign lpddr_canary_o  = lpddr_canary_q;
assign lpddr_fb_base_o = lpddr_fb_base_q;

always_ff @(posedge clk or negedge reset_n) begin
    if (!reset_n) begin
        // HPS-written control.
        lpddr_arm_q               <= 1'b0;
        lpddr_canary_q            <= 1'b1;
        lpddr_video_src_q         <= 1'b0;   // Ch320 — default BRAM scanout
        lpddr_scanout_lb_q        <= 1'b0;   // Ch321 — default frame-cache
        lpddr_fb_base_q           <= 32'h8000_0000;
        lpddr_ctrl_commit_q       <= 1'b0;
        diag_ctrl_q               <= 8'd0;
        // Read probe.
        lpddr_rd_addr_q           <= 32'd0;
        lpddr_rd_pulse_q          <= 1'b0;
        lpddr_rd_data_lat         <= 32'd0;
        // Write probe + texture fill.
        lpddr_wr_addr_q           <= 32'd0;
        lpddr_wr_addr_present_q   <= 32'd0;
        lpddr_wr_data_q           <= 32'd0;
        lpddr_wr_pulse_q          <= 1'b0;
        tex_fill_start_q          <= 1'b0;
        // Synchronizer chains.
        lpddr_rd_done_sync        <= 2'b00;
        lpddr_scan_valid_sync     <= 2'b00;
        lpddr_scan_err_sync       <= 2'b00;
        tex_fill_done_sync        <= 2'b00;
        lpddr_wr_busy_sync        <= 2'b00;
        lpddr_wr_done_sync        <= 3'b000;
        lpddr_idle_sync           <= 2'b00;
        // Counter snapshots.
        tex_fill_beats_snap_q     <= 32'd0;
        tex_fill_bytes_snap_q     <= 32'd0;
        tex_rd_errs_snap_q        <= 32'd0;
        tex_fill_crc_snap_q       <= 32'd0;
        lpddr_bytes_snap_q        <= 32'd0;
        lpddr_bursts_snap_q       <= 32'd0;
        lpddr_bresp_err_snap_q    <= 32'd0;
        lpddr_fifo_ovf_snap_q     <= 32'd0;
        lpddr_wr_bresp_err_snap_q <= 32'd0;
    end else begin
        // ---- 1. Synchronizer shift registers (new sample enters bit 0) ----
        lpddr_rd_done_sync    <= {lpddr_rd_done_sync[0],    lpddr_rd_done_i};
        lpddr_scan_valid_sync <= {lpddr_scan_valid_sync[0], lpddr_scan_valid_i};
        lpddr_scan_err_sync   <= {lpddr_scan_err_sync[0],   lpddr_scan_err_i};
        tex_fill_done_sync    <= {tex_fill_done_sync[0],    tex_fill_done_i};
        lpddr_wr_busy_sync    <= {lpddr_wr_busy_sync[0],    lpddr_wr_busy_i};
        lpddr_wr_done_sync    <= {lpddr_wr_done_sync[1:0],  lpddr_wr_done_i};
        lpddr_idle_sync       <= {lpddr_idle_sync[0],       lpddr_idle_i};

        // ---- 2. Multi-bit captures gated by the synced status ------------
        // Read-probe data: latch when the done toggle is seen changing.
        // NOTE(review): this edge detect (and the fill_done one below)
        // compares against sync bit [0], the first capture flop, whereas
        // the write-done detect uses bits [2]/[1] of a 3-flop chain —
        // confirm whether these two should be hardened the same way.
        if (lpddr_rd_done_sync[0] ^ lpddr_rd_done_sync[1])
            lpddr_rd_data_lat <= lpddr_rd_data_i;

        // Texture-fill counters: latch on the rising edge of fill_done.
        if (tex_fill_done_sync[0] && !tex_fill_done_sync[1]) begin
            tex_fill_crc_snap_q   <= tex_fill_crc_i;
            tex_rd_errs_snap_q    <= tex_rd_errs_i;
            tex_fill_bytes_snap_q <= tex_fill_bytes_i;
            tex_fill_beats_snap_q <= tex_fill_beats_i;
        end

        // FB-writer counters: re-sampled every cycle while synced idle is high.
        // NOTE(review): synced idle lags the source by the synchronizer
        // depth, so sampling continues briefly after the writer leaves
        // idle — presumably harmless if the counters do not move
        // immediately; confirm against the writer.
        if (lpddr_idle_sync[1]) begin
            lpddr_fifo_ovf_snap_q  <= lpddr_fifo_ovf_i;
            lpddr_bresp_err_snap_q <= lpddr_bresp_err_i;
            lpddr_bursts_snap_q    <= lpddr_bursts_i;
            lpddr_bytes_snap_q     <= lpddr_bytes_i;
        end

        // Write-probe error count: latch on the synced write-done toggle edge.
        if (lpddr_wr_done_sync[2] ^ lpddr_wr_done_sync[1])
            lpddr_wr_bresp_err_snap_q <= lpddr_wr_bresp_err_i;

        // ---- 3. HPS register writes --------------------------------------
        // 0x018 LPDDR_CTRL: [0]arm [1]canary [2]video_src [3]scanout_lb.
        if (write_accept && write_is_lpddr_ctrl)
            {lpddr_scanout_lb_q, lpddr_video_src_q, lpddr_canary_q, lpddr_arm_q} <= wdata_lane[3:0];

        // 0x01C LPDDR_FB_BASE.
        if (write_accept && write_is_lpddr_base)
            lpddr_fb_base_q <= wdata_lane;

        // Any ctrl/base write flips the commit toggle exported on
        // lpddr_ctrl_commit_o.
        if (write_accept && (write_is_lpddr_ctrl || write_is_lpddr_base))
            lpddr_ctrl_commit_q <= ~lpddr_ctrl_commit_q;

        // 0x03C: store the read address and toggle the read trigger.
        if (write_accept && write_is_lpddr_rdaddr) begin
            lpddr_rd_pulse_q <= ~lpddr_rd_pulse_q;
            lpddr_rd_addr_q  <= wdata_lane;
        end

        // 0x04C: set the write-probe byte address.
        if (write_accept && write_is_lpddr_wraddr)
            lpddr_wr_addr_q <= wdata_lane;

        // 0x050: store the data word, toggle the write trigger, freeze the
        // address that belongs to THIS word in lpddr_wr_addr_present_q, and
        // advance the running pointer by 4 bytes for the next word.
        if (write_accept && write_is_lpddr_wrdata) begin
            lpddr_wr_addr_present_q <= lpddr_wr_addr_q;
            lpddr_wr_addr_q         <= lpddr_wr_addr_q + 32'd4;
            lpddr_wr_pulse_q        <= ~lpddr_wr_pulse_q;
            lpddr_wr_data_q         <= wdata_lane;
        end

        // 0x054: writing bit0=1 toggles tex_fill_start_q (a write with
        // bit0=0 does nothing).
        if (write_accept && write_is_tex_fill && wdata_lane[0])
            tex_fill_start_q <= ~tex_fill_start_q;

        // 0x0C0 DIAG_CTRL: low byte only.
        if (write_accept && write_is_diag_ctrl)
            diag_ctrl_q <= wdata_lane[7:0];
    end
end
|
||
|
||
// ----------------------------------------------------------------
// Ch330 Brick 4 — feeder staging writes + retrigger.
//   0x0D8 write : set the staging word index (feeder_addr_q, 12 bits)
//   0x0DC write : latch the LOW 32 bits of the next entry
//   0x0E4 write : HIGH 32 bits; commits {hi,lo} together with the current
//                 index, flips the write-enable toggle, then index += 1
//   0x0E8 write : bit0 = 1 flips the go (retrigger) toggle
// we/go are exported as toggles rather than pulses; feeder_ready_i is
// passed through a 2-flop synchronizer for the 0x0D8 status read.
// ----------------------------------------------------------------
logic [11:0] feeder_addr_q;      // running staging index (auto-increments)
logic [31:0] feeder_lo_q;        // pending low half
logic [63:0] feeder_wdata_q;     // committed {hi,lo} entry
logic [11:0] feeder_waddr_q;     // index that belongs to feeder_wdata_q
logic        feeder_we_tgl_q;    // flips once per committed entry
logic        feeder_go_tgl_q;    // flips once per GO request
logic [1:0]  feeder_ready_sync;  // synchronizer for feeder_ready_i

always_ff @(posedge clk or negedge reset_n) begin
    if (!reset_n) begin
        feeder_ready_sync <= 2'b00;
        feeder_go_tgl_q   <= 1'b0;
        feeder_we_tgl_q   <= 1'b0;
        feeder_waddr_q    <= 12'd0;
        feeder_wdata_q    <= 64'd0;
        feeder_lo_q       <= 32'd0;
        feeder_addr_q     <= 12'd0;
    end else begin
        feeder_ready_sync <= {feeder_ready_sync[0], feeder_ready_i};

        if (write_accept && write_is_feeder_addr)
            feeder_addr_q <= wdata_lane[11:0];

        if (write_accept && write_is_feeder_lo)
            feeder_lo_q <= wdata_lane;

        if (write_accept && write_is_feeder_hi) begin
            feeder_we_tgl_q <= ~feeder_we_tgl_q;            // commit strobe (toggle)
            feeder_waddr_q  <= feeder_addr_q;               // index for THIS entry
            feeder_wdata_q  <= {wdata_lane, feeder_lo_q};   // {hi,lo}
            feeder_addr_q   <= feeder_addr_q + 12'd1;       // index for the NEXT entry
        end

        if (write_accept && write_is_feeder_go && wdata_lane[0])
            feeder_go_tgl_q <= ~feeder_go_tgl_q;
    end
end

assign feeder_go_tgl_o     = feeder_go_tgl_q;
assign feeder_stg_wdata_o  = feeder_wdata_q;
assign feeder_stg_waddr_o  = feeder_waddr_q;
assign feeder_stg_we_tgl_o = feeder_we_tgl_q;
|
||
|
||
// Ch223/Ch245 — OSD register file: OSD_CTRL, OSD_CFG0, OSD_CFG1 and
// OSD_TRIGGER.
//
// Several statements below can target the same register in one cycle.
// They are all nonblocking, so for any given bit the LAST statement in
// source order wins. The order is therefore part of the contract:
//   1. OSD_CTRL[3] self-clear
//   2. OSD_TRIGGER write-1-to-clear from the HPS
//   3. OSD_TRIGGER set bits from the menu-FSM pulse inputs (beat step 2)
//   4. full-register HPS writes to CTRL/CFG0/CFG1         (beat step 1)
// The *_trigger_i inputs are used directly, without a synchronizer.
always_ff @(posedge clk or negedge reset_n) begin
    if (!reset_n) begin
        osd_trigger_q <= 32'd0;
        osd_cfg1_q    <= 32'd0;
        osd_cfg0_q    <= 32'd0;
        osd_ctrl_q    <= 32'd0;
    end else begin
        // 1. Bit 3 of OSD_CTRL drops back to 0 the cycle after it is set.
        if (osd_ctrl_q[3])
            osd_ctrl_q[3] <= 1'b0;

        // 2. W1C: each 1 in the written word clears that trigger bit.
        if (write_accept && write_is_osd_trigger)
            osd_trigger_q <= osd_trigger_q & ~wdata_lane;

        // 3. FSM-side sets. action (A) -> bit 4, back (B) -> bit 5; either
        //    one also records osd_trigger_row_i into bits [12:8].
        if (osd_set_trigger_i)
            osd_trigger_q[4] <= 1'b1;
        if (osd_back_trigger_i)
            osd_trigger_q[5] <= 1'b1;
        if (osd_set_trigger_i || osd_back_trigger_i)
            osd_trigger_q[12:8] <= osd_trigger_row_i;
        if (osd_scroll_down_trigger_i)
            osd_trigger_q[6] <= 1'b1;
        if (osd_scroll_up_trigger_i)
            osd_trigger_q[7] <= 1'b1;
        if (osd_open_trigger_i)
            osd_trigger_q[16] <= 1'b1;

        // 4. HPS full-word overwrites. Kept last so a freshly written
        //    OSD_CTRL[3]=1 is not cancelled by the self-clear in step 1.
        if (write_accept && write_is_osd_ctrl)
            osd_ctrl_q <= wdata_lane;
        if (write_accept && write_is_osd_cfg0)
            osd_cfg0_q <= wdata_lane;
        if (write_accept && write_is_osd_cfg1)
            osd_cfg1_q <= wdata_lane;
    end
end
|
||
|
||
// Ch249 — Ch230 osd_ctrl_enable retired. The platform OSD path
|
||
// reads OSD_CTRL[0] out of `osd_ctrl_o[0]` (one of the three new
|
||
// 32-bit register exports), so the single-bit Ch230 broadcast
|
||
// had become redundant when the stub was unwired in Ch245 and
|
||
// fully obsolete after Ch249 removed the stub instantiation.
|
||
|
||
// Ch245 — the three OSD registers are exported at full width for the
// platform OSD stack; no field extraction happens in the bridge.
// Sibling cores (NES, etc.) slice the fields at the top level as:
//   osd_cfg0[ 5: 0]  cols
//   osd_cfg0[12: 8]  rows
//   osd_cfg0[23:16]  origin_x_chars (x 16 for 2x scale)
//   osd_cfg0[31:24]  origin_y_chars (x 16 for 2x scale)
//   osd_cfg1[23:16]  cursor_attr
assign osd_cfg1_o = osd_cfg1_q;
assign osd_cfg0_o = osd_cfg0_q;
assign osd_ctrl_o = osd_ctrl_q;
|
||
|
||
// Ch235 — export the Ch222 INPUT_P1 / INPUT_P2 latches (plus the raw
// P1 latch) as plain bridge-clock-domain outputs. Nothing is
// resynchronized here: the downstream sio2_input_stub owns the CDC.
assign input_p1_raw_o = input_p1_raw_q;
assign input_p2_o     = input_p2_q;
assign input_p1_o     = input_p1_q;
|
||
|
||
// Ch227 — tile RAM write port. An accepted write that decodes into the
// tile window stores the 32-bit write lane at word index
// aw_addr_q[11:2]. There is intentionally no reset branch (see header
// note): contents are retained across warm reset, and simulation
// starts from 0 via the `initial` block above.
always_ff @(posedge clk) begin
    if (write_accept) begin
        if (write_in_tile) begin
            tile_mem[aw_addr_q[11:2]] <= wdata_lane;
        end
    end
end
|
||
|
||
// Ch229 — mirror every tile write towards the design-domain CDC.
//
// On each accepted tile write the toggle bit flips and the word index
// and data are latched on the same clock edge; all three then hold
// until the next tile write. The receiving CDC primitive detects the
// toggle edge and samples index/data once they are guaranteed stable
// (2 dclk synchronizer stages = at least 2 dclk periods of holdover,
// more than enough at any reasonable HPS tile-write rate).
//
// Reset clears the toggle and both latches so that a co-reset of the
// bridge and design domains starts both sides at the same value and
// no spurious edge appears after reset.
always_ff @(posedge clk or negedge reset_n) begin
    if (!reset_n) begin
        tile_wr_data   <= 32'd0;
        tile_wr_index  <= 10'd0;
        tile_wr_toggle <= 1'b0;
    end else begin
        if (write_accept) begin
            if (write_in_tile) begin
                tile_wr_data   <= wdata_lane;
                tile_wr_index  <= aw_addr_q[11:2];
                tile_wr_toggle <= ~tile_wr_toggle;   // one edge per write
            end
        end
    end
end
|
||
|
||
// ----------------------------------------------------------------
// AXI read channel. Two-state FSM:
//   R_IDLE : arready is high. On an AR handshake the ID is captured,
//            the response beat is built and rvalid is raised.
//   R_RESP : rvalid is held until the master asserts rready, then the
//            FSM returns to R_IDLE.
// The 32-bit value returned by reg_read() is placed in the 32-bit
// lane of the 128-bit beat selected by araddr[3:2]; every other lane
// is driven to zero.
// ----------------------------------------------------------------
typedef enum logic [0:0] { R_IDLE, R_RESP } r_state_t;

r_state_t     r_state;
logic [3:0]   ar_id_q;      // echoed back on s_axi_rid
logic [37:0]  ar_addr_q;    // captured with the ID; not read in this section
logic [127:0] rdata_q;      // registered response beat

// Channel outputs. A new address is accepted only while idle, and
// every response is a single OKAY beat.
assign s_axi_arready = (r_state == R_IDLE);
assign s_axi_rdata   = rdata_q;
assign s_axi_rid     = ar_id_q;
assign s_axi_rlast   = 1'b1;    // single-beat
assign s_axi_rresp   = 2'b00;   // OKAY

always_ff @(posedge clk or negedge reset_n) begin
    if (!reset_n) begin
        s_axi_rvalid <= 1'b0;
        rdata_q      <= '0;
        ar_addr_q    <= '0;
        ar_id_q      <= '0;
        r_state      <= R_IDLE;
    end else if (r_state == R_IDLE) begin
        // Default for an idle cycle; overridden below on a handshake.
        s_axi_rvalid <= 1'b0;

        if (s_axi_arvalid && s_axi_arready) begin
            r_state      <= R_RESP;
            s_axi_rvalid <= 1'b1;
            ar_addr_q    <= s_axi_araddr;
            ar_id_q      <= s_axi_arid;

            // Steer the register word into its lane of the beat.
            case (s_axi_araddr[3:2])
                2'b00:   rdata_q <= {96'd0, reg_read(s_axi_araddr)};
                2'b01:   rdata_q <= {64'd0, reg_read(s_axi_araddr), 32'd0};
                2'b10:   rdata_q <= {32'd0, reg_read(s_axi_araddr), 64'd0};
                default: rdata_q <= {reg_read(s_axi_araddr), 96'd0};
            endcase
        end
    end else if (r_state == R_RESP) begin
        // Hold the beat until the master takes it.
        if (s_axi_rready) begin
            r_state      <= R_IDLE;
            s_axi_rvalid <= 1'b0;
        end
    end else begin
        // Unknown state encoding: recover to idle.
        r_state <= R_IDLE;
    end
end
|
||
|
||
// ----------------------------------------------------------------
// Lint sink for the AXI4 fields (and h2f_reset) listed as unused.
// Folding them into one AND-reduction gives each a reader so the
// UNUSED lint stays quiet; the constant 1'b0 terms force the result
// to 0, so the wire carries no information.
// ----------------------------------------------------------------
// verilator lint_off UNUSED
wire _unused_ok = &{ 1'b0,
                     h2f_reset,
                     // AR channel
                     s_axi_arlen,  s_axi_arsize,  s_axi_arburst,
                     s_axi_arlock, s_axi_arcache, s_axi_arprot,
                     // W channel
                     s_axi_wdata,  s_axi_wstrb,   s_axi_wlast,
                     // AW channel
                     s_axi_awlen,  s_axi_awsize,  s_axi_awburst,
                     s_axi_awlock, s_axi_awcache, s_axi_awprot,
                     1'b0 };
// verilator lint_on UNUSED
|
||
|
||
endmodule : ps2_hps_bridge
|