ec82764bef
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
4223 lines
207 KiB
Python
4223 lines
207 KiB
Python
#!/usr/bin/env python3
|
||
# retroDE_ps2 — .mem bake for top_psmct32_raster_demo.
|
||
#
|
||
# Produces two parallel fixture sets:
|
||
#
|
||
#   Ch171 (original production card — superseded by the Ch251 animated demo below, which now bakes bios.mem / payload.mem):
|
||
# bios.mem — 19-word EE bootlet at 0xBFC0_0000
|
||
# payload.mem — 40 qwords: 16 zero + 24 GIF qwords (4 SPRITEs)
|
||
# Pattern: 320x240 four-quadrant test card (160x120 each,
|
||
# R / G / B / W) painted at PCRTC origin. DISPLAY1
|
||
# configured to (DW=319, DH=239) so PCRTC scans the
|
||
# full 320x240; the board wrapper bumps VRAM_BYTES to
|
||
# 512 KiB so 320*240*4 = 307,200 bytes fits.
|
||
#
|
||
# Ch146 (legacy — used by the Ch155/Ch158 16x8 sim TBs):
|
||
#     bios_ch146.mem    — 19-word EE bootlet at 0xBFC0_0000
|
||
# payload_ch146.mem — 40 qwords: 16 zero + 24 GIF qwords (4 SPRITEs)
|
||
# Pattern: 16x8 four-quadrant test card (8x4 each) with the
|
||
# pre-Ch171 RGB tints (0x55/0xAA/0xCC etc) that
|
||
# tb_top_psmct32_raster_demo_bram and friends still
|
||
# spot-check against. DISPLAY1 = (DW=15, DH=7).
|
||
#
|
||
# Both bootlet variants drive the same DMAC channel-2 kickoff
|
||
# (MADR=0x100, QWC=24, CHCR=START); only DISPLAY1 + GIF payload
|
||
# differ.
|
||
|
||
import os
|
||
|
||
OUT = os.path.dirname(os.path.abspath(__file__))
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# MIPS opcode helpers.
|
||
# ---------------------------------------------------------------------------
|
||
def enc_lui(rt, imm):
    """Encode ``LUI rt, imm`` (opcode 0x0F) as a 32-bit instruction word.

    ``rt`` is masked to 5 bits and ``imm`` to 16 bits.
    """
    opcode = 0x0F << 26
    return opcode | ((rt & 0x1F) << 16) | (imm & 0xFFFF)
|
||
|
||
|
||
def enc_ori(rt, rs, imm):
    """Encode ``ORI rt, rs, imm`` (opcode 0x0D) as a 32-bit instruction word.

    Register numbers are masked to 5 bits, ``imm`` to 16 bits.
    """
    opcode = 0x0D << 26
    src = (rs & 0x1F) << 21
    dst = (rt & 0x1F) << 16
    return opcode | src | dst | (imm & 0xFFFF)
|
||
|
||
|
||
def enc_sw(rt, rs, imm):
    """Encode ``SW rt, imm(rs)`` (opcode 0x2B) as a 32-bit instruction word.

    ``rt`` is the register stored, ``rs`` the base register, ``imm`` the
    16-bit offset field (masked, so negative offsets wrap to two's
    complement).
    """
    opcode = 0x2B << 26
    base = (rs & 0x1F) << 21
    value = (rt & 0x1F) << 16
    return opcode | base | value | (imm & 0xFFFF)
|
||
|
||
|
||
def enc_syscall():
    """Return the ``SYSCALL`` instruction word (SPECIAL funct 0x0C, code 0).

    The one-shot bootlets in this file use it as their halt instruction.
    """
    return 0x0000_000C
|
||
|
||
|
||
# Ch251 — additional opcodes for the animated bootlet's main loop.
|
||
def enc_addiu(rt, rs, imm):
    """Encode ``ADDIU rt, rs, imm`` (opcode 0x09, signed 16-bit imm).

    A negative ``imm`` is reduced to its 16-bit two's-complement field by
    the mask.
    """
    opcode = 0x09 << 26
    src = (rs & 0x1F) << 21
    dst = (rt & 0x1F) << 16
    return opcode | src | dst | (imm & 0xFFFF)
|
||
|
||
|
||
def enc_bne(rs, rt, offset_words):
    """Encode ``BNE rs, rt, offset`` (opcode 0x05).

    ``offset_words`` is the signed 16-bit word offset measured from PC+4
    (the delay-slot address), per the MIPS convention; it is masked to
    its 16-bit two's-complement field.
    """
    opcode = 0x05 << 26
    lhs = (rs & 0x1F) << 21
    rhs = (rt & 0x1F) << 16
    return opcode | lhs | rhs | (offset_words & 0xFFFF)
|
||
|
||
|
||
def enc_j(target_pc):
    """Encode ``J target`` (opcode 0x02, 26-bit field = ``target_pc >> 2``).

    Only bits [27:2] of ``target_pc`` are encoded; on execute the hardware
    takes the top 4 address bits from the delay-slot PC. The jump
    therefore only works when the target lies in the same 256 MiB region
    as the calling code — true for the BIOS-resident bootlet, which lives
    wholly in 0xBFC0_0xxx.
    """
    instr_index = (target_pc >> 2) & 0x03FFFFFF
    return (0x02 << 26) | instr_index
|
||
|
||
|
||
def enc_special(rs, rt, rd, sa, funct):
    """Encode an R-type instruction in the SPECIAL group (opcode 0).

    Field placement: rs[25:21], rt[20:16], rd[15:11], sa[10:6],
    funct[5:0]. Each argument is masked to its field width.
    """
    word = (rs & 0x1F) << 21
    word |= (rt & 0x1F) << 16
    word |= (rd & 0x1F) << 11
    word |= (sa & 0x1F) << 6
    word |= funct & 0x3F
    return word
|
||
|
||
|
||
def enc_xor(rd, rs, rt):
    """Encode ``XOR rd, rs, rt`` (SPECIAL funct 0x26)."""
    return enc_special(rs, rt, rd, 0, 0x26)
|
||
|
||
|
||
def enc_andi(rt, rs, imm):
    """Encode ``ANDI rt, rs, imm`` (opcode 0x0C, zero-extended 16-bit imm)."""
    opcode = 0x0C << 26
    src = (rs & 0x1F) << 21
    dst = (rt & 0x1F) << 16
    return opcode | src | dst | (imm & 0xFFFF)
|
||
|
||
|
||
def enc_lw(rt, rs, imm):
    """Encode ``LW rt, imm(rs)`` (opcode 0x23, signed 16-bit byte offset)."""
    opcode = 0x23 << 26
    base = (rs & 0x1F) << 21
    dst = (rt & 0x1F) << 16
    return opcode | base | dst | (imm & 0xFFFF)
|
||
|
||
|
||
def enc_nop():
    """Return the canonical MIPS NOP, ``SLL $0, $0, 0`` (all-zero word)."""
    return enc_special(0, 0, 0, 0, 0)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# GIF / GS field encoders.
|
||
# ---------------------------------------------------------------------------
|
||
# GS register numbers used as the A+D address byte.
R_PRIM = 0x00
R_RGBAQ = 0x01
R_XYZ2 = 0x05
R_FRAME_1 = 0x4C

# PRIM[2:0] primitive type for SPRITE.
PRIM_SPRITE = 6
|
||
# GS FRAME_1 layout: bits[8:0]=FBP, bits[21:16]=FBW (in 64-pixel units),
|
||
# bits[29:24]=PSM. For PSMCT32 (PSM=0), FBP=0, FBW depends on the
|
||
# framebuffer width we're rendering — see per-fixture overrides below.
|
||
def frame_1_psmct32(fbw):
    """Return a FRAME_1 value with FBP=0, PSM=0 (PSMCT32) and the given FBW.

    ``fbw`` is in 64-pixel units and is masked to the 6-bit field at
    bits [21:16].
    """
    return (fbw & 0x3F) << 16
|
||
|
||
# GS DISPFB1 layout: bits[8:0]=FBP, bits[13:9]=FBW (in 64-pixel units),
|
||
# bits[19:15]=PSM, bits[20+]=DBX/DBY. Same FBW units as FRAME_1.
|
||
def dispfb1_psmct32(fbw):
    """Return a DISPFB1 value with FBP=0, PSM=0 (PSMCT32) and the given FBW.

    ``fbw`` is in 64-pixel units and is masked to 5 bits before being
    placed at bit 9, so the result always fits a 16-bit immediate.
    """
    return (fbw & 0x1F) << 9
|
||
|
||
# PSMCT16 (PSM=2) variants for the Ch308 PSMCT16-tile/framebuffer demo: same FBW
|
||
# (64-pixel) units, PSM field set to 0x02 so the GS flush + PCRTC scanout use the
|
||
# 16-bit RGB5A1 format. FRAME_1.PSM=[29:24], DISPFB1.PSM=[19:15].
|
||
def frame_1_psmct16(fbw):
    """Return a FRAME_1 value with FBP=0, PSM=0x02 (PSMCT16) and the given FBW.

    Same FBW placement as the PSMCT32 variant; the PSM field sits at
    bits [29:24].
    """
    psm_field = 0x02 << 24
    return ((fbw & 0x3F) << 16) | psm_field
|
||
|
||
def dispfb1_psmct16(fbw):
    """Return a DISPFB1 value with FBP=0, PSM=0x02 (PSMCT16) and the given FBW.

    Same FBW placement as the PSMCT32 variant; the PSM field sits at
    bits [19:15].
    """
    psm_field = 0x02 << 15
    return ((fbw & 0x1F) << 9) | psm_field
|
||
|
||
|
||
def giftag(nloop, eop, flg, nreg, regs):
    """Pack a 128-bit GIFtag qword.

    Lower 64 bits: NLOOP[14:0], EOP[15], FLG[59:58], NREG[63:60].
    Upper 64 bits: ``regs`` (the register-descriptor nibbles).
    Every field is masked to its width.
    """
    mask64 = (1 << 64) - 1
    lower = nloop & 0x7FFF
    lower |= (eop & 0x1) << 15
    lower |= (flg & 0x3) << 58
    lower |= (nreg & 0xF) << 60
    return ((regs & mask64) << 64) | (lower & mask64)
|
||
|
||
|
||
def aplusd(reg_num, data64):
    """Pack one A+D qword: register number at bits [71:64], data at [63:0]."""
    data = data64 & ((1 << 64) - 1)
    return ((reg_num & 0xFF) << 64) | data
|
||
|
||
|
||
def xyz2_data(x, y):
    """Pack integer pixel coordinates into XYZ2 data.

    Each coordinate is masked to 12 bits and shifted left by 4 (4
    fractional bits); X lands at bit 4 and Y at bit 20. Z is left at 0.
    """
    x_field = (x & 0xFFF) << 4
    y_field = (y & 0xFFF) << 20
    return x_field | y_field
|
||
|
||
|
||
def rgbaq_data(r, g, b, a=0xFF):
    """Pack the low 32 bits of RGBAQ data: {A[31:24], B[23:16], G[15:8], R[7:0]}.

    Each channel is masked to 8 bits, matching the other field encoders
    in this file, so an out-of-range value cannot bleed into the
    neighbouring channel. ``a`` defaults to 0xFF.
    """
    return ((a & 0xFF) << 24) | ((b & 0xFF) << 16) | ((g & 0xFF) << 8) | (r & 0xFF)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Bootlet template. The two variants differ only in the DISPLAY1_hi
|
||
# constant: Ch171 wants DW=319 / DH=239 (320x240 scanout window),
|
||
# Ch146 wants DW=15 / DH=7 (16x8 window).
|
||
# ---------------------------------------------------------------------------
|
||
def bootlet_for_display1_hi(value_32, fbw):
    """Build the one-shot EE bootlet for a given scanout window.

    The bootlet writes DISPFB1 (PSMCT32, FBP=0, FBW=``fbw``), zeroes
    DISPLAY1_lo, writes ``value_32`` to DISPLAY1_hi, sets PMODE.EN1, then
    kicks the GIF payload via DMAC channel 2 (MADR=0x100, QWC=24) and
    halts with SYSCALL.

    Returns the list of 32-bit instruction words (19 entries).
    """
    # value_32 is loaded with a LUI (upper 16) + ORI (lower 16) pair.
    display1_upper = (value_32 >> 16) & 0xFFFF
    display1_lower = value_32 & 0xFFFF
    dispfb1_val = dispfb1_psmct32(fbw)  # fits in 16 bits for FBW <= 31
    assert dispfb1_val <= 0xFFFF

    gs_setup = [
        enc_lui(1, 0x1200),             # r1 = 0x1200_0000 (GS-priv base)
        enc_lui(2, 0x0000),             # r2 = 0x0000_0000
        enc_ori(2, 2, dispfb1_val),     # r2 = DISPFB1 (PSM=PSMCT32, FBP=0, FBW=fbw)
        enc_sw(2, 1, 0x0070),           # *DISPFB1 = r2
        enc_sw(0, 1, 0x0080),           # *DISPLAY1_lo = 0
        enc_lui(2, display1_upper),     # r2 = (upper 16 << 16)
        enc_ori(2, 2, display1_lower),  # r2 = value_32
        enc_sw(2, 1, 0x0084),           # *DISPLAY1_hi = r2
        enc_ori(2, 0, 0x0001),          # r2 = PMODE.EN1
        enc_sw(2, 1, 0x0000),           # *PMODE = r2
    ]
    dmac_kick = [
        enc_lui(10, 0x1000),            # r10 = 0x1000_0000
        enc_ori(10, 10, 0xA000),        # r10 = 0x1000_A000 (DMAC ch2 base)
        enc_ori(11, 0, 0x0100),         # r11 = PAYLOAD_MADR (0x100)
        enc_sw(11, 10, 0x0010),         # *MADR = r11
        enc_ori(11, 0, 24),             # r11 = QWC (24, 4 SPRITEs x 6 qwords)
        enc_sw(11, 10, 0x0020),         # *QWC = r11
        enc_ori(11, 0, 0x0001),         # r11 = CHCR.start
        enc_sw(11, 10, 0x0000),         # *CHCR = r11 (kicks DMA)
    ]
    return gs_setup + dmac_kick + [enc_syscall()]  # SYSCALL = halt
|
||
|
||
|
||
def payload_for_sprites(sprites, fbw):
    """Build the GIF payload for a list of flat SPRITEs.

    Each sprite is ``(r, g, b, x0, y0, x1, y1)`` and expands to 6 qwords:
    a PACKED GIFtag (NLOOP=1, NREG=5, all-A+D descriptors) followed by
    PRIM, FRAME_1, RGBAQ and the two XYZ2 corner vertices. EOP is set
    only on the last sprite's GIFtag.

    Returns ``6 * len(sprites)`` qwords as Python ints.
    """
    frame_1_val = frame_1_psmct32(fbw)
    final_index = len(sprites) - 1
    qwords = []
    for index, (r, g, b, x0, y0, x1, y1) in enumerate(sprites):
        eop = 1 if index == final_index else 0
        qwords += [
            giftag(1, eop, 0, 5, 0x0000_0000_000E_EEEE),
            aplusd(R_PRIM, PRIM_SPRITE),
            aplusd(R_FRAME_1, frame_1_val),
            aplusd(R_RGBAQ, rgbaq_data(r, g, b)),
            aplusd(R_XYZ2, xyz2_data(x0, y0)),
            aplusd(R_XYZ2, xyz2_data(x1, y1)),
        ]
    return qwords
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# .mem writers.
|
||
# ---------------------------------------------------------------------------
|
||
BIOS_TOTAL_WORDS = 1024  # matches top_psmct32_raster_demo BIOS_SIZE_BYTES default
RAM_TOTAL_QWORDS = 256   # matches top_psmct32_raster_demo RAM_SIZE_BYTES default
|
||
|
||
|
||
def write_bios_mem(filename, bootlet, banner):
    """Write a BIOS ``.mem`` image next to this script.

    The file holds two ``//`` comment lines (the caller's ``banner`` and a
    fixed loader note), then one 8-hex-digit line per bootlet word,
    zero-padded up to ``BIOS_TOTAL_WORDS`` lines. A bootlet longer than
    ``BIOS_TOTAL_WORDS`` is written in full with no padding.

    The file is opened as UTF-8 explicitly: banners contain non-ASCII
    characters (em dash), so the locale default would make the output
    platform-dependent.
    """
    pad_count = max(BIOS_TOTAL_WORDS - len(bootlet), 0)
    lines = [
        f"// {banner}\n",
        "// Loaded via $readmemh into bios_rom_stub.mem (32-bit/word).\n",
    ]
    lines += [f"{w:08x}\n" for w in bootlet]
    lines += [f"{0:08x}\n"] * pad_count
    with open(os.path.join(OUT, filename), "w", encoding="utf-8") as f:
        f.writelines(lines)
|
||
|
||
|
||
def write_payload_mem(filename, payload_qwords, banner):
    """Write an EE-RAM ``.mem`` image next to this script.

    Layout: three ``//`` comment lines, 16 zero qwords (so the payload
    starts at byte 0x100), the payload qwords as 32-hex-digit lines, then
    zero padding up to ``RAM_TOTAL_QWORDS`` lines. A payload that does not
    fit is written in full with no padding.

    The file is opened as UTF-8 explicitly: banners may contain non-ASCII
    characters, so the locale default would make the output
    platform-dependent.
    """
    zero_line = f"{0:032x}\n"
    pad_count = max(RAM_TOTAL_QWORDS - 16 - len(payload_qwords), 0)
    lines = [
        f"// {banner}\n",
        "// Loaded via $readmemh into ee_ram_stub.mem (128-bit qword).\n",
        "// qw 0..15: zero; qw 16..N: SPRITE PACKED packets.\n",
    ]
    lines += [zero_line] * 16
    lines += [f"{qw:032x}\n" for qw in payload_qwords]
    lines += [zero_line] * pad_count
    with open(os.path.join(OUT, filename), "w", encoding="utf-8") as f:
        f.writelines(lines)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch251 — animated 320x240 demo (production, used by synth).
|
||
#
|
||
# Replaces the Ch171 four-quadrant card with a richer scene:
|
||
# - 8 vertical SMPTE-style color bars (full height, 40 px wide each)
|
||
#   - 4 thin grey border strips (top / bottom / left / right, 4 px)
|
||
# - 4 small orange corner-alignment markers (8x8)
|
||
# - 1 center "heartbeat" SPRITE (16x16) whose RGBAQ is updated by
|
||
# the bootlet's main loop between alternating cyan/red.
|
||
#
|
||
# DISPLAY1_hi layout: bits[11:0]=DW (width-1), bits[22:12]=DH (height-1).
|
||
# DW=319=0x13F, DH=239=0xEF → 0xEF13F.
|
||
# FBW=5 because the framebuffer is 320 pixels wide (320 / 64 = 5 pages).
|
||
#
|
||
# The bootlet does the same initial setup as Ch171 (DISPFB1 / DISPLAY1 /
|
||
# PMODE / DMAC ch2 MADR+QWC) and the first DMAC kick, then enters a
|
||
# forever loop that:
|
||
# - busy-waits a delay counter
|
||
# - XORs the heartbeat RGBAQ qword in EE-RAM kseg0 between two colors
|
||
# - re-arms DMAC (MADR=0x100, QWC) and re-fires CHCR.start
|
||
#
|
||
# **Important semantic shift from Ch171:** the bootlet never SYSCALLs.
|
||
# `core_halt` stays 0 in steady state — that is the EXPECTED state for
|
||
# the animated demo. The new boot-success indicators are FRAME_COUNT
|
||
# advancing + visible heartbeat blink + RASTER_OVERFLOW_COUNT=0.
|
||
# Runbook LED ledger and tb_de25_nano_psmct32_raster_demo_top were
|
||
# updated accordingly.
|
||
# ---------------------------------------------------------------------------
|
||
|
||
CH251_FBW = 5
CH251_DISPLAY1_HI = 0x000EF13F  # DW=319 (320 px), DH=239 (240 px)
CH251_NUM_SPRITES = 17
CH251_QWC = CH251_NUM_SPRITES * 6  # 102 qwords (6 per SPRITE)
assert CH251_QWC <= 0xFFFF, "QWC must fit in an ORI imm field"

# Heartbeat byte offset in EE-RAM (kseg0 view).
# Payload starts at byte 0x100 (= 16 qwords of zero pre-padding).
# Heartbeat is the LAST SPRITE; each SPRITE = 6 qwords; RGBAQ is the
# 4th qword within (0-indexed offset +3). So:
#   = 0x100 + (NUM_SPRITES-1)*6*16 + 3*16
#   = 256 + 16*96 + 48 = 1840 = 0x730
CH251_HB_OFFSET = 0x100 + (CH251_NUM_SPRITES - 1) * 6 * 16 + 3 * 16
assert CH251_HB_OFFSET == 0x730

# Heartbeat alternating colors (packed RGBAQ word, low-half of qword).
# Bit layout (low 32 bits of RGBAQ data): {A[31:24], B[23:16], G[15:8], R[7:0]}.
#   CYAN = A=FF, B=FF, G=FF, R=00 -> 0xFFFFFF00
#   RED  = A=FF, B=00, G=00, R=FF -> 0xFF0000FF
#   XOR  = 0x00FFFFFF (flips R/G/B, keeps alpha)
CH251_HB_COLOR_A = 0xFFFFFF00
CH251_HB_COLOR_B = 0xFF0000FF
CH251_HB_XOR = CH251_HB_COLOR_A ^ CH251_HB_COLOR_B
assert CH251_HB_XOR == 0x00FFFFFF
|
||
|
||
|
||
def build_ch251_sprites():
    """Generate the 17-SPRITE list for the Ch251 animated demo.

    Draw order (later sprites paint over earlier ones):
      8 full-height color bars (40 px wide each), 4 grey border strips
      (4 px), 4 orange 8x8 corner markers, and finally the 16x16 center
      heartbeat sprite.

    Each entry is ``(r, g, b, x0, y0, x1, y1)`` with inclusive corners.
    """
    right, bottom = 319, 239  # last pixel column / row of the 320x240 frame

    # --- Color bars (full height, 40 px wide each) ---
    bar_colors = [
        (0xFF, 0xFF, 0xFF),  # 0 white
        (0xFF, 0xFF, 0x00),  # 1 yellow
        (0x00, 0xFF, 0xFF),  # 2 cyan
        (0x00, 0xFF, 0x00),  # 3 green
        (0xFF, 0x00, 0xFF),  # 4 magenta
        (0xFF, 0x00, 0x00),  # 5 red
        (0x00, 0x00, 0xFF),  # 6 blue
        (0x00, 0x00, 0x00),  # 7 black
    ]
    sprites = [
        (r, g, b, i * 40, 0, i * 40 + 39, bottom)
        for i, (r, g, b) in enumerate(bar_colors)
    ]

    # --- Thin grey border (drawn on top of the bars). Grey instead of
    # white so it stays visible against bar 0 (white) and bar 7 (black). ---
    grey = (0x80, 0x80, 0x80)
    bw = 4  # border width in pixels
    sprites += [
        (*grey, 0, 0, right, bw - 1),          # top
        (*grey, 0, 240 - bw, right, bottom),   # bottom
        (*grey, 0, 0, bw - 1, bottom),         # left
        (*grey, 320 - bw, 0, right, bottom),   # right
    ]

    # --- Orange corner-alignment markers (drawn on top of the border) ---
    orange = (0xFF, 0x80, 0x00)
    cs = 8  # corner-square side
    sprites += [
        (*orange, 0, 0, cs - 1, cs - 1),                # TL
        (*orange, 320 - cs, 0, right, cs - 1),          # TR
        (*orange, 0, 240 - cs, cs - 1, bottom),         # BL
        (*orange, 320 - cs, 240 - cs, right, bottom),   # BR
    ]

    # --- Heartbeat (drawn LAST so its RGBAQ qword is at offset 0x730) ---
    sprites.append((0x00, 0xFF, 0xFF, 152, 112, 167, 127))  # cyan

    assert len(sprites) == CH251_NUM_SPRITES
    return sprites
|
||
|
||
|
||
def build_ch251_animated_bootlet():
    """Build the Ch251 animated bootlet.

    Sequence: GS/PCRTC setup, DMAC ch2 setup + first kick, heartbeat
    register setup, then a forever loop that busy-waits, polls CHCR
    until the start bit clears, XOR-toggles the heartbeat RGBAQ word in
    EE-RAM, re-arms the DMAC and jumps back. The bootlet never SYSCALLs.

    Branch offsets and the J target are derived from the instruction
    list's length at the point each label is reached, so reordering or
    inserting instructions cannot silently desynchronize them. The word
    indices in the trailing comments describe the current layout.

    Returns the list of 32-bit instruction words (43 entries).
    """
    hi16 = (CH251_DISPLAY1_HI >> 16) & 0xFFFF
    lo16 = CH251_DISPLAY1_HI & 0xFFFF
    dispfb1_val = dispfb1_psmct32(CH251_FBW)

    color_a_hi = (CH251_HB_COLOR_A >> 16) & 0xFFFF
    color_a_lo = CH251_HB_COLOR_A & 0xFFFF
    xor_hi = (CH251_HB_XOR >> 16) & 0xFFFF
    xor_lo = CH251_HB_XOR & 0xFFFF

    # Delay counter — number of (addiu + bne + nop) iterations between
    # heartbeat updates.
    #
    # Ch254 cadence characterization (NOT a retune). The bootlet's
    # per-iteration time has TWO additive components:
    #
    #     total/toggle = delay_loop_time + fixed_overhead
    #
    # The delay loop is N iterations of ADDIU + BNE + delay-slot NOP,
    # each iter costing ~14-18 cycles through the ee_core_stub FSM
    # (S_IFETCH_REQ → S_IFETCH_WAIT → S_EXECUTE per instruction, ×3
    # instructions, plus a small amount of branch-handling slack).
    #
    # The fixed overhead is the DMAC drain of 102 qwords through the
    # GIF, the GS rasterization of all 17 SPRITEs, the poll loop
    # waiting for DMAC.STR=0, the CHCR re-arm sequence, and the
    # delay-slot NOP after the J. Hardware measurement at Ch251.5
    # (DELAY_HI=0x100 → 6 s/toggle) and Ch253 (DELAY_HI=0x2B → ~2
    # s/toggle) lets us back-solve both:
    #
    #     6 = 0x100_0000 * cyc/iter / 50e6 + overhead
    #     2 = 0x002B_0000 * cyc/iter / 50e6 + overhead
    #     => cyc/iter ≈ 14, overhead ≈ 1.2 s
    #
    # The ~1.2 s overhead is THE FLOOR. Even with DELAY_HI=0 the
    # bootlet can't toggle faster than ~0.8 Hz without restructuring
    # the rasterization or the bootlet's serialization (e.g., letting
    # the delay run *during* the drain instead of after it, or
    # shrinking the 17-SPRITE payload). That restructure is
    # deliberately out of scope here — the heartbeat is a LIVENESS
    # CUE, not a precision timer, and the slightly-sub-1-Hz cadence
    # is visible enough to confirm "this thing is animating" without
    # claiming a rate we cannot actually deliver.
    #
    # DELAY_HI = 0x002B is the locked Ch254 value. Empirical
    # cadence: ~2 s per cyan↔red toggle (~0.5 Hz), with natural
    # ±0.5 s jitter from overhead variation. `ps2_status.sh --delta`
    # accepts DMA_DONE Δ ∈ {0,1,2} per 2 s window as healthy; Δ=0
    # is a phase-miss not a failure (rerun once).
    #
    # In sim the EE runs at 100 MHz with a fast-paint raster, but the
    # TB only waits for the FIRST DMAC completion (LED[1] latch) — the
    # loop's delay countdown happens in parallel and doesn't affect
    # TB termination. The DMAC-poll inside the loop (below) makes the
    # re-arm bullet-proof regardless of how delay vs drain race.
    DELAY_HI = 0x002B
    DELAY_LO = 0x0000

    bios_base_pc = 0xBFC0_0000

    words = [
        # --- Initial setup (matches the existing one-shot bootlet) ---
        enc_lui(1, 0x1200),              #  0: r1 = 0x1200_0000 (GS-priv base)
        enc_lui(2, 0x0000),              #  1: r2 = 0
        enc_ori(2, 2, dispfb1_val),      #  2: r2 = DISPFB1 value (FBP=0, FBW=5)
        enc_sw(2, 1, 0x0070),            #  3: *DISPFB1 = r2
        enc_sw(0, 1, 0x0080),            #  4: *DISPLAY1_lo = 0
        enc_lui(2, hi16),                #  5: r2 = hi
        enc_ori(2, 2, lo16),             #  6: r2 = DISPLAY1_hi value
        enc_sw(2, 1, 0x0084),            #  7: *DISPLAY1_hi = r2
        enc_ori(2, 0, 0x0001),           #  8: r2 = PMODE.EN1
        enc_sw(2, 1, 0x0000),            #  9: *PMODE = r2

        # --- DMAC ch2 setup + first kick ---
        enc_lui(10, 0x1000),             # 10: r10 = 0x1000_0000
        enc_ori(10, 10, 0xA000),         # 11: r10 = 0x1000_A000 (DMAC ch2)
        enc_ori(11, 0, 0x0100),          # 12: r11 = MADR (0x100)
        enc_sw(11, 10, 0x0010),          # 13: *MADR = r11
        enc_ori(11, 0, CH251_QWC),       # 14: r11 = QWC (102)
        enc_sw(11, 10, 0x0020),          # 15: *QWC = r11
        enc_ori(11, 0, 0x0001),          # 16: r11 = CHCR.start
        enc_sw(11, 10, 0x0000),          # 17: *CHCR = r11 (first kick)

        # --- Heartbeat-loop state setup ---
        enc_lui(3, 0x8000),              # 18: r3 = 0x8000_0000 (kseg0 base)
        enc_ori(3, 3, CH251_HB_OFFSET),  # 19: r3 = 0x8000_0730 (heartbeat RGBAQ)
        enc_lui(5, color_a_hi),          # 20: r5 = current color (hi)
        enc_ori(5, 5, color_a_lo),       # 21: r5 = current color = CYAN
        enc_lui(6, xor_hi),              # 22: r6 = XOR mask (hi)
        enc_ori(6, 6, xor_lo),           # 23: r6 = XOR mask = 0x00FF_FFFF
    ]

    # --- Main loop: delay + DMAC-drain poll + toggle + re-fire ---
    loop_start_index = len(words)        # 24
    words += [
        enc_lui(12, DELAY_HI),           # 24: <LOOP_START> r12 = delay count hi
        enc_ori(12, 12, DELAY_LO),       # 25: r12 = delay count
    ]

    delay_index = len(words)             # 26
    words.append(enc_addiu(12, 12, -1))  # 26: <DELAY> r12 -= 1
    # MIPS BNE offset is signed words from PC+4 (the delay-slot PC):
    # target_index - (bne_index + 1). len(words) is the BNE's own index
    # at the moment its operand is evaluated.
    words.append(enc_bne(12, 0, delay_index - (len(words) + 1)))  # 27: BNE r12, $0, DELAY
    words.append(enc_nop())              # 28: NOP (delay slot)

    poll_index = len(words)              # 29
    words.append(enc_lw(12, 10, 0x0000))     # 29: <POLL> r12 = *CHCR
    words.append(enc_andi(12, 12, 0x0001))   # 30: r12 &= 0x1 (start bit)
    words.append(enc_bne(12, 0, poll_index - (len(words) + 1)))   # 31: BNE r12, $0, POLL (wait for DMAC done)
    words.append(enc_nop())              # 32: NOP (delay slot)

    words += [
        enc_xor(5, 5, 6),                # 33: r5 ^= r6 (flip color)
        enc_sw(5, 3, 0x0000),            # 34: *0x8000_0730 = r5

        enc_ori(11, 0, 0x0100),          # 35: re-arm MADR
        enc_sw(11, 10, 0x0010),          # 36:
        enc_ori(11, 0, CH251_QWC),       # 37: re-arm QWC
        enc_sw(11, 10, 0x0020),          # 38:
        enc_ori(11, 0, 0x0001),          # 39: CHCR.start
        enc_sw(11, 10, 0x0000),          # 40: fire DMAC

        enc_j(bios_base_pc + loop_start_index * 4),  # 41: j LOOP_START
        enc_nop(),                       # 42: NOP (delay slot)
    ]
    return words
|
||
|
||
|
||
# Bake the Ch251 production fixture: bios.mem + payload.mem.
ch251_bootlet = build_ch251_animated_bootlet()
ch251_sprites = build_ch251_sprites()
ch251_payload = payload_for_sprites(ch251_sprites, CH251_FBW)
# The bootlet programs QWC from CH251_QWC, so the payload must match it.
assert len(ch251_payload) == CH251_QWC

write_bios_mem(
    "bios.mem", ch251_bootlet,
    f"Ch251 animated BIOS bootlet ({len(ch251_bootlet)} words active, padded to "
    f"{BIOS_TOTAL_WORDS}); DISPLAY1 = 320x240; LOOPS FOREVER — core_halt=0 is expected"
)
write_payload_mem(
    "payload.mem", ch251_payload,
    f"Ch251 GIF payload ({CH251_QWC} qwords active at byte 0x100, padded to "
    f"{RAM_TOTAL_QWORDS} qwords); {CH251_NUM_SPRITES} SPRITEs (8 color bars + "
    f"4 border strips + 4 corner markers + 1 heartbeat at 0x{CH251_HB_OFFSET:03x})"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch146 — 16x8 four-quadrant test card (legacy, used by sim TBs that
|
||
# pre-date Ch171 and check the original pre-Ch171 RGB tints).
|
||
# DISPLAY1_hi: DW=15=0xF, DH=7 → 0x700F (matches pre-Ch171 bootlet).
|
||
# ---------------------------------------------------------------------------
|
||
CH146_FBW = 1  # 16-pixel-wide fb fits in 1 page (=64 px)
ch146_bootlet = bootlet_for_display1_hi(0x0000_700F, CH146_FBW)
# (r, g, b, x0, y0, x1, y1) — four 8x4 quadrants with the pre-Ch171 tints.
ch146_sprites = [
    (0x55, 0xAA, 0xCC, 0, 0, 7, 3),   # Q0
    (0x66, 0xBB, 0xDD, 8, 0, 15, 3),  # Q1
    (0x77, 0x33, 0x99, 0, 4, 7, 7),   # Q2
    (0x88, 0x44, 0x22, 8, 4, 15, 7),  # Q3
]
ch146_payload = payload_for_sprites(ch146_sprites, CH146_FBW)
# bootlet_for_display1_hi hard-codes QWC=24.
assert len(ch146_payload) == 24

write_bios_mem(
    "bios_ch146.mem", ch146_bootlet,
    f"Ch146 legacy BIOS bootlet ({len(ch146_bootlet)} words active, padded to "
    f"{BIOS_TOTAL_WORDS}); DISPLAY1 = 16x8"
)
write_payload_mem(
    "payload_ch146.mem", ch146_payload,
    f"Ch146 legacy GIF payload (24 qwords active, padded to {RAM_TOTAL_QWORDS} qwords); "
    f"4 SPRITEs covering 16x8 with the pre-Ch171 RGB tints"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Brick 1 — TEXTURED-sprite demo fixture.
|
||
#
|
||
# Proves the synthesizable textured-SPRITE path end-to-end through the
|
||
# real top: a small texture is uploaded to VRAM via a BITBLT/IMAGE GIF
|
||
# packet, then a textured SPRITE (PRIM.TME=1, PSMCT32 DECAL) is drawn
|
||
# sampling that texture, plus one FLAT control sprite so the scanout
|
||
# shows both.
|
||
#
|
||
# This fixture is consumed by tb_top_psmct32_textured_demo, which builds
|
||
# top_psmct32_raster_demo with PSMCT32_SWIZZLE=0 so the LINEAR
|
||
# gs_texel_addr fetch and the BITBLT upload share one VRAM layout (the
|
||
# swizzle reconciliation the gs_stub TODO flags is out of scope for v1).
|
||
#
|
||
# Layout (all PSMCT32, linear):
|
||
# Framebuffer : FBP=0, FBW=1 (64 px/row). Active scanout 16x8.
|
||
# Texture : TBP0=8 -> base 8*256 = 2048 bytes. 8x8 texels, TBW=1.
|
||
# FB occupies bytes 0..2047 (8 rows*64px*4 = 2048), so
|
||
# the texture region at >=2048 never overlaps the FB.
|
||
#
|
||
# GIF payload structure (DMAC-streamed from byte 0x100):
|
||
# U1 GIFtag PACKED NREG=4 : BITBLTBUF / TRXPOS / TRXREG / TRXDIR
|
||
# -> arms gif_image_xfer_stub for a host->local upload of the
|
||
# 8x8 texture to DBP=8.
|
||
# U2 GIFtag IMAGE NLOOP=16 : 16 qwords (4 PSMCT32 texels each) =
|
||
# the 8x8 = 64 texels, row-major.
|
||
# U3 GIFtag PACKED NREG=6 : PRIM(SPRITE+TME) / FRAME_1 / TEX0_1 /
|
||
# RGBAQ(flat fallback) / UV(0,0) / XYZ2(0,0)
|
||
# U4 GIFtag PACKED NREG=2 : UV(7,7) / XYZ2(7,7) (closing vertex,
|
||
# EOP=1) -> textured 8x8 sprite at screen (0,0)..(7,7).
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Additional GS register numbers used by the textured demo.
R_ST = 0x02
R_UV = 0x03
R_TEX0_1 = 0x06
R_BITBLTBUF = 0x50
R_TRXPOS = 0x51
R_TRXREG = 0x52
R_TRXDIR = 0x53

TEX_DEMO_FBW = 1
TEX_DEMO_TBP0 = 8  # texture base = 8*256 = 2048 bytes
TEX_DEMO_TBW = 1   # 64 texels/row
TEX_DEMO_TEXW = 8
TEX_DEMO_TEXH = 8
# DISPLAY1_hi: DW=15 (16 px), DH=7 (8 px) -> 0x700F (same window as Ch146).
TEX_DEMO_DISPLAY1_HI = 0x0000_700F
|
||
|
||
|
||
def prim_sprite_tme():
    """Return the PRIM value for a textured SPRITE: PRIM[2:0]=6, bit 4 (TME) set."""
    sprite = 6
    tme = 1 << 4
    return sprite | tme
|
||
|
||
|
||
def tex0_pack(tbp0, tbw, psm=0, tw=3, th=3, tfx=1):
    """Pack the texture-side fields of TEX0.

    Field placement (per gs_stub decode):
    TBP0[13:0], TBW[19:14], PSM[25:20], TW[29:26], TH[33:30], TFX[36:35].

    Ch333: TFX defaults to 1 (DECAL, texel replaces color) so the combined
    path matches the pre-Ch333 behavior for all existing scenes. Color
    scenes pass ``tfx=0`` (MODULATE: texel*RGBAQ).
    """
    fields = (
        (tbp0, 0x3FFF, 0),
        (tbw, 0x3F, 14),
        (psm, 0x3F, 20),
        (tw, 0xF, 26),
        (th, 0xF, 30),
        (tfx, 0x3, 35),
    )
    packed = 0
    for value, mask, shift in fields:
        packed |= (value & mask) << shift
    return packed
|
||
|
||
|
||
def uv_data(ui, vi):
    """Pack integer texel coordinates into UV register data.

    Each coordinate is converted to 10.4 fixed point (integer texel << 4)
    and masked to 14 bits; U occupies [13:0] and V is placed at [27:14].
    """
    # NOTE(review): V is placed at bit 14 here; confirm this matches the
    # gs_stub UV decode before reusing this encoder against other GS models.
    u_field = (ui << 4) & 0x3FFF
    v_field = (vi << 4) & 0x3FFF
    return u_field | (v_field << 14)
|
||
|
||
|
||
def bitbltbuf_pack(dbp, dbw, dpsm):
    """Pack the destination half of BITBLTBUF.

    DBP is placed at bit 32 (14 bits), DBW at bit 48 (6 bits) and DPSM at
    bit 56 (6 bits). The source-side fields are left at 0.
    """
    dest_base = (dbp & 0x3FFF) << 32
    dest_width = (dbw & 0x3F) << 48
    dest_format = (dpsm & 0x3F) << 56
    return dest_base | dest_width | dest_format
|
||
|
||
|
||
def trxpos_pack(dsax, dsay):
    """Pack the destination start position of TRXPOS.

    DSAX is placed at bit 32 and DSAY at bit 48, each masked to 11 bits.
    The source-side fields are left at 0.
    """
    return ((dsax & 0x7FF) << 32) | ((dsay & 0x7FF) << 48)
|
||
|
||
|
||
def trxreg_pack(rrw, rrh):
    """Pack TRXREG: transfer width RRW at [11:0], height RRH at bit 32 (12 bits each)."""
    width = rrw & 0xFFF
    height = (rrh & 0xFFF) << 32
    return width | height
|
||
|
||
|
||
def trxdir_pack(xdir):
    """Pack TRXDIR: the 2-bit transfer direction (callers here pass 0 = host->local)."""
    return xdir & 0x3
|
||
|
||
|
||
def tex_demo_texel(x, y):
    """Return the ABGR texel of the demo texture at ``(x, y)``.

    A bold 2x2-quadrant pattern so the sampled texture is unmistakable on
    screen (a flat fill can't produce 4 distinct cells). A=0xFF.
      (x<4,y<4)=RED   (x>=4,y<4)=GREEN   (x<4,y>=4)=BLUE   (x>=4,y>=4)=YELLOW
    ABGR word = A<<24 | B<<16 | G<<8 | R.
    """
    # Keyed by (is_top, is_left).
    quadrant_rgb = {
        (True, True): (0xFF, 0x00, 0x00),    # RED
        (True, False): (0x00, 0xFF, 0x00),   # GREEN
        (False, True): (0x00, 0x00, 0xFF),   # BLUE
        (False, False): (0xFF, 0xFF, 0x00),  # YELLOW
    }
    r, g, b = quadrant_rgb[(y < 4, x < 4)]
    return 0xFF000000 | (b << 16) | (g << 8) | r
|
||
|
||
|
||
def build_textured_demo_payload():
    """Build the GIF payload for the textured-sprite demo.

    Packets, in order:
      U1  PACKED A+D x4 : BITBLTBUF / TRXPOS / TRXREG / TRXDIR (upload setup)
      U2  IMAGE         : the 8x8 texture, 4 PSMCT32 texels per qword,
                          row-major
      U3  PACKED A+D x6 : PRIM(SPRITE+TME) / FRAME_1 / TEX0_1 / RGBAQ /
                          UV(0,0) / XYZ2(0,0)
      U4  PACKED A+D x2 : UV / XYZ2 closing vertex of the textured sprite
      U5  PACKED A+D x5 : flat control sprite at (8,0)..(15,7), EOP=1

    Returns the qwords as a list of Python ints.
    """
    frame_1_val = frame_1_psmct32(TEX_DEMO_FBW)
    last_u = TEX_DEMO_TEXW - 1
    last_v = TEX_DEMO_TEXH - 1

    # --- U1: BITBLTBUF / TRXPOS / TRXREG / TRXDIR (PACKED A+D, NREG=4) ---
    qw = [
        giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE),  # 4x A+D descriptors
        aplusd(R_BITBLTBUF, bitbltbuf_pack(TEX_DEMO_TBP0, TEX_DEMO_TBW, 0)),
        aplusd(R_TRXPOS, trxpos_pack(0, 0)),
        aplusd(R_TRXREG, trxreg_pack(TEX_DEMO_TEXW, TEX_DEMO_TEXH)),
        aplusd(R_TRXDIR, trxdir_pack(0)),  # 0 = host->local
    ]

    # --- U2: IMAGE qwords (FLG=2). 64 texels / 4 per qword = 16 qwords. ---
    n_image = (TEX_DEMO_TEXW * TEX_DEMO_TEXH) // 4
    qw.append(giftag(n_image, 0, 2, 0, 0))  # IMAGE, NLOOP=16
    for first_texel in range(0, n_image * 4, 4):
        image_qword = 0
        for lane in range(4):
            # Row-major texel index -> (x, y); lane 0 is the low 32 bits.
            ty, tx = divmod(first_texel + lane, TEX_DEMO_TEXW)
            image_qword |= (tex_demo_texel(tx, ty) & 0xFFFFFFFF) << (32 * lane)
        qw.append(image_qword)

    # --- U3: PRIM / FRAME_1 / TEX0_1 / RGBAQ / UV0 / XYZ2_0 (PACKED, NREG=6) ---
    tex0_val = tex0_pack(TEX_DEMO_TBP0, TEX_DEMO_TBW, 0, 3, 3)  # TW=TH=3 -> 8x8
    qw += [
        giftag(1, 0, 0, 6, 0x0000_0000_00EE_EEEE),
        aplusd(R_PRIM, prim_sprite_tme()),
        aplusd(R_FRAME_1, frame_1_val),
        aplusd(R_TEX0_1, tex0_val),
        aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)),  # flat fallback (overridden)
        aplusd(R_UV, uv_data(0, 0)),
        aplusd(R_XYZ2, xyz2_data(0, 0)),
    ]

    # --- U4: UV1 / XYZ2_1 closing the textured sprite (PACKED, NREG=2) ---
    qw += [
        giftag(1, 0, 0, 2, 0x0000_0000_0000_00EE),  # 2x A+D descriptors
        aplusd(R_UV, uv_data(last_u, last_v)),
        aplusd(R_XYZ2, xyz2_data(last_u, last_v)),
    ]

    # --- U5: a FLAT control sprite at (8,0)..(15,7) so the scanout shows
    # both textured and flat content side by side. EOP here. ---
    qw += [
        giftag(1, 1, 0, 5, 0x0000_0000_000E_EEEE),  # EOP
        aplusd(R_PRIM, PRIM_SPRITE),  # SPRITE, no TME
        aplusd(R_FRAME_1, frame_1_val),
        aplusd(R_RGBAQ, rgbaq_data(0x20, 0xC0, 0x40)),  # distinct flat green
        aplusd(R_XYZ2, xyz2_data(8, 0)),
        aplusd(R_XYZ2, xyz2_data(15, 7)),
    ]
    return qw
|
||
|
||
|
||
def build_textured_demo_bootlet_disp(qwc, display1_hi, fbw):
    """Build a one-shot bootlet with a caller-chosen scanout window.

    Writes DISPFB1 (PSMCT32, FBP=0, FBW=``fbw``), zeroes DISPLAY1_lo,
    writes ``display1_hi`` to DISPLAY1_hi, sets PMODE.EN1, kicks DMAC ch2
    (MADR=0x100, QWC=``qwc``) and halts with SYSCALL. NO new EE/BIOS
    scaffolding — the instruction shape is fixed; only the
    DISPFB1 / DISPLAY1 / QWC immediates vary.

    Returns the list of 32-bit instruction words (19 entries).
    """
    display1_upper = (display1_hi >> 16) & 0xFFFF
    display1_lower = display1_hi & 0xFFFF
    dispfb1_val = dispfb1_psmct32(fbw)
    # Both values are loaded with a single ORI, so they must fit 16 bits.
    assert dispfb1_val <= 0xFFFF
    assert qwc <= 0xFFFF

    gs_setup = [
        enc_lui(1, 0x1200),             # r1 = 0x1200_0000 (GS-priv base)
        enc_lui(2, 0x0000),
        enc_ori(2, 2, dispfb1_val),     # r2 = DISPFB1 (PSMCT32, FBP=0, FBW=fbw)
        enc_sw(2, 1, 0x0070),           # *DISPFB1 = r2
        enc_sw(0, 1, 0x0080),           # *DISPLAY1_lo = 0
        enc_lui(2, display1_upper),
        enc_ori(2, 2, display1_lower),  # r2 = display1_hi
        enc_sw(2, 1, 0x0084),           # *DISPLAY1_hi = r2
        enc_ori(2, 0, 0x0001),          # r2 = PMODE.EN1
        enc_sw(2, 1, 0x0000),           # *PMODE = r2
    ]
    dmac_kick = [
        enc_lui(10, 0x1000),
        enc_ori(10, 10, 0xA000),        # r10 = 0x1000_A000 (DMAC ch2 base)
        enc_ori(11, 0, 0x0100),         # r11 = MADR (0x100)
        enc_sw(11, 10, 0x0010),
        enc_ori(11, 0, qwc),            # r11 = QWC
        enc_sw(11, 10, 0x0020),
        enc_ori(11, 0, 0x0001),         # r11 = CHCR.start
        enc_sw(11, 10, 0x0000),         # *CHCR = r11 (kick DMA)
    ]
    return gs_setup + dmac_kick + [enc_syscall()]  # SYSCALL = halt
|
||
|
||
|
||
def build_textured_demo_bootlet(qwc):
    """Build the one-shot bootlet for the 16x8 PSMCT32 textured demo.

    Configures DISPFB1/DISPLAY1/PMODE for the 16x8 window
    (``TEX_DEMO_DISPLAY1_HI``, ``TEX_DEMO_FBW``), kicks DMAC ch2 to stream
    ``qwc`` qwords of GIF payload, then SYSCALL-halts (one-shot, like the
    Ch146 fixture).

    The instruction sequence is the same one
    ``build_textured_demo_bootlet_disp`` emits, so this delegates to it
    rather than keeping a second copy of the list in sync.
    """
    return build_textured_demo_bootlet_disp(
        qwc, TEX_DEMO_DISPLAY1_HI, TEX_DEMO_FBW
    )
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Brick 2a — ALPHA-BLEND (transparency) demo fixture.
|
||
#
|
||
# Proves the FLAT alpha-blended SPRITE path end-to-end: an OPAQUE
|
||
# background sprite is painted first, then a SEMI-TRANSPARENT flat
|
||
# sprite (PRIM.ABE=1, source-over ALPHA config, RGBAQ.A=0x40) overlaps
|
||
# it. The overlap region must show the per-pixel blend of source over
|
||
# dest — NEITHER the pure source nor the pure dest — while the
|
||
# non-overlap region stays the pure background.
|
||
#
|
||
# Layout (PSMCT32, linear, 16x8 framebuffer; same window as the
|
||
# textured demo so the existing scanout plumbing is reused):
|
||
# BG sprite : solid blue (R=0x00, G=0x00, B=0xC0), ABE=0,
|
||
# covers the whole 16x8 area (0,0)..(15,7).
|
||
# OVERLAY sprite: red (R=0xFF, G=0x00, B=0x00), A=0x40, ABE=1,
|
||
# source-over, covers x in [0..7], full height.
|
||
#
|
||
# Source-over blend Cv = ((Cs - Cd) * As) >> 7 + Cd, As=0x40 (=64):
|
||
# R: (255-0)*64>>7 + 0 = 127 (0x7F)
|
||
# G: (0-0)*64>>7 + 0 = 0 (0x00)
|
||
# B: (0-192)*64>>7 + 192 = 96 (0x60)
|
||
# Overlap region (x in [0..7]) -> (0x7F, 0x00, 0x60).
|
||
# Non-overlap (x in [8..15]) -> pure background blue (0x00,0x00,0xC0).
|
||
# ---------------------------------------------------------------------------
|
||
|
||
R_ALPHA_1 = 0x42

ALPHA_DEMO_FBW = 1
ALPHA_DEMO_DISPLAY1_HI = 0x0000_700F  # DW=15 (16 px), DH=7 (8 px)

# Background (opaque) and overlay (semi-transparent) colors.
ALPHA_BG_R, ALPHA_BG_G, ALPHA_BG_B = 0x00, 0x00, 0xC0  # blue
ALPHA_OV_R, ALPHA_OV_G, ALPHA_OV_B = 0xFF, 0x00, 0x00  # red
ALPHA_OV_A = 0x40  # ~0.5 (0x80 == 1.0)
|
||
|
||
|
||
def prim_sprite_abe():
    """Return the PRIM value for a flat SPRITE with alpha blending enabled.

    PRIM[2:0] = 6 selects SPRITE; bit 6 is ABE (alpha-blend enable).
    """
    sprite_type = 6
    abe_flag = 1 << 6
    return sprite_type | abe_flag
|
||
|
||
|
||
def alpha_pack(a, b, c, d, fix=0):
    """Pack the ALPHA_1 blend selectors into a single register value.

    Field layout: A[1:0], B[3:2], C[5:4], D[7:6], FIX[39:32]. Each
    selector is masked to 2 bits and FIX to 8 bits before placement.
    """
    packed = (fix & 0xFF) << 32
    for selector, shift in ((a, 0), (b, 2), (c, 4), (d, 6)):
        packed |= (selector & 0x3) << shift
    return packed
|
||
|
||
|
||
def build_alpha_blend_demo_payload():
    """Build the GIF qword list for the flat alpha-blend fixture.

    Two packets are produced: an opaque background SPRITE spanning
    (0,0)..(15,7), followed by an ABE=1 overlay SPRITE spanning
    (0,0)..(7,7) that is blended over it.
    """
    frame = frame_1_psmct32(ALPHA_DEMO_FBW)

    # Packet 1 (5 A+D registers): opaque background sprite, ABE=0.
    background = [
        giftag(1, 0, 0, 5, 0x0000_0000_000E_EEEE),
        aplusd(R_PRIM, PRIM_SPRITE),
        aplusd(R_FRAME_1, frame),
        aplusd(R_RGBAQ, rgbaq_data(ALPHA_BG_R, ALPHA_BG_G, ALPHA_BG_B)),
        aplusd(R_XYZ2, xyz2_data(0, 0)),
        aplusd(R_XYZ2, xyz2_data(15, 7)),
    ]

    # Packet 2 (6 A+D registers, EOP): semi-transparent overlay on x in [0..7].
    # Source-over selectors: A=0 (Cs), B=1 (Cd), C=0 (As), D=1 (Cd); the
    # source alpha travels in RGBAQ.A.
    overlay = [
        giftag(1, 1, 0, 6, 0x0000_0000_00EE_EEEE),
        aplusd(R_PRIM, prim_sprite_abe()),
        aplusd(R_FRAME_1, frame),
        aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)),
        aplusd(R_RGBAQ, rgbaq_data(ALPHA_OV_R, ALPHA_OV_G, ALPHA_OV_B, ALPHA_OV_A)),
        aplusd(R_XYZ2, xyz2_data(0, 0)),
        aplusd(R_XYZ2, xyz2_data(7, 7)),
    ]

    return background + overlay
|
||
|
||
|
||
# Bake the Brick 2a fixture: build the payload, size the bootlet's QWC from
# it, then write both .mem images.
alpha_demo_payload = build_alpha_blend_demo_payload()
alpha_demo_qwc = len(alpha_demo_payload)
alpha_demo_bootlet = build_textured_demo_bootlet(alpha_demo_qwc)

write_bios_mem(
    "bios_alpha.mem",
    alpha_demo_bootlet,
    f"Brick2a alpha-blend BIOS bootlet ({len(alpha_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 16x8; QWC={alpha_demo_qwc}",
)
write_payload_mem(
    "payload_alpha.mem",
    alpha_demo_payload,
    f"Brick2a alpha-blend GIF payload ({alpha_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); opaque BG sprite + semi-transparent overlay",
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch344 — TEXTURED + source-over ALPHA SPRITE demo fixture.
|
||
# Upload an 8x8 checkerboard-ALPHA texture, draw an opaque BG sprite (64x64 blue),
|
||
# then a textured-alpha SPRITE (PRIM SPRITE+TME+ABE) blended over it: opaque-white
|
||
# checks show gray, transparent checks reveal the blue BG. Exercises gs_stub's
|
||
# SPRITE_TEX_ALPHA path end-to-end through the bram-top.
|
||
# Frame-buffer width argument passed to frame_1_psmct32() by this fixture.
TEXALPHA_FBW = 1
# DISPLAY1 upper word for a 64x64 window: DH=63 above bit 12, DW=63 below.
TEXALPHA_DISPLAY1_HI = 0x0003_F03F
# Texture base pointer: base word = 64*64 = 4096, right after the 64x64 FB.
TEXALPHA_TBP = 64
# Uploaded texture is 8x8 texels.
TEXALPHA_TEXW = 8
TEXALPHA_TEXH = 8
# Sprite screen rectangle (32x32 -> 4 screen px per texel).
TEXALPHA_X0 = 16
TEXALPHA_Y0 = 16
TEXALPHA_X1 = 48
TEXALPHA_Y1 = 48
# Blue background colour.
TEXALPHA_BG_R = 0x00
TEXALPHA_BG_G = 0x00
TEXALPHA_BG_B = 0xC0
|
||
|
||
|
||
def texalpha_texel(u, v):
    """Return the 32-bit texel at (u, v) of the alpha checkerboard.

    The pattern alternates in 2x2-texel cells: cells of even parity hold
    an opaque light-gray texel (A=0x80, RGB=0xC0); the others hold 0,
    i.e. A=0, so the blend keeps the destination (background).
    """
    opaque_gray = (0x80 << 24) | (0xC0 << 16) | (0xC0 << 8) | 0xC0
    transparent = 0x00000000
    odd_cell = ((u >> 1) ^ (v >> 1)) & 1
    return transparent if odd_cell else opaque_gray
|
||
|
||
|
||
def prim_sprite_tme_abe():
    """Return the PRIM value for SPRITE (type 6) with TME (bit 4) and ABE (bit 6) set."""
    sprite_type = 6
    tme_flag = 1 << 4
    abe_flag = 1 << 6
    return sprite_type | tme_flag | abe_flag
|
||
|
||
|
||
def build_texalpha_demo_payload():
    """Build the GIF qword list for the textured + source-over alpha fixture.

    Packet order: BITBLT setup and IMAGE data uploading the 8x8 alpha
    texture, an opaque 64x64 background SPRITE, then a SPRITE+TME+ABE
    overlay that samples the uploaded texture and blends over the
    background.
    """
    frame = frame_1_psmct32(TEXALPHA_FBW)

    # U1: transfer setup for the texture upload at TBP (4 A+D registers).
    packets = [
        giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE),
        aplusd(R_BITBLTBUF, bitbltbuf_pack(TEXALPHA_TBP, 1, 0)),
        aplusd(R_TRXPOS, trxpos_pack(0, 0)),
        aplusd(R_TRXREG, trxreg_pack(TEXALPHA_TEXW, TEXALPHA_TEXH)),
        aplusd(R_TRXDIR, trxdir_pack(0)),
    ]

    # IMAGE data: four 32-bit texels per qword, row-major, lane 0 lowest.
    image_qwords = (TEXALPHA_TEXW * TEXALPHA_TEXH) // 4
    packets.append(giftag(image_qwords, 0, 2, 0, 0))
    for first_texel in range(0, image_qwords * 4, 4):
        packed = 0
        for lane in range(4):
            row, col = divmod(first_texel + lane, TEXALPHA_TEXW)
            packed |= (texalpha_texel(col, row) & 0xFFFFFFFF) << (32 * lane)
        packets.append(packed)

    # U2: opaque background sprite filling 64x64, ABE=0 (5 A+D registers).
    packets.extend([
        giftag(1, 0, 0, 5, 0x0000_0000_000E_EEEE),
        aplusd(R_PRIM, PRIM_SPRITE),
        aplusd(R_FRAME_1, frame),
        aplusd(R_RGBAQ, rgbaq_data(TEXALPHA_BG_R, TEXALPHA_BG_G, TEXALPHA_BG_B)),
        aplusd(R_XYZ2, xyz2_data(0, 0)),
        aplusd(R_XYZ2, xyz2_data(63, 63)),
    ])

    # U3: textured-alpha sprite over the background (9 A+D registers, EOP).
    # The 0x80 tint makes MODULATE an identity; source alpha comes from the texel.
    packets.extend([
        giftag(1, 1, 0, 9, 0x0000_000E_EEEE_EEEE),
        aplusd(R_PRIM, prim_sprite_tme_abe()),
        aplusd(R_FRAME_1, frame),
        aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)),  # source-over
        # 8x8 PSMCT32 texture, MODULATE.
        aplusd(R_TEX0_1, tex0_pack(TEXALPHA_TBP, 1, psm=0, tw=3, th=3, tfx=0)),
        aplusd(R_RGBAQ, rgbaq_data(0x80, 0x80, 0x80, 0x80)),
        aplusd(R_UV, uv_data(0, 0)),
        aplusd(R_XYZ2, xyz2_data(TEXALPHA_X0, TEXALPHA_Y0)),
        aplusd(R_UV, uv_data(TEXALPHA_TEXW, TEXALPHA_TEXH)),
        aplusd(R_XYZ2, xyz2_data(TEXALPHA_X1, TEXALPHA_Y1)),
    ])
    return packets
|
||
|
||
|
||
# Bake the Ch344 fixture. This bootlet variant also takes the DISPLAY1 word
# and FBW, because the scanout window is 64x64.
texalpha_demo_payload = build_texalpha_demo_payload()
texalpha_demo_qwc = len(texalpha_demo_payload)
write_bios_mem(
    "bios_texalpha.mem",
    build_textured_demo_bootlet_disp(texalpha_demo_qwc, TEXALPHA_DISPLAY1_HI, TEXALPHA_FBW),
    f"Ch344 textured-alpha sprite BIOS bootlet; DISPLAY1=64x64; QWC={texalpha_demo_qwc}",
)
write_payload_mem(
    "payload_texalpha.mem",
    texalpha_demo_payload,
    f"Ch344 textured-alpha GIF payload ({texalpha_demo_qwc} qwords): 8x8 alpha texture upload + "
    f"opaque BG sprite + textured-alpha overlay sprite",
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Brick 2b — Z-BUFFER (depth test) demo fixture.
|
||
#
|
||
# Proves the FLAT Z-tested PSMCT32 SPRITE path end-to-end: two
|
||
# overlapping sprites at DIFFERENT depths are drawn NEAR-first then
|
||
# FAR-second; with GEQUAL the FAR sprite must NOT overwrite the NEAR
|
||
# sprite in the overlap region (near wins regardless of draw order).
|
||
#
|
||
# Layout (PSMCT32 fb at FBP=0, FBW=1 → 64 px/row; 16x8 scanout window).
|
||
# Z buffer at ZBP=2 → byte base 2*2048 = 0x1000, OUTSIDE the 16x8 fb
|
||
# region (fb occupies 0..0x7FF: 8 rows * 256 B/row; Z at 0x1000). VRAM powers
|
||
# on to zero, so the Z buffer starts cleared — the first (NEAR) sprite
|
||
# with Z >= 0 always passes and stamps its Z.
|
||
#
|
||
# NEAR sprite : RED (R=0xFF,G=0,B=0), Z=0x200, x[0..11] y[0..7].
|
||
# FAR sprite : BLUE (R=0,G=0,B=0xFF), Z=0x100, x[4..15] y[0..7],
|
||
# drawn AFTER near.
|
||
# With GEQUAL:
|
||
# overlap x[4..11] : FAR Z(0x100) < stored NEAR Z(0x200) -> FAILS ->
|
||
# stays RED (near wins; proves depth gated, NOT
|
||
# last-write-wins).
|
||
# near-only x[0..3] : RED.
|
||
# far-only x[12..15]: stored Z=0 there, FAR Z(0x100)>=0 -> passes -> BLUE.
|
||
#
|
||
# TEST_1: ZTE=1 (bit16), ZTST=GEQUAL=2 (bits[18:17]) -> 0x0005_0000.
|
||
# ZBUF_1: ZBP=2 (bits[8:0]), PSM=PSMZ32=0, ZMSK=0 -> 0x0000_0002.
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# A+D register addresses used by the depth-test and later fixtures,
# listed in ascending address order.
R_TEX1_1 = 0x14
R_SCISSOR_1 = 0x40
R_TEST_1 = 0x47
R_CLAMP_1 = 0x48
R_ZBUF_1 = 0x4E
|
||
|
||
|
||
def tex1_pack(mmag):
    """Pack TEX1_1 with only MMAG set.

    MMAG lives at bit 5 (0 = NEAREST, 1 = LINEAR magnification); only the
    low bit of ``mmag`` is used and every other field is left at 0.
    """
    mmag_bit = mmag & 0x1
    return mmag_bit << 5
|
||
|
||
|
||
def clamp_pack(wms, wmt):
    """Pack CLAMP_1 wrap modes: WMS in bits [1:0], WMT in bits [3:2].

    Mode 0 = REPEAT, 1 = CLAMP. The MIN/MAX region fields are left at 0.
    """
    s_mode = wms & 0x3
    t_mode = wmt & 0x3
    return (t_mode << 2) | s_mode
|
||
|
||
|
||
def scissor_pack(x0, x1, y0, y1):
    """Pack SCISSOR_1 from inclusive window bounds.

    Layout: SCAX0[10:0], SCAX1[26:16], SCAY0[42:32], SCAY1[58:48]; each
    bound is masked to 11 bits.
    """
    packed = 0
    for bound, shift in ((x0, 0), (x1, 16), (y0, 32), (y1, 48)):
        packed |= (bound & 0x7FF) << shift
    return packed
|
||
|
||
# Frame-buffer width argument passed to frame_1_psmct32() by this fixture.
ZBUF_DEMO_FBW = 1
# DISPLAY1 upper word: DH=7 (8 px) above bit 12, DW=15 (16 px) in the low bits.
ZBUF_DEMO_DISPLAY1_HI = (7 << 12) | 15

# NEAR sprite is red, FAR sprite is blue.
ZBUF_NEAR_R = 0xFF
ZBUF_NEAR_G = 0x00
ZBUF_NEAR_B = 0x00
ZBUF_FAR_R = 0x00
ZBUF_FAR_G = 0x00
ZBUF_FAR_B = 0xFF

# Depth values: the larger Z is the nearer surface under GEQUAL.
ZBUF_NEAR_Z = 0x200
ZBUF_FAR_Z = 0x100

# ZBP must be EVEN: the (task-specified) ZMSK sits at bit 0 and therefore
# overlaps ZBP[0]. ZBP=2 puts the Z buffer at byte base 2*2048 = 0x1000
# (outside the 16x8 fb region 0..0x7FF) and keeps ZMSK=0, so Z updates
# stay enabled.
ZBUF_ZBP = 2
|
||
|
||
|
||
def test1_geq():
    """Return the TEST_1 value enabling a GEQUAL depth test.

    ZTE is bit 16; ZTST occupies bits [18:17] and GEQUAL is encoded as 2.
    """
    zte = 1 << 16
    ztst_gequal = 2 << 17
    return zte | ztst_gequal
|
||
|
||
|
||
def zbuf1_pack(zbp, zmsk=0, psm=0):
    """Pack ZBUF_1 from ZBP[8:0], PSM[27:24] and ZMSK at bit 0.

    ZMSK is OR-ed into bit 0, which is also ZBP[0]; an odd ``zbp``
    therefore sets that bit even when ``zmsk`` is 0.
    """
    base_field = zbp & 0x1FF
    mask_field = zmsk & 0x1
    format_field = (psm & 0xF) << 24
    return format_field | base_field | mask_field
|
||
|
||
|
||
def xyz2_dataz(x, y, z):
    """Pack an XYZ2 value that carries a depth.

    ``x`` (12 bits) is placed at bit 4, ``y`` (12 bits) at bit 20 and
    ``z`` (32 bits) at bit 32.
    """
    x_field = (x & 0xFFF) << 4
    y_field = (y & 0xFFF) << 20
    z_field = (z & 0xFFFFFFFF) << 32
    return x_field | y_field | z_field
|
||
|
||
|
||
def build_zbuffer_demo_payload():
    """Build the GIF qword list for the Z-buffer (depth test) fixture.

    Draws a NEAR red sprite (Z=0x200) first, then an overlapping FAR blue
    sprite (Z=0x100), with a GEQUAL Z-test active for both.

    Returns:
        list: GIF qwords in DMA order; two tags, each followed by exactly
        as many A+D qwords as its NREG field declares.
    """
    frame_1_val = frame_1_psmct32(ZBUF_DEMO_FBW)
    qw = []

    # --- U1: NEAR red sprite, Z=0x200, x[0..11] y[0..7]. ZTE=1 GEQUAL. ---
    # 7 A+D descriptors (7 nibbles of 0xE).
    qw.append(giftag(1, 0, 0, 7, 0x0000_0000_0EEE_EEEE))
    qw.append(aplusd(R_PRIM, PRIM_SPRITE))
    qw.append(aplusd(R_FRAME_1, frame_1_val))
    qw.append(aplusd(R_TEST_1, test1_geq()))
    qw.append(aplusd(R_ZBUF_1, zbuf1_pack(ZBUF_ZBP)))
    qw.append(aplusd(R_RGBAQ, rgbaq_data(ZBUF_NEAR_R, ZBUF_NEAR_G, ZBUF_NEAR_B)))
    qw.append(aplusd(R_XYZ2, xyz2_dataz(0, 0, ZBUF_NEAR_Z)))
    qw.append(aplusd(R_XYZ2, xyz2_dataz(11, 7, ZBUF_NEAR_Z)))

    # --- U2: FAR blue sprite, Z=0x100, x[4..15] y[0..7]. EOP. ---
    # Only RGBAQ + 2x XYZ2 follow, so the tag must declare NREG=3 with
    # three 0xE descriptors. Declaring more registers than are supplied
    # would leave the packet unterminated when the DMA transfer ends.
    # (PRIM/FRAME_1/TEST_1/ZBUF_1 persist across the GIF context from U1.)
    qw.append(giftag(1, 1, 0, 3, 0x0000_0000_0000_0EEE))
    qw.append(aplusd(R_RGBAQ, rgbaq_data(ZBUF_FAR_R, ZBUF_FAR_G, ZBUF_FAR_B)))
    qw.append(aplusd(R_XYZ2, xyz2_dataz(4, 0, ZBUF_FAR_Z)))
    qw.append(aplusd(R_XYZ2, xyz2_dataz(15, 7, ZBUF_FAR_Z)))

    return qw
|
||
|
||
|
||
# Bake the Brick 2b fixture: build the payload, size the bootlet's QWC from
# it, then write both .mem images.
zbuf_demo_payload = build_zbuffer_demo_payload()
zbuf_demo_qwc = len(zbuf_demo_payload)
zbuf_demo_bootlet = build_textured_demo_bootlet(zbuf_demo_qwc)

write_bios_mem(
    "bios_zbuffer.mem",
    zbuf_demo_bootlet,
    f"Brick2b Z-buffer BIOS bootlet ({len(zbuf_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 16x8; QWC={zbuf_demo_qwc}",
)
write_payload_mem(
    "payload_zbuffer.mem",
    zbuf_demo_payload,
    f"Brick2b Z-buffer GIF payload ({zbuf_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); NEAR red (Z=0x200) + FAR blue (Z=0x100), GEQUAL depth test",
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Brick 3 — NON-AXIS-ALIGNED GOURAUD TRIANGLE demo fixture (interpolated
|
||
# color + interpolated depth), the first triangle on the BRAM board path.
|
||
#
|
||
# Two non-axis-aligned triangles are drawn into a 16x8 PSMCT32 framebuffer
|
||
# with GEQUAL depth test active and per-pixel INTERPOLATED Z:
|
||
#
|
||
# TRI A : a large Gouraud triangle, v0=(1,1) RED, v1=(14,1) GREEN,
|
||
# v2=(7,7) BLUE. Z is FLAT-HIGH everywhere (Z=0x300 at all 3
|
||
# vertices) so it is the NEAR surface and stamps Z=0x300.
|
||
# TRI B : a smaller triangle, v0=(2,5) v1=(13,5) v2=(7,2), painted a
|
||
# solid-ish dim WHITE (all 3 vertices grey 0x80) at FLAT-LOW
|
||
# Z=0x100. Drawn SECOND. With GEQUAL its Z(0x100) is BEHIND
|
||
# TRI A's stamped Z(0x300) in the overlap, so it FAILS there
|
||
# (TRI A wins — proves the interpolated-Z compare gates the
|
||
# write), but PASSES where TRI A did not cover (stored Z=0).
|
||
#
|
||
# This demonstrates: (1) a real non-axis-aligned triangle renders;
|
||
# (2) Gouraud color interpolation (TRI A's RGB gradient); (3) per-pixel
|
||
# interpolated depth feeding the Z-test (TRI B occluded by the nearer
|
||
# TRI A in the overlap). The focused unit TB (tb_gs_tri_interp) pins the
|
||
# exact affine color + Z values; this top-level fixture proves the
|
||
# end-to-end board path.
|
||
#
|
||
# NOTE the two triangles here use FLAT (equal-at-all-3-vertices) Z for a
|
||
# tractable top-level assertion; the per-pixel Z INTERPOLATOR is exercised
|
||
# with non-equal vertex Z in the focused unit TB. The color interpolator
|
||
# is exercised non-trivially here (TRI A has 3 distinct vertex colors).
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Frame-buffer width argument passed to frame_1_psmct32() by this fixture.
TRI_DEMO_FBW = 1
# DISPLAY1 upper word: DH=7 (8 px) above bit 12, DW=15 (16 px) in the low bits.
TRI_DEMO_DISPLAY1_HI = (7 << 12) | 15
# Z base 2*2048 = 0x1000, outside the 16x8 fb.
TRI_DEMO_ZBP = 2

# PRIM[2:0] = 3 selects a discrete TRIANGLE.
PRIM_TRIANGLE = 3
|
||
|
||
|
||
def build_triangle_demo_payload():
    """Build the GIF qword list for the Gouraud-triangle fixture.

    TRI A has three distinct vertex colours at flat Z=0x300; TRI B is
    grey at flat Z=0x100 and is drawn second. The GEQUAL depth test is
    programmed once in the first packet.
    """
    frame = frame_1_psmct32(TRI_DEMO_FBW)

    # Each vertex is ((r, g, b), (x, y)); RGBAQ is emitted before its XYZ2
    # so the colour is latched for that vertex.
    tri_a = (
        ((0xFF, 0x00, 0x00), (1, 1)),    # v0 RED
        ((0x00, 0xFF, 0x00), (14, 1)),   # v1 GREEN
        ((0x00, 0x00, 0xFF), (7, 7)),    # v2 BLUE (closes)
    )
    tri_b = (
        ((0x80, 0x80, 0x80), (2, 5)),    # v0 grey
        ((0x80, 0x80, 0x80), (13, 5)),   # v1 grey
        ((0x80, 0x80, 0x80), (7, 2)),    # v2 grey (closes)
    )

    # U1: PRIM/FRAME/TEST/ZBUF + 3 vertices = 10 A+D descriptors, EOP=0.
    packets = [
        giftag(1, 0, 0, 10, 0x0000_00EE_EEEE_EEEE),
        aplusd(R_PRIM, PRIM_TRIANGLE),
        aplusd(R_FRAME_1, frame),
        aplusd(R_TEST_1, test1_geq()),
        aplusd(R_ZBUF_1, zbuf1_pack(TRI_DEMO_ZBP)),
    ]
    for (r, g, b), (x, y) in tri_a:
        packets.append(aplusd(R_RGBAQ, rgbaq_data(r, g, b)))
        packets.append(aplusd(R_XYZ2, xyz2_dataz(x, y, 0x300)))

    # U2: 3 vertices = 6 A+D descriptors, EOP=1. PRIM/FRAME/TEST/ZBUF
    # persist from U1's GIF context.
    packets.append(giftag(1, 1, 0, 6, 0x0000_0000_00EE_EEEE))
    for (r, g, b), (x, y) in tri_b:
        packets.append(aplusd(R_RGBAQ, rgbaq_data(r, g, b)))
        packets.append(aplusd(R_XYZ2, xyz2_dataz(x, y, 0x100)))

    return packets
|
||
|
||
|
||
# Bake the Brick 3 fixture: build the payload, size the bootlet's QWC from
# it, then write both .mem images.
tri_demo_payload = build_triangle_demo_payload()
tri_demo_qwc = len(tri_demo_payload)
tri_demo_bootlet = build_textured_demo_bootlet(tri_demo_qwc)

write_bios_mem(
    "bios_triangle.mem",
    tri_demo_bootlet,
    f"Brick3 triangle-demo BIOS bootlet ({len(tri_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 16x8; QWC={tri_demo_qwc}",
)
write_payload_mem(
    "payload_triangle.mem",
    tri_demo_payload,
    f"Brick3 triangle-demo GIF payload ({tri_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); Gouraud TRI A (near) + grey TRI B (far), GEQUAL interp-Z",
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Textured-TRIANGLE demo fixture.
|
||
#
|
||
# Proves the textured-triangle rung end-to-end: upload a small PSMCT32
|
||
# texture via BITBLT, then draw ONE non-axis-aligned TRIANGLE (PRIM type
|
||
# 3, TME=1) with per-vertex UV. The rasterizer interpolates U/V affinely
|
||
# (the shared-divider gradient engine, steps 10..13) and DECAL-samples
|
||
# the uploaded texture. ZTE=0, ABE=0 (read2's only consumer is the texel
|
||
# fetch). Reuses the GS_RMW_DEMO board profile (read2 live, small VRAM,
|
||
# swizzle=0) and adds NO new EE/BIOS scaffolding — only a GIF payload via
|
||
# the shared build_textured_demo_bootlet (one-shot DMAC kick + halt).
|
||
#
|
||
# UV == screen XY at each vertex, so the affine (u,v) == (x,y) at every
|
||
# interior pixel and the sampled texel is exactly tex_demo_texel(x,y) —
|
||
# a TB can predict each pixel. Triangle covers part of the 16x8 window
|
||
# with a slanted edge (non-axis-aligned).
|
||
# ---------------------------------------------------------------------------
|
||
def build_textured_triangle_demo_payload():
    """Build the GIF qword list for the textured-triangle fixture.

    Packet order: BITBLT setup for the texture upload, the IMAGE data
    (four 32-bit texels per qword), then one TME=1 TRIANGLE whose
    per-vertex UV equals its screen XY.
    """
    frame = frame_1_psmct32(TEX_DEMO_FBW)
    tex0 = tex0_pack(TEX_DEMO_TBP0, TEX_DEMO_TBW, 0, 3, 3)  # 8x8 texture

    # U1: BITBLTBUF / TRXPOS / TRXREG / TRXDIR (texture upload setup).
    packets = [
        giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE),
        aplusd(R_BITBLTBUF, bitbltbuf_pack(TEX_DEMO_TBP0, TEX_DEMO_TBW, 0)),
        aplusd(R_TRXPOS, trxpos_pack(0, 0)),
        aplusd(R_TRXREG, trxreg_pack(TEX_DEMO_TEXW, TEX_DEMO_TEXH)),
        aplusd(R_TRXDIR, trxdir_pack(0)),
    ]

    # U2: IMAGE qwords, row-major texels, lane 0 in the low 32 bits.
    image_qwords = (TEX_DEMO_TEXW * TEX_DEMO_TEXH) // 4
    packets.append(giftag(image_qwords, 0, 2, 0, 0))
    for first_texel in range(0, image_qwords * 4, 4):
        packed = 0
        for lane in range(4):
            ty, tx = divmod(first_texel + lane, TEX_DEMO_TEXW)
            packed |= (tex_demo_texel(tx, ty) & 0xFFFFFFFF) << (32 * lane)
        packets.append(packed)

    # U3: PRIM(TRI+TME) / FRAME / TEX0 + 3 * (RGBAQ, UV, XYZ2) = 12 A+D
    # descriptors, EOP. RGBAQ is still emitted before each XYZ2 although
    # the colour is overridden by the DECAL texel. The vertices are
    # non-axis-aligned, and because UV == screen XY at every vertex, all
    # of them lie inside the uploaded 8x8 texture [0..7].
    vertices = [(1, 1), (7, 1), (2, 6)]
    packets.extend([
        giftag(1, 1, 0, 12, 0x0000_EEEE_EEEE_EEEE),
        aplusd(R_PRIM, prim_tri_tme()),
        aplusd(R_FRAME_1, frame),
        aplusd(R_TEX0_1, tex0),
    ])
    for vx, vy in vertices:
        packets.extend([
            aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)),
            aplusd(R_UV, uv_data(vx, vy)),
            aplusd(R_XYZ2, xyz2_data(vx, vy)),
        ])

    return packets
|
||
|
||
|
||
def prim_tri_tme():
    """Return the PRIM value for a textured TRIANGLE.

    PRIM[2:0] = 3 selects TRIANGLE; bit 4 is TME.
    """
    triangle_type = 3
    tme_flag = 1 << 4
    return triangle_type | tme_flag
|
||
|
||
|
||
# Bake the textured-triangle fixture: build the payload, size the bootlet's
# QWC from it, then write both .mem images.
tritex_demo_payload = build_textured_triangle_demo_payload()
tritex_demo_qwc = len(tritex_demo_payload)
tritex_demo_bootlet = build_textured_demo_bootlet(tritex_demo_qwc)

write_bios_mem(
    "bios_tritex.mem",
    tritex_demo_bootlet,
    f"Textured-triangle demo BIOS bootlet ({len(tritex_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 16x8; QWC={tritex_demo_qwc}",
)
write_payload_mem(
    "payload_tritex.mem",
    tritex_demo_payload,
    f"Textured-triangle demo GIF payload ({tritex_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); BITBLT 8x8 texture upload + 1 textured non-axis-aligned TRI",
)
|
||
|
||
|
||
# Bake the Brick 1 textured-sprite fixture: build the payload, size the
# bootlet's QWC from it, then write both .mem images.
tex_demo_payload = build_textured_demo_payload()
tex_demo_qwc = len(tex_demo_payload)
tex_demo_bootlet = build_textured_demo_bootlet(tex_demo_qwc)

write_bios_mem(
    "bios_textured.mem",
    tex_demo_bootlet,
    f"Brick1 textured-demo BIOS bootlet ({len(tex_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 16x8; QWC={tex_demo_qwc}",
)
write_payload_mem(
    "payload_textured.mem",
    tex_demo_payload,
    f"Brick1 textured-demo GIF payload ({tex_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); BITBLT 8x8 texture upload + textured SPRITE + 1 flat sprite",
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch296 — PSMT8 INDEXED-TEXTURE + CLUT demo fixture.
|
||
#
|
||
# Proves the PALETTIZED texture path end-to-end through the BRAM board
|
||
# top: a small PSMT8 texture (an array of 8-bit INDICES) and a CLUT
|
||
# (palette of PSMCT32 colors) are BITBLT-uploaded, a TEX0 with CLUT
|
||
# fields (PSM=PSMT8, CLD=1 -> load CLUT, CPSM=PSMCT32, CSM=CSM2) commits
|
||
# to fire the VRAM->CLUT load, then ONE PSMT8-textured SPRITE is drawn.
|
||
# Mistakes are obvious: each index maps to a boldly distinct color and
|
||
# the index pattern is a recognizable shape (a framed 'X').
|
||
#
|
||
# NO new EE/BIOS scaffolding — GIF payload only (BITBLT for the texture
|
||
# indices AND for the CLUT, then the TEX0 + textured primitive). The
|
||
# bootlet is the SAME one-shot build_textured_demo_bootlet used by every
|
||
# other board fixture.
|
||
#
|
||
# VRAM layout (linear, no swizzle):
|
||
# - PSMT8 texture indices : DBP = CLUT8_TBP0 (= 8 -> byte 0x800)
|
||
# - CLUT (PSMCT32 entries) : DBP = CLUT8_CBP (= 12 -> byte 0xC00)
|
||
# The two regions are disjoint (texture spans 0x800.. ; CLUT 0xC00..).
|
||
|
||
# Frame-buffer width argument passed to frame_1_psmct32() by this fixture.
CLUT8_FBW = 1
# Texture base = 8*256 = 0x800.
CLUT8_TBP0 = 8
# 64 texels/row stride.
CLUT8_TBW = 1
# Index texture is 8x8 texels.
CLUT8_TEXW = 8
CLUT8_TEXH = 8
# CLUT base = 12*256 = 0xC00 (CBP is in 256-byte units).
CLUT8_CBP = 12
# DISPLAY1 upper word for the 16x8 window: DH=7 (8 px), DW=15 (16 px).
CLUT8_DISPLAY1_HI = (7 << 12) | 15
|
||
|
||
|
||
def tex0_clut_pack(tbp0, tbw, cbp, cpsm=0, csm=1, csa=0, cld=1):
    """Pack TEX0_1 for an 8x8 PSMT8 texture together with its CLUT fields.

    The texture side comes from tex0_pack() with PSM=0x13 (PSMT8) and
    TW=TH=3; the CLUT side is OR-ed on top so that, per the fixture's
    design, committing the register triggers a VRAM->CLUT load.

    Layout: TBP0[13:0] TBW[19:14] PSM[25:20] TW[29:26] TH[33:30]
            CBP[50:37] CPSM[54:51] CSM[55] CSA[60:56] CLD[63:61].
    """
    texture_side = tex0_pack(tbp0, tbw, psm=0x13, tw=3, th=3)  # PSMT8, 8x8
    clut_side = (
        ((cbp & 0x3FFF) << 37)
        | ((cpsm & 0xF) << 51)
        | ((csm & 0x1) << 55)
        | ((csa & 0x1F) << 56)
        | ((cld & 0x7) << 61)
    )
    return texture_side | clut_side
|
||
|
||
|
||
def clut8_index(x, y):
    """Return the palette index at texel (x, y) of the framed-'X' pattern.

    Checked in priority order: corner (0,0) -> 0, either diagonal -> 3,
    outer border -> 1, everything else (interior) -> 2.
    """
    last_col = CLUT8_TEXW - 1
    last_row = CLUT8_TEXH - 1

    if (x, y) == (0, 0):
        return 0
    on_diagonal = x == y or x == last_col - y
    if on_diagonal:
        return 3
    on_border = x in (0, last_col) or y in (0, last_row)
    return 1 if on_border else 2
|
||
|
||
|
||
def clut8_palette(i):
    """Return the PSMCT32 ABGR word (A=0xFF) for palette entry ``i``.

    Entries: 0 = black, 1 = RED, 2 = GREEN, 3 = BLUE; any other index
    yields mid-grey (0x7F). Word layout is A<<24 | B<<16 | G<<8 | R.
    """
    rgb_by_index = {
        0: (0x00, 0x00, 0x00),
        1: (0xFF, 0x00, 0x00),
        2: (0x00, 0xFF, 0x00),
        3: (0x00, 0x00, 0xFF),
    }
    r, g, b = rgb_by_index.get(i, (0x7F, 0x7F, 0x7F))
    return 0xFF000000 | (b << 16) | (g << 8) | r
|
||
|
||
|
||
def build_clut8_demo_payload():
    """Build the GIF qword list for the PSMT8 + CLUT fixture.

    Packet order: BITBLT the CLUT, BITBLT the PSMT8 index texture, a
    SPRITE+TME packet whose TEX0 carries the CLUT-load fields, the
    closing UV/XYZ2 pair for that sprite, and finally a flat control
    sprite.
    """
    transfer_regs = 0x0000_0000_0000_EEEE

    # --- U1: BITBLT the CLUT (8 PSMCT32 entries) to VRAM[CBP*256]. ---
    palette_entries = 8  # entries 0..7 (the texture only uses 0..3)
    packets = [
        giftag(1, 0, 0, 4, transfer_regs),
        aplusd(R_BITBLTBUF, bitbltbuf_pack(CLUT8_CBP, 1, 0)),  # DPSM=PSMCT32
        aplusd(R_TRXPOS, trxpos_pack(0, 0)),
        aplusd(R_TRXREG, trxreg_pack(palette_entries, 1)),     # n_clut x 1
        aplusd(R_TRXDIR, trxdir_pack(0)),
    ]
    palette_qwords = palette_entries // 4  # 4 entries per qword
    packets.append(giftag(palette_qwords, 0, 2, 0, 0))  # IMAGE
    for first_entry in range(0, palette_qwords * 4, 4):
        packed = 0
        for lane in range(4):
            packed |= (clut8_palette(first_entry + lane) & 0xFFFFFFFF) << (32 * lane)
        packets.append(packed)

    # --- U2: BITBLT the PSMT8 index texture to VRAM[TBP0*256]. ---
    packets.extend([
        giftag(1, 0, 0, 4, transfer_regs),
        aplusd(R_BITBLTBUF, bitbltbuf_pack(CLUT8_TBP0, CLUT8_TBW, 0x13)),  # DPSM=PSMT8
        aplusd(R_TRXPOS, trxpos_pack(0, 0)),
        aplusd(R_TRXREG, trxreg_pack(CLUT8_TEXW, CLUT8_TEXH)),
        aplusd(R_TRXDIR, trxdir_pack(0)),
    ])
    # PSMT8 IMAGE: 16 one-byte indices per 128-bit qword, lane 0 lowest.
    index_qwords = (CLUT8_TEXW * CLUT8_TEXH) // 16
    packets.append(giftag(index_qwords, 0, 2, 0, 0))
    for first_index in range(0, index_qwords * 16, 16):
        packed = 0
        for lane in range(16):
            ty, tx = divmod(first_index + lane, CLUT8_TEXW)
            packed |= (clut8_index(tx, ty) & 0xFF) << (8 * lane)
        packets.append(packed)

    # --- U3: PRIM(SPRITE+TME) / FRAME / TEX0(PSMT8+CLUT load) / RGBAQ /
    #         UV0 / XYZ2_0. The TEX0 write carries CLD=1. ---
    frame = frame_1_psmct32(CLUT8_FBW)
    tex0 = tex0_clut_pack(CLUT8_TBP0, CLUT8_TBW, CLUT8_CBP,
                          cpsm=0, csm=1, csa=0, cld=1)
    packets.extend([
        giftag(1, 0, 0, 6, 0x0000_0000_00EE_EEEE),
        aplusd(R_PRIM, prim_sprite_tme()),
        aplusd(R_FRAME_1, frame),
        aplusd(R_TEX0_1, tex0),
        aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)),
        aplusd(R_UV, uv_data(0, 0)),
        aplusd(R_XYZ2, xyz2_data(0, 0)),
    ])

    # --- U4: UV1 / XYZ2_1 closing the textured sprite. ---
    far_u = CLUT8_TEXW - 1
    far_v = CLUT8_TEXH - 1
    packets.extend([
        giftag(1, 0, 0, 2, 0x0000_0000_0000_00EE),
        aplusd(R_UV, uv_data(far_u, far_v)),
        aplusd(R_XYZ2, xyz2_data(far_u, far_v)),
    ])

    # --- U5: a FLAT control sprite at (8,0)..(15,7). EOP. ---
    packets.extend([
        giftag(1, 1, 0, 5, 0x0000_0000_000E_EEEE),
        aplusd(R_PRIM, PRIM_SPRITE),
        aplusd(R_FRAME_1, frame),
        aplusd(R_RGBAQ, rgbaq_data(0x20, 0xC0, 0x40)),  # flat green
        aplusd(R_XYZ2, xyz2_data(8, 0)),
        aplusd(R_XYZ2, xyz2_data(15, 7)),
    ])

    return packets
|
||
|
||
|
||
# Bake the Ch296 fixture: build the payload, size the bootlet's QWC from it,
# then write both .mem images.
clut8_demo_payload = build_clut8_demo_payload()
clut8_demo_qwc = len(clut8_demo_payload)
clut8_demo_bootlet = build_textured_demo_bootlet(clut8_demo_qwc)

write_bios_mem(
    "bios_clut.mem",
    clut8_demo_bootlet,
    f"Ch296 PSMT8+CLUT demo BIOS bootlet ({len(clut8_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 16x8; QWC={clut8_demo_qwc}",
)
write_payload_mem(
    "payload_clut.mem",
    clut8_demo_payload,
    f"Ch296 PSMT8+CLUT demo GIF payload ({clut8_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); BITBLT CLUT + PSMT8 index texture + TEX0(CLD) + 1 PSMT8 SPRITE",
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch297 — PSMT4 INDEXED-TEXTURE + CLUT demo fixture.
|
||
#
|
||
# Proves the 4-bit palettized texture path (the MOST common PS2 texture
|
||
# format) end-to-end through the BRAM board top, built directly on the
|
||
# Ch296 PSMT8+CLUT rung — the ONLY new datapath bit is the nibble extract
|
||
# in gs_texture_unit. A 16-entry CLUT (PSMCT32, boldly distinct colors,
|
||
# all 16 used) and a PSMT4 index texture (2 indices/byte, LINEAR) are
|
||
# BITBLT-uploaded, a TEX0 with CLUT fields (PSM=PSMT4=0x14, CLD=1 ->
|
||
# VRAM->CLUT load, CPSM=PSMCT32, CSM=CSM2) commits to fire the load, then
|
||
# ONE PSMT4-textured SPRITE is drawn + a FLAT control sprite.
|
||
#
|
||
# Index pattern: idx(x,y) = (x + 2*y) mod 16 -> a diagonal RAMP that walks
|
||
# through ALL 16 palette entries, so the 16-color palette is visibly
|
||
# exercised (mistakes are obvious — a stuck nibble collapses the ramp).
|
||
#
|
||
# NO new EE/BIOS scaffolding — GIF payload only; reuses the SAME one-shot
|
||
# build_textured_demo_bootlet as every other board fixture.
|
||
#
|
||
# VRAM layout (linear, no swizzle):
|
||
# - PSMT4 texture indices : DBP = CLUT4_TBP0 (= 8 -> byte 0x800)
|
||
# - CLUT (PSMCT32 entries) : DBP = CLUT4_CBP (= 12 -> byte 0xC00)
|
||
|
||
# Frame-buffer width argument passed to frame_1_psmct32() by this fixture.
CLUT4_FBW = 1
# Texture base = 8*256 = 0x800.
CLUT4_TBP0 = 8
# 64 texels/row stride (in texels).
CLUT4_TBW = 1
# Index texture is 8x8 texels.
CLUT4_TEXW = 8
CLUT4_TEXH = 8
# CLUT base = 12*256 = 0xC00 (CBP is in 256-byte units).
CLUT4_CBP = 12
|
||
|
||
|
||
def tex0_clut4_pack(tbp0, tbw, cbp, cpsm=0, csm=1, csa=0, cld=1):
    """Pack TEX0_1 for an 8x8 PSMT4 texture together with its CLUT fields.

    The texture side comes from tex0_pack() with PSM=0x14 (PSMT4) and
    TW=TH=3; the CLUT side is OR-ed on top: CBP at bit 37 (14 bits),
    CPSM at bit 51 (4 bits), CSM at bit 55, CSA at bit 56 (5 bits) and
    CLD at bit 61 (3 bits).
    """
    texture_side = tex0_pack(tbp0, tbw, psm=0x14, tw=3, th=3)  # PSMT4, 8x8
    clut_side = (
        ((cbp & 0x3FFF) << 37)
        | ((cpsm & 0xF) << 51)
        | ((csm & 0x1) << 55)
        | ((csa & 0x1F) << 56)
        | ((cld & 0x7) << 61)
    )
    return texture_side | clut_side
|
||
|
||
|
||
def clut4_index(x, y):
    """Return the 4-bit palette index at texel (x, y).

    The pattern is the diagonal ramp (x + 2*y) mod 16.
    """
    ramp = 2 * y + x
    return ramp % 16
|
||
|
||
|
||
def clut4_palette(i):
    """Return the PSMCT32 ABGR word (A=0xFF) for 4-bit palette entry ``i``.

    Only the low 4 bits of ``i`` select the entry. Word layout is
    A<<24 | B<<16 | G<<8 | R. The 16 colours are spread across hue and
    brightness so a collapsed nibble is easy to spot.
    """
    rgb_entries = (
        (0x00, 0x00, 0x00),  # 0  black
        (0xFF, 0x00, 0x00),  # 1  red
        (0x00, 0xFF, 0x00),  # 2  green
        (0x00, 0x00, 0xFF),  # 3  blue
        (0xFF, 0xFF, 0x00),  # 4  yellow
        (0xFF, 0x00, 0xFF),  # 5  magenta
        (0x00, 0xFF, 0xFF),  # 6  cyan
        (0xFF, 0xFF, 0xFF),  # 7  white
        (0x80, 0x00, 0x00),  # 8  dk red
        (0x00, 0x80, 0x00),  # 9  dk green
        (0x00, 0x00, 0x80),  # 10 dk blue
        (0x80, 0x80, 0x00),  # 11 olive
        (0x80, 0x00, 0x80),  # 12 purple
        (0x00, 0x80, 0x80),  # 13 teal
        (0xC0, 0x60, 0x20),  # 14 orange-brown
        (0x40, 0x40, 0x40),  # 15 dk gray
    )
    red, green, blue = rgb_entries[i & 0xF]
    return 0xFF000000 | (blue << 16) | (green << 8) | red
|
||
|
||
|
||
def build_clut4_demo_payload():
    """Build the linear PSMT4 + CLUT demo GIF payload (NO new EE/BIOS scaffolding).

    Returns the list of 128-bit qwords, in the order they are appended:
      U1  BITBLT the 16-entry CLUT (PSMCT32, no read2) to VRAM[CBP*256]
      U2  TEX0(CLD=4) LOAD-ONLY -> fires the VRAM->CLUT load (read2)
      U3  BITBLT the PSMT4 index texture (read2 nibble-RMW) to VRAM[TBP0*256]
      U4  PRIM(SPRITE+TME) + TEX0(CLD=2, CBP-unchanged -> NO reload) + sprite
      U5  a FLAT control sprite (its GIF tag carries EOP)

    KEY ORDERING: the PSMT4 texture upload is a read2 nibble-RMW (is_t4_emit),
    and so is NOT time-disjoint from the read2 VRAM->CLUT load. PSMT8 uploads
    use the pure-comb writer (no read2), so Ch296 could load the CLUT on the
    same TEX0 that draws. For PSMT4 we therefore fire the CLUT load on its OWN
    TEX0 (U2) BEFORE the texture upload — the CLUT_STALL FIFO-pop hold keeps
    the load fully ahead of the upload, and the sprite's TEX0 (U4) uses CLD=2
    (CBP unchanged) so it does not re-load and collide with the raster scan.
    """
    qw = []
    frame_1_val = frame_1_psmct32(CLUT4_FBW)

    # --- U1: BITBLT the CLUT (16 PSMCT32 entries) to VRAM[CBP*256]. ---
    n_clut = 16
    qw.append(giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE))
    qw.append(aplusd(R_BITBLTBUF, bitbltbuf_pack(CLUT4_CBP, 1, 0)))  # DPSM=PSMCT32
    qw.append(aplusd(R_TRXPOS, trxpos_pack(0, 0)))
    qw.append(aplusd(R_TRXREG, trxreg_pack(n_clut, 1)))
    qw.append(aplusd(R_TRXDIR, trxdir_pack(0)))
    n_clut_qw = n_clut // 4  # 4 entries/qword
    qw.append(giftag(n_clut_qw, 0, 2, 0, 0))  # IMAGE
    for i in range(n_clut_qw):
        word = 0
        for lane in range(4):
            word |= (clut4_palette(i * 4 + lane) & 0xFFFFFFFF) << (32 * lane)
        qw.append(word)

    # --- U2: TEX0(CLD=4) LOAD-ONLY. A bare TEX0_1 commit fires the
    #         VRAM->CLUT load (gs_tex0_wr) with NO PSMT4 upload in flight,
    #         so the read2 CLUT load runs disjoint from the read2 RMW
    #         upload that follows. CLD=4 = PARTIAL 16-entry load into the
    #         CSA=0 window (exactly the 16 entries this 4-bit texture
    #         indexes) — a 16-clock load that finishes before the U3 PSMT4
    #         BITBLT drains into the read2 nibble-RMW path. ---
    tex0_load = tex0_clut4_pack(CLUT4_TBP0, CLUT4_TBW, CLUT4_CBP,
                                cpsm=0, csm=1, csa=0, cld=4)
    qw.append(giftag(1, 0, 0, 1, 0x0000_0000_0000_000E))
    qw.append(aplusd(R_TEX0_1, tex0_load))

    # --- U3: BITBLT the PSMT4 index texture to VRAM[TBP0*256]. ---
    qw.append(giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE))
    qw.append(aplusd(R_BITBLTBUF, bitbltbuf_pack(CLUT4_TBP0, CLUT4_TBW, 0x14)))  # DPSM=PSMT4
    qw.append(aplusd(R_TRXPOS, trxpos_pack(0, 0)))
    qw.append(aplusd(R_TRXREG, trxreg_pack(CLUT4_TEXW, CLUT4_TEXH)))
    qw.append(aplusd(R_TRXDIR, trxdir_pack(0)))
    # PSMT4 IMAGE: 32 pixels (4 bits each) per 128-bit qword. Pixel p sits
    # at nibble p (low nibble of byte p/2 for even p, high for odd p), in
    # raster order across the rect.
    n_px = CLUT4_TEXW * CLUT4_TEXH
    n_px_qw = n_px // 32
    qw.append(giftag(n_px_qw, 0, 2, 0, 0))
    for q in range(n_px_qw):
        word = 0
        for lane in range(32):
            t = q * 32 + lane
            tx = t % CLUT4_TEXW
            ty = t // CLUT4_TEXW
            word |= (clut4_index(tx, ty) & 0xF) << (4 * lane)
        qw.append(word)

    # --- U4 (first vertex): PRIM(SPRITE+TME) / FRAME / TEX0(CLD=2, no
    #         reload) / RGBAQ / UV0 / XYZ2_0. CLD=2 only re-loads if CBP
    #         changed; CBP is the same as U2, so NO reload fires here ->
    #         no read2 collision with the textured sprite's raster scan. ---
    tex0_draw = tex0_clut4_pack(CLUT4_TBP0, CLUT4_TBW, CLUT4_CBP,
                                cpsm=0, csm=1, csa=0, cld=2)
    qw.append(giftag(1, 0, 0, 6, 0x0000_0000_00EE_EEEE))
    qw.append(aplusd(R_PRIM, prim_sprite_tme()))
    qw.append(aplusd(R_FRAME_1, frame_1_val))
    qw.append(aplusd(R_TEX0_1, tex0_draw))
    qw.append(aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)))
    qw.append(aplusd(R_UV, uv_data(0, 0)))
    qw.append(aplusd(R_XYZ2, xyz2_data(0, 0)))

    # --- U4 (second vertex): UV1 / XYZ2_1 closing the textured sprite. ---
    qw.append(giftag(1, 0, 0, 2, 0x0000_0000_0000_00EE))
    qw.append(aplusd(R_UV, uv_data(CLUT4_TEXW - 1, CLUT4_TEXH - 1)))
    qw.append(aplusd(R_XYZ2, xyz2_data(CLUT4_TEXW - 1, CLUT4_TEXH - 1)))

    # --- U5: a FLAT control sprite at (8,0)..(15,7). EOP. ---
    qw.append(giftag(1, 1, 0, 5, 0x0000_0000_000E_EEEE))
    qw.append(aplusd(R_PRIM, PRIM_SPRITE))
    qw.append(aplusd(R_FRAME_1, frame_1_val))
    qw.append(aplusd(R_RGBAQ, rgbaq_data(0x20, 0xC0, 0x40)))  # flat green
    qw.append(aplusd(R_XYZ2, xyz2_data(8, 0)))
    qw.append(aplusd(R_XYZ2, xyz2_data(15, 7)))

    return qw
|
||
|
||
|
||
# Bake the Ch297 linear PSMT4+CLUT fixture: build the GIF payload, size the
# bootlet's DMA transfer (QWC) from it, then write both .mem images.
clut4_demo_payload = build_clut4_demo_payload()
clut4_demo_qwc = len(clut4_demo_payload)
clut4_demo_bootlet = build_textured_demo_bootlet(clut4_demo_qwc)

write_bios_mem(
    "bios_clut4.mem", clut4_demo_bootlet,
    f"Ch297 PSMT4+CLUT demo BIOS bootlet ({len(clut4_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 16x8; QWC={clut4_demo_qwc}"
)
write_payload_mem(
    "payload_clut4.mem", clut4_demo_payload,
    f"Ch297 PSMT4+CLUT demo GIF payload ({clut4_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); BITBLT 16-CLUT + PSMT4 index texture + TEX0(CLD) + 1 PSMT4 SPRITE"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Ch298 — SWIZZLED PSMT4 INDEXED-TEXTURE + CLUT demo fixture.
#
# The texture INDICES are stored in VRAM in the REAL PS2 PSMT4 BLOCK layout
# (page/block/column swizzle) and the SAMPLER reads them back swizzled, so
# the two VRAM views are CONSISTENT and the rendered pattern is the intended
# diagonal ramp. Built for a board top with PSMT4_SWIZZLE=1 (both the
# gif_image_xfer UPLOAD and the gs_texture_unit SAMPLE take the swizzle path).
#
# WHY THIS IS "VISIBLY WRONG IF INTERPRETED LINEARLY": the texture is 64x32 —
# WIDER than a 32-px PSMT4 block and TALLER than a 16-px block row — so it
# spans MULTIPLE blocks within page 0. The block/column permutation reorders
# the bytes; the SAME VRAM bytes read with a LINEAR addresser (PSMT4_SWIZZLE=0)
# land in the wrong block and produce a scrambled image. The companion board
# TB (tb_top_psmct32_raster_demo_bram_swz4) renders this with PSMT4_SWIZZLE=1
# and checks the diagonal ramp comes back correct.
#
# The GIF payload itself is raster-order IMAGE data (identical SHAPE to the
# clut4 fixture); the swizzle is applied entirely in HARDWARE on the upload
# write — so this adds NO new EE/BIOS scaffolding (reuses the SAME one-shot
# build_textured_demo_bootlet). CLUT load sequenced on its OWN TEX0 BEFORE the
# texture upload, exactly like the linear clut4 fixture (read2 nibble-RMW is
# not time-disjoint from the read2 VRAM->CLUT load).
#
# VRAM layout (swizzled PSMT4):
#   - PSMT4 texture indices  : DBP = SWZ4_TBP0 (=36 -> byte 0x2400)
#   - CLUT (PSMCT32 entries) : DBP = SWZ4_CBP  (=32 -> byte 0x2000)
#   (texture base bumped past the CLUT so the larger 64x32 swizzled texture,
#    which spreads across several 256-byte blocks of page 0, never overlaps it)
|
||
|
||
SWZ4_FBW = 1  # 16x32 visible framebuffer (PSMCT32 output, 64 px/row page)
# FB (16x32 PSMCT32 at FBP=0) occupies bytes 0..0x2000. Place the CLUT and the
# swizzled texture ABOVE the FB so neither overlaps it:
#   CLUT (16 PSMCT32 entries = 64 B) at CBP=32                        -> 0x2000..0x203F
#   64x32 swizzled PSMT4 texture (spans page-0 blocks) at TBP0=36     -> 0x2400..0x27FF
SWZ4_TBP0 = 36  # texture base = 36*256 = 0x2400
SWZ4_TBW = 2  # 128 texels/row stride; EVEN (PSMT4 swizzle needs FBW even)
SWZ4_TEXW = 64  # crosses two 32-px block columns
SWZ4_TEXH = 32  # crosses two 16-px block rows
SWZ4_CBP = 32  # CLUT base = 32*256 = 0x2000
SWZ4_DISPLAY1_HI = 0x0001_F00F  # DW=15 (16 px wide window), DH=31 (32 px tall)
|
||
|
||
|
||
def tex0_swz4_pack(tbp0, tbw, cbp, cpsm=0, csm=1, csa=0, cld=1):
    """Pack TEX0_1 for the swizzled PSMT4 texture (TW=6 -> 64, TH=5 -> 32).

    The texture-side fields come from ``tex0_pack`` with PSM=0x14 (PSMT4);
    the CLUT-side fields are then OR-ed in at the bit positions below, each
    masked to its field width.
    """
    v = tex0_pack(tbp0, tbw, psm=0x14, tw=6, th=5)  # PSMT4, 64x32
    v |= (cbp & 0x3FFF) << 37   # CBP  : 14 bits at bit 37
    v |= (cpsm & 0xF) << 51     # CPSM :  4 bits at bit 51
    v |= (csm & 0x1) << 55      # CSM  :  1 bit  at bit 55
    v |= (csa & 0x1F) << 56     # CSA  :  5 bits at bit 56
    v |= (cld & 0x7) << 61      # CLD  :  3 bits at bit 61
    return v
|
||
|
||
|
||
def build_swz4_demo_payload():
    """Build the swizzled PSMT4 demo GIF payload (list of 128-bit qwords).

    Same upload/load structure as the linear clut4 fixture: CLUT BITBLT ->
    TEX0(CLD) load -> PSMT4 texture BITBLT (HW swizzles the destination
    addresses) -> textured sprite. Unlike the clut4 fixture, no flat control
    sprite is emitted: the textured sprite's closing GIF tag carries EOP.
    Reuses clut4_index / clut4_palette so the expected pattern matches.
    """
    qw = []
    frame_1_val = frame_1_psmct32(SWZ4_FBW)

    # --- U1: BITBLT the 16-entry CLUT (PSMCT32, no swizzle) to VRAM[CBP*256]. ---
    n_clut = 16
    qw.append(giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE))
    qw.append(aplusd(R_BITBLTBUF, bitbltbuf_pack(SWZ4_CBP, 1, 0)))  # DPSM=PSMCT32
    qw.append(aplusd(R_TRXPOS, trxpos_pack(0, 0)))
    qw.append(aplusd(R_TRXREG, trxreg_pack(n_clut, 1)))
    qw.append(aplusd(R_TRXDIR, trxdir_pack(0)))
    n_clut_qw = n_clut // 4  # 4 PSMCT32 entries per qword
    qw.append(giftag(n_clut_qw, 0, 2, 0, 0))
    for i in range(n_clut_qw):
        word = 0
        for lane in range(4):
            word |= (clut4_palette(i * 4 + lane) & 0xFFFFFFFF) << (32 * lane)
        qw.append(word)

    # --- U2: TEX0(CLD=4) LOAD-ONLY -> fires the VRAM->CLUT load disjoint
    #         from the read2 PSMT4 upload that follows. ---
    tex0_load = tex0_swz4_pack(SWZ4_TBP0, SWZ4_TBW, SWZ4_CBP,
                               cpsm=0, csm=1, csa=0, cld=4)
    qw.append(giftag(1, 0, 0, 1, 0x0000_0000_0000_000E))
    qw.append(aplusd(R_TEX0_1, tex0_load))

    # --- U3: BITBLT the 64x32 PSMT4 index texture to VRAM[TBP0*256]. With
    #         PSMT4_SWIZZLE=1 the HW writes each nibble at its SWIZZLED block/
    #         column address; the raster-order IMAGE data below is unchanged. ---
    qw.append(giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE))
    qw.append(aplusd(R_BITBLTBUF, bitbltbuf_pack(SWZ4_TBP0, SWZ4_TBW, 0x14)))  # DPSM=PSMT4
    qw.append(aplusd(R_TRXPOS, trxpos_pack(0, 0)))
    qw.append(aplusd(R_TRXREG, trxreg_pack(SWZ4_TEXW, SWZ4_TEXH)))
    qw.append(aplusd(R_TRXDIR, trxdir_pack(0)))
    n_px = SWZ4_TEXW * SWZ4_TEXH
    n_px_qw = n_px // 32  # 32 4-bit pixels per qword
    qw.append(giftag(n_px_qw, 0, 2, 0, 0))
    for q in range(n_px_qw):
        word = 0
        for lane in range(32):
            t = q * 32 + lane
            tx = t % SWZ4_TEXW
            ty = t // SWZ4_TEXW
            word |= (clut4_index(tx, ty) & 0xF) << (4 * lane)
        qw.append(word)

    # --- U4: PRIM(SPRITE+TME) + TEX0(CLD=2, no reload) + a 16x32 textured
    #         sprite sampling u in [0..15], v in [0..31]. The sampled u range
    #         stays within block column 0, but v in [0..31] CROSSES the 16-px
    #         block-row boundary — the across-block proof the architect wants
    #         visible on screen. ---
    tex0_draw = tex0_swz4_pack(SWZ4_TBP0, SWZ4_TBW, SWZ4_CBP,
                               cpsm=0, csm=1, csa=0, cld=2)
    qw.append(giftag(1, 0, 0, 6, 0x0000_0000_00EE_EEEE))
    qw.append(aplusd(R_PRIM, prim_sprite_tme()))
    qw.append(aplusd(R_FRAME_1, frame_1_val))
    qw.append(aplusd(R_TEX0_1, tex0_draw))
    qw.append(aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)))
    qw.append(aplusd(R_UV, uv_data(0, 0)))
    qw.append(aplusd(R_XYZ2, xyz2_data(0, 0)))

    # Second sprite vertex; this tag ends the packet.
    qw.append(giftag(1, 1, 0, 2, 0x0000_0000_0000_00EE))  # EOP
    qw.append(aplusd(R_UV, uv_data(15, 31)))
    qw.append(aplusd(R_XYZ2, xyz2_data(15, 31)))

    return qw
|
||
|
||
|
||
# Bake the Ch298 swizzled PSMT4+CLUT fixture: build the GIF payload, size the
# bootlet's DMA transfer (QWC) from it, then write both .mem images.
swz4_demo_payload = build_swz4_demo_payload()
swz4_demo_qwc = len(swz4_demo_payload)
swz4_demo_bootlet = build_textured_demo_bootlet_disp(swz4_demo_qwc, SWZ4_DISPLAY1_HI, SWZ4_FBW)

write_bios_mem(
    "bios_swz4.mem", swz4_demo_bootlet,
    f"Ch298 SWIZZLED PSMT4+CLUT demo BIOS bootlet ({len(swz4_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 16x32; QWC={swz4_demo_qwc}"
)
write_payload_mem(
    "payload_swz4.mem", swz4_demo_payload,
    f"Ch298 SWIZZLED PSMT4+CLUT demo GIF payload ({swz4_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); BITBLT 16-CLUT + 64x32 SWIZZLED PSMT4 texture + TEX0(CLD) + 16x32 sprite"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Ch299 — SWIZZLED PSMT8 INDEXED-TEXTURE + CLUT demo fixture.
#
# The SIBLING of the Ch298 swizzled-PSMT4 fixture, MINUS the nibble (PSMT8 is
# 1 byte/texel). The texture INDICES are stored in VRAM in the REAL PS2 PSMT8
# BLOCK layout (page/block/column swizzle) and the SAMPLER reads them back
# swizzled, so the two VRAM views are CONSISTENT and the rendered pattern is the
# intended diagonal ramp. Built for a board top with PSMT8_SWIZZLE=1 (both the
# gif_image_xfer UPLOAD and the gs_texture_unit SAMPLE take the swizzle path).
#
# WHY THIS IS "VISIBLY WRONG IF INTERPRETED LINEARLY": the texture is 64x48 —
# WIDER than a 16-px PSMT8 block column AND TALLER than a 16-px block row — so
# it spans MULTIPLE blocks within page 0 (PSMT8 page = 128x64 px, an 8x4 grid of
# 16x16 blocks). The block/column permutation reorders the bytes; the SAME VRAM
# bytes read with a LINEAR addresser (PSMT8_SWIZZLE=0) land in the wrong block
# and produce a scrambled image. The companion board TB
# (tb_top_psmct32_raster_demo_bram_swz8) renders this with PSMT8_SWIZZLE=1 and
# checks the diagonal ramp comes back correct across block-row boundaries.
#
# SIMPLER SEQUENCING THAN swz4: the PSMT8 upload uses the PURE-COMB writer (one
# byte per texel, no read2 nibble-RMW), so it is TIME-DISJOINT from the read2
# VRAM->CLUT load. We therefore use a SINGLE TEX0(CLD=1) that both loads the
# CLUT and draws — exactly like the Ch296 LINEAR clut8 fixture — rather than the
# separate load-only TEX0 the swz4 read2 conflict forced. NO new EE/BIOS
# scaffolding (GIF payload only; reuses the one-shot textured-demo bootlet).
#
# The geometry stays within ONE page-COLUMN (64 < 128 wide) and ONE page (48 <
# 64 tall) so page_index = page_y*bw_pg + page_x never aliases (with TBW=2 ->
# bw_pg=1, a >1-page-wide AND >1-page-tall texture would alias page(1,0) onto
# page(0,1)); the across-BLOCK proof (block rows at y=16 and y=32) is what makes
# linear-vs-swizzled diverge on screen.
#
# VRAM layout (swizzled PSMT8):
#   FB (16x48 PSMCT32 at FBP=0) occupies bytes 0..0x3000. Place the CLUT and the
#   swizzled texture ABOVE the FB so neither overlaps it:
#     CLUT (16 PSMCT32 entries = 64 B) at CBP=48                     -> 0x3000..0x303F
#     64x48 swizzled PSMT8 texture (spans page-0 blocks) at TBP0=52  -> 0x3400..
|
||
|
||
SWZ8_FBW = 1  # 16x48 visible framebuffer (PSMCT32 output)
SWZ8_TBP0 = 52  # texture base = 52*256 = 0x3400
SWZ8_TBW = 2  # 128 texels/row stride; EVEN (PSMT8 swizzle needs FBW even)
SWZ8_TEXW = 64  # crosses 16-px block columns (x=16,32,48)
SWZ8_TEXH = 48  # crosses 16-px block rows (y=16,32)
SWZ8_CBP = 48  # CLUT base = 48*256 = 0x3000
SWZ8_DISPLAY1_HI = 0x0002_F00F  # DW=15 (16 px wide window), DH=47 (48 px tall)
|
||
|
||
|
||
def tex0_swz8_pack(tbp0, tbw, cbp, cpsm=0, csm=1, csa=0, cld=1):
    """Pack TEX0_1 for the swizzled PSMT8 texture.

    TW=6 -> 64, TH=6 -> 64 capacity; we sample within 48. PSM=PSMT8=0x13.
    The texture-side fields come from ``tex0_pack``; the CLUT-side fields are
    OR-ed in at the bit positions below, each masked to its field width.
    """
    v = tex0_pack(tbp0, tbw, psm=0x13, tw=6, th=6)  # PSMT8, 64x64 capacity
    v |= (cbp & 0x3FFF) << 37   # CBP  : 14 bits at bit 37
    v |= (cpsm & 0xF) << 51     # CPSM :  4 bits at bit 51
    v |= (csm & 0x1) << 55      # CSM  :  1 bit  at bit 55
    v |= (csa & 0x1F) << 56     # CSA  :  5 bits at bit 56
    v |= (cld & 0x7) << 61      # CLD  :  3 bits at bit 61
    return v
|
||
|
||
|
||
def swz8_index(x, y):
    """Return the CLUT index stored at texel (x, y) of the swizzled PSMT8 texture.

    Diagonal index ramp idx(x,y) = (x + 2*y) mod 16 — the SAME shape as the
    swz4 fixture so correct-vs-scrambled is obvious and the 16-entry palette
    is reused. (PSMT8 can index 0..255, but a 16-entry ramp keeps the
    correct-vs-linear contrast crisp and reuses clut4_palette.)
    """
    return (x + 2 * y) % 16
|
||
|
||
|
||
def build_swz8_demo_payload():
    """Build the swizzled PSMT8 demo GIF payload (list of 128-bit qwords).

    CLUT BITBLT -> PSMT8 texture BITBLT (HW swizzles the destination
    addresses, pure-comb writer) -> single TEX0(CLD=1) that loads the CLUT
    AND draws -> textured sprite. No flat control sprite is emitted: the
    textured sprite's closing GIF tag carries EOP. Reuses clut4_palette so
    the expected pattern matches the swz4 TB palette helpers.
    """
    qw = []
    frame_1_val = frame_1_psmct32(SWZ8_FBW)

    # --- U1: BITBLT the 16-entry CLUT (PSMCT32, no swizzle) to VRAM[CBP*256]. ---
    n_clut = 16
    qw.append(giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE))
    qw.append(aplusd(R_BITBLTBUF, bitbltbuf_pack(SWZ8_CBP, 1, 0)))  # DPSM=PSMCT32
    qw.append(aplusd(R_TRXPOS, trxpos_pack(0, 0)))
    qw.append(aplusd(R_TRXREG, trxreg_pack(n_clut, 1)))
    qw.append(aplusd(R_TRXDIR, trxdir_pack(0)))
    n_clut_qw = n_clut // 4  # 4 PSMCT32 entries per qword
    qw.append(giftag(n_clut_qw, 0, 2, 0, 0))
    for i in range(n_clut_qw):
        word = 0
        for lane in range(4):
            word |= (clut4_palette(i * 4 + lane) & 0xFFFFFFFF) << (32 * lane)
        qw.append(word)

    # --- U2: BITBLT the 64x48 PSMT8 index texture to VRAM[TBP0*256]. With
    #         PSMT8_SWIZZLE=1 the HW writes each byte at its SWIZZLED block/
    #         column address; the raster-order IMAGE data below is unchanged.
    #         Pure-comb writer -> time-disjoint from the VRAM->CLUT load. ---
    qw.append(giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE))
    qw.append(aplusd(R_BITBLTBUF, bitbltbuf_pack(SWZ8_TBP0, SWZ8_TBW, 0x13)))  # DPSM=PSMT8
    qw.append(aplusd(R_TRXPOS, trxpos_pack(0, 0)))
    qw.append(aplusd(R_TRXREG, trxreg_pack(SWZ8_TEXW, SWZ8_TEXH)))
    qw.append(aplusd(R_TRXDIR, trxdir_pack(0)))
    n_px = SWZ8_TEXW * SWZ8_TEXH
    n_px_qw = n_px // 16  # 16 bytes (indices) per qword
    qw.append(giftag(n_px_qw, 0, 2, 0, 0))
    for q in range(n_px_qw):
        word = 0
        for lane in range(16):
            t = q * 16 + lane
            tx = t % SWZ8_TEXW
            ty = t // SWZ8_TEXW
            word |= (swz8_index(tx, ty) & 0xFF) << (8 * lane)
        qw.append(word)

    # --- U3: PRIM(SPRITE+TME) + TEX0(CLD=1, loads CLUT AND draws) + a 16x48
    #         textured sprite sampling u in [0..15], v in [0..47]. The sampled
    #         u range stays within block column 0, but v in [0..47] CROSSES the
    #         16-px block-row boundaries at y=16 AND y=32 — the across-block
    #         proof the architect wants visible on screen. ---
    tex0_draw = tex0_swz8_pack(SWZ8_TBP0, SWZ8_TBW, SWZ8_CBP,
                               cpsm=0, csm=1, csa=0, cld=1)
    qw.append(giftag(1, 0, 0, 6, 0x0000_0000_00EE_EEEE))
    qw.append(aplusd(R_PRIM, prim_sprite_tme()))
    qw.append(aplusd(R_FRAME_1, frame_1_val))
    qw.append(aplusd(R_TEX0_1, tex0_draw))
    qw.append(aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)))
    qw.append(aplusd(R_UV, uv_data(0, 0)))
    qw.append(aplusd(R_XYZ2, xyz2_data(0, 0)))

    # Second sprite vertex; this tag ends the packet.
    qw.append(giftag(1, 1, 0, 2, 0x0000_0000_0000_00EE))  # EOP
    qw.append(aplusd(R_UV, uv_data(15, 47)))
    qw.append(aplusd(R_XYZ2, xyz2_data(15, 47)))

    return qw
|
||
|
||
|
||
# Bake the Ch299 swizzled PSMT8+CLUT fixture: build the GIF payload, size the
# bootlet's DMA transfer (QWC) from it, then write both .mem images.
swz8_demo_payload = build_swz8_demo_payload()
swz8_demo_qwc = len(swz8_demo_payload)
swz8_demo_bootlet = build_textured_demo_bootlet_disp(swz8_demo_qwc, SWZ8_DISPLAY1_HI, SWZ8_FBW)

write_bios_mem(
    "bios_swz8.mem", swz8_demo_bootlet,
    f"Ch299 SWIZZLED PSMT8+CLUT demo BIOS bootlet ({len(swz8_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 16x48; QWC={swz8_demo_qwc}"
)
write_payload_mem(
    "payload_swz8.mem", swz8_demo_payload,
    f"Ch299 SWIZZLED PSMT8+CLUT demo GIF payload ({swz8_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); BITBLT 16-CLUT + 64x48 SWIZZLED PSMT8 texture + TEX0(CLD) + 16x48 sprite"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Ch300 — SWIZZLED PSMCT32 (direct-color) texture demo. The CLOSURE rung of the
# swizzle layout family (after PSMT4 swz4 + PSMT8 swz8). PSMCT32 is DIRECT color
# (no CLUT), so this fixture is SIMPLER: just a swizzled BITBLT upload + a sprite.
#
# WHY VISIBLY WRONG IF READ LINEARLY: a PSMCT32 page is 64x32 px (a 4x8 grid of
# 8x8 blocks). The sampled 16x40 sprite crosses 8-px BLOCK boundaries (x=8;
# y=8,16,24) AND — UNIQUELY for PSMCT32 vs the taller PSMT8 page — the 32-px
# PAGE-ROW boundary at y=32 (pixels y>=32 live in PAGE 1, byte 0x2000+ of the
# texture region). Reading the swizzled bytes LINEARLY (PSMCT32_SWIZZLE=0) lands
# the y>=32 rows in the wrong page entirely -> scrambled bottom rows.
# PSMCT32_SWIZZLE=1 recovers the intended coordinate-encoded gradient.
#
# Each texel ENCODES its own (x,y): R=x<<4, G=y*5, B=(x+y)<<2 — so any swizzle
# misplacement is BOTH visible (the gradient tears) and exactly checkable per
# pixel by the companion board TB.
#
# VRAM (PSMCT32_SWIZZLE=1 -> the FB ITSELF is also swizzled, same single param):
#   FB (16x40 PSMCT32 at FBP=0) occupies swizzled pages 0..1 -> 0x0000..~0x2200
#   22x40 swizzled PSMCT32 texture at TBP0=48 -> 0x3000.. (pages 0..1 of its
#   region, max ~0x5500) — clear of the FB. VRAM_BYTES=32 KiB (matches swz8).
#
# !!! HEARTBEAT-SPLICER DODGE (the reason the texture is 22 WIDE but only 16
# SAMPLED) !!! top_psmct32_raster_demo_bram carries the Ch251.3 production
# "heartbeat splicer": it UNCONDITIONALLY overwrites the low 32 bits of ee_ram
# qword 115 (byte 0x730) with CYAN (0xFFFFFF00) on every read — that is the
# animated demo's 17th-SPRITE RGBAQ. The DMAC fetches our GIF payload from this
# same ee_ram, so whatever texel lands at qword 115 (= image-data qword 93, raster
# texel index 372) gets clobbered with CYAN. Mirroring the swz8 fix, we make the
# texture WIDER than the sampled sprite (22 vs 16): texel 372 then sits at column
# 372 % 22 = 20, which is OUTSIDE the sampled u in [0..15] window, so the splice
# corrupts an UPLOADED-BUT-UNSAMPLED texel and the on-screen sprite is clean.
# (If the header size ever changes, texel 372 moves and the board TB — which
# checks every sampled pixel — will fail loudly, so this is self-verifying, not
# silently fragile.) NO new EE/BIOS scaffolding (GIF payload only).
#
# Budget: 22*40 = 880 texels = 220 image qwords + 16 header/draw = 236 <= 240.
|
||
|
||
SWZ32_FBW = 1  # 16x40 visible framebuffer (PSMCT32 output)
SWZ32_TBP0 = 48  # texture base = 48*256 = 0x3000 (above the swizzled FB)
SWZ32_TBW = 1  # 64 texels/row stride; PSMCT32 page is 64 wide (bw_pg=FBW, NO >>1)
SWZ32_TEXW = 22  # texture WIDER than sampled (16) -> spliced texel 372 at col 20 = unsampled
SWZ32_TEXH = 40  # 40 tall: sample v in [0..39] crosses block rows (8,16,24,32) AND PAGE row y=32
SWZ32_SPRITE_W = 16  # sampled sprite width: u in [0..15]; texture cols 16..21 uploaded-but-unsampled
SWZ32_SPRITE_H = 40  # sampled sprite height: v in [0..39]
SWZ32_DISPLAY1_HI = 0x0002_700F  # DW=15 (16 px wide window), DH=39 (40 px tall)
|
||
|
||
|
||
def swz32_texel(x, y):
    """Return the coordinate-encoded direct-color texel for (x, y).

    R=x<<4, G=y*5, B=(x+y)<<2, A=0xFF, each channel truncated to 8 bits.
    ABGR word = A<<24 | B<<16 | G<<8 | R. The colour encodes the texel's own
    coordinates, so a swizzle misplacement both tears the gradient AND fails
    the exact per-pixel check. (G uses y*5, so the 40-row range spans 0..195 —
    a smooth vertical ramp; the page-row crossing at y=32 stays continuous IFF
    swizzle is right.)
    """
    r = (x << 4) & 0xFF
    g = (y * 5) & 0xFF
    b = ((x + y) << 2) & 0xFF
    return 0xFF000000 | (b << 16) | (g << 8) | r
|
||
|
||
|
||
def build_swz32_demo_payload():
    """Build the swizzled PSMCT32 demo GIF payload (list of 128-bit qwords).

    BITBLT a SWZ32_TEXW x SWZ32_TEXH (22x40) PSMCT32 texture (HW swizzles the
    destination, pure-comb word writer) -> PRIM(SPRITE+TME) + TEX0(PSMCT32,
    no CLUT) -> a SWZ32_SPRITE_W x SWZ32_SPRITE_H (16x40) textured sprite.
    Direct color throughout: no CLUT load.
    """
    qw = []
    frame_1_val = frame_1_psmct32(SWZ32_FBW)

    # --- U1: BITBLT the 22x40 PSMCT32 texture to VRAM[TBP0*256]. DPSM=PSMCT32
    #         (0x00); with PSMCT32_SWIZZLE=1 the HW writes each pixel at its
    #         SWIZZLED page/block address. 4 PSMCT32 texels per qword. ---
    qw.append(giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE))
    qw.append(aplusd(R_BITBLTBUF, bitbltbuf_pack(SWZ32_TBP0, SWZ32_TBW, 0x00)))  # DPSM=PSMCT32
    qw.append(aplusd(R_TRXPOS, trxpos_pack(0, 0)))
    qw.append(aplusd(R_TRXREG, trxreg_pack(SWZ32_TEXW, SWZ32_TEXH)))
    qw.append(aplusd(R_TRXDIR, trxdir_pack(0)))
    n_px = SWZ32_TEXW * SWZ32_TEXH
    n_px_qw = n_px // 4  # 4 PSMCT32 texels per qword
    qw.append(giftag(n_px_qw, 0, 2, 0, 0))
    for q in range(n_px_qw):
        word = 0
        for lane in range(4):
            t = q * 4 + lane
            tx = t % SWZ32_TEXW
            ty = t // SWZ32_TEXW
            word |= (swz32_texel(tx, ty) & 0xFFFFFFFF) << (32 * lane)
        qw.append(word)

    # --- U2: PRIM(SPRITE+TME) + TEX0(PSMCT32, NO CLUT) + a 16x40 textured sprite
    #         sampling u in [0..15], v in [0..39] (a sub-window of the 22x40
    #         texture; cols 16..21 are uploaded-but-unsampled so the heartbeat
    #         splicer's clobbered texel stays off screen). v crosses the
    #         PAGE-row boundary at y=32 — the across-PAGE proof unique to
    #         PSMCT32 (page = 64x32 px). ---
    tex0_draw = tex0_pack(SWZ32_TBP0, SWZ32_TBW, psm=0x00, tw=5, th=6)  # PSMCT32, 32x64 cap
    qw.append(giftag(1, 0, 0, 6, 0x0000_0000_00EE_EEEE))
    qw.append(aplusd(R_PRIM, prim_sprite_tme()))
    qw.append(aplusd(R_FRAME_1, frame_1_val))
    qw.append(aplusd(R_TEX0_1, tex0_draw))
    qw.append(aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)))
    qw.append(aplusd(R_UV, uv_data(0, 0)))
    qw.append(aplusd(R_XYZ2, xyz2_data(0, 0)))

    # Second sprite vertex; this tag ends the packet.
    qw.append(giftag(1, 1, 0, 2, 0x0000_0000_0000_00EE))  # EOP
    qw.append(aplusd(R_UV, uv_data(SWZ32_SPRITE_W - 1, SWZ32_SPRITE_H - 1)))
    qw.append(aplusd(R_XYZ2, xyz2_data(SWZ32_SPRITE_W - 1, SWZ32_SPRITE_H - 1)))

    return qw
|
||
|
||
|
||
# Bake the Ch300 swizzled PSMCT32 fixture: build the GIF payload, size the
# bootlet's DMA transfer (QWC) from it, then write both .mem images.
swz32_demo_payload = build_swz32_demo_payload()
swz32_demo_qwc = len(swz32_demo_payload)
swz32_demo_bootlet = build_textured_demo_bootlet_disp(swz32_demo_qwc, SWZ32_DISPLAY1_HI, SWZ32_FBW)

# The size text in both descriptions is derived from the SWZ32_* constants so
# it cannot drift from the geometry actually baked (22x40 texture, 16x40
# window/sprite).
write_bios_mem(
    "bios_swz32.mem", swz32_demo_bootlet,
    f"Ch300 SWIZZLED PSMCT32 demo BIOS bootlet ({len(swz32_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = {SWZ32_SPRITE_W}x{SWZ32_SPRITE_H}; QWC={swz32_demo_qwc}"
)
write_payload_mem(
    "payload_swz32.mem", swz32_demo_payload,
    f"Ch300 SWIZZLED PSMCT32 demo GIF payload ({swz32_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); BITBLT {SWZ32_TEXW}x{SWZ32_TEXH} SWIZZLED PSMCT32 texture + TEX0 + "
    f"{SWZ32_SPRITE_W}x{SWZ32_SPRITE_H} sprite"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Ch301 — PERSPECTIVE-CORRECT textured-triangle demo. A receding "floor" quad
# (two TRIANGLEs) textured with a 4x4-cell checkerboard. The top edge is FAR
# (w=8) and the bottom edge is NEAR (w=1); the texture coords are supplied via
# the ST register (S=u/w, T=v/w) + RGBAQ.Q (=1/w), so the rasterizer recovers
# per-pixel (u,v)=(S/Q, T/Q) through the pipelined reciprocal LUT. Under correct
# perspective the checkerboard rows COMPRESS toward the far (top) edge; an affine
# build would space them evenly. PERSPECTIVE_CORRECT=1 + PSMCT32_SWIZZLE=0 (the
# texture is LINEAR — this rung isolates perspective from the swizzle family).
#
# Interface contract (must match gs_stub Ch301 + the board TB):
#   S_fp = round((u/w) * 2^FRAC)  -> ST[23:0]       FRAC=12
#   T_fp = round((v/w) * 2^FRAC)  -> ST[55:32]
#   Q_fp = round((1/w) * 2^FRAC)  -> RGBAQ[63:32]
#
# Payload is kept <= ~95 qwords so the DMAC (QWC from MADR=0x100=qword16) stops
# BEFORE absolute ee_ram qword 115 (16+QWC-1 < 115), so the Ch251.3 heartbeat
# splicer at qword 115 is never read — no fixture/splicer collision.
|
||
PERSP_FBW = 1
PERSP_TBP0 = 8  # texture base 8*256=0x800 (above the 16x24 linear FB)
PERSP_TBW = 1
PERSP_TEXW = 16
PERSP_TEXH = 16
PERSP_SCR_W = 16  # on-screen quad width
PERSP_SCR_H = 24  # on-screen quad height
PERSP_FRAC = 12  # fractional bits of the fixed-point S / T / Q values
PERSP_W_FAR = 8  # top-edge depth (far)
PERSP_W_NEAR = 1  # bottom-edge depth (near)
PERSP_DISPLAY1_HI = ((PERSP_SCR_H - 1) << 12) | (PERSP_SCR_W - 1)  # DH=23, DW=15
|
||
|
||
|
||
def persp_texel(u, v):
    """Return the PSMCT32 ABGR texel at (u, v) of the checkerboard texture.

    The board uses 4x4-texel cells so perspective foreshortening is
    unmistakable: cells where (u>>2)+(v>>2) is odd are white, the rest dark
    blue. Alpha is always 0xFF.
    """
    cell = ((u >> 2) + (v >> 2)) & 1
    # white on odd cells, dark blue on even cells
    r, g, b = (0xFF, 0xFF, 0xFF) if cell else (0x20, 0x20, 0x90)
    return 0xFF000000 | (b << 16) | (g << 8) | r
|
||
|
||
|
||
def st_data(s_fp, t_fp):
    """Pack fixed-point S and T into the 64-bit ST register data word.

    S occupies bits [23:0] and T bits [55:32]; each value is truncated to
    24 bits before packing.
    """
    return (s_fp & 0xFFFFFF) | ((t_fp & 0xFFFFFF) << 32)
|
||
|
||
|
||
def rgbaq_with_q(r, g, b, q_fp):
    """Return the RGBAQ data word for (r, g, b) with Q placed in bits [63:32].

    The colour portion comes from ``rgbaq_data``; ``q_fp`` is truncated to
    32 bits and OR-ed into the upper half of the word.
    """
    return rgbaq_data(r, g, b) | ((q_fp & 0xFFFFFFFF) << 32)
|
||
|
||
|
||
def persp_attrs(u, v, w):
    """Return fixed-point (S, T, Q) for texel coords (u, v) at depth ``w``.

    S = round(u/w * 2^PERSP_FRAC), T = round(v/w * 2^PERSP_FRAC),
    Q = round(1/w * 2^PERSP_FRAC). Rounding uses Python's built-in ``round``
    (ties go to the even integer). ``w`` must be non-zero.
    """
    one = 1 << PERSP_FRAC  # fixed-point representation of 1.0
    s_fp = round((u * one) / w)
    t_fp = round((v * one) / w)
    q_fp = round(one / w)
    return s_fp, t_fp, q_fp
|
||
|
||
|
||
def persp_vertex_qws(sx, sy, u, v, w):
    """Return the three A+D qwords for one perspective vertex.

    One vertex = RGBAQ (with Q) + ST + XYZ2, at screen position (sx, sy),
    with texel coords (u, v) and depth w converted by ``persp_attrs``.
    """
    s_fp, t_fp, q_fp = persp_attrs(u, v, w)
    return [
        aplusd(R_RGBAQ, rgbaq_with_q(0x00, 0x00, 0x00, q_fp)),  # color overridden by DECAL texel
        aplusd(R_ST, st_data(s_fp, t_fp)),
        aplusd(R_XYZ2, xyz2_data(sx, sy)),
    ]
|
||
|
||
|
||
def build_persp_demo_payload():
    """Build the Ch301 perspective-correct demo GIF payload (list of qwords).

    Order: upload the 16x16 LINEAR PSMCT32 checkerboard, set PRIM/FRAME/TEX0,
    then emit two discrete textured triangles forming the on-screen quad.
    Vertices on the top screen row (sy == 0) use depth PERSP_W_FAR, all
    others PERSP_W_NEAR. The second triangle's GIF tag carries EOP.
    """
    qw = []
    frame_1_val = frame_1_psmct32(PERSP_FBW)
    tex0_val = tex0_pack(PERSP_TBP0, PERSP_TBW, psm=0x00, tw=4, th=4)  # PSMCT32 16x16

    # --- U1: upload the 16x16 LINEAR PSMCT32 checkerboard texture. ---
    qw.append(giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE))
    qw.append(aplusd(R_BITBLTBUF, bitbltbuf_pack(PERSP_TBP0, PERSP_TBW, 0x00)))
    qw.append(aplusd(R_TRXPOS, trxpos_pack(0, 0)))
    qw.append(aplusd(R_TRXREG, trxreg_pack(PERSP_TEXW, PERSP_TEXH)))
    qw.append(aplusd(R_TRXDIR, trxdir_pack(0)))
    n_image = (PERSP_TEXW * PERSP_TEXH) // 4  # 4 PSMCT32 texels per qword
    qw.append(giftag(n_image, 0, 2, 0, 0))
    for i in range(n_image):
        word = 0
        for lane in range(4):
            t = i * 4 + lane
            tx = t % PERSP_TEXW
            ty = t // PERSP_TEXW
            word |= (persp_texel(tx, ty) & 0xFFFFFFFF) << (32 * lane)
        qw.append(word)

    # --- U2: PRIM(TRI+TME) / FRAME / TEX0 setup (3 A+D, no EOP). ---
    # NOTE(review): NREG is 3 but the REGS field carries five 0xE descriptors
    # (the other tags in this file use exactly NREG of them). Presumably the
    # two extra nibbles are ignored — confirm against the GIF tag decoder
    # before tidying; left unchanged so the emitted fixture stays the same.
    qw.append(giftag(1, 0, 0, 3, 0x0000_0000_000E_EEEE))
    qw.append(aplusd(R_PRIM, prim_tri_tme()))
    qw.append(aplusd(R_FRAME_1, frame_1_val))
    qw.append(aplusd(R_TEX0_1, tex0_val))

    # quad corners (screen) -> (u,v,w): top far (w=8), bottom near (w=1).
    # texture v 0..15 maps across screen y 0..23 (round); u 0..15 across x.
    def vtx(sx, sy):
        w = PERSP_W_FAR if sy == 0 else PERSP_W_NEAR
        u = sx  # 0..15
        v = round(sy * (PERSP_TEXH - 1) / (PERSP_SCR_H - 1))  # 0..15
        return persp_vertex_qws(sx, sy, u, v, w)

    TL = (0, 0)
    TR = (PERSP_SCR_W - 1, 0)
    BL = (0, PERSP_SCR_H - 1)
    BR = (PERSP_SCR_W - 1, PERSP_SCR_H - 1)

    # discrete TRIANGLE: 6 vertices = 2 tris. Tri A = TL,TR,BL ; Tri B = TR,BR,BL.
    triA = vtx(*TL) + vtx(*TR) + vtx(*BL)
    triB = vtx(*TR) + vtx(*BR) + vtx(*BL)
    qw.append(giftag(1, 0, 0, 9, 0x0000_000E_EEEE_EEEE))  # tri A: 9 A+D
    qw += triA
    qw.append(giftag(1, 1, 0, 9, 0x0000_000E_EEEE_EEEE))  # tri B: 9 A+D, EOP
    qw += triB
    return qw
|
||
|
||
|
||
# Ch301 fixture emit: build the payload, check its size, build the matching
# bootlet, then write both .mem files.
persp_demo_payload = build_persp_demo_payload()
persp_demo_qwc = len(persp_demo_payload)
# The description below places the payload at byte 0x100; the 95-qword cap is
# presumably the margin below the splicer's qword 115 — confirm if it moves.
assert persp_demo_qwc <= 95, f"perspective payload {persp_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
persp_demo_bootlet = build_textured_demo_bootlet_disp(persp_demo_qwc, PERSP_DISPLAY1_HI, PERSP_FBW)

write_bios_mem(
    "bios_persp.mem", persp_demo_bootlet,
    f"Ch301 PERSPECTIVE-CORRECT demo BIOS bootlet ({len(persp_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = {PERSP_SCR_W}x{PERSP_SCR_H}; QWC={persp_demo_qwc}"
)
write_payload_mem(
    "payload_persp.mem", persp_demo_payload,
    f"Ch301 PERSPECTIVE-CORRECT demo GIF payload ({persp_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); 16x16 checkerboard + 2 TME TRIs (ST/Q perspective, receding floor)"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch301b — PERSPECTIVE-CORRECT FLOOR demo (the human-recognizable version).
|
||
#
|
||
# The Ch301 demo above proved the per-pixel divide, but its rectangular quad +
|
||
# hand-assigned depths are NOT a single coherent projected plane (S=u/w is
|
||
# inconsistent across the diagonal seam), so it renders a sheared/diagonal
|
||
# pattern that "looks broken". This version derives ALL FOUR vertices from ONE
|
||
# pinhole projection of a flat floor, so both triangles share the SAME
|
||
# projective plane → no seam shear → a checkerboard that COMPRESSES toward the
|
||
# far (top) edge — the unmistakable perspective signature.
|
||
#
|
||
# Pinhole floor (camera height H above a Y=-H floor, looking down +Z; f=focal):
|
||
# screen_x = CX + f*X/Z ; screen_y = CY + f*H/Z ; w = Z
|
||
# texture : u = (X+Xe)/(2Xe)*(TEXW-1) ; v = (Z-Znear)/(Zfar-Znear)*(TEXH-1)
|
||
# With f=16, H=1.5, CX=16, CY=-6, Xe=1, Znear=1, Zfar=4 the four corners land on
|
||
# integer screen coords (computed below). Because screen_x/y AND (u/w,v/w,1/w)
|
||
# all come from this ONE projection, s/w,t/w,1/w are screen-AFFINE over the whole
|
||
# quad (the fundamental theorem of perspective texturing) → the two triangles
|
||
# agree on the shared diagonal. The board TB fits a SINGLE affine S/T/Q plane and
|
||
# checks every pixel against it (a floor-plane / seam-continuity check, per the
|
||
# architect — NOT a per-triangle reference that could hide this exact bug).
|
||
#
|
||
# Floor corners (screen sx,sy ; texel u,v ; depth w):
|
||
# NL (near-left) sx=0 sy=18 u=0 v=0 w=1
|
||
# NR (near-right) sx=32 sy=18 u=15 v=0 w=1
|
||
# FL (far-left) sx=12 sy=0 u=0 v=15 w=4
|
||
# FR (far-right) sx=20 sy=0 u=15 v=15 w=4
|
||
# (near edge wide+low, far edge narrow+high → trapezoid). Tri1=NL,NR,FR ;
|
||
# Tri2=NL,FR,FL ; shared diagonal seam = NL-FR. Texture LINEAR PSMCT32 at 0x1400
|
||
# (clear of the 64x19 FB). Payload < qword 115 (heartbeat-splicer safe).
|
||
# Ch301b floor-demo parameters.
PFLOOR_FBW = 1  # frame-buffer width argument passed to frame_1_psmct32
PFLOOR_TBP0 = 20  # 20*256 = 0x1400 (above the 64-stride x19-row FB)
PFLOOR_TBW = 1
PFLOOR_TEXW = 16  # texture width in texels
PFLOOR_TEXH = 16  # texture height in texels
PFLOOR_DISPLAY1_HI = (18 << 12) | 33  # DW=33 (34 wide), DH=18 (19 tall)
# (sx, sy, u, v, w) per corner — derived from the projection above.
PFLOOR_NL = (0, 18, 0, 0, 1)  # near-left
PFLOOR_NR = (32, 18, 15, 0, 1)  # near-right
PFLOOR_FL = (12, 0, 0, 15, 4)  # far-left
PFLOOR_FR = (20, 0, 15, 15, 4)  # far-right
|
||
|
||
|
||
def pfloor_vertex_qws(corner):
    """Return the 3 A+D qwords (RGBAQ with Q, ST, XYZ2) for a floor corner.

    ``corner`` is a ``(sx, sy, u, v, w)`` tuple.  The packing is the same
    RGBAQ/ST/XYZ2 triple that ``persp_vertex_qws`` emits, so this simply
    unpacks the tuple and delegates.
    """
    sx, sy, u, v, w = corner
    return persp_vertex_qws(sx, sy, u, v, w)
|
||
|
||
|
||
def build_persp_floor_demo_payload():
    """Assemble the Ch301b perspective-floor GIF payload as a list of qwords.

    Emission order:
      U1  4 A+D transfer registers, then the PFLOOR_TEXW x PFLOOR_TEXH
          texture from ``persp_texel`` (four 32-bit texels per qword,
          lane 0 in the low bits).
      U2  PRIM (TRI+TME), FRAME_1 and TEX0_1 setup.
      tri1 = NL,NR,FR and tri2 = NL,FR,FL, 9 A+D each; tri2's tag has EOP.
    """
    frame_1_val = frame_1_psmct32(PFLOOR_FBW)
    tex0_val = tex0_pack(PFLOOR_TBP0, PFLOOR_TBW, psm=0x00, tw=4, th=4)  # PSMCT32 16x16

    # U1: upload the 16x16 LINEAR PSMCT32 checkerboard texture.
    payload = [
        giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE),
        aplusd(R_BITBLTBUF, bitbltbuf_pack(PFLOOR_TBP0, PFLOOR_TBW, 0x00)),
        aplusd(R_TRXPOS, trxpos_pack(0, 0)),
        aplusd(R_TRXREG, trxreg_pack(PFLOOR_TEXW, PFLOOR_TEXH)),
        aplusd(R_TRXDIR, trxdir_pack(0)),
    ]
    image_qwords = (PFLOOR_TEXW * PFLOOR_TEXH) // 4
    payload.append(giftag(image_qwords, 0, 2, 0, 0))
    for first_texel in range(0, 4 * image_qwords, 4):
        packed = 0
        for lane in range(4):
            ty, tx = divmod(first_texel + lane, PFLOOR_TEXW)
            packed |= (persp_texel(tx, ty) & 0xFFFFFFFF) << (32 * lane)
        payload.append(packed)

    # U2: PRIM(TRI+TME)/FRAME/TEX0 setup.
    # NOTE(review): five 0xE nibbles in the register list vs. a register
    # count of 3; reproduced unchanged.
    payload += [
        giftag(1, 0, 0, 3, 0x0000_0000_000E_EEEE),
        aplusd(R_PRIM, prim_tri_tme()),
        aplusd(R_FRAME_1, frame_1_val),
        aplusd(R_TEX0_1, tex0_val),
    ]

    # Two triangles from the ONE projected quad (shared diagonal NL-FR).
    triangles = (
        (0, (PFLOOR_NL, PFLOOR_NR, PFLOOR_FR)),  # tri1: 9 A+D
        (1, (PFLOOR_NL, PFLOOR_FR, PFLOOR_FL)),  # tri2: 9 A+D, EOP
    )
    for eop, corners in triangles:
        payload.append(giftag(1, eop, 0, 9, 0x0000_000E_EEEE_EEEE))
        for corner in corners:
            payload += pfloor_vertex_qws(corner)
    return payload
|
||
|
||
|
||
# Ch301b fixture emit: build the payload, check its size, build the matching
# bootlet, then write both .mem files.
pfloor_demo_payload = build_persp_floor_demo_payload()
pfloor_demo_qwc = len(pfloor_demo_payload)
assert pfloor_demo_qwc <= 95, f"persp-floor payload {pfloor_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
pfloor_demo_bootlet = build_textured_demo_bootlet_disp(pfloor_demo_qwc, PFLOOR_DISPLAY1_HI, PFLOOR_FBW)

# NOTE: the "34x19" in the description is a literal; keep it in step with
# PFLOOR_DISPLAY1_HI by hand.
write_bios_mem(
    "bios_persp_floor.mem", pfloor_demo_bootlet,
    f"Ch301b PERSPECTIVE FLOOR demo BIOS bootlet ({len(pfloor_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 34x19; QWC={pfloor_demo_qwc}"
)
write_payload_mem(
    "payload_persp_floor.mem", pfloor_demo_payload,
    f"Ch301b PERSPECTIVE FLOOR demo GIF payload ({pfloor_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); 16x16 checkerboard + 2 TME TRIs from ONE projected floor plane"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch302 — COMBINED textured + alpha + depth demo (the multi-beat read-schedule
|
||
# probe). Two primitives:
|
||
# 1. Opaque GREEN background sprite, ZTE=GEQUAL over the init-0 Z-buffer, so it
|
||
# writes BOTH the framebuffer color (Cbg=green) AND the Z-buffer (Z=Zbg).
|
||
# 2. A COMBINED triangle: TME (translucent red/blue texture, texel A=0x40) +
|
||
# ABE (source-over) + ZTE (GEQUAL). Its interpolated Z runs from 0x6000 at
|
||
# the top (>= Zbg -> PASS -> blend texel over green, write Z) to 0x2000 at
|
||
# the bottom (< Zbg -> FAIL -> green shows through unchanged, no writes).
|
||
# With COMBINED_TAZ=1 the gs_stub combined FSM runs the 5-beat per-pixel schedule
|
||
# (Zread -> Ztest -> texel -> dest -> colorwrite -> Zwrite). AFFINE UV (this rung
|
||
# is about memory scheduling, not perspective).
|
||
#
|
||
# VRAM (16 KiB): with FBW=1 the FB stride is 256 B/row, so the 16-row FB spans
|
||
# 0x0000..0x0FFF; the Z-buffer (ZBP=2) spans 0x1000..0x1FFF; the texture must
|
||
# therefore live ABOVE both, at TBP0=32 (0x2000) — NOT inside the FB (an earlier
|
||
# 0x800 placement was clobbered by the background sprite). Payload stays below
# the heartbeat splicer at qword 115 (heartbeat-splicer safe).
|
||
# Ch302 combined-demo parameters.
COMB_FBW = 1  # frame-buffer width argument passed to frame_1_psmct32
COMB_ZBP = 2  # Z-buffer @ 2*2048 = 0x1000 (even -> ZMSK=0)
COMB_TBP0 = 32  # texture @ 32*256 = 0x2000 (above the 0x1000 FB + 0x1000 Z)
COMB_TBW = 1
COMB_TEXW = 8  # texture width in texels
COMB_TEXH = 8  # texture height in texels
COMB_DISPLAY1_HI = 0x0000_F00F  # DW=15 (16 wide), DH=15 (16 tall)
COMB_CBG = (0x00, 0x80, 0x00)  # opaque green background (r,g,b)
COMB_ZBG = 0x0000_4000  # background stored Z
# combined triangle (screen x,y ; interpolated Z ; affine texel u,v)
COMB_V0 = (2, 1, 0x0000_6000, 0, 0)  # top-left (PASS: Z>Zbg)
COMB_V1 = (13, 1, 0x0000_6000, 7, 0)  # top-right (PASS)
COMB_V2 = (7, 14, 0x0000_2000, 3, 7)  # bottom (FAIL: Z<Zbg)
|
||
|
||
|
||
def prim_tri_tme_abe():
    """Return the PRIM value for a textured, alpha-blended triangle (0x53).

    NOTE(review): bit 8 (FST on the GS, if this mirrors the hardware PRIM
    layout) is left clear, yet the vertex loops that use this value send
    R_UV rather than R_ST — confirm gs_stub does not select UV-vs-ST from it.
    """
    triangle = 3      # PRIM[2:0] = 3 TRIANGLE
    tme = 1 << 4      # bit 4 = TME
    abe = 1 << 6      # bit 6 = ABE
    return triangle | tme | abe
|
||
|
||
|
||
def comb_texel(tx, ty):
    """Return the ABGR texel of the translucent two-region texture.

    Columns with ``tx < 4`` are RED, all other columns are BLUE, and every
    texel has A=0x40.  ``ty`` does not affect the result.  The two halves
    are distinct so the TB can prove both that the TEXEL contributes to the
    blend and that the affine UV mapping is right.
    Packing: A<<24 | B<<16 | G<<8 | R.
    """
    alpha = 0x40
    red, green, blue = (0xFF, 0x00, 0x00) if tx < 4 else (0x00, 0x00, 0xFF)
    return (alpha << 24) | (blue << 16) | (green << 8) | red
|
||
|
||
|
||
def build_combined_demo_payload():
    """Assemble the Ch302 combined tex+alpha+depth GIF payload (qword list).

    Emission order:
      U1  4 A+D transfer registers, then the COMB_TEXW x COMB_TEXH texture
          from ``comb_texel`` (four 32-bit texels per qword, lane 0 low).
      U2  7 A+D: opaque green background sprite covering (0,0)-(15,15) at
          Z=COMB_ZBG, with TEST_1/ZBUF_1/FRAME_1 set up.
      U3  12 A+D with EOP: PRIM, ALPHA_1, TEX0_1, then RGBAQ/UV/XYZ2 for
          each of COMB_V0..COMB_V2.
    """
    frame_1_val = frame_1_psmct32(COMB_FBW)
    tex0_val = tex0_pack(COMB_TBP0, COMB_TBW, psm=0x00, tw=3, th=3)  # PSMCT32 8x8

    # --- U1: upload the 8x8 translucent PSMCT32 texture. ---
    payload = [
        giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE),
        aplusd(R_BITBLTBUF, bitbltbuf_pack(COMB_TBP0, COMB_TBW, 0x00)),
        aplusd(R_TRXPOS, trxpos_pack(0, 0)),
        aplusd(R_TRXREG, trxreg_pack(COMB_TEXW, COMB_TEXH)),
        aplusd(R_TRXDIR, trxdir_pack(0)),
    ]
    image_qwords = (COMB_TEXW * COMB_TEXH) // 4
    payload.append(giftag(image_qwords, 0, 2, 0, 0))
    for first_texel in range(0, 4 * image_qwords, 4):
        packed = 0
        for lane in range(4):
            ty, tx = divmod(first_texel + lane, COMB_TEXW)
            packed |= (comb_texel(tx, ty) & 0xFFFFFFFF) << (32 * lane)
        payload.append(packed)

    # --- U2: opaque GREEN background sprite (ZTE GEQUAL writes FB + Z). ---
    payload += [
        giftag(1, 0, 0, 7, 0x0000_0000_0EEE_EEEE),
        aplusd(R_PRIM, PRIM_SPRITE),
        aplusd(R_FRAME_1, frame_1_val),
        aplusd(R_TEST_1, test1_geq()),
        aplusd(R_ZBUF_1, zbuf1_pack(COMB_ZBP)),
        aplusd(R_RGBAQ, rgbaq_data(*COMB_CBG)),
        aplusd(R_XYZ2, xyz2_dataz(0, 0, COMB_ZBG)),
        aplusd(R_XYZ2, xyz2_dataz(15, 15, COMB_ZBG)),
    ]

    # --- U3: COMBINED triangle: TME + ABE(source-over) + ZTE(GEQUAL). EOP. ---
    # PRIM/ALPHA_1/TEX0 are set here; TEST_1/ZBUF_1/FRAME_1 persist from U2.
    # 12 A+D in total: PRIM, ALPHA_1, TEX0 (3) + 3 * (RGBAQ, UV, XYZ2) (9).
    payload += [
        giftag(1, 1, 0, 12, 0x0000_EEEE_EEEE_EEEE),
        aplusd(R_PRIM, prim_tri_tme_abe()),
        aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)),  # source-over: Cv=((Cs-Cd)*As)>>7+Cd
        aplusd(R_TEX0_1, tex0_val),
    ]
    for sx, sy, sz, tu, tv in (COMB_V0, COMB_V1, COMB_V2):
        payload += [
            aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)),  # color overridden by texel (DECAL)
            aplusd(R_UV, uv_data(tu, tv)),                  # affine texel coords
            aplusd(R_XYZ2, xyz2_dataz(sx, sy, sz)),         # position + interpolated Z
        ]
    return payload
|
||
|
||
|
||
# Ch302 fixture emit: build the payload, check its size, build the matching
# bootlet, then write both .mem files.
comb_demo_payload = build_combined_demo_payload()
comb_demo_qwc = len(comb_demo_payload)
assert comb_demo_qwc <= 95, f"combined payload {comb_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
comb_demo_bootlet = build_textured_demo_bootlet_disp(comb_demo_qwc, COMB_DISPLAY1_HI, COMB_FBW)

# NOTE: "16x16" in the description is a literal, not derived from
# COMB_DISPLAY1_HI.
write_bios_mem(
    "bios_combined.mem", comb_demo_bootlet,
    f"Ch302 COMBINED tex+alpha+depth BIOS bootlet ({len(comb_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 16x16; QWC={comb_demo_qwc}"
)
write_payload_mem(
    "payload_combined.mem", comb_demo_payload,
    f"Ch302 COMBINED tex+alpha+depth GIF payload ({comb_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); green Z-bg + translucent textured TRI w/ interpolated Z (half pass/half fail)"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch303 — TILE-LOCAL combined renderer demo. Same combined TME+ABE+ZTE triangle
|
||
# as Ch302, but rendered into an ON-CHIP 16x16 color+Z tile (CLEAR→RENDER→FLUSH).
|
||
# Differences from the Ch302 fixture:
|
||
# - NO background sprite: the "green background" is the FSM's CLEAR phase
|
||
# (TILE_CLEAR_COLOR=green, TILE_CLEAR_Z=Zbg=0x4000 — gs_stub params), so the
|
||
# fixture is JUST the texture upload + the combined triangle.
|
||
# - The Z-buffer is ON-CHIP (tile_z), so NO Z-buffer in VRAM; the texture moves
|
||
# down to TBP0=16 (0x1000), above the 16-row FB (0..0xFFF). VRAM 8 KiB.
|
||
# - ZBUF_1 is still written but IGNORED in tile mode (Z is tile-local).
|
||
# Expected result == Ch302: cleared green, triangle top half blended (red->orange,
|
||
# blue->teal over green) where depth passes, bottom half occluded (green).
|
||
# Ch303 tile-local demo parameters.
TILE_FBW = 1  # frame-buffer width argument passed to frame_1_psmct32
TILE_TBP0 = 16  # texture @ 16*256 = 0x1000 (above the 0x1000 FB; no VRAM Z in tile mode)
TILE_TBW = 1
TILE_TEXW = 8  # texture width in texels
TILE_TEXH = 8  # texture height in texels
TILE_ZBP = 2  # written but ignored (Z is on-chip tile_z)
TILE_DISPLAY1_HI = 0x0000_F00F  # DW=15 (16 wide), DH=15 (16 tall)
# Triangle vertices as (screen x, screen y, Z, texel u, texel v); the values
# match COMB_V0..COMB_V2.
TILE_V0 = (2, 1, 0x0000_6000, 0, 0)  # top-left (PASS)
TILE_V1 = (13, 1, 0x0000_6000, 7, 0)  # top-right (PASS)
TILE_V2 = (7, 14, 0x0000_2000, 3, 7)  # bottom (FAIL)
|
||
|
||
|
||
def build_tile_demo_payload():
    """Assemble the Ch303 tile-local combined-demo GIF payload (qword list).

    Emission order:
      U1  4 A+D transfer registers, then the TILE_TEXW x TILE_TEXH texture
          from ``comb_texel`` (four 32-bit texels per qword, lane 0 low).
      U2  15 A+D with EOP: PRIM, FRAME_1, ALPHA_1, TEST_1, ZBUF_1, TEX0_1,
          then RGBAQ/UV/XYZ2 for each of TILE_V0..TILE_V2.
    There is no background sprite; the green background is the tile CLEAR
    phase.
    """
    frame_1_val = frame_1_psmct32(TILE_FBW)
    tex0_val = tex0_pack(TILE_TBP0, TILE_TBW, psm=0x00, tw=3, th=3)  # PSMCT32 8x8

    # --- U1: upload the 8x8 translucent PSMCT32 texture (reuses comb_texel). ---
    payload = [
        giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE),
        aplusd(R_BITBLTBUF, bitbltbuf_pack(TILE_TBP0, TILE_TBW, 0x00)),
        aplusd(R_TRXPOS, trxpos_pack(0, 0)),
        aplusd(R_TRXREG, trxreg_pack(TILE_TEXW, TILE_TEXH)),
        aplusd(R_TRXDIR, trxdir_pack(0)),
    ]
    image_qwords = (TILE_TEXW * TILE_TEXH) // 4
    payload.append(giftag(image_qwords, 0, 2, 0, 0))
    for first_texel in range(0, 4 * image_qwords, 4):
        packed = 0
        for lane in range(4):
            ty, tx = divmod(first_texel + lane, TILE_TEXW)
            packed |= (comb_texel(tx, ty) & 0xFFFFFFFF) << (32 * lane)
        payload.append(packed)

    # --- U2: the COMBINED triangle (TME+ABE+ZTE), EOP. 15 A+D in total:
    #     PRIM, FRAME, ALPHA_1, TEST_1, ZBUF_1, TEX0 (6) + 3*(RGBAQ, UV, XYZ2) (9).
    payload += [
        giftag(1, 1, 0, 15, 0x0EEE_EEEE_EEEE_EEEE),  # 15 0xE nibbles
        aplusd(R_PRIM, prim_tri_tme_abe()),
        aplusd(R_FRAME_1, frame_1_val),
        aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)),   # source-over
        aplusd(R_TEST_1, test1_geq()),               # ZTE GEQUAL
        aplusd(R_ZBUF_1, zbuf1_pack(TILE_ZBP)),      # ignored in tile mode
        aplusd(R_TEX0_1, tex0_val),
    ]
    for sx, sy, sz, tu, tv in (TILE_V0, TILE_V1, TILE_V2):
        payload += [
            aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)),
            aplusd(R_UV, uv_data(tu, tv)),
            aplusd(R_XYZ2, xyz2_dataz(sx, sy, sz)),
        ]
    return payload
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch323 — TILE COLOR+Z SPILL/RELOAD two-batch proof fixture.
|
||
# TWO TME+ABE+ZTE triangles (the tile-local path's close_combined gate REQUIRES
|
||
# triangle+TME+ABE+ZTE — see gs_stub.sv:4764; a flat/sprite/no-texture prim would
|
||
# take the non-tile path and never exercise spill/reload). Texture is INCIDENTAL:
|
||
# an opaque 2-region texel map (left=color1, right=color2), nearest, sampled with
|
||
# uniform UVs so each prim is effectively a flat color. The proof hinges on Z:
|
||
# CLEAR Z = 0x4000 (gs_stub TILE_CLEAR_Z); GEQUAL (frag_z >= stored passes).
|
||
# P1 (batch 1): region A (small top-left tri), Z=0x8000 (near), UV->color1.
|
||
# P2 (batch 2): region A+B (large tri), Z=0x6000 (mid), UV->color2.
|
||
# Single-prim tile mode -> 2 prims = 2 tile cycles; cycle 2 reloads cycle 1's
|
||
# flushed color+Z. Region A overlap: reloaded Z=0x8000 makes P2 (0x6000) FAIL ->
|
||
# keeps color1 (Z survived). Region B (P2 only): reloaded Z=0x4000 -> P2 PASSES ->
|
||
# color2 (control). Negative control (reload off): region A reloads clear 0x4000 ->
|
||
# P2 passes -> color2; the color1<->color2 flip proves the result depends on reload.
|
||
# Ch323 tile spill/reload fixture parameters.
TSPILL_FBW = 1  # frame-buffer width argument passed to frame_1_psmct32
TSPILL_TBP0 = 16  # texture @ 0x1000 (above the 16-row FB)
TSPILL_TBW = 1
TSPILL_TEXW = 8  # texture width in texels
TSPILL_TEXH = 8  # texture height in texels
TSPILL_ZBP = 2  # passed to zbuf1_pack; the builder notes it is ignored in tile mode
TSPILL_DISPLAY1_HI = 0x0000_F00F  # 16x16
TSPILL_COLOR1 = 0xFF0000FF  # ABGR opaque RED (color1, P1 / region A)
TSPILL_COLOR2 = 0xFFFF0000  # ABGR opaque BLUE (color2, P2 / region B)
|
||
|
||
def tspill_texel(tx, ty):
    """Return the opaque two-region texel: color1 on the left, color2 right.

    Columns with ``tx < 4`` give TSPILL_COLOR1, the rest TSPILL_COLOR2;
    ``ty`` is not used.  Both colors carry A=0xFF, so source-over with full
    alpha is opaque and the rendered color equals the texel color.
    """
    if tx < 4:
        return TSPILL_COLOR1
    return TSPILL_COLOR2
|
||
|
||
def build_tile_spill_demo_payload():
    """Assemble the Ch323 two-batch spill/reload GIF payload (qword list).

    Emission order:
      U1  4 A+D transfer registers, then the TSPILL_TEXW x TSPILL_TEXH
          texture from ``tspill_texel`` (four texels per qword, lane 0 low).
      U2  batch 1, 15 A+D, no EOP: six setup registers plus P1, the small
          top-left triangle at Z=0x8000 sampling texel (0,0) (color1).
      U3  batch 2, 9 A+D, EOP: P2, the large triangle at Z=0x6000 sampling
          texel (7,0) (color2); setup registers persist from U2.
    """
    frame_1_val = frame_1_psmct32(TSPILL_FBW)
    tex0_val = tex0_pack(TSPILL_TBP0, TSPILL_TBW, psm=0x00, tw=3, th=3)  # 8x8 PSMCT32

    def flat_tri_qws(corners, z, tex_u):
        # RGBAQ/UV/XYZ2 per corner; Z and UV are uniform across the triangle.
        out = []
        for sx, sy in corners:
            out.append(aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)))
            out.append(aplusd(R_UV, uv_data(tex_u, 0)))
            out.append(aplusd(R_XYZ2, xyz2_dataz(sx, sy, z)))
        return out

    # --- U1: upload the opaque 2-region 8x8 texture ---
    payload = [
        giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE),
        aplusd(R_BITBLTBUF, bitbltbuf_pack(TSPILL_TBP0, TSPILL_TBW, 0x00)),
        aplusd(R_TRXPOS, trxpos_pack(0, 0)),
        aplusd(R_TRXREG, trxreg_pack(TSPILL_TEXW, TSPILL_TEXH)),
        aplusd(R_TRXDIR, trxdir_pack(0)),
    ]
    image_qwords = (TSPILL_TEXW * TSPILL_TEXH) // 4
    payload.append(giftag(image_qwords, 0, 2, 0, 0))
    for first_texel in range(0, 4 * image_qwords, 4):
        packed = 0
        for lane in range(4):
            ty, tx = divmod(first_texel + lane, TSPILL_TEXW)
            packed |= (tspill_texel(tx, ty) & 0xFFFFFFFF) << (32 * lane)
        payload.append(packed)

    # --- U2: BATCH 1 — P1 near triangle (region A), 15 A+D, EOP=0 ---
    payload += [
        giftag(1, 0, 0, 15, 0x0EEE_EEEE_EEEE_EEEE),
        aplusd(R_PRIM, prim_tri_tme_abe()),
        aplusd(R_FRAME_1, frame_1_val),
        aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)),     # source-over
        aplusd(R_TEST_1, test1_geq()),                 # ZTE GEQUAL
        aplusd(R_ZBUF_1, zbuf1_pack(TSPILL_ZBP)),      # ignored in tile mode
        aplusd(R_TEX0_1, tex0_val),
    ]
    payload += flat_tri_qws(((0, 0), (8, 0), (0, 8)), 0x0000_8000, 0)

    # --- U3: BATCH 2 — P2 mid triangle (region A+B), 9 A+D, EOP=1 ---
    payload.append(giftag(1, 1, 0, 9, 0x0000_000E_EEEE_EEEE))
    payload += flat_tri_qws(((0, 0), (15, 0), (0, 15)), 0x0000_6000, 7)
    return payload
|
||
|
||
# Ch323 fixture emit: build the payload, check its size, build the matching
# bootlet, then write both .mem files.
tspill_demo_payload = build_tile_spill_demo_payload()
tspill_demo_qwc = len(tspill_demo_payload)
assert tspill_demo_qwc <= 95, f"tile-spill payload {tspill_demo_qwc} qwords may collide with splicer"
tspill_demo_bootlet = build_textured_demo_bootlet_disp(tspill_demo_qwc, TSPILL_DISPLAY1_HI, TSPILL_FBW)
write_bios_mem(
    "bios_tile_spill.mem", tspill_demo_bootlet,
    f"Ch323 tile spill/reload two-batch proof bootlet ({len(tspill_demo_bootlet)} words; "
    f"DISPLAY1=16x16; QWC={tspill_demo_qwc})"
)
write_payload_mem(
    "payload_tile_spill.mem", tspill_demo_payload,
    f"Ch323 tile spill/reload proof ({tspill_demo_qwc} qwords): P1 near(Z=0x8000,color1) region A + "
    f"P2 mid(Z=0x6000,color2) region A+B; 2 TME+ABE+ZTE tris = 2 tile batches"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch324 — 4x4 MULTI-TILE two-batch spill/reload proof. Same two-triangle Z model
|
||
# as Ch323 (P1 near color1 spills, P2 mid color2 reloads + depth-tests), but scaled
|
||
# to a 64x64 framebuffer = a 4x4 grid of 16x16 tiles (FBW=1). Geometry is placed to
|
||
# satisfy the grid acceptance:
|
||
# - P1 (batch1, near Z=0x8000, color1): SMALL tri (20,20)-(44,20)-(20,44) [x+y<64]
|
||
# - P2 (batch2, mid Z=0x6000, color2): LARGE tri (20,20)-(60,20)-(20,60) [x+y<80], P2 superset P1
|
||
# Overlap (x+y<64) spans tiles (1,1)/(2,1)/(1,2) -> reloaded Z=0x8000 -> P2 FAILS ->
|
||
# color1 SURVIVES in MULTIPLE tiles (depth survival > 1 tile). Region B (64<x+y<80)
|
||
# spans (3,1)/(2,2)/(1,3) -> reloaded clear 0x4000 -> P2 PASSES -> color2 (control).
|
||
# Both tris cross x=32/48 AND y=32/48 seams. Top tile row (y<20) is EMPTY: tile 0
|
||
# (col0,row0) is a LEADING empty tile; tile 15 (col3,row3, x+y up to 126 > 80) is a
|
||
# FULLY EMPTY tile rendered AFTER non-empty tiles. Texture: same boring opaque
|
||
# 2-region (left=color1, right=color2) as Ch323; UVs uniform so each prim is flat.
|
||
# Ch324 4x4 multi-tile spill/reload fixture parameters.
TS4_FBW = 1  # frame-buffer width argument passed to frame_1_psmct32
TS4_TBP0 = 64  # texture @ 0x4000 (above the 64-row, 16KiB FB)
TS4_TBW = 1
TS4_TEXW = 8  # texture width in texels
TS4_TEXH = 8  # texture height in texels
TS4_ZBP = 8  # passed to zbuf1_pack; the builder notes it is ignored in tile mode
TS4_DISPLAY1_HI = (63 << 12) | 63  # DW=63 (64 wide), DH=63 (64 tall)
TS4_COLOR1 = 0xFF0000FF  # ABGR opaque RED (color1, P1 / region A)
TS4_COLOR2 = 0xFFFF0000  # ABGR opaque BLUE (color2, P2 / region B)
|
||
|
||
def ts4_texel(tx, ty):
    """Return TS4_COLOR1 for columns ``tx < 4``, otherwise TS4_COLOR2.

    ``ty`` is not used: the texture is two vertical bands.
    """
    if tx < 4:
        return TS4_COLOR1
    return TS4_COLOR2
|
||
|
||
def build_tile_spill_4x4_demo_payload():
    """Assemble the Ch324 4x4-grid spill/reload GIF payload (qword list).

    Emission order:
      U1  4 A+D transfer registers, then the TS4_TEXW x TS4_TEXH texture
          from ``ts4_texel`` (four texels per qword, lane 0 low).
      U2  batch 1, 15 A+D, no EOP: six setup registers plus P1, triangle
          (20,20)-(44,20)-(20,44) at Z=0x8000 sampling texel (0,0).
      U3  batch 2, 9 A+D, EOP: P2, triangle (20,20)-(60,20)-(20,60) at
          Z=0x6000 sampling texel (7,0); setup persists from U2.
    """
    frame_1_val = frame_1_psmct32(TS4_FBW)
    tex0_val = tex0_pack(TS4_TBP0, TS4_TBW, psm=0x00, tw=3, th=3)  # 8x8 PSMCT32

    def flat_tri_qws(corners, z, tex_u):
        # RGBAQ/UV/XYZ2 per corner; Z and UV are uniform across the triangle.
        out = []
        for sx, sy in corners:
            out.append(aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)))
            out.append(aplusd(R_UV, uv_data(tex_u, 0)))
            out.append(aplusd(R_XYZ2, xyz2_dataz(sx, sy, z)))
        return out

    # --- U1: upload the opaque 2-region 8x8 texture ---
    payload = [
        giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE),
        aplusd(R_BITBLTBUF, bitbltbuf_pack(TS4_TBP0, TS4_TBW, 0x00)),
        aplusd(R_TRXPOS, trxpos_pack(0, 0)),
        aplusd(R_TRXREG, trxreg_pack(TS4_TEXW, TS4_TEXH)),
        aplusd(R_TRXDIR, trxdir_pack(0)),
    ]
    image_qwords = (TS4_TEXW * TS4_TEXH) // 4
    payload.append(giftag(image_qwords, 0, 2, 0, 0))
    for first_texel in range(0, 4 * image_qwords, 4):
        packed = 0
        for lane in range(4):
            ty, tx = divmod(first_texel + lane, TS4_TEXW)
            packed |= (ts4_texel(tx, ty) & 0xFFFFFFFF) << (32 * lane)
        payload.append(packed)

    # --- U2: BATCH 1 — P1 near tri (region A), 15 A+D, EOP=0 ---
    payload += [
        giftag(1, 0, 0, 15, 0x0EEE_EEEE_EEEE_EEEE),
        aplusd(R_PRIM, prim_tri_tme_abe()),
        aplusd(R_FRAME_1, frame_1_val),
        aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)),  # source-over
        aplusd(R_TEST_1, test1_geq()),              # ZTE GEQUAL
        aplusd(R_ZBUF_1, zbuf1_pack(TS4_ZBP)),      # ignored in tile mode
        aplusd(R_TEX0_1, tex0_val),
    ]
    payload += flat_tri_qws(((20, 20), (44, 20), (20, 44)), 0x0000_8000, 0)

    # --- U3: BATCH 2 — P2 mid tri (region A+B), 9 A+D, EOP=1 ---
    payload.append(giftag(1, 1, 0, 9, 0x0000_000E_EEEE_EEEE))
    payload += flat_tri_qws(((20, 20), (60, 20), (20, 60)), 0x0000_6000, 7)
    return payload
|
||
|
||
# Ch324 fixture emit: build the payload, check its size, build the matching
# bootlet, then write both .mem files.
ts4_demo_payload = build_tile_spill_4x4_demo_payload()
ts4_demo_qwc = len(ts4_demo_payload)
assert ts4_demo_qwc <= 95, f"tile-spill-4x4 payload {ts4_demo_qwc} qwords may collide with splicer"
ts4_demo_bootlet = build_textured_demo_bootlet_disp(ts4_demo_qwc, TS4_DISPLAY1_HI, TS4_FBW)
write_bios_mem(
    "bios_tile_spill4x4.mem", ts4_demo_bootlet,
    f"Ch324 4x4 multi-tile spill/reload two-batch proof bootlet ({len(ts4_demo_bootlet)} words; "
    f"DISPLAY1=64x64; QWC={ts4_demo_qwc})"
)
write_payload_mem(
    "payload_tile_spill4x4.mem", ts4_demo_payload,
    f"Ch324 4x4 spill/reload proof ({ts4_demo_qwc} qwords): P1 near(Z=0x8000,color1) small + "
    f"P2 mid(Z=0x6000,color2) large, cross-seam, 4x4 grid; depth survival in overlap tiles"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch325 — 8x8 MULTI-TILE two-batch spill/reload proof. The Ch324 4x4 scene scaled
|
||
# 2x to a 128x128 framebuffer = an 8x8 grid of 16x16 tiles (FBW=2). Same Z model:
|
||
# - P1 (batch1, near Z=0x8000, color1): tri (40,40)-(88,40)-(40,88) [x+y<128]
|
||
# - P2 (batch2, mid Z=0x6000, color2): tri (40,40)-(120,40)-(40,120)[x+y<160], superset
|
||
# Overlap (x+y<128) keeps color1 (depth survival) across MANY tiles; region B
|
||
# (128<x+y<160) takes color2; top rows (y<40) + bottom-right (x+y>160) tiles stay
|
||
# EMPTY. Both tris cross many x/y=16k seams. Texture: same opaque 2-region.
|
||
# Ch325 8x8 multi-tile spill/reload fixture parameters.  The TS8 texture
# constants are also reused by the 16x16 builder further down.
TS8_FBW = 2  # frame-buffer width argument passed to frame_1_psmct32
TS8_TBP0 = 0  # Ch326 — texture @ 0x0 (FB is LPDDR-only, BRAM FB region free)
TS8_TBW = 2
TS8_TEXW = 8  # texture width in texels
TS8_TEXH = 8  # texture height in texels
TS8_ZBP = 16  # passed to zbuf1_pack; the builder notes it is ignored in tile mode
TS8_DISPLAY1_HI = (127 << 12) | 127  # DW=127 (128 wide), DH=127 (128 tall)
TS8_COLOR1 = 0xFF0000FF  # ABGR opaque RED (color1, P1)
TS8_COLOR2 = 0xFFFF0000  # ABGR opaque BLUE (color2, P2)
|
||
|
||
def ts8_texel(tx, ty):
    """Return TS8_COLOR1 for columns ``tx < 4``, otherwise TS8_COLOR2.

    ``ty`` is not used: the texture is two vertical bands.
    """
    if tx < 4:
        return TS8_COLOR1
    return TS8_COLOR2
|
||
|
||
def build_tile_spill_8x8_demo_payload():
    """Assemble the Ch325 8x8-grid spill/reload GIF payload (qword list).

    Emission order:
      U1  4 A+D transfer registers, then the TS8_TEXW x TS8_TEXH texture
          from ``ts8_texel`` (four texels per qword, lane 0 low).
      U2  batch 1, 15 A+D, no EOP: six setup registers plus P1, triangle
          (40,40)-(88,40)-(40,88) at Z=0x8000 sampling texel (0,0).
      U3  batch 2, 9 A+D, EOP: P2, triangle (40,40)-(120,40)-(40,120) at
          Z=0x6000 sampling texel (7,0); setup persists from U2.
    """
    frame_1_val = frame_1_psmct32(TS8_FBW)
    tex0_val = tex0_pack(TS8_TBP0, TS8_TBW, psm=0x00, tw=3, th=3)  # 8x8 PSMCT32

    def flat_tri_qws(corners, z, tex_u):
        # RGBAQ/UV/XYZ2 per corner; Z and UV are uniform across the triangle.
        out = []
        for sx, sy in corners:
            out.append(aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)))
            out.append(aplusd(R_UV, uv_data(tex_u, 0)))
            out.append(aplusd(R_XYZ2, xyz2_dataz(sx, sy, z)))
        return out

    # --- U1: upload the opaque 2-region 8x8 texture ---
    payload = [
        giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE),
        aplusd(R_BITBLTBUF, bitbltbuf_pack(TS8_TBP0, TS8_TBW, 0x00)),
        aplusd(R_TRXPOS, trxpos_pack(0, 0)),
        aplusd(R_TRXREG, trxreg_pack(TS8_TEXW, TS8_TEXH)),
        aplusd(R_TRXDIR, trxdir_pack(0)),
    ]
    image_qwords = (TS8_TEXW * TS8_TEXH) // 4
    payload.append(giftag(image_qwords, 0, 2, 0, 0))
    for first_texel in range(0, 4 * image_qwords, 4):
        packed = 0
        for lane in range(4):
            ty, tx = divmod(first_texel + lane, TS8_TEXW)
            packed |= (ts8_texel(tx, ty) & 0xFFFFFFFF) << (32 * lane)
        payload.append(packed)

    # --- U2: BATCH 1 — P1 near tri, 15 A+D, EOP=0 ---
    payload += [
        giftag(1, 0, 0, 15, 0x0EEE_EEEE_EEEE_EEEE),
        aplusd(R_PRIM, prim_tri_tme_abe()),
        aplusd(R_FRAME_1, frame_1_val),
        aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)),  # source-over
        aplusd(R_TEST_1, test1_geq()),              # ZTE GEQUAL
        aplusd(R_ZBUF_1, zbuf1_pack(TS8_ZBP)),      # ignored in tile mode
        aplusd(R_TEX0_1, tex0_val),
    ]
    payload += flat_tri_qws(((40, 40), (88, 40), (40, 88)), 0x0000_8000, 0)

    # --- U3: BATCH 2 — P2 mid tri, 9 A+D, EOP=1 (ctx persists) ---
    payload.append(giftag(1, 1, 0, 9, 0x0000_000E_EEEE_EEEE))
    payload += flat_tri_qws(((40, 40), (120, 40), (40, 120)), 0x0000_6000, 7)
    return payload
|
||
|
||
# Ch325 fixture emit: build the payload, check its size, build the matching
# bootlet, then write both .mem files.
ts8_demo_payload = build_tile_spill_8x8_demo_payload()
ts8_demo_qwc = len(ts8_demo_payload)
assert ts8_demo_qwc <= 95, f"tile-spill-8x8 payload {ts8_demo_qwc} qwords may collide with splicer"
ts8_demo_bootlet = build_textured_demo_bootlet_disp(ts8_demo_qwc, TS8_DISPLAY1_HI, TS8_FBW)
write_bios_mem(
    "bios_tile_spill8x8.mem", ts8_demo_bootlet,
    f"Ch325 8x8 multi-tile spill/reload two-batch proof bootlet ({len(ts8_demo_bootlet)} words; "
    f"DISPLAY1=128x128; QWC={ts8_demo_qwc})"
)
write_payload_mem(
    "payload_tile_spill8x8.mem", ts8_demo_payload,
    f"Ch325 8x8 spill/reload proof ({ts8_demo_qwc} qwords): P1 near(Z=0x8000,color1) + "
    f"P2 mid(Z=0x6000,color2), cross-seam, 8x8 grid / 128x128; depth survival in overlap tiles"
)
|
||
|
||
# ===== Ch327b — 16x16 grid = 256x256 PSMCT32 raster FB (2x the Ch325 8x8 scene) =====
|
||
# Ch327b parameters; texture and ZBP settings are shared with the TS8 set.
TS16_FBW = 4  # 256 px / 64
TS16_DISPLAY1_HI = (255 << 12) | 255  # DW=255 (256 wide), DH=255 (256 tall)
|
||
def build_tile_spill_16x16_demo_payload():
    """Assemble the Ch327b 16x16-grid spill/reload GIF payload (qword list).

    Same structure as the 8x8 payload, reusing the TS8 texture constants,
    ``ts8_texel`` and TS8_ZBP; only FRAME_1 (TS16_FBW) and the triangle
    coordinates (2x the 8x8 ones) differ:
      P1 (80,80)-(176,80)-(80,176) at Z=0x8000 sampling texel (0,0),
      P2 (80,80)-(240,80)-(80,240) at Z=0x6000 sampling texel (7,0).
    """
    frame_1_val = frame_1_psmct32(TS16_FBW)
    tex0_val = tex0_pack(TS8_TBP0, TS8_TBW, psm=0x00, tw=3, th=3)  # same 8x8 texture @ vram 0

    def flat_tri_qws(corners, z, tex_u):
        # RGBAQ/UV/XYZ2 per corner; Z and UV are uniform across the triangle.
        out = []
        for sx, sy in corners:
            out.append(aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)))
            out.append(aplusd(R_UV, uv_data(tex_u, 0)))
            out.append(aplusd(R_XYZ2, xyz2_dataz(sx, sy, z)))
        return out

    # --- U1: upload the opaque 2-region 8x8 texture (identical to 8x8) ---
    payload = [
        giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE),
        aplusd(R_BITBLTBUF, bitbltbuf_pack(TS8_TBP0, TS8_TBW, 0x00)),
        aplusd(R_TRXPOS, trxpos_pack(0, 0)),
        aplusd(R_TRXREG, trxreg_pack(TS8_TEXW, TS8_TEXH)),
        aplusd(R_TRXDIR, trxdir_pack(0)),
    ]
    image_qwords = (TS8_TEXW * TS8_TEXH) // 4
    payload.append(giftag(image_qwords, 0, 2, 0, 0))
    for first_texel in range(0, 4 * image_qwords, 4):
        packed = 0
        for lane in range(4):
            ty, tx = divmod(first_texel + lane, TS8_TEXW)
            packed |= (ts8_texel(tx, ty) & 0xFFFFFFFF) << (32 * lane)
        payload.append(packed)

    # --- U2: BATCH 1 — P1 near tri, Z=0x8000, UV->color1 (coords 2x the 8x8) ---
    payload += [
        giftag(1, 0, 0, 15, 0x0EEE_EEEE_EEEE_EEEE),
        aplusd(R_PRIM, prim_tri_tme_abe()),
        aplusd(R_FRAME_1, frame_1_val),
        aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)),
        aplusd(R_TEST_1, test1_geq()),
        aplusd(R_ZBUF_1, zbuf1_pack(TS8_ZBP)),
        aplusd(R_TEX0_1, tex0_val),
    ]
    payload += flat_tri_qws(((80, 80), (176, 80), (80, 176)), 0x0000_8000, 0)

    # --- U3: BATCH 2 — P2 mid tri, Z=0x6000, UV->color2 (coords 2x) ---
    payload.append(giftag(1, 1, 0, 9, 0x0000_000E_EEEE_EEEE))
    payload += flat_tri_qws(((80, 80), (240, 80), (80, 240)), 0x0000_6000, 7)
    return payload
|
||
|
||
# Ch327b fixture emit: build the payload, check its size, build the matching
# bootlet, then write both .mem files.
ts16_demo_payload = build_tile_spill_16x16_demo_payload()
ts16_demo_qwc = len(ts16_demo_payload)
assert ts16_demo_qwc <= 95, f"tile-spill-16x16 payload {ts16_demo_qwc} qwords may collide with splicer"
ts16_demo_bootlet = build_textured_demo_bootlet_disp(ts16_demo_qwc, TS16_DISPLAY1_HI, TS16_FBW)
write_bios_mem(
    "bios_tile_spill16x16.mem", ts16_demo_bootlet,
    f"Ch327b 16x16 multi-tile spill/reload two-batch proof bootlet ({len(ts16_demo_bootlet)} words; "
    f"DISPLAY1=256x256; QWC={ts16_demo_qwc})"
)
write_payload_mem(
    "payload_tile_spill16x16.mem", ts16_demo_payload,
    f"Ch327b 16x16 spill/reload proof ({ts16_demo_qwc} qwords): P1 near(Z=0x8000,color1) + "
    f"P2 mid(Z=0x6000,color2), cross-seam, 16x16 grid / 256x256; depth survival in overlap tiles"
)
|
||
|
||
|
||
# Ch303 fixture emit (placed after the later spill fixtures in file order):
# build the payload, check its size, build the matching bootlet, then write
# both .mem files.
tile_demo_payload = build_tile_demo_payload()
tile_demo_qwc = len(tile_demo_payload)
assert tile_demo_qwc <= 95, f"tile payload {tile_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
tile_demo_bootlet = build_textured_demo_bootlet_disp(tile_demo_qwc, TILE_DISPLAY1_HI, TILE_FBW)

write_bios_mem(
    "bios_tile.mem", tile_demo_bootlet,
    f"Ch303 TILE-LOCAL combined demo BIOS bootlet ({len(tile_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 16x16; QWC={tile_demo_qwc}"
)
write_payload_mem(
    "payload_tile.mem", tile_demo_payload,
    f"Ch303 TILE-LOCAL combined demo GIF payload ({tile_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); 8x8 translucent texture + combined TRI rendered to on-chip 16x16 tile"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch304 — 2x2 MULTI-TILE demo. ONE combined TME+ABE+ZTE triangle spanning the
|
||
# 32x32 region (a 2x2 grid of 16x16 tiles), crossing BOTH tile seams (x=16 AND
|
||
# y=16). The renderer re-tests the triangle against each of the 4 tiles, clears+
|
||
# renders+flushes each independently; seams must be continuous (attributes are
|
||
# screen-space). Same clear/texture model as Ch303.
|
||
# VRAM 16 KiB: FB 32 rows x 256 B/row = 0x0000..0x1FFF (32x32, FBW=1); texture
|
||
# @ TBP0=32 (0x2000); Z is on-chip (tile_z), none in VRAM.
|
||
# Ch304 2x2 multi-tile demo parameters.
TILE2_FBW = 1  # frame-buffer width argument passed to frame_1_psmct32
TILE2_TBP0 = 32  # texture @ 0x2000 (above the 32-row FB)
TILE2_TBW = 1
TILE2_TEXW = 8  # texture width in texels
TILE2_TEXH = 8  # texture height in texels
TILE2_DISPLAY1_HI = (31 << 12) | 31  # DW=31 (32 wide), DH=31 (32 tall)
# triangle spanning the 2x2 grid; crosses x=16 and y=16. (screen x,y; Z; texel u,v)
TILE2_V0 = (3, 3, 0x0000_6000, 0, 0)  # top-left (PASS)
TILE2_V1 = (28, 3, 0x0000_6000, 7, 0)  # top-right (PASS)
TILE2_V2 = (16, 29, 0x0000_2000, 3, 7)  # bottom (FAIL)
|
||
|
||
|
||
def build_tile2x2_demo_payload():
    """Build the Ch304 GIF payload: one 8x8 texture upload plus one combined triangle.

    Returns:
        A list of integer qwords in stream order:
          1. a 4-register A+D packet (BITBLTBUF, TRXPOS, TRXREG, TRXDIR) that
             sets up the texture upload to TILE2_TBP0;
          2. an IMAGE packet carrying the TILE2_TEXW x TILE2_TEXH texels from
             comb_texel(), row-major, four 32-bit texels per qword with lane 0
             in the low 32 bits;
          3. a 15-register A+D packet with EOP set: PRIM, FRAME_1, ALPHA_1,
             TEST_1, ZBUF_1, TEX0_1, then RGBAQ/UV/XYZ2 for TILE2_V0..V2.
    """
    qw = []
    frame_1_val = frame_1_psmct32(TILE2_FBW)
    tex0_val = tex0_pack(TILE2_TBP0, TILE2_TBW, psm=0x00, tw=3, th=3)  # PSMCT32 8x8

    # --- texture upload: transfer setup registers -------------------------
    qw.append(giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE))
    qw.append(aplusd(R_BITBLTBUF, bitbltbuf_pack(TILE2_TBP0, TILE2_TBW, 0x00)))
    qw.append(aplusd(R_TRXPOS, trxpos_pack(0, 0)))
    qw.append(aplusd(R_TRXREG, trxreg_pack(TILE2_TEXW, TILE2_TEXH)))
    qw.append(aplusd(R_TRXDIR, trxdir_pack(0)))

    # --- texture upload: IMAGE data, 4 texels per qword -------------------
    n_image = (TILE2_TEXW * TILE2_TEXH) // 4
    qw.append(giftag(n_image, 0, 2, 0, 0))
    for i in range(n_image):
        word = 0
        for lane in range(4):
            # Linear texel index -> (row, column) within the texture.
            ty, tx = divmod(i * 4 + lane, TILE2_TEXW)
            word |= (comb_texel(tx, ty) & 0xFFFFFFFF) << (32 * lane)
        qw.append(word)

    # --- the single combined triangle -------------------------------------
    qw.append(giftag(1, 1, 0, 15, 0x0EEE_EEEE_EEEE_EEEE))  # 15 A+D, EOP
    qw.append(aplusd(R_PRIM, prim_tri_tme_abe()))
    qw.append(aplusd(R_FRAME_1, frame_1_val))
    qw.append(aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)))
    qw.append(aplusd(R_TEST_1, test1_geq()))
    qw.append(aplusd(R_ZBUF_1, zbuf1_pack(2)))  # ignored in tile mode
    qw.append(aplusd(R_TEX0_1, tex0_val))
    for (sx, sy, sz, tu, tv) in (TILE2_V0, TILE2_V1, TILE2_V2):
        qw.append(aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)))
        qw.append(aplusd(R_UV, uv_data(tu, tv)))
        qw.append(aplusd(R_XYZ2, xyz2_dataz(sx, sy, sz)))
    return qw
|
||
|
||
|
||
# Ch304 fixture emission. The payload is built first because the bootlet needs
# its final qword count (QWC) for the DMA kickoff.
tile2_demo_payload = build_tile2x2_demo_payload()
tile2_demo_qwc = len(tile2_demo_payload)
# Build-time guard against overlapping the heartbeat splicer (see message).
assert tile2_demo_qwc <= 95, f"tile2x2 payload {tile2_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
tile2_demo_bootlet = build_textured_demo_bootlet_disp(tile2_demo_qwc, TILE2_DISPLAY1_HI, TILE2_FBW)

write_bios_mem(
    "bios_tile2x2.mem", tile2_demo_bootlet,
    f"Ch304 2x2 MULTI-TILE demo BIOS bootlet ({len(tile2_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 32x32; QWC={tile2_demo_qwc}"
)
write_payload_mem(
    "payload_tile2x2.mem", tile2_demo_payload,
    f"Ch304 2x2 MULTI-TILE demo GIF payload ({tile2_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); 8x8 texture + combined TRI spanning 2x2 tiles (crosses x=16,y=16 seams)"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch305 — MULTI-PRIMITIVE tiled scene over the 2x2 grid. A fixed LIST of 3
|
||
# combined TME+ABE+ZTE primitives, all spanning the 32x32 region (2x2 grid of
|
||
# 16x16 on-chip tiles), re-rendered per tile IN ORDER so later primitives
|
||
# depth-test / alpha-blend over earlier ones within each tile. Proves draw
|
||
# order + depth + alpha interactions across tile seams.
|
||
#
|
||
# Clear : green (TILE_CLEAR_COLOR), Z=0x4000 (gs_stub params).
|
||
# P0 (blue) : opaque (texel A=0x80 -> source-over == Cs), FLAT Z=0x5000.
|
||
# Big background triangle; blue replaces green where inside.
|
||
# P1 (red) : opaque (A=0x80), FLAT Z=0x6000 (always in front of P0).
|
||
# Smaller triangle inside P0 -> red over blue.
|
||
# P2 (white) : translucent (A=0x40 -> 50% blend), FLAT Z=0x5800.
|
||
# Larger triangle: passes vs blue(0x5000)+green(0x4000) -> blends
|
||
# light-blue / light-green; FAILS vs red(0x6000) -> red shows
|
||
# through (occluded). Drawn last -> blends over P0/clear.
|
||
#
|
||
# FLAT Z per primitive (all 3 verts equal) => no within-triangle Z gradient, so
|
||
# every depth decision is a fixed primitive-Z vs stored-Z compare -> the TB's
|
||
# software reference is an exact integer Z-buffer + source-over replay (only
|
||
# triangle EDGES are fuzzy, skipped via barycentric margins).
|
||
#
|
||
# VRAM 16 KiB: 32-row FB @ 0..0x1FFF (FBW=1, 256 B/row); 3 solid 4x4 PSMCT32
|
||
# textures @ TBP0=32/36/40 (0x2000/0x2400/0x2800). With TBW=1 the texel-row
|
||
# stride is 64 texels = 0x100 B, so a 4-tall texture OCCUPIES 4 strided rows
|
||
# = 0x400 B (the upload writes, and the sampler reads, rows at base+0/100/200/
|
||
# 300). The triangles' interpolated v reaches ~3, so the full 0x400 span IS
|
||
# sampled — the textures MUST be spaced >=0x400 apart or a later upload's
|
||
# base-row overwrites an earlier texture's row 1/2/3 and the sampler reads a
|
||
# neighbouring primitive's colour (the depth/blend path is correct; this is
|
||
# purely a VRAM-layout collision). 4 TBP0 units = 0x400; all fit under VRAM
|
||
# 0x4000 (FB ends 0x2000). Z on-chip (tile_z).
|
||
# Ch305 layout constants (also reused by the later 32x32 tiled demos).
TMP_FBW = 1
TMP_TEXW = 4
TMP_TEXH = 4
TMP_TBP_BG = 32   # blue  @ 0x2000 (rows 0x2000..0x23FF)
TMP_TBP_MID = 36  # red   @ 0x2400 (rows 0x2400..0x27FF)
TMP_TBP_FG = 40   # white @ 0x2800 (rows 0x2800..0x2BFF)
TMP_TBW = 1
TMP_DISPLAY1_HI = (31 << 12) | 31  # DW=31 (32 wide), DH=31 (32 tall)

# (screen x, y, Z); UV is irrelevant (solid textures) but kept in-range [0..3].
# All three triangles cross BOTH tile seams (x=16, y=16). P1/P2 are narrower than
# P0 so P0's flanks stay pure blue. Regions (verified vs the TB's SW reference):
#   blue 24 (opaque bg) / red 29 (P1 over P0) / light-blue 26 (P2 blend over blue)
#   / occlusion 19 (P2 FAILS vs red -> red shows) / green control 416.
TMP_P0 = [(1, 1, 0x0000_5000), (30, 1, 0x0000_5000), (15, 30, 0x0000_5000)]  # blue bg (far)
TMP_P1 = [(8, 3, 0x0000_6000), (22, 3, 0x0000_6000), (15, 20, 0x0000_6000)]  # red front(near)
TMP_P2 = [(7, 9, 0x0000_5800), (23, 9, 0x0000_5800), (15, 28, 0x0000_5800)]  # white blend(mid)
TMP_UV = [(0, 0), (3, 0), (0, 3)]
|
||
|
||
|
||
def solid_texel(r, g, b, a):
    """PSMCT32 ABGR: A<<24 | B<<16 | G<<8 | R (matches comb_texel/blend lanes).

    Each channel is masked to its low 8 bits before packing, so out-of-range
    inputs cannot spill into a neighbouring channel.
    """
    return ((a & 0xFF) << 24) | ((b & 0xFF) << 16) | ((g & 0xFF) << 8) | (r & 0xFF)
|
||
|
||
|
||
def tmp_texture_upload(tbp0, texel):
    """A 4x4 solid-color PSMCT32 texture upload (linear, PSMCT32_SWIZZLE=0).

    Args:
        tbp0: destination base pointer handed to bitbltbuf_pack().
        texel: 32-bit texel value replicated over the whole texture (masked to
            32 bits here).

    Returns:
        List of qwords: a 4-register A+D setup packet (BITBLTBUF, TRXPOS,
        TRXREG, TRXDIR), an IMAGE giftag, then TMP_TEXW*TMP_TEXH/4 data qwords.
    """
    qw = []
    qw.append(giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE))
    qw.append(aplusd(R_BITBLTBUF, bitbltbuf_pack(tbp0, TMP_TBW, 0x00)))
    qw.append(aplusd(R_TRXPOS, trxpos_pack(0, 0)))
    qw.append(aplusd(R_TRXREG, trxreg_pack(TMP_TEXW, TMP_TEXH)))
    qw.append(aplusd(R_TRXDIR, trxdir_pack(0)))

    n_image = (TMP_TEXW * TMP_TEXH) // 4
    qw.append(giftag(n_image, 0, 2, 0, 0))

    # The texture is solid, so every data qword is the same four-lane value;
    # build it once rather than per qword.
    texel32 = texel & 0xFFFFFFFF
    word = 0
    for lane in range(4):
        word |= texel32 << (32 * lane)
    qw.extend([word] * n_image)
    return qw
|
||
|
||
|
||
def tmp_triangle(tbp0, verts, eop, first, frame_val=None, alpha_val=None):
    """One combined TME+ABE+ZTE triangle as a single A+D GIF packet.

    Self-contained state: PRIM/ALPHA_1/TEST_1/ZBUF_1/TEX0_1 are re-issued for
    every triangle so draw order does not depend on register persistence.
    FRAME_1 is emitted only when ``first`` is true.

    Args:
        tbp0: base pointer of the 4x4 PSMCT32 texture to sample.
        verts: sequence of (screen x, screen y, Z) vertices; vertex i is
            paired with TMP_UV[i].
        eop: EOP flag for the giftag (set on the last triangle of the packet
            stream).
        first: when true, prepend FRAME_1 (15 registers instead of 14).
        frame_val: FRAME_1 value override; defaults to
            frame_1_psmct32(TMP_FBW).
            NOTE(review): the Ch308 PSMCT16 payload builder keeps this default
            (FRAME_1.PSM stays PSMCT32 there).
        alpha_val: ALPHA_1 value override; defaults to source-over.

    Returns:
        List of qwords: giftag followed by the A+D register writes.
    """
    if frame_val is None:
        frame_val = frame_1_psmct32(TMP_FBW)
    if alpha_val is None:
        alpha_val = alpha_pack(0, 1, 0, 1)  # source-over (default)

    qw = []
    tex0_val = tex0_pack(tbp0, TMP_TBW, psm=0x00, tw=2, th=2)  # 4x4 PSMCT32

    # A+D list: PRIM,ALPHA_1,TEST_1,ZBUF_1,TEX0_1 (5) [+FRAME_1 on first]
    # + 3*(RGBAQ,UV,XYZ2)=9 -> 15 on the first triangle, 14 otherwise.
    nreg = (6 if first else 5) + 9
    qw.append(giftag(1, eop, 0, nreg, int('E' * nreg, 16)))  # nreg A+D (0xE) descriptors
    if first:
        qw.append(aplusd(R_FRAME_1, frame_val))

    qw.append(aplusd(R_PRIM, prim_tri_tme_abe()))
    qw.append(aplusd(R_ALPHA_1, alpha_val))      # blend mode (default source-over)
    qw.append(aplusd(R_TEST_1, test1_geq()))     # ZTE GEQUAL
    qw.append(aplusd(R_ZBUF_1, zbuf1_pack(2)))   # ignored in tile mode
    qw.append(aplusd(R_TEX0_1, tex0_val))
    for i, (sx, sy, sz) in enumerate(verts):
        tu, tv = TMP_UV[i]
        qw.append(aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)))  # color from texel (DECAL)
        qw.append(aplusd(R_UV, uv_data(tu, tv)))
        qw.append(aplusd(R_XYZ2, xyz2_dataz(sx, sy, sz)))
    return qw
|
||
|
||
|
||
def build_tile_multiprim_demo_payload():
    """Build the Ch305 payload: three solid 4x4 texture uploads, then three triangles.

    Triangles are emitted in draw order P0 (blue), P1 (red), P2 (translucent
    white). FRAME_1 rides on the first triangle and EOP on the last.

    Returns:
        List of qwords for the whole GIF stream.
    """
    qw = []
    qw += tmp_texture_upload(TMP_TBP_BG, solid_texel(0x00, 0x00, 0xFF, 0x80))   # opaque blue
    qw += tmp_texture_upload(TMP_TBP_MID, solid_texel(0xFF, 0x00, 0x00, 0x80))  # opaque red
    qw += tmp_texture_upload(TMP_TBP_FG, solid_texel(0xFF, 0xFF, 0xFF, 0x40))   # translucent white
    qw += tmp_triangle(TMP_TBP_BG, TMP_P0, eop=0, first=True)
    qw += tmp_triangle(TMP_TBP_MID, TMP_P1, eop=0, first=False)
    qw += tmp_triangle(TMP_TBP_FG, TMP_P2, eop=1, first=False)
    return qw
|
||
|
||
|
||
# Ch305 fixture emission. The payload is built first because the bootlet needs
# its final qword count (QWC) for the DMA kickoff.
tmp_demo_payload = build_tile_multiprim_demo_payload()
tmp_demo_qwc = len(tmp_demo_payload)
# Build-time guard against overlapping the heartbeat splicer (see message).
assert tmp_demo_qwc <= 95, f"tile_multiprim payload {tmp_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
tmp_demo_bootlet = build_textured_demo_bootlet_disp(tmp_demo_qwc, TMP_DISPLAY1_HI, TMP_FBW)

write_bios_mem(
    "bios_tile_multiprim.mem", tmp_demo_bootlet,
    f"Ch305 MULTI-PRIMITIVE tiled-scene BIOS bootlet ({len(tmp_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 32x32; QWC={tmp_demo_qwc}"
)
write_payload_mem(
    "payload_tile_multiprim.mem", tmp_demo_payload,
    f"Ch305 MULTI-PRIMITIVE tiled-scene GIF payload ({tmp_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); 3 solid 4x4 textures + 3 combined tris (blue bg / red front / "
    f"white blend) rendered as a LIST per tile over a 2x2 grid"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch306 — GS SCISSOR clipping in the tiled renderer. Same 3-primitive scene as
|
||
# Ch305 (multi-primitive 2x2 grid) PLUS a SCISSOR_1 rectangle that clips the
|
||
# whole scene. Effective raster bounds = primitive bbox INTERSECT tile bbox
|
||
# INTERSECT scissor rect. Pixels outside the scissor stay the CLEAR color (the
|
||
# walker never visits them -> no color/Z write). The rect crosses BOTH tile
|
||
# seams (x=16, y=16) so the clip is proven across tiles.
|
||
# Scissor (inclusive) = [SCAX0..SCAX1] x [SCAY0..SCAY1] = [9..22] x [6..20].
|
||
# Crosses x=16 (9<16<22) and y=16 (6<16<20); clips a FAT chunk of the scene
|
||
# (the wide blue band/top, the sides, and the apex) so the clip is substantial
|
||
# on every side while keeping the red/blend interactions inside. (Needs
|
||
# SCISSOR_ENABLE=1 on the core.)
|
||
TSC_SX0, TSC_SX1, TSC_SY0, TSC_SY1 = 9, 22, 6, 20
|
||
|
||
|
||
def build_tile_scissor_demo_payload():
    """Build the Ch306 payload: the Ch305 scene preceded by one SCISSOR_1 write.

    Stream order: three solid 4x4 texture uploads, a 1-register A+D packet
    setting SCISSOR_1 to [TSC_SX0..TSC_SX1] x [TSC_SY0..TSC_SY1], then the
    three triangles (FRAME_1 on the first, EOP on the last).

    Returns:
        List of qwords for the whole GIF stream.
    """
    qw = []
    qw += tmp_texture_upload(TMP_TBP_BG, solid_texel(0x00, 0x00, 0xFF, 0x80))   # opaque blue
    qw += tmp_texture_upload(TMP_TBP_MID, solid_texel(0xFF, 0x00, 0x00, 0x80))  # opaque red
    qw += tmp_texture_upload(TMP_TBP_FG, solid_texel(0xFF, 0xFF, 0xFF, 0x40))   # translucent white
    # SCISSOR_1 is GS state: set it once (its own 1-A+D GIF tag) before the prims.
    qw.append(giftag(1, 0, 0, 1, 0xE))
    qw.append(aplusd(R_SCISSOR_1, scissor_pack(TSC_SX0, TSC_SX1, TSC_SY0, TSC_SY1)))
    qw += tmp_triangle(TMP_TBP_BG, TMP_P0, eop=0, first=True)
    qw += tmp_triangle(TMP_TBP_MID, TMP_P1, eop=0, first=False)
    qw += tmp_triangle(TMP_TBP_FG, TMP_P2, eop=1, first=False)
    return qw
|
||
|
||
|
||
# Ch306 fixture emission. The payload is built first because the bootlet needs
# its final qword count (QWC) for the DMA kickoff.
tsc_demo_payload = build_tile_scissor_demo_payload()
tsc_demo_qwc = len(tsc_demo_payload)
# Build-time guard against overlapping the heartbeat splicer (see message).
assert tsc_demo_qwc <= 95, f"tile_scissor payload {tsc_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
tsc_demo_bootlet = build_textured_demo_bootlet_disp(tsc_demo_qwc, TMP_DISPLAY1_HI, TMP_FBW)

write_bios_mem(
    "bios_tile_scissor.mem", tsc_demo_bootlet,
    f"Ch306 SCISSOR-clipped tiled-scene BIOS bootlet ({len(tsc_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 32x32; QWC={tsc_demo_qwc}"
)
write_payload_mem(
    "payload_tile_scissor.mem", tsc_demo_payload,
    f"Ch306 SCISSOR-clipped tiled-scene GIF payload ({tsc_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); Ch305 3-prim scene + SCISSOR_1 [{TSC_SX0}..{TSC_SX1}]x[{TSC_SY0}..{TSC_SY1}]"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch307 — GS texture WRAP MODES (REPEAT + CLAMP). Two textured combined prims
|
||
# over the tiled path, sampling the SAME striped texture with UV running 0..8
|
||
# (2x the 4-wide texture) so out-of-range U exercises the wrap mode:
|
||
# Texture 4x4 PSMCT32 (opaque A=0x80): columns u=0,1 WHITE, u=2,3 BLUE.
|
||
# REPEAT prim (top band): CLAMP_1 WMS=WMT=0 -> white stripe at u=0 AND u=4
|
||
# (texture tiles) -> TWO white stripes.
|
||
# CLAMP prim (bottom band): CLAMP_1 WMS=WMT=1 -> white at u=0..1 only, u>=2
|
||
# sticks to the blue edge -> ONE white stripe.
|
||
# Both span x2..29 (cross the x=16 tile seam); flat Z=0x5000 (opaque over the
|
||
# green clear). Distinguishing region u in [4,6): REPEAT white, CLAMP blue.
|
||
# (Needs TEX_WRAP_ENABLE=1 on the core.)
|
||
# Ch307 layout / geometry constants.
TWR_TBP0 = 32  # texture @ 0x2000 (above the 32-row FB)
TWR_TBW = 1
TWR_TEXW = 4
TWR_TEXH = 4
TWR_Z = 0x0000_5000
WHITE_TEX = solid_texel(0xFF, 0xFF, 0xFF, 0x80)  # ABGR opaque white
BLUE_TEX = solid_texel(0x00, 0x00, 0xFF, 0x80)   # ABGR opaque blue
# REPEAT prim: right-triangle, u 0->8 along x (v_tex=1 constant). top band.
TWR_RV = [(2, 4, TWR_Z), (29, 4, TWR_Z), (2, 15, TWR_Z)]
TWR_RUV = [(0, 1), (8, 1), (0, 1)]
# CLAMP prim: same shape, bottom band.
TWR_CV = [(2, 17, TWR_Z), (29, 17, TWR_Z), (2, 28, TWR_Z)]
TWR_CUV = [(0, 1), (8, 1), (0, 1)]
|
||
|
||
|
||
def wrap_texel(tx, ty):
    """Return the striped Ch307 texel: WHITE_TEX for columns 0-1, BLUE_TEX otherwise.

    ``ty`` is accepted for a uniform (tx, ty) texel-callback shape but is
    unused: the stripes are vertical, so every row is identical.
    """
    return WHITE_TEX if tx <= 1 else BLUE_TEX
|
||
|
||
|
||
def twr_texture_upload(tbp0):
    """Upload the striped TWR_TEXW x TWR_TEXH PSMCT32 texture to ``tbp0``.

    Returns:
        List of qwords: a 4-register A+D setup packet, an IMAGE giftag, then
        the texels from wrap_texel() packed row-major, four per qword with
        lane 0 in the low 32 bits.
    """
    qw = []
    qw.append(giftag(1, 0, 0, 4, int('E' * 4, 16)))  # 4 A+D regs: BITBLTBUF,TRXPOS,TRXREG,TRXDIR
    qw.append(aplusd(R_BITBLTBUF, bitbltbuf_pack(tbp0, TWR_TBW, 0x00)))
    qw.append(aplusd(R_TRXPOS, trxpos_pack(0, 0)))
    qw.append(aplusd(R_TRXREG, trxreg_pack(TWR_TEXW, TWR_TEXH)))
    qw.append(aplusd(R_TRXDIR, trxdir_pack(0)))

    n_image = (TWR_TEXW * TWR_TEXH) // 4
    qw.append(giftag(n_image, 0, 2, 0, 0))
    for i in range(n_image):
        word = 0
        for lane in range(4):
            # Linear texel index -> (row, column) within the texture.
            ty, tx = divmod(i * 4 + lane, TWR_TEXW)
            word |= (wrap_texel(tx, ty) & 0xFFFFFFFF) << (32 * lane)
        qw.append(word)
    return qw
|
||
|
||
|
||
def twr_triangle(verts, uv3, wms, wmt, eop, first):
    """One textured combined triangle with an explicit CLAMP_1 wrap mode.

    Args:
        verts: sequence of (screen x, screen y, Z) vertices.
        uv3: per-vertex (u, v) texel coordinates, indexed like ``verts``.
        wms, wmt: wrap modes forwarded to clamp_pack() (the Ch307 demo uses
            0 for REPEAT and 1 for CLAMP).
        eop: EOP flag for the giftag.
        first: when true, prepend FRAME_1 (16 registers instead of 15).

    Returns:
        List of qwords: giftag followed by the A+D register writes.
    """
    qw = []
    tex0_val = tex0_pack(TWR_TBP0, TWR_TBW, psm=0x00, tw=2, th=2)  # 4x4 PSMCT32

    # PRIM,ALPHA,TEST,ZBUF,TEX0,CLAMP (6) [+FRAME on first] + 3*(RGBAQ,UV,XYZ2)=9
    nreg = (7 if first else 6) + 9
    qw.append(giftag(1, eop, 0, nreg, int('E' * nreg, 16)))
    if first:
        qw.append(aplusd(R_FRAME_1, frame_1_psmct32(TMP_FBW)))

    qw.append(aplusd(R_PRIM, prim_tri_tme_abe()))
    qw.append(aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)))  # source-over (A=0x80 -> opaque)
    qw.append(aplusd(R_TEST_1, test1_geq()))
    qw.append(aplusd(R_ZBUF_1, zbuf1_pack(2)))            # ignored in tile mode
    qw.append(aplusd(R_TEX0_1, tex0_val))
    qw.append(aplusd(R_CLAMP_1, clamp_pack(wms, wmt)))    # the wrap mode under test
    for i, (sx, sy, sz) in enumerate(verts):
        tu, tv = uv3[i]
        qw.append(aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)))  # color from texel (DECAL)
        qw.append(aplusd(R_UV, uv_data(tu, tv)))
        qw.append(aplusd(R_XYZ2, xyz2_dataz(sx, sy, sz)))
    return qw
|
||
|
||
|
||
def build_tile_wrap_demo_payload():
    """Build the Ch307 payload: striped texture upload, REPEAT triangle, CLAMP triangle.

    Both triangles sample the same texture at TWR_TBP0 and differ only in
    geometry (top vs bottom band) and wrap mode. FRAME_1 rides on the first
    triangle and EOP on the second.

    Returns:
        List of qwords for the whole GIF stream.
    """
    qw = []
    qw += twr_texture_upload(TWR_TBP0)
    qw += twr_triangle(TWR_RV, TWR_RUV, wms=0, wmt=0, eop=0, first=True)   # REPEAT
    qw += twr_triangle(TWR_CV, TWR_CUV, wms=1, wmt=1, eop=1, first=False)  # CLAMP
    return qw
|
||
|
||
|
||
# Ch307 fixture emission. The payload is built first because the bootlet needs
# its final qword count (QWC) for the DMA kickoff.
twr_demo_payload = build_tile_wrap_demo_payload()
twr_demo_qwc = len(twr_demo_payload)
# Build-time guard against overlapping the heartbeat splicer (see message).
assert twr_demo_qwc <= 95, f"tile_wrap payload {twr_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
twr_demo_bootlet = build_textured_demo_bootlet_disp(twr_demo_qwc, TMP_DISPLAY1_HI, TMP_FBW)

write_bios_mem(
    "bios_tile_wrap.mem", twr_demo_bootlet,
    f"Ch307 texture WRAP (repeat+clamp) BIOS bootlet ({len(twr_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 32x32; QWC={twr_demo_qwc}"
)
write_payload_mem(
    "payload_tile_wrap.mem", twr_demo_payload,
    f"Ch307 texture WRAP GIF payload ({twr_demo_qwc} qwords active at byte 0x100, padded to "
    f"{RAM_TOTAL_QWORDS}); striped 4x4 tex, REPEAT tri (top, 2 stripes) + CLAMP tri (bottom, 1 stripe)"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch308 — PSMCT16 tile color buffer. The SAME Ch305 3-primitive scene, but the
# on-chip tile color RAM + the flushed framebuffer are PSMCT16 (RGB5A1, 16-bit)
# instead of PSMCT32: TILE_COLOR_PSMCT16 makes the GS flush write 16-bit lanes,
# and DISPFB1.PSM=0x02 makes the PCRTC scanout unpack RGB5A1->ABGR (5-bit
# quantized). FRAME_1.PSM deliberately stays PSMCT32: the combined tile path is
# gated on a PSMCT32 destination (see build_tile_psmct16_demo_payload). Proves
# tile color can be narrower than 32-bit when the frame format allows it.
# (Needs TILE_COLOR_PSMCT16=1 on the core.) VRAM 16 KiB: PSMCT16 32x32 FB =
# 0..0x7FF; PSMCT32 textures @ 0x2000/0x2400/0x2800 (Ch305 spacing) — no overlap.
|
||
|
||
|
||
def build_psmct16_demo_bootlet_disp(qwc, display1_hi, fbw):
    """Ch305 one-shot bootlet but DISPFB1.PSM=PSMCT16 so scanout reads RGB5A1.

    Args:
        qwc: payload length in qwords, written to the DMA QWC register. Must
            fit the 16-bit ORI immediate.
        display1_hi: upper 32 bits of DISPLAY1 (stored at base+0x84; the low
            word at base+0x80 is written as zero).
        fbw: frame-buffer width forwarded to dispfb1_psmct16().

    Returns:
        List of encoded 32-bit instruction words, ending with a SYSCALL.
    """
    hi16 = (display1_hi >> 16) & 0xFFFF
    lo16 = display1_hi & 0xFFFF
    dispfb1_val = dispfb1_psmct16(fbw)
    # PSMCT16 DISPFB1 PSM field is at bit 15+, so the value exceeds 16 bits and
    # needs a full LUI+ORI load (not ORI-only like the PSMCT32 path).
    df_hi16 = (dispfb1_val >> 16) & 0xFFFF
    df_lo16 = dispfb1_val & 0xFFFF
    # enc_ori masks its immediate to 16 bits, so an out-of-range QWC would be
    # silently truncated into a wrong transfer length; reject it at build time.
    assert 0 <= qwc <= 0xFFFF, f"QWC {qwc} does not fit the 16-bit ORI immediate"
    return [
        # --- display setup: r1 = 0x1200_0000 (register block base) --------
        enc_lui(1, 0x1200),
        enc_lui(2, df_hi16),        # r2 = DISPFB1 value (upper half)
        enc_ori(2, 2, df_lo16),     #      ... | lower half
        enc_sw(2, 1, 0x0070),       # [r1+0x70] = DISPFB1
        enc_sw(0, 1, 0x0080),       # [r1+0x80] = 0            (DISPLAY1 low word)
        enc_lui(2, hi16),
        enc_ori(2, 2, lo16),        # r2 = display1_hi
        enc_sw(2, 1, 0x0084),       # [r1+0x84] = display1_hi  (DISPLAY1 high word)
        enc_ori(2, 0, 0x0001),
        enc_sw(2, 1, 0x0000),       # [r1+0x00] = 1  (presumably PMODE enable — TODO confirm)
        # --- DMA kickoff: r10 = 0x1000_A000 (DMAC channel-2 block) --------
        enc_lui(10, 0x1000),
        enc_ori(10, 10, 0xA000),
        enc_ori(11, 0, 0x0100),
        enc_sw(11, 10, 0x0010),     # MADR = 0x100 (payload start)
        enc_ori(11, 0, qwc),
        enc_sw(11, 10, 0x0020),     # QWC  = payload length
        enc_ori(11, 0, 0x0001),
        enc_sw(11, 10, 0x0000),     # CHCR = START
        enc_syscall(),
    ]
|
||
|
||
|
||
def build_tile_psmct16_demo_payload():
    """Build the Ch308 payload: the Ch305 three-texture / three-triangle stream.

    The GIF stream is the same sequence build_tile_multiprim_demo_payload
    emits; what makes this demo PSMCT16 lives outside the payload (see below).

    Returns:
        List of qwords for the whole GIF stream.
    """
    # FRAME_1.PSM stays PSMCT32: this core's COMBINED tile path is gated on a
    # PSMCT32 dest (frame_1_q[29:24]==0), so a PSMCT16 FRAME would disqualify the
    # primitives from the combined classification. The PSMCT16-ness lives in the
    # on-chip tile RAM + the flush (both gated by TILE_COLOR_PSMCT16, independent of
    # FRAME.PSM) and in DISPFB1.PSM=PSMCT16 (so scanout unpacks the flushed RGB5A1).
    qw = []
    qw += tmp_texture_upload(TMP_TBP_BG, solid_texel(0x00, 0x00, 0xFF, 0x80))   # opaque blue
    qw += tmp_texture_upload(TMP_TBP_MID, solid_texel(0xFF, 0x00, 0x00, 0x80))  # opaque red
    qw += tmp_texture_upload(TMP_TBP_FG, solid_texel(0xFF, 0xFF, 0xFF, 0x40))   # translucent white
    qw += tmp_triangle(TMP_TBP_BG, TMP_P0, eop=0, first=True)  # FRAME PSMCT32 (combined eligibility)
    qw += tmp_triangle(TMP_TBP_MID, TMP_P1, eop=0, first=False)
    qw += tmp_triangle(TMP_TBP_FG, TMP_P2, eop=1, first=False)
    return qw
|
||
|
||
|
||
# Ch308 fixture emission. The payload is built first because the bootlet needs
# its final qword count (QWC) for the DMA kickoff. This demo uses the PSMCT16
# bootlet builder so DISPFB1 selects a 16-bit scanout format.
t16_demo_payload = build_tile_psmct16_demo_payload()
t16_demo_qwc = len(t16_demo_payload)
# Build-time guard against overlapping the heartbeat splicer (see message).
assert t16_demo_qwc <= 95, f"tile_psmct16 payload {t16_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
t16_demo_bootlet = build_psmct16_demo_bootlet_disp(t16_demo_qwc, TMP_DISPLAY1_HI, TMP_FBW)

write_bios_mem(
    "bios_tile_psmct16.mem", t16_demo_bootlet,
    f"Ch308 PSMCT16 tile-color BIOS bootlet ({len(t16_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 32x32 PSMCT16; QWC={t16_demo_qwc}"
)
# The description previously claimed FRAME_1.PSM=PSMCT16, but the payload
# builder emits FRAME_1 as PSMCT32 (required for the combined tile path).
write_payload_mem(
    "payload_tile_psmct16.mem", t16_demo_payload,
    f"Ch308 PSMCT16 tile-color GIF payload ({t16_demo_qwc} qwords active at byte 0x100, "
    f"padded to {RAM_TOTAL_QWORDS}); Ch305 3-prim scene, FRAME_1.PSM=PSMCT32 (combined path), "
    f"tile RAM/flush + DISPFB1 = PSMCT16"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch309 — GS ALPHA mode expansion. The Ch305 3-primitive scene, but P1 uses the
|
||
# ADDITIVE blend mode instead of source-over, so two visibly-different blend
|
||
# equations coexist:
|
||
# P0 (blue bg) : source-over A=Cs,B=Cd,C=As,D=Cd (opaque, As=0x80).
|
||
# P1 (red, additive): A=Cs,B=0,C=FIX(0x80),D=Cd -> Cv=Cs+Cd (clamped). Over the
|
||
# blue bg this BRIGHTENS to magenta (255,0,255); a glow/particle
|
||
# style add. In front (Z=0x6000).
|
||
# P2 (white, src-over translucent A=0x40): blends light-blue over the blue bg
|
||
# where it passes depth (occluded by P1's nearer Z).
|
||
# Proves additive + FIX participate, source-over unchanged, depth-fail suppresses.
|
||
# (Needs ALPHA_MODES_ENABLE=1 on the core.)
|
||
ALPHA_ADDITIVE = alpha_pack(0, 2, 2, 1, fix=0x80) # A=Cs B=0 C=FIX D=Cd, FIX=0x80
|
||
|
||
|
||
def build_tile_alpha_demo_payload():
    """Build the Ch309 payload: the Ch305 scene with P1 drawn in ADDITIVE blend mode.

    P0 and P2 keep tmp_triangle's default source-over ALPHA_1; only P1
    overrides it with ALPHA_ADDITIVE. FRAME_1 rides on the first triangle and
    EOP on the last.

    Returns:
        List of qwords for the whole GIF stream.
    """
    qw = []
    qw += tmp_texture_upload(TMP_TBP_BG, solid_texel(0x00, 0x00, 0xFF, 0x80))   # opaque blue
    qw += tmp_texture_upload(TMP_TBP_MID, solid_texel(0xFF, 0x00, 0x00, 0x80))  # red (additive src)
    qw += tmp_texture_upload(TMP_TBP_FG, solid_texel(0xFF, 0xFF, 0xFF, 0x40))   # translucent white
    qw += tmp_triangle(TMP_TBP_BG, TMP_P0, eop=0, first=True)                               # source-over
    qw += tmp_triangle(TMP_TBP_MID, TMP_P1, eop=0, first=False, alpha_val=ALPHA_ADDITIVE)   # ADDITIVE
    qw += tmp_triangle(TMP_TBP_FG, TMP_P2, eop=1, first=False)                              # source-over
    return qw
|
||
|
||
|
||
# Ch309 fixture emission. The payload is built first because the bootlet needs
# its final qword count (QWC) for the DMA kickoff.
tal_demo_payload = build_tile_alpha_demo_payload()
tal_demo_qwc = len(tal_demo_payload)
# Build-time guard against overlapping the heartbeat splicer (see message).
assert tal_demo_qwc <= 95, f"tile_alpha payload {tal_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
tal_demo_bootlet = build_textured_demo_bootlet_disp(tal_demo_qwc, TMP_DISPLAY1_HI, TMP_FBW)

write_bios_mem(
    "bios_tile_alpha.mem", tal_demo_bootlet,
    f"Ch309 ALPHA-mode (additive) BIOS bootlet ({len(tal_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 32x32; QWC={tal_demo_qwc}"
)
write_payload_mem(
    "payload_tile_alpha.mem", tal_demo_payload,
    f"Ch309 ALPHA-mode GIF payload ({tal_demo_qwc} qwords active at byte 0x100, padded to "
    f"{RAM_TOTAL_QWORDS}); Ch305 scene with P1 ADDITIVE (FIX=0x80) + P0/P2 source-over"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch310 — BILINEAR filtering in the combined tile path. Two textured triangles
|
||
# sampling the SAME 4x4 CHECKER texture, MAGNIFIED (UV 0..4 over a ~12px-wide
|
||
# triangle, ~3 px/texel, so the affine interp produces fractional U/V), with
|
||
# different TEX1.MMAG:
|
||
# LEFT tri (TEX1.MMAG=0, NEAREST): blocky checker (only the 2 texel colors).
|
||
# RIGHT tri (TEX1.MMAG=1, LINEAR): smoothed checker (midtone gradients at
|
||
# texel boundaries -> bilinear visible).
|
||
# Checker = BLUE (0,0,255) / WHITE (255,255,255), opaque A=0x80 (source-over ->
|
||
# texel shown). (Needs BILINEAR_ENABLE=1 on the core.)
|
||
# Ch310 layout / geometry constants.
BIL_TBP0 = 32
BIL_TBW = 1
BIL_TW = 4
BIL_TH = 4
BIL_CB = solid_texel(0x00, 0x00, 0xFF, 0x80)  # blue
BIL_CW = solid_texel(0xFF, 0xFF, 0xFF, 0x80)  # white
BIL_Z = 0x0000_5000
# magnified: UV 0..4 across each ~12-wide / ~22-tall triangle.
BIL_LV = [(2, 4, BIL_Z), (14, 4, BIL_Z), (2, 26, BIL_Z)]    # left  (nearest)
BIL_RV = [(18, 4, BIL_Z), (30, 4, BIL_Z), (18, 26, BIL_Z)]  # right (bilinear)
BIL_UV = [(0, 0), (4, 0), (0, 4)]
|
||
|
||
|
||
def bil_checker_texel(tx, ty):
    """Return the Ch310 checker texel at (tx, ty).

    Texels whose coordinate sum is odd are BIL_CW (white); even sums are
    BIL_CB (blue), so (0, 0) is blue.
    """
    return BIL_CW if ((tx + ty) & 1) else BIL_CB
|
||
|
||
|
||
def bil_texture_upload(tbp0):
    """Upload the BIL_TW x BIL_TH PSMCT32 checker texture to ``tbp0``.

    Returns:
        List of qwords: a 4-register A+D setup packet (BITBLTBUF, TRXPOS,
        TRXREG, TRXDIR), an IMAGE giftag, then the texels from
        bil_checker_texel() packed row-major, four per qword with lane 0 in
        the low 32 bits.
    """
    qw = []
    qw.append(giftag(1, 0, 0, 4, int('E' * 4, 16)))
    qw.append(aplusd(R_BITBLTBUF, bitbltbuf_pack(tbp0, BIL_TBW, 0x00)))
    qw.append(aplusd(R_TRXPOS, trxpos_pack(0, 0)))
    qw.append(aplusd(R_TRXREG, trxreg_pack(BIL_TW, BIL_TH)))
    qw.append(aplusd(R_TRXDIR, trxdir_pack(0)))

    n_image = (BIL_TW * BIL_TH) // 4
    qw.append(giftag(n_image, 0, 2, 0, 0))
    for i in range(n_image):
        word = 0
        for lane in range(4):
            # Linear texel index -> (row, column) within the texture.
            ty, tx = divmod(i * 4 + lane, BIL_TW)
            word |= (bil_checker_texel(tx, ty) & 0xFFFFFFFF) << (32 * lane)
        qw.append(word)
    return qw
|
||
|
||
|
||
def bil_triangle(verts, mmag, eop, first):
    """One textured combined triangle with an explicit TEX1_1 magnification filter.

    Args:
        verts: sequence of (screen x, screen y, Z) vertices; vertex i is
            paired with BIL_UV[i].
        mmag: value forwarded to tex1_pack(); the demo uses 0 for NEAREST and
            1 for LINEAR.
        eop: EOP flag for the giftag.
        first: when true, prepend FRAME_1 (16 registers instead of 15).

    Returns:
        List of qwords: giftag followed by the A+D register writes.
    """
    qw = []
    tex0_val = tex0_pack(BIL_TBP0, BIL_TBW, psm=0x00, tw=2, th=2)  # 4x4 PSMCT32
    # A+D: PRIM,ALPHA,TEST,ZBUF,TEX0,TEX1 (6) [+FRAME on first] + 3*(RGBAQ,UV,XYZ2)=9
    nreg = (7 if first else 6) + 9
    qw.append(giftag(1, eop, 0, nreg, int('E' * nreg, 16)))
    if first:
        qw.append(aplusd(R_FRAME_1, frame_1_psmct32(TMP_FBW)))
    qw.append(aplusd(R_PRIM, prim_tri_tme_abe()))
    qw.append(aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)))  # source-over (opaque texel)
    qw.append(aplusd(R_TEST_1, test1_geq()))
    qw.append(aplusd(R_ZBUF_1, zbuf1_pack(2)))
    qw.append(aplusd(R_TEX0_1, tex0_val))
    qw.append(aplusd(R_TEX1_1, tex1_pack(mmag)))          # NEAREST(0) vs LINEAR(1)
    for i, (sx, sy, sz) in enumerate(verts):
        tu, tv = BIL_UV[i]
        qw.append(aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)))
        qw.append(aplusd(R_UV, uv_data(tu, tv)))
        qw.append(aplusd(R_XYZ2, xyz2_dataz(sx, sy, sz)))
    return qw
|
||
|
||
|
||
def build_tile_bilinear_demo_payload():
    """Build the Ch310 payload: checker upload, NEAREST triangle, LINEAR triangle.

    Both triangles sample the texture at BIL_TBP0 and differ only in position
    (left vs right) and TEX1.MMAG. FRAME_1 rides on the first triangle and
    EOP on the second.

    Returns:
        List of qwords for the whole GIF stream.
    """
    qw = []
    qw += bil_texture_upload(BIL_TBP0)
    qw += bil_triangle(BIL_LV, mmag=0, eop=0, first=True)   # LEFT  nearest
    qw += bil_triangle(BIL_RV, mmag=1, eop=1, first=False)  # RIGHT bilinear
    return qw
|
||
|
||
|
||
# Ch310 fixture emission. The payload is built first because the bootlet needs
# its final qword count (QWC) for the DMA kickoff.
bil_demo_payload = build_tile_bilinear_demo_payload()
bil_demo_qwc = len(bil_demo_payload)
# Build-time guard against overlapping the heartbeat splicer (see message).
assert bil_demo_qwc <= 95, f"tile_bilinear payload {bil_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
bil_demo_bootlet = build_textured_demo_bootlet_disp(bil_demo_qwc, TMP_DISPLAY1_HI, TMP_FBW)

write_bios_mem(
    "bios_tile_bilinear.mem", bil_demo_bootlet,
    f"Ch310 BILINEAR (nearest vs linear) BIOS bootlet ({len(bil_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 32x32; QWC={bil_demo_qwc}"
)
write_payload_mem(
    "payload_tile_bilinear.mem", bil_demo_payload,
    f"Ch310 BILINEAR GIF payload ({bil_demo_qwc} qwords active at byte 0x100, padded to "
    f"{RAM_TOTAL_QWORDS}); 4x4 blue/white checker, LEFT tri TEX1.MMAG=0 nearest / RIGHT MMAG=1 bilinear, magnified"
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch314 — BILINEAR for a PALETTIZED (PSMT8 indexed) texture. The Ch310 two-triangle
# scene (LEFT MMAG=0 nearest, RIGHT MMAG=1 bilinear) but the texture is a 4x4 PSMT8
# INDEX checker (idx 0/1) + a CLUT (0=blue, 1=white). Each bilinear tap fetches an
# INDEX, CLUTs it to a color, then the 4 COLORS are interpolated (CLUT-before-interp)
# — so the right triangle shows interpolated blue<->white midtones, NOT blocky steps
# and NOT garbage from interpolating indices. VRAM (16 KiB): FB 32x32 PSMCT32 @
# 0..0x1FFF (FBW=1: 32 rows x 256 B/row, same layout as Ch304/Ch305), PSMT8 index
# texture @ TBP0=32 (0x2000), CLUT @ CBP=36 (0x2400).
|
||
# Ch314 layout / geometry constants.
PB_TBP0 = 32  # 0x2000 (above the 32-row FB; FBW=1 -> 256 B/row, FB ends at 0x1FFF)
PB_TBW = 1
PB_TW = 4
PB_TH = 4
PB_CBP = 36   # 0x2400 (256-B units)
PB_Z = 0x0000_5000
PB_LV = [(2, 4, PB_Z), (14, 4, PB_Z), (2, 26, PB_Z)]    # left  (nearest)
PB_RV = [(18, 4, PB_Z), (30, 4, PB_Z), (18, 26, PB_Z)]  # right (bilinear)
PB_UV = [(0, 0), (4, 0), (0, 4)]
|
||
|
||
|
||
def pb_palette(i):
    """Return CLUT entry ``i`` as a PSMCT32 texel.

    0 = blue, 1 = white (A=0x80, source-over over the green clear — mirrors
    BIL). Every other index is mid-grey filler; the index texture only uses
    entries 0 and 1.
    """
    if i == 0:
        return solid_texel(0x00, 0x00, 0xFF, 0x80)  # blue
    if i == 1:
        return solid_texel(0xFF, 0xFF, 0xFF, 0x80)  # white
    return solid_texel(0x7F, 0x7F, 0x7F, 0x80)
|
||
|
||
|
||
def pb_index(tx, ty):
    """Return the palette index (0 or 1) of the checker texel at (tx, ty).

    The index is the parity of tx + ty, giving a blue/white checker once the
    indices are looked up through pb_palette().
    """
    return (tx + ty) & 1  # blue/white index checker
|
||
|
||
|
||
def pb_tex0(cld):
    """Pack TEX0 for the 4x4 PSMT8 texture, including the CLUT-side fields.

    PSMT8 (0x13) 4x4 texture + CLUT-side fields so the TEX0 commit fires a
    VRAM->CLUT load when cld=1. Mirrors tex0_clut_pack but tw=th=2 (4x4).

    Args:
        cld: CLUT load control, masked to 3 bits and placed at bit 61.

    Returns:
        The TEX0 value as an integer.
    """
    cpsm = 0  # CPSM=PSMCT32
    csm = 1   # CSM
    csa = 0   # CSA
    v = tex0_pack(PB_TBP0, PB_TBW, psm=0x13, tw=2, th=2)
    v |= (PB_CBP & 0x3FFF) << 37
    v |= (cpsm & 0xF) << 51
    v |= (csm & 0x1) << 55
    v |= (csa & 0x1F) << 56
    v |= (cld & 0x7) << 61  # CLD: 1 -> load CLUT on commit
    return v
|
||
|
||
|
||
def pb_clut_upload():
    """Upload the CLUT: BITBLT 8 PSMCT32 entries to VRAM[CBP*256].

    Only entries 0 and 1 are used by the texture; the rest are pb_palette()'s
    filler value.

    Returns:
        List of qwords: a 4-register A+D setup packet, an IMAGE giftag, then
        the entries packed four per qword with lane 0 in the low 32 bits.
    """
    qw = []
    n_clut = 8
    n_qwords = n_clut // 4
    qw.append(giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE))
    qw.append(aplusd(R_BITBLTBUF, bitbltbuf_pack(PB_CBP, 1, 0)))  # DPSM=PSMCT32
    qw.append(aplusd(R_TRXPOS, trxpos_pack(0, 0)))
    qw.append(aplusd(R_TRXREG, trxreg_pack(n_clut, 1)))           # one row of n_clut entries
    qw.append(aplusd(R_TRXDIR, trxdir_pack(0)))
    qw.append(giftag(n_qwords, 0, 2, 0, 0))  # IMAGE
    for i in range(n_qwords):
        word = 0
        for lane in range(4):
            word |= (pb_palette(i * 4 + lane) & 0xFFFFFFFF) << (32 * lane)
        qw.append(word)
    return qw
|
||
|
||
|
||
def pb_index_upload():
    """Upload the 4x4 PSMT8 index texture to VRAM[TBP0*256] (16 indices, 1 qword).

    Returns:
        List of qwords: a 4-register A+D setup packet, an IMAGE giftag, then
        the indices from pb_index() packed row-major, sixteen 8-bit indices
        per qword with lane 0 in the low byte.
    """
    qw = []
    qw.append(giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE))
    qw.append(aplusd(R_BITBLTBUF, bitbltbuf_pack(PB_TBP0, PB_TBW, 0x13)))  # DPSM=PSMT8
    qw.append(aplusd(R_TRXPOS, trxpos_pack(0, 0)))
    qw.append(aplusd(R_TRXREG, trxreg_pack(PB_TW, PB_TH)))
    qw.append(aplusd(R_TRXDIR, trxdir_pack(0)))

    n_idx = PB_TW * PB_TH
    n_qwords = n_idx // 16
    qw.append(giftag(n_qwords, 0, 2, 0, 0))
    for q in range(n_qwords):
        word = 0
        for lane in range(16):
            # Linear index -> (row, column) within the texture.
            ty, tx = divmod(q * 16 + lane, PB_TW)
            word |= (pb_index(tx, ty) & 0xFF) << (8 * lane)
        qw.append(word)
    return qw
|
||
|
||
|
||
def pb_triangle(verts, mmag, cld, eop, first):
    """One combined triangle sampling the PSMT8 texture through the CLUT.

    Args:
        verts: sequence of (screen x, screen y, Z) vertices; vertex i is
            paired with PB_UV[i].
        mmag: value forwarded to tex1_pack(); 0 for NEAREST, 1 for LINEAR.
        cld: CLUT load control forwarded to pb_tex0(); the demo passes 1 on
            the first triangle so its TEX0 write loads the CLUT.
        eop: EOP flag for the giftag.
        first: when true, prepend FRAME_1 (16 registers instead of 15).

    Returns:
        List of qwords: giftag followed by the A+D register writes.
    """
    qw = []
    # PRIM,ALPHA,TEST,ZBUF,TEX0,TEX1 (6) [+FRAME on first] + 3*(RGBAQ,UV,XYZ2)=9
    nreg = (7 if first else 6) + 9
    qw.append(giftag(1, eop, 0, nreg, int('E' * nreg, 16)))
    if first:
        qw.append(aplusd(R_FRAME_1, frame_1_psmct32(TMP_FBW)))
    qw.append(aplusd(R_PRIM, prim_tri_tme_abe()))
    qw.append(aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)))  # source-over
    qw.append(aplusd(R_TEST_1, test1_geq()))
    qw.append(aplusd(R_ZBUF_1, zbuf1_pack(2)))
    qw.append(aplusd(R_TEX0_1, pb_tex0(cld)))             # PSMT8 + (first) CLUT load
    qw.append(aplusd(R_TEX1_1, tex1_pack(mmag)))          # NEAREST(0) vs LINEAR(1)
    for i, (sx, sy, sz) in enumerate(verts):
        tu, tv = PB_UV[i]
        qw.append(aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)))
        qw.append(aplusd(R_UV, uv_data(tu, tv)))
        qw.append(aplusd(R_XYZ2, xyz2_dataz(sx, sy, sz)))
    return qw
|
||
|
||
|
||
def build_tile_palbilinear_demo_payload():
    """Assemble the palettized-bilinear demo payload.

    Order: CLUT upload, index-texture upload, LEFT triangle (nearest, loads
    the CLUT, carries FRAME), RIGHT triangle (bilinear, carries EOP).

    Returns:
        list[int]: the concatenated GIF qwords.
    """
    sections = (
        pb_clut_upload(),
        pb_index_upload(),
        pb_triangle(PB_LV, mmag=0, cld=1, eop=0, first=True),    # LEFT nearest (loads CLUT)
        pb_triangle(PB_RV, mmag=1, cld=0, eop=1, first=False),   # RIGHT bilinear
    )
    payload = []
    for section in sections:
        payload.extend(section)
    return payload
|
||
|
||
|
||
# Bake the Ch314 palettized-bilinear fixtures: build the payload, guard its
# size against the heartbeat splicer, then write the BIOS + payload images.
pb_demo_payload = build_tile_palbilinear_demo_payload()
pb_demo_qwc = len(pb_demo_payload)
assert pb_demo_qwc <= 95, (
    f"tile_palbilinear payload {pb_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
)
pb_demo_bootlet = build_textured_demo_bootlet_disp(
    pb_demo_qwc,
    TMP_DISPLAY1_HI,
    TMP_FBW,
)

write_bios_mem(
    "bios_tile_palbilinear.mem",
    pb_demo_bootlet,
    f"Ch314 PALETTIZED BILINEAR BIOS bootlet ({len(pb_demo_bootlet)} words active, "
    f"padded to {BIOS_TOTAL_WORDS}); DISPLAY1 = 32x32; QWC={pb_demo_qwc}",
)
write_payload_mem(
    "payload_tile_palbilinear.mem",
    pb_demo_payload,
    f"Ch314 PALETTIZED BILINEAR GIF payload ({pb_demo_qwc} qwords active at byte 0x100, padded to "
    f"{RAM_TOTAL_QWORDS}); CLUT(blue/white) + 4x4 PSMT8 index checker, LEFT MMAG=0 nearest / RIGHT MMAG=1 bilinear (CLUT-before-interp)",
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch311 — per-tile BIN BUFFER demo. Three primitives with deliberately different
|
||
# tile coverage so the bins are checkable:
|
||
# P0 blue bg (Z=0x5000): big triangle, bbox spans ALL 4 tiles -> in every bin.
|
||
# P1 red (Z=0x6000): bbox x8..24 y2..12 -> crosses x=16 seam but stays in
|
||
# tile-row 0 -> in tiles (0,0)+(1,0) ONLY (2 bins).
|
||
# P2 white (Z=0x5800): bbox x20..28 y20..28 -> tile (1,1) ONLY (1 bin).
|
||
# Bins (prim index P0=0,P1=1,P2=2): t0={0,1} t1={0,1} t2={0} t3={0,2}.
|
||
# (Needs BIN_BUFFER_ENABLE=1 + TILE_MULTIPRIM=1.) Same render result as the Ch305
|
||
# re-test-per-tile path; the bin buffer just precomputes the routing.
|
||
# Vertex lists for the three Ch311 bin-buffer prims: (x, y, z) per vertex.
BIN_P0 = [  # blue, all tiles
    (1, 1, 0x0000_5000),
    (30, 1, 0x0000_5000),
    (15, 30, 0x0000_5000),
]
BIN_P1 = [  # red, tiles 0,1
    (8, 2, 0x0000_6000),
    (24, 2, 0x0000_6000),
    (16, 12, 0x0000_6000),
]
BIN_P2 = [  # white, tile 3 only
    (20, 20, 0x0000_5800),
    (28, 20, 0x0000_5800),
    (20, 28, 0x0000_5800),
]
|
||
|
||
|
||
def build_tile_bin_demo_payload():
    """Assemble the Ch311 bin-buffer demo payload.

    Three solid-texture uploads (blue, red, translucent white) followed by
    the three triangles BIN_P0..BIN_P2; the first carries FRAME, the last EOP.

    Returns:
        list[int]: the concatenated GIF qwords.
    """
    sections = (
        tmp_texture_upload(TMP_TBP_BG, solid_texel(0x00, 0x00, 0xFF, 0x80)),   # opaque blue
        tmp_texture_upload(TMP_TBP_MID, solid_texel(0xFF, 0x00, 0x00, 0x80)),  # opaque red
        tmp_texture_upload(TMP_TBP_FG, solid_texel(0xFF, 0xFF, 0xFF, 0x40)),   # translucent white
        tmp_triangle(TMP_TBP_BG, BIN_P0, eop=0, first=True),                   # prim 0
        tmp_triangle(TMP_TBP_MID, BIN_P1, eop=0, first=False),                 # prim 1
        tmp_triangle(TMP_TBP_FG, BIN_P2, eop=1, first=False),                  # prim 2
    )
    payload = []
    for section in sections:
        payload.extend(section)
    return payload
|
||
|
||
|
||
# Bake the Ch311 bin-buffer fixtures (payload size guarded against the
# heartbeat splicer), then write the BIOS + payload images.
bn_demo_payload = build_tile_bin_demo_payload()
bn_demo_qwc = len(bn_demo_payload)
assert bn_demo_qwc <= 95, (
    f"tile_bin payload {bn_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
)
bn_demo_bootlet = build_textured_demo_bootlet_disp(
    bn_demo_qwc,
    TMP_DISPLAY1_HI,
    TMP_FBW,
)

write_bios_mem(
    "bios_tile_bin.mem",
    bn_demo_bootlet,
    f"Ch311 BIN-BUFFER BIOS bootlet ({len(bn_demo_bootlet)} words active, padded to "
    f"{BIOS_TOTAL_WORDS}); DISPLAY1 = 32x32; QWC={bn_demo_qwc}",
)
write_payload_mem(
    "payload_tile_bin.mem",
    bn_demo_payload,
    f"Ch311 BIN-BUFFER GIF payload ({bn_demo_qwc} qwords active at byte 0x100, padded to "
    f"{RAM_TOTAL_QWORDS}); P0 all-tiles / P1 2-tiles / P2 1-tile -> bins t0{{0,1}} t1{{0,1}} t2{{0}} t3{{0,2}}",
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch312 — scale the tiled renderer to a 4x4 grid (16 tiles, 16x16 each = 64x64).
# Same bin-buffer mechanism (BIN_BUFFER_ENABLE), just TILE_COLS=TILE_ROWS=4. Three
# prims chosen to stress the larger grid + leave empty tiles:
#   P0 blue (Z5000): top-left, bbox tiles (0,0)(1,0)(0,1)(1,1) = t0,1,4,5 (4 tiles)
#   P1 red  (Z6000): mid, bbox x20..50 y18..40 crosses the x=32/48 & y=32 seams, bbox tiles
#                    (1,1)(2,1)(3,1)(1,2)(2,2)(3,2) = t5,6,7,9,10,11 (6 tiles)
#   P2 white(Z5800): bottom-right corner tile (3,3) = t15 ONLY (1 tile)
#   EMPTY tiles (no prim bbox): t2,3,8,12,13,14 -> stay clear.
# 64x64 FB PSMCT32 = 0x4000 (fills 16 KiB) -> VRAM 32 KiB, textures @ 0x4000+.
|
||
# Ch312 4x4-grid demo configuration.
B4_FBW = 1
B4_DISPLAY1_HI = 63 | (63 << 12)  # DW=63 (64 wide), DH=63 (64 tall)

# Texture base pointers placed above the 64x64 frame buffer.
B4_TBP0_BG = 64    # 0x4000
B4_TBP0_MID = 68   # 0x4400
B4_TBP0_FG = 72    # 0x4800

# Vertex lists, (x, y, z) per vertex.
B4_P0 = [  # blue top-left
    (2, 2, 0x0000_5000),
    (30, 2, 0x0000_5000),
    (2, 30, 0x0000_5000),
]
B4_P1 = [  # red mid, crosses seams
    (20, 18, 0x0000_6000),
    (50, 18, 0x0000_6000),
    (35, 40, 0x0000_6000),
]
B4_P2 = [  # white corner tile
    (52, 52, 0x0000_5800),
    (60, 52, 0x0000_5800),
    (52, 60, 0x0000_5800),
]
|
||
|
||
|
||
def b4_texture_upload(tbp0, texel):
    """Build the GIF qwords uploading a solid PSMCT32 texture at ``tbp0``.

    Reuses the TMP texture dimensions (TMP_TEXW x TMP_TEXH, 4x4 per the
    file's notes).  Every image qword holds four copies of the low 32 bits
    of ``texel``.

    Args:
        tbp0: texture base pointer passed to ``bitbltbuf_pack``.
        texel: 32-bit texel value replicated across the image.

    Returns:
        list[int]: A+D transfer setup, IMAGE tag, then the image qwords.
    """
    image_qwords = (TMP_TEXW * TMP_TEXH) // 4  # four 32-bit texels per qword

    packets = [
        giftag(1, 0, 0, 4, int('E' * 4, 16)),
        aplusd(R_BITBLTBUF, bitbltbuf_pack(tbp0, TMP_TBW, 0x00)),
        aplusd(R_TRXPOS, trxpos_pack(0, 0)),
        aplusd(R_TRXREG, trxreg_pack(TMP_TEXW, TMP_TEXH)),
        aplusd(R_TRXDIR, trxdir_pack(0)),
        giftag(image_qwords, 0, 2, 0, 0),
    ]

    # The texture is solid, so every qword is the same four-lane splat.
    lane32 = texel & 0xFFFFFFFF
    splat = lane32 | (lane32 << 32) | (lane32 << 64) | (lane32 << 96)
    packets.extend([splat] * image_qwords)
    return packets
|
||
|
||
|
||
def b4_triangle(tbp0, verts, eop, first):
    """Build one A+D packet drawing a textured triangle for the 4x4-grid demo.

    Args:
        tbp0: base pointer of the PSMCT32 texture to sample.
        verts: three ``(sx, sy, sz)`` vertices; vertex ``i`` uses ``TMP_UV[i]``.
        eop: EOP flag placed in the GIF tag.
        first: when true, FRAME_1 is written ahead of the other state.

    Returns:
        list[int]: the GIF tag followed by its register qwords.
    """
    tex0_val = tex0_pack(tbp0, TMP_TBW, psm=0x00, tw=2, th=2)

    # State registers (FRAME only on the first prim) + 3 vertices * 3 regs.
    state_regs = 6 if first else 5
    nreg = state_regs + 9

    out = [giftag(1, eop, 0, nreg, int('E' * nreg, 16))]
    if first:
        out.append(aplusd(R_FRAME_1, frame_1_psmct32(B4_FBW)))
    out += [
        aplusd(R_PRIM, prim_tri_tme_abe()),
        aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)),
        aplusd(R_TEST_1, test1_geq()),
        aplusd(R_ZBUF_1, zbuf1_pack(2)),
        aplusd(R_TEX0_1, tex0_val),
    ]

    for idx, (sx, sy, sz) in enumerate(verts):
        tu, tv = TMP_UV[idx]
        out += [
            aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)),
            aplusd(R_UV, uv_data(tu, tv)),
            aplusd(R_XYZ2, xyz2_dataz(sx, sy, sz)),
        ]
    return out
|
||
|
||
|
||
def build_tile_bin4x4_demo_payload():
    """Assemble the Ch312 4x4-grid demo payload.

    Three solid-texture uploads (blue, red, white) followed by triangles
    B4_P0..B4_P2; the first carries FRAME, the last EOP.

    Returns:
        list[int]: the concatenated GIF qwords.
    """
    sections = (
        b4_texture_upload(B4_TBP0_BG, solid_texel(0x00, 0x00, 0xFF, 0x80)),   # blue
        b4_texture_upload(B4_TBP0_MID, solid_texel(0xFF, 0x00, 0x00, 0x80)),  # red
        b4_texture_upload(B4_TBP0_FG, solid_texel(0xFF, 0xFF, 0xFF, 0x40)),   # white
        b4_triangle(B4_TBP0_BG, B4_P0, eop=0, first=True),
        b4_triangle(B4_TBP0_MID, B4_P1, eop=0, first=False),
        b4_triangle(B4_TBP0_FG, B4_P2, eop=1, first=False),
    )
    payload = []
    for section in sections:
        payload.extend(section)
    return payload
|
||
|
||
|
||
# Bake the Ch312 4x4-grid fixtures (payload size guarded against the
# heartbeat splicer), then write the BIOS + payload images.
b4_demo_payload = build_tile_bin4x4_demo_payload()
b4_demo_qwc = len(b4_demo_payload)
assert b4_demo_qwc <= 95, (
    f"tile_bin4x4 payload {b4_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
)
b4_demo_bootlet = build_textured_demo_bootlet_disp(
    b4_demo_qwc,
    B4_DISPLAY1_HI,
    B4_FBW,
)

write_bios_mem(
    "bios_tile_bin4x4.mem",
    b4_demo_bootlet,
    f"Ch312 4x4-GRID bin-buffer BIOS bootlet ({len(b4_demo_bootlet)} words active, padded to "
    f"{BIOS_TOTAL_WORDS}); DISPLAY1 = 64x64; QWC={b4_demo_qwc}",
)
write_payload_mem(
    "payload_tile_bin4x4.mem",
    b4_demo_payload,
    f"Ch312 4x4-GRID bin-buffer GIF payload ({b4_demo_qwc} qwords active at byte 0x100, padded to "
    f"{RAM_TOTAL_QWORDS}); P0 4-tile / P1 6-tile cross-seam / P2 1-tile + empty tiles, 64x64 PSMCT32 FB",
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch315 — PRIMITIVE/BIN CAPACITY scaling. The 4x4 (64x64) grid but SEVEN prims
# (vs Ch312's 3), exceeding the old FIFO/bin depth of 4. Six of them (P0..P5) all
# fall ENTIRELY inside the first tile t0 (col0,row0 = x[0..15],y[0..15]) so that
# tile's bin holds SIX prims (occupancy 6 > old depth 4); P6 is a lone corner prim
# in t15; the other 14 tiles are empty. One shared opaque-blue texture keeps the
# payload small (subsequent prims re-issue only PRIM + 3 verts; FRAME/TEX0/ALPHA/
# TEST/ZBUF persist). Proves overlap, draw order (bin order {0..5}), full-ish + empty
# bins, and capacity past 4. Needs TILE_FIFO_DEPTH>=7 (the demo profile sets 8).
|
||
# Shared configuration for the capacity demos.
CAP_TBP0 = 64                                    # 0x4000 (above the 64x64 FB), shared blue texture
CAP_FBW = 1
CAP_DISPLAY1_HI = 63 | (63 << 12)                # 64x64
CAP_BLUE = solid_texel(0x00, 0x00, 0xFF, 0xFF)   # OPAQUE blue (order-independent color)

# Ch333 — a second "unity" texture (0x80 per channel = 1.0 in PS2 modulate fixed-point) so a
# MODULATE prim's output equals the staging RGBAQ color. Uploaded alongside blue by the setup.
CAP_TBP1 = 96                                    # 0x6000 — well clear of FB(0x4000) + blue texture
CAP_UNITY = solid_texel(0x80, 0x80, 0x80, 0x80)  # modulate identity: (0x80 * c) >> 7 == c

# P0..P5 are SIX IDENTICAL right-triangles (right-angle at top-left, same winding as the
# proven Ch312 prims) entirely inside tile t0 [0..15]^2, at increasing Z. They all bin into
# t0 (depth 6), draw in order, and (opaque, GEQUAL) the top one wins, so the union is that
# one blue triangle. P6 is a lone corner triangle in t15 [48..63]^2.
#
# The deep bin sits in t0 (the FIRST tile rendered) on purpose: a SEPARATE latent bug makes
# EMPTY tiles that precede the first non-empty tile flush black instead of the clear colour
# (never hit before — prior demos always had a prim in t0). That is orthogonal to capacity;
# keeping t0 non-empty avoids it. Identical shapes keep the SW image reference winding-exact;
# the 6-deep bin (read back from bin_prim/bin_n) is what proves capacity, not visual
# distinctness.
CAP_T0 = [(1, 1), (14, 1), (1, 14)]

# P0..P5 share CAP_T0 with Z stepping by 0x100 from 0x5000; P6 follows at Z=0x5600.
CAP_PRIMS = [(CAP_T0, 0x0000_5000 + 0x100 * n) for n in range(6)]
CAP_PRIMS.append(([(50, 50), (62, 50), (50, 62)], 0x0000_5600))  # P6 corner t15
|
||
|
||
|
||
def cap_triangle(verts, z, eop, first):
    """Build one A+D packet drawing a textured triangle at a single depth.

    The first prim writes FRAME + PRIM + ALPHA + TEST + ZBUF + TEX0; later
    prims re-issue only PRIM and rely on the earlier state persisting.

    Args:
        verts: three ``(sx, sy)`` vertices; vertex ``i`` uses ``TMP_UV[i]``.
        z: depth applied to every vertex.
        eop: EOP flag placed in the GIF tag.
        first: whether this prim carries the full state setup.

    Returns:
        list[int]: the GIF tag followed by its register qwords.
    """
    state_regs = 6 if first else 1
    nreg = state_regs + 9  # + 3 vertices * (RGBAQ, UV, XYZ2)

    out = [giftag(1, eop, 0, nreg, int('E' * nreg, 16))]
    if first:
        out.append(aplusd(R_FRAME_1, frame_1_psmct32(CAP_FBW)))
    # PRIM is re-issued per prim (clean vertex kick).
    out.append(aplusd(R_PRIM, prim_tri_tme_abe()))
    if first:
        out += [
            aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)),
            aplusd(R_TEST_1, test1_geq()),
            aplusd(R_ZBUF_1, zbuf1_pack(2)),
            aplusd(R_TEX0_1, tex0_pack(CAP_TBP0, TMP_TBW, psm=0x00, tw=2, th=2)),
        ]

    for idx, (sx, sy) in enumerate(verts):
        tu, tv = TMP_UV[idx]
        out += [
            aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)),
            aplusd(R_UV, uv_data(tu, tv)),
            aplusd(R_XYZ2, xyz2_dataz(sx, sy, z)),
        ]
    return out
|
||
|
||
|
||
def build_tile_cap_demo_payload():
    """Assemble the Ch315 capacity payload: blue texture + every CAP_PRIMS prim.

    The first prim carries the state setup and the last one carries EOP.

    Returns:
        list[int]: the concatenated GIF qwords.
    """
    payload = list(b4_texture_upload(CAP_TBP0, CAP_BLUE))
    last = len(CAP_PRIMS) - 1
    for n, (verts, z) in enumerate(CAP_PRIMS):
        eop = 1 if n == last else 0
        payload.extend(cap_triangle(verts, z, eop=eop, first=(n == 0)))
    return payload
|
||
|
||
|
||
def build_tile_late_demo_payload():
    """Assemble the Ch316 late-only payload.

    ONE prim, ONLY in the LAST tile t15 [48..63]^2; tiles t0..t14 stay empty
    (LEADING empties). Exercises the empty-tile-before-first-non-empty-tile
    path.

    Returns:
        list[int]: blue-texture upload followed by the single triangle.
    """
    corner_tri = [(50, 50), (62, 50), (50, 62)]
    payload = list(b4_texture_upload(CAP_TBP0, CAP_BLUE))
    payload.extend(cap_triangle(corner_tri, 0x0000_5000, eop=1, first=True))
    return payload
|
||
|
||
|
||
# Bake the Ch316 late-only fixtures and write the BIOS + payload images.
late_demo_payload = build_tile_late_demo_payload()
late_demo_qwc = len(late_demo_payload)
assert late_demo_qwc <= 95, f"tile_late payload {late_demo_qwc} qwords"
late_demo_bootlet = build_textured_demo_bootlet_disp(
    late_demo_qwc,
    CAP_DISPLAY1_HI,
    CAP_FBW,
)
write_bios_mem(
    "bios_tile_late.mem",
    late_demo_bootlet,
    f"Ch316 LATE-ONLY BIOS bootlet ({len(late_demo_bootlet)} words active, padded to "
    f"{BIOS_TOTAL_WORDS}); DISPLAY1 = 64x64; QWC={late_demo_qwc}",
)
write_payload_mem(
    "payload_tile_late.mem",
    late_demo_payload,
    f"Ch316 LATE-ONLY GIF payload ({late_demo_qwc} qwords active at byte 0x100, padded to "
    f"{RAM_TOTAL_QWORDS}); 1 prim in t15 only, t0..t14 empty (leading-empty-tile traversal)",
)
|
||
|
||
|
||
# Bake the Ch315 capacity fixtures: build the 7-prim payload, guard its size
# against the heartbeat splicer, then write the BIOS + payload images.
cap_demo_payload = build_tile_cap_demo_payload()
cap_demo_qwc = len(cap_demo_payload)
assert cap_demo_qwc <= 95, f"tile_cap payload {cap_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
cap_demo_bootlet = build_textured_demo_bootlet_disp(cap_demo_qwc, CAP_DISPLAY1_HI, CAP_FBW)

write_bios_mem(
    "bios_tile_cap.mem", cap_demo_bootlet,
    f"Ch315 CAPACITY (7-prim) BIOS bootlet ({len(cap_demo_bootlet)} words active, padded to "
    f"{BIOS_TOTAL_WORDS}); DISPLAY1 = 64x64; QWC={cap_demo_qwc}"
)
# The banner names tile t0: P0..P5 use CAP_T0, whose vertices lie in
# [0..15]^2 (the first tile), not the centre tile t5 the old text claimed.
write_payload_mem(
    "payload_tile_cap.mem", cap_demo_payload,
    f"Ch315 CAPACITY GIF payload ({cap_demo_qwc} qwords active at byte 0x100, padded to "
    f"{RAM_TOTAL_QWORDS}); 7 prims: P0..P5 all in first tile t0 (bin depth 6 > old 4), P6 corner t15"
)
|
||
|
||
# ===== Ch328 1b — DEPTH-64 CAPACITY PROOF: 18 solid-blue tris stacked in tile t0 (bin depth 18 > 16) =====
# The triangles render uniform opaque blue, but they are COMBINED-textured prims (TME+ABE), not flat
# (TME=0) ones. The shared state is set once and persists, so each later prim costs only PRIM +
# 3*XYZ2 (~5 qwords, versus ~11 for a Ch315-style prim that re-issues RGBAQ/UV per vertex) and all
# 18 fit in ~107 qwords — more than the 95-qword budget the other demos assert.
# Exercises the TRI path + the M20K grad-prefetch (uniform color gradients) at a bin depth that the
# old register FIFO (depth-4/8, ~600 ALM/1033 reg per slot) could never have held. All 18 inside t0
# [0..15]^2 (the FIRST tile -> avoids the leading-empty-tile bug); other 15 tiles empty.
|
||
# Number of stacked prims in the depth-capacity scene.
CAP64_N = 18


# COMBINED-textured tris (TME+ABE+GEQUAL Z) — the proven tile-multiprim grid path (tile_active =
# ras_combined; a non-combined/flat prim would drop tile_active and stall the grid walk). Shared
# UV/RGBAQ/TEX0/ALPHA/TEST/ZBUF are set ONCE (they persist) so each prim costs only PRIM + 3*XYZ2
# (~5 qw), fitting 18 in ~107 qw (a per-vert-UV combined tri like Ch315's would be ~213 qw). Every
# vertex samples one texel of the solid-blue texture -> uniform blue, order-independent.
def cap64_triangle(z, first, eop):
    """Build one A+D packet for a CAP_T0 triangle at depth ``z``.

    Args:
        z: depth applied to all three CAP_T0 vertices.
        first: when true, also emit the seven shared-state registers
            (FRAME, ALPHA, TEST, ZBUF, TEX0, RGBAQ, UV).
        eop: EOP flag placed in the GIF tag.

    Returns:
        list[int]: the GIF tag followed by its register qwords.
    """
    shared_state_regs = 7 if first else 0
    nreg = 1 + shared_state_regs + 3  # PRIM + setup + 3*XYZ2

    out = [
        giftag(1, eop, 0, nreg, int('E' * nreg, 16)),
        aplusd(R_PRIM, prim_tri_tme_abe()),  # re-issued per prim (clean vertex kick)
    ]
    if first:
        out += [
            aplusd(R_FRAME_1, frame_1_psmct32(CAP_FBW)),
            aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)),   # source-over
            aplusd(R_TEST_1, test1_geq()),               # GEQUAL Z
            aplusd(R_ZBUF_1, zbuf1_pack(2)),
            aplusd(R_TEX0_1, tex0_pack(CAP_TBP0, TMP_TBW, psm=0x00, tw=2, th=2)),
            aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)),
            aplusd(R_UV, uv_data(TMP_UV[0][0], TMP_UV[0][1])),  # shared UV -> solid blue
        ]
    out.extend(aplusd(R_XYZ2, xyz2_dataz(sx, sy, z)) for (sx, sy) in CAP_T0)
    return out
|
||
def build_tile_cap64_demo_payload():
    """Assemble the Ch328 payload: blue texture + CAP64_N stacked triangles.

    Depth starts at 0x5000 and rises by 0x100 per prim; the first prim
    carries the shared state and the last carries EOP.

    Returns:
        list[int]: the concatenated GIF qwords.
    """
    payload = list(b4_texture_upload(CAP_TBP0, CAP_BLUE))
    last = CAP64_N - 1
    for n in range(CAP64_N):
        depth = 0x0000_5000 + n * 0x100
        payload.extend(cap64_triangle(depth, first=(n == 0), eop=(1 if n == last else 0)))
    return payload
|
||
|
||
# ===== Ch329 Bug 1 — 18 NON-COMBINED SPRITES in tile t0 (multiprim-grid refusal proof) =====
|
||
# Sprites are never combined (no tile-local color/Z path), so the multiprim grid must REFUSE them
|
||
# cleanly (tile_refused_count==18, grid completes, NO stall) rather than freeze the bin-walk. All 18
|
||
# sit in t0; shared FRAME/RGBAQ so each prim is just PRIM + 2*XYZ2 (~4 qw).
|
||
def build_tile_sprite18_demo_payload():
    """Assemble CAP64_N identical sprites covering (1,1)-(14,14).

    Each sprite is its own A+D packet: PRIM + 2*XYZ2, with FRAME and RGBAQ
    added on the first packet only.  The last packet carries EOP.

    Returns:
        list[int]: the concatenated GIF qwords.
    """
    payload = []
    last = CAP64_N - 1
    for n in range(CAP64_N):
        is_first = n == 0
        # PRIM + [FRAME+RGBAQ first] + 2*XYZ2
        nreg = (3 if is_first else 1) + 2
        payload.append(giftag(1, 1 if n == last else 0, 0, nreg, int('E' * nreg, 16)))
        payload.append(aplusd(R_PRIM, PRIM_SPRITE))
        if is_first:
            payload.append(aplusd(R_FRAME_1, frame_1_psmct32(CAP_FBW)))
            payload.append(aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0xFF)))  # blue (would-be, if rendered)
        payload.append(aplusd(R_XYZ2, xyz2_data(1, 1)))
        payload.append(aplusd(R_XYZ2, xyz2_data(14, 14)))
    return payload
|
||
# Bake the Ch329 18-sprite refusal fixtures and write the BIOS + payload images.
sprite18_payload = build_tile_sprite18_demo_payload()
sprite18_qwc = len(sprite18_payload)
assert sprite18_qwc <= 114, f"tile_sprite18 payload {sprite18_qwc} qwords (limit 114)"
sprite18_bootlet = build_textured_demo_bootlet_disp(
    sprite18_qwc,
    CAP_DISPLAY1_HI,
    CAP_FBW,
)
write_bios_mem(
    "bios_tile_sprite18.mem",
    sprite18_bootlet,
    f"Ch329 18-sprite (non-combined) refusal bootlet ({len(sprite18_bootlet)} words; QWC={sprite18_qwc})",
)
write_payload_mem(
    "payload_tile_sprite18.mem",
    sprite18_payload,
    f"Ch329 18 NON-combined sprites in t0 ({sprite18_qwc} qwords) — multiprim grid must refuse all 18",
)
|
||
|
||
# Bake the Ch328 depth-capacity fixtures.
cap64_demo_payload = build_tile_cap64_demo_payload()
cap64_demo_qwc = len(cap64_demo_payload)

# The payload loads at byte 0x100 = qword 16, so it occupies EE-RAM qwords [16 .. 16+qwc).
# That range CROSSES the legacy heartbeat read-splicer's qword 115 (Ch255); this is harmless
# because the cap64 TB gates the splicer OFF (HEARTBEAT_SPLICE_ENABLE=0). The only remaining
# requirement is that the whole payload stays inside EE RAM.
assert 16 + cap64_demo_qwc <= RAM_TOTAL_QWORDS, (
    f"tile_cap64 payload ends at qword {16+cap64_demo_qwc} > RAM {RAM_TOTAL_QWORDS}"
)
cap64_demo_bootlet = build_textured_demo_bootlet_disp(
    cap64_demo_qwc,
    CAP_DISPLAY1_HI,
    CAP_FBW,
)

write_bios_mem(
    "bios_tile_cap64.mem",
    cap64_demo_bootlet,
    f"Ch328 DEPTH-64 capacity bootlet ({len(cap64_demo_bootlet)} words; DISPLAY1=64x64; QWC={cap64_demo_qwc})",
)
write_payload_mem(
    "payload_tile_cap64.mem",
    cap64_demo_payload,
    f"Ch328 DEPTH-64 capacity ({cap64_demo_qwc} qwords): {CAP64_N} flat-blue tris all in tile t0 "
    f"(bin depth {CAP64_N} > 16 — impossible at the old register-FIFO per-slot cost)",
)
|
||
|
||
|
||
# ===== Ch330 Brick 2 — runtime-feeder image-equivalence fixtures =====
|
||
# SETUP-ONLY payload: upload the blue texture to CAP_TBP0 then EOP — NO baked tris (the feeder
|
||
# draws the prims in phase 1; a baked tri here would double-render). And feeder_stg_cap4.mem:
|
||
# the SAME 4 combined-TAZ tris (CAP_T0, increasing Z) as a normalized feeder list. Both reference
|
||
# the same texture/state, so the feeder render must match the proven baked combined render.
|
||
# List A: four CAP_T0 triangles in tile t0 (top-left), Z rising by 0x100 from 0x5000.
FEEDER_TRIS = [(CAP_T0, 0x0000_5000 + 0x100 * n) for n in range(4)]

# List B: the same stack moved to tile t15 (col3,row3 = bottom-right, diagonal opposite of t0).
FEEDER_T15 = [(49, 49), (62, 49), (49, 62)]
FEEDER_TRIS_B = [(FEEDER_T15, 0x0000_5000 + 0x100 * n) for n in range(4)]
|
||
|
||
# Ch331 — feeder EXPRESSIVENESS: one small triangle placed in an arbitrary 4x4-grid tile.
|
||
def tri_in_tile(t):
    """Return a right triangle inset one pixel inside 16x16 tile ``t``.

    Args:
        t: tile index on the 4x4 grid, ``row * 4 + col`` (0..15).

    Returns:
        list[tuple[int, int]]: top-left, top-right and bottom-left vertices.
    """
    row, col = divmod(t, 4)
    left, top = col * 16 + 1, row * 16 + 1
    right, bottom = col * 16 + 14, row * 16 + 14
    return [(left, top), (right, top), (left, bottom)]
|
||
def scene_from_tiles(tiles):
    """Build a feeder scene with one ``tri_in_tile`` prim per listed tile.

    Args:
        tiles: iterable of tile indices (0..15).

    Returns:
        list[tuple]: ``(verts, z)`` pairs; Z starts at 0x5000 and rises by
        0x100 per prim, in list order.
    """
    scene = []
    for order, tile in enumerate(tiles):
        depth = 0x0000_5000 + order * 0x100
        scene.append((tri_in_tile(tile), depth))
    return scene
|
||
# Ch331 tile selections (one prim per listed tile).
SCENE_C1_TILES = [
    0, 5, 10,
]  # 3 prims (< TILE_PRIM_COUNT=4): diagonal
SCENE_C2_TILES = [
    0, 3, 5, 9, 12, 15,
]  # 6 prims (> 4): scattered, both diagonals
SCENE_C3_TILES = [
    0, 1, 2, 3,
    12, 13, 14, 15,
]  # 8 prims (== FIFO_DEPTH): top + bottom rows
|
||
|
||
# Ch332 — second shape: a RECTANGLE (filled quad) = two textured triangles sharing a diagonal.
|
||
# (lowest-risk vocabulary expansion: no new feeder record type, just two triangle records.)
|
||
def rect_tris_in_tile(t):
    """Return the two triangles that fill a rectangle inset in tile ``t``.

    Args:
        t: tile index on the 4x4 grid, ``row * 4 + col`` (0..15).

    Returns:
        list[list[tuple[int, int]]]: ``[upper_left, lower_right]``; the two
        halves share the top-right / bottom-left diagonal.
    """
    row, col = divmod(t, 4)
    left, top = col * 16 + 1, row * 16 + 1
    right, bottom = col * 16 + 14, row * 16 + 14
    upper_left = [(left, top), (right, top), (left, bottom)]
    lower_right = [(right, top), (left, bottom), (right, bottom)]
    return [upper_left, lower_right]
|
||
def scene_shapes(items):
    """Expand ``(tile, kind)`` shapes into a flat list of triangle prims.

    Args:
        items: iterable of ``(tile, 'tri' | 'rect')``.  Any kind other than
            ``'tri'`` is expanded as a rectangle (two triangles).

    Returns:
        list[tuple]: ``(verts, z)`` pairs; Z starts at 0x5000 and rises by
        0x100 per emitted triangle.
    """
    prims = []
    for tile, kind in items:
        if kind == 'tri':
            pieces = [tri_in_tile(tile)]
        else:
            pieces = rect_tris_in_tile(tile)
        for verts in pieces:
            # len(prims) is the running triangle index.
            prims.append((verts, 0x0000_5000 + len(prims) * 0x100))
    return prims
|
||
# Ch332 shape lists: (tile, kind) per shape.
SHAPE_TRI = [  # 3 prims — half-tile triangles
    (0, 'tri'),
    (5, 'tri'),
    (10, 'tri'),
]
SHAPE_RECT = [  # 6 prims — same tiles, FILLED quads
    (0, 'rect'),
    (5, 'rect'),
    (10, 'rect'),
]
SHAPE_MIXED = [  # 6 prims — 2 tris + 2 rects
    (0, 'tri'),
    (5, 'rect'),
    (10, 'rect'),
    (15, 'tri'),
]
|
||
|
||
# Ch333 — colored shapes: unity texture (CAP_TBP1) + TFX=MODULATE, so the staging RGBAQ IS the
|
||
# rendered color. items = [(tile, 'tri'|'rect', (r,g,b)), ...]; each shape's tri(s) share its color.
|
||
# (r, g, b) colors used by the colored-shape scenes.
COL_RED = (0xFF, 0, 0)
COL_GREEN = (0, 0xFF, 0)
COL_BLUE = (0, 0, 0xFF)
COL_YELLOW = (0xFF, 0xFF, 0)
|
||
def scene_shapes_colored(items):
    """Expand ``(tile, kind, rgb)`` shapes into colored triangle prims.

    Args:
        items: iterable of ``(tile, 'tri' | 'rect', (r, g, b))``.  Any kind
            other than ``'tri'`` is expanded as a rectangle; both of a
            rectangle's triangles share its color.

    Returns:
        list[tuple]: ``(verts, z, rgb)`` triples; Z starts at 0x5000 and
        rises by 0x100 per emitted triangle.
    """
    prims = []
    for tile, kind, rgb in items:
        if kind == 'tri':
            pieces = [tri_in_tile(tile)]
        else:
            pieces = rect_tris_in_tile(tile)
        for verts in pieces:
            # len(prims) is the running triangle index.
            prims.append((verts, 0x0000_5000 + len(prims) * 0x100, rgb))
    return prims
|
||
# Ch333 colored shape lists: (tile, kind, color) per shape.
COLOR_TRI = [  # 3 prims, 3 colors
    (0, 'tri', COL_RED),
    (5, 'tri', COL_GREEN),
    (10, 'tri', COL_BLUE),
]
COLOR_RECT = [  # 6 prims, 3 colors
    (0, 'rect', COL_RED),
    (5, 'rect', COL_GREEN),
    (10, 'rect', COL_BLUE),
]
COLOR_MIX = [  # 6 prims, shape+color vary
    (0, 'tri', COL_RED),
    (5, 'rect', COL_GREEN),
    (10, 'tri', COL_BLUE),
    (15, 'rect', COL_YELLOW),
]
|
||
|
||
def build_feeder_setup_payload():
    """Build the setup-only payload that uploads the two feeder textures.

    Ch333 — uploads blue at CAP_TBP0 (DECAL anchors) and unity at CAP_TBP1
    (MODULATE color scenes).  EOP rides ONLY the last image packet; every
    earlier packet has eop=0.  No primitives are emitted.

    Returns:
        list[int]: the GIF qwords for both uploads.
    """
    image_qwords = (TMP_TEXW * TMP_TEXH) // 4  # four 32-bit texels per qword
    uploads = (
        (CAP_TBP0, CAP_BLUE, 0),    # blue — DECAL anchors
        (CAP_TBP1, CAP_UNITY, 1),   # unity — MODULATE color scenes (EOP here)
    )

    packets = []
    for tbp, texel, eop in uploads:
        packets += [
            giftag(1, 0, 0, 4, int('E' * 4, 16)),  # A+D: BITBLTBUF/TRXPOS/TRXREG/TRXDIR
            aplusd(R_BITBLTBUF, bitbltbuf_pack(tbp, TMP_TBW, 0x00)),
            aplusd(R_TRXPOS, trxpos_pack(0, 0)),
            aplusd(R_TRXREG, trxreg_pack(TMP_TEXW, TMP_TEXH)),
            aplusd(R_TRXDIR, trxdir_pack(0)),
            giftag(image_qwords, eop, 2, 0, 0),    # IMAGE
        ]
        # Solid texture: each image qword is the same four-lane splat.
        lane32 = texel & 0xFFFFFFFF
        splat = lane32 | (lane32 << 32) | (lane32 << 64) | (lane32 << 96)
        packets += [splat] * image_qwords
    return packets
|
||
|
||
def build_feeder_staging_colored(prims):
    """Build the feeder staging word list for per-prim colored triangles.

    Layout: [0] count, [1] FRAME, [2] ALPHA, [3] TEST, [4] ZBUF, [5] TEX0
    (unity texture + MODULATE), [6] PRIM, then RGBAQ / UV / XYZ2 for each
    vertex of each prim.

    Args:
        prims: list of ``(verts, z, (r, g, b))``; each prim's three vertices
            share its color and use ``TMP_UV`` in vertex order.

    Returns:
        list[int]: the staging words.
    """
    words = [
        len(prims),                                                    # [0] count
        frame_1_psmct32(CAP_FBW),                                      # [1] FRAME
        alpha_pack(0, 1, 0, 1),                                        # [2] ALPHA
        test1_geq(),                                                   # [3] TEST
        zbuf1_pack(2),                                                 # [4] ZBUF
        tex0_pack(CAP_TBP1, TMP_TBW, psm=0x00, tw=2, th=2, tfx=0),     # [5] TEX0: UNITY tex + MODULATE
        prim_tri_tme_abe(),                                            # [6] PRIM
    ]
    for verts, z, rgb in prims:
        red, green, blue = rgb
        for corner, (sx, sy) in enumerate(verts):
            tu, tv = TMP_UV[corner]
            words += [
                rgbaq_data(red, green, blue),  # MODULATE: this color flows through the unity texel
                uv_data(tu, tv),
                xyz2_dataz(sx, sy, z),
            ]
    return words
|
||
|
||
# Ch334 — NATIVE rectangle record: 3 words (RGBAQ color, corner0 XYZ2, corner1 XYZ2). The feeder
|
||
# expands one record into two colored triangles. count word = {rect_count[31:16], tri_count[15:0]}.
|
||
def rect_record(tile, rgb, z):
    """Build a native 3-word rectangle record for tile ``tile``.

    Args:
        tile: tile index on the 4x4 grid, ``row * 4 + col``.
        rgb: ``(r, g, b)`` color, unpacked into ``rgbaq_data``.
        z: depth used for both corners.

    Returns:
        list[int]: ``[RGBAQ, corner0 XYZ2, corner1 XYZ2]`` where the corners
        are the rectangle's top-left and bottom-right, inset inside the tile.
    """
    row, col = divmod(tile, 4)
    x0, y0 = col * 16 + 1, row * 16 + 1
    x1, y1 = col * 16 + 14, row * 16 + 14
    return [rgbaq_data(*rgb), xyz2_dataz(x0, y0, z), xyz2_dataz(x1, y1, z)]
|
||
|
||
# Ch335 — GOURAUD: per-VERTEX color, so the GS interpolates a smooth gradient across the triangle.
|
||
# prims = [(verts, z, [rgb0, rgb1, rgb2]), ...] (one rgb per vertex)
|
||
def build_feeder_staging_gouraud(prims):
    """Build the feeder staging word list for per-vertex colored triangles.

    Same header as the other staging builders (count, FRAME, ALPHA, TEST,
    ZBUF, TEX0 with the unity texture + MODULATE, PRIM), followed by
    RGBAQ / UV / XYZ2 per vertex.

    Args:
        prims: list of ``(verts, z, cols)`` where ``cols[i]`` is the
            ``(r, g, b)`` of vertex ``i``.

    Returns:
        list[int]: the staging words.
    """
    words = [
        len(prims),
        frame_1_psmct32(CAP_FBW),
        alpha_pack(0, 1, 0, 1),
        test1_geq(),
        zbuf1_pack(2),
        tex0_pack(CAP_TBP1, TMP_TBW, psm=0x00, tw=2, th=2, tfx=0),  # unity + MODULATE
        # gs_stub ignores PRIM.IIP — interp is driven by per-vertex colors
        prim_tri_tme_abe(),
    ]
    for verts, z, cols in prims:
        for corner, (sx, sy) in enumerate(verts):
            words.append(rgbaq_data(*cols[corner]))
            words.append(uv_data(*TMP_UV[corner]))
            words.append(xyz2_dataz(sx, sy, z))
    return words
|
||
def build_feeder_staging_native(tris, rects):
    """Build a staging word list mixing triangle records and native rect records.

    Word 0 packs ``{rect_count[31:16], tri_count[15:0]}``; it is followed by
    the shared state header, all triangle vertices, then one ``rect_record``
    per rectangle.  Rectangle depths continue the 0x100-per-prim Z sequence
    after the triangles.

    Args:
        tris: list of ``(verts, z, rgb)``.
        rects: list of ``(tile, rgb)``.

    Returns:
        list[int]: the staging words.
    """
    tri_count = len(tris)
    words = [
        ((len(rects) & 0xFFFF) << 16) | (tri_count & 0xFFFF),         # [0] {rect_count, tri_count}
        frame_1_psmct32(CAP_FBW),
        alpha_pack(0, 1, 0, 1),
        test1_geq(),
        zbuf1_pack(2),
        tex0_pack(CAP_TBP1, TMP_TBW, psm=0x00, tw=2, th=2, tfx=0),    # unity tex + MODULATE
        prim_tri_tme_abe(),
    ]
    for verts, z, rgb in tris:
        for corner, (sx, sy) in enumerate(verts):
            words.append(rgbaq_data(*rgb))
            words.append(uv_data(*TMP_UV[corner]))
            words.append(xyz2_dataz(sx, sy, z))
    for n, (tile, rgb) in enumerate(rects):
        depth = 0x0000_5000 + (tri_count + n) * 0x100
        words.extend(rect_record(tile, rgb, depth))
    return words
|
||
|
||
def build_feeder_staging(tris):
    """Build the feeder staging word list for textured triangles.

    Layout: [0] count, [1] FRAME, [2] ALPHA, [3] TEST, [4] ZBUF, [5] TEX0
    (the texture uploaded at CAP_TBP0), [6] PRIM, then RGBAQ / UV / XYZ2 for
    each vertex of each triangle.

    Args:
        tris: list of ``(verts, z)``; vertex ``i`` uses ``TMP_UV[i]``.

    Returns:
        list[int]: the staging words.
    """
    words = [
        len(tris),                                              # [0] count
        frame_1_psmct32(CAP_FBW),                               # [1] FRAME
        alpha_pack(0, 1, 0, 1),                                 # [2] ALPHA (source-over)
        test1_geq(),                                            # [3] TEST  (GEQUAL)
        zbuf1_pack(2),                                          # [4] ZBUF
        tex0_pack(CAP_TBP0, TMP_TBW, psm=0x00, tw=2, th=2),     # [5] TEX0  (-> the uploaded texture)
        prim_tri_tme_abe(),                                     # [6] PRIM  (combined: TME+ABE)
    ]
    for verts, z in tris:
        for corner, (sx, sy) in enumerate(verts):
            tu, tv = TMP_UV[corner]
            words += [
                rgbaq_data(0x00, 0x00, 0x00),  # textured -> color from texel
                uv_data(tu, tv),
                xyz2_dataz(sx, sy, z),
            ]
    return words
|
||
|
||
def write_feeder_stg_mem(filename, words, banner, total=256):
    """Write a feeder staging image as a ``$readmemh`` text file under ``OUT``.

    The file starts with two ``//`` comment lines (the banner and a note
    giving the memory range), followed by exactly ``total`` lines of 16 hex
    digits: ``words`` masked to 64 bits, then zero padding.

    Args:
        filename: name of the file to create inside ``OUT``.
        words: staging words to write, in address order.
        banner: free-form text for the first comment line.
        total: depth of the staging memory in 64-bit words.

    Raises:
        ValueError: if ``words`` holds more than ``total`` entries.  An
            oversized list used to be written out silently (the padding loop
            simply did nothing), producing an image deeper than the memory
            it is loaded into.
    """
    words = list(words)
    if len(words) > total:
        raise ValueError(
            f"{filename}: {len(words)} staging words exceed the {total}-word feeder_stg depth"
        )

    lines = [f"{x & 0xFFFFFFFFFFFFFFFF:016x}\n" for x in words]
    lines.extend(f"{0:016x}\n" for _ in range(total - len(words)))

    # Validation happens before open() so a rejected list never truncates an
    # existing fixture file.
    with open(os.path.join(OUT, filename), "w") as f:
        f.write(f"// {banner}\n// $readmemh into feeder_stg [0:{total-1}] (64-bit words).\n")
        f.writelines(lines)
|
||
|
||
# Bake the runtime-feeder setup fixtures plus the two 4-triangle staging lists.
feeder_setup_payload = build_feeder_setup_payload()
feeder_setup_qwc = len(feeder_setup_payload)
feeder_setup_bootlet = build_textured_demo_bootlet_disp(
    feeder_setup_qwc,
    CAP_DISPLAY1_HI,
    CAP_FBW,
)
write_bios_mem(
    "bios_feeder_setup.mem",
    feeder_setup_bootlet,
    f"Ch330 SETUP-ONLY bootlet ({len(feeder_setup_bootlet)} words; DISPLAY1=64x64; QWC={feeder_setup_qwc}; texture upload, NO tris)",
)
write_payload_mem(
    "payload_feeder_setup.mem",
    feeder_setup_payload,
    f"Ch330 SETUP-ONLY GIF payload ({feeder_setup_qwc} qwords): blue-texture upload to CAP_TBP0 + EOP, no primitives",
)
write_feeder_stg_mem(
    "feeder_stg_cap4.mem",
    build_feeder_staging(FEEDER_TRIS),
    "Ch330 feeder staging A: 4 combined-TAZ tris in tile t0 (CAP_T0) — same scene as the baked combined render",
)
write_feeder_stg_mem(
    "feeder_stg_cap4_B.mem",
    build_feeder_staging(FEEDER_TRIS_B),
    "Ch330 feeder staging B (runtime swap): 4 combined-TAZ tris in tile t15 — list B for the no-RBF-rebuild retrigger demo",
)
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Ch345a — RUNTIME FEEDER SPRITE records: textured + source-over alpha SPRITEs streamed through the feeder
|
||
# (the Ch344-proven subset: PSMCT32, affine UV, ABE source-over, TCC texel alpha). The setup bootlet uploads
|
||
# the 8x8 alpha-checker texture + draws an opaque blue BG; the feeder (sprite_mode = staging word0[33]) then
|
||
# renders 3 textured-alpha sprites over it. Runtime SPRITE ingestion (NOT authentic glyphs — that is Ch345b).
|
||
def build_sprite_feeder_setup_payload():
    """Build the sprite-feeder setup payload: alpha texture + blue background.

    Uploads the TEXALPHA_TEXW x TEXALPHA_TEXH texture produced by
    ``texalpha_texel`` (row-major, four texels per qword), then draws one
    SPRITE from (0, 0) to (63, 63) in the background color; that final
    packet carries EOP.

    Returns:
        list[int]: the GIF qwords.
    """
    frame_1_val = frame_1_psmct32(TEXALPHA_FBW)
    image_qwords = (TEXALPHA_TEXW * TEXALPHA_TEXH) // 4

    packets = [
        giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE),  # texture upload A+D
        aplusd(R_BITBLTBUF, bitbltbuf_pack(TEXALPHA_TBP, 1, 0)),
        aplusd(R_TRXPOS, trxpos_pack(0, 0)),
        aplusd(R_TRXREG, trxreg_pack(TEXALPHA_TEXW, TEXALPHA_TEXH)),
        aplusd(R_TRXDIR, trxdir_pack(0)),
        giftag(image_qwords, 0, 2, 0, 0),            # IMAGE
    ]

    for base in range(0, image_qwords * 4, 4):
        packed = 0
        for lane in range(4):
            # Linear texel number -> (column, row) inside the texture.
            ty, tx = divmod(base + lane, TEXALPHA_TEXW)
            packed |= (texalpha_texel(tx, ty) & 0xFFFFFFFF) << (32 * lane)
        packets.append(packed)

    packets += [
        giftag(1, 1, 0, 5, 0x0000_0000_000E_EEEE),   # opaque blue BG sprite, EOP
        aplusd(R_PRIM, PRIM_SPRITE),
        aplusd(R_FRAME_1, frame_1_val),
        aplusd(R_RGBAQ, rgbaq_data(TEXALPHA_BG_R, TEXALPHA_BG_G, TEXALPHA_BG_B)),
        aplusd(R_XYZ2, xyz2_data(0, 0)),
        aplusd(R_XYZ2, xyz2_data(63, 63)),
    ]
    return packets
|
||
|
||
def build_feeder_sprite_staging(sprites):
    """Build the feeder staging word list for a set of textured-alpha SPRITEs.

    sprites -- iterable of 8-tuples (x0, y0, x1, y1, u0, v0, u1, v1): the two
               corner positions of each sprite and the UV at each corner.

    The list starts with a 7-word header (word0, FRAME, ALPHA, TEST_1,
    ZBUF_1, TEX0, PRIM) and then carries six words per sprite:
    RGBAQ / UV / XYZ2 for the first corner, then the same for the second.
    """
    staging = [
        (len(sprites) & 0xFFFF) | (1 << 33),  # word0: sprite_count + sprite_mode (bit33)
        frame_1_psmct32(TEXALPHA_FBW),        # FRAME
        alpha_pack(0, 1, 0, 1),               # ALPHA source-over
        0,                                    # TEST_1 (ZTE=0, no depth)
        0,                                    # ZBUF_1
        tex0_pack(TEXALPHA_TBP, 1, psm=0, tw=3, th=3, tfx=0),  # TEX0 8x8 PSMCT32 MODULATE
        prim_sprite_tme_abe(),                # PRIM SPRITE+TME+ABE
    ]
    for left, top, right, bottom, u_lo, v_lo, u_hi, v_hi in sprites:
        # Two corners per sprite, each emitted as RGBAQ, UV, XYZ2.
        for px, py, tu, tv in ((left, top, u_lo, v_lo), (right, bottom, u_hi, v_hi)):
            staging.append(rgbaq_data(0x80, 0x80, 0x80, 0x80))
            staging.append(uv_data(tu, tv))
            staging.append(xyz2_data(px, py))
    return staging
|
||
|
||
# Ch345a fixtures: three 16x16 sprites on one row, each mapping UV (0,0)-(8,8).
SPRITE_FEEDER_SET = [
    (8, 24, 24, 40, 0, 0, 8, 8),
    (26, 24, 42, 40, 0, 0, 8, 8),
    (44, 24, 60, 40, 0, 0, 8, 8),
]
_spr_setup = build_sprite_feeder_setup_payload()
write_bios_mem(
    "bios_sprite_setup.mem",
    build_textured_demo_bootlet_disp(len(_spr_setup), TEXALPHA_DISPLAY1_HI, TEXALPHA_FBW),
    f"Ch345a sprite-feeder SETUP bootlet; DISPLAY1=64x64; QWC={len(_spr_setup)}; alpha texture + blue BG",
)
write_payload_mem(
    "payload_sprite_setup.mem",
    _spr_setup,
    f"Ch345a sprite-feeder SETUP payload ({len(_spr_setup)} qw): 8x8 alpha texture upload + opaque blue BG sprite",
)
write_feeder_stg_mem(
    "feeder_sprite.mem",
    build_feeder_sprite_staging(SPRITE_FEEDER_SET),
    f"Ch345a feeder SPRITE staging (sprite_mode word0[33]): {len(SPRITE_FEEDER_SET)} textured-alpha sprites over the BG",
)
|
||
# Ch331 — variable-size multi-tile scenes (end-of-list flush): 3 / 6 / 8 prims across tiles.
write_feeder_stg_mem(
    "feeder_scene_c1.mem",
    build_feeder_staging(scene_from_tiles(SCENE_C1_TILES)),
    f"Ch331 scene C1: {len(SCENE_C1_TILES)} prims (<TILE_PRIM_COUNT) in tiles {SCENE_C1_TILES}",
)
write_feeder_stg_mem(
    "feeder_scene_c2.mem",
    build_feeder_staging(scene_from_tiles(SCENE_C2_TILES)),
    f"Ch331 scene C2: {len(SCENE_C2_TILES)} prims (>TILE_PRIM_COUNT) in tiles {SCENE_C2_TILES}",
)
write_feeder_stg_mem(
    "feeder_scene_c3.mem",
    build_feeder_staging(scene_from_tiles(SCENE_C3_TILES)),
    f"Ch331 scene C3: {len(SCENE_C3_TILES)} prims (==FIFO_DEPTH) in tiles {SCENE_C3_TILES}",
)
|
||
# Ch332 — shape vocabulary: triangle scene, rectangle (quad) scene, mixed scene (all <= FIFO_DEPTH).
write_feeder_stg_mem(
    "feeder_shape_tri.mem",
    build_feeder_staging(scene_shapes(SHAPE_TRI)),
    "Ch332 shape TRI: 3 half-tile triangles in tiles 0/5/10",
)
write_feeder_stg_mem(
    "feeder_shape_rect.mem",
    build_feeder_staging(scene_shapes(SHAPE_RECT)),
    "Ch332 shape RECT: 3 filled quads (2 tris each = 6 prims) in tiles 0/5/10",
)
write_feeder_stg_mem(
    "feeder_shape_mixed.mem",
    build_feeder_staging(scene_shapes(SHAPE_MIXED)),
    "Ch332 shape MIXED: triangles in 0/15 + rectangles in 5/10 (6 prims)",
)
|
||
# Ch333 — colored scenes (unity texture + MODULATE; color from staging RGBAQ).
write_feeder_stg_mem(
    "feeder_color_tri.mem",
    build_feeder_staging_colored(scene_shapes_colored(COLOR_TRI)),
    "Ch333 color TRI: red/green/blue triangles in tiles 0/5/10",
)
write_feeder_stg_mem(
    "feeder_color_rect.mem",
    build_feeder_staging_colored(scene_shapes_colored(COLOR_RECT)),
    "Ch333 color RECT: red/green/blue filled quads in tiles 0/5/10 (6 prims)",
)
write_feeder_stg_mem(
    "feeder_color_mix.mem",
    build_feeder_staging_colored(scene_shapes_colored(COLOR_MIX)),
    "Ch333 color MIX: red tri(0) + green rect(5) + blue tri(10) + yellow rect(15) — shape & color vary",
)
|
||
# Ch334 — NATIVE rectangle records (1 record -> 2 tris in the feeder). NATIVE_RECT must match the
# Ch333 COLOR_RECT two-triangle scene visually + by records (3 rects = 6 prims).
write_feeder_stg_mem(
    "feeder_native_rect.mem",
    build_feeder_staging_native(
        [],
        [(0, COL_RED), (5, COL_GREEN), (10, COL_BLUE)],
    ),
    "Ch334 native RECT: 3 native-rect records (red/green/blue quads 0/5/10) — matches color_rect, records=6",
)
write_feeder_stg_mem(
    "feeder_native_mix.mem",
    build_feeder_staging_native(
        [(tri_in_tile(0), 0x0000_5000, COL_RED)],
        [(5, COL_GREEN), (10, COL_BLUE), (15, COL_YELLOW)],
    ),
    "Ch334 native MIX: red triangle(0) + 3 native rects (green5/blue10/yellow15), records=1+6=7",
)
|
||
# Ch335 — GOURAUD per-vertex color (smooth gradients). _g5 = the 2 triangles of tile-5's quad.
COL_WHITE = (0xFF, 0xFF, 0xFF)
_g5 = rect_tris_in_tile(5)  # [UL=(TL,TR,BL), LR=(TR,BL,BR)]

write_feeder_stg_mem(
    "feeder_gouraud_tri.mem",
    build_feeder_staging_gouraud([
        (tri_in_tile(0), 0x0000_5000, [COL_RED, COL_GREEN, COL_BLUE]),
    ]),
    "Ch335 gouraud TRI: tile0 triangle, v0=red v1=green v2=blue -> RGB gradient, records=1",
)
write_feeder_stg_mem(
    "feeder_gouraud_rect.mem",
    build_feeder_staging_gouraud([
        (_g5[0], 0x0000_5000, [COL_RED, COL_GREEN, COL_BLUE]),
        (_g5[1], 0x0000_5100, [COL_GREEN, COL_BLUE, COL_WHITE]),
    ]),
    "Ch335 gouraud RECT: tile5 quad (2 tris), corners red/green/blue/white -> gradient quad, records=2",
)
write_feeder_stg_mem(
    "feeder_gouraud_mix.mem",
    build_feeder_staging_gouraud([
        (tri_in_tile(0), 0x0000_5000, [COL_RED, COL_RED, COL_RED]),
        (tri_in_tile(10), 0x0000_5100, [COL_RED, COL_GREEN, COL_BLUE]),
    ]),
    "Ch335 gouraud MIX: flat red tri(0) + RGB gradient tri(10), records=2",
)
|
||
# Ch336 — >FIFO_DEPTH ACCUMULATION: 14 tris across tiles 0-13 (FIFO depth 8 -> 2 batches, 8+6).
# Prims 0-7 (tiles 0-7) RED = batch 0; prims 8-13 (tiles 8-13) BLUE = batch 1. If batches wipe each
# other, the RED batch-0 tiles go green; accumulation keeps RED *and* BLUE simultaneously visible.
ACCUM_PRIMS = [
    (tri_in_tile(t), 0x0000_5000 + t*0x100, COL_RED if t < 8 else COL_BLUE)
    for t in range(14)
]
write_feeder_stg_mem(
    "feeder_accum.mem",
    build_feeder_staging_colored(ACCUM_PRIMS),
    "Ch336 accum: 14 tris (>FIFO_DEPTH); batch0 tiles 0-7 RED, batch1 tiles 8-13 BLUE -> both survive",
)

# Ch336 board diagnostic — SWAPPED colors: batch0 (tiles 0-7) BLUE, batch1 (tiles 8-13) RED. Localizes
# the board color bug: if the swap shows all-BLUE, the FIRST batch's color is sticking for the scene.
ACCUM_SWAP = [
    (tri_in_tile(t), 0x0000_5000 + t*0x100, COL_BLUE if t < 8 else COL_RED)
    for t in range(14)
]
write_feeder_stg_mem(
    "feeder_accum_swap.mem",
    build_feeder_staging_colored(ACCUM_SWAP),
    "Ch336 diag: batch0 tiles 0-7 BLUE, batch1 tiles 8-13 RED (color-swapped accum)",
)

# Ch336 DEFINITIVE diag — batch0 BLUE, batch1 GREEN. GREEN shares NO channel with RED (the suspected
# default) or BLUE (batch0), so batch1's rendered color is unambiguous:
#   GREEN bottom -> batch1 color tracks its staged value (bug not here)
#   RED bottom   -> batch1 falls back to a constant RED (staged color ignored)
#   BLUE bottom  -> batch1 reuses batch0's color
ACCUM_GREEN = [
    (tri_in_tile(t), 0x0000_5000 + t*0x100, COL_BLUE if t < 8 else COL_GREEN)
    for t in range(14)
]
write_feeder_stg_mem(
    "feeder_accum_green.mem",
    build_feeder_staging_colored(ACCUM_GREEN),
    "Ch336 diag: batch0 tiles 0-7 BLUE, batch1 tiles 8-13 GREEN",
)
|
||
# Ch337 — clean-retrigger acceptance scenes. TWO distinct >FIFO_DEPTH scenes (14 prims each = 2
# batches) that occupy DIFFERENT tiles AND colors, so any leftover from a prior scene is visible:
#   A: tiles 0-13 RED      B: tiles 2-15 BLUE
# A then B then A: each scene's first (full-flush) batch wipes the whole FB, so the final FB must be
# EXACTLY the last scene with no residue from the other.
SCENE_A = [
    (tri_in_tile(t), 0x0000_5000 + t*0x100, COL_RED)
    for t in range(14)
]
SCENE_B = [
    (tri_in_tile(t), 0x0000_5000 + (t-2)*0x100, COL_BLUE)
    for t in range(2, 16)
]
write_feeder_stg_mem(
    "feeder_scene_a.mem",
    build_feeder_staging_colored(SCENE_A),
    "Ch337 scene A: 14 tris, tiles 0-13, RED (>FIFO_DEPTH, 2 batches)",
)
write_feeder_stg_mem(
    "feeder_scene_b.mem",
    build_feeder_staging_colored(SCENE_B),
    "Ch337 scene B: 14 tris, tiles 2-15, BLUE (>FIFO_DEPTH, 2 batches)",
)
|
||
|
||
# Ch338 — CROSS-BATCH Z. A NEAR (RED) and a FAR (BLUE) triangle occupy the SAME tile (tile 5) but are
# SPLIT across FIFO batches. ZBUF clear = 0x4000, TEST = GEQUAL (higher Z = nearer = wins). NEAR=0x7000,
# FAR=0x5000, fillers=0x6000 (>= clear so they draw in their own tiles). 14 prims each => batch0 = first
# 8, batch1 = last 6, so the overlap tile is rendered in BOTH batches. With persistent cross-batch Z the
# NEAR (RED) triangle wins the overlap in BOTH orderings; with per-batch Z, NEAR_FIRST wrongly shows the
# later FAR (BLUE) batch on top.
ZP_NEAR, ZP_FAR, ZP_MID = 0x0000_7000, 0x0000_5000, 0x0000_6000
_OT = 5                        # overlap tile
_B0F = [0, 1, 2, 3, 4, 6, 7]   # filler tiles for batch 0
_B1F = [8, 9, 10, 11, 12]      # filler tiles for batch 1

# NEAR_FIRST: near RED in batch0, far BLUE (same tile) in batch1 -> persistent Z must keep RED on top.
ZPERSIST_NEAR_FIRST = [
    (tri_in_tile(_OT), ZP_NEAR, COL_RED),
    *[(tri_in_tile(t), ZP_MID, COL_RED) for t in _B0F],
    (tri_in_tile(_OT), ZP_FAR, COL_BLUE),
    *[(tri_in_tile(t), ZP_MID, COL_BLUE) for t in _B1F],
]

# FAR_FIRST: far BLUE in batch0, near RED (same tile) in batch1 -> near wins either way (control case).
ZPERSIST_FAR_FIRST = [
    (tri_in_tile(_OT), ZP_FAR, COL_BLUE),
    *[(tri_in_tile(t), ZP_MID, COL_BLUE) for t in _B0F],
    (tri_in_tile(_OT), ZP_NEAR, COL_RED),
    *[(tri_in_tile(t), ZP_MID, COL_RED) for t in _B1F],
]

write_feeder_stg_mem(
    "feeder_zpersist_near_first.mem",
    build_feeder_staging_colored(ZPERSIST_NEAR_FIRST),
    "Ch338 cross-batch Z: NEAR(RED,b0)+FAR(BLUE,b1) overlap tile 5 -> tile5 must be RED (near wins)",
)
write_feeder_stg_mem(
    "feeder_zpersist_far_first.mem",
    build_feeder_staging_colored(ZPERSIST_FAR_FIRST),
    "Ch338 cross-batch Z: FAR(BLUE,b0)+NEAR(RED,b1) overlap tile 5 -> tile5 must be RED (near wins)",
)
|
||
# Ch338 — MIXED colored/gradient cross-batch overlap (Codex ask). The NEAR prim (batch0) is a GOURAUD
# gradient (RED/GREEN/BLUE per vertex); the FAR prim (batch1, same tile 5) is flat WHITE. Fillers are
# flat (batch0 GREEN, batch1 WHITE). With persistent Z the near GRADIENT wins the overlap, so tile 5
# shows interpolated colors and ZERO white (the far prim is Z-rejected). Per-batch Z would paint tile 5
# white. Built with the gouraud staging so per-vertex colors flow (unity tex + MODULATE).
ZPERSIST_GRAD = [
    (tri_in_tile(_OT), ZP_NEAR, [COL_RED, COL_GREEN, COL_BLUE]),
    *[(tri_in_tile(t), ZP_MID, [COL_GREEN]*3) for t in _B0F],
    (tri_in_tile(_OT), ZP_FAR, [COL_WHITE]*3),
    *[(tri_in_tile(t), ZP_MID, [COL_WHITE]*3) for t in _B1F],
]
write_feeder_stg_mem(
    "feeder_zpersist_grad.mem",
    build_feeder_staging_gouraud(ZPERSIST_GRAD),
    "Ch338 cross-batch Z: NEAR gradient(RGB,b0) + FAR flat WHITE(b1) overlap tile 5 -> gradient wins, no white",
)
|
||
|
||
# Ch342 — PERSPECTIVE feeder staging: drive the Ch301 perspective path THROUGH the feeder (word0[32]=1,
# per-vertex RGBAQ(+Q_fp)/ST(S_fp,T_fp)/XYZ2) on the tiled/multiprim profile w/ PERSPECTIVE_CORRECT=1.
# A perspective quad (2 tris): top edge FAR (w=8), bottom NEAR (w=1), checkerboard texture (DECAL) so
# rows compress toward the top under correct perspective. Reuses persp_texel/persp_attrs/st_data.
#
# Texture base: block 100 = byte 0x6400, clear of the 64x64 FB.
PERSP_FEEDER_TBP = 100
|
||
def build_feeder_staging_persp(tris):
    """Build the feeder staging word list for perspective-format triangles.

    tris -- list of triangles; each triangle is a sequence of vertices
            (sx, sy, u, v, w). Every vertex is emitted with the same Z
            (0x0000_5000).

    The list starts with a 7-word header (word0, FRAME, ALPHA, TEST, ZBUF,
    TEX0, PRIM) and then carries three words per vertex: RGBAQ with Q, ST,
    and XYZ2, using the fixed-point values returned by persp_attrs().
    """
    # Ch342 — PRIM = TRIANGLE + TME, ABE=0 (FST=0) to MATCH the authentic cube (ABE=0). ABE=0 keeps the
    # prim NON-combined -> the S1/legacy perspective path (where gs_persp_uv actually launches), NOT the
    # combined-TAZ tiled path (whose perspective integration is a separate follow-on bug).
    prim_tri_tme = 3 | (1 << 4)  # TRI + TME, ABE clear
    vertex_z = 0x0000_5000

    staging = [
        len(tris) | (1 << 32),  # word0: tri_count + perspective format flag (bit 32)
        frame_1_psmct32(CAP_FBW),
        alpha_pack(0, 1, 0, 1),
        test1_geq(),
        zbuf1_pack(2),
        tex0_pack(PERSP_FEEDER_TBP, PERSP_TBW, psm=0x00, tw=4, th=4, tfx=1),  # checkerboard, DECAL
        prim_tri_tme,
    ]
    for triangle in tris:
        for sx, sy, u, v, wq in triangle:
            s_fp, t_fp, q_fp = persp_attrs(u, v, wq)
            staging.extend((
                rgbaq_with_q(0x00, 0x00, 0x00, q_fp),
                st_data(s_fp, t_fp),
                xyz2_dataz(sx, sy, vertex_z),
            ))
    return staging
|
||
# A 32x40 perspective quad in the 64x64 FB: x 16..48, y 12..52; top FAR (w=8), bottom NEAR (w=1);
# UV over the 16x16 checkerboard (u,v 0..16). Two tris share the TL-BR diagonal.
_PX0, _PX1, _PY0, _PY1 = 16, 48, 12, 52
_PWF, _PWN = PERSP_W_FAR, PERSP_W_NEAR
# Corner vertices as (sx, sy, u, v, w).
_pv_tl = (_PX0, _PY0, 0, 0, _PWF)
_pv_tr = (_PX1, _PY0, 16, 0, _PWF)
_pv_bl = (_PX0, _PY1, 0, 16, _PWN)
_pv_br = (_PX1, _PY1, 16, 16, _PWN)
PERSP_QUAD = [
    [_pv_tl, _pv_tr, _pv_bl],
    [_pv_tr, _pv_bl, _pv_br],
]
write_feeder_stg_mem(
    "feeder_persp.mem",
    build_feeder_staging_persp(PERSP_QUAD),
    "Ch342 perspective quad through the feeder (word0[32]=1, RGBAQ/ST/XYZ2; top FAR w=8, bottom NEAR w=1)",
)
|
||
# Checkerboard texture for backdoor-load into VRAM at PERSP_FEEDER_TBP (16x16 PSMCT32, linear):
# one 8-digit hex texel per line, rows outermost, after a single "//" header line.
with open(os.path.join(OUT, "feeder_persp_tex.mem"), "w") as _f:
    _f.write("// Ch342 16x16 checkerboard (persp_texel) for backdoor VRAM load at PERSP_FEEDER_TBP\n")
    for _v in range(PERSP_TEXH):
        for _u in range(PERSP_TEXW):
            _f.write(f"{persp_texel(_u, _v) & 0xFFFFFFFF:08x}\n")
|
||
|
||
# Ch342 BOARD profile (GS_FEEDER_PERSP_DEMO) — setup-ONLY bootlet that uploads the 16x16 checkerboard
# to PERSP_FEEDER_TBP (the feeder then draws the perspective floor from feeder_persp.mem). No triangles
# in the payload (the feeder owns them). Small (16x16 = 64 image qwords) -> fits the default 4 KiB EE RAM.
|
||
def build_persp_feeder_setup_payload():
    """Build the Ch342 setup-only GIF payload: the checkerboard texture upload.

    Returns a list of qwords holding an A+D packet (BITBLTBUF / TRXPOS /
    TRXREG / TRXDIR) followed by an IMAGE packet (EOP set) with the
    PERSP_TEXW x PERSP_TEXH texels from persp_texel(), row-major, packed
    four 32-bit texels per qword (lane 0 in the least-significant 32 bits).
    """
    image_qwords = (PERSP_TEXW * PERSP_TEXH) // 4
    payload = [
        giftag(1, 0, 0, 4, int('E' * 4, 16)),  # A+D: BITBLTBUF/TRXPOS/TRXREG/TRXDIR
        aplusd(R_BITBLTBUF, bitbltbuf_pack(PERSP_FEEDER_TBP, PERSP_TBW, 0x00)),
        aplusd(R_TRXPOS, trxpos_pack(0, 0)),
        aplusd(R_TRXREG, trxreg_pack(PERSP_TEXW, PERSP_TEXH)),
        aplusd(R_TRXDIR, trxdir_pack(0)),
        giftag(image_qwords, 1, 2, 0, 0),  # IMAGE, EOP
    ]

    texels = [persp_texel(u, v) for v in range(PERSP_TEXH) for u in range(PERSP_TEXW)]
    for start in range(0, 4 * image_qwords, 4):
        packed = 0
        for lane, texel in enumerate(texels[start:start + 4]):
            packed |= (texel & 0xFFFFFFFF) << (32 * lane)
        payload.append(packed)
    return payload
|
||
# Ch342 GS_FEEDER_PERSP_DEMO fixtures: the setup payload and the bootlet that kicks it off.
_persp_payload = build_persp_feeder_setup_payload()
write_payload_mem(
    "payload_persp_feeder_setup.mem",
    _persp_payload,
    f"Ch342 GS_FEEDER_PERSP_DEMO setup payload ({len(_persp_payload)} qw): 16x16 checkerboard upload @ TBP={PERSP_FEEDER_TBP}, no tris",
)
write_bios_mem(
    "bios_persp_feeder_setup.mem",
    build_textured_demo_bootlet_disp(len(_persp_payload), CAP_DISPLAY1_HI, CAP_FBW),
    f"Ch342 GS_FEEDER_PERSP_DEMO setup bootlet (DISPLAY1=64x64; QWC={len(_persp_payload)}; checkerboard upload, no tris)",
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Ch313 — FULL PSMCT16 FRAMEBUFFER MODE. Same 4x4 (64x64) bin-buffer scene as Ch312,
# but the WHOLE framebuffer is PSMCT16: FRAME_1.PSM=PSMCT16 (relaxed close_combined
# gate accepts it when TILE_COLOR_PSMCT16=1), the on-chip tile RAM is 16-bit, the
# flush writes RGB5A1 halfword lanes, and DISPFB1.PSM=PSMCT16 scans it out. The
# textures stay PSMCT32 DECAL (close_combined still requires tex0_psm==PSMCT32).
# A 64x64 PSMCT16 FB is 64*64*2 = 0x2000 (8 KiB) — HALF the 0x4000 PSMCT32 FB — so
# this demo runs in a 16 KiB VRAM (vs Ch312's 32 KiB), textures @ 0x2000+. This is
# the concrete framebuffer-memory saving that motivates the LPDDR-backed FB phase.
F16_FBW = 1
F16_DISPLAY1_HI = (63 << 12) | 63  # DW=63 (64 wide), DH=63 (64 tall)
# Texture bases at 0x2000 / 0x2400 / 0x2800 (above the 8 KiB FB).
F16_TBP0_BG, F16_TBP0_MID, F16_TBP0_FG = 32, 36, 40
# Triangles as (sx, sy, z) vertex lists; Z is constant within each triangle.
F16_P0 = [(x, y, 0x0000_5000) for x, y in ((2, 2), (30, 2), (2, 30))]       # blue top-left
F16_P1 = [(x, y, 0x0000_6000) for x, y in ((20, 18), (50, 18), (35, 40))]   # red mid, crosses seams
F16_P2 = [(x, y, 0x0000_5800) for x, y in ((52, 52), (60, 52), (52, 60))]   # white corner tile
|
||
|
||
|
||
def f16_triangle(tbp0, verts, eop, first, fbw=F16_FBW):
    """Build one A+D GIF packet that draws a textured triangle into a PSMCT16 frame.

    tbp0  -- texture base pointer passed to tex0_pack().
    verts -- vertices as (sx, sy, sz); vertex i takes its UV from TMP_UV[i].
             The register count assumes exactly three vertices.
    eop   -- EOP flag forwarded to giftag().
    first -- when true, the packet additionally starts with a FRAME_1 write
             (Ch313: FRAME is PSMCT16).
    fbw   -- frame buffer width handed to frame_1_psmct16() (Ch321: fbw param).

    Returns the packet as a list of qwords: the GIFtag, the optional FRAME_1
    write, PRIM / ALPHA_1 / TEST_1 / ZBUF_1 / TEX0_1, then RGBAQ / UV / XYZ2
    for each vertex.
    """
    tex0_val = tex0_pack(tbp0, TMP_TBW, psm=0x00, tw=2, th=2)
    setup_regs = 6 if first else 5      # FRAME_1 is only written by the first prim
    nreg = setup_regs + 9               # plus 3 registers for each of 3 vertices

    packet = [giftag(1, eop, 0, nreg, int('E' * nreg, 16))]
    if first:
        packet.append(aplusd(R_FRAME_1, frame_1_psmct16(fbw)))
    packet += [
        aplusd(R_PRIM, prim_tri_tme_abe()),
        aplusd(R_ALPHA_1, alpha_pack(0, 1, 0, 1)),
        aplusd(R_TEST_1, test1_geq()),
        aplusd(R_ZBUF_1, zbuf1_pack(2)),
        aplusd(R_TEX0_1, tex0_val),
    ]
    for idx, (sx, sy, sz) in enumerate(verts):
        tu, tv = TMP_UV[idx]
        packet += [
            aplusd(R_RGBAQ, rgbaq_data(0x00, 0x00, 0x00)),
            aplusd(R_UV, uv_data(tu, tv)),
            aplusd(R_XYZ2, xyz2_dataz(sx, sy, sz)),
        ]
    return packet
|
||
|
||
|
||
def build_tile_psmct16fb_demo_payload():
    """Build the Ch313 GIF payload: three texture uploads, then three triangles.

    Solid blue / red / white textures are uploaded to the F16_TBP0_* bases,
    then P0 / P1 / P2 are drawn in that order. Only the first triangle is
    built with first=True and only the last one with eop=1.
    """
    uploads = (
        (F16_TBP0_BG, (0x00, 0x00, 0xFF, 0x80)),    # blue
        (F16_TBP0_MID, (0xFF, 0x00, 0x00, 0x80)),   # red
        (F16_TBP0_FG, (0xFF, 0xFF, 0xFF, 0x40)),    # white
    )
    prims = (
        (F16_TBP0_BG, F16_P0),
        (F16_TBP0_MID, F16_P1),
        (F16_TBP0_FG, F16_P2),
    )

    payload = []
    for tbp0, rgba in uploads:
        payload += b4_texture_upload(tbp0, solid_texel(*rgba))

    last = len(prims) - 1
    for idx, (tbp0, verts) in enumerate(prims):
        payload += f16_triangle(tbp0, verts, eop=1 if idx == last else 0, first=(idx == 0))
    return payload
|
||
|
||
|
||
# Ch313 fixtures: build the payload, guard its size, then write the bootlet + payload images.
f16_demo_payload = build_tile_psmct16fb_demo_payload()
f16_demo_qwc = len(f16_demo_payload)
assert f16_demo_qwc <= 95, f"tile_psmct16fb payload {f16_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
f16_demo_bootlet = build_psmct16_demo_bootlet_disp(f16_demo_qwc, F16_DISPLAY1_HI, F16_FBW)

write_bios_mem(
    "bios_tile_psmct16fb.mem",
    f16_demo_bootlet,
    f"Ch313 FULL-PSMCT16-FB 4x4-GRID BIOS bootlet ({len(f16_demo_bootlet)} words active, padded to "
    f"{BIOS_TOTAL_WORDS}); DISPLAY1 = 64x64 PSMCT16; QWC={f16_demo_qwc}",
)
write_payload_mem(
    "payload_tile_psmct16fb.mem",
    f16_demo_payload,
    f"Ch313 FULL-PSMCT16-FB 4x4-GRID GIF payload ({f16_demo_qwc} qwords active at byte 0x100, padded to "
    f"{RAM_TOTAL_QWORDS}); P0/P1/P2 over 64x64 FRAME.PSM=PSMCT16 framebuffer (8 KiB) in 16 KiB VRAM",
)
|
||
|
||
|
||
# ============================================================================
# Ch321 — 128x128 PSMCT16 framebuffer (32 KiB) for LPDDR4B line-buffer scanout.
# Same scene as the 64x64 psmct16fb demo, scaled 2x. DISPLAY1 window 128x128,
# FBW=2 (128 px/row). Textures relocated above the 32 KiB FB. The frame is the
# larger LPDDR4B framebuffer Ch321 scans out.
# ============================================================================
F128_FBW = 2  # 128 px / 64 = 2 pages
F128_DISPLAY1_HI = (127 << 12) | 127  # DW=127 (128 wide), DH=127 (128 tall)
# Texture bases at 0x8000/0x8400/0x8800 (above the 32 KiB FB).
F128_TBP0_BG, F128_TBP0_MID, F128_TBP0_FG = 128, 132, 136
# Triangles as (sx, sy, z) vertex lists — each is the matching F16_P* scaled 2x.
F128_P0 = [(x, y, 0x0000_5000) for x, y in ((4, 4), (60, 4), (4, 60))]              # blue (2x of F16_P0)
F128_P1 = [(x, y, 0x0000_6000) for x, y in ((40, 36), (100, 36), (70, 80))]         # red mid (2x F16_P1)
F128_P2 = [(x, y, 0x0000_5800) for x, y in ((104, 104), (120, 104), (104, 120))]    # white corner (2x F16_P2)
|
||
|
||
|
||
def build_tile_lpddr128_demo_payload():
    """Build the Ch321 GIF payload: three texture uploads, then three triangles.

    Solid blue / red / white textures are uploaded to the F128_TBP0_* bases,
    then the F128 triangles are drawn with fbw=F128_FBW. Only the first
    triangle is built with first=True and only the last one with eop=1.
    """
    uploads = (
        (F128_TBP0_BG, (0x00, 0x00, 0xFF, 0x80)),    # blue
        (F128_TBP0_MID, (0xFF, 0x00, 0x00, 0x80)),   # red
        (F128_TBP0_FG, (0xFF, 0xFF, 0xFF, 0x40)),    # white
    )
    prims = (
        (F128_TBP0_BG, F128_P0),
        (F128_TBP0_MID, F128_P1),
        (F128_TBP0_FG, F128_P2),
    )

    payload = []
    for tbp0, rgba in uploads:
        payload += b4_texture_upload(tbp0, solid_texel(*rgba))

    last = len(prims) - 1
    for idx, (tbp0, verts) in enumerate(prims):
        payload += f16_triangle(
            tbp0, verts, eop=1 if idx == last else 0, first=(idx == 0), fbw=F128_FBW
        )
    return payload
|
||
|
||
|
||
# Ch321 fixtures: build the payload, guard its size, then write the bootlet + payload images.
f128_demo_payload = build_tile_lpddr128_demo_payload()
f128_demo_qwc = len(f128_demo_payload)
assert f128_demo_qwc <= 95, f"tile_lpddr128 payload {f128_demo_qwc} qwords may collide with heartbeat splicer at qword 115"
f128_demo_bootlet = build_psmct16_demo_bootlet_disp(f128_demo_qwc, F128_DISPLAY1_HI, F128_FBW)

write_bios_mem(
    "bios_tile_lpddr128.mem",
    f128_demo_bootlet,
    f"Ch321 128x128 PSMCT16 FB BIOS bootlet ({len(f128_demo_bootlet)} words active, padded to "
    f"{BIOS_TOTAL_WORDS}); DISPLAY1 = 128x128 PSMCT16; FBW=2; QWC={f128_demo_qwc}",
)
write_payload_mem(
    "payload_tile_lpddr128.mem",
    f128_demo_payload,
    f"Ch321 128x128 PSMCT16 FB GIF payload ({f128_demo_qwc} qwords active at byte 0x100, padded to "
    f"{RAM_TOTAL_QWORDS}); P0/P1/P2 over 128x128 FRAME.PSM=PSMCT16 framebuffer (32 KiB) in 64 KiB VRAM",
)
print(f"[bake] wrote Ch321 bios_tile_lpddr128.mem ({len(f128_demo_bootlet)} active) + payload_tile_lpddr128.mem ({f128_demo_qwc} active)")
|
||
|
||
# Bake summary: one line per fixture pair, newest chapter first.
print(f"[bake] wrote Ch316 bios_tile_late.mem ({len(late_demo_bootlet)} active) + payload_tile_late.mem ({late_demo_qwc} active)")
print(f"[bake] wrote Ch315 bios_tile_cap.mem ({len(cap_demo_bootlet)} active) + payload_tile_cap.mem ({cap_demo_qwc} active)")
print(f"[bake] wrote Ch314 bios_tile_palbilinear.mem ({len(pb_demo_bootlet)} active) + payload_tile_palbilinear.mem ({pb_demo_qwc} active)")
print(f"[bake] wrote Ch313 bios_tile_psmct16fb.mem ({len(f16_demo_bootlet)} active) + payload_tile_psmct16fb.mem ({f16_demo_qwc} active)")
print(f"[bake] wrote Ch312 bios_tile_bin4x4.mem ({len(b4_demo_bootlet)} active) + payload_tile_bin4x4.mem ({b4_demo_qwc} active)")
print(f"[bake] wrote Ch311 bios_tile_bin.mem ({len(bn_demo_bootlet)} active) + payload_tile_bin.mem ({bn_demo_qwc} active)")
print(f"[bake] wrote Ch310 bios_tile_bilinear.mem ({len(bil_demo_bootlet)} active) + payload_tile_bilinear.mem ({bil_demo_qwc} active)")
print(f"[bake] wrote Ch309 bios_tile_alpha.mem ({len(tal_demo_bootlet)} active) + payload_tile_alpha.mem ({tal_demo_qwc} active)")
print(f"[bake] wrote Ch308 bios_tile_psmct16.mem ({len(t16_demo_bootlet)} active) + payload_tile_psmct16.mem ({t16_demo_qwc} active)")
print(f"[bake] wrote Ch307 bios_tile_wrap.mem ({len(twr_demo_bootlet)} active) + payload_tile_wrap.mem ({twr_demo_qwc} active)")
print(f"[bake] wrote Ch306 bios_tile_scissor.mem ({len(tsc_demo_bootlet)} active) + payload_tile_scissor.mem ({tsc_demo_qwc} active)")
print(f"[bake] wrote Ch305 bios_tile_multiprim.mem ({len(tmp_demo_bootlet)} active) + payload_tile_multiprim.mem ({tmp_demo_qwc} active)")
print(f"[bake] wrote Ch304 bios_tile2x2.mem ({len(tile2_demo_bootlet)} active) + payload_tile2x2.mem ({tile2_demo_qwc} active)")
print(f"[bake] wrote Ch303 bios_tile.mem ({len(tile_demo_bootlet)} active) + payload_tile.mem ({tile_demo_qwc} active)")
print(f"[bake] wrote Ch302 bios_combined.mem ({len(comb_demo_bootlet)} active) + payload_combined.mem ({comb_demo_qwc} active)")
print(f"[bake] wrote Ch301b bios_persp_floor.mem ({len(pfloor_demo_bootlet)} active) + payload_persp_floor.mem ({pfloor_demo_qwc} active)")
print(f"[bake] wrote Ch301 bios_persp.mem ({len(persp_demo_bootlet)} active) + payload_persp.mem ({persp_demo_qwc} active)")
print(f"[bake] wrote Ch300 bios_swz32.mem ({len(swz32_demo_bootlet)} active) + payload_swz32.mem ({swz32_demo_qwc} active)")
print(f"[bake] wrote Ch299 bios_swz8.mem ({len(swz8_demo_bootlet)} active) + payload_swz8.mem ({swz8_demo_qwc} active)")
print(f"[bake] wrote Ch298 bios_swz4.mem ({len(swz4_demo_bootlet)} active) + payload_swz4.mem ({swz4_demo_qwc} active)")
print(f"[bake] wrote Ch297 bios_clut4.mem ({len(clut4_demo_bootlet)} active) + payload_clut4.mem ({clut4_demo_qwc} active)")
print(f"[bake] wrote Ch296 bios_clut.mem ({len(clut8_demo_bootlet)} active) + payload_clut.mem ({clut8_demo_qwc} active)")
print(f"[bake] wrote Ch251 bios.mem ({len(ch251_bootlet)} active) + payload.mem ({CH251_QWC} active)")
print(f"[bake] wrote Ch146 bios_ch146.mem ({len(ch146_bootlet)} active) + payload_ch146.mem (24 active)")
print(f"[bake] wrote Brick1 bios_textured.mem ({len(tex_demo_bootlet)} active) + payload_textured.mem ({tex_demo_qwc} active)")
print(f"[bake] wrote Brick2a bios_alpha.mem ({len(alpha_demo_bootlet)} active) + payload_alpha.mem ({alpha_demo_qwc} active)")
print(f"[bake] wrote Brick2b bios_zbuffer.mem ({len(zbuf_demo_bootlet)} active) + payload_zbuffer.mem ({zbuf_demo_qwc} active)")
print(f"[bake] wrote TexTri bios_tritex.mem ({len(tritex_demo_bootlet)} active) + payload_tritex.mem ({tritex_demo_qwc} active)")
print(f"[bake] wrote Brick3 bios_triangle.mem ({len(tri_demo_bootlet)} active) + payload_triangle.mem ({tri_demo_qwc} active)")
|