Initial commit: retroDE_ps2 — first-of-its-kind PS2 GS FPGA core (DE25-Nano / Agilex 5)

RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression
(272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps,
and all dump-derived textures/traces) is excluded via .gitignore and stays local.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-29 20:10:50 -04:00
commit ec82764bef
2462 changed files with 2174303 additions and 0 deletions
+168
View File
@@ -0,0 +1,168 @@
#!/usr/bin/env python3
"""
Convert a PS2 EE ELF into a tb_ee_core_elf_runner-compatible image
manifest pair (no external deps; pure stdlib).
Emits two files at the requested output prefix, identical in format
to generate_synthetic_image.py:
<prefix>.image.hex iverilog $readmemh, @<qw_idx> directives for
populated 128-bit qwords only. Each line is
32 hex chars (MSB-first, byte 15 leftmost).
<prefix>.manifest.hex line 0 = ELF entry point (32-bit hex)
line 1 = stack-top hint (32-bit hex)
Supports ELF32 little-endian, ELFCLASS32, EM_MIPS, e_type ET_EXEC or
ET_DYN. PT_LOAD segments are placed at their physical address
(low 29 bits of p_vaddr — strips the kuseg/kseg0/kseg1 alias bits so
the data lands at the correct phys offset in ee_ram_stub).
Stack-top is approximated as (ee_ram_bytes - 0x10) since real PS2
ELFs don't carry a stack pointer in their headers; the TB will set
$sp to this if the manifest is read.
Usage:
elf_to_eeram.py --in path/to/game.elf --out-prefix /tmp/game
Verdict-aware notes:
* Segments overflowing the EE RAM image cause a fatal error.
* Segments overlapping each other are flagged but not fatal — the
later one wins (matches how a real loader would behave).
"""
import sys
import struct
import argparse
# ELF constants that parse_elf32_le() compares against the file header.
ELFMAG = b"\x7fELF"   # first four bytes of the file
ELFCLASS32 = 1        # accepted value of header byte 4 (class)
ELFDATA2LSB = 1       # accepted value of header byte 5 (data encoding)
EM_MIPS = 8           # accepted e_machine
ET_EXEC = 2           # accepted e_type values
ET_DYN = 3
PT_LOAD = 1           # program-header type collected into the result


def parse_elf32_le(data: bytes):
    """Parse the ELF header and program-header table of a 32-bit LE MIPS ELF.

    Args:
        data: Complete contents of the ELF file.

    Returns:
        ``(entry, segments)`` where ``entry`` is ``e_entry`` and ``segments``
        is a list of ``(p_vaddr, p_offset, p_filesz, p_memsz)`` tuples, one
        per PT_LOAD program header, in table order.

    Raises:
        ValueError: on bad magic, wrong class / endianness / machine / type,
            or a program-header table that is undersized or runs past the end
            of ``data``.
    """
    ehdr_size = 52  # 16-byte e_ident + 36 bytes unpacked below
    phdr_size = 32  # eight 32-bit fields per program header

    if len(data) < ehdr_size or data[:4] != ELFMAG:
        raise ValueError("not an ELF file (bad magic)")
    if data[4] != ELFCLASS32:
        raise ValueError(f"only ELFCLASS32 supported (got class={data[4]})")
    if data[5] != ELFDATA2LSB:
        raise ValueError(f"only little-endian supported (got data={data[5]})")

    (e_type, e_machine, _e_version, e_entry, e_phoff, _e_shoff, _e_flags,
     _e_ehsize, e_phentsize, e_phnum, _e_shentsize, _e_shnum, _e_shstrndx) = \
        struct.unpack_from("<HHIIIIIHHHHHH", data, 16)
    if e_machine != EM_MIPS:
        raise ValueError(f"only EM_MIPS supported (got machine={e_machine})")
    if e_type not in (ET_EXEC, ET_DYN):
        raise ValueError(f"only ET_EXEC / ET_DYN supported (got type={e_type})")

    if e_phnum:
        # Validate the table up front so a damaged file is reported as the
        # documented ValueError rather than a struct.error (or, for an
        # undersized entry stride, silently misparsed overlapping entries).
        if e_phentsize < phdr_size:
            raise ValueError(
                f"program header entry size {e_phentsize} is smaller than "
                f"{phdr_size}")
        table_end = e_phoff + (e_phnum - 1) * e_phentsize + phdr_size
        if table_end > len(data):
            raise ValueError(
                f"program header table (ends at 0x{table_end:x}) runs past "
                f"end of file (0x{len(data):x} bytes)")

    pt_load = []
    for i in range(e_phnum):
        off = e_phoff + i * e_phentsize
        (p_type, p_offset, p_vaddr, _p_paddr, p_filesz, p_memsz,
         _p_flags, _p_align) = struct.unpack_from("<IIIIIIII", data, off)
        if p_type == PT_LOAD:
            pt_load.append((p_vaddr, p_offset, p_filesz, p_memsz))
    return e_entry, pt_load
def build_image(elf_bytes: bytes, ee_ram_bytes: int):
    """Lay the PT_LOAD segments of an ELF out into a zero-filled EE-RAM image.

    Each segment is placed at ``p_vaddr & 0x1FFFFFFF``. The first ``p_filesz``
    bytes come from the file and the remaining ``p_memsz - p_filesz`` bytes
    are zeroed. Overlapping segments produce a warning on stderr and the later
    segment wins for its whole ``p_memsz`` range.

    Args:
        elf_bytes: Complete contents of the ELF file.
        ee_ram_bytes: Size of the image to build, in bytes.

    Returns:
        ``(entry, image)`` with ``image`` a ``bytearray`` of exactly
        ``ee_ram_bytes`` bytes.

    Raises:
        ValueError: if the ELF is rejected by ``parse_elf32_le``, a segment
            does not fit in the image, ``p_filesz`` exceeds ``p_memsz``, or
            the segment's file range runs past the end of ``elf_bytes``.
    """
    entry, segs = parse_elf32_le(elf_bytes)
    image = bytearray(ee_ram_bytes)
    placed = []
    for p_vaddr, p_offset, p_filesz, p_memsz in segs:
        phys = p_vaddr & 0x1FFFFFFF  # strip kseg/kuseg bits
        end = phys + p_memsz
        if end > ee_ram_bytes:
            raise ValueError(
                f"PT_LOAD at vaddr=0x{p_vaddr:08x} phys=0x{phys:08x} "
                f"size=0x{p_memsz:x} overflows EE RAM (0x{ee_ram_bytes:x})")
        # The overflow check above only covers p_memsz, so a larger p_filesz
        # must be rejected explicitly.
        if p_filesz > p_memsz:
            raise ValueError(
                f"PT_LOAD at vaddr=0x{p_vaddr:08x} has filesz=0x{p_filesz:x} "
                f"> memsz=0x{p_memsz:x}")
        chunk = elf_bytes[p_offset:p_offset + p_filesz]
        # A short slice would make the slice assignment below shrink the
        # bytearray, silently shifting everything after this segment.
        if len(chunk) != p_filesz:
            raise ValueError(
                f"PT_LOAD at vaddr=0x{p_vaddr:08x} file range "
                f"0x{p_offset:x}+0x{p_filesz:x} runs past end of ELF "
                f"(0x{len(elf_bytes):x} bytes)")

        # Detect overlap (informational only); one warning per prior segment.
        for lo, hi in placed:
            if end > lo and phys < hi:
                print(f"[elf_to_eeram] WARNING: PT_LOAD at phys=0x{phys:08x} "
                      f"size=0x{p_memsz:x} overlaps prior placement",
                      file=sys.stderr)
        placed.append((phys, end))

        # Same-length replacement: file bytes followed by an explicit zero
        # tail, so a later segment's .bss also overrides earlier data.
        image[phys:end] = bytes(chunk) + bytes(p_memsz - p_filesz)
        print(f"[elf_to_eeram] placed PT_LOAD vaddr=0x{p_vaddr:08x} "
              f"phys=0x{phys:08x} filesz=0x{p_filesz:x} memsz=0x{p_memsz:x}")
    return entry, image
def qword_to_hex(image: bytearray, qw_phys: int) -> str:
    """Return the 16 bytes starting at ``qw_phys`` as hex, last byte first."""
    window = image[qw_phys:qw_phys + 16]
    # Byte 15 ends up leftmost, byte 0 rightmost.
    return "".join(f"{value:02x}" for value in reversed(window))
def emit_image_hex(image: bytearray, path: str) -> None:
    """Write ``image`` to ``path`` as a sparse ``$readmemh`` file.

    After a three-line comment header, every 16-byte qword containing at
    least one non-zero byte is written as an ``@<qword index>`` line followed
    by its hex line. If no qword qualifies, a single all-zero entry at index
    0 is written so the file always carries one data entry. Bytes beyond the
    last whole qword are not emitted.
    """
    qw_size = 16
    total_qwords = len(image) // qw_size
    wrote_any = False
    with open(path, "w") as out:
        out.write("// Ch270 ELF-derived EE-RAM image\n")
        out.write(f"// {len(image)} bytes / {total_qwords} qwords\n")
        out.write("// Populated qwords only; TB zero-inits before $readmemh.\n\n")
        for index in range(total_qwords):
            base = index * qw_size
            if not any(image[base:base + qw_size]):
                continue  # all-zero qword: skip
            out.write(f"@{index:08x}\n")
            out.write(qword_to_hex(image, base) + "\n")
            wrote_any = True
        if not wrote_any:
            out.write("@00000000\n00000000000000000000000000000000\n")
def emit_manifest_hex(path: str, entry: int, stack_top: int) -> None:
    """Write the manifest: two comment lines, then entry and stack-top hint.

    Both values are written as zero-padded 8-digit lowercase hex, one per line.
    """
    lines = (
        "// Ch270 manifest from ELF",
        "// line 0 = entry, line 1 = stack_top hint",
        f"{entry:08x}",
        f"{stack_top:08x}",
    )
    with open(path, "w") as out:
        out.write("\n".join(lines) + "\n")
def main() -> int:
    """Command-line entry point: convert one ELF into image + manifest files.

    Returns:
        0 on success; 1 if the input cannot be read or the ELF is rejected
        (the reason is printed to stderr instead of a traceback).
    """
    p = argparse.ArgumentParser(description=__doc__,
                                formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("--in", dest="elf_in", required=True,
                   help="input ELF path")
    p.add_argument("--out-prefix", required=True,
                   help="output file prefix")
    p.add_argument("--ee-ram-bytes", type=lambda s: int(s, 0),
                   default=2 * 1024 * 1024,
                   help="EE RAM size in bytes (default 2 MiB)")
    args = p.parse_args()

    try:
        with open(args.elf_in, "rb") as f:
            elf_bytes = f.read()
        entry, image = build_image(elf_bytes, args.ee_ram_bytes)
    except (OSError, ValueError) as exc:
        # Expected failure modes (unreadable file, bad or oversized ELF) get
        # a one-line diagnostic and a non-zero status.
        print(f"[elf_to_eeram] ERROR: {exc}", file=sys.stderr)
        return 1

    # No stack pointer is taken from the ELF; use the last qword of EE RAM.
    stack_top = args.ee_ram_bytes - 0x10
    emit_image_hex(image, f"{args.out_prefix}.image.hex")
    emit_manifest_hex(f"{args.out_prefix}.manifest.hex", entry, stack_top)
    print(f"[elf_to_eeram] wrote {args.out_prefix}.image.hex + "
          f"{args.out_prefix}.manifest.hex (entry=0x{entry:08x}, "
          f"stack_top=0x{stack_top:08x}, ee_ram={args.ee_ram_bytes} bytes)")
    return 0


if __name__ == "__main__":
    sys.exit(main())
+169
View File
@@ -0,0 +1,169 @@
#!/usr/bin/env python3
"""
Generate a synthetic EE-RAM image + manifest for Ch270's ELF runner TB.
Produces two files at the requested output prefix:
<prefix>.image.hex iverilog $readmemh compatible. Uses @<hex_qw_idx>
directives so only the populated 128-bit qwords
appear (the TB pre-zeros the array before reading).
Each line is 32 hex chars = one 128-bit qword,
MSB-first (byte 15 leftmost, byte 0 rightmost).
<prefix>.manifest.hex Two lines:
line 0: ELF entry point (32-bit hex)
line 1: stack-top hint (32-bit hex; unused
by current TB but reserved)
The synthetic program lives at PHYS 0x00100000. Entry is given as a
kseg0 address (0x80100008) because the ee_memory_map stub routes
useg (top bit = 0) to a separate useg_shadow region, not ee_ram —
real PS2 ELFs use kseg0 entries for the same reason (cached text):
PHYS 0x00100000 / kseg0 0x80100000: nop pad
PHYS 0x00100004 / kseg0 0x80100004: nop pad
PHYS 0x00100008 / kseg0 0x80100008: addiu $v0,$0,0x1234 *** entry ***
PHYS 0x0010000C / kseg0 0x8010000C: addiu $v1,$0,0x5678
PHYS 0x00100010 / kseg0 0x80100010: j 0x80100010 loop-to-self
PHYS 0x00100014 / kseg0 0x80100014: nop delay slot
The J encoding (0x08040004) is PC-relative: at runtime, j_tgt =
{PC+4[31:28], imm26<<2}, so the high 4 bits come from the PC.
PC=0x80100010 ⇒ j_tgt = 0x80100010 (self) — same encoding works for
both kseg0 and kuseg views.
Expected TB verdict: `elf_timeout_with_hot_pc` with hot_pc near
0x80100010. That confirms the ELF-load + entry-bootstrap + strict-
trace pipeline is sound (no traps, no halts, no unmapped MMIO, EE
reaches and executes real code).
"""
import sys
import struct
import argparse
def encode_addiu(rt: int, rs: int, imm: int) -> int:
    """Encode ``ADDIU rt, rs, imm`` (opcode 0x09) as a 32-bit word.

    Register numbers are masked to 5 bits and the immediate to 16 bits.
    """
    opcode_field = 0x09 << 26
    rs_field = (rs & 0x1F) << 21
    rt_field = (rt & 0x1F) << 16
    imm_field = imm & 0xFFFF
    return opcode_field | rs_field | rt_field | imm_field
def encode_j(target: int) -> int:
    """Encode ``J target`` (opcode 0x02) as a 32-bit word.

    Only bits 27..2 of ``target`` are stored in the instruction; the target
    must be a multiple of 4 (checked with an ``assert``).
    """
    assert target % 4 == 0, "J target must be word-aligned"
    index_field = (target >> 2) & 0x03FFFFFF
    return (0x02 << 26) | index_field
def encode_lui(rt: int, imm: int) -> int:
    """Encode ``LUI rt, imm`` (opcode 0x0F) as a 32-bit word.

    ``rt`` is masked to 5 bits and ``imm`` to 16 bits; the rs field stays 0.
    """
    rt_field = (rt & 0x1F) << 16
    imm_field = imm & 0xFFFF
    return (0x0F << 26) | rt_field | imm_field
def encode_ori(rt: int, rs: int, imm: int) -> int:
    """Encode ``ORI rt, rs, imm`` (opcode 0x0D) as a 32-bit word.

    Register numbers are masked to 5 bits and the immediate to 16 bits.
    """
    word = 0x0D << 26
    word |= (rs & 0x1F) << 21
    word |= (rt & 0x1F) << 16
    word |= imm & 0xFFFF
    return word
def encode_jr(rs: int) -> int:
    """Encode ``JR rs`` (SPECIAL opcode 0, funct 0x08) as a 32-bit word.

    ``rs`` is masked to 5 bits.
    """
    funct = 0x08
    rs_field = (rs & 0x1F) << 21
    return rs_field | funct
def write_word_le(image: bytearray, phys_addr: int, word: int) -> None:
    """Store the low 32 bits of ``word`` little-endian at ``image[phys_addr:]``.

    Args:
        image: Destination buffer, modified in place (its length never changes).
        phys_addr: Byte offset of the least significant byte.
        word: Value to store; bits above bit 31 are discarded.

    Raises:
        ValueError: if the four bytes do not lie entirely inside ``image``.
            Negative offsets are rejected as well: Python would otherwise
            treat them as indices counted from the end of the buffer.
    """
    if phys_addr < 0 or phys_addr + 4 > len(image):
        raise ValueError(
            f"phys_addr {phys_addr:#x} out of image bounds "
            f"(image is {len(image):#x} bytes)")
    # Same-length slice assignment, so the buffer size is preserved.
    image[phys_addr:phys_addr + 4] = (word & 0xFFFFFFFF).to_bytes(4, "little")
def qword_to_hex(image: bytearray, qw_phys: int) -> str:
    """Format the 16-byte qword at byte offset ``qw_phys`` as 32 hex chars.

    The string is most-significant-byte first: byte 15 of the qword is the
    leftmost pair of characters and byte 0 the rightmost, which is the order
    a ``$readmemh`` line is read in.
    """
    end = qw_phys + 16
    assert end <= len(image)
    # Flip the byte order before formatting so byte 15 leads.
    return bytes(reversed(image[qw_phys:end])).hex()
def emit_image_hex(image: bytearray, path: str, qw_size: int) -> None:
    """Write ``image`` to ``path`` as a sparse ``$readmemh`` file.

    After a three-line comment header, every ``qw_size``-byte word that holds
    at least one non-zero byte is written as an ``@<word index>`` line followed
    by its hex line (most significant byte first). All-zero words are skipped;
    the TB pre-zeros the array before reading.

    Each data line and the empty-image placeholder are formatted from
    ``qw_size`` bytes, so the file stays self-consistent for any word size.
    With ``qw_size=16`` the output is unchanged.

    Args:
        image: Memory image to dump; bytes beyond the last whole word are
            not emitted.
        path: Output file path.
        qw_size: Word size in bytes; must be positive.

    Raises:
        ValueError: if ``qw_size`` is not positive.
    """
    if qw_size <= 0:
        raise ValueError(f"qw_size must be positive (got {qw_size})")
    total_words = len(image) // qw_size
    with open(path, "w") as f:
        f.write("// Ch270 synthetic EE-RAM image\n")
        f.write(f"// {len(image)} bytes / {total_words} qwords\n")
        f.write("// Populated qwords only; TB zero-inits before $readmemh.\n\n")
        any_emitted = False
        for qw_idx in range(total_words):
            qw_byte = qw_idx * qw_size
            qw_bytes = image[qw_byte:qw_byte + qw_size]
            if any(qw_bytes):
                f.write(f"@{qw_idx:08x}\n")
                # Reverse to MSB-first for the hex string.
                f.write(bytes(reversed(qw_bytes)).hex() + "\n")
                any_emitted = True
        if not any_emitted:
            # iverilog $readmemh errors on empty file; emit a benign entry.
            f.write("@00000000\n" + "00" * qw_size + "\n")
def emit_manifest_hex(path: str, entry: int, stack_top: int) -> None:
    """Write the two-value manifest to ``path``.

    Two comment lines are followed by ``entry`` and ``stack_top``, each as a
    zero-padded 8-digit lowercase hex line.
    """
    body = "".join((
        "// Ch270 manifest\n",
        "// line 0 = entry, line 1 = stack_top hint\n",
        f"{entry:08x}\n",
        f"{stack_top:08x}\n",
    ))
    with open(path, "w") as out:
        out.write(body)
def build_synthetic_image(image_bytes: int, entry_phys: int) -> bytearray:
    """Return a zeroed image of ``image_bytes`` bytes holding the test program.

    Layout relative to ``entry_phys``: two nop words before the entry, then
    ``addiu $v0,$0,0x1234``, ``addiu $v1,$0,0x5678``, a ``j`` to its own
    address, and a nop in the delay slot.
    """
    image = bytearray(image_bytes)
    nop = 0x00000000

    def put(offset: int, word: int) -> None:
        write_word_le(image, entry_phys + offset, word)

    # Pad before entry so PC starts on real instructions.
    put(-8, nop)
    put(-4, nop)
    # Body.
    put(0, encode_addiu(2, 0, 0x1234))    # $v0 = 0x1234
    put(4, encode_addiu(3, 0, 0x5678))    # $v1 = 0x5678
    put(8, encode_j(entry_phys + 8))      # jump to self
    put(12, nop)                          # delay slot
    return image
def main() -> int:
    """Command-line entry point: write the synthetic image and its manifest.

    The program is placed at ``entry & 0x1FFFFFFF``. Argument problems (entry
    that does not fit in the image, or is not word-aligned) are reported via
    ``argparse``'s ``error()``, which prints usage and exits with status 2.

    Returns:
        0 on success.
    """
    p = argparse.ArgumentParser(description=__doc__,
                                formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("--out-prefix", required=True,
                   help="output file prefix (writes <prefix>.image.hex + <prefix>.manifest.hex)")
    p.add_argument("--entry", type=lambda s: int(s, 0), default=0x80100008,
                   help="entry point VIRTUAL address (kseg0 default 0x80100008; "
                        "physical placement of the code segment is entry & 0x1FFFFFFF)")
    p.add_argument("--ee-ram-bytes", type=lambda s: int(s, 0), default=2 * 1024 * 1024,
                   help="EE RAM size in bytes (default 2 MiB; must be >= entry+16)")
    p.add_argument("--stack-top", type=lambda s: int(s, 0), default=0x801FFFF0,
                   help="stack top hint stored in manifest (default 0x801FFFF0 kseg0)")
    args = p.parse_args()

    entry_phys = args.entry & 0x1FFFFFFF
    # The program occupies [entry_phys - 8, entry_phys + 16).
    if entry_phys < 8 or entry_phys + 16 > args.ee_ram_bytes:
        p.error(f"entry 0x{args.entry:08x} (phys 0x{entry_phys:08x}) "
                f"doesn't fit into 0x{args.ee_ram_bytes:x}-byte EE RAM")
    # encode_j() asserts on a misaligned target; reject it here with a
    # usage error instead of an AssertionError traceback.
    if entry_phys % 4:
        p.error(f"entry 0x{args.entry:08x} is not word-aligned")

    image = build_synthetic_image(args.ee_ram_bytes, entry_phys)
    emit_image_hex(image, f"{args.out_prefix}.image.hex", qw_size=16)
    emit_manifest_hex(f"{args.out_prefix}.manifest.hex",
                      entry=args.entry, stack_top=args.stack_top)
    print(f"[generate_synthetic_image] wrote {args.out_prefix}.image.hex "
          f"+ {args.out_prefix}.manifest.hex (entry=0x{args.entry:08x}, "
          f"phys=0x{entry_phys:08x}, ee_ram={args.ee_ram_bytes} bytes)")
    return 0


if __name__ == "__main__":
    sys.exit(main())
+95
View File
@@ -0,0 +1,95 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch341 Brick 2: faithful texture downscale + boot-fixture generation.
Reads the LOCAL extracted cube texture (256x256 PSMCT32, from gs_texture.py --extract), box-downsamples
it to 64x64 PSMCT32 (a declared, reported linear transform — NOT a GS-feature approximation; the same
class as the Ch340 viewport fit), and emits the 64x64 texels as a $readmemh .mem the boot setup payload
uploads to a VRAM-resident TBP. Also runs Codex's gate: hash the dump's N texture uploads and report
whether they are byte-identical (so one preload suffices). Only aggregate facts/hashes are committable;
the texel .mem stays LOCAL (derived from the dump).
Declared transform: source 256x256 -> 64x64 (factor 4) => translator must scale UVs by /4.
Usage:
gs_bake_texture.py <dump.gs[.xz|.zst]> [--blob extracted/tex0_blob.bin] [--out outdir] [--report r.txt]
"""
import sys, os, struct, hashlib
sys.path.insert(0, os.path.dirname(__file__))
import gs_parse, gs_texture
# Edge length in texels of the source and destination textures, and the
# integer reduction factor between them (SRC // DST).
SRC=256; DST=64; FACTOR=SRC//DST # 4
def upload_identity_gate(path):
    """Return the SHA-256 hex digest of every IMAGE event payload in the dump.

    The payload of an event is the slice of the raw dump bytes starting at
    ``byte_off`` with length ``info["bytes"]`` (0 if that key is absent).
    Digests are returned in event order; the caller decides whether they are
    all identical.
    """
    raw = gs_parse.read_dump_bytes(path)
    _header, events = gs_parse.parse_dump(path)
    return [
        hashlib.sha256(
            raw[ev.byte_off:ev.byte_off + ev.info.get("bytes", 0)]
        ).hexdigest()
        for ev in events
        if ev.kind == "IMAGE"
    ]
def downsample_psmct32(blob, src_size=None, dst_size=None):
    """Box-filter a square texture of 32-bit little-endian words to a smaller square.

    Each output word is the per-channel floor average of one
    ``factor x factor`` block of source words, where
    ``factor = src_size // dst_size``. The four channels are the four bytes
    of each word, averaged independently.

    Args:
        blob: ``src_size * src_size * 4`` bytes, row-major.
        src_size: Source edge length in texels; defaults to the module's ``SRC``.
        dst_size: Destination edge length in texels; defaults to the module's ``DST``.

    Returns:
        A list of ``dst_size * dst_size`` 32-bit words, row-major.

    Raises:
        ValueError: if ``dst_size`` is not a positive divisor of ``src_size``
            or ``blob`` has the wrong length. (A real exception rather than
            an ``assert``, so the check survives ``python -O``.)
    """
    src_n = SRC if src_size is None else src_size
    dst_n = DST if dst_size is None else dst_size
    if dst_n <= 0 or src_n % dst_n:
        raise ValueError(
            f"dst size {dst_n} must be a positive divisor of src size {src_n}")
    if len(blob) != src_n * src_n * 4:
        raise ValueError(f"expected {src_n*src_n*4} bytes, got {len(blob)}")

    factor = src_n // dst_n
    n = factor * factor  # texels averaged per output texel
    src = struct.unpack(f"<{src_n*src_n}I", blob)
    out = []
    for oy in range(dst_n):
        for ox in range(dst_n):
            c0 = c1 = c2 = c3 = 0
            for dy in range(factor):
                row_start = (oy * factor + dy) * src_n + ox * factor
                for w in src[row_start:row_start + factor]:
                    c0 += w & 0xFF
                    c1 += (w >> 8) & 0xFF
                    c2 += (w >> 16) & 0xFF
                    c3 += (w >> 24) & 0xFF
            out.append((c0 // n) | ((c1 // n) << 8)
                       | ((c2 // n) << 16) | ((c3 // n) << 24))
    return out
def main(argv):
    """Run the upload-identity gate and bake the downsampled texture fixture.

    Args:
        argv: ``sys.argv``-style list: ``argv[1]`` is the dump path, with
            optional ``--blob``, ``--out`` and ``--report`` flags each
            followed by a value.

    Returns:
        2 if no dump path was given (usage is printed), 1 if the extracted
        blob is missing, 0 on success.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2
    path = argv[1]

    def opt(n, d=None):
        # Value following flag ``n`` in argv, or ``d`` when the flag is absent.
        return argv[argv.index(n)+1] if n in argv else d

    blobp = opt("--blob", os.path.join(os.path.dirname(path), "extracted", "tex0_blob.bin"))
    outdir = opt("--out", os.path.join(os.path.dirname(path), "extracted"))
    reportp = opt("--report")
    R = [f"# Ch341 Brick 2 texture bake (source {os.path.basename(path)}; aggregate facts/hashes only)"]

    # --- gate: are the uploads byte-identical? (auditable: list every distinct hash + its count) ---
    hh = upload_identity_gate(path)
    from collections import Counter
    cnt = Counter(hh)
    R.append(f"texture uploads: {len(hh)} distinct payloads: {len(cnt)} "
             + ("ALL BYTE-IDENTICAL -> one preload suffices" if len(cnt)==1 else "DIFFER -> translator must not let a subsegment cross a re-upload to its TBP"))
    for i, (hsh, c) in enumerate(sorted(cnt.items(), key=lambda x: -x[1])):
        R.append(f" upload payload #{i}: sha256[0..15]={hsh[:16]} count={c}")

    if not os.path.exists(blobp):
        R.append(f"!! no extracted blob at {blobp} (run gs_texture.py --extract first)")
        print("\n".join(R))
        return 1

    # Files are opened through context managers so the handles are closed
    # deterministically.
    with open(blobp, "rb") as f:
        blob = f.read()
    src_hash = hashlib.sha256(blob).hexdigest()
    out = downsample_psmct32(blob)
    out_bytes = struct.pack(f"<{len(out)}I", *out)
    out_hash = hashlib.sha256(out_bytes).hexdigest()
    R.append("")
    R.append(f"downsample: {SRC}x{SRC} PSMCT32 -> {DST}x{DST} PSMCT32 (box filter /{FACTOR}; UV scale /{FACTOR})")
    R.append(f" src sha256[0..15]={src_hash[:16]} dst sha256[0..15]={out_hash[:16]} dst_bytes={len(out_bytes)} (= {DST}*{DST}*4)")

    os.makedirs(outdir, exist_ok=True)
    # Emit one 32-bit texel per line as a $readmemh .mem (any packing into
    # wider words is left to the consumer), plus the same words as a raw
    # little-endian .bin for reuse. LOCAL only.
    memp = os.path.join(outdir, "cube_tex_64.mem")
    with open(memp, "w") as f:
        for w in out:
            f.write(f"{w & 0xFFFFFFFF:08x}\n")
    binp = os.path.join(outdir, "cube_tex_64.bin")
    with open(binp, "wb") as f:
        f.write(out_bytes)
    R.append(f" wrote LOCAL fixture: {os.path.basename(memp)} ({len(out)} texels) + {os.path.basename(binp)}")

    report = "\n".join(R) + "\n"
    print(report)
    if reportp:
        with open(reportp, "w") as f:
            f.write(report)
        print(f"[wrote report -> {reportp}]")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
+94
View File
@@ -0,0 +1,94 @@
#!/usr/bin/env python3
"""retroDE_ps2 — SPRITE census (Ch344 sprite-ingestion scoping).
Consumes gs_parse.parse_dump()'s event stream and characterises every SPRITE primitive in a .gs dump:
prim-type split, per-sprite TEX0 PSM, TME/ABE/FST, the ALPHA blend equation, TEX1 mag/min filter, and an
XYOFFSET-corrected size histogram. Output is AGGREGATE (counts/histograms) only — no copyrighted pixel
content — so the report is committable per captures/gs/.gitignore policy. This is census/scoping ONLY;
it renders nothing and asserts nothing.
Usage: gs_census_sprites.py <dump.gs[.xz|.zst]> [--report out.txt]
"""
import sys, os
from collections import Counter
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from gs_parse import parse_dump
# Display names for TEX0 PSM codes; census() falls back to a hex string for
# codes that are not listed here.
PSM = {0x00:"PSMCT32",0x01:"PSMCT24",0x02:"PSMCT16",0x0A:"PSMCT16S",0x13:"PSMT8",0x14:"PSMT4",
       0x1B:"PSMT8H",0x24:"PSMT4HL",0x2C:"PSMT4HH"}
# Display names for the 3-bit PRIM type field; fmt() uses them as report keys.
PRIMT = {0:"POINT",1:"LINE",2:"LINE_STRIP",3:"TRI",4:"TRISTRIP",5:"TRIFAN",6:"SPRITE",7:"INVALID"}
# Operand names used by dec_alpha(): ABCD for the A/B/D selectors, C_SEL for
# the C selector. Only selector values 0..2 have entries.
ABCD = {0:"Cs",1:"Cd",2:"0"}; C_SEL = {0:"As",1:"Ad",2:"FIX"}
# Texture filter mode names.
# NOTE(review): not referenced by any code visible in this chunk — confirm it is still needed.
FILT = {0:"NEAREST",1:"LINEAR",2:"N_MIPN",3:"L_MIPN",4:"N_MIPL",5:"L_MIPL"}
def dec_tex0(v):
    """Split a TEX0 register value into ``(psm, width, height, tbp, tfx, tcc)``.

    ``width`` and ``height`` are ``1 << n`` of the 4-bit fields at bits 26
    and 30; ``psm`` is bits 20..25, ``tbp`` bits 0..13, ``tfx`` bits 35..36
    and ``tcc`` bit 34.
    """
    psm = (v >> 20) & 0x3F
    width = 1 << ((v >> 26) & 0xF)
    height = 1 << ((v >> 30) & 0xF)
    tbp = v & 0x3FFF
    tfx = (v >> 35) & 3
    tcc = (v >> 34) & 1
    return psm, width, height, tbp, tfx, tcc
def dec_alpha(v):
    """Render an ALPHA register value as a readable blend-equation label.

    The 2-bit selectors A, B, C, D are taken from bits 0..7 and FIX from
    bits 32..39. The label has the form ``(A-B)*C>>7+D`` with operand names
    from the ``ABCD`` / ``C_SEL`` tables; `` FIX=<n>`` is appended when C
    selects FIX.

    Selector value 3 has no table entry. It is rendered as ``rsvd3`` rather
    than raising ``KeyError``, so a census run cannot abort on such a value.
    """
    a = v & 3
    b = (v >> 2) & 3
    c = (v >> 4) & 3
    d = (v >> 6) & 3
    fix = (v >> 32) & 0xFF

    def operand(sel):
        return ABCD.get(sel, f"rsvd{sel}")

    label = f"({operand(a)}-{operand(b)})*{C_SEL.get(c, f'rsvd{c}')}>>7+{operand(d)}"
    if c == 2:
        label += f" FIX={fix}"
    return label
def census(path):
    """Walk a dump's GS register writes and tally every SPRITE primitive.

    Tracks the latest PRIM, TEX0_1/2, TEX1_1/2, XYOFFSET_1/2 and ALPHA_1/2
    values. Each pair of XYZ2 writes (register 0x05) made while the PRIM type
    is 6 counts as one sprite, attributed to the context selected by PRIM's
    CTXT bit.

    Args:
        path: Dump path, passed straight to ``parse_dump``.

    Returns:
        ``(h, kicks, spr)``: the header object from ``parse_dump``, a dict of
        XYZ2 write counts keyed by PRIM type, and a dict of sprite statistics
        (``n`` plus Counters ``psm``, ``abe``, ``fst``, ``tme``, ``alpha``,
        ``magfilt``, ``sizes``).
    """
    h, events = parse_dump(path)
    cur = {"type": None, "tme": 0, "abe": 0, "fst": 0}
    tex0 = {1: None, 2: None}
    tex1 = {1: None, 2: None}
    alpha = {1: None, 2: None}
    xyoff = {1: (0, 0), 2: (0, 0)}
    ctxt = 0
    kicks = {}
    xyz = []
    spr = {"n": 0, "psm": Counter(), "abe": Counter(), "fst": Counter(), "tme": Counter(),
           "alpha": Counter(), "magfilt": Counter(), "sizes": Counter()}

    for e in events:
        if e.kind != "GSREG":
            continue
        a, v = e.addr, e.value
        if a == 0x00:  # PRIM: a new primitive discards any half-collected sprite
            cur = {"type": v & 7, "tme": (v >> 4) & 1, "abe": (v >> 6) & 1, "fst": (v >> 8) & 1}
            ctxt = (v >> 9) & 1
            xyz = []
        elif a in (0x06, 0x07):
            tex0[1 if a == 0x06 else 2] = dec_tex0(v)
        elif a in (0x14, 0x15):
            # MMAG is the single bit 5. The previous 3-bit mask also pulled
            # in the low MMIN bits (6..7), so any non-zero MMIN made the
            # value fall through to "(unset)" in the report.
            tex1[1 if a == 0x14 else 2] = (v >> 5) & 1
        elif a in (0x18, 0x19):
            xyoff[1 if a == 0x18 else 2] = (v & 0xFFFF, (v >> 32) & 0xFFFF)
        elif a in (0x42, 0x43):
            alpha[1 if a == 0x42 else 2] = v
        elif a == 0x05:  # XYZ2 vertex kick
            t = cur["type"]
            if t is None:
                continue
            kicks[t] = kicks.get(t, 0) + 1
            if t != 6:
                continue
            xyz.append(v)
            if len(xyz) < 2:
                continue

            # Two vertices collected: one complete sprite.
            cx = 1 if ctxt == 0 else 2
            ox, oy = xyoff[cx]
            x0 = (xyz[0] & 0xFFFF) - ox
            y0 = ((xyz[0] >> 16) & 0xFFFF) - oy
            x1 = (xyz[1] & 0xFFFF) - ox
            y1 = ((xyz[1] >> 16) & 0xFFFF) - oy
            w = abs(x1 - x0) // 16   # 12.4 fixed -> pixels
            ht = abs(y1 - y0) // 16
            spr["n"] += 1
            spr["sizes"][(w, ht)] += 1
            spr["abe"][cur["abe"]] += 1
            spr["fst"][cur["fst"]] += 1
            spr["tme"][cur["tme"]] += 1

            tx = tex0[cx]
            if not cur["tme"]:
                psm_label = "(untextured)"
            elif tx is None:
                psm_label = "(no TEX0)"
            else:
                psm_label = PSM.get(tx[0], f"0x{tx[0]:02x}")
            spr["psm"][psm_label] += 1

            if cur["abe"]:
                al = alpha[cx]
                spr["alpha"][dec_alpha(al) if al is not None else "(no ALPHA set)"] += 1
            if cur["tme"]:
                f1 = tex1[cx]
                spr["magfilt"]["NEAREST" if f1 == 0 else "LINEAR" if f1 == 1 else "(unset)"] += 1
            xyz = []
    return h, kicks, spr
def fmt(h, kicks, spr):
    """Render the census results as a multi-line text report.

    Args:
        h: Header object exposing ``serial`` and ``crc``.
        kicks: XYZ2 write counts keyed by PRIM type.
        spr: Sprite statistics dict as produced by ``census``.

    Returns:
        The report as one string, lines joined with newlines (no trailing newline).
    """
    # Most frequent primitive type first.
    by_type = {PRIMT[k]: v for k, v in sorted(kicks.items(), key=lambda kv: -kv[1])}
    top_sizes = [(f'{w}x{ht}', n) for (w, ht), n in spr['sizes'].most_common(8)]
    return "\n".join((
        f"SPRITE CENSUS — serial={h.serial!r} crc=0x{h.crc:08x}",
        f"prim-kicks by type: {by_type}",
        f"SPRITES: {spr['n']} rectangles",
        f" TEX0 PSM : {dict(spr['psm'].most_common())}",
        f" TME(textured) : {dict(spr['tme'])} FST(0=STQ,1=UV): {dict(spr['fst'])}",
        f" ABE(alpha) : {dict(spr['abe'])}",
        f" ALPHA eqn : {dict(spr['alpha'].most_common())}",
        f" mag filter : {dict(spr['magfilt'].most_common())}",
        f" top sizes WxH : {top_sizes}",
    ))
if __name__ == "__main__":
    # Script entry: census the dump named by the first argument, print the
    # report, and optionally save it to the path following --report.
    cli = sys.argv
    if len(cli) < 2:
        print(__doc__)
        sys.exit(2)
    header, kick_counts, sprite_stats = census(cli[1])
    text = fmt(header, kick_counts, sprite_stats)
    print(text)
    if "--report" in cli:
        report_path = cli[cli.index("--report") + 1]
        with open(report_path, "w") as report_file:
            report_file.write(text + "\n")
        print(f"\nwrote report -> {report_path}")
+90
View File
@@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch351 step 1: isolate reciprocal quantization from S1 under-interpolation banding.
Compares the RTL FB dump (sh3_real_fb_out.mem, written by tb_top_psmct32_sh3_real_draw_demo) against TWO host
references over the identical crop geometry:
FLOAT — ideal perspective (the Codex pixel-diff oracle).
RECIP-8b — RTL-FAITHFUL: fixed-point vertex attrs + the 8-bit gs_reciprocal_stub (gs_make_sh3_real_draw_fixture).
Verdict:
- RTL ≈ RECIP-8b (tight) AND RECIP-8b ≠ FLOAT (loose) -> the residual is RECIPROCAL QUANTIZATION. Widening
gs_reciprocal_stub IDX_BITS should fix it (Ch351 step 2).
- RTL ≠ RECIP-8b (still loose) -> S1 attribute UNDER-INTERPOLATION beyond the reciprocal.
Usage: gs_ch351_oracle.py (run gs_make_sh3_real_draw_fixture.py + the TB first to produce the .mem files)
"""
import sys, os
# Parent of the directory that contains this script.
ROOT=os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)),".."))
# Directory holding the .mem files that loadmem() reads.
DATA=os.path.join(ROOT,"sim","data","top_psmct32_raster_demo")
# FBPXW*CH is the number of framebuffer / refmap entries main() scans; TW and
# TH bound the (u, v) texel coordinates, and TW is the row stride used to
# index the packed index texture.
FBPXW, CH, TW, TH = 256, 120, 512, 512
def loadmem(name, data_dir=None):
    """Read a hex-per-line ``.mem`` file into a list of ints.

    Blank lines and lines whose first non-blank characters are ``//`` are
    skipped; every other line is parsed as one base-16 integer.

    Args:
        name: File name, joined onto the data directory.
        data_dir: Directory to read from; defaults to the module's ``DATA``.

    Returns:
        The parsed values in file order.
    """
    base = DATA if data_dir is None else data_dir
    values = []
    # Context manager so the handle is closed even if a line fails to parse.
    with open(os.path.join(base, name)) as fh:
        for line in fh:
            text = line.strip()
            if text and not text.startswith("//"):
                values.append(int(text, 16))
    return values
def main():
    """Compare the RTL framebuffer dump against three reference maps and print a verdict.

    Each refmap word carries a covered flag in bit 31, an interior flag in
    bit 30, and the expected texel as u (bits 9..17) and v (bits 0..8). For
    every covered pixel, the framebuffer's low 24 bits are searched for in
    the texture within a Chebyshev radius of 0..3 texels of that expected
    texel; the distance of the first hit is histogrammed.

    Returns:
        0 always.
    """
    fb = loadmem("sh3_real_fb_out.mem")
    ref_float = loadmem("sh3_real_refmap.mem")
    ref_recip = loadmem("sh3_real_refmap_recip.mem")
    idx_words = loadmem("sh3_real_idx.mem")
    palette = loadmem("sh3_real_pal.mem")
    ref_affine = loadmem("sh3_real_refmap_affine.mem")

    pixel_count = FBPXW * CH
    palette_rgb = {entry & 0xFFFFFF for entry in palette}

    def texel_rgb(u, v):
        # Indices are packed four per word, lowest byte first.
        lin = v * TW + u
        index = (idx_words[lin >> 2] >> (8 * (lin & 3))) & 0xFF
        return palette[index] & 0xFFFFFF

    def nearest_match(col, tu, tv):
        # First texel equal to ``col`` scanning rings of radius 0..3 in
        # (radius, du, dv) ascending order; (99, 0, 0) when none matches.
        for rad in range(4):
            for du in range(-rad, rad + 1):
                for dv in range(-rad, rad + 1):
                    if max(abs(du), abs(dv)) != rad:
                        continue
                    ttu = tu + du
                    ttv = tv + dv
                    if 0 <= ttu < TW and 0 <= ttv < TH and col == texel_rgb(ttu, ttv):
                        return rad, du, dv
        return 99, 0, 0

    def histo(refmap, label):
        hist = [0] * 5
        ihist = [0] * 5
        tot = itot = exact_color = clut_bad = nd = 0
        sdu = sdv = 0.0
        for o in range(pixel_count):
            rm = refmap[o]
            if not (rm >> 31):
                continue
            interior = (rm >> 30) & 1
            tu = (rm >> 9) & 0x1FF
            tv = rm & 0x1FF
            col = fb[o] & 0xFFFFFF
            tot += 1
            if col not in palette_rgb:
                clut_bad += 1
            if col == texel_rgb(tu, tv):
                exact_color += 1
            dist, mdu, mdv = nearest_match(col, tu, tv)
            bucket = dist if dist <= 3 else 4
            hist[bucket] += 1
            if dist <= 3:
                sdu += mdu
                sdv += mdv
                nd += 1
            if interior:
                itot += 1
                ihist[bucket] += 1
        print(f"== {label} == covered={tot} interior={itot}")
        print(f" ALL hist D0={hist[0]} D1={hist[1]} D2={hist[2]} D3={hist[3]} none={hist[4]} "
              f"<=1tex={100.0*(hist[0]+hist[1])/max(1,tot):.1f}%")
        print(f" INT hist D0={ihist[0]} D1={ihist[1]} D2={ihist[2]} D3={ihist[3]} none={ihist[4]} "
              f"<=1tex={100.0*(ihist[0]+ihist[1])/max(1,itot):.1f}%")
        print(f" exact-color-match={100.0*exact_color/max(1,tot):.1f}% clut_bad={clut_bad} "
              f"mean-delta=({sdu/max(1,nd):.3f},{sdv/max(1,nd):.3f})")
        return ihist

    print("RTL FB dump vs the references (same geometry):\n")
    histo(ref_float, "RTL-vs-FLOAT (ideal perspective)")
    print()
    ih = histo(ref_recip, "RTL-vs-RECIP8b (perspective + 8-bit reciprocal)")
    print()
    histo(ref_affine, "RTL-vs-AFFINE (per-vertex divide + linear u,v)")
    print()

    # How far apart the FLOAT and RECIP8b references are from each other.
    covered = 0
    diff = 0
    for o in range(pixel_count):
        if ref_float[o] >> 31:
            covered += 1
            if (ref_float[o] & 0x3FFFF) != (ref_recip[o] & 0x3FFFF):
                diff += 1
    print(f"FLOAT vs RECIP8b reference texel disagreement: {diff}/{covered} ({100.0*diff/max(1,covered):.1f}%) "
          f"— the reciprocal LUT's intrinsic error on this geometry")
    print()

    # "Tight" means fewer than 5% of interior pixels landed in the D2/D3/none buckets.
    rtl_tight = (ih[2] + ih[3] + ih[4]) < 0.05 * max(1, sum(ih))
    if rtl_tight:
        print("VERDICT: RTL matches the RTL-faithful 8-bit-reciprocal reference -> residual is RECIPROCAL")
        print(" QUANTIZATION. Ch351 step 2: widen gs_reciprocal_stub IDX_BITS (10/11/12).")
    else:
        print("VERDICT: RTL does NOT match even the 8-bit-reciprocal reference -> S1 attribute UNDER-")
        print(" INTERPOLATION banding beyond the reciprocal. Ch351: fix S1 interpolation.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
+109
View File
@@ -0,0 +1,109 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch347 authentic asset extractor: real Silent Hill 3 PSMT8 texture + real CLUT.
Codex target A: "authentic SH3 palettized texture + real palette rendered through chosen sprite geometry"
— NOT a faithful SH3 draw. We pick a CLEAN candidate whose texture is a NATIVE PSMT8 upload (linear payload,
no GS-memory model needed) and whose CLUT is a clean 256-entry PSMCT32 upload, both validated by rendering to
a coherent SH3 surface (see captures/gs/silenthill3/extracted/*.png).
Default candidate (dump 224139): PSMT8 tex tbp=13824 (128x128, native upload, 99 distinct indices) + CLUT
cbp=14282 (16x16=256 PSMCT32, read LINEARLY — csm=0 but linear order renders coherent; CSM1 bit-swap scrambles).
Primary render = DECAL (opaque): the authentic CLUT RGB is kept BYTE-FOR-BYTE; alpha (SH3's real ~0x04) is
IGNORED by the render mode, never rewritten (Codex guardrail). Emits the index texture, the CLUT, and a
software DECAL reference FB for the pre-fit pixel-diff.
Outputs (LOCAL, dump-derived -> gitignored) into sim/data/top_psmct32_raster_demo/:
sh3_tex_idx.mem 128x128 PSMT8 indices, 4 per 32-bit word (low->high byte), $readmemh
sh3_clut.mem 256 PSMCT32 ABGR CLUT entries (one per line) — authentic RGB, authentic alpha
sh3_ref.mem software DECAL reference FB 128x128 PSMCT32 (CLUT[idx] RGB, opaque) for pixel-diff
sh3_authentic.png visual confirmation (DECAL)
"""
import sys, os, glob, struct
# HERE is the directory containing this script; ROOT is its parent.
HERE=os.path.dirname(os.path.abspath(__file__)); ROOT=os.path.normpath(os.path.join(HERE,".."))
# Put <ROOT>/tools first on the import path before importing the project helper.
sys.path.insert(0, os.path.join(ROOT,"tools")); import gs_texture_residency as R
# Output directory for the generated .mem files.
DATA=os.path.join(ROOT,"sim","data","top_psmct32_raster_demo")
# Output directory for the confirmation PNGs.
EXTR=os.path.join(ROOT,"captures","gs","silenthill3","extracted")
def _write_mem(path, header, words):
    """Write ``header`` followed by one 8-digit hex line per 32-bit word."""
    with open(path, "w") as f:
        f.write(header)
        for w in words:
            f.write(f"{w & 0xFFFFFFFF:08x}\n")


def _pack_indices(indices):
    """Pack 8-bit indices four per 32-bit word, first index in the low byte."""
    return [indices[i] | (indices[i+1] << 8) | (indices[i+2] << 16) | (indices[i+3] << 24)
            for i in range(0, len(indices), 4)]


def _save_png(path, width, height, pixels):
    """Best-effort PNG of the pixel words with alpha forced opaque; reports (never raises) on failure."""
    try:
        from PIL import Image
        img = Image.new('RGBA', (width, height))
        img.putdata([(p & 0xFF, (p >> 8) & 0xFF, (p >> 16) & 0xFF, 0xFF) for p in pixels])
        img.save(path)
    except Exception as e:
        # The PNG is only a visual aid, so a missing PIL or failed save must
        # not abort the run; but say so instead of skipping silently.
        print("(PIL unavailable, skipped PNG:", e, ")")


def main(argv):
    """Extract the texture indices and CLUT from a dump and emit the fixtures.

    Args:
        argv: ``sys.argv``-style list: optional ``argv[1]`` dump path
            (otherwise the first ``*224139*.gs.zst`` under the capture
            directory, in sorted order), optional ``argv[2]`` texture base
            (default 13824) and ``argv[3]`` CLUT base (default 14282).

    Exits via ``sys.exit`` with a message when no dump is found, no matching
    upload exists, or a payload is too short.
    """
    dump = argv[1] if len(argv) > 1 else None
    if not dump:
        cands = glob.glob(os.path.join(ROOT, "captures", "gs", "silenthill3", "*224139*.gs.zst"))
        if not cands:
            sys.exit("no SH3 dump found; pass the .gs.zst path")
        # glob order is arbitrary; sort so the pick is reproducible.
        dump = sorted(cands)[0]
    TBP = int(argv[2]) if len(argv) > 2 else 13824   # native PSMT8 texture base
    CBP = int(argv[3]) if len(argv) > 3 else 14282   # CLUT base
    W = H = 128
    d, h, events, uploads, runs, vram = R.collect(dump, 0)

    # texture: latest NATIVE PSMT8 upload to TBP (linear row-major payload)
    tu = [u for u in uploads if u["dbp"] == TBP and u["dpsm"] == 0x13]
    if not tu:
        sys.exit(f"no native PSMT8 (dpsm=0x13) upload to tbp={TBP} — not a clean candidate")
    tup = max(tu, key=lambda u: u["idx"])
    tex = d[tup["blob_range"][0]:tup["blob_range"][1]]
    if len(tex) < W*H:
        sys.exit(f"texture payload {len(tex)}B < {W*H}")
    idx = [tex[y*W + x] for y in range(H) for x in range(W)]

    # CLUT: latest PSMCT32 256-entry upload to CBP, read LINEARLY
    cu = [u for u in uploads if u["dbp"] == CBP and u["dpsm"] == 0x00]
    if not cu:
        sys.exit(f"no PSMCT32 CLUT upload to cbp={CBP}")
    cup = max(cu, key=lambda u: u["idx"])
    cb = d[cup["blob_range"][0]:cup["blob_range"][1]]
    # Same guard as for the texture: a short payload would otherwise decode
    # silently into truncated or zero palette entries.
    if len(cb) < 256*4:
        sys.exit(f"CLUT payload {len(cb)}B < {256*4}")
    pal = [int.from_bytes(cb[i*4:i*4+4], "little") for i in range(256)]

    # Software DECAL reference: each texel is CLUT[idx] with all four bytes
    # kept as uploaded (nothing rewritten). Only the PNG forces alpha opaque.
    ref = [pal[i] for i in idx]

    os.makedirs(DATA, exist_ok=True)
    os.makedirs(EXTR, exist_ok=True)
    # PSMT8 indices packed 4/word (byte0=lowest x), $readmemh as 32-bit
    _write_mem(os.path.join(DATA, "sh3_tex_idx.mem"),
               f"// Ch347 LOCAL authentic SH3 PSMT8 indices {W}x{H} (native upload tbp={TBP}). gitignored.\n",
               _pack_indices(idx))
    _write_mem(os.path.join(DATA, "sh3_clut.mem"),
               f"// Ch347 LOCAL authentic SH3 CLUT 256xPSMCT32 (cbp={CBP}, linear). RGB+alpha authentic. gitignored.\n",
               pal)
    _write_mem(os.path.join(DATA, "sh3_ref.mem"),
               f"// Ch347 LOCAL SW DECAL reference FB {W}x{H} (CLUT[idx] RGB, opaque display). gitignored.\n",
               ref)
    _save_png(os.path.join(EXTR, "sh3_authentic.png"), W, H, ref)

    # --- Ch347 (ii): a DETERMINISTIC 64x64 authentic CROP ---
    # The crop is a direct subset of the extracted indices (no resampling);
    # the CLUT is unchanged. Rule: among 64x64 windows on a stride-8 grid,
    # take the one with the most distinct indices. The strict ">" keeps the
    # first maximum, i.e. the smallest (cy, cx).
    CW = CH = 64
    best = (-1, 0, 0)
    for cy in range(0, H-CH+1, 8):
        for cx in range(0, W-CW+1, 8):
            s = set(idx[(cy+ly)*W + (cx+lx)] for ly in range(CH) for lx in range(CW))
            if len(s) > best[0]:
                best = (len(s), cx, cy)
    _, CX, CY = best
    cidx = [idx[(CY+ly)*W + (CX+lx)] for ly in range(CH) for lx in range(CW)]
    cref = [pal[i] for i in cidx]
    _write_mem(os.path.join(DATA, "sh3_tex_idx64.mem"),
               f"// Ch347 LOCAL authentic SH3 PSMT8 64x64 CROP @({CX},{CY}) of tbp={TBP} 128x128 (direct subset). gitignored.\n",
               _pack_indices(cidx))
    _write_mem(os.path.join(DATA, "sh3_ref64.mem"),
               f"// Ch347 LOCAL SW DECAL reference 64x64 CROP @({CX},{CY}) (CLUT[idx] RGB). gitignored.\n",
               cref)
    _save_png(os.path.join(EXTR, "sh3_authentic64.png"), CW, CH, cref)

    print(f"[Ch347] SH3 authentic asset: tex tbp={TBP} {W}x{H} ({len(set(idx))} distinct indices), CLUT cbp={CBP} "
          f"({len(set(pal))} colors). DECAL reference emitted.")
    print(f"[Ch347] 64x64 CROP @({CX},{CY}) [deterministic max-distinct window]: {len(set(cidx))} distinct indices, "
          f"{len(set(cref))} colors -> sh3_tex_idx64.mem, sh3_ref64.mem")
    print(f"[Ch347] -> {DATA}/sh3_tex_idx*.mem, sh3_clut.mem, sh3_ref*.mem (+ {EXTR}/sh3_authentic*.png) — all LOCAL")


if __name__ == "__main__":
    main(sys.argv)
+122
View File
@@ -0,0 +1,122 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch344 authentic SPRITE extractor (brick 1).
Selects the EARLIEST contiguous run of v1-eligible sprites from a .gs dump and emits a structured sprite
list + aggregate report. v1 eligibility (per the Ch344 census-gated scope) — FAIL CLOSED on anything else:
* SPRITE primitive (PRIM type 6), TME=1 (textured), FST=1 (UV/affine)
* TEX0 PSM = PSMCT32 (no CLUT / no PSMT8/PSMT4 / no PSMCT16)
* ABE=1 (alpha) — blend equation is DECLARED source-over (ALPHA lives in the GS freeze state, absent
from the packet stream; we emit ALPHA ourselves to the feeder)
* small: width,height <= MAX_SPRITE_PX (excludes the fullscreen/scissored/guard-band blits)
* single shared TEX0 TBP across the run (a TBP change / re-upload ends the run — never silently mixed)
Output is the sprite geometry/UV/color (dump-derived -> the .sprites file is LOCAL) plus an AGGREGATE
report (committable). This is extraction ONLY; rendering/translation to the feeder is brick 2+.
Usage: gs_extract_sprites.py <dump.gs[.xz|.zst]> [--max N] [--out scene.sprites] [--report r.txt]
"""
import sys, os
from collections import Counter
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from gs_parse import parse_dump
PSMCT32 = 0x00
MAX_SPRITE_PX = 64
DEF_MAX = 32
def dec_tex0(v):
    """Decode the TEX0 fields used by this extractor from a 64-bit register value.

    Returns a dict with tbp, tbw, psm, tw, th (tw/th expanded to texel counts as
    1 << log2 field), tcc, tfx, cpsm and csm.
    """
    def bits(lsb, width):
        # Extract `width` bits of v starting at bit `lsb`.
        return (v >> lsb) & ((1 << width) - 1)

    return {
        "tbp": bits(0, 14),
        "tbw": bits(14, 6),
        "psm": bits(20, 6),
        "tw": 1 << bits(26, 4),
        "th": 1 << bits(30, 4),
        "tcc": bits(34, 1),
        "tfx": bits(35, 2),
        "cpsm": bits(51, 4),
        "csm": bits(55, 1),
    }
def extract(events, max_spr):
    """Select the earliest contiguous run of v1-eligible sprites from an event stream.

    Only events whose ``kind`` is "GSREG" are interpreted. ``addr``/``value`` are decoded
    as register writes: 0x00 PRIM, 0x03 UV, 0x05 XYZ2, 0x06/0x07 TEX0 (context 1/2) and
    0x18/0x19 XYOFFSET (context 1/2). ``idx`` is recorded as the run's first/last event.

    Returns ``(seg, meta)``. ``seg`` is a list of sprite dicts (normalized screen rect
    x0/y0/x1/y1, w/h, and the two vertices' u/v in texels). ``meta`` holds the shared TBP,
    the decoded TEX0 of the run (None when no run started), first/last event index, the
    stop reason (None when the stream was exhausted) and the rejection counts of sprites
    seen before the run started.
    """
    cur = {"type": None, "tme": 0, "abe": 0, "fst": 0}
    tex0 = {1: None, 2: None}
    xyoff = {1: (0, 0), 2: (0, 0)}
    ctxt = 0
    uvbuf = []
    xyzbuf = []
    seg = []
    seg_tbp = None
    seg_t0 = None
    started = False
    stop = None
    first = last = -1
    rejected = Counter()

    def eligible(t0):
        return t0 is not None and cur["tme"] and cur["fst"] and t0["psm"] == PSMCT32 and cur["abe"] \
            and t0["csm"] == 0 and t0["cpsm"] == 0  # csm/cpsm: no CLUT in play

    for e in events:
        if e.kind != "GSREG":
            continue
        a, v = e.addr, e.value
        if a == 0x00:  # PRIM: new primitive state, drop any half-collected vertices
            cur = {"type": v & 7, "tme": (v >> 4) & 1, "abe": (v >> 6) & 1, "fst": (v >> 8) & 1}
            ctxt = (v >> 9) & 1
            uvbuf = []
            xyzbuf = []
        elif a in (0x06, 0x07):
            tex0[1 if a == 0x06 else 2] = dec_tex0(v)
        elif a in (0x18, 0x19):
            xyoff[1 if a == 0x18 else 2] = (v & 0xFFFF, (v >> 32) & 0xFFFF)
        elif a == 0x03:  # UV
            # GS UV register: U in bits 13:0, V in bits 29:16 (bits 15:14 are padding),
            # both 10.4 fixed point. V starts at bit 16; reading it at bit 14 yields
            # 4x the real V plus stray pad bits.
            uvbuf.append((v & 0x3FFF, (v >> 16) & 0x3FFF))
        elif a == 0x05:  # XYZ2 vertex kick
            if cur["type"] != 6:
                continue
            xyzbuf.append(v)
            if len(xyzbuf) < 2:
                continue
            cx = 1 if ctxt == 0 else 2
            t0 = tex0[cx]
            ox, oy = xyoff[cx]
            # 12.4 fixed-point window coordinates -> integer pixels relative to XYOFFSET
            x0 = ((xyzbuf[0] & 0xFFFF) - ox) // 16
            y0 = (((xyzbuf[0] >> 16) & 0xFFFF) - oy) // 16
            x1 = ((xyzbuf[1] & 0xFFFF) - ox) // 16
            y1 = (((xyzbuf[1] >> 16) & 0xFFFF) - oy) // 16
            w = abs(x1 - x0)
            h = abs(y1 - y0)
            uv = uvbuf[-2:] if len(uvbuf) >= 2 else [(0, 0), (0, 0)]
            xyzbuf = []
            uvbuf = []
            ok = eligible(t0) and w <= MAX_SPRITE_PX and h <= MAX_SPRITE_PX and w > 0 and h > 0
            if not ok:
                if started:
                    stop = "run ended: next sprite not v1-eligible"
                    break
                # Before the run starts, tally why each candidate was turned down.
                if t0 is None:
                    rejected["no_tex0"] += 1
                elif not cur["tme"]:
                    rejected["untextured"] += 1
                elif t0["psm"] != PSMCT32:
                    rejected[f"psm_0x{t0['psm']:02x}"] += 1
                elif not cur["fst"]:
                    rejected["fst0_stq"] += 1
                elif not cur["abe"]:
                    rejected["no_abe"] += 1
                elif t0["csm"] or t0["cpsm"]:
                    rejected["clut"] += 1
                elif w > MAX_SPRITE_PX or h > MAX_SPRITE_PX:
                    rejected["too_big"] += 1
                else:
                    rejected["other"] += 1
                continue
            if not started:
                seg_tbp = t0["tbp"]
                seg_t0 = t0
                started = True
                first = e.idx
            if t0["tbp"] != seg_tbp:
                stop = "run ended: TEX0 TBP changed (re-upload)"
                break
            # NOTE(review): the rect is normalized to min/max but u0/v0/u1/v1 stay in
            # vertex order, so a sprite kicked right-to-left or bottom-to-top would be
            # emitted un-mirrored — confirm whether such sprites occur in the dumps.
            seg.append(dict(x0=min(x0, x1), y0=min(y0, y1), x1=max(x0, x1), y1=max(y0, y1), w=w, h=h,
                            u0=uv[0][0] / 16.0, v0=uv[0][1] / 16.0, u1=uv[1][0] / 16.0, v1=uv[1][1] / 16.0))
            last = e.idx
            if len(seg) >= max_spr:
                stop = "hit --max"
                break
    meta = dict(tbp=seg_tbp, tex0=(seg_t0 if started else None), first=first, last=last,
                stop=stop, rejected=dict(rejected))
    return seg, meta
def main(argv):
    """Command-line entry point: extract a sprite run, print the report, optionally save it.

    argv[1] is the dump path; ``--max N``, ``--out FILE`` and ``--report FILE`` may follow.
    Returns 2 (after printing the module docstring) when no dump path is given, else 0.
    Exits with a message when an option flag is given without its value.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2

    def opt(flag):
        # Value following `flag`, or None when the flag is absent. A flag passed as the
        # last argument would otherwise surface as a bare IndexError.
        if flag not in argv:
            return None
        pos = argv.index(flag) + 1
        if pos >= len(argv):
            sys.exit(f"{flag} requires a value")
        return argv[pos]

    path = argv[1]
    max_arg = opt("--max")
    max_spr = int(max_arg) if max_arg is not None else DEF_MAX
    report_path = opt("--report")
    op = opt("--out")

    h, events = parse_dump(path)
    seg, meta = extract(events, max_spr)
    R = [f"SPRITE EXTRACT — serial={h.serial!r} crc=0x{h.crc:08x} (v1: PSMCT32/UV/ABE, <= {MAX_SPRITE_PX}px)"]
    if not seg:
        R.append(f"NO v1-eligible sprite run selected. rejections: {meta['rejected']}")
    else:
        t0 = meta["tex0"]
        xs = [s['x0'] for s in seg] + [s['x1'] for s in seg]
        ys = [s['y0'] for s in seg] + [s['y1'] for s in seg]
        sizes = Counter((s['w'], s['h']) for s in seg)
        R.append(f"selected {len(seg)} sprites, events #{meta['first']}..#{meta['last']}, stop: {meta['stop']}")
        R.append(f" TEX0: TBP={t0['tbp']} TBW={t0['tbw']} {t0['tw']}x{t0['th']} PSMCT32 TFX={t0['tfx']} TCC={t0['tcc']}")
        R.append(f" screen bbox: x[{min(xs)}..{max(xs)}] y[{min(ys)}..{max(ys)}]")
        R.append(f" sizes WxH: {[(f'{w}x{hh}',n) for (w,hh),n in sizes.most_common(6)]}")
        u = [s['u0'] for s in seg] + [s['u1'] for s in seg]
        vv = [s['v0'] for s in seg] + [s['v1'] for s in seg]
        R.append(f" UV range: u[{min(u):.1f}..{max(u):.1f}] v[{min(vv):.1f}..{max(vv):.1f}] (texel coords)")
    rep = "\n".join(R)
    print(rep)
    if report_path is not None:
        # Context manager so the report is flushed and closed deterministically.
        with open(report_path, "w") as f:
            f.write(rep + "\n")
    if op is not None and seg:
        with open(op, "w") as f:
            t0 = meta["tex0"]
            f.write(f"# Ch344 LOCAL authentic sprite run ({len(seg)}) — dump-derived. TBP={t0['tbp']} {t0['tw']}x{t0['th']} PSMCT32\n")
            f.write(f"tex0 {t0['tbp']} {t0['tbw']} {t0['tw']} {t0['th']} {t0['tfx']}\n")
            for s in seg:
                # UVs are 10.4 fixed point (1/16-texel steps): four decimals round-trip
                # them exactly, whereas one decimal silently loses the fraction.
                f.write(f"sprite {s['x0']} {s['y0']} {s['x1']} {s['y1']} {s['u0']:.4f} {s['v0']:.4f} {s['u1']:.4f} {s['v1']:.4f}\n")
        print(f"\nwrote sprite list -> {op}")
    return 0
if __name__ == "__main__":
sys.exit(main(sys.argv))
+146
View File
@@ -0,0 +1,146 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch345b authentic 32-glyph segment re-pack (content normalization).
The cubes-dump glyph sprites MINIFY a 256x256 PSMCT32 atlas (256KB >> the <=64KiB BRAM VRAM) and 31/32 wrap
past v=255. The dump's actual texture wrap is in the GS freeze state (absent from the packet stream), so we
DECLARE REPEAT (kept visible in the report). For each glyph we compute, per SCREEN pixel, the exact texel the
hardware samples (gs_stub `dda_uv`: coord = (u0 + ((du_dx*(x-x0))>>>16)) & 0x7FF, du_dx=((u1-u0)<<16)/(x1-x0),
then REPEAT mask & 0xFF for the 256-wide atlas) and BAKE that texel into a dense glyph-sized sub-texture.
Packing those dense glyphs + a 1:1 UV remap reproduces the EXACT rendered pixels (nearest of the original
minified sampling == 1:1 of the baked dense glyph, by construction) while fitting VRAM.
Outputs into sim/data/top_psmct32_raster_demo/ (LOCAL, dump-derived -> gitignored):
glyph_atlas.mem packed dense-glyph atlas (PSMCT32, $readmemh into VRAM at GLYPH_TBP)
glyph_sprites.mem feeder SPRITE staging (sprite_mode word0[33]): grid screen layout + 1:1 packed UV
glyph_ref.mem SOFTWARE REFERENCE FB: original-atlas + declared-REPEAT + nearest render over the BG,
for the pre-fit TB pixel-diff. No authentic claim until that diff passes.
DECLARED: REPEAT wrap, nearest sampling, white(0x80) MODULATE tint (the dump's per-vertex RGBA is a freeze-
state value; identity tint shows the raw glyph texels)."""
import sys, os, struct
HERE = os.path.dirname(os.path.abspath(__file__)); ROOT = os.path.normpath(os.path.join(HERE, ".."))
DATA = os.path.join(ROOT, "sim", "data", "top_psmct32_raster_demo")
# Ch346: source the texture RESIDENT at the glyph-sprite draws (epoch 2, the real font),
# NOT the first upload (epoch 1 = the cube checker the demo ping-pongs into the same TBP).
# tex0_font.bin is extracted by the residency preflight; fall back to the old blob only if absent.
_FONT = os.path.join(ROOT, "captures", "gs", "cubes", "extracted", "tex0_font.bin")
ATLAS = _FONT if os.path.exists(_FONT) else os.path.join(ROOT, "captures", "gs", "cubes", "extracted", "tex0_blob.bin")
SPRITES = os.path.join(ROOT, "captures", "gs", "cubes", "extracted", "cubes.sprites")
ATLAS_W = ATLAS_H = 256
FB_W = FB_H = 128 # 128x128 fits the 32-glyph grid at native (1:1) size
FBW = 2 # FB width in 64-px pages (128-wide FB)
GLYPH_TBP = 256 # packed atlas VRAM word base = 256*64 = 16384 (right after the 128x128 FB = 16384 words)
BG = 0xFF0000C0 # opaque blue background (PSMCT32 {A,B,G,R} = FF,00,00,C0)
GAP = 2
def tdiv(a, b):
    """Integer division truncating toward zero (matches SystemVerilog signed `/`).

    Division by zero yields 0 instead of raising.
    """
    if not b:
        return 0
    magnitude = abs(a) // abs(b)
    if (a < 0) != (b < 0):
        return -magnitude
    return magnitude
def htexel(p, p0, c0, e0, e1, span):
    """One axis of the gs_stub dda_uv sampler, returning the REPEAT-masked atlas index.

    The per-pixel step is ((e1 - e0) << 16) / span truncated toward zero (0 when span
    is 0); the coordinate at p is c0 plus the arithmetically shifted step offset.
    """
    if span == 0:
        step = 0
    else:
        step = tdiv((e1 - e0) << 16, span)
    offset = (step * (p - p0)) >> 16       # arithmetic >>16
    coord = (c0 + offset) & 0x7FF          # 11-bit truncation
    return coord & 0xFF                    # REPEAT, 256-wide power-of-two mask
def mod8(t, c):
    """Modulate t by c with 0x80 as unity ((t*c) >> 7), saturating at 0xFF."""
    product = (t * c) >> 7
    return min(product, 0xFF)
def srcover(cs, cd, as_):
    """Source-over blend of one channel: ((cs - cd) * min(as_, 128) >> 7) + cd, clamped to 0..255."""
    alpha = min(as_, 128)
    blended = (((cs - cd) * alpha) >> 7) + cd
    return max(0, min(255, blended))
for p in (ATLAS, SPRITES):
    if not os.path.exists(p): sys.exit(f"missing local input: {p}")
# Ch346 fail-closed gate: refuse to repack a non-glyph-plausible atlas (this is what caught Ch345b — the
# original tex0_blob.bin was the cube CHECKER, no alpha mask). The resident font epoch passes (mask-like).
from gs_texture_residency import payload_stats, font_like
PSMCT32 = 0x00
# Read the atlas once and close it; the same bytes feed both the gate and the unpack below.
with open(ATLAS, "rb") as _fh:
    _atlas_raw = _fh.read()
_ok, _why = font_like(payload_stats(_atlas_raw, PSMCT32))
if not _ok:
    sys.exit(f"[Ch345b] REFUSING: atlas {os.path.basename(ATLAS)} is not glyph-plausible ({_why}).\n"
             f" Pick the RESIDENT font epoch first: gs_texture_residency.py <dump> finds it; the wrong\n"
             f" epoch (cube checker) has no alpha mask. See project_ch345b_content_finding memory.")
# struct.unpack needs exactly ATLAS_W*ATLAS_H words; report a size mismatch in plain terms.
if len(_atlas_raw) != ATLAS_W * ATLAS_H * 4:
    sys.exit(f"{ATLAS}: expected {ATLAS_W*ATLAS_H*4} bytes, got {len(_atlas_raw)}")
atlas = list(struct.unpack(f"<{ATLAS_W*ATLAS_H}I", _atlas_raw))
glyphs = []
with open(SPRITES) as _fh:
    for ln in _fh:
        t = ln.split()
        if t and t[0] == "sprite":
            # fields: x0 y0 x1 y1 u0 v0 u1 v1 — rounded to whole pixels / texels
            x0,y0,x1,y1,u0,v0,u1,v1 = (int(round(float(v))) for v in t[1:9])
            glyphs.append(dict(x0=x0,y0=y0,x1=x1,y1=y1,u0=u0,v0=v0,u1=u1,v1=v1))
# The grid sizing below takes max() over the glyphs; fail with a clear message instead.
if not glyphs:
    sys.exit(f"no sprite lines found in {SPRITES}")
# --- bake each glyph: dense screen-sized sub-texture of the EXACT sampled texels ---
# For every screen pixel of the glyph rect (row-major), look up the atlas texel that
# htexel() selects on each axis and store it in g["baked"]; g["w"]/g["h"] record the size.
for g in glyphs:
    gx0, gx1, gy0, gy1 = g["x0"], g["x1"], g["y0"], g["y1"]
    g["w"], g["h"] = abs(gx1 - gx0), abs(gy1 - gy0)
    left, top = min(gx0, gx1), min(gy0, gy1)
    g["baked"] = [
        atlas[htexel(top + row, gy0, g["v0"], g["v0"], g["v1"], gy1 - gy0) * ATLAS_W
              + htexel(left + col, gx0, g["u0"], g["u0"], g["u1"], gx1 - gx0)]
        for row in range(g["h"])
        for col in range(g["w"])
    ]
# --- pack dense glyphs into a grid atlas, and lay them out on the FB_W x FB_H screen ---
# Every glyph gets one cell of the largest glyph's size, COLS cells per row, in both
# the packed atlas (no spacing) and the screen grid (GAP border, 1 px between cells).
COLS = 8
cellw = max(g["w"] for g in glyphs); cellh = max(g["h"] for g in glyphs)
rows = (len(glyphs) + COLS - 1) // COLS      # ceil(len(glyphs) / COLS)
PACK_W = COLS * cellw; PACK_H = rows * cellh
TBW = (PACK_W + 63) // 64                    # packed atlas width in 64-px units, rounded up
pack = [0] * (PACK_W * PACK_H)               # cells are zero-filled around smaller glyphs
for i, g in enumerate(glyphs):
    # top-left corner of this glyph's cell in the packed atlas
    pu = (i % COLS) * cellw; pv = (i // COLS) * cellh; g["pu"], g["pv"] = pu, pv
    # screen grid (1:1 so du_dx = 1<<16); must fit FB_W x FB_H
    g["sx0"] = GAP + (i % COLS) * (cellw + 1); g["sy0"] = GAP + (i // COLS) * (cellh + 1)
    g["sx1"] = g["sx0"] + g["w"]; g["sy1"] = g["sy0"] + g["h"]
    # copy the baked texels row by row into the cell
    for ly in range(g["h"]):
        for lx in range(g["w"]):
            pack[(pv+ly)*PACK_W + (pu+lx)] = g["baked"][ly*g["w"]+lx]
# fail closed if the laid-out grid does not fit the framebuffer
maxx = max(g["sx1"] for g in glyphs); maxy = max(g["sy1"] for g in glyphs)
if maxx > FB_W or maxy > FB_H:
    sys.exit(f"screen grid {maxx}x{maxy} exceeds {FB_W}x{FB_H} — raise FB or COLS")
# --- software reference FB: render baked glyphs (identity tint) source-over the BG ---
# Each baked texel is blended per channel over the constant BG colour (not over what is
# already in ref) using the texel's own alpha; the output alpha byte is the texel alpha.
ref = [BG] * (FB_W * FB_H)
_CHANNEL_SHIFTS = (0, 8, 16)                          # R, G, B byte positions
_bg_channels = [(BG >> sh) & 0xFF for sh in _CHANNEL_SHIFTS]
for g in glyphs:
    gw = g["w"]
    for n, texel in enumerate(g["baked"]):
        ly, lx = divmod(n, gw)                        # baked is row-major, gw texels per row
        alpha = (texel >> 24) & 0xFF
        outv = alpha << 24
        for sh, dst in zip(_CHANNEL_SHIFTS, _bg_channels):
            # white tint = identity MODULATE, so the source channel is the raw texel channel
            outv |= srcover((texel >> sh) & 0xFF, dst, alpha) << sh
        ref[(g["sy0"] + ly) * FB_W + (g["sx0"] + lx)] = outv
# --- feeder SPRITE staging (sprite_mode word0[33]): grid screen + 1:1 packed UV ---
def frame_1(fbw):
    """Return the FRAME staging word: the low 6 bits of fbw placed at bit 16."""
    fbw_field = fbw & 0x3F
    return fbw_field << 16
def alpha_srcover():
    """Return the ALPHA staging word used for source-over blending (0x44)."""
    # Four 2-bit selectors packed low to high; presumably the GS ALPHA A/B/C/D fields
    # (Cs, Cd, As, Cd) — confirm against the feeder's register decode.
    sel_a, sel_b, sel_c, sel_d = 0, 1, 0, 1
    return sel_a | (sel_b << 2) | (sel_c << 4) | (sel_d << 6)
def tex0_pack(tbp, tbw, tw, th, tfx):
    """Pack a PSMCT32 TEX0 word: TBP at bit 0, TBW at 14, PSM (=0) at 20, TW at 26, TH at 30, TFX at 35.

    tw/th are the log2 texture dimensions. tfx was previously accepted but dropped; it
    is now packed as the 2-bit field at bit 35, so tfx=0 produces the same word as before.
    """
    return tbp | (tbw << 14) | (0 << 20) | (tw << 26) | (th << 30) | ((tfx & 0x3) << 35)
def uvd(u, v):
    """Pack integer texel (u, v) as two 14-bit x16-scaled fields: u at bit 0, v at bit 14."""
    u_field = (u << 4) & 0x3FFF
    v_field = (v << 4) & 0x3FFF
    return u_field | (v_field << 14)
def xyz2(x, y):
    """Pack integer pixel (x, y) as x16-scaled 12-bit fields: x at bit 4, y at bit 20."""
    x_field = (x & 0xFFF) << 4
    y_field = (y & 0xFFF) << 20
    return x_field | y_field
# log2 ceil of the packed atlas dimensions for TEX0
TW = max(PACK_W-1, 1).bit_length()
TH = max(PACK_H-1, 1).bit_length()
# Staging header: sprite count + sprite_mode flag (bit 33), FRAME, ALPHA, two zero words,
# TEX0, PRIM (SPRITE+TME+ABE). Then six words per glyph: (tint, UV, XYZ2) for each corner.
stg = [
    (len(glyphs) & 0xFFFF) | (1 << 33),
    frame_1(FBW),
    alpha_srcover(),
    0,
    0,
    tex0_pack(GLYPH_TBP, TBW, TW, TH, 0),
    6 | (1<<4) | (1<<6),
]
tint = 0x80808080
for g in glyphs:
    stg.extend([
        tint, uvd(g["pu"], g["pv"]), xyz2(g["sx0"], g["sy0"]),
        tint, uvd(g["pu"]+g["w"], g["pv"]+g["h"]), xyz2(g["sx1"], g["sy1"]),
    ])
if len(stg) > 256: sys.exit(f"staging {len(stg)} > 256 words")
os.makedirs(DATA, exist_ok=True)


def _dump_hex(name, banner, values, digits):
    # Write one banner line, then each value masked to `digits` hex digits, one per line.
    mask = (1 << (4 * digits)) - 1
    with open(os.path.join(DATA, name), "w") as f:
        f.write(banner)
        f.writelines(f"{val & mask:0{digits}x}\n" for val in values)


_dump_hex("glyph_atlas.mem",
          f"// Ch345b LOCAL packed glyph atlas {PACK_W}x{PACK_H} PSMCT32 (declared REPEAT). gitignored.\n",
          pack, 8)
# staging image is zero-padded to exactly 256 words
_dump_hex("glyph_sprites.mem",
          f"// Ch345b LOCAL feeder SPRITE staging: {len(glyphs)} re-packed authentic glyphs. gitignored.\n",
          stg + [0] * (256 - len(stg)), 16)
_dump_hex("glyph_ref.mem",
          f"// Ch345b LOCAL SOFTWARE REFERENCE FB {FB_W}x{FB_H} (orig atlas + REPEAT + nearest). gitignored.\n",
          ref, 8)
print(f"[Ch345b] glyphs={len(glyphs)} packed atlas={PACK_W}x{PACK_H} ({PACK_W*PACK_H*4}B, TBW={TBW} TW={TW} TH={TH})")
print(f"[Ch345b] screen grid {COLS} cols, bbox {maxx}x{maxy} in {FB_W}x{FB_H}; staging {len(stg)} words")
print(f"[Ch345b] DECLARED: REPEAT wrap + nearest + white MODULATE. atlas/sprites/ref -> {DATA} (LOCAL)")
+87
View File
@@ -0,0 +1,87 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch349 GS local-memory model (the bridge Codex framed).
A faithful byte-level model of GS local memory (4 MiB VRAM) so a draw's SAMPLED texture can be reconstructed
as the real hardware sees it — even when the asset is STREAMED in one PSM (e.g. PSMCT32, fast word writes)
and SAMPLED in another (e.g. PSMT8 indexed). Both operations address the SAME physical bytes through their
respective GS swizzles; modeling that crossover is the whole point.
Swizzle math is ported verbatim from the project's own RTL (which is in turn locked to PCSX2 GSTables.cpp):
- PSMCT32 write/read <- rtl/gif_gs/gs_swizzle_psmct32_stub.sv (block grid + byte_in_block = yb*32+xb*4)
- PSMT8 read <- rtl/gif_gs/gs_swizzle_psmt8_stub.sv (block grid + 16x16 columnTable8)
Address convention matches the codebase: VRAM byte base = PTR*256 (TBP0/DBP/CBP are 14-bit, *256 == 4 MiB),
and PTRs are page-aligned (multiple of 32) so page_index*8192 composes correctly off that base.
"""
# block grid is shared by PSMCT32 and PSMT8 (4 rows x 8 cols), value = block index within page
BLOCK = [
[ 0, 1, 4, 5,16,17,20,21],
[ 2, 3, 6, 7,18,19,22,23],
[ 8, 9,12,13,24,25,28,29],
[10,11,14,15,26,27,30,31],
]
# PSMT8 within-block 16x16 -> byte permutation (columnTable8)
COL8 = [
[ 0, 4, 16, 20, 32, 36, 48, 52, 2, 6, 18, 22, 34, 38, 50, 54],
[ 8, 12, 24, 28, 40, 44, 56, 60, 10, 14, 26, 30, 42, 46, 58, 62],
[ 33, 37, 49, 53, 1, 5, 17, 21, 35, 39, 51, 55, 3, 7, 19, 23],
[ 41, 45, 57, 61, 9, 13, 25, 29, 43, 47, 59, 63, 11, 15, 27, 31],
[ 96,100,112,116, 64, 68, 80, 84, 98,102,114,118, 66, 70, 82, 86],
[104,108,120,124, 72, 76, 88, 92,106,110,122,126, 74, 78, 90, 94],
[ 65, 69, 81, 85, 97,101,113,117, 67, 71, 83, 87, 99,103,115,119],
[ 73, 77, 89, 93,105,109,121,125, 75, 79, 91, 95,107,111,123,127],
[128,132,144,148,160,164,176,180,130,134,146,150,162,166,178,182],
[136,140,152,156,168,172,184,188,138,142,154,158,170,174,186,190],
[161,165,177,181,129,133,145,149,163,167,179,183,131,135,147,151],
[169,173,185,189,137,141,153,157,171,175,187,191,139,143,155,159],
[224,228,240,244,192,196,208,212,226,230,242,246,194,198,210,214],
[232,236,248,252,200,204,216,220,234,238,250,254,202,206,218,222],
[193,197,209,213,225,229,241,245,195,199,211,215,227,231,243,247],
[201,205,217,221,233,237,249,253,203,207,219,223,235,239,251,255],
]
def ct32_addr(dbp, dbw, x, y):
    """Byte address of PSMCT32 pixel (x,y) in a buffer based at dbp (256-byte units), width dbw (64px units).

    Pages are 64x32 pixels (8192 bytes) and hold a 4x8 grid of 8x8-pixel blocks looked up
    in BLOCK; inside a block the pixel sits at row*32 + column*4.
    """
    page = (y >> 5) * dbw + (x >> 6)
    block = BLOCK[(y >> 3) & 3][(x >> 3) & 7]
    in_block = ((y & 7) << 5) + ((x & 7) << 2)
    return (dbp << 8) + (page << 13) + (block << 8) + in_block
def psmt8_addr(tbp, fbw, x, y):
    """Byte address of PSMT8 pixel (x,y) in a buffer based at tbp (256-byte units), width fbw (64px units).

    Pages are 128x64 pixels (8192 bytes), so the page stride is fbw >> 1; each page holds
    a 4x8 grid of 16x16-pixel blocks (BLOCK) and COL8 gives the byte within a block.
    """
    pages_per_row = fbw >> 1
    page = (y >> 6) * pages_per_row + (x >> 7)
    block = BLOCK[(y >> 4) & 3][(x >> 4) & 7]
    return (tbp << 8) + (page << 13) + (block << 8) + COL8[y & 15][x & 15]
class LocalMem:
    """4 MiB GS VRAM. Seed from the dump's initial snapshot, then replay host->local uploads in order."""
    SIZE = 0x400000

    def __init__(self, init_bytes=None):
        """Create the VRAM image, zero-filled and optionally seeded from *init_bytes*.

        A snapshot longer than SIZE is truncated; a shorter one seeds the start of memory
        and leaves the rest zero (previously a short snapshot was discarded entirely).
        """
        self.m = bytearray(self.SIZE)
        if init_bytes is not None:
            seed = bytes(init_bytes[:self.SIZE])
            self.m[:len(seed)] = seed

    def write_image_ct32(self, dbp, dbw, dsax, dsay, w, h, words):
        """Host->local upload in PSMCT32: raster-order words fill (dsax..+w)x(dsay..+h) via ct32 swizzle.

        `words` may be shorter than w*h (partial transfer); fill stops when exhausted (GS behaviour).
        Pixels whose address falls outside VRAM are skipped but still consume a word.
        """
        n = len(words)
        i = 0
        for py in range(h):
            for px in range(w):
                if i >= n:
                    return
                a = ct32_addr(dbp, dbw, dsax + px, dsay + py)
                if 0 <= a and a + 4 <= self.SIZE:
                    self.m[a:a+4] = (words[i] & 0xFFFFFFFF).to_bytes(4, "little")
                i += 1

    def read_psmt8(self, tbp, fbw, tw, th):
        """Return a tw*th raster-order bytearray of PSMT8 indices read through the PSMT8 swizzle.

        Pixels whose address falls outside VRAM read as 0.
        """
        out = bytearray(tw * th)
        for y in range(th):
            r = y * tw
            for x in range(tw):
                a = psmt8_addr(tbp, fbw, x, y)
                out[r + x] = self.m[a] if 0 <= a < self.SIZE else 0
        return out

    def read_ct32_word(self, dbp, dbw, x, y):
        """Return the little-endian 32-bit word at PSMCT32 pixel (x,y), or 0 when out of range."""
        a = ct32_addr(dbp, dbw, x, y)
        # Same bounds test as write_image_ct32: a negative address must not be
        # reinterpreted by slice semantics as an offset from the end of VRAM.
        if 0 <= a and a + 4 <= self.SIZE:
            return int.from_bytes(self.m[a:a+4], "little")
        return 0
+169
View File
@@ -0,0 +1,169 @@
#!/usr/bin/env python3
"""Ch343 — build the AUTHENTIC cube board fixtures (LOCAL, dump-derived).
Inputs (local, from the cubes .gs dump pipeline):
captures/gs/cubes/extracted/cube_tex_64.bin 64x64 PSMCT32 cube texture (downscaled)
captures/gs/cubes/extracted/cube_persp.scene.txt translated perspective scene (ps2_feeder)
Outputs into sim/data/top_psmct32_raster_demo/ (gitignored — game-derived content):
payload_cube_setup.mem boot GIF payload: upload the 64x64 cube texture @ TBP=128 (relocated past the Z buffer; 32 KiB EE RAM)
bios_cube_setup.mem one-shot bootlet, QWC = payload qword count
feeder_cube_persp.mem cube perspective staging (ps2_feeder --dump-file cube_persp.scene)
Reuses bake.py's GIF packers + bootlet builder so the framing matches the proven feeder_persp path
exactly; only the texture size (64x64 vs 16x16) and the RAM/QWC budget differ. ABE=0 S1 perspective.
This GENERATOR is committable; its .mem outputs are not (provenance: authentic GS dump content)."""
import os
import sys
import struct
import subprocess
HERE = os.path.dirname(os.path.abspath(__file__))
ROOT = os.path.normpath(os.path.join(HERE, ".."))
DATA = os.path.join(ROOT, "sim", "data", "top_psmct32_raster_demo")
sys.path.insert(0, DATA)
import bake # noqa: E402 (reuse giftag/aplusd/*_pack/bootlet/write_bios_mem)
TEX_BIN = os.path.join(ROOT, "captures", "gs", "cubes", "extracted", "cube_tex_64.bin")
SCENE = os.path.join(ROOT, "captures", "gs", "cubes", "extracted", "cube_persp.scene.txt")
# VRAM layout (64 KiB / 16384 words) — three 64x64 surfaces must NOT alias:
# FB FBP=0 -> words 0..4095 ZBP units = 2048 words
# Z ZBP=2 -> words 4096..8191 (ps2_feeder hardcodes zbuf1_pack(2) + GEQUAL)
# TEX TBP=128-> words 8192..12287 TBP0 units = 64 words
# The scene's translator picked TBP=64 (word 4096), which ALIASES the Z buffer: the render then reads
# texel values (~0xFF000000) as Z and GEQUAL rejects the whole cube. Relocate the texture past Z.
TBP = 128 # word 8192 — clear of FB (0..4095) and Z (4096..8191)
TBW = 1 # 64-wide -> TBW=1 (64-px stride); matches the scene's tex0 stride
W = H = 64
RAM_QWORDS = 2048 # 32 KiB / 16 (EE RAM payload; unrelated to VRAM)
for p in (TEX_BIN, SCENE):
    if not os.path.exists(p):
        sys.exit(f"missing local input (run the cubes .gs extraction first): {p}")
# --- 64x64 PSMCT32 texels, raster order (ty*64 + tx), little-endian u32 ---
# Context manager so the texture file handle is closed as soon as it is read.
with open(TEX_BIN, "rb") as f:
    raw = f.read()
if len(raw) != W * H * 4:
    sys.exit(f"{TEX_BIN}: expected {W*H*4} bytes, got {len(raw)}")
texels = list(struct.unpack(f"<{W*H}I", raw))
# --- boot GIF payload: BITBLTBUF/TRXPOS/TRXREG/TRXDIR (A+D) + IMAGE upload ---
n_image = (W * H) // 4                                       # 4 texels / qword = 1024
qw = [
    bake.giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE),          # 4x A+D
    bake.aplusd(bake.R_BITBLTBUF, bake.bitbltbuf_pack(TBP, TBW, 0)),
    bake.aplusd(bake.R_TRXPOS, bake.trxpos_pack(0, 0)),
    bake.aplusd(bake.R_TRXREG, bake.trxreg_pack(W, H)),
    bake.aplusd(bake.R_TRXDIR, bake.trxdir_pack(0)),         # host -> local
    bake.giftag(n_image, 1, 2, 0, 0),                        # IMAGE, EOP=1 (setup-only payload)
]
# Four consecutive texels per 128-bit qword, texel 0 in the low 32 bits.
qw.extend(
    sum((texels[4 * q + lane] & 0xFFFFFFFF) << (32 * lane) for lane in range(4))
    for q in range(n_image)
)
qwc = len(qw)                                                # 6 + 1024 = 1030
if 16 + qwc > RAM_QWORDS:
    sys.exit(f"payload {qwc} qw + 16 lead > {RAM_QWORDS} (bump RAM_SIZE_BYTES)")
# EE RAM image: 16 zero lead qwords, the GIF upload, then zero padding up to RAM_QWORDS.
payload_path = os.path.join(DATA, "payload_cube_setup.mem")
with open(payload_path, "w") as f:
    # The header reports the TBP actually used: the texture is relocated to TBP (=128),
    # so the previously hard-coded "TBP=64" misdescribed the payload.
    f.write(f"// Ch343 LOCAL authentic-cube texture boot payload (64x64 PSMCT32 @ TBP={TBP}).\n")
    f.write("// game-derived (cubes .gs) -> gitignored. qw 0..15 zero; qw 16.. = GIF upload.\n")
    for _ in range(16):
        f.write(f"{0:032x}\n")
    for x in qw:
        f.write(f"{x & ((1 << 128) - 1):032x}\n")
    for _ in range(RAM_QWORDS - 16 - qwc):
        f.write(f"{0:032x}\n")
# --- one-shot bootlet (same shape as feeder_persp setup; QWC chosen for the cube payload) ---
bake.write_bios_mem(
    "bios_cube_setup.mem",
    bake.build_textured_demo_bootlet_disp(qwc, bake.CAP_DISPLAY1_HI, bake.CAP_FBW),
    f"Ch343 LOCAL authentic-cube setup bootlet (QWC={qwc}). game-derived -> gitignored.")
# --- cube perspective staging via ps2_feeder --dump-file ---
# The translator bound TEX0 TBP0 to a word that aliases the Z buffer; rewrite the tex0 line so the
# render samples the relocated texture (TBP) instead. Single source of truth = the TBP above.
# The translator grabbed the EARLIEST ≤27-tri textured run, which is a WIDE-SHORT strip of ~4 tiny
# cubes (source 280x75 viewport-fit to 64x64 -> squashed to a y:[0..17] band of speckle). Isolate ONE
# cube (CUBE_TRIS contiguous tris) and re-fit it to fill the frame: a legitimate viewport zoom. The
# perspective ST/Q are interpolated linearly in SCREEN space, so a uniform 2D scale of the screen verts
# preserves per-pixel u=S/Q,v=T/Q exactly — faithful, just bigger. The first 6 tris (lines 5..10) are a
# corner-view cube: 3 faces meeting at center vertex (59,13).
CUBE_FIRST = 0 # index of the first persptri of the chosen cube
CUBE_TRIS = 6 # 3 faces x 2 tris
MARGIN = 2 # leave a 2px border in the 64x64 frame
SPAN_PX = (W - 1) - 2*MARGIN
# Two faithful coordinate conversions applied to the selected cube:
# (a) ST normalized->texel: retroDE's gs_persp_uv recovers the TEXEL index directly (expects S/Q in
# 0..TW, as bake.py persp_attrs emits S=u_texel/w). The dump's ST are NORMALIZED (0..1) so S/Q
# lands in [0,1] -> samples only texel (0,0). Scale S by TW, T by TH (what real GS does internally).
# (b) TEX0 -> DECAL (TFX=1): emit the authentic texels directly. The dump's per-vertex color came
# through as a uniform (128,0,0) (translator artifact); MODULATE with it masks G/B. DECAL matches
# the proven Ch342 checkerboard. (TODO: backport (a)+(b) into gs_translate_tex.py --perspective.)
with open(SCENE) as f:
    scene_lines = f.read().splitlines()
header = []   # comments + persp + tex0 (patched)
tris = []     # token-lists of persptri lines (len 20)
for ln in scene_lines:
    tok = ln.split()
    if not tok or tok[0] == "go":
        continue
    if tok[0] == "persptri" and len(tok) == 20:
        tris.append(tok)
    else:
        if tok[0] == "tex0":
            tok[1] = str(TBP)   # tex0 <TBP> <TBW> <TW> <TH> <TFX>
            tok[5] = "1"        # DECAL
            ln = " ".join(tok)
        header.append(ln)
sel = tris[CUBE_FIRST:CUBE_FIRST + CUBE_TRIS]
# The bounding-box min()/max() below need at least one triangle; fail with a clear message.
if not sel:
    sys.exit(f"{SCENE}: no persptri lines at index {CUBE_FIRST} (found {len(tris)})")
VTX = ((1, 2, 3, 4), (6, 7, 8, 9), (11, 12, 13, 14))   # (X,Y,S,T) token indices per vertex
xs = [int(t[xi]) for t in sel for (xi, _, _, _) in VTX]
ys = [int(t[yi]) for t in sel for (_, yi, _, _) in VTX]
minx, maxx, miny, maxy = min(xs), max(xs), min(ys), max(ys)
scale = SPAN_PX / max(maxx - minx, maxy - miny, 1)   # uniform -> preserve cube proportions
# centre the scaled cube inside the margin on both axes
offx = MARGIN + (SPAN_PX - (maxx - minx) * scale) / 2.0
offy = MARGIN + (SPAN_PX - (maxy - miny) * scale) / 2.0
for t in sel:
    for (xi, yi, si, ti) in VTX:
        t[xi] = str(int(round(offx + (int(t[xi]) - minx) * scale)))   # re-fit screen X
        t[yi] = str(int(round(offy + (int(t[yi]) - miny) * scale)))   # re-fit screen Y
        t[si] = str(int(round(int(t[si]) * W)))                       # ST normalized -> texel
        t[ti] = str(int(round(int(t[ti]) * H)))
patched = header + [" ".join(t) for t in sel] + ["go"]
N_TRI_OUT = len(sel)
scene_tmp = os.path.join(DATA, ".cube_persp.tbp.scene.txt")
with open(scene_tmp, "w") as f:
    f.write("\n".join(patched) + "\n")
# Build ps2_feeder next to this script, then let it translate the patched scene.
psf = os.path.join(HERE, "ps2_feeder")
try:
    subprocess.run(["gcc", "-O2", "-o", psf, os.path.join(HERE, "ps2_feeder.c")], check=True)
    stg = subprocess.run([psf, "--dump-file", scene_tmp],
                         capture_output=True, text=True, check=True).stdout
finally:
    # Remove the temporary scene even when the compile or the feeder run fails, so a
    # failed run does not leave the patched scene behind in the data directory.
    os.remove(scene_tmp)
stg_words = [ln for ln in stg.splitlines() if ln.strip() and not ln.lstrip().startswith("//")]
staging_path = os.path.join(DATA, "feeder_cube_persp.mem")
with open(staging_path, "w") as f:
    f.write("// Ch343 LOCAL cube perspective staging (ps2_feeder --dump-file cube_persp.scene).\n")
    f.write("// game-derived (cubes .gs) -> gitignored. ABE=0 S1 perspective path.\n")
    f.write(stg)
    if not stg.endswith("\n"):
        f.write("\n")
# --- raster-order texel hex for the smoke TB's VRAM round-trip check (LOCAL) ---
tex_ref_path = os.path.join(DATA, "feeder_cube_tex.mem")
with open(tex_ref_path, "w") as f:
    f.write("// Ch343 LOCAL cube texels, raster order (vram word TBP*64+i). game-derived -> gitignored.\n")
    f.writelines(f"{texel & 0xFFFFFFFF:08x}\n" for texel in texels)
# summary of everything this run produced
for _line in (
    f"[Ch343] payload_cube_setup.mem : {qwc} qw (QWC={qwc}, 32 KiB EE RAM, TBP={TBP} TBW={TBW})",
    f"[Ch343] bios_cube_setup.mem : bootlet QWC={qwc}",
    f"[Ch343] feeder_cube_persp.mem : {len(stg_words)} staging words from cube_persp.scene",
    f"[Ch343] outputs -> {DATA} (LOCAL / gitignored)",
):
    print(_line)
+113
View File
@@ -0,0 +1,113 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch347 authentic SH3 board fixture: bootlet GIF payload that uploads the real SH3 PSMT8
texture + real CLUT and draws it as a DECAL sprite. Built directly on the Ch296 PSMT8+CLUT board template
(bake.build_clut8_demo_payload), scaled to a 256-entry CLUT and, for now, the 64x64 authentic crop (128x128 is the follow-up).
Mirrors gs_make_cube_fixture.py: imports bake.py helpers, reads the dump-derived assets emitted by
gs_extract_sh3_clut.py (sh3_tex_idx64.mem + sh3_clut.mem), and writes the LOCAL/gitignored board fixtures:
payload_sh3_clut.mem GIF payload: BITBLT CLUT (256) + BITBLT PSMT8 tex (64x64 crop) + TEX0(PSMT8,CLD=1,CSM2
linear) + one 64x64 DECAL SPRITE
bios_sh3_clut.mem one-shot bootlet, QWC = payload qword count, DISPLAY1 = 64x64
DECAL/opaque (TFX=1) so the authentic art is visible; the real CLUT alpha (~0x04) is kept BYTE-FOR-BYTE in
the palette, just ignored by the render mode (Codex guardrail). CLUT declared CSM2/linear (the order
validated against the authentic render in tools/gs_extract_sh3_clut.py).
"""
import sys, os
HERE=os.path.dirname(os.path.abspath(__file__)); ROOT=os.path.normpath(os.path.join(HERE,".."))
DATA=os.path.join(ROOT,"sim","data","top_psmct32_raster_demo")
sys.path.insert(0, DATA); import bake # reuse giftag/aplusd/*_pack/bootlet helpers
# Ch347 (ii): the 64x64 authentic CROP — fits the PROVEN 64x64 bram CLUT config (no new 128 KiB/scanout
# integration). Full 128x128 is the follow-up rung (i). Swap sh3_tex_idx64->sh3_tex_idx + W/H/FBW for (i).
W = H = 64
FBW = 1 # 64 px / 64
TBP0 = 64 # texture base byte = 64*256 = 0x4000 (just past the 64x64 PSMCT32 FB = 16 KiB)
TBW = 1 # 64 texels/row stride
CBP = 80 # CLUT base byte = 80*256 = 0x5000 (past the 4 KiB texture)
RAM_QWORDS = 512 # 8 KiB EE RAM payload; SH3 64x64 payload ~340 qw fits
def read_mem(name, n):
    """Read hex words from DATA/<name>, skipping blank lines and `//` comment lines.

    Exits with a hint to run the extractor when fewer than *n* values are present.
    Returns every value found (which may be more than n).
    """
    # Context manager so the handle is closed even if a line fails to parse.
    with open(os.path.join(DATA, name)) as f:
        vals = []
        for ln in f:
            ln = ln.strip()
            if ln and not ln.startswith("//"):
                vals.append(int(ln, 16))
    if len(vals) < n: sys.exit(f"{name}: {len(vals)} < {n} (run tools/gs_extract_sh3_clut.py first)")
    return vals
tex_words = read_mem("sh3_tex_idx64.mem", W*H//4) # 1024 packed words (4 indices each) — the 64x64 crop
clut = read_mem("sh3_clut.mem", 256) # 256 PSMCT32 entries
# TEX0_1: PSMT8 64x64 (TW=TH=6), TFX=DECAL, + CLUT side (CBP, CPSM=PSMCT32, CSM2 linear, CLD=1 -> load)
def tex0_sh3():
    """Return the TEX0 value for the SH3 crop: bake's PSMT8 texture fields OR-ed with the CLUT fields."""
    clut_fields = (
        ((CBP & 0x3FFF) << 37)    # CBP
        | ((0 & 0xF) << 51)       # CPSM = PSMCT32
        | ((1 & 0x1) << 55)       # CSM = CSM2 (linear)
        | ((0 & 0x1F) << 56)      # CSA = 0
        | ((1 & 0x7) << 61)       # CLD = 1 (fire VRAM->CLUT load on commit)
    )
    texture_fields = bake.tex0_pack(TBP0, TBW, psm=0x13, tw=6, th=6, tfx=1)   # 64x64 (TW=TH=6)
    return texture_fields | clut_fields
def build_payload():
    """Build the GIF payload qwords: CLUT upload, texture upload, then one textured DECAL sprite.

    Returns a list of 128-bit integers in transfer order.
    """
    def image_qwords(words, n_qw):
        # Four 32-bit words per qword, word 0 in the low 32 bits.
        return [
            sum((words[4 * q + lane] & 0xFFFFFFFF) << (32 * lane) for lane in range(4))
            for q in range(n_qw)
        ]

    def upload(base, dbw, n_words, words):
        # One-row PSMCT32 host->local transfer of n_words words to `base`.
        setup = [
            bake.giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE),
            bake.aplusd(bake.R_BITBLTBUF, bake.bitbltbuf_pack(base, dbw, 0)),   # DPSM=PSMCT32
            bake.aplusd(bake.R_TRXPOS, bake.trxpos_pack(0, 0)),
            bake.aplusd(bake.R_TRXREG, bake.trxreg_pack(n_words, 1)),
            bake.aplusd(bake.R_TRXDIR, bake.trxdir_pack(0)),
            bake.giftag(n_words // 4, 0, 2, 0, 0),                              # IMAGE: 4 words/qword
        ]
        return setup + image_qwords(words, n_words // 4)

    # U1: BITBLT the CLUT (256 PSMCT32 entries) -> VRAM[CBP*256] as ONE CONTIGUOUS 256x1 row.
    # PSMCT32 row_stride = DBW*256, so a 16x16 upload would scatter entries (16/256-B row) while
    # clut_loader reads them contiguously; 256x1 keeps them packed (the Ch296 8-entry demo used 8x1).
    qw = upload(CBP, 1, 256, clut)
    # U2: upload the PSMT8 index texture as a PSMCT32 transfer (full 32-bit WORD writes — NOT PSMT8 byte
    # writes, which RMW the 32-bit VRAM word and corrupt one word on 4 consecutive same-word byte-writes).
    # The bytes land identically (TBP*256 + w*4); the TEX0 still READS them as PSMT8. This is exactly the
    # "upload as PSMCT32, bind as PSMT8" trick SH3 itself uses. Upload as (W*H/4)x1 PSMCT32, DBW=16 (1 row).
    qw += upload(TBP0, 16, W * H // 4, tex_words)
    # U3: PRIM(SPRITE+TME) + FRAME + TEX0(PSMT8+CLUT load) + RGBAQ + UV0/XYZ2_0
    frame_val = bake.frame_1_psmct32(FBW)
    qw += [
        bake.giftag(1, 0, 0, 6, 0x0000_0000_00EE_EEEE),
        bake.aplusd(bake.R_PRIM, bake.prim_sprite_tme()),
        bake.aplusd(bake.R_FRAME_1, frame_val),
        bake.aplusd(bake.R_TEX0_1, tex0_sh3()),
        bake.aplusd(bake.R_RGBAQ, bake.rgbaq_data(0x80, 0x80, 0x80)),
        bake.aplusd(bake.R_UV, bake.uv_data(0, 0)),
        bake.aplusd(bake.R_XYZ2, bake.xyz2_data(0, 0)),
    ]
    # U4: UV1/XYZ2_1 closing the WxH textured sprite. EOP.
    qw += [
        bake.giftag(1, 1, 0, 2, 0x0000_0000_0000_00EE),
        bake.aplusd(bake.R_UV, bake.uv_data(W - 1, H - 1)),
        bake.aplusd(bake.R_XYZ2, bake.xyz2_data(W - 1, H - 1)),
    ]
    return qw
payload = build_payload()
qwc = len(payload)
if 16 + qwc > RAM_QWORDS: sys.exit(f"payload {qwc} qw + 16 > {RAM_QWORDS} (bump RAM_QWORDS / RAM_SIZE_BYTES)")
# DISPLAY1 hi = (DH<<12)|DW (magh=magv=1): DW=W-1, DH=H-1. Derived from W/H so that
# swapping W/H for the 128x128 rung cannot leave a stale 64x64 display size behind;
# for the current 64x64 crop this is (63 << 12) | 63, as before.
disp_hi = ((H - 1) << 12) | (W - 1)
# EE RAM image: 16 zero lead qwords, the GIF payload, then zero padding up to RAM_QWORDS.
with open(os.path.join(DATA,"payload_sh3_clut.mem"),"w") as f:
    f.write(f"// Ch347 LOCAL authentic SH3 64x64-CROP PSMT8+CLUT board payload (tex @ TBP={TBP0}, CLUT @ CBP={CBP}).\n")
    f.write(f"// SH3-derived -> gitignored. qw 0..15 zero; qw 16.. = GIF (BITBLT CLUT + tex + DECAL sprite). QWC={qwc}.\n")
    for _ in range(16): f.write(f"{0:032x}\n")
    for w in payload: f.write(f"{w & ((1<<128)-1):032x}\n")
    for _ in range(RAM_QWORDS-16-qwc): f.write(f"{0:032x}\n")
bake.write_bios_mem(
    "bios_sh3_clut.mem",
    bake.build_textured_demo_bootlet_disp(qwc, disp_hi, FBW),
    f"Ch347 LOCAL authentic SH3 64x64-crop PSMT8+CLUT setup bootlet (QWC={qwc}, DISPLAY1={W}x{H}). SH3-derived -> gitignored.")
print(f"[Ch347] payload_sh3_clut.mem: {qwc} qw (BITBLT 256-CLUT + {W}x{H} PSMT8 tex + DECAL sprite, 8 KiB RAM)")
print(f"[Ch347] bios_sh3_clut.mem : bootlet QWC={qwc}, DISPLAY1={W}x{H} (disp_hi=0x{disp_hi:x}), FBW={FBW}")
print(f"[Ch347] VRAM layout: FB {W}x{H} PSMCT32 @0 ({W*H*4//1024}KiB) + PSMT8 tex @TBP={TBP0} ({W*H//1024}KiB) + CLUT @CBP={CBP} (1KiB) -> fits the proven 64x64 bram config")
+104
View File
@@ -0,0 +1,104 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch348 authentic SH3 PSMT8+CLUT through the PERSPECTIVE feeder path.
Composes Ch342 (perspective ST/Q triangles via the feeder S1-path) + Ch347 (authentic SH3 PSMT8 texture +
real CLUT). The feeder staging carries a perspective QUAD (2 TME tris, word0[32]=1) AND the TEX0 (PSM=PSMT8,
CLD=1, CSM2-linear) — so the feeder's TEX0 commit fires the VRAM->CLUT load; a setup bootlet uploads the SH3
PSMT8 texture (as a PSMCT32 transfer — word writes, no byte-RMW) + the SH3 CLUT (256x1 contiguous, the Ch347
lesson). DECAL/opaque; real CLUT RGB authentic, alpha not blended.
Label (Codex): authentic SH3 PSMT8 texture + real CLUT rendered through the proven perspective-triangle path
on silicon, chosen perspective geometry (NOT authentic SH3 draw geometry).
Outputs (LOCAL, gitignored) into sim/data/top_psmct32_raster_demo/:
bios_sh3_persp.mem / payload_sh3_persp.mem setup bootlet: BITBLT CLUT + PSMT8 texture (no tris)
feeder_sh3_persp.mem feeder staging: perspective quad + TEX0(PSMT8,CLD=1)
"""
import sys, os
HERE=os.path.dirname(os.path.abspath(__file__)); ROOT=os.path.normpath(os.path.join(HERE,".."))
DATA=os.path.join(ROOT,"sim","data","top_psmct32_raster_demo")
sys.path.insert(0, DATA); import bake
W=H=64 # SH3 64x64 crop
TBP0=64 # PSMT8 texture base byte 0x4000 (past the 64x64 PSMCT32 FB = 16 KiB)
CBP=80 # CLUT base byte 0x5000 (past the 4 KiB texture)
FBW=1
RAM_QWORDS=512 # 8 KiB EE RAM (256-CLUT + 64x64 texture upload ~ 340 qw)
# perspective quad in the 64x64 FB: x 8..56, y 8..56; top FAR (w=8), bottom NEAR (w=1); UV over 0..64.
PX0,PX1,PY0,PY1 = 8,56,8,56
WF,WN = 8,1
def read_mem(name,n):
    """Load the hex words stored in DATA/<name>.

    Blank lines and lines whose first non-blank characters are "//" are
    skipped; every other line is parsed as one base-16 integer.

    Args:
        name: file name relative to the DATA directory.
        n: minimum number of words the file must provide.

    Returns:
        The list of parsed integers (all of them, not just the first n).

    Exits (SystemExit) when fewer than n words are present.
    """
    # Use a context manager so the handle is closed deterministically; the
    # original opened the file inside the comprehension and never closed it.
    with open(os.path.join(DATA,name)) as f:
        v=[int(l,16) for l in f if l.strip() and not l.strip().startswith("//")]
    if len(v)<n: sys.exit(f"{name}: {len(v)}<{n} (run gs_extract_sh3_clut.py first)")
    return v
tex_words = read_mem("sh3_tex_idx64.mem", W*H//4) # 1024 packed words (4 indices each)
# 256 CLUT entries; build_setup_payload masks each to 32 bits and packs four per qword.
clut = read_mem("sh3_clut.mem", 256)
def tex0_sh3_clut():
    """Return the TEX0 value for the 64x64 PSMT8 SH3 texture with its CLUT.

    The texture part comes from bake.tex0_pack (TBP0, buffer width 1,
    psm=0x13, tw=th=6, tfx=1 i.e. DECAL).  The CLUT part is OR-ed in here:
    CBP at bit 37, then the constants 0 at bit 51, 1 at bit 55, 0 at bit 56
    and 1 at bit 61 (CPSM=PSMCT32, CSM2 linear, CSA=0, CLD=1).
    """
    texture_part = bake.tex0_pack(TBP0, 1, psm=0x13, tw=6, th=6, tfx=1)
    clut_part = (
        ((CBP & 0x3FFF) << 37)   # CLUT base pointer
        | ((0 & 0xF) << 51)      # CPSM
        | ((1 & 1) << 55)        # CSM
        | ((0 & 0x1F) << 56)     # CSA
        | ((1 & 7) << 61)        # CLD
    )
    return texture_part | clut_part
# --- setup bootlet payload: BITBLT the CLUT (256x1) + the PSMT8 texture (as PSMCT32 words). No tris. ---
def build_setup_payload():
    """Build the setup-bootlet GIF payload as a list of 128-bit integers.

    Two host->local uploads are emitted back to back, each as a 4-register
    A+D packet (BITBLTBUF, TRXPOS, TRXREG, TRXDIR) followed by an IMAGE
    giftag and its data qwords:

      1. the 256-entry CLUT as a 256x1 transfer to CBP (64 data qwords);
      2. the texture words as a (W*H//4)x1 transfer to TBP0
         (W*H//16 data qwords); this second IMAGE tag carries EOP.

    Each data qword packs four consecutive source words, word k of the
    group in bits [32*k+31 : 32*k], each masked to 32 bits.  No drawing
    primitives are emitted.
    """
    ad_regs = int('E'*4,16)
    clut_qwords = 256//4
    tex_qwords = W*H//16

    def pack_image(words, nqw):
        # Four 32-bit lanes per qword, lane 0 in the least significant bits.
        packed = []
        for q in range(nqw):
            acc = 0
            for lane in range(4):
                acc |= (words[q*4+lane] & 0xFFFFFFFF) << (32*lane)
            packed.append(acc)
        return packed

    # --- CLUT upload ---
    qw = [
        bake.giftag(1,0,0,4,ad_regs),
        bake.aplusd(bake.R_BITBLTBUF, bake.bitbltbuf_pack(CBP,1,0)),
        bake.aplusd(bake.R_TRXPOS, bake.trxpos_pack(0,0)),
        bake.aplusd(bake.R_TRXREG, bake.trxreg_pack(256,1)),
        bake.aplusd(bake.R_TRXDIR, bake.trxdir_pack(0)),
        bake.giftag(clut_qwords,0,2,0,0),
    ]
    qw.extend(pack_image(clut, clut_qwords))

    # --- texture upload (PSMT8 bytes moved as PSMCT32 words) ---
    qw.extend([
        bake.giftag(1,0,0,4,ad_regs),
        bake.aplusd(bake.R_BITBLTBUF, bake.bitbltbuf_pack(TBP0,16,0x00)),
        bake.aplusd(bake.R_TRXPOS, bake.trxpos_pack(0,0)),
        bake.aplusd(bake.R_TRXREG, bake.trxreg_pack(W*H//4,1)),
        bake.aplusd(bake.R_TRXDIR, bake.trxdir_pack(0)),
        bake.giftag(tex_qwords,1,2,0,0),  # EOP
    ])
    qw.extend(pack_image(tex_words, tex_qwords))
    return qw
# --- feeder staging: perspective quad (2 tris) + TEX0(PSMT8,CLD=1). word0[32]=1. ---
def build_feeder_staging():
    """Build the feeder staging word list for the perspective quad.

    Layout of the returned list:
      word 0      triangle count with bit 32 set (perspective-format flag)
      words 1..6  FRAME_1, ALPHA, TEST, ZBUF, TEX0 (PSMT8 + CLUT), PRIM value
      then, per triangle and per vertex: RGBAQ (carrying Q), ST, XYZ2.

    The quad spans (PX0,PY0)-(PX1,PY1) with texture coordinates 0..W / 0..H;
    the top edge uses w=WF and the bottom edge w=WN.  It is split into the
    triangles (TL,TR,BL) and (TR,BL,BR).
    """
    top_left = (PX0, PY0, 0, 0, WF)
    top_right = (PX1, PY0, W, 0, WF)
    bottom_left = (PX0, PY1, 0, H, WN)
    bottom_right = (PX1, PY1, W, H, WN)
    triangles = [
        [top_left, top_right, bottom_left],
        [top_right, bottom_left, bottom_right],
    ]

    staging = [
        len(triangles) | (1 << 32),
        bake.frame_1_psmct32(FBW),
        bake.alpha_pack(0, 1, 0, 1),
        bake.test1_geq(),
        bake.zbuf1_pack(2),
        tex0_sh3_clut(),      # PSMT8 + CLUT (CLD=1)
        3 | (1 << 4),         # TRI + TME, ABE=0
    ]
    for corners in triangles:
        for sx, sy, u, v, wq in corners:
            s_fp, t_fp, q_fp = bake.persp_attrs(u, v, wq)
            staging.extend((
                bake.rgbaq_with_q(0, 0, 0, q_fp),
                bake.st_data(s_fp, t_fp),
                bake.xyz2_dataz(sx, sy, 0x0000_5000),
            ))
    return staging
payload=build_setup_payload(); qwc=len(payload)
# The image starts with 16 zero qwords, so the payload must fit in the remaining RAM_QWORDS-16.
if 16+qwc>RAM_QWORDS: sys.exit(f"payload {qwc}>{RAM_QWORDS-16}")
# Same (63<<12)|63 value the Ch347 script uses for its 64x64 DISPLAY1 setting.
disp_hi=(63<<12)|63
# One qword per line as 32 hex digits: 16 zero qwords, the payload (masked to 128 bits), zero padding to RAM_QWORDS.
with open(os.path.join(DATA,"payload_sh3_persp.mem"),"w") as f:
f.write(f"// Ch348 LOCAL SH3 PSMT8+CLUT perspective setup payload (CLUT@{CBP}, tex@{TBP0}). gitignored. QWC={qwc}.\n")
for _ in range(16): f.write(f"{0:032x}\n")
for x in payload: f.write(f"{x&((1<<128)-1):032x}\n")
for _ in range(RAM_QWORDS-16-qwc): f.write(f"{0:032x}\n")
# The bootlet is built by bake from (qwc, disp_hi, FBW); its contents are defined in bake, not in this script.
bake.write_bios_mem("bios_sh3_persp.mem",
bake.build_textured_demo_bootlet_disp(qwc, disp_hi, FBW),
f"Ch348 LOCAL SH3 perspective setup bootlet (QWC={qwc}, DISPLAY1={W}x{H}). gitignored.")
# The geometry goes out separately as feeder staging words (the setup payload above carries no triangles).
stg=build_feeder_staging()
bake.write_feeder_stg_mem("feeder_sh3_persp.mem", stg,
"Ch348 LOCAL SH3 PSMT8+CLUT perspective quad through the feeder (word0[32]=1, TEX0 PSMT8 CLD=1). gitignored.")
print(f"[Ch348] payload_sh3_persp.mem: {qwc} qw (BITBLT 256-CLUT + {W}x{H} PSMT8 tex). feeder_sh3_persp.mem: {len(stg)} words (persp quad).")
print(f"[Ch348] perspective quad x[{PX0}..{PX1}] y[{PY0}..{PY1}], top FAR w={WF}, bottom NEAR w={WN}, UV 0..{W}; TEX0 PSMT8 CLD=1 CBP={CBP}.")
+382
View File
@@ -0,0 +1,382 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch350 fixtures: the EXACT Ch349 SH3 draw for the full-res LPDDR integration TB.
Codex scope (no crop/downscale/repack): the actual draw geometry (f1 idx89761 TRI_STRIP -> triangle LIST),
the reconstructed 512x512 PSMT8 texture (in LPDDR), the real CSM1 CLUT (grid/CT32 order in BRAM), perspective
ST/Q, DECAL. Pixel-diff vs the Ch349 host reference with the Ch348 bounded <=1-texel acceptance.
Emits (LOCAL/gitignored -> sim/data/top_psmct32_raster_demo/):
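sh3_real_tex_lpddr.mem 65536 LE words = the LINEAR (de-swizzled) 512x512 indices packed 4/word, i.e. the same
words as sh3_real_idx.mem (loaded into the behavioral LPDDR model; read with PSMT8_SWIZZLE=0).
sh3_real_tex_lpddr_swz.mem 65536 LE words = the raw PSMT8-SWIZZLED texture bytes mem[TBP*256 : +262144]
(kept for the PSMT8_SWIZZLE=1 variant).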
sh3_real_idx.mem 65536 words = the DE-SWIZZLED 512x512 indices packed 4/word (TB reference search).
sh3_real_clut.mem 256 words = the CT32-grid CLUT bytes mem[CBP*256 : +1024] (backdoored into BRAM;
the Ch350 CSM1 clut_loader reads them in grid order).
feeder_sh3_real.mem feeder staging: 68 tris (translated to FB origin) + TEX0(PSMT8,CSM1,CLD=1,DECAL).
sh3_real_refmap.mem per FB pixel (FBW*64 x FBH): covered|interior|tu|tv for the bounded TB check.
sh3_real_ref.png eyeball reference (host DECAL render at FB scale).
Geometry/addressing constants are printed + emitted as `sh3_real_params.vh` for the TB to include.
Usage: gs_make_sh3_real_draw_fixture.py [dump.gs.zst] [--draw-idx 89761]
"""
import sys, os, glob
HERE=os.path.dirname(os.path.abspath(__file__)); ROOT=os.path.normpath(os.path.join(HERE,".."))
DATA=os.path.join(ROOT,"sim","data","top_psmct32_raster_demo")
sys.path.insert(0, HERE)
import gs_sh3_draw_census as C
import gs_sh3_recon as RC
import gs_localmem as LM
sys.path.insert(0, DATA); import bake
# ---- address map (Ch350 BRAM-CROP diagnostic: read2 needs VRAM < 256 KiB; FB cropped, texture/CLUT/geometry
# FULL-RES). VRAM_BYTES = 0x20000 (128 KiB, 2^15 words) so the read2 tripwire (>=256 KiB) is NOT tripped.
# Only the FRAMEBUFFER/VIEWPORT is cropped to a deterministic CH-tall band; the texture stays full 512x512
# (in LPDDR, cache-intercepted base 0x40000), the CSM1 CLUT is full, the geometry/ST/Q are unchanged. ----
FBW = 4 # 256 px wide FB (64-px units) — full draw width (247) fits
FBPXW = FBW*64 # 256
CH = 120 # FB rows: 256*120*4 = 122880 B; + CLUT(1 KiB) < 128 KiB
VRAM_BYTES = 0x20000 # 128 KiB BRAM VRAM (2^15 words) — under the 256 KiB read2 tripwire
CBP = 0x1E000//256 # 480 (CLUT right after the 256x120 FB = 0x1E000)
NEW_TBP = 0x40000//256 # 1024 (texture VRAM base; cache-intercepted, NOT in the 128 KiB BRAM)
TEX_VRAM_BASE = NEW_TBP*256 # 0x40000
TEX_BYTES = 512*512 # 262144 (PSMT8) — FULL texture, no downscale
LPDDR_TEX_BASE = 0x00200000 # texture byte base in LPDDR4B
TW_LOG, TH_LOG = 9, 9 # 512x512
TBW_TEX = 8 # texture TBW (64-px units) — MUST match the original draw's swizzle
STG_WORDS = 768
def recip8(q, IDX_BITS=8, SCALE=24, Q_W=24):
    """Python replica of the gs_reciprocal_stub lookup.

    q is reduced to an IDX_BITS-wide mantissa M whose top bit sits at
    position IDX_BITS-1 (shifting right or left as needed), and the result
    is floor(2^(SCALE+IDX_BITS-1) / M) >> e, where e is the index of q's
    most significant set bit.  The result saturates at 2^(SCALE+1)-1, which
    is also returned for q <= 0 and for a zero mantissa.

    Q_W is accepted for signature compatibility and is not used.
    Used to build the RTL-faithful reference that separates reciprocal
    quantization from S1 attribute under-interpolation.
    """
    saturated = (1 << (SCALE + 1)) - 1
    if q <= 0:
        return saturated
    top_bit = IDX_BITS - 1
    msb = q.bit_length() - 1
    shift = msb - top_bit
    if shift >= 0:
        mantissa = q >> shift      # drop low bits below the mantissa window
    else:
        mantissa = q << -shift     # small q: move its MSB up to top_bit
    mantissa &= (1 << IDX_BITS) - 1
    if mantissa == 0:
        return saturated
    quotient = (1 << (SCALE + top_bit)) // mantissa
    return min(quotient >> msb, saturated)
def persp_texel_recip(uq, vq, q, tw, th, idx_bits=8, SCALE=24):
    """Model gs_persp_uv using the idx_bits-wide reciprocal lookup.

    uq, vq and q are rounded to integers; each texel coordinate is
    (attr * recip8(q)) >> SCALE, clamped to at most 2047 and then wrapped
    with a modulo (REPEAT) by tw for u and th for v.

    Returns the (u, v) texel pair.
    """
    recip = recip8(int(round(q)), IDX_BITS=idx_bits, SCALE=SCALE)
    raw_u = (int(round(uq)) * recip) >> SCALE
    raw_v = (int(round(vq)) * recip) >> SCALE
    clamped_u = min(raw_u, 2047)
    clamped_v = min(raw_v, 2047)
    return (clamped_u % tw), (clamped_v % th)
def f32_to(v):
    """Return v unchanged (placeholder: no conversion is performed)."""
    return v
def tex0_real(tbp, cbp):
    """Return the TEX0 value for the full 512x512 PSMT8 draw with a CSM1 CLUT.

    The texture part is bake.tex0_pack(tbp, TBW_TEX, psm=0x13 (PSMT8),
    tw=TW_LOG, th=TH_LOG, tfx=1 (DECAL)).  The CLUT fields are OR-ed in
    from the table below; only CBP depends on the arguments.

    Args:
        tbp: texture base pointer passed through to bake.tex0_pack.
        cbp: CLUT base pointer; only its low 14 bits are used.
    """
    texture_part = bake.tex0_pack(tbp, TBW_TEX, psm=0x13, tw=TW_LOG, th=TH_LOG, tfx=1)
    clut_fields = (
        (cbp, 0x3FFF, 37),  # CBP
        (0, 0xF, 51),       # CPSM = PSMCT32
        (0, 0x1, 55),       # CSM = 0 -> CSM1 (16x16 CT32 grid) — the Ch350 path
        (0, 0x1F, 56),      # CSA = 0
        (1, 0x7, 61),       # CLD = 1 -> always load
    )
    clut_part = 0
    for value, mask, shift in clut_fields:
        clut_part |= (value & mask) << shift
    return texture_part | clut_part
def main(argv):
# Entry point: builds every Ch350/Ch351/Ch352 fixture for one SH3 draw and writes them under DATA.
# argv follows sys.argv: an optional first positional dump path, then an optional "--draw-idx N".
# Exits via sys.exit when the dump, the draw or the VRAM snapshot is missing, and on staging/field overflow.
dump = None; draw_idx = 89761
a = argv[1:]
if a and not a[0].startswith("--"): dump = a[0]
if "--draw-idx" in a: draw_idx = int(a[a.index("--draw-idx")+1])
# No dump given: fall back to the first captures/gs/silenthill3/*224139*.gs.zst match.
if dump is None:
c = glob.glob(os.path.join(ROOT,"captures","gs","silenthill3","*224139*.gs.zst"))
if not c: sys.exit("no SH3 dump found; pass the .gs.zst path")
dump = c[0]
PERSP_FRAC = bake.PERSP_FRAC
dr = C.get_draw(dump, draw_idx)
if dr is None: sys.exit(f"draw idx {draw_idx} not found")
t0 = dr["tex0"]; ORIG_TBP = t0["tbp"]; ORIG_CBP = t0["cbp"]
TW, TH = t0["tw"], t0["th"]
# NOTE: this is an assert, so the TEX0 sanity check disappears when Python runs with -O.
assert TW==512 and TH==512 and t0["psm"]==0x13, f"unexpected TEX0 {t0}"
# --- reconstruct GS local memory at draw time (Ch349) ---
mem, replayed, uploads, events, vram = RC.build_localmem_to(dump, draw_idx)
if mem is None: sys.exit("VRAM snapshot absent")
# de-swizzled 512x512 index image (for TB reference) + swizzled bytes (for LPDDR)
idx = mem.read_psmt8(ORIG_TBP, t0["tbw"], TW, TH) # de-swizzled indices
tex_swz = bytes(mem.m[ORIG_TBP*256 : ORIG_TBP*256 + TEX_BYTES]) # swizzled bytes -> LPDDR
clut_bytes = bytes(mem.m[ORIG_CBP*256 : ORIG_CBP*256 + 1024]) # CT32-grid CLUT bytes -> BRAM
pal = RC.read_clut32(mem, ORIG_CBP, order="grid") # for the reference PNG
# --- geometry: translate to draw origin (full frame), then choose a deterministic CH-tall VIEWPORT crop ---
xmin = min(v["x"] for v in dr["verts"]); ymin = min(v["y"] for v in dr["verts"])
OX, OY = int(xmin), int(ymin)
fverts = [dict(x=v["x"]-OX, y=v["y"]-OY, s=v["s"], t=v["t"], q=v["q"]) for v in dr["verts"]]
# TRI_STRIP -> triangle list: vertex i closes the triangle (i-2, i-1, i).
ftris = [(i-2,i-1,i) for i in range(2,len(fverts))]
full_h = int(max(v["y"] for v in fverts)) + 1
# edge(): 2D cross product of (p-a) with (b-a); used for the triangle area term and for the barycentric weights.
def edge(ax,ay,bx,by,px,py): return (px-ax)*(by-ay)-(py-ay)*(bx-ax)
# per-row coverage histogram over the FULL frame -> pick CY0 = argmax covered pixels in a CH-tall band.
row_cov = [0]*(full_h+CH+2)
for (a0,b0,c0) in ftris:
v0,v1,v2=fverts[a0],fverts[b0],fverts[c0]
x0,y0=v0["x"],v0["y"]; x1,y1=v1["x"],v1["y"]; x2,y2=v2["x"],v2["y"]
ar=edge(x0,y0,x1,y1,x2,y2)
if abs(ar)<1e-9: continue
inv=1.0/ar
for py in range(max(0,int(min(y0,y1,y2))), min(full_h-1,int(max(y0,y1,y2))+1)+1):
for px in range(max(0,int(min(x0,x1,x2))), min(FBPXW-1,int(max(x0,x1,x2))+1)+1):
cx,cy=px+0.5,py+0.5
w0=edge(x1,y1,x2,y2,cx,cy)*inv; w1=edge(x2,y2,x0,y0,cx,cy)*inv; w2=1.0-w0-w1
if w0>=-0.001 and w1>=-0.001 and w2>=-0.001: row_cov[py]+=1
# Strict '>' keeps the lowest cy0 among equal sums, so the chosen band does not depend on tie order.
best_cy0, best_sum = 0, -1
for cy0 in range(0, max(1, full_h-CH+1)):
s = sum(row_cov[cy0:cy0+CH])
if s > best_sum: best_sum, best_cy0 = s, cy0
CY0 = best_cy0; CX0 = 0
# apply the viewport crop: shift Y by -CY0 (ST/Q UNCHANGED — only the framebuffer window moves), then
# CLIP each triangle to the crop rect [0,FBPXW]x[0,CH] (Sutherland-Hodgman, interpolating S/T/Q linearly in
# screen space — correct since S,T,Q are already premultiplied by 1/w). This is the VIEWPORT scissor done at
# the host: every emitted vertex lands inside the FB (no out-of-bounds writes), geometry SHAPE + per-vertex
# ST/Q are preserved exactly; only the framebuffer window is cropped. Codex's "cropped or scissored" rule.
def lerp(p1, p2, a):
return dict(x=p1["x"]+a*(p2["x"]-p1["x"]), y=p1["y"]+a*(p2["y"]-p1["y"]),
s=p1["s"]+a*(p2["s"]-p1["s"]), t=p1["t"]+a*(p2["t"]-p1["t"]), q=p1["q"]+a*(p2["q"]-p1["q"]))
# clip_edge: one Sutherland-Hodgman pass of poly against the half-plane described by inside();
# poly[i-1] wraps to the last vertex when i == 0, which closes the polygon.
def clip_edge(poly, inside, isect):
out=[]
for i in range(len(poly)):
cur=poly[i]; prv=poly[i-1]
ci=inside(cur); pi=inside(prv)
if ci:
if not pi: out.append(isect(prv,cur))
out.append(cur)
elif pi:
out.append(isect(prv,cur))
return out
def clip_rect(poly):
# left x>=0, right x<=FBPXW, top y>=0, bottom y<=CH
poly=clip_edge(poly, lambda p:p["x"]>=0.0, lambda a,b:lerp(a,b,(0.0-a["x"])/(b["x"]-a["x"])))
if not poly: return poly
poly=clip_edge(poly, lambda p:p["x"]<=FBPXW, lambda a,b:lerp(a,b,(FBPXW-a["x"])/(b["x"]-a["x"])))
if not poly: return poly
poly=clip_edge(poly, lambda p:p["y"]>=0.0, lambda a,b:lerp(a,b,(0.0-a["y"])/(b["y"]-a["y"])))
if not poly: return poly
poly=clip_edge(poly, lambda p:p["y"]<=CH, lambda a,b:lerp(a,b,(CH-a["y"])/(b["y"]-a["y"])))
return poly
sverts = [dict(x=v["x"]-CX0, y=v["y"]-CY0, s=v["s"], t=v["t"], q=v["q"]) for v in fverts]
def rnd(v): # round XY to integer screen coords (the feeder gets ints) — host ref MUST use the SAME ints,
return dict(x=float(int(round(v["x"]))), y=float(int(round(v["y"]))), s=v["s"], t=v["t"], q=v["q"])
tris = [] # list of (v0,v1,v2) explicit clipped vertex dicts with INTEGER screen XY
for (a0,b0,c0) in ((i-2,i-1,i) for i in range(2,len(sverts))):
poly = clip_rect([sverts[a0], sverts[b0], sverts[c0]])
poly = [rnd(p) for p in poly]
for k in range(1, len(poly)-1): # fan the clipped polygon into triangles
tris.append((poly[0], poly[k], poly[k+1]))
print(f"[Ch350] draw idx{draw_idx}: {len(sverts)} verts; full frame {FBPXW}x{full_h}; DETERMINISTIC crop "
f"CX0={CX0} CY0={CY0} -> FB {FBPXW}x{CH} ({best_sum} covered px in band); clipped to {len(tris)} tris")
# --- feeder staging ---
# NOTE: gs_persp_uv contract is uq=(u/w)*2^FRAC, q=(1/w)*2^FRAC, u=(uq*floor(2^SCALE/q))>>SCALE. Scaling
# S/T/Q by K is INVARIANT (cancels) — confirmed it doesn't move the residual. The texel-accuracy limit is
# the gs_reciprocal_stub 8-bit (256-entry) LUT: ~0.4% relative -> <=1 texel for Ch348's TW=64 but ~2+ texels
# for this TW=512 texture (plus the S1-path under-interpolation banding). A perspective-precision limit.
# Ch351 — EFFECTIVE PERSP_FRAC. The hardware u=s/q divide cancels the frac scale, so "widening PERSP_FRAC"
# is a HOST PACKING choice: pack S/T/Q with more frac bits so the far-surface denominator q=(1/w)*2^FRACeff
# doesn't round to 1-2 (FRAC=12 collapses for w~2048). PSCALE=2^k gives FRACeff = PERSP_FRAC + k. PSCALE=256
# -> FRACeff=20, which took the SH3 crop 20%->80% (Codex's "Q×256 ≈ +8 frac bits"). Default PSCALE=1 keeps
# Ch342/348 at FRAC=12 (canaries). The 24-bit signed S/T field bounds FRACeff for a given S/T range — checked.
# NOTE(review): the value actually used below is 4096 (k=12), not the 1 or 256 discussed above — confirm intended.
PSCALE = 4096
S24_MAX = (1<<23) - 1
# One-element list so the nested vert_words() can update the running maximum without nonlocal.
max_sfp = [0]
def vert_words(v):
s_fp = round(v["s"] * TW * (1<<PERSP_FRAC) * PSCALE) # s_fp/q_fp = (S/Q)*TW = texel_u (perspective)
t_fp = round(v["t"] * TH * (1<<PERSP_FRAC) * PSCALE)
q_fp = round(v["q"] * (1<<PERSP_FRAC) * PSCALE)
max_sfp[0] = max(max_sfp[0], abs(s_fp), abs(t_fp))
if abs(s_fp) > S24_MAX or abs(t_fp) > S24_MAX: # 24-bit signed ST field overflow guard (Codex #3)
sys.exit(f"[Ch351] OVERFLOW: |s_fp|={abs(s_fp)} or |t_fp|={abs(t_fp)} > 2^23-1 at PSCALE={PSCALE} "
f"(FRACeff={PERSP_FRAC}+{PSCALE.bit_length()-1}). Lower PSCALE for this S/T range.")
if abs(q_fp) > 0x7FFFFFFF:
sys.exit(f"[Ch351] OVERFLOW: |q_fp|={abs(q_fp)} > 2^31-1 (Q field). Lower PSCALE.")
# Clamp to the last FB column/row: the clip rect is inclusive of x=FBPXW and y=CH.
sx = max(0, min(FBPXW-1, int(round(v["x"]))))
sy = max(0, min(CH-1, int(round(v["y"]))))
return [bake.rgbaq_with_q(0,0,0, q_fp & 0xFFFFFFFF),
bake.st_data(s_fp & 0xFFFFFF, t_fp & 0xFFFFFF),
bake.xyz2_dataz(sx, sy, 0x0000_5000)]
stg = []
stg.append(len(tris) | (1<<32)) # word0: ntris | perspective-format flag
stg.append(bake.frame_1_psmct32(FBW))
stg.append(bake.alpha_pack(0,1,0,1))
stg.append(0) # TEST_1 = 0 (ZTE=0, ATE=0): no depth/alpha test
stg.append(bake.zbuf1_pack(0, zmsk=1)) # ZMSK=1: no Z writes -> no Z buffer needed
stg.append(tex0_real(NEW_TBP, CBP)) # PSMT8 + CSM1 CLUT (CLD=1) -> feeder commit fires the load
stg.append(3 | (1<<4)) # TRI + TME, ABE=0 (S1 perspective path)
for (v0,v1,v2) in tris:
for v in (v0,v1,v2): stg += vert_words(v)
if len(stg) > STG_WORDS: sys.exit(f"staging {len(stg)} > {STG_WORDS} (raise STG_WORDS)")
print(f"[Ch350] feeder staging: {len(stg)} words (<= {STG_WORDS})")
print(f"[Ch351] effective PERSP_FRAC = {PERSP_FRAC}+{PSCALE.bit_length()-1} = {PERSP_FRAC+PSCALE.bit_length()-1} "
f"(PSCALE={PSCALE}); max |s_fp/t_fp|={max_sfp[0]} of 2^23-1 ({100.0*max_sfp[0]/((1<<23)-1):.1f}% of the 24-bit ST field)")
# --- host reference + per-pixel texel map. TWO references over the SAME clipped geometry:
# refmap = FLOAT perspective (ideal) — the Codex pixel-diff oracle.
# refmap_rec = RTL-FAITHFUL: fixed-point vertex attrs (uq=s*TW*2^FRAC, q=Q*2^FRAC), float interp, then the
# 8-bit gs_reciprocal_stub. Comparing the RTL FB vs BOTH isolates reciprocal quantization
# (RTL≈refmap_rec, refmap_rec≠refmap) from S1 under-interpolation banding (RTL≠refmap_rec).
refmap = [0]*(FBPXW*CH); refpix = [(0,0,0)]*(FBPXW*CH)
refmap_rec = [0]*(FBPXW*CH); refpix_rec = [(0,0,0)]*(FBPXW*CH)
refmap_aff = [0]*(FBPXW*CH) # AFFINE: per-vertex texel, linear u,v interp (NOT perspective-correct)
# NOTE(review): F carries no PSCALE factor, whereas vert_words() above multiplies the staged values by PSCALE.
F = 1<<PERSP_FRAC
for (v0,v1,v2) in tris:
x0,y0=v0["x"],v0["y"]; x1,y1=v1["x"],v1["y"]; x2,y2=v2["x"],v2["y"]
area = edge(x0,y0,x1,y1,x2,y2)
if abs(area)<1e-9: continue
inv = 1.0/area
# per-vertex FIXED-POINT attributes (exactly what the feeder staging carries)
uqv=[round(v["s"]*TW*F) for v in (v0,v1,v2)]
vqv=[round(v["t"]*TH*F) for v in (v0,v1,v2)]
qv =[round(v["q"]*F) for v in (v0,v1,v2)]
# per-vertex TEXEL (perspective divide at the vertex) for the affine reference
auv=[((v["s"]/v["q"])*TW if abs(v["q"])>1e-12 else 0.0) for v in (v0,v1,v2)]
avv=[((v["t"]/v["q"])*TH if abs(v["q"])>1e-12 else 0.0) for v in (v0,v1,v2)]
minx=max(0,int(min(x0,x1,x2))); maxx=min(FBPXW-1,int(max(x0,x1,x2))+1)
miny=max(0,int(min(y0,y1,y2))); maxy=min(CH-1,int(max(y0,y1,y2))+1)
for py in range(miny,maxy+1):
for px in range(minx,maxx+1):
cx,cy=px+0.5,py+0.5
w0=edge(x1,y1,x2,y2,cx,cy)*inv; w1=edge(x2,y2,x0,y0,cx,cy)*inv; w2=1.0-w0-w1
if w0<-0.001 or w1<-0.001 or w2<-0.001: continue
# Ch351 convention fix: coverage/interior at pixel CENTER (px+0.5), but the RTL interpolates
# the perspective ATTRIBUTES at the INTEGER pixel coord (tex_dx_s1 = s1_x_q - v0_x, no +0.5).
# Use a CORNER barycentric for S/T/Q to match -> removes the ~0.5-texel drift.
a0w=edge(x1,y1,x2,y2,float(px),float(py))*inv; a1w=edge(x2,y2,x0,y0,float(px),float(py))*inv; a2w=1.0-a0w-a1w
S=a0w*v0["s"]+a1w*v1["s"]+a2w*v2["s"]; T=a0w*v0["t"]+a1w*v1["t"]+a2w*v2["t"]
Q=a0w*v0["q"]+a1w*v1["q"]+a2w*v2["q"]
if abs(Q)<1e-12: continue
tu=int((S/Q)*TW) % TW; tv=int((T/Q)*TH) % TH
if tu<0: tu+=TW
if tv<0: tv+=TH
# RTL-faithful: interp the FIXED-POINT attrs, then the 8-bit reciprocal
uq=w0*uqv[0]+w1*uqv[1]+w2*uqv[2]; vq=w0*vqv[0]+w1*vqv[1]+w2*vqv[2]; qq=w0*qv[0]+w1*qv[1]+w2*qv[2]
tur,tvr = persp_texel_recip(uq,vq,qq,TW,TH,idx_bits=8)
# AFFINE texel: linear interp of the per-vertex texels (the under-interpolation hypothesis)
au=int(w0*auv[0]+w1*auv[1]+w2*auv[2]) % TW; av=int(w0*avv[0]+w1*avv[1]+w2*avv[2]) % TH
if au<0: au+=TW
if av<0: av+=TH
mw = min(w0,w1,w2)
interior = 1 if mw > 0.04 else 0 # away from this triangle's own edges
deep = 1 if mw > 0.15 else 0 # DEEP interior — far from any edge => seam-free zone
o=py*FBPXW+px
# Map word layout: bit31 covered, bit30 interior, bit29 deep (refmap only), bits[17:9] u texel, bits[8:0] v texel.
# Later triangles overwrite earlier ones at the same pixel.
refmap[o] = (1<<31)|(interior<<30)|(deep<<29)|((tu&0x1FF)<<9)|(tv&0x1FF)
refmap_rec[o] = (1<<31)|(interior<<30)|((tur&0x1FF)<<9)|(tvr&0x1FF)
refmap_aff[o] = (1<<31)|(interior<<30)|((au&0x1FF)<<9)|(av&0x1FF)
p=pal[idx[tv*TW+tu]&0xFF]; refpix[o]=(p&0xFF,(p>>8)&0xFF,(p>>16)&0xFF)
pr=pal[idx[tvr*TW+tur]&0xFF];refpix_rec[o]=(pr&0xFF,(pr>>8)&0xFF,(pr>>16)&0xFF)
covered = sum(1 for w in refmap if w>>31)
print(f"[Ch350] host reference: {covered} covered FB pixels")
# emit the RTL-faithful refmap + PNG for the Ch351 oracle
with open(os.path.join(DATA,"sh3_real_refmap_recip.mem"),"w") as f:
f.write("// Ch351 RTL-faithful (8-bit reciprocal) per-pixel texel map. gitignored.\n")
for x in refmap_rec: f.write(f"{x & 0xFFFFFFFF:08x}\n")
with open(os.path.join(DATA,"sh3_real_refmap_affine.mem"),"w") as f:
f.write("// Ch351 AFFINE (per-vertex texel, linear interp) per-pixel texel map. gitignored.\n")
for x in refmap_aff: f.write(f"{x & 0xFFFFFFFF:08x}\n")
# The PNG is best-effort: any exception here (PIL missing, output directory missing, ...) is printed and skipped.
try:
from PIL import Image
Image.new("RGB",(FBPXW,CH)).copy() # noop guard
im2=Image.new("RGB",(FBPXW,CH)); im2.putdata(refpix_rec)
im2.save(os.path.join(ROOT,"captures","gs","silenthill3","extracted","recon","sh3_real_ref_recip.png"))
except Exception as e:
print("(PIL skip recip png:", e, ")")
# --- setup bootlet: BOARD-READY CLUT upload (Ch347 pattern). The CSM1 loader reads the CLUT from VRAM at
# CBP via GRID offsets; sh3_real_clut.mem IS the raw grid bytes, so a LINEAR 256x1 BITBLT of those 256 words
# (PSMCT32_SWIZZLE=0) places byte CBP+k*4 = word k = the grid byte verbatim -> the loader reads it correctly.
# The upload also fires dma_done_seen, which auto-starts the feeder (C_SETUP->C_RUN). On the board this is the
# ONLY CLUT path (no TB backdoor); the SIM TB still backdoors the same bytes (redundant + consistent).
clut_words_b = [int.from_bytes(clut_bytes[i*4:i*4+4],"little") for i in range(256)]
RAM_QWORDS = 512
pay = []
pay.append(bake.giftag(1,0,0,4,int('E'*4,16)))
pay.append(bake.aplusd(bake.R_BITBLTBUF, bake.bitbltbuf_pack(CBP, 1, 0x00))) # PSMCT32 dest @CBP
pay.append(bake.aplusd(bake.R_TRXPOS, bake.trxpos_pack(0,0)))
pay.append(bake.aplusd(bake.R_TRXREG, bake.trxreg_pack(256,1))) # 256x1 contiguous
pay.append(bake.aplusd(bake.R_TRXDIR, bake.trxdir_pack(0)))
pay.append(bake.giftag(256//4,1,2,0,0)) # 64 qwords image data, EOP
for q in range(256//4):
word=0
for lane in range(4): word |= (clut_words_b[q*4+lane]&0xFFFFFFFF) << (32*lane)
pay.append(word)
# NOTE(review): unlike the Ch347/Ch348 scripts there is no "16+qwc > RAM_QWORDS" check before the padding loop below.
qwc=len(pay)
disp_hi=((CH-1)<<12)|(FBPXW-1)
with open(os.path.join(DATA,"payload_sh3_real.mem"),"w") as f:
f.write(f"// Ch352 LOCAL SH3 real-draw setup payload (CSM1 CLUT 256x1 -> CBP={CBP}, grid bytes verbatim). gitignored. QWC={qwc}.\n")
for _ in range(16): f.write(f"{0:032x}\n")
for x in pay: f.write(f"{x&((1<<128)-1):032x}\n")
for _ in range(RAM_QWORDS-16-qwc): f.write(f"{0:032x}\n")
bake.write_bios_mem("bios_sh3_real.mem",
bake.build_textured_demo_bootlet_disp(qwc, disp_hi, FBW),
f"Ch352 LOCAL SH3 real-draw setup bootlet (QWC={qwc}, DISPLAY1={FBPXW}x{CH}). gitignored.")
print(f"[Ch352] setup bootlet: payload {qwc} qw (CSM1 CLUT 256x1 upload to CBP={CBP}).")
# --- emit ---
# wmem: write `words` to DATA/<name> under a "// banner" line, one word per line, masked to width_hex hex
# digits and zero-padded to that width.
def wmem(name, words, width_hex, banner):
with open(os.path.join(DATA,name),"w") as f:
f.write(f"// {banner}\n")
for x in words: f.write(f"{x & ((1<<(4*width_hex))-1):0{width_hex}x}\n")
# de-swizzled index image, packed 4 idx/word (LINEAR row-major: byte v*TW+u = idx(u,v))
idx_words = [idx[i*4]|(idx[i*4+1]<<8)|(idx[i*4+2]<<16)|(idx[i*4+3]<<24) for i in range(TW*TH//4)]
wmem("sh3_real_idx.mem", idx_words, 8, "Ch350 LOCAL SH3 512x512 de-swizzled indices (4/word) for TB ref. gitignored.")
# LPDDR texture: the bram-top defaults PSMT8_SWIZZLE=0 (LINEAR read, like Ch347/348) — so store the texture
# LINEAR (de-swizzled, = idx_words). The texture unit's linear PSMT8 addr (base + v*TBW*64 + u) then reads
# texel(u,v)=idx(u,v). (The raw SWIZZLED bytes would need PSMT8_SWIZZLE=1; kept as sh3_real_tex_lpddr_swz.mem
# for that variant.) This is the Ch299/Ch350 root-cause fix: linear texture <-> linear read.
wmem("sh3_real_tex_lpddr.mem", idx_words, 8, "Ch350 LOCAL SH3 512x512 LINEAR de-swizzled indices -> LPDDR model (PSMT8_SWIZZLE=0). gitignored.")
# Ch352 guardrail #2 — board-side READBACK CHECKSUM: after the HPS write-probe uploads these 65536 words to
# LPDDR @0x200000, the HPS read-probe should read them back and confirm sum32 + xor32 BEFORE the cache fill.
tex_sum = sum(idx_words) & 0xFFFFFFFF
tex_xor = 0
for w in idx_words: tex_xor ^= w
print(f"[Ch352] TEXTURE→LPDDR upload checksum (verify via read-probe before cache-fill): "
f"{len(idx_words)} words @ LPDDR 0x{LPDDR_TEX_BASE:07x} sum32=0x{tex_sum:08x} xor32=0x{tex_xor:08x}")
tex_swz_words = [int.from_bytes(tex_swz[i*4:i*4+4],"little") for i in range(TEX_BYTES//4)]
wmem("sh3_real_tex_lpddr_swz.mem", tex_swz_words, 8, "Ch350 LOCAL SH3 512x512 SWIZZLED bytes -> LPDDR (for PSMT8_SWIZZLE=1 variant). gitignored.")
clut_words = [int.from_bytes(clut_bytes[i*4:i*4+4],"little") for i in range(256)]
wmem("sh3_real_clut.mem", clut_words, 8, "Ch350 LOCAL SH3 CSM1 CLUT (raw CT32-grid bytes @CBP) -> BRAM (HW CSM1 loader reads these in grid order). gitignored.")
# de-gridded palette pal[i] (what the HW CSM1 grid-read produces) -> TB reference expected colors
wmem("sh3_real_pal.mem", [p & 0xFFFFFFFF for p in pal], 8,
"Ch350 LOCAL SH3 de-gridded palette pal[i] (grid-read of the CBP bytes) for the TB reference. gitignored.")
bake.write_feeder_stg_mem("feeder_sh3_real.mem", stg,
"Ch350 LOCAL SH3 REAL draw (idx89761) feeder staging: triangle list + TEX0(PSMT8,CSM1,CLD=1,DECAL). gitignored.",
total=STG_WORDS)
wmem("sh3_real_refmap.mem", refmap, 8, "Ch350 LOCAL per-FB-pixel covered|interior|tu|tv reference map. gitignored.")
# params include for the TB
with open(os.path.join(DATA,"sh3_real_params.vh"),"w") as f:
f.write("// Ch350 LOCAL generated params for tb_top_psmct32_sh3_real_draw_demo. gitignored.\n")
f.write(f"localparam int FBW = {FBW};\n")
f.write(f"localparam int FBPXW = {FBPXW};\n")
f.write(f"localparam int FBH = {CH};\n")
f.write(f"localparam int VRAM_BYTES_P = {VRAM_BYTES};\n")
f.write(f"localparam int CROP_CX0 = {CX0};\n")
f.write(f"localparam int CROP_CY0 = {CY0};\n")
f.write(f"localparam int CLUT_CBP = {CBP};\n")
f.write(f"localparam int NEW_TBP = {NEW_TBP};\n")
f.write(f"localparam int TEX_VRAM_BASE= {TEX_VRAM_BASE};\n")
f.write(f"localparam int TEX_BYTES = {TEX_BYTES};\n")
f.write(f"localparam [29:0] LPDDR_TEX_BASE = 30'h{LPDDR_TEX_BASE:07x};\n")
f.write(f"localparam int N_BEATS = {TEX_BYTES//32};\n")
f.write(f"localparam int STG_WORDS = {STG_WORDS};\n")
f.write(f"localparam int TW = {TW};\n")
f.write(f"localparam int TH = {TH};\n")
# eyeball PNG
try:
from PIL import Image
im=Image.new("RGB",(FBPXW,CH)); im.putdata(refpix)
im.save(os.path.join(ROOT,"captures","gs","silenthill3","extracted","recon","sh3_real_ref.png"))
print("[Ch350] wrote sh3_real_ref.png")
except Exception as e:
print("(PIL skipped:", e, ")")
print(f"[Ch350] emitted fixtures -> {DATA} (LOCAL). TEX_VRAM_BASE=0x{TEX_VRAM_BASE:x} TBP={NEW_TBP} CBP={CBP} "
f"LPDDR_TEX_BASE=0x{LPDDR_TEX_BASE:x} N_BEATS={TEX_BYTES//32}")
if __name__ == "__main__":
main(sys.argv)
+85
View File
@@ -0,0 +1,85 @@
#!/usr/bin/env python3
"""Author a small CONTENT-CLEAN synthetic PCSX2 .gs dump for Ch340 byte-exact parser tests.
No copyrighted content — every byte is hand-authored here. Exercises the container header + each
packet type + each GIF mode the parser must handle: PACKED A+D, PACKED PRE-prim, PACKED RGBAQ/XYZ2,
an IMAGE upload, and a VSync frame boundary. Writes captures/gs/synthetic/mini.gs (whitelisted).
"""
import struct, os
OUT = os.path.join(os.path.dirname(__file__), "..", "captures", "gs", "synthetic", "mini.gs")
def giftag(nloop, eop, flg, nreg, regs, pre=0, prim=0):
    """Encode one GIF tag as 16 little-endian bytes.

    Every argument is masked to its field width and placed at a fixed bit
    offset of the 128-bit tag:
      NLOOP[14:0], EOP[15], PRE[46], PRIM[57:47], FLG[59:58], NREG[63:60],
      REGS[127:64].
    """
    placements = (
        (nloop & 0x7FFF, 0),
        (eop & 1, 15),
        (pre & 1, 46),
        (prim & 0x7FF, 47),
        (flg & 3, 58),
        (nreg & 0xF, 60),
        (regs & ((1 << 64) - 1), 64),
    )
    tag = 0
    for value, shift in placements:
        tag |= value << shift
    return tag.to_bytes(16, "little")
def ad(addr, data):
    """Encode a PACKED A+D qword as 16 little-endian bytes.

    The low 64 bits hold data (masked to 64 bits); the register address,
    masked to 8 bits, sits in bits [71:64].  All remaining bits are zero.
    """
    data_half = (data & ((1 << 64) - 1)).to_bytes(8, "little")
    addr_half = (addr & 0xFF).to_bytes(8, "little")
    return data_half + addr_half
def packed_rgbaq(r, g, b, a):
    """Encode a PACKED RGBAQ qword as 16 little-endian bytes.

    r, g, b and a start at bits 0, 32, 64 and 96 respectively.  The values
    are OR-ed in unmasked, so callers are expected to pass in-range values.
    """
    qword = 0
    for lane, channel in enumerate((r, g, b, a)):
        qword |= channel << (32 * lane)
    return qword.to_bytes(16, "little")
def packed_xyz2(x, y, z, adc=0):
    """Encode a PACKED XYZ2 qword as 16 little-endian bytes.

    X (16 bits) starts at bit 0, Y (16 bits) at bit 32, Z (32 bits) at
    bit 64 and ADC at bit 111.  x, y and z are masked to their widths;
    adc is shifted in unmasked.
    """
    low_half = (x & 0xFFFF) | ((y & 0xFFFF) << 32)
    # Bit 111 of the qword is bit 47 of the upper 64-bit half.
    high_half = (z & 0xFFFFFFFF) | (adc << 47)
    return (low_half | (high_half << 64)).to_bytes(16, "little")
def transfer(path, gifdata):
    """Wrap gifdata in a transfer record.

    Record layout: one byte 0, one byte `path`, the payload length as a
    little-endian uint32, then the payload bytes themselves.
    """
    record_head = bytes((0, path))
    length_field = struct.pack("<I", len(gifdata))
    return b"".join((record_head, length_field, gifdata))
def vsync(field):
    """Return the two-byte VSync record: a byte 1 followed by `field`."""
    return bytes((1, field))
# ---- GIF payloads ----
# P1: one A+D write to FRAME_1 (0x4C) = 0x000000000C001807 (an arbitrary recognizable value).
p1 = giftag(1, 0, 0, 1, 0xE) + ad(0x4C, 0x0000_0000_0C00_1807)
# P2: a TRIANGLE (prim type 3, TME/ABE bits) via PRE, PACKED, nreg=2 (RGBAQ=1, XYZ2=5), nloop=3.
prim_val = 3 | (1 << 4) | (1 << 6) # TRIANGLE + TME + ABE
regs2 = 0x1 | (0x5 << 4) # desc0=RGBAQ, desc1=XYZ2
# Each vertex tuple is (x, y, z, r, g, b); x and y are shifted left by 4 when packed below.
verts = [(100, 50, 0x5000, 0xFF, 0x00, 0x00), (200, 50, 0x5000, 0x00, 0xFF, 0x00),
(100, 150, 0x5000, 0x00, 0x00, 0xFF)]
p2 = giftag(3, 1, 0, 2, regs2, pre=1, prim=prim_val)
for (x, y, z, r, g, b) in verts:
p2 += packed_rgbaq(r, g, b, 0x80) + packed_xyz2(x << 4, y << 4, z)
# P3: an IMAGE upload, 2 qwords of dummy texture bytes.
p3 = giftag(2, 1, 2, 0, 0) + bytes(32)
# The packet stream: three transfers (all with path byte 3) followed by one VSync record.
packets = transfer(3, p1) + transfer(3, p2) + transfer(3, p3) + vsync(0)
def build_container(serial, packets, state_size=16):
    """Assemble a synthetic .gs dump image and return it as bytes.

    Output layout, in order:
      uint32 0xFFFFFFFF, uint32 len(header_block),
      header_block = nine little-endian uint32 header fields + serial,
      state_size zero bytes, 8192 zero bytes (register block), packets.

    Args:
        serial: bytes appended after the nine header fields.
        packets: already-encoded packet stream appended last.
        state_size: number of zero bytes emitted as the state blob.
    """
    state = bytes(state_size)
    regs_block = bytes(8192)
    header_fields = (9, len(state), 36, len(serial), 0x12345678, 0, 0, 36 + len(serial), 0)
    header_block = struct.pack("<9I", *header_fields) + serial
    prefix = struct.pack("<II", 0xFFFFFFFF, len(header_block))
    return prefix + header_block + state + regs_block + packets
def write(path, data):
    """Write data to path in binary mode, creating parent directories.

    Args:
        path: destination file path.
        data: bytes to write.

    Prints a one-line confirmation with the path and byte count.
    """
    parent = os.path.dirname(path)
    # A bare filename has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError; only create the parent when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "wb") as f:
        f.write(data)
    print(f"wrote {path} ({len(data)} bytes)")
# mini.gs — mixed fixture for the byte-exact PARSER test (P2's triangle is TME=1 = unsupported).
# Module-level side effect: the file is written as soon as this script runs.
write(OUT, build_container(b"SYNTH001", packets))
# mini_supported.gs — two NON-textured opaque triangles for the TRANSLATOR test (TME=0, ABE=0, flat).
OUT2 = os.path.join(os.path.dirname(__file__), "..", "captures", "gs", "synthetic", "mini_supported.gs")
def flat_tri(prim_val, tris):
    """Encode flat-colored triangles as a concatenation of GIF packets.

    Args:
        prim_val: PRIM value placed in each tag (PRE=1).
        tris: iterable of (vertices, rgb) pairs, where vertices is three
            (x, y, z) tuples and rgb is an (r, g, b) triple applied to
            every vertex of that triangle.

    Each triangle becomes one tag (NLOOP=3, EOP=1, PACKED, NREG=2 with
    descriptors RGBAQ then XYZ2) followed by an RGBAQ/XYZ2 qword pair per
    vertex; alpha is fixed at 0x80 and x/y are shifted left by 4.
    """
    descriptors = 0x1 | (0x5 << 4)
    pieces = []
    for corners, color in tris:
        pieces.append(giftag(3, 1, 0, 2, descriptors, pre=1, prim=prim_val))
        for x, y, z in corners:
            pieces.append(packed_rgbaq(color[0], color[1], color[2], 0x80))
            pieces.append(packed_xyz2(x << 4, y << 4, z))
    return b"".join(pieces)
sup = transfer(3, giftag(1, 0, 0, 1, 0xE) + ad(0x18, 0x0000_0000_0000_0000)) # XYOFFSET_1 = 0
# Both triangles go into ONE transfer; flat_tri emits a separate EOP=1 tag per triangle inside it.
sup += transfer(3, flat_tri(3, [ # PRIM=TRIANGLE only (TME=0,ABE=0)
([(10, 10, 0x6000), (50, 10, 0x6000), (10, 50, 0x6000)], (0xFF, 0x00, 0x00)), # red
([(50, 50, 0x5000), (14, 50, 0x5000), (50, 14, 0x5000)], (0x00, 0x00, 0xFF)), # blue
]))
sup += vsync(0)
# Module-level side effect: mini_supported.gs is written as soon as this script runs.
write(OUT2, build_container(b"SYNTHSUP", sup))
# mini_st.gs — Ch342 regression: a PACKED ST write. The GS routes the ST qword's Q lane (bits [95:64])
# to RGBAQ.Q; the parser must expose it as info["q_stq"]. Guards the easy-to-reintroduce drop-Q bug.
OUT3 = os.path.join(os.path.dirname(__file__), "..", "captures", "gs", "synthetic", "mini_st.gs")
def packed_st(s, t, q):
    """Encode a PACKED ST qword as 16 little-endian bytes.

    S starts at bit 0, T at bit 32 and Q at bit 64; the values are OR-ed
    in unmasked, so callers are expected to pass 32-bit values.
    """
    qword = 0
    for lane, component in enumerate((s, t, q)):
        qword |= component << (32 * lane)
    return qword.to_bytes(16, "little")
# One PACKED packet: NLOOP=1, EOP=1, NREG=1, and three distinct lane values so S, T and Q cannot be confused.
p_st = giftag(1, 1, 0, 1, 0x2) + packed_st(0x11111111, 0x22222222, 0x33333333) # descriptor 0x2 = ST
# Module-level side effect: mini_st.gs is written as soon as this script runs.
write(OUT3, build_container(b"SYNTHST0", transfer(3, p_st) + vsync(0)))
+249
View File
@@ -0,0 +1,249 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch340 GS-dump parser + GIF/GS decoder (Bricks 1-2).
Deterministically decodes a PCSX2 .gs/.gs.xz/.gs.zst dump into a NORMALIZED, versioned event stream
(container parse -> GIF tag walk -> GS register-write / IMAGE / transfer events). No hidden
approximation: anything not understood is emitted as an explicit event (MALFORMED / unknown reg /
IMAGE-not-inlined), never silently dropped or guessed.
Container format: see memory/reference_pcsx2_gsdump_format.md (pinned from PCSX2 source + validated
byte-exact against captures/gs/cubes/cubes_frame.gs.zst).
This module is the parser/decoder ONLY. Census/histograms (Brick 3) and the ps2_feeder-scene
translator (Brick 4) consume `parse_dump()`'s event stream. Raw IMAGE/transfer PAYLOADS are never
inlined into committable output — only structural facts (sizes, formats, offsets).
Usage:
gs_parse.py <dump.gs[.xz|.zst]> [--summary] [--events N] [--json events.jsonl]
"""
import sys, struct, lzma, subprocess, shutil, json
from dataclasses import dataclass, field, asdict
# Version of the normalized event schema; printed in the CLI summary header.
SCHEMA_VERSION = 1
# ---- GS register address -> name (A+D + decoded-PACKED targets) ----
# Addresses missing from this table are reported as "UNKNOWN_0x.." by the walker.
GS_REG = {
    0x00:"PRIM",0x01:"RGBAQ",0x02:"ST",0x03:"UV",0x04:"XYZF2",0x05:"XYZ2",0x06:"TEX0_1",0x07:"TEX0_2",
    0x08:"CLAMP_1",0x09:"CLAMP_2",0x0A:"FOG",0x0C:"XYZF3",0x0D:"XYZ3",0x14:"TEX1_1",0x15:"TEX1_2",
    0x16:"TEX2_1",0x17:"TEX2_2",0x18:"XYOFFSET_1",0x19:"XYOFFSET_2",0x1A:"PRMODECONT",0x1B:"PRMODE",
    0x1C:"TEXCLUT",0x22:"SCANMSK",0x34:"MIPTBP1_1",0x35:"MIPTBP1_2",0x36:"MIPTBP2_1",0x37:"MIPTBP2_2",
    0x3B:"TEXA",0x3D:"FOGCOL",0x3F:"TEXFLUSH",0x40:"SCISSOR_1",0x41:"SCISSOR_2",0x42:"ALPHA_1",
    0x43:"ALPHA_2",0x44:"DIMX",0x45:"DTHE",0x46:"COLCLAMP",0x47:"TEST_1",0x48:"TEST_2",0x49:"PABE",
    0x4A:"FBA_1",0x4B:"FBA_2",0x4C:"FRAME_1",0x4D:"FRAME_2",0x4E:"ZBUF_1",0x4F:"ZBUF_2",0x50:"BITBLTBUF",
    0x51:"TRXPOS",0x52:"TRXREG",0x53:"TRXDIR",0x54:"HWREG",0x60:"SIGNAL",0x61:"FINISH",0x62:"LABEL",
}
# PACKED-mode register descriptors: the 4-bit values carried in a GIF tag's REGS field.
# decode_packed_reg() maps each one to its target GS register address and value.
PACKED_PRIM,PACKED_RGBAQ,PACKED_ST,PACKED_UV = 0x0,0x1,0x2,0x3
PACKED_XYZF2,PACKED_XYZ2,PACKED_TEX0_1,PACKED_TEX0_2 = 0x4,0x5,0x6,0x7
PACKED_CLAMP1,PACKED_CLAMP2,PACKED_FOG = 0x8,0x9,0xA
PACKED_XYZF3,PACKED_XYZ3,PACKED_AD,PACKED_NOP = 0xC,0xD,0xE,0xF
# Dump packet type id -> name (same ids that parse_dump() dispatches on).
GST = {0:"Transfer",1:"VSync",2:"ReadFIFO2",3:"Registers"}
# Transfer-packet path id -> name; ids missing here are reported as the raw integer.
GSPATH = {0:"Path1Old",1:"Path2",2:"Path3",3:"Path1New",4:"Dummy"}
@dataclass
class Event:
    # One entry of the normalized event stream produced by parse_dump().
    kind: str # GSREG | IMAGE | GIFTAG | FRAME_BOUNDARY | READFIFO | TRANSFER | MALFORMED
    frame: int # frame counter at emission time (incremented after each FRAME_BOUNDARY)
    idx: int # position in the event list; overwritten by parse_dump()'s emit()
    byte_off: int # offset in the DECOMPRESSED .gs of the source byte
    reg: str = "" # for GSREG
    addr: int = -1 # GS register address for GSREG events; -1 otherwise
    value: int = 0 # register value for GSREG events
    info: dict = field(default_factory=dict) # kind-specific extras (e.g. via, note, q_stq, reason)
# ---------------------------------------------------------------- decompression
def read_dump_bytes(path):
    """Return the fully decompressed bytes of the dump at `path`.

    The compression is chosen from the filename suffix:
      * ``.xz``  -> decompressed in-process with `lzma`;
      * ``.zst`` -> piped through the external `zstd` tool (the process exits
        with an error message if `zstd` is not on PATH, and
        subprocess.CalledProcessError is raised if decompression fails);
      * anything else is read verbatim.
    """
    # ".gs.xz" / ".gs.zst" are already covered by the bare ".xz" / ".zst" tests.
    if path.endswith(".xz"):
        with lzma.open(path, "rb") as f:
            return f.read()
    if path.endswith(".zst"):
        if shutil.which("zstd") is None:
            sys.exit("error: .zst dump but `zstd` not found on PATH")
        # "--" keeps a path that starts with "-" from being parsed as an option.
        return subprocess.run(["zstd", "-d", "-c", "--", path],
                              capture_output=True, check=True).stdout
    with open(path, "rb") as f:
        return f.read()
# ---------------------------------------------------------------- container parse
@dataclass
class Header:
    # Parsed container header of a new-format .gs dump (filled in by parse_header()).
    # The first nine fields are the nine little-endian u32 words read at file offset 8,
    # in this order; header_size is the u32 at offset 4.
    state_version:int; state_size:int; serial_offset:int; serial_size:int; crc:int
    # serial is decoded from the file by parse_header(); packet_start is the computed
    # file offset of the first packet.
    ss_w:int; ss_h:int; ss_off:int; ss_size:int; header_size:int; serial:str; packet_start:int
def parse_header(d):
    """Parse the container header of a new-format .gs dump and return a Header.

    Layout read here: u32 marker 0xFFFFFFFF at offset 0, u32 header_size at
    offset 4, then nine u32 header words at offset 8. The packet stream starts
    after the header, the saved state (state_size bytes) and an 8192-byte
    register block.

    Raises:
        ValueError: the marker is missing, the header is truncated or smaller
            than its fixed 36-byte part, or the computed packet start lies
            past the end of the data.
    """
    if len(d) < 12 or struct.unpack_from("<I", d, 0)[0] != 0xFFFFFFFF:
        raise ValueError("not a new-format .gs (missing 0xFFFFFFFF marker)")
    header_size = struct.unpack_from("<I", d, 4)[0]
    if header_size < 36:
        raise ValueError(f"header_size {header_size} < 36")
    # The nine fixed words occupy bytes 8..43; without this check a short file
    # would surface as struct.error instead of the documented ValueError.
    if len(d) < 8 + 36:
        raise ValueError(f"truncated header: need 44 bytes, have {len(d)}")
    f = struct.unpack_from("<9I", d, 8)
    h = Header(*f, header_size=header_size, serial="", packet_start=0)
    s0 = 8 + h.serial_offset
    h.serial = d[s0:s0+h.serial_size].decode("latin1", "replace")
    h.packet_start = 8 + header_size + h.state_size + 8192
    if h.packet_start > len(d):
        raise ValueError(f"packet_start 0x{h.packet_start:x} past EOF 0x{len(d):x}")
    return h
# ---------------------------------------------------------------- GIF tag decode
def _bits(q, lo, n): # extract n bits at lo from a 128-bit int
return (q >> lo) & ((1 << n) - 1)
def decode_packed_reg(desc, q):
    """Return (addr, value_64, note) for one PACKED register qword, or (None,0,note) to skip.

    `desc` is the 4-bit PACKED descriptor and `q` the 128-bit qword as an int.
    `value_64` is laid out like the target GS register, so a PACKED write and
    an A+D write of the same register decode to the same value. `note` is
    "adc" when a PACKED XYZ2/XYZF2 has its ADC bit (111) set, "nop" for the
    NOP descriptor, and a "packed_desc_0x?_unhandled" reason for descriptors
    this decoder does not know.
    """
    lo64 = q & 0xFFFFFFFFFFFFFFFF
    if desc == PACKED_AD:
        # A+D: register address in [71:64], data in [63:0].
        return _bits(q, 64, 8), lo64, ""
    if desc == PACKED_NOP:
        return None, 0, "nop"
    if desc == PACKED_PRIM:
        return 0x00, _bits(q, 0, 11), ""
    if desc == PACKED_RGBAQ:
        # One 8-bit channel per 32-bit lane. PACKED RGBAQ carries no Q (Q comes
        # from the preceding ST write), so the Q half of the value stays zero.
        r = _bits(q, 0, 8)
        g = _bits(q, 32, 8)
        b = _bits(q, 64, 8)
        a = _bits(q, 96, 8)
        return 0x01, (r | (g << 8) | (b << 16) | (a << 24)), ""
    if desc == PACKED_ST:
        return 0x02, (_bits(q, 0, 32) | (_bits(q, 32, 32) << 32)), ""  # S,T floats (Q -> RGBAQ.Q)
    if desc == PACKED_UV:
        # PACKED UV holds U in [13:0] and V in [45:32]; the UV register holds
        # U in [13:0] and V in [29:16].
        return 0x03, (_bits(q, 0, 14) | (_bits(q, 32, 14) << 16)), ""
    if desc == PACKED_XYZ2:
        return 0x05, (_bits(q, 0, 16) | (_bits(q, 32, 16) << 16) | (_bits(q, 64, 32) << 32)), ("adc" if _bits(q, 111, 1) else "")
    if desc == PACKED_XYZ3:
        return 0x0D, (_bits(q, 0, 16) | (_bits(q, 32, 16) << 16) | (_bits(q, 64, 32) << 32)), ""
    if desc in (PACKED_XYZF2, PACKED_XYZF3):
        # PACKED XYZF: X [15:0], Y [47:32], Z [91:68] (24 bits, 4 pad bits
        # below it), F [107:100]. Register layout: X, Y, Z[55:32], F[63:56].
        value = (_bits(q, 0, 16) | (_bits(q, 32, 16) << 16)
                 | (_bits(q, 68, 24) << 32) | (_bits(q, 100, 8) << 56))
        if desc == PACKED_XYZF2:
            return 0x04, value, ("adc" if _bits(q, 111, 1) else "")
        return 0x0C, value, ""
    if desc in (PACKED_TEX0_1, PACKED_TEX0_2, PACKED_CLAMP1, PACKED_CLAMP2):
        addr = {PACKED_TEX0_1: 0x06, PACKED_TEX0_2: 0x07, PACKED_CLAMP1: 0x08, PACKED_CLAMP2: 0x09}[desc]
        return addr, lo64, ""
    if desc == PACKED_FOG:
        return 0x0A, (_bits(q, 100, 8) << 56), ""
    return None, 0, f"packed_desc_0x{desc:x}_unhandled"
def walk_gif(data, base_off, frame, emit):
    """Walk the GIF tag chain in `data` (a Transfer payload). emit(Event).

    Args:
        data: payload bytes of one Transfer packet.
        base_off: offset of `data` inside the decompressed dump; added to every
            event's byte_off.
        frame: frame number stamped on every event.
        emit: callable receiving each Event (it assigns the final idx).

    For every 16-byte GIF tag a GIFTAG event is emitted, then:
      * PACKED (flg 0): one GSREG per decoded register qword (MALFORMED for an
        unhandled descriptor); a PACKED ST event also carries info["q_stq"];
      * REGLIST (flg 1): one GSREG per 64-bit register, two per qword;
      * IMAGE (flg 2): a single IMAGE event with sizes only (never the payload);
      * flg 3: an extra GIFTAG event noted "disabled".
    A PACKED/REGLIST payload that ends early stops the walk without an event;
    an IMAGE that runs past the payload yields MALFORMED "gif_payload_overrun".
    NOTE(review): GIF packets continuing in a later Transfer are not stitched.
    """
    mask64 = 0xFFFFFFFFFFFFFFFF
    off = 0
    n = len(data)
    while off + 16 <= n:
        q = int.from_bytes(data[off:off+16], "little")
        nloop = _bits(q, 0, 15)
        eop = _bits(q, 15, 1)
        pre = _bits(q, 46, 1)
        prim = _bits(q, 47, 11)
        flg = _bits(q, 58, 2)
        nreg = _bits(q, 60, 4)
        regs = _bits(q, 64, 64)
        nregs = nreg if nreg != 0 else 16  # NREG == 0 encodes 16 descriptors
        emit(Event("GIFTAG", frame, 0, base_off+off,
                   info=dict(nloop=nloop, eop=eop, pre=pre, prim=prim, flg=flg, nreg=nregs)))
        off += 16
        # The tag's PRIM field is written to the PRIM register only for a PACKED
        # tag that outputs data (NLOOP > 0); other tags ignore PRE/PRIM.
        if pre and flg == 0 and nloop > 0:
            emit(Event("GSREG", frame, 0, base_off+off, reg="PRIM", addr=0x00, value=prim,
                       info=dict(via="PRE")))
        if flg == 0:  # PACKED: nloop * nregs qwords
            descs = [(regs >> (4*k)) & 0xF for k in range(nregs)]
            ran_short = False
            for _ in range(nloop):
                for d in descs:
                    if off + 16 > n:
                        ran_short = True
                        break
                    qq = int.from_bytes(data[off:off+16], "little")
                    addr, val, note = decode_packed_reg(d, qq)
                    if addr is not None:
                        inf = {"note": note} if note else {}
                        # Ch342 audit: PACKED ST also carries Q in lane2 [95:64] -> routed to RGBAQ.Q by
                        # the GS (the STQ mechanism). PACKED RGBAQ carries NO Q; Q comes from ST. Expose
                        # it so consumers reconstruct RGBAQ.Q consistently across PACKED/REGLIST/A+D.
                        if addr == 0x02:
                            inf["q_stq"] = _bits(qq, 64, 32)
                        emit(Event("GSREG", frame, 0, base_off+off,
                                   reg=GS_REG.get(addr, f"UNKNOWN_0x{addr:02x}"),
                                   addr=addr, value=val, info=inf))
                    elif note and note != "nop":
                        emit(Event("MALFORMED", frame, 0, base_off+off, info=dict(reason=note)))
                    off += 16
                if ran_short:
                    break  # nothing left to read; later loops would all stop at once
        elif flg == 1:  # REGLIST: nloop * nregs registers, 2 per qword (64-bit each), A+D-less
            descs = [(regs >> (4*k)) & 0xF for k in range(nregs)]
            cur = 0
            upper_next = False  # True when the next register is the upper half of `cur`
            for i in range(nloop * nregs):
                if not upper_next:
                    if off + 16 > n:
                        break
                    cur = int.from_bytes(data[off:off+16], "little")
                    val = cur & mask64
                    src = off
                    upper_next = True
                else:
                    val = (cur >> 64) & mask64
                    src = off + 8  # the upper register sits 8 bytes into the qword
                    off += 16
                    upper_next = False
                d = descs[i % nregs]
                if d == PACKED_NOP:
                    continue
                emit(Event("GSREG", frame, 0, base_off+src, reg=GS_REG.get(d, f"UNKNOWN_0x{d:02x}"),
                           addr=d, value=val, info=dict(via="REGLIST")))
            if upper_next:
                off += 16  # odd register count: the final qword's upper half is padding
        elif flg == 2:  # IMAGE: nloop qwords of raw data (texture / FB upload) — NOT inlined
            qbytes = nloop * 16
            emit(Event("IMAGE", frame, 0, base_off+off, info=dict(qwc=nloop, bytes=qbytes)))
            off += qbytes
        else:  # flg == 3 disabled
            emit(Event("GIFTAG", frame, 0, base_off+off, info=dict(flg=3, note="disabled")))
        if off > n:
            emit(Event("MALFORMED", frame, 0, base_off+off, info=dict(reason="gif_payload_overrun")))
            break
# ---------------------------------------------------------------- packet stream
def parse_dump(path):
    """Parse the dump at `path` and return (Header, [Event, ...]).

    After the container header, the packet stream is read one packet at a time:
      0 Transfer  -> TRANSFER event, then the payload's GIF tag walk;
      1 VSync     -> FRAME_BOUNDARY (the frame counter advances afterwards);
      2 ReadFIFO2 -> READFIFO carrying the qword count;
      3 Registers -> TRANSFER event marked regs_snapshot (8192 bytes skipped).
    An unknown packet id or a packet cut off by the end of the file emits a
    MALFORMED event and stops the parse; nothing is dropped silently.
    Every event's `idx` is its position in the returned list.
    """
    d = read_dump_bytes(path)
    h = parse_header(d)
    events = []
    frame = 0
    end = len(d)

    def emit(ev):
        ev.idx = len(events)
        events.append(ev)

    def malformed(pkt_off, reason, **extra):
        emit(Event("MALFORMED", frame, 0, pkt_off, info=dict(reason=reason, **extra)))

    off = h.packet_start
    while off < end:
        tid = d[off]
        pkt_off = off
        off += 1
        if tid == 0:  # Transfer
            if off + 5 > end:
                malformed(pkt_off, "trunc_transfer_hdr")
                break
            path_id = d[off]
            length = struct.unpack_from("<I", d, off+1)[0]
            off += 5
            if off + length > end:
                malformed(pkt_off, "trunc_transfer_data", len=length)
                break
            emit(Event("TRANSFER", frame, 0, pkt_off,
                       info=dict(path=GSPATH.get(path_id, path_id), length=length)))
            walk_gif(d[off:off+length], off, frame, emit)
            off += length
        elif tid == 1:  # VSync (frame boundary)
            if off >= end:
                malformed(pkt_off, "trunc_vsync")
                break
            emit(Event("FRAME_BOUNDARY", frame, 0, pkt_off, info=dict(field=d[off])))
            off += 1
            frame += 1
        elif tid == 2:  # ReadFIFO2
            if off + 4 > end:
                malformed(pkt_off, "trunc_readfifo")
                break
            emit(Event("READFIFO", frame, 0, pkt_off,
                       info=dict(qwc=struct.unpack_from("<I", d, off)[0])))
            off += 4
        elif tid == 3:  # Registers snapshot
            if off + 8192 > end:
                malformed(pkt_off, "trunc_registers")
                break
            emit(Event("TRANSFER", frame, 0, pkt_off, info=dict(regs_snapshot=8192)))
            off += 8192
        else:
            malformed(pkt_off, f"bad_packet_id_{tid}")
            break
    return h, events
# ---------------------------------------------------------------- CLI / summary
def main(argv):
    """CLI entry point: print a summary of the dump and optionally list/export events.

    Options: ``--events N`` prints the first N events; ``--json PATH`` writes
    every event as one JSON object per line. Returns 0 on success and 2 on a
    usage error (no dump given, or an option with a missing/invalid value).
    """
    from collections import Counter

    if len(argv) < 2:
        print(__doc__)
        return 2
    path = argv[1]

    def option(name):
        """Return the value following `name`, or None when the flag is absent."""
        if name not in argv:
            return None
        pos = argv.index(name) + 1
        if pos >= len(argv):
            raise ValueError(f"{name} requires a value")
        return argv[pos]

    # Validate options up front so a bad command line fails before any parsing.
    try:
        events_arg = option("--events")
        n_events = int(events_arg) if events_arg is not None else None
        json_path = option("--json")
    except ValueError as err:
        print(f"error: {err}", file=sys.stderr)
        return 2

    h, events = parse_dump(path)
    print(f"schema v{SCHEMA_VERSION} serial={h.serial!r} crc=0x{h.crc:08x} ss={h.ss_w}x{h.ss_h} "
          f"state=0x{h.state_size:x} packets@0x{h.packet_start:x}")
    # histograms
    kinds = Counter()
    regs = Counter()
    prims = Counter()
    flgs = Counter()
    frames = images = image_bytes = malformed = 0
    PRIMT = {0:"POINT",1:"LINE",2:"LINE_STRIP",3:"TRIANGLE",4:"TRI_STRIP",5:"TRI_FAN",6:"SPRITE",7:"INVALID"}
    for e in events:
        kinds[e.kind] += 1
        if e.kind == "GSREG":
            regs[e.reg] += 1
        elif e.kind == "FRAME_BOUNDARY":
            frames += 1
        elif e.kind == "MALFORMED":
            malformed += 1
        elif e.kind == "IMAGE":
            images += 1
            image_bytes += e.info.get("bytes", 0)
        elif e.kind == "GIFTAG":
            # A flg=3 tag is followed by a second GIFTAG event noted "disabled";
            # only the tag event itself carries nloop, so count that one.
            if "nloop" in e.info:
                flgs[e.info.get("flg")] += 1
            if e.info.get("pre"):
                prims[PRIMT.get(e.info.get("prim", 0) & 7, "?")] += 1
    print(f"events={len(events)} frames={frames} images={images} image_bytes={image_bytes} malformed={malformed}")
    print("event kinds:", dict(kinds.most_common()))
    print("GIF flg :", {('PACKED' if k==0 else 'REGLIST' if k==1 else 'IMAGE' if k==2 else 'DISABLE'):v for k,v in sorted(flgs.items())})
    print("PRIM types (via PRE):", dict(prims.most_common()))
    print("top GS regs:", dict(regs.most_common(18)))
    if n_events is not None:
        for e in events[:n_events]:
            print(f" f{e.frame} #{e.idx} @0x{e.byte_off:x} {e.kind} {e.reg} {('0x%x'%e.value) if e.kind=='GSREG' else ''} {e.info}")
    if json_path is not None:
        with open(json_path, "w") as f:
            for e in events:
                f.write(json.dumps(asdict(e)) + "\n")
        print(f"wrote {len(events)} events -> {json_path}")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
+226
View File
@@ -0,0 +1,226 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch349 step 1: census of ACTUAL drawn textured geometry in a GS dump.
Ch347/348 proved authentic ASSETS (SH3 PSMT8 tex + real CLUT) through CHOSEN geometry. The remaining
authenticity gap (Codex Ch349): reconstruct an ACTUAL commercial draw faithfully — the texture as the real
draw samples it (streamed in one format, sampled in another) on the real triangle's ST/Q + screen geometry.
This tool is step 1: walk the GS register stream, reconstruct every textured drawing primitive with its full
per-vertex state (screen XY from XYZF2/XYZ2 12.4 fixed, S/T/Q from ST+RGBAQ.Q, RGBA), group consecutive
primitives that share TEX0+PRIM state into DRAWS, and rank them so a single real environment draw can be
PICKED for reconstruction. Pure stdlib; reuses gs_parse for the GIF/register walk and gs_texture_residency
for the VRAM snapshot + CLUT/texture residency verdict.
A good Ch349 candidate is: TME=1, texture RESIDENT in the VRAM snapshot, a non-trivial on-screen footprint
(a real surface, not a 2px HUD glyph), indexed or CT texture with a known PSM, and enough triangles to be a
genuine mapped surface. The census REPORTS; it does not pick for you — the ranked head is the shortlist.
Usage: gs_sh3_draw_census.py <dump.gs.zst> [--top N] [--frame F] [--json out.json] [--min-prims K]
"""
import sys, os, json, struct
from collections import defaultdict
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import gs_parse
import gs_texture_residency as R
def f32(bits):
    """Reinterpret the low 32 bits of `bits` as a little-endian IEEE-754 single float."""
    raw = (bits & 0xFFFFFFFF).to_bytes(4, "little")
    (value,) = struct.unpack("<f", raw)
    return value


# PRIM type field (low 3 bits of the PRIM register) -> primitive name.
PRIMT = {0:"POINT",1:"LINE",2:"LINE_STRIP",3:"TRIANGLE",4:"TRI_STRIP",5:"TRI_FAN",6:"SPRITE",7:"INVALID"}
VERTS_PER = {0:1,1:2,2:2,3:3,4:3,5:3,6:2,7:0} # min verts to kick a primitive
def census(dump, frame_filter=None, min_prims=1, collected=None):
    """Reconstruct every textured draw in a dump and return (draws, header, vram).

    Walks the GSREG events, latching PRIM / TEX0 / XYOFFSET / RGBAQ / ST state,
    and groups consecutive textured vertex writes that share the same
    (tbp, psm, tbw, prim type, tme, abe) key into one draw. Each draw then gets
    a residency verdict, UV/on-screen metrics and a score; the result is sorted
    by score, best first.

    Args:
        dump: dump path, handed to R.collect() when `collected` is None.
        frame_filter: keep only draws of this frame (None keeps all).
        min_prims: drop draws whose nprim is below this.
        collected: optional pre-computed R.collect() tuple
            (d, h, events, uploads, runs, vram), to avoid parsing twice.

    Every vertex dict carries x, y, z, s, t, q, rgba and `kick`. `kick` is
    False for XYZ3/XYZF3 writes and for PACKED XYZ2/XYZF2 writes flagged "adc",
    which queue a vertex without drawing.
    NOTE(review): `nprim` counts vertex writes (it always equals nvert), not
    completed primitives; it is kept as-is because the score and --min-prims
    are tuned against it.
    NOTE(review): for fst=1 draws the GS samples UV, not ST; UV writes are not
    latched here, so s/t may be stale for such draws.
    """
    d, h, events, uploads, runs, vram = collected if collected is not None else R.collect(dump, 0)
    # live GS state we latch as we walk
    prim = dict(type=7, tme=0, fst=0, abe=0, ctxt=0)
    tex0 = {1: None, 2: None}
    ofx = {1: 0.0, 2: 0.0}
    ofy = {1: 0.0, 2: 0.0}
    cur_st = (0.0, 0.0, 1.0)  # S, T, Q
    cur_rgba = 0
    draws = []
    cur = None
    cur_frame = 0
    cur_idx = 0

    def newkey():
        t0 = tex0[1 if prim["ctxt"] == 0 else 2]
        if t0 is None:
            return None
        return (t0["tbp"], t0["psm"], t0["tbw"], prim["type"], prim["tme"], prim["abe"])

    def close():
        nonlocal cur
        if cur and cur["nprim"] >= 1:
            draws.append(cur)
        cur = None

    def open_draw(key):
        nonlocal cur
        t0 = tex0[1 if prim["ctxt"] == 0 else 2]
        cur = dict(key=key, tex0=dict(t0), prim=dict(prim), frame=cur_frame,
                   first_idx=cur_idx, nprim=0, nvert=0,
                   xmin=1e30, xmax=-1e30, ymin=1e30, ymax=-1e30,
                   smin=1e30, smax=-1e30, tmin=1e30, tmax=-1e30,
                   qmin=1e30, qmax=-1e30, verts=[])

    for e in events:
        if e.kind == "FRAME_BOUNDARY":
            cur_frame = e.frame + 1
            continue
        if e.kind != "GSREG":
            continue
        cur_frame = e.frame
        cur_idx = e.idx
        r, v = e.reg, e.value
        if r == "PRIM":
            close()
            prim = dict(type=v & 7, tme=(v >> 4) & 1, fst=(v >> 8) & 1, abe=(v >> 6) & 1, ctxt=(v >> 9) & 1)
        elif r == "PRMODE":  # PRIM-less prim mode (rare); ignore topology change w/o reset
            pass
        elif r == "TEX0_1":
            tex0[1] = R.dec_tex0(v)
        elif r == "TEX0_2":
            tex0[2] = R.dec_tex0(v)
        elif r == "XYOFFSET_1":
            ofx[1] = (v & 0xFFFF) / 16.0
            ofy[1] = ((v >> 32) & 0xFFFF) / 16.0
        elif r == "XYOFFSET_2":
            ofx[2] = (v & 0xFFFF) / 16.0
            ofy[2] = ((v >> 32) & 0xFFFF) / 16.0
        elif r == "RGBAQ":
            cur_rgba = v & 0xFFFFFFFF
        elif r == "ST":
            s = f32(v & 0xFFFFFFFF)
            t = f32((v >> 32) & 0xFFFFFFFF)
            q = f32(e.info.get("q_stq", 0x3F800000))  # Q rides in ST lane2; default 1.0
            cur_st = (s, t, q)
        elif r in ("XYZF2", "XYZ2", "XYZF3", "XYZ3"):
            # vertex write. XY are 12.4 fixed relative to XYOFFSET.
            xf = v & 0xFFFF
            yf = (v >> 16) & 0xFFFF
            ci = 1 if prim["ctxt"] == 0 else 2
            x = xf / 16.0 - ofx[ci]
            y = yf / 16.0 - ofy[ci]
            if r in ("XYZF2", "XYZF3"):
                z = (v >> 32) & 0xFFFFFF
            else:
                z = (v >> 32) & 0xFFFFFFFF
            if not prim["tme"]:  # only textured draws are Ch349 candidates
                continue
            key = newkey()
            if key is None:
                continue
            if cur is None or cur["key"] != key:
                close()
                open_draw(key)
            # XYZ3/XYZF3 and ADC-flagged PACKED writes queue the vertex without a drawing kick.
            kick = r in ("XYZF2", "XYZ2") and e.info.get("note") != "adc"
            vtx = dict(x=x, y=y, z=z, s=cur_st[0], t=cur_st[1], q=cur_st[2], rgba=cur_rgba, kick=kick)
            cur["verts"].append(vtx)
            cur["nvert"] += 1
            cur["nprim"] += 1  # counts vertex writes (see NOTE in the docstring)
            cur["xmin"] = min(cur["xmin"], x); cur["xmax"] = max(cur["xmax"], x)
            cur["ymin"] = min(cur["ymin"], y); cur["ymax"] = max(cur["ymax"], y)
            cur["smin"] = min(cur["smin"], cur_st[0]); cur["smax"] = max(cur["smax"], cur_st[0])
            cur["tmin"] = min(cur["tmin"], cur_st[1]); cur["tmax"] = max(cur["tmax"], cur_st[1])
            cur["qmin"] = min(cur["qmin"], cur_st[2]); cur["qmax"] = max(cur["qmax"], cur_st[2])
    close()
    # attach residency verdict + derived UV/on-screen metrics + score; filter
    SW, SH = 640.0, 480.0  # SH3 internal render res (matches header ss=640x480)
    out = []
    for dr in draws:
        if dr["nprim"] < min_prims:
            continue
        if frame_filter is not None and dr["frame"] != frame_filter:
            continue
        t0 = dr["tex0"]
        snap = R.snapshot_present(vram, t0["tbp"], nb=512)
        clut = None
        if t0["psm"] in R.INDEXED_PSMS:
            csnap = R.snapshot_present(vram, t0["cbp"], nb=1024, min_nz=64)
            clut = dict(cbp=t0["cbp"], cpsm=t0["cpsm"], cld=t0["cld"], resident=bool(csnap),
                        distinct=(csnap["distinct"] if csnap else None))
        dr["tex_resident"] = bool(snap)
        dr["tex_snap"] = snap
        dr["clut"] = clut
        dr["w_px"] = dr["xmax"] - dr["xmin"]
        dr["h_px"] = dr["ymax"] - dr["ymin"]
        # per-vertex perspective UV (u_norm=S/Q) -> texel bbox; on-screen vertex fraction
        umin = vmin = 1e30
        umax = vmax = -1e30
        onscreen = 0
        for vtx in dr["verts"]:
            if 0.0 <= vtx["x"] <= SW and 0.0 <= vtx["y"] <= SH:
                onscreen += 1
            q = vtx["q"]
            if abs(q) < 1e-9:
                continue
            un = vtx["s"] / q
            vn = vtx["t"] / q
            umin = min(umin, un); umax = max(umax, un)
            vmin = min(vmin, vn); vmax = max(vmax, vn)
        dr["onscreen_frac"] = onscreen / max(1, dr["nvert"])
        if umax >= umin:
            dr["u_texmin"] = umin * t0["tw"]; dr["u_texmax"] = umax * t0["tw"]
            dr["v_texmin"] = vmin * t0["th"]; dr["v_texmax"] = vmax * t0["th"]
        else:
            dr["u_texmin"] = dr["u_texmax"] = dr["v_texmin"] = dr["v_texmax"] = 0.0
        # on-screen clipped bbox area
        cx0 = max(0.0, dr["xmin"]); cx1 = min(SW, dr["xmax"])
        cy0 = max(0.0, dr["ymin"]); cy1 = min(SH, dr["ymax"])
        dr["onscreen_area"] = max(0.0, cx1 - cx0) * max(0.0, cy1 - cy0)
        dr["score"] = _score(dr)
        out.append(dr)
    out.sort(key=lambda x: x["score"], reverse=True)
    return out, h, vram
def _score(dr):
"""'Good Ch349 candidate' = resident textured surface, MOSTLY ON-SCREEN, sampling a real texel
rectangle in perspective. Reward on-screen containment + sampled-texel span, NOT guard-band area."""
t0 = dr["tex0"]
s = 0.0
if not dr["tex_resident"]: return -1.0 # must be reconstructable
if t0["psm"] in R.INDEXED_PSMS:
if dr["clut"] and dr["clut"]["resident"]: s += 40.0 # indexed + resident CLUT == the real SH3 path
else: return -1.0
elif t0["psm"] in (0x00,0x02,0x0A,0x01): # CT32/CT16/CT16S/CT24 — directly decodable
s += 18.0
else:
return -1.0 # unsupported PSM for host decode
# ON-SCREEN containment is the dominant term (we want a draw we can actually show + check)
s += 100.0 * dr["onscreen_frac"]
s += min(dr["onscreen_area"]/200.0, 120.0) # on-screen area only (guard band excluded)
# sampled texel rectangle must be a real chunk, not a single degenerate texel
du = abs(dr["u_texmax"]-dr["u_texmin"]); dv = abs(dr["v_texmax"]-dr["v_texmin"])
s += min(du, 64.0) + min(dv, 64.0)
if du < 1.0 and dv < 1.0: s -= 80.0 # near-constant UV: flat/degenerate, not interesting
s += min(dr["nprim"], 48)
if dr["qmax"] > dr["qmin"] * 1.02: s += 15.0 # genuine perspective
return s
def fmt(dr):
    """Format one census draw as the three-line text record printed by the CLI."""
    t0 = dr["tex0"]
    p = dr["prim"]
    clut_info = dr["clut"]
    if clut_info:
        residency = 'R' if clut_info['resident'] else 'X'
        clut = f" CLUT[{residency} cbp={t0['cbp']} cpsm=0x{t0['cpsm']:02x} cld={t0['cld']} dist={clut_info['distinct']}]"
    else:
        clut = ""
    if dr["qmax"] > dr["qmin"]*1.02:
        persp = "PERSP"
    else:
        persp = "affine"
    # line 1: ranking + primitive state
    head = (f"score={dr['score']:6.1f} f{dr['frame']} idx{dr['first_idx']} {PRIMT[p['type']]} "
            f"tme={p['tme']} abe={p['abe']} fst={p['fst']} nprim={dr['nprim']} onscr={dr['onscreen_frac']*100:.0f}%")
    # line 2: bound texture (+ CLUT when indexed)
    texture = (f" TEX0 tbp={t0['tbp']} tbw={t0['tbw']} psm=0x{t0['psm']:02x} {t0['tw']}x{t0['th']} "
               f"tcc={t0['tcc']} tfx={t0['tfx']} resident={dr['tex_resident']}{clut}")
    # line 3: screen and texel footprint
    footprint = (f" screen x[{dr['xmin']:.1f}..{dr['xmax']:.1f}] y[{dr['ymin']:.1f}..{dr['ymax']:.1f}] "
                 f"on-area={dr['onscreen_area']:.0f}px2 texel u[{dr['u_texmin']:.1f}..{dr['u_texmax']:.1f}] "
                 f"v[{dr['v_texmin']:.1f}..{dr['v_texmax']:.1f}] {persp}")
    return "\n".join((head, texture, footprint))
def get_draw(dump, first_idx):
    """Return the single census draw whose first_idx matches (with its full vertex list), or None."""
    all_draws, _header, _vram = census(dump, frame_filter=None, min_prims=1)
    return next((dr for dr in all_draws if dr["first_idx"] == first_idx), None)
def main(argv):
    """CLI entry point: print the ranked draw census and optionally export it as JSON.

    Options: ``--top N`` (default 25), ``--min-prims K`` (default 1),
    ``--frame F`` and ``--json PATH``. Returns 0 on success and 2 on a usage
    error (no dump given, or an option with a missing/invalid value).
    """
    if len(argv) < 2:
        print(__doc__)
        return 2
    dump = argv[1]

    def opt(n, dv=None):
        """Return the value following flag `n`, or `dv` when the flag is absent."""
        if n not in argv:
            return dv
        pos = argv.index(n) + 1
        if pos >= len(argv):
            raise ValueError(f"{n} requires a value")
        return argv[pos]

    try:
        top = int(opt("--top", "25"))
        minp = int(opt("--min-prims", "1"))
        ff = int(opt("--frame")) if "--frame" in argv else None
        json_path = opt("--json")
    except ValueError as err:
        print(f"error: {err}", file=sys.stderr)
        return 2

    draws, h, vram = census(dump, frame_filter=ff, min_prims=minp)
    print(f"# Ch349 draw census: {os.path.basename(dump)}")
    print(f"# textured draws (>= {minp} prim): {len(draws)} vram_snapshot={'present' if vram is not None else 'ABSENT'}")
    cand = [d for d in draws if d["score"] > 0]
    print(f"# reconstructable candidates (resident tex + known PSM): {len(cand)}\n")
    for dr in draws[:top]:
        print(fmt(dr))
        print()
    if json_path is not None:
        # The vertex lists are left out to keep the export small.
        slim = [dict(score=d["score"], frame=d["frame"], first_idx=d["first_idx"],
                     prim=d["prim"], tex0=d["tex0"], nprim=d["nprim"],
                     screen=dict(xmin=d["xmin"], xmax=d["xmax"], ymin=d["ymin"], ymax=d["ymax"]),
                     st=dict(smin=d["smin"], smax=d["smax"], tmin=d["tmin"], tmax=d["tmax"],
                             qmin=d["qmin"], qmax=d["qmax"]),
                     tex_resident=d["tex_resident"], clut=d["clut"]) for d in draws]
        with open(json_path, "w") as f:
            f.write(json.dumps(slim, indent=1) + "\n")
        print(f"# wrote {json_path}")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
+113
View File
@@ -0,0 +1,113 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch349 step 4: software reference image for an ACTUAL SH3 draw.
Rasterizes the chosen draw's REAL geometry (its TRI_STRIP, real per-vertex screen XY + perspective S/T/Q +
vertex RGBA) sampling the texture RECONSTRUCTED from GS local memory (gs_sh3_recon / gs_localmem). Perspective-
correct: S,T,Q (= s/w, t/w, 1/w) interpolate linearly in screen space, then u=(S/Q)*TW, v=(T/Q)*TH; texel ->
CLUT -> ABGR; TFX=MODULATE applies the vertex color (×2/255-ish, GS 128=1.0). This is the host-first reference
that must pixel-check against the real PCSX2 frame BEFORE anything goes to feeder/board.
Usage: gs_sh3_draw_ref.py <dump.gs.zst> [--draw-idx N] [--tbp T --cbp C --tbw W --tw 512 --th 512]
[--clut-order grid|linear] [--modulate 0|1] [--out DIR]
"""
import sys, os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import gs_sh3_draw_census as C
import gs_sh3_recon as RC
# Output framebuffer size in pixels.
SW, SH = 640, 480


def edge(ax, ay, bx, by, px, py):
    """Return the 2D cross product (P - A) x (B - A).

    The sign tells which side of the directed line A->B the point P lies on;
    for three triangle vertices the result is twice the signed area.
    """
    rel_x = px - ax
    rel_y = py - ay
    return rel_x*(by-ay) - rel_y*(bx-ax)
def raster_strip(verts, prim_type, tex_idx, pal, tw, th, modulate):
    """Rasterize a TRI_STRIP/TRI_FAN/TRIANGLE list of verts into an SW*SH RGBA buffer with a z-buffer.

    Args:
        verts: vertex dicts with x, y, z, s, t, q, rgba and optionally `kick`.
        prim_type: 3 = TRIANGLE list, 4 = TRI_STRIP, 5 = TRI_FAN; any other
            value leaves the buffer empty.
        tex_idx: tw*th texel indices, row-major.
        pal: palette of packed colours (R in bits 0-7, G 8-15, B 16-23).
        tw, th: texture size in texels.
        modulate: when truthy, multiply by the interpolated vertex colour
            (128 = 1.0), clamped to 255.

    Returns:
        A list of SW*SH (r, g, b, a) tuples; unpainted pixels stay (0, 0, 0, 0).

    S, T and Q are interpolated linearly in screen space and divided per pixel.
    Texel coordinates are floored and wrapped (REPEAT). A triangle whose last
    vertex has kick == False is skipped; vertices without the key draw as before.
    """
    fb = [(0, 0, 0, 0)] * (SW * SH)
    zb = [-1] * (SW * SH)

    def tri(i0, i1, i2):
        v0, v1, v2 = verts[i0], verts[i1], verts[i2]
        # In all three topologies i2 is the newest vertex; a no-kick vertex
        # is queued by the GS but the triangle it would complete is not drawn.
        if not v2.get("kick", True):
            return
        x0, y0 = v0["x"], v0["y"]
        x1, y1 = v1["x"], v1["y"]
        x2, y2 = v2["x"], v2["y"]
        minx = max(0, int(min(x0, x1, x2))); maxx = min(SW-1, int(max(x0, x1, x2))+1)
        miny = max(0, int(min(y0, y1, y2))); maxy = min(SH-1, int(max(y0, y1, y2))+1)
        area = edge(x0, y0, x1, y1, x2, y2)
        if abs(area) < 1e-6:
            return
        inv = 1.0 / area
        for py in range(miny, maxy+1):
            for px in range(minx, maxx+1):
                cx, cy = px+0.5, py+0.5
                w0 = edge(x1, y1, x2, y2, cx, cy)
                w1 = edge(x2, y2, x0, y0, cx, cy)
                w2 = edge(x0, y0, x1, y1, cx, cy)
                # inside test (either winding)
                if not ((w0 >= 0 and w1 >= 0 and w2 >= 0) or (w0 <= 0 and w1 <= 0 and w2 <= 0)):
                    continue
                b0, b1, b2 = w0*inv, w1*inv, w2*inv
                S = b0*v0["s"] + b1*v1["s"] + b2*v2["s"]
                T = b0*v0["t"] + b1*v1["t"] + b2*v2["t"]
                Q = b0*v0["q"] + b1*v1["q"] + b2*v2["q"]
                if abs(Q) < 1e-12:
                    continue
                u = (S/Q)*tw
                v = (T/Q)*th
                # REPEAT wrap (SH3 CLAMP default). `// 1` floors toward -inf, so
                # a negative coordinate wraps to the texel below zero; int()
                # alone would truncate toward zero and pick texel 0.
                tx = int(u // 1) % tw
                ty = int(v // 1) % th
                pidx = tex_idx[ty*tw + tx]
                p = pal[pidx & 0xFF]
                r, g, b = p & 0xFF, (p >> 8) & 0xFF, (p >> 16) & 0xFF
                if modulate:
                    vr = b0*((v0["rgba"]) & 0xFF) + b1*((v1["rgba"]) & 0xFF) + b2*((v2["rgba"]) & 0xFF)
                    vg = b0*((v0["rgba"] >> 8) & 0xFF) + b1*((v1["rgba"] >> 8) & 0xFF) + b2*((v2["rgba"] >> 8) & 0xFF)
                    vb = b0*((v0["rgba"] >> 16) & 0xFF) + b1*((v1["rgba"] >> 16) & 0xFF) + b2*((v2["rgba"] >> 16) & 0xFF)
                    r = min(255, int(r*vr/128.0))
                    g = min(255, int(g*vg/128.0))
                    b = min(255, int(b*vb/128.0))
                z = int(b0*v0["z"] + b1*v1["z"] + b2*v2["z"])
                o = py*SW + px
                if z >= zb[o]:  # greater-or-equal depth wins
                    zb[o] = z
                    fb[o] = (r, g, b, 255)

    n = len(verts)
    if prim_type == 3:  # TRIANGLE list
        for i in range(0, n-2, 3):
            tri(i, i+1, i+2)
    elif prim_type == 4:  # TRI_STRIP
        for i in range(2, n):
            tri(i-2, i-1, i)
    elif prim_type == 5:  # TRI_FAN
        for i in range(2, n):
            tri(0, i-1, i)
    return fb
def main(argv):
    """CLI entry point: render the software reference image for one census draw.

    Options: ``--draw-idx N`` (default 89761), ``--tbp/--cbp/--tbw/--tw/--th``
    texture overrides, ``--clut-order grid|linear``, ``--modulate 0|1`` and
    ``--out DIR``. Texture parameters that are not given on the command line
    come from the selected draw's own TEX0, so choosing another draw no longer
    samples the default draw's texture.
    Returns 0 on success, 1 when the draw or the VRAM snapshot is missing, and
    2 on a usage error.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2
    dump = argv[1]

    def opt(n, dv=None):
        """Return the value following flag `n`, or `dv` when the flag is absent."""
        if n not in argv:
            return dv
        pos = argv.index(n) + 1
        if pos >= len(argv):
            raise ValueError(f"{n} requires a value")
        return argv[pos]

    try:
        draw_idx = int(opt("--draw-idx", "89761"))
        # None means "not given": filled in from the draw's TEX0 below.
        override = {key: (int(opt(flag)) if flag in argv else None)
                    for key, flag in (("tbp", "--tbp"), ("cbp", "--cbp"), ("tbw", "--tbw"),
                                      ("tw", "--tw"), ("th", "--th"))}
        order = opt("--clut-order", "grid")
        modulate = int(opt("--modulate", "1"))
        outdir = opt("--out", os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                                           "captures", "gs", "silenthill3", "extracted", "recon"))
    except ValueError as err:
        print(f"error: {err}", file=sys.stderr)
        return 2

    os.makedirs(outdir, exist_ok=True)
    dr = C.get_draw(dump, draw_idx)
    if dr is None:
        print(f"draw first_idx={draw_idx} not found")
        return 1
    t0 = dr["tex0"]

    def pick(key):
        return override[key] if override[key] is not None else t0[key]

    tbp, cbp, fbw, tw, th = pick("tbp"), pick("cbp"), pick("tbw"), pick("tw"), pick("th")
    print(f"[step4] draw f{dr['frame']} idx{draw_idx} {C.PRIMT[dr['prim']['type']]} nprim={dr['nprim']} "
          f"nvert={dr['nvert']} TEX0 tbp={t0['tbp']} psm=0x{t0['psm']:02x} {t0['tw']}x{t0['th']}")
    if t0["psm"] != 0x13:
        # This tool always reads the texture through the PSMT8 layout.
        print(f"[step4] warning: draw psm=0x{t0['psm']:02x} is not PSMT8 (0x13); texture is read as PSMT8 anyway")
    mem, replayed, uploads, events, vram = RC.build_localmem_to(dump, draw_idx)
    if mem is None:
        print("VRAM absent")
        return 1
    tex_idx = mem.read_psmt8(tbp, fbw, tw, th)
    pal = RC.read_clut32(mem, cbp, order=order)
    print(f"[step4] reconstructed tex {tw}x{th} ({len(set(tex_idx))} idx), CLUT {order} ({len(set(pal))} colors)")
    fb = raster_strip(dr["verts"], dr["prim"]["type"], tex_idx, pal, tw, th, modulate)
    painted = sum(1 for p in fb if p[3])
    RC.save_png(os.path.join(outdir, f"draw_ref_{draw_idx}_{order}.png"), SW, SH, fb)
    print(f"[step4] painted {painted} px -> draw_ref_{draw_idx}_{order}.png ({outdir})")
    # crop to the draw's on-screen bbox for easier visual compare with the PCSX2 frame
    x0 = max(0, int(dr['xmin'])); x1 = min(SW, int(dr['xmax'])+1)
    y0 = max(0, int(dr['ymin'])); y1 = min(SH, int(dr['ymax'])+1)
    if x1 > x0 and y1 > y0:
        crop = [fb[y*SW+x] for y in range(y0, y1) for x in range(x0, x1)]
        RC.save_png(os.path.join(outdir, f"draw_ref_{draw_idx}_{order}_crop.png"), x1-x0, y1-y0, crop)
        print(f"[step4] bbox crop x[{x0}..{x1}] y[{y0}..{y1}] -> draw_ref_{draw_idx}_{order}_crop.png")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
+149
View File
@@ -0,0 +1,149 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch349 step 4 (definitive): composite a WHOLE SH3 frame from GS local memory.
Per-draw reconstruction (gs_sh3_draw_ref) proves one surface; this composites EVERY textured draw of a frame
to pixel-check the reconstruction against the real PCSX2 screenshot — the strongest faithfulness test. It walks
draws in capture order, keeping a GS local-memory model live (replaying host->local uploads as their idx is
passed, so each draw samples the texture state IT saw, not a stale end-of-frame one), reconstructs+caches each
bound texture (PSMT8 via grid-CSM1 CLUT, or PSMCT32 direct), and rasterizes perspective-correct with a z-buffer.
Output: composited frame PNG + a side-by-side vs the screenshot + a coverage/color summary. SH3-derived ->
LOCAL/gitignored.
Usage: gs_sh3_frame_ref.py <dump.gs.zst> [--frame F] [--shot path.png] [--max-draws N] [--out DIR]
"""
import sys, os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import gs_sh3_draw_census as C
import gs_sh3_recon as RC
import gs_localmem as LM
import gs_texture_residency as R
# Composite framebuffer size in pixels.
SW, SH = 640, 480


def edge(ax, ay, bx, by, px, py):
    """Return the 2D cross product (P - A) x (B - A).

    Positive and negative results are opposite sides of the directed line
    A->B; applied to a triangle's three vertices it is twice the signed area.
    """
    to_p_x = px - ax
    to_p_y = py - ay
    return to_p_x*(by-ay) - to_p_y*(bx-ax)
def reconstruct_texture(mem, t0):
    """Read the texture bound by `t0` out of the local-memory model.

    Returns a (texels, palette, kind) triple:
      * PSMT8 (0x13)   -> (indices, grid-ordered CLUT, "i8")
      * PSMCT32 (0x00) -> (row-major 32-bit words, None, "ct32")
      * any other PSM  -> (None, None, None)
    """
    tbp, psm, tbw, tw, th = t0["tbp"], t0["psm"], t0["tbw"], t0["tw"], t0["th"]
    if psm == 0x13:  # PSMT8 indexed
        indices = mem.read_psmt8(tbp, tbw, tw, th)
        palette = RC.read_clut32(mem, t0["cbp"], order="grid")
        return indices, palette, "i8"
    if psm == 0x00:  # PSMCT32 direct
        words = []
        for y in range(th):
            for x in range(tw):
                words.append(mem.read_ct32_word(tbp, tbw, x, y))
        return words, None, "ct32"
    return None, None, None
def sample(tex, pal, kind, tw, th, u, v):
    """Return the (r, g, b) of the texel at texel-space coordinate (u, v).

    Coordinates are floored and wrapped with REPEAT addressing. For kind
    "i8" the texel is an index into `pal`; otherwise the texel itself is
    the packed colour (R in bits 0-7, G 8-15, B 16-23).
    """
    # `// 1` floors toward -inf, so u = -0.5 wraps to the last texel; int()
    # alone truncates toward zero and would pick texel 0 instead.
    tx = int(u // 1) % tw
    ty = int(v // 1) % th
    texel = tex[ty*tw + tx]
    p = pal[texel & 0xFF] if kind == "i8" else texel
    return (p & 0xFF, (p >> 8) & 0xFF, (p >> 16) & 0xFF)
def main(argv):
if len(argv) < 2:
print(__doc__); return 2
dump = argv[1]
def opt(n, dv=None): return argv[argv.index(n)+1] if n in argv else dv
frame = int(opt("--frame","1")); maxd = int(opt("--max-draws","100000"))
min_area = float(opt("--min-area","0"))
outdir = opt("--out", os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
"captures","gs","silenthill3","extracted","recon"))
os.makedirs(outdir, exist_ok=True)
# all draws of the frame, in capture (idx) order, with full geometry (parse ONCE, reuse for census)
collected = R.collect(dump, 0)
d, h, events, uploads, runs, vram = collected
draws, _, _ = C.census(dump, frame_filter=frame, min_prims=1, collected=collected)
draws = [dr for dr in draws if dr["prim"]["tme"] and dr["tex0"]["psm"] in (0x13,0x00)
and dr["onscreen_area"] >= min_area]
draws.sort(key=lambda x: x["first_idx"])
print(f"[frameref] frame {frame}: {len(draws)} textured (PSMT8/CT32) draws, min_area={min_area}; "
f"replaying uploads incrementally")
mem = LM.LocalMem(vram)
up_sorted = sorted([u for u in uploads if u["dpsm"]==0x00], key=lambda u: u["idx"])
up_i = 0
# epoch counter per tbp (bumped whenever we apply an upload to that dbp) -> texture cache key
epoch = {}
texcache = {}
fb = [(0,0,0,0)]*(SW*SH); zb = [-1]*(SW*SH)
def apply_uploads_before(idx):
nonlocal up_i
while up_i < len(up_sorted) and up_sorted[up_i]["idx"] < idx:
u = up_sorted[up_i]
off, end = u["blob_range"]; blob = d[off:end]
words = [int.from_bytes(blob[i*4:i*4+4],"little") for i in range(len(blob)//4)]
mem.write_image_ct32(u["dbp"], u["dbw"], u["dx"], u["dy"], u["w"], u["h"], words)
epoch[u["dbp"]] = epoch.get(u["dbp"],0)+1
up_i += 1
painted_total = 0
for n, dr in enumerate(draws[:maxd]):
apply_uploads_before(dr["first_idx"])
t0 = dr["tex0"]
key = (t0["tbp"], t0["psm"], t0["tbw"], t0["tw"], t0["th"], t0.get("cbp"),
epoch.get(t0["tbp"],0), epoch.get(t0.get("cbp"),0))
if key not in texcache:
texcache[key] = reconstruct_texture(mem, t0)
tex, pal, kind = texcache[key]
if kind is None: continue
tw, th = t0["tw"], t0["th"]
verts = dr["verts"]; pt = dr["prim"]["type"]
def tri(i0,i1,i2):
nonlocal painted_total
v0,v1,v2 = verts[i0],verts[i1],verts[i2]
x0,y0=v0["x"],v0["y"]; x1,y1=v1["x"],v1["y"]; x2,y2=v2["x"],v2["y"]
minx=max(0,int(min(x0,x1,x2))); maxx=min(SW-1,int(max(x0,x1,x2))+1)
miny=max(0,int(min(y0,y1,y2))); maxy=min(SH-1,int(max(y0,y1,y2))+1)
if maxx<minx or maxy<miny: return
area=edge(x0,y0,x1,y1,x2,y2)
if abs(area)<1e-6: return
inv=1.0/area
for py in range(miny,maxy+1):
base=py*SW
for px in range(minx,maxx+1):
cx,cy=px+0.5,py+0.5
w0=edge(x1,y1,x2,y2,cx,cy); w1=edge(x2,y2,x0,y0,cx,cy); w2=edge(x0,y0,x1,y1,cx,cy)
if not ((w0>=0 and w1>=0 and w2>=0) or (w0<=0 and w1<=0 and w2<=0)): continue
b0,b1,b2=w0*inv,w1*inv,w2*inv
Q=b0*v0["q"]+b1*v1["q"]+b2*v2["q"]
if abs(Q)<1e-12: continue
S=b0*v0["s"]+b1*v1["s"]+b2*v2["s"]; T=b0*v0["t"]+b1*v1["t"]+b2*v2["t"]
u=(S/Q)*tw; vv=(T/Q)*th
z=int(b0*v0["z"]+b1*v1["z"]+b2*v2["z"])
o=base+px
if z>=zb[o]:
zb[o]=z; fb[o]=sample(tex,pal,kind,tw,th,u,vv)+(255,); painted_total+=1
if pt==4:
for i in range(2,len(verts)): tri(i-2,i-1,i)
elif pt==5:
for i in range(2,len(verts)): tri(0,i-1,i)
elif pt==3:
for i in range(0,len(verts)-2,3): tri(i,i+1,i+2)
if (n+1)%200==0: print(f" ...{n+1}/{len(draws)} draws, {painted_total} px, {len(texcache)} tex cached")
cov = sum(1 for p in fb if p[3])
RC.save_png(os.path.join(outdir, f"frame{frame}_composite.png"), SW, SH, fb)
print(f"[frameref] composite: {cov}/{SW*SH} px painted ({100*cov/(SW*SH):.1f}%), {len(texcache)} textures")
# side-by-side vs screenshot
shot = opt("--shot")
if shot and os.path.exists(shot):
from PIL import Image
comp = Image.open(os.path.join(outdir, f"frame{frame}_composite.png")).convert("RGB")
gt = Image.open(shot).convert("RGB").resize((SW,SH))
sbs = Image.new("RGB",(SW*2+8,SH),(40,40,40))
sbs.paste(comp,(0,0)); sbs.paste(gt,(SW+8,0))
sbs.save(os.path.join(outdir, f"frame{frame}_vs_screenshot.png"))
print(f"[frameref] wrote frame{frame}_vs_screenshot.png (left=recon, right=PCSX2)")
return 0
# Script entry point: run main() on the raw argv and use its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
+104
View File
@@ -0,0 +1,104 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch349 steps 2-4: reconstruct ONE real SH3 textured draw from GS local memory.
Closes the Ch347/348 gap (authentic asset on CHOSEN geometry) -> an ACTUAL commercial draw reconstructed
faithfully. Default pick (gs_sh3_draw_census.py top): frame 1 idx89761, a 70-prim PSMT8 512x512 TRI_STRIP at
tbp=9216, CLUT cbp=13952 (CSM1 PSMCT32). That texture is STREAMED as 256x256 PSMCT32 (upload idx13288, same
262144 bytes) and SAMPLED as 512x512 PSMT8 — the exact stream-one-format / sample-another bridge.
step 2 Build a GS local-memory model (gs_localmem.LocalMem) seeded from the dump's initial VRAM snapshot,
replay every host->local PSMCT32 upload up to the draw, then READ the texture back via the PSMT8
swizzle. index -> CLUT -> ABGR. This is "the texture as the real draw sees it".
step 3 decode + print the draw's real TEX0/CLUT/state.
step 4 (gs_sh3_draw_ref.py) rasterize the actual geometry sampling this texture.
All outputs are SH3-derived -> LOCAL/gitignored (captures/gs/silenthill3/extracted/recon/).
Usage: gs_sh3_recon.py <dump.gs.zst> [--draw-idx N] [--tbp T] [--cbp C] [--tbw W] [--tw 512] [--th 512]
[--clut-order linear|grid] [--out DIR]
"""
import sys, os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import gs_parse
import gs_texture_residency as R
import gs_localmem as LM
def build_localmem_to(dump, draw_idx):
    """Model GS local memory as it stands just before event ``draw_idx``.

    The model is seeded from the dump's initial VRAM snapshot (as returned by
    ``R.collect``) and then every host->local upload whose event index is below
    ``draw_idx`` and whose destination format is PSMCT32 (``dpsm == 0x00``) is
    written into it, in stream order.

    Returns ``(mem, replayed_uploads, all_uploads, events, vram)``. When the
    dump has no VRAM snapshot, ``mem`` and ``vram`` are ``None`` and the
    replayed list is empty.
    """
    d, _h, events, uploads, _runs, vram = R.collect(dump, 0)
    if vram is None:
        return None, [], uploads, events, None

    mem = LM.LocalMem(vram)
    # only PSMCT32 stream writes are modelled here (SH3 env path)
    replayed = [u for u in uploads if u["idx"] < draw_idx and u["dpsm"] == 0x00]
    for up in replayed:
        start, stop = up["blob_range"]
        payload = d[start:stop]
        # little-endian 32-bit words; a trailing partial word (if any) is dropped
        whole = (len(payload) // 4) * 4
        words = [int.from_bytes(payload[k:k + 4], "little") for k in range(0, whole, 4)]
        mem.write_image_ct32(up["dbp"], up["dbw"], up["dx"], up["dy"], up["w"], up["h"], words)
    return mem, replayed, uploads, events, vram
def read_clut32(mem, cbp, order="grid"):
    """Read a 256-entry 32-bit CLUT out of the modelled VRAM.

    ``order == "linear"`` reads entry ``i`` as the little-endian word at byte
    ``cbp*256 + i*4`` of ``mem.m`` (0 when that word would run past
    ``mem.SIZE``). Any other value (default ``"grid"``) reads entry ``i`` via
    ``mem.read_ct32_word(cbp, 1, i % 16, i // 16)``, i.e. as a 16x16 CT32
    surface with buffer width 1.

    Returns a list of 256 packed ints (low byte R, as decoded by decode_pixel).

    NOTE(review): no CSM1 index bit-3/bit-4 reordering is applied in either
    mode — confirm whether callers expect that to be handled here.
    """
    if order == "linear":
        base = cbp * 256

        def entry(i):
            addr = base + 4 * i
            if addr + 4 > mem.SIZE:
                return 0
            return int.from_bytes(mem.m[addr:addr + 4], "little")

        return [entry(i) for i in range(256)]

    # grid: palette entry i lives at (x=i%16, y=i//16) through the CT32 swizzle, dbw=1
    return [mem.read_ct32_word(cbp, 1, i % 16, i // 16) for i in range(256)]
def decode_pixel(pal, idx):
    """Look up palette entry ``idx`` (low 8 bits only) and unpack it to an (R, G, B, A) tuple.

    The packed word carries R in bits 0-7, G in 8-15, B in 16-23 and A in 24-31.
    """
    word = pal[idx & 0xFF]
    return tuple((word >> shift) & 0xFF for shift in (0, 8, 16, 24))
def save_png(path, w, h, rgba_pixels):
    """Write ``rgba_pixels`` (a flat sequence of RGBA tuples) as a ``w`` x ``h`` RGBA image at ``path``.

    Pillow is imported lazily so the module can be loaded without it.
    """
    from PIL import Image
    canvas = Image.new("RGBA", (w, h))
    canvas.putdata(rgba_pixels)
    canvas.save(path)
def main(argv):
    """CLI driver: rebuild local memory up to one draw, then dump its texture, index image and CLUT as PNGs.

    ``argv[1]`` is the dump path; the remaining options are looked up by flag
    name (see the module docstring). Returns 2 on missing arguments, 1 when the
    dump has no VRAM snapshot, and 0 after the three PNGs are written.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2

    def opt(n, dv=None):
        # value following flag `n`, or the default when the flag is absent
        if n not in argv:
            return dv
        return argv[argv.index(n) + 1]

    dump = argv[1]
    draw_idx = int(opt("--draw-idx", "89761"))
    tbp = int(opt("--tbp", "9216"))
    cbp = int(opt("--cbp", "13952"))
    fbw = int(opt("--tbw", "8"))
    tw = int(opt("--tw", "512"))
    th = int(opt("--th", "512"))
    order = opt("--clut-order", "grid")
    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    outdir = opt("--out", os.path.join(repo_root, "captures", "gs", "silenthill3", "extracted", "recon"))
    os.makedirs(outdir, exist_ok=True)

    def out(name):
        return os.path.join(outdir, name)

    mem, replayed, uploads, events, vram = build_localmem_to(dump, draw_idx)
    if mem is None:
        print("VRAM snapshot ABSENT — cannot reconstruct")
        return 1

    print(f"[Ch349] dump={os.path.basename(dump)} draw_idx={draw_idx} tbp={tbp} cbp={cbp} fbw={fbw} {tw}x{th}")
    print(f"[step2] GS local-mem seeded from initial snapshot + replayed {len(replayed)} PSMCT32 upload(s) "
          f"before the draw:")
    for u in replayed:
        print(f" idx{u['idx']} dbp={u['dbp']} dbw={u['dbw']} {u['w']}x{u['h']} {u['bytes']}B")

    # index plane as the draw samples it (PSMT8 read-back of the CT32-written memory)
    idx = mem.read_psmt8(tbp, fbw, tw, th)
    distinct = len(set(idx))
    print(f"[step2] de-swizzled PSMT8 index image: {tw}x{th}, {distinct} distinct indices")
    save_png(out("recon_indices_gray.png"), tw, th, [(b, b, b, 255) for b in idx])

    pal = read_clut32(mem, cbp, order=order)
    print(f"[step2] CLUT @cbp={cbp} order={order}: {len(set(pal))} distinct ABGR entries")

    # colour outputs are written fully opaque; the palette alpha is discarded
    color = [decode_pixel(pal, b) for b in idx]
    save_png(out(f"recon_texture_{order}.png"), tw, th, [(r, g, b, 255) for (r, g, b, a) in color])
    swatches = (decode_pixel(pal, i) for i in range(256))
    save_png(out(f"recon_clut_{order}.png"), 16, 16, [(r, g, b, 255) for (r, g, b, a) in swatches])
    print(f"[step2] wrote recon_indices_gray.png, recon_texture_{order}.png, recon_clut_{order}.png -> {outdir}")
    return 0
# Script entry point: main() returns 0 on success, 1 when the VRAM snapshot is absent, 2 on usage error.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
+138
View File
@@ -0,0 +1,138 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch341 Brick 1: host-side texture state analysis + extraction from a GS dump.
Decodes the texture UPLOADS (BITBLTBUF / TRXREG / TRXPOS / TRXDIR + IMAGE) and TEX0 BINDS in a dump,
matches the dominant textured-TRIANGLE primitives to the texture they sample, and finds the EARLIEST
contiguous textured-triangle segment that uses a SINGLE TEX0 bind (the no-RTL Ch341 v1 target: one
scene-level TEX0 + per-vertex real UV). Reports aggregate facts (committable). With --extract it
writes the matched texture blob + a generated-fixture descriptor LOCALLY (gitignored, per provenance).
Usage:
gs_texture.py <dump.gs[.xz|.zst]> [--report out.txt] [--extract outdir]
"""
import sys, os, json
sys.path.insert(0, os.path.dirname(__file__))
import gs_parse, gs_translate
# GS pixel-storage-mode (PSM) code -> printable name, used when reporting upload and TEX0 formats.
PSM = {0x00:"PSMCT32",0x01:"PSMCT24",0x02:"PSMCT16",0x0A:"PSMCT16S",0x13:"PSMT8",0x14:"PSMT4",
       0x1B:"PSMT8H",0x24:"PSMT4HL",0x2C:"PSMT4HH",0x30:"PSMZ32",0x31:"PSMZ24",0x32:"PSMZ16",0x3A:"PSMZ16S"}
# Bits per texel for the colour / indexed formats; main() uses it to print the expected upload size.
# The PSMZ* codes have no entry here, so a BPP.get(psm, 0) lookup yields 0 for them.
BPP = {0x00:32,0x01:24,0x02:16,0x0A:16,0x13:8,0x14:4,0x1B:8,0x24:4,0x2C:4}
def dec_bitbltbuf(v):
    """Split a 64-bit BITBLTBUF value into its source (SBP/SBW/SPSM) and destination (DBP/DBW/DPSM) fields."""
    def field(shift, mask):
        return (v >> shift) & mask

    return dict(
        SBP=field(0, 0x3FFF),
        SBW=field(16, 0x3F),
        SPSM=field(24, 0x3F),
        DBP=field(32, 0x3FFF),
        DBW=field(48, 0x3F),
        DPSM=field(56, 0x3F),
    )
def dec_trxreg(v):
    """Decode TRXREG: transfer width RRW (bits 0-11) and height RRH (bits 32-43)."""
    width = v & 0xFFF
    height = (v >> 32) & 0xFFF
    return dict(RRW=width, RRH=height)
def dec_trxpos(v):
    """Decode the destination start coordinates from a 64-bit TRXPOS value.

    TRXPOS packs the *source* start (SSAX/SSAY) into bits 0-10 / 16-26 and the
    *destination* start (DSAX/DSAY) into bits 32-42 / 48-58. Uploads are
    positioned by the destination pair, so that is what is returned.
    """
    return dict(DSAX=(v >> 32) & 0x7FF, DSAY=(v >> 48) & 0x7FF)
def dec_tex0(v):
    """Decode the low TEX0 fields: base pointer, buffer width, format, log2 width/height, TCC and TFX.

    TW and TH are returned as the raw 4-bit log2 values (callers shift 1 by them to get pixels).
    """
    layout = (
        ("TBP0", 0, 0x3FFF),
        ("TBW", 14, 0x3F),
        ("PSM", 20, 0x3F),
        ("TW", 26, 0xF),
        ("TH", 30, 0xF),
        ("TCC", 34, 1),
        ("TFX", 35, 3),
    )
    return {name: (v >> shift) & mask for name, shift, mask in layout}
def analyze(path):
    """Parse a GS dump and locate the earliest single-TEX0 run of textured triangles.

    Three passes over the parsed event stream:

    1. Uploads: track the latest BITBLTBUF / TRXREG / TRXPOS values and record
       one upload entry per IMAGE event, decoded from that state.
    2. TEX0 per primitive: re-run the vertex-kick counting used by
       ``gs_translate.reconstruct_prims`` while tracking the last TEX0_1 value,
       giving the TEX0 active at each reconstructed primitive.
    3. Segment: walk the TRIANGLE primitives in order, skip untextured ones
       until the first textured triangle, then collect textured triangles
       while the TEX0 value is unchanged; stop at the first untextured
       triangle or TEX0 change.

    Returns ``(header, uploads, tris, seg, seg_tex0)`` where ``tris`` is a list
    of ``(prim, tex0_value)`` pairs, ``seg`` the selected primitives (possibly
    empty) and ``seg_tex0`` their shared raw TEX0 value (``None`` if no segment).
    """
    h, events = gs_parse.parse_dump(path)

    # --- pass 1: texture uploads (transfer state machine) ---
    xfer = {"BITBLTBUF": 0, "TRXREG": 0, "TRXPOS": 0}
    uploads = []  # each: dict(dbp,dbw,dpsm,w,h,dsax,dsay,bytes,data_off,event_idx)
    for e in events:
        if e.kind == "GSREG":
            if e.reg in xfer:
                xfer[e.reg] = e.value
        elif e.kind == "IMAGE":
            bb = dec_bitbltbuf(xfer["BITBLTBUF"])
            rr = dec_trxreg(xfer["TRXREG"])
            tp = dec_trxpos(xfer["TRXPOS"])
            uploads.append(dict(dbp=bb["DBP"], dbw=bb["DBW"], dpsm=bb["DPSM"], w=rr["RRW"], h=rr["RRH"],
                                dsax=tp["DSAX"], dsay=tp["DSAY"], bytes=e.info.get("bytes", 0),
                                data_off=e.byte_off, event_idx=e.idx))

    # --- pass 2: triangles + their active TEX0 ---
    prims, _ = gs_translate.reconstruct_prims(events)
    # reconstruct_prims does not keep TEX0, so replay its kick model here. The two walks must
    # emit primitives in lock-step, which includes dropping buffered vertices at a frame boundary.
    verts_needed = {0: 1, 1: 2, 2: 2, 3: 3, 4: 3, 5: 3, 6: 2}
    cur_tex0 = 0
    vcount = 0
    ptype = 7
    idxmap = []
    for e in events:
        if e.kind != "GSREG":
            if e.kind == "FRAME_BOUNDARY":
                vcount = 0  # reconstruct_prims clears its vertex FIFO here
            continue
        if e.reg == "TEX0_1":
            cur_tex0 = e.value
        elif e.reg == "PRIM":
            ptype = e.value & 7
            vcount = 0
        elif e.reg in ("XYZ2", "XYZ3", "XYZF2", "XYZF3"):
            vcount += 1
            kick = e.reg in ("XYZ2", "XYZF2")  # XYZ3/XYZF3 add a vertex without a drawing kick
            if kick and vcount >= verts_needed.get(ptype, 99):
                idxmap.append(cur_tex0)
                if ptype in (3, 6):
                    vcount = 0  # independent triangle / sprite consumes its vertices
    tris = [(p, idxmap[i] if i < len(idxmap) else 0) for i, p in enumerate(prims) if p.type == 3]

    # --- earliest contiguous textured-triangle segment with a SINGLE TEX0 ---
    seg = []
    seg_tex0 = None
    for p, t in tris:
        if not p.tme:
            if seg:
                break  # an untextured triangle ends the segment
            continue  # still looking for the first textured triangle
        if seg_tex0 is None:
            seg_tex0 = t
            seg = [p]
        elif t == seg_tex0:
            seg.append(p)
        else:
            break  # crossed a TEX0 bind -> stop (single-TEX0 segment)
    return h, uploads, tris, seg, seg_tex0
def match_upload(uploads, tex0):
    """Find the upload a raw TEX0 value refers to.

    Returns the first upload whose destination base pointer and format both
    match TEX0's TBP0 and PSM; failing that, the first upload matching TBP0
    alone; otherwise ``None``.
    """
    tx = dec_tex0(tex0)
    base, fmt = tx["TBP0"], tx["PSM"]
    exact = next((u for u in uploads if u["dbp"] == base and u["dpsm"] == fmt), None)
    if exact is not None:
        return exact
    # fall back: TBP match only
    return next((u for u in uploads if u["dbp"] == base), None)
def main(argv):
    """CLI driver: print the texture analysis report and optionally write it / extract the matched texture.

    ``argv[1]`` is the dump path. ``--report FILE`` also writes the report to
    FILE. ``--extract DIR`` writes ``tex0_blob.bin`` and ``tex0_desc.json``
    into DIR when a single-TEX0 segment with a matching upload was found.
    Returns 2 on missing arguments, otherwise 0.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2
    path = argv[1]

    def opt(n, d=None):
        return argv[argv.index(n) + 1] if n in argv else d

    h, uploads, tris, seg, seg_tex0 = analyze(path)
    # decode / match once; both are pure functions of the analysis result
    tx = dec_tex0(seg_tex0) if seg else None
    matched = match_upload(uploads, seg_tex0) if seg else None

    lines = []
    lines.append(f"# Ch341 Brick 1 texture analysis (source {os.path.basename(path)} serial={h.serial!r}; aggregate facts only)")
    lines.append(f"texture uploads: {len(uploads)}")
    seen = set()
    for u in uploads:
        k = (u["dbp"], u["dpsm"], u["w"], u["h"])
        if k in seen:
            continue  # list each distinct (base, format, size) once
        seen.add(k)
        lines.append(f" TBP={u['dbp']} DBW={u['dbw']} PSM={PSM.get(u['dpsm'],hex(u['dpsm']))} "
                     f"{u['w']}x{u['h']} bytes={u['bytes']} (expect {u['w']*u['h']*BPP.get(u['dpsm'],0)//8})")
    lines.append(f"textured triangles: {sum(1 for p,_ in tris if p.tme)} / {len(tris)} total triangles")
    if seg:
        lines.append("")
        lines.append(f"EARLIEST single-TEX0 textured-tri segment: {len(seg)} triangles")
        lines.append(f" TEX0: TBP0={tx['TBP0']} TBW={tx['TBW']} PSM={PSM.get(tx['PSM'],hex(tx['PSM']))} "
                     f"TW={tx['TW']}({1<<tx['TW']}px) TH={tx['TH']}({1<<tx['TH']}px) TFX={tx['TFX']}")
        if matched:
            u = matched
            lines.append(f" -> matched upload: TBP={u['dbp']} {u['w']}x{u['h']} {PSM.get(u['dpsm'],hex(u['dpsm']))} "
                         f"bytes={u['bytes']} data@0x{u['data_off']:x}")
            lines.append(f" VERDICT: single scene-level TEX0 + per-vertex UV — NO RTL feeder change needed for v1.")
        else:
            lines.append(f" !! no matching upload found for TBP0={tx['TBP0']} — texture may be CLUT/region or uploaded elsewhere.")
    else:
        lines.append("NO single-TEX0 textured-triangle segment found (every textured run crosses TEX0 binds).")
    report = "\n".join(lines) + "\n"
    print(report)

    report_path = opt("--report")
    if report_path:
        with open(report_path, "w") as fh:
            fh.write(report)
        print(f"[wrote report -> {report_path}]")

    outdir = opt("--extract")
    if outdir and seg and matched:
        u = matched
        os.makedirs(outdir, exist_ok=True)
        d = gs_parse.read_dump_bytes(path)
        blob = d[u["data_off"]:u["data_off"] + u["bytes"]]
        with open(os.path.join(outdir, "tex0_blob.bin"), "wb") as fh:
            fh.write(blob)
        desc = dict(schema=1, tbp0=tx["TBP0"], tbw=tx["TBW"],
                    psm=u["dpsm"], psm_name=PSM.get(u["dpsm"]), w=u["w"], h=u["h"], bytes=u["bytes"],
                    tw=tx["TW"], th=tx["TH"], tfx=tx["TFX"],
                    provenance="cubes_demo (MIT, glampert/ps2-homebrew) — LOCAL only")
        with open(os.path.join(outdir, "tex0_desc.json"), "w") as fh:
            fh.write(json.dumps(desc, indent=2))
        print(f"[extracted {len(blob)}-byte texture blob + descriptor -> {outdir}/ (LOCAL, gitignored)]")
    return 0
# Script entry point: main() returns 2 on usage error and 0 otherwise; that becomes the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
+306
View File
@@ -0,0 +1,306 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch346 generic texture-residency preflight.
Stops the hand-chasing of stale-VRAM frames (Ch345b: 32 sprites bound TBP=13440, but the captured frame's
VRAM there held cube-checker residue, not the font). Before ANY repack/render/fit, this proves — for each
textured draw run in a dump — that the bound TEX0 maps to a REAL upload whose payload actually covers the
sampled footprint and looks like resident content, NOT stale/placeholder VRAM.
Generic (per Codex): the gate is RESIDENT + PLAUSIBLE content, not "font-like" — future targets may be
sprites, UI, backgrounds, or indexed textures. `--font-like` adds the glyph-specific extra check on top.
Checks (Codex's minimum set):
1. active draw TEX0 maps to a known upload region (DBP/DPSM/stride), not just an address;
2. upload EPOCH tracked — if a later upload overwrites that TBP, the candidate uses the latest payload;
3. sampled footprint (UV bbox, REPEAT-wrapped into the TEX0 TW/TH) is COVERED by the uploaded rect;
4. payload sanity — reject all-zero / single-color flat / flat-alpha-on-alpha-draw / known stale hashes;
5. emit RANKED candidates with frame/event offsets, prim run, TEX0, upload source offset, PSM, dims,
alpha stats, and WHY it passed/failed.
Repack/render tools refuse to run unless `residency_ok()` returns a PASS for the bound texture.
A checker is NOT auto-rejected — it can be legit authentic content (the Ch343 cube). The signal that killed
Ch345b is RESIDENCY (no upload to TBP=13440 at all), not "it's a checker".
Usage: gs_texture_residency.py <dump.gs[.xz|.zst]> [--max-runs N] [--font-like] [--report r.txt] [--json j.json]
gs_texture_residency.py <dump> --assert TBP[:PSM] # exit 0 iff that TBP is resident+plausible
"""
import sys, os, json, hashlib
from collections import Counter
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import gs_parse
# PSM code of the 32-bit colour format; the only payload format payload_stats() decodes.
PSMCT32 = 0x00
MIN_DISTINCT_COLORS = 2 # <2 => flat fill (stale/cleared); fonts (B/W+transp=3) still pass
KNOWN_STALE_HASHES = set() # sha256[:16] denylist of textures known to be placeholder/test residue
INDEXED_PSMS = (0x13, 0x14) # PSMT8, PSMT4 — need a resident CLUT palette to render
def dec_tex0(v):
    """Decode a 64-bit TEX0 value into texture and CLUT fields.

    ``tw`` / ``th`` are returned in pixels (1 shifted by the 4-bit log2 field);
    every other entry is the raw bit-field value.
    """
    def field(shift, mask):
        return (v >> shift) & mask

    return dict(
        tbp=field(0, 0x3FFF),
        tbw=field(14, 0x3F),
        psm=field(20, 0x3F),
        tw=1 << field(26, 0xF),
        th=1 << field(30, 0xF),
        tcc=field(34, 1),
        tfx=field(35, 3),
        cbp=field(37, 0x3FFF),
        cpsm=field(51, 0xF),
        csm=field(55, 1),
        csa=field(56, 0x1F),
        cld=field(61, 7),
    )
def dec_bitbltbuf(v):
    """Decode BITBLTBUF into source (sbp/sbw/spsm) and destination (dbp/dbw/dpsm) buffer fields."""
    layout = (
        ("sbp", 0, 0x3FFF),
        ("sbw", 16, 0x3F),
        ("spsm", 24, 0x3F),
        ("dbp", 32, 0x3FFF),
        ("dbw", 48, 0x3F),
        ("dpsm", 56, 0x3F),
    )
    return {name: (v >> shift) & mask for name, shift, mask in layout}
def dec_trxreg(v):
    """Decode TRXREG into a ``(RRW, RRH)`` tuple: transfer width (bits 0-11) and height (bits 32-43)."""
    width = v & 0xFFF
    height = (v >> 32) & 0xFFF
    return (width, height)
def dec_trxpos(v):
    """Decode TRXPOS into a ``(DSAX, DSAY)`` tuple: destination start x (bits 32-42) and y (bits 48-58)."""
    dest_x = (v >> 32) & 0x7FF
    dest_y = (v >> 48) & 0x7FF
    return (dest_x, dest_y)
def dec_trxdir(v):
    """Return the 2-bit transfer direction from TRXDIR; 0 means host->local (an upload)."""
    direction = v & 3
    return direction
def payload_stats(blob, psm):
    """Summarize an upload payload as 32-bit texels.

    Only PSMCT32 is decoded; any other ``psm``, or a payload shorter than one
    texel, yields ``None``. Otherwise returns a dict with the texel count,
    number of distinct values, count of the most common value, alpha buckets
    (alpha 0 / alpha >= 0x80 / everything else), whether all alphas are equal,
    the first 16 hex digits of the payload's SHA-256, and the checker-pattern
    block size reported by ``_checkerish``.
    """
    if psm != PSMCT32 or len(blob) < 4:
        return None

    count = len(blob) // 4
    texels = [int.from_bytes(blob[o:o + 4], "little") for o in range(0, count * 4, 4)]
    histogram = Counter(texels)
    alphas = [(t >> 24) & 0xFF for t in texels]
    n_transp = sum(1 for a in alphas if a == 0)
    n_opaque = sum(1 for a in alphas if a >= 0x80)
    digest = hashlib.sha256(blob).hexdigest()[:16]
    return dict(
        texels=count,
        distinct=len(histogram),
        top=histogram.most_common(1)[0][1],
        a_transp=n_transp,
        a_opaque=n_opaque,
        a_partial=count - n_transp - n_opaque,
        flat_alpha=(len(set(alphas)) == 1),
        sha16=digest,
        checkerish=_checkerish(texels),
    )
def _checkerish(texels):
"""Heuristic structural flag (REPORTED, never an auto-reject — a checker can be legit content):
a 2-color image whose value flips on a regular coarse grid. Returns block size or 0."""
vals = set(texels)
if len(vals) > 4: return 0
n = len(texels); side = int(n**0.5)
if side*side != n: return 0
rows = [texels[r*side:(r+1)*side] for r in range(side)]
# find the run length of the first row; a checker has long uniform runs that alternate
first = rows[0]; run = 1
while run < side and first[run] == first[0]: run += 1
if run < 4 or run > side//2: return 0
return run
def collect(path, max_runs):
    """Load a dump and gather everything the residency checks need.

    Returns ``(d, h, events, uploads, runs, vram)``: the raw dump bytes, the
    parsed header and event list, the ordered log of host->local uploads, the
    textured draw runs from ``_draw_runs(events, max_runs)``, and the 4 MiB
    VRAM snapshot slice (``None`` when the computed range falls outside ``d``).
    """
    d = gs_parse.read_dump_bytes(path)
    h, events = gs_parse.parse_dump(path)

    uploads = []   # ordered upload log
    epoch = {}     # dbp -> number of uploads seen so far to that base
    bbuf = trxreg = trxpos = None
    trxdir = 3
    for ev in events:
        if ev.kind == "GSREG":
            reg = ev.reg
            if reg == "BITBLTBUF":
                bbuf = dec_bitbltbuf(ev.value)
            elif reg == "TRXREG":
                trxreg = dec_trxreg(ev.value)
            elif reg == "TRXPOS":
                trxpos = dec_trxpos(ev.value)
            elif reg == "TRXDIR":
                trxdir = dec_trxdir(ev.value)
            continue
        # only IMAGE payloads that follow a BITBLTBUF with direction host->local count as uploads
        if ev.kind != "IMAGE" or bbuf is None or trxdir != 0:
            continue
        w, hh = trxreg or (0, 0)
        dx, dy = trxpos or (0, 0)
        dbp = bbuf["dbp"]
        epoch[dbp] = epoch.get(dbp, 0) + 1
        nbytes = ev.info.get("bytes", 0)
        uploads.append(dict(idx=ev.idx, frame=ev.frame, byte_off=ev.byte_off, bytes=nbytes,
                            dbp=dbp, dbw=bbuf["dbw"], dpsm=bbuf["dpsm"], w=w, h=hh, dx=dx, dy=dy,
                            epoch=epoch[dbp], blob_range=(ev.byte_off, ev.byte_off + nbytes)))

    # draw runs: contiguous textured (TME) draws sharing one TEX0 base+psm; track sampled UV bbox
    runs = _draw_runs(events, max_runs)

    # VRAM snapshot: PCSX2 GS dumps freeze the full 4 MiB GS local memory at the END of the state blob
    # (register prefix first, then VRAM). Commercial games upload textures/CLUTs at scene-load — BEFORE the
    # dump — so they live here, not as in-stream upload events. This is the correct "resident" source.
    VRAM = 0x400000
    vstart = h.packet_start - 8192 - VRAM
    in_range = 0 <= vstart and vstart + VRAM <= len(d)
    vram = d[vstart:vstart + VRAM] if in_range else None
    return d, h, events, uploads, runs, vram
def snapshot_present(vram, base, nb=512, min_nz=16):
    """Check for resident, non-flat bytes at a base pointer in the VRAM snapshot.

    ``base`` is in 256-byte units; ``nb`` bytes starting there are examined.
    Returns ``dict(nonzero=..., distinct=...)`` when at least ``min_nz`` bytes
    are non-zero and at least two distinct byte values occur. Returns ``None``
    when ``vram`` is ``None``, the window falls outside the snapshot, or the
    content fails those thresholds. The snapshot is SWIZZLED, so this is a
    presence/plausibility check only, not a content decode.
    """
    if vram is None:
        return None
    start = base * 256
    stop = start + nb
    if start < 0 or stop > len(vram):
        return None
    window = vram[start:stop]
    nonzero = sum(1 for byte in window if byte)
    distinct = len(set(window))
    if nonzero >= min_nz and distinct >= 2:
        return dict(nonzero=nonzero, distinct=distinct)
    return None
def _draw_runs(events, max_runs):
    """Group textured XYZ2 vertex kicks into draw runs and track each run's sampled UV bounding box.

    Walks the GSREG events, tracking the latest PRIM flags, the TEX0 of both
    contexts and the UV values written since the last kick. Each XYZ2 issued
    while PRIM.TME is set, with a TEX0 bound for the active context, extends
    the current run; a new run starts when the (tbp, psm, tbw) key changes or
    the kick's event index does not directly follow the previous kick's.

    ``max_runs`` > 0 stops the walk once that many runs have been closed;
    0 or less means no limit. Returns the list of run dicts (runs with no
    primitives are dropped). A run whose ``umax``/``vmax`` is still -1 saw no
    UV writes.
    """
    cur = {"type": None, "tme": 0, "fst": 0, "abe": 0}
    tex0 = {1: None, 2: None}
    ctxt = 0
    uvbuf = []
    runs = []
    run = None

    def close():
        # Commit the run in progress (if it drew anything) and clear it.
        nonlocal run
        if run and run["nprim"] > 0:
            runs.append(run)
        run = None

    for e in events:
        if e.kind != "GSREG":
            continue
        r, v = e.reg, e.value
        if r == "PRIM":
            cur = {"type": v & 7, "tme": (v >> 4) & 1, "fst": (v >> 8) & 1, "abe": (v >> 6) & 1}
            ctxt = (v >> 9) & 1
            uvbuf = []
        elif r == "TEX0_1":
            tex0[1] = dec_tex0(v)
        elif r == "TEX0_2":
            tex0[2] = dec_tex0(v)
        elif r == "UV":
            # UV register layout: U in bits 0-13, V in bits 16-29 (bits 14-15 are unused padding).
            uvbuf.append((v & 0x3FFF, (v >> 16) & 0x3FFF))
        elif r == "XYZ2":
            if not cur["tme"]:
                continue  # only textured draws make residency runs
            t0 = tex0[1 if ctxt == 0 else 2]
            if t0 is None:
                continue
            key = (t0["tbp"], t0["psm"], t0["tbw"])
            if run is None or run["key"] != key or run["last_idx_gap"] != e.idx - 1:
                close()
                run = dict(key=key, tex0=t0, type=cur["type"], abe=cur["abe"], fst=cur["fst"],
                           first_idx=e.idx, frame=e.frame, nprim=0,
                           umin=1 << 30, umax=-1, vmin=1 << 30, vmax=-1, last_idx_gap=e.idx)
                if len(runs) >= max_runs and max_runs > 0:
                    break  # the freshly opened run has nprim == 0 and is discarded by close()
            run["nprim"] += 1
            run["last_idx"] = e.idx
            run["last_idx_gap"] = e.idx
            for (u, vv) in uvbuf[-2:]:  # sprite=2 verts; tri uses last 3 — bbox is fine
                # drop the 4 fractional bits to get whole texel coordinates
                tu = u >> 4
                tv = vv >> 4
                run["umin"] = min(run["umin"], tu)
                run["umax"] = max(run["umax"], tu)
                run["vmin"] = min(run["vmin"], tv)
                run["vmax"] = max(run["vmax"], tv)
            uvbuf = []
    close()
    return runs
def evaluate(run, uploads, d, vram=None):
    """Return verdict dict: resident (in-stream upload OR VRAM snapshot) + coverage + plausibility + CLUT.

    ``run`` is one draw-run dict, ``uploads`` the ordered upload log, ``d`` the raw dump bytes the
    uploads' ``blob_range`` index into, and ``vram`` the optional VRAM snapshot.

    The result always has the keys ``verdict`` ("PASS"/"REJECT"), ``reasons`` (list of strings),
    ``upload``, ``coverage``, ``stats``, ``clut`` and ``tex_source`` ("stream", "snapshot" or None).

    Two paths:
      * no earlier upload matches the run's (tbp, psm): fall back to the VRAM snapshot. PASS needs
        snapshot content at tbp and, for indexed formats, a resident CLUT. Coverage and payload
        stats are not computed on this path.
      * otherwise the latest matching upload before the run is used. PASS needs the sampled
        footprint inside the upload rect (assumed true when no UV was captured), a plausible
        payload, and, for indexed formats, a resident CLUT.
    """
    t0 = run["tex0"]; tbp=t0["tbp"]; psm=t0["psm"]; tbw=t0["tbw"]
    # candidate uploads: same base + psm, occurring BEFORE the run's first draw; pick latest by idx
    cands = [u for u in uploads if u["dbp"]==tbp and u["dpsm"]==psm and u["idx"] < run["first_idx"]]
    reasons = []
    if not cands:
        # no in-stream upload — fall back to the VRAM snapshot (scene-load uploads live there)
        snap = snapshot_present(vram, tbp, nb=512)
        if snap is None:
            # distinguish "nothing ever uploaded to this base" from "uploads exist but don't qualify"
            any_dbp = any(u["dbp"]==tbp for u in uploads)
            reasons.append("texture NOT resident: no in-stream upload to TBP and VRAM snapshot empty/absent"
                           if not any_dbp else "upload(s) to TBP exist but PSM mismatch / all after draw, and snapshot empty")
            return dict(verdict="REJECT", reasons=reasons, upload=None, coverage=None, stats=None, clut=None, tex_source=None)
        reasons.append(f"texture resident in VRAM SNAPSHOT @tbp={tbp} (nz={snap['nonzero']}/512 distinct={snap['distinct']}; swizzled — content via translation)")
        # _clut_residency appends its own explanation to `reasons`
        clut = _clut_residency(t0, uploads, run, vram, reasons)
        clut_ok = (psm not in INDEXED_PSMS) or (clut is not None and clut["resident"])
        return dict(verdict=("PASS" if clut_ok else "REJECT"), reasons=reasons or ["resident (snapshot)"],
                    upload=None, coverage=None, stats=None, clut=clut, tex_source="snapshot")
    up = max(cands, key=lambda u: u["idx"])
    # coverage: sampled footprint, REPEAT-wrapped into TW/TH, must fall inside the uploaded rect [dx..dx+w)x[dy..dy+h)
    tw, th = t0["tw"], t0["th"]
    foot_known = run["umax"] >= 0 and run["vmax"] >= 0 # UV captured (fst=1); fst=0 ST/Q not yet sampled
    if not foot_known:
        # honest: do NOT claim coverage we didn't verify. Verdict rests on residency + plausibility.
        reasons.append("footprint UNVERIFIED (ST/Q draw — UV not captured); coverage not asserted")
        inside = True; coverage = None
    else:
        wrap = (run["umax"]>=tw or run["vmax"]>=th or run["umin"]<0 or run["vmin"]<0)
        # wrap each bbox corner independently into the texture size (guarding a zero size)
        fmin_u = run["umin"] % tw if tw else run["umin"]; fmin_v = run["vmin"] % th if th else run["vmin"]
        fmax_u = (run["umax"] % tw if tw else run["umax"]); fmax_v=(run["vmax"] % th if th else run["vmax"])
        # an upload with zero width or height can never cover the footprint
        inside = (up["dx"] <= fmin_u and fmax_u < up["dx"]+up["w"] and
                  up["dy"] <= fmin_v and fmax_v < up["dy"]+up["h"]) if (up["w"] and up["h"]) else False
        coverage = 1.0 if inside else 0.0
        if wrap: reasons.append(f"footprint WRAPS texture ({run['umin']}..{run['umax']} x {run['vmin']}..{run['vmax']} vs {tw}x{th}); REPEAT declared")
        if not inside: reasons.append(f"sampled footprint NOT covered by upload rect ({up['dx']}..{up['dx']+up['w']} x {up['dy']}..{up['dy']+up['h']})")
    blob = d[up["blob_range"][0]:up["blob_range"][1]]
    stats = payload_stats(blob, psm)
    plausible = True
    if stats is None:
        # payload_stats returned nothing for this format/payload, so the run is rejected rather than trusted
        reasons.append(f"payload plausibility UNSUPPORTED for PSM 0x{psm:02x} (fail-closed)"); plausible=False
    else:
        if in_known_stale(stats["sha16"]): reasons.append(f"payload is a KNOWN stale/test texture ({stats['sha16']})"); plausible=False
        if stats["distinct"] < MIN_DISTINCT_COLORS: reasons.append(f"payload flat ({stats['distinct']} color)"); plausible=False
        if run["abe"] and stats["flat_alpha"]: reasons.append("alpha draw but payload alpha is FLAT (no mask)"); plausible=False
        # informational only: a checker-like payload is reported but does not clear `plausible`
        if stats["checkerish"]: reasons.append(f"payload is structurally checker-like (block~{stats['checkerish']}) — verify it's intended content")
    clut = _clut_residency(t0, uploads, run, vram, reasons)
    clut_ok = (psm not in INDEXED_PSMS) or (clut is not None and clut["resident"])
    verdict = "PASS" if (inside and plausible and clut_ok) else "REJECT"
    return dict(verdict=verdict, reasons=reasons or ["resident + plausible"], upload=up, coverage=coverage, stats=stats, clut=clut, tex_source="stream")
def _clut_residency(t0, uploads, run, vram, reasons):
    """Ch347 — decide whether the CLUT an indexed TEX0 points at is resident.

    Returns ``None`` for non-indexed formats. Otherwise looks for an upload to
    TEX0's CBP issued before the run's first draw (latest one wins), falling
    back to a 1024-byte presence check in the VRAM snapshot. Returns a dict
    describing the outcome and appends a human-readable explanation to
    ``reasons``; an extra note is appended when CLD is 0.
    """
    if t0["psm"] not in INDEXED_PSMS:
        return None
    cbp = t0["cbp"]

    def record(resident, source, upload_idx, frame, distinct):
        # single place that fixes the key order of the returned dict
        return dict(resident=resident, source=source, upload_idx=upload_idx, frame=frame,
                    cbp=cbp, cpsm=t0["cpsm"], cld=t0["cld"], distinct=distinct)

    earlier = [u for u in uploads if u["dbp"] == cbp and u["idx"] < run["first_idx"]]
    # a real 256-entry palette is rich + non-flat, hence the higher non-zero threshold
    snap = None if earlier else snapshot_present(vram, cbp, nb=1024, min_nz=64)
    if earlier:
        cup = max(earlier, key=lambda u: u["idx"])
        reasons.append(f"CLUT resident (stream) @cbp={cbp} cpsm=0x{t0['cpsm']:02x} cld={t0['cld']} (upload idx{cup['idx']} f{cup['frame']})")
        clut = record(True, "stream", cup["idx"], cup["frame"], None)
    elif snap:
        reasons.append(f"CLUT resident (snapshot) @cbp={cbp} cpsm=0x{t0['cpsm']:02x} cld={t0['cld']} (nz={snap['nonzero']}/1024 distinct={snap['distinct']})")
        clut = record(True, "snapshot", None, None, snap["distinct"])
    else:
        reasons.append(f"CLUT NOT resident: no upload to CBP={cbp} and snapshot empty — indexed texture cannot render authentically")
        clut = record(False, None, None, None, None)

    if t0["cld"] == 0:
        reasons.append("CLD=0: this TEX0 does not trigger a CLUT (re)load — palette would be whatever a prior load left")
    return clut
def in_known_stale(sha16):
    """Return True when ``sha16`` is listed in the KNOWN_STALE_HASHES denylist (seeded empty by default)."""
    listed = sha16 in KNOWN_STALE_HASHES
    return listed
def font_like(stats):
    """Glyph-specific EXTRA check (only applied with --font-like), not part of the generic gate.

    ``stats`` is a payload_stats() dict or ``None``. Returns ``(ok, reason)``:
    ok is True only when the payload has both fully transparent and opaque
    texels and the transparent share lies between 5% and 95% inclusive.
    """
    if stats is None:
        return False, "no stats"
    if not stats["a_transp"]:
        return False, "no transparent texels (not a glyph mask)"
    if not stats["a_opaque"]:
        return False, "no opaque texels"
    frac_t = stats["a_transp"] / stats["texels"]
    if frac_t < 0.05 or frac_t > 0.95:
        return False, f"transparent fraction {frac_t:.2f} not mask-like"
    return True, f"mask-like (transp {frac_t:.2f})"
def residency_ok(path, tbp, psm=PSMCT32):
    """Programmatic gate for repack/render tools.

    Parses the dump at ``path`` and returns True iff at least one textured run
    whose TEX0 binds ``(tbp, psm)`` evaluates to a PASS verdict. Evaluation
    stops at the first PASS.
    """
    d, _h, _events, uploads, runs, vram = collect(path, 0)
    return any(
        evaluate(run, uploads, d, vram)["verdict"] == "PASS"
        for run in runs
        if run["tex0"]["tbp"] == tbp and run["tex0"]["psm"] == psm
    )
def main(argv):
    """CLI driver for the residency preflight.

    ``argv[1]`` is the dump path. With ``--assert TBP[:PSM]`` only that binding
    is checked and the exit code is 0 for PASS, 1 for REJECT. Otherwise every
    textured draw run is evaluated, a report is printed (and optionally written
    via ``--report`` / ``--json``), and the exit code is 0 iff at least one run
    passed. ``--font-like`` downgrades a PASS to REJECT when the payload is not
    glyph-mask-like. Returns 2 on missing arguments.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2
    path = argv[1]

    def has(f):
        return f in argv

    def opt(n, dv=None):
        return argv[argv.index(n) + 1] if n in argv else dv

    if has("--assert"):
        # residency_ok() parses the dump itself, so handle this mode before the full collect().
        spec = opt("--assert")
        tbp = int(spec.split(":")[0], 0)
        psm = int(spec.split(":")[1], 0) if ":" in spec else PSMCT32
        ok = residency_ok(path, tbp, psm)
        print(f"residency {'PASS' if ok else 'REJECT'} for TBP={tbp} PSM=0x{psm:02x}")
        return 0 if ok else 1

    d, h, events, uploads, runs, vram = collect(path, int(opt("--max-runs", "0")))
    R = [f"# Ch346 texture-residency preflight: {os.path.basename(path)}",
         f"uploads(host->local): {len(uploads)} textured draw runs: {len(runs)}"]

    results = []
    for run in runs:
        ev = evaluate(run, uploads, d, vram)
        fl = font_like(ev["stats"]) if has("--font-like") else None
        verdict = ev["verdict"]
        if verdict == "PASS" and fl is not None and not fl[0]:
            verdict = "REJECT"
        results.append(dict(run=run, ev=ev, font=fl, verdict=verdict))

    # rank passes by payload colour richness, then by primitive count (both descending)
    passes = [r for r in results if r["verdict"] == "PASS"]
    passes.sort(key=lambda r: (r["ev"]["stats"]["distinct"] if r["ev"]["stats"] else 0, r["run"]["nprim"]), reverse=True)

    R.append(f"\n== {len(passes)} PASS / {len(results)-len(passes)} REJECT ==")
    for r in results:
        run = r["run"]; ev = r["ev"]; t0 = run["tex0"]; up = ev["upload"]
        R.append(f"\n[{r['verdict']}] run f{run['frame']} idx{run['first_idx']}+{run['nprim']}prim "
                 f"TEX0 tbp={t0['tbp']} tbw={t0['tbw']} psm=0x{t0['psm']:02x} {t0['tw']}x{t0['th']} abe={run['abe']} fst={run['fst']}")
        if run["umax"] >= 0:
            R.append(f" footprint u[{run['umin']}..{run['umax']}] v[{run['vmin']}..{run['vmax']}]")
        else:
            R.append(f" footprint UNVERIFIED (fst={run['fst']} ST/Q draw)")
        if up:
            R.append(f" upload @byte0x{up['byte_off']:x} f{up['frame']} epoch{up['epoch']} dbp={up['dbp']} dpsm=0x{up['dpsm']:02x} {up['w']}x{up['h']} @({up['dx']},{up['dy']}) {up['bytes']}B")
        if ev["stats"]:
            s = ev["stats"]
            R.append(f" payload distinct={s['distinct']} alpha[t/o/p]={s['a_transp']}/{s['a_opaque']}/{s['a_partial']} flatA={s['flat_alpha']} checker~{s['checkerish']} sha={s['sha16']}")
        if ev.get("clut") is not None:
            c = ev["clut"]
            R.append(f" CLUT {'RESIDENT' if c['resident'] else 'MISSING'} cbp={c['cbp']} cpsm=0x{c['cpsm']:02x} cld={c['cld']}")
        if r["font"] is not None:
            R.append(f" font-like: {r['font'][0]} ({r['font'][1]})")
        for why in ev["reasons"]:
            R.append(f" -> {why}")

    if passes:
        R.append(f"\n== RANKED PASS candidates ==")
        for r in passes:
            run = r["run"]; t0 = run["tex0"]
            s = r['ev']['stats']; c = r['ev'].get('clut')
            extra = f"distinct={s['distinct']}" if s else f"tex_src={r['ev'].get('tex_source')}"
            if c:
                extra += f" CLUT@cbp={c['cbp']}/{c['source']}"
            R.append(f" tbp={t0['tbp']} psm=0x{t0['psm']:02x} {run['nprim']}prim {extra} f{run['frame']} idx{run['first_idx']}")

    report = "\n".join(R)
    print(report)
    if has("--report"):
        with open(opt("--report"), "w") as fh:
            fh.write(report + "\n")
    if has("--json"):
        j = [dict(verdict=r["verdict"], tbp=r["run"]["tex0"]["tbp"], psm=r["run"]["tex0"]["psm"],
                  frame=r["run"]["frame"], first_idx=r["run"]["first_idx"], nprim=r["run"]["nprim"],
                  stats=r["ev"]["stats"], clut=r["ev"].get("clut"), reasons=r["ev"]["reasons"]) for r in results]
        with open(opt("--json"), "w") as fh:
            fh.write(json.dumps(j, indent=1) + "\n")
    return 0 if passes else 1
# Script entry point: exit status is whatever main() returns (0 = pass, 1 = reject / no pass, 2 = usage).
if __name__ == "__main__":
    sys.exit(main(sys.argv))
+185
View File
@@ -0,0 +1,185 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch340 support census + translator (Bricks 3-4).
Consumes gs_parse's normalized event stream, reconstructs GS primitives via the vertex-kick model,
classifies EVERY primitive (translated / unsupported, with reason + frame/event/byte offset), emits an
aggregate census + histograms, then translates the EARLIEST mechanically-selected contiguous SUPPORTED
draw segment into a ps2_feeder scene file (Ch339 grammar) — reusing the proven encoder, never
duplicating staging logic. NO hidden approximation: textured prims, sprites, strips/fans, non-source-
over blend, unsupported Z-test, etc. are reported unsupported and the segment STOPS there (fail closed).
If no segment qualifies, Ch340 still succeeds via parser + census and the top census blocker frames Ch341.
A declared, reported VIEWPORT FIT (game bbox -> our 64x64 FB) is a faithful linear transform, not a GS-
feature approximation; it is recorded in the qualification report.
Usage:
gs_translate.py <dump.gs[.xz|.zst]> [--report out.census.txt] [--scene out.scene.txt] [--fb N]
"""
import sys, os
sys.path.insert(0, os.path.dirname(__file__))
import gs_parse
# PRIM type code (low 3 bits of the PRIM register) -> printable name, used in classify() reasons.
PRIMT = {0:"POINT",1:"LINE",2:"LINE_STRIP",3:"TRIANGLE",4:"TRI_STRIP",5:"TRI_FAN",6:"SPRITE",7:"INVALID"}
def bit(v, b):
    """Return bit number ``b`` of ``v`` as 0 or 1."""
    shifted = v >> b
    return shifted & 1
class Prim:
    """One reconstructed GS primitive plus the draw state captured when it was kicked.

    Built from keyword arguments: every slot named below is set from the
    matching keyword, or to ``None`` when the keyword is absent. Keywords that
    do not name a slot are ignored.
    """
    __slots__ = ("type", "verts", "tme", "abe", "iip", "frame", "event_idx", "byte_off", "test", "alpha")

    def __init__(self, **fields):
        for slot in self.__slots__:
            setattr(self, slot, fields.get(slot))
def reconstruct_prims(events):
    """Vertex-kick model -> list of Prim. Each vertex = (x,y,z,(r,g,b)) in raw GS coords.

    Walks the event stream tracking PRIM/PRMODE flags, the latest RGBAQ colour
    and the TEST_1 / ALPHA_1 / FRAME_1 / XYOFFSET_1 values. Every XYZ2/XYZ3/
    XYZF2/XYZF3 write queues a vertex; XYZ2 and XYZF2 additionally kick a
    primitive once enough vertices for the current type are queued. The queue
    is cleared on PRIM writes, at FRAME_BOUNDARY events, and after an
    independent triangle or sprite is emitted.

    Returns ``(prims, st)`` where ``st`` is the final tracked state dict.
    """
    prims = []
    st = dict(prim=None, rgbaq=0, xyoff=0, tme=0, abe=0, iip=0, test=0, alpha=0, frame=0, ptype=7)
    vtx = []
    for e in events:
        if e.kind != "GSREG":
            if e.kind == "FRAME_BOUNDARY":
                vtx = []  # vertex FIFO doesn't survive a frame in our model
            continue
        r = e.reg
        v = e.value
        if r == "PRIM":
            st["prim"] = v
            st["ptype"] = v & 7
            st["iip"] = bit(v, 3)
            st["tme"] = bit(v, 4)
            st["abe"] = bit(v, 6)
            vtx = []
        elif r == "PRMODE":
            st["iip"] = bit(v, 3)
            st["tme"] = bit(v, 4)
            st["abe"] = bit(v, 6)
        elif r == "RGBAQ":
            st["rgbaq"] = v
        elif r == "XYOFFSET_1":
            st["xyoff"] = v
        elif r in ("TEST_1",):
            st["test"] = v
        elif r in ("ALPHA_1",):
            st["alpha"] = v
        elif r in ("FRAME_1",):
            st["frame"] = v
        elif r in ("XYZ2", "XYZ3", "XYZF2", "XYZF3"):
            x = (v & 0xFFFF)
            y = ((v >> 16) & 0xFFFF)
            # XYZ2/XYZ3 carry a 32-bit Z in bits 32-63. XYZF2/XYZF3 carry a 24-bit Z in
            # bits 32-55 with the fog coefficient in bits 56-63, which must not leak into Z.
            z_mask = 0xFFFFFF if r in ("XYZF2", "XYZF3") else 0xFFFFFFFF
            z = (v >> 32) & z_mask
            col = (st["rgbaq"] & 0xFF, (st["rgbaq"] >> 8) & 0xFF, (st["rgbaq"] >> 16) & 0xFF)
            kick = r in ("XYZ2", "XYZF2")  # XYZ3/XYZF3 add a vertex without a drawing kick
            vtx.append((x, y, z, col))
            t = st["ptype"]
            need = {0: 1, 1: 2, 2: 2, 3: 3, 4: 3, 5: 3, 6: 2}.get(t, 99)
            if kick and len(vtx) >= need:
                pv = vtx[-need:]
                prims.append(Prim(type=t, verts=pv, tme=st["tme"], abe=st["abe"], iip=st["iip"],
                                  frame=e.frame, event_idx=e.idx, byte_off=e.byte_off,
                                  test=st["test"], alpha=st["alpha"]))
                if t == 3 or t == 6:
                    vtx = []  # independent tri / sprite: consume the FIFO
                # strips/fans keep a sliding window (left as-is; classified unsupported below)
    return prims, st
# ---- proven envelope: a primitive we can render faithfully via ps2_feeder ----
def classify(p):
    """Classify one primitive against the proven rendering envelope.

    Returns ``("translated", description)`` for an untextured, non-blended
    TRIANGLE and ``("unsupported", reason)`` for everything else. The checks
    run in order: primitive type, then TME, then ABE.
    """
    reason = None
    if p.type != 3:
        reason = f"prim={PRIMT.get(p.type,p.type)} (only TRIANGLE renders via ps2_feeder)"
    elif p.tme:
        reason = "textured triangle (TME=1; no real-texture path in the feeder)"
    elif p.abe:
        reason = "alpha-blended triangle (ABE=1; only opaque is in the proven envelope)"

    if reason is None:
        return "translated", "non-textured opaque triangle"
    return "unsupported", reason
def census(prims, parse_summary):
    """Tally classify() verdicts over *prims*.

    Returns (cats, reasons): cats maps each category to its count, reasons maps
    each "unsupported" reason string to its count. *parse_summary* is accepted
    for the caller's convenience but is not read here.
    """
    cats = {}
    reasons = {}
    for verdict, why in map(classify, prims):
        cats[verdict] = cats.get(verdict, 0) + 1
        if verdict != "unsupported":
            continue
        reasons[why] = reasons.get(why, 0) + 1
    return cats, reasons
def earliest_supported_segment(prims):
    """Return the first maximal contiguous run of 'translated' prims.

    Leading unsupported prims are skipped; once a run has started, the first
    unsupported prim ends it. Returns [] when no prim is translated.
    """
    run = []
    for p in prims:
        verdict = classify(p)[0]
        if verdict == "translated":
            run.append(p)
        elif run:
            # the earliest run is complete; later runs are not considered
            break
    return run
def viewport_fit(prims, fb):
    """Build a uniform linear map from the segment's pixel bbox into the fb x fb target.

    Vertex x/y are RAW (1/16 pixel) GS coords; the bbox and the scale are computed
    in screen PIXELS so the report reads intuitively. The scale is (fb-2) divided
    by the larger bbox extent (extents below one pixel count as one), and mapped
    coordinates are offset by 1, so they land in 1..fb-1.

    Returns (f, info): f(x, y) maps a raw coordinate pair to integer output
    pixels; info holds bbox_px, scale_px and fb for reporting.
    """
    px = [vert[0] / 16.0 for p in prims for vert in p.verts]
    py = [vert[1] / 16.0 for p in prims for vert in p.verts]
    x0, x1 = min(px), max(px)
    y0, y1 = min(py), max(py)
    span = fb - 2
    s = min(span / max(1.0, (x1 - x0)), span / max(1.0, (y1 - y0)))  # output px per source px

    def f(x, y):
        out_x = 1 + int((x / 16.0 - x0) * s)
        out_y = 1 + int((y / 16.0 - y0) * s)
        return (out_x, out_y)

    info = dict(bbox_px=(round(x0, 1), round(y0, 1), round(x1, 1), round(y1, 1)),
                scale_px=round(s, 4), fb=fb)
    return f, info
def emit_scene(seg, fb):
    """Render a translated segment as ps2_feeder scene text.

    Each prim becomes one line: `trig` (per-vertex colour) when its iip flag is
    set, otherwise `tri` carrying the LAST vertex's colour. Z is always taken
    from the last vertex. Screen coordinates go through viewport_fit(seg, fb).
    The scene is terminated by a single `go`.

    Returns (scene_text, viewport_info).
    """
    to_px, info = viewport_fit(seg, fb)
    out = [
        f"# Ch340 translated segment: {len(seg)} non-textured triangles",
        f"# viewport fit: src bbox(px) {info['bbox_px']} scale {info['scale_px']} px/px -> {fb}x{fb}",
    ]
    for p in seg:
        last = p.verts[-1]
        z = last[2]
        if p.iip:
            # gouraud: trig x0 y0 r0 g0 b0 x1 y1 ... z
            fields = []
            for (x, y, _z, c) in p.verts:
                sx, sy = to_px(x, y)
                fields.extend((sx, sy, c[0], c[1], c[2]))
            fields.append(z)
            out.append("trig " + " ".join(str(n) for n in fields))
        else:
            # flat: tri x0 y0 x1 y1 x2 y2 z r g b (GS flat uses provoking/last vertex color)
            xy = []
            for (x, y, _z, _c) in p.verts:
                xy.extend(to_px(x, y))
            (r, g, b) = last[3]
            out.append(f"tri {xy[0]} {xy[1]} {xy[2]} {xy[3]} {xy[4]} {xy[5]} {z} {r} {g} {b}")
    out.append("go")
    return "\n".join(out) + "\n", info
def main(argv):
    """CLI entry point: parse a GS dump, print the support census, optionally write outputs.

    argv[1] is the dump path. Recognised options: --fb N (output framebuffer
    edge, default 64), --report FILE (write the census text), --scene FILE
    (write the earliest supported segment as a ps2_feeder scene).

    Returns 2 when no dump path is given, otherwise 0 (also when a scene was
    requested but no supported segment exists: nothing is written, fail closed).
    Raises SystemExit with a message when an option is given without a value.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2
    path = argv[1]

    def opt(name, d=None):
        """Value following flag *name* in argv, or *d* when the flag is absent."""
        if name not in argv:
            return d
        pos = argv.index(name) + 1
        if pos >= len(argv):
            # a trailing flag used to surface as a bare IndexError traceback
            raise SystemExit(f"error: option {name} requires a value")
        return argv[pos]

    # resolve every option up front so a malformed command line fails before parsing
    fb = int(opt("--fb", "64"))
    rp = opt("--report")
    sp = opt("--scene")

    h, events = gs_parse.parse_dump(path)
    prims, _ = reconstruct_prims(events)
    cats, reasons = census(prims, h)

    # histograms (from the raw event stream)
    regs = {}
    frames = images = imgb = malformed = 0
    for e in events:
        if e.kind == "GSREG":
            regs[e.reg] = regs.get(e.reg, 0) + 1
        elif e.kind == "FRAME_BOUNDARY":
            frames += 1
        elif e.kind == "MALFORMED":
            malformed += 1
        elif e.kind == "IMAGE":
            images += 1
            imgb += e.info.get("bytes", 0)
    ptypes = {}
    for p in prims:
        label = PRIMT.get(p.type, p.type)
        ptypes[label] = ptypes.get(label, 0) + 1

    def by_count(d):
        """Items of *d*, highest count first (ties keep first-seen order)."""
        return sorted(d.items(), key=lambda kv: -kv[1])

    R = []
    R.append(f"# Ch340 GS-dump support census (schema v{gs_parse.SCHEMA_VERSION})")
    R.append(f"# source: {os.path.basename(path)} serial={h.serial!r} crc=0x{h.crc:08x} (aggregate counts only; no game content)")
    R.append(f"frames={frames} events={len(events)} primitives={len(prims)} malformed={malformed} image_uploads={images} ({imgb} bytes)")
    R.append("")
    R.append("primitive types: " + ", ".join(f"{k}={v}" for k, v in by_count(ptypes)))
    R.append("census classes : " + ", ".join(f"{k}={v}" for k, v in by_count(cats)))
    R.append("")
    R.append("UNSUPPORTED reasons (count):")
    for why, c in by_count(reasons):
        R.append(f" {c:6d} {why}")
    R.append("")
    R.append("top GS register writes:")
    for k, v in by_count(regs)[:18]:
        R.append(f" {v:6d} {k}")

    seg = earliest_supported_segment(prims)
    R.append("")
    if seg:
        first = seg[0]
        R.append(f"EARLIEST SUPPORTED SEGMENT: {len(seg)} triangles, starting frame {first.frame} "
                 f"event #{first.event_idx} @0x{first.byte_off:x}.")
        R.append(" qualification: every primitive is a non-textured opaque TRIANGLE; segment stops at the first unsupported prim/state.")
    else:
        topblk = max(reasons.items(), key=lambda x: x[1])[0] if reasons else "none"
        R.append("NO SUPPORTED SEGMENT: no contiguous run of non-textured opaque triangles.")
        R.append(f" Ch340 succeeds via parser + census. Top census blocker (Ch341 candidate): {topblk}")
    report = "\n".join(R) + "\n"
    print(report)

    if rp:
        with open(rp, "w", encoding="utf-8") as f:
            f.write(report)
        print(f"[wrote census -> {rp}]")
    if sp and seg:
        scene, info = emit_scene(seg, fb)
        with open(sp, "w", encoding="utf-8") as f:
            f.write(scene)
        print(f"[wrote {len(seg)}-tri ps2_feeder scene -> {sp} (viewport {info})]")
    elif sp:
        print("[no scene emitted: no supported segment — fail closed]")
    return 0
# Script entry: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))
+241
View File
@@ -0,0 +1,241 @@
#!/usr/bin/env python3
"""retroDE_ps2 — Ch341 textured-triangle translator (declared affine ST/Q surrogate).
Extracts the earliest contiguous single-TEX0 textured-triangle subsegment that FITS staging (<=27 tris)
from a GS dump, derives faithful per-vertex texel coords u=S/Q, v=T/Q (FST=0 perspective ST), applies
the declared 256->64 downscale, and emits a ps2_feeder textured scene (tex0 + tritex) — BUT ONLY if the
honesty gate passes: the affine interpolation of vertex (S/Q,T/Q) must stay within MAX_ERR texels of the
true perspective interpolation over the triangle. Otherwise it FAILS CLOSED (Ch342 = real ST/Q).
This is NOT faithful perspective-correct GS texturing. It is authentic cube geometry + authentic
extracted texels through the affine-UV feeder, with a DECLARED affine substitute for perspective ST/Q
on a tiny-span segment. Sprites stay unsupported. Reports are aggregate; scene text is dump-derived.
Usage: gs_translate_tex.py <dump> [--tbp 64] [--dst 64] [--maxtri 27] [--scene out.txt] [--report r.txt]
"""
import sys, os, struct
sys.path.insert(0, os.path.dirname(__file__))
import gs_parse
MAX_ERR = 0.5 # texels, on the 64x64 fixture
def f32(bits):
    """Reinterpret the low 32 bits of *bits* as a little-endian IEEE-754 single."""
    raw = struct.pack("<I", bits & 0xFFFFFFFF)
    (value,) = struct.unpack("<f", raw)
    return value
def bit(v, b):
    """Return bit number *b* of integer *v* as 0 or 1."""
    shifted = v >> b
    return shifted & 1
def extract(events, dst, maxtri):
    """Walk events; return (segment tris, meta).

    Each segment entry is a list of three vertex dicts with keys
    x, y, z, S, T, Q, rgb, tex0, fst, clamp (x/y are in pixels with XYOFFSET_1
    subtracted). The segment starts at the first kicked triangle (PRIM type 3)
    whose three vertices all have FST=0 and ends when one of these happens:
    an IMAGE upload targets the segment's TBP, a triangle leaves FST=0, TEX0 or
    CLAMP differs from the segment's first triangle, or *maxtri* triangles were
    collected. *dst* is accepted but not read here.

    meta keys: tex0, clamp, prim, test (state captured at segment start), first
    and last (event indices of the first/last kicking vertex), stop (reason
    string, or None when the event stream simply ran out).
    """
    vertex_regs = ("XYZ2", "XYZ3", "XYZF2", "XYZF3")
    kicking_regs = ("XYZ2", "XYZF2")

    # live GS state
    cur_s = cur_t = 0.0
    cur_q = 1.0
    cur_rgb = (255, 255, 255)
    cur_tex0 = cur_fst = cur_xyoff = cur_clamp = 0
    cur_ptype = 7
    cur_prim = cur_test = 0
    blt = 0
    pending = []            # vertices queued since the last PRIM / kick

    # segment bookkeeping
    seg = []
    seg_tex0 = seg_clamp = seg_prim = seg_test = None
    started = False
    stop_reason = None
    first = last = None

    for e in events:
        if e.kind == "IMAGE":
            dbp = (blt >> 32) & 0x3FFF
            if started and seg_tex0 is not None and (seg_tex0 & 0x3FFF) == dbp:
                stop_reason = f"re-upload to bound TBP {dbp} at event #{e.idx}"
                break
            continue
        if e.kind != "GSREG":
            continue
        reg, val = e.reg, e.value
        if reg == "BITBLTBUF":
            blt = val
        elif reg == "PRIM":
            cur_ptype = val & 7
            cur_fst = (val >> 8) & 1
            cur_prim = val
            pending = []
        elif reg == "TEST_1":
            cur_test = val
        elif reg == "PRMODE":
            cur_fst = (val >> 8) & 1
        elif reg == "ST":
            cur_s = f32(val & 0xFFFFFFFF)
            cur_t = f32((val >> 32) & 0xFFFFFFFF)
            if "q_stq" in e.info:
                cur_q = f32(e.info["q_stq"])    # PACKED ST routes Q -> RGBAQ.Q
        elif reg == "RGBAQ":
            cur_rgb = (val & 0xFF, (val >> 8) & 0xFF, (val >> 16) & 0xFF)
            cur_q = f32((val >> 32) & 0xFFFFFFFF)
        elif reg == "TEX0_1":
            cur_tex0 = val
        elif reg == "CLAMP_1":
            cur_clamp = val
        elif reg == "XYOFFSET_1":
            cur_xyoff = val
        elif reg in vertex_regs:
            px = (val & 0xFFFF) / 16.0 - (cur_xyoff & 0xFFFF) / 16.0
            py = ((val >> 16) & 0xFFFF) / 16.0 - ((cur_xyoff >> 16) & 0xFFFF) / 16.0
            pending.append(dict(x=px, y=py, z=(val >> 32) & 0xFFFFFFFF, S=cur_s, T=cur_t,
                                Q=cur_q, rgb=cur_rgb, tex0=cur_tex0, fst=cur_fst, clamp=cur_clamp))
            completes_tri = reg in kicking_regs and cur_ptype == 3 and len(pending) >= 3
            if not completes_tri:
                continue
            tri = pending[-3:]
            pending = []
            if any(vert["fst"] != 0 for vert in tri):   # only FST=0 textured tris in this rung
                if started:
                    stop_reason = "prim left FST=0 ST mode"
                    break
                continue
            tri_tex0 = tri[0]["tex0"]
            if not started:
                seg_tex0 = tri_tex0
                seg_clamp = tri[0]["clamp"]
                seg_prim = cur_prim
                seg_test = cur_test
                started = True
                first = e.idx
            if tri_tex0 != seg_tex0:
                stop_reason = f"TEX0 changed at event #{e.idx}"
                break
            if tri[0]["clamp"] != seg_clamp:
                stop_reason = f"CLAMP changed at event #{e.idx}"
                break
            seg.append(tri)
            last = e.idx
            if len(seg) >= maxtri:
                stop_reason = f"staging cap ({maxtri} tris)"
                break
    meta = dict(tex0=seg_tex0, clamp=seg_clamp, prim=seg_prim, test=seg_test,
                first=first, last=last, stop=stop_reason)
    return seg, meta
def reconstruct_all_textured(events):
    """Collect every kicked triangle (PRIM type 3) drawn while FST=0.

    Each result is dict(v=[three dicts of S/T/Q], tex0, clamp, epoch), where
    tex0/clamp are the values active at the kick and epoch is the number of
    IMAGE uploads seen so far whose BITBLTBUF destination equals the low 14 bits
    of tex0. This feeds the closeout all-window scan; it is NOT the mechanical
    earliest selection that extract() performs.
    """
    cur_s = cur_t = 0.0
    cur_q = 1.0
    cur_tex0 = cur_fst = cur_clamp = 0
    cur_ptype = 7
    blt = 0
    uploads = {}            # destination base pointer -> IMAGE uploads seen
    pending = []
    found = []
    for e in events:
        if e.kind == "IMAGE":
            dbp = (blt >> 32) & 0x3FFF
            uploads[dbp] = uploads.get(dbp, 0) + 1
            continue
        if e.kind != "GSREG":
            continue
        reg, val = e.reg, e.value
        if reg == "BITBLTBUF":
            blt = val
        elif reg == "PRIM":
            cur_ptype = val & 7
            cur_fst = (val >> 8) & 1
            pending = []
        elif reg == "PRMODE":
            cur_fst = (val >> 8) & 1
        elif reg == "ST":
            cur_s = f32(val & 0xFFFFFFFF)
            cur_t = f32((val >> 32) & 0xFFFFFFFF)
            if "q_stq" in e.info:
                cur_q = f32(e.info["q_stq"])    # PACKED ST routes Q -> RGBAQ.Q
        elif reg == "RGBAQ":
            cur_q = f32((val >> 32) & 0xFFFFFFFF)
        elif reg == "TEX0_1":
            cur_tex0 = val
        elif reg == "CLAMP_1":
            cur_clamp = val
        elif reg in ("XYZ2", "XYZ3", "XYZF2", "XYZF3"):
            pending.append(dict(S=cur_s, T=cur_t, Q=cur_q))
            if reg in ("XYZ2", "XYZF2") and cur_ptype == 3 and len(pending) >= 3:
                tri = pending[-3:]
                pending = []
                # FST is sampled at the kick, not per vertex
                if cur_fst == 0:
                    found.append(dict(v=tri, tex0=cur_tex0, clamp=cur_clamp,
                                      epoch=uploads.get(cur_tex0 & 0x3FFF, 0)))
    return found
def scan_windows(tris, dst, maxtri):
    """Slide a <=maxtri window from every start; a window breaks on tex0/clamp/epoch change.

    Returns (n_windows, n_pass, min_window_error): the number of windows (one
    per start triangle), how many have a worst-triangle error <= MAX_ERR, and
    the smallest worst-triangle error over all windows (1e9 when *tris* is empty).

    The per-triangle error does not depend on the window, so it is computed once
    per triangle rather than once per (window, triangle) pair.
    """
    errs = [tri_error(t["v"], dst)[0] for t in tris]
    n = len(tris)
    npass = 0
    best = 1e9
    total = 0
    for i in range(n):
        a = tris[i]
        worst = errs[i]
        for j in range(i + 1, min(n, i + maxtri)):
            b = tris[j]
            if a["tex0"] != b["tex0"] or a["clamp"] != b["clamp"] or a["epoch"] != b["epoch"]:
                break
            worst = max(worst, errs[j])
        total += 1
        best = min(best, worst)
        if worst <= MAX_ERR:
            npass += 1
    return total, npass, best
def bary_samples():
    """Return the 10 barycentric sample points used by tri_error().

    The centroid comes first, then for each weight a in (0.25, 0.5, 0.75) the
    three points that give a to one vertex and split the rest evenly.
    """
    pts = [(1/3, 1/3, 1/3)]
    for a in (0.25, 0.5, 0.75):
        rest = (1 - a) / 2
        pts.append((a, rest, rest))
        pts.append((rest, a, rest))
        pts.append((rest, rest, a))
    return pts
def tri_error(t, dst):
    """Max |affine - perspective| texel error over the triangle, for u and v.

    *t* is three vertex dicts with S, T, Q; *dst* scales S/Q and T/Q to texels.
    The affine value interpolates the per-vertex (S/Q, T/Q)*dst; the perspective
    value interpolates S, T, Q first and divides afterwards. Both are evaluated
    at the bary_samples() points; samples whose interpolated Q is 0 are skipped,
    and a vertex with Q == 0 contributes (0, 0) to the affine side.

    Returns (max_error, uv) where uv is the per-vertex affine texel coords.
    """
    uv = []
    for vert in t:
        if vert["Q"]:
            sq = (vert["S"] / vert["Q"], vert["T"] / vert["Q"])
        else:
            sq = (0.0, 0.0)
        uv.append((sq[0] * dst, sq[1] * dst))
    v0, v1, v2 = t[0], t[1], t[2]
    worst = 0.0
    for (b0, b1, b2) in bary_samples():
        q_mix = b0 * v0["Q"] + b1 * v1["Q"] + b2 * v2["Q"]
        if q_mix == 0:
            continue
        s_mix = b0 * v0["S"] + b1 * v1["S"] + b2 * v2["S"]
        t_mix = b0 * v0["T"] + b1 * v1["T"] + b2 * v2["T"]
        u_persp = (s_mix / q_mix) * dst
        v_persp = (t_mix / q_mix) * dst
        u_aff = b0 * uv[0][0] + b1 * uv[1][0] + b2 * uv[2][0]
        v_aff = b0 * uv[0][1] + b1 * uv[1][1] + b2 * uv[2][1]
        worst = max(worst, abs(u_persp - u_aff), abs(v_persp - v_aff))
    return worst, uv
def main(argv):
    """CLI entry point for the textured-triangle translator.

    argv[1] is the dump path. Options: --tbp N, --dst N, --maxtri N,
    --scene FILE, --report FILE, and the flag --perspective (emit the
    fixed-point ST/Q `persptri` scene instead of the affine `tritex` scene).

    Returns 2 when no dump path is given, otherwise 0. Every fail-closed exit
    (no segment, perspective gate failure, affine error gate failure) writes the
    report when --report is given and never writes a scene. Output files are
    written as UTF-8 and closed deterministically; the text contains non-ASCII
    punctuation. Raises SystemExit when an option is given without a value.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2
    path = argv[1]

    def opt(n, d=None):
        """Value following flag *n* in argv, or *d* when the flag is absent."""
        if n not in argv:
            return d
        pos = argv.index(n) + 1
        if pos >= len(argv):
            # a trailing flag used to surface as a bare IndexError traceback
            raise SystemExit(f"error: option {n} requires a value")
        return argv[pos]

    def write_text(dest, text):
        with open(dest, "w", encoding="utf-8") as fh:
            fh.write(text)

    tbp = int(opt("--tbp", "64"))
    dst = int(opt("--dst", "64"))
    maxtri = int(opt("--maxtri", "27"))
    report_path = opt("--report")
    sp = opt("--scene")

    _h, events = gs_parse.parse_dump(path)
    fst0 = sum(1 for e in events if e.kind == "GSREG" and e.reg == "PRIM" and ((e.value >> 8) & 1) == 0)
    seg, meta = extract(events, dst, maxtri)
    R = [f"# Ch341 textured-triangle translation (source {os.path.basename(path)}; aggregate facts + dump-derived scene)"]
    R.append(f"FST=0 (perspective ST) PRIM submissions: {fst0}")

    def fail_closed():
        """Print the report so far, persist it if requested, and stop without a scene."""
        text = "\n".join(R)
        print(text)
        if report_path:
            write_text(report_path, text + "\n")
        return 0

    if not seg:
        R.append(f"NO textured-tri segment selected (stop: {meta['stop']}).")
        return fail_closed()

    # Ch342 — FAITHFUL perspective ST/Q emit (no affine, no error gate; the Ch301 path is exact).
    # Packs the Ch301 fixed-point contract: S_fp=round(S*4096), T_fp=round(T*4096), Q_fp=round(Q*4096)
    # (24-bit FRAC=12). The 256->64 downscale is in the TEXTURE (TW=6); ST/Q stay normalized — the GS
    # computes texel=(S/Q)*2^TW=(S/Q)*64. Fails closed on Q<=0 or fixed-point overflow.
    if "--perspective" in argv:
        xs = [v["x"] for t in seg for v in t]
        ys = [v["y"] for t in seg for v in t]
        x0, x1, y0, y1 = min(xs), max(xs), min(ys), max(ys)
        FB = 64
        sc = min((FB - 2) / max(1.0, x1 - x0), (FB - 2) / max(1.0, y1 - y0))

        def fx(x):
            return max(0, min(FB - 1, 1 + int((x - x0) * sc)))

        def fy(y):
            return max(0, min(FB - 1, 1 + int((y - y0) * sc)))

        L = [f"# Ch342 authentic cube subsegment ({len(seg)} tris) — FAITHFUL perspective ST/Q (Ch301 fixed-point, FRAC=12)",
             f"# events #{meta['first']}..#{meta['last']}; screen ({x0:.0f},{y0:.0f})..({x1:.0f},{y1:.0f}) viewport-fit -> {FB}x{FB}; texture 256->{dst}",
             f"tex0 {tbp} 1 6 6 0", "persp"]
        for t in seg:
            a = []
            for v in t:
                if v["Q"] <= 0:
                    R.append(f"GATE FAILED: Q<=0 ({v['Q']}) at event #{meta['first']} — fail closed.")
                    return fail_closed()
                sfp = round(v["S"] * 4096)
                tfp = round(v["T"] * 4096)
                qfp = round(v["Q"] * 4096)
                if not (0 <= sfp <= 0xFFFFFF and 0 <= tfp <= 0xFFFFFF and 0 < qfp <= 0xFFFFFF):
                    R.append("GATE FAILED: ST/Q fixed-point out of 24-bit range -> fail closed.")
                    return fail_closed()
                a += [fx(v["x"]), fy(v["y"]), sfp, tfp, qfp]
            (r, g, b) = t[-1]["rgb"]
            z = t[-1]["z"]
            L.append("persptri " + " ".join(map(str, a + [z, r, g, b])))
        L.append("go")
        Qs = [v["Q"] for t in seg for v in t]
        pv = meta["prim"] or 0
        tv = meta["test"] or 0
        R.append(f"segment PRIM: type={pv&7}(TRI=3) IIP={(pv>>3)&1} TME={(pv>>4)&1} FGE={(pv>>5)&1} ABE={(pv>>6)&1} FST={(pv>>8)&1}"
                 f" TEST_1: ZTE={(tv>>16)&1} ZTST={(tv>>17)&3}(GEQ=2)")
        if ((pv >> 6) & 1):
            R.append("WARN: segment ABE=1 -> routes to the combined-TAZ path (perspective there is a known follow-on bug), NOT the proven S1 path. Do NOT flip ABE.")
        R.append("S1 perspective path honors TME+FST=0 + ZTE/ZTST GEQUAL; cube segment is ABE=0 (S1 path).")
        R.append(f"FAITHFUL PERSPECTIVE: {len(seg)} tris, TEX0->TBP={tbp} TW=6 TH=6 TFX=0; S_fp/T_fp/Q_fp=round(*4096); Q span {min(Qs):.4f}..{max(Qs):.4f}")
        R.append(f"staging words: {7+9*len(seg)} (perspective format word0[32]=1, no rects)")
        print("\n".join(R) + "\n")
        if sp:
            write_text(sp, "\n".join(L) + "\n")
            print(f"[wrote {len(seg)}-tri FAITHFUL perspective scene -> {sp}]")
        if report_path:
            write_text(report_path, "\n".join(R) + "\n")
        return 0

    # span + Q + error
    Qs = [v["Q"] for t in seg for v in t]
    Ss = [v["S"] for t in seg for v in t]
    Ts = [v["T"] for t in seg for v in t]
    maxerr = 0.0
    for t in seg:
        me, _ = tri_error(t, dst)
        maxerr = max(maxerr, me)
    tex0 = meta["tex0"]
    R.append(f"selected segment: {len(seg)} triangles, events #{meta['first']}..#{meta['last']}, stop after: {meta['stop']}")
    R.append(f"active TEX0 (orig): TBP0={tex0&0x3FFF} TW={(tex0>>26)&0xF} TH={(tex0>>30)&0xF} TFX={(tex0>>35)&3}")
    R.append(f"relocated TEX0 (fixture): TBP0={tbp} TBW=1 TW=6 TH=6 TFX=0 (downscale 256->{dst}, UV scale /{256//dst})")
    R.append(f"Q span: {min(Qs):.4f}..{max(Qs):.4f} S span {min(Ss):.4f}..{max(Ss):.4f} T span {min(Ts):.4f}..{max(Ts):.4f}")
    R.append(f"perspective-vs-affine max error: {maxerr:.4f} texels (threshold {MAX_ERR})")
    ok = maxerr <= MAX_ERR
    staging_words = 7 + 9 * len(seg)
    R.append(f"staging words: {staging_words} (<=256: {'ok' if staging_words<=256 else 'OVERFLOW'})")
    if not ok:
        R.append("GATE FAILED: affine ST/Q surrogate exceeds error threshold -> FAIL CLOSED. Ch342 = real ST/Q through the feeder.")
    else:
        R.append(f"DECLARED APPROXIMATION: perspective ST/Q rendered as affine UV; max_error={maxerr:.4f} texels (NOT faithful perspective-correct texturing).")
    print("\n".join(R) + "\n")
    if report_path:     # write the report in BOTH branches (the failure report is the useful one)
        write_text(report_path, "\n".join(R) + "\n")
        print(f"[wrote report -> {report_path}]")
    if not ok:
        return 0
    if sp:
        # viewport-fit the segment's screen bbox into the FB x FB target (declared, like Ch340)
        xs = [v["x"] for t in seg for v in t]
        ys = [v["y"] for t in seg for v in t]
        x0, x1, y0, y1 = min(xs), max(xs), min(ys), max(ys)
        FB = 64
        s = min((FB - 2) / max(1.0, x1 - x0), (FB - 2) / max(1.0, y1 - y0))

        def fx(x):
            return max(0, min(FB - 1, 1 + int((x - x0) * s)))

        def fy(y):
            return max(0, min(FB - 1, 1 + int((y - y0) * s)))

        L = [f"# Ch341 authentic cube subsegment ({len(seg)} tris) — DECLARED affine ST/Q surrogate, max_error={maxerr:.3f} texels",
             f"# screen bbox ({x0:.0f},{y0:.0f})..({x1:.0f},{y1:.0f}) viewport-fit -> {FB}x{FB}; texture 256->{dst} downscale",
             f"tex0 {tbp} 1 6 6 0"]
        for t in seg:
            a = []
            for v in t:
                # texel coords clamp to the dst-sized texture; a zero Q maps to texel 0
                u = max(0, min(dst - 1, int(round((v["S"] / v["Q"]) * dst)))) if v["Q"] else 0
                w = max(0, min(dst - 1, int(round((v["T"] / v["Q"]) * dst)))) if v["Q"] else 0
                a += [fx(v["x"]), fy(v["y"]), u, w]
            (r, g, b) = t[-1]["rgb"]
            z = t[-1]["z"]
            L.append("tritex " + " ".join(map(str, a + [z, r, g, b])))
        L.append("go")
        write_text(sp, "\n".join(L) + "\n")
        print(f"[wrote {len(seg)}-tri textured scene -> {sp}]")
    return 0
# Script entry: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))
+524
View File
@@ -0,0 +1,524 @@
// retroDE_ps2 — ps2_feeder (Ch339): native HPS userspace command producer for the GS feeder.
//
// Encodes structured drawing commands (triangle / native rectangle / flat or per-vertex color / Z)
// into the PROVEN staging format and streams them to the FPGA over the existing HPS bridge via
// /dev/mem + mmap — the SAME register protocol the docs/hardware/ps2_feeder_*.sh anchors use. The
// RTL and bridge protocol are UNCHANGED; this is purely a host-side encoder + streamer.
//
// The staging word layout and every GS-register packing here are a byte-exact port of the golden
// encoder in sim/data/top_psmct32_raster_demo/bake.py. `--dump <scene>` emits the 256 staging words
// so the encoder can be diffed against the golden feeder_*.mem fixtures (byte-equivalence gate).
//
// Build: gcc -O2 -Wall -o ps2_feeder ps2_feeder.c
// Usage:
// ps2_feeder --list list built-in named scenes
// ps2_feeder --dump <scene> print the 256 staging words (no board access)
// ps2_feeder --dump-file <file> parse a scene file, print the first scene's staging words
// ps2_feeder [opts] <scene>... stream each named scene to the board (submit/go/wait each)
// ps2_feeder [opts] -f <file> stream each scene in a text scene file
// Options:
// --base 0xADDR bridge base (default 0x40000000) --dry-run encode+validate, no board access
//
// Scene file grammar (one scene per `go`/EOF; '#' comments; whitespace-separated):
// tri x0 y0 x1 y1 x2 y2 z r g b flat triangle
// trig x0 y0 r0 g0 b0 x1 y1 r1 g1 b1 x2 y2 r2 g2 b2 z gouraud (per-vertex) triangle
// tritile T z r g b flat triangle filling grid tile T (0..15)
// rect T z r g b native rectangle in grid tile T
// go submit the accumulated scene; start next
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>
// ---- profile constants (must match the GS_FEEDER_DEMO build) ----
#define FEEDER_STG_WORDS 256
#define FIFO_DEPTH 8
#define CAP_FBW 1
#define CAP_TBP1 96
#define TMP_TBW 1
static const int TMP_UV[3][2] = {{0,0},{3,0},{0,3}};
// ---- bridge register byte offsets from base (mirror docs/hardware/ps2_feeder_*.sh) ----
#define OFF_STATUS 0x0D8 // R: bit0 = feeder ready ; W: reset staging write address
#define OFF_LO 0x0DC // W: low 32 bits of the next staging word ; R: current staging address
#define OFF_HI 0x0E4 // W: high 32 bits (commits {hi,lo}, auto-increments addr) ; R: records emitted
#define OFF_GO 0x0E8 // W: bit0 = trigger feeder ; R: fifo wait cycles
// ======================== GS register / vertex packers (port of bake.py) ===================
/* FRAME_1 value carrying only the 6-bit buffer width at bits [21:16]; every other field is 0. */
static uint64_t frame_1_psmct32(uint32_t fbw){
    const uint64_t width_field = fbw & 0x3F;
    return width_field << 16;
}
/* ALPHA value: four 2-bit selectors a,b,c,d at bits [1:0],[3:2],[5:4],[7:6] and the
 * 8-bit fixed value at bits [39:32]. Inputs are masked to their field width. */
static uint64_t alpha_pack(uint32_t a,uint32_t b,uint32_t c,uint32_t d,uint32_t fix){
    uint64_t word = (uint64_t)(a & 3);
    word |= (uint64_t)(b & 3) << 2;
    word |= (uint64_t)(c & 3) << 4;
    word |= (uint64_t)(d & 3) << 6;
    word |= (uint64_t)(fix & 0xFF) << 32;
    return word;
}
/* TEST_1 constant used by the triangle/rect header: bit 16 set plus the value 2 in the
 * field starting at bit 17 (the Python tooling reports these as ZTE and ZTST=GEQ). */
static uint64_t test1_geq(void){
    const uint64_t zte  = 1ull << 16;
    const uint64_t ztst = 2ull << 17;
    return zte | ztst;
}
/* ZBUF_1 value: 9-bit base pointer at [8:0], 4-bit format at [27:24], mask bit at [32].
 * The mask bit was previously OR'ed in unshifted, where it aliased bit 0 of the base
 * pointer; with zmsk=0 (the only value the staging builder passes) the result is
 * unchanged, so existing staging images stay byte-identical.
 * NOTE(review): bit 32 follows the GS register layout — confirm gs_stub/bake.py agree. */
static uint64_t zbuf1_pack(uint32_t zbp,uint32_t zmsk,uint32_t psm){
    return (uint64_t)(zbp&0x1FF) | ((uint64_t)(psm&0xF)<<24) | ((uint64_t)(zmsk&1)<<32);
}
/* TEX0 value: tbp0 [13:0], tbw [19:14], psm [25:20], tw [29:26], th [33:30], tfx [36:35].
 * Bit 34 is never set by this packer. Inputs are masked to their field width. */
static uint64_t tex0_pack(uint32_t tbp0,uint32_t tbw,uint32_t psm,uint32_t tw,uint32_t th,uint32_t tfx){
    uint64_t word = (uint64_t)(tbp0 & 0x3FFF);
    word |= (uint64_t)(tbw & 0x3F) << 14;
    word |= (uint64_t)(psm & 0x3F) << 20;
    word |= (uint64_t)(tw & 0xF) << 26;
    word |= (uint64_t)(th & 0xF) << 30;
    word |= (uint64_t)(tfx & 3) << 35;
    return word;
}
/* PRIM for the legacy scenes: type 3 (triangle) with bits 4 (TME) and 6 (ABE) set. */
static uint64_t prim_tri_tme_abe(void){
    const uint64_t type_tri = 3, tme = 1u << 4, abe = 1u << 6;
    return type_tri | tme | abe;
}
/* Ch342 PRIM for the perspective (cube) path: type 3 (triangle) with bit 4 (TME) set, ABE clear. */
static uint64_t prim_tri_tme(void){
    const uint64_t type_tri = 3, tme = 1u << 4;
    return type_tri | tme;
}
/* Ch345a PRIM for textured-alpha sprites: type 6 (sprite) with bits 4 (TME) and 6 (ABE) set. */
static uint64_t prim_sprite_tme_abe(void){
    const uint64_t type_sprite = 6, tme = 1u << 4, abe = 1u << 6;
    return type_sprite | tme | abe;
}
/* RGBAQ value with r at [7:0], g at [15:8], b at [23:16] and a fixed alpha of 0xFF at
 * [31:24]; the upper 32 bits stay 0. */
static uint64_t rgbaq_data(uint8_t r,uint8_t g,uint8_t b){
    const uint64_t alpha = (uint64_t)0xFF << 24;   // a = 0xFF
    const uint64_t blue  = (uint64_t)b << 16;
    const uint64_t green = (uint64_t)g << 8;
    return alpha | blue | green | r;
}
/* UV value from integer texel coords: each coordinate is shifted left by 4 (4 fractional
 * bits) and masked to 14 bits, so only the low 10 integer bits of ui/vi survive.
 * U occupies [13:0]; V is placed directly above it, starting at bit 14.
 * NOTE(review): the V offset of 14 is what the staging goldens use — confirm it against
 * the gs_stub UV layout before changing. */
static uint64_t uv_data(int ui,int vi){
    const uint64_t u_fix = (uint64_t)((ui << 4) & 0x3FFF);
    const uint64_t v_fix = (uint64_t)((vi << 4) & 0x3FFF);
    return u_fix | (v_fix << 14);
}
/* XYZ2 value from integer pixel coords: x and y are masked to 12 bits and stored with 4
 * fractional bits (x at [15:4], y at [31:20]); z fills the upper 32 bits. */
static uint64_t xyz2_dataz(int x,int y,uint32_t z){
    const uint64_t x_field = (uint64_t)(x & 0xFFF) << 4;
    const uint64_t y_field = (uint64_t)(y & 0xFFF) << 20;
    const uint64_t z_field = (uint64_t)z << 32;
    return x_field | y_field | z_field;
}
// Ch342 — perspective ST/Q packers matching the Ch301 gs_stub contract (24-bit fixed-point, FRAC=12).
// ST register: S_fp in [23:0], T_fp in [55:32]. RGBAQ register: Q_fp in [55:32] (NOT IEEE float).
/* ST value for the fixed-point perspective contract: 24-bit S at [23:0], 24-bit T at [55:32]. */
static uint64_t st_data(uint32_t s_fp,uint32_t t_fp){
    const uint64_t s_field = s_fp & 0xFFFFFF;
    const uint64_t t_field = t_fp & 0xFFFFFF;
    return s_field | (t_field << 32);
}
/* RGBAQ value for the perspective path: r/g/b at [7:0]/[15:8]/[23:16], alpha fixed to
 * 0xFF at [31:24], and the 24-bit fixed-point Q at [55:32]. */
static uint64_t rgbaq_persp(uint8_t r,uint8_t g,uint8_t b,uint32_t q_fp){
    const uint64_t color = ((uint64_t)0xFF << 24) | ((uint64_t)b << 16) | ((uint64_t)g << 8) | r;
    const uint64_t q_field = (uint64_t)(q_fp & 0xFFFFFF) << 32;
    return color | q_field;
}
// ======================== scene model ===================
#define MAX_TRI 64
#define MAX_RECT 64
#define MAX_SPRITE 64
// One triangle: per-vertex screen x/y, per-vertex color, per-vertex integer UV, per-vertex
// fixed-point S/T/Q (only filled by add_persptri), and a single Z shared by all three vertices.
typedef struct { int x[3], y[3]; uint8_t r[3], g[3], b[3]; int u[3], v[3];
uint32_t s_fp[3], t_fp[3], q_fp[3]; uint32_t z; } tri_t; // Ch342 fixed-point ST/Q
// One native rectangle: grid tile index, flat color, Z.
typedef struct { int tile; uint8_t r, g, b; uint32_t z; } rect_t;
// Ch345a — textured + source-over alpha SPRITE record (the Ch344-proven subset): screen rect (x0,y0)-(x1,y1),
// per-corner affine UV, flat MODULATE tint. As (source alpha) comes from the TEXEL (TCC=1), not the tint.
// NOTE(review): tex0_pack() in this file never sets a TCC bit — confirm where TCC=1 is established.
typedef struct { int x0,y0,x1,y1; int u0,v0,u1,v1; uint8_t r,g,b; } sprite_t;
// A whole scene: fixed-capacity triangle / rect / sprite arrays with their counts, a display name,
// and the mode flags below.
typedef struct {
tri_t tri[MAX_TRI]; int ntri; rect_t rect[MAX_RECT]; int nrect; const char *name;
sprite_t sprite[MAX_SPRITE]; int nsprite; // Ch345a
// Ch341 — optional scene-level texture binding. textured=0 keeps the proven unity+MODULATE header
// and TMP_UV (byte-exact with all prior scenes). textured=1 (set by the `tex0` command) overrides
// the scene TEX0 and lets `tritex` carry per-vertex real UV.
int textured; uint32_t tex_tbp, tex_tbw, tex_tw, tex_th, tex_tfx;
// Ch342 — perspective format: word0[32]=1, per-vertex RGBAQ/ST/XYZ2 (fixed-point S/T/Q). Requires
// `textured`; rects are rejected (the host fails closed so a mixed scene is never silently dropped).
int perspective;
// Ch345a — SPRITE format: word0[33]=1, each prim = SPRITE record (2 verts x RGBAQ/UV/XYZ2). Requires
// `textured`; mutually exclusive with tris/rects/perspective (fail closed).
int sprite_mode;
} scene_t;
// Start a fresh scene called `name`: zero all primitive counts and clear the mode flags.
// The tex_* binding fields are left as they are; they are only read when `textured` is set.
static void scene_reset(scene_t *s, const char *name){
    s->name = name;
    s->ntri = 0;
    s->nrect = 0;
    s->nsprite = 0;
    s->textured = 0;
    s->perspective = 0;
    s->sprite_mode = 0;
}
// Returns 1 when the scene holds no triangles, rects or sprites, else 0.
static int scene_empty(const scene_t *s){
    if (s->ntri != 0) return 0;
    if (s->nrect != 0) return 0;
    return s->nsprite == 0;
}
// tri_in_tile(t): the half-tile triangle bake.py draws for grid tile t (0..15, row*4+col).
// Fill vx/vy with the half-tile triangle for grid tile t (row*4+col, 16-pixel tiles):
// corners at (+1,+1), (+14,+1) and (+1,+14) relative to the tile origin.
static void tri_in_tile(int t, int vx[3], int vy[3]){
    const int col = t % 4, row = t / 4;
    const int left = col * 16, top = row * 16;
    const int lo = 1, hi = 14;
    vx[0] = left + lo;  vy[0] = top + lo;
    vx[1] = left + hi;  vy[1] = top + lo;
    vx[2] = left + lo;  vy[2] = top + hi;
}
// returns 0 ok, -1 on capacity/range error (message printed)
// Append a triangle with per-vertex color and a shared Z. UVs are set to the TMP_UV
// defaults. Returns 0 on success, -1 (message on stderr) when the scene is full or a
// vertex lies outside [0..4095]; nothing is appended on failure.
static int add_tri_gouraud(scene_t *s, const int vx[3], const int vy[3],
                           const uint8_t rr[3], const uint8_t gg[3], const uint8_t bb[3], uint32_t z){
    if (s->ntri >= MAX_TRI){
        fprintf(stderr,"error: too many triangles (max %d)\n", MAX_TRI);
        return -1;
    }
    for (int i=0;i<3;i++){
        const int x_ok = (vx[i] >= 0 && vx[i] <= 4095);
        const int y_ok = (vy[i] >= 0 && vy[i] <= 4095);
        if (!(x_ok && y_ok)){
            fprintf(stderr,"error: vertex (%d,%d) out of 12-bit range [0..4095]\n", vx[i], vy[i]);
            return -1;
        }
    }
    tri_t *slot = &s->tri[s->ntri];
    s->ntri += 1;
    for (int i=0;i<3;i++){
        slot->x[i] = vx[i];
        slot->y[i] = vy[i];
        slot->r[i] = rr[i];
        slot->g[i] = gg[i];
        slot->b[i] = bb[i];
        // default UV (byte-exact w/ prior scenes)
        slot->u[i] = TMP_UV[i][0];
        slot->v[i] = TMP_UV[i][1];
    }
    slot->z = z;
    return 0;
}
// Ch341 — textured triangle: per-vertex real UV (texel coords) + a flat MODULATE color. Requires the
// scene `tex0` binding (caller sets s->textured). UV range is [0..1023]: uv_data() stores each
// coordinate as 10.4 fixed point in a 14-bit field, so larger values would wrap silently instead of
// failing closed. UVs are validated BEFORE the triangle is appended, so a rejected call leaves the
// scene untouched. Returns 0 on success, -1 (message on stderr) on any range/capacity error.
static int add_tritex(scene_t *s, const int vx[3], const int vy[3], const int uu[3], const int vv[3],
                      uint8_t r, uint8_t g, uint8_t b, uint32_t z){
    for (int i=0;i<3;i++){
        if (uu[i]<0||uu[i]>1023||vv[i]<0||vv[i]>1023){
            fprintf(stderr,"error: UV (%d,%d) out of range [0..1023]\n", uu[i], vv[i]); return -1; }
    }
    uint8_t rr[3]={r,r,r}, gg[3]={g,g,g}, bb[3]={b,b,b};
    if (add_tri_gouraud(s, vx, vy, rr, gg, bb, z)) return -1;
    tri_t *t=&s->tri[s->ntri-1];
    for (int i=0;i<3;i++){ t->u[i]=uu[i]; t->v[i]=vv[i]; }   // replace the default TMP_UV
    return 0;
}
// Append a single-color triangle: the same (r,g,b) is replicated to all three vertices
// and the work is delegated to add_tri_gouraud(), whose return value is passed through.
static int add_tri_flat(scene_t *s, const int vx[3], const int vy[3], uint8_t r, uint8_t g, uint8_t b, uint32_t z){
    uint8_t reds[3], greens[3], blues[3];
    for (int i=0;i<3;i++){
        reds[i] = r;
        greens[i] = g;
        blues[i] = b;
    }
    return add_tri_gouraud(s, vx, vy, reds, greens, blues, z);
}
// Append the flat half-tile triangle for grid tile `tile` (0..15). Returns -1 with a
// message when the tile index is out of range, otherwise add_tri_flat()'s result.
static int add_tritile(scene_t *s, int tile, uint32_t z, uint8_t r, uint8_t g, uint8_t b){
    const int tile_ok = (tile >= 0 && tile <= 15);
    if (!tile_ok){
        fprintf(stderr,"error: tile %d out of range [0..15]\n", tile);
        return -1;
    }
    int px[3], py[3];
    tri_in_tile(tile, px, py);
    return add_tri_flat(s, px, py, r, g, b, z);
}
// Ch342 — perspective textured triangle: per-vertex screen (x,y) + fixed-point (S_fp,T_fp,Q_fp) + flat
// MODULATE color. Requires the scene `tex0` + `persp`. 24-bit fixed-point (FRAC=12), so S/T/Q <= 0xFFFFFF.
// Append a flat-colored triangle carrying per-vertex fixed-point S/T/Q. The triangle is
// first appended through add_tri_gouraud(); if any S/T/Q value exceeds 24 bits the append
// is rolled back (ntri decremented) and -1 is returned. Returns 0 on success.
static int add_persptri(scene_t *s, const int vx[3], const int vy[3],
                        const uint32_t sf[3], const uint32_t tf[3], const uint32_t qf[3],
                        uint8_t r, uint8_t g, uint8_t b, uint32_t z){
    const uint32_t limit = 0xFFFFFF;
    uint8_t reds[3]={r,r,r}, greens[3]={g,g,g}, blues[3]={b,b,b};
    if (add_tri_gouraud(s, vx, vy, reds, greens, blues, z) != 0)
        return -1;
    tri_t *slot = &s->tri[s->ntri-1];
    for (int i=0;i<3;i++){
        if (sf[i] > limit || tf[i] > limit || qf[i] > limit){
            fprintf(stderr,"error: ST/Q fixed-point out of 24-bit range\n");
            s->ntri--;      // undo the append above
            return -1;
        }
        slot->s_fp[i] = sf[i];
        slot->t_fp[i] = tf[i];
        slot->q_fp[i] = qf[i];
    }
    return 0;
}
// Append a native rectangle for grid tile `tile` (0..15) with a flat color and Z.
// Rejected (-1, message on stderr) in perspective mode, for an out-of-range tile, or
// when the rect array is full. Returns 0 on success.
static int add_rect(scene_t *s, int tile, uint32_t z, uint8_t r, uint8_t g, uint8_t b){
    if (s->perspective){
        fprintf(stderr,"error: rects not allowed in perspective mode\n");
        return -1;
    }
    if (!(tile >= 0 && tile <= 15)){
        fprintf(stderr,"error: rect tile %d out of range [0..15]\n", tile);
        return -1;
    }
    if (s->nrect >= MAX_RECT){
        fprintf(stderr,"error: too many rects (max %d)\n", MAX_RECT);
        return -1;
    }
    rect_t *slot = &s->rect[s->nrect];
    s->nrect += 1;
    slot->tile = tile;
    slot->z = z;
    slot->r = r;
    slot->g = g;
    slot->b = b;
    return 0;
}
// Ch345a — textured + source-over alpha SPRITE: screen rect + per-corner affine UV + flat MODULATE tint.
// Requires the scene `tex0` binding (textured); sets sprite_mode. Fails closed if mixed with tris/rects/persp.
// Screen coords must lie in [0..4095]. UVs must lie in [0..1023]: uv_data() stores each coordinate as
// 10.4 fixed point in a 14-bit field, so larger values would wrap silently instead of failing closed.
// Returns 0 on success, -1 (message on stderr) otherwise; nothing is appended on failure.
static int add_sprite(scene_t *s, int x0, int y0, int x1, int y1,
                      int u0, int v0, int u1, int v1, uint8_t r, uint8_t g, uint8_t b){
    if (!s->textured){ fprintf(stderr,"error: sprite requires a prior tex0 binding\n"); return -1; }
    if (s->perspective || s->ntri || s->nrect){
        fprintf(stderr,"error: sprite mode is exclusive with persp/tris/rects\n"); return -1; }
    if (s->nsprite >= MAX_SPRITE){ fprintf(stderr,"error: too many sprites (max %d)\n", MAX_SPRITE); return -1; }
    if (x0<0||x0>4095||y0<0||y0>4095||x1<0||x1>4095||y1<0||y1>4095){
        fprintf(stderr,"error: sprite vertex out of 12-bit range [0..4095]\n"); return -1; }
    if (u0<0||u0>1023||v0<0||v0>1023||u1<0||u1>1023||v1<0||v1>1023){
        fprintf(stderr,"error: sprite UV out of range [0..1023]\n"); return -1; }
    s->sprite_mode = 1;
    sprite_t *q=&s->sprite[s->nsprite++];
    q->x0=x0; q->y0=y0; q->x1=x1; q->y1=y1;
    q->u0=u0; q->v0=v0; q->u1=u1; q->v1=v1;
    q->r=r; q->g=g; q->b=b;
    return 0;
}
// ======================== staging builder (byte-exact with bake.py builders) ===================
// Emits: word0 = {rect_count[31:16], tri_count[15:0]} (== tri_count when rect_count==0, matching the
// colored/gouraud builders), words1..6 = FRAME/ALPHA/TEST/ZBUF/TEX0(unity+MODULATE)/PRIM, then each
// triangle as 3x(RGBAQ,UV,XYZ2), then each rect as rect_record (RGBAQ, corner0 XYZ2, corner1 XYZ2).
// out[] is zero-padded to FEEDER_STG_WORDS. Returns meaningful word count, or -1 if it won't fit.
static int build_staging(const scene_t *s, uint64_t out[FEEDER_STG_WORDS]){
int need = s->sprite_mode ? (7 + s->nsprite*6) : (7 + s->ntri*9 + s->nrect*3);
if (need > FEEDER_STG_WORDS){
fprintf(stderr,"error: scene '%s' needs %d staging words > %d (FEEDER_STG_WORDS) — too large\n",
s->name?s->name:"?", need, FEEDER_STG_WORDS);
return -1;
}
if (s->perspective && (!s->textured || s->nrect)){
fprintf(stderr,"error: perspective mode requires tex0 and no rects\n"); return -1;
}
// Ch345a — SPRITE format: word0[33]=1, SPRITE+TME+ABE source-over, per-corner UV, As from texel (TCC).
if (s->sprite_mode){
if (!s->textured){ fprintf(stderr,"error: sprite mode requires tex0\n"); return -1; }
for (int i=0;i<FEEDER_STG_WORDS;i++) out[i]=0;
int n=0;
out[n++] = (uint64_t)(s->nsprite & 0xFFFF) | ((uint64_t)1 << 33);
out[n++] = frame_1_psmct32(CAP_FBW);
out[n++] = alpha_pack(0,1,0,1,0); // source-over
out[n++] = 0; // TEST_1 (ZTE=0, no depth)
out[n++] = 0; // ZBUF_1
out[n++] = tex0_pack(s->tex_tbp, s->tex_tbw, 0, s->tex_tw, s->tex_th, s->tex_tfx); // PSMCT32
out[n++] = prim_sprite_tme_abe(); // SPRITE+TME+ABE
for (int i=0;i<s->nsprite;i++){
const sprite_t *q=&s->sprite[i];
// RGBAQ A=0x80 (neutral; the vertex alpha is IGNORED — As comes from the texel, TCC=1). Matches
// the bake.py golden so test_ps2_feeder.sh can byte-check the sprite staging.
uint64_t tint = ((uint64_t)0x80<<24) | ((uint64_t)q->b<<16) | ((uint64_t)q->g<<8) | q->r;
out[n++] = tint; out[n++] = uv_data(q->u0,q->v0); out[n++] = xyz2_dataz(q->x0,q->y0,0);
out[n++] = tint; out[n++] = uv_data(q->u1,q->v1); out[n++] = xyz2_dataz(q->x1,q->y1,0);
}
return n;
}
for (int i=0;i<FEEDER_STG_WORDS;i++) out[i]=0;
int n=0;
// Ch342 — word0[32] = perspective format flag (feeder emits ST instead of UV; rects forced off).
out[n++] = ((uint64_t)(s->nrect & 0xFFFF) << 16) | (uint64_t)(s->ntri & 0xFFFF)
| (s->perspective ? ((uint64_t)1 << 32) : 0);
out[n++] = frame_1_psmct32(CAP_FBW);
out[n++] = alpha_pack(0,1,0,1,0);
out[n++] = test1_geq();
out[n++] = zbuf1_pack(2,0,0);
out[n++] = s->textured ? tex0_pack(s->tex_tbp, s->tex_tbw, 0, s->tex_tw, s->tex_th, s->tex_tfx)
: tex0_pack(CAP_TBP1, TMP_TBW, 0, 2, 2, 0); // unity texture + MODULATE (default)
out[n++] = s->perspective ? prim_tri_tme() : prim_tri_tme_abe(); // Ch342: ABE=0 -> S1 perspective path
for (int i=0;i<s->ntri;i++){
const tri_t *t=&s->tri[i];
for (int v=0; v<3; v++){
if (s->perspective){ // RGBAQ(+Q_fp) / ST(S_fp,T_fp) / XYZ2
out[n++] = rgbaq_persp(t->r[v], t->g[v], t->b[v], t->q_fp[v]);
out[n++] = st_data(t->s_fp[v], t->t_fp[v]);
} else { // legacy: RGBAQ / UV / XYZ2 (byte-exact)
out[n++] = rgbaq_data(t->r[v], t->g[v], t->b[v]);
out[n++] = uv_data(t->u[v], t->v[v]);
}
out[n++] = xyz2_dataz(t->x[v], t->y[v], t->z);
}
}
for (int i=0;i<s->nrect;i++){
const rect_t *q=&s->rect[i];
int ox=(q->tile%4)*16, oy=(q->tile/4)*16;
out[n++] = rgbaq_data(q->r, q->g, q->b);
out[n++] = xyz2_dataz(ox+1, oy+1, q->z);
out[n++] = xyz2_dataz(ox+14, oy+14, q->z);
}
return n;
}
// ======================== built-in named scenes (reproduce the Ch333-338 goldens) ===================
#define RED 0xFF,0x00,0x00
#define GREEN 0x00,0xFF,0x00
#define BLUE 0x00,0x00,0xFF
#define YELLOW 0xFF,0xFF,0x00
#define WHITE 0xFF,0xFF,0xFF
// Populate *s with the built-in scene called `name`.
// The scene is reset first, then primitives are added in exactly the order written below.
// Returns 0 on success. Returns non-zero when an add_* helper reports failure, or when `name`
// is not one of the scenes handled here (an error message is then printed to stderr).
static int build_named(const char *name, scene_t *s){
scene_reset(s, name);
if (!strcmp(name,"color-tri")){ // Ch333 feeder_color_tri.mem
// `||` short-circuits: the first failing add stops the chain and makes the result non-zero.
return add_tritile(s,0,0x5000,RED) || add_tritile(s,5,0x5100,GREEN) || add_tritile(s,10,0x5200,BLUE);
}
if (!strcmp(name,"sprite")){ // Ch345a feeder_sprite.mem — 3 textured-alpha sprites
// tex0 8x8 PSMCT32 @ TBP=64 (TW=TH=3 log2), MODULATE; white (0x80) tint = identity. As from texel.
s->textured=1; s->tex_tbp=64; s->tex_tbw=1; s->tex_tw=3; s->tex_th=3; s->tex_tfx=0;
return add_sprite(s, 8,24,24,40, 0,0,8,8, 128,128,128)
|| add_sprite(s, 26,24,42,40, 0,0,8,8, 128,128,128)
|| add_sprite(s, 44,24,60,40, 0,0,8,8, 128,128,128);
}
if (!strcmp(name,"native-rect")){ // Ch334 feeder_native_rect.mem (3 rects, 0 tris)
return add_rect(s,0,0x5000,RED) || add_rect(s,5,0x5100,GREEN) || add_rect(s,10,0x5200,BLUE);
}
if (!strcmp(name,"gouraud-tri")){ // Ch335 feeder_gouraud_tri.mem (1 gouraud tri, tile 0)
// One pure-red, one pure-green and one pure-blue vertex.
int vx[3],vy[3]; tri_in_tile(0,vx,vy);
uint8_t rr[3]={0xFF,0x00,0x00}, gg[3]={0x00,0xFF,0x00}, bb[3]={0x00,0x00,0xFF};
return add_tri_gouraud(s,vx,vy,rr,gg,bb,0x5000);
}
if (!strcmp(name,"accum")){ // Ch336 feeder_accum.mem
// 14 tile triangles: tiles 0..7 red, tiles 8..13 blue, z stepping by 0x100 per tile.
for (int t=0;t<14;t++) if (t<8 ? add_tritile(s,t,0x5000+t*0x100,RED)
: add_tritile(s,t,0x5000+t*0x100,BLUE)) return -1;
return 0;
}
if (!strcmp(name,"retrigger-a")){ // Ch337 feeder_scene_a.mem
for (int t=0;t<14;t++) if (add_tritile(s,t,0x5000+t*0x100,RED)) return -1;
return 0;
}
if (!strcmp(name,"retrigger-b")){ // Ch337 feeder_scene_b.mem (tiles 2..15)
// z is indexed from (t-2), so it starts at 0x5000 just like retrigger-a.
for (int t=2;t<16;t++) if (add_tritile(s,t,0x5000+(t-2)*0x100,BLUE)) return -1;
return 0;
}
if (!strcmp(name,"zpersist-near") || !strcmp(name,"zpersist-far")){ // Ch338 flat overlap
int near_first = !strcmp(name,"zpersist-near");
// Filler tile lists; tile 5 is left out because it carries the two overlapping primitives.
static const int b0f[7]={0,1,2,3,4,6,7}, b1f[5]={8,9,10,11,12};
// batch0 = overlap prim + 7 fillers ; batch1 = overlap prim + 5 fillers.
if (near_first){
if (add_tritile(s,5,0x7000,RED)) return -1; // near RED (batch0)
for (int i=0;i<7;i++) if (add_tritile(s,b0f[i],0x6000,RED)) return -1;
if (add_tritile(s,5,0x5000,BLUE)) return -1; // far BLUE (batch1)
for (int i=0;i<5;i++) if (add_tritile(s,b1f[i],0x6000,BLUE)) return -1;
} else {
if (add_tritile(s,5,0x5000,BLUE)) return -1; // far BLUE (batch0)
for (int i=0;i<7;i++) if (add_tritile(s,b0f[i],0x6000,BLUE)) return -1;
if (add_tritile(s,5,0x7000,RED)) return -1; // near RED (batch1)
for (int i=0;i<5;i++) if (add_tritile(s,b1f[i],0x6000,RED)) return -1;
}
return 0;
}
if (!strcmp(name,"zpersist-grad")){ // Ch338 feeder_zpersist_grad.mem (gouraud near + flat far)
// Same tile layout as the flat zpersist scenes, with a gouraud triangle as the near primitive.
static const int b0f[7]={0,1,2,3,4,6,7}, b1f[5]={8,9,10,11,12};
int vx[3],vy[3]; tri_in_tile(5,vx,vy);
uint8_t rr[3]={0xFF,0x00,0x00}, gg[3]={0x00,0xFF,0x00}, bb[3]={0x00,0x00,0xFF};
if (add_tri_gouraud(s,vx,vy,rr,gg,bb,0x7000)) return -1; // near gradient (batch0)
for (int i=0;i<7;i++) if (add_tritile(s,b0f[i],0x6000,GREEN)) return -1;
if (add_tritile(s,5,0x5000,WHITE)) return -1; // far flat white (batch1)
for (int i=0;i<5;i++) if (add_tritile(s,b1f[i],0x6000,WHITE)) return -1;
return 0;
}
// No scene matched.
fprintf(stderr,"error: unknown scene '%s' (try --list)\n", name);
return -1;
}
// NULL-terminated list of every scene name build_named() accepts; printed by `--list`.
// Keep in sync with build_named(): "sprite" (Ch345a) is handled there, so it is listed here too.
static const char *named_scenes[] = {
    "color-tri","native-rect","gouraud-tri","accum","retrigger-a","retrigger-b",
    "zpersist-near","zpersist-far","zpersist-grad","sprite", NULL
};
// ======================== scene-file parser ===================
// Parses tokens into one or more scenes (split on `go`). Returns count, or -1 on error.
// scenes[] must hold at least max_scenes entries.
// Parse one numeric token (decimal, 0x-hex or 0-octal, as accepted by strtol base 0).
// Returns 0 and stores the value in *out on success. Returns -1, leaving *out untouched, when the
// token is empty / has no digits, has trailing junk, or overflows a long.
static int parse_int(const char *tok, long *out){
    char *end;
    errno = 0;
    long v = strtol(tok, &end, 0);
    // end == tok: strtol consumed nothing (e.g. ""), which would otherwise pass as 0.
    if (end == tok || *end || errno) return -1;
    *out = v;
    return 0;
}
// Read the text scene file at `path` into scenes[0..max_scenes-1].
// One command per line: an op word followed by integer arguments; '#' starts a comment.
// `go` closes the current scene (when it is non-empty) and starts the next one.
// Returns the number of scenes parsed, or -1 after printing an error to stderr.
static int parse_scene_file(const char *path, scene_t *scenes, int max_scenes){
FILE *f=fopen(path,"r");
if (!f){ fprintf(stderr,"error: cannot open '%s': %s\n", path, strerror(errno)); return -1; }
int nsc=0; scene_reset(&scenes[0], path);
char line[512]; int lineno=0;
// NOTE(review): a physical line longer than 511 chars is delivered by fgets in pieces, so its
// tail would be parsed as a separate command and lineno would drift — confirm inputs stay short.
while (fgets(line,sizeof line,f)){
lineno++;
char *h=strchr(line,'#'); if (h) *h=0; // strip comment
char *tok=strtok(line," \t\r\n"); if (!tok) continue;
long a[24]; int na=0; char op[32]; // persptri needs 19 args
// Copy the op word: the following strtok calls keep walking `line`.
snprintf(op,sizeof op,"%s",tok);
// Collect up to 24 numeric arguments; tokens beyond 24 are not read.
for (char *t=strtok(NULL," \t\r\n"); t && na<24; t=strtok(NULL," \t\r\n")){
if (parse_int(t,&a[na])){ fprintf(stderr,"error: %s:%d bad number '%s'\n",path,lineno,t); fclose(f); return -1; }
na++;
}
scene_t *s=&scenes[nsc];
int rc=0;
// Dispatch on op AND exact argument count; anything else falls through to "malformed".
if (!strcmp(op,"go")){
// A `go` on an empty scene is a no-op. The next slot is claimed immediately, so a `go` that
// closes the scene in the last slot is reported as "too many scenes".
if (!scene_empty(s)){ if (++nsc>=max_scenes){ fprintf(stderr,"error: too many scenes\n"); fclose(f); return -1; } scene_reset(&scenes[nsc], path); }
} else if (!strcmp(op,"tri") && na==10){
// tri x0 y0 x1 y1 x2 y2 z r g b
int vx[3]={(int)a[0],(int)a[2],(int)a[4]}, vy[3]={(int)a[1],(int)a[3],(int)a[5]};
rc=add_tri_flat(s,vx,vy,(uint8_t)a[7],(uint8_t)a[8],(uint8_t)a[9],(uint32_t)a[6]);
} else if (!strcmp(op,"trig") && na==16){
// trig (x y r g b) x3 z — five values per vertex, then one shared z
int vx[3]={(int)a[0],(int)a[5],(int)a[10]}, vy[3]={(int)a[1],(int)a[6],(int)a[11]};
uint8_t rr[3]={(uint8_t)a[2],(uint8_t)a[7],(uint8_t)a[12]};
uint8_t gg[3]={(uint8_t)a[3],(uint8_t)a[8],(uint8_t)a[13]};
uint8_t bb[3]={(uint8_t)a[4],(uint8_t)a[9],(uint8_t)a[14]};
rc=add_tri_gouraud(s,vx,vy,rr,gg,bb,(uint32_t)a[15]);
} else if (!strcmp(op,"tritile") && na==5){
// tritile tile z r g b
rc=add_tritile(s,(int)a[0],(uint32_t)a[1],(uint8_t)a[2],(uint8_t)a[3],(uint8_t)a[4]);
} else if (!strcmp(op,"rect") && na==5){
// rect tile z r g b
rc=add_rect(s,(int)a[0],(uint32_t)a[1],(uint8_t)a[2],(uint8_t)a[3],(uint8_t)a[4]);
} else if (!strcmp(op,"tex0") && na==5){ // tex0 TBP TBW TW TH TFX — scene-level texture bind
s->textured=1; s->tex_tbp=(uint32_t)a[0]; s->tex_tbw=(uint32_t)a[1];
s->tex_tw=(uint32_t)a[2]; s->tex_th=(uint32_t)a[3]; s->tex_tfx=(uint32_t)a[4];
} else if (!strcmp(op,"tritex") && na==16){ // tritex x0 y0 u0 v0 x1 y1 u1 v1 x2 y2 u2 v2 z r g b
if (!s->textured){ fprintf(stderr,"error: %s:%d tritex needs a prior tex0\n",path,lineno); fclose(f); return -1; }
int vx[3]={(int)a[0],(int)a[4],(int)a[8]}, vy[3]={(int)a[1],(int)a[5],(int)a[9]};
int uu[3]={(int)a[2],(int)a[6],(int)a[10]}, vv[3]={(int)a[3],(int)a[7],(int)a[11]};
rc=add_tritex(s,vx,vy,uu,vv,(uint8_t)a[13],(uint8_t)a[14],(uint8_t)a[15],(uint32_t)a[12]);
} else if (!strcmp(op,"persp") && na==0){ // mark scene perspective (needs prior tex0)
if (!s->textured){ fprintf(stderr,"error: %s:%d persp needs a prior tex0\n",path,lineno); fclose(f); return -1; }
s->perspective=1;
} else if (!strcmp(op,"persptri") && na==19){ // persptri x y s t q (x3) z r g b — fixed-point ST/Q
if (!s->perspective){ fprintf(stderr,"error: %s:%d persptri needs a prior persp\n",path,lineno); fclose(f); return -1; }
int vx[3]={(int)a[0],(int)a[5],(int)a[10]}, vy[3]={(int)a[1],(int)a[6],(int)a[11]};
uint32_t sf[3]={(uint32_t)a[2],(uint32_t)a[7],(uint32_t)a[12]};
uint32_t tf[3]={(uint32_t)a[3],(uint32_t)a[8],(uint32_t)a[13]};
uint32_t qf[3]={(uint32_t)a[4],(uint32_t)a[9],(uint32_t)a[14]};
rc=add_persptri(s,vx,vy,sf,tf,qf,(uint8_t)a[16],(uint8_t)a[17],(uint8_t)a[18],(uint32_t)a[15]);
} else if (!strcmp(op,"sprite") && na==11){ // Ch345a — sprite x0 y0 x1 y1 u0 v0 u1 v1 r g b
// textured + source-over alpha SPRITE (needs a prior tex0). r/g/b = MODULATE tint; As from texel.
// NOTE(review): unlike tritex/persp, s->textured is not checked here — presumably add_sprite
// enforces the tex0 requirement; confirm.
rc=add_sprite(s,(int)a[0],(int)a[1],(int)a[2],(int)a[3],(int)a[4],(int)a[5],(int)a[6],(int)a[7],
(uint8_t)a[8],(uint8_t)a[9],(uint8_t)a[10]);
} else {
// Unknown op, or a known op with the wrong number of arguments.
fprintf(stderr,"error: %s:%d malformed command '%s' (got %d args)\n",path,lineno,op,na); fclose(f); return -1;
}
// The add_* helper reported failure; append the source location to the output.
if (rc){ fprintf(stderr," (at %s:%d)\n",path,lineno); fclose(f); return -1; }
}
fclose(f);
if (!scene_empty(&scenes[nsc])) nsc++; // trailing scene with no closing 'go'
return nsc;
}
// ======================== bridge I/O ===================
// Handle for the memory-mapped bridge register window.
//   base : start of the mapped window (unused when dry is set)
//   dry  : non-zero = dry-run; register writes are dropped and reads return 0
typedef struct {
    volatile uint8_t *base;
    int dry;
} bridge_t;

// Store the 32-bit value `v` at byte offset `off` of the window (no-op in dry-run).
static void wr32(bridge_t *br, int off, uint32_t v){
    if (br->dry) return;
    volatile uint32_t *reg = (volatile uint32_t*)(br->base + off);
    *reg = v;
}

// Load the 32-bit value at byte offset `off` of the window (always 0 in dry-run).
static uint32_t rd32(bridge_t *br, int off){
    if (br->dry) return 0;
    volatile uint32_t *reg = (volatile uint32_t*)(br->base + off);
    return *reg;
}
// Poll bit 0 of the status register until it is set.
// Returns 0 once ready (immediately in dry-run). After 300000 polls, each followed by a 10 us
// sleep, prints an error and returns -1.
static int wait_ready(bridge_t *br){
    if (br->dry) return 0;
    int polls_left = 300000;
    while (polls_left-- > 0){
        if (rd32(br,OFF_STATUS) & 1) return 0;
        usleep(10);
    }
    fprintf(stderr,"error: feeder never reported ready\n");
    return -1;
}
// Encode one scene and push it through the bridge: wait for ready, reset the staging write
// address, write each 64-bit word as a {lo,hi} register pair, check the address readback, trigger
// GO and wait for ready again. Prints a summary line before and after streaming.
// Returns 0 on success, -1 if encoding fails, the bridge never becomes ready, or the address
// readback does not equal the number of words written. A record-count mismatch only warns.
static int stream_scene(bridge_t *br, const scene_t *s){
    uint64_t staged[FEEDER_STG_WORDS];
    const int nwords = build_staging(s, staged);
    if (nwords < 0) return -1;

    const int exp_prims   = s->ntri + s->nrect*2;                       // feeder expands each rect -> 2 triangles
    const int exp_batches = (exp_prims + FIFO_DEPTH - 1) / FIFO_DEPTH;  // ceiling division
    printf("[scene %-14s] tris=%d rects=%d -> staged_words=%d expand_prims=%d batches~=%d\n",
           s->name?s->name:"?", s->ntri, s->nrect, nwords, exp_prims, exp_batches);

    if (wait_ready(br)) return -1;
    wr32(br, OFF_STATUS, 0);                                            // reset staging write address
    for (int k = 0; k < nwords; k++){
        const uint32_t lo = (uint32_t)(staged[k] & 0xFFFFFFFF);
        const uint32_t hi = (uint32_t)(staged[k] >> 32);
        wr32(br, OFF_LO, lo);                                           // low half first, then high half
        wr32(br, OFF_HI, hi);
    }

    const uint32_t staged_addr = rd32(br, OFF_LO);
    if (!br->dry && (int)staged_addr != nwords){
        fprintf(stderr,"error: bridge addr=%u after streaming %d words\n", staged_addr, nwords);
        return -1;
    }

    if (wait_ready(br)) return -1;                                      // staging accepted
    wr32(br, OFF_GO, 1);                                                // trigger
    if (wait_ready(br)) return -1;                                      // Ch337: ready only after the WHOLE scene drained

    const uint32_t hw_records = rd32(br, OFF_HI);
    const uint32_t hw_waits   = rd32(br, OFF_GO);
    printf(" staged_addr=%u records=%u waits=%u completion=ready%s\n",
           staged_addr, hw_records, hw_waits, br->dry?" (dry-run, no hw readback)":"");
    if (!br->dry && (int)hw_records != exp_prims)
        fprintf(stderr," warn: hw records=%u != host-expected %d (rect expansion / format?)\n", hw_records, exp_prims);
    return 0;
}
// ======================== main ===================
// Print all FEEDER_STG_WORDS staging words to stdout, one 16-digit lowercase hex value per line.
static void dump_words(const uint64_t w[FEEDER_STG_WORDS]){
    const uint64_t *const end = w + FEEDER_STG_WORDS;
    for (const uint64_t *p = w; p != end; p++)
        printf("%016llx\n", (unsigned long long)*p);
}
// Print the command-line synopsis to stderr.
static void usage(void){
    static const char text[] =
        "usage: ps2_feeder [--base 0xADDR] [--dry-run] <scene>...\n"
        " ps2_feeder [--base 0xADDR] [--dry-run] -f <scenefile>\n"
        " ps2_feeder --dump <scene> | --dump-file <scenefile> | --list\n";
    fputs(text, stderr);
}
// ps2_feeder entry point.
//   --list                     print the built-in scene names and exit
//   --dump <scene>             print one built-in scene's staging words (no board access)
//   --dump-file <scenefile>    print the staging words of the FIRST scene in a scene file
//   [--base A] [--dry-run] <scene>... | -f <scenefile>
//                              encode, validate, then stream every scene through the bridge
// Exit status: 0 success, 1 runtime/encode failure, 2 usage error.
int main(int argc, char **argv){
    unsigned long base = 0x40000000UL;
    int dry = 0;
    const char *dump = NULL, *dumpfile = NULL, *scenefile = NULL;
    const char *names[64]; int nnames=0;
    for (int i=1;i<argc;i++){
        if (!strcmp(argv[i],"--list")){ for (const char**p=named_scenes;*p;p++) printf("%s\n",*p); return 0; }
        else if (!strcmp(argv[i],"--base") && i+1<argc){
            // Reject garbage instead of letting strtoul turn it into 0: the value becomes the
            // /dev/mem offset that the register writes below go to.
            const char *arg = argv[++i];
            char *end; errno = 0;
            base = strtoul(arg,&end,0);
            if (errno || end==arg || *end){ fprintf(stderr,"error: bad --base value '%s'\n", arg); return 2; }
        }
        else if (!strcmp(argv[i],"--dry-run")) dry=1;
        else if (!strcmp(argv[i],"--dump") && i+1<argc) dump=argv[++i];
        else if (!strcmp(argv[i],"--dump-file") && i+1<argc) dumpfile=argv[++i];
        else if (!strcmp(argv[i],"-f") && i+1<argc) scenefile=argv[++i];
        else if (argv[i][0]=='-'){ usage(); return 2; }   // unknown option, or an option missing its value
        else if (nnames<64) names[nnames++]=argv[i];
        else { fprintf(stderr,"too many scenes\n"); return 1; }   // was: extra names silently dropped
    }
    // ---- dump modes (no board access) ----
    if (dump){
        scene_t s; if (build_named(dump,&s)) return 1;
        uint64_t w[FEEDER_STG_WORDS]; if (build_staging(&s,w)<0) return 1;
        dump_words(w); return 0;
    }
    if (dumpfile){
        scene_t scenes[16]; int nsc=parse_scene_file(dumpfile,scenes,16); if (nsc<0) return 1;
        if (nsc==0){ fprintf(stderr,"error: no scenes in '%s'\n",dumpfile); return 1; }
        // Only one staging image fits the dump format; say so (on stderr, stdout stays clean).
        if (nsc>1) fprintf(stderr,"note: '%s' has %d scenes; dumping only the first\n",dumpfile,nsc);
        uint64_t w[FEEDER_STG_WORDS]; if (build_staging(&scenes[0],w)<0) return 1;
        dump_words(w); return 0;
    }
    // ---- build the work list (named scenes or a scene file) ----
    scene_t scenes[64]; int nsc=0;
    if (scenefile){ nsc=parse_scene_file(scenefile,scenes,64); if (nsc<0) return 1; }
    for (int i=0;i<nnames;i++){
        if (nsc>=64){ fprintf(stderr,"too many scenes\n"); return 1; }
        if (build_named(names[i],&scenes[nsc])) return 1;
        nsc++;
    }
    if (nsc==0){ usage(); return 2; }
    // validate every scene encodes before touching hardware
    for (int i=0;i<nsc;i++){ uint64_t w[FEEDER_STG_WORDS]; if (build_staging(&scenes[i],w)<0) return 1; }
    // ---- open the bridge (unless dry-run) ----
    bridge_t br = { .base=NULL, .dry=dry };
    int fd=-1; void *map=NULL;
    if (!dry){
        fd=open("/dev/mem", O_RDWR|O_SYNC);
        if (fd<0){ fprintf(stderr,"error: open /dev/mem: %s\n", strerror(errno)); return 1; }
        map=mmap(NULL, 0x1000, PROT_READ|PROT_WRITE, MAP_SHARED, fd, (off_t)base);
        if (map==MAP_FAILED){ fprintf(stderr,"error: mmap 0x%lx: %s\n", base, strerror(errno)); close(fd); return 1; }
        br.base=(volatile uint8_t*)map;
        printf("ps2_feeder: bridge @ 0x%lx, %d scene(s)\n", base, nsc);
    } else {
        printf("ps2_feeder: DRY-RUN (encode+validate only), %d scene(s)\n", nsc);
    }
    // Stream the scenes in order; stop at the first failure.
    int rc=0;
    for (int i=0;i<nsc;i++){ if (stream_scene(&br,&scenes[i])){ rc=1; break; } }
    if (!dry){ munmap(map,0x1000); close(fd); }
    printf("ps2_feeder: %s\n", rc?"FAILED":"done");
    return rc;
}
+120
View File
@@ -0,0 +1,120 @@
// retroDE_ps2 — ps2_lpddr_probe_test (Ch352 board bring-up: isolate the HPS->LPDDR write/read path)
//
// Codex debug step 1: prove the write-probe + read-probe ALONE, with a tiny known pattern, BEFORE the texture
// cache or renderer is involved. Writes N known words to a scratch LPDDR region, reads them back, and reports
// per-word: did the auto-increment pointer (0x04C) advance? did wr_busy (0x054 bit1) assert? does the readback
// match? This pinpoints whether "word 0 ok, rest 0" is a PACING race, an AUTO-INCREMENT bug, or a read-probe
// latency issue — none of which the cache/render path can disambiguate.
//
// Build on the board: gcc -O2 -o ps2_lpddr_probe_test ps2_lpddr_probe_test.c
// Run: sudo ./ps2_lpddr_probe_test [N=64] [--base 0x40000000] [--lpddr 0x100000]
//
// Pattern: word i = 0x11110000 + i (non-trivial, low bytes change per word so a stuck/striped path is obvious).
// Scratch LPDDR base defaults to 0x100000 (1 MiB) — clear of the texture region at 0x200000.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
// Bridge register byte offsets (relative to the mapped bridge base) and status bit masks.
#define OFF_LPDDR_STATUS 0x02C // R: [3] rd_pending
#define OFF_LPDDR_RDADDR 0x03C // W: read byte addr + trigger ; R: result word
#define OFF_LPDDR_WRADDR 0x04C // RW: write byte addr (auto-inc +4 per WRDATA write)
#define OFF_LPDDR_WRDATA 0x050 // W: data word -> one 32-bit LPDDR write + addr+=4
#define OFF_TEX_FILL_CTRL 0x054 // W[0]: arm cache fill (toggle fill_start) ; R: [0]fill_done [1]wr_busy [2]write_pending
#define OFF_LPDDR_WR_ERRS 0x06C // R: write-probe non-OKAY (BRESP) responses
#define OFF_LPDDR_STATUS2 0x02C // R: [0]wr_idle [3]rd_pending [4]scan_valid [5]scan_err (same offset as OFF_LPDDR_STATUS)
#define OFF_TEX_FILL_BEATS 0x058 // R: cache-fill beats (emif_clk READ liveness)
#define OFF_TEX_FILL_BYTES 0x05C // R: cache-fill bytes
#define OFF_TEX_RD_ERRS 0x068 // R: cache-fill non-OKAY read responses
#define WR_BUSY_BIT 0x2 // 0x054 bit1 — transient wr_busy level (not referenced in this file; WR_PENDING is polled instead)
#define WR_PENDING 0x4 // 0x054 bit2 — Ch352 STABLE write-done flag (poll this, not transient wr_busy)
#define RD_PENDING 0x8 // 0x02C bit3 — read-probe still in flight
#define FILL_DONE 0x1 // 0x054 bit0 — also the value written to arm a fill
// Handle for the mapped bridge register window.
typedef struct {
    volatile uint8_t *base;   // start of the mapped window
} br_t;

// Write the 32-bit value `v` to the register at byte offset `o`.
static void w(br_t *b,int o,uint32_t v){
    volatile uint32_t *reg = (volatile uint32_t*)(b->base + o);
    *reg = v;
}

// Read the 32-bit register at byte offset `o`.
static uint32_t r(br_t *b,int o){
    volatile uint32_t *reg = (volatile uint32_t*)(b->base + o);
    return *reg;
}
// Parse an address argument (decimal / 0x-hex / 0-octal). Returns 0 and stores the value on
// success; -1 (leaving *out untouched) for empty input, trailing junk or overflow.
static int parse_addr(const char *s, unsigned long *out){
    char *end; errno = 0;
    unsigned long v = strtoul(s,&end,0);
    if (errno || end==s || *end) return -1;
    *out = v;
    return 0;
}

// LPDDR probe bring-up test: write N pattern words through the write-probe, read them back through
// the read-probe, then arm a texture-cache fill as an EMIF read-domain liveness check.
// Usage: [N] [--base ADDR] [--lpddr ADDR]; PS2_BRIDGE_BASE overrides the default bridge base.
// Exit status: 0 when there are no pointer or readback mismatches, 1 otherwise, 2 for a bad address.
int main(int argc,char**argv){
    unsigned long base=0x40000000UL, lpddr=0x00100000UL; int N=64;
    // Addresses are validated: a typo must not silently become physical address 0.
    char *e=getenv("PS2_BRIDGE_BASE");
    if(e && parse_addr(e,&base)){ fprintf(stderr,"bad PS2_BRIDGE_BASE '%s'\n",e); return 2; }
    for(int i=1;i<argc;i++){
        if(!strcmp(argv[i],"--base")&&i+1<argc){
            if(parse_addr(argv[++i],&base)){ fprintf(stderr,"bad --base '%s'\n",argv[i]); return 2; }
        }
        else if(!strcmp(argv[i],"--lpddr")&&i+1<argc){
            if(parse_addr(argv[++i],&lpddr)){ fprintf(stderr,"bad --lpddr '%s'\n",argv[i]); return 2; }
        }
        else if(argv[i][0]!='-') N=atoi(argv[i]);
    }
    if(N<1||N>4096) N=64;   // out-of-range (or unparsable) counts fall back to the default
    printf("[probe] N=%d bridge=0x%lx lpddr-scratch=0x%lx pattern word[i]=0x11110000+i\n", N, base, lpddr);
    int fd=open("/dev/mem",O_RDWR|O_SYNC);
    if(fd<0){ fprintf(stderr,"open /dev/mem (root?): %s\n",strerror(errno)); return 1; }
    void*map=mmap(NULL,0x1000,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(off_t)base);
    if(map==MAP_FAILED){ fprintf(stderr,"mmap: %s\n",strerror(errno)); close(fd); return 1; }   // don't leak fd
    br_t b={(volatile uint8_t*)map};
    // ---- WRITE: set WRADDR once, then stream; per word wait for write_pending to clear and verify
    //      that the auto-increment pointer advanced by 4 ----
    uint32_t err0 = r(&b,OFF_LPDDR_WR_ERRS);
    w(&b,OFF_LPDDR_WRADDR,(uint32_t)lpddr);
    int ptr_bad=0, pending_stuck=0;
    for(int i=0;i<N;i++){
        uint32_t data=0x11110000u+(uint32_t)i;
        w(&b,OFF_LPDDR_WRDATA,data);
        // Wait for the STABLE write_pending flag (0x054 bit2) to clear = the write-probe completed THIS write.
        // Unlike the transient wr_busy level, this can't be missed by a slow Linux poll (Codex).
        int g=0; while((r(&b,OFF_TEX_FILL_CTRL)&WR_PENDING) && g<2000000) g++;
        if(g>=2000000) pending_stuck++;
        uint32_t ptr=r(&b,OFF_LPDDR_WRADDR), exp=(uint32_t)lpddr+(uint32_t)(i+1)*4;
        if(ptr!=exp){ if(ptr_bad<6) printf(" PTR after word %d: 0x%08x exp 0x%08x\n",i,ptr,exp); ptr_bad++; }
    }
    uint32_t err1=r(&b,OFF_LPDDR_WR_ERRS);
    printf("[probe] wrote %d words. ptr-mismatches=%d write_pending_stuck=%d wr_bresp_errs: %u->%u\n",
           N, ptr_bad, pending_stuck, err0, err1);
    // ---- READBACK: per word set RDADDR, poll rd_pending clear, read result. Count rd_pending TIMEOUTS
    //      explicitly (Codex): if rd_pending never clears, the value read below is STALE — not a real LPDDR read. ----
    int rd_bad=0, rd_timeouts=0;
    for(int i=0;i<N;i++){
        uint32_t addr=(uint32_t)lpddr+(uint32_t)i*4, exp=0x11110000u+(uint32_t)i;
        w(&b,OFF_LPDDR_RDADDR,addr);
        int g=0; while((r(&b,OFF_LPDDR_STATUS)&RD_PENDING)&&g<1000000) g++;
        if(g>=1000000) rd_timeouts++; // rd_pending stuck -> the read is stale
        uint32_t got=r(&b,OFF_LPDDR_RDADDR);
        if(got!=exp){ if(rd_bad<8) printf(" RD word %d @0x%08x: got 0x%08x exp 0x%08x\n",i,addr,got,exp); rd_bad++; }
    }
    printf("[probe] readback mismatches=%d/%d rd_pending_timeouts=%d%s\n", rd_bad, N, rd_timeouts,
           rd_timeouts? " (reads were STALE, not real LPDDR reads)":"");
    // ---- EMIF READ-domain liveness: arm the texture-cache fill (issues emif_clk reads from LPDDR and
    //      counts beats), independent of the write path. This disambiguates the failure:
    //        beats advance -> emif READ domain is ALIVE => the bug is WRITE-specific (probe/pulse wiring).
    //        beats stay 0  -> the whole emif_clk domain is DEAD (EMIF didn't calibrate / clock / reset),
    //                         which by itself explains busy-never-high AND the all-zero readback.
    //      Snapshot BEFORE arming (Codex): the fill counters reset only after the new toggle reaches the EMIF
    //      domain, so a stale fill_done=1 / beats from an earlier fill would be a false positive. Require, in order:
    //      fill_done seen LOW after arming (the NEW fill is in flight), then fill_done HIGH (it completed), with
    //      final beats=8192 / bytes=262144 / rd_errs=0.
    uint32_t st0 = r(&b,OFF_LPDDR_STATUS2);
    uint32_t fd0 = (r(&b,OFF_TEX_FILL_CTRL)&FILL_DONE)?1:0;
    uint32_t beats0 = r(&b,OFF_TEX_FILL_BEATS);
    w(&b,OFF_TEX_FILL_CTRL,FILL_DONE); // arm: toggle fill_start
    int saw_low=0; for(int g=0;g<8000000;g++){ if(!(r(&b,OFF_TEX_FILL_CTRL)&FILL_DONE)){saw_low=1;break;} }
    int fdone=0; for(int g=0;g<8000000;g++){ if( r(&b,OFF_TEX_FILL_CTRL)&FILL_DONE){fdone=1;break;} }
    uint32_t beats=r(&b,OFF_TEX_FILL_BEATS), bytes=r(&b,OFF_TEX_FILL_BYTES), rderr=r(&b,OFF_TEX_RD_ERRS);
    int fill_ok = saw_low && fdone && beats==8192 && bytes==262144 && rderr==0;
    printf("[probe] EMIF fill liveness: pre(status=0x%08x fill_done=%u beats=%u) -> armed: saw_done_low=%d final_done=%d\n",
           st0, fd0, beats0, saw_low, fdone);
    printf("[probe] final beats=%u (exp 8192) bytes=%u (exp 262144) rd_errs=%u (exp 0)\n", beats, bytes, rderr);
    if(fill_ok) printf("[probe] => emif READ domain ALIVE and well — failure is WRITE-specific.\n");
    else if(beats||fdone) printf("[probe] => emif read PARTIALLY working (counts off) — investigate fill path / cal margin.\n");
    else printf("[probe] => emif_clk domain appears DEAD (cal/clock/reset) — explains busy-never-high + zero readback.\n");
    if(rd_bad==0 && ptr_bad==0) printf("[probe] write/read path clean.\n");
    else printf("[probe] FAIL — write/read path broken; see liveness line above for which domain.\n");
    munmap(map,0x1000); close(fd);
    return (rd_bad||ptr_bad)?1:0;
}
+147
View File
@@ -0,0 +1,147 @@
// retroDE_ps2 — ps2_sh3_tex_upload (Ch352)
//
// Uploads the reconstructed 512x512 PSMT8 SH3 texture (65536 32-bit words) into FPGA-private LPDDR4B via the
// PS2 HPS-bridge write-probe, verifies the readback, arms the texture-cache fill, and retriggers the feeder.
// One command — mmap'd register pokes (NOT 65536 devmem process spawns). Same bridge protocol as ps2_feeder.c
// and docs/hardware/ps2_lpddr_tex_test.sh (Ch322), just scaled to the full 256 KiB texture.
//
// Build on the board: gcc -O2 -o ps2_sh3_tex_upload ps2_sh3_tex_upload.c
// Run (after fit+boot): sudo ./ps2_sh3_tex_upload sh3_real_tex_lpddr.mem
// (copy sh3_real_tex_lpddr.mem from sim/data/top_psmct32_raster_demo/ to the board alongside the binary.)
//
// Sequence: (1) write WRADDR=0x200000, stream 65536 words to WRDATA; (2) read each back via the read-probe and
// confirm sum32/xor32 match the file; (3) arm cache fill (0x054), poll fill_done, check beats/bytes/rd_errs;
// (4) pulse the feeder retrigger (0x0E8) so the scene re-renders with the now-warm cache.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
// Bridge register byte offsets (relative to the mapped bridge base), status bit masks and the
// fixed texture geometry.
#define OFF_LPDDR_STATUS 0x02C // R: [3] rd_pending (read-probe in flight)
#define OFF_LPDDR_RDADDR 0x03C // W: set read byte addr + trigger ; R: 32-bit word
#define OFF_LPDDR_WRADDR 0x04C // W: set LPDDR byte addr (auto-increments +4 per WRDATA write)
#define OFF_LPDDR_WRDATA 0x050 // W: data word -> single 32-bit LPDDR write + addr += 4
#define OFF_TEX_FILL_CTRL 0x054 // W[0]: arm cache fill ; R: [0]fill_done [1]wr_busy [2]write_pending
#define OFF_LPDDR_WR_ERRS 0x06C // R: write-probe non-OKAY (BRESP) responses (expect 0)
#define WR_PENDING_BIT 0x4 // 0x054 bit2 — Ch352 STABLE write-done flag (poll instead of transient wr_busy)
#define RD_PENDING_BIT 0x8 // 0x02C bit3
#define OFF_TEX_FILL_BEATS 0x058 // R: beats filled (expect TEX_BYTES/32 = 8192)
#define OFF_TEX_FILL_BYTES 0x05C // R: bytes filled (expect 262144)
#define OFF_TEX_RD_ERRS 0x068 // R: fill non-OKAY read responses (expect 0)
#define OFF_TEX_FILL_CRC 0x070 // R: sum32 of EVERY word the cache wrote into tex_mem (must == file sum32)
#define OFF_FEEDER_GO 0x0E8 // W[0]: trigger/retrigger the feeder
#define N_WORDS 65536 // 512*512 PSMT8 / 4
#define TEX_BYTES 262144 // N_WORDS * 4
#define N_BEATS 8192 // TEX_BYTES / 32
// Handle for the mapped bridge register window.
//   base : start of the mapped window (unused when dry is set)
//   dry  : non-zero = dry-run; register writes are dropped and reads return 0
typedef struct {
    volatile uint8_t *base;
    int dry;
} bridge_t;

// Write the 32-bit value `v` to the register at byte offset `off` (no-op in dry-run).
static void wr32(bridge_t *b, int off, uint32_t v){
    if (b->dry) return;
    volatile uint32_t *reg = (volatile uint32_t*)(b->base + off);
    *reg = v;
}

// Read the 32-bit register at byte offset `off` (always 0 in dry-run).
static uint32_t rd32(bridge_t *b, int off){
    if (b->dry) return 0;
    volatile uint32_t *reg = (volatile uint32_t*)(b->base + off);
    return *reg;
}
// Parse an address argument (decimal / 0x-hex / 0-octal). Returns 0 and stores the value on
// success; -1 (leaving *out untouched) for empty input, trailing junk or overflow.
static int parse_addr(const char *s, unsigned long *out){
    char *end; errno = 0;
    unsigned long v = strtoul(s,&end,0);
    if (errno || end==s || *end) return -1;
    *out = v;
    return 0;
}

// ps2_sh3_tex_upload entry point: load N_WORDS hex words from the texture file, stream them into
// LPDDR through the write-probe, verify by readback, arm the texture-cache fill and check its
// counters, then retrigger the feeder.
// Exit status: 0 on completion (fill problems only warn), 1 on file / bridge / readback failure,
// 2 on a usage error or an unparsable address.
int main(int argc, char **argv){
    unsigned long base = 0x40000000UL;        // PS2 HPS-bridge base (override --base or PS2_BRIDGE_BASE)
    unsigned long lpddr_base = 0x00200000;    // EMIF byte base where the texture is staged (= TEX_LPDDR_BASE RTL)
    const char *texfile = "sh3_real_tex_lpddr.mem";
    int dry=0, do_fill=1, do_retrig=1;
    // Addresses are validated: a typo must not silently become physical address 0.
    char *env = getenv("PS2_BRIDGE_BASE");
    if (env && parse_addr(env,&base)){ fprintf(stderr,"error: bad PS2_BRIDGE_BASE '%s'\n", env); return 2; }
    for (int i=1;i<argc;i++){
        if (!strcmp(argv[i],"--base") && i+1<argc){
            if (parse_addr(argv[++i],&base)){ fprintf(stderr,"error: bad --base '%s'\n", argv[i]); return 2; }
        }
        else if (!strcmp(argv[i],"--lpddr-base") && i+1<argc){
            if (parse_addr(argv[++i],&lpddr_base)){ fprintf(stderr,"error: bad --lpddr-base '%s'\n", argv[i]); return 2; }
        }
        else if (!strcmp(argv[i],"--dry-run")) dry=1;
        else if (!strcmp(argv[i],"--no-fill")) do_fill=0;
        else if (!strcmp(argv[i],"--no-retrigger")) do_retrig=0;
        else if (argv[i][0] != '-') texfile = argv[i];
        else { fprintf(stderr,"usage: %s [tex.mem] [--base 0x40000000] [--lpddr-base 0x200000] [--dry-run] [--no-fill] [--no-retrigger]\n", argv[0]); return 2; }
    }
    // ---- load the texture hex (.mem: one 32-bit word per line) ----
    static uint32_t tex[N_WORDS];
    FILE *f = fopen(texfile,"r");
    if (!f){ fprintf(stderr,"error: cannot open '%s': %s\n", texfile, strerror(errno)); return 1; }
    int n=0; char line[64];
    while (n<N_WORDS && fgets(line,sizeof line,f)){
        char *s=line; while(*s==' '||*s=='\t') s++;
        if (*s=='/'||*s=='\n'||*s==0) continue; // skip blank / // banner lines
        tex[n++] = (uint32_t)strtoul(s,NULL,16);
    }
    fclose(f);
    if (n != N_WORDS){ fprintf(stderr,"error: %s has %d words, expected %d\n", texfile, n, N_WORDS); return 1; }
    // expected checksums (source of truth = the file)
    uint32_t sum=0, xr=0; for (int i=0;i<N_WORDS;i++){ sum+=tex[i]; xr^=tex[i]; }
    printf("[ps2_sh3_tex_upload] %d words from %s sum32=0x%08x xor32=0x%08x -> LPDDR 0x%lx (bridge base 0x%lx%s)\n",
           n, texfile, sum, xr, lpddr_base, base, dry?", DRY-RUN":"");
    // ---- open the bridge ----
    bridge_t br = {0,dry}; int fd=-1; void *map=NULL;
    if (!dry){
        fd=open("/dev/mem", O_RDWR|O_SYNC);
        if (fd<0){ fprintf(stderr,"error: open /dev/mem (run as root?): %s\n", strerror(errno)); return 1; }
        map=mmap(NULL,0x1000,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(off_t)base);
        if (map==MAP_FAILED){ fprintf(stderr,"error: mmap 0x%lx: %s\n", base, strerror(errno)); close(fd); return 1; }
        br.base=(volatile uint8_t*)map;
    }
    // ---- (1) upload: set WRADDR then stream WRDATA. CRITICAL: after each word wait for the STABLE
    //      write_pending flag (0x054 bit2) to clear so the write-probe actually COMMITS before the next
    //      write — otherwise the fast mmap writes outrun the CDC/AXI commit and get DROPPED (the bug: most
    //      words read back as 0). The Ch322 devmem script got away with no poll only because devmem
    //      process-spawns are slow. ----
    int wr_stuck=0;
    wr32(&br, OFF_LPDDR_WRADDR, (uint32_t)lpddr_base);
    for (int i=0;i<N_WORDS;i++){
        wr32(&br, OFF_LPDDR_WRDATA, tex[i]);
        if (!dry){
            int g=0; while ((rd32(&br, OFF_TEX_FILL_CTRL) & WR_PENDING_BIT) && g<2000000) g++;
            if (g>=2000000) wr_stuck++;   // poll budget exhausted: this word was never confirmed
        }
    }
    if (!dry){
        uint32_t werr = rd32(&br, OFF_LPDDR_WR_ERRS);
        printf("[ps2_sh3_tex_upload] uploaded %d words (%d KiB). wr_bresp_errs=%u (exp 0)\n", N_WORDS, TEX_BYTES/1024, werr);
        if (werr) fprintf(stderr,"WARN: %u write BRESP errors.\n", werr);
        if (wr_stuck) fprintf(stderr,"WARN: write_pending never cleared for %d words — those writes may have been dropped.\n", wr_stuck);
    } else printf("[ps2_sh3_tex_upload] uploaded %d words (%d KiB).\n", N_WORDS, TEX_BYTES/1024);
    // ---- (2) readback verify via the read-probe (guardrail #2). Poll rd_pending (0x02C bit3) clear before
    //      reading the latched word — the LPDDR read has latency; reading immediately returns 0/stale. ----
    int mism=0, rd_timeouts=0; uint32_t rsum=0, rxr=0;
    if (!dry){
        for (int i=0;i<N_WORDS;i++){
            wr32(&br, OFF_LPDDR_RDADDR, (uint32_t)(lpddr_base + (unsigned)i*4)); // set addr + trigger read
            { int g=0; while ((rd32(&br, OFF_LPDDR_STATUS) & RD_PENDING_BIT) && g<1000000) g++;
              if (g>=1000000) rd_timeouts++; }   // rd_pending stuck -> the word read below is stale
            uint32_t v = rd32(&br, OFF_LPDDR_RDADDR); // latched word
            rsum+=v; rxr^=v;
            if (v != tex[i] && mism<8) fprintf(stderr," readback mismatch @word %d: got 0x%08x exp 0x%08x\n", i, v, tex[i]);
            if (v != tex[i]) mism++;
        }
        printf("[ps2_sh3_tex_upload] readback sum32=0x%08x xor32=0x%08x mismatches=%d\n", rsum, rxr, mism);
        if (rd_timeouts) fprintf(stderr,"WARN: rd_pending never cleared for %d reads — those values were STALE.\n", rd_timeouts);
        if (mism){ fprintf(stderr,"FAIL: %d readback mismatches — bad upload, NOT filling cache.\n", mism); munmap(map,0x1000); close(fd); return 1; }
    }
    // ---- (3) arm the cache fill + poll fill_done; check beats/bytes/rd_errs.
    //      fill_done may still read 1 from an EARLIER fill until the arm toggle reaches the EMIF domain
    //      (see ps2_lpddr_probe_test), so first wait to see it LOW (the new fill is in flight), then
    //      HIGH (it completed). Otherwise the counters/CRC below could belong to the previous fill. ----
    if (do_fill && !dry){
        wr32(&br, OFF_TEX_FILL_CTRL, 0x1);
        int saw_low=0; for (int i=0;i<8000000;i++){ if (!(rd32(&br,OFF_TEX_FILL_CTRL)&0x1)){ saw_low=1; break; } }
        int done=0; for (int i=0;i<8000000;i++){ if (rd32(&br,OFF_TEX_FILL_CTRL)&0x1){ done=1; break; } }
        uint32_t beats=rd32(&br,OFF_TEX_FILL_BEATS), bytes=rd32(&br,OFF_TEX_FILL_BYTES), errs=rd32(&br,OFF_TEX_RD_ERRS);
        uint32_t fcrc=rd32(&br,OFF_TEX_FILL_CRC);
        printf("[ps2_sh3_tex_upload] cache fill: done=%d beats=%u (exp %d) bytes=%u (exp %d) rd_errs=%u (exp 0)\n",
               done, beats, N_BEATS, bytes, TEX_BYTES, errs);
        // The cache's sum32 over the words it wrote into tex_mem must equal the file's sum32. If it matches,
        // tex_mem is byte-correct on silicon — so any residual texture corruption is NOT the cache contents.
        printf("[ps2_sh3_tex_upload] cache fill_crc=0x%08x (exp file sum32=0x%08x) -> tex_mem %s\n",
               fcrc, sum, (fcrc==sum) ? "INTEGRITY OK" : "CORRUPT");
        if (!saw_low)
            fprintf(stderr,"WARN: fill_done never dropped after arming — the fill stats above may be from an EARLIER fill.\n");
        if (!done || beats!=N_BEATS || bytes!=TEX_BYTES || errs!=0)
            fprintf(stderr,"WARN: cache fill stats off — texels may be wrong; check EMIF cal + LPDDR base.\n");
        if (fcrc!=sum)
            fprintf(stderr,"WARN: cache fill_crc mismatch — tex_mem corrupt on board (NOT a divider/sampler issue).\n");
    }
    // ---- (4) retrigger the feeder so the scene re-renders with the warm cache ----
    if (do_retrig && !dry){ wr32(&br, OFF_FEEDER_GO, 0x1); printf("[ps2_sh3_tex_upload] feeder retriggered.\n"); }
    if (!dry){ munmap(map,0x1000); close(fd); }
    printf("[ps2_sh3_tex_upload] DONE — check HDMI vs the crop reference (recon/sh3_real_ref.png).\n");
    return 0;
}
+30
View File
@@ -0,0 +1,30 @@
#!/bin/sh
# retroDE_ps2 — Ch340 host gate. No board, no real dump needed (uses content-clean synthetic
# fixtures). Proves: (1) byte-exact container/GIF parse, (2) census classifies supported vs
# unsupported, (3) the translator emits a ps2_feeder scene for supported input that the Ch339
# encoder accepts, and (4) fail-closed on unsupported input.
set -u
HERE="$(cd "$(dirname "$0")" && pwd)"
# Private scratch directory: nothing left over from an earlier run (a stale binary or scene
# file) can influence the result, and concurrent runs do not collide. Removed on exit.
TMP="$(mktemp -d)" || { echo "mktemp failed"; exit 1; }
trap 'rm -rf "$TMP"' EXIT
fail=0
echo "== 1. byte-exact parser test =="
python3 "$HERE/test_gs_parse.py" >"$TMP/gp.txt" 2>&1
grep -q "RESULT: PASS" "$TMP/gp.txt" && echo " parser byte-exact PASS" || { echo " parser FAIL"; tail -5 "$TMP/gp.txt"; fail=1; }
echo "== 2. translator chain on the SUPPORTED fixture =="
python3 "$HERE/gs_make_synthetic.py" >/dev/null 2>&1
python3 "$HERE/gs_translate.py" "$HERE/../captures/gs/synthetic/mini_supported.gs" --scene "$TMP/sup.scene" > "$TMP/sup.txt" 2>&1
grep -q "census classes : translated=2" "$TMP/sup.txt" && echo " census: translated=2 OK" || { echo " census wrong"; grep -a "census classes" "$TMP/sup.txt"; fail=1; }
grep -q "EARLIEST SUPPORTED SEGMENT: 2 triangles" "$TMP/sup.txt" && echo " earliest segment: 2 tris OK" || { echo " segment wrong"; fail=1; }
# A failed compile is reported as a failure instead of being hidden behind 2>/dev/null.
if gcc -O2 -o "$TMP/psf" "$HERE/ps2_feeder.c" 2>"$TMP/gcc.txt"; then
  if "$TMP/psf" --dry-run -f "$TMP/sup.scene" >"$TMP/psf.txt" 2>&1 && grep -q "tris=2 rects=0" "$TMP/psf.txt"; then
    echo " ps2_feeder accepts the emitted scene (2 tris) OK"
  else echo " ps2_feeder rejected the scene"; tail -3 "$TMP/psf.txt"; fail=1; fi
else echo " ps2_feeder failed to compile"; tail -5 "$TMP/gcc.txt"; fail=1; fi
echo "== 3. fail-closed on UNSUPPORTED fixture (mini.gs: textured triangle) =="
python3 "$HERE/gs_translate.py" "$HERE/../captures/gs/synthetic/mini.gs" --scene "$TMP/uns.scene" > "$TMP/uns.txt" 2>&1
grep -q "NO SUPPORTED SEGMENT" "$TMP/uns.txt" && echo " no segment + fail-closed OK" || { echo " should have been unsupported"; grep -a "SEGMENT" "$TMP/uns.txt"; fail=1; }
# The scratch directory is fresh, so the file can only exist if THIS run wrote it.
[ ! -f "$TMP/uns.scene" ] && echo " no scene file written (fail-closed) OK" || { echo " scene wrongly emitted"; fail=1; }
echo "RESULT: $([ $fail -eq 0 ] && echo PASS || echo FAIL)"
exit $fail
+70
View File
@@ -0,0 +1,70 @@
#!/usr/bin/env python3
"""Ch340 byte-exact parser gate. Regenerates the synthetic fixture and asserts gs_parse decodes it
into the EXACT expected normalized event stream (container header + every GIF mode). No board, no
real dump needed. Exit non-zero on any mismatch."""
import os, sys, subprocess
HERE = os.path.dirname(__file__)
sys.path.insert(0, HERE)
import gs_parse
def main():
    """Run the byte-exact parser gate and return 0 on success, 1 on any mismatch.

    Regenerates the synthetic fixtures, parses ``mini.gs`` and compares the header
    fields and the ordered event stream against the expected values, then checks
    that the packed ST event in ``mini_st.gs`` exposes its Q lane as ``q_stq``.
    One ok/FAIL line is printed per check, followed by a RESULT line.
    """
    synthetic = os.path.join(HERE, "..", "captures", "gs", "synthetic")
    subprocess.run([sys.executable, os.path.join(HERE, "gs_make_synthetic.py")], check=True)
    header, events = gs_parse.parse_dump(os.path.join(synthetic, "mini.gs"))
    failures = []

    def check(cond, msg):
        # Print the verdict for one assertion and remember any failure.
        if cond:
            print(" ok " + msg)
        else:
            print(" FAIL " + msg)
            failures.append(msg)

    def rgbaq(r, g, b):
        # RGBAQ register value with alpha fixed at 0x80.
        return r | (g << 8) | (b << 16) | (0x80 << 24)

    def xyz2(x, y):
        # XYZ2 register value: x and y shifted left by 4, z fixed at 0x5000.
        return (x << 4) | ((y << 4) << 16) | (0x5000 << 32)

    check(header.serial == "SYNTH001", f"serial == SYNTH001 (got {header.serial!r})")
    check(header.crc == 0x12345678, f"crc == 0x12345678 (got 0x{header.crc:08x})")
    check(header.state_size == 16, f"state_size == 16 (got {header.state_size})")

    # The expected ordered (kind, reg, value) skeleton of the event stream.
    transfer = ("TRANSFER", "", 0)
    giftag = ("GIFTAG", "", 0)
    expected = [
        transfer,
        giftag,
        ("GSREG", "FRAME_1", 0x0000_0000_0C00_1807),
        transfer,
        giftag,
        ("GSREG", "PRIM", 3 | (1 << 4) | (1 << 6)),  # via PRE
        ("GSREG", "RGBAQ", rgbaq(0xFF, 0, 0)),  # vtx0 red
        ("GSREG", "XYZ2", xyz2(100, 50)),
        ("GSREG", "RGBAQ", rgbaq(0x00, 0xFF, 0)),  # vtx1 green
        ("GSREG", "XYZ2", xyz2(200, 50)),
        ("GSREG", "RGBAQ", rgbaq(0x00, 0, 0xFF)),  # vtx2 blue
        ("GSREG", "XYZ2", xyz2(100, 150)),
        transfer,
        giftag,
        ("IMAGE", "", 0),
        ("FRAME_BOUNDARY", "", 0),
    ]
    actual = [(ev.kind, ev.reg, ev.value) for ev in events]
    check(len(actual) == len(expected), f"event count == {len(expected)} (got {len(actual)})")
    for idx, (have, want) in enumerate(zip(actual, expected)):
        # compare kind always; reg+value only for register writes
        if want[0] == "GSREG":
            check(have == want, f"event[{idx}] {want} (got {have})")
        else:
            check(have[0] == want[0], f"event[{idx}] kind {want[0]} (got {have[0]})")

    # IMAGE qwc + no malformed
    images = [ev for ev in events if ev.kind == "IMAGE"]
    check(len(images) == 1 and images[0].info.get("qwc") == 2,
          f"one IMAGE qwc==2 (got {[ev.info for ev in images]})")
    check(all(ev.kind != "MALFORMED" for ev in events), "no MALFORMED events")

    # Ch342 — PACKED ST must expose the STQ Q lane as info["q_stq"] (don't drop RGBAQ.Q).
    _, st_events = gs_parse.parse_dump(os.path.join(synthetic, "mini_st.gs"))
    st_regs = [ev for ev in st_events if ev.kind == "GSREG" and ev.reg == "ST"]
    check(len(st_regs) == 1, f"mini_st: one ST event (got {len(st_regs)})")
    if st_regs:
        st = st_regs[0]
        check((st.value & 0xFFFFFFFF) == 0x11111111, "mini_st: ST.S == 0x11111111")
        check(((st.value >> 32) & 0xFFFFFFFF) == 0x22222222, "mini_st: ST.T == 0x22222222")
        check(st.info.get("q_stq") == 0x33333333,
              f"mini_st: q_stq == 0x33333333 (got {st.info.get('q_stq')})")

    print("RESULT:", "FAIL" if failures else "PASS")
    return 1 if failures else 0


if __name__ == "__main__":
    sys.exit(main())
+50
View File
@@ -0,0 +1,50 @@
#!/bin/sh
# retroDE_ps2 — Ch339 host-encoder gate. Compiles ps2_feeder and proves its staging output is
# BYTE-EQUIVALENT to the golden bake.py fixtures for every representative Ch333-338 scene, then
# checks that malformed / oversized / out-of-range commands are rejected cleanly. No board needed.
set -u
HERE="$(cd "$(dirname "$0")" && pwd)"
REPO="$(cd "$HERE/.." && pwd)"
MEM="$REPO/sim/data/top_psmct32_raster_demo"
# Private scratch directory for the binary and every temp file; removed on exit.
TMP="$(mktemp -d)" || { echo "mktemp failed"; exit 1; }
trap 'rm -rf "$TMP"' EXIT
BIN="$TMP/ps2_feeder"
fail=0
echo "== compile =="
gcc -O2 -Wall -Werror -o "$BIN" "$HERE/ps2_feeder.c" || { echo "COMPILE FAILED"; exit 1; }
echo "== regenerate goldens =="
python3 "$MEM/bake.py" >/dev/null 2>&1 || { echo "bake.py failed"; exit 1; }
echo "== byte-equivalence vs golden .mem =="
check() { # $1 scene $2 golden.mem
  "$BIN" --dump "$1" > "$TMP/dump.txt" 2>"$TMP/err.txt"
  # drop the first two lines of the golden before comparing
  tail -n +3 "$MEM/$2" > "$TMP/gold.txt"
  if diff "$TMP/dump.txt" "$TMP/gold.txt" >/dev/null 2>&1; then
    echo " MATCH $1 == $2"
  else
    echo " MISMATCH $1 != $2"; fail=1
  fi
}
check color-tri feeder_color_tri.mem
check native-rect feeder_native_rect.mem
check gouraud-tri feeder_gouraud_tri.mem
check accum feeder_accum.mem
check retrigger-a feeder_scene_a.mem
check retrigger-b feeder_scene_b.mem
check zpersist-near feeder_zpersist_near_first.mem
check zpersist-far feeder_zpersist_far_first.mem
check zpersist-grad feeder_zpersist_grad.mem
check sprite feeder_sprite.mem # Ch345a — runtime textured-alpha SPRITE staging
echo "== rejection of bad input (each must exit non-zero) =="
# reject runs as the LAST stage of a pipeline, which dash/bash execute in a subshell: a
# `fail=1` assigned inside it would be lost. It therefore reports through its exit status
# (0 = input was rejected, 1 = input was wrongly accepted) and the caller sets fail.
reject() { # $1 label ; rest = file contents on stdin
  cat > "$TMP/sc.txt"
  if "$BIN" --dry-run -f "$TMP/sc.txt" >/dev/null 2>&1; then echo " NOT-REJECTED $1"; return 1; fi
  echo " rejected $1"
}
yes 'tritile 0 0x5000 255 0 0' | head -28 | reject "oversized (28 tris)" || fail=1
printf 'tri 1 2 3\n' | reject "malformed tri" || fail=1
printf 'tritile 99 0x5000 255 0 0\n' | reject "tile out of range" || fail=1
printf 'bogus 1 2 3\n' | reject "unknown op" || fail=1
echo "RESULT: $([ $fail -eq 0 ] && echo PASS || echo FAIL)"
exit $fail