Initial commit: retroDE_ps2 — first-of-its-kind PS2 GS FPGA core (DE25-Nano / Agilex 5)
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression (272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps, and all dump-derived textures/traces) is excluded via .gitignore and stays local. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,168 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Convert a PS2 EE ELF into a tb_ee_core_elf_runner-compatible image +
|
||||
manifest pair (no external deps; pure stdlib).
|
||||
|
||||
Emits two files at the requested output prefix, identical in format
|
||||
to generate_synthetic_image.py:
|
||||
|
||||
<prefix>.image.hex iverilog $readmemh, @<qw_idx> directives for
|
||||
populated 128-bit qwords only. Each line is
|
||||
32 hex chars (MSB-first, byte 15 leftmost).
|
||||
|
||||
<prefix>.manifest.hex line 0 = ELF entry point (32-bit hex)
|
||||
line 1 = stack-top hint (32-bit hex)
|
||||
|
||||
Supports ELF32 little-endian, ELFCLASS32, EM_MIPS, e_type ET_EXEC or
|
||||
ET_DYN. PT_LOAD segments are placed at their physical address
|
||||
(low 29 bits of p_vaddr — strips the kuseg/kseg0/kseg1 alias bits so
|
||||
the data lands at the correct phys offset in ee_ram_stub).
|
||||
|
||||
Stack-top is approximated as (ee_ram_bytes - 0x10) since real PS2
|
||||
ELFs don't carry a stack pointer in their headers; the TB will set
|
||||
$sp to this if the manifest is read.
|
||||
|
||||
Usage:
|
||||
elf_to_eeram.py --in path/to/game.elf --out-prefix /tmp/game
|
||||
|
||||
Verdict-aware notes:
|
||||
* Segments overflowing the EE RAM image cause a fatal error.
|
||||
* Segments overlapping each other are flagged but not fatal — the
|
||||
later one wins (matches how a real loader would behave).
|
||||
"""
|
||||
|
||||
import sys
|
||||
import struct
|
||||
import argparse
|
||||
|
||||
|
||||
# ELF identification / header constants used by the parser below.
ELFMAG = b"\x7fELF"   # e_ident[0:4] magic bytes
ELFCLASS32 = 1        # e_ident[4]: 32-bit object
ELFDATA2LSB = 1       # e_ident[5]: little-endian data
EM_MIPS = 8           # e_machine value accepted by the parser
ET_EXEC = 2           # e_type: executable
ET_DYN = 3            # e_type: shared object / position-independent
PT_LOAD = 1           # p_type: loadable segment


def parse_elf32_le(data: bytes):
    """Return (entry, [(p_vaddr, p_offset, p_filesz, p_memsz), ...])
    for PT_LOAD segments of a 32-bit little-endian MIPS ELF.

    Raises ValueError on bad magic / wrong class / wrong endianness /
    wrong arch / unsupported e_type, and also when the program-header
    table is malformed (entry size too small, or an entry that lies
    outside the file) so callers only ever have to handle ValueError.
    """
    # 52 bytes = 16-byte e_ident + 36-byte ELF32 header body.
    if len(data) < 52 or data[:4] != ELFMAG:
        raise ValueError("not an ELF file (bad magic)")
    if data[4] != ELFCLASS32:
        raise ValueError(f"only ELFCLASS32 supported (got class={data[4]})")
    if data[5] != ELFDATA2LSB:
        raise ValueError(f"only little-endian supported (got data={data[5]})")

    (e_type, e_machine, _e_version, e_entry, e_phoff, _e_shoff, _e_flags,
     _e_ehsize, e_phentsize, e_phnum, _e_shentsize, _e_shnum, _e_shstrndx) = \
        struct.unpack_from("<HHIIIIIHHHHHH", data, 16)

    if e_machine != EM_MIPS:
        raise ValueError(f"only EM_MIPS supported (got machine={e_machine})")
    if e_type not in (ET_EXEC, ET_DYN):
        raise ValueError(f"only ET_EXEC / ET_DYN supported (got type={e_type})")

    phdr = struct.Struct("<IIIIIIII")  # one ELF32 program header (32 bytes)
    if e_phnum and e_phentsize < phdr.size:
        raise ValueError(
            f"e_phentsize={e_phentsize} is smaller than an ELF32 program "
            f"header ({phdr.size} bytes)")

    pt_load = []
    for i in range(e_phnum):
        off = e_phoff + i * e_phentsize
        # Check explicitly: unpack_from would raise struct.error, which is
        # not a ValueError and would escape the documented contract.
        if off + phdr.size > len(data):
            raise ValueError(
                f"program header {i} at offset 0x{off:x} lies outside the "
                f"file ({len(data)} bytes)")
        (p_type, p_offset, p_vaddr, _p_paddr, p_filesz, p_memsz,
         _p_flags, _p_align) = phdr.unpack_from(data, off)
        if p_type == PT_LOAD:
            pt_load.append((p_vaddr, p_offset, p_filesz, p_memsz))

    return e_entry, pt_load
|
||||
|
||||
|
||||
def build_image(elf_bytes: bytes, ee_ram_bytes: int):
    """Lay the PT_LOAD segments of an ELF out in a zeroed EE-RAM image.

    Args:
        elf_bytes: full contents of the ELF file.
        ee_ram_bytes: size of the EE-RAM image to build, in bytes.

    Returns:
        (entry, image): the ELF entry point and a bytearray of exactly
        ee_ram_bytes bytes.

    Raises:
        ValueError: if the ELF is rejected by parse_elf32_le, a segment's
        file data lies outside the file, p_filesz exceeds p_memsz, or a
        segment does not fit in the image.
    """
    entry, segs = parse_elf32_le(elf_bytes)
    image = bytearray(ee_ram_bytes)

    placed = []
    for (p_vaddr, p_offset, p_filesz, p_memsz) in segs:
        phys = p_vaddr & 0x1FFFFFFF        # strip kseg/kuseg bits
        if p_filesz > p_memsz:
            raise ValueError(
                f"PT_LOAD at vaddr=0x{p_vaddr:08x} has filesz=0x{p_filesz:x} "
                f"> memsz=0x{p_memsz:x}")
        # A short chunk would make the slice assignment below *shrink* the
        # bytearray, silently shifting everything after it.
        if p_offset + p_filesz > len(elf_bytes):
            raise ValueError(
                f"PT_LOAD at vaddr=0x{p_vaddr:08x} file range "
                f"0x{p_offset:x}+0x{p_filesz:x} lies outside the ELF "
                f"({len(elf_bytes)} bytes)")
        if phys + p_memsz > ee_ram_bytes:
            raise ValueError(
                f"PT_LOAD at vaddr=0x{p_vaddr:08x} phys=0x{phys:08x} "
                f"size=0x{p_memsz:x} overflows EE RAM (0x{ee_ram_bytes:x})")
        # Detect overlap (informational only).
        for (lo, hi) in placed:
            if not (phys + p_memsz <= lo or phys >= hi):
                print(f"[elf_to_eeram] WARNING: PT_LOAD at phys=0x{phys:08x} "
                      f"size=0x{p_memsz:x} overlaps prior placement",
                      file=sys.stderr)
        placed.append((phys, phys + p_memsz))

        # Copy p_filesz bytes from file at p_offset -> phys (same length on
        # both sides, so the image size cannot change).
        image[phys:phys + p_filesz] = elf_bytes[p_offset:p_offset + p_filesz]
        # Zero the .bss tail explicitly: the image starts zeroed, but an
        # earlier overlapping segment may already have written here and the
        # later segment is supposed to win.
        image[phys + p_filesz:phys + p_memsz] = bytes(p_memsz - p_filesz)
        print(f"[elf_to_eeram] placed PT_LOAD vaddr=0x{p_vaddr:08x} "
              f"phys=0x{phys:08x} filesz=0x{p_filesz:x} memsz=0x{p_memsz:x}")

    return entry, image
|
||||
|
||||
|
||||
def qword_to_hex(image: bytearray, qw_phys: int) -> str:
    """Hex-encode the 16 bytes starting at qw_phys, highest byte first."""
    end = qw_phys + 16
    msb_first = bytes(reversed(image[qw_phys:end]))
    return msb_first.hex()
|
||||
|
||||
|
||||
def emit_image_hex(image: bytearray, path: str) -> None:
    """Write image to path as a sparse $readmemh file of 128-bit qwords.

    Each non-zero qword is written as an ``@<index>`` line followed by its
    hex line (via qword_to_hex). If every qword is zero, a single zero
    entry at index 0 is written so the file is never empty.
    """
    qw_size = 16
    total_qwords = len(image) // qw_size
    with open(path, "w") as out:
        out.write("// Ch270 ELF-derived EE-RAM image\n")
        out.write(f"// {len(image)} bytes / {total_qwords} qwords\n")
        out.write("// Populated qwords only; TB zero-inits before $readmemh.\n\n")
        wrote_any = False
        for index in range(total_qwords):
            start = index * qw_size
            if not any(image[start:start + qw_size]):
                continue  # all-zero qword: skipped
            out.write(f"@{index:08x}\n")
            out.write(qword_to_hex(image, start) + "\n")
            wrote_any = True
        if not wrote_any:
            out.write("@00000000\n00000000000000000000000000000000\n")
|
||||
|
||||
|
||||
def emit_manifest_hex(path: str, entry: int, stack_top: int) -> None:
    """Write the manifest: two comment lines, then entry and stack_top,
    each as an 8-digit lowercase hex line."""
    lines = [
        "// Ch270 manifest from ELF\n",
        "// line 0 = entry, line 1 = stack_top hint\n",
        f"{entry:08x}\n",
        f"{stack_top:08x}\n",
    ]
    with open(path, "w") as out:
        for line in lines:
            out.write(line)
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: ELF in, <prefix>.image.hex + <prefix>.manifest.hex out.

    Returns 0 on success, 1 when the input cannot be read or is rejected
    by the loader (the reason is printed to stderr instead of a traceback).
    """
    p = argparse.ArgumentParser(description=__doc__,
                                formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("--in", dest="elf_in", required=True,
                   help="input ELF path")
    p.add_argument("--out-prefix", required=True,
                   help="output file prefix")
    p.add_argument("--ee-ram-bytes", type=lambda s: int(s, 0),
                   default=2 * 1024 * 1024,
                   help="EE RAM size in bytes (default 2 MiB)")
    args = p.parse_args()

    try:
        with open(args.elf_in, "rb") as f:
            elf_bytes = f.read()
        entry, image = build_image(elf_bytes, args.ee_ram_bytes)
    except (OSError, ValueError, struct.error) as exc:
        # struct.error is included for header tables that run off the end
        # of a truncated file.
        print(f"[elf_to_eeram] ERROR: {exc}", file=sys.stderr)
        return 1

    # Real PS2 ELFs carry no stack pointer; use the top of RAM minus one qword.
    stack_top = args.ee_ram_bytes - 0x10
    emit_image_hex(image, f"{args.out_prefix}.image.hex")
    emit_manifest_hex(f"{args.out_prefix}.manifest.hex", entry, stack_top)
    print(f"[elf_to_eeram] wrote {args.out_prefix}.image.hex + "
          f"{args.out_prefix}.manifest.hex (entry=0x{entry:08x}, "
          f"stack_top=0x{stack_top:08x}, ee_ram={args.ee_ram_bytes} bytes)")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -0,0 +1,169 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate a synthetic EE-RAM image + manifest for Ch270's ELF runner TB.
|
||||
|
||||
Produces two files at the requested output prefix:
|
||||
|
||||
<prefix>.image.hex iverilog $readmemh compatible. Uses @<hex_qw_idx>
|
||||
directives so only the populated 128-bit qwords
|
||||
appear (the TB pre-zeros the array before reading).
|
||||
Each line is 32 hex chars = one 128-bit qword,
|
||||
MSB-first (byte 15 leftmost, byte 0 rightmost).
|
||||
|
||||
<prefix>.manifest.hex Two lines:
|
||||
line 0: ELF entry point (32-bit hex)
|
||||
line 1: stack-top hint (32-bit hex; unused
|
||||
by current TB but reserved)
|
||||
|
||||
The synthetic program lives at PHYS 0x00100000. Entry is given as a
|
||||
kseg0 address (0x80100008) because the ee_memory_map stub routes
|
||||
useg (top bit = 0) to a separate useg_shadow region, not ee_ram —
|
||||
real PS2 ELFs use kseg0 entries for the same reason (cached text):
|
||||
|
||||
PHYS 0x00100000 / kseg0 0x80100000: nop pad
|
||||
PHYS 0x00100004 / kseg0 0x80100004: nop pad
|
||||
PHYS 0x00100008 / kseg0 0x80100008: addiu $v0,$0,0x1234 *** entry ***
|
||||
PHYS 0x0010000C / kseg0 0x8010000C: addiu $v1,$0,0x5678
|
||||
PHYS 0x00100010 / kseg0 0x80100010: j 0x80100010 loop-to-self
|
||||
PHYS 0x00100014 / kseg0 0x80100014: nop delay slot
|
||||
|
||||
The J encoding (0x08040004) is region-relative (pseudo-direct): j_tgt =
|
||||
{PC+4[31:28], imm26<<2}, so the high 4 bits come from the PC.
|
||||
PC=0x80100010 ⇒ j_tgt = 0x80100010 (self) — same encoding works for
|
||||
both kseg0 and kuseg views.
|
||||
|
||||
Expected TB verdict: `elf_timeout_with_hot_pc` with hot_pc near
|
||||
0x80100010. That confirms the ELF-load + entry-bootstrap + strict-
|
||||
trace pipeline is sound (no traps, no halts, no unmapped MMIO, EE
|
||||
reaches and executes real code).
|
||||
"""
|
||||
|
||||
import sys
|
||||
import struct
|
||||
import argparse
|
||||
|
||||
|
||||
def encode_addiu(rt: int, rs: int, imm: int) -> int:
    """Encode ADDIU rt, rs, imm (opcode 0x09); imm is truncated to 16 bits."""
    word = 0x09 << 26
    word |= (rs & 0x1F) << 21
    word |= (rt & 0x1F) << 16
    word |= imm & 0xFFFF
    return word
|
||||
|
||||
|
||||
def encode_j(target: int) -> int:
    """Encode J target (opcode 0x02).

    Only bits 27..2 of target are kept in the instruction; target must be
    word-aligned (asserted).
    """
    assert target & 3 == 0, "J target must be word-aligned"
    index26 = (target >> 2) & 0x03FFFFFF
    opcode_field = 0x02 << 26
    return opcode_field | index26
|
||||
|
||||
|
||||
def encode_lui(rt: int, imm: int) -> int:
    """Encode LUI rt, imm (opcode 0x0F); imm is truncated to 16 bits."""
    opcode_field = 0x0F << 26
    rt_field = (rt & 0x1F) << 16
    return opcode_field | rt_field | (imm & 0xFFFF)
|
||||
|
||||
|
||||
def encode_ori(rt: int, rs: int, imm: int) -> int:
    """Encode ORI rt, rs, imm (opcode 0x0D); imm is truncated to 16 bits."""
    fields = (
        0x0D << 26,
        (rs & 0x1F) << 21,
        (rt & 0x1F) << 16,
        imm & 0xFFFF,
    )
    word = 0
    for field in fields:
        word |= field
    return word
|
||||
|
||||
|
||||
def encode_jr(rs: int) -> int:
    """Encode JR rs (SPECIAL opcode 0, funct 0x08)."""
    funct = 0x08
    rs_field = (rs & 0x1F) << 21
    return rs_field | funct
|
||||
|
||||
|
||||
def write_word_le(image: bytearray, phys_addr: int, word: int) -> None:
    """Store the low 32 bits of word at phys_addr, least-significant byte first."""
    assert phys_addr + 4 <= len(image), "phys_addr out of image bounds"
    low32 = (word & 0xFFFFFFFF).to_bytes(4, "little")
    # Byte-by-byte stores (not a slice assignment) keep the original
    # per-index semantics.
    for lane, value in enumerate(low32):
        image[phys_addr + lane] = value
|
||||
|
||||
|
||||
def qword_to_hex(image: bytearray, qw_phys: int) -> str:
    """Hex-encode the 16-byte qword at byte offset qw_phys, MSB-first.

    The leftmost hex digit pair is byte 15 (most significant) and the
    rightmost is byte 0, which is the digit order $readmemh expects for a
    128-bit value. Asserts that the whole qword lies inside image.
    """
    end = qw_phys + 16
    assert end <= len(image)
    # Walk the 16 bytes from highest offset to lowest.
    return "".join(f"{image[pos]:02x}" for pos in range(end - 1, qw_phys - 1, -1))
|
||||
|
||||
|
||||
def emit_image_hex(image: bytearray, path: str, qw_size: int = 16) -> None:
    """Emit a $readmemh-compatible hex file using @<idx> directives for
    every populated (non-zero) word of qw_size bytes. Empty words are
    skipped — the TB pre-zeros the array before reading.

    Args:
        image: memory image; a trailing partial word (len % qw_size) is
            not emitted.
        path: output file path.
        qw_size: word size in bytes (default 16 = one 128-bit qword).

    Raises:
        ValueError: if qw_size is not positive.

    Each emitted line is now qw_size*2 hex digits, MSB-first. Previously
    the line was always produced from a fixed 16-byte slice, so any
    qw_size other than 16 gave lines of the wrong width.
    """
    if qw_size <= 0:
        raise ValueError(f"qw_size must be positive (got {qw_size})")
    with open(path, "w") as f:
        f.write("// Ch270 synthetic EE-RAM image\n")
        f.write(f"// {len(image)} bytes / {len(image)//qw_size} qwords\n")
        f.write("// Populated qwords only; TB zero-inits before $readmemh.\n\n")
        any_emitted = False
        for qw_idx in range(len(image) // qw_size):
            qw_byte = qw_idx * qw_size
            qw_bytes = image[qw_byte:qw_byte + qw_size]
            if any(qw_bytes):
                f.write(f"@{qw_idx:08x}\n")
                # Reverse so the most significant byte is leftmost.
                f.write(qw_bytes[::-1].hex() + "\n")
                any_emitted = True
        if not any_emitted:
            # iverilog $readmemh errors on empty file; emit a benign entry.
            f.write("@00000000\n" + "00" * qw_size + "\n")
|
||||
|
||||
|
||||
def emit_manifest_hex(path: str, entry: int, stack_top: int) -> None:
    """Write the two-value manifest: comment header, then entry and
    stack_top as 8-digit lowercase hex lines."""
    text = "".join((
        "// Ch270 manifest\n",
        "// line 0 = entry, line 1 = stack_top hint\n",
        f"{entry:08x}\n",
        f"{stack_top:08x}\n",
    ))
    with open(path, "w") as out:
        out.write(text)
|
||||
|
||||
|
||||
def build_synthetic_image(image_bytes: int, entry_phys: int) -> bytearray:
    """Return a zeroed image_bytes-sized image holding the six-word
    synthetic program: two nops before entry_phys, then the body."""
    image = bytearray(image_bytes)

    def program():
        # (offset from entry, instruction word). Yielded lazily so each
        # word is encoded just before it is written.
        yield -8, 0x00000000                      # nop pad before entry
        yield -4, 0x00000000                      # nop pad before entry
        yield 0, encode_addiu(2, 0, 0x1234)       # $v0 = 0x1234
        yield 4, encode_addiu(3, 0, 0x5678)       # $v1 = 0x5678
        yield 8, encode_j(entry_phys + 8)         # j self
        yield 12, 0x00000000                      # nop delay slot

    for rel, word in program():
        write_word_le(image, entry_phys + rel, word)
    return image
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: build the synthetic image and write
    <prefix>.image.hex and <prefix>.manifest.hex. Returns 0."""
    any_base_int = lambda s: int(s, 0)  # accepts decimal, 0x..., 0o..., 0b...

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        "--out-prefix", required=True,
        help="output file prefix (writes <prefix>.image.hex + <prefix>.manifest.hex)")
    parser.add_argument(
        "--entry", type=any_base_int, default=0x80100008,
        help="entry point VIRTUAL address (kseg0 default 0x80100008; "
             "physical placement of the code segment is entry & 0x1FFFFFFF)")
    parser.add_argument(
        "--ee-ram-bytes", type=any_base_int, default=2 * 1024 * 1024,
        help="EE RAM size in bytes (default 2 MiB; must be >= entry+16)")
    parser.add_argument(
        "--stack-top", type=any_base_int, default=0x801FFFF0,
        help="stack top hint stored in manifest (default 0x801FFFF0 kseg0)")
    args = parser.parse_args()

    # The program occupies [entry_phys - 8, entry_phys + 16).
    entry_phys = args.entry & 0x1FFFFFFF
    fits = entry_phys >= 8 and entry_phys + 16 <= args.ee_ram_bytes
    if not fits:
        parser.error(f"entry 0x{args.entry:08x} (phys 0x{entry_phys:08x}) "
                     f"doesn't fit into 0x{args.ee_ram_bytes:x}-byte EE RAM")

    image_path = f"{args.out_prefix}.image.hex"
    manifest_path = f"{args.out_prefix}.manifest.hex"
    image = build_synthetic_image(args.ee_ram_bytes, entry_phys)
    emit_image_hex(image, image_path, qw_size=16)
    emit_manifest_hex(manifest_path, entry=args.entry, stack_top=args.stack_top)
    print(f"[generate_synthetic_image] wrote {args.out_prefix}.image.hex "
          f"+ {args.out_prefix}.manifest.hex (entry=0x{args.entry:08x}, "
          f"phys=0x{entry_phys:08x}, ee_ram={args.ee_ram_bytes} bytes)")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch341 Brick 2: faithful texture downscale + boot-fixture generation.
|
||||
|
||||
Reads the LOCAL extracted cube texture (256x256 PSMCT32, from gs_texture.py --extract), box-downsamples
|
||||
it to 64x64 PSMCT32 (a declared, reported linear transform — NOT a GS-feature approximation; the same
|
||||
class as the Ch340 viewport fit), and emits the 64x64 texels as a $readmemh .mem the boot setup payload
|
||||
uploads to a VRAM-resident TBP. Also runs Codex's gate: hash the dump's N texture uploads and report
|
||||
whether they are byte-identical (so one preload suffices). Only aggregate facts/hashes are committable;
|
||||
the texel .mem stays LOCAL (derived from the dump).
|
||||
|
||||
Declared transform: source 256x256 -> 64x64 (factor 4) => translator must scale UVs by /4.
|
||||
|
||||
Usage:
|
||||
gs_bake_texture.py <dump.gs[.xz|.zst]> [--blob extracted/tex0_blob.bin] [--out outdir] [--report r.txt]
|
||||
"""
|
||||
import sys, os, struct, hashlib
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
import gs_parse, gs_texture
|
||||
|
||||
SRC = 256              # source texture edge, texels
DST = 64               # baked texture edge, texels
FACTOR = SRC // DST    # 4: linear downscale factor
|
||||
|
||||
def upload_identity_gate(path):
    """Return the SHA-256 hex digest of every IMAGE event payload, in event order.

    The payload of an event is the slice of the raw dump bytes starting at
    its byte_off and spanning info["bytes"] bytes (0 when the key is
    absent). The caller decides whether the digests are all identical.
    """
    raw = gs_parse.read_dump_bytes(path)
    _header, events = gs_parse.parse_dump(path)
    # NOTE(review): assumes byte_off indexes the same byte stream that
    # read_dump_bytes returns — confirm against gs_parse.
    return [
        hashlib.sha256(raw[ev.byte_off:ev.byte_off + ev.info.get("bytes", 0)]).hexdigest()
        for ev in events
        if ev.kind == "IMAGE"
    ]
|
||||
|
||||
def downsample_psmct32(blob):
    """Box-filter a SRCxSRC image of little-endian 32-bit texels down to DSTxDST.

    Each output texel is the per-8-bit-channel floor average of its
    FACTORxFACTOR source block. Returns a row-major list of DST*DST
    32-bit words. Asserts that blob is exactly SRC*SRC*4 bytes.
    """
    assert len(blob) == SRC*SRC*4, f"expected {SRC*SRC*4} bytes, got {len(blob)}"
    texels = struct.unpack(f"<{SRC*SRC}I", blob)
    taps = FACTOR * FACTOR
    channel_shifts = (0, 8, 16, 24)  # the four byte lanes of each word
    baked = []
    for out_y in range(DST):
        top = out_y * FACTOR
        for out_x in range(DST):
            left = out_x * FACTOR
            block = [texels[(top + dy) * SRC + (left + dx)]
                     for dy in range(FACTOR)
                     for dx in range(FACTOR)]
            packed = 0
            for shift in channel_shifts:
                total = sum((word >> shift) & 0xFF for word in block)
                packed |= (total // taps) << shift
            baked.append(packed)
    return baked
|
||||
|
||||
def main(argv):
    """Run the upload-identity gate, bake the 64x64 fixture and print the report.

    argv is the full command line (argv[0] = program name, argv[1] = dump
    path) with optional --blob, --out and --report flags.

    Returns 0 on success, 1 when the extracted blob is missing, 2 on a
    usage error (no dump path, or a flag given without its value).
    """
    if len(argv) < 2:
        print(__doc__)
        return 2
    path = argv[1]

    # A flag in last position has no value; report it as a usage error
    # rather than letting the lookup below raise IndexError.
    for flag in ("--blob", "--out", "--report"):
        if flag in argv and argv.index(flag) + 1 >= len(argv):
            print(f"{flag} requires a value", file=sys.stderr)
            return 2

    def opt(n, d=None):
        return argv[argv.index(n)+1] if n in argv else d

    blobp = opt("--blob", os.path.join(os.path.dirname(path), "extracted", "tex0_blob.bin"))
    outdir = opt("--out", os.path.join(os.path.dirname(path), "extracted"))
    report_path = opt("--report")

    R = [f"# Ch341 Brick 2 texture bake (source {os.path.basename(path)}; aggregate facts/hashes only)"]
    # --- gate: are the uploads byte-identical? (auditable: list every distinct hash + its count) ---
    hh = upload_identity_gate(path)
    from collections import Counter
    cnt = Counter(hh)
    R.append(f"texture uploads: {len(hh)} distinct payloads: {len(cnt)} "
             + ("ALL BYTE-IDENTICAL -> one preload suffices" if len(cnt) == 1 else "DIFFER -> translator must not let a subsegment cross a re-upload to its TBP"))
    for i, (hsh, c) in enumerate(sorted(cnt.items(), key=lambda x: -x[1])):
        R.append(f" upload payload #{i}: sha256[0..15]={hsh[:16]} count={c}")

    if not os.path.exists(blobp):
        R.append(f"!! no extracted blob at {blobp} (run gs_texture.py --extract first)")
        print("\n".join(R))
        return 1
    with open(blobp, "rb") as f:
        blob = f.read()
    src_hash = hashlib.sha256(blob).hexdigest()
    out = downsample_psmct32(blob)
    out_bytes = struct.pack(f"<{len(out)}I", *out)
    out_hash = hashlib.sha256(out_bytes).hexdigest()
    R.append("")
    R.append(f"downsample: {SRC}x{SRC} PSMCT32 -> {DST}x{DST} PSMCT32 (box filter /{FACTOR}; UV scale /{FACTOR})")
    R.append(f" src sha256[0..15]={src_hash[:16]} dst sha256[0..15]={out_hash[:16]} dst_bytes={len(out_bytes)} (= {DST}*{DST}*4)")

    os.makedirs(outdir, exist_ok=True)
    # Emit one 32-bit ABGR texel per line (the bake helper packs them into
    # the IMAGE payload); also emit a raw .bin for reuse. LOCAL only.
    memp = os.path.join(outdir, "cube_tex_64.mem")
    with open(memp, "w") as f:
        for w in out:
            f.write(f"{w & 0xFFFFFFFF:08x}\n")
    binp = os.path.join(outdir, "cube_tex_64.bin")
    with open(binp, "wb") as f:
        f.write(out_bytes)
    R.append(f" wrote LOCAL fixture: {os.path.basename(memp)} ({len(out)} texels) + {os.path.basename(binp)}")

    report = "\n".join(R) + "\n"
    print(report)
    if report_path:
        with open(report_path, "w") as f:
            f.write(report)
        print(f"[wrote report -> {report_path}]")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
@@ -0,0 +1,94 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — SPRITE census (Ch344 sprite-ingestion scoping).
|
||||
|
||||
Consumes gs_parse.parse_dump()'s event stream and characterises every SPRITE primitive in a .gs dump:
|
||||
prim-type split, per-sprite TEX0 PSM, TME/ABE/FST, the ALPHA blend equation, TEX1 mag/min filter, and an
|
||||
XYOFFSET-corrected size histogram. Output is AGGREGATE (counts/histograms) only — no copyrighted pixel
|
||||
content — so the report is committable per captures/gs/.gitignore policy. This is census/scoping ONLY;
|
||||
it renders nothing and asserts nothing.
|
||||
|
||||
Usage: gs_census_sprites.py <dump.gs[.xz|.zst]> [--report out.txt]
|
||||
"""
|
||||
import sys, os
|
||||
from collections import Counter
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from gs_parse import parse_dump
|
||||
|
||||
# Lookup tables for naming raw GS register field values in the report.
PSM = {0x00:"PSMCT32",0x01:"PSMCT24",0x02:"PSMCT16",0x0A:"PSMCT16S",0x13:"PSMT8",0x14:"PSMT4",
       0x1B:"PSMT8H",0x24:"PSMT4HL",0x2C:"PSMT4HH"}
PRIMT = {0:"POINT",1:"LINE",2:"LINE_STRIP",3:"TRI",4:"TRISTRIP",5:"TRIFAN",6:"SPRITE",7:"INVALID"}
ABCD = {0:"Cs",1:"Cd",2:"0"}; C_SEL = {0:"As",1:"Ad",2:"FIX"}
FILT = {0:"NEAREST",1:"LINEAR",2:"N_MIPN",3:"L_MIPN",4:"N_MIPL",5:"L_MIPL"}


def dec_tex0(v):
    """Split a TEX0 value into (psm, width, height, tbp0, bits 35-36, bit 34).

    width and height are 1 << the 4-bit fields at bits 26 and 30.
    """
    return (v>>20)&0x3F, 1<<((v>>26)&0xF), 1<<((v>>30)&0xF), v&0x3FFF, (v>>35)&3, (v>>34)&1


def dec_alpha(v):
    """Render an ALPHA register value as a readable blend equation.

    The four 2-bit selectors (bits 0-7) are named through ABCD / C_SEL.
    Selector value 3 has no table entry; it is rendered as "rsvd3"
    instead of raising KeyError, so one odd register write cannot abort a
    whole census. " FIX=<n>" (bits 32-39) is appended only when the C
    selector is 2.
    """
    a=v&3; b=(v>>2)&3; c=(v>>4)&3; d=(v>>6)&3; fix=(v>>32)&0xFF

    def sel(table, code):
        return table.get(code, f"rsvd{code}")

    eqn = f"({sel(ABCD, a)}-{sel(ABCD, b)})*{sel(C_SEL, c)}>>7+{sel(ABCD, d)}"
    return eqn + (f" FIX={fix}" if c==2 else "")
|
||||
|
||||
def census(path):
    """Walk the GSREG events of a dump and tally every completed SPRITE.

    Returns (h, kicks, spr):
      h     - the header object returned by parse_dump.
      kicks - {prim type: number of 0x05 vertex writes seen under it}.
      spr   - {"n": sprite count, plus Counters "psm", "abe", "fst",
              "tme", "alpha", "magfilt", "sizes"}.

    A sprite is counted each time two 0x05 writes accumulate under
    prim type 6. Only register 0x05 is treated as a vertex kick here.
    NOTE(review): sprites kicked through other vertex registers would be
    missed unless gs_parse normalises them to 0x05 — confirm.
    """
    h, events = parse_dump(path)
    cur = {"type":None,"tme":0,"abe":0,"fst":0}
    # Per-context register shadows, keyed by context number 1 / 2.
    tex0 = {1:None, 2:None}; xyoff = {1:(0,0), 2:(0,0)}; tex1 = {1:None, 2:None}
    alpha = {1:None, 2:None}; ctxt = 0
    kicks = {}; xyz = []
    spr = {"n":0, "psm":Counter(), "abe":Counter(), "fst":Counter(), "tme":Counter(),
           "alpha":Counter(), "magfilt":Counter(), "sizes":Counter()}
    for e in events:
        if e.kind != "GSREG": continue
        a, v = e.addr, e.value
        if a == 0x00:                                    # PRIM
            cur = {"type":v&7,"tme":(v>>4)&1,"abe":(v>>6)&1,"fst":(v>>8)&1}; ctxt=(v>>9)&1; xyz=[]
        elif a in (0x06,0x07): tex0[1 if a==0x06 else 2] = dec_tex0(v)
        # MMAG is the single bit 5 of TEX1. The wider &7 mask also pulled in
        # the low MMIN bits, so a non-zero MMIN was reported as "(unset)".
        elif a in (0x14,0x15): tex1[1 if a==0x14 else 2] = (v>>5)&1
        elif a in (0x18,0x19): xyoff[1 if a==0x18 else 2] = (v&0xFFFF, (v>>32)&0xFFFF)
        elif a in (0x42,0x43): alpha[1 if a==0x42 else 2] = v
        elif a == 0x05:                                  # XYZ2 vertex kick
            t = cur["type"]
            if t is None: continue
            kicks[t] = kicks.get(t,0)+1
            if t == 6:
                xyz.append(v)
                if len(xyz) == 2:                        # two corners = one sprite
                    cx = 1 if ctxt==0 else 2
                    ox, oy = xyoff[cx]
                    x0=(xyz[0]&0xFFFF)-ox; y0=((xyz[0]>>16)&0xFFFF)-oy
                    x1=(xyz[1]&0xFFFF)-ox; y1=((xyz[1]>>16)&0xFFFF)-oy
                    w=abs(x1-x0)//16; ht=abs(y1-y0)//16  # 12.4 fixed -> pixels
                    spr["n"]+=1; spr["sizes"][(w,ht)]+=1
                    spr["abe"][cur["abe"]]+=1; spr["fst"][cur["fst"]]+=1; spr["tme"][cur["tme"]]+=1
                    tx = tex0[cx]
                    spr["psm"][("(untextured)" if not cur["tme"] else
                                "(no TEX0)" if tx is None else PSM.get(tx[0],f"0x{tx[0]:02x}"))] += 1
                    if cur["abe"]:
                        al = alpha[cx]
                        spr["alpha"][dec_alpha(al) if al is not None else "(no ALPHA set)"] += 1
                    if cur["tme"]:
                        f1 = tex1[cx]                    # None until a TEX1 write is seen
                        spr["magfilt"][("NEAREST" if f1==0 else "LINEAR" if f1==1 else "(unset)") ] += 1
                    xyz=[]
    return h, kicks, spr
|
||||
|
||||
def fmt(h, kicks, spr):
    """Format the census result as the multi-line aggregate text report."""
    # Prim types by descending kick count, named through PRIMT.
    kicks_by_name = {PRIMT[ptype]: count
                     for ptype, count in sorted(kicks.items(), key=lambda kv: -kv[1])}
    top_sizes = [(f'{w}x{ht}', n) for (w, ht), n in spr['sizes'].most_common(8)]
    report = [
        f"SPRITE CENSUS — serial={h.serial!r} crc=0x{h.crc:08x}",
        f"prim-kicks by type: {kicks_by_name}",
        f"SPRITES: {spr['n']} rectangles",
        f" TEX0 PSM : {dict(spr['psm'].most_common())}",
        f" TME(textured) : {dict(spr['tme'])} FST(0=STQ,1=UV): {dict(spr['fst'])}",
        f" ABE(alpha) : {dict(spr['abe'])}",
        f" ALPHA eqn : {dict(spr['alpha'].most_common())}",
        f" mag filter : {dict(spr['magfilt'].most_common())}",
        f" top sizes WxH : {top_sizes}",
    ]
    return "\n".join(report)
|
||||
|
||||
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print(__doc__); sys.exit(2)
    # Resolve --report before the census runs, so a missing value is a
    # usage error up front rather than an IndexError after the whole scan.
    report_path = None
    if "--report" in sys.argv:
        at = sys.argv.index("--report")
        if at + 1 >= len(sys.argv):
            print("--report requires an output path", file=sys.stderr)
            sys.exit(2)
        report_path = sys.argv[at + 1]
    h, kicks, spr = census(sys.argv[1])
    out = fmt(h, kicks, spr)
    print(out)
    if report_path is not None:
        with open(report_path, "w") as f: f.write(out + "\n")
        print(f"\nwrote report -> {report_path}")
|
||||
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch351 step 1: isolate reciprocal quantization from S1 under-interpolation banding.
|
||||
|
||||
Compares the RTL FB dump (sh3_real_fb_out.mem, written by tb_top_psmct32_sh3_real_draw_demo) against TWO host
|
||||
references over the identical crop geometry:
|
||||
FLOAT — ideal perspective (the Codex pixel-diff oracle).
|
||||
RECIP-8b — RTL-FAITHFUL: fixed-point vertex attrs + the 8-bit gs_reciprocal_stub (gs_make_sh3_real_draw_fixture).
|
||||
Verdict:
|
||||
- RTL ≈ RECIP-8b (tight) AND RECIP-8b ≠ FLOAT (loose) -> the residual is RECIPROCAL QUANTIZATION. Widening
|
||||
gs_reciprocal_stub IDX_BITS should fix it (Ch351 step 2).
|
||||
- RTL ≠ RECIP-8b (still loose) -> S1 attribute UNDER-INTERPOLATION beyond the reciprocal.
|
||||
|
||||
Usage: gs_ch351_oracle.py (run gs_make_sh3_real_draw_fixture.py + the TB first to produce the .mem files)
|
||||
"""
|
||||
import sys, os
|
||||
# Repository root (one level above this script) and the fixture directory.
ROOT=os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)),".."))
DATA=os.path.join(ROOT,"sim","data","top_psmct32_raster_demo")
# Crop row width, crop row count, texture width, texture height, as used by main().
FBPXW, CH, TW, TH = 256, 120, 512, 512


def loadmem(name):
    """Read DATA/<name> as a list of ints, one hex value per line.

    Blank lines and lines starting with "//" are skipped. The file is
    closed before returning (the previous version leaked the handle).
    """
    with open(os.path.join(DATA,name)) as f:
        return [int(l,16) for l in f if l.strip() and not l.strip().startswith("//")]
|
||||
|
||||
def main():
    """Compare the RTL framebuffer dump against the FLOAT, RECIP-8b and
    AFFINE reference maps and print histograms plus a verdict. Returns 0.

    Reference-map word layout as decoded below: bit 31 = pixel covered,
    bit 30 = interior, bits 17..9 = texel u, bits 8..0 = texel v.
    """
    fb = loadmem("sh3_real_fb_out.mem")
    rmF = loadmem("sh3_real_refmap.mem")
    rmR = loadmem("sh3_real_refmap_recip.mem")
    idxw = loadmem("sh3_real_idx.mem")
    pal = loadmem("sh3_real_pal.mem")

    def sh3_idx(u,v):
        # Four 8-bit indices per 32-bit word, lowest byte first.
        lin=v*TW+u; return (idxw[lin>>2] >> (8*(lin&3))) & 0xFF

    def exp_cell(u,v):
        # Expected 24-bit colour of texel (u, v).
        return pal[sh3_idx(u,v)] & 0xFFFFFF

    palset = set(p & 0xFFFFFF for p in pal)

    def nearest_match(col, tu, tv):
        """Return (ring, du, dv) of the first texel within Chebyshev
        distance 3 of (tu, tv) whose expected colour equals col, scanning
        rings 0..3 in du-then-dv order; (99, 0, 0) when none matches."""
        for rad in range(0,4):
            for du in range(-rad,rad+1):
                for dv in range(-rad,rad+1):
                    if max(abs(du),abs(dv))!=rad: continue   # ring perimeter only
                    ttu=tu+du; ttv=tv+dv
                    if 0<=ttu<TW and 0<=ttv<TH and col==exp_cell(ttu,ttv):
                        return rad, du, dv                   # stop at first hit
        return 99, 0, 0

    def histo(refmap, label):
        """Print match-distance histograms of fb against refmap; return
        the interior-only histogram [D0, D1, D2, D3, none]."""
        hist=[0]*5; ihist=[0]*5; tot=0; itot=0; exact_color=0; clut_bad=0
        sdu=sdv=0.0; nd=0
        for o in range(FBPXW*CH):
            rm=refmap[o]
            if not (rm>>31): continue
            interior=(rm>>30)&1; tu=(rm>>9)&0x1FF; tv=rm&0x1FF
            col = fb[o] & 0xFFFFFF
            tot+=1
            if col not in palset: clut_bad+=1
            if col == exp_cell(tu,tv): exact_color+=1
            D, mdu, mdv = nearest_match(col, tu, tv)
            hist[D if D<=3 else 4]+=1
            if D<=3: sdu+=mdu; sdv+=mdv; nd+=1
            if interior: itot+=1; ihist[D if D<=3 else 4]+=1
        print(f"== {label} == covered={tot} interior={itot}")
        print(f" ALL hist D0={hist[0]} D1={hist[1]} D2={hist[2]} D3={hist[3]} none={hist[4]} "
              f"<=1tex={100.0*(hist[0]+hist[1])/max(1,tot):.1f}%")
        print(f" INT hist D0={ihist[0]} D1={ihist[1]} D2={ihist[2]} D3={ihist[3]} none={ihist[4]} "
              f"<=1tex={100.0*(ihist[0]+ihist[1])/max(1,itot):.1f}%")
        print(f" exact-color-match={100.0*exact_color/max(1,tot):.1f}% clut_bad={clut_bad} "
              f"mean-delta=({sdu/max(1,nd):.3f},{sdv/max(1,nd):.3f})")
        return ihist

    rmA = loadmem("sh3_real_refmap_affine.mem")
    print("RTL FB dump vs the references (same geometry):\n")
    histo(rmF, "RTL-vs-FLOAT (ideal perspective)")
    print()
    ih = histo(rmR, "RTL-vs-RECIP8b (perspective + 8-bit reciprocal)")
    print()
    histo(rmA, "RTL-vs-AFFINE (per-vertex divide + linear u,v)")
    print()
    # also: how different are the two references themselves (FLOAT vs RECIP8b) — the reciprocal's own error
    diff=sum(1 for o in range(FBPXW*CH) if (rmF[o]>>31) and (rmF[o]&0x3FFFF)!=(rmR[o]&0x3FFFF))
    covered=sum(1 for o in range(FBPXW*CH) if rmF[o]>>31)
    print(f"FLOAT vs RECIP8b reference texel disagreement: {diff}/{covered} ({100.0*diff/max(1,covered):.1f}%) "
          f"— the reciprocal LUT's intrinsic error on this geometry")
    print()
    # "Tight" = fewer than 5% of interior pixels are more than one texel
    # away from the RECIP-8b reference.
    rtl_tight = (ih[2]+ih[3]+ih[4]) < 0.05*max(1,sum(ih))
    if rtl_tight:
        print("VERDICT: RTL matches the RTL-faithful 8-bit-reciprocal reference -> residual is RECIPROCAL")
        print(" QUANTIZATION. Ch351 step 2: widen gs_reciprocal_stub IDX_BITS (10/11/12).")
    else:
        print("VERDICT: RTL does NOT match even the 8-bit-reciprocal reference -> S1 attribute UNDER-")
        print(" INTERPOLATION banding beyond the reciprocal. Ch351: fix S1 interpolation.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -0,0 +1,109 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch347 authentic asset extractor: real Silent Hill 3 PSMT8 texture + real CLUT.
|
||||
|
||||
Codex target A: "authentic SH3 palettized texture + real palette rendered through chosen sprite geometry"
|
||||
— NOT a faithful SH3 draw. We pick a CLEAN candidate whose texture is a NATIVE PSMT8 upload (linear payload,
|
||||
no GS-memory model needed) and whose CLUT is a clean 256-entry PSMCT32 upload, both validated by rendering to
|
||||
a coherent SH3 surface (see captures/gs/silenthill3/extracted/*.png).
|
||||
|
||||
Default candidate (dump 224139): PSMT8 tex tbp=13824 (128x128, native upload, 99 distinct indices) + CLUT
|
||||
cbp=14282 (16x16=256 PSMCT32, read LINEARLY — csm=0 but linear order renders coherent; CSM1 bit-swap scrambles).
|
||||
|
||||
Primary render = DECAL (opaque): the authentic CLUT RGB is kept BYTE-FOR-BYTE; alpha (SH3's real ~0x04) is
|
||||
IGNORED by the render mode, never rewritten (Codex guardrail). Emits the index texture, the CLUT, and a
|
||||
software DECAL reference FB for the pre-fit pixel-diff.
|
||||
|
||||
Outputs (LOCAL, dump-derived -> gitignored) into sim/data/top_psmct32_raster_demo/:
|
||||
sh3_tex_idx.mem 128x128 PSMT8 indices, 4 per 32-bit word (low->high byte), $readmemh
|
||||
sh3_clut.mem 256 PSMCT32 ABGR CLUT entries (one per line) — authentic RGB, authentic alpha
|
||||
sh3_ref.mem software DECAL reference FB 128x128 PSMCT32 (CLUT[idx] RGB, opaque) for pixel-diff
|
||||
sh3_authentic.png visual confirmation (DECAL)
|
||||
"""
|
||||
import sys, os, glob, struct
|
||||
# Directory of this script, and the repository root one level above it.
HERE = os.path.dirname(os.path.abspath(__file__))
ROOT = os.path.normpath(os.path.join(HERE, ".."))
# Make <ROOT>/tools importable before pulling in the residency helper.
sys.path.insert(0, os.path.join(ROOT, "tools"))
import gs_texture_residency as R
# Output directories for the fixture files and the extracted assets.
DATA = os.path.join(ROOT, "sim", "data", "top_psmct32_raster_demo")
EXTR = os.path.join(ROOT, "captures", "gs", "silenthill3", "extracted")
|
||||
|
||||
def main(argv):
    """Extract the SH3 PSMT8 texture + PSMCT32 CLUT from a GS dump and emit the DECAL fixtures.

    argv layout (all optional after the program name):
        argv[1]  dump path; when absent the first (sorted) ``*224139*.gs.zst`` under
                 captures/gs/silenthill3 is used
        argv[2]  texture base pointer (default 13824)
        argv[3]  CLUT base pointer (default 14282)

    Writes sh3_tex_idx.mem, sh3_clut.mem, sh3_ref.mem and the 64x64 crop variants
    (sh3_tex_idx64.mem, sh3_ref64.mem) into DATA, plus best-effort PNG previews into EXTR.
    Terminates through ``sys.exit(message)`` when no dump, no matching upload, or a
    too-short texture/CLUT payload is found.
    """
    W = H = 128
    CW = CH = 64
    CLUT_ENTRIES = 256

    def pack4(indices):
        # four 8-bit indices per 32-bit word, lowest x in the low byte
        return [indices[k] | (indices[k+1] << 8) | (indices[k+2] << 16) | (indices[k+3] << 24)
                for k in range(0, len(indices), 4)]

    def write_words(name, header, words):
        # one 8-hex-digit word per line after a single // header line ($readmemh style)
        with open(os.path.join(DATA, name), "w") as f:
            f.write(header)
            for word in words:
                f.write(f"{word & 0xFFFFFFFF:08x}\n")

    def save_png(name, width, height, pixels):
        # Preview only: never let a missing/failed PIL abort the fixture generation,
        # but always say that the PNG was skipped.
        try:
            from PIL import Image
            img = Image.new('RGBA', (width, height))
            img.putdata([(p & 0xFF, (p >> 8) & 0xFF, (p >> 16) & 0xFF, 0xFF) for p in pixels])
            img.save(os.path.join(EXTR, name))
        except Exception as e:
            print("(PIL unavailable, skipped PNG:", e, ")")

    dump = argv[1] if len(argv) > 1 else None
    if not dump:
        # sorted(): glob order is arbitrary, keep the default pick reproducible
        cands = sorted(glob.glob(os.path.join(ROOT, "captures", "gs", "silenthill3", "*224139*.gs.zst")))
        if not cands:
            sys.exit("no SH3 dump found; pass the .gs.zst path")
        dump = cands[0]
    TBP = int(argv[2]) if len(argv) > 2 else 13824   # native PSMT8 texture base
    CBP = int(argv[3]) if len(argv) > 3 else 14282   # CLUT base

    d, h, events, uploads, runs, vram = R.collect(dump, 0)

    # texture: latest NATIVE PSMT8 upload to TBP (linear row-major payload)
    tu = [u for u in uploads if u["dbp"] == TBP and u["dpsm"] == 0x13]
    if not tu:
        sys.exit(f"no native PSMT8 (dpsm=0x13) upload to tbp={TBP} — not a clean candidate")
    tup = max(tu, key=lambda u: u["idx"])
    tex = d[tup["blob_range"][0]:tup["blob_range"][1]]
    if len(tex) < W*H:
        sys.exit(f"texture payload {len(tex)}B < {W*H}")
    idx = [tex[y*W + x] for y in range(H) for x in range(W)]

    # CLUT: latest PSMCT32 256-entry upload to CBP, read LINEARLY
    cu = [u for u in uploads if u["dbp"] == CBP and u["dpsm"] == 0x00]
    if not cu:
        sys.exit(f"no PSMCT32 CLUT upload to cbp={CBP}")
    cup = max(cu, key=lambda u: u["idx"])
    cb = d[cup["blob_range"][0]:cup["blob_range"][1]]
    # Fail closed like the texture check: a short payload would otherwise decode the
    # missing entries as 0 (int.from_bytes of an empty slice) and pass as "authentic".
    if len(cb) < CLUT_ENTRIES*4:
        sys.exit(f"CLUT payload {len(cb)}B < {CLUT_ENTRIES*4}")
    pal = [int.from_bytes(cb[i*4:i*4+4], "little") for i in range(CLUT_ENTRIES)]

    # software DECAL reference = the texel CLUT[idx], byte-for-byte (RGB and the stored alpha,
    # nothing rewritten). Only the PNG preview forces alpha to opaque.
    ref = [pal[i] for i in idx]

    os.makedirs(DATA, exist_ok=True)
    os.makedirs(EXTR, exist_ok=True)
    write_words("sh3_tex_idx.mem",
                f"// Ch347 LOCAL authentic SH3 PSMT8 indices {W}x{H} (native upload tbp={TBP}). gitignored.\n",
                pack4(idx))
    write_words("sh3_clut.mem",
                f"// Ch347 LOCAL authentic SH3 CLUT 256xPSMCT32 (cbp={CBP}, linear). RGB+alpha authentic. gitignored.\n",
                pal)
    write_words("sh3_ref.mem",
                f"// Ch347 LOCAL SW DECAL reference FB {W}x{H} (CLUT[idx] RGB, opaque display). gitignored.\n",
                ref)
    save_png("sh3_authentic.png", W, H, ref)

    # --- Ch347 (ii): a DETERMINISTIC 64x64 authentic CROP ---
    # The crop is a direct subset of the extracted indices; the CLUT is unchanged.
    # Rule: the 64x64 window (stride 8) with the MOST distinct indices; the strict '>' keeps the
    # first window met in (cy, cx) scan order on ties.
    best = (-1, 0, 0)
    for cy in range(0, H-CH+1, 8):
        for cx in range(0, W-CW+1, 8):
            s = set(idx[(cy+ly)*W + (cx+lx)] for ly in range(CH) for lx in range(CW))
            if len(s) > best[0]:
                best = (len(s), cx, cy)
    _, CX, CY = best
    cidx = [idx[(CY+ly)*W + (CX+lx)] for ly in range(CH) for lx in range(CW)]
    cref = [pal[i] for i in cidx]
    write_words("sh3_tex_idx64.mem",
                f"// Ch347 LOCAL authentic SH3 PSMT8 64x64 CROP @({CX},{CY}) of tbp={TBP} 128x128 (direct subset). gitignored.\n",
                pack4(cidx))
    write_words("sh3_ref64.mem",
                f"// Ch347 LOCAL SW DECAL reference 64x64 CROP @({CX},{CY}) (CLUT[idx] RGB). gitignored.\n",
                cref)
    save_png("sh3_authentic64.png", CW, CH, cref)

    print(f"[Ch347] SH3 authentic asset: tex tbp={TBP} {W}x{H} ({len(set(idx))} distinct indices), CLUT cbp={CBP} "
          f"({len(set(pal))} colors). DECAL reference emitted.")
    print(f"[Ch347] 64x64 CROP @({CX},{CY}) [deterministic max-distinct window]: {len(set(cidx))} distinct indices, "
          f"{len(set(cref))} colors -> sh3_tex_idx64.mem, sh3_ref64.mem")
    print(f"[Ch347] -> {DATA}/sh3_tex_idx*.mem, sh3_clut.mem, sh3_ref*.mem (+ {EXTR}/sh3_authentic*.png) — all LOCAL")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv)
|
||||
@@ -0,0 +1,122 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch344 authentic SPRITE extractor (brick 1).
|
||||
|
||||
Selects the EARLIEST contiguous run of v1-eligible sprites from a .gs dump and emits a structured sprite
|
||||
list + aggregate report. v1 eligibility (per the Ch344 census-gated scope) — FAIL CLOSED on anything else:
|
||||
* SPRITE primitive (PRIM type 6), TME=1 (textured), FST=1 (UV/affine)
|
||||
* TEX0 PSM = PSMCT32 (no CLUT / no PSMT8/PSMT4 / no PSMCT16)
|
||||
* ABE=1 (alpha) — blend equation is DECLARED source-over (ALPHA lives in the GS freeze state, absent
|
||||
from the packet stream; we emit ALPHA ourselves to the feeder)
|
||||
* small: width,height <= MAX_SPRITE_PX (excludes the fullscreen/scissored/guard-band blits)
|
||||
* single shared TEX0 TBP across the run (a TBP change / re-upload ends the run — never silently mixed)
|
||||
|
||||
Output is the sprite geometry/UV/color (dump-derived -> the .sprites file is LOCAL) plus an AGGREGATE
|
||||
report (committable). This is extraction ONLY; rendering/translation to the feeder is brick 2+.
|
||||
|
||||
Usage: gs_extract_sprites.py <dump.gs[.xz|.zst]> [--max N] [--out scene.sprites] [--report r.txt]
|
||||
"""
|
||||
import sys, os
|
||||
from collections import Counter
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from gs_parse import parse_dump
|
||||
|
||||
PSMCT32 = 0x00
|
||||
MAX_SPRITE_PX = 64
|
||||
DEF_MAX = 32
|
||||
|
||||
def dec_tex0(v):
    """Split a raw TEX0 register value into the fields the extractor inspects.

    Returns a dict with tbp, tbw, psm, tw, th, tcc, tfx, cpsm and csm. ``tw``/``th`` are
    returned as pixel sizes (1 << the stored 4-bit exponent), every other entry is the
    raw bit-field.
    """
    def field(lsb, width):
        return (v >> lsb) & ((1 << width) - 1)

    fields = {}
    fields["tbp"] = field(0, 14)
    fields["tbw"] = field(14, 6)
    fields["psm"] = field(20, 6)
    fields["tw"] = 1 << field(26, 4)
    fields["th"] = 1 << field(30, 4)
    fields["tcc"] = field(34, 1)
    fields["tfx"] = field(35, 2)
    fields["cpsm"] = field(51, 4)
    fields["csm"] = field(55, 1)
    return fields
|
||||
|
||||
def extract(events, max_spr):
    """Pick the earliest contiguous run of v1-eligible sprites out of parsed GS events.

    Only events whose ``kind`` is "GSREG" are examined; their ``addr``/``value`` are treated
    as a register write and ``idx`` as the event ordinal. Register writes tracked: 0x00 (PRIM),
    0x06/0x07 (TEX0 per context), 0x18/0x19 (XY offset per context), 0x03 (UV) and 0x05 (XYZ2).
    Every second XYZ2 write while the primitive type is 6 closes one sprite.

    A sprite is accepted when a TEX0 is bound, TME/FST/ABE are set, the TEX0 is PSMCT32 with
    csm == cpsm == 0, and both extents are in 1..MAX_SPRITE_PX. Sprites refused before the run
    starts are tallied by reason; the first refusal after it started, a TBP change, or reaching
    ``max_spr`` ends the run.

    Returns ``(seg, meta)``: ``seg`` is the list of sprite dicts (x0,y0,x1,y1,w,h,u0,v0,u1,v1),
    ``meta`` holds tbp, tex0, first, last, stop and the rejected tally.
    """
    prim = {"type": None, "tme": 0, "abe": 0, "fst": 0}
    tex0_by_ctx = {1: None, 2: None}
    offset_by_ctx = {1: (0, 0), 2: (0, 0)}
    ctxt = 0
    uv_pending = []
    xyz_pending = []
    seg = []
    seg_tbp = None
    seg_t0 = None
    started = False
    stop = None
    first = last = -1
    rejected = Counter()

    def reject_reason(t0, w, h):
        # first failing criterion wins; the order defines the tally buckets
        if t0 is None:
            return "no_tex0"
        if not prim["tme"]:
            return "untextured"
        if t0["psm"] != PSMCT32:
            return f"psm_0x{t0['psm']:02x}"
        if not prim["fst"]:
            return "fst0_stq"
        if not prim["abe"]:
            return "no_abe"
        if t0["csm"] or t0["cpsm"]:
            return "clut"
        if w > MAX_SPRITE_PX or h > MAX_SPRITE_PX:
            return "too_big"
        return "other"

    for ev in events:
        if ev.kind != "GSREG":
            continue
        addr, val = ev.addr, ev.value

        if addr == 0x00:
            # new PRIM: latch the mode bits and drop any half-collected vertex data
            prim = {"type": val & 7, "tme": (val >> 4) & 1, "abe": (val >> 6) & 1, "fst": (val >> 8) & 1}
            ctxt = (val >> 9) & 1
            uv_pending = []
            xyz_pending = []
            continue
        if addr in (0x06, 0x07):
            tex0_by_ctx[1 if addr == 0x06 else 2] = dec_tex0(val)
            continue
        if addr in (0x18, 0x19):
            offset_by_ctx[1 if addr == 0x18 else 2] = (val & 0xFFFF, (val >> 32) & 0xFFFF)
            continue
        if addr == 0x03:
            # U,V as 14-bit 10.4 fixed point.
            # NOTE(review): V is taken from bit 14 here; the GS UV register is usually described
            # with V at bits 16..29 — confirm against what gs_parse stores in ``value``.
            uv_pending.append((val & 0x3FFF, (val >> 14) & 0x3FFF))
            continue
        if addr != 0x05 or prim["type"] != 6:
            continue

        # XYZ2 vertex kick for a SPRITE: wait for the second corner
        xyz_pending.append(val)
        if len(xyz_pending) < 2:
            continue

        ctx = 1 if ctxt == 0 else 2
        t0 = tex0_by_ctx[ctx]
        ox, oy = offset_by_ctx[ctx]
        corner_a, corner_b = xyz_pending[0], xyz_pending[1]
        x0 = ((corner_a & 0xFFFF) - ox) // 16
        y0 = (((corner_a >> 16) & 0xFFFF) - oy) // 16
        x1 = ((corner_b & 0xFFFF) - ox) // 16
        y1 = (((corner_b >> 16) & 0xFFFF) - oy) // 16
        w = abs(x1 - x0)
        h = abs(y1 - y0)
        if len(uv_pending) >= 2:
            uv = uv_pending[-2:]
        else:
            uv = [(0, 0), (0, 0)]
        xyz_pending = []
        uv_pending = []

        state_ok = (t0 is not None and prim["tme"] and prim["fst"] and t0["psm"] == PSMCT32
                    and prim["abe"] and t0["csm"] == 0 and t0["cpsm"] == 0)  # csm/cpsm: no CLUT in play
        accepted = state_ok and 0 < w <= MAX_SPRITE_PX and 0 < h <= MAX_SPRITE_PX
        if not accepted:
            if started:
                stop = "run ended: next sprite not v1-eligible"
                break
            rejected[reject_reason(t0, w, h)] += 1
            continue

        if not started:
            seg_tbp = t0["tbp"]
            seg_t0 = t0
            started = True
            first = ev.idx
        if t0["tbp"] != seg_tbp:
            stop = "run ended: TEX0 TBP changed (re-upload)"
            break

        (ua, va), (ub, vb) = uv[0], uv[1]
        seg.append(dict(x0=min(x0, x1), y0=min(y0, y1), x1=max(x0, x1), y1=max(y0, y1), w=w, h=h,
                        u0=ua/16.0, v0=va/16.0, u1=ub/16.0, v1=vb/16.0))
        last = ev.idx
        if len(seg) >= max_spr:
            stop = "hit --max"
            break

    meta = dict(tbp=seg_tbp, tex0=seg_t0, first=first, last=last,
                stop=stop, rejected=dict(rejected))
    return seg, meta
|
||||
|
||||
def main(argv):
    """Command-line entry point: parse a dump, select the sprite run, print/write the report.

    argv[1] is the dump path. Optional flags, each followed by one value:
        --max N        cap on the number of sprites selected (default DEF_MAX)
        --report PATH  also write the aggregate report text to PATH
        --out PATH     write the sprite list to PATH (only when a run was selected)

    Returns 2 (after printing the module usage text) when the dump path is missing or a flag
    is given without its value, 0 otherwise.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2

    def option(flag):
        # (present, value); value is None when the flag is the last token on the line
        if flag not in argv:
            return False, None
        pos = argv.index(flag) + 1
        return True, (argv[pos] if pos < len(argv) else None)

    # Validate the flags before the (expensive) dump parse so a truncated command line is a
    # usage error instead of an IndexError after all the work has been done.
    has_max, max_arg = option("--max")
    has_report, report_path = option("--report")
    has_out, out_path = option("--out")
    if (has_max and max_arg is None) or (has_report and report_path is None) \
            or (has_out and out_path is None):
        print(__doc__)
        return 2

    path = argv[1]
    max_spr = int(max_arg) if has_max else DEF_MAX
    h, events = parse_dump(path)
    seg, meta = extract(events, max_spr)

    R = [f"SPRITE EXTRACT — serial={h.serial!r} crc=0x{h.crc:08x} (v1: PSMCT32/UV/ABE, <= {MAX_SPRITE_PX}px)"]
    if not seg:
        R.append(f"NO v1-eligible sprite run selected. rejections: {meta['rejected']}")
    else:
        t0 = meta["tex0"]
        xs = [s['x0'] for s in seg] + [s['x1'] for s in seg]
        ys = [s['y0'] for s in seg] + [s['y1'] for s in seg]
        sizes = Counter((s['w'], s['h']) for s in seg)
        R.append(f"selected {len(seg)} sprites, events #{meta['first']}..#{meta['last']}, stop: {meta['stop']}")
        R.append(f" TEX0: TBP={t0['tbp']} TBW={t0['tbw']} {t0['tw']}x{t0['th']} PSMCT32 TFX={t0['tfx']} TCC={t0['tcc']}")
        R.append(f" screen bbox: x[{min(xs)}..{max(xs)}] y[{min(ys)}..{max(ys)}]")
        R.append(f" sizes WxH: {[(f'{w}x{hh}',n) for (w,hh),n in sizes.most_common(6)]}")
        u = [s['u0'] for s in seg] + [s['u1'] for s in seg]
        vv = [s['v0'] for s in seg] + [s['v1'] for s in seg]
        R.append(f" UV range: u[{min(u):.1f}..{max(u):.1f}] v[{min(vv):.1f}..{max(vv):.1f}] (texel coords)")
    rep = "\n".join(R)
    print(rep)

    if has_report:
        # context manager: the handle is flushed and closed even if the write fails
        with open(report_path, "w") as rf:
            rf.write(rep + "\n")
    if has_out and seg:
        op = out_path
        with open(op, "w") as f:
            t0 = meta["tex0"]
            f.write(f"# Ch344 LOCAL authentic sprite run ({len(seg)}) — dump-derived. TBP={t0['tbp']} {t0['tw']}x{t0['th']} PSMCT32\n")
            f.write(f"tex0 {t0['tbp']} {t0['tbw']} {t0['tw']} {t0['th']} {t0['tfx']}\n")
            for s in seg:
                f.write(f"sprite {s['x0']} {s['y0']} {s['x1']} {s['y1']} {s['u0']:.1f} {s['v0']:.1f} {s['u1']:.1f} {s['v1']:.1f}\n")
        print(f"\nwrote sprite list -> {op}")
    return 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main(sys.argv))
|
||||
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch345b authentic 32-glyph segment re-pack (content normalization).
|
||||
|
||||
The cubes-dump glyph sprites MINIFY a 256x256 PSMCT32 atlas (256KB >> the <=64KiB BRAM VRAM) and 31/32 wrap
|
||||
past v=255. The dump's actual texture wrap is in the GS freeze state (absent from the packet stream), so we
|
||||
DECLARE REPEAT (kept visible in the report). For each glyph we compute, per SCREEN pixel, the exact texel the
|
||||
hardware samples (gs_stub `dda_uv`: coord = (u0 + ((du_dx*(x-x0))>>>16)) & 0x7FF, du_dx=((u1-u0)<<16)/(x1-x0),
|
||||
then REPEAT mask & 0xFF for the 256-wide atlas) and BAKE that texel into a dense glyph-sized sub-texture.
|
||||
Packing those dense glyphs + a 1:1 UV remap reproduces the EXACT rendered pixels (nearest of the original
|
||||
minified sampling == 1:1 of the baked dense glyph, by construction) while fitting VRAM.
|
||||
|
||||
Outputs into sim/data/top_psmct32_raster_demo/ (LOCAL, dump-derived -> gitignored):
|
||||
glyph_atlas.mem packed dense-glyph atlas (PSMCT32, $readmemh into VRAM at GLYPH_TBP)
|
||||
glyph_sprites.mem feeder SPRITE staging (sprite_mode word0[33]): grid screen layout + 1:1 packed UV
|
||||
glyph_ref.mem SOFTWARE REFERENCE FB: original-atlas + declared-REPEAT + nearest render over the BG,
|
||||
for the pre-fit TB pixel-diff. No authentic claim until that diff passes.
|
||||
DECLARED: REPEAT wrap, nearest sampling, white(0x80) MODULATE tint (the dump's per-vertex RGBA is a freeze-
|
||||
state value; identity tint shows the raw glyph texels)."""
|
||||
|
||||
import sys, os, struct
|
||||
|
||||
HERE = os.path.dirname(os.path.abspath(__file__)); ROOT = os.path.normpath(os.path.join(HERE, ".."))
|
||||
DATA = os.path.join(ROOT, "sim", "data", "top_psmct32_raster_demo")
|
||||
# Ch346: source the texture RESIDENT at the glyph-sprite draws (epoch 2, the real font),
|
||||
# NOT the first upload (epoch 1 = the cube checker the demo ping-pongs into the same TBP).
|
||||
# tex0_font.bin is extracted by the residency preflight; fall back to the old blob only if absent.
|
||||
_FONT = os.path.join(ROOT, "captures", "gs", "cubes", "extracted", "tex0_font.bin")
|
||||
ATLAS = _FONT if os.path.exists(_FONT) else os.path.join(ROOT, "captures", "gs", "cubes", "extracted", "tex0_blob.bin")
|
||||
SPRITES = os.path.join(ROOT, "captures", "gs", "cubes", "extracted", "cubes.sprites")
|
||||
ATLAS_W = ATLAS_H = 256
|
||||
FB_W = FB_H = 128 # 128x128 fits the 32-glyph grid at native (1:1) size
|
||||
FBW = 2 # FB width in 64-px pages (128-wide FB)
|
||||
GLYPH_TBP = 256 # packed atlas VRAM word base = 256*64 = 16384 (right after the 128x128 FB = 16384 words)
|
||||
BG = 0xFF0000C0 # opaque blue background (PSMCT32 {A,B,G,R} = FF,00,00,C0)
|
||||
GAP = 2
|
||||
|
||||
def tdiv(a, b):
    """Integer division that truncates toward zero (matches SystemVerilog signed `/`).

    A zero divisor yields 0 instead of raising.
    """
    if b == 0:
        return 0
    magnitude = abs(a) // abs(b)
    if (a < 0) != (b < 0):
        return -magnitude
    return magnitude
|
||||
|
||||
def htexel(p, p0, c0, e0, e1, span):
    """One axis of the gs_stub dda_uv walk, reduced to a REPEAT-masked atlas index.

    p      screen coordinate being sampled
    p0     screen coordinate the walk starts from
    c0     texel coordinate at p0
    e0/e1  texel coordinates at the two sprite edges (their difference sets the step)
    span   signed screen distance between the two edges; 0 means a zero step

    The per-pixel step is a 16.16 fixed-point value; the stepped coordinate is truncated to
    11 bits and then wrapped with the 256-wide power-of-two REPEAT mask.
    """
    if span == 0:
        step = 0
    else:
        step = tdiv((e1 - e0) << 16, span)
    advance = (step * (p - p0)) >> 16      # arithmetic shift: rounds toward -inf
    coord = (c0 + advance) & 0x7FF         # 11-bit truncation
    return coord & 0xFF                    # REPEAT wrap
|
||||
|
||||
def mod8(t, c):
    """Scale ``t`` by ``c`` with 0x80 acting as 1.0 ((t*c) >> 7), saturated at 0xFF."""
    return min((t * c) >> 7, 0xFF)
|
||||
def srcover(cs, cd, as_):
    """Blend one colour channel as gs_alpha_blend does: ((cs-cd)*min(as,128)>>7)+cd, clamped.

    cs   source channel value
    cd   destination channel value
    as_  source alpha; anything above 128 is treated as 128

    The result is clamped to 0..255.
    """
    weight = min(as_, 128)
    blended = cd + (((cs - cd) * weight) >> 7)
    return max(0, min(255, blended))
|
||||
|
||||
for p in (ATLAS, SPRITES):
    if not os.path.exists(p):
        sys.exit(f"missing local input: {p}")
# Ch346 fail-closed gate: refuse to repack a non-glyph-plausible atlas (this is what caught Ch345b — the
# original tex0_blob.bin was the cube CHECKER, no alpha mask). The resident font epoch passes (mask-like).
from gs_texture_residency import payload_stats, font_like
PSMCT32 = 0x00
# Read the atlas exactly once and close the handle; the same bytes feed both the plausibility
# gate and the texel unpack below.
with open(ATLAS, "rb") as _atlas_fh:
    _atlas_raw = _atlas_fh.read()
_ok, _why = font_like(payload_stats(_atlas_raw, PSMCT32))
if not _ok:
    sys.exit(f"[Ch345b] REFUSING: atlas {os.path.basename(ATLAS)} is not glyph-plausible ({_why}).\n"
             f" Pick the RESIDENT font epoch first: gs_texture_residency.py <dump> finds it; the wrong\n"
             f" epoch (cube checker) has no alpha mask. See project_ch345b_content_finding memory.")
# struct.unpack needs exactly ATLAS_W*ATLAS_H words; report a wrong-sized blob as a readable
# fatal error instead of a struct.error traceback.
if len(_atlas_raw) != ATLAS_W * ATLAS_H * 4:
    sys.exit(f"[Ch345b] {ATLAS}: expected {ATLAS_W*ATLAS_H*4} bytes "
             f"({ATLAS_W}x{ATLAS_H} PSMCT32), got {len(_atlas_raw)}")
atlas = list(struct.unpack(f"<{ATLAS_W*ATLAS_H}I", _atlas_raw))
|
||||
|
||||
glyphs = []
|
||||
for ln in open(SPRITES):
|
||||
t = ln.split()
|
||||
if t and t[0] == "sprite":
|
||||
x0,y0,x1,y1,u0,v0,u1,v1 = (int(round(float(v))) for v in t[1:9])
|
||||
glyphs.append(dict(x0=x0,y0=y0,x1=x1,y1=y1,u0=u0,v0=v0,u1=u1,v1=v1))
|
||||
|
||||
# --- bake each glyph: dense screen-sized sub-texture of the EXACT sampled texels ---
|
||||
for g in glyphs:
|
||||
w = abs(g["x1"]-g["x0"]); h = abs(g["y1"]-g["y0"]); g["w"], g["h"] = w, h
|
||||
sx0 = min(g["x0"],g["x1"]); sy0 = min(g["y0"],g["y1"])
|
||||
baked = []
|
||||
for ly in range(h):
|
||||
for lx in range(w):
|
||||
ut = htexel(sx0+lx, g["x0"], g["u0"], g["u0"], g["u1"], g["x1"]-g["x0"])
|
||||
vt = htexel(sy0+ly, g["y0"], g["v0"], g["v0"], g["v1"], g["y1"]-g["y0"])
|
||||
baked.append(atlas[vt*ATLAS_W + ut])
|
||||
g["baked"] = baked
|
||||
|
||||
# --- pack dense glyphs into a grid atlas, and lay them out on the FB_W x FB_H (128x128) screen grid ---
|
||||
COLS = 8
|
||||
cellw = max(g["w"] for g in glyphs); cellh = max(g["h"] for g in glyphs)
|
||||
rows = (len(glyphs) + COLS - 1) // COLS
|
||||
PACK_W = COLS * cellw; PACK_H = rows * cellh
|
||||
TBW = (PACK_W + 63) // 64
|
||||
pack = [0] * (PACK_W * PACK_H)
|
||||
for i, g in enumerate(glyphs):
|
||||
pu = (i % COLS) * cellw; pv = (i // COLS) * cellh; g["pu"], g["pv"] = pu, pv
|
||||
# screen grid (1:1 so du_dx = 1<<16); must fit FB_W x FB_H
|
||||
g["sx0"] = GAP + (i % COLS) * (cellw + 1); g["sy0"] = GAP + (i // COLS) * (cellh + 1)
|
||||
g["sx1"] = g["sx0"] + g["w"]; g["sy1"] = g["sy0"] + g["h"]
|
||||
for ly in range(g["h"]):
|
||||
for lx in range(g["w"]):
|
||||
pack[(pv+ly)*PACK_W + (pu+lx)] = g["baked"][ly*g["w"]+lx]
|
||||
|
||||
maxx = max(g["sx1"] for g in glyphs); maxy = max(g["sy1"] for g in glyphs)
|
||||
if maxx > FB_W or maxy > FB_H:
|
||||
sys.exit(f"screen grid {maxx}x{maxy} exceeds {FB_W}x{FB_H} — raise FB or COLS")
|
||||
|
||||
# --- software reference FB: render baked glyphs (identity tint) source-over the BG ---
|
||||
ref = [BG] * (FB_W * FB_H)
|
||||
for g in glyphs:
|
||||
for ly in range(g["h"]):
|
||||
for lx in range(g["w"]):
|
||||
t = g["baked"][ly*g["w"]+lx]; aA = (t >> 24) & 0xFF
|
||||
cs_r, cs_g, cs_b = t & 0xFF, (t>>8)&0xFF, (t>>16)&0xFF # white tint = identity MODULATE
|
||||
cd = BG; cd_r, cd_g, cd_b = cd & 0xFF, (cd>>8)&0xFF, (cd>>16)&0xFF
|
||||
outv = (aA<<24) | (srcover(cs_b,cd_b,aA)<<16) | (srcover(cs_g,cd_g,aA)<<8) | srcover(cs_r,cd_r,aA)
|
||||
ref[(g["sy0"]+ly)*FB_W + (g["sx0"]+lx)] = outv
|
||||
|
||||
# --- feeder SPRITE staging (sprite_mode word0[33]): grid screen + 1:1 packed UV ---
|
||||
def frame_1(fbw):
    """Build the FRAME_1 staging word: the low 6 bits of ``fbw`` placed at bit 16."""
    width_field = fbw & 0x3F
    return width_field << 16
|
||||
def alpha_srcover():
    """Return the constant ALPHA staging word (0x44) used for the source-over blend.

    Written out as four 2-bit selectors — presumably the GS ALPHA A/B/C/D fields
    (TODO confirm against the feeder/gs_stub decode).
    """
    sel_a, sel_b, sel_c, sel_d = 0, 1, 0, 1
    return sel_a | (sel_b << 2) | (sel_c << 4) | (sel_d << 6)
|
||||
def tex0_pack(tbp, tbw, tw, th, tfx):
    """Pack a PSMCT32 TEX0 word: TBP at bit 0, TBW at 14, PSM (0) at 20, TW at 26, TH at 30, TFX at 35.

    ``tw``/``th`` are the log2 size exponents. ``tfx`` used to be accepted but dropped; it is now
    placed in the 2-bit field at bits 35..36. tfx=0 produces the same word as before.
    """
    return tbp | (tbw << 14) | (0 << 20) | (tw << 26) | (th << 30) | ((tfx & 3) << 35)
|
||||
def uvd(u, v):
    """Pack integer texel coords into the staging UV word: each is shifted left 4 (x.4 fixed
    point), masked to 14 bits, with U in the low field and V placed at bit 14."""
    u_fixed = (u << 4) & 0x3FFF
    v_fixed = (v << 4) & 0x3FFF
    return u_fixed | (v_fixed << 14)
|
||||
def xyz2(x, y):
    """Pack integer screen coords into the staging XYZ2 word: the low 12 bits of ``x`` at bit 4
    and the low 12 bits of ``y`` at bit 20."""
    x_field = (x & 0xFFF) << 4
    y_field = (y & 0xFFF) << 20
    return x_field | y_field
|
||||
TW = max(PACK_W-1, 1).bit_length(); TH = max(PACK_H-1, 1).bit_length() # log2 ceil for TEX0
|
||||
stg = []
|
||||
stg.append((len(glyphs) & 0xFFFF) | (1 << 33))
|
||||
stg.append(frame_1(FBW)); stg.append(alpha_srcover()); stg.append(0); stg.append(0)
|
||||
stg.append(tex0_pack(GLYPH_TBP, TBW, TW, TH, 0)); stg.append(6 | (1<<4) | (1<<6)) # SPRITE+TME+ABE
|
||||
for g in glyphs:
|
||||
tint = 0x80808080
|
||||
stg += [tint, uvd(g["pu"], g["pv"]), xyz2(g["sx0"], g["sy0"]),
|
||||
tint, uvd(g["pu"]+g["w"], g["pv"]+g["h"]), xyz2(g["sx1"], g["sy1"])]
|
||||
if len(stg) > 256: sys.exit(f"staging {len(stg)} > 256 words")
|
||||
|
||||
os.makedirs(DATA, exist_ok=True)
|
||||
with open(os.path.join(DATA, "glyph_atlas.mem"), "w") as f:
|
||||
f.write(f"// Ch345b LOCAL packed glyph atlas {PACK_W}x{PACK_H} PSMCT32 (declared REPEAT). gitignored.\n")
|
||||
for p in pack: f.write(f"{p & 0xFFFFFFFF:08x}\n")
|
||||
with open(os.path.join(DATA, "glyph_sprites.mem"), "w") as f:
|
||||
f.write(f"// Ch345b LOCAL feeder SPRITE staging: {len(glyphs)} re-packed authentic glyphs. gitignored.\n")
|
||||
for w in stg: f.write(f"{w & 0xFFFFFFFFFFFFFFFF:016x}\n")
|
||||
for _ in range(256 - len(stg)): f.write(f"{0:016x}\n")
|
||||
with open(os.path.join(DATA, "glyph_ref.mem"), "w") as f:
|
||||
f.write(f"// Ch345b LOCAL SOFTWARE REFERENCE FB {FB_W}x{FB_H} (orig atlas + REPEAT + nearest). gitignored.\n")
|
||||
for p in ref: f.write(f"{p & 0xFFFFFFFF:08x}\n")
|
||||
|
||||
print(f"[Ch345b] glyphs={len(glyphs)} packed atlas={PACK_W}x{PACK_H} ({PACK_W*PACK_H*4}B, TBW={TBW} TW={TW} TH={TH})")
|
||||
print(f"[Ch345b] screen grid {COLS} cols, bbox {maxx}x{maxy} in {FB_W}x{FB_H}; staging {len(stg)} words")
|
||||
print(f"[Ch345b] DECLARED: REPEAT wrap + nearest + white MODULATE. atlas/sprites/ref -> {DATA} (LOCAL)")
|
||||
@@ -0,0 +1,87 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch349 GS local-memory model (the bridge Codex framed).
|
||||
|
||||
A faithful byte-level model of GS local memory (4 MiB VRAM) so a draw's SAMPLED texture can be reconstructed
|
||||
as the real hardware sees it — even when the asset is STREAMED in one PSM (e.g. PSMCT32, fast word writes)
|
||||
and SAMPLED in another (e.g. PSMT8 indexed). Both operations address the SAME physical bytes through their
|
||||
respective GS swizzles; modeling that crossover is the whole point.
|
||||
|
||||
Swizzle math is ported verbatim from the project's own RTL (which is in turn locked to PCSX2 GSTables.cpp):
|
||||
- PSMCT32 write/read <- rtl/gif_gs/gs_swizzle_psmct32_stub.sv (block grid + byte_in_block = yb*32+xb*4)
|
||||
- PSMT8 read <- rtl/gif_gs/gs_swizzle_psmt8_stub.sv (block grid + 16x16 columnTable8)
|
||||
Address convention matches the codebase: VRAM byte base = PTR*256 (TBP0/DBP/CBP are 14-bit, *256 == 4 MiB),
|
||||
and PTRs are page-aligned (multiple of 32) so page_index*8192 composes correctly off that base.
|
||||
"""
|
||||
|
||||
# block grid is shared by PSMCT32 and PSMT8 (4 rows x 8 cols), value = block index within page
|
||||
BLOCK = [
|
||||
[ 0, 1, 4, 5,16,17,20,21],
|
||||
[ 2, 3, 6, 7,18,19,22,23],
|
||||
[ 8, 9,12,13,24,25,28,29],
|
||||
[10,11,14,15,26,27,30,31],
|
||||
]
|
||||
# PSMT8 within-block 16x16 -> byte permutation (columnTable8)
|
||||
COL8 = [
|
||||
[ 0, 4, 16, 20, 32, 36, 48, 52, 2, 6, 18, 22, 34, 38, 50, 54],
|
||||
[ 8, 12, 24, 28, 40, 44, 56, 60, 10, 14, 26, 30, 42, 46, 58, 62],
|
||||
[ 33, 37, 49, 53, 1, 5, 17, 21, 35, 39, 51, 55, 3, 7, 19, 23],
|
||||
[ 41, 45, 57, 61, 9, 13, 25, 29, 43, 47, 59, 63, 11, 15, 27, 31],
|
||||
[ 96,100,112,116, 64, 68, 80, 84, 98,102,114,118, 66, 70, 82, 86],
|
||||
[104,108,120,124, 72, 76, 88, 92,106,110,122,126, 74, 78, 90, 94],
|
||||
[ 65, 69, 81, 85, 97,101,113,117, 67, 71, 83, 87, 99,103,115,119],
|
||||
[ 73, 77, 89, 93,105,109,121,125, 75, 79, 91, 95,107,111,123,127],
|
||||
[128,132,144,148,160,164,176,180,130,134,146,150,162,166,178,182],
|
||||
[136,140,152,156,168,172,184,188,138,142,154,158,170,174,186,190],
|
||||
[161,165,177,181,129,133,145,149,163,167,179,183,131,135,147,151],
|
||||
[169,173,185,189,137,141,153,157,171,175,187,191,139,143,155,159],
|
||||
[224,228,240,244,192,196,208,212,226,230,242,246,194,198,210,214],
|
||||
[232,236,248,252,200,204,216,220,234,238,250,254,202,206,218,222],
|
||||
[193,197,209,213,225,229,241,245,195,199,211,215,227,231,243,247],
|
||||
[201,205,217,221,233,237,249,253,203,207,219,223,235,239,251,255],
|
||||
]
|
||||
|
||||
def ct32_addr(dbp, dbw, x, y):
    """Byte address of PSMCT32 pixel (x,y) in a buffer based at dbp (256-byte units), width dbw (64px units).

    The address is the sum of the buffer base, an 8192-byte page (64 px wide, 32 px tall),
    a 256-byte block picked from BLOCK by the 8x8 tile position inside the page, and a
    linear offset of 32 bytes per row / 4 bytes per pixel inside the block.
    """
    page = (y >> 5) * dbw + (x >> 6)
    block_row = (y >> 3) & 3
    block_col = (x >> 3) & 7
    in_block = (y & 7) * 32 + (x & 7) * 4
    return dbp * 256 + page * 8192 + BLOCK[block_row][block_col] * 256 + in_block
|
||||
|
||||
def psmt8_addr(tbp, fbw, x, y):
    """Byte address of PSMT8 pixel (x,y) in a buffer based at tbp (256-byte units), width fbw (64px units).

    The address is the sum of the buffer base, an 8192-byte page (128 px wide, 64 px tall,
    hence ``fbw >> 1`` pages per row), a 256-byte block picked from BLOCK by the 16x16 tile
    position inside the page, and the COL8 byte permutation inside the block.
    """
    pages_per_row = fbw >> 1
    page = (y >> 6) * pages_per_row + (x >> 7)
    block_row = (y >> 4) & 3
    block_col = (x >> 4) & 7
    in_block = COL8[y & 15][x & 15]
    return tbp * 256 + page * 8192 + BLOCK[block_row][block_col] * 256 + in_block
|
||||
|
||||
class LocalMem:
    """4 MiB GS VRAM. Seed from the dump's initial snapshot, then replay host->local uploads in order."""

    SIZE = 0x400000

    def __init__(self, init_bytes=None):
        """Start from the first SIZE bytes of ``init_bytes`` when it is at least SIZE long;
        otherwise (None or too short) start from all-zero memory."""
        seeded = init_bytes is not None and len(init_bytes) >= self.SIZE
        self.m = bytearray(init_bytes[:self.SIZE]) if seeded else bytearray(self.SIZE)

    def write_image_ct32(self, dbp, dbw, dsax, dsay, w, h, words):
        """Host->local upload in PSMCT32: raster-order words fill (dsax..+w)x(dsay..+h) via ct32 swizzle.

        `words` may be shorter than w*h (partial transfer); the fill stops when it is exhausted.
        A pixel whose 4 bytes do not fit inside the memory is skipped, but its word is still
        consumed so later pixels keep their raster position.
        """
        total = len(words)
        consumed = 0
        for row in range(h):
            for col in range(w):
                if consumed >= total:
                    return
                addr = ct32_addr(dbp, dbw, dsax + col, dsay + row)
                if 0 <= addr and addr + 4 <= self.SIZE:
                    self.m[addr:addr + 4] = (words[consumed] & 0xFFFFFFFF).to_bytes(4, "little")
                consumed += 1

    def read_psmt8(self, tbp, fbw, tw, th):
        """Read a tw x th PSMT8 texture through the PSMT8 swizzle into a row-major bytearray.

        Texels whose address falls outside the memory read as 0.
        """
        out = bytearray(tw * th)          # zero-filled, so out-of-range texels stay 0
        for y in range(th):
            row_base = y * tw
            for x in range(tw):
                addr = psmt8_addr(tbp, fbw, x, y)
                if 0 <= addr < self.SIZE:
                    out[row_base + x] = self.m[addr]
        return out

    def read_ct32_word(self, dbp, dbw, x, y):
        """Return the little-endian 32-bit word of PSMCT32 pixel (x,y), or 0 when it would run past the end."""
        addr = ct32_addr(dbp, dbw, x, y)
        if addr + 4 > self.SIZE:
            return 0
        return int.from_bytes(self.m[addr:addr + 4], "little")
|
||||
@@ -0,0 +1,169 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Ch343 — build the AUTHENTIC cube board fixtures (LOCAL, dump-derived).
|
||||
|
||||
Inputs (local, from the cubes .gs dump pipeline):
|
||||
captures/gs/cubes/extracted/cube_tex_64.bin 64x64 PSMCT32 cube texture (downscaled)
|
||||
captures/gs/cubes/extracted/cube_persp.scene.txt translated perspective scene (ps2_feeder)
|
||||
|
||||
Outputs into sim/data/top_psmct32_raster_demo/ (gitignored — game-derived content):
|
||||
payload_cube_setup.mem boot GIF payload: upload the 64x64 cube texture @ TBP=128 (32 KiB EE RAM)
|
||||
bios_cube_setup.mem one-shot bootlet, QWC = payload qword count
|
||||
feeder_cube_persp.mem cube perspective staging (ps2_feeder --dump-file cube_persp.scene)
|
||||
|
||||
Reuses bake.py's GIF packers + bootlet builder so the framing matches the proven feeder_persp path
|
||||
exactly; only the texture size (64x64 vs 16x16) and the RAM/QWC budget differ. ABE=0 S1 perspective.
|
||||
This GENERATOR is committable; its .mem outputs are not (provenance: authentic GS dump content)."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import struct
|
||||
import subprocess
|
||||
|
||||
HERE = os.path.dirname(os.path.abspath(__file__))
|
||||
ROOT = os.path.normpath(os.path.join(HERE, ".."))
|
||||
DATA = os.path.join(ROOT, "sim", "data", "top_psmct32_raster_demo")
|
||||
sys.path.insert(0, DATA)
|
||||
import bake # noqa: E402 (reuse giftag/aplusd/*_pack/bootlet/write_bios_mem)
|
||||
|
||||
TEX_BIN = os.path.join(ROOT, "captures", "gs", "cubes", "extracted", "cube_tex_64.bin")
|
||||
SCENE = os.path.join(ROOT, "captures", "gs", "cubes", "extracted", "cube_persp.scene.txt")
|
||||
|
||||
# VRAM layout (64 KiB / 16384 words) — three 64x64 surfaces must NOT alias:
|
||||
# FB FBP=0 -> words 0..4095 ZBP units = 2048 words
|
||||
# Z ZBP=2 -> words 4096..8191 (ps2_feeder hardcodes zbuf1_pack(2) + GEQUAL)
|
||||
# TEX TBP=128-> words 8192..12287 TBP0 units = 64 words
|
||||
# The scene's translator picked TBP=64 (word 4096), which ALIASES the Z buffer: the render then reads
|
||||
# texel values (~0xFF000000) as Z and GEQUAL rejects the whole cube. Relocate the texture past Z.
|
||||
TBP = 128 # word 8192 — clear of FB (0..4095) and Z (4096..8191)
|
||||
TBW = 1 # 64-wide -> TBW=1 (64-px stride); matches the scene's tex0 stride
|
||||
W = H = 64
|
||||
RAM_QWORDS = 2048 # 32 KiB / 16 (EE RAM payload; unrelated to VRAM)
|
||||
|
||||
for p in (TEX_BIN, SCENE):
|
||||
if not os.path.exists(p):
|
||||
sys.exit(f"missing local input (run the cubes .gs extraction first): {p}")
|
||||
|
||||
# --- 64x64 PSMCT32 texels, raster order (ty*64 + tx), little-endian u32 ---
|
||||
raw = open(TEX_BIN, "rb").read()
|
||||
if len(raw) != W * H * 4:
|
||||
sys.exit(f"{TEX_BIN}: expected {W*H*4} bytes, got {len(raw)}")
|
||||
texels = list(struct.unpack(f"<{W*H}I", raw))
|
||||
|
||||
# --- boot GIF payload: BITBLTBUF/TRXPOS/TRXREG/TRXDIR (A+D) + IMAGE upload ---
|
||||
qw = []
|
||||
qw.append(bake.giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE)) # 4x A+D
|
||||
qw.append(bake.aplusd(bake.R_BITBLTBUF, bake.bitbltbuf_pack(TBP, TBW, 0)))
|
||||
qw.append(bake.aplusd(bake.R_TRXPOS, bake.trxpos_pack(0, 0)))
|
||||
qw.append(bake.aplusd(bake.R_TRXREG, bake.trxreg_pack(W, H)))
|
||||
qw.append(bake.aplusd(bake.R_TRXDIR, bake.trxdir_pack(0))) # host -> local
|
||||
n_image = (W * H) // 4 # 4 texels / qword = 1024
|
||||
qw.append(bake.giftag(n_image, 1, 2, 0, 0)) # IMAGE, EOP=1 (setup-only payload)
|
||||
for i in range(n_image):
|
||||
word = 0
|
||||
for lane in range(4):
|
||||
word |= (texels[i * 4 + lane] & 0xFFFFFFFF) << (32 * lane)
|
||||
qw.append(word)
|
||||
|
||||
qwc = len(qw) # 6 + 1024 = 1030
|
||||
if 16 + qwc > RAM_QWORDS:
|
||||
sys.exit(f"payload {qwc} qw + 16 lead > {RAM_QWORDS} (bump RAM_SIZE_BYTES)")
|
||||
|
||||
# Write the EE RAM image: 16 zero lead qwords, the GIF upload, then zero padding up to
# RAM_QWORDS, one 128-bit qword (32 hex digits) per line.
payload_path = os.path.join(DATA, "payload_cube_setup.mem")
with open(payload_path, "w") as f:
    # The header reports the TBP actually packed into BITBLTBUF (the texture was relocated past
    # the Z buffer), not the translator's original value.
    f.write(f"// Ch343 LOCAL authentic-cube texture boot payload ({W}x{H} PSMCT32 @ TBP={TBP}).\n")
    f.write("// game-derived (cubes .gs) -> gitignored. qw 0..15 zero; qw 16.. = GIF upload.\n")
    for _ in range(16):
        f.write(f"{0:032x}\n")
    for x in qw:
        f.write(f"{x & ((1 << 128) - 1):032x}\n")
    for _ in range(RAM_QWORDS - 16 - qwc):
        f.write(f"{0:032x}\n")
|
||||
|
||||
# --- one-shot bootlet (same shape as feeder_persp setup; QWC chosen for the cube payload) ---
|
||||
bake.write_bios_mem(
|
||||
"bios_cube_setup.mem",
|
||||
bake.build_textured_demo_bootlet_disp(qwc, bake.CAP_DISPLAY1_HI, bake.CAP_FBW),
|
||||
f"Ch343 LOCAL authentic-cube setup bootlet (QWC={qwc}). game-derived -> gitignored.")
|
||||
|
||||
# --- cube perspective staging via ps2_feeder --dump-file ---
|
||||
# The translator bound TEX0 TBP0 to a word that aliases the Z buffer; rewrite the tex0 line so the
|
||||
# render samples the relocated texture (TBP) instead. Single source of truth = the TBP above.
|
||||
# The translator grabbed the EARLIEST ≤27-tri textured run, which is a WIDE-SHORT strip of ~4 tiny
|
||||
# cubes (source 280x75 viewport-fit to 64x64 -> squashed to a y:[0..17] band of speckle). Isolate ONE
|
||||
# cube (CUBE_TRIS contiguous tris) and re-fit it to fill the frame: a legitimate viewport zoom. The
|
||||
# perspective ST/Q are interpolated linearly in SCREEN space, so a uniform 2D scale of the screen verts
|
||||
# preserves per-pixel u=S/Q,v=T/Q exactly — faithful, just bigger. The first 6 tris (lines 5..10) are a
|
||||
# corner-view cube: 3 faces meeting at center vertex (59,13).
|
||||
CUBE_FIRST = 0 # index of the first persptri of the chosen cube
|
||||
CUBE_TRIS = 6 # 3 faces x 2 tris
|
||||
MARGIN = 2 # leave a 2px border in the 64x64 frame
|
||||
SPAN_PX = (W - 1) - 2*MARGIN
|
||||
|
||||
# Two faithful coordinate conversions applied to the selected cube:
|
||||
# (a) ST normalized->texel: retroDE's gs_persp_uv recovers the TEXEL index directly (expects S/Q in
|
||||
# 0..TW, as bake.py persp_attrs emits S=u_texel/w). The dump's ST are NORMALIZED (0..1) so S/Q
|
||||
# lands in [0,1] -> samples only texel (0,0). Scale S by TW, T by TH (what real GS does internally).
|
||||
# (b) TEX0 -> DECAL (TFX=1): emit the authentic texels directly. The dump's per-vertex color came
|
||||
# through as a uniform (128,0,0) (translator artifact); MODULATE with it masks G/B. DECAL matches
|
||||
# the proven Ch342 checkerboard. (TODO: backport (a)+(b) into gs_translate_tex.py --perspective.)
|
||||
scene_lines = open(SCENE).read().splitlines()
|
||||
header = [] # comments + persp + tex0 (patched)
|
||||
tris = [] # token-lists of persptri lines (len 20)
|
||||
for ln in scene_lines:
|
||||
tok = ln.split()
|
||||
if not tok or tok[0] == "go":
|
||||
continue
|
||||
if tok[0] == "persptri" and len(tok) == 20:
|
||||
tris.append(tok)
|
||||
else:
|
||||
if tok[0] == "tex0":
|
||||
tok[1] = str(TBP) # tex0 <TBP> <TBW> <TW> <TH> <TFX>
|
||||
tok[5] = "1" # DECAL
|
||||
ln = " ".join(tok)
|
||||
header.append(ln)
|
||||
|
||||
sel = tris[CUBE_FIRST:CUBE_FIRST + CUBE_TRIS]
|
||||
VTX = ((1, 2, 3, 4), (6, 7, 8, 9), (11, 12, 13, 14)) # (X,Y,S,T) token indices per vertex
|
||||
xs = [int(t[xi]) for t in sel for (xi, _, _, _) in VTX]
|
||||
ys = [int(t[yi]) for t in sel for (_, yi, _, _) in VTX]
|
||||
minx, maxx, miny, maxy = min(xs), max(xs), min(ys), max(ys)
|
||||
scale = SPAN_PX / max(maxx - minx, maxy - miny, 1) # uniform -> preserve cube proportions
|
||||
offx = MARGIN + (SPAN_PX - (maxx - minx) * scale) / 2.0
|
||||
offy = MARGIN + (SPAN_PX - (maxy - miny) * scale) / 2.0
|
||||
for t in sel:
|
||||
for (xi, yi, si, ti) in VTX:
|
||||
t[xi] = str(int(round(offx + (int(t[xi]) - minx) * scale))) # re-fit screen X
|
||||
t[yi] = str(int(round(offy + (int(t[yi]) - miny) * scale))) # re-fit screen Y
|
||||
t[si] = str(int(round(int(t[si]) * W))) # ST normalized -> texel
|
||||
t[ti] = str(int(round(int(t[ti]) * H)))
|
||||
patched = header + [" ".join(t) for t in sel] + ["go"]
|
||||
N_TRI_OUT = len(sel)
|
||||
scene_tmp = os.path.join(DATA, ".cube_persp.tbp.scene.txt")
|
||||
with open(scene_tmp, "w") as f:
|
||||
f.write("\n".join(patched) + "\n")
|
||||
|
||||
psf = os.path.join(HERE, "ps2_feeder")
|
||||
subprocess.run(["gcc", "-O2", "-o", psf, os.path.join(HERE, "ps2_feeder.c")], check=True)
|
||||
stg = subprocess.run([psf, "--dump-file", scene_tmp],
|
||||
capture_output=True, text=True, check=True).stdout
|
||||
os.remove(scene_tmp)
|
||||
stg_words = [ln for ln in stg.splitlines() if ln.strip() and not ln.lstrip().startswith("//")]
|
||||
staging_path = os.path.join(DATA, "feeder_cube_persp.mem")
|
||||
with open(staging_path, "w") as f:
|
||||
f.write("// Ch343 LOCAL cube perspective staging (ps2_feeder --dump-file cube_persp.scene).\n")
|
||||
f.write("// game-derived (cubes .gs) -> gitignored. ABE=0 S1 perspective path.\n")
|
||||
f.write(stg)
|
||||
if not stg.endswith("\n"):
|
||||
f.write("\n")
|
||||
|
||||
# --- raster-order texel hex for the smoke TB's VRAM round-trip check (LOCAL) ---
|
||||
tex_ref_path = os.path.join(DATA, "feeder_cube_tex.mem")
|
||||
with open(tex_ref_path, "w") as f:
|
||||
f.write("// Ch343 LOCAL cube texels, raster order (vram word TBP*64+i). game-derived -> gitignored.\n")
|
||||
for t in texels:
|
||||
f.write(f"{t & 0xFFFFFFFF:08x}\n")
|
||||
|
||||
print(f"[Ch343] payload_cube_setup.mem : {qwc} qw (QWC={qwc}, 32 KiB EE RAM, TBP={TBP} TBW={TBW})")
|
||||
print(f"[Ch343] bios_cube_setup.mem : bootlet QWC={qwc}")
|
||||
print(f"[Ch343] feeder_cube_persp.mem : {len(stg_words)} staging words from cube_persp.scene")
|
||||
print(f"[Ch343] outputs -> {DATA} (LOCAL / gitignored)")
|
||||
@@ -0,0 +1,113 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch347 authentic SH3 board fixture: bootlet GIF payload that uploads the real SH3 PSMT8
|
||||
texture + real CLUT and draws it as a DECAL sprite. Built directly on the Ch296 PSMT8+CLUT board template
|
||||
(bake.build_clut8_demo_payload), scaled to a 256-entry CLUT. This revision bakes the 64x64 crop (W = H = 64); the full 128x128 texture is the follow-up.
|
||||
|
||||
Mirrors gs_make_cube_fixture.py: imports bake.py helpers, reads the dump-derived assets emitted by
|
||||
gs_extract_sh3_clut.py (sh3_tex_idx.mem + sh3_clut.mem), and writes the LOCAL/gitignored board fixtures:
|
||||
payload_sh3_clut.mem GIF payload: BITBLT CLUT (256) + BITBLT PSMT8 tex (64x64 crop) + TEX0(PSMT8,CLD=1,CSM2
|
||||
linear) + one 64x64 DECAL SPRITE
|
||||
bios_sh3_clut.mem one-shot bootlet, QWC = payload qword count, DISPLAY1 = 64x64
|
||||
|
||||
DECAL/opaque (TFX=1) so the authentic art is visible; the real CLUT alpha (~0x04) is kept BYTE-FOR-BYTE in
|
||||
the palette, just ignored by the render mode (Codex guardrail). CLUT declared CSM2/linear (the order
|
||||
validated against the authentic render in tools/gs_extract_sh3_clut.py).
|
||||
"""
|
||||
import sys, os
|
||||
HERE=os.path.dirname(os.path.abspath(__file__)); ROOT=os.path.normpath(os.path.join(HERE,".."))
|
||||
DATA=os.path.join(ROOT,"sim","data","top_psmct32_raster_demo")
|
||||
sys.path.insert(0, DATA); import bake # reuse giftag/aplusd/*_pack/bootlet helpers
|
||||
|
||||
# Ch347 (ii): the 64x64 authentic CROP — fits the PROVEN 64x64 bram CLUT config (no new 128 KiB/scanout
|
||||
# integration). Full 128x128 is the follow-up rung (i). Swap sh3_tex_idx64->sh3_tex_idx + W/H/FBW for (i).
|
||||
W = H = 64
|
||||
FBW = 1 # 64 px / 64
|
||||
TBP0 = 64 # texture base byte = 64*256 = 0x4000 (just past the 64x64 PSMCT32 FB = 16 KiB)
|
||||
TBW = 1 # 64 texels/row stride
|
||||
CBP = 80 # CLUT base byte = 80*256 = 0x5000 (past the 4 KiB texture)
|
||||
RAM_QWORDS = 512 # 8 KiB EE RAM payload; SH3 64x64 payload ~340 qw fits
|
||||
|
||||
def read_mem(name, n):
    """Read a ``.mem`` hex file from DATA and return its words as ints.

    Blank lines and lines that start with ``//`` are skipped; every other
    line is parsed as one base-16 integer.

    Args:
        name: File name, resolved relative to DATA.
        n: Minimum number of words the file must provide.

    Returns:
        Every parsed word, in file order (the list may be longer than ``n``).

    Raises:
        SystemExit: If the file yields fewer than ``n`` words.
    """
    # ``with`` closes the handle as soon as parsing is done; the previous bare
    # ``open()`` in the for-loop header never closed it explicitly.
    with open(os.path.join(DATA, name)) as src:
        stripped = (line.strip() for line in src)
        vals = [int(text, 16) for text in stripped if text and not text.startswith("//")]
    if len(vals) < n:
        sys.exit(f"{name}: {len(vals)} < {n} (run tools/gs_extract_sh3_clut.py first)")
    return vals
|
||||
|
||||
tex_words = read_mem("sh3_tex_idx64.mem", W*H//4) # 1024 packed words (4 indices each) — the 64x64 crop
|
||||
clut = read_mem("sh3_clut.mem", 256) # 256 PSMCT32 entries
|
||||
|
||||
# TEX0_1: PSMT8 64x64 crop (TW=TH=6; the full 128x128 texture would need TW=TH=7), TFX=DECAL, + CLUT side (CBP, CPSM=PSMCT32, CSM2 linear, CLD=1 -> load)
|
||||
def tex0_sh3():
    """Return the TEX0_1 value: the bake.tex0_pack() texture half with the CLUT fields OR-ed in.

    The texture half is packed for PSM 0x13 with tw=th=6 and tfx=1 at TBP0/TBW; the CLUT
    fields are placed at bit 37 and above.
    """
    tex0 = bake.tex0_pack(TBP0, TBW, psm=0x13, tw=6, th=6, tfx=1)  # 64x64 (TW=TH=6)
    # (value, bit position) per CLUT-side field; the zero-valued ones are listed to document the layout.
    clut_fields = (
        (CBP & 0x3FFF, 37),  # CBP
        (0 & 0xF, 51),       # CPSM = PSMCT32
        (1 & 0x1, 55),       # CSM  = CSM2 (linear)
        (0 & 0x1F, 56),      # CSA  = 0
        (1 & 0x7, 61),       # CLD  = 1 (fire VRAM->CLUT load on commit)
    )
    for value, shift in clut_fields:
        tex0 |= value << shift
    return tex0
|
||||
|
||||
def build_payload():
    """Return the GIF payload as a list of 128-bit qwords (Python ints).

    Four packets are emitted, in this order:
      U1  transfer setup + IMAGE data uploading the 256 CLUT entries to CBP
      U2  transfer setup + IMAGE data uploading the packed index texture to TBP0
      U3  PRIM / FRAME_1 / TEX0_1 / RGBAQ and the sprite's first UV + XYZ2 vertex
      U4  the sprite's second UV + XYZ2 vertex (EOP flag set in its giftag)
    """
    def image_qwords(words, n_qwords):
        # Four 32-bit words per qword, word 0 in the least-significant lane.
        return [
            sum((words[base + lane] & 0xFFFFFFFF) << (32 * lane) for lane in range(4))
            for base in range(0, 4 * n_qwords, 4)
        ]

    def upload(dbp, dbw, width, words, n_qwords):
        # One host->local PSMCT32 transfer of a width x 1 rectangle, followed by its IMAGE data.
        return [
            bake.giftag(1, 0, 0, 4, 0x0000_0000_0000_EEEE),
            bake.aplusd(bake.R_BITBLTBUF, bake.bitbltbuf_pack(dbp, dbw, 0)),  # DPSM=PSMCT32
            bake.aplusd(bake.R_TRXPOS, bake.trxpos_pack(0, 0)),
            bake.aplusd(bake.R_TRXREG, bake.trxreg_pack(width, 1)),
            bake.aplusd(bake.R_TRXDIR, bake.trxdir_pack(0)),
            bake.giftag(n_qwords, 0, 2, 0, 0),  # IMAGE: 4 words per qword
            *image_qwords(words, n_qwords),
        ]

    # U1: the CLUT goes up as ONE contiguous 256x1 row. Per the original author's note, a 16x16
    # upload would scatter the entries across rows while clut_loader reads them contiguously.
    qwords = upload(CBP, 1, 256, clut, 256 // 4)

    # U2: the PSMT8 indices are uploaded as (W*H/4) x 1 PSMCT32 words (DBW=16) so every write is a
    # full 32-bit word rather than a byte read-modify-write; TEX0 still binds the data as PSMT8.
    qwords += upload(TBP0, 16, W * H // 4, tex_words, W * H // 16)

    # U3: PRIM(SPRITE+TME) + FRAME + TEX0(PSMT8 + CLUT load) + RGBAQ + first UV/XYZ2 vertex.
    frame_val = bake.frame_1_psmct32(FBW)
    qwords += [
        bake.giftag(1, 0, 0, 6, 0x0000_0000_00EE_EEEE),
        bake.aplusd(bake.R_PRIM, bake.prim_sprite_tme()),
        bake.aplusd(bake.R_FRAME_1, frame_val),
        bake.aplusd(bake.R_TEX0_1, tex0_sh3()),
        bake.aplusd(bake.R_RGBAQ, bake.rgbaq_data(0x80, 0x80, 0x80)),
        bake.aplusd(bake.R_UV, bake.uv_data(0, 0)),
        bake.aplusd(bake.R_XYZ2, bake.xyz2_data(0, 0)),
    ]

    # U4: second UV/XYZ2 vertex closing the W x H textured sprite. EOP.
    qwords += [
        bake.giftag(1, 1, 0, 2, 0x0000_0000_0000_00EE),
        bake.aplusd(bake.R_UV, bake.uv_data(W - 1, H - 1)),
        bake.aplusd(bake.R_XYZ2, bake.xyz2_data(W - 1, H - 1)),
    ]
    return qwords
|
||||
|
||||
# --- script body: build the payload, then write the EE RAM image and the setup bootlet ---
payload = build_payload()
qwc = len(payload)
# The image keeps qwords 0..15 zero (written below), so the GIF data must fit in the remainder.
if 16 + qwc > RAM_QWORDS:
    sys.exit(f"payload {qwc} qw + 16 > {RAM_QWORDS} (bump RAM_QWORDS / RAM_SIZE_BYTES)")

# DISPLAY1 hi = (DH<<12)|DW with DW = W-1 and DH = H-1 (magh=magv=1). Derived from W/H rather than
# a literal 63 so it cannot drift from the "DISPLAY1={W}x{H}" text emitted below; for the current
# W = H = 64 the value is unchanged.
disp_hi = ((H - 1) << 12) | (W - 1)

zero_qw = f"{0:032x}\n"
with open(os.path.join(DATA, "payload_sh3_clut.mem"), "w") as f:
    f.write(f"// Ch347 LOCAL authentic SH3 64x64-CROP PSMT8+CLUT board payload (tex @ TBP={TBP0}, CLUT @ CBP={CBP}).\n")
    f.write(f"// SH3-derived -> gitignored. qw 0..15 zero; qw 16.. = GIF (BITBLT CLUT + tex + DECAL sprite). QWC={qwc}.\n")
    f.writelines([zero_qw] * 16)                                          # reserved qwords 0..15
    f.writelines(f"{w & ((1<<128)-1):032x}\n" for w in payload)           # GIF payload
    f.writelines([zero_qw] * (RAM_QWORDS - 16 - qwc))                     # zero-pad to RAM_QWORDS

bake.write_bios_mem(
    "bios_sh3_clut.mem",
    bake.build_textured_demo_bootlet_disp(qwc, disp_hi, FBW),
    f"Ch347 LOCAL authentic SH3 64x64-crop PSMT8+CLUT setup bootlet (QWC={qwc}, DISPLAY1={W}x{H}). SH3-derived -> gitignored.")

print(f"[Ch347] payload_sh3_clut.mem: {qwc} qw (BITBLT 256-CLUT + {W}x{H} PSMT8 tex + DECAL sprite, 8 KiB RAM)")
print(f"[Ch347] bios_sh3_clut.mem : bootlet QWC={qwc}, DISPLAY1={W}x{H} (disp_hi=0x{disp_hi:x}), FBW={FBW}")
print(f"[Ch347] VRAM layout: FB {W}x{H} PSMCT32 @0 ({W*H*4//1024}KiB) + PSMT8 tex @TBP={TBP0} ({W*H//1024}KiB) + CLUT @CBP={CBP} (1KiB) -> fits the proven 64x64 bram config")
|
||||
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch348 authentic SH3 PSMT8+CLUT through the PERSPECTIVE feeder path.
|
||||
|
||||
Composes Ch342 (perspective ST/Q triangles via the feeder S1-path) + Ch347 (authentic SH3 PSMT8 texture +
|
||||
real CLUT). The feeder staging carries a perspective QUAD (2 TME tris, word0[32]=1) AND the TEX0 (PSM=PSMT8,
|
||||
CLD=1, CSM2-linear) — so the feeder's TEX0 commit fires the VRAM->CLUT load; a setup bootlet uploads the SH3
|
||||
PSMT8 texture (as a PSMCT32 transfer — word writes, no byte-RMW) + the SH3 CLUT (256x1 contiguous, the Ch347
|
||||
lesson). DECAL/opaque; real CLUT RGB authentic, alpha not blended.
|
||||
|
||||
Label (Codex): authentic SH3 PSMT8 texture + real CLUT rendered through the proven perspective-triangle path
|
||||
on silicon, chosen perspective geometry (NOT authentic SH3 draw geometry).
|
||||
|
||||
Outputs (LOCAL, gitignored) into sim/data/top_psmct32_raster_demo/:
|
||||
bios_sh3_persp.mem / payload_sh3_persp.mem setup bootlet: BITBLT CLUT + PSMT8 texture (no tris)
|
||||
feeder_sh3_persp.mem feeder staging: perspective quad + TEX0(PSMT8,CLD=1)
|
||||
"""
|
||||
import sys, os
|
||||
HERE=os.path.dirname(os.path.abspath(__file__)); ROOT=os.path.normpath(os.path.join(HERE,".."))
|
||||
DATA=os.path.join(ROOT,"sim","data","top_psmct32_raster_demo")
|
||||
sys.path.insert(0, DATA); import bake
|
||||
|
||||
W=H=64 # SH3 64x64 crop
|
||||
TBP0=64 # PSMT8 texture base byte 0x4000 (past the 64x64 PSMCT32 FB = 16 KiB)
|
||||
CBP=80 # CLUT base byte 0x5000 (past the 4 KiB texture)
|
||||
FBW=1
|
||||
RAM_QWORDS=512 # 8 KiB EE RAM (256-CLUT + 64x64 texture upload ~ 340 qw)
|
||||
# perspective quad in the 64x64 FB: x 8..56, y 8..56; top FAR (w=8), bottom NEAR (w=1); UV over 0..64.
|
||||
PX0,PX1,PY0,PY1 = 8,56,8,56
|
||||
WF,WN = 8,1
|
||||
|
||||
def read_mem(name, n):
    """Read a ``.mem`` hex file from DATA and return its words as ints.

    Blank lines and lines whose first non-blank characters are ``//`` are
    skipped; every other line is parsed as one base-16 integer.

    Args:
        name: File name, resolved relative to DATA.
        n: Minimum number of words the file must provide.

    Returns:
        Every parsed word, in file order (the list may be longer than ``n``).

    Raises:
        SystemExit: If the file yields fewer than ``n`` words.
    """
    # ``with`` closes the handle once parsing finishes; the previous list
    # comprehension iterated a bare ``open()`` and never closed it explicitly.
    with open(os.path.join(DATA, name)) as src:
        v = []
        for line in src:
            text = line.strip()
            if text and not text.startswith("//"):
                v.append(int(text, 16))
    if len(v) < n:
        sys.exit(f"{name}: {len(v)}<{n} (run gs_extract_sh3_clut.py first)")
    return v
|
||||
tex_words = read_mem("sh3_tex_idx64.mem", W*H//4) # 1024 packed words (4 indices each)
|
||||
clut = read_mem("sh3_clut.mem", 256)
|
||||
|
||||
def tex0_sh3_clut():
    """Return the TEX0 value: bake.tex0_pack() texture half plus the CLUT fields.

    Texture half: PSM 0x13, tw=th=6 (64x64), tfx=1 (DECAL), base TBP0, buffer width 1.
    CLUT half: CBP, CPSM=PSMCT32, CSM2 linear, CSA=0, CLD=1.
    """
    tex_half = bake.tex0_pack(TBP0, 1, psm=0x13, tw=6, th=6, tfx=1)
    cbp_bits = (CBP & 0x3FFF) << 37
    cpsm_bits = (0 & 0xF) << 51   # CPSM = PSMCT32
    csm_bits = (1 & 1) << 55      # CSM2 (linear)
    csa_bits = (0 & 0x1F) << 56   # CSA = 0
    cld_bits = (1 & 7) << 61      # CLD = 1
    return tex_half | cbp_bits | cpsm_bits | csm_bits | csa_bits | cld_bits
|
||||
|
||||
# --- setup bootlet payload: BITBLT the CLUT (256x1) + the PSMT8 texture (as PSMCT32 words). No tris. ---
|
||||
def build_setup_payload():
    """Return the setup-bootlet GIF payload as a list of 128-bit qwords (no triangles).

    Two host->local PSMCT32 transfers are emitted back to back: the 256-entry CLUT to CBP
    (256x1), then the packed index texture to TBP0 (W*H/4 words x 1 row). Only the second
    IMAGE giftag carries the EOP flag.
    """
    # (destination base, DBW, row width in 32-bit words, source words, EOP flag on the IMAGE tag)
    transfers = (
        (CBP, 1, 256, clut, 0),                  # CLUT, PSMCT32, 256x1 contiguous
        (TBP0, 16, W * H // 4, tex_words, 1),    # PSMT8 texture uploaded as PSMCT32 words; EOP
    )
    out = []
    for dbp, dbw, width, words, eop in transfers:
        n_qwords = width // 4                    # four 32-bit words per qword
        out.append(bake.giftag(1, 0, 0, 4, 0xEEEE))
        out.append(bake.aplusd(bake.R_BITBLTBUF, bake.bitbltbuf_pack(dbp, dbw, 0)))
        out.append(bake.aplusd(bake.R_TRXPOS, bake.trxpos_pack(0, 0)))
        out.append(bake.aplusd(bake.R_TRXREG, bake.trxreg_pack(width, 1)))
        out.append(bake.aplusd(bake.R_TRXDIR, bake.trxdir_pack(0)))
        out.append(bake.giftag(n_qwords, eop, 2, 0, 0))
        for base in range(0, 4 * n_qwords, 4):
            # word 0 lands in the least-significant 32-bit lane
            out.append(sum((words[base + lane] & 0xFFFFFFFF) << (32 * lane) for lane in range(4)))
    return out
|
||||
|
||||
# --- feeder staging: perspective quad (2 tris) + TEX0(PSMT8,CLD=1). word0[32]=1. ---
|
||||
def build_feeder_staging():
    """Return the feeder staging words for the perspective quad (two TME triangles).

    Layout: word0 = triangle count with bit 32 set, then FRAME_1, ALPHA, TEST, ZBUF, TEX0 and
    the PRIM word, followed by three words (RGBAQ-with-Q, ST, XYZ2) for each vertex.
    """
    # Corner tuples are (screen x, screen y, u, v, w): top edge uses WF (far), bottom edge WN (near).
    corner = {
        "tl": (PX0, PY0, 0, 0, WF),
        "tr": (PX1, PY0, W, 0, WF),
        "bl": (PX0, PY1, 0, H, WN),
        "br": (PX1, PY1, W, H, WN),
    }
    triangles = (("tl", "tr", "bl"), ("tr", "bl", "br"))

    words = [
        len(triangles) | (1 << 32),
        bake.frame_1_psmct32(FBW),
        bake.alpha_pack(0, 1, 0, 1),
        bake.test1_geq(),
        bake.zbuf1_pack(2),
        tex0_sh3_clut(),        # PSMT8 + CLUT (CLD=1) — feeder commit fires the CLUT load
        3 | (1 << 4),           # TRI + TME, ABE=0 (S1 perspective path)
    ]
    for names in triangles:
        for key in names:
            sx, sy, u, v, wq = corner[key]
            s_fp, t_fp, q_fp = bake.persp_attrs(u, v, wq)
            words.extend((
                bake.rgbaq_with_q(0, 0, 0, q_fp),
                bake.st_data(s_fp, t_fp),
                bake.xyz2_dataz(sx, sy, 0x0000_5000),
            ))
    return words
|
||||
|
||||
# --- script body: write the setup payload image, the setup bootlet and the feeder staging ---
payload = build_setup_payload()
qwc = len(payload)
# The image keeps qwords 0..15 zero (written below), so the payload must fit in the remainder.
if 16 + qwc > RAM_QWORDS:
    sys.exit(f"payload {qwc}>{RAM_QWORDS-16}")

# DISPLAY1 hi = (DH<<12)|DW with DW = W-1 and DH = H-1. Derived from W/H rather than a literal 63
# so it cannot drift from the "DISPLAY1={W}x{H}" bootlet text below; unchanged for W = H = 64.
disp_hi = ((H - 1) << 12) | (W - 1)

zero_qw = f"{0:032x}\n"
with open(os.path.join(DATA, "payload_sh3_persp.mem"), "w") as f:
    f.write(f"// Ch348 LOCAL SH3 PSMT8+CLUT perspective setup payload (CLUT@{CBP}, tex@{TBP0}). gitignored. QWC={qwc}.\n")
    f.writelines([zero_qw] * 16)                                      # reserved qwords 0..15
    f.writelines(f"{x&((1<<128)-1):032x}\n" for x in payload)         # GIF payload
    f.writelines([zero_qw] * (RAM_QWORDS - 16 - qwc))                 # zero-pad to RAM_QWORDS

bake.write_bios_mem("bios_sh3_persp.mem",
    bake.build_textured_demo_bootlet_disp(qwc, disp_hi, FBW),
    f"Ch348 LOCAL SH3 perspective setup bootlet (QWC={qwc}, DISPLAY1={W}x{H}). gitignored.")

stg = build_feeder_staging()
bake.write_feeder_stg_mem("feeder_sh3_persp.mem", stg,
    "Ch348 LOCAL SH3 PSMT8+CLUT perspective quad through the feeder (word0[32]=1, TEX0 PSMT8 CLD=1). gitignored.")

print(f"[Ch348] payload_sh3_persp.mem: {qwc} qw (BITBLT 256-CLUT + {W}x{H} PSMT8 tex). feeder_sh3_persp.mem: {len(stg)} words (persp quad).")
print(f"[Ch348] perspective quad x[{PX0}..{PX1}] y[{PY0}..{PY1}], top FAR w={WF}, bottom NEAR w={WN}, UV 0..{W}; TEX0 PSMT8 CLD=1 CBP={CBP}.")
|
||||
@@ -0,0 +1,382 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch350 fixtures: the EXACT Ch349 SH3 draw for the full-res LPDDR integration TB.
|
||||
|
||||
Codex scope (no crop/downscale/repack): the actual draw geometry (f1 idx89761 TRI_STRIP -> triangle LIST),
|
||||
the reconstructed 512x512 PSMT8 texture (in LPDDR), the real CSM1 CLUT (grid/CT32 order in BRAM), perspective
|
||||
ST/Q, DECAL. Pixel-diff vs the Ch349 host reference with the Ch348 bounded <=1-texel acceptance.
|
||||
|
||||
Emits (LOCAL/gitignored -> sim/data/top_psmct32_raster_demo/):
|
||||
sh3_real_tex_lpddr.mem 65536 LE words = the PSMT8-SWIZZLED texture bytes mem[TBP*256 : +262144]
|
||||
(loaded into the behavioral LPDDR model; the GS texture-unit re-swizzles on read).
|
||||
sh3_real_idx.mem 65536 words = the DE-SWIZZLED 512x512 indices packed 4/word (TB reference search).
|
||||
sh3_real_clut.mem 256 words = the CT32-grid CLUT bytes mem[CBP*256 : +1024] (backdoored into BRAM;
|
||||
the Ch350 CSM1 clut_loader reads them in grid order).
|
||||
feeder_sh3_real.mem feeder staging: 68 tris (translated to FB origin) + TEX0(PSMT8,CSM1,CLD=1,DECAL).
|
||||
sh3_real_refmap.mem per FB pixel (FBW*64 x FBH): covered|interior|tu|tv for the bounded TB check.
|
||||
sh3_real_ref.png eyeball reference (host DECAL render at FB scale).
|
||||
|
||||
Geometry/addressing constants are printed + emitted as `sh3_real_params.vh` for the TB to include.
|
||||
Usage: gs_make_sh3_real_draw_fixture.py [dump.gs.zst] [--draw-idx 89761]
|
||||
"""
|
||||
import sys, os, glob
|
||||
HERE=os.path.dirname(os.path.abspath(__file__)); ROOT=os.path.normpath(os.path.join(HERE,".."))
|
||||
DATA=os.path.join(ROOT,"sim","data","top_psmct32_raster_demo")
|
||||
sys.path.insert(0, HERE)
|
||||
import gs_sh3_draw_census as C
|
||||
import gs_sh3_recon as RC
|
||||
import gs_localmem as LM
|
||||
sys.path.insert(0, DATA); import bake
|
||||
|
||||
# ---- address map (Ch350 BRAM-CROP diagnostic: read2 needs VRAM < 256 KiB; FB cropped, texture/CLUT/geometry
|
||||
# FULL-RES). VRAM_BYTES = 0x20000 (128 KiB, 2^15 words) so the read2 tripwire (>=256 KiB) is NOT tripped.
|
||||
# Only the FRAMEBUFFER/VIEWPORT is cropped to a deterministic CH-tall band; the texture stays full 512x512
|
||||
# (in LPDDR, cache-intercepted base 0x40000), the CSM1 CLUT is full, the geometry/ST/Q are unchanged. ----
|
||||
FBW = 4 # 256 px wide FB (64-px units) — full draw width (247) fits
|
||||
FBPXW = FBW*64 # 256
|
||||
CH = 120 # FB rows: 256*120*4 = 122880 B; + CLUT(1 KiB) < 128 KiB
|
||||
VRAM_BYTES = 0x20000 # 128 KiB BRAM VRAM (2^15 words) — under the 256 KiB read2 tripwire
|
||||
CBP = 0x1E000//256 # 480 (CLUT right after the 256x120 FB = 0x1E000)
|
||||
NEW_TBP = 0x40000//256 # 1024 (texture VRAM base; cache-intercepted, NOT in the 128 KiB BRAM)
|
||||
TEX_VRAM_BASE = NEW_TBP*256 # 0x40000
|
||||
TEX_BYTES = 512*512 # 262144 (PSMT8) — FULL texture, no downscale
|
||||
LPDDR_TEX_BASE = 0x00200000 # texture byte base in LPDDR4B
|
||||
TW_LOG, TH_LOG = 9, 9 # 512x512
|
||||
TBW_TEX = 8 # texture TBW (64-px units) — MUST match the original draw's swizzle
|
||||
STG_WORDS = 768
|
||||
|
||||
def recip8(q, IDX_BITS=8, SCALE=24, Q_W=24):
    """Integer reciprocal of ``q``, written by its author as a replica of gs_reciprocal_stub.

    ``q`` is normalized to an IDX_BITS-wide mantissa M whose top set bit sits at bit
    IDX_BITS-1; with e the index of q's most significant set bit the result is
    ``floor(2^(SCALE+IDX_BITS-1) / M) >> e``, saturated at ``2^(SCALE+1) - 1``.

    Args:
        q: Integer denominator. Values <= 0 return the saturation value.
        IDX_BITS: Mantissa width in bits.
        SCALE: Fixed-point scale of the result.
        Q_W: Accepted for signature compatibility; not read by this function.

    Returns:
        The reciprocal as a non-negative int, never above ``2^(SCALE+1) - 1``.
    """
    saturated = (1 << (SCALE + 1)) - 1
    if q <= 0:
        return saturated

    msb = IDX_BITS - 1
    exponent = q.bit_length() - 1
    # Align q's leading one with bit ``msb``: shift right when q is wider, left when narrower.
    shift = exponent - msb
    if shift >= 0:
        aligned = q >> shift
    else:
        aligned = q << -shift
    mantissa = aligned & ((1 << IDX_BITS) - 1)
    if mantissa == 0:
        return saturated

    quotient = (1 << (SCALE + msb)) // mantissa
    return min(quotient >> exponent, saturated)
|
||||
|
||||
def persp_texel_recip(uq, vq, q, tw, th, idx_bits=8, SCALE=24):
    """Turn interpolated (uq, vq, q) into a wrapped texel coordinate via recip8().

    ``q`` is rounded to an int and passed to recip8(); each of uq and vq is rounded,
    multiplied by that reciprocal, shifted right by SCALE, capped at 2047 and finally
    reduced modulo the texture size (REPEAT wrap).

    Args:
        uq, vq: Numerators for the U and V axes.
        q: Shared denominator.
        tw, th: Texture width and height used for the wrap.
        idx_bits: Mantissa width forwarded to recip8() as IDX_BITS.
        SCALE: Fixed-point scale forwarded to recip8() and used for the final shift.

    Returns:
        ``(u, v)`` with ``u`` in ``[0, tw)`` and ``v`` in ``[0, th)`` for positive sizes.
    """
    inv_q = recip8(int(round(q)), IDX_BITS=idx_bits, SCALE=SCALE)
    u_raw = (int(round(uq)) * inv_q) >> SCALE
    v_raw = (int(round(vq)) * inv_q) >> SCALE
    # Cap at 2047 before wrapping, as the original clamp did.
    u_capped = min(u_raw, 2047)
    v_capped = min(v_raw, 2047)
    return u_capped % tw, v_capped % th
|
||||
|
||||
def f32_to(v):
    """Placeholder: return ``v`` unchanged."""
    return v
|
||||
|
||||
def tex0_real(tbp, cbp):
    """Return the TEX0 value for the 512x512 PSMT8 texture with a CSM1 CLUT.

    Args:
        tbp: Texture base pointer handed to bake.tex0_pack().
        cbp: CLUT base pointer; only its low 14 bits are used.

    Returns:
        bake.tex0_pack(tbp, TBW_TEX, psm=0x13, tw=TW_LOG, th=TH_LOG, tfx=1) with the CLUT
        fields OR-ed in: CBP at bit 37 and CLD=1 at bit 61; CPSM, CSM and CSA are all zero.
    """
    tex_half = bake.tex0_pack(tbp, TBW_TEX, psm=0x13, tw=TW_LOG, th=TH_LOG, tfx=1)
    clut_half = (
        ((cbp & 0x3FFF) << 37)
        | ((0 & 0xF) << 51)     # CPSM = PSMCT32
        | ((0 & 0x1) << 55)     # CSM = 0 -> CSM1 (16x16 CT32 grid) — the Ch350 path
        | ((0 & 0x1F) << 56)    # CSA = 0
        | ((1 & 0x7) << 61)     # CLD = 1 -> always load
    )
    return tex_half | clut_half
|
||||
|
||||
def main(argv):
|
||||
dump = None; draw_idx = 89761
|
||||
a = argv[1:]
|
||||
if a and not a[0].startswith("--"): dump = a[0]
|
||||
if "--draw-idx" in a: draw_idx = int(a[a.index("--draw-idx")+1])
|
||||
if dump is None:
|
||||
c = glob.glob(os.path.join(ROOT,"captures","gs","silenthill3","*224139*.gs.zst"))
|
||||
if not c: sys.exit("no SH3 dump found; pass the .gs.zst path")
|
||||
dump = c[0]
|
||||
|
||||
PERSP_FRAC = bake.PERSP_FRAC
|
||||
|
||||
dr = C.get_draw(dump, draw_idx)
|
||||
if dr is None: sys.exit(f"draw idx {draw_idx} not found")
|
||||
t0 = dr["tex0"]; ORIG_TBP = t0["tbp"]; ORIG_CBP = t0["cbp"]
|
||||
TW, TH = t0["tw"], t0["th"]
|
||||
assert TW==512 and TH==512 and t0["psm"]==0x13, f"unexpected TEX0 {t0}"
|
||||
|
||||
# --- reconstruct GS local memory at draw time (Ch349) ---
|
||||
mem, replayed, uploads, events, vram = RC.build_localmem_to(dump, draw_idx)
|
||||
if mem is None: sys.exit("VRAM snapshot absent")
|
||||
# de-swizzled 512x512 index image (for TB reference) + swizzled bytes (for LPDDR)
|
||||
idx = mem.read_psmt8(ORIG_TBP, t0["tbw"], TW, TH) # de-swizzled indices
|
||||
tex_swz = bytes(mem.m[ORIG_TBP*256 : ORIG_TBP*256 + TEX_BYTES]) # swizzled bytes -> LPDDR
|
||||
clut_bytes = bytes(mem.m[ORIG_CBP*256 : ORIG_CBP*256 + 1024]) # CT32-grid CLUT bytes -> BRAM
|
||||
pal = RC.read_clut32(mem, ORIG_CBP, order="grid") # for the reference PNG
|
||||
|
||||
# --- geometry: translate to draw origin (full frame), then choose a deterministic CH-tall VIEWPORT crop ---
|
||||
xmin = min(v["x"] for v in dr["verts"]); ymin = min(v["y"] for v in dr["verts"])
|
||||
OX, OY = int(xmin), int(ymin)
|
||||
fverts = [dict(x=v["x"]-OX, y=v["y"]-OY, s=v["s"], t=v["t"], q=v["q"]) for v in dr["verts"]]
|
||||
ftris = [(i-2,i-1,i) for i in range(2,len(fverts))]
|
||||
full_h = int(max(v["y"] for v in fverts)) + 1
|
||||
|
||||
def edge(ax,ay,bx,by,px,py): return (px-ax)*(by-ay)-(py-ay)*(bx-ax)
|
||||
# per-row coverage histogram over the FULL frame -> pick CY0 = argmax covered pixels in a CH-tall band.
|
||||
row_cov = [0]*(full_h+CH+2)
|
||||
for (a0,b0,c0) in ftris:
|
||||
v0,v1,v2=fverts[a0],fverts[b0],fverts[c0]
|
||||
x0,y0=v0["x"],v0["y"]; x1,y1=v1["x"],v1["y"]; x2,y2=v2["x"],v2["y"]
|
||||
ar=edge(x0,y0,x1,y1,x2,y2)
|
||||
if abs(ar)<1e-9: continue
|
||||
inv=1.0/ar
|
||||
for py in range(max(0,int(min(y0,y1,y2))), min(full_h-1,int(max(y0,y1,y2))+1)+1):
|
||||
for px in range(max(0,int(min(x0,x1,x2))), min(FBPXW-1,int(max(x0,x1,x2))+1)+1):
|
||||
cx,cy=px+0.5,py+0.5
|
||||
w0=edge(x1,y1,x2,y2,cx,cy)*inv; w1=edge(x2,y2,x0,y0,cx,cy)*inv; w2=1.0-w0-w1
|
||||
if w0>=-0.001 and w1>=-0.001 and w2>=-0.001: row_cov[py]+=1
|
||||
best_cy0, best_sum = 0, -1
|
||||
for cy0 in range(0, max(1, full_h-CH+1)):
|
||||
s = sum(row_cov[cy0:cy0+CH])
|
||||
if s > best_sum: best_sum, best_cy0 = s, cy0
|
||||
CY0 = best_cy0; CX0 = 0
|
||||
# apply the viewport crop: shift Y by -CY0 (ST/Q UNCHANGED — only the framebuffer window moves), then
|
||||
# CLIP each triangle to the crop rect [0,FBPXW]x[0,CH] (Sutherland-Hodgman, interpolating S/T/Q linearly in
|
||||
# screen space — correct since S,T,Q are already premultiplied by 1/w). This is the VIEWPORT scissor done at
|
||||
# the host: every emitted vertex lands inside the FB (no out-of-bounds writes), geometry SHAPE + per-vertex
|
||||
# ST/Q are preserved exactly; only the framebuffer window is cropped. Codex's "cropped or scissored" rule.
|
||||
def lerp(p1, p2, a):
|
||||
return dict(x=p1["x"]+a*(p2["x"]-p1["x"]), y=p1["y"]+a*(p2["y"]-p1["y"]),
|
||||
s=p1["s"]+a*(p2["s"]-p1["s"]), t=p1["t"]+a*(p2["t"]-p1["t"]), q=p1["q"]+a*(p2["q"]-p1["q"]))
|
||||
def clip_edge(poly, inside, isect):
|
||||
out=[]
|
||||
for i in range(len(poly)):
|
||||
cur=poly[i]; prv=poly[i-1]
|
||||
ci=inside(cur); pi=inside(prv)
|
||||
if ci:
|
||||
if not pi: out.append(isect(prv,cur))
|
||||
out.append(cur)
|
||||
elif pi:
|
||||
out.append(isect(prv,cur))
|
||||
return out
|
||||
def clip_rect(poly):
|
||||
# left x>=0, right x<=FBPXW, top y>=0, bottom y<=CH
|
||||
poly=clip_edge(poly, lambda p:p["x"]>=0.0, lambda a,b:lerp(a,b,(0.0-a["x"])/(b["x"]-a["x"])))
|
||||
if not poly: return poly
|
||||
poly=clip_edge(poly, lambda p:p["x"]<=FBPXW, lambda a,b:lerp(a,b,(FBPXW-a["x"])/(b["x"]-a["x"])))
|
||||
if not poly: return poly
|
||||
poly=clip_edge(poly, lambda p:p["y"]>=0.0, lambda a,b:lerp(a,b,(0.0-a["y"])/(b["y"]-a["y"])))
|
||||
if not poly: return poly
|
||||
poly=clip_edge(poly, lambda p:p["y"]<=CH, lambda a,b:lerp(a,b,(CH-a["y"])/(b["y"]-a["y"])))
|
||||
return poly
|
||||
sverts = [dict(x=v["x"]-CX0, y=v["y"]-CY0, s=v["s"], t=v["t"], q=v["q"]) for v in fverts]
|
||||
def rnd(v): # round XY to integer screen coords (the feeder gets ints) — host ref MUST use the SAME ints,
|
||||
return dict(x=float(int(round(v["x"]))), y=float(int(round(v["y"]))), s=v["s"], t=v["t"], q=v["q"])
|
||||
tris = [] # list of (v0,v1,v2) explicit clipped vertex dicts with INTEGER screen XY
|
||||
for (a0,b0,c0) in ((i-2,i-1,i) for i in range(2,len(sverts))):
|
||||
poly = clip_rect([sverts[a0], sverts[b0], sverts[c0]])
|
||||
poly = [rnd(p) for p in poly]
|
||||
for k in range(1, len(poly)-1): # fan the clipped polygon into triangles
|
||||
tris.append((poly[0], poly[k], poly[k+1]))
|
||||
print(f"[Ch350] draw idx{draw_idx}: {len(sverts)} verts; full frame {FBPXW}x{full_h}; DETERMINISTIC crop "
|
||||
f"CX0={CX0} CY0={CY0} -> FB {FBPXW}x{CH} ({best_sum} covered px in band); clipped to {len(tris)} tris")
|
||||
|
||||
# --- feeder staging ---
|
||||
# NOTE: gs_persp_uv contract is uq=(u/w)*2^FRAC, q=(1/w)*2^FRAC, u=(uq*floor(2^SCALE/q))>>SCALE. Scaling
|
||||
# S/T/Q by K is INVARIANT (cancels) — confirmed it doesn't move the residual. The texel-accuracy limit is
|
||||
# the gs_reciprocal_stub 8-bit (256-entry) LUT: ~0.4% relative -> <=1 texel for Ch348's TW=64 but ~2+ texels
|
||||
# for this TW=512 texture (plus the S1-path under-interpolation banding). A perspective-precision limit.
|
||||
# Ch351 — EFFECTIVE PERSP_FRAC. The hardware u=s/q divide cancels the frac scale, so "widening PERSP_FRAC"
|
||||
# is a HOST PACKING choice: pack S/T/Q with more frac bits so the far-surface denominator q=(1/w)*2^FRACeff
|
||||
# doesn't round to 1-2 (FRAC=12 collapses for w~2048). PSCALE=2^k gives FRACeff = PERSP_FRAC + k. PSCALE=256
|
||||
# -> FRACeff=20, which took the SH3 crop 20%->80% (Codex's "Q×256 ≈ +8 frac bits"). Default PSCALE=1 keeps
|
||||
# Ch342/348 at FRAC=12 (canaries). The 24-bit signed S/T field bounds FRACeff for a given S/T range — checked.
|
||||
PSCALE = 4096
|
||||
S24_MAX = (1<<23) - 1
|
||||
max_sfp = [0]
|
||||
def vert_words(v):
|
||||
s_fp = round(v["s"] * TW * (1<<PERSP_FRAC) * PSCALE) # s_fp/q_fp = (S/Q)*TW = texel_u (perspective)
|
||||
t_fp = round(v["t"] * TH * (1<<PERSP_FRAC) * PSCALE)
|
||||
q_fp = round(v["q"] * (1<<PERSP_FRAC) * PSCALE)
|
||||
max_sfp[0] = max(max_sfp[0], abs(s_fp), abs(t_fp))
|
||||
if abs(s_fp) > S24_MAX or abs(t_fp) > S24_MAX: # 24-bit signed ST field overflow guard (Codex #3)
|
||||
sys.exit(f"[Ch351] OVERFLOW: |s_fp|={abs(s_fp)} or |t_fp|={abs(t_fp)} > 2^23-1 at PSCALE={PSCALE} "
|
||||
f"(FRACeff={PERSP_FRAC}+{PSCALE.bit_length()-1}). Lower PSCALE for this S/T range.")
|
||||
if abs(q_fp) > 0x7FFFFFFF:
|
||||
sys.exit(f"[Ch351] OVERFLOW: |q_fp|={abs(q_fp)} > 2^31-1 (Q field). Lower PSCALE.")
|
||||
sx = max(0, min(FBPXW-1, int(round(v["x"]))))
|
||||
sy = max(0, min(CH-1, int(round(v["y"]))))
|
||||
return [bake.rgbaq_with_q(0,0,0, q_fp & 0xFFFFFFFF),
|
||||
bake.st_data(s_fp & 0xFFFFFF, t_fp & 0xFFFFFF),
|
||||
bake.xyz2_dataz(sx, sy, 0x0000_5000)]
|
||||
stg = []
|
||||
stg.append(len(tris) | (1<<32)) # word0: ntris | perspective-format flag
|
||||
stg.append(bake.frame_1_psmct32(FBW))
|
||||
stg.append(bake.alpha_pack(0,1,0,1))
|
||||
stg.append(0) # TEST_1 = 0 (ZTE=0, ATE=0): no depth/alpha test
|
||||
stg.append(bake.zbuf1_pack(0, zmsk=1)) # ZMSK=1: no Z writes -> no Z buffer needed
|
||||
stg.append(tex0_real(NEW_TBP, CBP)) # PSMT8 + CSM1 CLUT (CLD=1) -> feeder commit fires the load
|
||||
stg.append(3 | (1<<4)) # TRI + TME, ABE=0 (S1 perspective path)
|
||||
for (v0,v1,v2) in tris:
|
||||
for v in (v0,v1,v2): stg += vert_words(v)
|
||||
if len(stg) > STG_WORDS: sys.exit(f"staging {len(stg)} > {STG_WORDS} (raise STG_WORDS)")
|
||||
print(f"[Ch350] feeder staging: {len(stg)} words (<= {STG_WORDS})")
|
||||
print(f"[Ch351] effective PERSP_FRAC = {PERSP_FRAC}+{PSCALE.bit_length()-1} = {PERSP_FRAC+PSCALE.bit_length()-1} "
|
||||
f"(PSCALE={PSCALE}); max |s_fp/t_fp|={max_sfp[0]} of 2^23-1 ({100.0*max_sfp[0]/((1<<23)-1):.1f}% of the 24-bit ST field)")
|
||||
|
||||
# --- host reference + per-pixel texel map. TWO references over the SAME clipped geometry:
|
||||
# refmap = FLOAT perspective (ideal) — the Codex pixel-diff oracle.
|
||||
# refmap_rec = RTL-FAITHFUL: fixed-point vertex attrs (uq=s*TW*2^FRAC, q=Q*2^FRAC), float interp, then the
|
||||
# 8-bit gs_reciprocal_stub. Comparing the RTL FB vs BOTH isolates reciprocal quantization
|
||||
# (RTL≈refmap_rec, refmap_rec≠refmap) from S1 under-interpolation banding (RTL≠refmap_rec).
|
||||
refmap = [0]*(FBPXW*CH); refpix = [(0,0,0)]*(FBPXW*CH)
|
||||
refmap_rec = [0]*(FBPXW*CH); refpix_rec = [(0,0,0)]*(FBPXW*CH)
|
||||
refmap_aff = [0]*(FBPXW*CH) # AFFINE: per-vertex texel, linear u,v interp (NOT perspective-correct)
|
||||
F = 1<<PERSP_FRAC
|
||||
for (v0,v1,v2) in tris:
|
||||
x0,y0=v0["x"],v0["y"]; x1,y1=v1["x"],v1["y"]; x2,y2=v2["x"],v2["y"]
|
||||
area = edge(x0,y0,x1,y1,x2,y2)
|
||||
if abs(area)<1e-9: continue
|
||||
inv = 1.0/area
|
||||
# per-vertex FIXED-POINT attributes (exactly what the feeder staging carries)
|
||||
uqv=[round(v["s"]*TW*F) for v in (v0,v1,v2)]
|
||||
vqv=[round(v["t"]*TH*F) for v in (v0,v1,v2)]
|
||||
qv =[round(v["q"]*F) for v in (v0,v1,v2)]
|
||||
# per-vertex TEXEL (perspective divide at the vertex) for the affine reference
|
||||
auv=[((v["s"]/v["q"])*TW if abs(v["q"])>1e-12 else 0.0) for v in (v0,v1,v2)]
|
||||
avv=[((v["t"]/v["q"])*TH if abs(v["q"])>1e-12 else 0.0) for v in (v0,v1,v2)]
|
||||
minx=max(0,int(min(x0,x1,x2))); maxx=min(FBPXW-1,int(max(x0,x1,x2))+1)
|
||||
miny=max(0,int(min(y0,y1,y2))); maxy=min(CH-1,int(max(y0,y1,y2))+1)
|
||||
for py in range(miny,maxy+1):
|
||||
for px in range(minx,maxx+1):
|
||||
cx,cy=px+0.5,py+0.5
|
||||
w0=edge(x1,y1,x2,y2,cx,cy)*inv; w1=edge(x2,y2,x0,y0,cx,cy)*inv; w2=1.0-w0-w1
|
||||
if w0<-0.001 or w1<-0.001 or w2<-0.001: continue
|
||||
# Ch351 convention fix: coverage/interior at pixel CENTER (px+0.5), but the RTL interpolates
|
||||
# the perspective ATTRIBUTES at the INTEGER pixel coord (tex_dx_s1 = s1_x_q - v0_x, no +0.5).
|
||||
# Use a CORNER barycentric for S/T/Q to match -> removes the ~0.5-texel drift.
|
||||
a0w=edge(x1,y1,x2,y2,float(px),float(py))*inv; a1w=edge(x2,y2,x0,y0,float(px),float(py))*inv; a2w=1.0-a0w-a1w
|
||||
S=a0w*v0["s"]+a1w*v1["s"]+a2w*v2["s"]; T=a0w*v0["t"]+a1w*v1["t"]+a2w*v2["t"]
|
||||
Q=a0w*v0["q"]+a1w*v1["q"]+a2w*v2["q"]
|
||||
if abs(Q)<1e-12: continue
|
||||
tu=int((S/Q)*TW) % TW; tv=int((T/Q)*TH) % TH
|
||||
if tu<0: tu+=TW
|
||||
if tv<0: tv+=TH
|
||||
# RTL-faithful: interp the FIXED-POINT attrs, then the 8-bit reciprocal
|
||||
uq=w0*uqv[0]+w1*uqv[1]+w2*uqv[2]; vq=w0*vqv[0]+w1*vqv[1]+w2*vqv[2]; qq=w0*qv[0]+w1*qv[1]+w2*qv[2]
|
||||
tur,tvr = persp_texel_recip(uq,vq,qq,TW,TH,idx_bits=8)
|
||||
# AFFINE texel: linear interp of the per-vertex texels (the under-interpolation hypothesis)
|
||||
au=int(w0*auv[0]+w1*auv[1]+w2*auv[2]) % TW; av=int(w0*avv[0]+w1*avv[1]+w2*avv[2]) % TH
|
||||
if au<0: au+=TW
|
||||
if av<0: av+=TH
|
||||
mw = min(w0,w1,w2)
|
||||
interior = 1 if mw > 0.04 else 0 # away from this triangle's own edges
|
||||
deep = 1 if mw > 0.15 else 0 # DEEP interior — far from any edge => seam-free zone
|
||||
o=py*FBPXW+px
|
||||
refmap[o] = (1<<31)|(interior<<30)|(deep<<29)|((tu&0x1FF)<<9)|(tv&0x1FF)
|
||||
refmap_rec[o] = (1<<31)|(interior<<30)|((tur&0x1FF)<<9)|(tvr&0x1FF)
|
||||
refmap_aff[o] = (1<<31)|(interior<<30)|((au&0x1FF)<<9)|(av&0x1FF)
|
||||
p=pal[idx[tv*TW+tu]&0xFF]; refpix[o]=(p&0xFF,(p>>8)&0xFF,(p>>16)&0xFF)
|
||||
pr=pal[idx[tvr*TW+tur]&0xFF];refpix_rec[o]=(pr&0xFF,(pr>>8)&0xFF,(pr>>16)&0xFF)
|
||||
|
||||
covered = sum(1 for w in refmap if w>>31)
|
||||
print(f"[Ch350] host reference: {covered} covered FB pixels")
|
||||
# emit the RTL-faithful refmap + PNG for the Ch351 oracle
|
||||
with open(os.path.join(DATA,"sh3_real_refmap_recip.mem"),"w") as f:
|
||||
f.write("// Ch351 RTL-faithful (8-bit reciprocal) per-pixel texel map. gitignored.\n")
|
||||
for x in refmap_rec: f.write(f"{x & 0xFFFFFFFF:08x}\n")
|
||||
with open(os.path.join(DATA,"sh3_real_refmap_affine.mem"),"w") as f:
|
||||
f.write("// Ch351 AFFINE (per-vertex texel, linear interp) per-pixel texel map. gitignored.\n")
|
||||
for x in refmap_aff: f.write(f"{x & 0xFFFFFFFF:08x}\n")
|
||||
try:
|
||||
from PIL import Image
|
||||
Image.new("RGB",(FBPXW,CH)).copy() # noop guard
|
||||
im2=Image.new("RGB",(FBPXW,CH)); im2.putdata(refpix_rec)
|
||||
im2.save(os.path.join(ROOT,"captures","gs","silenthill3","extracted","recon","sh3_real_ref_recip.png"))
|
||||
except Exception as e:
|
||||
print("(PIL skip recip png:", e, ")")
|
||||
|
||||
# --- setup bootlet: BOARD-READY CLUT upload (Ch347 pattern). The CSM1 loader reads the CLUT from VRAM at
|
||||
# CBP via GRID offsets; sh3_real_clut.mem IS the raw grid bytes, so a LINEAR 256x1 BITBLT of those 256 words
|
||||
# (PSMCT32_SWIZZLE=0) places byte CBP+k*4 = word k = the grid byte verbatim -> the loader reads it correctly.
|
||||
# The upload also fires dma_done_seen, which auto-starts the feeder (C_SETUP->C_RUN). On the board this is the
|
||||
# ONLY CLUT path (no TB backdoor); the SIM TB still backdoors the same bytes (redundant + consistent).
|
||||
clut_words_b = [int.from_bytes(clut_bytes[i*4:i*4+4],"little") for i in range(256)]
|
||||
RAM_QWORDS = 512
|
||||
pay = []
|
||||
pay.append(bake.giftag(1,0,0,4,int('E'*4,16)))
|
||||
pay.append(bake.aplusd(bake.R_BITBLTBUF, bake.bitbltbuf_pack(CBP, 1, 0x00))) # PSMCT32 dest @CBP
|
||||
pay.append(bake.aplusd(bake.R_TRXPOS, bake.trxpos_pack(0,0)))
|
||||
pay.append(bake.aplusd(bake.R_TRXREG, bake.trxreg_pack(256,1))) # 256x1 contiguous
|
||||
pay.append(bake.aplusd(bake.R_TRXDIR, bake.trxdir_pack(0)))
|
||||
pay.append(bake.giftag(256//4,1,2,0,0)) # 64 qwords image data, EOP
|
||||
for q in range(256//4):
|
||||
word=0
|
||||
for lane in range(4): word |= (clut_words_b[q*4+lane]&0xFFFFFFFF) << (32*lane)
|
||||
pay.append(word)
|
||||
qwc=len(pay)
|
||||
disp_hi=((CH-1)<<12)|(FBPXW-1)
|
||||
with open(os.path.join(DATA,"payload_sh3_real.mem"),"w") as f:
|
||||
f.write(f"// Ch352 LOCAL SH3 real-draw setup payload (CSM1 CLUT 256x1 -> CBP={CBP}, grid bytes verbatim). gitignored. QWC={qwc}.\n")
|
||||
for _ in range(16): f.write(f"{0:032x}\n")
|
||||
for x in pay: f.write(f"{x&((1<<128)-1):032x}\n")
|
||||
for _ in range(RAM_QWORDS-16-qwc): f.write(f"{0:032x}\n")
|
||||
bake.write_bios_mem("bios_sh3_real.mem",
|
||||
bake.build_textured_demo_bootlet_disp(qwc, disp_hi, FBW),
|
||||
f"Ch352 LOCAL SH3 real-draw setup bootlet (QWC={qwc}, DISPLAY1={FBPXW}x{CH}). gitignored.")
|
||||
print(f"[Ch352] setup bootlet: payload {qwc} qw (CSM1 CLUT 256x1 upload to CBP={CBP}).")
|
||||
|
||||
# --- emit ---
|
||||
def wmem(name, words, width_hex, banner):
|
||||
with open(os.path.join(DATA,name),"w") as f:
|
||||
f.write(f"// {banner}\n")
|
||||
for x in words: f.write(f"{x & ((1<<(4*width_hex))-1):0{width_hex}x}\n")
|
||||
# de-swizzled index image, packed 4 idx/word (LINEAR row-major: byte v*TW+u = idx(u,v))
|
||||
idx_words = [idx[i*4]|(idx[i*4+1]<<8)|(idx[i*4+2]<<16)|(idx[i*4+3]<<24) for i in range(TW*TH//4)]
|
||||
wmem("sh3_real_idx.mem", idx_words, 8, "Ch350 LOCAL SH3 512x512 de-swizzled indices (4/word) for TB ref. gitignored.")
|
||||
# LPDDR texture: the bram-top defaults PSMT8_SWIZZLE=0 (LINEAR read, like Ch347/348) — so store the texture
|
||||
# LINEAR (de-swizzled, = idx_words). The texture unit's linear PSMT8 addr (base + v*TBW*64 + u) then reads
|
||||
# texel(u,v)=idx(u,v). (The raw SWIZZLED bytes would need PSMT8_SWIZZLE=1; kept as sh3_real_tex_lpddr_swz.mem
|
||||
# for that variant.) This is the Ch299/Ch350 root-cause fix: linear texture <-> linear read.
|
||||
wmem("sh3_real_tex_lpddr.mem", idx_words, 8, "Ch350 LOCAL SH3 512x512 LINEAR de-swizzled indices -> LPDDR model (PSMT8_SWIZZLE=0). gitignored.")
|
||||
# Ch352 guardrail #2 — board-side READBACK CHECKSUM: after the HPS write-probe uploads these 65536 words to
|
||||
# LPDDR @0x200000, the HPS read-probe should read them back and confirm sum32 + xor32 BEFORE the cache fill.
|
||||
tex_sum = sum(idx_words) & 0xFFFFFFFF
|
||||
tex_xor = 0
|
||||
for w in idx_words: tex_xor ^= w
|
||||
print(f"[Ch352] TEXTURE→LPDDR upload checksum (verify via read-probe before cache-fill): "
|
||||
f"{len(idx_words)} words @ LPDDR 0x{LPDDR_TEX_BASE:07x} sum32=0x{tex_sum:08x} xor32=0x{tex_xor:08x}")
|
||||
tex_swz_words = [int.from_bytes(tex_swz[i*4:i*4+4],"little") for i in range(TEX_BYTES//4)]
|
||||
wmem("sh3_real_tex_lpddr_swz.mem", tex_swz_words, 8, "Ch350 LOCAL SH3 512x512 SWIZZLED bytes -> LPDDR (for PSMT8_SWIZZLE=1 variant). gitignored.")
|
||||
clut_words = [int.from_bytes(clut_bytes[i*4:i*4+4],"little") for i in range(256)]
|
||||
wmem("sh3_real_clut.mem", clut_words, 8, "Ch350 LOCAL SH3 CSM1 CLUT (raw CT32-grid bytes @CBP) -> BRAM (HW CSM1 loader reads these in grid order). gitignored.")
|
||||
# de-gridded palette pal[i] (what the HW CSM1 grid-read produces) -> TB reference expected colors
|
||||
wmem("sh3_real_pal.mem", [p & 0xFFFFFFFF for p in pal], 8,
|
||||
"Ch350 LOCAL SH3 de-gridded palette pal[i] (grid-read of the CBP bytes) for the TB reference. gitignored.")
|
||||
bake.write_feeder_stg_mem("feeder_sh3_real.mem", stg,
|
||||
"Ch350 LOCAL SH3 REAL draw (idx89761) feeder staging: triangle list + TEX0(PSMT8,CSM1,CLD=1,DECAL). gitignored.",
|
||||
total=STG_WORDS)
|
||||
wmem("sh3_real_refmap.mem", refmap, 8, "Ch350 LOCAL per-FB-pixel covered|interior|tu|tv reference map. gitignored.")
|
||||
# params include for the TB
|
||||
with open(os.path.join(DATA,"sh3_real_params.vh"),"w") as f:
|
||||
f.write("// Ch350 LOCAL generated params for tb_top_psmct32_sh3_real_draw_demo. gitignored.\n")
|
||||
f.write(f"localparam int FBW = {FBW};\n")
|
||||
f.write(f"localparam int FBPXW = {FBPXW};\n")
|
||||
f.write(f"localparam int FBH = {CH};\n")
|
||||
f.write(f"localparam int VRAM_BYTES_P = {VRAM_BYTES};\n")
|
||||
f.write(f"localparam int CROP_CX0 = {CX0};\n")
|
||||
f.write(f"localparam int CROP_CY0 = {CY0};\n")
|
||||
f.write(f"localparam int CLUT_CBP = {CBP};\n")
|
||||
f.write(f"localparam int NEW_TBP = {NEW_TBP};\n")
|
||||
f.write(f"localparam int TEX_VRAM_BASE= {TEX_VRAM_BASE};\n")
|
||||
f.write(f"localparam int TEX_BYTES = {TEX_BYTES};\n")
|
||||
f.write(f"localparam [29:0] LPDDR_TEX_BASE = 30'h{LPDDR_TEX_BASE:07x};\n")
|
||||
f.write(f"localparam int N_BEATS = {TEX_BYTES//32};\n")
|
||||
f.write(f"localparam int STG_WORDS = {STG_WORDS};\n")
|
||||
f.write(f"localparam int TW = {TW};\n")
|
||||
f.write(f"localparam int TH = {TH};\n")
|
||||
# eyeball PNG
|
||||
try:
|
||||
from PIL import Image
|
||||
im=Image.new("RGB",(FBPXW,CH)); im.putdata(refpix)
|
||||
im.save(os.path.join(ROOT,"captures","gs","silenthill3","extracted","recon","sh3_real_ref.png"))
|
||||
print("[Ch350] wrote sh3_real_ref.png")
|
||||
except Exception as e:
|
||||
print("(PIL skipped:", e, ")")
|
||||
print(f"[Ch350] emitted fixtures -> {DATA} (LOCAL). TEX_VRAM_BASE=0x{TEX_VRAM_BASE:x} TBP={NEW_TBP} CBP={CBP} "
|
||||
f"LPDDR_TEX_BASE=0x{LPDDR_TEX_BASE:x} N_BEATS={TEX_BYTES//32}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv)
|
||||
@@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Author a small CONTENT-CLEAN synthetic PCSX2 .gs dump for Ch340 byte-exact parser tests.
|
||||
|
||||
No copyrighted content — every byte is hand-authored here. Exercises the container header + each
|
||||
packet type + each GIF mode the parser must handle: PACKED A+D, PACKED PRE-prim, PACKED RGBAQ/XYZ2,
|
||||
an IMAGE upload, and a VSync frame boundary. Writes captures/gs/synthetic/mini.gs (whitelisted).
|
||||
"""
|
||||
import struct, os
|
||||
|
||||
OUT = os.path.join(os.path.dirname(__file__), "..", "captures", "gs", "synthetic", "mini.gs")
|
||||
|
||||
def giftag(nloop, eop, flg, nreg, regs, pre=0, prim=0):
    """Build one 16-byte little-endian GIFtag.

    Low 64 bits: NLOOP[14:0], EOP[15], PRE[46], PRIM[57:47], FLG[59:58], NREG[63:60].
    High 64 bits: the REGS descriptor nibbles. Every field is masked to its width.
    """
    layout = (
        (nloop, 0x7FFF, 0),
        (eop, 0x1, 15),
        (pre, 0x1, 46),
        (prim, 0x7FF, 47),
        (flg, 0x3, 58),
        (nreg, 0xF, 60),
        (regs, (1 << 64) - 1, 64),
    )
    tag = 0
    for value, mask, shift in layout:
        tag |= (value & mask) << shift
    return tag.to_bytes(16, "little")
|
||||
|
||||
def ad(addr, data):
    """Build a PACKED A+D qword: 64-bit data in the low half, 8-bit register address at bit 64."""
    payload = data & ((1 << 64) - 1)
    target = (addr & 0xFF) << 64
    return (target | payload).to_bytes(16, "little")
|
||||
|
||||
def packed_rgbaq(r, g, b, a):
    """Build a PACKED RGBAQ qword: R[7:0] G[39:32] B[71:64] A[103:96].

    Each component is masked to 8 bits (like the other packers in this file) so an
    out-of-range value cannot spill into a neighbouring lane of the fixture.
    """
    word = (r & 0xFF) | ((g & 0xFF) << 32) | ((b & 0xFF) << 64) | ((a & 0xFF) << 96)
    return word.to_bytes(16, "little")
|
||||
|
||||
def packed_xyz2(x, y, z, adc=0):
    """Build a PACKED XYZ2 qword: X[15:0] Y[47:32] Z[95:64] ADC[111].

    ADC is a single bit; it is masked like the coordinate fields so a non-0/1 value
    cannot set bits above 111.
    """
    word = (x & 0xFFFF) | ((y & 0xFFFF) << 32) | ((z & 0xFFFFFFFF) << 64) | ((adc & 1) << 111)
    return word.to_bytes(16, "little")
|
||||
|
||||
def transfer(path, gifdata):
    """Frame `gifdata` as a Transfer packet: id byte 0, path byte, u32 LE length, payload."""
    header = bytes((0, path)) + struct.pack("<I", len(gifdata))
    return header + gifdata
|
||||
|
||||
def vsync(field):
    """Build a VSync packet: id byte 1 followed by the field byte."""
    return bytes((1, field))
|
||||
|
||||
# ---- GIF payloads ----
# P1: a single A+D write, FRAME_1 (0x4C) = 0x000000000C001807 (arbitrary but recognizable).
p1 = giftag(1, 0, 0, 1, 0xE) + ad(0x4C, 0x0000_0000_0C00_1807)

# P2: one TRIANGLE (prim type 3 with the TME and ABE bits set) delivered through PRE,
# PACKED, nreg=2 (RGBAQ=1, XYZ2=5), nloop=3.
prim_val = 3 | (1 << 4) | (1 << 6)          # TRIANGLE + TME + ABE
regs2 = 0x1 | (0x5 << 4)                    # desc0=RGBAQ, desc1=XYZ2
verts = [
    (100, 50, 0x5000, 0xFF, 0x00, 0x00),
    (200, 50, 0x5000, 0x00, 0xFF, 0x00),
    (100, 150, 0x5000, 0x00, 0x00, 0xFF),
]
p2 = giftag(3, 1, 0, 2, regs2, pre=1, prim=prim_val) + b"".join(
    # screen coordinates are shifted into 12.4 fixed point
    packed_rgbaq(r, g, b, 0x80) + packed_xyz2(x << 4, y << 4, z)
    for (x, y, z, r, g, b) in verts
)

# P3: an IMAGE upload carrying 2 qwords of dummy (all-zero) texture bytes.
p3 = giftag(2, 1, 2, 0, 0) + bytes(32)

packets = b"".join((transfer(3, p1), transfer(3, p2), transfer(3, p3), vsync(0)))
|
||||
|
||||
def build_container(serial, packets, state_size=16):
    """Wrap `packets` in a new-format .gs container.

    Layout: 0xFFFFFFFF marker, u32 header-block length, nine u32 header fields followed by
    the serial bytes, a zeroed state blob of `state_size` bytes, a zeroed 8192-byte register
    block, then the packet stream.
    """
    state = bytes(state_size)
    fixed_len = 36                           # nine little-endian u32 fields
    fields = (9, len(state), fixed_len, len(serial), 0x12345678, 0, 0, fixed_len + len(serial), 0)
    header_block = struct.pack("<9I", *fields) + serial
    prefix = struct.pack("<I", 0xFFFFFFFF) + struct.pack("<I", len(header_block))
    return prefix + header_block + state + bytes(8192) + packets
|
||||
|
||||
def write(path, data):
    """Write `data` to `path`, creating the parent directory first, and report the size."""
    parent = os.path.dirname(path)
    os.makedirs(parent, exist_ok=True)
    with open(path, "wb") as out:
        out.write(data)
    print(f"wrote {path} ({len(data)} bytes)")
|
||||
|
||||
# mini.gs — the mixed fixture for the byte-exact PARSER test (P2's triangle has TME=1, i.e. unsupported).
write(OUT, build_container(b"SYNTH001", packets))

# mini_supported.gs — two NON-textured opaque triangles for the TRANSLATOR test (TME=0, ABE=0, flat).
# Lives next to mini.gs, so the path is derived from OUT's directory.
OUT2 = os.path.join(os.path.dirname(OUT), "mini_supported.gs")
|
||||
def flat_tri(prim_val, tris):
    """Emit one PRE/PACKED GIF packet per flat-coloured triangle.

    `tris` is a sequence of (vs, rgb): vs holds three (x, y, z) vertices in pixels (converted
    to 12.4 fixed point here) and rgb is the colour applied to every vertex, with alpha 0x80.
    """
    regs = 0x1 | (0x5 << 4)                  # desc0=RGBAQ, desc1=XYZ2
    chunks = []
    for corners, rgb in tris:
        chunks.append(giftag(3, 1, 0, 2, regs, pre=1, prim=prim_val))
        for px, py, pz in corners:
            chunks.append(packed_rgbaq(rgb[0], rgb[1], rgb[2], 0x80))
            chunks.append(packed_xyz2(px << 4, py << 4, pz))
    return b"".join(chunks)
|
||||
sup = b"".join((
    # XYOFFSET_1 = 0
    transfer(3, giftag(1, 0, 0, 1, 0xE) + ad(0x18, 0x0000_0000_0000_0000)),
    # PRIM=TRIANGLE only (TME=0, ABE=0)
    transfer(3, flat_tri(3, [
        ([(10, 10, 0x6000), (50, 10, 0x6000), (10, 50, 0x6000)], (0xFF, 0x00, 0x00)),  # red
        ([(50, 50, 0x5000), (14, 50, 0x5000), (50, 14, 0x5000)], (0x00, 0x00, 0xFF)),  # blue
    ])),
    vsync(0),
))
write(OUT2, build_container(b"SYNTHSUP", sup))
|
||||
|
||||
# mini_st.gs — Ch342 regression fixture: one PACKED ST write. The GS routes the ST qword's Q lane
# (bits [95:64]) to RGBAQ.Q, and the parser must surface it as info["q_stq"]. This guards against
# the easy-to-reintroduce drop-Q bug. Written alongside mini.gs.
OUT3 = os.path.join(os.path.dirname(OUT), "mini_st.gs")
|
||||
def packed_st(s, t, q):
    """Build a PACKED ST qword from raw 32-bit patterns: S[31:0], T[63:32], Q[95:64].

    Each lane is masked to 32 bits (matching the other packers in this file) so an oversized
    value cannot corrupt the adjacent lane.
    """
    lane = 0xFFFFFFFF
    word = (s & lane) | ((t & lane) << 32) | ((q & lane) << 64)
    return word.to_bytes(16, "little")
|
||||
# One PACKED loop with a single descriptor, 0x2 = ST.
p_st = b"".join((
    giftag(1, 1, 0, 1, 0x2),
    packed_st(0x11111111, 0x22222222, 0x33333333),
))
write(OUT3, build_container(b"SYNTHST0", transfer(3, p_st) + vsync(0)))
|
||||
@@ -0,0 +1,249 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch340 GS-dump parser + GIF/GS decoder (Bricks 1-2).
|
||||
|
||||
Deterministically decodes a PCSX2 .gs/.gs.xz/.gs.zst dump into a NORMALIZED, versioned event stream
|
||||
(container parse -> GIF tag walk -> GS register-write / IMAGE / transfer events). No hidden
|
||||
approximation: anything not understood is emitted as an explicit event (MALFORMED / unknown reg /
|
||||
IMAGE-not-inlined), never silently dropped or guessed.
|
||||
|
||||
Container format: see memory/reference_pcsx2_gsdump_format.md (pinned from PCSX2 source + validated
|
||||
byte-exact against captures/gs/cubes/cubes_frame.gs.zst).
|
||||
|
||||
This module is the parser/decoder ONLY. Census/histograms (Brick 3) and the ps2_feeder-scene
|
||||
translator (Brick 4) consume `parse_dump()`'s event stream. Raw IMAGE/transfer PAYLOADS are never
|
||||
inlined into committable output — only structural facts (sizes, formats, offsets).
|
||||
|
||||
Usage:
|
||||
gs_parse.py <dump.gs[.xz|.zst]> [--summary] [--events N] [--json events.jsonl]
|
||||
"""
|
||||
import sys, struct, lzma, subprocess, shutil, json
|
||||
from dataclasses import dataclass, field, asdict
|
||||
|
||||
SCHEMA_VERSION = 1

# ---- GS register address -> name (A+D + decoded-PACKED targets) ----
GS_REG = {
    0x00: "PRIM",
    0x01: "RGBAQ",
    0x02: "ST",
    0x03: "UV",
    0x04: "XYZF2",
    0x05: "XYZ2",
    0x06: "TEX0_1",
    0x07: "TEX0_2",
    0x08: "CLAMP_1",
    0x09: "CLAMP_2",
    0x0A: "FOG",
    0x0C: "XYZF3",
    0x0D: "XYZ3",
    0x14: "TEX1_1",
    0x15: "TEX1_2",
    0x16: "TEX2_1",
    0x17: "TEX2_2",
    0x18: "XYOFFSET_1",
    0x19: "XYOFFSET_2",
    0x1A: "PRMODECONT",
    0x1B: "PRMODE",
    0x1C: "TEXCLUT",
    0x22: "SCANMSK",
    0x34: "MIPTBP1_1",
    0x35: "MIPTBP1_2",
    0x36: "MIPTBP2_1",
    0x37: "MIPTBP2_2",
    0x3B: "TEXA",
    0x3D: "FOGCOL",
    0x3F: "TEXFLUSH",
    0x40: "SCISSOR_1",
    0x41: "SCISSOR_2",
    0x42: "ALPHA_1",
    0x43: "ALPHA_2",
    0x44: "DIMX",
    0x45: "DTHE",
    0x46: "COLCLAMP",
    0x47: "TEST_1",
    0x48: "TEST_2",
    0x49: "PABE",
    0x4A: "FBA_1",
    0x4B: "FBA_2",
    0x4C: "FRAME_1",
    0x4D: "FRAME_2",
    0x4E: "ZBUF_1",
    0x4F: "ZBUF_2",
    0x50: "BITBLTBUF",
    0x51: "TRXPOS",
    0x52: "TRXREG",
    0x53: "TRXDIR",
    0x54: "HWREG",
    0x60: "SIGNAL",
    0x61: "FINISH",
    0x62: "LABEL",
}

# PACKED descriptor values (one REGS nibble each); 0xB has no name here.
PACKED_PRIM = 0x0
PACKED_RGBAQ = 0x1
PACKED_ST = 0x2
PACKED_UV = 0x3
PACKED_XYZF2 = 0x4
PACKED_XYZ2 = 0x5
PACKED_TEX0_1 = 0x6
PACKED_TEX0_2 = 0x7
PACKED_CLAMP1 = 0x8
PACKED_CLAMP2 = 0x9
PACKED_FOG = 0xA
PACKED_XYZF3 = 0xC
PACKED_XYZ3 = 0xD
PACKED_AD = 0xE
PACKED_NOP = 0xF

# Dump packet ids and GIF path ids -> display names.
GST = {0: "Transfer", 1: "VSync", 2: "ReadFIFO2", 3: "Registers"}
GSPATH = {0: "Path1Old", 1: "Path2", 2: "Path3", 3: "Path1New", 4: "Dummy"}
|
||||
|
||||
@dataclass
class Event:
    """One entry of the normalized event stream produced by the parser."""

    # GSREG | IMAGE | GIFTAG | FRAME_BOUNDARY | READFIFO | TRANSFER | MALFORMED
    kind: str
    frame: int
    idx: int
    # offset in the DECOMPRESSED .gs of the source byte
    byte_off: int
    # register name, for GSREG events
    reg: str = ""
    addr: int = -1
    value: int = 0
    # free-form per-event details
    info: dict = field(default_factory=dict)
|
||||
|
||||
# ---------------------------------------------------------------- decompression
|
||||
def read_dump_bytes(path):
    """Return the fully decompressed bytes of the dump at `path`.

    The container is chosen by suffix: `.xz` is read through `lzma`, `.zst` is piped through
    the external `zstd` tool (the process exits with a message when it is not on PATH), and
    anything else is read verbatim. File handles are closed before returning.
    """
    if path.endswith(".xz"):                 # also covers ".gs.xz"
        with lzma.open(path, "rb") as fh:
            return fh.read()
    if path.endswith(".zst"):                # also covers ".gs.zst"
        if shutil.which("zstd") is None:
            sys.exit("error: .zst dump but `zstd` not found on PATH")
        return subprocess.run(["zstd", "-d", "-c", path], capture_output=True, check=True).stdout
    with open(path, "rb") as fh:
        return fh.read()
|
||||
|
||||
# ---------------------------------------------------------------- container parse
|
||||
@dataclass
class Header:
    """Decoded .gs container header plus the values derived while parsing it."""

    # the nine u32 fields, in on-disk order
    state_version: int
    state_size: int
    serial_offset: int
    serial_size: int
    crc: int
    ss_w: int
    ss_h: int
    ss_off: int
    ss_size: int
    # filled in by the header parser
    header_size: int
    serial: str
    packet_start: int
|
||||
|
||||
def parse_header(d):
    """Parse the new-format .gs container header and locate the packet stream.

    Layout: u32 0xFFFFFFFF marker, u32 header-block size, then nine u32 fields. The serial
    string is read at `serial_offset` from the start of the header block, and the packet
    stream starts after the header block, the state blob and the 8192-byte register block.

    Returns:
        A populated `Header`.

    Raises:
        ValueError: the marker is missing, the header block is smaller than its nine fixed
            fields, the file ends inside those fields, or the packet stream would start
            past the end of the file.
    """
    fixed_len = 36                           # nine little-endian u32 fields
    if len(d) < 12 or struct.unpack_from("<I", d, 0)[0] != 0xFFFFFFFF:
        raise ValueError("not a new-format .gs (missing 0xFFFFFFFF marker)")
    header_size = struct.unpack_from("<I", d, 4)[0]
    # Validate before unpacking: a short file must fail as ValueError, not struct.error.
    if header_size < fixed_len:
        raise ValueError(f"header_size {header_size} < 36")
    if len(d) < 8 + fixed_len:
        raise ValueError(f"truncated header: file is 0x{len(d):x} bytes, need 0x{8 + fixed_len:x}")
    f = struct.unpack_from("<9I", d, 8)
    h = Header(*f, header_size=header_size, serial="", packet_start=0)
    s0 = 8 + h.serial_offset
    h.serial = d[s0:s0+h.serial_size].decode("latin1", "replace")
    h.packet_start = 8 + header_size + h.state_size + 8192
    if h.packet_start > len(d):
        raise ValueError(f"packet_start 0x{h.packet_start:x} past EOF 0x{len(d):x}")
    return h
|
||||
|
||||
# ---------------------------------------------------------------- GIF tag decode
|
||||
def _bits(q, lo, n): # extract n bits at lo from a 128-bit int
|
||||
return (q >> lo) & ((1 << n) - 1)
|
||||
|
||||
def decode_packed_reg(desc, q):
    """Return (addr, value_64, note) for one PACKED register qword, or (None,0,note) to skip.

    `desc` is the 4-bit REGS descriptor and `q` the 128-bit qword as an int. The value is
    repacked into the layout of the target GS register. `note` is "adc" when a PACKED
    XYZ2/XYZF2 qword has bit 111 set, "nop" for the NOP descriptor, and an
    "..._unhandled" tag for descriptors with no decoder here.
    """
    if desc == PACKED_AD:
        # A+D: data in [63:0], register address in [71:64]
        return _bits(q,64,8), q & 0xFFFFFFFFFFFFFFFF, ""
    if desc == PACKED_NOP:
        return None, 0, "nop"
    if desc == PACKED_PRIM:
        return 0x00, _bits(q,0,11), ""
    if desc == PACKED_RGBAQ:
        # R[7:0] G[39:32] B[71:64] A[103:96]. The PACKED qword carries no Q (Q is supplied
        # by the preceding ST), so only the 8-bit colour components are returned.
        r=_bits(q,0,8); g=_bits(q,32,8); b=_bits(q,64,8); a=_bits(q,96,8)
        return 0x01, (r | (g<<8) | (b<<16) | (a<<24)), ""
    if desc == PACKED_ST:
        return 0x02, (_bits(q,0,32) | (_bits(q,32,32)<<32)), ""   # S,T floats (Q -> RGBAQ.Q)
    if desc == PACKED_UV:
        # PACKED lanes are 32 bits wide: U[13:0], V[45:32]. The UV register is U[13:0], V[29:16].
        return 0x03, (_bits(q,0,14) | (_bits(q,32,14)<<16)), ""
    if desc in (PACKED_XYZ2, PACKED_XYZ3):
        # X[15:0] Y[47:32] Z[95:64] -> register X[15:0] Y[31:16] Z[63:32]
        val = _bits(q,0,16) | (_bits(q,32,16)<<16) | (_bits(q,64,32)<<32)
        if desc == PACKED_XYZ2:
            return 0x05, val, ("adc" if _bits(q,111,1) else "")
        return 0x0D, val, ""
    if desc in (PACKED_XYZF2, PACKED_XYZF3):
        # X[15:0] Y[47:32] Z[91:68] F[107:100] -> register X[15:0] Y[31:16] Z[55:32] F[63:56].
        # Z and F both sit 4 bits above the start of their 32-bit lane.
        val = _bits(q,0,16) | (_bits(q,32,16)<<16) | (_bits(q,68,24)<<32) | (_bits(q,100,8)<<56)
        if desc == PACKED_XYZF2:
            return 0x04, val, ("adc" if _bits(q,111,1) else "")
        return 0x0C, val, ""
    if desc in (PACKED_TEX0_1,PACKED_TEX0_2,PACKED_CLAMP1,PACKED_CLAMP2):
        # low 64 bits go to the register unchanged
        addr = {PACKED_TEX0_1:0x06,PACKED_TEX0_2:0x07,PACKED_CLAMP1:0x08,PACKED_CLAMP2:0x09}[desc]
        return addr, q & 0xFFFFFFFFFFFFFFFF, ""
    if desc == PACKED_FOG:
        # F[107:100] -> FOG register [63:56]
        return 0x0A, (_bits(q,100,8) << 56), ""
    return None, 0, f"packed_desc_0x{desc:x}_unhandled"
|
||||
|
||||
def walk_gif(data, base_off, frame, emit):
    """Walk the GIF tag chain in `data` (a Transfer payload). emit(Event).

    Args:
        data: payload bytes of one Transfer packet.
        base_off: offset of `data[0]` in the decompressed dump; added to every event's
            byte_off. Any value, including 0, is valid.
        frame: frame number stamped on every event.
        emit: callable receiving each Event (events are created with idx 0).

    Per tag this emits one GIFTAG event, then depending on FLG:
      * 0 PACKED: a PRIM GSREG when PRE is set, then one GSREG per decoded qword;
      * 1 REGLIST: one GSREG per 64-bit register, two per qword;
      * 2 IMAGE: one IMAGE event giving the size only (payload is never inlined);
      * 3 disabled: a second GIFTAG event; the NLOOP data qwords are skipped like IMAGE.
    Data that ends mid-tag is reported as MALFORMED rather than dropped silently.
    """
    MASK64 = 0xFFFFFFFFFFFFFFFF
    off = 0; n = len(data)
    while off + 16 <= n:
        q = int.from_bytes(data[off:off+16], "little")
        nloop=_bits(q,0,15); eop=_bits(q,15,1); pre=_bits(q,46,1); prim=_bits(q,47,11)
        flg=_bits(q,58,2); nreg=_bits(q,60,4); regs=_bits(q,64,64)
        nregs = nreg if nreg != 0 else 16    # NREG=0 encodes 16 descriptors
        emit(Event("GIFTAG",frame,0,base_off+off,info=dict(nloop=nloop,eop=eop,pre=pre,prim=prim,flg=flg,nreg=nregs)))
        off += 16
        # The GS only honours PRE/PRIM in PACKED mode; other modes ignore the field.
        if pre and flg == 0:
            emit(Event("GSREG",frame,0,base_off+off,reg="PRIM",addr=0x00,value=prim,info=dict(via="PRE")))
        if flg == 0:                                   # PACKED: nloop * nregs qwords
            descs = [(regs >> (4*k)) & 0xF for k in range(nregs)]
            short = False
            for _ in range(nloop):
                for desc in descs:
                    if off+16 > n:
                        short = True
                        break
                    qq = int.from_bytes(data[off:off+16],"little")
                    addr,val,note = decode_packed_reg(desc, qq)
                    if addr is not None:
                        inf = {"note":note} if note else {}
                        # Ch342 audit: PACKED ST also carries Q in lane2 [95:64] -> routed to RGBAQ.Q by
                        # the GS (the STQ mechanism). PACKED RGBAQ carries NO Q; Q comes from ST. Expose
                        # it so consumers reconstruct RGBAQ.Q consistently across PACKED/REGLIST/A+D.
                        # Keyed on the descriptor: an A+D write to ST has its address, not Q, in that lane.
                        if desc == PACKED_ST: inf["q_stq"] = _bits(qq, 64, 32)
                        emit(Event("GSREG",frame,0,base_off+off,reg=GS_REG.get(addr,f"UNKNOWN_0x{addr:02x}"),
                                   addr=addr,value=val,info=inf))
                    elif note and note!="nop":
                        emit(Event("MALFORMED",frame,0,base_off+off,info=dict(reason=note)))
                    off += 16
                if short:
                    break
            if short:
                emit(Event("MALFORMED",frame,0,base_off+off,info=dict(reason="gif_packed_truncated")))
        elif flg == 1:                                 # REGLIST: nloop * nregs registers, 2 per qword (64-bit each), A+D-less
            descs = [(regs >> (4*k)) & 0xF for k in range(nregs)]
            total = nloop*nregs
            done = 0
            while done < total:
                if off+16 > n:
                    emit(Event("MALFORMED",frame,0,base_off+off,info=dict(reason="gif_reglist_truncated")))
                    break
                qq = int.from_bytes(data[off:off+16],"little")
                for lane in (0, 1):
                    if done >= total: break            # odd count: the upper half is padding
                    desc = descs[done % nregs]
                    done += 1
                    if desc == PACKED_NOP: continue
                    # byte_off points at this register's own 64-bit half of the qword
                    emit(Event("GSREG",frame,0,base_off+off+8*lane,reg=GS_REG.get(desc,f"UNKNOWN_0x{desc:02x}"),
                               addr=desc,value=(qq >> (64*lane)) & MASK64,info=dict(via="REGLIST")))
                off += 16
        elif flg == 2:                                 # IMAGE: nloop qwords of raw data (texture / FB upload) — NOT inlined
            qbytes = nloop*16
            emit(Event("IMAGE",frame,0,base_off+off,info=dict(qwc=nloop,bytes=qbytes)))
            off += qbytes
        else:                                          # flg == 3 disabled: data is consumed like IMAGE
            qbytes = nloop*16
            emit(Event("GIFTAG",frame,0,base_off+off,info=dict(flg=3,note="disabled",qwc=nloop,bytes=qbytes)))
            off += qbytes
        if off > n:
            emit(Event("MALFORMED",frame,0,base_off+off,info=dict(reason="gif_payload_overrun")))
            break
|
||||
|
||||
# ---------------------------------------------------------------- packet stream
|
||||
def parse_dump(path):
    """Parse the dump at `path` into (Header, list of Event).

    Packets are read from `Header.packet_start` to the end of the data. Transfer payloads
    are expanded by `walk_gif`; VSync packets emit FRAME_BOUNDARY and advance the frame
    counter; ReadFIFO2 and Registers packets emit structural events only. Each event's `idx`
    is its position in the returned list. Parsing stops at the first truncated or unknown
    packet, which is always reported as a MALFORMED event.
    """
    d = read_dump_bytes(path)
    h = parse_header(d)
    events = []; frame = 0
    total = len(d)

    def emit(ev):
        ev.idx = len(events); events.append(ev)

    def malformed(at, reason, **extra):
        # `frame` is read at call time, so the event carries the current frame number
        emit(Event("MALFORMED",frame,0,at,info=dict(reason=reason, **extra)))

    off = h.packet_start
    while off < total:
        tid = d[off]; pkt_off = off; off += 1
        if tid == 0:                                   # Transfer
            if off+5 > total:
                malformed(pkt_off, "trunc_transfer_hdr"); break
            path_id = d[off]; length = struct.unpack_from("<I",d,off+1)[0]; off += 5
            if off+length > total:
                malformed(pkt_off, "trunc_transfer_data", len=length); break
            emit(Event("TRANSFER",frame,0,pkt_off,info=dict(path=GSPATH.get(path_id,path_id),length=length)))
            walk_gif(d[off:off+length], off, frame, emit)
            off += length
        elif tid == 1:                                 # VSync (frame boundary)
            if off >= total:
                malformed(pkt_off, "trunc_vsync"); break
            emit(Event("FRAME_BOUNDARY",frame,0,pkt_off,info=dict(field=d[off]))); off += 1; frame += 1
        elif tid == 2:                                 # ReadFIFO2
            if off+4 > total:
                malformed(pkt_off, "trunc_readfifo"); break
            emit(Event("READFIFO",frame,0,pkt_off,info=dict(qwc=struct.unpack_from("<I",d,off)[0]))); off += 4
        elif tid == 3:                                 # Registers snapshot
            if off+8192 > total:
                malformed(pkt_off, "trunc_regs_snapshot"); break
            emit(Event("TRANSFER",frame,0,pkt_off,info=dict(regs_snapshot=8192))); off += 8192
        else:
            malformed(pkt_off, f"bad_packet_id_{tid}"); break
    return h, events
|
||||
|
||||
# ---------------------------------------------------------------- CLI / summary
|
||||
def main(argv):
    """CLI entry point: parse `argv[1]`, print the header line and the event histograms.

    `--events N` additionally prints the first N events; `--json PATH` writes every event
    as one JSON object per line. Returns 2 (after printing the module docstring) when no
    dump path is given, otherwise 0.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2
    h, events = parse_dump(argv[1])
    print(f"schema v{SCHEMA_VERSION} serial={h.serial!r} crc=0x{h.crc:08x} ss={h.ss_w}x{h.ss_h} "
          f"state=0x{h.state_size:x} packets@0x{h.packet_start:x}")

    # histograms
    def bump(table, key):
        table[key] = table.get(key, 0) + 1

    def by_count(table):
        return sorted(table.items(), key=lambda item: -item[1])

    PRIMT={0:"POINT",1:"LINE",2:"LINE_STRIP",3:"TRIANGLE",4:"TRI_STRIP",5:"TRI_FAN",6:"SPRITE",7:"INVALID"}
    FLG_NAMES = {0: 'PACKED', 1: 'REGLIST', 2: 'IMAGE'}
    kinds, regs, prims, flgs = {}, {}, {}, {}
    frames = images = image_bytes = malformed = 0
    for ev in events:
        kind = ev.kind
        bump(kinds, kind)
        if kind == "GSREG":
            bump(regs, ev.reg)
        elif kind == "FRAME_BOUNDARY":
            frames += 1
        elif kind == "MALFORMED":
            malformed += 1
        elif kind == "IMAGE":
            images += 1
            image_bytes += ev.info.get("bytes", 0)
        elif kind == "GIFTAG":
            bump(flgs, ev.info.get("flg"))
            if ev.info.get("pre"):
                # primitive type is the low 3 bits of the tag's PRIM field
                bump(prims, PRIMT.get(ev.info.get("prim", 0) & 7, "?"))
    print(f"events={len(events)} frames={frames} images={images} image_bytes={image_bytes} malformed={malformed}")
    print("event kinds:", dict(by_count(kinds)))
    print("GIF flg :", {FLG_NAMES.get(k, 'DISABLE'): v for k, v in sorted(flgs.items())})
    print("PRIM types (via PRE):", dict(by_count(prims)))
    print("top GS regs:", dict(by_count(regs)[:18]))

    if "--events" in argv:
        limit = int(argv[argv.index("--events") + 1])
        for e in events[:limit]:
            print(f" f{e.frame} #{e.idx} @0x{e.byte_off:x} {e.kind} {e.reg} {('0x%x'%e.value) if e.kind=='GSREG' else ''} {e.info}")
    if "--json" in argv:
        outp = argv[argv.index("--json") + 1]
        with open(outp, "w") as f:
            for e in events:
                f.write(json.dumps(asdict(e)) + "\n")
        print(f"wrote {len(events)} events -> {outp}")
    return 0
|
||||
|
||||
if __name__ == "__main__":
    # propagate main()'s return value as the process exit status
    raise SystemExit(main(sys.argv))
|
||||
@@ -0,0 +1,226 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch349 step 1: census of ACTUAL drawn textured geometry in a GS dump.
|
||||
|
||||
Ch347/348 proved authentic ASSETS (SH3 PSMT8 tex + real CLUT) through CHOSEN geometry. The remaining
|
||||
authenticity gap (Codex Ch349): reconstruct an ACTUAL commercial draw faithfully — the texture as the real
|
||||
draw samples it (streamed in one format, sampled in another) on the real triangle's ST/Q + screen geometry.
|
||||
|
||||
This tool is step 1: walk the GS register stream, reconstruct every textured drawing primitive with its full
|
||||
per-vertex state (screen XY from XYZF2/XYZ2 12.4 fixed, S/T/Q from ST+RGBAQ.Q, RGBA), group consecutive
|
||||
primitives that share TEX0+PRIM state into DRAWS, and rank them so a single real environment draw can be
|
||||
PICKED for reconstruction. Pure stdlib; reuses gs_parse for the GIF/register walk and gs_texture_residency
|
||||
for the VRAM snapshot + CLUT/texture residency verdict.
|
||||
|
||||
A good Ch349 candidate is: TME=1, texture RESIDENT in the VRAM snapshot, a non-trivial on-screen footprint
|
||||
(a real surface, not a 2px HUD glyph), indexed or CT texture with a known PSM, and enough triangles to be a
|
||||
genuine mapped surface. The census REPORTS; it does not pick for you — the ranked head is the shortlist.
|
||||
|
||||
Usage: gs_sh3_draw_census.py <dump.gs.zst> [--top N] [--frame F] [--json out.json] [--min-prims K]
|
||||
"""
|
||||
import sys, os, json, struct
|
||||
from collections import defaultdict
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
import gs_parse
|
||||
import gs_texture_residency as R
|
||||
|
||||
def f32(bits):
    """Reinterpret the low 32 bits of `bits` as a little-endian IEEE-754 single float."""
    raw = struct.pack("<I", bits & 0xFFFFFFFF)
    (value,) = struct.unpack("<f", raw)
    return value
|
||||
|
||||
# PRIM type code -> primitive name
PRIMT = {
    0: "POINT",
    1: "LINE",
    2: "LINE_STRIP",
    3: "TRIANGLE",
    4: "TRI_STRIP",
    5: "TRI_FAN",
    6: "SPRITE",
    7: "INVALID",
}
# min verts to kick a primitive
VERTS_PER = {
    0: 1,
    1: 2,
    2: 2,
    3: 3,
    4: 3,
    5: 3,
    6: 2,
    7: 0,
}
|
||||
|
||||
def census(dump, frame_filter=None, min_prims=1, collected=None):
    """Reconstruct the textured draws of a dump and rank them by `_score`.

    Args:
        dump: dump path handed to `R.collect` when `collected` is None.
        frame_filter: when not None, keep only draws recorded on that frame.
        min_prims: drop draws whose `nprim` is below this.
        collected: optional pre-computed 6-tuple in the shape `R.collect` returns
            (d, h, events, uploads, runs, vram), to avoid re-parsing.

    Returns:
        (draws sorted by descending score, h, vram). Each draw is a dict holding its latched
        TEX0/PRIM state, bounding boxes, vertex list and the residency/UV/score fields
        added in the second pass.

    NOTE(review): `nprim` is incremented on every vertex write, so it equals `nvert` rather
    than the number of completed primitives (VERTS_PER is not consulted) — confirm intent.
    """
    if collected is None:
        collected = R.collect(dump, 0)
    _d, h, events, _uploads, _runs, vram = collected

    # live GS state latched while walking the register stream
    prim = dict(type=7, tme=0, fst=0, abe=0, ctxt=0)
    tex0 = {1: None, 2: None}
    ofx = {1: 0.0, 2: 0.0}
    ofy = {1: 0.0, 2: 0.0}
    cur_st = (0.0, 0.0, 1.0)                 # S, T, Q
    cur_rgba = 0
    draws = []
    cur = None
    cur_frame = 0
    cur_idx = 0

    def context():
        # PRIM.CTXT selects which of the two TEX0/XYOFFSET sets is active
        return 1 if prim["ctxt"] == 0 else 2

    def newkey():
        active = tex0[context()]
        if active is None:
            return None
        return (active["tbp"], active["psm"], active["tbw"], prim["type"], prim["tme"], prim["abe"])

    def close():
        nonlocal cur
        if cur and cur["nprim"] >= 1:
            draws.append(cur)
        cur = None

    def open_draw():
        nonlocal cur
        active = tex0[context()]
        cur = dict(key=newkey(), tex0=dict(active), prim=dict(prim), frame=cur_frame,
                   first_idx=cur_idx, nprim=0, nvert=0,
                   xmin=1e30, xmax=-1e30, ymin=1e30, ymax=-1e30,
                   smin=1e30, smax=-1e30, tmin=1e30, tmax=-1e30,
                   qmin=1e30, qmax=-1e30, verts=[])

    def widen(lo_key, hi_key, value):
        # grow one [min, max] pair of the open draw
        cur[lo_key] = min(cur[lo_key], value)
        cur[hi_key] = max(cur[hi_key], value)

    for e in events:
        if e.kind == "FRAME_BOUNDARY":
            cur_frame = e.frame + 1
            continue
        if e.kind != "GSREG":
            continue
        cur_frame = e.frame
        cur_idx = e.idx
        r, v = e.reg, e.value
        if r == "PRIM":
            # a PRIM write always ends the current draw
            close()
            prim = dict(type=v&7, tme=(v>>4)&1, fst=(v>>8)&1, abe=(v>>6)&1, ctxt=(v>>9)&1)
        elif r == "TEX0_1":
            tex0[1] = R.dec_tex0(v)
        elif r == "TEX0_2":
            tex0[2] = R.dec_tex0(v)
        elif r == "XYOFFSET_1":
            ofx[1] = (v & 0xFFFF)/16.0
            ofy[1] = ((v>>32)&0xFFFF)/16.0
        elif r == "XYOFFSET_2":
            ofx[2] = (v & 0xFFFF)/16.0
            ofy[2] = ((v>>32)&0xFFFF)/16.0
        elif r == "RGBAQ":
            cur_rgba = v & 0xFFFFFFFF
        elif r == "ST":
            s = f32(v & 0xFFFFFFFF)
            t = f32((v>>32)&0xFFFFFFFF)
            q = f32(e.info.get("q_stq", 0x3F800000))   # Q rides in ST lane2; default 1.0
            cur_st = (s, t, q)
        elif r in ("XYZF2","XYZ2","XYZF3","XYZ3"):
            # only textured draws with a latched TEX0 are Ch349 candidates
            # (PRMODE and every other register are ignored by this walk)
            if not prim["tme"]:
                continue
            key = newkey()
            if key is None:
                continue
            # vertex write: XY are 12.4 fixed, made relative to the active XYOFFSET
            ci = context()
            x = (v & 0xFFFF)/16.0 - ofx[ci]
            y = ((v>>16) & 0xFFFF)/16.0 - ofy[ci]
            zmask = 0xFFFFFF if r in ("XYZF2","XYZF3") else 0xFFFFFFFF
            z = (v>>32) & zmask
            if cur is None or cur["key"] != key:
                close()
                open_draw()
            s_cur, t_cur, q_cur = cur_st[0], cur_st[1], cur_st[2]
            cur["verts"].append(dict(x=x, y=y, z=z, s=s_cur, t=t_cur, q=q_cur, rgba=cur_rgba))
            cur["nvert"] += 1
            cur["nprim"] += 1                # counted per vertex write (see NOTE in the docstring)
            widen("xmin", "xmax", x)
            widen("ymin", "ymax", y)
            widen("smin", "smax", s_cur)
            widen("tmin", "tmax", t_cur)
            widen("qmin", "qmax", q_cur)
    close()

    # attach residency verdict + derived UV/on-screen metrics + score; filter
    SW, SH = 640.0, 480.0                    # SH3 internal render res (matches header ss=640x480)
    out = []
    for dr in draws:
        if dr["nprim"] < min_prims:
            continue
        if frame_filter is not None and dr["frame"] != frame_filter:
            continue
        t0 = dr["tex0"]
        snap = R.snapshot_present(vram, t0["tbp"], nb=512)
        clut = None
        if t0["psm"] in R.INDEXED_PSMS:
            csnap = R.snapshot_present(vram, t0["cbp"], nb=1024, min_nz=64)
            clut = dict(cbp=t0["cbp"], cpsm=t0["cpsm"], cld=t0["cld"], resident=bool(csnap),
                        distinct=(csnap["distinct"] if csnap else None))
        dr["tex_resident"] = bool(snap)
        dr["tex_snap"] = snap
        dr["clut"] = clut
        dr["w_px"] = dr["xmax"]-dr["xmin"]
        dr["h_px"] = dr["ymax"]-dr["ymin"]

        # per-vertex perspective UV (u_norm=S/Q) -> texel bbox; on-screen vertex fraction
        umin = vmin = 1e30
        umax = vmax = -1e30
        onscreen = 0
        for vtx in dr["verts"]:
            if 0.0 <= vtx["x"] <= SW and 0.0 <= vtx["y"] <= SH:
                onscreen += 1
            q = vtx["q"]
            if abs(q) < 1e-9:
                continue                     # no usable perspective divide for this vertex
            un = vtx["s"]/q
            vn = vtx["t"]/q
            umin = min(umin, un)
            umax = max(umax, un)
            vmin = min(vmin, vn)
            vmax = max(vmax, vn)
        dr["onscreen_frac"] = onscreen/max(1, dr["nvert"])
        if umax >= umin:
            dr["u_texmin"] = umin*t0["tw"]
            dr["u_texmax"] = umax*t0["tw"]
            dr["v_texmin"] = vmin*t0["th"]
            dr["v_texmax"] = vmax*t0["th"]
        else:
            # no vertex had a usable Q
            dr["u_texmin"] = 0.0
            dr["u_texmax"] = 0.0
            dr["v_texmin"] = 0.0
            dr["v_texmax"] = 0.0

        # on-screen clipped bbox area
        cx0 = max(0.0, dr["xmin"])
        cx1 = min(SW, dr["xmax"])
        cy0 = max(0.0, dr["ymin"])
        cy1 = min(SH, dr["ymax"])
        dr["onscreen_area"] = max(0.0, cx1-cx0)*max(0.0, cy1-cy0)
        dr["score"] = _score(dr)
        out.append(dr)
    out.sort(key=lambda x: x["score"], reverse=True)
    return out, h, vram
|
||||
|
||||
def _score(dr):
    """Rank a census draw as a reconstruction candidate; higher is better, -1.0 means unusable.

    A draw is unusable when its texture is not resident, when it is indexed without a resident
    CLUT, or when its PSM is not one the host can decode. Otherwise the score rewards on-screen
    containment and on-screen area (guard-band area is excluded), the sampled texel span, the
    primitive count, and genuine perspective (Q varying by more than 2%).
    """
    tex0 = dr["tex0"]
    if not dr["tex_resident"]:
        return -1.0  # must be reconstructable

    psm = tex0["psm"]
    if psm in R.INDEXED_PSMS:
        clut = dr["clut"]
        if not (clut and clut["resident"]):
            return -1.0
        total = 40.0  # indexed + resident CLUT == the real SH3 path
    elif psm in (0x00, 0x02, 0x0A, 0x01):  # CT32/CT16/CT16S/CT24 — directly decodable
        total = 18.0
    else:
        return -1.0  # unsupported PSM for host decode

    # On-screen containment dominates: we want a draw we can actually show and check.
    total += 100.0 * dr["onscreen_frac"]
    total += min(dr["onscreen_area"] / 200.0, 120.0)

    # The sampled texel rectangle must be a real chunk, not a single degenerate texel.
    span_u = abs(dr["u_texmax"] - dr["u_texmin"])
    span_v = abs(dr["v_texmax"] - dr["v_texmin"])
    total += min(span_u, 64.0) + min(span_v, 64.0)
    if span_u < 1.0 and span_v < 1.0:
        total -= 80.0  # near-constant UV: flat/degenerate, not interesting

    total += min(dr["nprim"], 48)
    if dr["qmax"] > dr["qmin"] * 1.02:
        total += 15.0  # genuine perspective
    return total
|
||||
|
||||
def fmt(dr):
    """Format one scored draw as a three-line text summary (header, TEX0/CLUT state, geometry)."""
    t0 = dr["tex0"]
    prim = dr["prim"]

    clut_info = dr["clut"]
    if clut_info:
        flag = 'R' if clut_info['resident'] else 'X'
        clut = (f" CLUT[{flag} cbp={t0['cbp']} cpsm=0x{t0['cpsm']:02x} cld={t0['cld']} "
                f"dist={clut_info['distinct']}]")
    else:
        clut = ""

    # Same 2% Q-spread threshold the scorer uses to call a draw perspective.
    if dr["qmax"] > dr["qmin"]*1.02:
        persp = "PERSP"
    else:
        persp = "affine"

    header = (f"score={dr['score']:6.1f} f{dr['frame']} idx{dr['first_idx']} {PRIMT[prim['type']]} "
              f"tme={prim['tme']} abe={prim['abe']} fst={prim['fst']} nprim={dr['nprim']} "
              f"onscr={dr['onscreen_frac']*100:.0f}%")
    texture = (f" TEX0 tbp={t0['tbp']} tbw={t0['tbw']} psm=0x{t0['psm']:02x} {t0['tw']}x{t0['th']} "
               f"tcc={t0['tcc']} tfx={t0['tfx']} resident={dr['tex_resident']}{clut}")
    geometry = (f" screen x[{dr['xmin']:.1f}..{dr['xmax']:.1f}] y[{dr['ymin']:.1f}..{dr['ymax']:.1f}] "
                f"on-area={dr['onscreen_area']:.0f}px2 texel u[{dr['u_texmin']:.1f}..{dr['u_texmax']:.1f}] "
                f"v[{dr['v_texmin']:.1f}..{dr['v_texmax']:.1f}] {persp}")
    return "\n".join((header, texture, geometry))
|
||||
|
||||
def get_draw(dump, first_idx):
    """Look up one census draw by its first_idx.

    Runs the census over the whole dump (no frame filter, min_prims=1) and returns the first
    draw dict whose "first_idx" equals the argument, or None when there is no such draw.
    """
    all_draws, _header, _vram = census(dump, frame_filter=None, min_prims=1)
    return next((cand for cand in all_draws if cand["first_idx"] == first_idx), None)
|
||||
|
||||
def main(argv):
    """Command-line entry point: run the draw census on a dump and print the ranked draws.

    argv[1] is the dump path. Options are looked up by name anywhere in argv:
      --top N        number of draws to print (default 25)
      --min-prims N  minimum primitives per draw (default 1)
      --frame F      only consider this frame
      --json PATH    also write a slim per-draw summary to PATH

    Returns 2 (after printing the module docstring) when no dump is given, otherwise 0.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2
    dump = argv[1]

    def opt(n, dv=None):
        # Value that follows flag `n` in argv, or `dv` when the flag is absent.
        return argv[argv.index(n) + 1] if n in argv else dv

    top = int(opt("--top", "25"))
    minp = int(opt("--min-prims", "1"))
    ff = int(opt("--frame")) if "--frame" in argv else None

    draws, h, vram = census(dump, frame_filter=ff, min_prims=minp)
    print(f"# Ch349 draw census: {os.path.basename(dump)}")
    print(f"# textured draws (>= {minp} prim): {len(draws)} vram_snapshot={'present' if vram is not None else 'ABSENT'}")
    cand = [d for d in draws if d["score"] > 0]
    print(f"# reconstructable candidates (resident tex + known PSM): {len(cand)}\n")
    for dr in draws[:top]:
        print(fmt(dr))
        print()

    if "--json" in argv:
        json_path = opt("--json")
        slim = [dict(score=d["score"], frame=d["frame"], first_idx=d["first_idx"],
                     prim=d["prim"], tex0=d["tex0"], nprim=d["nprim"],
                     screen=dict(xmin=d["xmin"], xmax=d["xmax"], ymin=d["ymin"], ymax=d["ymax"]),
                     st=dict(smin=d["smin"], smax=d["smax"], tmin=d["tmin"], tmax=d["tmax"],
                             qmin=d["qmin"], qmax=d["qmax"]),
                     tex_resident=d["tex_resident"], clut=d["clut"]) for d in draws]
        # Context manager so the file is flushed and closed before we report success.
        with open(json_path, "w") as fh:
            fh.write(json.dumps(slim, indent=1) + "\n")
        print(f"# wrote {json_path}")
    return 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main(sys.argv))
|
||||
@@ -0,0 +1,113 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch349 step 4: software reference image for an ACTUAL SH3 draw.
|
||||
|
||||
Rasterizes the chosen draw's REAL geometry (its TRI_STRIP, real per-vertex screen XY + perspective S/T/Q +
|
||||
vertex RGBA) sampling the texture RECONSTRUCTED from GS local memory (gs_sh3_recon / gs_localmem). Perspective-
|
||||
correct: S,T,Q (= s/w, t/w, 1/w) interpolate linearly in screen space, then u=(S/Q)*TW, v=(T/Q)*TH; texel ->
|
||||
CLUT -> ABGR; TFX=MODULATE applies the vertex color (×2/255-ish, GS 128=1.0). This is the host-first reference
|
||||
that must pixel-check against the real PCSX2 frame BEFORE anything goes to feeder/board.
|
||||
|
||||
Usage: gs_sh3_draw_ref.py <dump.gs.zst> [--draw-idx N] [--tbp T --cbp C --tbw W --tw 512 --th 512]
|
||||
[--clut-order grid|linear] [--modulate 0|1] [--out DIR]
|
||||
"""
|
||||
import sys, os
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
import gs_sh3_draw_census as C
|
||||
import gs_sh3_recon as RC
|
||||
|
||||
# Output canvas width/height in pixels; the rasterizer's colour and depth buffers hold SW*SH entries.
SW, SH = 640, 480
|
||||
|
||||
def edge(ax, ay, bx, by, px, py):
    """Edge function of point p against the directed segment a->b.

    This is the 2D cross product (p - a) x (b - a): zero when p is on the line through a and b,
    and of opposite sign on either side of it.
    """
    rel_px, rel_py = px - ax, py - ay
    rel_bx, rel_by = bx - ax, by - ay
    return rel_px * rel_by - rel_py * rel_bx
|
||||
|
||||
def raster_strip(verts, prim_type, tex_idx, pal, tw, th, modulate):
    """Rasterize a TRI_STRIP/TRI_FAN/TRIANGLE list of verts into an SW*SH RGBA buffer with a z-buffer.

    verts      sequence of vertex dicts; keys read here are x, y, z, s, t, q and (when
               modulating) rgba, a packed int with R in the low byte
    prim_type  3 = TRIANGLE list, 4 = TRI_STRIP, 5 = TRI_FAN; any other value draws nothing
    tex_idx    row-major tw*th sequence of palette indices
    pal        palette of packed ints (R in the low byte, then G, then B)
    modulate   truthy -> multiply the texel by the interpolated vertex colour, 128 == 1.0

    Texture addressing is REPEAT: the texel coordinate is floored and wrapped modulo tw/th, so
    negative coordinates continue the tiling instead of collapsing onto texel 0.

    Returns the colour buffer: a list of SW*SH (r, g, b, a) tuples, (0, 0, 0, 0) where nothing
    was drawn. A pixel is overwritten when the new z is >= the stored z.
    """
    fb = [(0, 0, 0, 0)] * (SW * SH)
    zb = [-1] * (SW * SH)

    def tri(i0, i1, i2):
        v0, v1, v2 = verts[i0], verts[i1], verts[i2]
        x0, y0 = v0["x"], v0["y"]
        x1, y1 = v1["x"], v1["y"]
        x2, y2 = v2["x"], v2["y"]
        # Pixel bounding box of the triangle, clipped to the canvas.
        minx = max(0, int(min(x0, x1, x2)))
        maxx = min(SW - 1, int(max(x0, x1, x2)) + 1)
        miny = max(0, int(min(y0, y1, y2)))
        maxy = min(SH - 1, int(max(y0, y1, y2)) + 1)
        if maxx < minx or maxy < miny:
            return  # entirely off-canvas
        area = edge(x0, y0, x1, y1, x2, y2)
        if abs(area) < 1e-6:
            return  # degenerate triangle
        inv = 1.0 / area
        for py in range(miny, maxy + 1):
            for px in range(minx, maxx + 1):
                cx, cy = px + 0.5, py + 0.5  # sample at the pixel centre
                w0 = edge(x1, y1, x2, y2, cx, cy)
                w1 = edge(x2, y2, x0, y0, cx, cy)
                w2 = edge(x0, y0, x1, y1, cx, cy)
                # inside test (either winding)
                if not ((w0 >= 0 and w1 >= 0 and w2 >= 0) or (w0 <= 0 and w1 <= 0 and w2 <= 0)):
                    continue
                b0, b1, b2 = w0 * inv, w1 * inv, w2 * inv
                S = b0 * v0["s"] + b1 * v1["s"] + b2 * v2["s"]
                T = b0 * v0["t"] + b1 * v1["t"] + b2 * v2["t"]
                Q = b0 * v0["q"] + b1 * v1["q"] + b2 * v2["q"]
                if abs(Q) < 1e-12:
                    continue
                u = (S / Q) * tw
                v = (T / Q) * th
                # REPEAT wrap. Floor (not int(), which truncates toward zero) so that u in
                # (-1, 0) lands on the last texel; Python's % is already non-negative here.
                tx = int(u // 1) % tw
                ty = int(v // 1) % th
                p = pal[tex_idx[ty * tw + tx] & 0xFF]
                r, g, b = p & 0xFF, (p >> 8) & 0xFF, (p >> 16) & 0xFF
                if modulate:
                    c0, c1, c2 = v0["rgba"], v1["rgba"], v2["rgba"]
                    vr = b0 * (c0 & 0xFF) + b1 * (c1 & 0xFF) + b2 * (c2 & 0xFF)
                    vg = b0 * ((c0 >> 8) & 0xFF) + b1 * ((c1 >> 8) & 0xFF) + b2 * ((c2 >> 8) & 0xFF)
                    vb = b0 * ((c0 >> 16) & 0xFF) + b1 * ((c1 >> 16) & 0xFF) + b2 * ((c2 >> 16) & 0xFF)
                    r = min(255, int(r * vr / 128.0))
                    g = min(255, int(g * vg / 128.0))
                    b = min(255, int(b * vb / 128.0))
                z = int(b0 * v0["z"] + b1 * v1["z"] + b2 * v2["z"])
                o = py * SW + px
                if z >= zb[o]:
                    zb[o] = z
                    fb[o] = (r, g, b, 255)

    n = len(verts)
    if prim_type == 3:  # TRIANGLE list
        for i in range(0, n - 2, 3):
            tri(i, i + 1, i + 2)
    elif prim_type == 4:  # TRI_STRIP
        for i in range(2, n):
            tri(i - 2, i - 1, i)
    elif prim_type == 5:  # TRI_FAN
        for i in range(2, n):
            tri(0, i - 1, i)
    return fb
|
||||
|
||||
def main(argv):
    """Command-line entry point: rasterize one draw from a dump and save full + cropped PNGs.

    argv[1] is the dump. Options (--draw-idx, --tbp, --cbp, --tbw, --tw, --th, --clut-order,
    --modulate, --out) are looked up by name in argv. Returns 2 when no dump is given (after
    printing the module docstring), 1 when the draw or the VRAM snapshot is missing, else 0.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2

    def opt(n, dv=None):
        # Value following flag `n`, or the default when the flag is not present.
        if n in argv:
            return argv[argv.index(n) + 1]
        return dv

    dump = argv[1]
    draw_idx = int(opt("--draw-idx", "89761"))
    tbp = int(opt("--tbp", "9216"))
    cbp = int(opt("--cbp", "13952"))
    fbw = int(opt("--tbw", "8"))
    tw = int(opt("--tw", "512"))
    th = int(opt("--th", "512"))
    order = opt("--clut-order", "grid")
    modulate = int(opt("--modulate", "1"))
    parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    default_out = os.path.join(parent_dir, "captures", "gs", "silenthill3", "extracted", "recon")
    outdir = opt("--out", default_out)
    os.makedirs(outdir, exist_ok=True)

    dr = C.get_draw(dump, draw_idx)
    if dr is None:
        print(f"draw first_idx={draw_idx} not found")
        return 1
    t0 = dr["tex0"]
    prim_name = C.PRIMT[dr['prim']['type']]
    print(f"[step4] draw f{dr['frame']} idx{draw_idx} {prim_name} nprim={dr['nprim']} "
          f"nvert={dr['nvert']} TEX0 tbp={t0['tbp']} psm=0x{t0['psm']:02x} {t0['tw']}x{t0['th']}")

    mem, replayed, uploads, events, vram = RC.build_localmem_to(dump, draw_idx)
    if mem is None:
        print("VRAM absent")
        return 1
    # Texture and palette are read at the command-line tbp/cbp, not at the draw's own TEX0.
    tex_idx = mem.read_psmt8(tbp, fbw, tw, th)
    pal = RC.read_clut32(mem, cbp, order=order)
    print(f"[step4] reconstructed tex {tw}x{th} ({len(set(tex_idx))} idx), CLUT {order} ({len(set(pal))} colors)")

    fb = raster_strip(dr["verts"], dr["prim"]["type"], tex_idx, pal, tw, th, modulate)
    painted = sum(1 for px in fb if px[3])
    full_name = f"draw_ref_{draw_idx}_{order}.png"
    RC.save_png(os.path.join(outdir, full_name), SW, SH, fb)
    print(f"[step4] painted {painted} px -> {full_name} ({outdir})")

    # Crop to the draw's on-screen bbox for easier visual compare with the PCSX2 frame.
    x0 = max(0, int(dr['xmin']))
    x1 = min(SW, int(dr['xmax']) + 1)
    y0 = max(0, int(dr['ymin']))
    y1 = min(SH, int(dr['ymax']) + 1)
    if x1 > x0 and y1 > y0:
        crop = []
        for y in range(y0, y1):
            row = y * SW
            crop.extend(fb[row + x0:row + x1])
        crop_name = f"draw_ref_{draw_idx}_{order}_crop.png"
        RC.save_png(os.path.join(outdir, crop_name), x1 - x0, y1 - y0, crop)
        print(f"[step4] bbox crop x[{x0}..{x1}] y[{y0}..{y1}] -> {crop_name}")
    return 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main(sys.argv))
|
||||
@@ -0,0 +1,149 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch349 step 4 (definitive): composite a WHOLE SH3 frame from GS local memory.
|
||||
|
||||
Per-draw reconstruction (gs_sh3_draw_ref) proves one surface; this composites EVERY textured draw of a frame
|
||||
to pixel-check the reconstruction against the real PCSX2 screenshot — the strongest faithfulness test. It walks
|
||||
draws in capture order, keeping a GS local-memory model live (replaying host->local uploads as their idx is
|
||||
passed, so each draw samples the texture state IT saw, not a stale end-of-frame one), reconstructs+caches each
|
||||
bound texture (PSMT8 via grid-CSM1 CLUT, or PSMCT32 direct), and rasterizes perspective-correct with a z-buffer.
|
||||
|
||||
Output: composited frame PNG + a side-by-side vs the screenshot + a coverage/color summary. SH3-derived ->
|
||||
LOCAL/gitignored.
|
||||
Usage: gs_sh3_frame_ref.py <dump.gs.zst> [--frame F] [--shot path.png] [--max-draws N] [--min-area A] [--out DIR]
|
||||
"""
|
||||
import sys, os
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
import gs_sh3_draw_census as C
|
||||
import gs_sh3_recon as RC
|
||||
import gs_localmem as LM
|
||||
import gs_texture_residency as R
|
||||
|
||||
# Composite canvas width/height in pixels; the frame and depth buffers hold SW*SH entries.
SW, SH = 640, 480
|
||||
|
||||
def edge(ax, ay, bx, by, px, py):
    """Edge function of point p against the directed segment a->b.

    This is the 2D cross product (p - a) x (b - a): zero when p is on the line through a and b,
    and of opposite sign on either side of it.
    """
    rel_px, rel_py = px - ax, py - ay
    rel_bx, rel_by = bx - ax, by - ay
    return rel_px * rel_by - rel_py * rel_bx
|
||||
|
||||
def reconstruct_texture(mem, t0):
    """Read the texture bound by decoded TEX0 `t0` out of the local-memory model `mem`.

    Returns a 3-tuple (texels, palette, kind):
      PSMT8   (psm 0x13) -> (index image, 256-entry palette read in "grid" order, "i8")
      PSMCT32 (psm 0x00) -> (row-major list of 32-bit words, None, "ct32")
      anything else      -> (None, None, None)
    """
    psm = t0["psm"]
    tbp, tbw = t0["tbp"], t0["tbw"]
    tw, th = t0["tw"], t0["th"]

    if psm == 0x13:  # PSMT8 indexed
        indices = mem.read_psmt8(tbp, tbw, tw, th)
        palette = RC.read_clut32(mem, t0["cbp"], order="grid")
        return indices, palette, "i8"

    if psm == 0x00:  # PSMCT32 direct
        words = []
        for y in range(th):
            for x in range(tw):
                words.append(mem.read_ct32_word(tbp, tbw, x, y))
        return words, None, "ct32"

    return None, None, None
|
||||
|
||||
def sample(tex, pal, kind, tw, th, u, v):
    """Point-sample a texture at texel coordinates (u, v) with REPEAT wrapping.

    tex   row-major tw*th sequence: palette indices when kind == "i8", otherwise packed
          colour words
    pal   palette of packed colour words, only consulted when kind == "i8"
    u, v  texel-space coordinates (may be negative or beyond the texture size)

    The coordinate is floored before wrapping, so u in (-1, 0) selects the last texel of the
    row rather than texel 0 (plain int() truncates toward zero and would fold the first tile
    left of the origin back onto the texture's first column/row).

    Returns (r, g, b) taken from the low three bytes of the colour word.
    """
    # Python's % already yields a non-negative result for a positive modulus.
    tx = int(u // 1) % tw
    ty = int(v // 1) % th
    word = tex[ty * tw + tx]
    if kind == "i8":
        word = pal[word & 0xFF]
    return (word & 0xFF, (word >> 8) & 0xFF, (word >> 16) & 0xFF)
|
||||
|
||||
def main(argv):
    """Command-line entry point: composite every textured PSMT8/PSMCT32 draw of one frame.

    argv[1] is the dump. Options looked up by name in argv: --frame (default 1), --max-draws
    (default 100000), --min-area (default 0), --out (output directory) and --shot (screenshot
    to place next to the composite). Writes frame<F>_composite.png and, when --shot names an
    existing file, frame<F>_vs_screenshot.png. Returns 2 when no dump is given, else 0.
    """
    if len(argv) < 2:
        print(__doc__); return 2
    dump = argv[1]
    # Value following flag `n` in argv, or `dv` when the flag is absent.
    def opt(n, dv=None): return argv[argv.index(n)+1] if n in argv else dv
    frame = int(opt("--frame","1")); maxd = int(opt("--max-draws","100000"))
    min_area = float(opt("--min-area","0"))
    outdir = opt("--out", os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                                       "captures","gs","silenthill3","extracted","recon"))
    os.makedirs(outdir, exist_ok=True)

    # all draws of the frame, in capture (idx) order, with full geometry (parse ONCE, reuse for census)
    collected = R.collect(dump, 0)
    d, h, events, uploads, runs, vram = collected
    draws, _, _ = C.census(dump, frame_filter=frame, min_prims=1, collected=collected)
    # Keep only texture-mapped draws whose PSM reconstruct_texture() handles (PSMT8 / PSMCT32).
    draws = [dr for dr in draws if dr["prim"]["tme"] and dr["tex0"]["psm"] in (0x13,0x00)
             and dr["onscreen_area"] >= min_area]
    # census() returns draws sorted by score; restore capture order for compositing.
    draws.sort(key=lambda x: x["first_idx"])
    print(f"[frameref] frame {frame}: {len(draws)} textured (PSMT8/CT32) draws, min_area={min_area}; "
          f"replaying uploads incrementally")

    # NOTE(review): vram is not checked for None here, unlike the sibling tools — confirm that
    # LM.LocalMem accepts a missing snapshot.
    mem = LM.LocalMem(vram)
    # Only uploads with dpsm == 0x00 (PSMCT32) are replayed, in idx order.
    up_sorted = sorted([u for u in uploads if u["dpsm"]==0x00], key=lambda u: u["idx"])
    up_i = 0
    # epoch counter per tbp (bumped whenever we apply an upload to that dbp) -> texture cache key
    epoch = {}
    texcache = {}
    # Colour buffer of RGBA tuples and depth buffer; -1 means "nothing drawn yet".
    fb = [(0,0,0,0)]*(SW*SH); zb = [-1]*(SW*SH)

    def apply_uploads_before(idx):
        # Replay every not-yet-applied upload whose idx is below `idx` into the memory model.
        nonlocal up_i
        while up_i < len(up_sorted) and up_sorted[up_i]["idx"] < idx:
            u = up_sorted[up_i]
            off, end = u["blob_range"]; blob = d[off:end]
            words = [int.from_bytes(blob[i*4:i*4+4],"little") for i in range(len(blob)//4)]
            mem.write_image_ct32(u["dbp"], u["dbw"], u["dx"], u["dy"], u["w"], u["h"], words)
            # The epoch is keyed on the exact dbp only, so the cache entry of a texture whose
            # tbp/cbp equals this dbp is invalidated.
            epoch[u["dbp"]] = epoch.get(u["dbp"],0)+1
            up_i += 1

    painted_total = 0
    for n, dr in enumerate(draws[:maxd]):
        apply_uploads_before(dr["first_idx"])
        t0 = dr["tex0"]
        # Cache key = texture identity plus the upload epochs of its tbp and cbp.
        key = (t0["tbp"], t0["psm"], t0["tbw"], t0["tw"], t0["th"], t0.get("cbp"),
               epoch.get(t0["tbp"],0), epoch.get(t0.get("cbp"),0))
        if key not in texcache:
            texcache[key] = reconstruct_texture(mem, t0)
        tex, pal, kind = texcache[key]
        if kind is None: continue
        tw, th = t0["tw"], t0["th"]
        verts = dr["verts"]; pt = dr["prim"]["type"]
        def tri(i0,i1,i2):
            # Rasterize one triangle of the current draw into fb/zb.
            nonlocal painted_total
            v0,v1,v2 = verts[i0],verts[i1],verts[i2]
            x0,y0=v0["x"],v0["y"]; x1,y1=v1["x"],v1["y"]; x2,y2=v2["x"],v2["y"]
            # Pixel bounding box clipped to the canvas.
            minx=max(0,int(min(x0,x1,x2))); maxx=min(SW-1,int(max(x0,x1,x2))+1)
            miny=max(0,int(min(y0,y1,y2))); maxy=min(SH-1,int(max(y0,y1,y2))+1)
            if maxx<minx or maxy<miny: return
            area=edge(x0,y0,x1,y1,x2,y2)
            if abs(area)<1e-6: return
            inv=1.0/area
            for py in range(miny,maxy+1):
                base=py*SW
                for px in range(minx,maxx+1):
                    cx,cy=px+0.5,py+0.5
                    w0=edge(x1,y1,x2,y2,cx,cy); w1=edge(x2,y2,x0,y0,cx,cy); w2=edge(x0,y0,x1,y1,cx,cy)
                    # Inside test that accepts either winding.
                    if not ((w0>=0 and w1>=0 and w2>=0) or (w0<=0 and w1<=0 and w2<=0)): continue
                    b0,b1,b2=w0*inv,w1*inv,w2*inv
                    Q=b0*v0["q"]+b1*v1["q"]+b2*v2["q"]
                    if abs(Q)<1e-12: continue
                    S=b0*v0["s"]+b1*v1["s"]+b2*v2["s"]; T=b0*v0["t"]+b1*v1["t"]+b2*v2["t"]
                    # Perspective divide, then scale to texel units.
                    u=(S/Q)*tw; vv=(T/Q)*th
                    z=int(b0*v0["z"]+b1*v1["z"]+b2*v2["z"])
                    o=base+px
                    # Larger-or-equal z overwrites; painted_total counts writes, not unique pixels.
                    if z>=zb[o]:
                        zb[o]=z; fb[o]=sample(tex,pal,kind,tw,th,u,vv)+(255,); painted_total+=1
        # Primitive types handled: 4 = strip, 5 = fan, 3 = independent triangles.
        if pt==4:
            for i in range(2,len(verts)): tri(i-2,i-1,i)
        elif pt==5:
            for i in range(2,len(verts)): tri(0,i-1,i)
        elif pt==3:
            for i in range(0,len(verts)-2,3): tri(i,i+1,i+2)
        if (n+1)%200==0: print(f" ...{n+1}/{len(draws)} draws, {painted_total} px, {len(texcache)} tex cached")

    # Coverage = pixels whose alpha was set by at least one draw.
    cov = sum(1 for p in fb if p[3])
    RC.save_png(os.path.join(outdir, f"frame{frame}_composite.png"), SW, SH, fb)
    print(f"[frameref] composite: {cov}/{SW*SH} px painted ({100*cov/(SW*SH):.1f}%), {len(texcache)} textures")
    # side-by-side vs screenshot
    shot = opt("--shot")
    if shot and os.path.exists(shot):
        from PIL import Image
        comp = Image.open(os.path.join(outdir, f"frame{frame}_composite.png")).convert("RGB")
        # The screenshot is resized to the canvas size so both halves line up.
        gt = Image.open(shot).convert("RGB").resize((SW,SH))
        sbs = Image.new("RGB",(SW*2+8,SH),(40,40,40))
        sbs.paste(comp,(0,0)); sbs.paste(gt,(SW+8,0))
        sbs.save(os.path.join(outdir, f"frame{frame}_vs_screenshot.png"))
        print(f"[frameref] wrote frame{frame}_vs_screenshot.png (left=recon, right=PCSX2)")
    return 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main(sys.argv))
|
||||
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch349 steps 2-4: reconstruct ONE real SH3 textured draw from GS local memory.
|
||||
|
||||
Closes the Ch347/348 gap (authentic asset on CHOSEN geometry) -> an ACTUAL commercial draw reconstructed
|
||||
faithfully. Default pick (gs_sh3_draw_census.py top): frame 1 idx89761, a 70-prim PSMT8 512x512 TRI_STRIP at
|
||||
tbp=9216, CLUT cbp=13952 (CSM1 PSMCT32). That texture is STREAMED as 256x256 PSMCT32 (upload idx13288, same
|
||||
262144 bytes) and SAMPLED as 512x512 PSMT8 — the exact stream-one-format / sample-another bridge.
|
||||
|
||||
step 2 Build a GS local-memory model (gs_localmem.LocalMem) seeded from the dump's initial VRAM snapshot,
|
||||
replay every host->local PSMCT32 upload up to the draw, then READ the texture back via the PSMT8
|
||||
swizzle. index -> CLUT -> ABGR. This is "the texture as the real draw sees it".
|
||||
step 3 decode + print the draw's real TEX0/CLUT/state.
|
||||
step 4 (gs_sh3_draw_ref.py) rasterize the actual geometry sampling this texture.
|
||||
|
||||
All outputs are SH3-derived -> LOCAL/gitignored (captures/gs/silenthill3/extracted/recon/).
|
||||
Usage: gs_sh3_recon.py <dump.gs.zst> [--draw-idx N] [--tbp T] [--cbp C] [--tbw W] [--tw 512] [--th 512]
|
||||
[--clut-order linear|grid] [--out DIR]
|
||||
"""
|
||||
import sys, os
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
import gs_parse
|
||||
import gs_texture_residency as R
|
||||
import gs_localmem as LM
|
||||
|
||||
def build_localmem_to(dump, draw_idx):
    """Build the local-memory model as it stands just before event index `draw_idx`.

    The model is seeded from the dump's VRAM snapshot; every upload with idx < draw_idx and
    dpsm == 0x00 (PSMCT32) is then written into it in the order `uploads` lists them. Other
    destination formats are skipped.

    Returns (mem, replayed_uploads, all_uploads, events, vram). When the dump carries no VRAM
    snapshot the result is (None, [], all_uploads, events, None).
    """
    d, h, events, uploads, runs, vram = R.collect(dump, 0)
    if vram is None:
        return None, [], uploads, events, None

    mem = LM.LocalMem(vram)
    replayed = []
    for up in uploads:
        # only PSMCT32 stream writes modelled here (SH3 env path)
        if up["idx"] < draw_idx and up["dpsm"] == 0x00:
            start, stop = up["blob_range"]
            payload = d[start:stop]
            words = []
            for k in range(len(payload) // 4):
                words.append(int.from_bytes(payload[k * 4:k * 4 + 4], "little"))
            mem.write_image_ct32(up["dbp"], up["dbw"], up["dx"], up["dy"], up["w"], up["h"], words)
            replayed.append(up)
    return mem, replayed, uploads, events, vram
|
||||
|
||||
def read_clut32(mem, cbp, order="grid"):
    """Read 256 packed 32-bit palette words from the modelled VRAM.

    order == "linear": entry i is the little-endian word at byte offset cbp*256 + i*4 of mem.m;
    entries that would run past mem.SIZE read as 0.
    any other order ("grid"): entry i is mem.read_ct32_word(cbp, 1, i % 16, i // 16), i.e. the
    palette is treated as a 16x16 CT32 surface with buffer width 1.

    Returns a list of 256 ints (PS2 PSMCT32 word == 0xAABBGGRR, low byte R).
    """
    if order == "linear":
        base = cbp * 256
        return [
            int.from_bytes(mem.m[addr:addr + 4], "little") if addr + 4 <= mem.SIZE else 0
            for addr in range(base, base + 256 * 4, 4)
        ]
    # grid: palette entry i at (x=i%16, y=i//16) via CT32 swizzle, dbw=1
    return [mem.read_ct32_word(cbp, 1, entry & 15, entry >> 4) for entry in range(256)]
|
||||
|
||||
def decode_pixel(pal, idx):
    """Look up palette entry `idx & 0xFF` and unpack it into an (R, G, B, A) tuple of bytes."""
    word = pal[idx & 0xFF]
    return tuple((word >> shift) & 0xFF for shift in (0, 8, 16, 24))  # R,G,B,A
|
||||
|
||||
def save_png(path, w, h, rgba_pixels):
    """Write a w x h RGBA image to `path` using Pillow.

    rgba_pixels is handed to Image.putdata() as-is, so it must be a flat row-major sequence of
    (r, g, b, a) tuples.
    """
    from PIL import Image  # imported lazily so the module loads without Pillow

    canvas = Image.new("RGBA", (w, h))
    canvas.putdata(rgba_pixels)
    canvas.save(path)
|
||||
|
||||
def main(argv):
    """Command-line entry point: reconstruct one texture + CLUT from a dump and save three PNGs.

    argv[1] is the dump. Options (--draw-idx, --tbp, --cbp, --tbw, --tw, --th, --clut-order,
    --out) are looked up by name in argv. Writes recon_indices_gray.png,
    recon_texture_<order>.png and recon_clut_<order>.png into the output directory.
    Returns 2 when no dump is given, 1 when the dump has no VRAM snapshot, else 0.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2

    def opt(n, dv=None):
        # Value following flag `n`, or the default when the flag is not present.
        if n in argv:
            return argv[argv.index(n) + 1]
        return dv

    dump = argv[1]
    draw_idx = int(opt("--draw-idx", "89761"))
    tbp = int(opt("--tbp", "9216"))
    cbp = int(opt("--cbp", "13952"))
    fbw = int(opt("--tbw", "8"))
    tw = int(opt("--tw", "512"))
    th = int(opt("--th", "512"))
    order = opt("--clut-order", "grid")
    parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    default_out = os.path.join(parent_dir, "captures", "gs", "silenthill3", "extracted", "recon")
    outdir = opt("--out", default_out)
    os.makedirs(outdir, exist_ok=True)

    mem, replayed, uploads, events, vram = build_localmem_to(dump, draw_idx)
    if mem is None:
        print("VRAM snapshot ABSENT — cannot reconstruct")
        return 1
    print(f"[Ch349] dump={os.path.basename(dump)} draw_idx={draw_idx} tbp={tbp} cbp={cbp} fbw={fbw} {tw}x{th}")
    print(f"[step2] GS local-mem seeded from initial snapshot + replayed {len(replayed)} PSMCT32 upload(s) "
          f"before the draw:")
    for up in replayed:
        print(f" idx{up['idx']} dbp={up['dbp']} dbw={up['dbw']} {up['w']}x{up['h']} {up['bytes']}B")

    # Index image, saved as grayscale (index value replicated to R, G and B).
    idx = mem.read_psmt8(tbp, fbw, tw, th)
    distinct = len(set(idx))
    print(f"[step2] de-swizzled PSMT8 index image: {tw}x{th}, {distinct} distinct indices")
    gray = [(value, value, value, 255) for value in idx]
    save_png(os.path.join(outdir, "recon_indices_gray.png"), tw, th, gray)

    # Palette, then the coloured texture and a 16x16 swatch; alpha is forced to 255 in both.
    pal = read_clut32(mem, cbp, order=order)
    print(f"[step2] CLUT @cbp={cbp} order={order}: {len(set(pal))} distinct ABGR entries")
    coloured = [decode_pixel(pal, value)[:3] + (255,) for value in idx]
    save_png(os.path.join(outdir, f"recon_texture_{order}.png"), tw, th, coloured)
    swatch = [decode_pixel(pal, entry)[:3] + (255,) for entry in range(256)]
    save_png(os.path.join(outdir, f"recon_clut_{order}.png"), 16, 16, swatch)
    print(f"[step2] wrote recon_indices_gray.png, recon_texture_{order}.png, recon_clut_{order}.png -> {outdir}")
    return 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main(sys.argv))
|
||||
@@ -0,0 +1,138 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch341 Brick 1: host-side texture state analysis + extraction from a GS dump.
|
||||
|
||||
Decodes the texture UPLOADS (BITBLTBUF / TRXREG / TRXPOS / TRXDIR + IMAGE) and TEX0 BINDS in a dump,
|
||||
matches the dominant textured-TRIANGLE primitives to the texture they sample, and finds the EARLIEST
|
||||
contiguous textured-triangle segment that uses a SINGLE TEX0 bind (the no-RTL Ch341 v1 target: one
|
||||
scene-level TEX0 + per-vertex real UV). Reports aggregate facts (committable). With --extract it
|
||||
writes the matched texture blob + a generated-fixture descriptor LOCALLY (gitignored, per provenance).
|
||||
|
||||
Usage:
|
||||
gs_texture.py <dump.gs[.xz|.zst]> [--report out.txt] [--extract outdir]
|
||||
"""
|
||||
import sys, os, json
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
import gs_parse, gs_translate
|
||||
|
||||
# Pixel-storage-mode code -> mnemonic; used to label uploads and TEX0 binds in the report
# (codes missing from the table are printed as hex).
PSM = {0x00:"PSMCT32",0x01:"PSMCT24",0x02:"PSMCT16",0x0A:"PSMCT16S",0x13:"PSMT8",0x14:"PSMT4",
       0x1B:"PSMT8H",0x24:"PSMT4HL",0x2C:"PSMT4HH",0x30:"PSMZ32",0x31:"PSMZ24",0x32:"PSMZ16",0x3A:"PSMZ16S"}
# Bits per texel per PSM code; used to compute the expected byte size of an upload
# (codes missing from the table count as 0 bits).
BPP = {0x00:32,0x01:24,0x02:16,0x0A:16,0x13:8,0x14:4,0x1B:8,0x24:4,0x2C:4}
|
||||
|
||||
def dec_bitbltbuf(v):
    """Split a BITBLTBUF register value into its source (S*) and destination (D*) fields."""
    layout = (("SBP", 0, 0x3FFF), ("SBW", 16, 0x3F), ("SPSM", 24, 0x3F),
              ("DBP", 32, 0x3FFF), ("DBW", 48, 0x3F), ("DPSM", 56, 0x3F))
    return {name: (v >> shift) & mask for name, shift, mask in layout}
|
||||
def dec_trxreg(v):
    """Split a TRXREG register value into the transfer width (RRW) and height (RRH)."""
    width = v & 0xFFF
    height = (v >> 32) & 0xFFF
    return {"RRW": width, "RRH": height}
|
||||
def dec_trxpos(v):
    """Extract the destination start coordinates (DSAX, DSAY) from a TRXPOS register value.

    TRXPOS packs the source corner in the low word (bits 0-10 and 16-26) and the destination
    corner in the high word (bits 32-42 and 48-58). The previous decoder read the low-word
    fields and reported the SOURCE corner under the DSAX/DSAY keys.
    """
    return dict(DSAX=(v >> 32) & 0x7FF, DSAY=(v >> 48) & 0x7FF)
|
||||
def dec_tex0(v):
    """Split a TEX0 register value into its texture fields.

    TW and TH are returned as the raw 4-bit exponents stored in the register, not as pixel
    sizes.
    """
    layout = (("TBP0", 0, 0x3FFF), ("TBW", 14, 0x3F), ("PSM", 20, 0x3F),
              ("TW", 26, 0xF), ("TH", 30, 0xF), ("TCC", 34, 1), ("TFX", 35, 3))
    return {name: (v >> shift) & mask for name, shift, mask in layout}
|
||||
|
||||
def analyze(path):
    """Decode texture uploads and per-triangle TEX0 binds from a dump.

    Returns (header, uploads, tris, seg, seg_tex0):
      uploads   one dict per IMAGE event, described by the BITBLTBUF/TRXREG/TRXPOS values in
                effect when the event was seen
      tris      (prim, tex0_value) for every reconstructed primitive of type 3
      seg       the earliest run of consecutive textured triangles that share one TEX0 value
      seg_tex0  that TEX0 value, or None when no textured triangle exists
    """
    # NOTE(review): the raw dump bytes are read but not used in this function; the call is
    # kept so behaviour is unchanged.
    gs_parse.read_dump_bytes(path)
    h, events = gs_parse.parse_dump(path)

    # --- pass 1: texture uploads (transfer state machine) ---
    bitbltbuf = trxreg = trxpos = 0
    uploads = []
    for ev in events:
        if ev.kind == "GSREG":
            if ev.reg == "BITBLTBUF":
                bitbltbuf = ev.value
            elif ev.reg == "TRXREG":
                trxreg = ev.value
            elif ev.reg == "TRXPOS":
                trxpos = ev.value
        elif ev.kind == "IMAGE":
            bb = dec_bitbltbuf(bitbltbuf)
            rr = dec_trxreg(trxreg)
            tp = dec_trxpos(trxpos)
            uploads.append(dict(dbp=bb["DBP"], dbw=bb["DBW"], dpsm=bb["DPSM"], w=rr["RRW"], h=rr["RRH"],
                                dsax=tp["DSAX"], dsay=tp["DSAY"], bytes=ev.info.get("bytes", 0),
                                data_off=ev.byte_off, event_idx=ev.idx))

    # --- pass 2: triangles + their active TEX0 ---
    prims, _ = gs_translate.reconstruct_prims(events)
    # reconstruct_prims() does not keep the TEX0, so re-run a kick model over the events and
    # record the TEX0_1 value active at every primitive kick, in kick order.
    verts_needed = {0: 1, 1: 2, 2: 2, 3: 3, 4: 3, 5: 3, 6: 2}
    active_tex0 = 0
    queued = 0
    ptype = 7
    tex0_per_kick = []
    for ev in events:
        if ev.kind != "GSREG":
            continue
        if ev.reg == "TEX0_1":
            active_tex0 = ev.value
        elif ev.reg == "PRIM":
            ptype = ev.value & 7
            queued = 0
        elif ev.reg in ("XYZ2", "XYZ3", "XYZF2", "XYZF3"):
            queued += 1
            # Only XYZ2/XYZF2 writes count as a drawing kick here.
            if ev.reg in ("XYZ2", "XYZF2") and queued >= verts_needed.get(ptype, 99):
                tex0_per_kick.append(active_tex0)
                if ptype in (3, 6):
                    queued = 0

    tris = []
    for pos, prim in enumerate(prims):
        if prim.type == 3:
            tris.append((prim, tex0_per_kick[pos] if pos < len(tex0_per_kick) else 0))

    # --- earliest contiguous textured-triangle segment with a SINGLE TEX0 ---
    seg = []
    seg_tex0 = None
    for prim, tex0 in tris:
        # textured tri -> unsupported in v0 envelope, but here we WANT textured (result unused)
        gs_translate.classify(prim)
        if not prim.tme:
            if seg:
                break  # the run has ended
            continue  # still looking for the first textured triangle
        if seg_tex0 is None:
            seg_tex0 = tex0
            seg = [prim]
        elif tex0 == seg_tex0:
            seg.append(prim)
        else:
            break  # crossed a TEX0 bind -> stop (single-TEX0 segment)
    return h, uploads, tris, seg, seg_tex0
|
||||
|
||||
def match_upload(uploads, tex0):
    """Find the upload that a TEX0 value samples from.

    Prefers the first upload whose destination base pointer AND pixel format equal the TEX0's
    TBP0/PSM; failing that, the first upload with a matching base pointer only. Returns the
    upload dict, or None when neither exists.
    """
    bind = dec_tex0(tex0)
    want_bp, want_psm = bind["TBP0"], bind["PSM"]
    exact = next((up for up in uploads if up["dbp"] == want_bp and up["dpsm"] == want_psm), None)
    if exact is not None:
        return exact
    # fall back: TBP match only
    return next((up for up in uploads if up["dbp"] == want_bp), None)
|
||||
|
||||
def main(argv):
    """Command-line entry point: print the texture analysis report for a dump.

    argv[1] is the dump. Options looked up by name in argv:
      --report PATH   also write the report text to PATH
      --extract DIR   write the matched upload payload (tex0_blob.bin) and a JSON descriptor
                      (tex0_desc.json) into DIR, when a single-TEX0 segment with a matching
                      upload exists

    Returns 2 (after printing the module docstring) when no dump is given, otherwise 0.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2
    path = argv[1]

    def opt(n, d=None):
        # Value following flag `n` in argv, or `d` when the flag is absent.
        return argv[argv.index(n) + 1] if n in argv else d

    h, uploads, tris, seg, seg_tex0 = analyze(path)
    lines = []
    lines.append(f"# Ch341 Brick 1 texture analysis (source {os.path.basename(path)} serial={h.serial!r}; aggregate facts only)")
    lines.append(f"texture uploads: {len(uploads)}")
    seen = set()
    for u in uploads:
        # Report each distinct (base pointer, format, size) combination once.
        k = (u["dbp"], u["dpsm"], u["w"], u["h"])
        if k in seen:
            continue
        seen.add(k)
        lines.append(f" TBP={u['dbp']} DBW={u['dbw']} PSM={PSM.get(u['dpsm'],hex(u['dpsm']))} "
                     f"{u['w']}x{u['h']} bytes={u['bytes']} (expect {u['w']*u['h']*BPP.get(u['dpsm'],0)//8})")
    lines.append(f"textured triangles: {sum(1 for p,_ in tris if p.tme)} / {len(tris)} total triangles")

    # Decode the segment's TEX0 and look up its upload once; both report and extract use them.
    tx = dec_tex0(seg_tex0) if seg else None
    matched = match_upload(uploads, seg_tex0) if seg else None
    if seg:
        lines.append("")
        lines.append(f"EARLIEST single-TEX0 textured-tri segment: {len(seg)} triangles")
        lines.append(f" TEX0: TBP0={tx['TBP0']} TBW={tx['TBW']} PSM={PSM.get(tx['PSM'],hex(tx['PSM']))} "
                     f"TW={tx['TW']}({1<<tx['TW']}px) TH={tx['TH']}({1<<tx['TH']}px) TFX={tx['TFX']}")
        if matched:
            u = matched
            lines.append(f" -> matched upload: TBP={u['dbp']} {u['w']}x{u['h']} {PSM.get(u['dpsm'],hex(u['dpsm']))} "
                         f"bytes={u['bytes']} data@0x{u['data_off']:x}")
            lines.append(f" VERDICT: single scene-level TEX0 + per-vertex UV — NO RTL feeder change needed for v1.")
        else:
            lines.append(f" !! no matching upload found for TBP0={tx['TBP0']} — texture may be CLUT/region or uploaded elsewhere.")
    else:
        lines.append("NO single-TEX0 textured-triangle segment found (every textured run crosses TEX0 binds).")
    report = "\n".join(lines) + "\n"
    print(report)

    report_path = opt("--report")
    if report_path:
        with open(report_path, "w") as fh:
            fh.write(report)
        print(f"[wrote report -> {report_path}]")

    outdir = opt("--extract")
    if outdir and seg and matched:
        u = matched
        os.makedirs(outdir, exist_ok=True)
        d = gs_parse.read_dump_bytes(path)
        blob = d[u["data_off"]:u["data_off"] + u["bytes"]]
        with open(os.path.join(outdir, "tex0_blob.bin"), "wb") as fh:
            fh.write(blob)
        desc = dict(schema=1, tbp0=tx["TBP0"], tbw=tx["TBW"],
                    psm=u["dpsm"], psm_name=PSM.get(u["dpsm"]), w=u["w"], h=u["h"], bytes=u["bytes"],
                    tw=tx["TW"], th=tx["TH"], tfx=tx["TFX"],
                    provenance="cubes_demo (MIT, glampert/ps2-homebrew) — LOCAL only")
        with open(os.path.join(outdir, "tex0_desc.json"), "w") as fh:
            fh.write(json.dumps(desc, indent=2))
        print(f"[extracted {len(blob)}-byte texture blob + descriptor -> {outdir}/ (LOCAL, gitignored)]")
    return 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main(sys.argv))
|
||||
@@ -0,0 +1,306 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch346 generic texture-residency preflight.
|
||||
|
||||
Stops the hand-chasing of stale-VRAM frames (Ch345b: 32 sprites bound TBP=13440, but the captured frame's
|
||||
VRAM there held cube-checker residue, not the font). Before ANY repack/render/fit, this proves — for each
|
||||
textured draw run in a dump — that the bound TEX0 maps to a REAL upload whose payload actually covers the
|
||||
sampled footprint and looks like resident content, NOT stale/placeholder VRAM.
|
||||
|
||||
Generic (per Codex): the gate is RESIDENT + PLAUSIBLE content, not "font-like" — future targets may be
|
||||
sprites, UI, backgrounds, or indexed textures. `--font-like` adds the glyph-specific extra check on top.
|
||||
|
||||
Checks (Codex's minimum set):
|
||||
1. active draw TEX0 maps to a known upload region (DBP/DPSM/stride), not just an address;
|
||||
2. upload EPOCH tracked — if a later upload overwrites that TBP, the candidate uses the latest payload;
|
||||
3. sampled footprint (UV bbox, REPEAT-wrapped into the TEX0 TW/TH) is COVERED by the uploaded rect;
|
||||
4. payload sanity — reject all-zero / single-color flat / flat-alpha-on-alpha-draw / known stale hashes;
|
||||
5. emit RANKED candidates with frame/event offsets, prim run, TEX0, upload source offset, PSM, dims,
|
||||
alpha stats, and WHY it passed/failed.
|
||||
Repack/render tools refuse to run unless `residency_ok()` returns a PASS for the bound texture.
|
||||
|
||||
A checker is NOT auto-rejected — it can be legit authentic content (the Ch343 cube). The signal that killed
|
||||
Ch345b is RESIDENCY (no upload to TBP=13440 at all), not "it's a checker".
|
||||
|
||||
Usage: gs_texture_residency.py <dump.gs[.xz|.zst]> [--max-runs N] [--font-like] [--report r.txt] [--json j.json]
|
||||
gs_texture_residency.py <dump> --assert TBP[:PSM] # exit 0 iff that TBP is resident+plausible
|
||||
"""
|
||||
import sys, os, json, hashlib
|
||||
from collections import Counter
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
import gs_parse
|
||||
|
||||
PSMCT32 = 0x00  # GS pixel storage mode decoded by payload_stats as 4-byte little-endian texels, alpha in bits 24..31
|
||||
MIN_DISTINCT_COLORS = 2 # <2 => flat fill (stale/cleared); fonts (B/W+transp=3) still pass
|
||||
KNOWN_STALE_HASHES = set() # sha256[:16] denylist of textures known to be placeholder/test residue
|
||||
|
||||
INDEXED_PSMS = (0x13, 0x14) # PSMT8, PSMT4 — need a resident CLUT palette to render
|
||||
def dec_tex0(v):
    """Unpack a 64-bit TEX0 register value into a dict of its bit fields.

    `tw` / `th` are returned as texel counts (1 << the 4-bit log2 field);
    every other entry is the raw field value.
    """
    def field(shift, width):
        # Extract `width` bits starting at bit `shift`.
        return (v >> shift) & ((1 << width) - 1)

    return {
        "tbp": field(0, 14),
        "tbw": field(14, 6),
        "psm": field(20, 6),
        "tw": 1 << field(26, 4),
        "th": 1 << field(30, 4),
        "tcc": field(34, 1),
        "tfx": field(35, 2),
        "cbp": field(37, 14),
        "cpsm": field(51, 4),
        "csm": field(55, 1),
        "csa": field(56, 5),
        "cld": field(61, 3),
    }
|
||||
def dec_bitbltbuf(v):
    """Unpack a 64-bit BITBLTBUF register value into source/destination fields.

    Returns a dict with sbp/sbw/spsm (source) and dbp/dbw/dpsm (destination).
    """
    src = v & 0xFFFFFFFF          # low word: source buffer description
    dst = (v >> 32) & 0xFFFFFFFF  # high word: destination buffer description
    return {
        "sbp": src & 0x3FFF,
        "sbw": (src >> 16) & 0x3F,
        "spsm": (src >> 24) & 0x3F,
        "dbp": dst & 0x3FFF,
        "dbw": (dst >> 16) & 0x3F,
        "dpsm": (dst >> 24) & 0x3F,
    }
|
||||
def dec_trxreg(v):
    """Return (RRW, RRH): the 12-bit transfer width and height from TRXREG."""
    rrw = v & 0xFFF
    rrh = (v >> 32) & 0xFFF
    return (rrw, rrh)


def dec_trxpos(v):
    """Return (DSAX, DSAY): the 11-bit destination start coordinates from TRXPOS."""
    dsax = (v >> 32) & 0x7FF
    dsay = (v >> 48) & 0x7FF
    return (dsax, dsay)


def dec_trxdir(v):
    """Return the 2-bit transfer direction from TRXDIR (0 = host->local upload)."""
    return v & 3
|
||||
|
||||
def payload_stats(blob, psm):
    """Summarize an upload payload as 32-bit texels.

    Only PSMCT32 payloads of at least one texel are decoded; anything else
    returns None. The result dict carries the texel count, distinct-color
    count, count of the most common color, alpha buckets (0 / >=0x80 / other),
    a flat-alpha flag, a 16-hex-digit sha256 prefix of the raw bytes, and the
    `_checkerish` structural flag.
    """
    if psm != PSMCT32 or len(blob) < 4:
        return None
    count = len(blob) // 4
    # Little-endian 4-byte texels; trailing bytes that do not fill a texel are ignored.
    texels = [int.from_bytes(blob[off:off + 4], "little") for off in range(0, count * 4, 4)]
    histogram = Counter(texels)
    alphas = [(px >> 24) & 0xFF for px in texels]
    n_transp = sum(1 for al in alphas if al == 0)
    n_opaque = sum(1 for al in alphas if al >= 0x80)
    digest = hashlib.sha256(blob).hexdigest()[:16]
    return {
        "texels": count,
        "distinct": len(histogram),
        "top": histogram.most_common(1)[0][1],
        "a_transp": n_transp,
        "a_opaque": n_opaque,
        "a_partial": count - n_transp - n_opaque,
        "flat_alpha": len(set(alphas)) == 1,
        "sha16": digest,
        "checkerish": _checkerish(texels),
    }
|
||||
|
||||
def _checkerish(texels):
|
||||
"""Heuristic structural flag (REPORTED, never an auto-reject — a checker can be legit content):
|
||||
a 2-color image whose value flips on a regular coarse grid. Returns block size or 0."""
|
||||
vals = set(texels)
|
||||
if len(vals) > 4: return 0
|
||||
n = len(texels); side = int(n**0.5)
|
||||
if side*side != n: return 0
|
||||
rows = [texels[r*side:(r+1)*side] for r in range(side)]
|
||||
# find the run length of the first row; a checker has long uniform runs that alternate
|
||||
first = rows[0]; run = 1
|
||||
while run < side and first[run] == first[0]: run += 1
|
||||
if run < 4 or run > side//2: return 0
|
||||
return run
|
||||
|
||||
def collect(path, max_runs):
    """Load a dump and gather everything the residency check needs.

    Returns (raw_bytes, header, events, uploads, runs, vram):
      * uploads — ordered log of IMAGE events seen while TRXDIR was 0 (host->local) and a
        BITBLTBUF had been written, each tagged with a per-DBP epoch counter;
      * runs    — textured draw runs from `_draw_runs(events, max_runs)`;
      * vram    — 4 MiB slice of the raw bytes ending 8192 bytes before `header.packet_start`,
        or None when that window does not fit inside the dump.
    """
    raw = gs_parse.read_dump_bytes(path)
    header, events = gs_parse.parse_dump(path)

    upload_log = []
    seen_per_dbp = {}          # dbp -> number of uploads seen so far
    blt = None
    rect = None
    pos = None
    direction = 3
    for ev in events:
        if ev.kind == "GSREG":
            reg = ev.reg
            if reg == "BITBLTBUF":
                blt = dec_bitbltbuf(ev.value)
            elif reg == "TRXREG":
                rect = dec_trxreg(ev.value)
            elif reg == "TRXPOS":
                pos = dec_trxpos(ev.value)
            elif reg == "TRXDIR":
                direction = dec_trxdir(ev.value)
            continue
        if ev.kind != "IMAGE" or blt is None or direction != 0:
            continue
        # host->local upload
        width, height = rect if rect else (0, 0)
        dst_x, dst_y = pos if pos else (0, 0)
        dbp = blt["dbp"]
        seen_per_dbp[dbp] = seen_per_dbp.get(dbp, 0) + 1
        nbytes = ev.info.get("bytes", 0)
        upload_log.append({
            "idx": ev.idx, "frame": ev.frame, "byte_off": ev.byte_off, "bytes": nbytes,
            "dbp": dbp, "dbw": blt["dbw"], "dpsm": blt["dpsm"],
            "w": width, "h": height, "dx": dst_x, "dy": dst_y,
            "epoch": seen_per_dbp[dbp],
            "blob_range": (ev.byte_off, ev.byte_off + nbytes),
        })

    # draw runs: contiguous textured (TME) draws sharing one TEX0 base+psm; track sampled UV bbox
    runs = _draw_runs(events, max_runs)

    # VRAM snapshot: PCSX2 GS dumps freeze the full 4 MiB GS local memory at the END of the state blob
    # (register prefix first, then VRAM). Commercial games upload textures/CLUTs at scene-load — BEFORE the
    # dump — so they live here, not as in-stream upload events. This is the correct "resident" source.
    VRAM = 0x400000
    vstart = header.packet_start - 8192 - VRAM
    if 0 <= vstart and vstart + VRAM <= len(raw):
        vram = raw[vstart:vstart + VRAM]
    else:
        vram = None
    return raw, header, events, upload_log, runs, vram
|
||||
|
||||
def snapshot_present(vram, base, nb=512, min_nz=16):
    """Check for resident, non-flat content at a base pointer in the VRAM snapshot.

    `base` is in 256-byte units. The `nb` bytes starting there must hold at
    least `min_nz` non-zero bytes and at least two distinct byte values.
    The snapshot is SWIZZLED, so this is a presence/plausibility check only.

    Returns dict(nonzero=..., distinct=...) on success; None when there is no
    snapshot, the window falls outside it, or the content is too flat.
    """
    if vram is None:
        return None
    start = base * 256
    end = start + nb
    if start < 0 or end > len(vram):
        return None
    window = vram[start:end]
    nonzero = sum(map(bool, window))
    distinct = len(set(window))
    if nonzero < min_nz or distinct < 2:
        return None
    return {"nonzero": nonzero, "distinct": distinct}
|
||||
|
||||
def _draw_runs(events, max_runs):
    """Group textured XYZ2 kicks into runs that share one TEX0 (tbp, psm, tbw).

    Walks the GSREG events, tracking PRIM state (type/TME/FST/ABE/CTXT), the
    TEX0 of both contexts and the UV writes since the last kick. Each run is a
    dict with key/tex0/type/abe/fst, first_idx/last_idx/frame, `nprim` (number
    of XYZ2 kicks folded into the run) and the sampled texel bbox
    umin/umax/vmin/vmax (umax/vmax stay -1 when no UV write was seen).

    `max_runs` > 0 caps the number of runs returned; 0 means unlimited.
    """
    cur = {"type": None, "tme": 0, "fst": 0, "abe": 0}
    tex0 = {1: None, 2: None}
    ctxt = 0
    uvbuf = []
    runs = []
    run = None

    def close():
        # Commit the open run (if it drew anything) and start fresh.
        nonlocal run
        if run and run["nprim"] > 0:
            runs.append(run)
        run = None

    for e in events:
        if e.kind != "GSREG":
            continue
        r, v = e.reg, e.value
        if r == "PRIM":
            cur = {"type": v & 7, "tme": (v >> 4) & 1, "fst": (v >> 8) & 1, "abe": (v >> 6) & 1}
            ctxt = (v >> 9) & 1
            uvbuf = []
        elif r == "TEX0_1":
            tex0[1] = dec_tex0(v)
        elif r == "TEX0_2":
            tex0[2] = dec_tex0(v)
        elif r == "UV":
            # UV register: U in bits 0..13, V in bits 16..29 (bits 14..15 are padding).
            # The previous shift of 14 mixed padding and the low V bits into the V coordinate.
            uvbuf.append((v & 0x3FFF, (v >> 16) & 0x3FFF))
        elif r == "XYZ2":
            if not cur["tme"]:
                continue                    # only textured draws make residency runs
            t0 = tex0[1 if ctxt == 0 else 2]
            if t0 is None:
                continue
            key = (t0["tbp"], t0["psm"], t0["tbw"])
            # NOTE(review): a run continues only when the previous kick was the immediately
            # preceding event index. Presumably e.idx is a per-event sequence number, in which
            # case any intervening register write (e.g. UV) starts a new run — confirm intended.
            if run is None or run["key"] != key or run["last_idx_gap"] != e.idx - 1:
                close()
                run = dict(key=key, tex0=t0, type=cur["type"], abe=cur["abe"], fst=cur["fst"],
                           first_idx=e.idx, frame=e.frame, nprim=0,
                           umin=1 << 30, umax=-1, vmin=1 << 30, vmax=-1, last_idx_gap=e.idx)
                if len(runs) >= max_runs and max_runs > 0:
                    break                   # cap reached; the freshly opened (empty) run is discarded
            run["nprim"] += 1
            run["last_idx"] = e.idx
            run["last_idx_gap"] = e.idx
            for (u, vv) in uvbuf[-2:]:      # sprite=2 verts; tri uses last 3 — bbox is fine
                # UV is fixed point with 4 fractional bits; keep the integer texel.
                tu = u >> 4
                tv = vv >> 4
                run["umin"] = min(run["umin"], tu)
                run["umax"] = max(run["umax"], tu)
                run["vmin"] = min(run["vmin"], tv)
                run["vmax"] = max(run["vmax"], tv)
            uvbuf = []
    close()
    return runs
|
||||
|
||||
def _wrapped_span(lo, hi, size):
    """Bounding interval of the inclusive texel range lo..hi after REPEAT-wrapping into [0, size)."""
    if not size:
        return lo, hi
    if lo // size == hi // size:
        # Whole range sits inside one tile: wrapping is a plain offset.
        return lo % size, hi % size
    # The range crosses a tile boundary, so after wrapping it touches both edges of the texture.
    return 0, size - 1


def evaluate(run, uploads, d, vram=None):
    """Return verdict dict: resident (in-stream upload OR VRAM snapshot) + coverage + plausibility + CLUT.

    Args:
        run: one draw run from `_draw_runs`.
        uploads: upload log from `collect`.
        d: raw dump bytes; upload payloads are sliced out of it via `blob_range`.
        vram: optional VRAM snapshot used when no in-stream upload matches.

    Returns a dict with keys verdict ("PASS"/"REJECT"), reasons (list of str),
    upload (chosen upload or None), coverage (1.0/0.0/None), stats
    (`payload_stats` result or None), clut (`_clut_residency` result or None)
    and tex_source ("stream", "snapshot" or None).
    """
    t0 = run["tex0"]
    tbp = t0["tbp"]
    psm = t0["psm"]
    # candidate uploads: same base + psm, occurring BEFORE the run's first draw; pick latest by idx
    cands = [u for u in uploads
             if u["dbp"] == tbp and u["dpsm"] == psm and u["idx"] < run["first_idx"]]
    reasons = []
    if not cands:
        # no in-stream upload — fall back to the VRAM snapshot (scene-load uploads live there)
        snap = snapshot_present(vram, tbp, nb=512)
        if snap is None:
            any_dbp = any(u["dbp"] == tbp for u in uploads)
            reasons.append("texture NOT resident: no in-stream upload to TBP and VRAM snapshot empty/absent"
                           if not any_dbp else
                           "upload(s) to TBP exist but PSM mismatch / all after draw, and snapshot empty")
            return dict(verdict="REJECT", reasons=reasons, upload=None, coverage=None,
                        stats=None, clut=None, tex_source=None)
        reasons.append(f"texture resident in VRAM SNAPSHOT @tbp={tbp} (nz={snap['nonzero']}/512 distinct={snap['distinct']}; swizzled — content via translation)")
        clut = _clut_residency(t0, uploads, run, vram, reasons)
        clut_ok = (psm not in INDEXED_PSMS) or (clut is not None and clut["resident"])
        return dict(verdict=("PASS" if clut_ok else "REJECT"), reasons=reasons or ["resident (snapshot)"],
                    upload=None, coverage=None, stats=None, clut=clut, tex_source="snapshot")

    up = max(cands, key=lambda u: u["idx"])

    # coverage: sampled footprint, REPEAT-wrapped into TW/TH, must fall inside the uploaded rect [dx..dx+w)x[dy..dy+h)
    tw, th = t0["tw"], t0["th"]
    foot_known = run["umax"] >= 0 and run["vmax"] >= 0   # UV captured (fst=1); fst=0 ST/Q not yet sampled
    if not foot_known:
        # honest: do NOT claim coverage we didn't verify. Verdict rests on residency + plausibility.
        reasons.append("footprint UNVERIFIED (ST/Q draw — UV not captured); coverage not asserted")
        inside = True
        coverage = None
    else:
        wrap = (run["umax"] >= tw or run["vmax"] >= th or run["umin"] < 0 or run["vmin"] < 0)
        # Reducing min and max independently mod TW/TH collapses a boundary-crossing footprint
        # (e.g. 0..64 on a 64-wide texture -> 0..0) and lets it pass trivially; wrap the SPAN instead.
        fmin_u, fmax_u = _wrapped_span(run["umin"], run["umax"], tw)
        fmin_v, fmax_v = _wrapped_span(run["vmin"], run["vmax"], th)
        if up["w"] and up["h"]:
            inside = (up["dx"] <= fmin_u and fmax_u < up["dx"] + up["w"] and
                      up["dy"] <= fmin_v and fmax_v < up["dy"] + up["h"])
        else:
            inside = False
        coverage = 1.0 if inside else 0.0
        if wrap:
            reasons.append(f"footprint WRAPS texture ({run['umin']}..{run['umax']} x {run['vmin']}..{run['vmax']} vs {tw}x{th}); REPEAT declared")
        if not inside:
            reasons.append(f"sampled footprint NOT covered by upload rect ({up['dx']}..{up['dx']+up['w']} x {up['dy']}..{up['dy']+up['h']})")

    blob = d[up["blob_range"][0]:up["blob_range"][1]]
    stats = payload_stats(blob, psm)
    plausible = True
    if stats is None:
        reasons.append(f"payload plausibility UNSUPPORTED for PSM 0x{psm:02x} (fail-closed)")
        plausible = False
    else:
        if in_known_stale(stats["sha16"]):
            reasons.append(f"payload is a KNOWN stale/test texture ({stats['sha16']})")
            plausible = False
        if stats["distinct"] < MIN_DISTINCT_COLORS:
            reasons.append(f"payload flat ({stats['distinct']} color)")
            plausible = False
        if run["abe"] and stats["flat_alpha"]:
            reasons.append("alpha draw but payload alpha is FLAT (no mask)")
            plausible = False
        if stats["checkerish"]:
            # Reported only — a checker never fails the gate by itself.
            reasons.append(f"payload is structurally checker-like (block~{stats['checkerish']}) — verify it's intended content")

    clut = _clut_residency(t0, uploads, run, vram, reasons)
    clut_ok = (psm not in INDEXED_PSMS) or (clut is not None and clut["resident"])
    verdict = "PASS" if (inside and plausible and clut_ok) else "REJECT"
    return dict(verdict=verdict, reasons=reasons or ["resident + plausible"], upload=up,
                coverage=coverage, stats=stats, clut=clut, tex_source="stream")
|
||||
|
||||
def _clut_residency(t0, uploads, run, vram, reasons):
    """Ch347 — decide whether the CLUT an indexed texture needs is resident at CBP.

    Non-indexed PSMs return None. Otherwise looks for the latest upload to CBP
    before the run's first draw, falling back to a VRAM-snapshot presence
    check, appends the finding to `reasons` (mutated in place) and returns a
    dict: resident, source ("stream"/"snapshot"/None), upload_idx, frame, cbp,
    cpsm, cld, distinct.
    """
    if t0["psm"] not in INDEXED_PSMS:
        return None
    cbp, cpsm, cld = t0["cbp"], t0["cpsm"], t0["cld"]

    def result(resident, source, upload_idx=None, frame=None, distinct=None):
        return dict(resident=resident, source=source, upload_idx=upload_idx, frame=frame,
                    cbp=cbp, cpsm=cpsm, cld=cld, distinct=distinct)

    prior = [u for u in uploads if u["dbp"] == cbp and u["idx"] < run["first_idx"]]
    if prior:
        newest = max(prior, key=lambda u: u["idx"])
        reasons.append(f"CLUT resident (stream) @cbp={cbp} cpsm=0x{cpsm:02x} cld={cld} (upload idx{newest['idx']} f{newest['frame']})")
        clut = result(True, "stream", upload_idx=newest["idx"], frame=newest["frame"])
    else:
        # a real 256-entry palette is rich + non-flat
        snap = snapshot_present(vram, cbp, nb=1024, min_nz=64)
        if snap:
            reasons.append(f"CLUT resident (snapshot) @cbp={cbp} cpsm=0x{cpsm:02x} cld={cld} (nz={snap['nonzero']}/1024 distinct={snap['distinct']})")
            clut = result(True, "snapshot", distinct=snap["distinct"])
        else:
            reasons.append(f"CLUT NOT resident: no upload to CBP={cbp} and snapshot empty — indexed texture cannot render authentically")
            clut = result(False, None)
    if cld == 0:
        reasons.append("CLD=0: this TEX0 does not trigger a CLUT (re)load — palette would be whatever a prior load left")
    return clut
|
||||
|
||||
def in_known_stale(sha16):
    """Return True when `sha16` is on the KNOWN_STALE_HASHES denylist (seeded empty by default)."""
    return sha16 in KNOWN_STALE_HASHES
|
||||
|
||||
def font_like(stats):
    """Glyph-specific EXTRA check (only when --font-like); NOT part of the generic gate.

    A payload is mask-like when it has both fully transparent and opaque
    texels and the transparent share lies in [0.05, 0.95].

    Returns (ok, explanation).
    """
    if stats is None:
        return False, "no stats"
    n_transp = stats["a_transp"]
    if n_transp == 0:
        return False, "no transparent texels (not a glyph mask)"
    if stats["a_opaque"] == 0:
        return False, "no opaque texels"
    frac_t = n_transp / stats["texels"]
    if frac_t < 0.05 or frac_t > 0.95:
        return False, f"transparent fraction {frac_t:.2f} not mask-like"
    return True, f"mask-like (transp {frac_t:.2f})"
|
||||
|
||||
def residency_ok(path, tbp, psm=PSMCT32):
    """Programmatic gate for repack/render tools.

    Parses the dump at `path` (all runs, no cap) and returns True iff at least
    one textured run bound to (tbp, psm) evaluates to PASS.
    """
    d, _h, _events, uploads, runs, vram = collect(path, 0)
    return any(
        evaluate(run, uploads, d, vram)["verdict"] == "PASS"
        for run in runs
        if run["tex0"]["tbp"] == tbp and run["tex0"]["psm"] == psm
    )
|
||||
|
||||
def main(argv):
    """CLI entry point: print a residency report, or assert one TBP with --assert.

    Returns the process exit code: 2 on missing arguments; with --assert, 0 iff
    the TBP is resident+plausible; otherwise 0 iff at least one run PASSes.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2
    path = argv[1]

    def has(f):
        return f in argv

    def opt(n, dv=None):
        return argv[argv.index(n) + 1] if n in argv else dv

    if has("--assert"):
        # residency_ok parses the dump itself, so handle this mode before collect()
        # instead of parsing the same dump twice.
        spec = opt("--assert")
        tbp = int(spec.split(":")[0], 0)
        psm = int(spec.split(":")[1], 0) if ":" in spec else PSMCT32
        ok = residency_ok(path, tbp, psm)
        print(f"residency {'PASS' if ok else 'REJECT'} for TBP={tbp} PSM=0x{psm:02x}")
        return 0 if ok else 1

    d, h, events, uploads, runs, vram = collect(path, int(opt("--max-runs", "0")))

    R = [f"# Ch346 texture-residency preflight: {os.path.basename(path)}",
         f"uploads(host->local): {len(uploads)} textured draw runs: {len(runs)}"]
    want_font = has("--font-like")
    results = []
    for run in runs:
        ev = evaluate(run, uploads, d, vram)
        fl = font_like(ev["stats"]) if want_font else None
        verdict = ev["verdict"]
        if verdict == "PASS" and fl is not None and not fl[0]:
            verdict = "REJECT"      # the optional glyph check can only demote a PASS
        results.append(dict(run=run, ev=ev, font=fl, verdict=verdict))

    # Rank PASS candidates: richest payload first, then most prims.
    passes = [r for r in results if r["verdict"] == "PASS"]
    passes.sort(key=lambda r: (r["ev"]["stats"]["distinct"] if r["ev"]["stats"] else 0, r["run"]["nprim"]),
                reverse=True)

    R.append(f"\n== {len(passes)} PASS / {len(results)-len(passes)} REJECT ==")
    for r in results:
        run = r["run"]
        ev = r["ev"]
        t0 = run["tex0"]
        up = ev["upload"]
        R.append(f"\n[{r['verdict']}] run f{run['frame']} idx{run['first_idx']}+{run['nprim']}prim "
                 f"TEX0 tbp={t0['tbp']} tbw={t0['tbw']} psm=0x{t0['psm']:02x} {t0['tw']}x{t0['th']} abe={run['abe']} fst={run['fst']}")
        if run["umax"] >= 0:
            R.append(f" footprint u[{run['umin']}..{run['umax']}] v[{run['vmin']}..{run['vmax']}]")
        else:
            R.append(f" footprint UNVERIFIED (fst={run['fst']} ST/Q draw)")
        if up:
            R.append(f" upload @byte0x{up['byte_off']:x} f{up['frame']} epoch{up['epoch']} dbp={up['dbp']} dpsm=0x{up['dpsm']:02x} {up['w']}x{up['h']} @({up['dx']},{up['dy']}) {up['bytes']}B")
        if ev["stats"]:
            s = ev["stats"]
            R.append(f" payload distinct={s['distinct']} alpha[t/o/p]={s['a_transp']}/{s['a_opaque']}/{s['a_partial']} flatA={s['flat_alpha']} checker~{s['checkerish']} sha={s['sha16']}")
        if ev.get("clut") is not None:
            c = ev["clut"]
            R.append(f" CLUT {'RESIDENT' if c['resident'] else 'MISSING'} cbp={c['cbp']} cpsm=0x{c['cpsm']:02x} cld={c['cld']}")
        if r["font"] is not None:
            R.append(f" font-like: {r['font'][0]} ({r['font'][1]})")
        for why in ev["reasons"]:
            R.append(f" -> {why}")

    if passes:
        R.append("\n== RANKED PASS candidates ==")
        for r in passes:
            run = r["run"]
            t0 = run["tex0"]
            s = r['ev']['stats']
            c = r['ev'].get('clut')
            extra = f"distinct={s['distinct']}" if s else f"tex_src={r['ev'].get('tex_source')}"
            if c:
                extra += f" CLUT@cbp={c['cbp']}/{c['source']}"
            R.append(f" tbp={t0['tbp']} psm=0x{t0['psm']:02x} {run['nprim']}prim {extra} f{run['frame']} idx{run['first_idx']}")

    report = "\n".join(R)
    print(report)
    if has("--report"):
        # The report contains non-ASCII punctuation; write it as UTF-8 and close the handle.
        with open(opt("--report"), "w", encoding="utf-8") as fh:
            fh.write(report + "\n")
    if has("--json"):
        j = [dict(verdict=r["verdict"], tbp=r["run"]["tex0"]["tbp"], psm=r["run"]["tex0"]["psm"],
                  frame=r["run"]["frame"], first_idx=r["run"]["first_idx"], nprim=r["run"]["nprim"],
                  stats=r["ev"]["stats"], clut=r["ev"].get("clut"), reasons=r["ev"]["reasons"])
             for r in results]
        with open(opt("--json"), "w") as fh:
            fh.write(json.dumps(j, indent=1) + "\n")
    return 0 if passes else 1


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
@@ -0,0 +1,185 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch340 support census + translator (Bricks 3-4).
|
||||
|
||||
Consumes gs_parse's normalized event stream, reconstructs GS primitives via the vertex-kick model,
|
||||
classifies EVERY primitive (translated / unsupported, with reason + frame/event/byte offset), emits an
|
||||
aggregate census + histograms, then translates the EARLIEST mechanically-selected contiguous SUPPORTED
|
||||
draw segment into a ps2_feeder scene file (Ch339 grammar) — reusing the proven encoder, never
|
||||
duplicating staging logic. NO hidden approximation: textured prims, sprites, strips/fans, non-source-
|
||||
over blend, unsupported Z-test, etc. are reported unsupported and the segment STOPS there (fail closed).
|
||||
If no segment qualifies, Ch340 still succeeds via parser + census and the top census blocker frames Ch341.
|
||||
|
||||
A declared, reported VIEWPORT FIT (game bbox -> our 64x64 FB) is a faithful linear transform, not a GS-
|
||||
feature approximation; it is recorded in the qualification report.
|
||||
|
||||
Usage:
|
||||
gs_translate.py <dump.gs[.xz|.zst]> [--report out.census.txt] [--scene out.scene.txt] [--fb N]
|
||||
"""
|
||||
import sys, os
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
import gs_parse
|
||||
|
||||
# GS PRIM type code -> display name
PRIMT = {
    0: "POINT",
    1: "LINE",
    2: "LINE_STRIP",
    3: "TRIANGLE",
    4: "TRI_STRIP",
    5: "TRI_FAN",
    6: "SPRITE",
    7: "INVALID",
}


def bit(v, b):
    """Return bit number `b` of integer `v` as 0 or 1."""
    shifted = v >> b
    return shifted & 1
|
||||
|
||||
class Prim:
    """One reconstructed GS primitive: a plain record whose fields are listed in __slots__.

    Construct with keyword arguments; any slot not supplied is set to None and
    keywords that are not slots are ignored.
    """
    __slots__ = ("type", "verts", "tme", "abe", "iip", "frame", "event_idx", "byte_off", "test", "alpha")

    def __init__(self, **fields):
        for name in self.__slots__:
            setattr(self, name, fields.get(name))
|
||||
|
||||
def reconstruct_prims(events):
    """Vertex-kick model -> (list of Prim, final state dict).

    Each vertex is (x, y, z, (r, g, b)) in raw GS coords (x/y still in 1/16
    pixel units, XYOFFSET not applied). XYZ2/XYZF2 writes add a vertex and
    kick drawing; XYZ3/XYZF3 add a vertex without a drawing kick. A Prim is
    emitted whenever a kicking write completes the vertex count its type needs.
    """
    prims = []
    st = dict(prim=None, rgbaq=0, xyoff=0, tme=0, abe=0, iip=0, test=0, alpha=0, frame=0, ptype=7)
    vtx = []
    need_by_type = {0: 1, 1: 2, 2: 2, 3: 3, 4: 3, 5: 3, 6: 2}
    # Independent primitive types use each vertex exactly once: point, line, triangle, sprite.
    independent = (0, 1, 3, 6)
    for e in events:
        if e.kind != "GSREG":
            if e.kind == "FRAME_BOUNDARY":
                vtx = []            # vertex FIFO doesn't survive a frame in our model
            continue
        r = e.reg
        v = e.value
        if r == "PRIM":
            st["prim"] = v
            st["ptype"] = v & 7
            st["iip"] = bit(v, 3)
            st["tme"] = bit(v, 4)
            st["abe"] = bit(v, 6)
            vtx = []
        elif r == "PRMODE":
            st["iip"] = bit(v, 3)
            st["tme"] = bit(v, 4)
            st["abe"] = bit(v, 6)
        elif r == "RGBAQ":
            st["rgbaq"] = v
        elif r == "XYOFFSET_1":
            st["xyoff"] = v
        elif r in ("TEST_1",):
            st["test"] = v
        elif r in ("ALPHA_1",):
            st["alpha"] = v
        elif r in ("FRAME_1",):
            st["frame"] = v
        elif r in ("XYZ2", "XYZ3", "XYZF2", "XYZF3"):
            x = v & 0xFFFF
            y = (v >> 16) & 0xFFFF
            if r in ("XYZF2", "XYZF3"):
                # XYZF carries a 24-bit Z in bits 32..55; bits 56..63 are the fog coefficient.
                z = (v >> 32) & 0xFFFFFF
            else:
                z = (v >> 32) & 0xFFFFFFFF
            col = (st["rgbaq"] & 0xFF, (st["rgbaq"] >> 8) & 0xFF, (st["rgbaq"] >> 16) & 0xFF)
            kick = r in ("XYZ2", "XYZF2")   # XYZ3/XYZF3 add a vertex without a drawing kick
            vtx.append((x, y, z, col))
            t = st["ptype"]
            need = need_by_type.get(t, 99)
            if len(vtx) >= need:
                if kick:
                    prims.append(Prim(type=t, verts=vtx[-need:], tme=st["tme"], abe=st["abe"], iip=st["iip"],
                                      frame=e.frame, event_idx=e.idx, byte_off=e.byte_off,
                                      test=st["test"], alpha=st["alpha"]))
                if t in independent:
                    # A completed independent primitive consumes its vertices whether or not it
                    # was drawn; otherwise a no-kick write leaks stale vertices into the next
                    # primitive, and independent lines would be emitted once per extra vertex.
                    vtx = []
                # strips/fans keep a sliding window (left as-is; classified unsupported below)
    return prims, st
|
||||
|
||||
# ---- proven envelope: a primitive we can render faithfully via ps2_feeder ----
|
||||
def classify(p):
    """Sort a primitive into the proven ps2_feeder envelope.

    Returns ("translated", description) for a non-textured, non-blended
    TRIANGLE and ("unsupported", reason) for everything else.
    """
    if p.type == 3:
        if p.tme:
            return "unsupported", "textured triangle (TME=1; no real-texture path in the feeder)"
        if p.abe:
            return "unsupported", "alpha-blended triangle (ABE=1; only opaque is in the proven envelope)"
        return "translated", "non-textured opaque triangle"
    return "unsupported", f"prim={PRIMT.get(p.type,p.type)} (only TRIANGLE renders via ps2_feeder)"
|
||||
|
||||
def census(prims, parse_summary):
|
||||
cats={}; reasons={}
|
||||
for p in prims:
|
||||
c,why=classify(p)
|
||||
cats[c]=cats.get(c,0)+1
|
||||
if c=="unsupported": reasons[why]=reasons.get(why,0)+1
|
||||
return cats, reasons
|
||||
|
||||
def earliest_supported_segment(prims):
    """Return the earliest maximal contiguous run of 'translated' prims.

    Leading unsupported prims are skipped; the run ends at the first
    unsupported prim after it has started. Returns [] when nothing qualifies.
    """
    run = []
    for prim in prims:
        if classify(prim)[0] == "translated":
            run.append(prim)
        elif run:
            break       # first unsupported prim AFTER the run started: stop here
    return run
|
||||
|
||||
def viewport_fit(prims, fb):
    """Build the declared linear map from the segment's pixel bbox into the framebuffer.

    Vertex coordinates are RAW (1/16 pixel) GS values; the bbox and scale are
    computed in screen PIXELS so the report reads intuitively. The uniform
    scale is (fb-2) output pixels over the larger bbox span (spans below one
    pixel count as one), and the bbox origin lands on output pixel 1, so mapped
    coordinates fall in [1, fb-1].

    Returns (f, info): f(x, y) maps raw coords to integer output pixels; info
    holds bbox_px, scale_px and fb for the report.
    """
    pixels = [(vx / 16.0, vy / 16.0) for p in prims for (vx, vy, _z, _c) in p.verts]
    xs = [px for px, _py in pixels]
    ys = [py for _px, py in pixels]
    x0, x1 = min(xs), max(xs)
    y0, y1 = min(ys), max(ys)
    span_x = max(1.0, (x1 - x0))
    span_y = max(1.0, (y1 - y0))
    s = min((fb - 2) / span_x, (fb - 2) / span_y)    # output px per source px

    def f(x, y):
        out_x = 1 + int((x / 16.0 - x0) * s)
        out_y = 1 + int((y / 16.0 - y0) * s)
        return (out_x, out_y)

    info = dict(bbox_px=(round(x0, 1), round(y0, 1), round(x1, 1), round(y1, 1)),
                scale_px=round(s, 4), fb=fb)
    return f, info
|
||||
|
||||
def emit_scene(seg, fb):
    """Render a segment of triangles as ps2_feeder scene text.

    Every vertex goes through the `viewport_fit` map. Gouraud prims (iip set)
    emit `trig x y r g b` per vertex plus Z; flat prims emit `tri` with six
    coordinates, Z and one color. Z and the flat color come from the last
    vertex. Each primitive is followed by a `go` line.

    Returns (scene_text, viewport_info).
    """
    fit, info = viewport_fit(seg, fb)
    out = [f"# Ch340 translated segment: {len(seg)} non-textured triangles",
           f"# viewport fit: src bbox(px) {info['bbox_px']} scale {info['scale_px']} px/px -> {fb}x{fb}"]
    for prim in seg:
        depth = prim.verts[-1][2]
        if prim.iip:
            # gouraud: trig x0 y0 r0 g0 b0 x1 y1 ... z
            fields = []
            for (vx, vy, _vz, rgb) in prim.verts:
                sx, sy = fit(vx, vy)
                fields.extend((sx, sy, rgb[0], rgb[1], rgb[2]))
            fields.append(depth)
            out.append("trig " + " ".join(str(item) for item in fields))
        else:
            # flat: tri x0 y0 x1 y1 x2 y2 z r g b (GS flat uses provoking/last vertex color)
            coords = [c for (vx, vy, _vz, _rgb) in prim.verts for c in fit(vx, vy)]
            red, green, blue = prim.verts[-1][3]
            out.append(f"tri {coords[0]} {coords[1]} {coords[2]} {coords[3]} {coords[4]} {coords[5]} {depth} {red} {green} {blue}")
        out.append("go")
    return "\n".join(out) + "\n", info
|
||||
|
||||
def main(argv):
    """CLI entry point: print the support census and optionally write report/scene files.

    Returns 2 when no dump path is given, otherwise 0.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2
    path = argv[1]

    def opt(name, d=None):
        if name not in argv:
            return d
        return argv[argv.index(name) + 1]

    def by_count(table):
        # Most frequent first; ties keep first-seen order (stable sort).
        return sorted(table.items(), key=lambda kv: kv[1], reverse=True)

    fb = int(opt("--fb", "64"))
    h, events = gs_parse.parse_dump(path)
    prims, _ = reconstruct_prims(events)
    cats, reasons = census(prims, h)

    # histograms (from the raw event stream)
    regs = {}
    frames = images = imgb = malformed = 0
    for e in events:
        kind = e.kind
        if kind == "GSREG":
            regs[e.reg] = regs.get(e.reg, 0) + 1
        elif kind == "FRAME_BOUNDARY":
            frames += 1
        elif kind == "MALFORMED":
            malformed += 1
        elif kind == "IMAGE":
            images += 1
            imgb += e.info.get("bytes", 0)
    ptypes = {}
    for p in prims:
        label = PRIMT.get(p.type, p.type)
        ptypes[label] = ptypes.get(label, 0) + 1

    R = [
        f"# Ch340 GS-dump support census (schema v{gs_parse.SCHEMA_VERSION})",
        f"# source: {os.path.basename(path)} serial={h.serial!r} crc=0x{h.crc:08x} (aggregate counts only; no game content)",
        f"frames={frames} events={len(events)} primitives={len(prims)} malformed={malformed} image_uploads={images} ({imgb} bytes)",
        "",
        "primitive types: " + ", ".join(f"{k}={v}" for k, v in by_count(ptypes)),
        "census classes : " + ", ".join(f"{k}={v}" for k, v in by_count(cats)),
        "",
        "UNSUPPORTED reasons (count):",
    ]
    R.extend(f" {c:6d} {why}" for why, c in by_count(reasons))
    R.append("")
    R.append("top GS register writes:")
    R.extend(f" {v:6d} {k}" for k, v in by_count(regs)[:18])

    seg = earliest_supported_segment(prims)
    R.append("")
    if seg:
        first = seg[0]
        R.append(f"EARLIEST SUPPORTED SEGMENT: {len(seg)} triangles, starting frame {first.frame} "
                 f"event #{first.event_idx} @0x{first.byte_off:x}.")
        R.append(" qualification: every primitive is a non-textured opaque TRIANGLE; segment stops at the first unsupported prim/state.")
    else:
        topblk = max(reasons.items(), key=lambda x: x[1])[0] if reasons else "none"
        R.append("NO SUPPORTED SEGMENT: no contiguous run of non-textured opaque triangles.")
        R.append(f" Ch340 succeeds via parser + census. Top census blocker (Ch341 candidate): {topblk}")

    report = "\n".join(R) + "\n"
    print(report)
    rp = opt("--report")
    if rp:
        with open(rp, "w") as f:
            f.write(report)
        print(f"[wrote census -> {rp}]")
    sp = opt("--scene")
    if sp:
        if seg:
            scene, info = emit_scene(seg, fb)
            with open(sp, "w") as f:
                f.write(scene)
            print(f"[wrote {len(seg)}-tri ps2_feeder scene -> {sp} (viewport {info})]")
        else:
            print("[no scene emitted: no supported segment — fail closed]")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
@@ -0,0 +1,241 @@
|
||||
#!/usr/bin/env python3
|
||||
"""retroDE_ps2 — Ch341 textured-triangle translator (declared affine ST/Q surrogate).
|
||||
|
||||
Extracts the earliest contiguous single-TEX0 textured-triangle subsegment that FITS staging (<=27 tris)
|
||||
from a GS dump, derives faithful per-vertex texel coords u=S/Q, v=T/Q (FST=0 perspective ST), applies
|
||||
the declared 256->64 downscale, and emits a ps2_feeder textured scene (tex0 + tritex) — BUT ONLY if the
|
||||
honesty gate passes: the affine interpolation of vertex (S/Q,T/Q) must stay within MAX_ERR texels of the
|
||||
true perspective interpolation over the triangle. Otherwise it FAILS CLOSED (Ch342 = real ST/Q).
|
||||
|
||||
This is NOT faithful perspective-correct GS texturing. It is authentic cube geometry + authentic
|
||||
extracted texels through the affine-UV feeder, with a DECLARED affine substitute for perspective ST/Q
|
||||
on a tiny-span segment. Sprites stay unsupported. Reports are aggregate; scene text is dump-derived.
|
||||
|
||||
Usage: gs_translate_tex.py <dump> [--tbp 64] [--dst 64] [--maxtri 27] [--scene out.txt] [--report r.txt]
|
||||
"""
|
||||
import sys, os, struct
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
import gs_parse
|
||||
|
||||
MAX_ERR = 0.5 # texels, on the 64x64 fixture
|
||||
def f32(bits):
    """Reinterpret the low 32 bits of `bits` as a little-endian IEEE-754 single-precision float."""
    raw = (bits & 0xFFFFFFFF).to_bytes(4, "little")
    (value,) = struct.unpack("<f", raw)
    return value


def bit(v, b):
    """Return bit number `b` of integer `v` as 0 or 1."""
    shifted = v >> b
    return shifted & 1
|
||||
|
||||
def extract(events, dst, maxtri):
    """Walk events; return (segment tris, meta) for the earliest single-TEX0 FST=0 triangle run.

    Each triangle is a list of three vertex dicts (x, y in pixels with
    XYOFFSET removed; z; S, T, Q; rgb; plus the tex0/fst/clamp state captured
    when the vertex was written). The segment starts at the first FST=0
    independent triangle and stops (fail closed) on: an IMAGE event while
    BITBLTBUF's destination equals the bound TBP, a triangle leaving FST=0, a
    TEX0 or CLAMP change, or reaching `maxtri` triangles.

    `meta` has tex0, clamp, prim, test (register values at segment start),
    first/last (event indices) and stop (reason string or None).
    `dst` is accepted for signature compatibility and is not used here.

    NOTE(review): PRIM.TME is not checked in this function, so untextured
    FST=0 triangles would also be collected — confirm callers gate on
    meta["prim"].
    """
    st_S = st_T = 0.0
    q = 1.0
    rgb = (255, 255, 255)
    tex0 = 0
    fst = 0
    ptype = 7
    xyoff = 0
    clamp = 0
    prim_val = 0
    test_val = 0
    bitbltbuf = 0
    vbuf = []                   # pending verts
    seg = []
    seg_tex0 = None
    seg_clamp = None
    seg_prim = None
    seg_test = None
    started = False
    stop_reason = None
    first = None
    last = None
    for e in events:
        if e.kind == "IMAGE":
            dbp = (bitbltbuf >> 32) & 0x3FFF
            if started and seg_tex0 is not None and ((seg_tex0 & 0x3FFF) == dbp):
                stop_reason = f"re-upload to bound TBP {dbp} at event #{e.idx}"
                break
            continue
        if e.kind != "GSREG":
            continue
        r = e.reg
        v = e.value
        if r == "BITBLTBUF":
            bitbltbuf = v
        elif r == "PRIM":
            ptype = v & 7
            fst = (v >> 8) & 1
            prim_val = v
            vbuf = []
        elif r == "TEST_1":
            test_val = v
        elif r == "PRMODE":
            fst = (v >> 8) & 1
        elif r == "ST":
            st_S = f32(v & 0xFFFFFFFF)
            st_T = f32((v >> 32) & 0xFFFFFFFF)
            if "q_stq" in e.info:
                q = f32(e.info["q_stq"])    # PACKED ST routes Q -> RGBAQ.Q
        elif r == "RGBAQ":
            rgb = (v & 0xFF, (v >> 8) & 0xFF, (v >> 16) & 0xFF)
            q = f32((v >> 32) & 0xFFFFFFFF)
        elif r == "TEX0_1":
            tex0 = v
        elif r in ("CLAMP_1",):
            clamp = v
        elif r in ("XYOFFSET_1",):
            xyoff = v
        elif r in ("XYZ2", "XYZ3", "XYZF2", "XYZF3"):
            x = (v & 0xFFFF) / 16.0 - (xyoff & 0xFFFF) / 16.0
            y = ((v >> 16) & 0xFFFF) / 16.0 - ((xyoff >> 16) & 0xFFFF) / 16.0
            # XYZF carries a 24-bit Z in bits 32..55; bits 56..63 are the fog coefficient.
            z = (v >> 32) & (0xFFFFFF if r in ("XYZF2", "XYZF3") else 0xFFFFFFFF)
            kick = r in ("XYZ2", "XYZF2")
            vbuf.append(dict(x=x, y=y, z=z, S=st_S, T=st_T, Q=q, rgb=rgb, tex0=tex0, fst=fst, clamp=clamp))
            if ptype == 3 and len(vbuf) >= 3:
                t = vbuf[-3:]
                vbuf = []
                if not kick:
                    # A no-kick third vertex completes the triangle without drawing it; its
                    # vertices are consumed and must not leak into the next triangle.
                    continue
                if not all(vv["fst"] == 0 for vv in t):     # only FST=0 textured tris in this rung
                    if started:
                        stop_reason = "prim left FST=0 ST mode"
                        break
                    continue
                tt = t[0]["tex0"]
                if not started:
                    seg_tex0 = tt
                    seg_clamp = t[0]["clamp"]
                    seg_prim = prim_val
                    seg_test = test_val
                    started = True
                    first = e.idx
                if tt != seg_tex0:
                    stop_reason = f"TEX0 changed at event #{e.idx}"
                    break
                if t[0]["clamp"] != seg_clamp:
                    stop_reason = f"CLAMP changed at event #{e.idx}"
                    break
                seg.append(t)
                last = e.idx
                if len(seg) >= maxtri:
                    stop_reason = f"staging cap ({maxtri} tris)"
                    break
    return seg, dict(tex0=seg_tex0, clamp=seg_clamp, prim=seg_prim, test=seg_test,
                     first=first, last=last, stop=stop_reason)
|
||||
|
||||
def reconstruct_all_textured(events):
    """List every FST=0 independent triangle with its active tex0/clamp and upload epoch.

    Each entry is dict(v=[three dict(S, T, Q)], tex0, clamp, epoch), where
    epoch is the number of IMAGE events seen so far while BITBLTBUF's
    destination equalled the triangle's TBP. For the closeout all-window
    scan — NOT the mechanical earliest selection (extract() is that).
    Documentation only, per Codex.
    """
    st_S = st_T = 0.0
    q = 1.0
    tex0 = 0
    fst = 0
    ptype = 7
    clamp = 0
    bitbltbuf = 0
    epoch = {}
    vbuf = []
    out = []
    for e in events:
        if e.kind == "IMAGE":
            dbp = (bitbltbuf >> 32) & 0x3FFF
            epoch[dbp] = epoch.get(dbp, 0) + 1
            continue
        if e.kind != "GSREG":
            continue
        r = e.reg
        v = e.value
        if r == "BITBLTBUF":
            bitbltbuf = v
        elif r == "PRIM":
            ptype = v & 7
            fst = (v >> 8) & 1
            vbuf = []
        elif r == "PRMODE":
            fst = (v >> 8) & 1
        elif r == "ST":
            st_S = f32(v & 0xFFFFFFFF)
            st_T = f32((v >> 32) & 0xFFFFFFFF)
            if "q_stq" in e.info:
                q = f32(e.info["q_stq"])    # PACKED ST routes Q -> RGBAQ.Q
        elif r == "RGBAQ":
            q = f32((v >> 32) & 0xFFFFFFFF)
        elif r == "TEX0_1":
            tex0 = v
        elif r == "CLAMP_1":
            clamp = v
        elif r in ("XYZ2", "XYZ3", "XYZF2", "XYZF3"):
            kick = r in ("XYZ2", "XYZF2")
            vbuf.append(dict(S=st_S, T=st_T, Q=q))
            if ptype == 3 and len(vbuf) >= 3:
                t = vbuf[-3:]
                vbuf = []
                # A no-kick third vertex completes the triangle without drawing it; the vertices
                # are still consumed so they cannot leak into the next triangle.
                if kick and fst == 0:
                    out.append(dict(v=t, tex0=tex0, clamp=clamp, epoch=epoch.get(tex0 & 0x3FFF, 0)))
    return out
|
||||
|
||||
def scan_windows(tris, dst, maxtri):
    """Score a window of at most ``maxtri`` triangles starting at every index.

    A window grows from its first triangle until a later triangle differs from
    that first one in ``tex0``, ``clamp`` or ``epoch``. The window's error is
    the largest ``tri_error`` of its members.

    Returns ``(n_windows, n_pass, min_window_error)`` where ``n_pass`` counts
    windows whose error is <= ``MAX_ERR``; the minimum stays at ``1e9`` when
    ``tris`` is empty.
    """
    count = len(tris)
    windows = 0
    passing = 0
    lowest = 1e9
    for start in range(count):
        head = tris[start]
        window = [head]
        for idx in range(start + 1, min(count, start + maxtri)):
            candidate = tris[idx]
            unchanged = (
                head["tex0"] == candidate["tex0"]
                and head["clamp"] == candidate["clamp"]
                and head["epoch"] == candidate["epoch"]
            )
            if not unchanged:
                break
            window.append(candidate)
        window_err = max(tri_error(member["v"], dst)[0] for member in window)
        windows += 1
        lowest = min(lowest, window_err)
        if window_err <= MAX_ERR:
            passing += 1
    return windows, passing, lowest
|
||||
|
||||
def bary_samples():
    """Return the 10 barycentric sample points used for the error estimate.

    The centroid comes first, then for each weight in (0.25, 0.5, 0.75) three
    points that give that weight to one vertex and split the remainder evenly
    between the other two.
    """
    samples = [(1/3, 1/3, 1/3)]
    for weight in (0.25, 0.5, 0.75):
        rest = (1 - weight) / 2
        samples.append((weight, rest, rest))
        samples.append((rest, weight, rest))
        samples.append((rest, rest, weight))
    return samples
|
||||
|
||||
def tri_error(t, dst):
    """Estimate max |affine - perspective| texel error over a triangle.

    ``t`` is a sequence of three vertex dicts with ``S``, ``T`` and ``Q``;
    ``dst`` scales normalized coordinates to texels. Per-vertex texel
    coordinates are ``(S/Q)*dst`` and ``(T/Q)*dst`` (0.0 when Q is zero). At
    each point from ``bary_samples()`` the perspective value (interpolate
    S, T, Q, then divide) is compared with the affine value (interpolate the
    per-vertex texels); samples whose interpolated Q is zero are skipped.

    Returns ``(max_error, uv)`` where ``uv`` is the list of per-vertex texel
    coordinate pairs.
    """
    texels = []
    for vert in t:
        if vert["Q"]:
            s_over_q = vert["S"] / vert["Q"]
            t_over_q = vert["T"] / vert["Q"]
        else:
            s_over_q = 0.0
            t_over_q = 0.0
        texels.append((s_over_q * dst, t_over_q * dst))

    worst = 0.0
    for w0, w1, w2 in bary_samples():
        s_mix = w0 * t[0]["S"] + w1 * t[1]["S"] + w2 * t[2]["S"]
        t_mix = w0 * t[0]["T"] + w1 * t[1]["T"] + w2 * t[2]["T"]
        q_mix = w0 * t[0]["Q"] + w1 * t[1]["Q"] + w2 * t[2]["Q"]
        if q_mix == 0:
            continue
        u_persp = (s_mix / q_mix) * dst
        v_persp = (t_mix / q_mix) * dst
        u_affine = w0 * texels[0][0] + w1 * texels[1][0] + w2 * texels[2][0]
        v_affine = w0 * texels[0][1] + w1 * texels[1][1] + w2 * texels[2][1]
        worst = max(worst, abs(u_persp - u_affine), abs(v_persp - v_affine))
    return worst, texels
|
||||
|
||||
def main(argv):
    """Translate the selected triangle segment of a GS dump into a feeder scene.

    ``argv[1]`` is the dump path. Recognised options (each value option takes
    the following argument): ``--tbp`` (default 64), ``--dst`` (default 64),
    ``--maxtri`` (default 27), ``--scene PATH``, ``--report PATH`` and the
    flag ``--perspective``.

    Without ``--perspective`` the segment is emitted as ``tritex`` commands
    (affine UV) only if the perspective-vs-affine error is within ``MAX_ERR``.
    With ``--perspective`` it is emitted as ``persptri`` commands carrying
    S/T/Q scaled by 4096, failing closed on Q<=0 or a value outside 24 bits.

    Returns 2 when no dump path is given and 0 otherwise; gate failures are
    reported in the text output, not through the return value.
    """
    if len(argv) < 2:
        print(__doc__)
        return 2
    path = argv[1]

    def opt(n, d=None):
        return argv[argv.index(n) + 1] if n in argv else d

    def write_lines(target, lines):
        # Context manager: the handle is flushed and closed before we return.
        with open(target, "w") as fh:
            fh.write("\n".join(lines) + "\n")

    tbp = int(opt("--tbp", "64"))
    dst = int(opt("--dst", "64"))
    maxtri = int(opt("--maxtri", "27"))
    _header, events = gs_parse.parse_dump(path)
    fst0 = sum(1 for e in events
               if e.kind == "GSREG" and e.reg == "PRIM" and ((e.value >> 8) & 1) == 0)
    seg, meta = extract(events, dst, maxtri)
    R = [f"# Ch341 textured-triangle translation (source {os.path.basename(path)}; aggregate facts + dump-derived scene)"]
    R.append(f"FST=0 (perspective ST) PRIM submissions: {fst0}")
    if not seg:
        R.append(f"NO textured-tri segment selected (stop: {meta['stop']}).")
        print("\n".join(R))
        return 0

    FB = 64  # fixture framebuffer edge, in pixels

    def viewport_fit():
        """Fit the segment's screen bbox into the FB x FB fixture; return bbox + mappers."""
        xs = [v["x"] for t in seg for v in t]
        ys = [v["y"] for t in seg for v in t]
        x0, x1, y0, y1 = min(xs), max(xs), min(ys), max(ys)
        scale = min((FB - 2) / max(1.0, x1 - x0), (FB - 2) / max(1.0, y1 - y0))

        def fx(x):
            return max(0, min(FB - 1, 1 + int((x - x0) * scale)))

        def fy(y):
            return max(0, min(FB - 1, 1 + int((y - y0) * scale)))

        return (x0, y0, x1, y1), fx, fy

    def fail_closed(message):
        # The failure report is the useful one, so persist it like the affine path does.
        R.append(message)
        print("\n".join(R))
        if opt("--report"):
            write_lines(opt("--report"), R)
        return 0

    # Ch342 — FAITHFUL perspective ST/Q emit (no affine, no error gate; the Ch301 path is exact).
    # Packs the Ch301 fixed-point contract: S_fp=round(S*4096), T_fp=round(T*4096), Q_fp=round(Q*4096)
    # (24-bit FRAC=12). The 256->64 downscale is in the TEXTURE (TW=6); ST/Q stay normalized — the GS
    # computes texel=(S/Q)*2^TW=(S/Q)*64. Fails closed on Q<=0 or fixed-point overflow.
    if "--perspective" in argv:
        (x0, y0, x1, y1), fx, fy = viewport_fit()
        L = [f"# Ch342 authentic cube subsegment ({len(seg)} tris) — FAITHFUL perspective ST/Q (Ch301 fixed-point, FRAC=12)",
             f"# events #{meta['first']}..#{meta['last']}; screen ({x0:.0f},{y0:.0f})..({x1:.0f},{y1:.0f}) viewport-fit -> {FB}x{FB}; texture 256->{dst}",
             f"tex0 {tbp} 1 6 6 0", "persp"]
        for t in seg:
            a = []
            for v in t:
                if v["Q"] <= 0:
                    return fail_closed(f"GATE FAILED: Q<=0 ({v['Q']}) at event #{meta['first']} — fail closed.")
                sfp = round(v["S"] * 4096)
                tfp = round(v["T"] * 4096)
                qfp = round(v["Q"] * 4096)
                if not (0 <= sfp <= 0xFFFFFF and 0 <= tfp <= 0xFFFFFF and 0 < qfp <= 0xFFFFFF):
                    return fail_closed("GATE FAILED: ST/Q fixed-point out of 24-bit range -> fail closed.")
                a += [fx(v["x"]), fy(v["y"]), sfp, tfp, qfp]
            # Flat colour and depth are taken from the triangle's last vertex.
            (r, g, b) = t[-1]["rgb"]
            z = t[-1]["z"]
            L.append("persptri " + " ".join(map(str, a + [z, r, g, b])))
        L.append("go")
        Qs = [v["Q"] for t in seg for v in t]
        pv = meta["prim"] or 0
        tv = meta["test"] or 0
        R.append(f"segment PRIM: type={pv&7}(TRI=3) IIP={(pv>>3)&1} TME={(pv>>4)&1} FGE={(pv>>5)&1} ABE={(pv>>6)&1} FST={(pv>>8)&1}"
                 f" TEST_1: ZTE={(tv>>16)&1} ZTST={(tv>>17)&3}(GEQ=2)")
        if (pv >> 6) & 1:
            R.append("WARN: segment ABE=1 -> routes to the combined-TAZ path (perspective there is a known follow-on bug), NOT the proven S1 path. Do NOT flip ABE.")
        R.append("S1 perspective path honors TME+FST=0 + ZTE/ZTST GEQUAL; cube segment is ABE=0 (S1 path).")
        R.append(f"FAITHFUL PERSPECTIVE: {len(seg)} tris, TEX0->TBP={tbp} TW=6 TH=6 TFX=0; S_fp/T_fp/Q_fp=round(*4096); Q span {min(Qs):.4f}..{max(Qs):.4f}")
        R.append(f"staging words: {7+9*len(seg)} (perspective format word0[32]=1, no rects)")
        print("\n".join(R) + "\n")
        sp = opt("--scene")
        if sp:
            write_lines(sp, L)
            print(f"[wrote {len(seg)}-tri FAITHFUL perspective scene -> {sp}]")
        if opt("--report"):
            write_lines(opt("--report"), R)
        return 0

    # span + Q + error
    Qs = [v["Q"] for t in seg for v in t]
    Ss = [v["S"] for t in seg for v in t]
    Ts = [v["T"] for t in seg for v in t]
    maxerr = 0.0
    for t in seg:
        me, _ = tri_error(t, dst)
        maxerr = max(maxerr, me)
    tex0 = meta["tex0"]
    R.append(f"selected segment: {len(seg)} triangles, events #{meta['first']}..#{meta['last']}, stop after: {meta['stop']}")
    R.append(f"active TEX0 (orig): TBP0={tex0&0x3FFF} TW={(tex0>>26)&0xF} TH={(tex0>>30)&0xF} TFX={(tex0>>35)&3}")
    R.append(f"relocated TEX0 (fixture): TBP0={tbp} TBW=1 TW=6 TH=6 TFX=0 (downscale 256->{dst}, UV scale /{256//dst})")
    R.append(f"Q span: {min(Qs):.4f}..{max(Qs):.4f} S span {min(Ss):.4f}..{max(Ss):.4f} T span {min(Ts):.4f}..{max(Ts):.4f}")
    R.append(f"perspective-vs-affine max error: {maxerr:.4f} texels (threshold {MAX_ERR})")
    ok = maxerr <= MAX_ERR
    staging_words = 7 + 9 * len(seg)
    R.append(f"staging words: {staging_words} (<=256: {'ok' if staging_words<=256 else 'OVERFLOW'})")
    if not ok:
        R.append("GATE FAILED: affine ST/Q surrogate exceeds error threshold -> FAIL CLOSED. Ch342 = real ST/Q through the feeder.")
    else:
        R.append(f"DECLARED APPROXIMATION: perspective ST/Q rendered as affine UV; max_error={maxerr:.4f} texels (NOT faithful perspective-correct texturing).")
    print("\n".join(R) + "\n")
    if opt("--report"):  # write the report in BOTH branches (the failure report is the useful one)
        write_lines(opt("--report"), R)
        print(f"[wrote report -> {opt('--report')}]")
    if not ok:
        return 0
    sp = opt("--scene")
    if sp:
        # viewport-fit the segment's screen bbox into the FB x FB fixture (declared, like Ch340)
        (x0, y0, x1, y1), fx, fy = viewport_fit()
        L = [f"# Ch341 authentic cube subsegment ({len(seg)} tris) — DECLARED affine ST/Q surrogate, max_error={maxerr:.3f} texels",
             f"# screen bbox ({x0:.0f},{y0:.0f})..({x1:.0f},{y1:.0f}) viewport-fit -> {FB}x{FB}; texture 256->{dst} downscale",
             f"tex0 {tbp} 1 6 6 0"]
        for t in seg:
            a = []
            for v in t:
                # Texel coordinates are clamped into the dst-sized texture; Q==0 maps to texel 0.
                u = max(0, min(dst - 1, int(round((v["S"] / v["Q"]) * dst)))) if v["Q"] else 0
                w = max(0, min(dst - 1, int(round((v["T"] / v["Q"]) * dst)))) if v["Q"] else 0
                a += [fx(v["x"]), fy(v["y"]), u, w]
            (r, g, b) = t[-1]["rgb"]
            z = t[-1]["z"]
            L.append("tritex " + " ".join(map(str, a + [z, r, g, b])))
        L.append("go")
        write_lines(sp, L)
        print(f"[wrote {len(seg)}-tri textured scene -> {sp}]")
    return 0
|
||||
|
||||
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))
|
||||
@@ -0,0 +1,524 @@
|
||||
// retroDE_ps2 — ps2_feeder (Ch339): native HPS userspace command producer for the GS feeder.
|
||||
//
|
||||
// Encodes structured drawing commands (triangle / native rectangle / flat or per-vertex color / Z)
|
||||
// into the PROVEN staging format and streams them to the FPGA over the existing HPS bridge via
|
||||
// /dev/mem + mmap — the SAME register protocol the docs/hardware/ps2_feeder_*.sh anchors use. The
|
||||
// RTL and bridge protocol are UNCHANGED; this is purely a host-side encoder + streamer.
|
||||
//
|
||||
// The staging word layout and every GS-register packing here are a byte-exact port of the golden
|
||||
// encoder in sim/data/top_psmct32_raster_demo/bake.py. `--dump <scene>` emits the 256 staging words
|
||||
// so the encoder can be diffed against the golden feeder_*.mem fixtures (byte-equivalence gate).
|
||||
//
|
||||
// Build: gcc -O2 -Wall -o ps2_feeder ps2_feeder.c
|
||||
// Usage:
|
||||
// ps2_feeder --list list built-in named scenes
|
||||
// ps2_feeder --dump <scene> print the 256 staging words (no board access)
|
||||
// ps2_feeder --dump-file <file> parse a scene file, print the first scene's staging words
|
||||
// ps2_feeder [opts] <scene>... stream each named scene to the board (submit/go/wait each)
|
||||
// ps2_feeder [opts] -f <file> stream each scene in a text scene file
|
||||
// Options:
|
||||
// --base 0xADDR bridge base (default 0x40000000) --dry-run encode+validate, no board access
|
||||
//
|
||||
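// Scene file grammar (one scene per `go`/EOF; '#' comments; whitespace-separated):
//   tri x0 y0 x1 y1 x2 y2 z r g b                         flat triangle
//   trig x0 y0 r0 g0 b0 x1 y1 r1 g1 b1 x2 y2 r2 g2 b2 z   gouraud (per-vertex) triangle
//   tritile T z r g b                                     flat triangle filling grid tile T (0..15)
//   rect T z r g b                                        native rectangle in grid tile T
//   tex0 TBP TBW TW TH TFX                                scene-level texture binding
//   tritex x0 y0 u0 v0 x1 y1 u1 v1 x2 y2 u2 v2 z r g b    textured triangle (needs a prior tex0)
//   persp                                                 mark the scene perspective (needs a prior tex0)
//   persptri x y s t q (x3) z r g b                       perspective triangle, fixed-point ST/Q (needs a prior persp)
//   sprite x0 y0 x1 y1 u0 v0 u1 v1 r g b                  textured source-over alpha sprite (needs a prior tex0)
//   go                                                    submit the accumulated scene; start next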
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
// ---- profile constants (must match the GS_FEEDER_DEMO build) ----
|
||||
#define FEEDER_STG_WORDS 256
|
||||
#define FIFO_DEPTH 8
|
||||
#define CAP_FBW 1
|
||||
#define CAP_TBP1 96
|
||||
#define TMP_TBW 1
|
||||
static const int TMP_UV[3][2] = {{0,0},{3,0},{0,3}};
|
||||
|
||||
// ---- bridge register byte offsets from base (mirror docs/hardware/ps2_feeder_*.sh) ----
|
||||
#define OFF_STATUS 0x0D8 // R: bit0 = feeder ready ; W: reset staging write address
|
||||
#define OFF_LO 0x0DC // W: low 32 bits of the next staging word ; R: current staging address
|
||||
#define OFF_HI 0x0E4 // W: high 32 bits (commits {hi,lo}, auto-increments addr) ; R: records emitted
|
||||
#define OFF_GO 0x0E8 // W: bit0 = trigger feeder ; R: fifo wait cycles
|
||||
|
||||
// ======================== GS register / vertex packers (port of bake.py) ===================
|
||||
// FRAME_1 value carrying only the 6-bit frame-buffer width `fbw` at bit 16; every other field is 0.
static uint64_t frame_1_psmct32(uint32_t fbw){
    const uint64_t width_field = fbw & 0x3F;
    return width_field << 16;
}
|
||||
// ALPHA register packer: four 2-bit selectors a/b/c/d at bits 0/2/4/6 and the 8-bit FIX value at bit 32.
static uint64_t alpha_pack(uint32_t a,uint32_t b,uint32_t c,uint32_t d,uint32_t fix){
    uint64_t reg = (uint64_t)(a & 3);
    reg |= (uint64_t)(b & 3) << 2;
    reg |= (uint64_t)(c & 3) << 4;
    reg |= (uint64_t)(d & 3) << 6;
    reg |= (uint64_t)(fix & 0xFF) << 32;
    return reg;
}
|
||||
// TEST_1 value with bit 16 set and the value 2 in the field at bit 17 (depth test on, GEQUAL).
static uint64_t test1_geq(void){
    const uint64_t zte  = 1ull << 16;
    const uint64_t ztst = 2ull << 17;
    return zte | ztst;
}
|
||||
// ZBUF_1 register packer: 9-bit ZBP at bit 0, 4-bit PSM at bit 24, ZMSK at bit 32.
// ZMSK used to be OR'd into bit 0, where it collided with the ZBP LSB. The only caller in this
// file passes zmsk=0, so the emitted staging words are unchanged.
static uint64_t zbuf1_pack(uint32_t zbp,uint32_t zmsk,uint32_t psm){
    return (uint64_t)(zbp&0x1FF) | ((uint64_t)(psm&0xF)<<24) | ((uint64_t)(zmsk&1)<<32);
}
|
||||
// TEX0 register packer. Field positions: TBP0[13:0], TBW[19:14], PSM[25:20], TW[29:26], TH[33:30],
// TFX[36:35]. Bit 34 is never set by this packer.
static uint64_t tex0_pack(uint32_t tbp0,uint32_t tbw,uint32_t psm,uint32_t tw,uint32_t th,uint32_t tfx){
    uint64_t reg = (uint64_t)(tbp0 & 0x3FFF);
    reg |= (uint64_t)(tbw & 0x3F) << 14;
    reg |= (uint64_t)(psm & 0x3F) << 20;
    reg |= (uint64_t)(tw & 0xF) << 26;
    reg |= (uint64_t)(th & 0xF) << 30;
    reg |= (uint64_t)(tfx & 3) << 35;
    return reg;
}
|
||||
// PRIM for the legacy scenes: primitive type 3 (TRI) with TME (bit 4) and ABE (bit 6) set.
static uint64_t prim_tri_tme_abe(void){
    const uint64_t prim_tri = 3;
    const uint64_t tme = 1ull << 4;
    const uint64_t abe = 1ull << 6;
    return prim_tri | tme | abe;
}
|
||||
// Ch342 PRIM for the cube: primitive type 3 (TRI) with TME (bit 4) set and ABE left at 0.
static uint64_t prim_tri_tme(void){
    const uint64_t prim_tri = 3;
    const uint64_t tme = 1ull << 4;
    return prim_tri | tme;
}
|
||||
// Ch345a PRIM for textured-alpha sprites: primitive type 6 (SPRITE) with TME (bit 4) and ABE (bit 6).
static uint64_t prim_sprite_tme_abe(void){
    const uint64_t prim_sprite = 6;
    const uint64_t tme = 1ull << 4;
    const uint64_t abe = 1ull << 6;
    return prim_sprite | tme | abe;
}
|
||||
// RGBAQ word for the non-perspective formats: R[7:0], G[15:8], B[23:16], A fixed at 0xFF in [31:24];
// the upper 32 bits stay 0.
static uint64_t rgbaq_data(uint8_t r,uint8_t g,uint8_t b){
    const uint64_t alpha = (uint64_t)0xFF << 24;
    const uint64_t blue  = (uint64_t)b << 16;
    const uint64_t green = (uint64_t)g << 8;
    return alpha | blue | green | r;
}
|
||||
// UV word: each texel coordinate is shifted left 4 and masked to 14 bits, so only integer
// coordinates 0..1023 survive; larger values wrap. U lands at bit 0, V at bit 14.
// NOTE(review): the standard GS UV register places V at [29:16]; V at bit 14 here is kept exactly
// as found (the goldens are byte-compared) — confirm against the feeder/gs_stub contract.
static uint64_t uv_data(int ui,int vi){
    const uint64_t u_field = (uint64_t)((ui << 4) & 0x3FFF);
    const uint64_t v_field = (uint64_t)((vi << 4) & 0x3FFF);
    return u_field | (v_field << 14);
}
|
||||
// XYZ2 word: 12-bit integer x and y, each shifted left 4 (x at bit 4, y at bit 20), with the
// 32-bit z in the upper half.
static uint64_t xyz2_dataz(int x,int y,uint32_t z){
    const uint64_t x_field = (uint64_t)(x & 0xFFF) << 4;
    const uint64_t y_field = (uint64_t)(y & 0xFFF) << 20;
    const uint64_t z_field = (uint64_t)z << 32;
    return x_field | y_field | z_field;
}
|
||||
// Ch342 — perspective ST/Q packers matching the Ch301 gs_stub contract (24-bit fixed-point, FRAC=12).
|
||||
// ST register: S_fp in [23:0], T_fp in [55:32]. RGBAQ register: Q_fp in [55:32] (NOT IEEE float).
|
||||
// ST word in the fixed-point contract: S_fp masked to 24 bits in [23:0], T_fp masked to 24 bits in [55:32].
static uint64_t st_data(uint32_t s_fp,uint32_t t_fp){
    const uint64_t s_field = (uint64_t)(s_fp & 0xFFFFFF);
    const uint64_t t_field = (uint64_t)(t_fp & 0xFFFFFF);
    return s_field | (t_field << 32);
}
|
||||
// RGBAQ word for the perspective format: the same colour/alpha layout as rgbaq_data (A fixed at
// 0xFF) plus Q_fp masked to 24 bits in [55:32].
static uint64_t rgbaq_persp(uint8_t r,uint8_t g,uint8_t b,uint32_t q_fp){
    const uint64_t colour = ((uint64_t)0xFF << 24) | ((uint64_t)b << 16) | ((uint64_t)g << 8) | r;
    const uint64_t q_field = (uint64_t)(q_fp & 0xFFFFFF);
    return colour | (q_field << 32);
}
|
||||
|
||||
// ======================== scene model ===================
|
||||
#define MAX_TRI 64
|
||||
#define MAX_RECT 64
|
||||
#define MAX_SPRITE 64
|
||||
// One triangle: per-vertex screen position, colour and integer UV, plus the Ch342 fixed-point
// S/T/Q triples and a single depth shared by all three vertices.
typedef struct {
    int      x[3], y[3];                   // screen coordinates
    uint8_t  r[3], g[3], b[3];             // per-vertex colour
    int      u[3], v[3];                   // texel coordinates (non-perspective format)
    uint32_t s_fp[3], t_fp[3], q_fp[3];    // Ch342 fixed-point ST/Q
    uint32_t z;                            // depth
} tri_t;
|
||||
// One native rectangle: the grid tile it fills, a flat colour and a depth.
typedef struct {
    int      tile;
    uint8_t  r, g, b;
    uint32_t z;
} rect_t;
|
||||
// Ch345a — textured + source-over alpha SPRITE record (the Ch344-proven subset): screen rect (x0,y0)-(x1,y1),
|
||||
// per-corner affine UV, flat MODULATE tint. As (source alpha) comes from the TEXEL (TCC=1), not the tint.
|
||||
// One sprite: two screen corners, the UV at each corner, and a flat tint colour.
typedef struct {
    int     x0, y0, x1, y1;    // screen corners
    int     u0, v0, u1, v1;    // UV at each corner
    uint8_t r, g, b;           // tint
} sprite_t;
|
||||
typedef struct {
|
||||
tri_t tri[MAX_TRI]; int ntri; rect_t rect[MAX_RECT]; int nrect; const char *name;
|
||||
sprite_t sprite[MAX_SPRITE]; int nsprite; // Ch345a
|
||||
// Ch341 — optional scene-level texture binding. textured=0 keeps the proven unity+MODULATE header
|
||||
// and TMP_UV (byte-exact with all prior scenes). textured=1 (set by the `tex0` command) overrides
|
||||
// the scene TEX0 and lets `tritex` carry per-vertex real UV.
|
||||
int textured; uint32_t tex_tbp, tex_tbw, tex_tw, tex_th, tex_tfx;
|
||||
// Ch342 — perspective format: word0[32]=1, per-vertex RGBAQ/ST/XYZ2 (fixed-point S/T/Q). Requires
|
||||
// `textured`; rects are rejected (the host fails closed so a mixed scene is never silently dropped).
|
||||
int perspective;
|
||||
// Ch345a — SPRITE format: word0[33]=1, each prim = SPRITE record (2 verts x RGBAQ/UV/XYZ2). Requires
|
||||
// `textured`; mutually exclusive with tris/rects/perspective (fail closed).
|
||||
int sprite_mode;
|
||||
} scene_t;
|
||||
|
||||
// Start a fresh scene called `name`: all primitive counts and mode flags go back to 0.
// The tex_* binding fields and the primitive arrays themselves are left untouched.
static void scene_reset(scene_t *s, const char *name){
    s->name = name;
    s->ntri = s->nrect = s->nsprite = 0;
    s->textured = s->perspective = s->sprite_mode = 0;
}
|
||||
// Nonzero when the scene holds no triangles, rects or sprites (mode flags are not considered).
static int scene_empty(const scene_t *s){
    return !(s->ntri || s->nrect || s->nsprite);
}
|
||||
|
||||
// tri_in_tile(t): the half-tile triangle bake.py draws for grid tile t (0..15, row*4+col).
|
||||
// Fill vx/vy with the half-tile triangle for grid tile t (row*4+col, 16-pixel tiles): the corner
// at (+1,+1) of the tile origin, then (+14,+1), then (+1,+14).
static void tri_in_tile(int t, int vx[3], int vy[3]){
    const int left = (t % 4) * 16 + 1;
    const int top  = (t / 4) * 16 + 1;
    const int span = 13;               // +1 .. +14 inside the 16-pixel tile
    vx[0] = left;        vy[0] = top;
    vx[1] = left + span; vy[1] = top;
    vx[2] = left;        vy[2] = top + span;
}
|
||||
|
||||
// returns 0 ok, -1 on capacity/range error (message printed)
|
||||
// Append one triangle with per-vertex colour and a shared depth `z`.
// Vertices must lie in [0..4095]; UV defaults to TMP_UV (byte-exact with prior scenes).
// The fixed-point S/T/Q slots are zeroed so the staging builder never reads uninitialised memory
// for a triangle that was not added through add_persptri.
// Triangles are refused once the scene is in sprite mode, because the sprite staging format
// would drop them silently.
// Returns 0 on success, -1 on capacity/range/mode error (message printed to stderr).
static int add_tri_gouraud(scene_t *s, const int vx[3], const int vy[3],
                           const uint8_t rr[3], const uint8_t gg[3], const uint8_t bb[3], uint32_t z){
    if (s->sprite_mode){ fprintf(stderr,"error: triangles not allowed in sprite mode\n"); return -1; }
    if (s->ntri >= MAX_TRI){ fprintf(stderr,"error: too many triangles (max %d)\n", MAX_TRI); return -1; }
    for (int i=0;i<3;i++){
        if (vx[i]<0||vx[i]>4095||vy[i]<0||vy[i]>4095){
            fprintf(stderr,"error: vertex (%d,%d) out of 12-bit range [0..4095]\n", vx[i], vy[i]); return -1; }
    }
    tri_t *t=&s->tri[s->ntri++];
    for (int i=0;i<3;i++){
        t->x[i]=vx[i]; t->y[i]=vy[i]; t->r[i]=rr[i]; t->g[i]=gg[i]; t->b[i]=bb[i];
        t->u[i]=TMP_UV[i][0]; t->v[i]=TMP_UV[i][1];   // default UV (byte-exact w/ prior scenes)
        t->s_fp[i]=0; t->t_fp[i]=0; t->q_fp[i]=0;     // deterministic until add_persptri fills them
    }
    t->z=z;
    return 0;
}
|
||||
// Ch341 — textured triangle: per-vertex real UV (texel coords) + a flat MODULATE color. Requires the
|
||||
// scene `tex0` binding (caller sets s->textured). UV range [0..4095] (10.4 fixed via uv_data).
|
||||
// Ch341 — append a textured triangle: per-vertex UV (integer texel coordinates) plus a flat colour.
// The caller is responsible for the scene `tex0` binding (s->textured).
// uv_data keeps 10 integer bits per coordinate, so UV is limited to [0..1023]; larger values used
// to pass validation here and then wrap silently in the encoder.
// Returns 0 on success, -1 on error (message printed; the scene is left without the triangle).
static int add_tritex(scene_t *s, const int vx[3], const int vy[3], const int uu[3], const int vv[3],
                      uint8_t r, uint8_t g, uint8_t b, uint32_t z){
    enum { UV_MAX = 0x3FFF >> 4 };   // largest coordinate the 14-bit 10.4 field can hold
    uint8_t rr[3]={r,r,r}, gg[3]={g,g,g}, bb[3]={b,b,b};
    if (add_tri_gouraud(s, vx, vy, rr, gg, bb, z)) return -1;
    tri_t *t=&s->tri[s->ntri-1];
    for (int i=0;i<3;i++){
        if (uu[i]<0||uu[i]>UV_MAX||vv[i]<0||vv[i]>UV_MAX){
            fprintf(stderr,"error: UV (%d,%d) out of range [0..%d]\n", uu[i], vv[i], (int)UV_MAX);
            s->ntri--;   // roll back the triangle appended above
            return -1;
        }
        t->u[i]=uu[i]; t->v[i]=vv[i];
    }
    return 0;
}
|
||||
// Append a single-colour triangle by replicating (r,g,b) to all three vertices.
// Returns whatever add_tri_gouraud returns.
static int add_tri_flat(scene_t *s, const int vx[3], const int vy[3], uint8_t r, uint8_t g, uint8_t b, uint32_t z){
    uint8_t reds[3], greens[3], blues[3];
    for (int k=0;k<3;k++){ reds[k]=r; greens[k]=g; blues[k]=b; }
    return add_tri_gouraud(s, vx, vy, reds, greens, blues, z);
}
|
||||
// Append the flat half-tile triangle for grid tile `tile` (0..15).
// Returns 0 on success, -1 on a bad tile number or any add_tri_flat failure.
static int add_tritile(scene_t *s, int tile, uint32_t z, uint8_t r, uint8_t g, uint8_t b){
    int px[3], py[3];
    if (!(tile>=0 && tile<=15)){
        fprintf(stderr,"error: tile %d out of range [0..15]\n", tile);
        return -1;
    }
    tri_in_tile(tile, px, py);
    return add_tri_flat(s, px, py, r, g, b, z);
}
|
||||
// Ch342 — perspective textured triangle: per-vertex screen (x,y) + fixed-point (S_fp,T_fp,Q_fp) + flat
|
||||
// MODULATE color. Requires the scene `tex0` + `persp`. 24-bit fixed-point (FRAC=12), so S/T/Q <= 0xFFFFFF.
|
||||
// Ch342 — append a perspective triangle: per-vertex screen position, fixed-point S/T/Q and a flat
// colour. Each S/T/Q value must fit in 24 bits. The triangle is appended first and removed again
// if an S/T/Q value is out of range.
// Returns 0 on success, -1 on error (message printed).
static int add_persptri(scene_t *s, const int vx[3], const int vy[3],
                        const uint32_t sf[3], const uint32_t tf[3], const uint32_t qf[3],
                        uint8_t r, uint8_t g, uint8_t b, uint32_t z){
    const uint32_t limit = 0xFFFFFF;
    uint8_t reds[3], greens[3], blues[3];
    for (int k=0;k<3;k++){ reds[k]=r; greens[k]=g; blues[k]=b; }
    if (add_tri_gouraud(s, vx, vy, reds, greens, blues, z)) return -1;
    tri_t *added = &s->tri[s->ntri-1];
    int k = 0;
    while (k < 3){
        if (sf[k]>limit || tf[k]>limit || qf[k]>limit){
            fprintf(stderr,"error: ST/Q fixed-point out of 24-bit range\n");
            s->ntri--;
            return -1;
        }
        added->s_fp[k]=sf[k];
        added->t_fp[k]=tf[k];
        added->q_fp[k]=qf[k];
        k++;
    }
    return 0;
}
|
||||
// Append a native rectangle filling grid tile `tile` (0..15) with a flat colour at depth `z`.
// Rects are refused in perspective mode and in sprite mode: neither staging format carries rect
// records, so accepting one would drop it silently.
// Returns 0 on success, -1 on mode/range/capacity error (message printed to stderr).
static int add_rect(scene_t *s, int tile, uint32_t z, uint8_t r, uint8_t g, uint8_t b){
    if (s->perspective){ fprintf(stderr,"error: rects not allowed in perspective mode\n"); return -1; }
    if (s->sprite_mode){ fprintf(stderr,"error: rects not allowed in sprite mode\n"); return -1; }
    if (tile<0||tile>15){ fprintf(stderr,"error: rect tile %d out of range [0..15]\n", tile); return -1; }
    if (s->nrect >= MAX_RECT){ fprintf(stderr,"error: too many rects (max %d)\n", MAX_RECT); return -1; }
    rect_t *q=&s->rect[s->nrect++];
    q->tile=tile; q->z=z; q->r=r; q->g=g; q->b=b;
    return 0;
}
|
||||
// Ch345a — textured + source-over alpha SPRITE: screen rect + per-corner affine UV + flat MODULATE tint.
|
||||
// Requires the scene `tex0` binding (textured); sets sprite_mode. Fails closed if mixed with tris/rects/persp.
|
||||
// Ch345a — append a textured sprite: screen rect (x0,y0)-(x1,y1), the UV at each corner, and a
// flat tint. Requires a prior tex0 binding and an otherwise empty, non-perspective scene; the
// first accepted sprite switches the scene to sprite mode.
// uv_data keeps 10 integer bits per coordinate, so UV is limited to [0..1023]; larger values used
// to pass validation here and then wrap silently in the encoder.
// Returns 0 on success, -1 on error (message printed to stderr).
static int add_sprite(scene_t *s, int x0, int y0, int x1, int y1,
                      int u0, int v0, int u1, int v1, uint8_t r, uint8_t g, uint8_t b){
    enum { UV_MAX = 0x3FFF >> 4 };   // largest coordinate the 14-bit 10.4 field can hold
    if (!s->textured){ fprintf(stderr,"error: sprite requires a prior tex0 binding\n"); return -1; }
    if (s->perspective || s->ntri || s->nrect){
        fprintf(stderr,"error: sprite mode is exclusive with persp/tris/rects\n"); return -1; }
    if (s->nsprite >= MAX_SPRITE){ fprintf(stderr,"error: too many sprites (max %d)\n", MAX_SPRITE); return -1; }
    if (x0<0||x0>4095||y0<0||y0>4095||x1<0||x1>4095||y1<0||y1>4095){
        fprintf(stderr,"error: sprite vertex out of 12-bit range [0..4095]\n"); return -1; }
    if (u0<0||u0>UV_MAX||v0<0||v0>UV_MAX||u1<0||u1>UV_MAX||v1<0||v1>UV_MAX){
        fprintf(stderr,"error: sprite UV out of range [0..%d]\n", (int)UV_MAX); return -1; }
    s->sprite_mode = 1;
    sprite_t *q=&s->sprite[s->nsprite++];
    q->x0=x0; q->y0=y0; q->x1=x1; q->y1=y1;
    q->u0=u0; q->v0=v0; q->u1=u1; q->v1=v1;
    q->r=r; q->g=g; q->b=b;
    return 0;
}
|
||||
|
||||
// ======================== staging builder (byte-exact with bake.py builders) ===================
|
||||
// Emits: word0 = {rect_count[31:16], tri_count[15:0]} (== tri_count when rect_count==0, matching the
|
||||
// colored/gouraud builders), words1..6 = FRAME/ALPHA/TEST/ZBUF/TEX0(unity+MODULATE)/PRIM, then each
|
||||
// triangle as 3x(RGBAQ,UV,XYZ2), then each rect as rect_record (RGBAQ, corner0 XYZ2, corner1 XYZ2).
|
||||
// out[] is zero-padded to FEEDER_STG_WORDS. Returns meaningful word count, or -1 if it won't fit.
|
||||
static int build_staging(const scene_t *s, uint64_t out[FEEDER_STG_WORDS]){
|
||||
int need = s->sprite_mode ? (7 + s->nsprite*6) : (7 + s->ntri*9 + s->nrect*3);
|
||||
if (need > FEEDER_STG_WORDS){
|
||||
fprintf(stderr,"error: scene '%s' needs %d staging words > %d (FEEDER_STG_WORDS) — too large\n",
|
||||
s->name?s->name:"?", need, FEEDER_STG_WORDS);
|
||||
return -1;
|
||||
}
|
||||
if (s->perspective && (!s->textured || s->nrect)){
|
||||
fprintf(stderr,"error: perspective mode requires tex0 and no rects\n"); return -1;
|
||||
}
|
||||
// Ch345a — SPRITE format: word0[33]=1, SPRITE+TME+ABE source-over, per-corner UV, As from texel (TCC).
|
||||
if (s->sprite_mode){
|
||||
if (!s->textured){ fprintf(stderr,"error: sprite mode requires tex0\n"); return -1; }
|
||||
for (int i=0;i<FEEDER_STG_WORDS;i++) out[i]=0;
|
||||
int n=0;
|
||||
out[n++] = (uint64_t)(s->nsprite & 0xFFFF) | ((uint64_t)1 << 33);
|
||||
out[n++] = frame_1_psmct32(CAP_FBW);
|
||||
out[n++] = alpha_pack(0,1,0,1,0); // source-over
|
||||
out[n++] = 0; // TEST_1 (ZTE=0, no depth)
|
||||
out[n++] = 0; // ZBUF_1
|
||||
out[n++] = tex0_pack(s->tex_tbp, s->tex_tbw, 0, s->tex_tw, s->tex_th, s->tex_tfx); // PSMCT32
|
||||
out[n++] = prim_sprite_tme_abe(); // SPRITE+TME+ABE
|
||||
for (int i=0;i<s->nsprite;i++){
|
||||
const sprite_t *q=&s->sprite[i];
|
||||
// RGBAQ A=0x80 (neutral; the vertex alpha is IGNORED — As comes from the texel, TCC=1). Matches
|
||||
// the bake.py golden so test_ps2_feeder.sh can byte-check the sprite staging.
|
||||
uint64_t tint = ((uint64_t)0x80<<24) | ((uint64_t)q->b<<16) | ((uint64_t)q->g<<8) | q->r;
|
||||
out[n++] = tint; out[n++] = uv_data(q->u0,q->v0); out[n++] = xyz2_dataz(q->x0,q->y0,0);
|
||||
out[n++] = tint; out[n++] = uv_data(q->u1,q->v1); out[n++] = xyz2_dataz(q->x1,q->y1,0);
|
||||
}
|
||||
return n;
|
||||
}
|
||||
for (int i=0;i<FEEDER_STG_WORDS;i++) out[i]=0;
|
||||
int n=0;
|
||||
// Ch342 — word0[32] = perspective format flag (feeder emits ST instead of UV; rects forced off).
|
||||
out[n++] = ((uint64_t)(s->nrect & 0xFFFF) << 16) | (uint64_t)(s->ntri & 0xFFFF)
|
||||
| (s->perspective ? ((uint64_t)1 << 32) : 0);
|
||||
out[n++] = frame_1_psmct32(CAP_FBW);
|
||||
out[n++] = alpha_pack(0,1,0,1,0);
|
||||
out[n++] = test1_geq();
|
||||
out[n++] = zbuf1_pack(2,0,0);
|
||||
out[n++] = s->textured ? tex0_pack(s->tex_tbp, s->tex_tbw, 0, s->tex_tw, s->tex_th, s->tex_tfx)
|
||||
: tex0_pack(CAP_TBP1, TMP_TBW, 0, 2, 2, 0); // unity texture + MODULATE (default)
|
||||
out[n++] = s->perspective ? prim_tri_tme() : prim_tri_tme_abe(); // Ch342: ABE=0 -> S1 perspective path
|
||||
for (int i=0;i<s->ntri;i++){
|
||||
const tri_t *t=&s->tri[i];
|
||||
for (int v=0; v<3; v++){
|
||||
if (s->perspective){ // RGBAQ(+Q_fp) / ST(S_fp,T_fp) / XYZ2
|
||||
out[n++] = rgbaq_persp(t->r[v], t->g[v], t->b[v], t->q_fp[v]);
|
||||
out[n++] = st_data(t->s_fp[v], t->t_fp[v]);
|
||||
} else { // legacy: RGBAQ / UV / XYZ2 (byte-exact)
|
||||
out[n++] = rgbaq_data(t->r[v], t->g[v], t->b[v]);
|
||||
out[n++] = uv_data(t->u[v], t->v[v]);
|
||||
}
|
||||
out[n++] = xyz2_dataz(t->x[v], t->y[v], t->z);
|
||||
}
|
||||
}
|
||||
for (int i=0;i<s->nrect;i++){
|
||||
const rect_t *q=&s->rect[i];
|
||||
int ox=(q->tile%4)*16, oy=(q->tile/4)*16;
|
||||
out[n++] = rgbaq_data(q->r, q->g, q->b);
|
||||
out[n++] = xyz2_dataz(ox+1, oy+1, q->z);
|
||||
out[n++] = xyz2_dataz(ox+14, oy+14, q->z);
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
// ======================== built-in named scenes (reproduce the Ch333-338 goldens) ===================
|
||||
#define RED 0xFF,0x00,0x00
|
||||
#define GREEN 0x00,0xFF,0x00
|
||||
#define BLUE 0x00,0x00,0xFF
|
||||
#define YELLOW 0xFF,0xFF,0x00
|
||||
#define WHITE 0xFF,0xFF,0xFF
|
||||
|
||||
// Append one flat tile triangle per entry of tiles[0..n-1], all with the same depth and colour.
// Returns 0 on success, -1 on the first failure.
static int add_tritile_list(scene_t *s, const int *tiles, int n, uint32_t z, uint8_t r, uint8_t g, uint8_t b){
    for (int i=0;i<n;i++) if (add_tritile(s,tiles[i],z,r,g,b)) return -1;
    return 0;
}

// Reset `s` and populate it with the built-in scene called `name`.
// Returns 0 on success and -1 on any failure (unknown name or a rejected primitive). Three
// branches used to return the raw `||` result (1) on failure; every path now returns -1.
static int build_named(const char *name, scene_t *s){
    // Filler tiles shared by the zpersist scenes: batch0 skips tile 5, batch1 covers 8..12.
    static const int batch0_fill[7]={0,1,2,3,4,6,7}, batch1_fill[5]={8,9,10,11,12};

    scene_reset(s, name);
    if (!strcmp(name,"color-tri")){                  // Ch333 feeder_color_tri.mem
        int failed = add_tritile(s,0,0x5000,RED) || add_tritile(s,5,0x5100,GREEN) || add_tritile(s,10,0x5200,BLUE);
        return failed ? -1 : 0;
    }
    if (!strcmp(name,"sprite")){                     // Ch345a feeder_sprite.mem — 3 textured-alpha sprites
        // tex0 bound at TBP=64 with TW=TH=3 and TFX=0; each sprite uses tint 128,128,128.
        s->textured=1; s->tex_tbp=64; s->tex_tbw=1; s->tex_tw=3; s->tex_th=3; s->tex_tfx=0;
        int failed = add_sprite(s, 8,24,24,40, 0,0,8,8, 128,128,128)
                  || add_sprite(s, 26,24,42,40, 0,0,8,8, 128,128,128)
                  || add_sprite(s, 44,24,60,40, 0,0,8,8, 128,128,128);
        return failed ? -1 : 0;
    }
    if (!strcmp(name,"native-rect")){                // Ch334 feeder_native_rect.mem (3 rects, 0 tris)
        int failed = add_rect(s,0,0x5000,RED) || add_rect(s,5,0x5100,GREEN) || add_rect(s,10,0x5200,BLUE);
        return failed ? -1 : 0;
    }
    if (!strcmp(name,"gouraud-tri")){                // Ch335 feeder_gouraud_tri.mem (1 gouraud tri, tile 0)
        int vx[3],vy[3]; tri_in_tile(0,vx,vy);
        uint8_t rr[3]={0xFF,0x00,0x00}, gg[3]={0x00,0xFF,0x00}, bb[3]={0x00,0x00,0xFF};
        return add_tri_gouraud(s,vx,vy,rr,gg,bb,0x5000) ? -1 : 0;
    }
    if (!strcmp(name,"accum")){                      // Ch336 feeder_accum.mem
        for (int t=0;t<14;t++){
            int rc = (t<8) ? add_tritile(s,t,0x5000+t*0x100,RED)
                           : add_tritile(s,t,0x5000+t*0x100,BLUE);
            if (rc) return -1;
        }
        return 0;
    }
    if (!strcmp(name,"retrigger-a")){                // Ch337 feeder_scene_a.mem
        for (int t=0;t<14;t++) if (add_tritile(s,t,0x5000+t*0x100,RED)) return -1;
        return 0;
    }
    if (!strcmp(name,"retrigger-b")){                // Ch337 feeder_scene_b.mem (tiles 2..15)
        for (int t=2;t<16;t++) if (add_tritile(s,t,0x5000+(t-2)*0x100,BLUE)) return -1;
        return 0;
    }
    if (!strcmp(name,"zpersist-near") || !strcmp(name,"zpersist-far")){   // Ch338 flat overlap
        int near_first = !strcmp(name,"zpersist-near");
        // batch0 = overlap prim + 7 fillers ; batch1 = overlap prim + 5 fillers.
        if (near_first){
            if (add_tritile(s,5,0x7000,RED)) return -1;                    // near RED (batch0)
            if (add_tritile_list(s,batch0_fill,7,0x6000,RED)) return -1;
            if (add_tritile(s,5,0x5000,BLUE)) return -1;                   // far BLUE (batch1)
            if (add_tritile_list(s,batch1_fill,5,0x6000,BLUE)) return -1;
        } else {
            if (add_tritile(s,5,0x5000,BLUE)) return -1;                   // far BLUE (batch0)
            if (add_tritile_list(s,batch0_fill,7,0x6000,BLUE)) return -1;
            if (add_tritile(s,5,0x7000,RED)) return -1;                    // near RED (batch1)
            if (add_tritile_list(s,batch1_fill,5,0x6000,RED)) return -1;
        }
        return 0;
    }
    if (!strcmp(name,"zpersist-grad")){              // Ch338 feeder_zpersist_grad.mem (gouraud near + flat far)
        int vx[3],vy[3]; tri_in_tile(5,vx,vy);
        uint8_t rr[3]={0xFF,0x00,0x00}, gg[3]={0x00,0xFF,0x00}, bb[3]={0x00,0x00,0xFF};
        if (add_tri_gouraud(s,vx,vy,rr,gg,bb,0x7000)) return -1;           // near gradient (batch0)
        if (add_tritile_list(s,batch0_fill,7,0x6000,GREEN)) return -1;
        if (add_tritile(s,5,0x5000,WHITE)) return -1;                      // far flat white (batch1)
        if (add_tritile_list(s,batch1_fill,5,0x6000,WHITE)) return -1;
        return 0;
    }
    fprintf(stderr,"error: unknown scene '%s' (try --list)\n", name);
    return -1;
}
|
||||
// NULL-terminated table of built-in scene names.
// NOTE(review): build_named also accepts "sprite", which is not listed here — confirm whether
// that omission is intentional before adding it.
static const char *named_scenes[] = {
    "color-tri",
    "native-rect",
    "gouraud-tri",
    "accum",
    "retrigger-a",
    "retrigger-b",
    "zpersist-near",
    "zpersist-far",
    "zpersist-grad",
    NULL
};
|
||||
|
||||
// ======================== scene-file parser ===================
|
||||
// Parses tokens into one or more scenes (split on `go`). Returns count, or -1 on error.
|
||||
// scenes[] must hold at least max_scenes entries.
|
||||
// Parse `tok` as a whole-token integer (base auto-detected by strtol: decimal, 0x hex, 0 octal).
// Returns 0 and stores the value in *out on success. Returns -1, leaving *out untouched, when
// the token is empty, has trailing characters, or is out of range for long.
static int parse_int(const char *tok, long *out){
    char *end = NULL;
    errno = 0;
    long v = strtol(tok, &end, 0);
    // end == tok means no digits were consumed; an empty token used to be accepted as 0.
    if (end == tok || *end != '\0' || errno != 0) return -1;
    *out = v;
    return 0;
}
|
||||
// Check one r,g,b triple read from a scene file. Returns 0 if all three are in [0..255],
// otherwise prints an error and returns -1 (the values would wrap when narrowed to uint8_t).
static int scene_rgb_check(const long *c){
    for (int i=0;i<3;i++){
        if (c[i]<0 || c[i]>255){
            fprintf(stderr,"error: color component %ld out of range [0..255]\n", c[i]);
            return -1;
        }
    }
    return 0;
}

// Parse the text scene file at `path` into scenes[0..max_scenes-1], splitting on `go`.
// Each line is one command followed by integer arguments; '#' starts a comment.
// Returns the number of scenes parsed, or -1 on any error (message on stderr).
// Changes from the earlier behaviour, all fail-closed:
//   * a file holding exactly max_scenes `go`-terminated scenes is accepted (it used to be
//     rejected because the slot after the last scene was claimed eagerly);
//   * colour components outside [0..255] are rejected instead of being truncated;
//   * tri/trig/tritile/tritex are rejected in a perspective scene, and `persp` is rejected once
//     such triangles exist, because those triangles carry no S/T/Q for the perspective format.
static int parse_scene_file(const char *path, scene_t *scenes, int max_scenes){
    if (max_scenes < 1){ fprintf(stderr,"error: too many scenes\n"); return -1; }
    FILE *f=fopen(path,"r");
    if (!f){ fprintf(stderr,"error: cannot open '%s': %s\n", path, strerror(errno)); return -1; }
    int nsc=0; scene_reset(&scenes[0], path);
    char line[512]; int lineno=0;
    while (fgets(line,sizeof line,f)){
        lineno++;
        char *h=strchr(line,'#'); if (h) *h=0;                 // strip comment
        char *tok=strtok(line," \t\r\n"); if (!tok) continue;
        long a[24]; int na=0; char op[32];                     // persptri needs 19 args
        snprintf(op,sizeof op,"%s",tok);
        for (char *t=strtok(NULL," \t\r\n"); t && na<24; t=strtok(NULL," \t\r\n")){
            if (parse_int(t,&a[na])){ fprintf(stderr,"error: %s:%d bad number '%s'\n",path,lineno,t); fclose(f); return -1; }
            na++;
        }
        if (!strcmp(op,"go")){
            // Close the current scene if it has content; open the next slot only if one exists.
            if (nsc < max_scenes && !scene_empty(&scenes[nsc])){
                nsc++;
                if (nsc < max_scenes) scene_reset(&scenes[nsc], path);
            }
            continue;
        }
        // Any other command needs a free scene slot.
        if (nsc >= max_scenes){ fprintf(stderr,"error: too many scenes\n"); fclose(f); return -1; }
        scene_t *s=&scenes[nsc];
        int rc=0;
        int plain_tri = !strcmp(op,"tri") || !strcmp(op,"trig") || !strcmp(op,"tritile") || !strcmp(op,"tritex");
        if (plain_tri && s->perspective){
            fprintf(stderr,"error: %s:%d %s not allowed in a perspective scene (use persptri)\n",path,lineno,op);
            fclose(f); return -1;
        }
        if (!strcmp(op,"tri") && na==10){
            int vx[3]={(int)a[0],(int)a[2],(int)a[4]}, vy[3]={(int)a[1],(int)a[3],(int)a[5]};
            rc = scene_rgb_check(&a[7]) ? -1
               : add_tri_flat(s,vx,vy,(uint8_t)a[7],(uint8_t)a[8],(uint8_t)a[9],(uint32_t)a[6]);
        } else if (!strcmp(op,"trig") && na==16){
            int vx[3]={(int)a[0],(int)a[5],(int)a[10]}, vy[3]={(int)a[1],(int)a[6],(int)a[11]};
            uint8_t rr[3]={(uint8_t)a[2],(uint8_t)a[7],(uint8_t)a[12]};
            uint8_t gg[3]={(uint8_t)a[3],(uint8_t)a[8],(uint8_t)a[13]};
            uint8_t bb[3]={(uint8_t)a[4],(uint8_t)a[9],(uint8_t)a[14]};
            rc = (scene_rgb_check(&a[2]) || scene_rgb_check(&a[7]) || scene_rgb_check(&a[12])) ? -1
               : add_tri_gouraud(s,vx,vy,rr,gg,bb,(uint32_t)a[15]);
        } else if (!strcmp(op,"tritile") && na==5){
            rc = scene_rgb_check(&a[2]) ? -1
               : add_tritile(s,(int)a[0],(uint32_t)a[1],(uint8_t)a[2],(uint8_t)a[3],(uint8_t)a[4]);
        } else if (!strcmp(op,"rect") && na==5){
            rc = scene_rgb_check(&a[2]) ? -1
               : add_rect(s,(int)a[0],(uint32_t)a[1],(uint8_t)a[2],(uint8_t)a[3],(uint8_t)a[4]);
        } else if (!strcmp(op,"tex0") && na==5){               // tex0 TBP TBW TW TH TFX — scene-level texture bind
            s->textured=1; s->tex_tbp=(uint32_t)a[0]; s->tex_tbw=(uint32_t)a[1];
            s->tex_tw=(uint32_t)a[2]; s->tex_th=(uint32_t)a[3]; s->tex_tfx=(uint32_t)a[4];
        } else if (!strcmp(op,"tritex") && na==16){            // tritex x0 y0 u0 v0 x1 y1 u1 v1 x2 y2 u2 v2 z r g b
            if (!s->textured){ fprintf(stderr,"error: %s:%d tritex needs a prior tex0\n",path,lineno); fclose(f); return -1; }
            int vx[3]={(int)a[0],(int)a[4],(int)a[8]}, vy[3]={(int)a[1],(int)a[5],(int)a[9]};
            int uu[3]={(int)a[2],(int)a[6],(int)a[10]}, vv[3]={(int)a[3],(int)a[7],(int)a[11]};
            rc = scene_rgb_check(&a[13]) ? -1
               : add_tritex(s,vx,vy,uu,vv,(uint8_t)a[13],(uint8_t)a[14],(uint8_t)a[15],(uint32_t)a[12]);
        } else if (!strcmp(op,"persp") && na==0){              // mark scene perspective (needs prior tex0)
            if (!s->textured){ fprintf(stderr,"error: %s:%d persp needs a prior tex0\n",path,lineno); fclose(f); return -1; }
            if (!s->perspective && s->ntri){
                fprintf(stderr,"error: %s:%d persp must precede the scene's triangles\n",path,lineno); fclose(f); return -1; }
            s->perspective=1;
        } else if (!strcmp(op,"persptri") && na==19){          // persptri x y s t q (x3) z r g b — fixed-point ST/Q
            if (!s->perspective){ fprintf(stderr,"error: %s:%d persptri needs a prior persp\n",path,lineno); fclose(f); return -1; }
            int vx[3]={(int)a[0],(int)a[5],(int)a[10]}, vy[3]={(int)a[1],(int)a[6],(int)a[11]};
            uint32_t sf[3]={(uint32_t)a[2],(uint32_t)a[7],(uint32_t)a[12]};
            uint32_t tf[3]={(uint32_t)a[3],(uint32_t)a[8],(uint32_t)a[13]};
            uint32_t qf[3]={(uint32_t)a[4],(uint32_t)a[9],(uint32_t)a[14]};
            rc = scene_rgb_check(&a[16]) ? -1
               : add_persptri(s,vx,vy,sf,tf,qf,(uint8_t)a[16],(uint8_t)a[17],(uint8_t)a[18],(uint32_t)a[15]);
        } else if (!strcmp(op,"sprite") && na==11){            // Ch345a — sprite x0 y0 x1 y1 u0 v0 u1 v1 r g b
            // textured sprite (add_sprite enforces the prior tex0); r/g/b is the tint.
            rc = scene_rgb_check(&a[8]) ? -1
               : add_sprite(s,(int)a[0],(int)a[1],(int)a[2],(int)a[3],(int)a[4],(int)a[5],(int)a[6],(int)a[7],
                            (uint8_t)a[8],(uint8_t)a[9],(uint8_t)a[10]);
        } else {
            fprintf(stderr,"error: %s:%d malformed command '%s' (got %d args)\n",path,lineno,op,na); fclose(f); return -1;
        }
        if (rc){ fprintf(stderr," (at %s:%d)\n",path,lineno); fclose(f); return -1; }
    }
    fclose(f);
    if (nsc < max_scenes && !scene_empty(&scenes[nsc])) nsc++;   // trailing scene with no closing 'go'
    return nsc;
}
|
||||
|
||||
// ======================== bridge I/O ===================
|
||||
// Handle for the memory-mapped bridge register window.
//   base : start of the mapped window (byte-addressed)
//   dry  : non-zero = dry-run; the accessors below never dereference `base`
typedef struct { volatile uint8_t *base; int dry; } bridge_t;

// Store the 32-bit value `v` at byte offset `off` inside the window. No-op in dry-run mode.
static void wr32(bridge_t *br, int off, uint32_t v){
    if (br->dry) return;
    volatile uint32_t *reg = (volatile uint32_t*)(br->base + off);
    *reg = v;
}

// Load the 32-bit value at byte offset `off` inside the window. Dry-run mode always yields 0.
static uint32_t rd32(bridge_t *br, int off){
    if (br->dry) return 0;
    volatile uint32_t *reg = (volatile uint32_t*)(br->base + off);
    return *reg;
}
|
||||
|
||||
// Poll bit 0 of the status register until it reads as set.
// Returns 0 once the bit is seen (or immediately in dry-run mode), -1 after 300000 unsuccessful
// polls, each followed by a 10 us sleep; the timeout is reported on stderr.
static int wait_ready(bridge_t *br){
    if (br->dry) return 0;

    int polls_left = 300000;
    while (polls_left-- > 0){
        if (rd32(br,OFF_STATUS) & 1) return 0;
        usleep(10);
    }

    fprintf(stderr,"error: feeder never reported ready\n");
    return -1;
}
|
||||
|
||||
// Stream one scene: reset addr, write {lo,hi} per word, GO, wait completion. Prints diagnostics.
|
||||
// Encode scene `s` into staging words and push it through the bridge, printing diagnostics.
// Register sequence (order matters): wait ready -> write OFF_STATUS (resets the staging write
// address) -> one OFF_LO + OFF_HI write pair per 64-bit word -> read back the write address from
// OFF_LO and compare with the word count -> wait ready -> write OFF_GO -> wait ready -> read the
// record/wait counters from OFF_HI / OFF_GO.
// Returns 0 on success, -1 if encoding fails, the bridge never becomes ready, or the staged
// address does not match. A record-count mismatch is only warned about.
static int stream_scene(bridge_t *br, const scene_t *s){
    uint64_t staged[FEEDER_STG_WORDS];
    const int nwords = build_staging(s, staged);
    if (nwords < 0) return -1;

    // Host-side expectation: the feeder expands each rect into 2 triangles.
    const int prims   = s->ntri + s->nrect*2;
    const int batches = (prims + FIFO_DEPTH - 1) / FIFO_DEPTH;
    printf("[scene %-14s] tris=%d rects=%d -> staged_words=%d expand_prims=%d batches~=%d\n",
           s->name?s->name:"?", s->ntri, s->nrect, nwords, prims, batches);

    if (wait_ready(br) != 0) return -1;

    wr32(br, OFF_STATUS, 0);                                   // reset staging write address
    for (const uint64_t *p = staged; p != staged + nwords; ++p){
        wr32(br, OFF_LO, (uint32_t)(*p & 0xFFFFFFFF));         // low half first ...
        wr32(br, OFF_HI, (uint32_t)(*p >> 32));                // ... then the high half
    }

    const uint32_t addr = rd32(br, OFF_LO);
    if (!br->dry && (int)addr != nwords){
        fprintf(stderr,"error: bridge addr=%u after streaming %d words\n", addr, nwords);
        return -1;
    }

    if (wait_ready(br) != 0) return -1;                        // staging accepted
    wr32(br, OFF_GO, 1);                                       // trigger
    if (wait_ready(br) != 0) return -1;                        // Ch337: ready only after the WHOLE scene drained

    const uint32_t records = rd32(br, OFF_HI);
    const uint32_t waits   = rd32(br, OFF_GO);
    const char *suffix = br->dry ? " (dry-run, no hw readback)" : "";
    printf(" staged_addr=%u records=%u waits=%u completion=ready%s\n", addr, records, waits, suffix);

    if (!br->dry && (int)records != prims)
        fprintf(stderr," warn: hw records=%u != host-expected %d (rect expansion / format?)\n", records, prims);
    return 0;
}
|
||||
|
||||
// ======================== main ===================
|
||||
static void dump_words(const uint64_t w[FEEDER_STG_WORDS]){
|
||||
for (int i=0;i<FEEDER_STG_WORDS;i++) printf("%016llx\n", (unsigned long long)w[i]);
|
||||
}
|
||||
// Write the command-line synopsis to stderr.
static void usage(void){
    static const char synopsis[] =
        "usage: ps2_feeder [--base 0xADDR] [--dry-run] <scene>...\n"
        " ps2_feeder [--base 0xADDR] [--dry-run] -f <scenefile>\n"
        " ps2_feeder --dump <scene> | --dump-file <scenefile> | --list\n";
    fputs(synopsis, stderr);
}
|
||||
|
||||
int main(int argc, char **argv){
|
||||
unsigned long base = 0x40000000UL;
|
||||
int dry = 0;
|
||||
const char *dump = NULL, *dumpfile = NULL, *scenefile = NULL;
|
||||
const char *names[64]; int nnames=0;
|
||||
|
||||
for (int i=1;i<argc;i++){
|
||||
if (!strcmp(argv[i],"--list")){ for (const char**p=named_scenes;*p;p++) printf("%s\n",*p); return 0; }
|
||||
else if (!strcmp(argv[i],"--base") && i+1<argc) base=strtoul(argv[++i],NULL,0);
|
||||
else if (!strcmp(argv[i],"--dry-run")) dry=1;
|
||||
else if (!strcmp(argv[i],"--dump") && i+1<argc) dump=argv[++i];
|
||||
else if (!strcmp(argv[i],"--dump-file") && i+1<argc) dumpfile=argv[++i];
|
||||
else if (!strcmp(argv[i],"-f") && i+1<argc) scenefile=argv[++i];
|
||||
else if (argv[i][0]=='-'){ usage(); return 2; }
|
||||
else if (nnames<64) names[nnames++]=argv[i];
|
||||
}
|
||||
|
||||
// ---- dump modes (no board access) ----
|
||||
if (dump){
|
||||
scene_t s; if (build_named(dump,&s)) return 1;
|
||||
uint64_t w[FEEDER_STG_WORDS]; if (build_staging(&s,w)<0) return 1;
|
||||
dump_words(w); return 0;
|
||||
}
|
||||
if (dumpfile){
|
||||
scene_t scenes[16]; int nsc=parse_scene_file(dumpfile,scenes,16); if (nsc<0) return 1;
|
||||
if (nsc==0){ fprintf(stderr,"error: no scenes in '%s'\n",dumpfile); return 1; }
|
||||
uint64_t w[FEEDER_STG_WORDS]; if (build_staging(&scenes[0],w)<0) return 1;
|
||||
dump_words(w); return 0;
|
||||
}
|
||||
|
||||
// ---- build the work list (named scenes or a scene file) ----
|
||||
scene_t scenes[64]; int nsc=0;
|
||||
if (scenefile){ nsc=parse_scene_file(scenefile,scenes,64); if (nsc<0) return 1; }
|
||||
for (int i=0;i<nnames;i++){ if (nsc>=64){ fprintf(stderr,"too many scenes\n"); return 1; } if (build_named(names[i],&scenes[nsc])) return 1; nsc++; }
|
||||
if (nsc==0){ usage(); return 2; }
|
||||
// validate every scene encodes before touching hardware
|
||||
for (int i=0;i<nsc;i++){ uint64_t w[FEEDER_STG_WORDS]; if (build_staging(&scenes[i],w)<0) return 1; }
|
||||
|
||||
// ---- open the bridge (unless dry-run) ----
|
||||
bridge_t br = { .base=NULL, .dry=dry };
|
||||
int fd=-1; void *map=NULL;
|
||||
if (!dry){
|
||||
fd=open("/dev/mem", O_RDWR|O_SYNC);
|
||||
if (fd<0){ fprintf(stderr,"error: open /dev/mem: %s\n", strerror(errno)); return 1; }
|
||||
map=mmap(NULL, 0x1000, PROT_READ|PROT_WRITE, MAP_SHARED, fd, (off_t)base);
|
||||
if (map==MAP_FAILED){ fprintf(stderr,"error: mmap 0x%lx: %s\n", base, strerror(errno)); close(fd); return 1; }
|
||||
br.base=(volatile uint8_t*)map;
|
||||
printf("ps2_feeder: bridge @ 0x%lx, %d scene(s)\n", base, nsc);
|
||||
} else {
|
||||
printf("ps2_feeder: DRY-RUN (encode+validate only), %d scene(s)\n", nsc);
|
||||
}
|
||||
|
||||
int rc=0;
|
||||
for (int i=0;i<nsc;i++){ if (stream_scene(&br,&scenes[i])){ rc=1; break; } }
|
||||
|
||||
if (!dry){ munmap(map,0x1000); close(fd); }
|
||||
printf("ps2_feeder: %s\n", rc?"FAILED":"done");
|
||||
return rc;
|
||||
}
|
||||
@@ -0,0 +1,120 @@
|
||||
// retroDE_ps2 — ps2_lpddr_probe_test (Ch352 board bring-up: isolate the HPS->LPDDR write/read path)
|
||||
//
|
||||
// Codex debug step 1: prove the write-probe + read-probe ALONE, with a tiny known pattern, BEFORE the texture
|
||||
// cache or renderer is involved. Writes N known words to a scratch LPDDR region, reads them back, and reports
|
||||
// per-word: did the auto-increment pointer (0x04C) advance? did the stable write_pending flag (0x054 bit2) clear? does the readback
|
||||
// match? This pinpoints whether "word 0 ok, rest 0" is a PACING race, an AUTO-INCREMENT bug, or a read-probe
|
||||
// latency issue — none of which the cache/render path can disambiguate.
|
||||
//
|
||||
// Build on the board: gcc -O2 -o ps2_lpddr_probe_test ps2_lpddr_probe_test.c
|
||||
// Run: sudo ./ps2_lpddr_probe_test [N=64] [--base 0x40000000] [--lpddr 0x100000]
|
||||
//
|
||||
// Pattern: word i = 0x11110000 + i (non-trivial, low bytes change per word so a stuck/striped path is obvious).
|
||||
// Scratch LPDDR base defaults to 0x100000 (1 MiB) — clear of the texture region at 0x200000.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#define OFF_LPDDR_STATUS 0x02C // R: [3] rd_pending
|
||||
#define OFF_LPDDR_RDADDR 0x03C // W: read byte addr + trigger ; R: result word
|
||||
#define OFF_LPDDR_WRADDR 0x04C // RW: write byte addr (auto-inc +4 per WRDATA write)
|
||||
#define OFF_LPDDR_WRDATA 0x050 // W: data word -> one 32-bit LPDDR write + addr+=4
|
||||
#define OFF_TEX_FILL_CTRL 0x054 // R: [0]fill_done [1]wr_busy
|
||||
#define OFF_LPDDR_WR_ERRS 0x06C // R: write-probe non-OKAY (BRESP) responses
|
||||
#define OFF_LPDDR_STATUS2 0x02C // R: [0]wr_idle [3]rd_pending [4]scan_valid [5]scan_err
|
||||
#define OFF_TEX_FILL_BEATS 0x058 // R: cache-fill beats (emif_clk READ liveness)
|
||||
#define OFF_TEX_FILL_BYTES 0x05C // R: cache-fill bytes
|
||||
#define OFF_TEX_RD_ERRS 0x068 // R: cache-fill non-OKAY read responses
|
||||
#define WR_BUSY_BIT 0x2
|
||||
#define WR_PENDING 0x4 // 0x054 bit2 — Ch352 STABLE write-done flag (poll this, not transient wr_busy)
|
||||
#define RD_PENDING 0x8
|
||||
#define FILL_DONE 0x1
|
||||
|
||||
// Mapped bridge register window (byte-addressed base pointer).
typedef struct { volatile uint8_t *base; } br_t;

// Store the 32-bit value `v` at byte offset `o` inside the window.
static void w(br_t *b,int o,uint32_t v){
    volatile uint32_t *reg = (volatile uint32_t*)(b->base + o);
    *reg = v;
}

// Load the 32-bit value at byte offset `o` inside the window.
static uint32_t r(br_t *b,int o){
    volatile uint32_t *reg = (volatile uint32_t*)(b->base + o);
    return *reg;
}
|
||||
|
||||
int main(int argc,char**argv){
|
||||
unsigned long base=0x40000000UL, lpddr=0x00100000UL; int N=64;
|
||||
char *e=getenv("PS2_BRIDGE_BASE"); if(e) base=strtoul(e,NULL,0);
|
||||
for(int i=1;i<argc;i++){
|
||||
if(!strcmp(argv[i],"--base")&&i+1<argc) base=strtoul(argv[++i],NULL,0);
|
||||
else if(!strcmp(argv[i],"--lpddr")&&i+1<argc) lpddr=strtoul(argv[++i],NULL,0);
|
||||
else if(argv[i][0]!='-') N=atoi(argv[i]);
|
||||
}
|
||||
if(N<1||N>4096) N=64;
|
||||
printf("[probe] N=%d bridge=0x%lx lpddr-scratch=0x%lx pattern word[i]=0x11110000+i\n", N, base, lpddr);
|
||||
|
||||
int fd=open("/dev/mem",O_RDWR|O_SYNC);
|
||||
if(fd<0){ fprintf(stderr,"open /dev/mem (root?): %s\n",strerror(errno)); return 1; }
|
||||
void*map=mmap(NULL,0x1000,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(off_t)base);
|
||||
if(map==MAP_FAILED){ fprintf(stderr,"mmap: %s\n",strerror(errno)); return 1; }
|
||||
br_t b={(volatile uint8_t*)map};
|
||||
|
||||
// ---- WRITE: set WRADDR once, then stream; per word verify pointer advance + observe busy high->low ----
|
||||
uint32_t err0 = r(&b,OFF_LPDDR_WR_ERRS);
|
||||
w(&b,OFF_LPDDR_WRADDR,(uint32_t)lpddr);
|
||||
int ptr_bad=0, pending_stuck=0;
|
||||
for(int i=0;i<N;i++){
|
||||
uint32_t data=0x11110000u+(uint32_t)i;
|
||||
w(&b,OFF_LPDDR_WRDATA,data);
|
||||
// Wait for the STABLE write_pending flag (0x054 bit2) to clear = the write-probe completed THIS write.
|
||||
// Unlike the transient wr_busy level, this can't be missed by a slow Linux poll (Codex).
|
||||
int g=0; while((r(&b,OFF_TEX_FILL_CTRL)&WR_PENDING) && g<2000000) g++;
|
||||
if(g>=2000000) pending_stuck++;
|
||||
uint32_t ptr=r(&b,OFF_LPDDR_WRADDR), exp=(uint32_t)lpddr+(uint32_t)(i+1)*4;
|
||||
if(ptr!=exp){ if(ptr_bad<6) printf(" PTR after word %d: 0x%08x exp 0x%08x\n",i,ptr,exp); ptr_bad++; }
|
||||
}
|
||||
uint32_t err1=r(&b,OFF_LPDDR_WR_ERRS);
|
||||
printf("[probe] wrote %d words. ptr-mismatches=%d write_pending_stuck=%d wr_bresp_errs: %u->%u\n",
|
||||
N, ptr_bad, pending_stuck, err0, err1);
|
||||
|
||||
// ---- READBACK: per word set RDADDR, poll rd_pending clear, read result. Count rd_pending TIMEOUTS
|
||||
// explicitly (Codex): if rd_pending never clears, the value read below is STALE — not a real LPDDR read. ----
|
||||
int rd_bad=0, rd_timeouts=0;
|
||||
for(int i=0;i<N;i++){
|
||||
uint32_t addr=(uint32_t)lpddr+(uint32_t)i*4, exp=0x11110000u+(uint32_t)i;
|
||||
w(&b,OFF_LPDDR_RDADDR,addr);
|
||||
int g=0; while((r(&b,OFF_LPDDR_STATUS)&RD_PENDING)&&g<1000000) g++;
|
||||
if(g>=1000000) rd_timeouts++; // rd_pending stuck -> the read is stale
|
||||
uint32_t got=r(&b,OFF_LPDDR_RDADDR);
|
||||
if(got!=exp){ if(rd_bad<8) printf(" RD word %d @0x%08x: got 0x%08x exp 0x%08x\n",i,addr,got,exp); rd_bad++; }
|
||||
}
|
||||
printf("[probe] readback mismatches=%d/%d rd_pending_timeouts=%d%s\n", rd_bad, N, rd_timeouts,
|
||||
rd_timeouts? " (reads were STALE, not real LPDDR reads)":"");
|
||||
|
||||
// ---- EMIF READ-domain liveness: arm the texture-cache fill (issues emif_clk reads from LPDDR and
|
||||
// counts beats), independent of the write path. This disambiguates the failure:
|
||||
// beats advance -> emif READ domain is ALIVE => the bug is WRITE-specific (probe/pulse wiring).
|
||||
// beats stay 0 -> the whole emif_clk domain is DEAD (EMIF didn't calibrate / clock / reset),
|
||||
// which by itself explains busy-never-high AND the all-zero readback.
|
||||
// Snapshot BEFORE arming (Codex): the fill counters reset only after the new toggle reaches the EMIF
|
||||
// domain, so a stale fill_done=1 / beats from an earlier fill would be a false positive. Require, in order:
|
||||
// fill_done seen LOW after arming (the NEW fill is in flight), then fill_done HIGH (it completed), with
|
||||
// final beats=8192 / bytes=262144 / rd_errs=0.
|
||||
uint32_t st0 = r(&b,OFF_LPDDR_STATUS2);
|
||||
uint32_t fd0 = (r(&b,OFF_TEX_FILL_CTRL)&FILL_DONE)?1:0;
|
||||
uint32_t beats0 = r(&b,OFF_TEX_FILL_BEATS);
|
||||
w(&b,OFF_TEX_FILL_CTRL,FILL_DONE); // arm: toggle fill_start
|
||||
int saw_low=0; for(int g=0;g<8000000;g++){ if(!(r(&b,OFF_TEX_FILL_CTRL)&FILL_DONE)){saw_low=1;break;} }
|
||||
int fdone=0; for(int g=0;g<8000000;g++){ if( r(&b,OFF_TEX_FILL_CTRL)&FILL_DONE){fdone=1;break;} }
|
||||
uint32_t beats=r(&b,OFF_TEX_FILL_BEATS), bytes=r(&b,OFF_TEX_FILL_BYTES), rderr=r(&b,OFF_TEX_RD_ERRS);
|
||||
int fill_ok = saw_low && fdone && beats==8192 && bytes==262144 && rderr==0;
|
||||
printf("[probe] EMIF fill liveness: pre(status=0x%08x fill_done=%u beats=%u) -> armed: saw_done_low=%d final_done=%d\n",
|
||||
st0, fd0, beats0, saw_low, fdone);
|
||||
printf("[probe] final beats=%u (exp 8192) bytes=%u (exp 262144) rd_errs=%u (exp 0)\n", beats, bytes, rderr);
|
||||
if(fill_ok) printf("[probe] => emif READ domain ALIVE and well — failure is WRITE-specific.\n");
|
||||
else if(beats||fdone) printf("[probe] => emif read PARTIALLY working (counts off) — investigate fill path / cal margin.\n");
|
||||
else printf("[probe] => emif_clk domain appears DEAD (cal/clock/reset) — explains busy-never-high + zero readback.\n");
|
||||
|
||||
if(rd_bad==0 && ptr_bad==0) printf("[probe] write/read path clean.\n");
|
||||
else printf("[probe] FAIL — write/read path broken; see liveness line above for which domain.\n");
|
||||
munmap(map,0x1000); close(fd);
|
||||
return (rd_bad||ptr_bad)?1:0;
|
||||
}
|
||||
@@ -0,0 +1,147 @@
|
||||
// retroDE_ps2 — ps2_sh3_tex_upload (Ch352)
|
||||
//
|
||||
// Uploads the reconstructed 512x512 PSMT8 SH3 texture (65536 32-bit words) into FPGA-private LPDDR4B via the
|
||||
// PS2 HPS-bridge write-probe, verifies the readback, arms the texture-cache fill, and retriggers the feeder.
|
||||
// One command — mmap'd register pokes (NOT 65536 devmem process spawns). Same bridge protocol as ps2_feeder.c
|
||||
// and docs/hardware/ps2_lpddr_tex_test.sh (Ch322), just scaled to the full 256 KiB texture.
|
||||
//
|
||||
// Build on the board: gcc -O2 -o ps2_sh3_tex_upload ps2_sh3_tex_upload.c
|
||||
// Run (after fit+boot): sudo ./ps2_sh3_tex_upload sh3_real_tex_lpddr.mem
|
||||
// (copy sh3_real_tex_lpddr.mem from sim/data/top_psmct32_raster_demo/ to the board alongside the binary.)
|
||||
//
|
||||
// Sequence: (1) write WRADDR=0x200000, stream 65536 words to WRDATA; (2) read each back via the read-probe and
|
||||
// confirm sum32/xor32 match the file; (3) arm cache fill (0x054), poll fill_done, check beats/bytes/rd_errs;
|
||||
// (4) pulse the feeder retrigger (0x0E8) so the scene re-renders with the now-warm cache.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#define OFF_LPDDR_STATUS 0x02C // R: [3] rd_pending (read-probe in flight)
|
||||
#define OFF_LPDDR_RDADDR 0x03C // W: set read byte addr + trigger ; R: 32-bit word
|
||||
#define OFF_LPDDR_WRADDR 0x04C // W: set LPDDR byte addr (auto-increments +4 per WRDATA write)
|
||||
#define OFF_LPDDR_WRDATA 0x050 // W: data word -> single 32-bit LPDDR write + addr += 4
|
||||
#define OFF_TEX_FILL_CTRL 0x054 // W[0]: arm cache fill ; R: [0]fill_done [1]wr_busy
|
||||
#define OFF_LPDDR_WR_ERRS 0x06C // R: write-probe non-OKAY (BRESP) responses (expect 0)
|
||||
#define WR_PENDING_BIT 0x4 // 0x054 bit2 — Ch352 STABLE write-done flag (poll instead of transient wr_busy)
|
||||
#define RD_PENDING_BIT 0x8 // 0x02C bit3
|
||||
#define OFF_TEX_FILL_BEATS 0x058 // R: beats filled (expect TEX_BYTES/32 = 8192)
|
||||
#define OFF_TEX_FILL_BYTES 0x05C // R: bytes filled (expect 262144)
|
||||
#define OFF_TEX_RD_ERRS 0x068 // R: fill non-OKAY read responses (expect 0)
|
||||
#define OFF_TEX_FILL_CRC 0x070 // R: sum32 of EVERY word the cache wrote into tex_mem (must == file sum32)
|
||||
#define OFF_FEEDER_GO 0x0E8 // W[0]: trigger/retrigger the feeder
|
||||
|
||||
#define N_WORDS 65536 // 512*512 PSMT8 / 4
|
||||
#define TEX_BYTES 262144
|
||||
#define N_BEATS 8192 // TEX_BYTES / 32
|
||||
|
||||
// Handle for the memory-mapped bridge register window.
//   base : start of the mapped window (byte-addressed)
//   dry  : non-zero = dry-run; the accessors below never dereference `base`
typedef struct { volatile uint8_t *base; int dry; } bridge_t;

// Store the 32-bit value `v` at byte offset `off` inside the window. No-op in dry-run mode.
static void wr32(bridge_t *b, int off, uint32_t v){
    if (b->dry) return;
    volatile uint32_t *reg = (volatile uint32_t*)(b->base + off);
    *reg = v;
}

// Load the 32-bit value at byte offset `off` inside the window. Dry-run mode always yields 0.
static uint32_t rd32(bridge_t *b, int off){
    if (b->dry) return 0;
    volatile uint32_t *reg = (volatile uint32_t*)(b->base + off);
    return *reg;
}
|
||||
|
||||
int main(int argc, char **argv){
|
||||
unsigned long base = 0x40000000UL; // PS2 HPS-bridge base (override --base or PS2_BRIDGE_BASE)
|
||||
unsigned long lpddr_base = 0x00200000; // EMIF byte base where the texture is staged (= TEX_LPDDR_BASE RTL)
|
||||
const char *texfile = "sh3_real_tex_lpddr.mem";
|
||||
int dry=0, do_fill=1, do_retrig=1;
|
||||
char *env = getenv("PS2_BRIDGE_BASE"); if (env) base = strtoul(env,NULL,0);
|
||||
for (int i=1;i<argc;i++){
|
||||
if (!strcmp(argv[i],"--base") && i+1<argc) base = strtoul(argv[++i],NULL,0);
|
||||
else if (!strcmp(argv[i],"--lpddr-base") && i+1<argc) lpddr_base = strtoul(argv[++i],NULL,0);
|
||||
else if (!strcmp(argv[i],"--dry-run")) dry=1;
|
||||
else if (!strcmp(argv[i],"--no-fill")) do_fill=0;
|
||||
else if (!strcmp(argv[i],"--no-retrigger")) do_retrig=0;
|
||||
else if (argv[i][0] != '-') texfile = argv[i];
|
||||
else { fprintf(stderr,"usage: %s [tex.mem] [--base 0x40000000] [--lpddr-base 0x200000] [--dry-run] [--no-fill] [--no-retrigger]\n", argv[0]); return 2; }
|
||||
}
|
||||
|
||||
// ---- load the texture hex (.mem: one 32-bit word per line) ----
|
||||
static uint32_t tex[N_WORDS];
|
||||
FILE *f = fopen(texfile,"r");
|
||||
if (!f){ fprintf(stderr,"error: cannot open '%s': %s\n", texfile, strerror(errno)); return 1; }
|
||||
int n=0; char line[64];
|
||||
while (n<N_WORDS && fgets(line,sizeof line,f)){
|
||||
char *s=line; while(*s==' '||*s=='\t') s++;
|
||||
if (*s=='/'||*s=='\n'||*s==0) continue; // skip blank / // banner lines
|
||||
tex[n++] = (uint32_t)strtoul(s,NULL,16);
|
||||
}
|
||||
fclose(f);
|
||||
if (n != N_WORDS){ fprintf(stderr,"error: %s has %d words, expected %d\n", texfile, n, N_WORDS); return 1; }
|
||||
|
||||
// expected checksums (source of truth = the file)
|
||||
uint32_t sum=0, xr=0; for (int i=0;i<N_WORDS;i++){ sum+=tex[i]; xr^=tex[i]; }
|
||||
printf("[ps2_sh3_tex_upload] %d words from %s sum32=0x%08x xor32=0x%08x -> LPDDR 0x%lx (bridge base 0x%lx%s)\n",
|
||||
n, texfile, sum, xr, lpddr_base, base, dry?", DRY-RUN":"");
|
||||
|
||||
// ---- open the bridge ----
|
||||
bridge_t br = {0,dry}; int fd=-1; void *map=NULL;
|
||||
if (!dry){
|
||||
fd=open("/dev/mem", O_RDWR|O_SYNC);
|
||||
if (fd<0){ fprintf(stderr,"error: open /dev/mem (run as root?): %s\n", strerror(errno)); return 1; }
|
||||
map=mmap(NULL,0x1000,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(off_t)base);
|
||||
if (map==MAP_FAILED){ fprintf(stderr,"error: mmap 0x%lx: %s\n", base, strerror(errno)); close(fd); return 1; }
|
||||
br.base=(volatile uint8_t*)map;
|
||||
}
|
||||
|
||||
// ---- (1) upload: set WRADDR then stream WRDATA. CRITICAL: poll wr_busy (0x054 bit1) clear after each word
|
||||
// so the write-probe actually COMMITS before the next write — otherwise the fast mmap writes outrun the
|
||||
// CDC/AXI commit and get DROPPED (the bug: most words read back as 0). The Ch322 devmem script got away with
|
||||
// no poll only because devmem process-spawns are slow. ----
|
||||
wr32(&br, OFF_LPDDR_WRADDR, (uint32_t)lpddr_base);
|
||||
for (int i=0;i<N_WORDS;i++){
|
||||
wr32(&br, OFF_LPDDR_WRDATA, tex[i]);
|
||||
// wait for the STABLE write_pending (0x054 bit2) to clear — the probe committed this word
|
||||
if (!dry){ int g=0; while ((rd32(&br, OFF_TEX_FILL_CTRL) & WR_PENDING_BIT) && g<2000000) g++; }
|
||||
}
|
||||
if (!dry){
|
||||
uint32_t werr = rd32(&br, OFF_LPDDR_WR_ERRS);
|
||||
printf("[ps2_sh3_tex_upload] uploaded %d words (%d KiB). wr_bresp_errs=%u (exp 0)\n", N_WORDS, TEX_BYTES/1024, werr);
|
||||
if (werr) fprintf(stderr,"WARN: %u write BRESP errors.\n", werr);
|
||||
} else printf("[ps2_sh3_tex_upload] uploaded %d words (%d KiB).\n", N_WORDS, TEX_BYTES/1024);
|
||||
|
||||
// ---- (2) readback verify via the read-probe (guardrail #2). Poll rd_pending (0x02C bit3) clear before
|
||||
// reading the latched word — the LPDDR read has latency; reading immediately returns 0/stale. ----
|
||||
int mism=0; uint32_t rsum=0, rxr=0;
|
||||
if (!dry){
|
||||
for (int i=0;i<N_WORDS;i++){
|
||||
wr32(&br, OFF_LPDDR_RDADDR, (uint32_t)(lpddr_base + (unsigned)i*4)); // set addr + trigger read
|
||||
{ int g=0; while ((rd32(&br, OFF_LPDDR_STATUS) & RD_PENDING_BIT) && g<1000000) g++; }
|
||||
uint32_t v = rd32(&br, OFF_LPDDR_RDADDR); // latched word
|
||||
rsum+=v; rxr^=v;
|
||||
if (v != tex[i] && mism<8) fprintf(stderr," readback mismatch @word %d: got 0x%08x exp 0x%08x\n", i, v, tex[i]);
|
||||
if (v != tex[i]) mism++;
|
||||
}
|
||||
printf("[ps2_sh3_tex_upload] readback sum32=0x%08x xor32=0x%08x mismatches=%d\n", rsum, rxr, mism);
|
||||
if (mism){ fprintf(stderr,"FAIL: %d readback mismatches — bad upload, NOT filling cache.\n", mism); munmap(map,0x1000); close(fd); return 1; }
|
||||
}
|
||||
|
||||
// ---- (3) arm the cache fill + poll fill_done; check beats/bytes/rd_errs ----
|
||||
if (do_fill && !dry){
|
||||
wr32(&br, OFF_TEX_FILL_CTRL, 0x1);
|
||||
int done=0; for (int i=0;i<200000;i++){ if (rd32(&br,OFF_TEX_FILL_CTRL)&0x1){ done=1; break; } }
|
||||
uint32_t beats=rd32(&br,OFF_TEX_FILL_BEATS), bytes=rd32(&br,OFF_TEX_FILL_BYTES), errs=rd32(&br,OFF_TEX_RD_ERRS);
|
||||
uint32_t fcrc=rd32(&br,OFF_TEX_FILL_CRC);
|
||||
printf("[ps2_sh3_tex_upload] cache fill: done=%d beats=%u (exp %d) bytes=%u (exp %d) rd_errs=%u (exp 0)\n",
|
||||
done, beats, N_BEATS, bytes, TEX_BYTES, errs);
|
||||
// The cache's sum32 over the words it wrote into tex_mem must equal the file's sum32. If it matches,
|
||||
// tex_mem is byte-correct on silicon — so any residual texture corruption is NOT the cache contents.
|
||||
printf("[ps2_sh3_tex_upload] cache fill_crc=0x%08x (exp file sum32=0x%08x) -> tex_mem %s\n",
|
||||
fcrc, sum, (fcrc==sum) ? "INTEGRITY OK" : "CORRUPT");
|
||||
if (!done || beats!=N_BEATS || bytes!=TEX_BYTES || errs!=0)
|
||||
fprintf(stderr,"WARN: cache fill stats off — texels may be wrong; check EMIF cal + LPDDR base.\n");
|
||||
if (fcrc!=sum)
|
||||
fprintf(stderr,"WARN: cache fill_crc mismatch — tex_mem corrupt on board (NOT a divider/sampler issue).\n");
|
||||
}
|
||||
|
||||
// ---- (4) retrigger the feeder so the scene re-renders with the warm cache ----
|
||||
if (do_retrig && !dry){ wr32(&br, OFF_FEEDER_GO, 0x1); printf("[ps2_sh3_tex_upload] feeder retriggered.\n"); }
|
||||
|
||||
if (!dry){ munmap(map,0x1000); close(fd); }
|
||||
printf("[ps2_sh3_tex_upload] DONE — check HDMI vs the crop reference (recon/sh3_real_ref.png).\n");
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
#!/bin/sh
|
||||
# retroDE_ps2 — Ch340 host gate. No board, no real dump needed (uses content-clean synthetic
|
||||
# fixtures). Proves: (1) byte-exact container/GIF parse, (2) census classifies supported vs
|
||||
# unsupported, (3) the translator emits a ps2_feeder scene for supported input that the Ch339
|
||||
# encoder accepts, and (4) fail-closed on unsupported input.
|
||||
set -u
HERE="$(cd "$(dirname "$0")" && pwd)"
fail=0

# Private scratch directory, removed on exit. Fixed /tmp names let a leftover binary or scene file
# from an earlier run turn a broken build into a PASS (or a clean run into a FAIL).
TMP="$(mktemp -d)" || { echo "mktemp failed"; exit 1; }
trap 'rm -rf "$TMP"' EXIT

echo "== 1. byte-exact parser test =="
python3 "$HERE/test_gs_parse.py" >"$TMP/gp.txt" 2>&1
if grep -q "RESULT: PASS" "$TMP/gp.txt"; then
    echo " parser byte-exact PASS"
else
    echo " parser FAIL"; tail -5 "$TMP/gp.txt"; fail=1
fi

echo "== 2. translator chain on the SUPPORTED fixture =="
# A failed fixture regeneration would leave stale inputs for everything below: report it.
python3 "$HERE/gs_make_synthetic.py" >/dev/null 2>&1 || { echo " gs_make_synthetic FAILED"; fail=1; }
python3 "$HERE/gs_translate.py" "$HERE/../captures/gs/synthetic/mini_supported.gs" --scene "$TMP/sup.scene" > "$TMP/sup.txt" 2>&1
if grep -q "census classes : translated=2" "$TMP/sup.txt"; then
    echo " census: translated=2 OK"
else
    echo " census wrong"; grep -a "census classes" "$TMP/sup.txt"; fail=1
fi
if grep -q "EARLIEST SUPPORTED SEGMENT: 2 triangles" "$TMP/sup.txt"; then
    echo " earliest segment: 2 tris OK"
else
    echo " segment wrong"; fail=1
fi
# The encoder must compile in THIS run; never fall back to a binary built earlier.
if ! gcc -O2 -o "$TMP/psf" "$HERE/ps2_feeder.c" 2>"$TMP/gcc.txt"; then
    echo " ps2_feeder failed to compile"; tail -3 "$TMP/gcc.txt"; fail=1
elif "$TMP/psf" --dry-run -f "$TMP/sup.scene" >"$TMP/psf.txt" 2>&1 && grep -q "tris=2 rects=0" "$TMP/psf.txt"; then
    echo " ps2_feeder accepts the emitted scene (2 tris) OK"
else
    echo " ps2_feeder rejected the scene"; tail -3 "$TMP/psf.txt"; fail=1
fi

echo "== 3. fail-closed on UNSUPPORTED fixture (mini.gs: textured triangle) =="
python3 "$HERE/gs_translate.py" "$HERE/../captures/gs/synthetic/mini.gs" --scene "$TMP/uns.scene" > "$TMP/uns.txt" 2>&1
if grep -q "NO SUPPORTED SEGMENT" "$TMP/uns.txt"; then
    echo " no segment + fail-closed OK"
else
    echo " should have been unsupported"; grep -a "SEGMENT" "$TMP/uns.txt"; fail=1
fi
# $TMP is fresh, so a scene file here can only have been written by the translator call above.
if [ ! -f "$TMP/uns.scene" ]; then
    echo " no scene file written (fail-closed) OK"
else
    echo " scene wrongly emitted"; fail=1
fi

echo "RESULT: $([ $fail -eq 0 ] && echo PASS || echo FAIL)"
exit $fail
|
||||
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Ch340 byte-exact parser gate. Regenerates the synthetic fixture and asserts gs_parse decodes it
|
||||
into the EXACT expected normalized event stream (container header + every GIF mode). No board, no
|
||||
real dump needed. Exit non-zero on any mismatch."""
|
||||
import os, sys, subprocess
|
||||
HERE = os.path.dirname(__file__)
|
||||
sys.path.insert(0, HERE)
|
||||
import gs_parse
|
||||
|
||||
def main():
    """Regenerate the synthetic fixtures and check gs_parse decodes them exactly.

    Prints one status line per check followed by a ``RESULT:`` line.
    Returns 0 when every check passed and 1 otherwise.
    """
    subprocess.run([sys.executable, os.path.join(HERE, "gs_make_synthetic.py")], check=True)
    synth_dir = os.path.join(HERE, "..", "captures", "gs", "synthetic")
    header, events = gs_parse.parse_dump(os.path.join(synth_dir, "mini.gs"))

    failed = []

    def check(cond, msg):
        # Report one check; remember it if it did not hold.
        print((" ok " if cond else " FAIL ") + msg)
        if not cond:
            failed.append(msg)

    def rgbaq(r, g, b, a=0x80):
        # Packed RGBAQ value as the fixture encodes it (Q lane left at 0).
        return r | (g << 8) | (b << 16) | (a << 24)

    def xyz2(x, y, z=0x5000):
        # Packed XYZ2 value: x and y are shifted left by 4 before packing.
        return (x << 4) | ((y << 4) << 16) | (z << 32)

    # Container header fields.
    check(header.serial == "SYNTH001", f"serial == SYNTH001 (got {header.serial!r})")
    check(header.crc == 0x12345678, f"crc == 0x12345678 (got 0x{header.crc:08x})")
    check(header.state_size == 16, f"state_size == 16 (got {header.state_size})")

    # The expected ordered (kind, reg, value) skeleton of the event stream.
    transfer = ("TRANSFER", "", 0)
    giftag = ("GIFTAG", "", 0)
    expected = [
        transfer,
        giftag,
        ("GSREG", "FRAME_1", 0x0000_0000_0C00_1807),
        transfer,
        giftag,
        ("GSREG", "PRIM", 3 | (1 << 4) | (1 << 6)),  # via PRE
        ("GSREG", "RGBAQ", rgbaq(0xFF, 0, 0)),       # vtx0 red
        ("GSREG", "XYZ2", xyz2(100, 50)),
        ("GSREG", "RGBAQ", rgbaq(0x00, 0xFF, 0)),    # vtx1 green
        ("GSREG", "XYZ2", xyz2(200, 50)),
        ("GSREG", "RGBAQ", rgbaq(0x00, 0, 0xFF)),    # vtx2 blue
        ("GSREG", "XYZ2", xyz2(100, 150)),
        transfer,
        giftag,
        ("IMAGE", "", 0),
        ("FRAME_BOUNDARY", "", 0),
    ]
    got = [(ev.kind, ev.reg, ev.value) for ev in events]

    check(len(got) == len(expected), f"event count == {len(expected)} (got {len(got)})")
    for idx, (actual, wanted) in enumerate(zip(got, expected)):
        # GSREG events must match in full; every other event is compared by kind only.
        if wanted[0] == "GSREG":
            check(actual == wanted, f"event[{idx}] {wanted} (got {actual})")
        else:
            check(actual[0] == wanted[0], f"event[{idx}] kind {wanted[0]} (got {actual[0]})")

    # IMAGE qwc + no malformed
    images = [ev for ev in events if ev.kind == "IMAGE"]
    check(len(images) == 1 and images[0].info.get("qwc") == 2,
          f"one IMAGE qwc==2 (got {[e.info for e in images]})")
    check(all(ev.kind != "MALFORMED" for ev in events), "no MALFORMED events")

    # Ch342 — PACKED ST must expose the STQ Q lane as info["q_stq"] (don't drop RGBAQ.Q).
    _, st_events = gs_parse.parse_dump(os.path.join(synth_dir, "mini_st.gs"))
    sts = [ev for ev in st_events if ev.kind == "GSREG" and ev.reg == "ST"]
    check(len(sts) == 1, f"mini_st: one ST event (got {len(sts)})")
    if sts:
        st = sts[0]
        check((st.value & 0xFFFFFFFF) == 0x11111111, "mini_st: ST.S == 0x11111111")
        check(((st.value >> 32) & 0xFFFFFFFF) == 0x22222222, "mini_st: ST.T == 0x22222222")
        check(st.info.get("q_stq") == 0x33333333,
              f"mini_st: q_stq == 0x33333333 (got {st.info.get('q_stq')})")

    print("RESULT:", "PASS" if not failed else "FAIL")
    return 1 if failed else 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -0,0 +1,50 @@
|
||||
#!/bin/sh
# retroDE_ps2 — Ch339 host-encoder gate. Compiles ps2_feeder and proves its staging output is
# BYTE-EQUIVALENT to the golden bake.py fixtures for every representative Ch333-338 scene, then
# checks that malformed / oversized / out-of-range commands are rejected cleanly. No board needed.
set -u

# Resolve paths relative to this script so the gate can be run from any working directory.
HERE="$(cd "$(dirname "$0")" && pwd)"
REPO="$(cd "$HERE/.." && pwd)"
MEM="$REPO/sim/data/top_psmct32_raster_demo"

# Private scratch directory for the compiled binary. It is removed on exit so repeated runs do
# not leave stray temp directories behind; the signal traps turn HUP/INT/TERM into a normal exit
# so the EXIT trap still fires.
WORK="$(mktemp -d)" || { echo "mktemp failed"; exit 1; }
trap 'rm -rf "$WORK"' EXIT
trap 'exit 1' HUP INT TERM
BIN="$WORK/ps2_feeder"

# Sticky failure flag: set to 1 by any failing step below.
fail=0

echo "== compile =="
gcc -O2 -Wall -Werror -o "$BIN" "$HERE/ps2_feeder.c" || { echo "COMPILE FAILED"; exit 1; }

echo "== regenerate goldens =="
python3 "$MEM/bake.py" >/dev/null 2>&1 || { echo "bake.py failed"; exit 1; }

echo "== byte-equivalence vs golden .mem =="
|
||||
# check <scene> <golden.mem>
#   Runs `$BIN --dump <scene>` and compares its stdout with $MEM/<golden.mem>, ignoring the
#   golden's first two lines (tail -n +3). Prints one status line and sets the global `fail`
#   to 1 on any problem.
#
#   A missing golden or a failing dump is reported explicitly rather than diffed: otherwise both
#   sides would be empty files and the comparison would "match" vacuously. Scratch files live in
#   a private mktemp directory so concurrent runs cannot clobber each other.
check() { # $1 scene  $2 golden.mem
    _ck_tmp="$(mktemp -d)" || { echo " ERROR mktemp failed ($1)"; fail=1; return 1; }

    if [ ! -r "$MEM/$2" ]; then
        echo " MISSING $2 (golden for $1)"; fail=1
    elif ! "$BIN" --dump "$1" > "$_ck_tmp/dump.txt" 2> "$_ck_tmp/err.txt"; then
        # Show what the feeder complained about, indented under the status line.
        echo " DUMP-FAILED $1"; fail=1
        sed 's/^/    /' "$_ck_tmp/err.txt"
    elif ! tail -n +3 "$MEM/$2" > "$_ck_tmp/gold.txt"; then
        echo " ERROR could not read $2"; fail=1
    elif diff "$_ck_tmp/dump.txt" "$_ck_tmp/gold.txt" >/dev/null 2>&1; then
        echo " MATCH $1 == $2"
    else
        echo " MISMATCH $1 != $2"; fail=1
    fi

    rm -rf "$_ck_tmp"
}
|
||||
# Scene table: one <scene>:<golden .mem> pair per entry, checked in order. Neither half contains
# ':' or whitespace, so each pair splits cleanly with parameter expansion.
for pair in \
    color-tri:feeder_color_tri.mem \
    native-rect:feeder_native_rect.mem \
    gouraud-tri:feeder_gouraud_tri.mem \
    accum:feeder_accum.mem \
    retrigger-a:feeder_scene_a.mem \
    retrigger-b:feeder_scene_b.mem \
    zpersist-near:feeder_zpersist_near_first.mem \
    zpersist-far:feeder_zpersist_far_first.mem \
    zpersist-grad:feeder_zpersist_grad.mem \
    sprite:feeder_sprite.mem # Ch345a — runtime textured-alpha SPRITE staging
do
    # ${pair%%:*} is the scene name, ${pair#*:} the golden file name.
    check "${pair%%:*}" "${pair#*:}"
done

echo "== rejection of bad input (each must exit non-zero) =="
|
||||
# reject <label>        (command script is read from stdin)
#   Saves stdin to a private temp file, runs `$BIN --dry-run -f <file>`, and expects the feeder
#   to refuse it with an ordinary non-zero exit status. Sets the global `fail` to 1 and returns 1
#   when the input is accepted or the feeder does not exit cleanly; returns 0 otherwise.
#
#   NOTE: `fail` is a shell variable, so it is only recorded when this function runs in the
#   current shell. As a stage of a pipeline it may run in a subshell; callers that pipe into it
#   should test the return status instead.
reject() { # $1 label ; rest = file contents on stdin
    _rj_sc="$(mktemp)" || { echo " ERROR mktemp failed ($1)"; fail=1; return 1; }
    cat > "$_rj_sc"

    "$BIN" --dry-run -f "$_rj_sc" >/dev/null 2>&1
    _rj_rc=$?
    rm -f "$_rj_sc"

    if [ "$_rj_rc" -eq 0 ]; then
        echo " NOT-REJECTED $1"; fail=1; return 1
    elif [ "$_rj_rc" -ge 126 ] && [ "$_rj_rc" -le 192 ]; then
        # 126/127: the shell could not execute the binary; 129..192: killed by a signal
        # (128 + signal number). Neither is a clean rejection of bad input.
        echo " CRASHED $1 (exit $_rj_rc)"; fail=1; return 1
    fi
    echo " rejected $1"
}
|
||||
# Bad-input cases. Each script is supplied through a here-document (a plain redirection) rather
# than `... | reject`: POSIX allows every stage of a pipeline to run in a subshell (dash and bash
# do), and a `fail=1` assigned there never reaches this shell — the gate would print
# NOT-REJECTED yet still report PASS and exit 0.
reject "oversized (28 tris)" <<EOF
$(yes 'tritile 0 0x5000 255 0 0' | head -28)
EOF

reject "malformed tri" <<'EOF'
tri 1 2 3
EOF

reject "tile out of range" <<'EOF'
tritile 99 0x5000 255 0 0
EOF

reject "unknown op" <<'EOF'
bogus 1 2 3
EOF

# Final verdict; the exit status mirrors the sticky failure flag.
if [ "$fail" -eq 0 ]; then
    echo "RESULT: PASS"
else
    echo "RESULT: FAIL"
fi
exit "$fail"
|
||||
Reference in New Issue
Block a user