#!/usr/bin/env python3 """retroDE_ps2 — Ch350 fixtures: the EXACT Ch349 SH3 draw for the full-res LPDDR integration TB. Codex scope (no crop/downscale/repack): the actual draw geometry (f1 idx89761 TRI_STRIP -> triangle LIST), the reconstructed 512x512 PSMT8 texture (in LPDDR), the real CSM1 CLUT (grid/CT32 order in BRAM), perspective ST/Q, DECAL. Pixel-diff vs the Ch349 host reference with the Ch348 bounded <=1-texel acceptance. Emits (LOCAL/gitignored -> sim/data/top_psmct32_raster_demo/): sh3_real_tex_lpddr.mem 65536 LE words = the PSMT8-SWIZZLED texture bytes mem[TBP*256 : +262144] (loaded into the behavioral LPDDR model; the GS texture-unit re-swizzles on read). sh3_real_idx.mem 65536 words = the DE-SWIZZLED 512x512 indices packed 4/word (TB reference search). sh3_real_clut.mem 256 words = the CT32-grid CLUT bytes mem[CBP*256 : +1024] (backdoored into BRAM; the Ch350 CSM1 clut_loader reads them in grid order). feeder_sh3_real.mem feeder staging: 68 tris (translated to FB origin) + TEX0(PSMT8,CSM1,CLD=1,DECAL). sh3_real_refmap.mem per FB pixel (FBW*64 x FBH): covered|interior|tu|tv for the bounded TB check. sh3_real_ref.png eyeball reference (host DECAL render at FB scale). Geometry/addressing constants are printed + emitted as `sh3_real_params.vh` for the TB to include. Usage: gs_make_sh3_real_draw_fixture.py [dump.gs.zst] [--draw-idx 89761] """ import sys, os, glob HERE=os.path.dirname(os.path.abspath(__file__)); ROOT=os.path.normpath(os.path.join(HERE,"..")) DATA=os.path.join(ROOT,"sim","data","top_psmct32_raster_demo") sys.path.insert(0, HERE) import gs_sh3_draw_census as C import gs_sh3_recon as RC import gs_localmem as LM sys.path.insert(0, DATA); import bake # ---- address map (Ch350 BRAM-CROP diagnostic: read2 needs VRAM < 256 KiB; FB cropped, texture/CLUT/geometry # FULL-RES). VRAM_BYTES = 0x20000 (128 KiB, 2^15 words) so the read2 tripwire (>=256 KiB) is NOT tripped. 
# Only the FRAMEBUFFER/VIEWPORT is cropped to a deterministic CH-tall band; the texture stays full 512x512
# (in LPDDR, cache-intercepted base 0x40000), the CSM1 CLUT is full, the geometry/ST/Q are unchanged. ----
FBW = 4                       # 256 px wide FB (64-px units) — full draw width (247) fits
FBPXW = FBW*64                # 256
CH = 120                      # FB rows: 256*120*4 = 122880 B; + CLUT(1 KiB) < 128 KiB
VRAM_BYTES = 0x20000          # 128 KiB BRAM VRAM (2^15 words) — under the 256 KiB read2 tripwire
CBP = 0x1E000//256            # 480 (CLUT right after the 256x120 FB = 0x1E000)
NEW_TBP = 0x40000//256        # 1024 (texture VRAM base; cache-intercepted, NOT in the 128 KiB BRAM)
TEX_VRAM_BASE = NEW_TBP*256   # 0x40000
TEX_BYTES = 512*512           # 262144 (PSMT8) — FULL texture, no downscale
LPDDR_TEX_BASE = 0x00200000   # texture byte base in LPDDR4B
TW_LOG, TH_LOG = 9, 9         # 512x512
TBW_TEX = 8                   # texture TBW (64-px units) — MUST match the original draw's swizzle
STG_WORDS = 768               # feeder staging capacity in words; main() aborts if the staging list exceeds it


def recip8(q, IDX_BITS=8, SCALE=24, Q_W=24):
    """Exact Python replica of gs_reciprocal_stub.

    recip = floor(2^(SCALE+IDX_BITS-1) / M) >> e, where M is ``q`` normalized to an
    IDX_BITS-wide mantissa (MSB at TOP_BIT) and e is the index of q's most significant bit.
    Used to build the RTL-FAITHFUL reference that isolates reciprocal quantization from
    S1 attribute under-interpolation.

    Args:
        q: integer denominator.
        IDX_BITS: mantissa width in bits.
        SCALE: fixed-point scale of the result (result ~= 2^SCALE / q).
        Q_W: accepted for signature compatibility; not used by this replica.

    Returns:
        The reciprocal as an int, saturated to 2^(SCALE+1) - 1. A non-positive ``q``
        returns the saturation value.
    """
    OUT_MAX = (1 << (SCALE + 1)) - 1
    if q <= 0:
        return OUT_MAX
    TOP_BIT = IDX_BITS - 1
    e = q.bit_length() - 1            # index of the most significant set bit
    # Shift q so its MSB lands on TOP_BIT; bits below the mantissa are dropped (the quantization).
    if e >= TOP_BIT:
        norm = q >> (e - TOP_BIT)
    else:
        norm = q << (TOP_BIT - e)
    M = norm & ((1 << IDX_BITS) - 1)
    if M == 0:
        return OUT_MAX
    r = ((1 << (SCALE + TOP_BIT)) // M) >> e
    return min(r, OUT_MAX)


def persp_texel_recip(uq, vq, q, tw, th, idx_bits=8, SCALE=24):
    """gs_persp_uv with the idx_bits reciprocal LUT.

    u = (uq * recip) >> SCALE, clamped to 2047, then REPEAT-wrapped by ``tw``; v likewise
    with ``th``. ``uq``, ``vq`` and ``q`` are rounded to the nearest int before use.

    Returns:
        ``(u % tw, v % th)``.
    """
    recip = recip8(int(round(q)), IDX_BITS=idx_bits, SCALE=SCALE)
    u = (int(round(uq)) * recip) >> SCALE
    v = (int(round(vq)) * recip) >> SCALE
    if u > 2047:
        u = 2047
    if v > 2047:
        v = 2047
    return (u % tw), (v % th)


def f32_to(v):
    """Placeholder: return ``v`` unchanged."""
    return v  # placeholder


def tex0_real(tbp, cbp):
    """Pack the TEX0 value used for the real draw.

    PSMT8 (psm=0x13) 512x512, TFX=DECAL(1); CLUT: CBP, CPSM=PSMCT32(0), CSM=0 (CSM1 grid!),
    CSA=0, CLD=1. The base word comes from ``bake.tex0_pack``; the CLUT fields are OR-ed in here.
    The zero-valued fields are no-ops kept so every CLUT field position is visible.
    """
    v = bake.tex0_pack(tbp, TBW_TEX, psm=0x13, tw=TW_LOG, th=TH_LOG, tfx=1)
    v |= (cbp & 0x3FFF) << 37
    v |= (0 & 0xF) << 51      # CPSM = PSMCT32
    v |= (0 & 0x1) << 55      # CSM  = 0 -> CSM1 (16x16 CT32 grid) — the Ch350 path
    v |= (0 & 0x1F) << 56     # CSA  = 0
    v |= (1 & 0x7) << 61      # CLD  = 1 -> always load
    return v


def parse_cli_args(argv, default_draw_idx=89761):
    """Split a ``sys.argv``-style list into ``(dump, draw_idx)``.

    The first argument is taken as the dump path unless it starts with ``--``. The value
    following the first ``--draw-idx`` overrides ``default_draw_idx``.

    Returns:
        ``(dump_path_or_None, draw_idx)``.

    Raises:
        SystemExit: ``--draw-idx`` has no value, or its value is not an integer.
    """
    args = argv[1:]
    dump = args[0] if args and not args[0].startswith("--") else None
    draw_idx = default_draw_idx
    if "--draw-idx" in args:
        pos = args.index("--draw-idx") + 1
        if pos >= len(args):
            sys.exit("--draw-idx needs an integer value")
        try:
            draw_idx = int(args[pos])
        except ValueError:
            sys.exit(f"--draw-idx needs an integer value, got {args[pos]!r}")
    return dump, draw_idx


def main(argv):
    """Generate the SH3 real-draw fixtures from a GS dump.

    Args:
        argv: ``sys.argv``-style list: program name, optional dump path, optional ``--draw-idx N``.

    Exits through ``sys.exit`` with a message when the arguments are malformed, no dump is
    found, the draw is missing or has no vertices, its TEX0 is not the expected 512x512 PSMT8,
    or the reconstructed memory is absent or too short.
    """
    dump, draw_idx = parse_cli_args(argv)
    if dump is None:
        # glob order is filesystem-dependent; sort so the same capture is picked on every run.
        c = sorted(glob.glob(os.path.join(ROOT,"captures","gs","silenthill3","*224139*.gs.zst")))
        if not c: sys.exit("no SH3 dump found; pass the .gs.zst path")
        dump = c[0]
    PERSP_FRAC = bake.PERSP_FRAC

    dr = C.get_draw(dump, draw_idx)
    if dr is None: sys.exit(f"draw idx {draw_idx} not found")
    if not dr["verts"]: sys.exit(f"draw idx {draw_idx} has no vertices")
    t0 = dr["tex0"]; ORIG_TBP = t0["tbp"]; ORIG_CBP = t0["cbp"]
    TW, TH = t0["tw"], t0["th"]
    # Explicit check rather than assert: the rest of the script hard-codes 512x512 PSMT8 and an
    # assert would be stripped under -O.
    if not (TW==512 and TH==512 and t0["psm"]==0x13):
        sys.exit(f"unexpected TEX0 {t0}")

    # --- reconstruct GS local memory at draw time (Ch349) ---
    mem, replayed, uploads, events, vram = RC.build_localmem_to(dump, draw_idx)
    if mem is None: sys.exit("VRAM snapshot absent")
    # de-swizzled 512x512 index image (for TB reference) + swizzled bytes (for LPDDR)
    idx = mem.read_psmt8(ORIG_TBP, t0["tbw"], TW, TH)                     # de-swizzled indices
    tex_swz = bytes(mem.m[ORIG_TBP*256 : ORIG_TBP*256 + TEX_BYTES])       # swizzled bytes -> LPDDR
    clut_bytes = bytes(mem.m[ORIG_CBP*256 : ORIG_CBP*256 + 1024])         # CT32-grid CLUT bytes -> BRAM
    # Slicing past the end of memory silently shortens the result; the emitters would then
    # pad the fixtures with zero words, so stop here instead.
    if len(tex_swz) != TEX_BYTES:
        sys.exit(f"texture at TBP={ORIG_TBP} runs past reconstructed memory ({len(tex_swz)} of {TEX_BYTES} bytes)")
    if len(clut_bytes) != 1024:
        sys.exit(f"CLUT at CBP={ORIG_CBP} runs past reconstructed memory ({len(clut_bytes)} of 1024 bytes)")
    pal = RC.read_clut32(mem, ORIG_CBP, order="grid")                     # for the reference PNG

    # --- geometry: translate to draw origin (full frame), then choose a deterministic CH-tall VIEWPORT crop ---
    xmin = min(v["x"] for v in dr["verts"]); ymin = min(v["y"] for v in dr["verts"])
    OX, OY = int(xmin), int(ymin)
    fverts = [dict(x=v["x"]-OX, y=v["y"]-OY, s=v["s"], t=v["t"], q=v["q"]) for v in dr["verts"]]
    ftris = [(i-2,i-1,i) for i in range(2,len(fverts))]
    full_h = int(max(v["y"] for v in fverts)) + 1
    def edge(ax,ay,bx,by,px,py): return (px-ax)*(by-ay)-(py-ay)*(bx-ax)
    # per-row coverage histogram over the FULL frame -> pick CY0 = argmax covered pixels in a CH-tall band.
row_cov = [0]*(full_h+CH+2) for (a0,b0,c0) in ftris: v0,v1,v2=fverts[a0],fverts[b0],fverts[c0] x0,y0=v0["x"],v0["y"]; x1,y1=v1["x"],v1["y"]; x2,y2=v2["x"],v2["y"] ar=edge(x0,y0,x1,y1,x2,y2) if abs(ar)<1e-9: continue inv=1.0/ar for py in range(max(0,int(min(y0,y1,y2))), min(full_h-1,int(max(y0,y1,y2))+1)+1): for px in range(max(0,int(min(x0,x1,x2))), min(FBPXW-1,int(max(x0,x1,x2))+1)+1): cx,cy=px+0.5,py+0.5 w0=edge(x1,y1,x2,y2,cx,cy)*inv; w1=edge(x2,y2,x0,y0,cx,cy)*inv; w2=1.0-w0-w1 if w0>=-0.001 and w1>=-0.001 and w2>=-0.001: row_cov[py]+=1 best_cy0, best_sum = 0, -1 for cy0 in range(0, max(1, full_h-CH+1)): s = sum(row_cov[cy0:cy0+CH]) if s > best_sum: best_sum, best_cy0 = s, cy0 CY0 = best_cy0; CX0 = 0 # apply the viewport crop: shift Y by -CY0 (ST/Q UNCHANGED — only the framebuffer window moves), then # CLIP each triangle to the crop rect [0,FBPXW]x[0,CH] (Sutherland-Hodgman, interpolating S/T/Q linearly in # screen space — correct since S,T,Q are already premultiplied by 1/w). This is the VIEWPORT scissor done at # the host: every emitted vertex lands inside the FB (no out-of-bounds writes), geometry SHAPE + per-vertex # ST/Q are preserved exactly; only the framebuffer window is cropped. Codex's "cropped or scissored" rule. 
def lerp(p1, p2, a): return dict(x=p1["x"]+a*(p2["x"]-p1["x"]), y=p1["y"]+a*(p2["y"]-p1["y"]), s=p1["s"]+a*(p2["s"]-p1["s"]), t=p1["t"]+a*(p2["t"]-p1["t"]), q=p1["q"]+a*(p2["q"]-p1["q"])) def clip_edge(poly, inside, isect): out=[] for i in range(len(poly)): cur=poly[i]; prv=poly[i-1] ci=inside(cur); pi=inside(prv) if ci: if not pi: out.append(isect(prv,cur)) out.append(cur) elif pi: out.append(isect(prv,cur)) return out def clip_rect(poly): # left x>=0, right x<=FBPXW, top y>=0, bottom y<=CH poly=clip_edge(poly, lambda p:p["x"]>=0.0, lambda a,b:lerp(a,b,(0.0-a["x"])/(b["x"]-a["x"]))) if not poly: return poly poly=clip_edge(poly, lambda p:p["x"]<=FBPXW, lambda a,b:lerp(a,b,(FBPXW-a["x"])/(b["x"]-a["x"]))) if not poly: return poly poly=clip_edge(poly, lambda p:p["y"]>=0.0, lambda a,b:lerp(a,b,(0.0-a["y"])/(b["y"]-a["y"]))) if not poly: return poly poly=clip_edge(poly, lambda p:p["y"]<=CH, lambda a,b:lerp(a,b,(CH-a["y"])/(b["y"]-a["y"]))) return poly sverts = [dict(x=v["x"]-CX0, y=v["y"]-CY0, s=v["s"], t=v["t"], q=v["q"]) for v in fverts] def rnd(v): # round XY to integer screen coords (the feeder gets ints) — host ref MUST use the SAME ints, return dict(x=float(int(round(v["x"]))), y=float(int(round(v["y"]))), s=v["s"], t=v["t"], q=v["q"]) tris = [] # list of (v0,v1,v2) explicit clipped vertex dicts with INTEGER screen XY for (a0,b0,c0) in ((i-2,i-1,i) for i in range(2,len(sverts))): poly = clip_rect([sverts[a0], sverts[b0], sverts[c0]]) poly = [rnd(p) for p in poly] for k in range(1, len(poly)-1): # fan the clipped polygon into triangles tris.append((poly[0], poly[k], poly[k+1])) print(f"[Ch350] draw idx{draw_idx}: {len(sverts)} verts; full frame {FBPXW}x{full_h}; DETERMINISTIC crop " f"CX0={CX0} CY0={CY0} -> FB {FBPXW}x{CH} ({best_sum} covered px in band); clipped to {len(tris)} tris") # --- feeder staging --- # NOTE: gs_persp_uv contract is uq=(u/w)*2^FRAC, q=(1/w)*2^FRAC, u=(uq*floor(2^SCALE/q))>>SCALE. 
Scaling # S/T/Q by K is INVARIANT (cancels) — confirmed it doesn't move the residual. The texel-accuracy limit is # the gs_reciprocal_stub 8-bit (256-entry) LUT: ~0.4% relative -> <=1 texel for Ch348's TW=64 but ~2+ texels # for this TW=512 texture (plus the S1-path under-interpolation banding). A perspective-precision limit. # Ch351 — EFFECTIVE PERSP_FRAC. The hardware u=s/q divide cancels the frac scale, so "widening PERSP_FRAC" # is a HOST PACKING choice: pack S/T/Q with more frac bits so the far-surface denominator q=(1/w)*2^FRACeff # doesn't round to 1-2 (FRAC=12 collapses for w~2048). PSCALE=2^k gives FRACeff = PERSP_FRAC + k. PSCALE=256 # -> FRACeff=20, which took the SH3 crop 20%->80% (Codex's "Q×256 ≈ +8 frac bits"). Default PSCALE=1 keeps # Ch342/348 at FRAC=12 (canaries). The 24-bit signed S/T field bounds FRACeff for a given S/T range — checked. PSCALE = 4096 S24_MAX = (1<<23) - 1 max_sfp = [0] def vert_words(v): s_fp = round(v["s"] * TW * (1< S24_MAX or abs(t_fp) > S24_MAX: # 24-bit signed ST field overflow guard (Codex #3) sys.exit(f"[Ch351] OVERFLOW: |s_fp|={abs(s_fp)} or |t_fp|={abs(t_fp)} > 2^23-1 at PSCALE={PSCALE} " f"(FRACeff={PERSP_FRAC}+{PSCALE.bit_length()-1}). Lower PSCALE for this S/T range.") if abs(q_fp) > 0x7FFFFFFF: sys.exit(f"[Ch351] OVERFLOW: |q_fp|={abs(q_fp)} > 2^31-1 (Q field). 
Lower PSCALE.") sx = max(0, min(FBPXW-1, int(round(v["x"])))) sy = max(0, min(CH-1, int(round(v["y"])))) return [bake.rgbaq_with_q(0,0,0, q_fp & 0xFFFFFFFF), bake.st_data(s_fp & 0xFFFFFF, t_fp & 0xFFFFFF), bake.xyz2_dataz(sx, sy, 0x0000_5000)] stg = [] stg.append(len(tris) | (1<<32)) # word0: ntris | perspective-format flag stg.append(bake.frame_1_psmct32(FBW)) stg.append(bake.alpha_pack(0,1,0,1)) stg.append(0) # TEST_1 = 0 (ZTE=0, ATE=0): no depth/alpha test stg.append(bake.zbuf1_pack(0, zmsk=1)) # ZMSK=1: no Z writes -> no Z buffer needed stg.append(tex0_real(NEW_TBP, CBP)) # PSMT8 + CSM1 CLUT (CLD=1) -> feeder commit fires the load stg.append(3 | (1<<4)) # TRI + TME, ABE=0 (S1 perspective path) for (v0,v1,v2) in tris: for v in (v0,v1,v2): stg += vert_words(v) if len(stg) > STG_WORDS: sys.exit(f"staging {len(stg)} > {STG_WORDS} (raise STG_WORDS)") print(f"[Ch350] feeder staging: {len(stg)} words (<= {STG_WORDS})") print(f"[Ch351] effective PERSP_FRAC = {PERSP_FRAC}+{PSCALE.bit_length()-1} = {PERSP_FRAC+PSCALE.bit_length()-1} " f"(PSCALE={PSCALE}); max |s_fp/t_fp|={max_sfp[0]} of 2^23-1 ({100.0*max_sfp[0]/((1<<23)-1):.1f}% of the 24-bit ST field)") # --- host reference + per-pixel texel map. TWO references over the SAME clipped geometry: # refmap = FLOAT perspective (ideal) — the Codex pixel-diff oracle. # refmap_rec = RTL-FAITHFUL: fixed-point vertex attrs (uq=s*TW*2^FRAC, q=Q*2^FRAC), float interp, then the # 8-bit gs_reciprocal_stub. Comparing the RTL FB vs BOTH isolates reciprocal quantization # (RTL≈refmap_rec, refmap_rec≠refmap) from S1 under-interpolation banding (RTL≠refmap_rec). 
refmap = [0]*(FBPXW*CH); refpix = [(0,0,0)]*(FBPXW*CH) refmap_rec = [0]*(FBPXW*CH); refpix_rec = [(0,0,0)]*(FBPXW*CH) refmap_aff = [0]*(FBPXW*CH) # AFFINE: per-vertex texel, linear u,v interp (NOT perspective-correct) F = 1<1e-12 else 0.0) for v in (v0,v1,v2)] avv=[((v["t"]/v["q"])*TH if abs(v["q"])>1e-12 else 0.0) for v in (v0,v1,v2)] minx=max(0,int(min(x0,x1,x2))); maxx=min(FBPXW-1,int(max(x0,x1,x2))+1) miny=max(0,int(min(y0,y1,y2))); maxy=min(CH-1,int(max(y0,y1,y2))+1) for py in range(miny,maxy+1): for px in range(minx,maxx+1): cx,cy=px+0.5,py+0.5 w0=edge(x1,y1,x2,y2,cx,cy)*inv; w1=edge(x2,y2,x0,y0,cx,cy)*inv; w2=1.0-w0-w1 if w0<-0.001 or w1<-0.001 or w2<-0.001: continue # Ch351 convention fix: coverage/interior at pixel CENTER (px+0.5), but the RTL interpolates # the perspective ATTRIBUTES at the INTEGER pixel coord (tex_dx_s1 = s1_x_q - v0_x, no +0.5). # Use a CORNER barycentric for S/T/Q to match -> removes the ~0.5-texel drift. a0w=edge(x1,y1,x2,y2,float(px),float(py))*inv; a1w=edge(x2,y2,x0,y0,float(px),float(py))*inv; a2w=1.0-a0w-a1w S=a0w*v0["s"]+a1w*v1["s"]+a2w*v2["s"]; T=a0w*v0["t"]+a1w*v1["t"]+a2w*v2["t"] Q=a0w*v0["q"]+a1w*v1["q"]+a2w*v2["q"] if abs(Q)<1e-12: continue tu=int((S/Q)*TW) % TW; tv=int((T/Q)*TH) % TH if tu<0: tu+=TW if tv<0: tv+=TH # RTL-faithful: interp the FIXED-POINT attrs, then the 8-bit reciprocal uq=w0*uqv[0]+w1*uqv[1]+w2*uqv[2]; vq=w0*vqv[0]+w1*vqv[1]+w2*vqv[2]; qq=w0*qv[0]+w1*qv[1]+w2*qv[2] tur,tvr = persp_texel_recip(uq,vq,qq,TW,TH,idx_bits=8) # AFFINE texel: linear interp of the per-vertex texels (the under-interpolation hypothesis) au=int(w0*auv[0]+w1*auv[1]+w2*auv[2]) % TW; av=int(w0*avv[0]+w1*avv[1]+w2*avv[2]) % TH if au<0: au+=TW if av<0: av+=TH mw = min(w0,w1,w2) interior = 1 if mw > 0.04 else 0 # away from this triangle's own edges deep = 1 if mw > 0.15 else 0 # DEEP interior — far from any edge => seam-free zone o=py*FBPXW+px refmap[o] = (1<<31)|(interior<<30)|(deep<<29)|((tu&0x1FF)<<9)|(tv&0x1FF) refmap_rec[o] = 
(1<<31)|(interior<<30)|((tur&0x1FF)<<9)|(tvr&0x1FF) refmap_aff[o] = (1<<31)|(interior<<30)|((au&0x1FF)<<9)|(av&0x1FF) p=pal[idx[tv*TW+tu]&0xFF]; refpix[o]=(p&0xFF,(p>>8)&0xFF,(p>>16)&0xFF) pr=pal[idx[tvr*TW+tur]&0xFF];refpix_rec[o]=(pr&0xFF,(pr>>8)&0xFF,(pr>>16)&0xFF) covered = sum(1 for w in refmap if w>>31) print(f"[Ch350] host reference: {covered} covered FB pixels") # emit the RTL-faithful refmap + PNG for the Ch351 oracle with open(os.path.join(DATA,"sh3_real_refmap_recip.mem"),"w") as f: f.write("// Ch351 RTL-faithful (8-bit reciprocal) per-pixel texel map. gitignored.\n") for x in refmap_rec: f.write(f"{x & 0xFFFFFFFF:08x}\n") with open(os.path.join(DATA,"sh3_real_refmap_affine.mem"),"w") as f: f.write("// Ch351 AFFINE (per-vertex texel, linear interp) per-pixel texel map. gitignored.\n") for x in refmap_aff: f.write(f"{x & 0xFFFFFFFF:08x}\n") try: from PIL import Image Image.new("RGB",(FBPXW,CH)).copy() # noop guard im2=Image.new("RGB",(FBPXW,CH)); im2.putdata(refpix_rec) im2.save(os.path.join(ROOT,"captures","gs","silenthill3","extracted","recon","sh3_real_ref_recip.png")) except Exception as e: print("(PIL skip recip png:", e, ")") # --- setup bootlet: BOARD-READY CLUT upload (Ch347 pattern). The CSM1 loader reads the CLUT from VRAM at # CBP via GRID offsets; sh3_real_clut.mem IS the raw grid bytes, so a LINEAR 256x1 BITBLT of those 256 words # (PSMCT32_SWIZZLE=0) places byte CBP+k*4 = word k = the grid byte verbatim -> the loader reads it correctly. # The upload also fires dma_done_seen, which auto-starts the feeder (C_SETUP->C_RUN). On the board this is the # ONLY CLUT path (no TB backdoor); the SIM TB still backdoors the same bytes (redundant + consistent). 
clut_words_b = [int.from_bytes(clut_bytes[i*4:i*4+4],"little") for i in range(256)] RAM_QWORDS = 512 pay = [] pay.append(bake.giftag(1,0,0,4,int('E'*4,16))) pay.append(bake.aplusd(bake.R_BITBLTBUF, bake.bitbltbuf_pack(CBP, 1, 0x00))) # PSMCT32 dest @CBP pay.append(bake.aplusd(bake.R_TRXPOS, bake.trxpos_pack(0,0))) pay.append(bake.aplusd(bake.R_TRXREG, bake.trxreg_pack(256,1))) # 256x1 contiguous pay.append(bake.aplusd(bake.R_TRXDIR, bake.trxdir_pack(0))) pay.append(bake.giftag(256//4,1,2,0,0)) # 64 qwords image data, EOP for q in range(256//4): word=0 for lane in range(4): word |= (clut_words_b[q*4+lane]&0xFFFFFFFF) << (32*lane) pay.append(word) qwc=len(pay) disp_hi=((CH-1)<<12)|(FBPXW-1) with open(os.path.join(DATA,"payload_sh3_real.mem"),"w") as f: f.write(f"// Ch352 LOCAL SH3 real-draw setup payload (CSM1 CLUT 256x1 -> CBP={CBP}, grid bytes verbatim). gitignored. QWC={qwc}.\n") for _ in range(16): f.write(f"{0:032x}\n") for x in pay: f.write(f"{x&((1<<128)-1):032x}\n") for _ in range(RAM_QWORDS-16-qwc): f.write(f"{0:032x}\n") bake.write_bios_mem("bios_sh3_real.mem", bake.build_textured_demo_bootlet_disp(qwc, disp_hi, FBW), f"Ch352 LOCAL SH3 real-draw setup bootlet (QWC={qwc}, DISPLAY1={FBPXW}x{CH}). gitignored.") print(f"[Ch352] setup bootlet: payload {qwc} qw (CSM1 CLUT 256x1 upload to CBP={CBP}).") # --- emit --- def wmem(name, words, width_hex, banner): with open(os.path.join(DATA,name),"w") as f: f.write(f"// {banner}\n") for x in words: f.write(f"{x & ((1<<(4*width_hex))-1):0{width_hex}x}\n") # de-swizzled index image, packed 4 idx/word (LINEAR row-major: byte v*TW+u = idx(u,v)) idx_words = [idx[i*4]|(idx[i*4+1]<<8)|(idx[i*4+2]<<16)|(idx[i*4+3]<<24) for i in range(TW*TH//4)] wmem("sh3_real_idx.mem", idx_words, 8, "Ch350 LOCAL SH3 512x512 de-swizzled indices (4/word) for TB ref. gitignored.") # LPDDR texture: the bram-top defaults PSMT8_SWIZZLE=0 (LINEAR read, like Ch347/348) — so store the texture # LINEAR (de-swizzled, = idx_words). 
The texture unit's linear PSMT8 addr (base + v*TBW*64 + u) then reads # texel(u,v)=idx(u,v). (The raw SWIZZLED bytes would need PSMT8_SWIZZLE=1; kept as sh3_real_tex_lpddr_swz.mem # for that variant.) This is the Ch299/Ch350 root-cause fix: linear texture <-> linear read. wmem("sh3_real_tex_lpddr.mem", idx_words, 8, "Ch350 LOCAL SH3 512x512 LINEAR de-swizzled indices -> LPDDR model (PSMT8_SWIZZLE=0). gitignored.") # Ch352 guardrail #2 — board-side READBACK CHECKSUM: after the HPS write-probe uploads these 65536 words to # LPDDR @0x200000, the HPS read-probe should read them back and confirm sum32 + xor32 BEFORE the cache fill. tex_sum = sum(idx_words) & 0xFFFFFFFF tex_xor = 0 for w in idx_words: tex_xor ^= w print(f"[Ch352] TEXTURE→LPDDR upload checksum (verify via read-probe before cache-fill): " f"{len(idx_words)} words @ LPDDR 0x{LPDDR_TEX_BASE:07x} sum32=0x{tex_sum:08x} xor32=0x{tex_xor:08x}") tex_swz_words = [int.from_bytes(tex_swz[i*4:i*4+4],"little") for i in range(TEX_BYTES//4)] wmem("sh3_real_tex_lpddr_swz.mem", tex_swz_words, 8, "Ch350 LOCAL SH3 512x512 SWIZZLED bytes -> LPDDR (for PSMT8_SWIZZLE=1 variant). gitignored.") clut_words = [int.from_bytes(clut_bytes[i*4:i*4+4],"little") for i in range(256)] wmem("sh3_real_clut.mem", clut_words, 8, "Ch350 LOCAL SH3 CSM1 CLUT (raw CT32-grid bytes @CBP) -> BRAM (HW CSM1 loader reads these in grid order). gitignored.") # de-gridded palette pal[i] (what the HW CSM1 grid-read produces) -> TB reference expected colors wmem("sh3_real_pal.mem", [p & 0xFFFFFFFF for p in pal], 8, "Ch350 LOCAL SH3 de-gridded palette pal[i] (grid-read of the CBP bytes) for the TB reference. gitignored.") bake.write_feeder_stg_mem("feeder_sh3_real.mem", stg, "Ch350 LOCAL SH3 REAL draw (idx89761) feeder staging: triangle list + TEX0(PSMT8,CSM1,CLD=1,DECAL). gitignored.", total=STG_WORDS) wmem("sh3_real_refmap.mem", refmap, 8, "Ch350 LOCAL per-FB-pixel covered|interior|tu|tv reference map. 
gitignored.") # params include for the TB with open(os.path.join(DATA,"sh3_real_params.vh"),"w") as f: f.write("// Ch350 LOCAL generated params for tb_top_psmct32_sh3_real_draw_demo. gitignored.\n") f.write(f"localparam int FBW = {FBW};\n") f.write(f"localparam int FBPXW = {FBPXW};\n") f.write(f"localparam int FBH = {CH};\n") f.write(f"localparam int VRAM_BYTES_P = {VRAM_BYTES};\n") f.write(f"localparam int CROP_CX0 = {CX0};\n") f.write(f"localparam int CROP_CY0 = {CY0};\n") f.write(f"localparam int CLUT_CBP = {CBP};\n") f.write(f"localparam int NEW_TBP = {NEW_TBP};\n") f.write(f"localparam int TEX_VRAM_BASE= {TEX_VRAM_BASE};\n") f.write(f"localparam int TEX_BYTES = {TEX_BYTES};\n") f.write(f"localparam [29:0] LPDDR_TEX_BASE = 30'h{LPDDR_TEX_BASE:07x};\n") f.write(f"localparam int N_BEATS = {TEX_BYTES//32};\n") f.write(f"localparam int STG_WORDS = {STG_WORDS};\n") f.write(f"localparam int TW = {TW};\n") f.write(f"localparam int TH = {TH};\n") # eyeball PNG try: from PIL import Image im=Image.new("RGB",(FBPXW,CH)); im.putdata(refpix) im.save(os.path.join(ROOT,"captures","gs","silenthill3","extracted","recon","sh3_real_ref.png")) print("[Ch350] wrote sh3_real_ref.png") except Exception as e: print("(PIL skipped:", e, ")") print(f"[Ch350] emitted fixtures -> {DATA} (LOCAL). TEX_VRAM_BASE=0x{TEX_VRAM_BASE:x} TBP={NEW_TBP} CBP={CBP} " f"LPDDR_TEX_BASE=0x{LPDDR_TEX_BASE:x} N_BEATS={TEX_BYTES//32}") if __name__ == "__main__": main(sys.argv)