Files
retroDE_ps2/tools/ps2_sh3_tex_upload.c
thejayman77 ec82764bef Initial commit: retroDE_ps2 — first-of-its-kind PS2 GS FPGA core (DE25-Nano / Agilex 5)
RTL (GS rasterizer, EE core stub, platform bridge, LPDDR4B path), sim regression
(272 TBs), docs, and tooling. Copyrighted PS2 content (BIOS, game code, GS dumps,
and all dump-derived textures/traces) is excluded via .gitignore and stays local.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 20:10:50 -04:00

148 lines
8.9 KiB
C

// retroDE_ps2 — ps2_sh3_tex_upload (Ch352)
//
// Uploads the reconstructed 512x512 PSMT8 SH3 texture (65536 32-bit words) into FPGA-private LPDDR4B via the
// PS2 HPS-bridge write-probe, verifies the readback, arms the texture-cache fill, and retriggers the feeder.
// One command — mmap'd register pokes (NOT 65536 devmem process spawns). Same bridge protocol as ps2_feeder.c
// and docs/hardware/ps2_lpddr_tex_test.sh (Ch322), just scaled to the full 256 KiB texture.
//
// Build on the board: gcc -O2 -o ps2_sh3_tex_upload ps2_sh3_tex_upload.c
// Run (after fit+boot): sudo ./ps2_sh3_tex_upload sh3_real_tex_lpddr.mem
// (copy sh3_real_tex_lpddr.mem from sim/data/top_psmct32_raster_demo/ to the board alongside the binary.)
//
// Sequence: (1) write WRADDR=0x200000, stream 65536 words to WRDATA; (2) read each back via the read-probe and
// confirm sum32/xor32 match the file; (3) arm cache fill (0x054), poll fill_done, check beats/bytes/rd_errs;
// (4) pulse the feeder retrigger (0x0E8) so the scene re-renders with the now-warm cache.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
// Bridge register map: byte offsets from the mmap'd bridge base, plus the status-bit masks and texture sizes
// used by this tool. Direction (R/W) and bit meanings are as used by the code below.
#define OFF_LPDDR_STATUS 0x02C // R: [3] rd_pending (read-probe in flight)
#define OFF_LPDDR_RDADDR 0x03C // W: set read byte addr + trigger ; R: 32-bit word
#define OFF_LPDDR_WRADDR 0x04C // W: set LPDDR byte addr (auto-increments +4 per WRDATA write)
#define OFF_LPDDR_WRDATA 0x050 // W: data word -> single 32-bit LPDDR write + addr += 4
#define OFF_TEX_FILL_CTRL 0x054 // W[0]: arm cache fill ; R: [0]fill_done [1]wr_busy [2]write_pending (see WR_PENDING_BIT)
#define OFF_LPDDR_WR_ERRS 0x06C // R: write-probe non-OKAY (BRESP) responses (expect 0)
#define WR_PENDING_BIT 0x4 // mask for OFF_TEX_FILL_CTRL (0x054) bit2 — Ch352 STABLE write-done flag (poll instead of transient wr_busy)
#define RD_PENDING_BIT 0x8 // mask for OFF_LPDDR_STATUS (0x02C) bit3
#define OFF_TEX_FILL_BEATS 0x058 // R: beats filled (expect TEX_BYTES/32 = 8192)
#define OFF_TEX_FILL_BYTES 0x05C // R: bytes filled (expect 262144)
#define OFF_TEX_RD_ERRS 0x068 // R: fill non-OKAY read responses (expect 0)
#define OFF_TEX_FILL_CRC 0x070 // R: sum32 of EVERY word the cache wrote into tex_mem (must == file sum32)
#define OFF_FEEDER_GO 0x0E8 // W[0]: trigger/retrigger the feeder
#define N_WORDS 65536 // 32-bit words in the texture: 512*512 PSMT8 bytes / 4
#define TEX_BYTES 262144 // texture size in bytes (= N_WORDS * 4)
#define N_BEATS 8192 // TEX_BYTES / 32
// Handle for the memory-mapped bridge register window. `base` is the byte address that register offsets are
// added to; when `dry` is non-zero the accessors below never dereference it.
typedef struct {
    volatile uint8_t *base;
    int dry;
} bridge_t;

// Store the 32-bit value `v` at byte offset `off` from the window base. Does nothing in dry mode.
static void wr32(bridge_t *b, int off, uint32_t v){
    if (b->dry) return;
    volatile uint32_t *reg = (volatile uint32_t *)(b->base + off);
    *reg = v;
}

// Load the 32-bit value at byte offset `off` from the window base. Dry mode yields 0 without any access.
static uint32_t rd32(bridge_t *b, int off){
    if (b->dry) return 0;
    volatile uint32_t *reg = (volatile uint32_t *)(b->base + off);
    return *reg;
}
// Spin until every bit of `mask` reads back clear at register offset `off`, re-reading at most `max_spins`
// extra times. Returns 1 once the bits are clear, 0 if they were still set when the spin budget ran out.
static int wait_bits_clear(bridge_t *b, int off, uint32_t mask, int max_spins){
    for (int spin=0; ; spin++){
        if (!(rd32(b, off) & mask)) return 1;
        if (spin >= max_spins) return 0;
    }
}

// Parse up to `max_words` hex words (one per line) from `f` into `dst`. Blank lines and lines whose first
// non-blank character is '/' are skipped. `name` is only used in the error message.
// Returns the number of words stored, or -1 (after printing an error) if a data line is not a 32-bit hex word.
static int load_tex_words(FILE *f, const char *name, uint32_t *dst, int max_words){
    int count=0, lineno=0;
    char line[64];
    while (count<max_words && fgets(line,sizeof line,f)){
        lineno++;
        // No newline in the chunk => the physical line is longer than the buffer (e.g. a long // banner).
        // Discard its tail now, otherwise the next fgets would hand it back and it would be parsed as a word.
        if (!strchr(line,'\n')){ int c; while ((c=fgetc(f))!=EOF && c!='\n'){} }
        char *s=line;
        while (*s==' '||*s=='\t'||*s=='\r') s++;              // '\r' so blank CRLF lines count as blank
        if (*s=='/'||*s=='\n'||*s==0) continue;               // skip blank / // banner lines
        char *end;
        unsigned long w = strtoul(s,&end,16);
        if (end==s || w>0xFFFFFFFFUL){                        // nothing parsed, or wider than one word
            fprintf(stderr,"error: %s line %d: not a 32-bit hex word\n", name, lineno);
            return -1;
        }
        dst[count++] = (uint32_t)w;
    }
    return count;
}

// Entry point. Loads N_WORDS hex words from the texture file, then (unless --dry-run) maps the bridge through
// /dev/mem and: (1) streams the words to the write-probe, (2) reads every word back and compares it,
// (3) arms the texture-cache fill and reports its counters, (4) pulses the feeder retrigger.
// Options: [tex.mem] --base ADDR --lpddr-base ADDR --dry-run --no-fill --no-retrigger; PS2_BRIDGE_BASE in the
// environment sets the default bridge base.
// Exit status: 0 on completion (cache-fill anomalies are only warned about), 1 on file / mmap / readback
// failure, 2 on bad command-line usage.
int main(int argc, char **argv){
    unsigned long base = 0x40000000UL;      // PS2 HPS-bridge base (override --base or PS2_BRIDGE_BASE)
    unsigned long lpddr_base = 0x00200000;  // EMIF byte base where the texture is staged (= TEX_LPDDR_BASE RTL)
    const char *texfile = "sh3_real_tex_lpddr.mem";
    int dry=0, do_fill=1, do_retrig=1;
    char *env = getenv("PS2_BRIDGE_BASE"); if (env) base = strtoul(env,NULL,0);
    for (int i=1;i<argc;i++){
        if (!strcmp(argv[i],"--base") && i+1<argc) base = strtoul(argv[++i],NULL,0);
        else if (!strcmp(argv[i],"--lpddr-base") && i+1<argc) lpddr_base = strtoul(argv[++i],NULL,0);
        else if (!strcmp(argv[i],"--dry-run")) dry=1;
        else if (!strcmp(argv[i],"--no-fill")) do_fill=0;
        else if (!strcmp(argv[i],"--no-retrigger")) do_retrig=0;
        else if (argv[i][0] != '-') texfile = argv[i];
        else { fprintf(stderr,"usage: %s [tex.mem] [--base 0x40000000] [--lpddr-base 0x200000] [--dry-run] [--no-fill] [--no-retrigger]\n", argv[0]); return 2; }
    }
    // The address registers are written as 32-bit values, so the whole texture window must fit in 32 bits;
    // reject the value instead of letting the (uint32_t) casts below truncate it silently.
    if (lpddr_base > 0xFFFFFFFFUL - (TEX_BYTES - 1)){
        fprintf(stderr,"error: --lpddr-base 0x%lx: texture does not fit below the 32-bit address limit\n", lpddr_base);
        return 2;
    }

    // ---- load the texture hex (.mem: one 32-bit word per line) ----
    static uint32_t tex[N_WORDS];
    FILE *f = fopen(texfile,"r");
    if (!f){ fprintf(stderr,"error: cannot open '%s': %s\n", texfile, strerror(errno)); return 1; }
    int n = load_tex_words(f, texfile, tex, N_WORDS);
    fclose(f);
    if (n < 0) return 1;
    if (n != N_WORDS){ fprintf(stderr,"error: %s has %d words, expected %d\n", texfile, n, N_WORDS); return 1; }

    // expected checksums (source of truth = the file)
    uint32_t sum=0, xr=0;
    for (int i=0;i<N_WORDS;i++){ sum+=tex[i]; xr^=tex[i]; }
    printf("[ps2_sh3_tex_upload] %d words from %s sum32=0x%08x xor32=0x%08x -> LPDDR 0x%lx (bridge base 0x%lx%s)\n",
           n, texfile, sum, xr, lpddr_base, base, dry?", DRY-RUN":"");

    // ---- open the bridge ----
    bridge_t br = {0,dry}; int fd=-1; void *map=NULL;
    if (!dry){
        fd=open("/dev/mem", O_RDWR|O_SYNC);
        if (fd<0){ fprintf(stderr,"error: open /dev/mem (run as root?): %s\n", strerror(errno)); return 1; }
        map=mmap(NULL,0x1000,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(off_t)base);
        if (map==MAP_FAILED){ fprintf(stderr,"error: mmap 0x%lx: %s\n", base, strerror(errno)); close(fd); return 1; }
        br.base=(volatile uint8_t*)map;
    }

    // ---- (1) upload: set WRADDR then stream WRDATA. CRITICAL: after each word, poll the STABLE
    // write_pending flag (0x054 bit2, WR_PENDING_BIT) until it clears so the write-probe actually COMMITS
    // before the next write — otherwise the fast mmap writes outrun the CDC/AXI commit and get DROPPED (the
    // bug: most words read back as 0). The Ch322 devmem script got away with no poll only because devmem
    // process-spawns are slow. A poll that times out is counted and reported rather than ignored. ----
    wr32(&br, OFF_LPDDR_WRADDR, (uint32_t)lpddr_base);
    int wr_timeouts=0;
    for (int i=0;i<N_WORDS;i++){
        wr32(&br, OFF_LPDDR_WRDATA, tex[i]);
        if (!dry && !wait_bits_clear(&br, OFF_TEX_FILL_CTRL, WR_PENDING_BIT, 2000000)) wr_timeouts++;
    }
    if (!dry){
        uint32_t werr = rd32(&br, OFF_LPDDR_WR_ERRS);
        printf("[ps2_sh3_tex_upload] uploaded %d words (%d KiB). wr_bresp_errs=%u (exp 0)\n", N_WORDS, TEX_BYTES/1024, werr);
        if (werr) fprintf(stderr,"WARN: %u write BRESP errors.\n", werr);
        if (wr_timeouts) fprintf(stderr,"WARN: %d writes never saw write_pending clear — those words may have been dropped.\n", wr_timeouts);
    } else printf("[ps2_sh3_tex_upload] uploaded %d words (%d KiB).\n", N_WORDS, TEX_BYTES/1024);

    // ---- (2) readback verify via the read-probe (guardrail #2). Poll rd_pending (0x02C bit3) clear before
    // reading the latched word — the LPDDR read has latency; reading immediately returns 0/stale. ----
    int mism=0, rd_timeouts=0; uint32_t rsum=0, rxr=0;
    if (!dry){
        for (int i=0;i<N_WORDS;i++){
            wr32(&br, OFF_LPDDR_RDADDR, (uint32_t)(lpddr_base + (unsigned)i*4)); // set addr + trigger read
            if (!wait_bits_clear(&br, OFF_LPDDR_STATUS, RD_PENDING_BIT, 1000000)) rd_timeouts++;
            uint32_t v = rd32(&br, OFF_LPDDR_RDADDR); // latched word
            rsum+=v; rxr^=v;
            if (v != tex[i]){
                if (mism<8) fprintf(stderr," readback mismatch @word %d: got 0x%08x exp 0x%08x\n", i, v, tex[i]);
                mism++;
            }
        }
        printf("[ps2_sh3_tex_upload] readback sum32=0x%08x xor32=0x%08x mismatches=%d\n", rsum, rxr, mism);
        if (rd_timeouts) fprintf(stderr,"WARN: %d reads never saw rd_pending clear — those readback words may be stale.\n", rd_timeouts);
        if (mism){ fprintf(stderr,"FAIL: %d readback mismatches — bad upload, NOT filling cache.\n", mism); munmap(map,0x1000); close(fd); return 1; }
    }

    // ---- (3) arm the cache fill + poll fill_done; check beats/bytes/rd_errs ----
    if (do_fill && !dry){
        wr32(&br, OFF_TEX_FILL_CTRL, 0x1);
        // NOTE(review): if fill_done stays set from an earlier fill, this poll would succeed at once — confirm
        // against the RTL that arming clears it.
        int done=0;
        for (int i=0;i<200000;i++){ if (rd32(&br,OFF_TEX_FILL_CTRL)&0x1){ done=1; break; } }
        uint32_t beats=rd32(&br,OFF_TEX_FILL_BEATS), bytes=rd32(&br,OFF_TEX_FILL_BYTES), errs=rd32(&br,OFF_TEX_RD_ERRS);
        uint32_t fcrc=rd32(&br,OFF_TEX_FILL_CRC);
        printf("[ps2_sh3_tex_upload] cache fill: done=%d beats=%u (exp %d) bytes=%u (exp %d) rd_errs=%u (exp 0)\n",
               done, beats, N_BEATS, bytes, TEX_BYTES, errs);
        // The cache's sum32 over the words it wrote into tex_mem must equal the file's sum32. If it matches,
        // tex_mem is byte-correct on silicon — so any residual texture corruption is NOT the cache contents.
        printf("[ps2_sh3_tex_upload] cache fill_crc=0x%08x (exp file sum32=0x%08x) -> tex_mem %s\n",
               fcrc, sum, (fcrc==sum) ? "INTEGRITY OK" : "CORRUPT");
        if (!done || beats!=N_BEATS || bytes!=TEX_BYTES || errs!=0)
            fprintf(stderr,"WARN: cache fill stats off — texels may be wrong; check EMIF cal + LPDDR base.\n");
        if (fcrc!=sum)
            fprintf(stderr,"WARN: cache fill_crc mismatch — tex_mem corrupt on board (NOT a divider/sampler issue).\n");
    }

    // ---- (4) retrigger the feeder so the scene re-renders with the warm cache ----
    if (do_retrig && !dry){ wr32(&br, OFF_FEEDER_GO, 0x1); printf("[ps2_sh3_tex_upload] feeder retriggered.\n"); }
    if (!dry){ munmap(map,0x1000); close(fd); }
    printf("[ps2_sh3_tex_upload] DONE — check HDMI vs the crop reference (recon/sh3_real_ref.png).\n");
    return 0;
}