Sources hardening (Codex audit): promote-time dedup, postJSON timeout, host-only feed_key
Three follow-ups from Codex's audit of the deep-preview/search/dedup work:
- Promote-time duplicate guard: promote_candidate() now re-checks
find_existing_feed() and raises DuplicateFeedError → 409, so an
old/CLI/direct-DB candidate or a race can't bypass the add-time check and
silently overwrite a live source's settings via upsert. (sources scanned
first, so a real source collision wins over the candidate matching itself.)
- postJSON/putJSON/delJSON gain opt-in {timeout} (AbortController, default
none so other calls are unchanged); deep preview uses 120s and surfaces a
calm "timed out" message instead of pinning the button on "Deep-checking…"
if the LAN model stalls.
- feed_key() now lowercases the host only, not the whole URL — paths/queries
can be case-significant; scheme/www/trailing-slash/host-case still collapse.
Tests: test_candidate_deep_preview_and_dedup extended — promote succeeds once,
then a re-promote of the same candidate is refused 409. 224 pytest + 11 vitest.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+34
-18
@@ -10,34 +10,50 @@ export async function getJSON(url, { timeout = 10000 } = {}) {
|
||||
}
|
||||
}
|
||||
|
||||
export async function postJSON(url, body) {
|
||||
return sendJSON('POST', url, body);
|
||||
export async function postJSON(url, body, opts) {
|
||||
return sendJSON('POST', url, body, opts);
|
||||
}
|
||||
|
||||
export async function putJSON(url, body) {
|
||||
return sendJSON('PUT', url, body);
|
||||
export async function putJSON(url, body, opts) {
|
||||
return sendJSON('PUT', url, body, opts);
|
||||
}
|
||||
|
||||
export async function delJSON(url) {
|
||||
return sendJSON('DELETE', url);
|
||||
export async function delJSON(url, opts) {
|
||||
return sendJSON('DELETE', url, undefined, opts);
|
||||
}
|
||||
|
||||
async function sendJSON(method, url, body) {
|
||||
// timeout is opt-in (default: none) so normal calls are unchanged, but slow
|
||||
// model-backed actions (e.g. deep preview) can bound the wait and fail calmly
|
||||
// instead of leaving a button stuck if the LAN model hangs.
|
||||
async function sendJSON(method, url, body, { timeout } = {}) {
|
||||
const opts = { method, credentials: 'same-origin' };
|
||||
if (body !== undefined) {
|
||||
opts.headers = { 'Content-Type': 'application/json' };
|
||||
opts.body = JSON.stringify(body);
|
||||
}
|
||||
const r = await fetch(url, opts);
|
||||
if (!r.ok) {
|
||||
let msg = r.statusText;
|
||||
try {
|
||||
const j = await r.json();
|
||||
if (j && j.detail) msg = j.detail;
|
||||
} catch {
|
||||
/* non-JSON error body */
|
||||
}
|
||||
throw new Error(msg);
|
||||
let timer;
|
||||
if (timeout) {
|
||||
const ctrl = new AbortController();
|
||||
opts.signal = ctrl.signal;
|
||||
timer = setTimeout(() => ctrl.abort(), timeout);
|
||||
}
|
||||
try {
|
||||
const r = await fetch(url, opts);
|
||||
if (!r.ok) {
|
||||
let msg = r.statusText;
|
||||
try {
|
||||
const j = await r.json();
|
||||
if (j && j.detail) msg = j.detail;
|
||||
} catch {
|
||||
/* non-JSON error body */
|
||||
}
|
||||
throw new Error(msg);
|
||||
}
|
||||
return await r.json();
|
||||
} catch (e) {
|
||||
if (e?.name === 'AbortError') throw new Error('Timed out — the server took too long to respond.');
|
||||
throw e;
|
||||
} finally {
|
||||
clearTimeout(timer);
|
||||
}
|
||||
return r.json();
|
||||
}
|
||||
|
||||
@@ -286,8 +286,12 @@
|
||||
// model's true acceptance view, not the fast heuristic estimate.
|
||||
async function deepPreview(c) {
|
||||
c._err = ''; c._deep = true;
|
||||
try { Object.assign(c, await postJSON(`/api/admin/candidates/${c.id}/preview?deep=true`), { _deep: false }); }
|
||||
catch (e) { c._err = e?.message || 'Deep preview failed.'; c._deep = false; }
|
||||
try {
|
||||
// ~5-7s/item on the LAN model; bound the wait so a model stall can't pin
|
||||
// the button on "Deep-checking…" forever.
|
||||
const res = await postJSON(`/api/admin/candidates/${c.id}/preview?deep=true`, undefined, { timeout: 120000 });
|
||||
Object.assign(c, res, { _deep: false });
|
||||
} catch (e) { c._err = e?.message || 'Deep preview failed.'; c._deep = false; }
|
||||
}
|
||||
async function promoteCandidate(c) {
|
||||
c._err = '';
|
||||
|
||||
@@ -1207,6 +1207,13 @@ def create_app() -> FastAPI:
|
||||
trust_score=body.trust_score, pr_risk_score=body.pr_risk_score,
|
||||
poll_interval_minutes=body.poll_interval_minutes,
|
||||
)
|
||||
except sources.DuplicateFeedError as exc:
|
||||
ex = exc.existing
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail=f"“{ex['name']}” is already a source ({ex['status']}) — "
|
||||
"promote skipped so its settings aren't overwritten.",
|
||||
)
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=404, detail="candidate not found")
|
||||
src = conn.execute(
|
||||
|
||||
+22
-4
@@ -70,13 +70,23 @@ def upsert_sources(conn: sqlite3.Connection, source_defs: list[dict]) -> int:
|
||||
# --- Duplicate detection (catch the same feed added twice) --------------------
|
||||
|
||||
|
||||
class DuplicateFeedError(Exception):
|
||||
"""Raised when an operation would create a second copy of an existing feed.
|
||||
Carries the existing match so the caller can name it in the response."""
|
||||
|
||||
def __init__(self, existing: dict):
|
||||
self.existing = existing
|
||||
super().__init__(f"feed already exists as {existing['kind']} “{existing['name']}”")
|
||||
|
||||
|
||||
def feed_key(url: str) -> str:
|
||||
"""A loose comparison key for spotting the same feed added twice despite
|
||||
trivial differences (scheme, www, trailing slash, case). Compare-only — the
|
||||
feed_url is always STORED exactly as entered; this just powers dup warnings."""
|
||||
trivial differences (scheme, www, trailing slash, host case). Compare-only —
|
||||
the feed_url is always STORED exactly as entered; this just powers dup checks.
|
||||
Only the host is lowercased: URL paths/queries can be case-significant."""
|
||||
try:
|
||||
p = urlsplit((url or "").strip().lower())
|
||||
host = p.netloc.removeprefix("www.")
|
||||
p = urlsplit((url or "").strip())
|
||||
host = p.netloc.lower().removeprefix("www.")
|
||||
path = p.path.rstrip("/")
|
||||
return host + path + (("?" + p.query) if p.query else "")
|
||||
except Exception: # noqa: BLE001 — never let a weird URL break add
|
||||
@@ -170,6 +180,14 @@ def promote_candidate(
|
||||
if cand is None:
|
||||
raise ValueError(f"no candidate with id {candidate_id}")
|
||||
|
||||
# Re-check duplicates at promote time too — the add-time guard can be bypassed
|
||||
# by old/CLI/direct-DB candidates or a race, and upsert_sources would silently
|
||||
# overwrite the existing source's settings. (sources are scanned first, so a
|
||||
# real source collision wins over this candidate matching itself.)
|
||||
existing = find_existing_feed(conn, cand["feed_url"])
|
||||
if existing and existing["kind"] == "source":
|
||||
raise DuplicateFeedError(existing)
|
||||
|
||||
name = cand["name"] or urlsplit(cand["feed_url"]).netloc or cand["feed_url"]
|
||||
upsert_sources(
|
||||
conn,
|
||||
|
||||
+6
-3
@@ -158,12 +158,15 @@ def test_candidate_deep_preview_and_dedup(tmp_path, monkeypatch):
|
||||
# Deep preview runs the real classifier on the smaller sample.
|
||||
deep = tc.post(f"/api/admin/candidates/{cand['id']}/preview?deep=true").json()
|
||||
assert deep["preview"]["classified"] is True and deep["preview"]["sampled"] == 8
|
||||
# Dedup: exact + trivial variants (scheme / www / trailing slash / case) are refused.
|
||||
# Dedup at ADD: exact + trivial variants (scheme / www / trailing slash / host case).
|
||||
assert tc.post("/api/admin/candidates", json={"feed_url": "https://news.test/feed"}).status_code == 409
|
||||
assert tc.post("/api/admin/candidates", json={"feed_url": "http://www.news.test/feed/"}).status_code == 409
|
||||
# Once promoted to a live source, re-adding is still refused.
|
||||
tc.post(f"/api/admin/candidates/{cand['id']}/promote", json={})
|
||||
# Promote succeeds the first time and creates the live source.
|
||||
assert tc.post(f"/api/admin/candidates/{cand['id']}/promote", json={}).status_code == 200
|
||||
assert tc.post("/api/admin/candidates", json={"feed_url": "https://NEWS.test/feed"}).status_code == 409
|
||||
# Dedup at PROMOTE too: a stale/duplicate candidate (here, re-promoting the
|
||||
# same one) can't bypass add and overwrite the live source's settings.
|
||||
assert tc.post(f"/api/admin/candidates/{cand['id']}/promote", json={}).status_code == 409
|
||||
|
||||
|
||||
def test_candidate_reject_and_gating(tmp_path, monkeypatch):
|
||||
|
||||
Reference in New Issue
Block a user