Fix LLM classify for newer OpenAI-compatible servers

- Use json_schema structured output (newer LM Studio rejects json_object),
  escalating through json_schema -> json_object -> text and pinning the
  first format the server accepts to avoid wasted round-trips.
- Make per-article failures non-fatal and commit incrementally so a single
  timeout no longer discards the whole batch.
- Raise default timeout to 180s (configurable via GOODNEWS_LLM_TIMEOUT) for
  larger local reasoning models.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
jay
2026-05-30 01:21:05 +00:00
parent 068073423f
commit f4842ed100
+81 -20
View File
@@ -10,6 +10,49 @@ from dataclasses import dataclass
# Base URL used when GOODNEWS_LLM_BASE_URL is not set in the environment.
DEFAULT_BASE_URL = "http://127.0.0.1:1234/v1"
# Model name used when GOODNEWS_LLM_MODEL is not set in the environment.
DEFAULT_MODEL = "gpt-oss"
# Request timeout (180s per the commit message) used when GOODNEWS_LLM_TIMEOUT
# is not set; it is also the default for LocalModelClient.timeout.
DEFAULT_TIMEOUT = 180
# JSON Schema describing one classification result. It is sent as a
# json_schema response_format, which newer LM Studio / OpenAI-compatible
# servers expect (older ones accepted json_object); the client tries the
# schema first and falls back so it keeps working across server versions.
_SCORE_FIELD = {"type": "integer", "minimum": 0, "maximum": 10}

# Every score shares the same 0-10 integer definition, so the names are listed
# once and reused for both "required" and "properties".
_SCORE_KEYS = (
    "constructive_score",
    "cortisol_score",
    "ragebait_score",
    "agency_score",
    "human_benefit_score",
    "novelty_score",
    "pr_risk_score",
)

CLASSIFICATION_SCHEMA = {
    "type": "object",
    "additionalProperties": False,
    "required": [*_SCORE_KEYS, "accepted", "reason_code", "reason_text"],
    "properties": {
        **dict.fromkeys(_SCORE_KEYS, _SCORE_FIELD),
        "accepted": {"type": "boolean"},
        "reason_code": {"type": "string"},
        "reason_text": {"type": "string"},
    },
}
# Candidate response_format values, ordered from most to least structured.
# A client records the index of the first variant its server accepts, so later
# calls do not repeat requests that are already known to be rejected.
_RESPONSE_FORMATS = (
    {
        "type": "json_schema",
        "json_schema": {
            "name": "classification",
            "strict": True,
            "schema": CLASSIFICATION_SCHEMA,
        },
    },
    {"type": "json_object"},
    None,  # send no response_format key at all
)
SYSTEM_PROMPT = """You classify article metadata for a calm constructive-news digest.
@@ -39,7 +82,9 @@ class LocalModelClient:
base_url: str
model: str
api_key: str | None = None
timeout: int = 90
timeout: int = DEFAULT_TIMEOUT
# Index into _RESPONSE_FORMATS that the server accepts; discovered lazily.
_response_format_idx: int | None = None
@classmethod
def from_env(cls) -> "LocalModelClient":
@@ -47,25 +92,36 @@ class LocalModelClient:
base_url=os.environ.get("GOODNEWS_LLM_BASE_URL", DEFAULT_BASE_URL).rstrip("/"),
model=os.environ.get("GOODNEWS_LLM_MODEL", DEFAULT_MODEL),
api_key=os.environ.get("GOODNEWS_LLM_API_KEY"),
timeout=int(os.environ.get("GOODNEWS_LLM_TIMEOUT", DEFAULT_TIMEOUT)),
)
def classify(self, article: sqlite3.Row) -> dict:
# Sends the system prompt plus the article prompt through self._chat() and
# returns whatever _chat() returns.
# NOTE(review): this span comes from a rendered diff whose +/- markers and
# indentation were lost, so two method bodies appear back to back. The first
# (one json_object request, retried once without response_format on HTTP 400)
# looks like the pre-commit code; the second (escalation over
# _RESPONSE_FORMATS) looks like its replacement. Confirm against the repository.
payload = {
"model": self.model,
"temperature": 0.1,
"messages": [
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": _article_prompt(article)},
],
"response_format": {"type": "json_object"},
}
try:
return self._chat(payload)
except RuntimeError as exc:
# Only an error whose text contains "HTTP 400" triggers the retry.
if "HTTP 400" not in str(exc):
raise
payload.pop("response_format", None)
return self._chat(payload)
# NOTE(review): presumably the post-commit body starts here.
messages = [
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": _article_prompt(article)},
]
# If we already learned which response_format the server accepts, use it.
# No fallback happens on this path: any error from _chat() propagates.
if self._response_format_idx is not None:
return self._chat(self._build_payload(messages, _RESPONSE_FORMATS[self._response_format_idx]))
# Otherwise escalate through the variants, pinning the first that works.
last_exc: RuntimeError | None = None
for idx, fmt in enumerate(_RESPONSE_FORMATS):
try:
result = self._chat(self._build_payload(messages, fmt))
self._response_format_idx = idx
return result
except RuntimeError as exc:
# A RuntimeError mentioning "HTTP 400" moves on to the next variant;
# any other RuntimeError is re-raised immediately.
if "HTTP 400" not in str(exc):
raise
last_exc = exc
# Every variant failed with HTTP 400: re-raise the last such error.
raise last_exc if last_exc else RuntimeError("no usable response_format")
def _build_payload(self, messages: list[dict], response_format: dict | None) -> dict:
payload = {"model": self.model, "temperature": 0.1, "messages": messages}
if response_format is not None:
payload["response_format"] = response_format
return payload
def list_models(self) -> list[str]:
headers = {}
@@ -125,13 +181,18 @@ def classify_articles(
rows = _classification_candidates(conn, limit=limit, include_rejected=include_rejected)
results = []
for row in rows:
scores = client.classify(row)
try:
scores = client.classify(row)
except RuntimeError as exc:
# One slow/failed article (timeout, bad response) shouldn't sink the
# whole batch or discard work already committed. Skip and continue.
print(f"[{row['id']}] skipped: {exc}")
continue
scores = normalize_scores(scores, model_name=client.model)
results.append((row["id"], scores))
if not dry_run:
upsert_article_score(conn, row["id"], scores)
if not dry_run:
conn.commit()
conn.commit()
return results