Fix LLM classify for newer OpenAI-compatible servers

- Use json_schema structured output (newer LM Studio rejects json_object), falling back through json_schema -> json_object -> plain text and pinning the first format the server accepts to avoid wasted round-trips.
- Make per-article failures non-fatal and commit incrementally, so a single timeout no longer discards the whole batch.
- Raise the default timeout to 180s (configurable via GOODNEWS_LLM_TIMEOUT) for larger local reasoning models.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-30 01:21:05 +00:00
parent 068073423f
commit f4842ed100
1 changed files with 81 additions and 20 deletions
@@ -10,6 +10,49 @@ from dataclasses import dataclass

 DEFAULT_BASE_URL = "http://127.0.0.1:1234/v1"
 DEFAULT_MODEL = "gpt-oss"
+DEFAULT_TIMEOUT = 180
+
+
+# Structured-output schema. Newer LM Studio / OpenAI-compatible servers want a
+# json_schema response_format (older ones took json_object); we try schema first
+# and fall back gracefully so the client works across server versions.
+_SCORE_FIELD = {"type": "integer", "minimum": 0, "maximum": 10}
+CLASSIFICATION_SCHEMA = {
+    "type": "object",
+    "additionalProperties": False,
+    "required": [
+        "constructive_score",
+        "cortisol_score",
+        "ragebait_score",
+        "agency_score",
+        "human_benefit_score",
+        "novelty_score",
+        "pr_risk_score",
+        "accepted",
+        "reason_code",
+        "reason_text",
+    ],
+    "properties": {
+        "constructive_score": _SCORE_FIELD,
+        "cortisol_score": _SCORE_FIELD,
+        "ragebait_score": _SCORE_FIELD,
+        "agency_score": _SCORE_FIELD,
+        "human_benefit_score": _SCORE_FIELD,
+        "novelty_score": _SCORE_FIELD,
+        "pr_risk_score": _SCORE_FIELD,
+        "accepted": {"type": "boolean"},
+        "reason_code": {"type": "string"},
+        "reason_text": {"type": "string"},
+    },
+}
+
+# Response-format variants tried in order. Once one succeeds for a client, it is
+# pinned so we stop paying failed round-trips on every subsequent call.
+_RESPONSE_FORMATS = (
+    {"type": "json_schema", "json_schema": {"name": "classification", "strict": True, "schema": CLASSIFICATION_SCHEMA}},
+    {"type": "json_object"},
+    None,
+)


 SYSTEM_PROMPT = """You classify article metadata for a calm constructive-news digest.
@@ -39,7 +82,9 @@ class LocalModelClient:
    base_url: str
    model: str
    api_key: str | None = None
-    timeout: int = 90
+    timeout: int = DEFAULT_TIMEOUT
+    # Index into _RESPONSE_FORMATS that the server accepts; discovered lazily.
+    _response_format_idx: int | None = None

    @classmethod
    def from_env(cls) -> "LocalModelClient":
@@ -47,25 +92,36 @@ class LocalModelClient:
            base_url=os.environ.get("GOODNEWS_LLM_BASE_URL", DEFAULT_BASE_URL).rstrip("/"),
            model=os.environ.get("GOODNEWS_LLM_MODEL", DEFAULT_MODEL),
            api_key=os.environ.get("GOODNEWS_LLM_API_KEY"),
+            timeout=int(os.environ.get("GOODNEWS_LLM_TIMEOUT", DEFAULT_TIMEOUT)),
        )

    def classify(self, article: sqlite3.Row) -> dict:
-        payload = {
-            "model": self.model,
-            "temperature": 0.1,
-            "messages": [
-                {"role": "system", "content": SYSTEM_PROMPT},
-                {"role": "user", "content": _article_prompt(article)},
-            ],
-            "response_format": {"type": "json_object"},
-        }
-        try:
-            return self._chat(payload)
-        except RuntimeError as exc:
-            if "HTTP 400" not in str(exc):
-                raise
-            payload.pop("response_format", None)
-            return self._chat(payload)
+        messages = [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": _article_prompt(article)},
+        ]
+        # If we already learned which response_format the server accepts, use it.
+        if self._response_format_idx is not None:
+            return self._chat(self._build_payload(messages, _RESPONSE_FORMATS[self._response_format_idx]))
+
+        # Otherwise escalate through the variants, pinning the first that works.
+        last_exc: RuntimeError | None = None
+        for idx, fmt in enumerate(_RESPONSE_FORMATS):
+            try:
+                result = self._chat(self._build_payload(messages, fmt))
+                self._response_format_idx = idx
+                return result
+            except RuntimeError as exc:
+                if "HTTP 400" not in str(exc):
+                    raise
+                last_exc = exc
+        raise last_exc if last_exc else RuntimeError("no usable response_format")
+
+    def _build_payload(self, messages: list[dict], response_format: dict | None) -> dict:
+        payload = {"model": self.model, "temperature": 0.1, "messages": messages}
+        if response_format is not None:
+            payload["response_format"] = response_format
+        return payload

    def list_models(self) -> list[str]:
        headers = {}
@@ -125,13 +181,18 @@ def classify_articles(
    rows = _classification_candidates(conn, limit=limit, include_rejected=include_rejected)
    results = []
    for row in rows:
-        scores = client.classify(row)
+        try:
+            scores = client.classify(row)
+        except RuntimeError as exc:
+            # One slow/failed article (timeout, bad response) shouldn't sink the
+            # whole batch or discard work already committed. Skip and continue.
+            print(f"[{row['id']}] skipped: {exc}")
+            continue
        scores = normalize_scores(scores, model_name=client.model)
        results.append((row["id"], scores))
        if not dry_run:
            upsert_article_score(conn, row["id"], scores)
-    if not dry_run:
-        conn.commit()
+            conn.commit()
    return results