"""Single source of truth for article topic/flavor categories. Both the LLM response schema (enum constraints) and the post-hoc validation in normalize_scores import from here, so the allowed values can never drift apart. Adjusting a category here + re-running `classify` is all it takes to reshape the browsable feeds. """ from __future__ import annotations # Primary topic — exactly one per article. Used for ranking, brief balance, and # source reports (the "machine organization" axis). TOPICS: dict[str, str] = { "science": "research, discoveries, space, physics", "technology": "computing, AI, engineering, gadgets, digital tools", "environment": "conservation, climate solutions, ecosystems, clean energy", "health": "medicine, wellbeing, mental health, public health", "community": "local action, humanitarian work, social progress, kindness, fair work", "culture": "arts, history, heritage, sport, human-interest", "animals": "wildlife, nature discoveries, charming animal stories", "learning": "education, personal growth, practical knowledge, curiosity", } # Groupings — 1–4 per article, the "human wandering" axis. A controlled # vocabulary (never free-form) organised into calm families for the Explore UI. # Families live in code, not the DB. Tag slugs are lowercase, hyphenated. 
# Shape: family display name -> {"description": str, "tags": [slug, ...]}.
FAMILIES: dict[str, dict] = {
    "Discovery & Wonder": {
        "description": "Awe, science, and the natural world.",
        "tags": [
            "science",
            "space",
            "animals",
            "nature",
            "archaeology",
            "technology",
            "curiosity",
        ],
    },
    "People & Kindness": {
        "description": "Community, generosity, and human warmth.",
        "tags": [
            "community",
            "helping",
            "culture",
            "generosity",
            "resilience",
            "local-wins",
        ],
    },
    "Solutions & Progress": {
        "description": "Problems being solved.",
        "tags": [
            "environment",
            "climate-solutions",
            "public-health",
            "cities",
            "clean-energy",
            "innovation",
        ],
    },
    "Mind & Craft": {
        "description": "Ideas, learning, and making.",
        "tags": [
            "learning",
            "ideas",
            "arts",
            "books",
            "creativity",
            "perspective",
            "work-life",
            "food",
        ],
    },
}

# Flat allowed-tag set (union of all families), for enum + validation.
# dict.fromkeys de-duplicates while keeping first-seen order.
ALLOWED_TAGS: tuple[str, ...] = tuple(
    dict.fromkeys(t for f in FAMILIES.values() for t in f["tags"])
)
MAX_TAGS = 4

# Tonal axis: why the story is worth surfacing in a calm, uplifting digest.
FLAVORS: dict[str, str] = {
    "breakthrough": "a significant advance or innovation with clear public benefit",
    "discovery": "newly found or learned; calm and fascinating, low on agency",
    "solution": "people actively repairing, restoring, or solving a problem",
    "feelgood": "a heartwarming human, community, or kindness story",
    "perspective": "useful advice, insight, or framing the reader can apply",
}

DEFAULT_TOPIC = "science"
DEFAULT_FLAVOR = "discovery"


def coerce_topic(value: object) -> str:
    """Return *value* as a known topic slug, or ``DEFAULT_TOPIC``.

    The value is stringified, stripped, and lower-cased before the lookup;
    falsy input (``None``, ``""``, ``0``) is treated as an empty string and
    therefore falls back to the default.
    """
    text = str(value or "").strip().lower()
    return text if text in TOPICS else DEFAULT_TOPIC


def coerce_flavor(value: object) -> str:
    """Return *value* as a known flavor slug, or ``DEFAULT_FLAVOR``.

    Normalisation matches ``coerce_topic``: stringify, strip, lower-case.
    """
    text = str(value or "").strip().lower()
    return text if text in FLAVORS else DEFAULT_FLAVOR


def coerce_tags(value: object, max_tags: int = MAX_TAGS) -> list[str]:
    """Validate a model-supplied tag list against the controlled vocabulary.

    Args:
        value: Candidate tags. Anything that is not a ``list`` yields ``[]``.
        max_tags: Upper bound on the number of tags returned. Zero or a
            negative number yields ``[]``.

    Returns:
        The allowed tags from *value*, normalised (stripped, lower-cased),
        de-duplicated, in first-seen order, and truncated to *max_tags*.
    """
    # Guard the cap up front: checking it only after an append would let one
    # tag through even when the caller asked for none.
    if not isinstance(value, list) or max_tags <= 0:
        return []
    out: list[str] = []
    for item in value:
        tag = str(item).strip().lower()
        if tag in ALLOWED_TAGS and tag not in out:
            out.append(tag)
            if len(out) >= max_tags:
                break
    return out


def tags_prompt_block() -> str:
    """Render one ``- <family>: <tag>, <tag>, ...`` line per family."""
    return "\n".join(f"- {family}: {', '.join(d['tags'])}" for family, d in FAMILIES.items())


def _bullet_list(mapping: dict[str, str]) -> str:
    """Render *mapping* as newline-joined ``- <key>: <description>`` lines."""
    return "\n".join(f"- {key}: {desc}" for key, desc in mapping.items())


def topics_prompt_block() -> str:
    """Render the topic vocabulary as a bullet list."""
    return _bullet_list(TOPICS)


def flavors_prompt_block() -> str:
    """Render the flavor vocabulary as a bullet list."""
    return _bullet_list(FLAVORS)