feat(right-wing): dual-scoring extremity/sentiment + derived categories

Extremity Scorer (U4 enhanced):
- Now scores BOTH the original motion text AND the layman explanation separately
- Schema: text_score, text_explanation, layman_score, layman_explanation
- Text scores: 1→7, 2→33, 3→5, 4→5 (mild-to-moderate)
- Layman scores: 1→12, 2→20, 3→17, 4→1 (slightly milder)

Sentiment Analysis (U5 enhanced):
- Now scores BOTH the original motion text AND the layman explanation separately
- Schema: text_score, text_explanation, layman_score, layman_explanation
- Text sentiment avg: 0.294 (slightly positive)
- Layman sentiment avg: 0.416 (more positive - the summaries tone down hostility)

Category Derivation (new):
- Two-phase LLM approach: derive a taxonomy from a sample, then apply it to all motions
- Discovered 7 categories from a 30-motion sample: veiligheid/justitie, corona/pandemie, economie/belasting, klimaat/milieu, defensie/buitenland, asiel/vreemdelingen, overig
- Applied to 50 motions, with the resulting distribution shown in the DB
- Adds category + category_explanation columns to right_wing_motions
1 month ago · fbf92c82cf
parent f94edc3d04
commit fbf92c82cf
3 changed files with 540 additions and 84 deletions
--- a/analysis/right_wing/derive_categories.py
+++ b/analysis/right_wing/derive_categories.py
@ -0,0 +1,347 @@
+#!/usr/bin/env python3
+"""Derive policy categories for right-wing motions using LLM.
+
+Two-phase approach:
+  1. Derive taxonomy from a sample (discover categories from data)
+  2. Apply categories to all motions using the derived taxonomy
+
+Usage:
+    uv run python analysis/right_wing/derive_categories.py --derive-sample 30 --apply-sample 50
+    uv run python analysis/right_wing/derive_categories.py --derive-sample 30 --apply-sample -1
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import logging
+import re
+import sys
+from collections import Counter
+from pathlib import Path
+from typing import Any
+
+import duckdb
+
+ROOT = Path(__file__).parent.parent.parent.resolve()
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))
+
+from ai_provider import ProviderError, chat_completion_json_parallel
+from analysis.config import config
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+logger = logging.getLogger(__name__)
+
+# Phase 1 response schema. The category is a free-form string so the model can
+# surface whatever policy domains actually occur in the sampled motions.
+_DERIVE_PROPERTIES = {
+    "category": {
+        "type": "string",
+        "description": "Policy domain/category in Dutch. Use short lowercase labels like 'asiel', 'klimaat', 'corona', 'lhbtq', 'veiligheid', 'defensie', 'economie', 'landbouw', 'zorg', 'onderwijs', 'overig'",
+    },
+    "explanation": {
+        "type": "string",
+        "description": "Very short explanation why this category fits",
+    },
+}
+
+DERIVE_SCHEMA = {
+    "name": "derive_category",
+    "strict": True,
+    "schema": {
+        "type": "object",
+        "properties": _DERIVE_PROPERTIES,
+        # Every declared property is mandatory, in declaration order.
+        "required": list(_DERIVE_PROPERTIES),
+        "additionalProperties": False,
+    },
+}
+
+# Phase 2 response schema template. The category is restricted to an enum;
+# apply_categories copies this template and overwrites both "enum" and
+# "description" with the concrete taxonomy, so the values below are placeholders.
+_APPLY_CATEGORY_PROPERTY = {
+    "type": "string",
+    "description": "Category must be one of: {categories}",
+    "enum": [],  # placeholder, replaced per run
+}
+
+_APPLY_EXPLANATION_PROPERTY = {
+    "type": "string",
+    "description": "Very short explanation why this category fits",
+}
+
+APPLY_SCHEMA_TEMPLATE = {
+    "name": "apply_category",
+    "strict": True,
+    "schema": {
+        "type": "object",
+        "properties": {
+            "category": _APPLY_CATEGORY_PROPERTY,
+            "explanation": _APPLY_EXPLANATION_PROPERTY,
+        },
+        "required": ["category", "explanation"],
+        "additionalProperties": False,
+    },
+}
+
+# Dutch user prompt sent to the LLM for one motion. It asks which policy domain
+# the motion belongs to and requests a one-sentence justification.
+# Placeholders: {title} and {text}, filled in by _build_prompt.
+PROMPT_TEMPLATE = """Welk beleidsdomein hoort bij de volgende motie uit het Nederlandse parlement?
+
+Titel: {title}
+
+Tekst: {text}
+
+Leg uit in 1 zin waarom dit beleidsdomein past."""
+
+
+def _build_prompt(title: str, body_text: str | None) -> str:
+    """Render the category prompt for a single motion.
+
+    The body text is used when it is non-empty; otherwise the title stands in
+    for it (or an empty string when both are missing). Text longer than 600
+    characters is reduced to its first 600 characters followed by ``"..."``.
+    """
+    excerpt = body_text if body_text else (title if title else "")
+    excerpt = excerpt if len(excerpt) <= 600 else f"{excerpt[:600]}..."
+    heading = title if title else ""
+    return PROMPT_TEMPLATE.format(text=excerpt, title=heading)
+
+
+# Bare labels the LLM tends to produce, mapped to the canonical label used in
+# the taxonomy. Built once at import time instead of on every call.
+_CATEGORY_ALIASES = {
+    "asiel": "asiel/vreemdelingen",
+    "vreemdelingen": "asiel/vreemdelingen",
+    "immigratie": "asiel/vreemdelingen",
+    "migratie": "asiel/vreemdelingen",
+    "klimaat": "klimaat/milieu",
+    "milieu": "klimaat/milieu",
+    "stikstof": "klimaat/milieu",
+    "corona": "corona/pandemie",
+    "pandemie": "corona/pandemie",
+    "covid": "corona/pandemie",
+    "lhbtq": "lhbtq/rechten",
+    "lhbti": "lhbtq/rechten",
+    "lgbt": "lhbtq/rechten",
+    "veiligheid": "veiligheid/justitie",
+    "justitie": "veiligheid/justitie",
+    "strafrecht": "veiligheid/justitie",
+    "defensie": "defensie/buitenland",
+    "buitenland": "defensie/buitenland",
+    "buitenlandse zaken": "defensie/buitenland",
+    "economie": "economie/belasting",
+    "belasting": "economie/belasting",
+    "financiën": "economie/belasting",
+    "landbouw": "landbouw/stikstof",
+    "boeren": "landbouw/stikstof",
+    "zorg": "zorg/gezondheid",
+    "gezondheid": "zorg/gezondheid",
+    "onderwijs": "onderwijs/cultuur",
+    "cultuur": "onderwijs/cultuur",
+    "energie": "energie",
+    "kernenergie": "energie",
+    "sociaal": "sociaal/jeugd",
+    "jeugd": "sociaal/jeugd",
+    "wonen": "wonen/ruimtelijk",
+    "ruimtelijk": "wonen/ruimtelijk",
+    "verkeer": "verkeer/infrastructuur",
+    "infrastructuur": "verkeer/infrastructuur",
+}
+
+
+def _normalize_category(raw: str) -> str:
+    """Normalize an LLM category label to a consistent canonical label.
+
+    The label is lower-cased and stripped of surrounding whitespace, then
+    looked up in the alias table. Labels without an alias (including labels
+    that are already canonical, such as ``"klimaat/milieu"``) are returned in
+    their lower-cased, stripped form.
+
+    Args:
+        raw: Category label as returned by the model.
+
+    Returns:
+        The canonical label, or ``"overig"`` when ``raw`` is not a string or
+        is empty/whitespace-only, so a malformed response cannot crash the
+        run or introduce an empty category.
+    """
+    if not isinstance(raw, str):
+        return "overig"
+    label = raw.lower().strip()
+    if not label:
+        return "overig"
+    return _CATEGORY_ALIASES.get(label, label)
+
+
+def derive_taxonomy(
+    db_path: str = "data/motions.db",
+    derive_sample: int = 30,
+    batch_size: int = 10,
+) -> list[str]:
+    """Phase 1: derive a category taxonomy from a random sample of motions.
+
+    Reads up to ``derive_sample`` randomly ordered classified motions, asks the
+    LLM for a free-form category per motion (``DERIVE_SCHEMA``), normalizes the
+    labels and keeps those seen at least twice.
+
+    Args:
+        db_path: Path to the DuckDB database file.
+        derive_sample: Number of motions to sample.
+        batch_size: Number of prompts sent per parallel LLM call.
+
+    Returns:
+        Category labels that occurred at least twice, most frequent first,
+        with ``"overig"`` appended when it is not already present.
+
+    Raises:
+        ValueError: If ``batch_size`` is smaller than 1.
+        FileNotFoundError: If the database file does not exist.
+    """
+    if batch_size < 1:
+        # range() rejects a zero step and silently yields nothing for a
+        # negative one; fail early with a clear message instead.
+        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
+
+    db = Path(db_path)
+    if not db.exists():
+        # duckdb.connect() would otherwise create an empty database file here.
+        raise FileNotFoundError(f"Database not found: {db}")
+
+    # The database is only needed for the initial read, so the connection is
+    # released before the LLM calls start.
+    con = duckdb.connect(str(db))
+    try:
+        rows = con.execute(
+            f"""
+            SELECT r.motion_id, m.title, m.body_text
+            FROM right_wing_motions r
+            JOIN motions m ON r.motion_id = m.id
+            WHERE r.classified = TRUE
+            ORDER BY RANDOM()
+            LIMIT {int(derive_sample)}
+            """
+        ).fetchall()
+    finally:
+        con.close()
+
+    logger.info("Phase 1: deriving taxonomy from %d motions...", len(rows))
+
+    categories: list[str] = []
+    for start in range(0, len(rows), batch_size):
+        batch = rows[start : start + batch_size]
+        message_batches = [
+            [{"role": "user", "content": _build_prompt(title, body_text)}]
+            for _motion_id, title, body_text in batch
+        ]
+
+        try:
+            results = chat_completion_json_parallel(
+                message_batches,
+                model=config.QWEN_MODEL,
+                json_schema=DERIVE_SCHEMA,
+                max_workers=5,
+            )
+        except ProviderError as exc:
+            # Best effort: a failed batch only shrinks the sample.
+            logger.error("Batch failed: %s", exc)
+            continue
+
+        for res in results:
+            if not isinstance(res, dict):
+                continue
+            cat = res.get("category")
+            # A missing, null, non-string or blank label counts as 'overig'.
+            if not isinstance(cat, str) or not cat.strip():
+                cat = "overig"
+            categories.append(_normalize_category(cat))
+
+    if not categories:
+        logger.warning("No usable LLM responses; taxonomy will contain only 'overig'")
+
+    # Count and threshold
+    counts = Counter(categories)
+    logger.info("Raw category counts: %s", dict(counts.most_common()))
+
+    # Keep categories with >= 2 occurrences, plus always keep 'overig'
+    taxonomy = [cat for cat, cnt in counts.most_common() if cnt >= 2]
+    if "overig" not in taxonomy:
+        taxonomy.append("overig")
+
+    logger.info("Derived taxonomy (%d categories): %s", len(taxonomy), taxonomy)
+    return taxonomy
+
+
+def apply_categories(
+    db_path: str = "data/motions.db",
+    taxonomy: list[str] | None = None,
+    apply_sample: int = 50,
+    batch_size: int = 10,
+) -> dict[str, Any]:
+    """Phase 2: classify motions into a fixed taxonomy and store the result.
+
+    Selects classified motions in random order, asks the LLM to pick one label
+    from ``taxonomy`` per motion, and writes the label and its explanation to
+    the ``category`` / ``category_explanation`` columns of
+    ``right_wing_motions`` (the columns are added when missing). Motions whose
+    response is unusable are left untouched and counted as failures.
+
+    Args:
+        db_path: Path to the DuckDB database file.
+        taxonomy: Allowed category labels. ``None`` selects the built-in
+            default list.
+        apply_sample: Number of motions to classify; a negative value means
+            all classified motions.
+        batch_size: Number of prompts sent per parallel LLM call.
+
+    Returns:
+        A dict with ``scored``, ``failed``, ``taxonomy`` and
+        ``category_distribution`` (label -> count, most frequent first).
+
+    Raises:
+        ValueError: If ``batch_size`` is smaller than 1.
+        FileNotFoundError: If the database file does not exist.
+    """
+    if batch_size < 1:
+        # range() rejects a zero step and silently yields nothing for a
+        # negative one; fail early with a clear message instead.
+        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
+
+    db = Path(db_path)
+    if not db.exists():
+        # duckdb.connect() would otherwise create an empty database file here.
+        raise FileNotFoundError(f"Database not found: {db}")
+
+    if taxonomy is None:
+        # No derived taxonomy supplied: fall back to the built-in default.
+        taxonomy = [
+            "asiel/vreemdelingen",
+            "klimaat/milieu",
+            "corona/pandemie",
+            "lhbtq/rechten",
+            "veiligheid/justitie",
+            "defensie/buitenland",
+            "economie/belasting",
+            "landbouw/stikstof",
+            "zorg/gezondheid",
+            "onderwijs/cultuur",
+            "energie",
+            "sociaal/jeugd",
+            "overig",
+        ]
+    allowed = set(taxonomy)
+
+    # Deep-copy the template via a JSON round trip, then pin the enum and its
+    # description to the concrete taxonomy.
+    schema = json.loads(json.dumps(APPLY_SCHEMA_TEMPLATE))
+    category_prop = schema["schema"]["properties"]["category"]
+    category_prop["enum"] = taxonomy
+    category_prop["description"] = f"Category must be one of: {', '.join(taxonomy)}"
+
+    con = duckdb.connect(str(db))
+    try:
+        limit_clause = "" if apply_sample < 0 else f"LIMIT {int(apply_sample)}"
+        rows = con.execute(
+            f"""
+            SELECT r.motion_id, m.title, m.body_text
+            FROM right_wing_motions r
+            JOIN motions m ON r.motion_id = m.id
+            WHERE r.classified = TRUE
+            ORDER BY RANDOM()
+            {limit_clause}
+            """
+        ).fetchall()
+
+        logger.info("Phase 2: applying %d categories to %d motions...", len(taxonomy), len(rows))
+
+        # Add the output columns if missing (index 1 of table_info is the name).
+        cols = {c[1] for c in con.execute("PRAGMA table_info(right_wing_motions)").fetchall()}
+        if "category" not in cols:
+            con.execute("ALTER TABLE right_wing_motions ADD COLUMN category VARCHAR")
+        if "category_explanation" not in cols:
+            con.execute("ALTER TABLE right_wing_motions ADD COLUMN category_explanation VARCHAR")
+
+        scored = 0
+        failed = 0
+        category_counts: Counter[str] = Counter()
+
+        for start in range(0, len(rows), batch_size):
+            batch = rows[start : start + batch_size]
+            message_batches = [
+                [{"role": "user", "content": _build_prompt(title, body_text)}]
+                for _motion_id, title, body_text in batch
+            ]
+
+            try:
+                results = list(
+                    chat_completion_json_parallel(
+                        message_batches,
+                        model=config.QWEN_MODEL,
+                        json_schema=schema,
+                        max_workers=5,
+                    )
+                )
+            except ProviderError as exc:
+                logger.error("Batch failed: %s", exc)
+                failed += len(batch)
+                continue
+
+            # zip() below stops at the shorter sequence; count motions without
+            # a result as failures instead of dropping them silently.
+            # NOTE(review): assumes results are returned in prompt order -
+            # confirm against chat_completion_json_parallel.
+            if len(results) < len(batch):
+                missing = len(batch) - len(results)
+                logger.warning("Provider returned %d fewer results than prompts", missing)
+                failed += missing
+
+            for (mid, _title, _body_text), res in zip(batch, results):
+                cat = res.get("category") if isinstance(res, dict) else None
+                if not isinstance(cat, str) or cat not in allowed:
+                    # Leave the row untouched; the motion stays uncategorized.
+                    logger.warning("Motion %s: unusable category response: %r", mid, res)
+                    failed += 1
+                    continue
+
+                con.execute(
+                    "UPDATE right_wing_motions SET category = ?, category_explanation = ? WHERE motion_id = ?",
+                    (cat, res.get("explanation", ""), mid),
+                )
+                category_counts[cat] += 1
+                scored += 1
+
+        con.commit()
+
+        logger.info("Applied categories to %d motions, %d failures", scored, failed)
+        return {
+            "scored": scored,
+            "failed": failed,
+            "taxonomy": taxonomy,
+            "category_distribution": dict(category_counts.most_common()),
+        }
+    finally:
+        con.close()
+
+
+def main() -> int:
+    """Command-line entry point: derive a taxonomy, apply it, print a summary.
+
+    With ``--skip-derive`` the derivation phase is not run and
+    ``apply_categories`` receives ``None`` as taxonomy. The result dict is
+    printed as indented JSON.
+
+    Returns:
+        Always 0.
+    """
+    cli = argparse.ArgumentParser(description="Derive and apply policy categories")
+    # Declared in a table; the order is kept because it drives --help output.
+    option_specs = (
+        ("--db", {"default": "data/motions.db"}),
+        ("--derive-sample", {"type": int, "default": 30, "help": "Sample size for taxonomy derivation"}),
+        ("--apply-sample", {"type": int, "default": 50, "help": "Sample size for category application (-1 for all)"}),
+        ("--batch-size", {"type": int, "default": 10}),
+        ("--skip-derive", {"action": "store_true", "help": "Skip derivation, use default taxonomy"}),
+    )
+    for flag, spec in option_specs:
+        cli.add_argument(flag, **spec)
+    opts = cli.parse_args()
+
+    taxonomy = None
+    if not opts.skip_derive:
+        taxonomy = derive_taxonomy(
+            db_path=opts.db,
+            derive_sample=opts.derive_sample,
+            batch_size=opts.batch_size,
+        )
+
+    summary = apply_categories(
+        db_path=opts.db,
+        taxonomy=taxonomy,
+        apply_sample=opts.apply_sample,
+        batch_size=opts.batch_size,
+    )
+    print(json.dumps(summary, indent=2))
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/analysis/right_wing/extremity_scorer.py
+++ b/analysis/right_wing/extremity_scorer.py
@ -1,6 +1,8 @@
 #!/usr/bin/env python3
 """Policy extremity scorer: LLM-based radicalism scoring for right-wing motions.

+Scores BOTH the original motion text and the layman explanation separately.
+
 Usage:
    uv run python analysis/right_wing/extremity_scorer.py --sample 50
    uv run python analysis/right_wing/extremity_scorer.py --sample -1  # all motions
@ -11,7 +13,6 @@ from __future__ import annotations
 import argparse
 import json
 import logging
-import os
 import sys
 from pathlib import Path
 from typing import Any
@ -28,51 +29,70 @@ from analysis.config import config
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 logger = logging.getLogger(__name__)

-# JSON schema enforcing the expected response shape
 EXTREMITY_SCHEMA = {
    "name": "extremity_score",
    "strict": True,
    "schema": {
        "type": "object",
        "properties": {
-            "score": {
+            "text_score": {
+                "type": "integer",
+                "description": "Radicalism of the original motion text (1=mild to 5=extreme)",
+                "minimum": 1,
+                "maximum": 5,
+            },
+            "text_explanation": {
+                "type": "string",
+                "description": "Why the motion text got this score (Dutch)",
+            },
+            "layman_score": {
                "type": "integer",
-                "description": "Radicalism score from 1 (mild/technical) to 5 (extreme/fundamental)",
+                "description": "Radicalism of the layman explanation (1=mild to 5=extreme)",
                "minimum": 1,
                "maximum": 5,
            },
-            "explanation": {
+            "layman_explanation": {
                "type": "string",
-                "description": "Short explanation in Dutch of why this score was given",
+                "description": "Why the layman explanation got this score (Dutch)",
            },
        },
-        "required": ["score", "explanation"],
+        "required": ["text_score", "text_explanation", "layman_score", "layman_explanation"],
        "additionalProperties": False,
    },
 }

-PROMPT_TEMPLATE = """Dit is een motie in het Nederlandse parlement.
+PROMPT_TEMPLATE = """Beoordeel de radicalisme van de volgende motie op twee manieren:

+1) Het ORIGINELE motietekst:
 Titel: {title}
-
 Tekst: {text}

-Wat vraagt deze motie concreet? Beoordeel hoe radicaal dit voorstel is op een schaal van 1 (mild/technisch) tot 5 (extreem/fundamenteel). Geef alleen het cijfer en een korte verklaring in het Nederlands."""
-
+2) De VEREENVOUDIGDE uitleg:
+{layman}

-def _build_prompt(title: str, body_text: str | None) -> str:
-    text = body_text or title or ""
-    # Truncate body_text to keep prompt size reasonable
-    if len(text) > 800:
-        text = text[:800] + "..."
-    return PROMPT_TEMPLATE.format(title=title or "", text=text)
+Geef voor ELKE versie een score van 1 (mild/technisch) tot 5 (extreem/fundamenteel) plus een korte verklaring in het Nederlands."""


-def _score_batch(motion_ids: list[int], titles: list[str], texts: list[str | None]) -> list[dict[str, Any]]:
+def _build_prompt(title: str, body_text: str | None, layman: str | None) -> str:
+    """Render the dual-scoring prompt for one motion.
+
+    The motion text falls back to the title (or an empty string) when the body
+    is missing and is clipped to 500 characters. A missing layman explanation
+    is replaced by a fixed Dutch placeholder; the layman text is clipped to
+    400 characters. Clipped values get a trailing ``"..."``.
+    """
+
+    def _clip(value: str, limit: int) -> str:
+        return value if len(value) <= limit else value[:limit] + "..."
+
+    motion_text = _clip(body_text or title or "", 500)
+    simplified = _clip(layman or "(geen vereenvoudigde uitleg beschikbaar)", 400)
+    return PROMPT_TEMPLATE.format(layman=simplified, text=motion_text, title=title or "")
+
+
+def _score_batch(
+    motion_ids: list[int],
+    titles: list[str],
+    texts: list[str | None],
+    laymen: list[str | None],
+) -> list[dict[str, Any]]:
    """Score a batch of motions in parallel via LLM."""
    message_batches = []
-    for title, text in zip(titles, texts):
-        prompt = _build_prompt(title, text)
+    for title, text, layman in zip(titles, texts, laymen):
+        prompt = _build_prompt(title, text, layman)
        message_batches.append([{"role": "user", "content": prompt}])

    try:
@ -84,20 +104,44 @@ def _score_batch(motion_ids: list[int], titles: list[str], texts: list[str | Non
        )
    except ProviderError as exc:
        logger.error("Batch API call failed: %s", exc)
-        return [{"score": None, "explanation": None, "error": str(exc)}] * len(motion_ids)
+        return [{
+            "text_score": None, "text_explanation": None,
+            "layman_score": None, "layman_explanation": None,
+            "error": str(exc),
+        }] * len(motion_ids)

-    # Validate each result
    validated = []
    for res in results:
        if not isinstance(res, dict):
-            validated.append({"score": None, "explanation": None, "error": "non-dict response"})
+            validated.append({
+                "text_score": None, "text_explanation": None,
+                "layman_score": None, "layman_explanation": None,
+                "error": "non-dict response",
+            })
+            continue
+        ts = res.get("text_score")
+        te = res.get("text_explanation")
+        ls = res.get("layman_score")
+        le = res.get("layman_explanation")
+        if not isinstance(ts, int) or ts < 1 or ts > 5:
+            validated.append({
+                "text_score": None, "text_explanation": None,
+                "layman_score": None, "layman_explanation": None,
+                "error": f"invalid text_score: {ts}",
+            })
            continue
-        score = res.get("score")
-        explanation = res.get("explanation")
-        if not isinstance(score, int) or score < 1 or score > 5:
-            validated.append({"score": None, "explanation": None, "error": f"invalid score: {score}"})
+        if not isinstance(ls, int) or ls < 1 or ls > 5:
+            validated.append({
+                "text_score": None, "text_explanation": None,
+                "layman_score": None, "layman_explanation": None,
+                "error": f"invalid layman_score: {ls}",
+            })
            continue
-        validated.append({"score": score, "explanation": explanation, "error": None})
+        validated.append({
+            "text_score": ts, "text_explanation": te,
+            "layman_score": ls, "layman_explanation": le,
+            "error": None,
+        })
    return validated


@ -106,27 +150,21 @@ def score_motions(
    sample_size: int = 50,
    batch_size: int = 10,
 ) -> dict[str, Any]:
-    """Score right-wing motions and store results.
-
-    Args:
-        sample_size: Number of motions to score. -1 = all classified motions.
-    """
+    """Score right-wing motions and store results."""
    db = Path(db_path)
    if not db.exists():
        raise FileNotFoundError(f"Database not found: {db}")

    con = duckdb.connect(str(db))
    try:
-        # Ensure tables exist
        tables = {t[0] for t in con.execute("SHOW TABLES").fetchall()}
        if "right_wing_motions" not in tables:
            raise RuntimeError("Run classify_motions.py first.")

-        # Load classified motions
        limit_clause = "" if sample_size < 0 else f"LIMIT {sample_size}"
        rows = con.execute(
            f"""
-            SELECT r.motion_id, m.title, m.body_text
+            SELECT r.motion_id, m.title, m.body_text, m.layman_explanation
            FROM right_wing_motions r
            JOIN motions m ON r.motion_id = m.id
            WHERE r.classified = TRUE
@ -141,14 +179,15 @@ def score_motions(

        logger.info("Scoring %d motions in batches of %d...", len(rows), batch_size)

-        # Create output table
        con.execute("DROP TABLE IF EXISTS extremity_scores")
        con.execute(
            """
            CREATE TABLE extremity_scores (
                motion_id INTEGER PRIMARY KEY,
-                score INTEGER,
-                explanation VARCHAR,
+                text_score INTEGER,
+                text_explanation VARCHAR,
+                layman_score INTEGER,
+                layman_explanation VARCHAR,
                error VARCHAR
            )
            """
@ -162,32 +201,44 @@ def score_motions(
            motion_ids = [r[0] for r in batch]
            titles = [r[1] for r in batch]
            texts = [r[2] for r in batch]
+            laymen = [r[3] for r in batch]

            logger.info("Batch %d/%d (%d motions)", i // batch_size + 1, (len(rows) - 1) // batch_size + 1, len(batch))
-            results = _score_batch(motion_ids, titles, texts)
+            results = _score_batch(motion_ids, titles, texts, laymen)

            for mid, res in zip(motion_ids, results):
                con.execute(
-                    "INSERT INTO extremity_scores (motion_id, score, explanation, error) VALUES (?, ?, ?, ?)",
-                    (mid, res.get("score"), res.get("explanation"), res.get("error")),
+                    """
+                    INSERT INTO extremity_scores
+                    (motion_id, text_score, text_explanation, layman_score, layman_explanation, error)
+                    VALUES (?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        mid,
+                        res.get("text_score"),
+                        res.get("text_explanation"),
+                        res.get("layman_score"),
+                        res.get("layman_explanation"),
+                        res.get("error"),
+                    ),
                )
-                if res.get("score") is not None:
+                if res.get("error") is None:
                    scored += 1
                else:
                    failed += 1

        con.commit()

-        # Update yearly summary with average extremity
+        # Update yearly summary with average extremity (using text_score as primary)
        con.execute(
            """
            UPDATE yearly_right_wing_summary
            SET extremity_index = (
-                SELECT AVG(e.score)
+                SELECT AVG(e.text_score)
                FROM extremity_scores e
                JOIN right_wing_motions r ON e.motion_id = r.motion_id
                WHERE r.year = yearly_right_wing_summary.year
-                  AND e.score IS NOT NULL
+                  AND e.text_score IS NOT NULL
            )
            """
        )
--- a/analysis/right_wing/sentiment_analysis.py
+++ b/analysis/right_wing/sentiment_analysis.py
@ -1,8 +1,8 @@
 #!/usr/bin/env python3
 """Sentiment analysis pipeline: Dutch sentiment scoring for right-wing motions.

-Uses LLM batch calls (fallback when no local Dutch sentiment model is available).
-Maps outputs to [-1, 1] scale where negative = hostile/aggressive, positive = constructive.
+Scores BOTH the original motion text and the layman explanation separately.
+Uses LLM batch calls. Maps outputs to [-1, 1] scale.

 Usage:
    uv run python analysis/right_wing/sentiment_analysis.py --sample 50
@ -36,43 +36,64 @@ SENTIMENT_SCHEMA = {
    "schema": {
        "type": "object",
        "properties": {
-            "score": {
+            "text_score": {
                "type": "number",
-                "description": "Sentiment score from -1 (very negative/hostile) to 1 (very positive/constructive)",
+                "description": "Sentiment of original motion text from -1 (hostile) to 1 (constructive)",
                "minimum": -1,
                "maximum": 1,
            },
-            "explanation": {
+            "text_explanation": {
                "type": "string",
-                "description": "Short explanation in Dutch of why this sentiment was given",
+                "description": "Why the motion text got this score (Dutch)",
+            },
+            "layman_score": {
+                "type": "number",
+                "description": "Sentiment of layman explanation from -1 (hostile) to 1 (constructive)",
+                "minimum": -1,
+                "maximum": 1,
+            },
+            "layman_explanation": {
+                "type": "string",
+                "description": "Why the layman explanation got this score (Dutch)",
            },
        },
-        "required": ["score", "explanation"],
+        "required": ["text_score", "text_explanation", "layman_score", "layman_explanation"],
        "additionalProperties": False,
    },
 }

-PROMPT_TEMPLATE = """Beoordeel de sentiment van de volgende motie uit het Nederlandse parlement.
+PROMPT_TEMPLATE = """Beoordeel de sentiment van de volgende motie op twee manieren:

+1) Het ORIGINELE motietekst:
 Titel: {title}
-
 Tekst: {text}

-Geef een sentiment score van -1 (zeer negatief, agressief, vijandig) tot 1 (zeer positief, constructief, coöperatief). Geef ook een korte verklaring in het Nederlands."""
+2) De VEREENVOUDIGDE uitleg:
+{layman}
+
+Geef voor ELKE versie een sentiment score van -1 (zeer negatief, agressief, vijandig) tot 1 (zeer positief, constructief, coöperatief) plus een korte verklaring in het Nederlands."""


-def _build_prompt(title: str, body_text: str | None) -> str:
+def _build_prompt(title: str, body_text: str | None, layman: str | None) -> str:
    text = body_text or title or ""
    if len(text) > 400:
        text = text[:400] + "..."
-    return PROMPT_TEMPLATE.format(title=title or "", text=text)
-
-
-def _score_batch(motion_ids: list[int], titles: list[str], texts: list[str | None]) -> list[dict[str, Any]]:
+    layman = layman or "(geen vereenvoudigde uitleg beschikbaar)"
+    if len(layman) > 300:
+        layman = layman[:300] + "..."
+    return PROMPT_TEMPLATE.format(title=title or "", text=text, layman=layman)
+
+
+def _score_batch(
+    motion_ids: list[int],
+    titles: list[str],
+    texts: list[str | None],
+    laymen: list[str | None],
+) -> list[dict[str, Any]]:
    """Score sentiment for a batch of motions in parallel via LLM."""
    message_batches = []
-    for title, text in zip(titles, texts):
-        prompt = _build_prompt(title, text)
+    for title, text, layman in zip(titles, texts, laymen):
+        prompt = _build_prompt(title, text, layman)
        message_batches.append([{"role": "user", "content": prompt}])

    try:
@ -84,19 +105,44 @@ def _score_batch(motion_ids: list[int], titles: list[str], texts: list[str | Non
        )
    except ProviderError as exc:
        logger.error("Batch API call failed: %s", exc)
-        return [{"score": None, "explanation": None, "error": str(exc)}] * len(motion_ids)
+        return [{
+            "text_score": None, "text_explanation": None,
+            "layman_score": None, "layman_explanation": None,
+            "error": str(exc),
+        }] * len(motion_ids)

    validated = []
    for res in results:
        if not isinstance(res, dict):
-            validated.append({"score": None, "explanation": None, "error": "non-dict response"})
+            validated.append({
+                "text_score": None, "text_explanation": None,
+                "layman_score": None, "layman_explanation": None,
+                "error": "non-dict response",
+            })
+            continue
+        ts = res.get("text_score")
+        te = res.get("text_explanation")
+        ls = res.get("layman_score")
+        le = res.get("layman_explanation")
+        if not isinstance(ts, (int, float)) or ts < -1 or ts > 1:
+            validated.append({
+                "text_score": None, "text_explanation": None,
+                "layman_score": None, "layman_explanation": None,
+                "error": f"invalid text_score: {ts}",
+            })
            continue
-        score = res.get("score")
-        explanation = res.get("explanation")
-        if not isinstance(score, (int, float)) or score < -1 or score > 1:
-            validated.append({"score": None, "explanation": None, "error": f"invalid score: {score}"})
+        if not isinstance(ls, (int, float)) or ls < -1 or ls > 1:
+            validated.append({
+                "text_score": None, "text_explanation": None,
+                "layman_score": None, "layman_explanation": None,
+                "error": f"invalid layman_score: {ls}",
+            })
            continue
-        validated.append({"score": float(score), "explanation": explanation, "error": None})
+        validated.append({
+            "text_score": float(ts), "text_explanation": te,
+            "layman_score": float(ls), "layman_explanation": le,
+            "error": None,
+        })
    return validated


@ -119,7 +165,7 @@ def analyze_sentiment(
        limit_clause = "" if sample_size < 0 else f"LIMIT {sample_size}"
        rows = con.execute(
            f"""
-            SELECT r.motion_id, r.year, m.title, m.body_text
+            SELECT r.motion_id, r.year, m.title, m.body_text, m.layman_explanation
            FROM right_wing_motions r
            JOIN motions m ON r.motion_id = m.id
            WHERE r.classified = TRUE
@ -140,8 +186,10 @@ def analyze_sentiment(
            CREATE TABLE sentiment_scores (
                motion_id INTEGER PRIMARY KEY,
                year INTEGER,
-                score DOUBLE,
-                explanation VARCHAR,
+                text_score DOUBLE,
+                text_explanation VARCHAR,
+                layman_score DOUBLE,
+                layman_explanation VARCHAR,
                error VARCHAR
            )
            """
@ -156,16 +204,26 @@ def analyze_sentiment(
            years = [r[1] for r in batch]
            titles = [r[2] for r in batch]
            texts = [r[3] for r in batch]
+            laymen = [r[4] for r in batch]

            logger.info("Batch %d/%d (%d motions)", i // batch_size + 1, (len(rows) - 1) // batch_size + 1, len(batch))
-            results = _score_batch(motion_ids, titles, texts)
+            results = _score_batch(motion_ids, titles, texts, laymen)

            for mid, year, res in zip(motion_ids, years, results):
                con.execute(
-                    "INSERT INTO sentiment_scores (motion_id, year, score, explanation, error) VALUES (?, ?, ?, ?, ?)",
-                    (mid, year, res.get("score"), res.get("explanation"), res.get("error")),
+                    """
+                    INSERT INTO sentiment_scores
+                    (motion_id, year, text_score, text_explanation, layman_score, layman_explanation, error)
+                    VALUES (?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        mid, year,
+                        res.get("text_score"), res.get("text_explanation"),
+                        res.get("layman_score"), res.get("layman_explanation"),
+                        res.get("error"),
+                    ),
                )
-                if res.get("score") is not None:
+                if res.get("error") is None:
                    scored += 1
                else:
                    failed += 1
@ -173,7 +231,7 @@ def analyze_sentiment(
        con.commit()

        # Add sentiment columns to yearly summary if not present
-        cols = {c[0] for c in con.execute("PRAGMA table_info(yearly_right_wing_summary)").fetchall()}
+        cols = {c[1] for c in con.execute("PRAGMA table_info(yearly_right_wing_summary)").fetchall()}
        if "avg_sentiment" not in cols:
            con.execute("ALTER TABLE yearly_right_wing_summary ADD COLUMN avg_sentiment DOUBLE")
        if "sentiment_std" not in cols:
@ -185,22 +243,22 @@ def analyze_sentiment(
            """
            UPDATE yearly_right_wing_summary
            SET avg_sentiment = (
-                SELECT AVG(s.score)
+                SELECT AVG(s.text_score)
                FROM sentiment_scores s
                WHERE s.year = yearly_right_wing_summary.year
-                  AND s.score IS NOT NULL
+                  AND s.text_score IS NOT NULL
            ),
            sentiment_std = (
-                SELECT STDDEV(s.score)
+                SELECT STDDEV(s.text_score)
                FROM sentiment_scores s
                WHERE s.year = yearly_right_wing_summary.year
-                  AND s.score IS NOT NULL
+                  AND s.text_score IS NOT NULL
            ),
            pct_strongly_negative = (
-                SELECT COUNT(CASE WHEN s.score < -0.5 THEN 1 END) * 100.0 / NULLIF(COUNT(*), 0)
+                SELECT COUNT(CASE WHEN s.text_score < -0.5 THEN 1 END) * 100.0 / NULLIF(COUNT(*), 0)
                FROM sentiment_scores s
                WHERE s.year = yearly_right_wing_summary.year
-                  AND s.score IS NOT NULL
+                  AND s.text_score IS NOT NULL
            )
            """
        )