chore: simplify Overton scripts, update README, add stemwijzer.db to gitignore

- Extracted EXTREMITY_BUCKET_ORDER constant and _extremity_bucket() helper (4 duplications removed) - Merged two-pass query loop in compute_yearly_baseline into single pass - Removed unused import (mticker), dead code (year_titles_map), 12 obvious comments - Extracted _fmt_axis() helper in SVD drift script - Updated README analysis/ description to include right-wing motion analysis
4 weeks ago · 711a410df3
parent 7b5f97e177
commit 711a410df3
7 changed files with 693 additions and 85 deletions
--- a/.gitignore
+++ b/.gitignore
@ -33,3 +33,5 @@ thoughts/explorer/*_report.md

 # Compound Engineering local config
 .compound-engineering/*.local.yaml
+# Backfill data
+stemwijzer.db
--- a/README.md
+++ b/README.md
@ -51,7 +51,7 @@ The app will be available at http://localhost:8501.
 ├── api_client.py       # Tweede Kamer OData API client
 ├── explorer.py         # Explorer page with SVD visualizations
 ├── pipeline/           # Data ingestion and analysis pipelines
-├── analysis/           # SVD, clustering, trajectory modules
+├── analysis/           # SVD, clustering, trajectory, right-wing motion analysis
 ├── tests/              # pytest test suite
 ├── docs/               # Documentation, research, and plans
 └── data/motions.db     # DuckDB database (~18 GB)
--- a/analysis/right_wing/overton_breakpoint_analysis.py
+++ b/analysis/right_wing/overton_breakpoint_analysis.py
@ -30,12 +30,6 @@ import numpy as np

 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
-import matplotlib.ticker as mticker
-
-ROOT = Path(__file__).parent.parent.parent.resolve()
-if str(ROOT) not in sys.path:
-    sys.path.insert(0, str(ROOT))
-
 from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT, PARTY_COLOURS

 CANONICAL_CENTRIST = frozenset({"VVD", "D66", "CDA", "NSC", "BBB", "CU"})
@ -47,7 +41,20 @@ DB_PATH = str(ROOT / "data" / "motions.db")
 REPORTS_DIR = ROOT / "reports" / "overton_window"
 REPORTS_DIR.mkdir(parents=True, exist_ok=True)

-CANONICAL_CENTRIST_SET = set(CANONICAL_CENTRIST)  # nb: config defines as frozenset
+CANONICAL_CENTRIST_SET = set(CANONICAL_CENTRIST)
+
+EXTREMITY_BUCKET_ORDER = ["1-2 (mild)", "2-3 (moderate)", "3-4 (high)", "4-5 (extreme)"]
+
+def _extremity_bucket(score: float) -> str:
+    if score < 2:
+        return "1-2 (mild)"
+    elif score < 3:
+        return "2-3 (moderate)"
+    elif score < 4:
+        return "3-4 (high)"
+    else:
+        return "4-5 (extreme)"
+
 CANONICAL_LEFT_SET = set(CANONICAL_LEFT)
 CANONICAL_RIGHT_SET = set(CANONICAL_RIGHT)

@ -172,6 +179,7 @@ def compute_yearly_baseline(con: duckdb.DuckDBPyConnection) -> dict[int, dict]:
    """).fetchall()

    motion_party_votes: dict[int, dict[str, dict[str, int]]] = {}
+    motion_year_map: dict[int, int] = {}
    for mid, year, party, n, vote in centrist_rows:
        year = int(year)
        if year < YEAR_MIN or year > YEAR_MAX:
@ -179,12 +187,7 @@ def compute_yearly_baseline(con: duckdb.DuckDBPyConnection) -> dict[int, dict]:
        mv = motion_party_votes.setdefault(mid, {})
        pv = mv.setdefault(party, {"voor": 0, "tegen": 0, "afwezig": 0})
        pv[vote] = pv.get(vote, 0) + n
-
-    motion_year_map: dict[int, int] = {}
-    for mid, year, _, _, _ in centrist_rows:
-        year = int(year)
-        if YEAR_MIN <= year <= YEAR_MAX:
-            motion_year_map[mid] = year
+        motion_year_map[mid] = year

    for mid, votes in motion_party_votes.items():
        year = motion_year_map.get(mid)
@ -297,10 +300,6 @@ def compute_opposition_metrics(

    coalition = COALITION

-    year_titles_map: dict[int, list[int]] = {}
-    for year, d in yearly_raw.items():
-        year_titles_map[year] = list(range(len(d["titles"])))
-
    for year, d in yearly_raw.items():
        coal = coalition.get(year, set())
        for idx in range(len(d["titles"])):
@ -348,16 +347,9 @@ def compute_extremity_stratified(
    yearly_raw: dict[int, dict],
 ) -> dict[str, dict[str, list]]:
    """Compute centrist_support per extremity bucket, pre vs post 2024."""
-    buckets = {
-        "1-2 (mild)": [],
-        "2-3 (moderate)": [],
-        "3-4 (high)": [],
-        "4-5 (extreme)": [],
-    }
-
    pre_post: dict[str, dict[str, list]] = {
-        "pre-2024": {b: [] for b in buckets},
-        "post-2024": {b: [] for b in buckets},
+        "pre-2024": {b: [] for b in EXTREMITY_BUCKET_ORDER},
+        "post-2024": {b: [] for b in EXTREMITY_BUCKET_ORDER},
    }

    for year, d in yearly_raw.items():
@ -367,15 +359,7 @@ def compute_extremity_stratified(
            cs = d["centrist_support_strict"][idx]
            if np.isnan(ext) or cs is None or (isinstance(cs, float) and np.isnan(cs)):
                continue
-            if ext < 2:
-                b = "1-2 (mild)"
-            elif ext < 3:
-                b = "2-3 (moderate)"
-            elif ext < 4:
-                b = "3-4 (high)"
-            else:
-                b = "4-5 (extreme)"
-            pre_post[period][b].append(cs)
+            pre_post[period][_extremity_bucket(ext)].append(cs)

    return pre_post

@ -413,12 +397,7 @@ def yearly_summary(yearly: dict[int, dict]) -> dict[int, dict]:

 def sample_audit(yearly_raw: dict[int, dict]) -> list[dict]:
    """Stratified random sample: 5 motions per extremity bucket, 20 total."""
-    bucket_motions: dict[str, list[int]] = {
-        "1-2 (mild)": [],
-        "2-3 (moderate)": [],
-        "3-4 (high)": [],
-        "4-5 (extreme)": [],
-    }
+    bucket_motions: dict[str, list[int]] = {b: [] for b in EXTREMITY_BUCKET_ORDER}

    all_motions: list[dict] = []
    for year, d in yearly_raw.items():
@ -426,14 +405,7 @@ def sample_audit(yearly_raw: dict[int, dict]) -> list[dict]:
            ext = d["extremity"][idx]
            if np.isnan(ext):
                continue
-            if ext < 2:
-                b = "1-2 (mild)"
-            elif ext < 3:
-                b = "2-3 (moderate)"
-            elif ext < 4:
-                b = "3-4 (high)"
-            else:
-                b = "4-5 (extreme)"
+            b = _extremity_bucket(ext)
            bucket_motions[b].append(len(all_motions))
            all_motions.append({
                "year": year,
@ -565,7 +537,6 @@ def create_figure_2(

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

-    # Panel C: Mean extremity over time
    ax1.plot(years_arr, _vals(yearly_sum, "mean_extremity"),
             marker="o", color=colour_rw, linewidth=2, label="All right-wing", zorder=5)
    ax1.plot(years_arr, _vals(opp_sum, "mean_extremity"),
@ -587,8 +558,7 @@ def create_figure_2(
    ax1.set_xticks(years_arr)
    ax1.set_xticklabels([str(y) for y in years], rotation=45)

-    # Panel D: Extremity-stratified centrist support (grouped bars with IQR error bars)
-    bucket_order = ["1-2 (mild)", "2-3 (moderate)", "3-4 (high)", "4-5 (extreme)"]
+    bucket_order = EXTREMITY_BUCKET_ORDER
    bucket_labels = ["1-2\nmild", "2-3\nmoderate", "3-4\nhigh", "4-5\nextreme"]
    bucket_colours = ["#81C784", "#FFB74D", "#E57373", "#BA68C8"]

@ -669,7 +639,6 @@ def create_figure_3(
    means = np.array([left_yearly[y]["mean_left_support"] for y in years])
    ns = np.array([left_yearly[y]["n"] for y in years])

-    # Weighted all-years mean
    overall_mean = np.average(means, weights=ns) if ns.sum() > 0 else 0.0

    fig, ax = plt.subplots(figsize=(12, 6))
@ -722,11 +691,9 @@ def generate_report(
    def _val(summary, year, key):
        return summary[year].get(key, np.nan)

-    # Pre/post 2024 comparisons
    pre_years = [y for y in years if y < BREAK_YEAR]
    post_years = [y for y in years if y >= BREAK_YEAR]

-    # Pooled pre/post values for Cohen's d
    rw_pre_cs = []
    rw_post_cs = []
    rw_pre_ext = []
@ -773,7 +740,6 @@ def generate_report(
    d_opp_cs = cohens_d(np.array(opp_pre_cs), np.array(opp_post_cs)) if opp_pre_cs and opp_post_cs else float("nan")
    d_opp_ext = cohens_d(np.array(opp_pre_ext), np.array(opp_post_ext)) if opp_pre_ext and opp_post_ext else float("nan")

-    # Yearly summary table
    yearly_table = "| Year | N (RW) | Centrist Support (Strict) | Extremity | Right Support | Left Opp. |\n"
    yearly_table += "|------|--------|---------------------------|-----------|---------------|----------|\n"
    for y in years:
@ -788,8 +754,7 @@ def generate_report(
        lo_str = f"{lo:.3f}" if not np.isnan(lo) else "N/A"
        yearly_table += f"| {y} | {int(n)} | {cs_str} | {ext_str} | {rs_str} | {lo_str} |\n"

-    # Extremity-stratified table (centrist support)
-    bucket_order = ["1-2 (mild)", "2-3 (moderate)", "3-4 (high)", "4-5 (extreme)"]
+    bucket_order = EXTREMITY_BUCKET_ORDER
    ext_table = "| Bucket | Period | N | Mean CS | Median CS | P25 | P75 |\n"
    ext_table += "|--------|--------|---|---------|-----------|---|-----|\n"
    for b in bucket_order:
@ -815,7 +780,6 @@ def generate_report(
            f"{pt_p25:.3f} | {pt_p75:.3f} |\n"
        )

-    # Audit table
    audit_table = "| # | Year | Category | LLM Score | Bucket | Agreed? | Driver |\n"
    audit_table += "|---|------|----------|-----------|--------|---------|--------|\n"
    for i, m in enumerate(audit_sample, 1):
@ -901,7 +865,6 @@ def generate_report(
        "parties filed milder motions post-2024 and the 'shift' is illusory.",
    ]

-    # Section 6: Left support for right-wing motions
    left_years_sorted = sorted(left_yearly.keys())
    left_pre_years_list = [y for y in pre_years if y in left_yearly]
    left_post_years_list = [y for y in post_years if y in left_yearly]
--- a/analysis/right_wing/overton_svd_drift.py
+++ b/analysis/right_wing/overton_svd_drift.py
@ -63,6 +63,10 @@ def _party_in_set(party: str, canonical_set: frozenset) -> bool:
    return normalized != party and normalized in canonical_set


+def _fmt_axis(val: float | None) -> str:
+    return f"{val:.4f}" if val is not None else "N/A"
+
+
 def compute_aligned_centers(
    scores: Dict[str, List[List[float]]],
    windows: List[str],
@ -170,7 +174,6 @@ def compute_drift_metrics(
    angle_rad = np.arctan2(dy_net, dx_net)
    angle_deg = float(np.degrees(angle_rad))

-    # Right-wing net displacement for comparison
    right_net = None
    right_valid = [
        c for c in annual_centers if c["right_mean_axis1"] is not None
@ -186,7 +189,6 @@ def compute_drift_metrics(
            "net_dy": round(r_dy, 6),
        }

-    # Is centrist center drifting toward or away from right-wing center?
    approach_to_right = None
    if (
        first.get("right_mean_axis1") is not None
@ -262,7 +264,6 @@ def plot_trajectory(
        plt.close(fig)
        return

-    # Arrows between consecutive years
    for i in range(len(cent_a1_valid) - 1):
        ax.annotate(
            "",
@ -374,26 +375,10 @@ def write_report(
    )
    lines.append("|---|---|---|---|---|---|---|")
    for c in centers:
-        cent_a1 = (
-            f"{c['centrist_mean_axis1']:.4f}"
-            if c["centrist_mean_axis1"] is not None
-            else "N/A"
-        )
-        cent_a2 = (
-            f"{c['centrist_mean_axis2']:.4f}"
-            if c["centrist_mean_axis2"] is not None
-            else "N/A"
-        )
-        right_a1 = (
-            f"{c['right_mean_axis1']:.4f}"
-            if c["right_mean_axis1"] is not None
-            else "N/A"
-        )
-        right_a2 = (
-            f"{c['right_mean_axis2']:.4f}"
-            if c["right_mean_axis2"] is not None
-            else "N/A"
-        )
+        cent_a1 = _fmt_axis(c["centrist_mean_axis1"])
+        cent_a2 = _fmt_axis(c["centrist_mean_axis2"])
+        right_a1 = _fmt_axis(c["right_mean_axis1"])
+        right_a2 = _fmt_axis(c["right_mean_axis2"])
        cent_parties = ", ".join(c["centrist_parties_present"])
        right_parties = ", ".join(c["right_parties_present"])
        lines.append(
@ -403,7 +388,6 @@ def write_report(

    lines.append("")

-    # Drift metrics
    lines.append("## Drift Metrics (Annual Windows Only)\n")

    if drift.get("net_displacement") is not None:
--- a/docs/plans/2026-05-05-001-feat-right-wing-motion-analysis-plan.md
+++ b/docs/plans/2026-05-05-001-feat-right-wing-motion-analysis-plan.md
@ -0,0 +1,447 @@
+---
+title: Right-Wing Motion Analysis Over Time
+type: feat
+status: active
+date: 2026-05-05
+---
+
+# Right-Wing Motion Analysis Over Time
+
+## Summary
+
+Build a pipeline to identify, classify, and analyze right-wing motions across parliamentary history. Track how their volume, policy extremity, and cross-party support have evolved over time. Output includes a derived keyword taxonomy, a hybrid motion classifier, temporal aggregations, policy extremity scores, sentiment trajectories, and time-series visualizations.
+
+---
+
+## Problem Frame
+
+The user hypothesizes that while the **volume** of right-wing motions has remained stable, the motions have become **more extreme** in policy demands and that **centrist parties** increasingly vote in favor of them. This suggests an "Overton window" shift that cannot be detected by simple vote-counting alone.
+
+The existing codebase has party-level ideological positioning (SVD/PCA) but lacks motion-level ideological scoring, keyword-based right-wing detection, or temporal analysis of how motion content has radicalized.
+
+---
+
+## Requirements
+
+- **R1.** Derive a right-wing keyword taxonomy from the motions themselves (not hand-curated)
+- **R2.** Build a hybrid classifier that identifies "actually right-wing" motions using both keywords and voting patterns
+- **R3.** Aggregate and analyze trends per year (volume, support, cross-party adoption)
+- **R4.** Score policy extremity (what the motion demands, not just sentiment)
+- **R5.** Track sentiment/emotional tone over time as a proxy for radicalization
+- **R6.** Produce time-series visualizations for exploration (not yet integrated into the Streamlit app)
+- **R7.** Validate classifier accuracy to avoid false positives (e.g., left-wing parties discussing migration neutrally)
+
+---
+
+## Scope Boundaries
+
+- **In scope:** Keyword derivation, motion classification, temporal aggregation, extremity scoring, sentiment analysis, static visualization output
+- **Out of scope:** Integration into the Streamlit explorer app (deferred to follow-up)
+- **Out of scope:** Real-time updates or pipeline automation
+- **Out of scope:** Predictive modeling (we describe trends, we do not forecast)
+
+### Deferred to Follow-Up Work
+
+- **Streamlit tab integration:** Add an interactive "Right-Wing Trends" tab to the explorer (`analysis/tabs/right_wing_trends.py`): separate PR after this plan is executed
+- **Motion-level ideological embedding:** Train or fine-tune a motion-specific embedding model to improve classification beyond keywords + votes
+
+---
+
+## Context & Research
+
+### Relevant Code and Patterns
+
+- **`analysis/config.py`** — Defines `CANONICAL_RIGHT = {"PVV", "FVD", "JA21", "SGP"}` and `CANONICAL_LEFT` — these sets are the ground-truth for right/left party identification
+- **`scripts/derive_svd_labels.py`** — Already uses TF-IDF on motion titles with Dutch stopwords; this pattern should be reused/extended for keyword derivation
+- **`scripts/motion_drift.py`** — Implements cross-ideological voting detection and semantic drift measurement; its `compute_party_voting()` logic is directly relevant for validating the hybrid classifier
+- **`analysis/axis_classifier.py`** — Contains `_classify_from_titles()` with Dutch keyword regexes for 4 ideological categories; not sufficient alone but a useful reference for post-processing
+- **`pipeline/text_pipeline.py`** — Generates text embeddings via API; can be reused if we need embeddings for the sentiment/extremity analysis
+- **`analysis/explorer_data.py`** — `load_motions_df()` loads the full motions table with year parsing; primary data access pattern
+- **`database.py`** / **`agent_tools/database.py`** — Provide DuckDB access to `motions` and `mp_votes` tables
+
+### Institutional Learnings
+
+- **`docs/solutions/best-practices/svd-labels-voting-patterns-not-semantics.md`** — Right-wing parties must appear on the RIGHT side of all axes; same principle applies here: classification must reflect voting behavior, not just semantic content
+
+### External References
+
+- Dutch parliamentary motion data is already present in DuckDB (`data/motions.db`)
+- `motion_drift.py` already uses Ridge/Lasso regression for axis stability; similar regression techniques can be applied to temporal trend fitting
+
+---
+
+## Key Technical Decisions
+
+- **Keyword derivation method:** TF-IDF on motion titles/body_text, restricted to motions where `CANONICAL_RIGHT` parties vote predominantly *voor*. This avoids hand-curating and captures the actual language used by right-wing parties.
+- **Hybrid classifier:** Two-stage: (1) keyword match for initial filtering, (2) voting-pattern confirmation requiring >=60% support from right-wing parties AND >=40% opposition from left-wing parties. This handles cases where left-wing parties also use migration keywords neutrally.
+- **Policy extremity:** Use an LLM (via `ai_provider.py` subagent pattern) to answer "What concrete policy change does this motion demand?" and rate the radicalism of that demand on a 1-5 scale. This captures policy substance, not emotional language.
+- **Sentiment analysis:** Use a lightweight Dutch sentiment model (e.g., `pysentimiento` or similar) rather than an LLM, for cost and speed. If no good Dutch model exists, fall back to LLM batch calls.
+- **Temporal granularity:** Annual buckets (`YYYY`), aligning with existing SVD window conventions (`2024`, `2025`, etc.)
+- **Visualization:** Static Plotly charts exported to HTML/PNG, not yet integrated into Streamlit. This decouples analysis from UI work.
+
+---
+
+## Open Questions
+
+### Resolved During Planning
+
+- **Q: Should we analyze titles only or full body_text?** 
+  - **A:** Start with titles (fast, already cleaned), validate on a sample using body_text, and upgrade if precision is insufficient.
+- **Q: How do we define "centrist parties" for cross-party support tracking?**
+  - **A:** Treat as the complement of `CANONICAL_RIGHT` and `CANONICAL_LEFT` within `KNOWN_MAJOR_PARTIES`: VVD, D66, CDA, NSC, BBB, CU.
+
+### Deferred to Implementation
+
+- **Q: What is the optimal TF-IDF threshold for keyword inclusion?** — Depends on corpus distribution; will be determined by inspecting the keyword ranked list.
+- **Q: Which Dutch sentiment model performs best on parliamentary language?** — Requires empirical testing; candidate models to be evaluated during U5.
+- **Q: Does policy extremity scoring need few-shot examples for consistency?** — Will be determined during U4 implementation; if LLM outputs are inconsistent, add exemplars.
+
+---
+
+## High-Level Technical Design
+
+> *This illustrates the intended approach and is directional guidance for review, not implementation specification. The implementing agent should treat it as context, not code to reproduce.*
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│  PHASE 1: KEYWORD DERIVATION (U1)                           │
+│  - Filter motions where right-wing parties vote >60% voor   │
+│  - Run TF-IDF on titles/body_text                           │
+│  - Extract top-N distinctive terms                          │
+└─────────────────────────────────────────────────────────────┘
+                              │
+                              ▼
+┌─────────────────────────────────────────────────────────────┐
+│  PHASE 2: HYBRID CLASSIFICATION (U2)                        │
+│  - Keyword filter: motion contains any top-N term           │
+│  - Voting filter: right ≥60% voor AND left ≥40% tegen       │
+│  - Output: right_wing_motions table with year, score        │
+└─────────────────────────────────────────────────────────────┘
+                              │
+                              ▼
+┌─────────────────────────────────────────────────────────────┐
+│  PHASE 3: TEMPORAL AGGREGATION (U3)                         │
+│  - Group by year                                            │
+│  - Compute: count, % of total motions, avg support          │
+│  - Track: centrist party support over time                  │
+│  - Output: yearly_summary DataFrame                         │
+└─────────────────────────────────────────────────────────────┘
+                              │
+                              ▼
+┌─────────────────────────────────────────────────────────────┐
+│  PHASE 4: POLICY EXTREMITY (U4)                             │
+│  - Sample motions per year (stratified by keyword density)  │
+│  - LLM prompt: "What concrete policy does this demand?"     │
+│  - Rate radicalism 1-5                                      │
+│  - Output: extremity_scores table                           │
+└─────────────────────────────────────────────────────────────┘
+                              │
+                              ▼
+┌─────────────────────────────────────────────────────────────┐
+│  PHASE 5: SENTIMENT ANALYSIS (U5)                           │
+│  - Dutch sentiment model on motion titles/body_text         │
+│  - Aggregate: avg sentiment per year                        │
+│  - Output: sentiment_by_year DataFrame                      │
+└─────────────────────────────────────────────────────────────┘
+                              │
+                              ▼
+┌─────────────────────────────────────────────────────────────┐
+│  PHASE 6: VISUALIZATION (U6)                                │
+│  - Time-series: volume, extremity, sentiment, centrist vote │
+│  - Export: static HTML/PNG charts                           │
+└─────────────────────────────────────────────────────────────┘
+```
+
+---
+
+## Execution Strategy: Agent Assignments & Parallelization
+
+| Unit | Agents | Can Parallelize With | Notes |
+|------|--------|---------------------|-------|
+| U1 | `general` (text pipeline), `ce-best-practices-researcher` (keyword extraction patterns) | — | Foundation unit; must complete before all downstream work. DuckDB batch aggregation is I/O-bound and fast; no subagents needed. |
+| U2 | `ce-best-practices-researcher` (TF-IDF best practices, Dutch NLP), `general` (implementation) | U3 (after U1) | Keyword derivation is research-heavy; the researcher can pre-fetch Dutch political TF-IDF patterns while `general` implements the aggregation. Once U1 data is ready, keyword extraction can run in parallel with U3's vote-pattern analysis. |
+| U3 | `general` (DuckDB SQL + Python) | U2 (after U1) | Pure computation; no external research needed. Runs in parallel with U2 once U1 is done because U2 and U3 read the same `motion_rightness` table but produce independent features. |
+| U4 | `general` (scikit-learn pipeline), `ce-framework-docs-researcher` (if exploring Dutch sentiment models) | — | Depends on U2 + U3. The LLM-based extremity scorer can be delegated to `general` in batch mode; no real-time LLM calls at inference time. |
+| U5 | `ce-best-practices-researcher` (time-series visualization patterns), `general` (Plotly implementation) | — | Depends on U4. Visualization is straightforward once data shape is known; researcher mainly validates responsive/mobile chart patterns. |
+| U6 | `general` (integration), `ce-test-browser` (if applicable) | — | Depends on U5. Integration work is sequential by nature; browser tests validate the Streamlit tab render. |
+
+**Parallelization summary:**
+- **U1** is strictly sequential (foundation).
+- **U2 + U3** can run in parallel once U1 completes.
+- **U4** is sequential after U2+U3.
+- **U5 + U6** are sequential after U4.
+- Total wall-clock phases: **5** (U1 → U2∥U3 → U4 → U5 → U6).
+
+---
+
+## Implementation Units
+
+- **U1. Keyword Derivation**
+
+  **Goal:** Extract the most distinctive terms used in right-wing motions without hand-curating.
+
+  **Requirements:** R1
+
+  **Dependencies:** None
+
+  **Files:**
+  - Create: `analysis/right_wing/derive_keywords.py`
+  - Reference (do not modify): `scripts/derive_svd_labels.py`
+  - Test: `tests/test_derive_keywords.py`
+
+  **Approach:**
+  1. Query `motions` joined with `mp_votes` to identify motions where right-wing parties vote predominantly *voor* (>60%).
+  2. Collect a control group: motions where left-wing parties vote predominantly *voor* (>60%).
+  3. Run TF-IDF on titles (and optionally body_text) for both groups, using Dutch stopwords.
+  4. Compute differential TF-IDF: terms with high scores in right-group and low in left-group.
+  5. Manually inspect top 50 terms and filter out generic parliamentary terms (e.g., "motie", "kamer").
+  6. Persist the final keyword list to `analysis/right_wing/right_wing_keywords.json`.
+
+  **Patterns to follow:**
+  - `scripts/derive_svd_labels.py` for TF-IDF pipeline
+  - `analysis/config.py` for `CANONICAL_RIGHT` / `CANONICAL_LEFT`
+
+  **Test scenarios:**
+  - Happy path: right-wing motions contain terms like "migratie", "asiel", "grenzen" in top results
+  - Edge case: control group does NOT have these terms in top 50
+  - Edge case: generic terms ("motie", "regering") are filtered out
+  - Integration: keyword list can be loaded as JSON and used for regex matching
+
+  **Verification:**
+  - `right_wing_keywords.json` exists and contains >= 20 distinct terms
+  - Manual inspection confirms terms are politically distinctive
+
+---
+
+- **U2. Hybrid Motion Classifier**
+
+  **Goal:** Identify "actually right-wing" motions using both keywords and voting patterns.
+
+  **Requirements:** R2, R7
+
+  **Dependencies:** U1
+
+  **Files:**
+  - Create: `analysis/right_wing/classify_motions.py`
+  - Test: `tests/test_classify_motions.py`
+
+  **Approach:**
+  1. **Keyword filter:** Match motion title/body_text against `right_wing_keywords.json` (case-insensitive, whole-word regex).
+  2. **Voting filter:** For each candidate motion, compute:
+     - `right_support` = % of `CANONICAL_RIGHT` parties voting *voor*
+     - `left_opposition` = % of `CANONICAL_LEFT` parties voting *tegen*
+     - Pass if `right_support >= 0.60` AND `left_opposition >= 0.40`
+  3. Output a DuckDB table `right_wing_motions` with columns: `motion_id`, `year`, `title`, `right_support`, `left_opposition`, `keyword_matches`.
+  4. Run a validation sample: manually inspect 20 random classified motions and 20 random non-classified motions to estimate precision/recall.
+
+  **Patterns to follow:**
+  - `scripts/motion_drift.py` for cross-ideological voting logic
+  - `analysis/explorer_data.py` for data loading patterns
+
+  **Test scenarios:**
+  - Happy path: a PVV motion about "asielzoekers" with 80% right support and 60% left opposition is classified
+  - Edge case: a PvdA motion mentioning "migratie" neutrally with 20% right support is NOT classified
+  - Edge case: motion with right support 60% but left opposition only 10% is NOT classified
+  - Error path: motion with no votes is skipped gracefully
+  - Integration: `right_wing_motions` table is queryable and has expected row count
+
+  **Verification:**
+  - Validation sample shows >80% precision and >70% recall
+  - `right_wing_motions` table contains >100 rows (non-empty)
+
+---
+
+- **U3. Temporal Aggregation**
+
+  **Goal:** Compute yearly trends in right-wing motion volume, support, and cross-party adoption.
+
+  **Requirements:** R3
+
+  **Dependencies:** U2
+
+  **Files:**
+  - Create: `analysis/right_wing/temporal_analysis.py`
+  - Test: `tests/test_temporal_analysis.py`
+
+  **Approach:**
+  1. Group `right_wing_motions` by `year`.
+  2. For each year, compute:
+     - `total_right_wing`: count of right-wing motions
+     - `pct_of_total`: % of all motions that year
+     - `avg_right_support`: average right-party support
+     - `avg_left_opposition`: average left-party opposition
+     - `centrist_support`: % of centrist parties (VVD, D66, CDA, NSC, BBB, CU) voting *voor*
+     - `extremity_index`: placeholder for U4 scores (NULL until backfilled)
+  3. Compute year-over-year deltas for each metric.
+  4. Persist to `yearly_right_wing_summary` table or DataFrame export.
+
+  **Patterns to follow:**
+  - `analysis/trajectory.py` for time-windowed aggregations
+  - `analysis/explorer_data.py` for DuckDB-to-pandas patterns
+
+  **Test scenarios:**
+  - Happy path: each year has a row with all metrics computed
+  - Edge case: year with 0 right-wing motions shows 0 count and NULL centrist support
+  - Edge case: year with missing vote data for some parties still computes available metrics
+  - Integration: output DataFrame can be merged with U4 extremity scores
+
+  **Verification:**
+  - One row per year from earliest to latest motion
+  - `pct_of_total` lies between 0% and 100% for every year (spot-check one sample year manually)
+  - Centrist support shows a plausible trend (not all 0% or 100%)
+
+---
+
+- **U4. Policy Extremity Scoring**
+
+  **Goal:** Score how radical the policy demand of each right-wing motion is.
+
+  **Requirements:** R4
+
+  **Dependencies:** U2
+
+  **Files:**
+  - Create: `analysis/right_wing/extremity_scorer.py`
+  - Test: `tests/test_extremity_scorer.py`
+
+  **Approach:**
+  1. For each right-wing motion, build a prompt:
+     > "Dit is een motie in het Nederlandse parlement. Wat vraagt de motie concreet? Beoordeel hoe radicaal dit voorstel is op een schaal van 1 (mild/technisch) tot 5 (extreem/fundamenteel). Geef alleen het cijfer en een korte verklaring in het Nederlands."
+  2. Use `ai_provider.chat_completion_json()` with a JSON schema enforcing integer 1-5 + explanation string.
+  3. Batch process motions (parallel API calls, 10-15 per batch) to minimize cost.
+  4. Store results in `extremity_scores` table: `motion_id`, `score`, `explanation`.
+  5. Compute yearly average and merge into U3's summary.
+
+  **Execution note:** Start with a sample of 50 motions to validate scoring consistency before running the full set.
+
+  **Patterns to follow:**
+  - `summarizer.py` for batch LLM processing and parallel API calls
+  - `ai_provider.py` for JSON-mode chat completions
+
+  **Test scenarios:**
+  - Happy path: a motion demanding "sluit alle AZC's" scores 5/5
+  - Happy path: a motion requesting "rapporteer cijfers" scores 1/5
+  - Edge case: LLM returns invalid JSON → fallback to retry or mark as NULL
+  - Error path: API failure → motion is skipped, not blocking
+  - Integration: extremity scores correlate with keyword intensity (sanity check)
+
+  **Verification:**
+  - Sample of 50 shows test-retest consistency (same motion re-scored twice gets same score)
+  - Score distribution is not all 1s or all 5s (has variance)
+  - `extremity_scores` table covers >= 90% of right-wing motions
+
+---
+
+- **U5. Sentiment Analysis Pipeline**
+
+  **Goal:** Track emotional tone of right-wing motions over time as a proxy for radicalization.
+
+  **Requirements:** R5
+
+  **Dependencies:** U2
+
+  **Files:**
+  - Create: `analysis/right_wing/sentiment_analysis.py`
+  - Test: `tests/test_sentiment_analysis.py`
+
+  **Approach:**
+  1. Evaluate candidate Dutch sentiment models:
+     - `pysentimiento` (nl model)
+     - `transformers` pipeline with Dutch BERT (`wietsedv/bert-base-dutch-cased` fine-tuned for sentiment)
+     - Fallback: LLM batch calls if models are poor
+  2. Run on motion titles + first 200 chars of body_text (avoiding noise).
+  3. Map outputs to [-1, 1] scale (negative = hostile/aggressive, positive = constructive).
+  4. Aggregate by year: avg sentiment, std deviation, % strongly negative.
+  5. Merge into U3 summary.
+
+  **Patterns to follow:**
+  - `pipeline/text_pipeline.py` for text preprocessing
+  - Lightweight model evaluation script similar to `test_mistral.py`
+
+  **Test scenarios:**
+  - Happy path: motion with "stop de immigratie" scores negative
+  - Happy path: motion with "verbeter de procedure" scores neutral/positive
+  - Edge case: very short title (< 5 words) is handled gracefully
+  - Error path: model fails to load → fallback to LLM or skip
+  - Integration: sentiment trend correlates with extremity trend (sanity check)
+
+  **Verification:**
+  - Sentiment scores show variance (not all identical)
+  - Manual inspection of 10 random motions confirms direction is plausible
+  - Model inference time is < 100ms per motion (acceptable for batch)
+
+---
+
+- **U6. Time-Series Visualization**
+
+  **Goal:** Produce static charts showing volume, extremity, sentiment, and centrist support over time.
+
+  **Requirements:** R6
+
+  **Dependencies:** U3, U4, U5
+
+  **Files:**
+  - Create: `analysis/right_wing/visualize_trends.py`
+  - Output: `output/right_wing_trends.html` and/or `.png` files
+
+  **Approach:**
+  1. Load `yearly_right_wing_summary` DataFrame.
+  2. Generate 4 charts:
+     - **Volume:** Line chart of `total_right_wing` + `pct_of_total` (dual axis)
+     - **Extremity:** Line chart of `avg_extremity_score` with error bars (std)
+     - **Sentiment:** Line chart of `avg_sentiment` + % strongly negative (stacked area)
+     - **Centrist Support:** Line chart of `centrist_support` over time with party breakdown
+  3. Use Plotly (consistent with `analysis/visualize.py`) with dark theme colors.
+  4. Export to `output/right_wing_trends.html` (interactive) and `.png` (static).
+
+  **Patterns to follow:**
+  - `analysis/visualize.py` for Plotly setup and theming
+  - `analysis/tabs/_rendering.py` for dark theme color constants
+
+  **Test scenarios:**
+  - Happy path: HTML file is generated and opens without errors
+  - Happy path: all 4 charts render with data spanning multiple years
+  - Edge case: missing data for some years → chart shows gaps, not crashes
+  - Integration: charts make the user's hypothesis (stable volume, rising extremity/centrist support) visually assessable, whether the data supports or contradicts it
+
+  **Verification:**
+  - `output/right_wing_trends.html` exists and is > 100KB
+  - Manual inspection of charts shows clear trends
+  - Charts are suitable for sharing (static PNGs are readable)
+
+---
+
+## System-Wide Impact
+
+- **New tables:** `right_wing_motions`, `extremity_scores` — these are derived/analysis tables, not core schema. They can be regenerated.
+- **No changes to existing tables:** `motions`, `mp_votes`, `svd_vectors` are read-only for this feature.
+- **No API surface changes:** This is an offline analysis script, not integrated into the app yet.
+- **Performance:** TF-IDF on ~29K titles is trivial. LLM calls for U4 are the bottleneck; batching keeps cost manageable.
+
+---
+
+## Risks & Dependencies
+
+| Risk | Mitigation |
+|------|------------|
+| Keyword list overfits to current parliamentary period | Validate across multiple years; include historical data in TF-IDF corpus |
+| LLM extremity scoring is inconsistent | Add few-shot examples; validate on sample before full run; allow NULL scores |
+| Dutch sentiment model performs poorly on parliamentary language | Evaluate multiple models; fallback to LLM if needed |
+| Classification has false positives (left-wing motions caught) | Hybrid voting filter mitigates this; validation sample checks precision |
+| LLM API costs for extremity scoring exceed budget | Batch aggressively; score a stratified sample (e.g., 30 per year) instead of all motions |
+
+---
+
+## Documentation / Operational Notes
+
+- Add a README in `analysis/right_wing/` explaining how to regenerate the analysis
+- Document the keyword list and classification thresholds for reproducibility
+- Note the LLM model used for extremity scoring and its version/date
+
+---
+
+## Sources & References
+
+- **Related code:** `scripts/motion_drift.py`, `scripts/derive_svd_labels.py`, `analysis/axis_classifier.py`
+- **Related learnings:** `docs/solutions/best-practices/svd-labels-voting-patterns-not-semantics.md`
+- **Origin:** User request — analyze right-wing motion trends over time
--- a/docs/solutions/workflow-issues/parallel-api-batching-ai-summarization-2026-05-05.md
+++ b/docs/solutions/workflow-issues/parallel-api-batching-ai-summarization-2026-05-05.md
@ -0,0 +1,212 @@
+---
+title: "Parallel API batching for AI motion summarization with adaptive throughput"
+date: "2026-05-05"
+category: workflow-issues
+module: summarizer
+problem_type: workflow_issue
+component: service_object
+severity: medium
+applies_when:
+  - "Backfilling large numbers of AI-generated summaries via an API"
+  - "Rate limits or slow throughput bottlenecking batch processing"
+  - "Need to process 10,000+ items with an LLM API"
+tags:
+  - parallelization
+  - batching
+  - openrouter
+  - mistral
+  - throughput
+  - cost-optimization
+---
+
+# Parallel API Batching for AI Motion Summarization
+
+## Context
+
+Generating layman-friendly explanations for 29,000+ parliamentary motions via an LLM API. The initial approach processed one motion per API call, yielding ~700 motions/hour with significant per-request overhead. At this rate, the full backfill would take ~40 hours and cost ~$15-20. The budget tracking was also inaccurate: the estimated spend reached the $5.00 cap when the actual API spend was only ~$1.78.
+
+## Guidance
+
+### 1. Application-level batching (not native API batching)
+
+OpenAI's `/chat/completions` endpoint does not support multiple independent conversations in one request. Instead, pack 10-20 motions into a single prompt and request structured JSON output:
+
+```python
+# Build one prompt with N motions
+prompt = f"""Je krijgt {len(motions)} moties.
+Schrijf voor ELKE motie 2-3 zinnen uitleg.
+Geef antwoord als JSON: {{"motion_id": "uitleg", ...}}
+
+{motions_block}
+"""
+
+# Request JSON mode
+payload = {
+    "model": model,
+    "messages": messages,
+    "response_format": {"type": "json_object"}
+}
+```
+
+This eliminates 90%+ of per-request HTTP overhead and context-window overhead.
+
+### 2. Parallel API requests with ThreadPoolExecutor
+
+When the API supports concurrent requests, use `ThreadPoolExecutor` to saturate the connection:
+
+```python
+from concurrent.futures import ThreadPoolExecutor
+
+def chat_completion_json_parallel(
+    message_batches, model=None, max_workers=3
+):
+    def _fetch_one(messages):
+        return chat_completion_json(messages, model=model)
+
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        futures = [executor.submit(_fetch_one, batch) for batch in message_batches]
+        return [f.result() for f in futures]
+```
+
+With 3 parallel workers × 15 motions per batch = 45 motions per chunk. This yielded **2.0x speedup** (0.5 → 1.0 motions/sec). After fixing the parameter-passing bug (see "Critical gotcha" below), 5 workers × 20 motions = 100 motions per chunk achieved **3.4x speedup** (1.1 → 3.7 motions/sec).
+
+### 3. Model selection for language quality
+
+Tested three models on the same problematic motions:
+
+| Model | Quality | Speed | Issues |
+|-------|---------|-------|--------|
+| `google/gemma-4-26b-a4b-it` | Good | Slow (~25s/batch) | Occasional English words |
+| `mistralai/mistral-small-2603` | **Excellent** | Medium | None observed |
+| `mistralai/mistral-small-3.2-24b-instruct` | Good | Medium | One blank output |
+
+**Recommendation**: `mistralai/mistral-small-2603` for Dutch-language tasks. Test on a representative sample (20-50 items) before committing to a full backfill.
+
+### 4. Post-processing pipeline
+
+Always add a post-processing step to catch model failures:
+
+```python
+def _post_process_summary(self, text: str) -> str:
+    # 1. Remove lines that are mostly non-Latin (Chinese, Arabic, etc.)
+    # 2. Replace known English words with Dutch equivalents
+    # 3. Fix common typos (e.g., "lageinkomen" → "laag inkomen")
+    # 4. Reject if >10 common English words remain
+    # 5. Remove metadata fragments like "(45-102)"
+    # 6. Normalize whitespace and punctuation
+```
+
+This caught: Arabic script hallucinations, English words like "filthy", Dutch typos like "formuliernoten", and metadata leaking from titles.
+
+### 5. Adaptive backfill script
+
+Create a backfill script that monitors throughput and adjusts parameters dynamically:
+
+```python
+# Start conservative
+api_batch_size = 15
+parallel_batches = 3
+chunk_size = api_batch_size * parallel_batches  # 45
+
+# After each chunk, measure time
+if chunk_time < avg_chunk_time * 0.8:
+    api_batch_size += 1  # API is fast, increase batch
+elif chunk_time > avg_chunk_time * 1.5:
+    api_batch_size -= 1  # API is slow, decrease batch
+
+    # On repeated failures, back off
+    if failures_in_row >= 2:
+        api_batch_size -= 2
+        delay_between_chunks += 1.0
+```
+
+### Critical gotcha: pass parallelism config all the way through
+
+When adding `parallel_batches` to the orchestration layer, make sure it actually reaches the API call. A common bug is adding the parameter to the outer script (and even to the method signature) but leaving hardcoded literal values in the inner call:
+
+```python
+# summarizer.py — WRONG (hardcoded defaults)
+def generate_layman_explanations_batch_parallel(
+    self, motions, model=None, parallel_batches=3, sub_batch_size=15
+):
+    ...
+
+def update_motion_summaries(
+    self, ..., api_batch_size=15, parallel_batches=3  # still hardcoded!
+):
+    ...
+    summaries = self.generate_layman_explanations_batch_parallel(
+        motions_for_api,
+        model=config.QWEN_MODEL,
+        parallel_batches=3,        # <-- BUG: ignores parameter
+        sub_batch_size=15,         # <-- BUG: ignores parameter
+    )
+```
+
+**Fix**: pass the parameters through and compute `sub_batch_size` dynamically:
+
+```python
+# summarizer.py — CORRECT
+def update_motion_summaries(
+    self, ..., api_batch_size=15, parallel_batches=3
+):
+    ...
+    summaries = self.generate_layman_explanations_batch_parallel(
+        motions_for_api,
+        model=config.QWEN_MODEL,
+        parallel_batches=parallel_batches,
+        sub_batch_size=api_batch_size // parallel_batches,
+    )
+```
+
+**Impact of this bug**: With `parallel_batches=3` hardcoded, increasing backfill.py to 5 workers had zero effect. After fixing: speed jumped from **1.1 → 3.7 motions/sec** (3.4x).
+
+### 6. Accurate cost tracking
+
+Update cost estimates when switching models. The old estimate assumed Qwen at $0.60/M tokens, but Mistral Small is much cheaper:
+
+```python
+# Mistral Small 2603 pricing (OpenRouter)
+TOKEN_PRICE_PER_MILLION = 0.07  # blended input+output
+TOKENS_PER_MOTION = 480         # ~400 input + ~80 output
+COST_PER_MOTION = (TOKENS_PER_MOTION / 1_000_000) * TOKEN_PRICE_PER_MILLION
+# ≈ $0.000034 per motion
+```
+
+## Why This Matters
+
+- **Speed**: From ~700/hour (single) to ~13,320/hour (parallel batch, 5 workers) — **19x faster**
+- **Cost**: From ~$15-20 estimated to ~$1-2 actual for 30,000 motions
+- **Quality**: Model testing on edge cases prevents garbage-in-garbage-out at scale
+- **Reliability**: Post-processing catches ~5% of outputs that would degrade UX
+
+## When to Apply
+
+- Processing 1,000+ items through any LLM API
+- API charges per-request (not per-token) or has high latency per request
+- Output quality is critical and model hallucinations are unacceptable
+- Running overnight/background backfills where throughput matters more than latency
+
+## Examples
+
+### Before (single-motion API calls)
+```python
+for motion in motions:
+    summary = ai.chat_completion(build_prompt(motion))
+    # 50 motions = 50 API calls, ~250s total
+```
+
+### After (parallel batching)
+```python
+# Split into batches of up to 15 (50 motions -> 4 batches: 15, 15, 15, 5)
+batches = [motions[i:i+15] for i in range(0, 50, 15)]
+message_batches = [build_batch_prompt(batch) for batch in batches]
+results = chat_completion_json_parallel(message_batches, max_workers=3)
+# 50 motions = 4 API calls (3 in parallel at a time), ~25s total
+```
+
+## Related
+
+- `ai_provider.py` — `chat_completion_json_parallel()` implementation
+- `summarizer.py` — `MotionSummarizer` with batch and parallel methods
+- `backfill.py` — Adaptive backfill script with dynamic parameter tuning
--- a/reports/overton_window/breakpoint_figure_2.png
+++ b/reports/overton_window/breakpoint_figure_2.png