#!/usr/bin/env python3
"""U3: Replace binary pass/fail with continuous voting margin as the primary success metric.

For each right-wing motion, compute the voting margin from per-party vote counts:

    margin = (voor - tegen) / (voor + tegen + afwezig)

This gives a continuous [-1, 1] scale where:
    +1.0 = unanimous support (all parties voted voor)
     0.0 = exactly tied (or every listed party was absent)
    -1.0 = unanimous opposition (all parties voted tegen)

Motions whose vote record contains no "voor"/"tegen"/"afwezig" entries have no
margin and are excluded from the analysis.

Usage:
    uv run python -m analysis.right_wing.voting_margin

Output:
    reports/overton_window/voting_margin.md
    reports/overton_window/voting_margin_figure.png
"""

from __future__ import annotations

import json
import logging
import sys
from collections import Counter, defaultdict
from pathlib import Path
from typing import Any, Callable

# The project root must be importable before `analysis.*` is imported below.
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

import duckdb
import matplotlib
matplotlib.use("Agg")  # headless backend; must be selected before pyplot is imported
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import spearmanr, pearsonr, mannwhitneyu

from analysis.config import CANONICAL_RIGHT

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)

DB_PATH = str(PROJECT_ROOT / "data" / "motions.db")
REPORTS_DIR = PROJECT_ROOT / "reports" / "overton_window"
REPORTS_DIR.mkdir(parents=True, exist_ok=True)

BREAK_YEAR = 2024

QUARTILE_LABELS = [
    "Q1 [0.00\u20130.25]",
    "Q2 (0.25\u20130.50]",
    "Q3 (0.50\u20130.75]",
    "Q4 (0.75\u20131.00]",
]

# Period labels stored on every motion and used as stratum keys.
_PRE_PERIOD = "pre-2024"
_POST_PERIOD = "post-2024"

# Thresholds used by the report narrative.
_SIGNIFICANCE_LEVEL = 0.05
_MEANINGFUL_GAP = 0.05


def quartile_bin(cs: float) -> int:
    """Return the 0-based centrist-support bin for *cs*.

    Bins are closed on the right: values <= 0.25 map to 0, <= 0.50 to 1,
    <= 0.75 to 2 and everything else (including NaN) to 3.
    """
    for index, upper in enumerate((0.25, 0.50, 0.75)):
        if cs <= upper:
            return index
    return 3


def compute_margin(voting: dict[str, str]) -> float | None:
    """Compute the voting margin from per-party vote directions.

    Args:
        voting: Mapping ``{party_name: "voor" | "tegen" | "afwezig"}``. Any
            other value is ignored.

    Returns:
        ``(voor - tegen) / (voor + tegen + afwezig)`` in [-1, 1], or ``None``
        when the mapping contains none of the three recognised directions.
    """
    # Only strings can match a direction; filtering also keeps Counter from
    # choking on unhashable values.
    counts = Counter(v for v in voting.values() if isinstance(v, str))
    voor = counts["voor"]
    tegen = counts["tegen"]
    denom = voor + tegen + counts["afwezig"]
    if denom == 0:
        return None
    return (voor - tegen) / denom


def motion_passed(margin: float | None) -> bool | None:
    """Derive pass/fail from a margin: strictly positive means passed.

    Returns ``None`` when *margin* is ``None``.
    """
    if margin is None:
        return None
    return margin > 0


def _parse_voting_results(raw: Any) -> dict[str, str] | None:
    """Decode a ``voting_results`` cell; return ``None`` if it is unusable.

    Accepts a JSON string or an already-decoded mapping. A missing/empty cell
    yields an empty dict (which later produces no margin).
    """
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except json.JSONDecodeError:
            return None
    elif not raw:
        return {}
    return raw if isinstance(raw, dict) else None


def collect_motion_margins(
    con: duckdb.DuckDBPyConnection,
) -> list[dict[str, Any]]:
    """Load classified right-wing motions and attach margin, outcome and period.

    Rows whose vote record is malformed or yields no margin are skipped; the
    number of malformed records is logged as a warning.

    Args:
        con: Open DuckDB connection exposing ``right_wing_motions`` and
            ``motions``.

    Returns:
        One dict per usable motion with keys ``motion_id``, ``year``,
        ``centrist_support_strict``, ``margin``, ``passed`` and ``period``.
    """
    rows = con.execute("""
        SELECT r.motion_id, r.year, r.centrist_support_strict, m.voting_results
        FROM right_wing_motions r
        JOIN motions m ON r.motion_id = m.id
        WHERE r.classified = TRUE
          AND r.year IS NOT NULL
          AND r.centrist_support_strict IS NOT NULL
    """).fetchall()

    motions: list[dict[str, Any]] = []
    n_malformed = 0
    for mid, year, cs, vr_json in rows:
        voting = _parse_voting_results(vr_json)
        if voting is None:
            n_malformed += 1
            continue
        margin = compute_margin(voting)
        if margin is None:
            continue
        year_int = int(year)
        motions.append({
            "motion_id": mid,
            "year": year_int,
            "centrist_support_strict": float(cs),
            "margin": margin,
            "passed": motion_passed(margin),
            "period": _POST_PERIOD if year_int >= BREAK_YEAR else _PRE_PERIOD,
        })

    if n_malformed:
        logger.warning(
            "Skipped %d motions with malformed voting_results", n_malformed
        )
    return motions


def _summarise_margins(margins: list[float]) -> dict[str, Any]:
    """Return descriptive statistics for one bin; NaN where undefined."""
    nan = float("nan")
    arr = np.asarray(margins, dtype=float)
    has_data = arr.size > 0
    return {
        "n": len(margins),
        "mean": float(np.mean(arr)) if has_data else nan,
        "median": float(np.median(arr)) if has_data else nan,
        # Sample standard deviation needs at least two observations.
        "std": float(np.std(arr, ddof=1)) if arr.size > 1 else nan,
        "p25": float(np.percentile(arr, 25)) if has_data else nan,
        "p75": float(np.percentile(arr, 75)) if has_data else nan,
        "min": float(np.min(arr)) if has_data else nan,
        "max": float(np.max(arr)) if has_data else nan,
        "margin": margins,
    }


def quartile_margin_stats(
    motions: list[dict],
    filter_fn: Callable[[dict], bool] | None = None,
) -> dict:
    """Summarise margins per centrist-support bin, per stratum.

    Args:
        motions: Motion dicts as produced by ``collect_motion_margins``.
        filter_fn: Optional predicate. When given, a single stratum named
            ``"filtered"`` is produced; otherwise the strata are ``"all"``,
            ``"pre-2024"`` and ``"post-2024"``.

    Returns:
        ``{stratum: {bin_index: stats}}`` where ``stats`` has the keys ``n``,
        ``mean``, ``median``, ``std``, ``p25``, ``p75``, ``min``, ``max`` and
        ``margin`` (the raw list of margins in that bin).
    """
    if filter_fn is None:
        strata: dict[str, Callable[[dict], bool]] = {
            "all": lambda m: True,
            _PRE_PERIOD: lambda m: m["period"] == _PRE_PERIOD,
            _POST_PERIOD: lambda m: m["period"] == _POST_PERIOD,
        }
    else:
        strata = {"filtered": filter_fn}

    result: dict[str, dict[int, dict]] = {}
    for label, keep in strata.items():
        grouped: dict[int, list[float]] = {q: [] for q in range(4)}
        for m in motions:
            if keep(m):
                grouped[quartile_bin(m["centrist_support_strict"])].append(m["margin"])
        result[label] = {q: _summarise_margins(vals) for q, vals in grouped.items()}
    return result


def spearman_correlation(motions: list[dict]) -> dict[str, Any]:
    """Correlate margin with strict centrist support (Spearman and Pearson).

    Returns:
        Dict with ``spearman_rho``, ``spearman_p``, ``pearson_r`` and
        ``pearson_p``. All four are NaN when fewer than two motions are given,
        because neither coefficient is defined in that case.
    """
    if len(motions) < 2:
        nan = float("nan")
        return {"spearman_rho": nan, "spearman_p": nan,
                "pearson_r": nan, "pearson_p": nan}

    margins = np.array([m["margin"] for m in motions])
    cs_vals = np.array([m["centrist_support_strict"] for m in motions])
    rho, p = spearmanr(margins, cs_vals)
    r, pr = pearsonr(margins, cs_vals)
    return {"spearman_rho": float(rho), "spearman_p": float(p),
            "pearson_r": float(r), "pearson_p": float(pr)}


def create_figure(
    all_strata: dict[str, dict[int, dict]],
    motions: list[dict],
    corr: dict[str, Any],
) -> str:
    """Render the three-panel figure and save it under ``REPORTS_DIR``.

    Panel A shows box plots of margin per centrist-support bin, panel B the
    yearly mean margin with a 95% band, and panel C a scatter of margin
    against centrist support with a linear fit.

    Args:
        all_strata: Output of ``quartile_margin_stats`` (the ``"all"`` stratum
            is used).
        motions: Motion dicts as produced by ``collect_motion_margins``.
        corr: Output of ``spearman_correlation``.

    Returns:
        Path of the written PNG as a string.
    """
    fig, (ax_a, ax_b, ax_c) = plt.subplots(1, 3, figsize=(18, 6))
    try:
        # --- Panel A: Box plots of margin by centrist support quartile ---
        all_bins = all_strata["all"]
        quartile_data = [all_bins[q]["margin"] for q in range(4)]
        quartile_ns = [all_bins[q]["n"] for q in range(4)]

        bp = ax_a.boxplot(
            quartile_data,
            positions=range(4),
            widths=0.5,
            patch_artist=True,
            showfliers=True,
            flierprops=dict(marker="o", markersize=3, alpha=0.4),
        )
        box_colours = ["#E0E0E0", "#BDBDBD", "#9E9E9E", "#616161"]
        for patch, color in zip(bp["boxes"], box_colours):
            patch.set_facecolor(color)
            patch.set_alpha(0.8)

        # Label only the first mean marker that is actually drawn, so the
        # legend entry survives even when the first bin is empty.
        mean_labelled = False
        for q in range(4):
            mean_val = all_bins[q]["mean"]
            if np.isnan(mean_val):
                continue
            ax_a.scatter(q, mean_val, marker="D", color="#D32F2F", s=40, zorder=5,
                         label=None if mean_labelled else "Mean")
            mean_labelled = True

        ax_a.set_xticks(range(4))
        ax_a.set_xticklabels(
            [f"Q{q+1}\n(n={quartile_ns[q]})" for q in range(4)], fontsize=9
        )
        ax_a.set_ylabel("Voting margin (party-level)")
        ax_a.set_title("A. Margin by centrist support quartile", fontweight="bold")
        ax_a.set_ylim(-1.05, 1.05)
        ax_a.axhline(y=0, color="grey", linestyle="--", alpha=0.5, linewidth=0.8)
        if mean_labelled:
            ax_a.legend(fontsize=7, loc="upper left")
        ax_a.grid(True, alpha=0.3, axis="y")

        # --- Panel B: Margin over time (yearly mean) ---
        years_data: dict[int, list[float]] = defaultdict(list)
        for m in motions:
            years_data[m["year"]].append(m["margin"])

        years_sorted = sorted(years_data)
        yearly_means = np.array([np.mean(years_data[y]) for y in years_sorted])
        # Sample std is undefined for a single motion; use NaN explicitly so
        # the band simply has a gap instead of numpy emitting warnings.
        yearly_stds = np.array([
            np.std(years_data[y], ddof=1) if len(years_data[y]) > 1 else np.nan
            for y in years_sorted
        ])
        yearly_ns = np.array([len(years_data[y]) for y in years_sorted])
        yearly_sems = yearly_stds / np.sqrt(yearly_ns)

        ax_b.fill_between(years_sorted,
                          yearly_means - 1.96 * yearly_sems,
                          yearly_means + 1.96 * yearly_sems,
                          alpha=0.2, color="#002366", label="95% CI")
        ax_b.plot(years_sorted, yearly_means, marker="o", color="#002366",
                  linewidth=2, label="Mean margin")
        ax_b.axvline(x=BREAK_YEAR - 0.5, color="black", linestyle=":",
                     alpha=0.5, linewidth=1)
        # x in data coordinates, y as a fraction of the axes height: the label
        # stays inside the panel even when all yearly means are negative.
        ax_b.annotate(str(BREAK_YEAR), xy=(BREAK_YEAR - 0.3, 0.90),
                      xycoords=("data", "axes fraction"),
                      fontsize=9, color="black", alpha=0.7)
        ax_b.set_xlabel("Year")
        ax_b.set_ylabel("Mean voting margin")
        ax_b.set_title("B. Voting margin over time", fontweight="bold")
        ax_b.legend(fontsize=8)
        ax_b.grid(True, alpha=0.3)
        ax_b.set_xticks(years_sorted)
        ax_b.set_xticklabels([str(y) for y in years_sorted], rotation=45)

        # --- Panel C: Scatter of margin vs centrist support ---
        margins_arr = np.array([m["margin"] for m in motions], dtype=float)
        cs_arr = np.array([m["centrist_support_strict"] for m in motions], dtype=float)
        pre_mask = np.array([m["period"] == _PRE_PERIOD for m in motions], dtype=bool)
        post_mask = ~pre_mask

        ax_c.scatter(cs_arr[pre_mask], margins_arr[pre_mask], alpha=0.35, s=12,
                     color="#90CAF9", label="Pre-2024", edgecolors="none")
        ax_c.scatter(cs_arr[post_mask], margins_arr[post_mask], alpha=0.35, s=12,
                     color="#1E88E5", label="Post-2024", edgecolors="none")

        valid = ~np.isnan(cs_arr) & ~np.isnan(margins_arr)
        if valid.sum() > 1:
            coeffs = np.polyfit(cs_arr[valid], margins_arr[valid], 1)
            x_fit = np.linspace(0, 1, 100)
            ax_c.plot(x_fit, np.polyval(coeffs, x_fit), color="#D32F2F",
                      linewidth=1.5, linestyle="--",
                      label=f"Linear fit (r={corr['pearson_r']:.3f})")

        ax_c.set_xlabel("Centrist support (strict)")
        ax_c.set_ylabel("Voting margin")
        ax_c.set_title(
            f"C. Margin vs centrist support\nSpearman \u03c1={corr['spearman_rho']:.3f}, p={corr['spearman_p']:.1e}",
            fontweight="bold",
        )
        ax_c.set_ylim(-1.05, 1.05)
        ax_c.set_xlim(-0.02, 1.02)
        ax_c.axhline(y=0, color="grey", linestyle="--", alpha=0.5, linewidth=0.8)
        ax_c.legend(fontsize=8, loc="upper left")
        ax_c.grid(True, alpha=0.3)

        fig.tight_layout()
        path = str(REPORTS_DIR / "voting_margin_figure.png")
        fig.savefig(path, dpi=150, bbox_inches="tight")
    finally:
        # Release the figure even if rendering or saving failed.
        plt.close(fig)

    logger.info("Saved figure to %s", path)
    return path


def _format_or_na(value: float, spec: str) -> str:
    """Format *value* with *spec*, or return ``"N/A"`` when it is NaN."""
    return "N/A" if np.isnan(value) else format(value, spec)


def generate_report(
    all_strata: dict[str, dict[int, dict]],
    motions: list[dict],
    corr: dict[str, Any],
    fig_path: str,
) -> str:
    """Write the Markdown report to ``REPORTS_DIR / "voting_margin.md"``.

    Args:
        all_strata: Output of ``quartile_margin_stats`` without a filter.
        motions: Motion dicts as produced by ``collect_motion_margins``.
        corr: Output of ``spearman_correlation``.
        fig_path: Path of the figure; only its file name is embedded.

    Returns:
        Path of the written report as a string.
    """
    nan = float("nan")
    n_total = len(motions)
    n_passed = sum(1 for m in motions if m["passed"])
    n_failed = sum(1 for m in motions if m["passed"] is False)
    overall_pass_rate = n_passed / n_total if n_total > 0 else 0.0
    all_bins = all_strata["all"]

    # Quartile margin table
    qtable = "| Stratum | " + " | ".join(QUARTILE_LABELS) + " |\n"
    qtable += "|---------|" + "|".join([":------:" for _ in QUARTILE_LABELS]) + "|\n"
    for key in ["all", _PRE_PERIOD, _POST_PERIOD]:
        bins = all_strata.get(key, {})
        row = [key]
        for q in range(4):
            d = bins.get(q, {})
            mean = d.get("mean", nan)
            n = d.get("n", 0)
            row.append(f"N/A (n={n})" if np.isnan(mean) else f"{mean:+.3f} (n={n})")
        qtable += "| " + " | ".join(row) + " |\n"

    # Quartile detailed stats table
    qdetail = "| Quartile | N | Mean | Median | Std | P25 | P75 | Min | Max |\n"
    qdetail += "|----------|---|------|--------|-----|-----|-----|-----|-----|\n"
    for q in range(4):
        d = all_bins[q]
        qdetail += (
            f"| Q{q+1} | {d['n']} | {d['mean']:+.3f} | {d['median']:+.3f} | "
            f"{d['std']:.3f} | {d['p25']:+.3f} | {d['p75']:+.3f} | "
            f"{d['min']:+.3f} | {d['max']:+.3f} |\n"
        )

    # Period-level stats
    pre_motions = [m for m in motions if m["period"] == _PRE_PERIOD]
    post_motions = [m for m in motions if m["period"] == _POST_PERIOD]
    pre_margins = np.array([m["margin"] for m in pre_motions])
    post_margins = np.array([m["margin"] for m in post_motions])

    pre_mean = float(np.mean(pre_margins)) if len(pre_margins) > 0 else nan
    post_mean = float(np.mean(post_margins)) if len(post_margins) > 0 else nan
    delta = post_mean - pre_mean

    # Mann-Whitney for period difference. `period_p` stays None when either
    # period is empty, which also suppresses the narrative sentence below.
    period_p: float | None = None
    u_str = "N/A"
    cohens_d = nan
    if len(pre_margins) > 0 and len(post_margins) > 0:
        u_stat, raw_p = mannwhitneyu(pre_margins, post_margins, alternative="two-sided")
        period_p = float(raw_p)
        u_str = f"U={u_stat:.0f}, p={period_p:.1e}"
        if len(pre_margins) > 1 and len(post_margins) > 1:
            pooled_sd = float(np.sqrt(
                (np.std(pre_margins, ddof=1) ** 2 + np.std(post_margins, ddof=1) ** 2) / 2
            ))
            # A zero pooled SD makes the effect size undefined.
            cohens_d = delta / pooled_sd if pooled_sd > 0 else nan

    # Yearly breakdown
    by_year: dict[int, list[dict]] = defaultdict(list)
    for m in motions:
        by_year[m["year"]].append(m)

    ytable = "| Year | N | Mean Margin | Mean CS (strict) | % Passed |\n"
    ytable += "|------|---|-------------|-----------------|---------|\n"
    for y in sorted(by_year):
        group = by_year[y]
        total = len(group)
        passed = sum(1 for m in group if m["passed"])
        mean_margin = np.mean([m["margin"] for m in group])
        mean_cs = np.mean([m["centrist_support_strict"] for m in group])
        ytable += (
            f"| {y} | {total} | {mean_margin:+.3f} | {mean_cs:.3f} | "
            f"{passed/total:.1%} |\n"
        )

    # Q4 vs Q1 gap (analogous to success premium)
    q1_mean = all_bins[0]["mean"]
    q4_mean = all_bins[3]["mean"]
    margin_gap = q4_mean - q1_mean if not (np.isnan(q1_mean) or np.isnan(q4_mean)) else nan

    # Pass rate by quartile, computed once and reused for the gap below.
    passed_by_bin = Counter(
        quartile_bin(m["centrist_support_strict"]) for m in motions if m["passed"]
    )
    pass_rates = [
        passed_by_bin[q] / all_bins[q]["n"] if all_bins[q]["n"] > 0 else nan
        for q in range(4)
    ]
    pass_table = "| Quartile | N | Pass Rate | Mean Margin |\n"
    pass_table += "|----------|---|-----------|-------------|\n"
    for q in range(4):
        d = all_bins[q]
        pr_str = _format_or_na(pass_rates[q], ".1%")
        pass_table += f"| Q{q+1} | {d['n']} | {pr_str} | {d['mean']:+.3f} |\n"

    # NaN when Q1 or Q4 is empty: an empty bin has no pass rate to compare.
    pass_gap = pass_rates[3] - pass_rates[0]

    spearman_significant = corr["spearman_p"] < _SIGNIFICANCE_LEVEL

    report = [
        "# Voting Margin Analysis",
        "",
        "**Goal:** Replace binary pass/fail with continuous voting margin as the primary",
        "success metric for right-wing motions in the Tweede Kamer.",
        "",
        "**Analysis period:** 2016\u20132026",
        f"**Total right-wing motions with vote data:** {n_total}",
        f"**Motions passed:** {n_passed} ({overall_pass_rate:.1%})",
        f"**Motions failed:** {n_failed} ({n_failed/n_total:.1%})" if n_total > 0 else "",
        "",
        "---",
        "",
        "## 1. Methodology",
        "",
        "The voting margin is computed from `motions.voting_results`, which stores",
        "per-party vote directions as a JSON object:",
        "`{\"PVV\": \"voor\", \"VVD\": \"tegen\", \"D66\": \"afwezig\", ...}`.",
        "",
        "```",
        "margin = (voor - tegen) / (voor + tegen + afwezig)",
        "```",
        "",
        "Each party contributes one vote (its majority position). The margin ranges",
        "from -1 (unanimous rejection) to +1 (unanimous support). A margin of 0",
        "indicates an exact tie or no participating parties.",
        "",
        "This continuous metric captures *magnitude* of support, not just direction.",
        "A motion that passes 14-1 has margin = +0.87, while one that passes 8-7 has",
        "margin = +0.07. Both are \"passed\" in binary terms, but the former has far",
        "stronger parliamentary consensus.",
        "",
        "> **Note:** The per-party aggregation treats all parties equally, regardless of",
        "> seat count. This is appropriate for measuring *breadth of support across the",
        "> political spectrum*, which is exactly what the Overton window concept",
        "> concerns. Seat-weighted margins would be confounded by coalition size effects.",
        "",
        "---",
        "",
        "## 2. Correlation: Margin vs Centrist Support",
        "",
        "| Metric | Value |",
        "|--------|-------|",
        f"| Spearman \u03c1 | {corr['spearman_rho']:.3f} |",
        f"| Spearman p-value | {corr['spearman_p']:.1e} |",
        f"| Pearson r | {corr['pearson_r']:.3f} |",
        f"| Pearson p-value | {corr['pearson_p']:.1e} |",
        "",
    ]

    if spearman_significant:
        report.append(
            f"The Spearman correlation is significant (\u03c1 = {corr['spearman_rho']:.3f}, "
            f"p = {corr['spearman_p']:.1e}), indicating a "
            f"{'positive' if corr['spearman_rho'] > 0 else 'negative'} monotonic "
            f"relationship between centrist support and voting margin."
        )
    else:
        report.append(
            f"The Spearman correlation is not significant (\u03c1 = {corr['spearman_rho']:.3f}, "
            f"p = {corr['spearman_p']:.3f}). Centrist support alone does not predict "
            f"voting margin."
        )

    report += [
        "",
        "---",
        "",
        "## 3. Margin Distribution by Centrist Support Quartile",
        "",
        "### Summary Table",
        "",
        qtable,
        "",
        "### Detailed Statistics (All Motions)",
        "",
        qdetail,
        "",
        f"**Q4 \u2013 Q1 gap in mean margin:** {_format_or_na(margin_gap, '+.3f')}",
        "",
    ]

    if not np.isnan(margin_gap) and margin_gap > 0:
        report.append(
            f"The gap of {margin_gap:+.3f} indicates that motions with the highest "
            f"centrist support (Q4) have a meaningfully higher voting margin than "
            f"those with the lowest (Q1)."
        )
    elif not np.isnan(margin_gap):
        report.append(
            f"The gap of {margin_gap:+.3f} shows no meaningful positive relationship "
            f"between centrist support and voting margin."
        )

    report += [
        "",
        "---",
        "",
        "## 4. Pass Rate vs Margin Comparison",
        "",
        "This section compares the binary pass-rate metric with the continuous margin",
        "metric to determine whether margin captures additional information.",
        "",
        pass_table,
        "",
    ]

    # Check if margin detects patterns pass rate misses
    report.append(
        f"**Pass rate gap (Q4 \u2013 Q1):** {_format_or_na(pass_gap, '+.1%')}"
    )
    report.append(
        f"**Margin gap (Q4 \u2013 Q1):** {_format_or_na(margin_gap, '+.3f')}"
    )
    report.append("")

    if np.isnan(pass_gap) or np.isnan(margin_gap):
        report.append(
            "Q1 or Q4 contains no motions, so the two metrics cannot be compared "
            "across the extreme quartiles."
        )
    elif abs(pass_gap) < _MEANINGFUL_GAP and abs(margin_gap) > _MEANINGFUL_GAP:
        report.append(
            "The pass rate gap is small ({:.1%}) while the margin gap is meaningful "
            "({:+.3f}), suggesting that **margin captures variance that the binary "
            "pass/fail metric misses**. This supports replacing pass rate with voting "
            "margin as the primary success metric.".format(pass_gap, margin_gap)
        )
    elif pass_gap >= _MEANINGFUL_GAP and margin_gap > 0:
        report.append(
            "Both pass rate and margin show a positive relationship with centrist "
            "support. Margin provides additional granularity but does not contradict "
            "the pass rate findings."
        )
    elif abs(pass_gap) < _MEANINGFUL_GAP:
        report.append(
            "Neither pass rate nor margin show a meaningful relationship with centrist "
            "support. The high baseline pass rate (~{:.0%}) creates a ceiling effect "
            "for both metrics.".format(overall_pass_rate)
        )
    else:
        # Remaining cases: a sizeable negative pass-rate gap, or a positive
        # pass-rate gap that the margin gap does not share.
        report.append(
            "The pass rate gap ({:+.1%}) and the margin gap ({:+.3f}) do not both "
            "indicate a positive relationship with centrist support; the quartile "
            "comparison should be interpreted with caution.".format(pass_gap, margin_gap)
        )

    report += [
        "",
        "---",
        "",
        "## 5. Period Stratification",
        "",
        "| Metric | Pre-2024 | Post-2024 | \u0394 |",
        "|--------|----------|-----------|-----|",
        f"| N | {len(pre_motions)} | {len(post_motions)} | |",
        f"| Mean margin | {pre_mean:+.3f} | {post_mean:+.3f} | {delta:+.3f} |",
        f"| Mann-Whitney U | | | {u_str} |",
        f"| Cohen's d | | | {cohens_d:+.3f} |" if not np.isnan(cohens_d) else "",
        "",
    ]

    # The period narrative depends on the Mann-Whitney test only; it must not
    # be tied to the significance of the Spearman correlation.
    if period_p is not None:
        if period_p < _SIGNIFICANCE_LEVEL:
            direction = "rose" if post_mean > pre_mean else "fell"
            report.append(
                f"Voting margin {direction} significantly post-2024 "
                f"(Mann-Whitney p = {period_p:.1e}, d = {cohens_d:+.3f})."
            )
        else:
            report.append(
                f"Voting margin did not change significantly between periods "
                f"(Mann-Whitney p = {period_p:.3f})."
            )

    report += [
        "",
        "---",
        "",
        "## 6. Yearly Breakdown",
        "",
        ytable,
        "",
        "---",
        "",
        "## 7. Interpretation",
        "",
    ]

    if spearman_significant and corr["spearman_rho"] > 0:
        report.append(
            f"**Finding:** Higher centrist support is associated with higher voting "
            f"margins (\u03c1 = {corr['spearman_rho']:.3f}, p = {corr['spearman_p']:.1e}). "
            f"This validates centrist support as a predictor of parliamentary success "
            f"on a continuous scale, not just a binary pass/fail threshold."
        )
    elif spearman_significant:
        report.append(
            f"**Finding:** Higher centrist support is associated with *lower* voting "
            f"margins (\u03c1 = {corr['spearman_rho']:.3f}, p = {corr['spearman_p']:.1e}). "
            f"This is counterintuitive and warrants further investigation."
        )
    else:
        report.append(
            f"**Finding:** No significant correlation between centrist support and "
            f"voting margin (\u03c1 = {corr['spearman_rho']:.3f}, p = {corr['spearman_p']:.3f}). "
        )

    report.append("")
    report.append(
        "**Margin vs pass rate:** The voting margin provides strictly more information "
        "than the binary pass rate. Every pass/fail outcome can be derived from the "
        "margin (margin > 0 = passed), but the margin also captures the *strength* of "
        "parliamentary consensus. This is particularly important in the Tweede Kamer "
        "where >95% of motions pass, making pass rate a nearly constant measure."
    )

    report += [
        "",
        "---",
        "",
        "## 8. Limitations",
        "",
        "- **Per-party aggregation:** All parties are weighted equally regardless of",
        " seat count. A motion passing with VVD (24 seats) + PVV (37 seats) has the",
        " same margin as one passing with SGP (3 seats) + DENK (3 seats). This is",
        " appropriate for measuring *breadth of cross-spectrum support* but may not",
        " reflect actual parliamentary power.",
        "- **Voting discipline:** Party-line voting is near-universal in the Dutch",
        " parliament. The per-party aggregation loses little information.",
        "- **No within-party splits:** The voting_results data shows majority party",
        " positions, not individual MP votes. Intra-party dissent is invisible.",
        "- **Missing data:** Motions without voting_results are excluded.",
        "",
        "---",
        "",
        f"![Figure: Voting margin analysis]({Path(fig_path).name})",
        "",
        "*Report generated by `analysis/right_wing/voting_margin.py`*",
    ]

    report_path = REPORTS_DIR / "voting_margin.md"
    # The report contains non-ASCII characters, so the encoding must not be
    # left to the platform default.
    with open(report_path, "w", encoding="utf-8") as f:
        f.write("\n".join(report))

    logger.info("Report written to %s", report_path)
    return str(report_path)


def main() -> int:
    """Run the analysis end to end.

    Returns:
        0 on success, 1 when fewer than two motions have usable vote data.
    """
    logger.info("Connecting to database: %s", DB_PATH)
    con = duckdb.connect(DB_PATH, read_only=True)
    try:
        logger.info("Collecting motion margins...")
        motions = collect_motion_margins(con)
    finally:
        # Close the connection even when the query fails.
        con.close()

    n_total = len(motions)
    n_passed = sum(1 for m in motions if m["passed"])
    n_pre = sum(1 for m in motions if m["period"] == _PRE_PERIOD)
    n_post = sum(1 for m in motions if m["period"] == _POST_PERIOD)
    logger.info(
        "Total: %d motions with voting data, %d passed (%.1f%%), pre=%d post=%d",
        n_total, n_passed, (n_passed / n_total * 100) if n_total > 0 else 0,
        n_pre, n_post,
    )

    # Correlations and the figure are undefined for fewer than two motions.
    if n_total < 2:
        logger.error(
            "Need at least 2 motions with voting data to run the analysis; found %d",
            n_total,
        )
        return 1

    all_strata = quartile_margin_stats(motions)
    corr = spearman_correlation(motions)

    logger.info(
        "Spearman rho=%.3f p=%.1e | Pearson r=%.3f p=%.1e",
        corr["spearman_rho"], corr["spearman_p"],
        corr["pearson_r"], corr["pearson_p"],
    )

    logger.info("Generating figure...")
    fig_path = create_figure(all_strata, motions, corr)

    logger.info("Generating report...")
    report_path = generate_report(all_strata, motions, corr, fig_path)

    print(f"\nReport: {report_path}")
    print(f"Figure: {fig_path}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())