You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
motief/analysis/right_wing/party_differentiation.py

492 lines
17 KiB

#!/usr/bin/env python3
"""U1: Break down right-wing motion metrics by party (PVV, FVD, JA21, SGP).
Usage:
uv run python analysis/right_wing/party_differentiation.py
Output:
reports/overton_window/party_differentiation.md
reports/overton_window/party_differentiation_figure.png
"""
from __future__ import annotations
import logging
import re
import sys
from pathlib import Path
from typing import Any
import duckdb
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
ROOT = Path(__file__).parent.parent.parent.resolve()
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
from analysis.config import CANONICAL_RIGHT, PARTY_COLOURS, _PARTY_NORMALIZE
# Configure logging for this script: INFO and above, prefixed with timestamp and level.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
# Database file opened by _conn(); stored as str because it is handed to duckdb.connect().
DB_PATH = str(ROOT / "data" / "motions.db")
# Directory that receives the Markdown report and the PNG figure.
REPORTS_DIR = ROOT / "reports" / "overton_window"
# NOTE: executed at import time, so importing this module creates the directory.
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
# Sorted so table columns and plot series always come out in the same order.
RIGHT_PARTIES = sorted(CANONICAL_RIGHT)
# Inclusive year window used for the yearly tables and time-series panels.
YEAR_MIN, YEAR_MAX = 2016, 2026
# First year counted as "post"; motions with year < BREAK_YEAR count as "pre".
BREAK_YEAR = 2024
# Title regexes tried in order (with re.search) by parse_submitter_party().
# Group 1 captures the submitter name(s) between the "Motie van het lid" /
# "Motie van de leden" / "Amendement van ..." prefix and the word "over".
# The motion patterns accept an optional "Gewijzigde" / "Nader gewijzigde"
# prefix and an optional "c.s." before "over"; the amendment patterns do not.
TITLE_PATTERNS = [
    r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
    r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
    r"Amendement\s+van\s+het\s+lid\s+(.+?)\s+over\b",
    r"Amendement\s+van\s+de\s+leden\s+(.+?)\s+over\b",
]
def _conn(read_only: bool = True) -> duckdb.DuckDBPyConnection:
    """Open a DuckDB connection to the database at ``DB_PATH``.

    Args:
        read_only: Forwarded to ``duckdb.connect``; defaults to ``True``.

    Returns:
        The connection object produced by ``duckdb.connect``. The caller is
        responsible for closing it.
    """
    connection = duckdb.connect(DB_PATH, read_only=read_only)
    return connection
def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]:
    """Build a ``last name -> party`` lookup from the ``mp_metadata`` table.

    The last name is the part of ``mp_name`` before the first comma. Rows are
    visited in the query's order and the first party seen for a last name
    wins, so two MPs who share a last name collapse into a single entry.

    Rows whose ``mp_name`` is NULL or has an empty last-name part are skipped
    instead of raising or producing an empty-string key.

    Args:
        con: Open connection to the motions database.

    Returns:
        Mapping from last name to the raw (not yet normalized) party string.
    """
    # The SQL text is kept flush-left so the query string itself is unchanged.
    # NOTE(review): ``tot_en_met DESC NULLS LAST`` ranks rows without an end
    # date *after* rows with one. If a NULL end date means "still in office",
    # a finished membership would win over the current one -- confirm intent.
    rows = con.execute("""
SELECT mp_name, party, van, tot_en_met
FROM mp_metadata
WHERE party IS NOT NULL
ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST
""").fetchall()
    last_to_party: dict[str, str] = {}
    for mp_name, party, _van, _tot in rows:
        if not mp_name:
            continue
        last = mp_name.split(",")[0].strip()
        if last:
            # setdefault keeps the first (highest-ranked) party for a name.
            last_to_party.setdefault(last, party)
    return last_to_party
def parse_submitter_party(title: str, name_party_map: dict[str, str]) -> str | None:
    """Resolve the party of the first-listed submitter named in a title.

    Each regex in ``TITLE_PATTERNS`` is tried in order. For the first pattern
    that matches with a non-empty name, the first submitter is looked up in
    ``name_party_map`` and the result is passed through ``_PARTY_NORMALIZE``.

    Submitter lists are split on commas as well as on "en", so a title of the
    form "... de leden A, B en C over ..." resolves to ``A``. Splitting on a
    comma is safe because the keys produced by ``build_party_name_map`` are
    taken from before the first comma and so never contain one.

    Args:
        title: Motion or amendment title; may be empty or ``None``.
        name_party_map: Mapping from submitter last name to raw party string.

    Returns:
        The normalized party name, or ``None`` when the title is empty, no
        pattern matches, or the first submitter is not in ``name_party_map``.
    """
    if not title:
        return None
    for pat in TITLE_PATTERNS:
        m = re.search(pat, title)
        if m is None:
            continue
        # Only the first-listed submitter decides the party.
        first_name = re.split(r",|\s+en\s+", m.group(1).strip(), maxsplit=1)[0]
        # Drop a trailing "c.s." that the pattern did not already consume.
        first_name = re.sub(r"\s+c\.s\.", "", first_name).strip()
        if not first_name:
            continue
        raw_party = name_party_map.get(first_name)
        if raw_party:
            return _PARTY_NORMALIZE.get(raw_party, raw_party)
        # A pattern matched but the name is unknown: give up rather than
        # letting a later pattern reinterpret the same title.
        return None
    return None
def compute_per_party_metrics(con: duckdb.DuckDBPyConnection) -> tuple[dict[str, list[dict]], int, int]:
    """Group classified, scored right-wing motions by submitter party.

    Reads every classified row of ``right_wing_motions`` that has a year, a
    title and a matching ``extremity_scores_2d`` row, resolves the submitter
    party from the title, and files the motion under that party.

    Args:
        con: Open connection to the motions database.

    Returns:
        A 3-tuple ``(per_party, outside_right, unresolved)``:

        * ``per_party`` maps each party in ``RIGHT_PARTIES`` to its motion
          records (dicts keyed ``motion_id``, ``year``, ``title``,
          ``centrist_support_strict``, ``category``, ``stijl_extremiteit``,
          ``materiele_impact``).
        * ``outside_right`` counts motions whose submitter resolved to a
          party that is not in ``CANONICAL_RIGHT``.
        * ``unresolved`` counts motions for which no party could be resolved.
    """
    # The SQL text is kept flush-left so the query string itself is unchanged.
    scored_rows = con.execute("""
SELECT
r.motion_id,
r.year,
r.title,
r.centrist_support_strict,
r.category,
e.stijl_extremiteit,
e.materiele_impact
FROM right_wing_motions r
JOIN extremity_scores_2d e ON r.motion_id = e.motion_id
WHERE r.classified = TRUE
AND r.year IS NOT NULL
AND r.title IS NOT NULL
""").fetchall()
    logger.info("Total classified RW motions with 2D extremity: %d", len(scored_rows))

    name_party_map = build_party_name_map(con)

    # Same order as the SELECT list above.
    record_keys = (
        "motion_id",
        "year",
        "title",
        "centrist_support_strict",
        "category",
        "stijl_extremiteit",
        "materiele_impact",
    )
    per_party: dict[str, list[dict]] = {party: [] for party in RIGHT_PARTIES}
    outside_right = 0
    unresolved = 0
    for row in scored_rows:
        party = parse_submitter_party(row[2], name_party_map)
        if party is None:
            unresolved += 1
        elif party not in CANONICAL_RIGHT:
            outside_right += 1
        else:
            per_party[party].append(dict(zip(record_keys, row)))
    return per_party, outside_right, unresolved
def yearly_aggregates(party_data: dict[str, list[dict]]) -> dict[str, dict[int, dict]]:
    """Bucket each party's motions by year inside the analysis window.

    Args:
        party_data: Motion records per party, as built by
            ``compute_per_party_metrics``.

    Returns:
        ``{party: {year: bucket}}`` with one bucket for every year from
        ``YEAR_MIN`` to ``YEAR_MAX`` inclusive. A bucket holds the raw value
        lists ``"cs"``, ``"stijl"`` and ``"materiele"`` (``None`` entries are
        kept) and the motion count ``"n"``. Motions outside the window are
        ignored.
    """
    window = range(YEAR_MIN, YEAR_MAX + 1)
    yearly: dict[str, dict[int, dict]] = {}
    for party in RIGHT_PARTIES:
        buckets = {
            year: {"cs": [], "stijl": [], "materiele": [], "n": 0}
            for year in window
        }
        yearly[party] = buckets
        for motion in party_data[party]:
            year = motion["year"]
            if year < YEAR_MIN or year > YEAR_MAX:
                continue
            bucket = buckets[year]
            bucket["cs"].append(motion["centrist_support_strict"])
            bucket["stijl"].append(motion["stijl_extremiteit"])
            bucket["materiele"].append(motion["materiele_impact"])
            bucket["n"] += 1
    return yearly
def pre_post_comparison(
    party_data: dict[str, list[dict]],
) -> dict[str, dict[str, Any]]:
    """Compare each party's motions before and from ``BREAK_YEAR`` onward.

    Args:
        party_data: Motion records per party, as built by
            ``compute_per_party_metrics``.

    Returns:
        ``{party: stats}`` where ``stats`` holds the motion counts
        (``n_pre``, ``n_post``), the mean centrist support and mean material
        impact for both periods, their post-minus-pre deltas, and
        ``volume_delta`` (``n_post - n_pre``). Means and deltas are NaN when
        a period has no non-``None`` values.
    """

    def non_null(motions: list[dict], key: str) -> np.ndarray:
        return np.array([m[key] for m in motions if m[key] is not None])

    def mean_or_nan(values: np.ndarray) -> float:
        if len(values) > 0:
            return float(np.mean(values))
        return float("nan")

    def shift(before: np.ndarray, after: np.ndarray) -> float:
        if len(before) > 0 and len(after) > 0:
            return float(np.mean(after) - np.mean(before))
        return float("nan")

    # NOTE(review): unlike yearly_aggregates, no YEAR_MIN/YEAR_MAX bound is
    # applied here, so the counts can differ from the yearly tables.
    comparison: dict[str, dict[str, Any]] = {}
    for party in RIGHT_PARTIES:
        motions = party_data[party]
        pre = [m for m in motions if m["year"] < BREAK_YEAR]
        post = [m for m in motions if m["year"] >= BREAK_YEAR]

        pre_cs = non_null(pre, "centrist_support_strict")
        post_cs = non_null(post, "centrist_support_strict")
        pre_mat = non_null(pre, "materiele_impact")
        post_mat = non_null(post, "materiele_impact")

        stats: dict[str, Any] = {}
        stats["n_pre"] = len(pre)
        stats["n_post"] = len(post)
        stats["mean_cs_pre"] = mean_or_nan(pre_cs)
        stats["mean_cs_post"] = mean_or_nan(post_cs)
        stats["delta_cs"] = shift(pre_cs, post_cs)
        stats["mean_mat_pre"] = mean_or_nan(pre_mat)
        stats["mean_mat_post"] = mean_or_nan(post_mat)
        stats["delta_mat"] = shift(pre_mat, post_mat)
        stats["volume_delta"] = len(post) - len(pre)
        comparison[party] = stats
    return comparison
def _mean_ignoring_none(values: list) -> float:
    """Return the mean of the non-``None`` entries, or NaN if there are none."""
    present = [v for v in values if v is not None]
    return np.mean(present) if present else np.nan


def _draw_yearly_panel(
    ax,
    series_by_party: dict[str, list],
    years: list[int],
    colours: dict[str, str],
    markers: dict[str, str],
    *,
    ylabel: str,
    title: str,
    ylim: tuple[float, float] | None = None,
) -> None:
    """Draw one line-per-party time-series panel with the shared styling."""
    years_arr = np.array(years)
    for party, series in series_by_party.items():
        ax.plot(years_arr, series, marker=markers[party],
                color=colours[party], linewidth=2, label=party)
    # Dotted divider halfway between the last "pre" year and BREAK_YEAR.
    ax.axvline(x=BREAK_YEAR - 0.5, color="black", linestyle=":", alpha=0.5, linewidth=1)
    ax.set_xlabel("Year")
    ax.set_ylabel(ylabel)
    ax.set_title(title, fontweight="bold")
    ax.legend(fontsize=9)
    if ylim is not None:
        ax.set_ylim(*ylim)
    ax.grid(True, alpha=0.3)
    ax.set_xticks(years_arr)
    ax.set_xticklabels([str(y) for y in years], rotation=45)


def create_figure(
    yearly: dict[str, dict[int, dict]],
    comparison: dict[str, dict[str, Any]],
) -> str:
    """Render the 4-panel party figure and save it as a PNG.

    Panels: (A) motion volume per year, (B) mean centrist support per year,
    (C) mean material impact per year, (D) pre/post-``BREAK_YEAR`` mean
    centrist support as grouped bars annotated with motion counts.

    Colours and markers are resolved for every party in ``RIGHT_PARTIES``
    with generic fallbacks, so a party outside the original four does not
    raise ``KeyError``. The figure is closed even if saving fails.

    Args:
        yearly: Per-party, per-year buckets from ``yearly_aggregates``.
        comparison: Per-party pre/post stats from ``pre_post_comparison``.

    Returns:
        Path of the written PNG, as a string.
    """
    years = list(range(YEAR_MIN, YEAR_MAX + 1))

    default_colours = {
        "PVV": "#002366",
        "FVD": "#6A1B9A",
        "JA21": "#7B1FA2",
        "SGP": "#F4511E",
    }
    preferred_markers = {"PVV": "o", "FVD": "s", "JA21": "^", "SGP": "D"}
    spare_markers = ("v", "P", "X", "*", "<", ">")
    party_colours = {
        party: PARTY_COLOURS.get(party, default_colours.get(party, "#555555"))
        for party in RIGHT_PARTIES
    }
    marker_map = {
        party: preferred_markers.get(party, spare_markers[i % len(spare_markers)])
        for i, party in enumerate(RIGHT_PARTIES)
    }

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    try:
        (ax_vol, ax_cs), (ax_mat, ax_bar) = axes

        # Panel A: Motion volume
        _draw_yearly_panel(
            ax_vol,
            {p: [yearly[p][y]["n"] for y in years] for p in RIGHT_PARTIES},
            years, party_colours, marker_map,
            ylabel="Motion count",
            title="A: Motion Volume by Party Over Time",
        )

        # Panel B: Centrist support
        _draw_yearly_panel(
            ax_cs,
            {p: [_mean_ignoring_none(yearly[p][y]["cs"]) for y in years]
             for p in RIGHT_PARTIES},
            years, party_colours, marker_map,
            ylabel="Centrist support (strict)",
            title="B: Centrist Support by Party Over Time",
            ylim=(0, 1.05),
        )

        # Panel C: Material impact
        _draw_yearly_panel(
            ax_mat,
            {p: [_mean_ignoring_none(yearly[p][y]["materiele"]) for y in years]
             for p in RIGHT_PARTIES},
            years, party_colours, marker_map,
            ylabel="Material impact (1-5)",
            title="C: Material Impact by Party Over Time",
        )

        # Panel D: Pre/post centrist support bars
        x = np.arange(len(RIGHT_PARTIES))
        width = 0.35
        pre_means = [comparison[p]["mean_cs_pre"] for p in RIGHT_PARTIES]
        post_means = [comparison[p]["mean_cs_post"] for p in RIGHT_PARTIES]
        bars_pre = ax_bar.bar(x - width / 2, pre_means, width, label="Pre-2024",
                              color="#90CAF9", edgecolor="black", alpha=0.9)
        bars_post = ax_bar.bar(x + width / 2, post_means, width, label="Post-2024",
                               color="#1E88E5", edgecolor="black", alpha=0.9)
        for bars, count_key in ((bars_pre, "n_pre"), (bars_post, "n_post")):
            for bar, party in zip(bars, RIGHT_PARTIES):
                height = bar.get_height()
                # A NaN mean would give the label a non-finite position;
                # anchor it at the baseline so the N is still shown.
                base = height if np.isfinite(height) else 0.0
                ax_bar.text(bar.get_x() + bar.get_width() / 2, base + 0.02,
                            f"N={comparison[party][count_key]}",
                            ha="center", va="bottom", fontsize=8, fontweight="bold")
        ax_bar.set_xticks(x)
        ax_bar.set_xticklabels(RIGHT_PARTIES, fontsize=10)
        ax_bar.set_ylabel("Centrist support (strict)")
        ax_bar.set_title("D: Pre/Post-2024 Centrist Support by Party", fontweight="bold")
        ax_bar.legend(fontsize=9)
        ax_bar.set_ylim(0, 1.05)
        ax_bar.grid(True, alpha=0.3, axis="y")

        plt.tight_layout()
        path = str(REPORTS_DIR / "party_differentiation_figure.png")
        fig.savefig(path, dpi=150, bbox_inches="tight")
    finally:
        # Release the figure even when plotting or saving raised.
        plt.close(fig)
    logger.info("Saved figure to %s", path)
    return path
def _fmt_stat(value: float, spec: str) -> str:
    """Format ``value`` with ``spec``, or return ``"N/A"`` when it is NaN."""
    return "N/A" if np.isnan(value) else format(value, spec)


def _yearly_mean_rows(
    yearly: dict[str, dict[int, dict]],
    years: list[int],
    key: str,
    spec: str,
) -> list[str]:
    """Build one Markdown row per year holding each party's mean of ``key``."""
    rows = []
    for y in years:
        cells = []
        for p in RIGHT_PARTIES:
            vals = [v for v in yearly[p][y][key] if v is not None]
            cells.append(_fmt_stat(np.mean(vals) if vals else float("nan"), spec))
        rows.append(f"| {y} | " + " | ".join(cells) + " |")
    return rows


def generate_report(
    yearly: dict[str, dict[int, dict]],
    comparison: dict[str, dict[str, Any]],
    party_data: dict[str, list[dict]],
    parsed_count: int,
    no_match_count: int,
    figure_path: str,
) -> str:
    """Write the Markdown party-differentiation report.

    The report contains yearly volume, centrist-support and material-impact
    tables, the pre/post-``BREAK_YEAR`` comparison, key findings, parsing
    notes and a link to the figure. Table rows are built from
    ``RIGHT_PARTIES`` so they always have the same number of columns as the
    headers, and NaN statistics are shown as ``N/A`` in every table.

    Args:
        yearly: Per-party, per-year buckets from ``yearly_aggregates``.
        comparison: Per-party pre/post stats from ``pre_post_comparison``.
        party_data: Motion records per party.
        parsed_count: Number of motions reported as parsed and party-matched.
        no_match_count: Number of motions reported as unmatched/unparsed.
        figure_path: Path of the figure; only its file name is linked.

    Returns:
        Path of the written report, as a string.
    """
    years = list(range(YEAR_MIN, YEAR_MAX + 1))
    total_rw = sum(len(party_data[p]) for p in RIGHT_PARTIES)
    party_header = " | ".join(RIGHT_PARTIES)
    party_rule = "|".join("-" * len(p) for p in RIGHT_PARTIES)

    lines = [
        "# Right-Wing Party Differentiation",
        "",
        "**Goal:** Break down right-wing motion metrics by party (PVV, FVD, JA21, SGP)",
        "to identify which party drives the moderation effect.",
        "",
        # The en dash keeps the two years from running together.
        f"**Analysis period:** {YEAR_MIN}–{YEAR_MAX}",
        f"**Right-wing parties:** {', '.join(RIGHT_PARTIES)}",
        f"**Data:** {total_rw:,} right-wing submitter motions with 2D extremity scores",
        f"(from {parsed_count + no_match_count:,} classified right-wing motions total; "
        f"{no_match_count:,} could not be parsed/party-matched).",
        "",
        "---",
        "",
        "## 1. Motion Volume by Party and Year",
        "",
        "| Year | " + party_header + " | Total RW |",
        "|------|" + party_rule + "|----------|",
    ]
    for y in years:
        vols = [yearly[p][y]["n"] for p in RIGHT_PARTIES]
        total = sum(vols)
        lines.append(f"| {y} | " + " | ".join(str(v) for v in vols) + f" | {total} |")

    lines += [
        "",
        "---",
        "",
        "## 2. Centrist Support (Strict) by Party and Year",
        "",
        "| Year | " + party_header + " |",
        "|------|" + party_rule + "|",
    ]
    lines += _yearly_mean_rows(yearly, years, "cs", ".3f")

    lines += [
        "",
        "---",
        "",
        "## 3. Material Impact by Party and Year",
        "",
        "| Year | " + party_header + " |",
        "|------|" + party_rule + "|",
    ]
    lines += _yearly_mean_rows(yearly, years, "materiele", ".2f")

    lines += [
        "",
        "---",
        "",
        "## 4. Pre/Post-2024 Comparison by Party",
        "",
        "| Party | N Pre | N Post | CS Pre | CS Post | Delta CS | Mat. Pre | Mat. Post | Delta Mat. | Vol. Delta |",
        "|-------|-------|--------|--------|---------|----------|----------|-----------|------------|------------|",
    ]
    for party in RIGHT_PARTIES:
        c = comparison[party]
        lines.append(
            f"| {party} | {c['n_pre']} | {c['n_post']} | "
            f"{_fmt_stat(c['mean_cs_pre'], '.3f')} | {_fmt_stat(c['mean_cs_post'], '.3f')} | "
            f"{_fmt_stat(c['delta_cs'], '+.3f')} | {_fmt_stat(c['mean_mat_pre'], '.2f')} | "
            f"{_fmt_stat(c['mean_mat_post'], '.2f')} | {_fmt_stat(c['delta_mat'], '+.2f')} | "
            f"{c['volume_delta']:+d} |"
        )

    # Rank parties by centrist-support shift; parties without a delta are omitted.
    cs_deltas = [(party, comparison[party]["delta_cs"]) for party in RIGHT_PARTIES
                 if not np.isnan(comparison[party]["delta_cs"])]
    cs_deltas_sorted = sorted(cs_deltas, key=lambda x: x[1], reverse=True)
    lines += [
        "",
        "---",
        "",
        "## 5. Key Findings",
        "",
    ]
    if cs_deltas_sorted:
        lines.append("**Centrist support shift (largest to smallest):**")
        for party, delta in cs_deltas_sorted:
            lines.append(f"- **{party}**: {delta:+.3f}")

    lines += [
        "",
        "### Volume",
    ]
    for party in RIGHT_PARTIES:
        c = comparison[party]
        lines.append(f"- **{party}**: {c['n_pre']} pre-2024 → {c['n_post']} post-2024 ({c['volume_delta']:+d})")

    lines += [
        "",
        "### Material Impact Shift",
    ]
    for party in RIGHT_PARTIES:
        c = comparison[party]
        lines.append(
            f"- **{party}**: {_fmt_stat(c['mean_mat_pre'], '.2f')} → "
            f"{_fmt_stat(c['mean_mat_post'], '.2f')} ({_fmt_stat(c['delta_mat'], '+.2f')})"
        )

    lines += [
        "",
        "---",
        "",
        "## 6. Parsing Notes",
        "",
        f"- Parsed and party-matched: {parsed_count:,} motions",
        f"- Right-wing submitter motions: {total_rw:,}",
        f"- Unmatched/unparsed: {no_match_count:,}",
        "- Submitter party is parsed from motion title prefixes (e.g. 'Motie van het lid Wilders ...').",
        "- Multi-submitter motions use the first listed submitter.",
        "- Party names are normalized via `_PARTY_NORMALIZE` (e.g. Groep Markuszower → PVV).",
        "",
        "---",
        "",
        "## 7. Figure",
        "",
        f"![Party differentiation figure]({Path(figure_path).name})",
        "",
    ]

    report_path = REPORTS_DIR / "party_differentiation.md"
    # The text contains non-ASCII characters, so do not rely on the platform
    # default encoding.
    report_path.write_text("\n".join(lines), encoding="utf-8")
    logger.info("Report written to %s", report_path)
    return str(report_path)
def main() -> int:
    """Run the full pipeline: load metrics, aggregate, plot, write the report.

    The database connection is closed even if computing the metrics raises.

    Returns:
        ``0`` on success, for use as the process exit status.
    """
    logger.info("Connecting to database: %s", DB_PATH)
    con = _conn(read_only=True)
    try:
        logger.info("Computing per-party metrics...")
        party_data, unparsed, no_match = compute_per_party_metrics(con)
    finally:
        con.close()

    total_rw = sum(len(party_data[p]) for p in RIGHT_PARTIES)
    logger.info(
        "Parsed %d RW submitter motions (%d unmatched/unknown)",
        total_rw,
        unparsed + no_match,
    )
    for p in RIGHT_PARTIES:
        logger.info(" %s: %d motions", p, len(party_data[p]))

    logger.info("Computing yearly aggregates...")
    yearly = yearly_aggregates(party_data)

    logger.info("Computing pre/post-2024 comparisons...")
    comparison = pre_post_comparison(party_data)

    logger.info("Generating figure...")
    fig_path = create_figure(yearly, comparison)

    logger.info("Generating report...")
    # The report receives the right-wing total as its "parsed" count and the
    # sum of both skip counters as its "unmatched" count.
    report_path = generate_report(
        yearly, comparison, party_data,
        total_rw, unparsed + no_match, fig_path,
    )
    print(f"\nReport: {report_path}")
    print(f"Figure: {fig_path}")
    return 0
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    sys.exit(main())