You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
187 lines
6.3 KiB
187 lines
6.3 KiB
"""Shared constants and helpers for right-wing motion analysis.
|
|
|
|
Extracted from 6+ files to eliminate code duplication. All Overton analysis
|
|
scripts should import from here instead of defining their own copies.
|
|
"""
|
|
|
|
from __future__ import annotations

import json
import math
import re
from pathlib import Path

import duckdb
import numpy as np

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------

# Project root: the third ancestor of this file, i.e. two directory levels
# above the folder that contains this module.
ROOT = Path(__file__).resolve().parents[2]

# Location of the motions database, kept as a plain string.
DB_PATH = str(ROOT.joinpath("data", "motions.db"))

# Directory for the Overton-window reports.
REPORTS_DIR = ROOT.joinpath("reports", "overton_window")

# ---------------------------------------------------------------------------
# Party sets
# ---------------------------------------------------------------------------

# Immutable party groupings used to classify parties by bloc.
CANONICAL_LEFT = frozenset(
    (
        "SP",
        "PvdA",
        "GL",
        "GroenLinks",
        "GroenLinks-PvdA",
        "DENK",
        "PvdD",
        "Volt",
    )
)
CANONICAL_RIGHT = frozenset(("PVV", "FVD", "JA21", "SGP"))
CANONICAL_CENTRIST = frozenset(("VVD", "D66", "CDA", "NSC", "BBB", "CU"))
# The strict centrist group is the centrist group without VVD and BBB.
CANONICAL_CENTRIST_STRICT = CANONICAL_CENTRIST - {"VVD", "BBB"}

# Mutable ``set`` copies of the left and right groups.
CANONICAL_LEFT_SET = {*CANONICAL_LEFT}
CANONICAL_RIGHT_SET = {*CANONICAL_RIGHT}

# ---------------------------------------------------------------------------
# Time periods
# ---------------------------------------------------------------------------

# First and last year of the analysis window.
# NOTE(review): presumably both bounds are inclusive - confirm against callers.
YEAR_MIN = 2016
YEAR_MAX = 2026

# Year used as the break point between periods.
BREAK_YEAR = 2024

# ISO-formatted (YYYY-MM-DD) start date of the Schoof period.
SCHOOF_START_DATE = "2024-07-01"

# ---------------------------------------------------------------------------
# Coalition composition
# ---------------------------------------------------------------------------

# Parties in each named coalition.
RUTTE_IV_COALITION: set[str] = {"VVD", "D66", "CDA", "CU"}
SCHOOF_COALITION: set[str] = {"PVV", "VVD", "NSC", "BBB"}

# Coalition parties keyed by calendar year.
#
# Every year owns an independent set object: these sets are mutable, so sharing
# one object between several years (or with the named constants above) would
# let an in-place change to one entry silently alter the others.
#
# This table is year-granular and maps the whole of 2024 to the Schoof
# coalition; COALITION_NOTE below describes the mid-2024 split.
COALITION: dict[int, set[str]] = {
    2016: {"VVD", "PvdA"},
    2017: {"VVD", "PvdA"},
    2018: {"VVD", "CDA", "D66", "CU"},
    2019: {"VVD", "CDA", "D66", "CU"},
    2020: {"VVD", "CDA", "D66", "CU"},
    2021: {"VVD", "CDA", "D66", "CU"},
    2022: set(RUTTE_IV_COALITION),
    2023: set(RUTTE_IV_COALITION),
    2024: set(SCHOOF_COALITION),
    2025: set(SCHOOF_COALITION),
    2026: set(SCHOOF_COALITION),
}

# Human-readable summary of the coalition periods.
COALITION_NOTE = (
    "2016-2017: Rutte II (VVD/PvdA). "
    "2018-2021: Rutte III (VVD/CDA/D66/CU). "
    "2022-2023: Rutte IV (VVD/D66/CDA/CU). "
    "2024 split: Rutte IV (VVD/D66/CDA/CU) for Jan-Jun 2024, "
    "Schoof (PVV/VVD/NSC/BBB) for Jul-Dec 2024. "
    "2025-2026: Schoof (PVV/VVD/NSC/BBB). "
    "Period detection uses motion date, not just year."
)

# ---------------------------------------------------------------------------
# Database helpers
# ---------------------------------------------------------------------------


def _conn(db_path: str | None = None, read_only: bool = True) -> duckdb.DuckDBPyConnection:
    """Return a DuckDB connection to the motions database.

    Args:
        db_path: Database file to open. Any falsy value (``None`` or ``""``)
            selects the module-level ``DB_PATH``.
        read_only: Forwarded unchanged to ``duckdb.connect``.

    Returns:
        The connection object produced by ``duckdb.connect``.
    """
    target = db_path if db_path else DB_PATH
    return duckdb.connect(target, read_only=read_only)


# ---------------------------------------------------------------------------
# Statistical helpers
# ---------------------------------------------------------------------------


def cohens_d(x: np.ndarray, y: np.ndarray) -> float:
    """Return Cohen's d effect size for two samples.

    The result is ``(mean(y) - mean(x)) / pooled``, so it is positive when the
    mean of ``y`` exceeds the mean of ``x``. ``pooled`` is the square root of
    the unweighted average of the two sample variances (``ddof=1``); the two
    samples get equal weight regardless of their sizes.

    Args:
        x: First sample.
        y: Second sample.

    Returns:
        The effect size as a built-in ``float``. ``0.0`` when the pooled
        standard deviation is exactly zero, and NaN when either sample has
        fewer than two values (its sample variance is undefined).
    """
    # A sample variance with ddof=1 needs at least two observations. Answer NaN
    # directly rather than letting NumPy compute it and emit RuntimeWarnings.
    if np.size(x) < 2 or np.size(y) < 2:
        return float("nan")

    pooled = np.sqrt((np.var(x, ddof=1) + np.var(y, ddof=1)) / 2)
    if pooled == 0:
        # Both samples are constant; report no effect instead of dividing by 0.
        return 0.0
    # Convert the NumPy scalar so the value matches the ``float`` annotation.
    return float((np.mean(y) - np.mean(x)) / pooled)


# ---------------------------------------------------------------------------
# Motion metadata helpers
# ---------------------------------------------------------------------------


def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]:
    """Build a ``last name -> party`` lookup from the ``mp_metadata`` table.

    The last name is the part of ``mp_name`` before the first comma, stripped
    of surrounding whitespace. Rows are read ordered by ``tot_en_met`` and then
    ``van`` (both descending, NULLs last); the first party seen for a last name
    is kept and later rows for the same name are ignored.

    Args:
        con: Open connection; only ``con.execute(sql).fetchall()`` is used.

    Returns:
        Dict mapping each last name to one party. Rows with a NULL or empty
        ``mp_name`` are skipped. Distinct people who share a last name collapse
        into a single entry.
    """
    # NOTE(review): NULLS LAST ranks rows without a ``tot_en_met`` behind every
    # dated row. If a NULL ``tot_en_met`` means "membership still running",
    # such rows would lose to older, ended memberships - confirm the schema.
    rows = con.execute(
        """
        SELECT mp_name, party, van, tot_en_met
        FROM mp_metadata
        WHERE party IS NOT NULL
        ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST
        """
    ).fetchall()

    last_to_party: dict[str, str] = {}
    for mp_name, party, _van, _tot in rows:
        # The query filters on ``party`` only, so ``mp_name`` may still be NULL.
        if not mp_name:
            continue
        last = mp_name.partition(",")[0].strip()
        if last:
            # setdefault keeps the first (highest-ranked) party per last name.
            last_to_party.setdefault(last, party)
    return last_to_party


# Title shapes recognised by parse_lead_submitter, tried in this order. Each
# captures the submitter list that sits between "het lid" / "de leden" and the
# word "over". Amended documents are written with a lower-case noun
# ("Gewijzigde motie ...", "Nader gewijzigde motie ..."), so those spellings
# are accepted next to the capitalised "Motie" / "Amendement".
_SUBMITTER_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"(?:[Gg]ewijzigde\s+[Mm]otie|Motie)\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
        r"(?:[Gg]ewijzigde\s+[Mm]otie|Motie)\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
        r"(?:[Gg]ewijzigd\s+[Aa]mendement|Amendement)\s+van\s+het\s+lid\s+(.+?)\s+over\b",
        r"(?:[Gg]ewijzigd\s+[Aa]mendement|Amendement)\s+van\s+de\s+leden\s+(.+?)\s+over\b",
    )
)

# Separators between co-submitters: a comma or the word "en".
_SUBMITTER_SEPARATOR = re.compile(r"\s*,\s*|\s+en\s+")

# "c.s." marker that can trail a name.
_CS_MARKER = re.compile(r"\s+c\.s\.")


def parse_lead_submitter(
    title: str, name_party_map: dict[str, str]
) -> tuple[str | None, str | None]:
    """Parse the lead submitter from a motion title and map it to a party.

    The lead submitter is the first name in the submitter list, i.e. the text
    before the first comma or " en ", with any "c.s." marker removed. Patterns
    are searched anywhere in the title, so leading words such as "Nader" do
    not prevent a match.

    Args:
        title: Motion or amendment title.
        name_party_map: Mapping from submitter name to party, looked up with
            the parsed name exactly as it appears in the title.

    Returns:
        ``(parsed_name, party)``; ``party`` is ``None`` when the name is not in
        ``name_party_map``. ``(None, None)`` when the title is empty or no
        pattern yields a name.
    """
    if not title:
        return None, None

    for pattern in _SUBMITTER_PATTERNS:
        match = pattern.search(title)
        if match is None:
            continue
        submitters = match.group(1).strip()
        # Only the first listed name matters; the rest are co-submitters.
        lead = _SUBMITTER_SEPARATOR.split(submitters, maxsplit=1)[0]
        lead = _CS_MARKER.sub("", lead).strip()
        if lead:
            return lead, name_party_map.get(lead)

    return None, None


def motion_passed(voting_results: dict | None) -> bool:
    """Check whether a motion passed, based on its ``voting_results`` payload.

    Args:
        voting_results: Mapping with a ``"result"`` entry, or a JSON string
            that decodes to such a mapping. ``None`` and other falsy values
            are accepted.

    Returns:
        ``True`` only when the ``"result"`` entry equals ``"aangenomen"``.
        ``False`` for a falsy payload, for a string that is not valid JSON,
        and for any payload that has no ``get`` method (for example a JSON
        list, string or number).
    """
    if not voting_results:
        return False

    if isinstance(voting_results, str):
        try:
            voting_results = json.loads(voting_results)
        except (ValueError, TypeError):
            return False

    # Valid JSON is not necessarily an object: a decoded list, string or number
    # has no ``get``; treat those as "not passed" instead of raising.
    lookup = getattr(voting_results, "get", None)
    if lookup is None:
        return False
    return lookup("result") == "aangenomen"


# ---------------------------------------------------------------------------
# Temporal helpers
# ---------------------------------------------------------------------------


def quarter_sort_key(q: str) -> tuple[int, int]:
    """Return a ``(year, quarter)`` sort key for a label such as '2024-Q1'.

    The year is read from the first four characters and the quarter from the
    last character; both must be digits, otherwise ``int`` raises ValueError.
    """
    return int(q[:4]), int(q[-1])


def find_inflection_point(
    quarters: list[str], values: list[float], threshold: float = 0.4
) -> str | None:
    """Return the first quarter whose smoothed value is above ``threshold``.

    Smoothing is a centred three-point mean (previous, current and next
    value), so only interior quarters are candidates: the first and last
    quarter are never returned. Fewer than three quarters, or no smoothed
    value strictly greater than ``threshold``, gives ``None``.
    """
    if len(quarters) < 3:
        return None

    # Lazy scan over the interior positions; ``next`` stops at the first hit.
    candidates = (
        quarters[mid]
        for mid in range(1, len(quarters) - 1)
        if (values[mid - 1] + values[mid] + values[mid + 1]) / 3 > threshold
    )
    return next(candidates, None)
