You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

187 lines
6.3 KiB

"""Shared constants and helpers for right-wing motion analysis.
Extracted from 6+ files to eliminate code duplication. All Overton analysis
scripts should import from here instead of defining their own copies.
"""
from __future__ import annotations
import re
import math
from pathlib import Path
import duckdb
import numpy as np
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
# Project root: two directory levels above the folder that contains this module.
ROOT = Path(__file__).resolve().parents[2]
# Location of the motions database, kept as a plain string (used by _conn).
DB_PATH = str(ROOT.joinpath("data", "motions.db"))
# Output directory for the Overton-window reports (a Path, not a string).
REPORTS_DIR = ROOT.joinpath("reports", "overton_window")
# ---------------------------------------------------------------------------
# Party sets
# ---------------------------------------------------------------------------
# Immutable party groupings used across the analysis scripts.
# The left bloc lists GL, GroenLinks and GroenLinks-PvdA as separate entries.
CANONICAL_LEFT = frozenset(
    (
        "SP",
        "PvdA",
        "GL",
        "GroenLinks",
        "GroenLinks-PvdA",
        "DENK",
        "PvdD",
        "Volt",
    )
)
CANONICAL_RIGHT = frozenset(("PVV", "FVD", "JA21", "SGP"))
CANONICAL_CENTRIST = frozenset(("VVD", "D66", "CDA", "NSC", "BBB", "CU"))
# Strict variant: the centrist bloc without VVD and BBB.
CANONICAL_CENTRIST_STRICT = CANONICAL_CENTRIST - {"VVD", "BBB"}
# Mutable copies of the left/right blocs.
CANONICAL_LEFT_SET = {*CANONICAL_LEFT}
CANONICAL_RIGHT_SET = {*CANONICAL_RIGHT}
# ---------------------------------------------------------------------------
# Time periods
# ---------------------------------------------------------------------------
# First and last year of the analysis window (COALITION has an entry for each).
YEAR_MIN = 2016
YEAR_MAX = 2026
# Year of the Rutte IV -> Schoof changeover described in COALITION_NOTE.
BREAK_YEAR = 2024
# ISO date string; presumably the cut-over used for date-based period
# detection within 2024 -- verify against callers.
SCHOOF_START_DATE = "2024-07-01"
# ---------------------------------------------------------------------------
# Coalition composition
# ---------------------------------------------------------------------------
# Governing coalitions as sets of party abbreviations.
RUTTE_IV_COALITION: set[str] = {"VVD", "D66", "CDA", "CU"}
SCHOOF_COALITION: set[str] = {"PVV", "VVD", "NSC", "BBB"}

# Year -> coalition parties. Years up to 2023 each get their own set object;
# 2024-2026 all point at the SCHOOF_COALITION object itself.
COALITION: dict[int, set[str]] = {
    **{year: {"VVD", "PvdA"} for year in (2016, 2017)},
    **{year: {"VVD", "CDA", "D66", "CU"} for year in range(2018, 2022)},
    **{year: set(RUTTE_IV_COALITION) for year in (2022, 2023)},
    **{year: SCHOOF_COALITION for year in (2024, 2025, 2026)},
}

# Human-readable summary of the mapping above, including the mid-2024 split
# that a year-keyed table cannot express.
COALITION_NOTE = (
    "2016-2017: Rutte II (VVD/PvdA). "
    "2018-2021: Rutte III (VVD/CDA/D66/CU). "
    "2022-2023: Rutte IV (VVD/D66/CDA/CU). "
    "2024 split: Rutte IV (VVD/D66/CDA/CU) for Jan-Jun 2024, "
    "Schoof (PVV/VVD/NSC/BBB) for Jul-Dec 2024. "
    "2025-2026: Schoof (PVV/VVD/NSC/BBB). "
    "Period detection uses motion date, not just year."
)
# ---------------------------------------------------------------------------
# Database helpers
# ---------------------------------------------------------------------------
def _conn(db_path: str | None = None, read_only: bool = True) -> duckdb.DuckDBPyConnection:
    """Return a DuckDB connection to the motions database.

    Args:
        db_path: Database file to open. Any falsy value (None or an empty
            string) falls back to the module-level DB_PATH.
        read_only: Forwarded to duckdb.connect(); defaults to True.
    """
    target = db_path if db_path else DB_PATH
    return duckdb.connect(target, read_only=read_only)
# ---------------------------------------------------------------------------
# Statistical helpers
# ---------------------------------------------------------------------------
def cohens_d(x: np.ndarray, y: np.ndarray) -> float:
    """Return Cohen's d effect size between two samples (positive when y > x).

    The denominator is the square root of the unweighted mean of the two
    sample variances (ddof=1). This equals the classical pooled standard
    deviation only when both samples have the same size.

    Args:
        x: Baseline sample (array or array-like).
        y: Comparison sample (array or array-like).

    Returns:
        The effect size as a built-in float. 0.0 when both samples have zero
        variance; NaN when either sample has fewer than two observations,
        because the sample variance is undefined there.
    """
    x_arr = np.asarray(x)
    y_arr = np.asarray(y)
    # With ddof=1 NumPy only produces NaN (plus RuntimeWarnings) for fewer than
    # two observations; return the same NaN explicitly and quietly.
    if x_arr.size < 2 or y_arr.size < 2:
        return float("nan")
    pooled = np.sqrt((np.var(x_arr, ddof=1) + np.var(y_arr, ddof=1)) / 2)
    if pooled == 0:
        return 0.0
    # Coerce the NumPy scalar so the result matches the annotated return type.
    return float((np.mean(y_arr) - np.mean(x_arr)) / pooled)
# ---------------------------------------------------------------------------
# Motion metadata helpers
# ---------------------------------------------------------------------------
def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]:
    """Build a mapping from MP last name to party using the mp_metadata table.

    The last name is the text before the first comma in ``mp_name``. Rows are
    read ordered by ``tot_en_met`` then ``van`` (both descending, NULLs last),
    and the first row seen for a last name wins, so different MPs who share a
    last name collapse into a single entry.

    NOTE(review): because of ``NULLS LAST``, a row with a NULL ``tot_en_met``
    loses to any row for the same last name that has an end date. If NULL
    means "still in office", this prefers a former party over the current
    one -- confirm the intended ordering.

    Args:
        con: Open connection exposing ``execute(sql).fetchall()``.

    Returns:
        Dict of last name -> party. Rows with a NULL/empty name are skipped.
    """
    rows = con.execute("""
        SELECT mp_name, party, van, tot_en_met
        FROM mp_metadata
        WHERE party IS NOT NULL
        ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST
    """).fetchall()
    last_to_party: dict[str, str] = {}
    for mp_name, party, _van, _tot in rows:
        # The query only filters on party, so mp_name may still be NULL.
        if not mp_name:
            continue
        last = mp_name.split(",")[0].strip()
        if last:
            # setdefault keeps the first (highest-ranked) row per last name.
            last_to_party.setdefault(last, party)
    return last_to_party
# Title shapes recognised by parse_lead_submitter, tried in this order.
# Compiled once and case-insensitively, so amended titles that spell the
# document type in lowercase ("Gewijzigde motie ...") match as well.
_SUBMITTER_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
        r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
        r"Amendement\s+van\s+het\s+lid\s+(.+?)\s+over\b",
        r"Amendement\s+van\s+de\s+leden\s+(.+?)\s+over\b",
    )
)


def parse_lead_submitter(
    title: str, name_party_map: dict[str, str]
) -> tuple[str | None, str | None]:
    """Parse the lead submitter from a motion title and map it to a party.

    The submitter list is the text between "van het lid" / "van de leden" and
    the word "over". The lead submitter is the first name in that list (the
    part before " en "), with any "c.s." suffix and any parenthetical remark
    such as "(t.v.v. ...)" removed.

    Args:
        title: Motion or amendment title; None or empty yields (None, None).
        name_party_map: Name -> party lookup, matched exactly (case-sensitive).

    Returns:
        (parsed_name, party), where party is None when the name is not in
        ``name_party_map``; (None, None) when no title pattern matches.
    """
    if not title:
        return None, None
    for pattern in _SUBMITTER_PATTERNS:
        match = pattern.search(title)
        if match is None:
            continue
        # Drop parenthetical remarks first so they can neither leak into the
        # name nor introduce a spurious " en " split point.
        submitters = re.sub(r"\s*\([^)]*\)", "", match.group(1)).strip()
        lead_name = submitters.split(" en ")[0].strip()
        lead_name = re.sub(r"\s+c\.s\.", "", lead_name).strip()
        if not lead_name:
            continue
        return lead_name, name_party_map.get(lead_name)
    return None, None
def motion_passed(voting_results: dict | None) -> bool:
    """Return True when the voting results mark the motion as adopted.

    Args:
        voting_results: Either a mapping or its JSON text (str/bytes). None,
            empty values, undecodable JSON, and JSON that does not decode to
            an object all count as "not passed".

    Returns:
        True only when the "result" entry equals "aangenomen".
    """
    # Function-scope imports: neither name is imported at module level.
    import json
    from collections.abc import Mapping

    if not voting_results:
        return False
    if isinstance(voting_results, (str, bytes, bytearray)):
        try:
            voting_results = json.loads(voting_results)
        except (ValueError, TypeError):
            return False
    # Valid JSON can still be a list, string, number or null; those have no
    # "result" entry and must not reach .get().
    if not isinstance(voting_results, Mapping):
        return False
    return voting_results.get("result") == "aangenomen"
# ---------------------------------------------------------------------------
# Temporal helpers
# ---------------------------------------------------------------------------
def quarter_sort_key(q: str) -> tuple[int, int]:
    """Convert a quarter label such as '2024-Q1' into a (year, quarter) pair.

    The year is read from the first four characters and the quarter from the
    final character; whatever sits in between is not inspected.
    """
    return int(q[:4]), int(q[-1])
def find_inflection_point(
    quarters: list[str], values: list[float], threshold: float = 0.4
) -> str | None:
    """Find the first quarter whose smoothed value exceeds the threshold.

    Smoothing is a centred three-point mean, so the first and last quarter
    can never be returned.

    Args:
        quarters: Quarter labels in chronological order.
        values: One value per quarter, aligned with ``quarters``. If it is
            shorter than ``quarters``, only quarters with a complete
            three-value window are considered.
        threshold: Level the smoothed value must strictly exceed.

    Returns:
        The label of the first qualifying quarter, or None when there are
        fewer than three quarters or no window exceeds the threshold.
    """
    if len(quarters) < 3:
        return None
    # zip stops at the shortest input, so a short `values` list ends the scan
    # instead of raising IndexError.
    windows = zip(quarters[1:-1], values, values[1:], values[2:])
    for quarter, before, current, after in windows:
        if (before + current + after) / 3 > threshold:
            return quarter
    return None