"""Shared constants and helpers for right-wing motion analysis. Extracted from 6+ files to eliminate code duplication. All Overton analysis scripts should import from here instead of defining their own copies. """ from __future__ import annotations import re import math from pathlib import Path import duckdb import numpy as np # --------------------------------------------------------------------------- # Paths # --------------------------------------------------------------------------- ROOT = Path(__file__).resolve().parents[2] DB_PATH = str(ROOT / "data" / "motions.db") REPORTS_DIR = ROOT / "reports" / "overton_window" # --------------------------------------------------------------------------- # Party sets # --------------------------------------------------------------------------- CANONICAL_LEFT = frozenset({"SP", "PvdA", "GL", "GroenLinks", "GroenLinks-PvdA", "DENK", "PvdD", "Volt"}) CANONICAL_RIGHT = frozenset({"PVV", "FVD", "JA21", "SGP"}) CANONICAL_CENTRIST = frozenset({"VVD", "D66", "CDA", "NSC", "BBB", "CU"}) CANONICAL_CENTRIST_STRICT = frozenset({"D66", "CDA", "NSC", "CU"}) CANONICAL_LEFT_SET = set(CANONICAL_LEFT) CANONICAL_RIGHT_SET = set(CANONICAL_RIGHT) # --------------------------------------------------------------------------- # Time periods # --------------------------------------------------------------------------- YEAR_MIN, YEAR_MAX = 2016, 2026 BREAK_YEAR = 2024 SCHOOF_START_DATE = "2024-07-01" # --------------------------------------------------------------------------- # Coalition composition # --------------------------------------------------------------------------- RUTTE_IV_COALITION: set[str] = {"VVD", "D66", "CDA", "CU"} SCHOOF_COALITION: set[str] = {"PVV", "VVD", "NSC", "BBB"} COALITION: dict[int, set[str]] = { 2016: {"VVD", "PvdA"}, 2017: {"VVD", "PvdA"}, 2018: {"VVD", "CDA", "D66", "CU"}, 2019: {"VVD", "CDA", "D66", "CU"}, 2020: {"VVD", "CDA", "D66", "CU"}, 2021: {"VVD", "CDA", "D66", "CU"}, 2022: {"VVD", "D66", "CDA", "CU"}, 2023: {"VVD", "D66", "CDA", "CU"}, 2024: SCHOOF_COALITION, 2025: SCHOOF_COALITION, 2026: SCHOOF_COALITION, } COALITION_NOTE = ( "2016-2017: Rutte II (VVD/PvdA). " "2018-2021: Rutte III (VVD/CDA/D66/CU). " "2022-2023: Rutte IV (VVD/D66/CDA/CU). " "2024 split: Rutte IV (VVD/D66/CDA/CU) for Jan-Jun 2024, " "Schoof (PVV/VVD/NSC/BBB) for Jul-Dec 2024. " "2025-2026: Schoof (PVV/VVD/NSC/BBB). " "Period detection uses motion date, not just year." ) # --------------------------------------------------------------------------- # Database helpers # --------------------------------------------------------------------------- def _conn(db_path: str | None = None, read_only: bool = True) -> duckdb.DuckDBPyConnection: """Open a DuckDB connection to the motions database.""" return duckdb.connect(db_path or DB_PATH, read_only=read_only) # --------------------------------------------------------------------------- # Statistical helpers # --------------------------------------------------------------------------- def cohens_d(x: np.ndarray, y: np.ndarray) -> float: """Cohen's d effect size (positive when y > x).""" pooled = np.sqrt((np.var(x, ddof=1) + np.var(y, ddof=1)) / 2) if pooled == 0: return 0.0 return (np.mean(y) - np.mean(x)) / pooled # --------------------------------------------------------------------------- # Motion metadata helpers # --------------------------------------------------------------------------- def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]: """Build mapping: last name -> party from mp_metadata.""" rows = con.execute(""" SELECT mp_name, party, van, tot_en_met FROM mp_metadata WHERE party IS NOT NULL ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST """).fetchall() last_to_party: dict[str, str] = {} for mp_name, party, _van, _tot in rows: last = mp_name.split(",")[0].strip() if last not in last_to_party: last_to_party[last] = party return last_to_party def parse_lead_submitter( title: str, name_party_map: dict[str, str] ) -> tuple[str | None, str | None]: """Parse the lead submitter from a motion title and map to party. Returns (parsed_name, party) or (None, None). """ if not title: return None, None patterns = [ r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b", r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b", r"Amendement\s+van\s+het\s+lid\s+(.+?)\s+over\b", r"Amendement\s+van\s+de\s+leden\s+(.+?)\s+over\b", ] for pat in patterns: m = re.search(pat, title) if m: submitter_str = m.group(1).strip() parts = submitter_str.split(" en ") first_name = parts[0].strip() first_name = re.sub(r"\s+c\.s\.", "", first_name).strip() if not first_name: continue party = name_party_map.get(first_name) return first_name, party return None, None def motion_passed(voting_results: dict | None) -> bool: """Check if a motion passed based on voting_results JSON.""" if not voting_results: return False if isinstance(voting_results, str): try: import json voting_results = json.loads(voting_results) except (ValueError, TypeError): return False return voting_results.get("result") == "aangenomen" # --------------------------------------------------------------------------- # Temporal helpers # --------------------------------------------------------------------------- def quarter_sort_key(q: str) -> tuple[int, int]: """Sort key for quarter strings like '2024-Q1'.""" year = int(q[:4]) quarter = int(q[-1]) return (year, quarter) def find_inflection_point( quarters: list[str], values: list[float], threshold: float = 0.4 ) -> str | None: """Find the first quarter where the smoothed value exceeds the threshold.""" if len(quarters) < 3: return None for i in range(1, len(quarters) - 1): avg = (values[i - 1] + values[i] + values[i + 1]) / 3 if avg > threshold: return quarters[i] return None