diff --git a/analysis/right_wing/causal_timing.py b/analysis/right_wing/causal_timing.py index 77fd051..cb23732 100644 --- a/analysis/right_wing/causal_timing.py +++ b/analysis/right_wing/causal_timing.py @@ -30,27 +30,12 @@ import numpy as np ROOT = Path(__file__).parent.parent.parent.resolve() sys.path.insert(0, str(ROOT)) -DB_PATH = str(ROOT / "data" / "motions.db") -REPORTS_DIR = ROOT / "reports" / "overton_window" +from analysis.right_wing.common import ( + CANONICAL_CENTRIST, COALITION, DB_PATH, REPORTS_DIR, + build_party_name_map, parse_lead_submitter, quarter_sort_key, +) REPORTS_DIR.mkdir(parents=True, exist_ok=True) -CANONICAL_RIGHT = frozenset({"PVV", "FVD", "JA21", "SGP"}) -CANONICAL_CENTRIST = frozenset({"VVD", "D66", "CDA", "NSC", "BBB", "CU"}) - -COALITION: dict[int, set[str]] = { - 2016: {"VVD", "PvdA"}, - 2017: {"VVD", "PvdA"}, - 2018: {"VVD", "CDA", "D66", "CU"}, - 2019: {"VVD", "CDA", "D66", "CU"}, - 2020: {"VVD", "CDA", "D66", "CU"}, - 2021: {"VVD", "CDA", "D66", "CU"}, - 2022: {"VVD", "D66", "CDA", "CU"}, - 2023: {"VVD", "D66", "CDA", "CU"}, - 2024: {"PVV", "VVD", "NSC", "BBB"}, - 2025: {"PVV", "VVD", "NSC", "BBB"}, - 2026: {"PVV", "VVD", "NSC", "BBB"}, -} - POLITICAL_EVENTS: list[dict[str, Any]] = [ {"quarter": "2021-Q1", "label": "Rutte IV\nelection", "date": "Mar 2021", "category": "dutch"}, @@ -70,50 +55,6 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(mess logger = logging.getLogger(__name__) -def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]: - rows = con.execute(""" - SELECT mp_name, party, van, tot_en_met - FROM mp_metadata - WHERE party IS NOT NULL - ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST - """).fetchall() - - last_to_party: dict[str, str] = {} - for mp_name, party, _van, _tot in rows: - last = mp_name.split(",")[0].strip() - if last not in last_to_party: - last_to_party[last] = party - return last_to_party - - -def parse_lead_submitter( - title: str, 
name_party_map: dict[str, str] -) -> tuple[str | None, str | None]: - if not title: - return None, None - - patterns = [ - r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b", - r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b", - r"Amendement\s+van\s+het\s+lid\s+(.+?)\s+over\b", - r"Amendement\s+van\s+de\s+leden\s+(.+?)\s+over\b", - ] - - for pat in patterns: - m = re.search(pat, title) - if m: - submitter_str = m.group(1).strip() - parts = submitter_str.split(" en ") - first_name = parts[0].strip() - first_name = re.sub(r"\s+c\.s\.", "", first_name).strip() - if not first_name: - continue - party = name_party_map.get(first_name) - return first_name, party - - return None, None - - def fetch_rw_motions(con: duckdb.DuckDBPyConnection) -> list[dict[str, Any]]: rows = con.execute(""" SELECT @@ -146,11 +87,6 @@ def fetch_rw_motions(con: duckdb.DuckDBPyConnection) -> list[dict[str, Any]]: return result -def quarter_sort_key(quarter_str: str) -> tuple[int, int]: - parts = quarter_str.split("-Q") - return (int(parts[0]), int(parts[1])) - - def aggregate_quarterly(data: list[dict]) -> dict[str, dict]: quarterly: dict[str, dict[str, list]] = defaultdict( lambda: {"all_cs": []} diff --git a/analysis/right_wing/classify_motions.py b/analysis/right_wing/classify_motions.py index fe54bca..800ca68 100644 --- a/analysis/right_wing/classify_motions.py +++ b/analysis/right_wing/classify_motions.py @@ -17,18 +17,17 @@ from typing import Any import duckdb -ROOT = Path(__file__).parent.parent.parent.resolve() +from analysis.right_wing.common import ROOT + if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT +from analysis.right_wing.common import CANONICAL_CENTRIST logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") logger = logging.getLogger(__name__) -# Centrist parties for cross-ideological 
metrics -CANONICAL_CENTRIST = frozenset({"VVD", "D66", "CDA", "NSC", "BBB", "CU"}) - def _load_keywords(keywords_path: str) -> tuple[list[str], list[str]]: """Load right-wing and left-wing keywords from JSON.""" diff --git a/analysis/right_wing/common.py b/analysis/right_wing/common.py new file mode 100644 index 0000000..d73c7c4 --- /dev/null +++ b/analysis/right_wing/common.py @@ -0,0 +1,187 @@ +"""Shared constants and helpers for right-wing motion analysis. + +Extracted from 6+ files to eliminate code duplication. All Overton analysis +scripts should import from here instead of defining their own copies. +""" + +from __future__ import annotations + +import re +import math +from pathlib import Path + +import duckdb +import numpy as np + +# --------------------------------------------------------------------------- +# Paths +# --------------------------------------------------------------------------- + +ROOT = Path(__file__).resolve().parents[2] +DB_PATH = str(ROOT / "data" / "motions.db") +REPORTS_DIR = ROOT / "reports" / "overton_window" + +# --------------------------------------------------------------------------- +# Party sets +# --------------------------------------------------------------------------- + +CANONICAL_LEFT = frozenset({"SP", "PvdA", "GL", "GroenLinks", "GroenLinks-PvdA", "DENK", "PvdD", "Volt"}) +CANONICAL_RIGHT = frozenset({"PVV", "FVD", "JA21", "SGP"}) +CANONICAL_CENTRIST = frozenset({"VVD", "D66", "CDA", "NSC", "BBB", "CU"}) +CANONICAL_CENTRIST_STRICT = frozenset({"D66", "CDA", "NSC", "CU"}) + +CANONICAL_LEFT_SET = set(CANONICAL_LEFT) +CANONICAL_RIGHT_SET = set(CANONICAL_RIGHT) + +# --------------------------------------------------------------------------- +# Time periods +# --------------------------------------------------------------------------- + +YEAR_MIN, YEAR_MAX = 2016, 2026 +BREAK_YEAR = 2024 +SCHOOF_START_DATE = "2024-07-01" + +# --------------------------------------------------------------------------- +# Coalition 
composition +# --------------------------------------------------------------------------- + +RUTTE_IV_COALITION: set[str] = {"VVD", "D66", "CDA", "CU"} +SCHOOF_COALITION: set[str] = {"PVV", "VVD", "NSC", "BBB"} + +COALITION: dict[int, set[str]] = { + 2016: {"VVD", "PvdA"}, + 2017: {"VVD", "PvdA"}, + 2018: {"VVD", "CDA", "D66", "CU"}, + 2019: {"VVD", "CDA", "D66", "CU"}, + 2020: {"VVD", "CDA", "D66", "CU"}, + 2021: {"VVD", "CDA", "D66", "CU"}, + 2022: {"VVD", "D66", "CDA", "CU"}, + 2023: {"VVD", "D66", "CDA", "CU"}, + 2024: SCHOOF_COALITION, + 2025: SCHOOF_COALITION, + 2026: SCHOOF_COALITION, +} + +COALITION_NOTE = ( + "2016-2017: Rutte II (VVD/PvdA). " + "2018-2021: Rutte III (VVD/CDA/D66/CU). " + "2022-2023: Rutte IV (VVD/D66/CDA/CU). " + "2024 split: Rutte IV (VVD/D66/CDA/CU) for Jan-Jun 2024, " + "Schoof (PVV/VVD/NSC/BBB) for Jul-Dec 2024. " + "2025-2026: Schoof (PVV/VVD/NSC/BBB). " + "Period detection uses motion date, not just year." +) + +# --------------------------------------------------------------------------- +# Database helpers +# --------------------------------------------------------------------------- + + +def _conn(db_path: str | None = None, read_only: bool = True) -> duckdb.DuckDBPyConnection: + """Open a DuckDB connection to the motions database.""" + return duckdb.connect(db_path or DB_PATH, read_only=read_only) + + +# --------------------------------------------------------------------------- +# Statistical helpers +# --------------------------------------------------------------------------- + + +def cohens_d(x: np.ndarray, y: np.ndarray) -> float: + """Cohen's d effect size (positive when y > x).""" + pooled = np.sqrt((np.var(x, ddof=1) + np.var(y, ddof=1)) / 2) + if pooled == 0: + return 0.0 + return (np.mean(y) - np.mean(x)) / pooled + + +# --------------------------------------------------------------------------- +# Motion metadata helpers +# --------------------------------------------------------------------------- + + +def 
build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]: + """Build mapping: last name -> party from mp_metadata.""" + rows = con.execute(""" + SELECT mp_name, party, van, tot_en_met + FROM mp_metadata + WHERE party IS NOT NULL + ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST + """).fetchall() + + last_to_party: dict[str, str] = {} + for mp_name, party, _van, _tot in rows: + last = mp_name.split(",")[0].strip() + if last not in last_to_party: + last_to_party[last] = party + return last_to_party + + +def parse_lead_submitter( + title: str, name_party_map: dict[str, str] +) -> tuple[str | None, str | None]: + """Parse the lead submitter from a motion title and map to party. + + Returns (parsed_name, party) or (None, None). + """ + if not title: + return None, None + + patterns = [ + r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b", + r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b", + r"Amendement\s+van\s+het\s+lid\s+(.+?)\s+over\b", + r"Amendement\s+van\s+de\s+leden\s+(.+?)\s+over\b", + ] + + for pat in patterns: + m = re.search(pat, title) + if m: + submitter_str = m.group(1).strip() + parts = submitter_str.split(" en ") + first_name = parts[0].strip() + first_name = re.sub(r"\s+c\.s\.", "", first_name).strip() + if not first_name: + continue + party = name_party_map.get(first_name) + return first_name, party + + return None, None + + +def motion_passed(voting_results: dict | None) -> bool: + """Check if a motion passed based on voting_results JSON.""" + if not voting_results: + return False + if isinstance(voting_results, str): + try: + import json + voting_results = json.loads(voting_results) + except (ValueError, TypeError): + return False + return voting_results.get("result") == "aangenomen" + + +# --------------------------------------------------------------------------- +# Temporal helpers +# 
--------------------------------------------------------------------------- + + +def quarter_sort_key(q: str) -> tuple[int, int]: + """Sort key for quarter strings like '2024-Q1'.""" + year = int(q[:4]) + quarter = int(q[-1]) + return (year, quarter) + + +def find_inflection_point( + quarters: list[str], values: list[float], threshold: float = 0.4 +) -> str | None: + """Find the first quarter where the smoothed value exceeds the threshold.""" + if len(quarters) < 3: + return None + for i in range(1, len(quarters) - 1): + avg = (values[i - 1] + values[i] + values[i + 1]) / 3 + if avg > threshold: + return quarters[i] + return None diff --git a/analysis/right_wing/direction3_migration_antidemocratic.py b/analysis/right_wing/direction3_migration_antidemocratic.py index 1bd8007..f895296 100644 --- a/analysis/right_wing/direction3_migration_antidemocratic.py +++ b/analysis/right_wing/direction3_migration_antidemocratic.py @@ -13,19 +13,14 @@ from pathlib import Path import duckdb -ROOT = Path(__file__).parent.parent.parent.resolve() +from analysis.right_wing.common import ROOT, _conn + if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") logger = logging.getLogger(__name__) -DB_PATH = ROOT / "data" / "motions.db" - - -def _conn(): - return duckdb.connect(str(DB_PATH), read_only=True) - def print_section(title: str) -> None: print(f"\n{'=' * 70}") diff --git a/analysis/right_wing/left_wing_response.py b/analysis/right_wing/left_wing_response.py index 6358ead..2edb826 100644 --- a/analysis/right_wing/left_wing_response.py +++ b/analysis/right_wing/left_wing_response.py @@ -22,6 +22,11 @@ ROOT = Path(__file__).parent.parent.parent.resolve() if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) +from analysis.right_wing.common import ( + CANONICAL_CENTRIST_STRICT, BREAK_YEAR, YEAR_MIN, YEAR_MAX, + DB_PATH, REPORTS_DIR, _conn, cohens_d, +) + import duckdb import matplotlib @@ 
-34,15 +39,8 @@ from analysis.config import CANONICAL_LEFT, PARTY_COLOURS, _PARTY_NORMALIZE logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") logger = logging.getLogger(__name__) -DB_PATH = str(ROOT / "data" / "motions.db") -REPORTS_DIR = ROOT / "reports" / "overton_window" REPORTS_DIR.mkdir(parents=True, exist_ok=True) -BREAK_YEAR = 2024 -YEAR_MIN, YEAR_MAX = 2016, 2026 - -CANONICAL_CENTRIST_STRICT = frozenset({"D66", "CDA", "CU", "NSC"}) - LEFT_PARTY_DISPLAY_ORDER = [ "SP", "GroenLinks-PvdA", @@ -52,17 +50,6 @@ LEFT_PARTY_DISPLAY_ORDER = [ ] -def _conn(read_only: bool = True) -> duckdb.DuckDBPyConnection: - return duckdb.connect(DB_PATH, read_only=read_only) - - -def cohens_d(x: np.ndarray, y: np.ndarray) -> float: - pooled = np.sqrt((np.var(x, ddof=1) + np.var(y, ddof=1)) / 2) - if pooled == 0: - return 0.0 - return (np.mean(y) - np.mean(x)) / pooled - - def query_yearly_support() -> dict[int, dict]: """Query yearly averages of left_support_mp and centrist_support_strict.""" con = _conn() diff --git a/analysis/right_wing/migrate_mp_level_metrics.py b/analysis/right_wing/migrate_mp_level_metrics.py index 9d93722..3004a40 100644 --- a/analysis/right_wing/migrate_mp_level_metrics.py +++ b/analysis/right_wing/migrate_mp_level_metrics.py @@ -9,16 +9,16 @@ from __future__ import annotations import sys from pathlib import Path -ROOT = Path(__file__).parent.parent.parent.resolve() +from analysis.right_wing.common import ROOT + if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) import duckdb from analysis.config import CANONICAL_LEFT +from analysis.right_wing.common import CANONICAL_CENTRIST, CANONICAL_CENTRIST_STRICT -CANONICAL_CENTRIST = frozenset({"VVD", "D66", "CDA", "NSC", "BBB", "CU"}) -CANONICAL_CENTRIST_STRICT = frozenset({"D66", "CDA", "CU", "NSC"}) CANONICAL_CENTER_RIGHT = frozenset({"VVD", "BBB"}) COLUMNS = [ diff --git a/analysis/right_wing/overton_breakpoint_analysis.py 
b/analysis/right_wing/overton_breakpoint_analysis.py index f980e7b..a1d1654 100644 --- a/analysis/right_wing/overton_breakpoint_analysis.py +++ b/analysis/right_wing/overton_breakpoint_analysis.py @@ -36,13 +36,16 @@ matplotlib.use("Agg") import matplotlib.pyplot as plt from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT, PARTY_COLOURS -CANONICAL_CENTRIST = frozenset({"VVD", "D66", "CDA", "NSC", "BBB", "CU"}) +from analysis.right_wing.common import ( + CANONICAL_CENTRIST, COALITION, COALITION_NOTE, RUTTE_IV_COALITION, + SCHOOF_COALITION, SCHOOF_START_DATE, BREAK_YEAR, YEAR_MIN, YEAR_MAX, + DB_PATH, REPORTS_DIR, _conn, cohens_d, build_party_name_map, + parse_lead_submitter, +) logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") logger = logging.getLogger(__name__) -DB_PATH = str(ROOT / "data" / "motions.db") -REPORTS_DIR = ROOT / "reports" / "overton_window" REPORTS_DIR.mkdir(parents=True, exist_ok=True) CANONICAL_CENTRIST_SET = set(CANONICAL_CENTRIST) @@ -62,49 +65,8 @@ def _extremity_bucket(score: float) -> str: CANONICAL_LEFT_SET = set(CANONICAL_LEFT) CANONICAL_RIGHT_SET = set(CANONICAL_RIGHT) -RUTTE_IV_COALITION: set[str] = {"VVD", "D66", "CDA", "CU"} -SCHOOF_COALITION: set[str] = {"PVV", "VVD", "NSC", "BBB"} - -COALITION: dict[int, set[str]] = { - 2016: {"VVD", "PvdA"}, - 2017: {"VVD", "PvdA"}, - 2018: {"VVD", "CDA", "D66", "CU"}, - 2019: {"VVD", "CDA", "D66", "CU"}, - 2020: {"VVD", "CDA", "D66", "CU"}, - 2021: {"VVD", "CDA", "D66", "CU"}, - 2022: {"VVD", "D66", "CDA", "CU"}, - 2023: {"VVD", "D66", "CDA", "CU"}, - 2024: SCHOOF_COALITION, - 2025: SCHOOF_COALITION, - 2026: SCHOOF_COALITION, -} - -SCHOOF_START_DATE = "2024-07-01" - -COALITION_NOTE = ( - "2016-2017: Rutte II (VVD/PvdA). " - "2018-2021: Rutte III (VVD/CDA/D66/CU). " - "2022-2023: Rutte IV (VVD/D66/CDA/CU). " - "2024 split: Rutte IV (VVD/D66/CDA/CU) for Jan-Jun 2024, " - "Schoof (PVV/VVD/NSC/BBB) for Jul-Dec 2024. " - "2025-2026: Schoof (PVV/VVD/NSC/BBB). 
" - "Period detection uses motion date, not just year." -) - -YEAR_MIN, YEAR_MAX = 2016, 2026 -BREAK_YEAR = 2024 - - -def _conn(read_only: bool = True) -> duckdb.DuckDBPyConnection: - return duckdb.connect(DB_PATH, read_only=read_only) -def cohens_d(x: np.ndarray, y: np.ndarray) -> float: - """Cohen's d effect size.""" - pooled = np.sqrt((np.var(x, ddof=1) + np.var(y, ddof=1)) / 2) - if pooled == 0: - return 0.0 - return (np.mean(y) - np.mean(x)) / pooled def compute_yearly_rw_metrics(con: duckdb.DuckDBPyConnection) -> dict[int, dict]: @@ -246,55 +208,6 @@ def _support_ratio( return supportive / total -def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]: - """Build mapping: last name -> party from mp_metadata.""" - rows = con.execute(""" - SELECT mp_name, party, van, tot_en_met - FROM mp_metadata - WHERE party IS NOT NULL - ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST - """).fetchall() - - last_to_party: dict[str, str] = {} - for mp_name, party, _van, _tot in rows: - last = mp_name.split(",")[0].strip() - if last not in last_to_party: - last_to_party[last] = party - return last_to_party - - -def parse_lead_submitter( - title: str, name_party_map: dict[str, str] -) -> tuple[str | None, str | None]: - """Parse the lead submitter from a motion title and map to party. - - Returns (parsed_name, party) or (None, None). 
- """ - if not title: - return None, None - - patterns = [ - r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b", - r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b", - r"Amendement\s+van\s+het\s+lid\s+(.+?)\s+over\b", - r"Amendement\s+van\s+de\s+leden\s+(.+?)\s+over\b", - ] - - for pat in patterns: - m = re.search(pat, title) - if m: - submitter_str = m.group(1).strip() - parts = submitter_str.split(" en ") - first_name = parts[0].strip() - first_name = re.sub(r"\s+c\.s\.", "", first_name).strip() - if not first_name: - continue - party = name_party_map.get(first_name) - return first_name, party - - return None, None - - def compute_opposition_metrics( yearly_raw: dict[int, dict], name_party_map: dict[str, str] ) -> dict[int, dict]: diff --git a/analysis/right_wing/overton_svd_drift.py b/analysis/right_wing/overton_svd_drift.py index ee97b05..fa2bab4 100644 --- a/analysis/right_wing/overton_svd_drift.py +++ b/analysis/right_wing/overton_svd_drift.py @@ -25,7 +25,8 @@ import numpy as np matplotlib.use("Agg") -ROOT = Path(__file__).parent.parent.parent.resolve() +from analysis.right_wing.common import ROOT, DB_PATH, REPORTS_DIR + if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) @@ -42,9 +43,6 @@ CANONICAL_CENTRIST = frozenset( {"VVD", "D66", "CDA", "NSC", "BBB", "CU", "ChristenUnie"} ) -DB_PATH = str(ROOT / "data" / "motions.db") -REPORTS_DIR = ROOT / "reports" / "overton_window" - def _normalize_party(raw: str) -> str: """Normalize a raw party name to its canonical abbreviation.""" diff --git a/analysis/right_wing/party_differentiation.py b/analysis/right_wing/party_differentiation.py index ab276be..1723054 100644 --- a/analysis/right_wing/party_differentiation.py +++ b/analysis/right_wing/party_differentiation.py @@ -28,18 +28,18 @@ ROOT = Path(__file__).parent.parent.parent.resolve() if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) +from 
analysis.right_wing.common import ( + BREAK_YEAR, YEAR_MIN, YEAR_MAX, DB_PATH, REPORTS_DIR, + _conn, build_party_name_map, +) from analysis.config import CANONICAL_RIGHT, PARTY_COLOURS, _PARTY_NORMALIZE logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") logger = logging.getLogger(__name__) -DB_PATH = str(ROOT / "data" / "motions.db") -REPORTS_DIR = ROOT / "reports" / "overton_window" REPORTS_DIR.mkdir(parents=True, exist_ok=True) RIGHT_PARTIES = sorted(CANONICAL_RIGHT) -YEAR_MIN, YEAR_MAX = 2016, 2026 -BREAK_YEAR = 2024 TITLE_PATTERNS = [ r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b", @@ -49,26 +49,6 @@ TITLE_PATTERNS = [ ] -def _conn(read_only: bool = True) -> duckdb.DuckDBPyConnection: - return duckdb.connect(DB_PATH, read_only=read_only) - - -def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]: - rows = con.execute(""" - SELECT mp_name, party, van, tot_en_met - FROM mp_metadata - WHERE party IS NOT NULL - ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST - """).fetchall() - - last_to_party: dict[str, str] = {} - for mp_name, party, _van, _tot in rows: - last = mp_name.split(",")[0].strip() - if last not in last_to_party: - last_to_party[last] = party - return last_to_party - - def parse_submitter_party(title: str, name_party_map: dict[str, str]) -> str | None: if not title: return None diff --git a/analysis/right_wing/predictive_model.py b/analysis/right_wing/predictive_model.py index 3801993..2abeb55 100644 --- a/analysis/right_wing/predictive_model.py +++ b/analysis/right_wing/predictive_model.py @@ -45,31 +45,18 @@ PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent if str(PROJECT_ROOT) not in sys.path: sys.path.insert(0, str(PROJECT_ROOT)) +from analysis.right_wing.common import ( + BREAK_YEAR, COALITION, DB_PATH, REPORTS_DIR, + build_party_name_map as build_name_party_map, parse_lead_submitter, +) + 
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") logger = logging.getLogger(__name__) -DB_PATH = str(PROJECT_ROOT / "data" / "motions.db") -REPORTS_DIR = PROJECT_ROOT / "reports" / "overton_window" REPORTS_DIR.mkdir(parents=True, exist_ok=True) RANDOM_SEED = 42 -BREAK_YEAR = 2024 - -COALITION: dict[int, set[str]] = { - 2016: {"VVD", "PvdA"}, - 2017: {"VVD", "PvdA"}, - 2018: {"VVD", "CDA", "D66", "CU"}, - 2019: {"VVD", "CDA", "D66", "CU"}, - 2020: {"VVD", "CDA", "D66", "CU"}, - 2021: {"VVD", "CDA", "D66", "CU"}, - 2022: {"VVD", "D66", "CDA", "CU"}, - 2023: {"VVD", "D66", "CDA", "CU"}, - 2024: {"PVV", "VVD", "NSC", "BBB"}, - 2025: {"PVV", "VVD", "NSC", "BBB"}, - 2026: {"PVV", "VVD", "NSC", "BBB"}, -} - RIGHT_WING_PARTIES = {"PVV", "FVD", "JA21", "SGP"} CATEGORY_SHORT = { @@ -89,50 +76,6 @@ CATEGORY_SHORT = { } -def build_name_party_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]: - rows = con.execute(""" - SELECT mp_name, party, van, tot_en_met - FROM mp_metadata - WHERE party IS NOT NULL - ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST - """).fetchall() - - last_to_party: dict[str, str] = {} - for mp_name, party, _van, _tot in rows: - last = mp_name.split(",")[0].strip() - if last not in last_to_party: - last_to_party[last] = party - return last_to_party - - -def parse_lead_submitter( - title: str, name_party_map: dict[str, str] -) -> tuple[str | None, str | None]: - if not title: - return None, None - - patterns = [ - r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b", - r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b", - r"Amendement\s+van\s+het\s+lid\s+(.+?)\s+over\b", - r"Amendement\s+van\s+de\s+leden\s+(.+?)\s+over\b", - ] - - for pat in patterns: - m = re.search(pat, title) - if m: - submitter_str = m.group(1).strip() - parts = submitter_str.split(" en ") - first_name = parts[0].strip() - first_name = 
re.sub(r"\s+c\.s\.", "", first_name).strip() - if not first_name: - continue - party = name_party_map.get(first_name) - return first_name, party - - return None, None - - def load_model_data( db_path: str, ) -> tuple[list[dict[str, Any]], int, int]: diff --git a/analysis/right_wing/success_correlation.py b/analysis/right_wing/success_correlation.py index 6caf5f4..7d88b5a 100644 --- a/analysis/right_wing/success_correlation.py +++ b/analysis/right_wing/success_correlation.py @@ -29,76 +29,17 @@ import duckdb import numpy as np from scipy.stats import chi2 +from analysis.right_wing.common import ( + BREAK_YEAR, COALITION, DB_PATH, REPORTS_DIR, + build_party_name_map, parse_lead_submitter, +) from analysis.config import CANONICAL_RIGHT logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") logger = logging.getLogger(__name__) -DB_PATH = str(PROJECT_ROOT / "data" / "motions.db") -REPORTS_DIR = PROJECT_ROOT / "reports" / "overton_window" REPORTS_DIR.mkdir(parents=True, exist_ok=True) -BREAK_YEAR = 2024 - -COALITION: dict[int, set[str]] = { - 2016: {"VVD", "PvdA"}, - 2017: {"VVD", "PvdA"}, - 2018: {"VVD", "CDA", "D66", "CU"}, - 2019: {"VVD", "CDA", "D66", "CU"}, - 2020: {"VVD", "CDA", "D66", "CU"}, - 2021: {"VVD", "CDA", "D66", "CU"}, - 2022: {"VVD", "D66", "CDA", "CU"}, - 2023: {"VVD", "D66", "CDA", "CU"}, - 2024: {"PVV", "VVD", "NSC", "BBB"}, - 2025: {"PVV", "VVD", "NSC", "BBB"}, - 2026: {"PVV", "VVD", "NSC", "BBB"}, -} - - -def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]: - rows = con.execute(""" - SELECT mp_name, party, van, tot_en_met - FROM mp_metadata - WHERE party IS NOT NULL - ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST - """).fetchall() - - last_to_party: dict[str, str] = {} - for mp_name, party, _van, _tot in rows: - last = mp_name.split(",")[0].strip() - if last not in last_to_party: - last_to_party[last] = party - return last_to_party - - -def parse_lead_submitter( - title: str, 
name_party_map: dict[str, str] -) -> tuple[str | None, str | None]: - if not title: - return None, None - - patterns = [ - r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b", - r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b", - r"Amendement\s+van\s+het\s+lid\s+(.+?)\s+over\b", - r"Amendement\s+van\s+de\s+leden\s+(.+?)\s+over\b", - ] - - for pat in patterns: - m = re.search(pat, title) - if m: - submitter_str = m.group(1).strip() - parts = submitter_str.split(" en ") - first_name = parts[0].strip() - first_name = re.sub(r"\s+c\.s\.", "", first_name).strip() - if not first_name: - continue - party = name_party_map.get(first_name) - return first_name, party - - return None, None - - def motion_passed(voting: dict | None, winning_margin: float | None = None) -> bool | None: if voting is None: voting = {} diff --git a/analysis/right_wing/svd_trajectory_viz.py b/analysis/right_wing/svd_trajectory_viz.py index 4062184..f90c3f9 100644 --- a/analysis/right_wing/svd_trajectory_viz.py +++ b/analysis/right_wing/svd_trajectory_viz.py @@ -23,7 +23,8 @@ import numpy as np matplotlib.use("Agg") -ROOT = Path(__file__).parent.parent.parent.resolve() +from analysis.right_wing.common import ROOT, DB_PATH, REPORTS_DIR + if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) @@ -40,8 +41,6 @@ CANONICAL_CENTRIST = frozenset( {"VVD", "D66", "CDA", "NSC", "BBB", "CU", "ChristenUnie"} ) -DB_PATH = str(ROOT / "data" / "motions.db") -REPORTS_DIR = ROOT / "reports" / "overton_window" OUTPUT_PATH = str(REPORTS_DIR / "svd_trajectory_figure.png") CENTRIST_DISPLAY = ["VVD", "D66", "CDA", "NSC", "BBB", "CU"] diff --git a/analysis/right_wing/temporal_trajectory.py b/analysis/right_wing/temporal_trajectory.py index 33ce2de..01a5e04 100644 --- a/analysis/right_wing/temporal_trajectory.py +++ b/analysis/right_wing/temporal_trajectory.py @@ -32,75 +32,16 @@ import numpy as np ROOT = 
Path(__file__).parent.parent.parent.resolve() sys.path.insert(0, str(ROOT)) -DB_PATH = str(ROOT / "data" / "motions.db") -REPORTS_DIR = ROOT / "reports" / "overton_window" +from analysis.right_wing.common import ( + CANONICAL_CENTRIST, COALITION, DB_PATH, REPORTS_DIR, + build_party_name_map, parse_lead_submitter, quarter_sort_key, +) REPORTS_DIR.mkdir(parents=True, exist_ok=True) -CANONICAL_RIGHT = frozenset({"PVV", "FVD", "JA21", "SGP"}) -CANONICAL_CENTRIST = frozenset({"VVD", "D66", "CDA", "NSC", "BBB", "CU"}) - -COALITION: dict[int, set[str]] = { - 2016: {"VVD", "PvdA"}, - 2017: {"VVD", "PvdA"}, - 2018: {"VVD", "CDA", "D66", "CU"}, - 2019: {"VVD", "CDA", "D66", "CU"}, - 2020: {"VVD", "CDA", "D66", "CU"}, - 2021: {"VVD", "CDA", "D66", "CU"}, - 2022: {"VVD", "D66", "CDA", "CU"}, - 2023: {"VVD", "D66", "CDA", "CU"}, - 2024: {"PVV", "VVD", "NSC", "BBB"}, - 2025: {"PVV", "VVD", "NSC", "BBB"}, - 2026: {"PVV", "VVD", "NSC", "BBB"}, -} - logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") logger = logging.getLogger(__name__) -def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]: - rows = con.execute(""" - SELECT mp_name, party, van, tot_en_met - FROM mp_metadata - WHERE party IS NOT NULL - ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST - """).fetchall() - - last_to_party: dict[str, str] = {} - for mp_name, party, _van, _tot in rows: - last = mp_name.split(",")[0].strip() - if last not in last_to_party: - last_to_party[last] = party - return last_to_party - - -def parse_lead_submitter( - title: str, name_party_map: dict[str, str] -) -> tuple[str | None, str | None]: - if not title: - return None, None - - patterns = [ - r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b", - r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b", - r"Amendement\s+van\s+het\s+lid\s+(.+?)\s+over\b", - 
r"Amendement\s+van\s+de\s+leden\s+(.+?)\s+over\b", - ] - - for pat in patterns: - m = re.search(pat, title) - if m: - submitter_str = m.group(1).strip() - parts = submitter_str.split(" en ") - first_name = parts[0].strip() - first_name = re.sub(r"\s+c\.s\.", "", first_name).strip() - if not first_name: - continue - party = name_party_map.get(first_name) - return first_name, party - - return None, None - - def fetch_quarterly_data(con: duckdb.DuckDBPyConnection) -> list[dict[str, Any]]: """Fetch all right-wing motions with dates and metrics.""" rows = con.execute(""" @@ -172,12 +113,6 @@ def aggregate_quarterly( return dict(quarterly) -def quarter_sort_key(quarter_str: str) -> tuple[int, int]: - """Sort key: '2019-Q3' -> (2019, 3).""" - parts = quarter_str.split("-Q") - return (int(parts[0]), int(parts[1])) - - def compute_summary(quarterly: dict) -> dict[str, dict[str, Any]]: """Compute means, counts, and confidence intervals per quarter.""" summary = {} diff --git a/analysis/right_wing/voting_margin.py b/analysis/right_wing/voting_margin.py index 98714b8..6208c8d 100644 --- a/analysis/right_wing/voting_margin.py +++ b/analysis/right_wing/voting_margin.py @@ -38,16 +38,13 @@ import numpy as np from scipy.stats import spearmanr, pearsonr, mannwhitneyu from analysis.config import CANONICAL_RIGHT +from analysis.right_wing.common import BREAK_YEAR, DB_PATH, REPORTS_DIR logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") logger = logging.getLogger(__name__) -DB_PATH = str(PROJECT_ROOT / "data" / "motions.db") -REPORTS_DIR = PROJECT_ROOT / "reports" / "overton_window" REPORTS_DIR.mkdir(parents=True, exist_ok=True) -BREAK_YEAR = 2024 - QUARTILE_LABELS = [ "Q1 [0.00\u20130.25]", "Q2 (0.25\u20130.50]", @@ -540,22 +537,19 @@ def generate_report( "", ] - if u_p < 0.05 if isinstance(u_p := corr.get("spearman_p", 1.0), float) else False: - pass - else: - if not np.isnan(post_mean) and not np.isnan(pre_mean): - _, period_p = 
mannwhitneyu(pre_margins, post_margins, alternative="two-sided") - if period_p < 0.05: - direction = "rose" if post_mean > pre_mean else "fell" - report.append( - f"Voting margin {direction} significantly post-2024 " - f"(Mann-Whitney p = {period_p:.1e}, d = {cohens_d:+.3f})." - ) - else: - report.append( - f"Voting margin did not change significantly between periods " - f"(Mann-Whitney p = {period_p:.3f})." - ) + if not np.isnan(post_mean) and not np.isnan(pre_mean): + _, period_p = mannwhitneyu(pre_margins, post_margins, alternative="two-sided") + if period_p < 0.05: + direction = "rose" if post_mean > pre_mean else "fell" + report.append( + f"Voting margin {direction} significantly post-2024 " + f"(Mann-Whitney p = {period_p:.1e}, d = {cohens_d:+.3f})." + ) + else: + report.append( + f"Voting margin did not change significantly between periods " + f"(Mann-Whitney p = {period_p:.3f})." + ) report += [ "", diff --git a/tests/right_wing/test_common.py b/tests/right_wing/test_common.py new file mode 100644 index 0000000..43ab36b --- /dev/null +++ b/tests/right_wing/test_common.py @@ -0,0 +1,265 @@ +"""Tests for analysis/right_wing/common.py shared module. + +TDD approach: these tests verify the extracted shared helpers work correctly. 
+""" + +import math +from pathlib import Path +from unittest.mock import MagicMock, patch + +import numpy as np +import pytest + + +class TestConstants: + """Verify all exported constants are present and correctly typed.""" + + def test_canonical_centrist_is_frozenset(self): + from analysis.right_wing.common import CANONICAL_CENTRIST + assert isinstance(CANONICAL_CENTRIST, frozenset) + assert "VVD" in CANONICAL_CENTRIST + assert "D66" in CANONICAL_CENTRIST + assert "CDA" in CANONICAL_CENTRIST + assert "NSC" in CANONICAL_CENTRIST + assert "BBB" in CANONICAL_CENTRIST + assert "CU" in CANONICAL_CENTRIST + + def test_canonical_centrist_strict_subset(self): + from analysis.right_wing.common import CANONICAL_CENTRIST, CANONICAL_CENTRIST_STRICT + assert CANONICAL_CENTRIST_STRICT.issubset(CANONICAL_CENTRIST) + assert "VVD" not in CANONICAL_CENTRIST_STRICT + assert "BBB" not in CANONICAL_CENTRIST_STRICT + + def test_canonical_left_right_disjoint(self): + from analysis.right_wing.common import CANONICAL_LEFT, CANONICAL_RIGHT + assert len(CANONICAL_LEFT & CANONICAL_RIGHT) == 0 + + def test_coalition_dicts(self): + from analysis.right_wing.common import RUTTE_IV_COALITION, SCHOOF_COALITION + assert isinstance(RUTTE_IV_COALITION, set) + assert isinstance(SCHOOF_COALITION, set) + assert "PVV" in SCHOOF_COALITION + assert "PVV" not in RUTTE_IV_COALITION + + def test_time_constants(self): + from analysis.right_wing.common import YEAR_MIN, YEAR_MAX, BREAK_YEAR + assert YEAR_MIN < BREAK_YEAR < YEAR_MAX + assert BREAK_YEAR == 2024 + + def test_paths_exist(self): + from analysis.right_wing.common import ROOT, DB_PATH, REPORTS_DIR + assert ROOT.exists() + assert isinstance(DB_PATH, str) + assert "motions.db" in DB_PATH + assert isinstance(REPORTS_DIR, Path) + + +class TestCohensD: + """Test Cohen's d effect size calculation.""" + + def test_identical_groups_returns_zero(self): + from analysis.right_wing.common import cohens_d + d = cohens_d([1, 2, 3], [1, 2, 3]) + assert d == 0.0 + + 
def test_first_group_higher(self): + from analysis.right_wing.common import cohens_d + # cohens_d(x, y) = (mean_y - mean_x) / pooled_std (based on implementation) + d = cohens_d([4, 5, 6], [1, 2, 3]) + assert d < 0 # mean_y < mean_x → negative + + def test_second_group_higher(self): + from analysis.right_wing.common import cohens_d + d = cohens_d([1, 2, 3], [4, 5, 6]) + assert d > 0 # mean_y > mean_x → positive + + def test_known_value(self): + from analysis.right_wing.common import cohens_d + # [1,2,3,4,5] vs [3,4,5,6,7]: mean_y - mean_x = +2, pooled std ≈ 1.58 + d = cohens_d([1, 2, 3, 4, 5], [3, 4, 5, 6, 7]) + assert d > 0 # Second group has higher mean + assert abs(d) > 1.0 # Should be a large effect + + +class TestQuarterSortKey: + """Test quarter string sorting.""" + + def test_basic_sort(self): + from analysis.right_wing.common import quarter_sort_key + assert quarter_sort_key("2024-Q1") < quarter_sort_key("2024-Q2") + assert quarter_sort_key("2023-Q4") < quarter_sort_key("2024-Q1") + + def test_sort_order(self): + from analysis.right_wing.common import quarter_sort_key + quarters = ["2024-Q3", "2024-Q1", "2023-Q4", "2024-Q2"] + sorted_q = sorted(quarters, key=quarter_sort_key) + assert sorted_q == ["2023-Q4", "2024-Q1", "2024-Q2", "2024-Q3"] + + def test_invalid_format_raises(self): + from analysis.right_wing.common import quarter_sort_key + with pytest.raises((ValueError, IndexError)): + quarter_sort_key("invalid") + + +class TestFindInflectionPoint: + """Test inflection point detection using 3-quarter rolling average.""" + + def test_simple_inflection(self): + from analysis.right_wing.common import find_inflection_point + quarters = ["2023-Q4", "2024-Q1", "2024-Q2", "2024-Q3", "2024-Q4"] + values = [0.2, 0.5, 0.6, 0.7, 0.8] + result = find_inflection_point(quarters, values, threshold=0.4) + # Rolling avg at index 1: (0.2 + 0.5 + 0.6)/3 = 0.433 > 0.4 + assert result == "2024-Q1" + + def test_no_inflection(self): + from analysis.right_wing.common import 
find_inflection_point + quarters = ["2023-Q4", "2024-Q1", "2024-Q2", "2024-Q3"] + values = [0.1, 0.15, 0.2, 0.25] + result = find_inflection_point(quarters, values, threshold=0.4) + assert result is None + + def test_inflection_at_end(self): + from analysis.right_wing.common import find_inflection_point + quarters = ["2023-Q4", "2024-Q1", "2024-Q2", "2024-Q3", "2024-Q4"] + values = [0.1, 0.15, 0.2, 0.5, 0.6] + result = find_inflection_point(quarters, values, threshold=0.4) + # Rolling avg at index 3: (0.2 + 0.5 + 0.6)/3 = 0.433 > 0.4 + assert result == "2024-Q3" + + def test_too_short_returns_none(self): + from analysis.right_wing.common import find_inflection_point + result = find_inflection_point(["2024-Q1", "2024-Q2"], [0.5, 0.6], 0.4) + assert result is None + + +class TestMotionPassed: + """Test motion passage detection via result field.""" + + def test_aangenomen_passes(self): + from analysis.right_wing.common import motion_passed + votes = {"result": "aangenomen", "voor": 100, "tegen": 50} + assert motion_passed(votes) is True + + def test_verworpen_fails(self): + from analysis.right_wing.common import motion_passed + votes = {"result": "verworpen", "voor": 30, "tegen": 70} + assert motion_passed(votes) is False + + def test_none_fails(self): + from analysis.right_wing.common import motion_passed + assert motion_passed(None) is False + + def test_empty_dict_fails(self): + from analysis.right_wing.common import motion_passed + assert motion_passed({}) is False + + def test_json_string_parses(self): + from analysis.right_wing.common import motion_passed + votes = '{"result": "aangenomen", "voor": 100}' + assert motion_passed(votes) is True + + def test_invalid_json_fails(self): + from analysis.right_wing.common import motion_passed + assert motion_passed("not json") is False + + +class TestBuildPartyNameMap: + """Test MP name to party mapping.""" + + def test_returns_dict(self): + from analysis.right_wing.common import build_party_name_map, _conn, DB_PATH + 
with _conn(DB_PATH) as con: + result = build_party_name_map(con) + assert isinstance(result, dict) + assert len(result) > 100 # Should have many MPs + + def test_known_mp_maps_to_party(self): + from analysis.right_wing.common import build_party_name_map, _conn, DB_PATH + with _conn(DB_PATH) as con: + result = build_party_name_map(con) + # Wilders should map to PVV + assert "Wilders" in result + assert result["Wilders"] == "PVV" + + def test_groenlinks_pvda_normalized(self): + from analysis.right_wing.common import build_party_name_map, _conn, DB_PATH + with _conn(DB_PATH) as con: + result = build_party_name_map(con) + # Klaver should map to GroenLinks-PvdA + assert "Klaver" in result + assert result["Klaver"] == "GroenLinks-PvdA" + + +class TestParseLeadSubmitter: + """Test motion title parsing for lead MP name.""" + + def test_standard_motie_format(self): + from analysis.right_wing.common import parse_lead_submitter + title = "Motie van het lid Wilders over migratie" + name, party = parse_lead_submitter(title, {"Wilders": "PVV"}) + assert name == "Wilders" + assert party == "PVV" + + def test_gewijzigde_motie_format(self): + from analysis.right_wing.common import parse_lead_submitter + title = "Gewijzigde Motie van het lid Klaver c.s. 
over klimaat" + name, party = parse_lead_submitter(title, {"Klaver": "GL"}) + assert name == "Klaver" + + def test_amendement_format(self): + from analysis.right_wing.common import parse_lead_submitter + title = "Amendement van het lid Omtzigt over begroting" + name, party = parse_lead_submitter(title, {"Omtzigt": "NSC"}) + assert name == "Omtzigt" + + def test_non_motie_returns_none(self): + from analysis.right_wing.common import parse_lead_submitter + title = "Verslag van een schriftelijk overleg" + name, party = parse_lead_submitter(title, {}) + assert name is None + assert party is None + + def test_empty_title_returns_none(self): + from analysis.right_wing.common import parse_lead_submitter + name, party = parse_lead_submitter("", {}) + assert name is None + + +class TestConnectionHelper: + """Test _conn context manager.""" + + def test_conn_returns_context_manager(self): + from analysis.right_wing.common import _conn, DB_PATH + # Should not raise when using the default DB_PATH + with _conn(DB_PATH) as con: + assert con is not None + # Verify it's a valid connection + result = con.execute("SELECT 1").fetchone() + assert result[0] == 1 + + +class TestIntegration: + """Integration tests verifying common.py works with real data.""" + + def test_db_path_points_to_existing_file(self): + from analysis.right_wing.common import DB_PATH + from pathlib import Path + assert Path(DB_PATH).exists(), f"Database not found at {DB_PATH}" + + def test_reports_dir_exists(self): + from analysis.right_wing.common import REPORTS_DIR + assert REPORTS_DIR.exists(), f"Reports dir not found: {REPORTS_DIR}" + + def test_centrist_parties_in_database(self): + """Verify CANONICAL_CENTRIST parties actually exist in mp_votes.""" + from analysis.right_wing.common import CANONICAL_CENTRIST, _conn, DB_PATH + with _conn(DB_PATH) as con: + db_parties = {r[0] for r in con.execute( + "SELECT DISTINCT party FROM mp_votes WHERE party IS NOT NULL" + ).fetchall()} + + # Check that at least some 
centrist parties are in the database + found = CANONICAL_CENTRIST & db_parties + assert len(found) >= 4, f"Only {found} centrist parties found in mp_votes" +