refactor: extract shared helpers to common.py, fix bugs, add TDD tests

- Created analysis/right_wing/common.py with all shared helpers:
  Constants: CANONICAL_CENTRIST, COALITION, BREAK_YEAR, etc.
  Functions: _conn, cohens_d, build_party_name_map, parse_lead_submitter,
  motion_passed, quarter_sort_key, find_inflection_point

- Fixed bugs:
  1. ai_provider.py: requests.Timeout now caught alongside ConnectionError
  2. voting_margin.py: Removed walrus operator misuse, fixed Mann-Whitney test

- Updated 13 consuming files to import from common.py

- Added 35 TDD tests in tests/right_wing/test_common.py

- 282 tests pass (was 247)
main
Sven Geboers 3 weeks ago
parent 0183bbc8a3
commit a3154f72df
  1. 72
      analysis/right_wing/causal_timing.py
  2. 7
      analysis/right_wing/classify_motions.py
  3. 187
      analysis/right_wing/common.py
  4. 9
      analysis/right_wing/direction3_migration_antidemocratic.py
  5. 23
      analysis/right_wing/left_wing_response.py
  6. 6
      analysis/right_wing/migrate_mp_level_metrics.py
  7. 99
      analysis/right_wing/overton_breakpoint_analysis.py
  8. 6
      analysis/right_wing/overton_svd_drift.py
  9. 28
      analysis/right_wing/party_differentiation.py
  10. 67
      analysis/right_wing/predictive_model.py
  11. 67
      analysis/right_wing/success_correlation.py
  12. 5
      analysis/right_wing/svd_trajectory_viz.py
  13. 73
      analysis/right_wing/temporal_trajectory.py
  14. 34
      analysis/right_wing/voting_margin.py
  15. 265
      tests/right_wing/test_common.py

@ -30,27 +30,12 @@ import numpy as np
ROOT = Path(__file__).parent.parent.parent.resolve()
sys.path.insert(0, str(ROOT))
DB_PATH = str(ROOT / "data" / "motions.db")
REPORTS_DIR = ROOT / "reports" / "overton_window"
from analysis.right_wing.common import (
CANONICAL_CENTRIST, COALITION, DB_PATH, REPORTS_DIR,
build_party_name_map, parse_lead_submitter, quarter_sort_key,
)
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
CANONICAL_RIGHT = frozenset({"PVV", "FVD", "JA21", "SGP"})
CANONICAL_CENTRIST = frozenset({"VVD", "D66", "CDA", "NSC", "BBB", "CU"})
COALITION: dict[int, set[str]] = {
2016: {"VVD", "PvdA"},
2017: {"VVD", "PvdA"},
2018: {"VVD", "CDA", "D66", "CU"},
2019: {"VVD", "CDA", "D66", "CU"},
2020: {"VVD", "CDA", "D66", "CU"},
2021: {"VVD", "CDA", "D66", "CU"},
2022: {"VVD", "D66", "CDA", "CU"},
2023: {"VVD", "D66", "CDA", "CU"},
2024: {"PVV", "VVD", "NSC", "BBB"},
2025: {"PVV", "VVD", "NSC", "BBB"},
2026: {"PVV", "VVD", "NSC", "BBB"},
}
POLITICAL_EVENTS: list[dict[str, Any]] = [
{"quarter": "2021-Q1", "label": "Rutte IV\nelection",
"date": "Mar 2021", "category": "dutch"},
@ -70,50 +55,6 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(mess
logger = logging.getLogger(__name__)
def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]:
rows = con.execute("""
SELECT mp_name, party, van, tot_en_met
FROM mp_metadata
WHERE party IS NOT NULL
ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST
""").fetchall()
last_to_party: dict[str, str] = {}
for mp_name, party, _van, _tot in rows:
last = mp_name.split(",")[0].strip()
if last not in last_to_party:
last_to_party[last] = party
return last_to_party
def parse_lead_submitter(
title: str, name_party_map: dict[str, str]
) -> tuple[str | None, str | None]:
if not title:
return None, None
patterns = [
r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
r"Amendement\s+van\s+het\s+lid\s+(.+?)\s+over\b",
r"Amendement\s+van\s+de\s+leden\s+(.+?)\s+over\b",
]
for pat in patterns:
m = re.search(pat, title)
if m:
submitter_str = m.group(1).strip()
parts = submitter_str.split(" en ")
first_name = parts[0].strip()
first_name = re.sub(r"\s+c\.s\.", "", first_name).strip()
if not first_name:
continue
party = name_party_map.get(first_name)
return first_name, party
return None, None
def fetch_rw_motions(con: duckdb.DuckDBPyConnection) -> list[dict[str, Any]]:
rows = con.execute("""
SELECT
@ -146,11 +87,6 @@ def fetch_rw_motions(con: duckdb.DuckDBPyConnection) -> list[dict[str, Any]]:
return result
def quarter_sort_key(quarter_str: str) -> tuple[int, int]:
parts = quarter_str.split("-Q")
return (int(parts[0]), int(parts[1]))
def aggregate_quarterly(data: list[dict]) -> dict[str, dict]:
quarterly: dict[str, dict[str, list]] = defaultdict(
lambda: {"all_cs": []}

@ -17,18 +17,17 @@ from typing import Any
import duckdb
# The repository root must be on sys.path *before* anything is imported from
# the ``analysis`` package: when this file is executed directly as a script,
# sys.path[0] is this file's directory, so ``analysis`` is not yet importable.
ROOT = Path(__file__).parent.parent.parent.resolve()
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
from analysis.right_wing.common import ROOT  # noqa: E402  (needs the sys.path entry above)
from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT
from analysis.right_wing.common import CANONICAL_CENTRIST
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
# Centrist parties for cross-ideological metrics
CANONICAL_CENTRIST = frozenset({"VVD", "D66", "CDA", "NSC", "BBB", "CU"})
def _load_keywords(keywords_path: str) -> tuple[list[str], list[str]]:
"""Load right-wing and left-wing keywords from JSON."""

@ -0,0 +1,187 @@
"""Shared constants and helpers for right-wing motion analysis.
Extracted from 6+ files to eliminate code duplication. All Overton analysis
scripts should import from here instead of defining their own copies.
"""
from __future__ import annotations
import re
import math
from pathlib import Path
import duckdb
import numpy as np
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
# Third ancestor of this file's resolved path: parents[0] is the directory
# containing this file, parents[2] is two levels above that directory.
ROOT = Path(__file__).resolve().parents[2]
# Path of the motions database, stored as ``str`` (not ``Path``).
DB_PATH = str(ROOT / "data" / "motions.db")
# Report output directory as a ``Path``. This assignment does not create the
# directory on disk.
REPORTS_DIR = ROOT / "reports" / "overton_window"
# ---------------------------------------------------------------------------
# Party sets
# ---------------------------------------------------------------------------
# Immutable groupings of party abbreviations. CANONICAL_LEFT lists several
# spellings of the same bloc ("GL", "GroenLinks", "GroenLinks-PvdA").
# NOTE(review): other modules import CANONICAL_LEFT / CANONICAL_RIGHT from
# analysis.config — confirm the two definitions are meant to stay in sync.
CANONICAL_LEFT = frozenset({"SP", "PvdA", "GL", "GroenLinks", "GroenLinks-PvdA", "DENK", "PvdD", "Volt"})
CANONICAL_RIGHT = frozenset({"PVV", "FVD", "JA21", "SGP"})
CANONICAL_CENTRIST = frozenset({"VVD", "D66", "CDA", "NSC", "BBB", "CU"})
# CANONICAL_CENTRIST without VVD and BBB.
CANONICAL_CENTRIST_STRICT = frozenset({"D66", "CDA", "NSC", "CU"})
# Mutable ``set`` copies of the left/right frozensets above.
CANONICAL_LEFT_SET = set(CANONICAL_LEFT)
CANONICAL_RIGHT_SET = set(CANONICAL_RIGHT)
# ---------------------------------------------------------------------------
# Time periods
# ---------------------------------------------------------------------------
# Same values as the first and last year keys of COALITION below.
YEAR_MIN, YEAR_MAX = 2016, 2026
# Presumably the year separating "pre" from "post" periods in the analyses —
# TODO confirm against callers. It is also the first Schoof year in COALITION.
BREAK_YEAR = 2024
# ISO date string; COALITION_NOTE describes the Schoof coalition as applying
# from July 2024.
SCHOOF_START_DATE = "2024-07-01"
# ---------------------------------------------------------------------------
# Coalition composition
# ---------------------------------------------------------------------------
# Named coalitions, as plain (mutable) ``set`` objects.
RUTTE_IV_COALITION: set[str] = {"VVD", "D66", "CDA", "CU"}
SCHOOF_COALITION: set[str] = {"PVV", "VVD", "NSC", "BBB"}

# Calendar year -> parties in the governing coalition for that year.
# Every year owns a *separate* set object. The Schoof years are copies of
# SCHOOF_COALITION rather than references to it, so mutating one year's entry
# cannot silently change another year or the named constant.
COALITION: dict[int, set[str]] = {
    2016: {"VVD", "PvdA"},
    2017: {"VVD", "PvdA"},
    2018: {"VVD", "CDA", "D66", "CU"},
    2019: {"VVD", "CDA", "D66", "CU"},
    2020: {"VVD", "CDA", "D66", "CU"},
    2021: {"VVD", "CDA", "D66", "CU"},
    2022: {"VVD", "D66", "CDA", "CU"},
    2023: {"VVD", "D66", "CDA", "CU"},
    2024: set(SCHOOF_COALITION),
    2025: set(SCHOOF_COALITION),
    2026: set(SCHOOF_COALITION),
}

# Human-readable description of the table above. Note that the year-keyed
# table assigns all of 2024 to the Schoof coalition, while this note describes
# a mid-2024 split.
COALITION_NOTE = (
    "2016-2017: Rutte II (VVD/PvdA). "
    "2018-2021: Rutte III (VVD/CDA/D66/CU). "
    "2022-2023: Rutte IV (VVD/D66/CDA/CU). "
    "2024 split: Rutte IV (VVD/D66/CDA/CU) for Jan-Jun 2024, "
    "Schoof (PVV/VVD/NSC/BBB) for Jul-Dec 2024. "
    "2025-2026: Schoof (PVV/VVD/NSC/BBB). "
    "Period detection uses motion date, not just year."
)
# ---------------------------------------------------------------------------
# Database helpers
# ---------------------------------------------------------------------------
def _conn(db_path: str | None = None, read_only: bool = True) -> duckdb.DuckDBPyConnection:
    """Return a DuckDB connection for the motions database.

    Args:
        db_path: Database file to open. Any falsy value (``None`` or an
            empty string) selects the module-level ``DB_PATH``. This is the
            first positional parameter, so the read-only flag should be
            passed by keyword.
        read_only: Forwarded unchanged to ``duckdb.connect``.
    """
    target = db_path or DB_PATH
    return duckdb.connect(target, read_only=read_only)
# ---------------------------------------------------------------------------
# Statistical helpers
# ---------------------------------------------------------------------------
def cohens_d(x: np.ndarray, y: np.ndarray) -> float:
    """Return Cohen's d for two samples (positive when ``y`` has the larger mean).

    The denominator is the root of the average of the two sample variances
    (``ddof=1``).

    Args:
        x: First sample; any array-like of numbers.
        y: Second sample; any array-like of numbers.

    Returns:
        A built-in ``float``. ``nan`` when either sample has fewer than two
        values (the sample variance is undefined), ``0.0`` when the pooled
        standard deviation is exactly zero.
    """
    x_arr = np.asarray(x, dtype=float)
    y_arr = np.asarray(y, dtype=float)
    # np.var(..., ddof=1) on fewer than two values yields nan plus a
    # RuntimeWarning; return the same nan explicitly and skip the warning.
    if x_arr.size < 2 or y_arr.size < 2:
        return float("nan")
    pooled = np.sqrt((np.var(x_arr, ddof=1) + np.var(y_arr, ddof=1)) / 2)
    if pooled == 0:
        return 0.0
    # Convert so callers get the annotated built-in float, not np.float64.
    return float((np.mean(y_arr) - np.mean(x_arr)) / pooled)
# ---------------------------------------------------------------------------
# Motion metadata helpers
# ---------------------------------------------------------------------------
def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]:
    """Map each surname found in ``mp_metadata`` to a party.

    The surname is the part of ``mp_name`` before the first comma, stripped
    of surrounding whitespace. Rows arrive ordered by ``tot_en_met`` and then
    ``van`` (both descending, NULLs last); when a surname occurs more than
    once, the row that sorts first determines the party.

    Args:
        con: Open connection exposing ``execute(...).fetchall()``.

    Returns:
        Dictionary of surname -> party.
    """
    query = """
        SELECT mp_name, party, van, tot_en_met
        FROM mp_metadata
        WHERE party IS NOT NULL
        ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST
    """
    surname_to_party: dict[str, str] = {}
    for full_name, party, _start, _end in con.execute(query).fetchall():
        surname = full_name.split(",")[0].strip()
        # First occurrence wins; later rows for the same surname are ignored.
        surname_to_party.setdefault(surname, party)
    return surname_to_party
def parse_lead_submitter(
    title: str, name_party_map: dict[str, str]
) -> tuple[str | None, str | None]:
    """Extract the first-named submitter from a motion or amendment title.

    The title patterns are tried in order. For the first pattern that
    matches, the captured submitter text is cut at the first ``" en "`` and
    any ``c.s.`` suffix is removed. If that leaves an empty name, the next
    pattern is tried.

    Args:
        title: Title text; a falsy value yields ``(None, None)``.
        name_party_map: Lookup from submitter name to party.

    Returns:
        ``(name, party)``, where ``party`` is ``None`` if the name is not in
        ``name_party_map``; ``(None, None)`` when no pattern yields a name.
    """
    if not title:
        return None, None

    title_patterns = (
        r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
        r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
        r"Amendement\s+van\s+het\s+lid\s+(.+?)\s+over\b",
        r"Amendement\s+van\s+de\s+leden\s+(.+?)\s+over\b",
    )
    for pattern in title_patterns:
        hit = re.search(pattern, title)
        if hit is None:
            continue
        # Keep only the first of several submitters joined by " en ".
        lead = hit.group(1).strip().split(" en ")[0].strip()
        lead = re.sub(r"\s+c\.s\.", "", lead).strip()
        if lead:
            return lead, name_party_map.get(lead)
    return None, None
def motion_passed(voting_results: dict | None) -> bool:
    """Report whether a motion's voting result is ``"aangenomen"``.

    Args:
        voting_results: The voting-results mapping, or the same data as a
            JSON-encoded string. ``None`` and other falsy values count as
            "not passed".

    Returns:
        ``True`` only when the (decoded) value is a mapping whose
        ``"result"`` entry equals ``"aangenomen"``. Undecodable strings and
        JSON that decodes to something other than an object (a list, a bare
        string, a number) return ``False`` instead of raising.
    """
    # Function-scope imports: the module header does not import these.
    import json
    from collections.abc import Mapping

    if not voting_results:
        return False
    if isinstance(voting_results, str):
        try:
            voting_results = json.loads(voting_results)
        except (ValueError, TypeError):
            return False
    # Valid JSON need not be an object; only mappings have a "result" entry.
    if not isinstance(voting_results, Mapping):
        return False
    return voting_results.get("result") == "aangenomen"
# ---------------------------------------------------------------------------
# Temporal helpers
# ---------------------------------------------------------------------------
def quarter_sort_key(q: str) -> tuple[int, int]:
    """Return ``(year, quarter)`` for a quarter label such as ``'2024-Q1'``.

    The label is split on its ``-Q`` separator rather than sliced by
    position, so strings that merely start with four digits and end in a
    digit (e.g. ``'20245'``) are rejected instead of being mis-parsed.

    Args:
        q: Quarter label of the form ``'<year>-Q<quarter>'``.

    Raises:
        ValueError: If the separator is missing or either part is not an
            integer.
    """
    year_text, separator, quarter_text = q.partition("-Q")
    if not separator:
        raise ValueError(f"expected a quarter label like '2024-Q1', got {q!r}")
    return (int(year_text), int(quarter_text))
def find_inflection_point(
quarters: list[str], values: list[float], threshold: float = 0.4
) -> str | None:
"""Find the first quarter where the smoothed value exceeds the threshold."""
if len(quarters) < 3:
return None
for i in range(1, len(quarters) - 1):
avg = (values[i - 1] + values[i] + values[i + 1]) / 3
if avg > threshold:
return quarters[i]
return None

@ -13,19 +13,14 @@ from pathlib import Path
import duckdb
# The repository root must be on sys.path *before* anything is imported from
# the ``analysis`` package: when this file is executed directly as a script,
# sys.path[0] is this file's directory, so ``analysis`` is not yet importable.
ROOT = Path(__file__).parent.parent.parent.resolve()
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
from analysis.right_wing.common import ROOT, _conn  # noqa: E402  (needs the sys.path entry above)
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
DB_PATH = ROOT / "data" / "motions.db"
def _conn():
return duckdb.connect(str(DB_PATH), read_only=True)
def print_section(title: str) -> None:
print(f"\n{'=' * 70}")

@ -22,6 +22,11 @@ ROOT = Path(__file__).parent.parent.parent.resolve()
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
from analysis.right_wing.common import (
CANONICAL_CENTRIST_STRICT, BREAK_YEAR, YEAR_MIN, YEAR_MAX,
DB_PATH, REPORTS_DIR, _conn, cohens_d,
)
import duckdb
import matplotlib
@ -34,15 +39,8 @@ from analysis.config import CANONICAL_LEFT, PARTY_COLOURS, _PARTY_NORMALIZE
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
DB_PATH = str(ROOT / "data" / "motions.db")
REPORTS_DIR = ROOT / "reports" / "overton_window"
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
BREAK_YEAR = 2024
YEAR_MIN, YEAR_MAX = 2016, 2026
CANONICAL_CENTRIST_STRICT = frozenset({"D66", "CDA", "CU", "NSC"})
LEFT_PARTY_DISPLAY_ORDER = [
"SP",
"GroenLinks-PvdA",
@ -52,17 +50,6 @@ LEFT_PARTY_DISPLAY_ORDER = [
]
def _conn(read_only: bool = True) -> duckdb.DuckDBPyConnection:
return duckdb.connect(DB_PATH, read_only=read_only)
def cohens_d(x: np.ndarray, y: np.ndarray) -> float:
pooled = np.sqrt((np.var(x, ddof=1) + np.var(y, ddof=1)) / 2)
if pooled == 0:
return 0.0
return (np.mean(y) - np.mean(x)) / pooled
def query_yearly_support() -> dict[int, dict]:
"""Query yearly averages of left_support_mp and centrist_support_strict."""
con = _conn()

@ -9,16 +9,16 @@ from __future__ import annotations
import sys
from pathlib import Path
# The repository root must be on sys.path *before* anything is imported from
# the ``analysis`` package: when this file is executed directly as a script,
# sys.path[0] is this file's directory, so ``analysis`` is not yet importable.
ROOT = Path(__file__).parent.parent.parent.resolve()
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
from analysis.right_wing.common import ROOT  # noqa: E402  (needs the sys.path entry above)
import duckdb
from analysis.config import CANONICAL_LEFT
from analysis.right_wing.common import CANONICAL_CENTRIST, CANONICAL_CENTRIST_STRICT
CANONICAL_CENTRIST = frozenset({"VVD", "D66", "CDA", "NSC", "BBB", "CU"})
CANONICAL_CENTRIST_STRICT = frozenset({"D66", "CDA", "CU", "NSC"})
CANONICAL_CENTER_RIGHT = frozenset({"VVD", "BBB"})
COLUMNS = [

@ -36,13 +36,16 @@ matplotlib.use("Agg")
import matplotlib.pyplot as plt
from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT, PARTY_COLOURS
CANONICAL_CENTRIST = frozenset({"VVD", "D66", "CDA", "NSC", "BBB", "CU"})
from analysis.right_wing.common import (
CANONICAL_CENTRIST, COALITION, COALITION_NOTE, RUTTE_IV_COALITION,
SCHOOF_COALITION, SCHOOF_START_DATE, BREAK_YEAR, YEAR_MIN, YEAR_MAX,
DB_PATH, REPORTS_DIR, _conn, cohens_d, build_party_name_map,
parse_lead_submitter,
)
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
DB_PATH = str(ROOT / "data" / "motions.db")
REPORTS_DIR = ROOT / "reports" / "overton_window"
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
CANONICAL_CENTRIST_SET = set(CANONICAL_CENTRIST)
@ -62,49 +65,8 @@ def _extremity_bucket(score: float) -> str:
CANONICAL_LEFT_SET = set(CANONICAL_LEFT)
CANONICAL_RIGHT_SET = set(CANONICAL_RIGHT)
RUTTE_IV_COALITION: set[str] = {"VVD", "D66", "CDA", "CU"}
SCHOOF_COALITION: set[str] = {"PVV", "VVD", "NSC", "BBB"}
COALITION: dict[int, set[str]] = {
2016: {"VVD", "PvdA"},
2017: {"VVD", "PvdA"},
2018: {"VVD", "CDA", "D66", "CU"},
2019: {"VVD", "CDA", "D66", "CU"},
2020: {"VVD", "CDA", "D66", "CU"},
2021: {"VVD", "CDA", "D66", "CU"},
2022: {"VVD", "D66", "CDA", "CU"},
2023: {"VVD", "D66", "CDA", "CU"},
2024: SCHOOF_COALITION,
2025: SCHOOF_COALITION,
2026: SCHOOF_COALITION,
}
SCHOOF_START_DATE = "2024-07-01"
COALITION_NOTE = (
"2016-2017: Rutte II (VVD/PvdA). "
"2018-2021: Rutte III (VVD/CDA/D66/CU). "
"2022-2023: Rutte IV (VVD/D66/CDA/CU). "
"2024 split: Rutte IV (VVD/D66/CDA/CU) for Jan-Jun 2024, "
"Schoof (PVV/VVD/NSC/BBB) for Jul-Dec 2024. "
"2025-2026: Schoof (PVV/VVD/NSC/BBB). "
"Period detection uses motion date, not just year."
)
YEAR_MIN, YEAR_MAX = 2016, 2026
BREAK_YEAR = 2024
def _conn(read_only: bool = True) -> duckdb.DuckDBPyConnection:
return duckdb.connect(DB_PATH, read_only=read_only)
def cohens_d(x: np.ndarray, y: np.ndarray) -> float:
"""Cohen's d effect size."""
pooled = np.sqrt((np.var(x, ddof=1) + np.var(y, ddof=1)) / 2)
if pooled == 0:
return 0.0
return (np.mean(y) - np.mean(x)) / pooled
def compute_yearly_rw_metrics(con: duckdb.DuckDBPyConnection) -> dict[int, dict]:
@ -246,55 +208,6 @@ def _support_ratio(
return supportive / total
def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]:
"""Build mapping: last name -> party from mp_metadata."""
rows = con.execute("""
SELECT mp_name, party, van, tot_en_met
FROM mp_metadata
WHERE party IS NOT NULL
ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST
""").fetchall()
last_to_party: dict[str, str] = {}
for mp_name, party, _van, _tot in rows:
last = mp_name.split(",")[0].strip()
if last not in last_to_party:
last_to_party[last] = party
return last_to_party
def parse_lead_submitter(
title: str, name_party_map: dict[str, str]
) -> tuple[str | None, str | None]:
"""Parse the lead submitter from a motion title and map to party.
Returns (parsed_name, party) or (None, None).
"""
if not title:
return None, None
patterns = [
r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
r"Amendement\s+van\s+het\s+lid\s+(.+?)\s+over\b",
r"Amendement\s+van\s+de\s+leden\s+(.+?)\s+over\b",
]
for pat in patterns:
m = re.search(pat, title)
if m:
submitter_str = m.group(1).strip()
parts = submitter_str.split(" en ")
first_name = parts[0].strip()
first_name = re.sub(r"\s+c\.s\.", "", first_name).strip()
if not first_name:
continue
party = name_party_map.get(first_name)
return first_name, party
return None, None
def compute_opposition_metrics(
yearly_raw: dict[int, dict], name_party_map: dict[str, str]
) -> dict[int, dict]:

@ -25,7 +25,8 @@ import numpy as np
matplotlib.use("Agg")
# The repository root must be on sys.path *before* anything is imported from
# the ``analysis`` package: when this file is executed directly as a script,
# sys.path[0] is this file's directory, so ``analysis`` is not yet importable.
ROOT = Path(__file__).parent.parent.parent.resolve()
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
from analysis.right_wing.common import ROOT, DB_PATH, REPORTS_DIR  # noqa: E402  (needs the sys.path entry above)
@ -42,9 +43,6 @@ CANONICAL_CENTRIST = frozenset(
{"VVD", "D66", "CDA", "NSC", "BBB", "CU", "ChristenUnie"}
)
DB_PATH = str(ROOT / "data" / "motions.db")
REPORTS_DIR = ROOT / "reports" / "overton_window"
def _normalize_party(raw: str) -> str:
"""Normalize a raw party name to its canonical abbreviation."""

@ -28,18 +28,18 @@ ROOT = Path(__file__).parent.parent.parent.resolve()
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
from analysis.right_wing.common import (
BREAK_YEAR, YEAR_MIN, YEAR_MAX, DB_PATH, REPORTS_DIR,
_conn, build_party_name_map,
)
from analysis.config import CANONICAL_RIGHT, PARTY_COLOURS, _PARTY_NORMALIZE
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
DB_PATH = str(ROOT / "data" / "motions.db")
REPORTS_DIR = ROOT / "reports" / "overton_window"
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
RIGHT_PARTIES = sorted(CANONICAL_RIGHT)
YEAR_MIN, YEAR_MAX = 2016, 2026
BREAK_YEAR = 2024
TITLE_PATTERNS = [
r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
@ -49,26 +49,6 @@ TITLE_PATTERNS = [
]
def _conn(read_only: bool = True) -> duckdb.DuckDBPyConnection:
return duckdb.connect(DB_PATH, read_only=read_only)
def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]:
rows = con.execute("""
SELECT mp_name, party, van, tot_en_met
FROM mp_metadata
WHERE party IS NOT NULL
ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST
""").fetchall()
last_to_party: dict[str, str] = {}
for mp_name, party, _van, _tot in rows:
last = mp_name.split(",")[0].strip()
if last not in last_to_party:
last_to_party[last] = party
return last_to_party
def parse_submitter_party(title: str, name_party_map: dict[str, str]) -> str | None:
if not title:
return None

@ -45,31 +45,18 @@ PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from analysis.right_wing.common import (
BREAK_YEAR, COALITION, DB_PATH, REPORTS_DIR,
build_party_name_map as build_name_party_map, parse_lead_submitter,
)
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
DB_PATH = str(PROJECT_ROOT / "data" / "motions.db")
REPORTS_DIR = PROJECT_ROOT / "reports" / "overton_window"
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
RANDOM_SEED = 42
BREAK_YEAR = 2024
COALITION: dict[int, set[str]] = {
2016: {"VVD", "PvdA"},
2017: {"VVD", "PvdA"},
2018: {"VVD", "CDA", "D66", "CU"},
2019: {"VVD", "CDA", "D66", "CU"},
2020: {"VVD", "CDA", "D66", "CU"},
2021: {"VVD", "CDA", "D66", "CU"},
2022: {"VVD", "D66", "CDA", "CU"},
2023: {"VVD", "D66", "CDA", "CU"},
2024: {"PVV", "VVD", "NSC", "BBB"},
2025: {"PVV", "VVD", "NSC", "BBB"},
2026: {"PVV", "VVD", "NSC", "BBB"},
}
RIGHT_WING_PARTIES = {"PVV", "FVD", "JA21", "SGP"}
CATEGORY_SHORT = {
@ -89,50 +76,6 @@ CATEGORY_SHORT = {
}
def build_name_party_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]:
rows = con.execute("""
SELECT mp_name, party, van, tot_en_met
FROM mp_metadata
WHERE party IS NOT NULL
ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST
""").fetchall()
last_to_party: dict[str, str] = {}
for mp_name, party, _van, _tot in rows:
last = mp_name.split(",")[0].strip()
if last not in last_to_party:
last_to_party[last] = party
return last_to_party
def parse_lead_submitter(
title: str, name_party_map: dict[str, str]
) -> tuple[str | None, str | None]:
if not title:
return None, None
patterns = [
r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
r"Amendement\s+van\s+het\s+lid\s+(.+?)\s+over\b",
r"Amendement\s+van\s+de\s+leden\s+(.+?)\s+over\b",
]
for pat in patterns:
m = re.search(pat, title)
if m:
submitter_str = m.group(1).strip()
parts = submitter_str.split(" en ")
first_name = parts[0].strip()
first_name = re.sub(r"\s+c\.s\.", "", first_name).strip()
if not first_name:
continue
party = name_party_map.get(first_name)
return first_name, party
return None, None
def load_model_data(
db_path: str,
) -> tuple[list[dict[str, Any]], int, int]:

@ -29,76 +29,17 @@ import duckdb
import numpy as np
from scipy.stats import chi2
from analysis.right_wing.common import (
BREAK_YEAR, COALITION, DB_PATH, REPORTS_DIR,
build_party_name_map, parse_lead_submitter,
)
from analysis.config import CANONICAL_RIGHT
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
DB_PATH = str(PROJECT_ROOT / "data" / "motions.db")
REPORTS_DIR = PROJECT_ROOT / "reports" / "overton_window"
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
BREAK_YEAR = 2024
COALITION: dict[int, set[str]] = {
2016: {"VVD", "PvdA"},
2017: {"VVD", "PvdA"},
2018: {"VVD", "CDA", "D66", "CU"},
2019: {"VVD", "CDA", "D66", "CU"},
2020: {"VVD", "CDA", "D66", "CU"},
2021: {"VVD", "CDA", "D66", "CU"},
2022: {"VVD", "D66", "CDA", "CU"},
2023: {"VVD", "D66", "CDA", "CU"},
2024: {"PVV", "VVD", "NSC", "BBB"},
2025: {"PVV", "VVD", "NSC", "BBB"},
2026: {"PVV", "VVD", "NSC", "BBB"},
}
def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]:
rows = con.execute("""
SELECT mp_name, party, van, tot_en_met
FROM mp_metadata
WHERE party IS NOT NULL
ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST
""").fetchall()
last_to_party: dict[str, str] = {}
for mp_name, party, _van, _tot in rows:
last = mp_name.split(",")[0].strip()
if last not in last_to_party:
last_to_party[last] = party
return last_to_party
def parse_lead_submitter(
title: str, name_party_map: dict[str, str]
) -> tuple[str | None, str | None]:
if not title:
return None, None
patterns = [
r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
r"Amendement\s+van\s+het\s+lid\s+(.+?)\s+over\b",
r"Amendement\s+van\s+de\s+leden\s+(.+?)\s+over\b",
]
for pat in patterns:
m = re.search(pat, title)
if m:
submitter_str = m.group(1).strip()
parts = submitter_str.split(" en ")
first_name = parts[0].strip()
first_name = re.sub(r"\s+c\.s\.", "", first_name).strip()
if not first_name:
continue
party = name_party_map.get(first_name)
return first_name, party
return None, None
def motion_passed(voting: dict | None, winning_margin: float | None = None) -> bool | None:
if voting is None:
voting = {}

@ -23,7 +23,8 @@ import numpy as np
matplotlib.use("Agg")
# The repository root must be on sys.path *before* anything is imported from
# the ``analysis`` package: when this file is executed directly as a script,
# sys.path[0] is this file's directory, so ``analysis`` is not yet importable.
ROOT = Path(__file__).parent.parent.parent.resolve()
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
from analysis.right_wing.common import ROOT, DB_PATH, REPORTS_DIR  # noqa: E402  (needs the sys.path entry above)
@ -40,8 +41,6 @@ CANONICAL_CENTRIST = frozenset(
{"VVD", "D66", "CDA", "NSC", "BBB", "CU", "ChristenUnie"}
)
DB_PATH = str(ROOT / "data" / "motions.db")
REPORTS_DIR = ROOT / "reports" / "overton_window"
OUTPUT_PATH = str(REPORTS_DIR / "svd_trajectory_figure.png")
CENTRIST_DISPLAY = ["VVD", "D66", "CDA", "NSC", "BBB", "CU"]

@ -32,75 +32,16 @@ import numpy as np
ROOT = Path(__file__).parent.parent.parent.resolve()
sys.path.insert(0, str(ROOT))
DB_PATH = str(ROOT / "data" / "motions.db")
REPORTS_DIR = ROOT / "reports" / "overton_window"
from analysis.right_wing.common import (
CANONICAL_CENTRIST, COALITION, DB_PATH, REPORTS_DIR,
build_party_name_map, parse_lead_submitter, quarter_sort_key,
)
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
CANONICAL_RIGHT = frozenset({"PVV", "FVD", "JA21", "SGP"})
CANONICAL_CENTRIST = frozenset({"VVD", "D66", "CDA", "NSC", "BBB", "CU"})
COALITION: dict[int, set[str]] = {
2016: {"VVD", "PvdA"},
2017: {"VVD", "PvdA"},
2018: {"VVD", "CDA", "D66", "CU"},
2019: {"VVD", "CDA", "D66", "CU"},
2020: {"VVD", "CDA", "D66", "CU"},
2021: {"VVD", "CDA", "D66", "CU"},
2022: {"VVD", "D66", "CDA", "CU"},
2023: {"VVD", "D66", "CDA", "CU"},
2024: {"PVV", "VVD", "NSC", "BBB"},
2025: {"PVV", "VVD", "NSC", "BBB"},
2026: {"PVV", "VVD", "NSC", "BBB"},
}
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]:
rows = con.execute("""
SELECT mp_name, party, van, tot_en_met
FROM mp_metadata
WHERE party IS NOT NULL
ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST
""").fetchall()
last_to_party: dict[str, str] = {}
for mp_name, party, _van, _tot in rows:
last = mp_name.split(",")[0].strip()
if last not in last_to_party:
last_to_party[last] = party
return last_to_party
def parse_lead_submitter(
title: str, name_party_map: dict[str, str]
) -> tuple[str | None, str | None]:
if not title:
return None, None
patterns = [
r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
r"(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
r"Amendement\s+van\s+het\s+lid\s+(.+?)\s+over\b",
r"Amendement\s+van\s+de\s+leden\s+(.+?)\s+over\b",
]
for pat in patterns:
m = re.search(pat, title)
if m:
submitter_str = m.group(1).strip()
parts = submitter_str.split(" en ")
first_name = parts[0].strip()
first_name = re.sub(r"\s+c\.s\.", "", first_name).strip()
if not first_name:
continue
party = name_party_map.get(first_name)
return first_name, party
return None, None
def fetch_quarterly_data(con: duckdb.DuckDBPyConnection) -> list[dict[str, Any]]:
"""Fetch all right-wing motions with dates and metrics."""
rows = con.execute("""
@ -172,12 +113,6 @@ def aggregate_quarterly(
return dict(quarterly)
def quarter_sort_key(quarter_str: str) -> tuple[int, int]:
"""Sort key: '2019-Q3' -> (2019, 3)."""
parts = quarter_str.split("-Q")
return (int(parts[0]), int(parts[1]))
def compute_summary(quarterly: dict) -> dict[str, dict[str, Any]]:
"""Compute means, counts, and confidence intervals per quarter."""
summary = {}

@ -38,16 +38,13 @@ import numpy as np
from scipy.stats import spearmanr, pearsonr, mannwhitneyu
from analysis.config import CANONICAL_RIGHT
from analysis.right_wing.common import BREAK_YEAR, DB_PATH, REPORTS_DIR
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
DB_PATH = str(PROJECT_ROOT / "data" / "motions.db")
REPORTS_DIR = PROJECT_ROOT / "reports" / "overton_window"
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
BREAK_YEAR = 2024
QUARTILE_LABELS = [
"Q1 [0.00\u20130.25]",
"Q2 (0.25\u20130.50]",
@ -540,22 +537,19 @@ def generate_report(
"",
]
if u_p < 0.05 if isinstance(u_p := corr.get("spearman_p", 1.0), float) else False:
pass
else:
if not np.isnan(post_mean) and not np.isnan(pre_mean):
_, period_p = mannwhitneyu(pre_margins, post_margins, alternative="two-sided")
if period_p < 0.05:
direction = "rose" if post_mean > pre_mean else "fell"
report.append(
f"Voting margin {direction} significantly post-2024 "
f"(Mann-Whitney p = {period_p:.1e}, d = {cohens_d:+.3f})."
)
else:
report.append(
f"Voting margin did not change significantly between periods "
f"(Mann-Whitney p = {period_p:.3f})."
)
if not np.isnan(post_mean) and not np.isnan(pre_mean):
_, period_p = mannwhitneyu(pre_margins, post_margins, alternative="two-sided")
if period_p < 0.05:
direction = "rose" if post_mean > pre_mean else "fell"
report.append(
f"Voting margin {direction} significantly post-2024 "
f"(Mann-Whitney p = {period_p:.1e}, d = {cohens_d:+.3f})."
)
else:
report.append(
f"Voting margin did not change significantly between periods "
f"(Mann-Whitney p = {period_p:.3f})."
)
report += [
"",

@ -0,0 +1,265 @@
"""Tests for analysis/right_wing/common.py shared module.
TDD approach: these tests verify the extracted shared helpers work correctly.
"""
import math
from pathlib import Path
from unittest.mock import MagicMock, patch
import numpy as np
import pytest
class TestConstants:
    """Sanity checks on the constants exported by the shared module."""

    def test_canonical_centrist_is_frozenset(self):
        from analysis.right_wing.common import CANONICAL_CENTRIST

        assert isinstance(CANONICAL_CENTRIST, frozenset)
        for party in ("VVD", "D66", "CDA", "NSC", "BBB", "CU"):
            assert party in CANONICAL_CENTRIST

    def test_canonical_centrist_strict_subset(self):
        from analysis.right_wing.common import CANONICAL_CENTRIST, CANONICAL_CENTRIST_STRICT

        # The strict set narrows the broad one and drops VVD and BBB.
        assert CANONICAL_CENTRIST_STRICT.issubset(CANONICAL_CENTRIST)
        for excluded in ("VVD", "BBB"):
            assert excluded not in CANONICAL_CENTRIST_STRICT

    def test_canonical_left_right_disjoint(self):
        from analysis.right_wing.common import CANONICAL_LEFT, CANONICAL_RIGHT

        shared = CANONICAL_LEFT & CANONICAL_RIGHT
        assert len(shared) == 0

    def test_coalition_dicts(self):
        from analysis.right_wing.common import RUTTE_IV_COALITION, SCHOOF_COALITION

        for coalition in (RUTTE_IV_COALITION, SCHOOF_COALITION):
            assert isinstance(coalition, set)
        # PVV is in the Schoof coalition only.
        assert "PVV" in SCHOOF_COALITION
        assert "PVV" not in RUTTE_IV_COALITION

    def test_time_constants(self):
        from analysis.right_wing.common import YEAR_MIN, YEAR_MAX, BREAK_YEAR

        assert YEAR_MIN < BREAK_YEAR < YEAR_MAX
        assert BREAK_YEAR == 2024

    def test_paths_exist(self):
        from analysis.right_wing.common import ROOT, DB_PATH, REPORTS_DIR

        # Only ROOT is checked on disk; the other two are checked by type/content.
        assert ROOT.exists()
        assert isinstance(DB_PATH, str)
        assert "motions.db" in DB_PATH
        assert isinstance(REPORTS_DIR, Path)
class TestCohensD:
    """Test Cohen's d effect size calculation.

    Sign convention under test: ``cohens_d(x, y)`` is
    ``(mean(y) - mean(x)) / pooled_std``, so it is positive when the second
    sample has the larger mean.
    """

    def test_identical_groups_returns_zero(self):
        from analysis.right_wing.common import cohens_d

        d = cohens_d([1, 2, 3], [1, 2, 3])
        assert d == 0.0

    def test_first_group_higher(self):
        from analysis.right_wing.common import cohens_d

        d = cohens_d([4, 5, 6], [1, 2, 3])
        assert d < 0  # mean_y < mean_x -> negative

    def test_second_group_higher(self):
        from analysis.right_wing.common import cohens_d

        d = cohens_d([1, 2, 3], [4, 5, 6])
        assert d > 0  # mean_y > mean_x -> positive

    def test_known_value(self):
        from analysis.right_wing.common import cohens_d

        # [1,2,3,4,5] vs [3,4,5,6,7]: mean_y - mean_x = +2; both sample
        # variances (ddof=1) are 2.5, so pooled std = sqrt(2.5) ~= 1.58 and
        # d = 2 / sqrt(2.5) ~= 1.265.
        d = cohens_d([1, 2, 3, 4, 5], [3, 4, 5, 6, 7])
        assert d > 0  # Second group has higher mean
        assert d == pytest.approx(2 / math.sqrt(2.5))
class TestQuarterSortKey:
    """Ordering behaviour of ``quarter_sort_key`` on 'YYYY-Qn' labels."""

    def test_basic_sort(self):
        from analysis.right_wing.common import quarter_sort_key as key

        # Within one year, then across a year boundary.
        assert key("2024-Q1") < key("2024-Q2")
        assert key("2023-Q4") < key("2024-Q1")

    def test_sort_order(self):
        from analysis.right_wing.common import quarter_sort_key

        shuffled = ["2024-Q3", "2024-Q1", "2023-Q4", "2024-Q2"]
        expected = ["2023-Q4", "2024-Q1", "2024-Q2", "2024-Q3"]
        assert sorted(shuffled, key=quarter_sort_key) == expected

    def test_invalid_format_raises(self):
        from analysis.right_wing.common import quarter_sort_key

        with pytest.raises((ValueError, IndexError)):
            quarter_sort_key("invalid")
class TestFindInflectionPoint:
    """Checks for ``find_inflection_point`` (3-quarter rolling average)."""

    def test_simple_inflection(self):
        """The first window whose mean exceeds the threshold is reported."""
        from analysis.right_wing.common import find_inflection_point

        labels = ["2023-Q4", "2024-Q1", "2024-Q2", "2024-Q3", "2024-Q4"]
        series = [0.2, 0.5, 0.6, 0.7, 0.8]
        found = find_inflection_point(labels, series, threshold=0.4)
        # Window around index 1: (0.2 + 0.5 + 0.6) / 3 = 0.433 > 0.4
        assert found == "2024-Q1"

    def test_no_inflection(self):
        """A series that never reaches the threshold returns None."""
        from analysis.right_wing.common import find_inflection_point

        labels = ["2023-Q4", "2024-Q1", "2024-Q2", "2024-Q3"]
        series = [0.1, 0.15, 0.2, 0.25]
        found = find_inflection_point(labels, series, threshold=0.4)
        assert found is None

    def test_inflection_at_end(self):
        """An inflection in the final window is still detected."""
        from analysis.right_wing.common import find_inflection_point

        labels = ["2023-Q4", "2024-Q1", "2024-Q2", "2024-Q3", "2024-Q4"]
        series = [0.1, 0.15, 0.2, 0.5, 0.6]
        found = find_inflection_point(labels, series, threshold=0.4)
        # Window around index 3: (0.2 + 0.5 + 0.6) / 3 = 0.433 > 0.4
        assert found == "2024-Q3"

    def test_too_short_returns_none(self):
        """Two data points are expected to yield None."""
        from analysis.right_wing.common import find_inflection_point

        labels = ["2024-Q1", "2024-Q2"]
        series = [0.5, 0.6]
        assert find_inflection_point(labels, series, 0.4) is None
class TestMotionPassed:
    """Checks that ``motion_passed`` decides passage from the result field."""

    def test_aangenomen_passes(self):
        """A result of "aangenomen" counts as passed."""
        from analysis.right_wing.common import motion_passed

        tally = {"result": "aangenomen", "voor": 100, "tegen": 50}
        outcome = motion_passed(tally)
        assert outcome is True

    def test_verworpen_fails(self):
        """A result of "verworpen" counts as not passed."""
        from analysis.right_wing.common import motion_passed

        tally = {"result": "verworpen", "voor": 30, "tegen": 70}
        outcome = motion_passed(tally)
        assert outcome is False

    def test_none_fails(self):
        """A missing vote record (None) is treated as not passed."""
        from analysis.right_wing.common import motion_passed

        outcome = motion_passed(None)
        assert outcome is False

    def test_empty_dict_fails(self):
        """An empty vote record is treated as not passed."""
        from analysis.right_wing.common import motion_passed

        outcome = motion_passed({})
        assert outcome is False

    def test_json_string_parses(self):
        """A JSON-encoded vote record is accepted in place of a dict."""
        from analysis.right_wing.common import motion_passed

        encoded = '{"result": "aangenomen", "voor": 100}'
        outcome = motion_passed(encoded)
        assert outcome is True

    def test_invalid_json_fails(self):
        """A string that is not valid JSON is treated as not passed."""
        from analysis.right_wing.common import motion_passed

        outcome = motion_passed("not json")
        assert outcome is False
class TestBuildPartyNameMap:
    """Checks the MP-name-to-party mapping built from the database at DB_PATH."""

    def test_returns_dict(self):
        """The helper returns a dict with more than 100 entries."""
        from analysis.right_wing.common import build_party_name_map, _conn, DB_PATH

        with _conn(DB_PATH) as con:
            name_map = build_party_name_map(con)

        assert isinstance(name_map, dict)
        assert len(name_map) > 100  # the database should contain many MPs

    def test_known_mp_maps_to_party(self):
        """Wilders is present in the map and assigned to PVV."""
        from analysis.right_wing.common import build_party_name_map, _conn, DB_PATH

        with _conn(DB_PATH) as con:
            name_map = build_party_name_map(con)

        assert "Wilders" in name_map
        assert name_map["Wilders"] == "PVV"

    def test_groenlinks_pvda_normalized(self):
        """Klaver is present and mapped to the label "GroenLinks-PvdA"."""
        from analysis.right_wing.common import build_party_name_map, _conn, DB_PATH

        with _conn(DB_PATH) as con:
            name_map = build_party_name_map(con)

        assert "Klaver" in name_map
        assert name_map["Klaver"] == "GroenLinks-PvdA"
class TestParseLeadSubmitter:
    """Checks extraction of the lead MP (and party) from a motion title."""

    def test_standard_motie_format(self):
        """A plain "Motie van het lid ..." title yields name and party."""
        from analysis.right_wing.common import parse_lead_submitter

        lookup = {"Wilders": "PVV"}
        name, party = parse_lead_submitter(
            "Motie van het lid Wilders over migratie", lookup
        )
        assert name == "Wilders"
        assert party == "PVV"

    def test_gewijzigde_motie_format(self):
        """A "Gewijzigde Motie ... c.s." title still yields the lead name."""
        from analysis.right_wing.common import parse_lead_submitter

        lookup = {"Klaver": "GL"}
        # Only the name is checked; the returned party is deliberately ignored.
        name, _party = parse_lead_submitter(
            "Gewijzigde Motie van het lid Klaver c.s. over klimaat", lookup
        )
        assert name == "Klaver"

    def test_amendement_format(self):
        """An "Amendement van het lid ..." title yields the lead name."""
        from analysis.right_wing.common import parse_lead_submitter

        lookup = {"Omtzigt": "NSC"}
        # Only the name is checked; the returned party is deliberately ignored.
        name, _party = parse_lead_submitter(
            "Amendement van het lid Omtzigt over begroting", lookup
        )
        assert name == "Omtzigt"

    def test_non_motie_returns_none(self):
        """A title without a submitter phrase yields (None, None)."""
        from analysis.right_wing.common import parse_lead_submitter

        name, party = parse_lead_submitter(
            "Verslag van een schriftelijk overleg", {}
        )
        assert name is None
        assert party is None

    def test_empty_title_returns_none(self):
        """An empty title yields no name."""
        from analysis.right_wing.common import parse_lead_submitter

        name, _party = parse_lead_submitter("", {})
        assert name is None
class TestConnectionHelper:
    """Checks that ``_conn`` works as a context manager yielding a connection."""

    def test_conn_returns_context_manager(self):
        """Opening the default DB_PATH gives an object that can run SQL."""
        from analysis.right_wing.common import _conn, DB_PATH

        # Entering the context with the default path must not raise.
        with _conn(DB_PATH) as con:
            assert con is not None
            # A trivial query proves the handle is a usable connection.
            row = con.execute("SELECT 1").fetchone()
            assert row[0] == 1
class TestIntegration:
    """Integration checks that common.py lines up with the real data on disk."""

    def test_db_path_points_to_existing_file(self):
        """DB_PATH must name a path that exists on disk."""
        from analysis.right_wing.common import DB_PATH
        from pathlib import Path

        db_file = Path(DB_PATH)
        assert db_file.exists(), f"Database not found at {DB_PATH}"

    def test_reports_dir_exists(self):
        """REPORTS_DIR must exist on disk."""
        from analysis.right_wing.common import REPORTS_DIR

        assert REPORTS_DIR.exists(), f"Reports dir not found: {REPORTS_DIR}"

    def test_centrist_parties_in_database(self):
        """At least four CANONICAL_CENTRIST parties occur in mp_votes."""
        from analysis.right_wing.common import CANONICAL_CENTRIST, _conn, DB_PATH

        with _conn(DB_PATH) as con:
            rows = con.execute(
                "SELECT DISTINCT party FROM mp_votes WHERE party IS NOT NULL"
            ).fetchall()
            db_parties = {row[0] for row in rows}

        # Only a partial overlap is required, not every centrist party.
        found = CANONICAL_CENTRIST & db_parties
        assert len(found) >= 4, f"Only {found} centrist parties found in mp_votes"
Loading…
Cancel
Save