- Created analysis/right_wing/common.py with all shared helpers:
  - Constants: CANONICAL_CENTRIST, COALITION, BREAK_YEAR, etc.
  - Functions: _conn, cohens_d, build_party_name_map, parse_lead_submitter, motion_passed, quarter_sort_key, find_inflection_point
- Fixed bugs:
  1. ai_provider.py: requests.Timeout is now caught alongside ConnectionError
  2. voting_margin.py: removed walrus operator misuse, fixed Mann-Whitney test
- Updated 13 consuming files to import from common.py
- Added 35 TDD tests in tests/right_wing/test_common.py
- 282 tests pass (was 247)
main
parent
0183bbc8a3
commit
a3154f72df
@ -0,0 +1,187 @@ |
||||
"""Shared constants and helpers for right-wing motion analysis. |
||||
|
||||
Extracted from 6+ files to eliminate code duplication. All Overton analysis |
||||
scripts should import from here instead of defining their own copies. |
||||
""" |
||||
|
||||
from __future__ import annotations |
||||
|
||||
import re |
||||
import math |
||||
from pathlib import Path |
||||
|
||||
import duckdb |
||||
import numpy as np |
||||
|
||||
# --------------------------------------------------------------------------- |
||||
# Paths |
||||
# --------------------------------------------------------------------------- |
||||
|
||||
# Directory two levels above the directory that contains this file.
ROOT = Path(__file__).resolve().parents[2]
# Database location as a plain string (this is the default target of _conn).
DB_PATH = str(ROOT.joinpath("data", "motions.db"))
# Directory for the Overton-window reports, kept as a Path object.
REPORTS_DIR = ROOT.joinpath("reports", "overton_window")
||||
|
||||
# --------------------------------------------------------------------------- |
||||
# Party sets |
||||
# --------------------------------------------------------------------------- |
||||
|
||||
# Immutable party groupings. Some parties appear under several spellings
# (e.g. "GL", "GroenLinks", "GroenLinks-PvdA") so any of them is recognised.
CANONICAL_LEFT = frozenset(
    (
        "SP",
        "PvdA",
        "GL",
        "GroenLinks",
        "GroenLinks-PvdA",
        "DENK",
        "PvdD",
        "Volt",
    )
)
CANONICAL_RIGHT = frozenset(("PVV", "FVD", "JA21", "SGP"))
CANONICAL_CENTRIST = frozenset(("VVD", "D66", "CDA", "NSC", "BBB", "CU"))
# Same as CANONICAL_CENTRIST but without VVD and BBB.
CANONICAL_CENTRIST_STRICT = frozenset(("D66", "CDA", "NSC", "CU"))

# Mutable ``set`` copies of the left/right groupings.
CANONICAL_LEFT_SET = {*CANONICAL_LEFT}
CANONICAL_RIGHT_SET = {*CANONICAL_RIGHT}
||||
|
||||
# --------------------------------------------------------------------------- |
||||
# Time periods |
||||
# --------------------------------------------------------------------------- |
||||
|
||||
# Year bounds of the analysis window.
YEAR_MIN = 2016
YEAR_MAX = 2026
# Year used as the break point between periods.
BREAK_YEAR = 2024
# ISO date (YYYY-MM-DD) taken as the start of the Schoof period.
SCHOOF_START_DATE = "2024-07-01"
||||
|
||||
# --------------------------------------------------------------------------- |
||||
# Coalition composition |
||||
# --------------------------------------------------------------------------- |
||||
|
||||
# Named coalition compositions that are reused below.
RUTTE_IV_COALITION: set[str] = {"VVD", "D66", "CDA", "CU"}
SCHOOF_COALITION: set[str] = {"PVV", "VVD", "NSC", "BBB"}

# Coalition parties per calendar year. This table has one entry per year, so
# all of 2024 maps to the Schoof parties here; COALITION_NOTE describes the
# mid-2024 split that date-based period detection has to handle.
#
# Every year gets its OWN set object (``set(...)`` copies). Sharing one mutable
# set between several years and the named constant would let an in-place
# mutation through any entry silently change all the others.
COALITION: dict[int, set[str]] = {
    2016: {"VVD", "PvdA"},
    2017: {"VVD", "PvdA"},
    2018: {"VVD", "CDA", "D66", "CU"},
    2019: {"VVD", "CDA", "D66", "CU"},
    2020: {"VVD", "CDA", "D66", "CU"},
    2021: {"VVD", "CDA", "D66", "CU"},
    2022: set(RUTTE_IV_COALITION),
    2023: set(RUTTE_IV_COALITION),
    2024: set(SCHOOF_COALITION),
    2025: set(SCHOOF_COALITION),
    2026: set(SCHOOF_COALITION),
}

# Human-readable explanation of the table above.
COALITION_NOTE = (
    "2016-2017: Rutte II (VVD/PvdA). "
    "2018-2021: Rutte III (VVD/CDA/D66/CU). "
    "2022-2023: Rutte IV (VVD/D66/CDA/CU). "
    "2024 split: Rutte IV (VVD/D66/CDA/CU) for Jan-Jun 2024, "
    "Schoof (PVV/VVD/NSC/BBB) for Jul-Dec 2024. "
    "2025-2026: Schoof (PVV/VVD/NSC/BBB). "
    "Period detection uses motion date, not just year."
)
||||
|
||||
# --------------------------------------------------------------------------- |
||||
# Database helpers |
||||
# --------------------------------------------------------------------------- |
||||
|
||||
|
||||
def _conn(db_path: str | None = None, read_only: bool = True) -> duckdb.DuckDBPyConnection:
    """Return a DuckDB connection for the given database file.

    Args:
        db_path: Database file to open. ``None`` or an empty string falls
            back to the module-level ``DB_PATH``.
        read_only: Passed straight through to ``duckdb.connect``.
    """
    target = db_path if db_path else DB_PATH
    return duckdb.connect(target, read_only=read_only)
||||
|
||||
|
||||
# --------------------------------------------------------------------------- |
||||
# Statistical helpers |
||||
# --------------------------------------------------------------------------- |
||||
|
||||
|
||||
def cohens_d(x: np.ndarray, y: np.ndarray) -> float:
    """Return Cohen's d effect size between two samples.

    The sign is positive when the mean of ``y`` exceeds the mean of ``x``.
    The denominator is the square root of the plain average of the two
    sample variances (``ddof=1``).

    Args:
        x: First sample; any array-like of numbers.
        y: Second sample; any array-like of numbers.

    Returns:
        The effect size as a built-in ``float``. ``0.0`` when the pooled
        standard deviation is zero, and ``nan`` when either sample has fewer
        than two observations (its sample variance is undefined).
    """
    first = np.asarray(x, dtype=float)
    second = np.asarray(y, dtype=float)

    # A sample variance with ddof=1 needs at least two observations. Answer
    # NaN explicitly instead of letting NumPy emit RuntimeWarnings and
    # propagate NaN implicitly.
    if first.size < 2 or second.size < 2:
        return float("nan")

    pooled = np.sqrt((np.var(first, ddof=1) + np.var(second, ddof=1)) / 2)
    if pooled == 0:
        return 0.0
    return float((np.mean(second) - np.mean(first)) / pooled)
||||
|
||||
|
||||
# --------------------------------------------------------------------------- |
||||
# Motion metadata helpers |
||||
# --------------------------------------------------------------------------- |
||||
|
||||
|
||||
def build_party_name_map(con: duckdb.DuckDBPyConnection) -> dict[str, str]:
    """Return a ``{surname: party}`` lookup built from ``mp_metadata``.

    The surname is the text before the first comma of ``mp_name``, with
    surrounding whitespace removed. When several rows share a surname, the
    row that sorts first in the query's ORDER BY decides the party.

    Args:
        con: Open connection exposing ``execute(sql).fetchall()``.
    """
    # NOTE(review): with ``DESC NULLS LAST`` rows lacking ``tot_en_met`` sort
    # after dated rows. If NULL means "still serving", confirm this is the
    # intended precedence.
    query = """
        SELECT mp_name, party, van, tot_en_met
        FROM mp_metadata
        WHERE party IS NOT NULL
        ORDER BY tot_en_met DESC NULLS LAST, van DESC NULLS LAST
    """

    surname_to_party: dict[str, str] = {}
    for full_name, party, _start, _end in con.execute(query).fetchall():
        surname = full_name.partition(",")[0].strip()
        # setdefault keeps the first party seen for each surname.
        surname_to_party.setdefault(surname, party)
    return surname_to_party
||||
|
||||
|
||||
# Title patterns, tried in order; group 1 captures the submitter list.
# Only the fixed prefix is matched case-insensitively (scoped ``(?i:...)``),
# so titles such as "Gewijzigde motie van het lid ..." are recognised, while
# the trailing "over" keyword keeps its original case-sensitive match.
_LEAD_SUBMITTER_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"(?i:(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+het\s+lid)\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
        r"(?i:(?:Gewijzigde|Nader\s+gewijzigde)?\s*Motie\s+van\s+de\s+leden)\s+(.+?)\s+(?:c\.s\.\s+)?over\b",
        r"(?i:Amendement\s+van\s+het\s+lid)\s+(.+?)\s+over\b",
        r"(?i:Amendement\s+van\s+de\s+leden)\s+(.+?)\s+over\b",
    )
)


def parse_lead_submitter(
    title: str, name_party_map: dict[str, str]
) -> tuple[str | None, str | None]:
    """Extract the first-named submitter from a motion/amendment title.

    Args:
        title: Document title, e.g. ``"Motie van het lid Wilders over ..."``.
        name_party_map: Mapping of submitter name to party, used to look up
            the parsed name.

    Returns:
        ``(name, party)``. ``party`` is ``None`` when the name is not in
        ``name_party_map``. ``(None, None)`` when the title is empty or
        matches none of the known title patterns.
    """
    if not title:
        return None, None

    for pattern in _LEAD_SUBMITTER_PATTERNS:
        match = pattern.search(title)
        if match is None:
            continue

        # The lead submitter is the first name of "A en B" or "A, B en C".
        lead = match.group(1).split(" en ")[0]
        lead = lead.split(",")[0]
        # Drop a trailing "c.s." that the amendment patterns leave in place.
        lead = re.sub(r"\s+c\.s\.", "", lead).strip()
        if not lead:
            continue
        return lead, name_party_map.get(lead)

    return None, None
||||
|
||||
|
||||
def motion_passed(voting_results: dict | str | None) -> bool:
    """Tell whether a motion was adopted according to its voting results.

    Args:
        voting_results: Either a mapping with a ``"result"`` key, or a JSON
            document (``str``/``bytes``) that decodes to such a mapping.

    Returns:
        ``True`` only when the ``"result"`` entry equals ``"aangenomen"``.
        ``False`` for empty or missing input, for text that is not valid
        JSON, and for JSON that decodes to something other than an object
        (list, number, string, ...).
    """
    if not voting_results:
        return False

    if isinstance(voting_results, (str, bytes, bytearray)):
        import json  # local import: this module does not import json at file level

        try:
            voting_results = json.loads(voting_results)
        except (ValueError, TypeError):
            return False

    # Valid JSON need not be an object; only mappings carry a "result" entry.
    from collections.abc import Mapping

    if not isinstance(voting_results, Mapping):
        return False
    return voting_results.get("result") == "aangenomen"
||||
|
||||
|
||||
# --------------------------------------------------------------------------- |
||||
# Temporal helpers |
||||
# --------------------------------------------------------------------------- |
||||
|
||||
|
||||
def quarter_sort_key(q: str) -> tuple[int, int]:
    """Turn a quarter label such as ``'2024-Q1'`` into ``(year, quarter)``.

    The year is read from the first four characters and the quarter from
    the last character; ``int()`` raises ``ValueError`` if either is not
    numeric.
    """
    return int(q[:4]), int(q[-1])
||||
|
||||
|
||||
def find_inflection_point( |
||||
quarters: list[str], values: list[float], threshold: float = 0.4 |
||||
) -> str | None: |
||||
"""Find the first quarter where the smoothed value exceeds the threshold.""" |
||||
if len(quarters) < 3: |
||||
return None |
||||
for i in range(1, len(quarters) - 1): |
||||
avg = (values[i - 1] + values[i] + values[i + 1]) / 3 |
||||
if avg > threshold: |
||||
return quarters[i] |
||||
return None |
||||
@ -0,0 +1,265 @@ |
||||
"""Tests for analysis/right_wing/common.py shared module. |
||||
|
||||
TDD approach: these tests verify the extracted shared helpers work correctly. |
||||
""" |
||||
|
||||
import math |
||||
from pathlib import Path |
||||
from unittest.mock import MagicMock, patch |
||||
|
||||
import numpy as np |
||||
import pytest |
||||
|
||||
|
||||
class TestConstants:
    """Check that the shared constants exist with the expected types and members."""

    def test_canonical_centrist_is_frozenset(self):
        from analysis.right_wing.common import CANONICAL_CENTRIST as centrist

        assert isinstance(centrist, frozenset)
        for party in ("VVD", "D66", "CDA", "NSC", "BBB", "CU"):
            assert party in centrist

    def test_canonical_centrist_strict_subset(self):
        from analysis.right_wing.common import (
            CANONICAL_CENTRIST as broad,
            CANONICAL_CENTRIST_STRICT as strict,
        )

        assert strict.issubset(broad)
        for excluded in ("VVD", "BBB"):
            assert excluded not in strict

    def test_canonical_left_right_disjoint(self):
        from analysis.right_wing.common import CANONICAL_LEFT, CANONICAL_RIGHT

        overlap = CANONICAL_LEFT & CANONICAL_RIGHT
        assert len(overlap) == 0

    def test_coalition_dicts(self):
        from analysis.right_wing.common import RUTTE_IV_COALITION, SCHOOF_COALITION

        for coalition in (RUTTE_IV_COALITION, SCHOOF_COALITION):
            assert isinstance(coalition, set)
        assert "PVV" in SCHOOF_COALITION
        assert "PVV" not in RUTTE_IV_COALITION

    def test_time_constants(self):
        from analysis.right_wing.common import BREAK_YEAR, YEAR_MAX, YEAR_MIN

        assert YEAR_MIN < BREAK_YEAR < YEAR_MAX
        assert BREAK_YEAR == 2024

    def test_paths_exist(self):
        from analysis.right_wing.common import DB_PATH, REPORTS_DIR, ROOT

        assert ROOT.exists()
        assert isinstance(DB_PATH, str)
        assert "motions.db" in DB_PATH
        assert isinstance(REPORTS_DIR, Path)
||||
|
||||
|
||||
class TestCohensD:
    """Test the Cohen's d effect size calculation.

    ``cohens_d(x, y)`` computes ``(mean(y) - mean(x)) / pooled_std``, so the
    sign is positive when the second sample has the larger mean.
    """

    def test_identical_groups_returns_zero(self):
        from analysis.right_wing.common import cohens_d

        d = cohens_d(np.array([1, 2, 3]), np.array([1, 2, 3]))
        assert d == 0.0

    def test_first_group_higher(self):
        from analysis.right_wing.common import cohens_d

        d = cohens_d(np.array([4, 5, 6]), np.array([1, 2, 3]))
        assert d < 0  # mean_y < mean_x -> negative

    def test_second_group_higher(self):
        from analysis.right_wing.common import cohens_d

        d = cohens_d(np.array([1, 2, 3]), np.array([4, 5, 6]))
        assert d > 0  # mean_y > mean_x -> positive

    def test_known_value(self):
        from analysis.right_wing.common import cohens_d

        # [1,2,3,4,5] vs [3,4,5,6,7]: mean(y) - mean(x) = +2, both sample
        # variances are 2.5, so pooled std = sqrt(2.5) ~= 1.581 and
        # d = 2 / 1.581 ~= 1.265.
        d = cohens_d(np.array([1, 2, 3, 4, 5]), np.array([3, 4, 5, 6, 7]))
        assert d == pytest.approx(2 / math.sqrt(2.5))
||||
|
||||
|
||||
class TestQuarterSortKey:
    """Check ordering of 'YYYY-Qn' quarter labels via quarter_sort_key."""

    def test_basic_sort(self):
        from analysis.right_wing.common import quarter_sort_key as key

        # Within one year, and across a year boundary.
        assert key("2024-Q1") < key("2024-Q2")
        assert key("2023-Q4") < key("2024-Q1")

    def test_sort_order(self):
        from analysis.right_wing.common import quarter_sort_key

        shuffled = ["2024-Q3", "2024-Q1", "2023-Q4", "2024-Q2"]
        expected = ["2023-Q4", "2024-Q1", "2024-Q2", "2024-Q3"]
        assert sorted(shuffled, key=quarter_sort_key) == expected

    def test_invalid_format_raises(self):
        from analysis.right_wing.common import quarter_sort_key

        with pytest.raises((ValueError, IndexError)):
            quarter_sort_key("invalid")
||||
|
||||
|
||||
class TestFindInflectionPoint:
    """Check inflection detection based on the centred 3-quarter average."""

    def test_simple_inflection(self):
        from analysis.right_wing.common import find_inflection_point

        labels = ["2023-Q4", "2024-Q1", "2024-Q2", "2024-Q3", "2024-Q4"]
        series = [0.2, 0.5, 0.6, 0.7, 0.8]
        # Average centred on index 1: (0.2 + 0.5 + 0.6) / 3 = 0.433 > 0.4
        assert find_inflection_point(labels, series, threshold=0.4) == "2024-Q1"

    def test_no_inflection(self):
        from analysis.right_wing.common import find_inflection_point

        labels = ["2023-Q4", "2024-Q1", "2024-Q2", "2024-Q3"]
        series = [0.1, 0.15, 0.2, 0.25]
        assert find_inflection_point(labels, series, threshold=0.4) is None

    def test_inflection_at_end(self):
        from analysis.right_wing.common import find_inflection_point

        labels = ["2023-Q4", "2024-Q1", "2024-Q2", "2024-Q3", "2024-Q4"]
        series = [0.1, 0.15, 0.2, 0.5, 0.6]
        # Average centred on index 3: (0.2 + 0.5 + 0.6) / 3 = 0.433 > 0.4
        assert find_inflection_point(labels, series, threshold=0.4) == "2024-Q3"

    def test_too_short_returns_none(self):
        from analysis.right_wing.common import find_inflection_point

        # Two quarters cannot form a three-point window.
        outcome = find_inflection_point(["2024-Q1", "2024-Q2"], [0.5, 0.6], 0.4)
        assert outcome is None
||||
|
||||
|
||||
class TestMotionPassed:
    """Check that motion_passed reads the ``result`` field correctly."""

    def test_aangenomen_passes(self):
        from analysis.right_wing.common import motion_passed

        outcome = motion_passed({"result": "aangenomen", "voor": 100, "tegen": 50})
        assert outcome is True

    def test_verworpen_fails(self):
        from analysis.right_wing.common import motion_passed

        outcome = motion_passed({"result": "verworpen", "voor": 30, "tegen": 70})
        assert outcome is False

    def test_none_fails(self):
        from analysis.right_wing.common import motion_passed

        outcome = motion_passed(None)
        assert outcome is False

    def test_empty_dict_fails(self):
        from analysis.right_wing.common import motion_passed

        outcome = motion_passed({})
        assert outcome is False

    def test_json_string_parses(self):
        from analysis.right_wing.common import motion_passed

        # A JSON-encoded string is decoded before the result is read.
        outcome = motion_passed('{"result": "aangenomen", "voor": 100}')
        assert outcome is True

    def test_invalid_json_fails(self):
        from analysis.right_wing.common import motion_passed

        outcome = motion_passed("not json")
        assert outcome is False
||||
|
||||
|
||||
class TestBuildPartyNameMap:
    """Check the surname-to-party mapping built from the project database."""

    @staticmethod
    def _load_mapping():
        """Build the mapping from the database at DB_PATH."""
        from analysis.right_wing.common import DB_PATH, _conn, build_party_name_map

        with _conn(DB_PATH) as con:
            return build_party_name_map(con)

    def test_returns_dict(self):
        mapping = self._load_mapping()
        assert isinstance(mapping, dict)
        assert len(mapping) > 100  # Should have many MPs

    def test_known_mp_maps_to_party(self):
        mapping = self._load_mapping()
        # Wilders should map to PVV
        assert "Wilders" in mapping
        assert mapping["Wilders"] == "PVV"

    def test_groenlinks_pvda_normalized(self):
        mapping = self._load_mapping()
        # Klaver should map to GroenLinks-PvdA
        assert "Klaver" in mapping
        assert mapping["Klaver"] == "GroenLinks-PvdA"
||||
|
||||
|
||||
class TestParseLeadSubmitter:
    """Test motion title parsing for the lead MP name and the party lookup."""

    def test_standard_motie_format(self):
        from analysis.right_wing.common import parse_lead_submitter

        title = "Motie van het lid Wilders over migratie"
        name, party = parse_lead_submitter(title, {"Wilders": "PVV"})
        assert name == "Wilders"
        assert party == "PVV"

    def test_gewijzigde_motie_format(self):
        from analysis.right_wing.common import parse_lead_submitter

        title = "Gewijzigde Motie van het lid Klaver c.s. over klimaat"
        name, party = parse_lead_submitter(title, {"Klaver": "GL"})
        # "c.s." must not end up in the parsed name, and the party is looked up.
        assert name == "Klaver"
        assert party == "GL"

    def test_amendement_format(self):
        from analysis.right_wing.common import parse_lead_submitter

        title = "Amendement van het lid Omtzigt over begroting"
        name, party = parse_lead_submitter(title, {"Omtzigt": "NSC"})
        assert name == "Omtzigt"
        assert party == "NSC"

    def test_non_motie_returns_none(self):
        from analysis.right_wing.common import parse_lead_submitter

        title = "Verslag van een schriftelijk overleg"
        name, party = parse_lead_submitter(title, {})
        assert name is None
        assert party is None

    def test_empty_title_returns_none(self):
        from analysis.right_wing.common import parse_lead_submitter

        name, party = parse_lead_submitter("", {})
        assert name is None
        assert party is None
||||
|
||||
|
||||
class TestConnectionHelper:
    """Check that _conn yields a usable connection inside a ``with`` block."""

    def test_conn_returns_context_manager(self):
        from analysis.right_wing.common import DB_PATH, _conn

        # Opening the default database must not raise.
        with _conn(DB_PATH) as con:
            assert con is not None
            # A trivial query proves the connection is live.
            (value,) = con.execute("SELECT 1").fetchone()[:1]
            assert value == 1
||||
|
||||
|
||||
class TestIntegration:
    """Integration checks that run common.py against the real project data."""

    def test_db_path_points_to_existing_file(self):
        from pathlib import Path

        from analysis.right_wing.common import DB_PATH

        db_file = Path(DB_PATH)
        assert db_file.exists(), f"Database not found at {DB_PATH}"

    def test_reports_dir_exists(self):
        from analysis.right_wing.common import REPORTS_DIR

        assert REPORTS_DIR.exists(), f"Reports dir not found: {REPORTS_DIR}"

    def test_centrist_parties_in_database(self):
        """Verify CANONICAL_CENTRIST parties actually exist in mp_votes."""
        from analysis.right_wing.common import CANONICAL_CENTRIST, DB_PATH, _conn

        query = "SELECT DISTINCT party FROM mp_votes WHERE party IS NOT NULL"
        with _conn(DB_PATH) as con:
            db_parties = {row[0] for row in con.execute(query).fetchall()}

        # At least four of the centrist parties must occur in the vote data.
        found = CANONICAL_CENTRIST & db_parties
        assert len(found) >= 4, f"Only {found} centrist parties found in mp_votes"
||||
|
||||
Loading…
Reference in new issue