import importlib
import sys
import types

import numpy as np
import pytest


# ---------------------------------------------------------------------------
# Helpers shared by orientation tests
# ---------------------------------------------------------------------------


def _make_fake_traj(aligned):
    """Build a stand-in for the ``analysis.trajectory`` module.

    The returned namespace exposes the three private helpers these tests
    replace, each answering from the in-memory ``aligned`` mapping instead
    of a database.

    Args:
        aligned: Mapping ``window_id -> {mp_name: vector}``.

    Returns:
        A ``types.SimpleNamespace`` with ``_load_window_ids``,
        ``_load_mp_vectors_for_window`` and ``_procrustes_align_windows``.
    """
    fake = types.SimpleNamespace()
    # Window order is the insertion order of ``aligned``.
    fake._load_window_ids = lambda db: list(aligned.keys())
    # Unknown windows yield an empty mapping rather than raising.
    fake._load_mp_vectors_for_window = lambda db, w: aligned.get(w, {})
    # The vectors are treated as already aligned; the input is ignored.
    fake._procrustes_align_windows = lambda x: aligned
    return fake


def _make_fake_duckdb_conn(rows):
    """Return a minimal connection double whose queries all yield ``rows``.

    ``execute`` ignores the query text and returns an object whose
    ``fetchall()`` gives back ``rows``; ``close`` is a no-op.
    """
    return types.SimpleNamespace(
        execute=lambda q: types.SimpleNamespace(fetchall=lambda: rows),
        close=lambda: None,
    )


def _patch_duckdb_connect(monkeypatch, conn):
    """Make ``duckdb.connect`` return ``conn`` for the duration of the test."""
    import duckdb

    monkeypatch.setattr(duckdb, "connect", lambda *args, **kwargs: conn)


def _reload_compute_2d_axes():
    """Reload ``analysis.political_axis`` and return its ``compute_2d_axes``.

    The reload is needed so that a monkeypatched ``sys.modules`` entry for
    ``analysis.trajectory`` takes effect.
    """
    import analysis.political_axis as political_axis

    return importlib.reload(political_axis).compute_2d_axes


def _load_explorer(monkeypatch):
    """Import (and reload) ``explorer``, stubbing ``streamlit`` if absent.

    The stub is registered through ``monkeypatch`` so it is removed from
    ``sys.modules`` at teardown instead of leaking into later tests.
    """
    if "streamlit" not in sys.modules:
        st_stub = types.ModuleType("streamlit")
        # Supports the ``@st.cache_data(...)`` call form: return the
        # decorated function unchanged.
        st_stub.cache_data = lambda **kw: lambda f: f
        monkeypatch.setitem(sys.modules, "streamlit", st_stub)

    import explorer

    return importlib.reload(explorer)


def test_compute_2d_axes_pca_synthetic(monkeypatch):
    """Synthetic test for compute_2d_axes using patched alignment helper."""
    # Pre-aligned vectors served directly by the fake trajectory module.
    aligned = {
        "w1": {
            "Alice": np.array([1.0, 0.0, 0.0]),
            "Bob": np.array([0.0, 1.0, 0.0]),
        },
        "w2": {
            "Alice": np.array([0.8, 0.2, 0.0]),
            "Bob": np.array([0.1, 0.9, 0.0]),
        },
    }

    # Insert fake module into sys.modules for import by analysis.political_axis
    monkeypatch.setitem(sys.modules, "analysis.trajectory", _make_fake_traj(aligned))

    # NOTE(review): unlike the orientation tests below, this test neither
    # reloads analysis.political_axis nor patches duckdb.connect -- confirm
    # that compute_2d_axes resolves analysis.trajectory lazily.
    from analysis.political_axis import compute_2d_axes

    positions_by_window, axis_def = compute_2d_axes(
        db_path="dummy", window_ids=["w1", "w2"], method="pca"
    )

    assert "w1" in positions_by_window and "w2" in positions_by_window
    for wid in ("w1", "w2"):
        for coord in positions_by_window[wid].values():
            assert len(coord) == 2
            assert np.isfinite(coord[0]) and np.isfinite(coord[1])

    assert axis_def.get("method") == "pca"


def test_per_window_y_orientation(monkeypatch):
    """Per-window Y correction must ensure prog_avg_y > cons_avg_y in every window.

    We construct two windows containing the same eight MPs:
      - w_good: progressive MPs at +Y, conservative MPs at -Y  (already correct)
      - w_bad:  conservative MPs at +Y, progressive MPs at -Y  (inverted)

    w_bad mirrors w_good on dim-1, so no single global sign choice can
    orient both windows.  The per-window correction must flip w_bad so both
    windows end up with prog_avg_y > cons_avg_y.
    """

    def pv(base):
        """Return ``base`` as a float vector."""
        return np.array(base, dtype=float)

    # w_good: large left/right spread on dim-0, prog up (+Y), cons down (-Y)
    w_good = {
        # right / conservative
        "Wilders, G.": pv([-3.0, -1.0, 0.0]),
        "Rutte, M.": pv([-3.0, -0.9, 0.0]),
        "van der Staaij, K.": pv([-2.9, -0.95, 0.0]),
        "Omtzigt, P.": pv([-2.8, -0.85, 0.0]),
        # left / progressive
        "Marijnissen, L.": pv([3.0, 1.0, 0.0]),
        "Klever, A.": pv([3.0, 0.9, 0.0]),
        "Bromet, L.": pv([2.9, 0.95, 0.0]),
        "Nijboer, H.": pv([2.8, 0.85, 0.0]),
    }

    # w_bad: same left/right structure but Y is inverted relative to w_good
    # (conservative at +Y, progressive at -Y)
    w_bad = {
        "Wilders, G.": pv([-3.0, 1.0, 0.0]),  # cons at +Y
        "Rutte, M.": pv([-3.0, 0.9, 0.0]),
        "van der Staaij, K.": pv([-2.9, 0.95, 0.0]),
        "Omtzigt, P.": pv([-2.8, 0.85, 0.0]),
        "Marijnissen, L.": pv([3.0, -1.0, 0.0]),  # prog at -Y
        "Klever, A.": pv([3.0, -0.9, 0.0]),
        "Bromet, L.": pv([2.9, -0.95, 0.0]),
        "Nijboer, H.": pv([2.8, -0.85, 0.0]),
    }

    aligned = {"w_good": w_good, "w_bad": w_bad}

    # (mp_name, party) rows served by the fake DuckDB connection.
    mp_metadata = [
        ("Wilders, G.", "PVV"),
        ("Rutte, M.", "VVD"),
        ("van der Staaij, K.", "SGP"),
        ("Omtzigt, P.", "Nieuw Sociaal Contract"),
        ("Marijnissen, L.", "SP"),
        ("Klever, A.", "GroenLinks-PvdA"),
        ("Bromet, L.", "GroenLinks-PvdA"),
        ("Nijboer, H.", "SP"),
    ]

    monkeypatch.setitem(sys.modules, "analysis.trajectory", _make_fake_traj(aligned))
    _patch_duckdb_connect(monkeypatch, _make_fake_duckdb_conn(mp_metadata))
    compute_2d_axes = _reload_compute_2d_axes()

    positions_by_window, _ = compute_2d_axes(
        db_path="dummy", window_ids=["w_good", "w_bad"], method="pca"
    )

    prog_mps = {"Marijnissen, L.", "Klever, A.", "Bromet, L.", "Nijboer, H."}
    cons_mps = {"Wilders, G.", "Rutte, M.", "van der Staaij, K.", "Omtzigt, P."}

    for wid in ("w_good", "w_bad"):
        pos = positions_by_window[wid]
        prog_y = np.mean([pos[mp][1] for mp in prog_mps if mp in pos])
        cons_y = np.mean([pos[mp][1] for mp in cons_mps if mp in pos])
        assert prog_y > cons_y, (
            f"Window '{wid}': expected prog_avg_y ({prog_y:.3f}) "
            f"> cons_avg_y ({cons_y:.3f})"
        )


def test_pca_axis_orientation(monkeypatch):
    """PCA axes must be oriented so right parties score higher on X and
    progressive parties score higher on Y than their respective opposites.

    We construct a minimal vote-matrix world where:
      - Right MPs (PVV, VVD members) cluster in one direction on dim-0.
      - Left MPs (SP, GroenLinks-PvdA members) cluster in the opposite direction.
      - Progressive MPs cluster on dim-1; conservative MPs on the opposite side.

    The orientation logic in compute_2d_axes should flip axis signs so that
    right_x > left_x and prog_y > cons_y regardless of the raw SVD sign.
    """
    # The desired output has right parties positive on X and progressive
    # parties positive on Y.  The raw vectors are deliberately negated to
    # test that auto-orient flips them back.
    # Right/left use magnitude 3, prog/cons use magnitude 1 so that dim-0
    # dominates PCA variance -- ensuring PC1 = left-right axis, PC2 = prog-cons.
    right_vec = np.array([-3.0, 0.0, 0.0])  # intentionally negative on dim-0
    left_vec = np.array([3.0, 0.0, 0.0])  # intentionally positive on dim-0
    prog_vec = np.array([0.0, -1.0, 0.0])  # intentionally negative on dim-1
    cons_vec = np.array([0.0, 1.0, 0.0])  # intentionally positive on dim-1

    aligned = {
        "w1": {
            # Right-leaning MPs
            "Wilders, G.": right_vec,
            "Rutte, M.": right_vec + np.array([0.0, 0.0, 0.05]),
            # Left-leaning MPs
            "Marijnissen, L.": left_vec,
            "Klever, A.": left_vec + np.array([0.0, 0.0, 0.05]),
            # Progressive MPs
            "Bromet, L.": prog_vec,
            "Nijboer, H.": prog_vec + np.array([0.0, 0.0, -0.05]),
            # Conservative MPs
            "Segers, G.": cons_vec,
            "Omtzigt, P.": cons_vec + np.array([0.0, 0.0, -0.05]),
        }
    }

    # mp_metadata rows used by the orientation code (party affiliation)
    mp_metadata = [
        ("Wilders, G.", "PVV"),
        ("Rutte, M.", "VVD"),
        ("Marijnissen, L.", "SP"),
        ("Klever, A.", "GroenLinks-PvdA"),
        ("Bromet, L.", "GroenLinks-PvdA"),
        ("Nijboer, H.", "SP"),
        ("Segers, G.", "CDA"),
        ("Omtzigt, P.", "Nieuw Sociaal Contract"),
    ]

    monkeypatch.setitem(sys.modules, "analysis.trajectory", _make_fake_traj(aligned))
    # Patch duckdb so the orientation helper can fetch mp_metadata
    _patch_duckdb_connect(monkeypatch, _make_fake_duckdb_conn(mp_metadata))
    # Need to reload the module so monkeypatched sys.modules takes effect
    compute_2d_axes = _reload_compute_2d_axes()

    positions_by_window, _ = compute_2d_axes(
        db_path="dummy", window_ids=["w1"], method="pca"
    )

    pos = positions_by_window["w1"]

    # X-axis: right parties should score higher than left parties
    right_x = np.mean([pos["Wilders, G."][0], pos["Rutte, M."][0]])
    left_x = np.mean([pos["Marijnissen, L."][0], pos["Klever, A."][0]])
    assert right_x > left_x, (
        f"Expected right parties (x={right_x:.3f}) "
        f"> left parties (x={left_x:.3f}) on X-axis"
    )

    # Y-axis: progressive parties should score higher than conservative parties
    prog_y = np.mean([pos["Bromet, L."][1], pos["Nijboer, H."][1]])
    cons_y = np.mean([pos["Segers, G."][1], pos["Omtzigt, P."][1]])
    assert prog_y > cons_y, (
        f"Expected progressive parties (y={prog_y:.3f}) "
        f"> conservative parties (y={cons_y:.3f}) on Y-axis"
    )


# ---------------------------------------------------------------------------
# Tests for compute_party_discipline
# ---------------------------------------------------------------------------


def _make_mp_votes_db():
    """Create an in-memory DuckDB with mp_votes fixture data.

    6 motions, 2 parties (SP, VVD), each with 4 MPs.
    SP is perfectly disciplined (all 4 vote the same each time).
    VVD has 1 dissident on the last 2 of 6 motions, so the expected
    discipline is (4+4+4+4+3+3)/6/4 ≈ 0.917.
    Motion dates run from 2023-01-10 to 2023-11-05.

    Returns:
        The open DuckDB connection holding the populated ``mp_votes`` table.
    """
    import duckdb

    conn = duckdb.connect(":memory:")
    conn.execute(
        """
        CREATE TABLE mp_votes (
            id INTEGER,
            motion_id VARCHAR,
            mp_name VARCHAR,
            party VARCHAR,
            vote VARCHAR,
            date DATE,
            created_at TIMESTAMP
        )
        """
    )

    dates = [
        "2023-01-10",
        "2023-03-15",
        "2023-05-20",
        "2023-07-25",
        "2023-09-30",
        "2023-11-05",
    ]
    sp_mps = ["Janssen, A.", "Pietersen, B.", "Willemsen, C.", "Hendriksen, D."]
    vvd_mps = ["Adams, E.", "Bakker, F.", "Claassen, G.", "Dekker, H."]
    created_at = "2023-01-01"

    # NOTE: ids repeat within a motion/party group; the table declares no
    # key constraint, so this is accepted as-is.
    rows = []
    for i, date in enumerate(dates, start=1):
        m_id = f"M{i:03d}"
        rows.extend(
            (i * 10 + 1, m_id, mp, "SP", "voor", date, created_at) for mp in sp_mps
        )
        # Motions 1-4: VVD votes as a bloc.  Motions 5-6: the last VVD MP
        # dissents.
        loyal_vvd = vvd_mps if i <= 4 else vvd_mps[:3]
        rows.extend(
            (i * 10 + 2, m_id, mp, "VVD", "voor", date, created_at)
            for mp in loyal_vvd
        )
        if i > 4:
            rows.append(
                (i * 10 + 3, m_id, vvd_mps[3], "VVD", "tegen", date, created_at)
            )

    conn.executemany("INSERT INTO mp_votes VALUES (?, ?, ?, ?, ?, ?, ?)", rows)
    return conn


def test_compute_party_discipline_basic(monkeypatch):
    """compute_party_discipline returns correct Rice index for fixture data."""
    _patch_duckdb_connect(monkeypatch, _make_mp_votes_db())
    explorer = _load_explorer(monkeypatch)

    df = explorer.compute_party_discipline(
        db_path="dummy",
        start_date="2023-01-01",
        end_date="2023-12-31",
    )

    assert not df.empty
    assert set(df.columns) >= {"party", "n_motions", "discipline"}

    sp_row = df[df["party"] == "SP"].iloc[0]
    vvd_row = df[df["party"] == "VVD"].iloc[0]

    assert sp_row["n_motions"] == 6
    assert sp_row["discipline"] == pytest.approx(1.0, abs=1e-6)

    assert vvd_row["n_motions"] == 6
    # Four unanimous motions plus two motions with a 3-of-4 majority.
    expected_vvd = (4 * 1.0 + 2 * 0.75) / 6
    assert vvd_row["discipline"] == pytest.approx(expected_vvd, abs=1e-4)

    assert (df["discipline"] >= 0).all() and (df["discipline"] <= 1).all()


def test_compute_party_discipline_empty_range(monkeypatch):
    """Returns empty DataFrame when no motions fall in the date range."""
    _patch_duckdb_connect(monkeypatch, _make_mp_votes_db())
    explorer = _load_explorer(monkeypatch)

    df = explorer.compute_party_discipline(
        db_path="dummy",
        start_date="2000-01-01",
        end_date="2000-12-31",
    )

    assert df.empty