You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
motief/tests/test_political_compass.py

471 lines
16 KiB

import numpy as np
import types
import sys
import pytest
# ---------------------------------------------------------------------------
# Helpers shared by orientation tests
# ---------------------------------------------------------------------------
def _make_fake_traj(aligned):
    """Build a stand-in object for the ``analysis.trajectory`` module.

    ``aligned`` is a dict keyed by window id; the tests in this file pass
    ``{window_id: {mp_name: vector}}``.  The returned namespace exposes the
    three private helpers the tests patch in:

    * ``_load_window_ids(db)`` -> the keys of ``aligned`` as a list
    * ``_load_mp_vectors_for_window(db, w)`` -> ``aligned[w]``, or ``{}`` when
      the window is unknown
    * ``_procrustes_align_windows(x)`` -> ``aligned`` itself (``x`` is ignored)
    """
    return types.SimpleNamespace(
        _load_window_ids=lambda db: list(aligned.keys()),
        _load_mp_vectors_for_window=lambda db, w: aligned.get(w, {}),
        _procrustes_align_windows=lambda x: aligned,
    )
def test_compute_2d_axes_pca_synthetic(monkeypatch):
    """Smoke-test ``compute_2d_axes(method="pca")`` on synthetic vectors.

    ``analysis.trajectory`` is swapped for a fake that serves two windows
    (``w1``, ``w2``) with two MPs each.  The test only checks the output
    shape: every requested window is present, every coordinate is a finite
    2-tuple, and the returned axis definition reports ``method == "pca"``.
    """
    import importlib

    # Pre-aligned 3-D vectors, served both by the per-window loader and by the
    # alignment helper of the fake module.
    aligned = {
        "w1": {"Alice": np.array([1.0, 0.0, 0.0]), "Bob": np.array([0.0, 1.0, 0.0])},
        "w2": {"Alice": np.array([0.8, 0.2, 0.0]), "Bob": np.array([0.1, 0.9, 0.0])},
    }

    # Use the shared fake so every test in this file stubs the same helpers.
    monkeypatch.setitem(sys.modules, "analysis.trajectory", _make_fake_traj(aligned))

    # Reload after patching, as the sibling orientation tests do, so the fake
    # is picked up even if analysis.political_axis was imported earlier.
    import analysis.political_axis as political_axis

    political_axis = importlib.reload(political_axis)

    positions_by_window, axis_def = political_axis.compute_2d_axes(
        db_path="dummy", window_ids=["w1", "w2"], method="pca"
    )

    assert "w1" in positions_by_window and "w2" in positions_by_window
    for wid in ("w1", "w2"):
        # MP names are irrelevant here; only the coordinates are checked.
        for coord in positions_by_window[wid].values():
            assert len(coord) == 2
            assert np.isfinite(coord[0]) and np.isfinite(coord[1])
    assert axis_def.get("method") == "pca"
def test_per_window_y_orientation(monkeypatch):
    """Progressive MPs must sit above conservative MPs on Y in every window.

    Two windows containing the same eight MPs are fed to ``compute_2d_axes``:

    - ``w_good``: progressive MPs at +Y, conservative MPs at -Y.
    - ``w_bad``: identical left/right (dim-0) layout, but Y is mirrored, so
      conservative MPs are at +Y and progressive MPs at -Y.

    After the call, the mean Y of the progressive group has to exceed the mean
    Y of the conservative group in *both* windows, i.e. the inverted window
    must have been flipped.

    NOTE(review): both windows hold the same eight MPs, so ``w_good`` is not
    weighted more heavily than ``w_bad``; confirm this fixture really slips
    past any global orientation check in ``compute_2d_axes``.
    """
    import importlib

    import duckdb

    # Correctly oriented window: wide spread on dim-0, smaller spread on dim-1.
    w_good = {
        # right / conservative
        "Wilders, G.": np.array([-3.0, -1.0, 0.0], dtype=float),
        "Rutte, M.": np.array([-3.0, -0.9, 0.0], dtype=float),
        "van der Staaij, K.": np.array([-2.9, -0.95, 0.0], dtype=float),
        "Omtzigt, P.": np.array([-2.8, -0.85, 0.0], dtype=float),
        # left / progressive
        "Marijnissen, L.": np.array([3.0, 1.0, 0.0], dtype=float),
        "Klever, A.": np.array([3.0, 0.9, 0.0], dtype=float),
        "Bromet, L.": np.array([2.9, 0.95, 0.0], dtype=float),
        "Nijboer, H.": np.array([2.8, 0.85, 0.0], dtype=float),
    }
    # Inverted window: negate dim-1 only, keeping MP order and dim-0 intact.
    mirror_y = np.array([1.0, -1.0, 1.0])
    w_bad = {mp_name: vec * mirror_y for mp_name, vec in w_good.items()}
    aligned = {"w_good": w_good, "w_bad": w_bad}

    # (name, party) rows served by the fake DuckDB connection.
    mp_metadata = [
        ("Wilders, G.", "PVV"),
        ("Rutte, M.", "VVD"),
        ("van der Staaij, K.", "SGP"),
        ("Omtzigt, P.", "Nieuw Sociaal Contract"),
        ("Marijnissen, L.", "SP"),
        ("Klever, A.", "GroenLinks-PvdA"),
        ("Bromet, L.", "GroenLinks-PvdA"),
        ("Nijboer, H.", "SP"),
    ]

    monkeypatch.setitem(sys.modules, "analysis.trajectory", _make_fake_traj(aligned))

    # Any query on the fake connection returns the metadata rows above.
    fake_conn = types.SimpleNamespace(
        execute=lambda q: types.SimpleNamespace(fetchall=lambda: mp_metadata),
        close=lambda: None,
    )
    monkeypatch.setattr(duckdb, "connect", lambda db_path, **kw: fake_conn)

    # Import only after the fake trajectory module is in place, then reload.
    import analysis.political_axis as political_axis

    importlib.reload(political_axis)
    from analysis.political_axis import compute_2d_axes

    positions_by_window, _ = compute_2d_axes(
        db_path="dummy", window_ids=["w_good", "w_bad"], method="pca"
    )

    prog_mps = {"Marijnissen, L.", "Klever, A.", "Bromet, L.", "Nijboer, H."}
    cons_mps = {"Wilders, G.", "Rutte, M.", "van der Staaij, K.", "Omtzigt, P."}

    def _mean_y(window_pos, group):
        # Average Y over the group members that actually have a position.
        return np.mean([window_pos[mp][1] for mp in group if mp in window_pos])

    for wid in ("w_good", "w_bad"):
        window_pos = positions_by_window[wid]
        prog_y = _mean_y(window_pos, prog_mps)
        cons_y = _mean_y(window_pos, cons_mps)
        assert prog_y > cons_y, (
            f"Window '{wid}': expected prog_avg_y ({prog_y:.3f}) > cons_avg_y ({cons_y:.3f})"
        )
def test_pca_axis_orientation(monkeypatch):
    """PCA output must place right above left on X and progressive above
    conservative on Y.

    The single window ``w1`` is built "the wrong way round" on purpose:

    - right-leaning MPs sit at -3 on dim-0, left-leaning MPs at +3;
    - progressive MPs sit at -1 on dim-1, conservative MPs at +1.

    The dim-0 spread (magnitude 3) is larger than the dim-1 spread
    (magnitude 1) so that dim-0 dominates the variance -- presumably making
    PC1 the left/right axis and PC2 the progressive/conservative axis.
    ``compute_2d_axes`` is expected to flip signs so that
    ``right_x > left_x`` and ``prog_y > cons_y`` regardless of the raw sign.
    """
    import importlib

    import duckdb

    # Base directions (deliberately sign-inverted, see docstring).
    right_vec = np.array([-3.0, 0.0, 0.0])
    left_vec = np.array([3.0, 0.0, 0.0])
    prog_vec = np.array([0.0, -1.0, 0.0])
    cons_vec = np.array([0.0, 1.0, 0.0])
    # Small dim-2 offset so the second MP of each pair is not an exact duplicate.
    z_nudge = np.array([0.0, 0.0, 0.05])

    aligned = {
        "w1": {
            # right-leaning
            "Wilders, G.": right_vec,
            "Rutte, M.": right_vec + z_nudge,
            # left-leaning
            "Marijnissen, L.": left_vec,
            "Klever, A.": left_vec + z_nudge,
            # progressive
            "Bromet, L.": prog_vec,
            "Nijboer, H.": prog_vec - z_nudge,
            # conservative
            "Segers, G.": cons_vec,
            "Omtzigt, P.": cons_vec - z_nudge,
        }
    }

    # (name, party) rows served by the fake DuckDB connection.
    mp_metadata = [
        ("Wilders, G.", "PVV"),
        ("Rutte, M.", "VVD"),
        ("Marijnissen, L.", "SP"),
        ("Klever, A.", "GroenLinks-PvdA"),
        ("Bromet, L.", "GroenLinks-PvdA"),
        ("Nijboer, H.", "SP"),
        ("Segers, G.", "CDA"),
        ("Omtzigt, P.", "Nieuw Sociaal Contract"),
    ]

    monkeypatch.setitem(sys.modules, "analysis.trajectory", _make_fake_traj(aligned))

    # Any query on the fake connection returns the metadata rows above.
    fake_conn = types.SimpleNamespace(
        execute=lambda q: types.SimpleNamespace(fetchall=lambda: mp_metadata),
        close=lambda: None,
    )
    monkeypatch.setattr(duckdb, "connect", lambda db_path, **kw: fake_conn)

    # Reload so the patched sys.modules entry takes effect.
    import analysis.political_axis as political_axis

    importlib.reload(political_axis)
    from analysis.political_axis import compute_2d_axes

    positions_by_window, _ = compute_2d_axes(
        db_path="dummy", window_ids=["w1"], method="pca"
    )
    pos = positions_by_window["w1"]

    def _mean_coord(names, axis):
        # Average one coordinate (0 = X, 1 = Y) over the named MPs.
        return np.mean([pos[mp_name][axis] for mp_name in names])

    # X-axis: right parties should score higher than left parties.
    right_x = _mean_coord(("Wilders, G.", "Rutte, M."), 0)
    left_x = _mean_coord(("Marijnissen, L.", "Klever, A."), 0)
    assert right_x > left_x, (
        f"Expected right parties (x={right_x:.3f}) > left parties (x={left_x:.3f}) on X-axis"
    )

    # Y-axis: progressive parties should score higher than conservative ones.
    prog_y = _mean_coord(("Bromet, L.", "Nijboer, H."), 1)
    cons_y = _mean_coord(("Segers, G.", "Omtzigt, P."), 1)
    assert prog_y > cons_y, (
        f"Expected progressive parties (y={prog_y:.3f}) > conservative parties (y={cons_y:.3f}) on Y-axis"
    )
# ---------------------------------------------------------------------------
# Tests for compute_party_discipline
# ---------------------------------------------------------------------------
def _make_mp_votes_db():
    """Return an in-memory DuckDB connection holding an ``mp_votes`` fixture.

    Contents: six motions (``M001`` .. ``M006``), one per date in 2023, and
    two parties with four MPs each.

    * SP: all four MPs vote "voor" on every motion.
    * VVD: all four vote "voor" on motions 1-4; on motions 5 and 6 the last MP
      ("Dekker, H.") votes "tegen" while the other three vote "voor".

    With discipline measured as the share of a party voting with its
    majority, this gives SP 1.0 and VVD (4 * 1.0 + 2 * 0.75) / 6 ~= 0.917,
    which is the value the discipline test expects.

    The ``id`` column is not unique per row: it is ``motion_no * 10 + 1`` for
    SP rows, ``+ 2`` for VVD "voor" rows and ``+ 3`` for the VVD dissenter.
    """
    import duckdb

    motion_dates = [
        "2023-01-10",
        "2023-03-15",
        "2023-05-20",
        "2023-07-25",
        "2023-09-30",
        "2023-11-05",
    ]
    sp_mps = ["Janssen, A.", "Pietersen, B.", "Willemsen, C.", "Hendriksen, D."]
    vvd_mps = ["Adams, E.", "Bakker, F.", "Claassen, G.", "Dekker, H."]
    dissenter_idx = len(vvd_mps) - 1  # the last VVD MP breaks ranks
    first_split_motion = 5  # motions 1-4 are unanimous for VVD

    db = duckdb.connect(":memory:")
    # SQL text is kept flush-left so the statement string stays unchanged.
    db.execute("""
CREATE TABLE mp_votes (
id INTEGER,
motion_id VARCHAR,
mp_name VARCHAR,
party VARCHAR,
vote VARCHAR,
date DATE,
created_at TIMESTAMP
)
""")

    records = []
    for motion_no, vote_date in enumerate(motion_dates, start=1):
        motion_id = f"M{motion_no:03d}"
        id_base = motion_no * 10

        # SP: always unanimous.
        records.extend(
            (id_base + 1, motion_id, member, "SP", "voor", vote_date, "2023-01-01")
            for member in sp_mps
        )

        # VVD: unanimous early on, one dissenter from the fifth motion onward.
        for idx, member in enumerate(vvd_mps):
            if motion_no >= first_split_motion and idx == dissenter_idx:
                records.append(
                    (id_base + 3, motion_id, member, "VVD", "tegen", vote_date, "2023-01-01")
                )
            else:
                records.append(
                    (id_base + 2, motion_id, member, "VVD", "voor", vote_date, "2023-01-01")
                )

    db.executemany("INSERT INTO mp_votes VALUES (?, ?, ?, ?, ?, ?, ?)", records)
    return db
def test_compute_party_discipline_basic(monkeypatch):
    """``compute_party_discipline`` yields the expected scores for the fixture.

    ``duckdb.connect`` is patched to hand back the in-memory fixture from
    ``_make_mp_votes_db``.  Over the full 2023 range the result must contain
    the columns ``party``, ``n_motions`` and ``discipline``; SP must score 1.0
    over 6 motions, VVD (4 * 1.0 + 2 * 0.75) / 6 over 6 motions, and every
    discipline value must lie in [0, 1].
    """
    import importlib

    import duckdb

    fixture_conn = _make_mp_votes_db()
    monkeypatch.setattr(duckdb, "connect", lambda path, **kw: fixture_conn)

    # Minimal streamlit stand-in: ``cache_data(**kw)`` returns an identity
    # decorator.  Presumably needed because ``explorer`` imports streamlit.
    # NOTE(review): the stub is written straight into sys.modules and is not
    # undone when the test finishes -- confirm that is intended.
    if "streamlit" not in sys.modules:
        st_stub = types.ModuleType("streamlit")
        st_stub.cache_data = lambda **kw: lambda f: f
        sys.modules["streamlit"] = st_stub

    import explorer

    importlib.reload(explorer)

    df = explorer.compute_party_discipline(
        db_path="dummy",
        start_date="2023-01-01",
        end_date="2023-12-31",
    )

    assert not df.empty
    assert set(df.columns) >= {"party", "n_motions", "discipline"}

    # First (expected: only) row per party.
    rows = {party: df[df["party"] == party].iloc[0] for party in ("SP", "VVD")}

    assert rows["SP"]["n_motions"] == 6
    assert rows["SP"]["discipline"] == pytest.approx(1.0, abs=1e-6)

    assert rows["VVD"]["n_motions"] == 6
    expected_vvd = (4 * 1.0 + 2 * 0.75) / 6
    assert rows["VVD"]["discipline"] == pytest.approx(expected_vvd, abs=1e-4)

    # Discipline is a share, so it has to stay within [0, 1].
    assert (df["discipline"] >= 0).all()
    assert (df["discipline"] <= 1).all()
def test_compute_party_discipline_empty_range(monkeypatch):
    """``compute_party_discipline`` returns an empty frame for a range with no votes.

    The fixture only contains 2023 dates; querying the year 2000 must yield
    ``df.empty``.
    """
    import importlib

    import duckdb

    fixture_conn = _make_mp_votes_db()
    monkeypatch.setattr(duckdb, "connect", lambda path, **kw: fixture_conn)

    # Minimal streamlit stand-in: ``cache_data(**kw)`` returns an identity
    # decorator.  Presumably needed because ``explorer`` imports streamlit.
    # NOTE(review): the stub is written straight into sys.modules and is not
    # undone when the test finishes -- confirm that is intended.
    if "streamlit" not in sys.modules:
        st_stub = types.ModuleType("streamlit")
        st_stub.cache_data = lambda **kw: lambda f: f
        sys.modules["streamlit"] = st_stub

    import explorer

    importlib.reload(explorer)

    query_range = {"start_date": "2000-01-01", "end_date": "2000-12-31"}
    df = explorer.compute_party_discipline(db_path="dummy", **query_range)

    assert df.empty
# ---------------------------------------------------------------------------
# Tests for analysis.axis_classifier
# ---------------------------------------------------------------------------
import importlib
def _fresh_classifier(monkeypatch):
    """Return ``analysis.axis_classifier`` with its module-level caches reset.

    ``_ideology_cache`` and ``_coalition_cache`` are set to ``None`` through
    ``monkeypatch``, so the original values are restored after the test.
    """
    import analysis.axis_classifier as classifier

    for cache_attr in ("_ideology_cache", "_coalition_cache"):
        monkeypatch.setattr(classifier, cache_attr, None)
    return classifier
def test_axis_label_left_right(tmp_path, monkeypatch):
    """X positions equal to the left_right scores must be labelled 'Links–Rechts'.

    ``party_ideologies.csv`` and a header-only ``coalition_membership.csv``
    are written into ``tmp_path``; the db path handed to ``classify_axes``
    points into the same directory (presumably where it looks for the CSVs).
    """
    classifier = _fresh_classifier(monkeypatch)

    ideology_file = tmp_path / "party_ideologies.csv"
    ideology_file.write_text(
        "party,left_right,progressive\n"
        "VVD,0.65,0.10\n"
        "PvdA,-0.70,0.75\n"
        "SP,-0.90,0.50\n"
        "PVV,0.90,-0.50\n"
        "D66,-0.10,0.85\n"
        "CDA,0.25,-0.45\n"
    )
    coalition_file = tmp_path / "coalition_membership.csv"
    coalition_file.write_text("window_id,party\n")  # header only, no members

    # Each X value is exactly the party's left_right score from the CSV.
    window_2022 = {
        "VVD": (0.65, 0.10),
        "PvdA": (-0.70, 0.20),
        "SP": (-0.90, 0.30),
        "PVV": (0.90, -0.10),
        "D66": (-0.10, 0.40),
        "CDA": (0.25, -0.20),
    }
    positions_by_window = {"2022": window_2022}
    axes = {"x_axis": None, "y_axis": None, "method": "pca"}
    db_path = str(tmp_path / "motions.db")

    result = classifier.classify_axes(positions_by_window, axes, db_path)

    assert result["x_label"] == "Links\u2013Rechts"
    assert result["x_quality"]["2022"] >= 0.65
def test_axis_label_coalition_dominant(tmp_path, monkeypatch):
    """X positions that split coalition from opposition must be labelled
    'Coalitie–Oppositie'.

    The 2016 coalition in the fixture is VVD + PvdA.  Both sit near x = +1
    while every other party sits near x = -1.  Because VVD (right) and PvdA
    (left) end up on the same side, the X axis should correlate poorly with
    the left_right scores but strongly with coalition membership.
    """
    classifier = _fresh_classifier(monkeypatch)

    ideology_file = tmp_path / "party_ideologies.csv"
    ideology_file.write_text(
        "party,left_right,progressive\n"
        "VVD,0.65,0.10\n"
        "PvdA,-0.70,0.75\n"
        "SP,-0.90,0.50\n"
        "PVV,0.90,-0.50\n"
        "D66,-0.10,0.85\n"
        "CDA,0.25,-0.45\n"
    )
    coalition_file = tmp_path / "coalition_membership.csv"
    coalition_file.write_text(
        "window_id,party\n2016,VVD\n2016,PvdA\n"
    )

    window_2016 = {
        # coalition
        "VVD": (0.95, 0.10),
        "PvdA": (0.90, 0.20),
        # opposition
        "SP": (-0.85, 0.30),
        "PVV": (-0.95, -0.10),
        "D66": (-0.80, 0.40),
        "CDA": (-0.75, -0.20),
    }
    positions_by_window = {"2016": window_2016}
    axes = {"x_axis": None, "y_axis": None, "method": "pca"}
    db_path = str(tmp_path / "motions.db")

    result = classifier.classify_axes(positions_by_window, axes, db_path)

    assert result["x_label"] == "Coalitie\u2013Oppositie"
    assert "coalitie" in result["x_interpretation"]["2016"].lower()
def test_axis_classifier_missing_csv(tmp_path, monkeypatch):
    """Without any CSV files ``classify_axes`` must hand back the input dict.

    ``tmp_path`` exists but contains no ``party_ideologies.csv``.  The call
    must not raise, must return the very same ``axes`` object, and must not
    have added an ``x_label`` key to it.
    """
    classifier = _fresh_classifier(monkeypatch)

    # Nothing is written to tmp_path on purpose.
    db_path = str(tmp_path / "motions.db")
    axes = {"x_axis": None, "y_axis": None, "method": "pca"}
    positions_by_window = {"2022": {"VVD": (1.0, 0.5), "PvdA": (-1.0, 0.3)}}

    result = classifier.classify_axes(positions_by_window, axes, db_path)

    assert result is axes
    assert "x_label" not in result