"""Tests for PCA axis orientation (analysis.political_axis), party discipline
(explorer.compute_party_discipline) and axis labelling (analysis.axis_classifier)."""
import numpy as np
|
|
import types
|
|
import sys
|
|
|
|
import pytest
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers shared by orientation tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _make_fake_traj(aligned):
    """Build a stand-in object for the ``analysis.trajectory`` module.

    The returned namespace exposes the three private helpers the tests patch
    in, each answering from the pre-aligned fixture instead of a database.

    Args:
        aligned: Mapping ``window_id -> {mp_name: vector}``. It is captured by
            reference, not copied.

    Returns:
        A ``types.SimpleNamespace`` with:
        ``_load_window_ids(db)`` -> list of the keys of ``aligned`` (in order),
        ``_load_mp_vectors_for_window(db, w)`` -> ``aligned[w]`` or ``{}``,
        ``_procrustes_align_windows(x)`` -> ``aligned`` itself.
    """
    return types.SimpleNamespace(
        _load_window_ids=lambda db: list(aligned.keys()),
        _load_mp_vectors_for_window=lambda db, w: aligned.get(w, {}),
        # Alignment is a no-op here: the fixture vectors are already aligned.
        _procrustes_align_windows=lambda x: aligned,
    )
|
|
|
|
|
|
def test_compute_2d_axes_pca_synthetic(monkeypatch):
    """Synthetic test for compute_2d_axes using patched alignment helpers.

    Two windows with two MPs each are served by a stub ``analysis.trajectory``
    module; the test only checks that every returned position is a finite
    2-D coordinate and that the axis definition reports the PCA method.
    """
    # Pre-aligned vectors, keyed by window id and then MP name.
    aligned = {
        "w1": {"Alice": np.array([1.0, 0.0, 0.0]), "Bob": np.array([0.0, 1.0, 0.0])},
        "w2": {"Alice": np.array([0.8, 0.2, 0.0]), "Bob": np.array([0.1, 0.9, 0.0])},
    }

    # Insert the fake module into sys.modules for import by analysis.political_axis.
    # The shared helper yields window ids ["w1", "w2"], the per-window vectors,
    # and an alignment function that simply returns ``aligned``.
    monkeypatch.setitem(sys.modules, "analysis.trajectory", _make_fake_traj(aligned))

    # Import the function under test only after the stub is registered.
    from analysis.political_axis import compute_2d_axes

    positions_by_window, axis_def = compute_2d_axes(
        db_path="dummy", window_ids=["w1", "w2"], method="pca"
    )

    for wid in ("w1", "w2"):
        assert wid in positions_by_window
    for wid in ("w1", "w2"):
        for coord in positions_by_window[wid].values():
            assert len(coord) == 2
            assert np.isfinite(coord[0]) and np.isfinite(coord[1])

    assert axis_def.get("method") == "pca"
|
|
|
|
|
|
def test_per_window_y_orientation(monkeypatch):
    """Per-window Y correction must ensure prog_avg_y > cons_avg_y in every window.

    Two windows hold the same eight MPs:
    - w_good: progressive MPs at +Y, conservative MPs at -Y (already correct)
    - w_bad:  conservative MPs at +Y, progressive MPs at -Y (inverted)

    The windows are exact mirror images on dim-1, so averaged over both
    windows the progressive and the conservative dim-1 means are both zero.
    A check that only compares centroids pooled across windows therefore
    cannot notice that w_bad is inverted. The per-window correction must
    flip w_bad so both windows end up with prog_avg_y > cons_avg_y.
    """
    import importlib

    import duckdb as _duckdb

    def pv(base):
        """Coerce a coordinate list to a float ndarray."""
        return np.array(base, dtype=float)

    # w_good: large left/right spread on dim-0, prog up (+Y), cons down (-Y)
    w_good = {
        # right / conservative
        "Wilders, G.": pv([-3.0, -1.0, 0.0]),
        "Rutte, M.": pv([-3.0, -0.9, 0.0]),
        "van der Staaij, K.": pv([-2.9, -0.95, 0.0]),
        "Omtzigt, P.": pv([-2.8, -0.85, 0.0]),
        # left / progressive
        "Marijnissen, L.": pv([3.0, 1.0, 0.0]),
        "Klever, A.": pv([3.0, 0.9, 0.0]),
        "Bromet, L.": pv([2.9, 0.95, 0.0]),
        "Nijboer, H.": pv([2.8, 0.85, 0.0]),
    }

    # w_bad: same left/right structure but Y is inverted relative to w_good
    # (conservative at +Y, progressive at -Y)
    w_bad = {
        "Wilders, G.": pv([-3.0, 1.0, 0.0]),  # cons at +Y
        "Rutte, M.": pv([-3.0, 0.9, 0.0]),
        "van der Staaij, K.": pv([-2.9, 0.95, 0.0]),
        "Omtzigt, P.": pv([-2.8, 0.85, 0.0]),
        "Marijnissen, L.": pv([3.0, -1.0, 0.0]),  # prog at -Y
        "Klever, A.": pv([3.0, -0.9, 0.0]),
        "Bromet, L.": pv([2.9, -0.95, 0.0]),
        "Nijboer, H.": pv([2.8, -0.85, 0.0]),
    }

    aligned = {"w_good": w_good, "w_bad": w_bad}

    # (mp_name, party) rows handed back by the fake database connection.
    mp_metadata = [
        ("Wilders, G.", "PVV"),
        ("Rutte, M.", "VVD"),
        ("van der Staaij, K.", "SGP"),
        ("Omtzigt, P.", "Nieuw Sociaal Contract"),
        ("Marijnissen, L.", "SP"),
        ("Klever, A.", "GroenLinks-PvdA"),
        ("Bromet, L.", "GroenLinks-PvdA"),
        ("Nijboer, H.", "SP"),
    ]

    fake_traj = _make_fake_traj(aligned)
    monkeypatch.setitem(sys.modules, "analysis.trajectory", fake_traj)

    # Fake connection: every query returns the metadata rows above.
    fake_conn = types.SimpleNamespace(
        execute=lambda q: types.SimpleNamespace(fetchall=lambda: mp_metadata),
        close=lambda: None,
    )
    monkeypatch.setattr(_duckdb, "connect", lambda db_path, **kw: fake_conn)

    # Reload so the module is re-executed with the patched sys.modules entry.
    import analysis.political_axis as _ax

    importlib.reload(_ax)
    from analysis.political_axis import compute_2d_axes

    positions_by_window, axis_def = compute_2d_axes(
        db_path="dummy", window_ids=["w_good", "w_bad"], method="pca"
    )

    prog_mps = {"Marijnissen, L.", "Klever, A.", "Bromet, L.", "Nijboer, H."}
    cons_mps = {"Wilders, G.", "Rutte, M.", "van der Staaij, K.", "Omtzigt, P."}

    for wid in ("w_good", "w_bad"):
        pos = positions_by_window[wid]
        prog_y = np.mean([pos[mp][1] for mp in prog_mps if mp in pos])
        cons_y = np.mean([pos[mp][1] for mp in cons_mps if mp in pos])
        assert prog_y > cons_y, (
            f"Window '{wid}': expected prog_avg_y ({prog_y:.3f}) > cons_avg_y ({cons_y:.3f})"
        )
|
|
|
|
|
|
def test_pca_axis_orientation(monkeypatch):
    """PCA axes must be oriented so right parties score higher on X and
    progressive parties score higher on Y than their respective opposites.

    We construct a minimal vote-matrix world where:
    - Right MPs (PVV, VVD members) cluster in one direction on dim-0.
    - Left MPs (SP, GroenLinks-PvdA members) cluster in the opposite direction.
    - Progressive MPs cluster on dim-1; conservative MPs on the opposite side.

    The orientation logic in compute_2d_axes should flip axis signs so that
    right_x > left_x and prog_y > cons_y regardless of the raw SVD sign.
    """
    import importlib

    import duckdb as _duckdb

    # The desired output has right parties high on X and progressive parties
    # high on Y. The raw vectors are deliberately the NEGATION of that, so the
    # assertions below only hold if auto-orientation flips both axes.
    # Right/left use magnitude 3, prog/cons use magnitude 1 so that dim-0
    # dominates PCA variance — ensuring PC1 = left-right axis, PC2 = prog-cons.
    right_vec = np.array([-3.0, 0.0, 0.0])  # intentionally negative on dim-0
    left_vec = np.array([3.0, 0.0, 0.0])  # intentionally positive on dim-0
    prog_vec = np.array([0.0, -1.0, 0.0])  # intentionally negative on dim-1
    cons_vec = np.array([0.0, 1.0, 0.0])  # intentionally positive on dim-1

    # Small dim-2 offsets keep the two members of each group distinct.
    aligned = {
        "w1": {
            # Right-leaning MPs
            "Wilders, G.": right_vec,
            "Rutte, M.": right_vec + np.array([0.0, 0.0, 0.05]),
            # Left-leaning MPs
            "Marijnissen, L.": left_vec,
            "Klever, A.": left_vec + np.array([0.0, 0.0, 0.05]),
            # Progressive MPs
            "Bromet, L.": prog_vec,
            "Nijboer, H.": prog_vec + np.array([0.0, 0.0, -0.05]),
            # Conservative MPs
            "Segers, G.": cons_vec,
            "Omtzigt, P.": cons_vec + np.array([0.0, 0.0, -0.05]),
        }
    }

    # mp_metadata rows used by the orientation code (party affiliation)
    mp_metadata = [
        ("Wilders, G.", "PVV"),
        ("Rutte, M.", "VVD"),
        ("Marijnissen, L.", "SP"),
        ("Klever, A.", "GroenLinks-PvdA"),
        ("Bromet, L.", "GroenLinks-PvdA"),
        ("Nijboer, H.", "SP"),
        ("Segers, G.", "CDA"),
        ("Omtzigt, P.", "Nieuw Sociaal Contract"),
    ]

    fake_traj = _make_fake_traj(aligned)
    monkeypatch.setitem(sys.modules, "analysis.trajectory", fake_traj)

    # Patch duckdb so the orientation helper can fetch mp_metadata
    fake_conn = types.SimpleNamespace(
        execute=lambda q: types.SimpleNamespace(fetchall=lambda: mp_metadata),
        close=lambda: None,
    )
    monkeypatch.setattr(_duckdb, "connect", lambda db_path, **kw: fake_conn)

    # Need to reload the module so monkeypatched sys.modules takes effect
    import analysis.political_axis as _ax

    importlib.reload(_ax)
    from analysis.political_axis import compute_2d_axes

    positions_by_window, axis_def = compute_2d_axes(
        db_path="dummy", window_ids=["w1"], method="pca"
    )

    pos = positions_by_window["w1"]

    # X-axis: right parties should score higher than left parties
    right_x = np.mean([pos["Wilders, G."][0], pos["Rutte, M."][0]])
    left_x = np.mean([pos["Marijnissen, L."][0], pos["Klever, A."][0]])
    assert right_x > left_x, (
        f"Expected right parties (x={right_x:.3f}) > left parties (x={left_x:.3f}) on X-axis"
    )

    # Y-axis: progressive parties should score higher than conservative parties
    prog_y = np.mean([pos["Bromet, L."][1], pos["Nijboer, H."][1]])
    cons_y = np.mean([pos["Segers, G."][1], pos["Omtzigt, P."][1]])
    assert prog_y > cons_y, (
        f"Expected progressive parties (y={prog_y:.3f}) > conservative parties (y={cons_y:.3f}) on Y-axis"
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests for compute_party_discipline
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _make_mp_votes_db():
    """Create an in-memory DuckDB with mp_votes fixture data.

    6 motions (M001..M006), 2 parties (SP, VVD), each with 4 MPs.
    SP is perfectly disciplined (all 4 vote "voor" on every motion).
    VVD votes "voor" unanimously on motions 1-4; on motions 5 and 6 its last
    MP votes "tegen". The discipline value the tests expect for VVD is the
    mean share of MPs voting with the party majority:
    (4+4+4+4+3+3)/6/4 ≈ 0.917.
    Motion dates run from 2023-01-10 to 2023-11-05, i.e. within 2023.

    Returns:
        An open DuckDB connection holding the populated ``mp_votes`` table.
        The caller owns the connection.
    """
    import duckdb

    conn = duckdb.connect(":memory:")
    conn.execute("""
        CREATE TABLE mp_votes (
            id INTEGER,
            motion_id VARCHAR,
            mp_name VARCHAR,
            party VARCHAR,
            vote VARCHAR,
            date DATE,
            created_at TIMESTAMP
        )
    """)

    dates = [
        "2023-01-10",
        "2023-03-15",
        "2023-05-20",
        "2023-07-25",
        "2023-09-30",
        "2023-11-05",
    ]
    sp_mps = ["Janssen, A.", "Pietersen, B.", "Willemsen, C.", "Hendriksen, D."]
    vvd_mps = ["Adams, E.", "Bakker, F.", "Claassen, G.", "Dekker, H."]
    created_at = "2023-01-01"

    # NOTE: ``id`` is not unique in this fixture — all rows of one party vote
    # on one motion share an id (the column carries no constraint).
    rows = []
    for i, date in enumerate(dates, start=1):
        m_id = f"M{i:03d}"
        rows.extend(
            (i * 10 + 1, m_id, mp, "SP", "voor", date, created_at) for mp in sp_mps
        )
        # From the fifth motion on, the last VVD MP dissents.
        dissident = vvd_mps[3] if i > 4 else None
        for mp in vvd_mps:
            if mp == dissident:
                rows.append((i * 10 + 3, m_id, mp, "VVD", "tegen", date, created_at))
            else:
                rows.append((i * 10 + 2, m_id, mp, "VVD", "voor", date, created_at))

    conn.executemany("INSERT INTO mp_votes VALUES (?, ?, ?, ?, ?, ?, ?)", rows)
    return conn
|
|
|
|
|
|
def test_compute_party_discipline_basic(monkeypatch):
    """compute_party_discipline returns the expected discipline for fixture data.

    SP (always unanimous) must score 1.0; VVD (one dissident out of four on
    two of six motions) must score (4 * 1.0 + 2 * 0.75) / 6.
    """
    import importlib

    import duckdb as _duckdb

    # Every duckdb.connect() call now hands back the in-memory fixture.
    fixture_conn = _make_mp_votes_db()
    monkeypatch.setattr(_duckdb, "connect", lambda path, **kw: fixture_conn)

    if "streamlit" not in sys.modules:
        # Minimal stub so ``explorer`` can be imported when streamlit has not
        # been loaded. Registered through monkeypatch so the stub is removed
        # again at teardown instead of leaking into the rest of the session.
        st_stub = types.ModuleType("streamlit")
        st_stub.cache_data = lambda **kw: lambda f: f
        monkeypatch.setitem(sys.modules, "streamlit", st_stub)

    import explorer as _explorer

    # Re-execute the module under the patches installed above.
    importlib.reload(_explorer)

    df = _explorer.compute_party_discipline(
        db_path="dummy",
        start_date="2023-01-01",
        end_date="2023-12-31",
    )

    assert not df.empty
    assert set(df.columns) >= {"party", "n_motions", "discipline"}

    sp_row = df[df["party"] == "SP"].iloc[0]
    vvd_row = df[df["party"] == "VVD"].iloc[0]

    assert sp_row["n_motions"] == 6
    assert sp_row["discipline"] == pytest.approx(1.0, abs=1e-6)

    assert vvd_row["n_motions"] == 6
    expected_vvd = (4 * 1.0 + 2 * 0.75) / 6
    assert vvd_row["discipline"] == pytest.approx(expected_vvd, abs=1e-4)

    # Discipline is a share and must stay within [0, 1] for every party.
    assert (df["discipline"] >= 0).all() and (df["discipline"] <= 1).all()
|
|
|
|
|
|
def test_compute_party_discipline_empty_range(monkeypatch):
    """Returns empty DataFrame when no motions fall in the date range.

    The fixture only contains motions dated in 2023, so a query for the year
    2000 must yield no rows.
    """
    import importlib

    import duckdb as _duckdb

    # Every duckdb.connect() call now hands back the in-memory fixture.
    fixture_conn = _make_mp_votes_db()
    monkeypatch.setattr(_duckdb, "connect", lambda path, **kw: fixture_conn)

    if "streamlit" not in sys.modules:
        # Minimal stub so ``explorer`` can be imported when streamlit has not
        # been loaded. Registered through monkeypatch so the stub is removed
        # again at teardown instead of leaking into the rest of the session.
        st_stub = types.ModuleType("streamlit")
        st_stub.cache_data = lambda **kw: lambda f: f
        monkeypatch.setitem(sys.modules, "streamlit", st_stub)

    import explorer as _explorer

    # Re-execute the module under the patches installed above.
    importlib.reload(_explorer)

    df = _explorer.compute_party_discipline(
        db_path="dummy",
        start_date="2000-01-01",
        end_date="2000-12-31",
    )

    assert df.empty
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests for analysis.axis_classifier
|
|
# ---------------------------------------------------------------------------
|
|
|
|
import importlib
|
|
|
|
|
|
def _fresh_classifier(monkeypatch):
    """Import axis_classifier with cleared module-level caches.

    ``_ideology_cache`` and ``_coalition_cache`` are set to ``None`` through
    ``monkeypatch``, so the original values are restored at test teardown.

    Args:
        monkeypatch: The pytest ``monkeypatch`` fixture of the calling test.

    Returns:
        The ``analysis.axis_classifier`` module object.
    """
    import analysis.axis_classifier as _cls

    for cache_attr in ("_ideology_cache", "_coalition_cache"):
        monkeypatch.setattr(_cls, cache_attr, None)
    return _cls
|
|
|
|
|
|
def test_axis_label_left_right(tmp_path, monkeypatch):
    """Positions that closely correlate with left_right scores → label 'Links–Rechts'."""
    _cls = _fresh_classifier(monkeypatch)

    # Reference data is written next to the (non-existent) motions.db path
    # that is handed to classify_axes below.
    (tmp_path / "party_ideologies.csv").write_text(
        "party,left_right,progressive\n"
        "VVD,0.65,0.10\n"
        "PvdA,-0.70,0.75\n"
        "SP,-0.90,0.50\n"
        "PVV,0.90,-0.50\n"
        "D66,-0.10,0.85\n"
        "CDA,0.25,-0.45\n",
        encoding="utf-8",
    )
    # Header only: no coalition data for any window.
    (tmp_path / "coalition_membership.csv").write_text(
        "window_id,party\n", encoding="utf-8"
    )

    # X values are the party's left_right scores — perfect correlation
    positions_by_window = {
        "2022": {
            "VVD": (0.65, 0.10),
            "PvdA": (-0.70, 0.20),
            "SP": (-0.90, 0.30),
            "PVV": (0.90, -0.10),
            "D66": (-0.10, 0.40),
            "CDA": (0.25, -0.20),
        }
    }
    axes = {"x_axis": None, "y_axis": None, "method": "pca"}

    result = _cls.classify_axes(positions_by_window, axes, str(tmp_path / "motions.db"))

    assert result["x_label"] == "Links\u2013Rechts"
    assert result["x_quality"]["2022"] >= 0.65
|
|
|
|
|
|
def test_axis_label_coalition_dominant(tmp_path, monkeypatch):
    """Positions that match coalition pattern but NOT left-right → 'Coalitie–Oppositie'."""
    _cls = _fresh_classifier(monkeypatch)

    # Reference data is written next to the (non-existent) motions.db path
    # that is handed to classify_axes below.
    (tmp_path / "party_ideologies.csv").write_text(
        "party,left_right,progressive\n"
        "VVD,0.65,0.10\n"
        "PvdA,-0.70,0.75\n"
        "SP,-0.90,0.50\n"
        "PVV,0.90,-0.50\n"
        "D66,-0.10,0.85\n"
        "CDA,0.25,-0.45\n",
        encoding="utf-8",
    )
    # 2016: Rutte II coalition = VVD + PvdA
    (tmp_path / "coalition_membership.csv").write_text(
        "window_id,party\n2016,VVD\n2016,PvdA\n", encoding="utf-8"
    )

    # Coalition parties (VVD + PvdA) at x ≈ +1, opposition at x ≈ -1.
    # VVD (right) and PvdA (left) are both near +1 → low left_right correlation
    # but high coalition correlation.
    positions_by_window = {
        "2016": {
            "VVD": (0.95, 0.10),
            "PvdA": (0.90, 0.20),
            "SP": (-0.85, 0.30),
            "PVV": (-0.95, -0.10),
            "D66": (-0.80, 0.40),
            "CDA": (-0.75, -0.20),
        }
    }
    axes = {"x_axis": None, "y_axis": None, "method": "pca"}

    result = _cls.classify_axes(positions_by_window, axes, str(tmp_path / "motions.db"))

    assert result["x_label"] == "Coalitie\u2013Oppositie"
    assert "coalitie" in result["x_interpretation"]["2016"].lower()
|
|
|
|
|
|
def test_axis_classifier_missing_csv(tmp_path, monkeypatch):
    """Missing party_ideologies.csv → returns axes dict unchanged, no exception."""
    _cls = _fresh_classifier(monkeypatch)

    # No CSVs written — directory exists but files do not
    positions_by_window = {"2022": {"VVD": (1.0, 0.5), "PvdA": (-1.0, 0.3)}}
    axes = {"x_axis": None, "y_axis": None, "method": "pca"}

    result = _cls.classify_axes(positions_by_window, axes, str(tmp_path / "motions.db"))

    # Must not crash and must return the original axes dict unchanged:
    # the very same object comes back, with no label key added to it.
    assert result is axes
    assert "x_label" not in result
|
|
|