|
|
import numpy as np
|
|
|
import types
|
|
|
import sys
|
|
|
import types as _types
|
|
|
|
|
|
# Provide a minimal duckdb stub when the real package is not available in the test env
|
|
|
try:
    import duckdb as _duckdb
except Exception:
    # Deliberately broad: whatever stops the real package from loading, the
    # tests fall back to the in-memory fake defined below.
    import pandas as _pd

    class FakeDuckDBConnection:
        """Tiny in-memory stand-in for a DuckDB connection.

        Recognised statement shapes:

        * ``CREATE TABLE ...`` / ``SELECT DISTINCT window_id ...`` -> empty
          ``fetchall()`` result;
        * the party-discipline aggregate (detected by ``rice_per_motion`` or
          its SELECT header) -> recomputed in Python, exposed via ``fetchdf()``;
        * ``INSERT INTO mp_votes`` through :meth:`executemany`.

        Anything else yields an empty ``fetchall()`` result.
        """

        def __init__(self):
            # storage for mp_votes rows: tuples shaped like the ones built by
            # _make_mp_votes_db:
            # (id, motion_id, mp_name, party, vote, date, created_at)
            self._mp_votes = []

        def execute(self, sql, params=None):
            """Dispatch *sql* to one of the canned responses.

            Args:
                sql: SQL text; matched case-insensitively after stripping.
                params: For the discipline query, ``[start_date, end_date]``.

            Returns:
                A ``SimpleNamespace`` exposing ``fetchdf`` (discipline query)
                or ``fetchall`` (everything else).
            """
            s = sql.strip().lower()

            # simple create/select handling: return empty results for schema queries
            if s.startswith(("create table", "select distinct window_id")):
                return _types.SimpleNamespace(fetchall=lambda: [])

            # compute_party_discipline query detection
            if (
                "from rice_per_motion" in s
                or "select\n party,\n count(distinct motion_id) as n_motions" in sql
            ):
                df = self._party_discipline_frame(params)
                return _types.SimpleNamespace(fetchdf=lambda: df)

            # default fallback
            return _types.SimpleNamespace(fetchall=lambda: [])

        def _party_discipline_frame(self, params):
            """Return a DataFrame with columns party / n_motions / discipline.

            ``discipline`` is the mean, over a party's motions, of the share
            of its votes that agree with the party's majority vote.
            """
            from collections import Counter, defaultdict

            # params: [start_date, end_date]
            start_date, end_date = params or [None, None]

            # Mimic the SQL filters: mp_name LIKE '%,%', date range, and
            # vote IN ('voor', 'tegen').
            rows = [r for r in self._mp_votes if "," in (r[2] or "")]
            if start_date:
                # An absent end_date means "no upper bound"; comparing a row
                # date against None would raise TypeError.
                rows = [
                    r
                    for r in rows
                    if r[5] >= start_date and (end_date is None or r[5] <= end_date)
                ]
            rows = [r for r in rows if r[4] in ("voor", "tegen")]

            # counts[motion_id][party] -> Counter({vote: n})
            counts = defaultdict(lambda: defaultdict(Counter))
            for _, motion_id, _, party, vote, _, _ in rows:
                counts[motion_id][party][vote] += 1

            # party -> list of (motion_id, agreement share); every Counter
            # holds at least one vote, so the total is never zero.
            rice_vals = defaultdict(list)
            for motion_id, party_counts in counts.items():
                for party, vc in party_counts.items():
                    total = sum(vc.values())
                    # majority vote: highest count, ties broken by vote name asc
                    _, same = min(vc.items(), key=lambda kv: (-kv[1], kv[0]))
                    rice_vals[party].append((motion_id, same / float(total)))

            # aggregate per party
            rows_out = []
            for party, lst in rice_vals.items():
                n_motions = len({m for m, _ in lst})
                avg_rice = sum(r for _, r in lst) / n_motions if n_motions else 0.0
                rows_out.append(
                    {"party": party, "n_motions": n_motions, "discipline": avg_rice}
                )
            return _pd.DataFrame(rows_out)

        def executemany(self, sql, rows):
            """Store *rows* when *sql* is an ``INSERT INTO mp_votes``; else no-op."""
            s = sql.strip().lower()
            if s.startswith("insert into mp_votes"):
                self._mp_votes.extend(rows)

        def close(self):
            """Nothing to release; present for API compatibility."""
            return None

    _fake_duckdb = _types.ModuleType("duckdb")
    _fake_duckdb.connect = lambda *a, **kw: FakeDuckDBConnection()
    sys.modules["duckdb"] = _fake_duckdb
    _duckdb = _fake_duckdb
|
|
|
|
|
|
# Provide a minimal plotly.express stub so explorer imports in tests without requiring plotly
|
|
|
try:
    import plotly.express as px  # type: ignore
except Exception:
    # Deliberately broad: any import failure falls back to inert stand-ins so
    # modules that import plotly can still be loaded by the tests.
    _px = types.ModuleType("plotly.express")
    _px.scatter = lambda *a, **kw: None
    _px.line = lambda *a, **kw: None

    # stub plotly.graph_objects too
    _go = types.ModuleType("plotly.graph_objects")
    _go.Figure = lambda *a, **kw: None

    # Ensure top-level 'plotly' package exists and exposes both submodules as
    # attributes, so ``plotly.express`` and ``plotly.graph_objects`` resolve
    # by attribute access as well as through sys.modules.
    _plotly_pkg = types.ModuleType("plotly")
    _plotly_pkg.express = _px
    _plotly_pkg.graph_objects = _go

    sys.modules["plotly"] = _plotly_pkg
    sys.modules["plotly.express"] = _px
    sys.modules["plotly.graph_objects"] = _go
    px = _px
|
|
|
|
|
|
# Provide a minimal streamlit stub so explorer imports succeed in the test env
|
|
|
try:
    import streamlit as _st  # noqa: F401
except Exception:
    # Deliberately broad: any import failure falls back to an inert stub so
    # modules that import streamlit can still be loaded by the tests.

    def _stub_cache_data(*args, **kw):
        """Pass-through replacement for ``st.cache_data``.

        Works both as a bare decorator (``@st.cache_data``) and as a
        decorator factory (``@st.cache_data(ttl=...)``); the decorated
        function is returned unchanged in either case.
        """
        if len(args) == 1 and callable(args[0]) and not kw:
            return args[0]
        return lambda f: f

    _st_stub = types.ModuleType("streamlit")
    _st_stub.cache_data = _stub_cache_data

    # Display helpers do nothing and return None.
    for _name in (
        "plotly_chart",
        "markdown",
        "caption",
        "error",
        "warning",
        "info",
        "write",
    ):
        setattr(_st_stub, _name, lambda *a, **kw: None)
    del _name

    sys.modules["streamlit"] = _st_stub
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Helpers shared by orientation tests
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def _make_fake_traj(aligned):
|
|
|
fake = types.SimpleNamespace()
|
|
|
fake._load_window_ids = lambda db: list(aligned.keys())
|
|
|
fake._load_mp_vectors_for_window = lambda db, w: aligned.get(w, {})
|
|
|
fake._procrustes_align_windows = lambda x: aligned
|
|
|
return fake
|
|
|
|
|
|
|
|
|
def test_compute_2d_axes_pca_synthetic(monkeypatch):
    """compute_2d_axes (PCA) yields finite 2-D coordinates for every MP.

    The trajectory module is replaced by a namespace that serves two
    pre-aligned windows, so no database is touched.
    """
    # Pre-aligned vectors for two MPs across two windows.
    aligned = {
        "w1": {"Alice": np.array([1.0, 0.0, 0.0]), "Bob": np.array([0.0, 1.0, 0.0])},
        "w2": {"Alice": np.array([0.8, 0.2, 0.0]), "Bob": np.array([0.1, 0.9, 0.0])},
    }

    # Fake trajectory module: ordered window ids, per-window vectors (needed
    # for the padding step) and an alignment helper returning the vectors as-is.
    fake_traj = types.SimpleNamespace(
        _load_window_ids=lambda db: ["w1", "w2"],
        _load_mp_vectors_for_window=lambda db, w: aligned.get(w, {}),
        _procrustes_align_windows=lambda x: aligned,
    )

    # Make analysis.political_axis pick up the fake on import.
    monkeypatch.setitem(sys.modules, "analysis.trajectory", fake_traj)

    from analysis.political_axis import compute_2d_axes

    positions_by_window, axis_def = compute_2d_axes(
        db_path="dummy", window_ids=["w1", "w2"], method="pca"
    )

    assert all(wid in positions_by_window for wid in ("w1", "w2"))
    for wid in ("w1", "w2"):
        for coord in positions_by_window[wid].values():
            assert len(coord) == 2
            assert all(np.isfinite(c) for c in (coord[0], coord[1]))

    assert axis_def.get("method") == "pca"
|
|
|
|
|
|
|
|
|
def test_per_window_y_orientation(monkeypatch):
    """Per-window Y correction must give prog_avg_y > cons_avg_y in every window.

    Two windows containing the same eight MPs are built:

    - w_good: progressive MPs at +Y, conservative MPs at -Y (already correct)
    - w_bad:  the same vectors with Y negated (conservative at +Y,
      progressive at -Y, i.e. inverted)

    After compute_2d_axes, both windows must have the progressive average
    above the conservative average on Y.
    """
    # (mp name, party, vector in w_good). Large left/right spread on dim-0;
    # conservatives sit at -Y, progressives at +Y.
    roster = [
        # right / conservative
        ("Wilders, G.", "PVV", [-3.0, -1.0, 0.0]),
        ("Rutte, M.", "VVD", [-3.0, -0.9, 0.0]),
        ("van der Staaij, K.", "SGP", [-2.9, -0.95, 0.0]),
        ("Omtzigt, P.", "Nieuw Sociaal Contract", [-2.8, -0.85, 0.0]),
        # left / progressive
        ("Marijnissen, L.", "SP", [3.0, 1.0, 0.0]),
        ("Klever, A.", "GroenLinks-PvdA", [3.0, 0.9, 0.0]),
        ("Bromet, L.", "GroenLinks-PvdA", [2.9, 0.95, 0.0]),
        ("Nijboer, H.", "SP", [2.8, 0.85, 0.0]),
    ]

    w_good = {name: np.array(vec, dtype=float) for name, _, vec in roster}
    # w_bad keeps the left/right structure but mirrors Y.
    mirror_y = np.array([1.0, -1.0, 1.0])
    w_bad = {name: vec * mirror_y for name, vec in w_good.items()}

    aligned = {"w_good": w_good, "w_bad": w_bad}
    mp_metadata = [(name, party) for name, party, _ in roster]

    monkeypatch.setitem(sys.modules, "analysis.trajectory", _make_fake_traj(aligned))

    # Every query against the fake connection returns the party affiliations.
    fake_conn = types.SimpleNamespace(
        execute=lambda q: types.SimpleNamespace(fetchall=lambda: mp_metadata),
        close=lambda: None,
    )

    import duckdb as _duckdb

    monkeypatch.setattr(_duckdb, "connect", lambda db_path, **kw: fake_conn)

    # Reload so the module binds to the patched collaborators.
    import importlib
    import analysis.political_axis as _ax

    _ax = importlib.reload(_ax)

    positions_by_window, axis_def = _ax.compute_2d_axes(
        db_path="dummy", window_ids=["w_good", "w_bad"], method="pca"
    )

    prog_mps = {"Marijnissen, L.", "Klever, A.", "Bromet, L.", "Nijboer, H."}
    cons_mps = {"Wilders, G.", "Rutte, M.", "van der Staaij, K.", "Omtzigt, P."}

    for wid in ("w_good", "w_bad"):
        pos = positions_by_window[wid]
        prog_y = np.mean([pos[mp][1] for mp in prog_mps if mp in pos])
        cons_y = np.mean([pos[mp][1] for mp in cons_mps if mp in pos])
        assert prog_y > cons_y, (
            f"Window '{wid}': expected prog_avg_y ({prog_y:.3f}) > cons_avg_y ({cons_y:.3f})"
        )
|
|
|
|
|
|
|
|
|
def test_pca_axis_orientation(monkeypatch):
    """PCA axes must put right above left on X and progressive above conservative on Y.

    A single window is built in which:

    - right MPs (PVV, VVD) and left MPs (SP, GroenLinks-PvdA) sit at opposite
      ends of dim-0;
    - progressive and conservative MPs sit at opposite ends of dim-1.

    The raw signs are chosen "the wrong way round" on purpose, so the test only
    passes if compute_2d_axes orients the axes such that right_x > left_x and
    prog_y > cons_y.
    """
    # Magnitude 3 on dim-0 versus 1 on dim-1 makes dim-0 dominate the variance,
    # so PC1 should be the left-right axis and PC2 the prog-cons axis.
    right_vec = np.array([-3.0, 0.0, 0.0])  # right: negative on dim-0 on purpose
    left_vec = np.array([3.0, 0.0, 0.0])  # left: positive on dim-0 on purpose
    prog_vec = np.array([0.0, -1.0, 0.0])  # progressive: negative on dim-1 on purpose
    cons_vec = np.array([0.0, 1.0, 0.0])  # conservative: positive on dim-1 on purpose

    def offset_z(base, dz):
        # Second member of each pair differs slightly on dim-2.
        return base + np.array([0.0, 0.0, dz])

    window = {
        # Right-leaning MPs
        "Wilders, G.": right_vec,
        "Rutte, M.": offset_z(right_vec, 0.05),
        # Left-leaning MPs
        "Marijnissen, L.": left_vec,
        "Klever, A.": offset_z(left_vec, 0.05),
        # Progressive MPs
        "Bromet, L.": prog_vec,
        "Nijboer, H.": offset_z(prog_vec, -0.05),
        # Conservative MPs
        "Segers, G.": cons_vec,
        "Omtzigt, P.": offset_z(cons_vec, -0.05),
    }
    aligned = {"w1": window}

    # Party affiliation rows served to the orientation code.
    mp_metadata = [
        ("Wilders, G.", "PVV"),
        ("Rutte, M.", "VVD"),
        ("Marijnissen, L.", "SP"),
        ("Klever, A.", "GroenLinks-PvdA"),
        ("Bromet, L.", "GroenLinks-PvdA"),
        ("Nijboer, H.", "SP"),
        ("Segers, G.", "CDA"),
        ("Omtzigt, P.", "Nieuw Sociaal Contract"),
    ]

    monkeypatch.setitem(sys.modules, "analysis.trajectory", _make_fake_traj(aligned))

    # Patch duckdb so the metadata lookup returns mp_metadata.
    fake_conn = types.SimpleNamespace(
        execute=lambda q: types.SimpleNamespace(fetchall=lambda: mp_metadata),
        close=lambda: None,
    )

    import duckdb as _duckdb

    monkeypatch.setattr(_duckdb, "connect", lambda db_path, **kw: fake_conn)

    # Reload so the monkeypatched sys.modules entries take effect.
    import importlib
    import analysis.political_axis as _ax

    _ax = importlib.reload(_ax)

    positions_by_window, axis_def = _ax.compute_2d_axes(
        db_path="dummy", window_ids=["w1"], method="pca"
    )
    pos = positions_by_window["w1"]

    def mean_coord(first, second, axis):
        return np.mean([pos[first][axis], pos[second][axis]])

    # X-axis: right parties should score higher than left parties
    right_x = mean_coord("Wilders, G.", "Rutte, M.", 0)
    left_x = mean_coord("Marijnissen, L.", "Klever, A.", 0)
    assert right_x > left_x, (
        f"Expected right parties (x={right_x:.3f}) > left parties (x={left_x:.3f}) on X-axis"
    )

    # Y-axis: progressive parties should score higher than conservative parties
    prog_y = mean_coord("Bromet, L.", "Nijboer, H.", 1)
    cons_y = mean_coord("Segers, G.", "Omtzigt, P.", 1)
    assert prog_y > cons_y, (
        f"Expected progressive parties (y={prog_y:.3f}) > conservative parties (y={cons_y:.3f}) on Y-axis"
    )
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Tests for compute_party_discipline
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def _make_mp_votes_db():
    """Return an in-memory DuckDB connection pre-loaded with mp_votes rows.

    Fixture contents: 6 motions, 2 parties (SP, VVD), 4 MPs each.
    SP votes "voor" unanimously on every motion.
    VVD is unanimous on motions 1-4 and has one "tegen" dissenter on motions
    5 and 6, giving an average agreement of (4*1.0 + 2*0.75)/6 ≈ 0.917.
    Motion dates run from 2023-01-10 to 2023-11-05.
    """
    import duckdb

    conn = duckdb.connect(":memory:")
    conn.execute("""
        CREATE TABLE mp_votes (
            id INTEGER,
            motion_id VARCHAR,
            mp_name VARCHAR,
            party VARCHAR,
            vote VARCHAR,
            date DATE,
            created_at TIMESTAMP
        )
    """)

    created = "2023-01-01"
    motion_dates = (
        "2023-01-10",
        "2023-03-15",
        "2023-05-20",
        "2023-07-25",
        "2023-09-30",
        "2023-11-05",
    )
    sp_mps = ["Janssen, A.", "Pietersen, B.", "Willemsen, C.", "Hendriksen, D."]
    vvd_mps = ["Adams, E.", "Bakker, F.", "Claassen, G.", "Dekker, H."]

    rows = []
    for number, day in enumerate(motion_dates, start=1):
        motion = f"M{number:03d}"
        base_id = number * 10

        rows.extend((base_id + 1, motion, mp, "SP", "voor", day, created) for mp in sp_mps)

        # From the fifth motion on, the last VVD member breaks ranks.
        has_dissenter = number > 4
        loyal = vvd_mps[:3] if has_dissenter else vvd_mps
        rows.extend((base_id + 2, motion, mp, "VVD", "voor", day, created) for mp in loyal)
        if has_dissenter:
            rows.append((base_id + 3, motion, vvd_mps[3], "VVD", "tegen", day, created))

    conn.executemany("INSERT INTO mp_votes VALUES (?, ?, ?, ?, ?, ?, ?)", rows)
    return conn
|
|
|
|
|
|
|
|
|
def test_compute_party_discipline_basic(monkeypatch):
    """compute_party_discipline reports the expected discipline per party.

    SP (always unanimous) must score 1.0; VVD (one dissenter on two of six
    motions) must score (4*1.0 + 2*0.75)/6.
    """
    import importlib

    import duckdb as _duckdb

    # Build the fixture first, then route every connect() call to it.
    fixture_conn = _make_mp_votes_db()
    monkeypatch.setattr(_duckdb, "connect", lambda path, **kw: fixture_conn)

    # explorer imports streamlit; provide a minimal stub if none is loaded.
    if "streamlit" not in sys.modules:
        st_stub = types.ModuleType("streamlit")
        st_stub.cache_data = lambda **kw: lambda f: f
        sys.modules["streamlit"] = st_stub

    import explorer as _explorer

    _explorer = importlib.reload(_explorer)

    df = _explorer.compute_party_discipline(
        db_path="dummy",
        start_date="2023-01-01",
        end_date="2023-12-31",
    )

    assert not df.empty
    assert {"party", "n_motions", "discipline"} <= set(df.columns)

    sp_row = df.loc[df["party"] == "SP"].iloc[0]
    vvd_row = df.loc[df["party"] == "VVD"].iloc[0]

    assert sp_row["n_motions"] == 6
    assert sp_row["discipline"] == pytest.approx(1.0, abs=1e-6)

    expected_vvd = (4 * 1.0 + 2 * 0.75) / 6
    assert vvd_row["n_motions"] == 6
    assert vvd_row["discipline"] == pytest.approx(expected_vvd, abs=1e-4)

    # Discipline is a share, so it must stay within [0, 1].
    assert df["discipline"].between(0, 1).all()
|
|
|
|
|
|
|
|
|
def test_compute_party_discipline_empty_range(monkeypatch):
    """A date range containing no motions must produce an empty DataFrame."""
    import importlib

    import duckdb as _duckdb

    # Build the fixture first, then route every connect() call to it.
    fixture_conn = _make_mp_votes_db()
    monkeypatch.setattr(_duckdb, "connect", lambda path, **kw: fixture_conn)

    # explorer imports streamlit; provide a minimal stub if none is loaded.
    if "streamlit" not in sys.modules:
        st_stub = types.ModuleType("streamlit")
        st_stub.cache_data = lambda **kw: lambda f: f
        sys.modules["streamlit"] = st_stub

    import explorer as _explorer

    _explorer = importlib.reload(_explorer)

    # All fixture motions are dated 2023; the year 2000 matches none of them.
    query_window = {"start_date": "2000-01-01", "end_date": "2000-12-31"}
    df = _explorer.compute_party_discipline(db_path="dummy", **query_window)

    assert df.empty
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Tests for analysis.axis_classifier
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
import importlib
|
|
|
|
|
|
|
|
|
def _fresh_classifier(monkeypatch):
    """Return analysis.axis_classifier with its module-level caches reset.

    Both ``_ideology_cache`` and ``_coalition_cache`` are set to None through
    *monkeypatch*, so the original values are restored after the test.
    """
    import analysis.axis_classifier as classifier

    for cache_attr in ("_ideology_cache", "_coalition_cache"):
        monkeypatch.setattr(classifier, cache_attr, None)
    return classifier
|
|
|
|
|
|
|
|
|
def test_axis_label_left_right(tmp_path, monkeypatch):
    """X positions equal to the left_right scores → label 'Verzorgingsstaat–Marktwerking'."""
    classifier = _fresh_classifier(monkeypatch)

    ideology_csv = (
        "party,left_right,progressive\n"
        "VVD,0.65,0.10\n"
        "PvdA,-0.70,0.75\n"
        "SP,-0.90,0.50\n"
        "PVV,0.90,-0.50\n"
        "D66,-0.10,0.85\n"
        "CDA,0.25,-0.45\n"
    )
    (tmp_path / "party_ideologies.csv").write_text(ideology_csv)
    # Header only: no coalition members for any window.
    (tmp_path / "coalition_membership.csv").write_text("window_id,party\n")

    # X values are the party's left_right scores — perfect correlation
    window_positions = {
        "VVD": (0.65, 0.10),
        "PvdA": (-0.70, 0.20),
        "SP": (-0.90, 0.30),
        "PVV": (0.90, -0.10),
        "D66": (-0.10, 0.40),
        "CDA": (0.25, -0.20),
    }
    positions_by_window = {"2022": window_positions}
    axes = {"x_axis": None, "y_axis": None, "method": "pca"}
    db_path = str(tmp_path / "motions.db")

    result = classifier.classify_axes(positions_by_window, axes, db_path)

    assert result["x_label"] == "Verzorgingsstaat–Marktwerking"
    assert result["x_quality"]["2022"] >= 0.65
|
|
|
|
|
|
|
|
|
def test_axis_label_coalition_dominant(tmp_path, monkeypatch):
    """X positions that follow the coalition split, not left-right → 'Coalitie–Oppositie'."""
    classifier = _fresh_classifier(monkeypatch)

    ideology_csv = (
        "party,left_right,progressive\n"
        "VVD,0.65,0.10\n"
        "PvdA,-0.70,0.75\n"
        "SP,-0.90,0.50\n"
        "PVV,0.90,-0.50\n"
        "D66,-0.10,0.85\n"
        "CDA,0.25,-0.45\n"
    )
    (tmp_path / "party_ideologies.csv").write_text(ideology_csv)

    # 2016: Rutte II coalition = VVD + PvdA
    coalition_csv = "window_id,party\n2016,VVD\n2016,PvdA\n"
    (tmp_path / "coalition_membership.csv").write_text(coalition_csv)

    # Coalition parties (VVD + PvdA) sit near x = +1, the opposition near x = -1.
    # Because VVD (right) and PvdA (left) share a side, X correlates poorly
    # with left_right but strongly with coalition membership.
    window_positions = {
        "VVD": (0.95, 0.10),
        "PvdA": (0.90, 0.20),
        "SP": (-0.85, 0.30),
        "PVV": (-0.95, -0.10),
        "D66": (-0.80, 0.40),
        "CDA": (-0.75, -0.20),
    }
    positions_by_window = {"2016": window_positions}
    axes = {"x_axis": None, "y_axis": None, "method": "pca"}
    db_path = str(tmp_path / "motions.db")

    result = classifier.classify_axes(positions_by_window, axes, db_path)

    assert result["x_label"] == "Coalitie\u2013Oppositie"
    interpretation = result["x_interpretation"]["2016"]
    assert "coalitie" in interpretation.lower()
|
|
|
|
|
|
|
|
|
def test_axis_classifier_missing_csv(tmp_path, monkeypatch):
    """Without party_ideologies.csv, classify_axes hands back the same axes dict."""
    classifier = _fresh_classifier(monkeypatch)

    # tmp_path exists but holds no CSV files.
    db_path = str(tmp_path / "motions.db")
    axes = {"x_axis": None, "y_axis": None, "method": "pca"}
    positions_by_window = {"2022": {"VVD": (1.0, 0.5), "PvdA": (-1.0, 0.3)}}

    result = classifier.classify_axes(positions_by_window, axes, db_path)

    # No exception, same object back, and no label added to it.
    assert result is axes
    assert "x_label" not in result
|
|
|
|
|
|
|
|
|
def test_compute_2d_axes_exposes_global_mean(monkeypatch):
    """The axes dict from compute_2d_axes must carry a 'global_mean' ndarray."""
    aligned = {
        "w1": {
            "Alice": np.array([1.0, 0.0, 0.0]),
            "Bob": np.array([-1.0, 0.5, 0.0]),
        }
    }
    fake_traj = types.SimpleNamespace(
        _load_window_ids=lambda db: ["w1"],
        _load_mp_vectors_for_window=lambda db, w: aligned.get(w, {}),
        _procrustes_align_windows=lambda x: aligned,
    )
    monkeypatch.setitem(sys.modules, "analysis.trajectory", fake_traj)

    # Minimal duckdb replacement so importing analysis.political_axis succeeds;
    # every query returns no rows.
    fake_conn = types.SimpleNamespace(
        execute=lambda q: types.SimpleNamespace(fetchall=lambda: []),
        close=lambda: None,
    )
    fake_duckdb = types.SimpleNamespace(connect=lambda db_path, **kw: fake_conn)
    monkeypatch.setitem(sys.modules, "duckdb", fake_duckdb)

    from analysis.political_axis import compute_2d_axes

    result = compute_2d_axes(db_path="dummy", window_ids=["w1"], method="pca")
    axis_def = result[1]

    assert "global_mean" in axis_def
    assert isinstance(axis_def["global_mean"], np.ndarray)
|
|
|
|
|
|
|
|
|
def test_classify_from_titles_left_right():
    """Titles full of left-right keywords classify as 'Verzorgingsstaat–Marktwerking'."""
    from analysis import axis_classifier

    motion_titles = [
        "Motie over asielbeleid",
        "Motie over minimumloon verhoging",
        "Motie over vluchtelingen opvang",
        "Motie over belastingverlaging",
        "Motie over bijstandsuitkering",
    ]

    outcome = axis_classifier._classify_from_titles(motion_titles)
    label, confidence = outcome

    assert label == "Verzorgingsstaat–Marktwerking"
    assert confidence >= 0.4
|
|
|
|
|
|
|
|
|
def test_classify_from_titles_progressive():
    """Titles full of progressive/conservative keywords classify as 'Progressief–Conservatief'."""
    from analysis import axis_classifier

    motion_titles = [
        "Motie over klimaatdoelstellingen",
        "Motie over stikstofbeleid",
        "Motie over duurzame energie",
        "Motie over co2 uitstoot",
        "Motie over energietransitie",
    ]

    outcome = axis_classifier._classify_from_titles(motion_titles)
    label, confidence = outcome

    assert label == "Progressief\u2013Conservatief"
    assert confidence >= 0.4
|
|
|
|
|
|
|
|
|
def test_classify_from_titles_low_confidence():
    """Titles without axis keywords give no label and a confidence below 0.4."""
    from analysis import axis_classifier

    motion_titles = [
        "Motie over sportsubsidie",
        "Motie over bibliotheekregeling",
        "Motie over verkeersveiligheid",
    ]

    outcome = axis_classifier._classify_from_titles(motion_titles)
    label, confidence = outcome

    assert label is None
    assert confidence < 0.4
|
|
|
|
|
|
|
|
|
def test_axis_swap_when_y_is_left_right():
    """_swap_axes must exchange the coordinates and the x_/y_ metadata entries."""
    import explorer

    window = "2023"
    positions_by_window = {
        window: {
            "VVD": (0.5, 0.8),
            "PvdA": (-0.3, -0.6),
        }
    }
    # Economic left-right sits on Y here, which is the situation that calls
    # for a swap.
    axis_def = {
        "x_label": "Progressief\u2013Conservatief",
        "y_label": "Verzorgingsstaat–Marktwerking",
        "x_quality": {"2023": 0.7},
        "y_quality": {"2023": 0.8},
        "x_interpretation": {"2023": "prog interpretation"},
        "y_interpretation": {"2023": "economic interpretation"},
        "x_top_motions": {"2023": {"+": [], "-": []}},
        "y_top_motions": {"2023": {"+": [], "-": []}},
        "x_label_confidence": {"2023": 0.5},
        "y_label_confidence": {"2023": 0.7},
    }

    new_pos, new_ax = explorer._swap_axes(positions_by_window, axis_def)

    # Positions swapped: (x, y) → (y, x)
    swapped = new_pos[window]
    assert swapped["VVD"] == (0.8, 0.5)
    assert swapped["PvdA"] == (-0.6, -0.3)

    # Labels swapped
    assert new_ax["x_label"] == "Verzorgingsstaat–Marktwerking"
    assert new_ax["y_label"] == "Progressief\u2013Conservatief"

    # Quality swapped
    assert new_ax["x_quality"] == {"2023": 0.8}
    assert new_ax["y_quality"] == {"2023": 0.7}
|
|
|
|
|
|
|
|
|
def test_axis_swap_not_applied_when_x_is_left_right():
    """_should_swap_axes is False whenever X already carries the economic label."""
    import explorer

    economic = "Verzorgingsstaat–Marktwerking"

    # X economic, Y progressive/conservative: already in the desired layout.
    already_ordered = {
        "x_label": economic,
        "y_label": "Progressief\u2013Conservatief",
    }
    assert explorer._should_swap_axes(already_ordered) is False

    # Both axes economic: still no swap.
    both_economic = {"x_label": economic, "y_label": economic}
    assert explorer._should_swap_axes(both_economic) is False
|
|
|
|
|
|
|
|
|
def test_axis_swap_not_applied_when_neither_axis_is_left_right():
    """_should_swap_axes is False when no axis carries the economic label."""
    import explorer

    unrelated_labels = {"x_label": "Foo", "y_label": "Bar"}
    missing_labels = {"x_label": None, "y_label": None}

    assert explorer._should_swap_axes(unrelated_labels) is False
    assert explorer._should_swap_axes(missing_labels) is False
|
|
|
|