# motief/tests/test_political_compass.py

import numpy as np
import types
import sys

import pytest


# ---------------------------------------------------------------------------
# Helpers shared by orientation tests
# ---------------------------------------------------------------------------


def _make_fake_traj(aligned):
    fake = types.SimpleNamespace()
    fake._load_window_ids = lambda db: list(aligned.keys())
    fake._load_mp_vectors_for_window = lambda db, w: aligned.get(w, {})
    fake._procrustes_align_windows = lambda x: aligned
    return fake


def test_compute_2d_axes_pca_synthetic(monkeypatch):
    """Smoke-test ``compute_2d_axes(method="pca")`` with a stubbed trajectory module.

    Verifies that both requested windows come back, that every returned
    position is a finite 2-element coordinate, and that the axis definition
    reports the ``"pca"`` method.
    """
    expected_windows = ("w1", "w2")

    # Two windows, two MPs, three input dimensions.
    aligned = {
        "w1": {"Alice": np.array([1.0, 0.0, 0.0]), "Bob": np.array([0.0, 1.0, 0.0])},
        "w2": {"Alice": np.array([0.8, 0.2, 0.0]), "Bob": np.array([0.1, 0.9, 0.0])},
    }

    # Stand-in for analysis.trajectory: ordered window ids, per-window vectors
    # (used by the padding step) and an alignment helper that simply hands
    # back the vectors above.
    fake_traj = types.SimpleNamespace(
        _load_window_ids=lambda db: list(expected_windows),
        _load_mp_vectors_for_window=lambda db, w: aligned.get(w, {}),
        _procrustes_align_windows=lambda x: aligned,
    )

    # Must be registered before the import below so that
    # analysis.political_axis can pick up the fake module.
    monkeypatch.setitem(sys.modules, "analysis.trajectory", fake_traj)

    from analysis.political_axis import compute_2d_axes

    positions_by_window, axis_def = compute_2d_axes(
        db_path="dummy", window_ids=list(expected_windows), method="pca"
    )

    assert all(wid in positions_by_window for wid in expected_windows)

    for wid in expected_windows:
        for coord in positions_by_window[wid].values():
            assert len(coord) == 2
            assert all(np.isfinite(component) for component in coord)

    assert axis_def.get("method") == "pca"


def test_per_window_y_orientation(monkeypatch):
    """Per-window Y correction must ensure prog_avg_y > cons_avg_y in every window.

    Two windows are built from the same eight MPs:
    - w_good: progressive MPs at positive dim-1, conservative MPs at negative dim-1
    - w_bad:  same dim-0 values, but every dim-1 value is negated (inverted)

    Because w_bad mirrors w_good on dim-1, the progressive and conservative
    dim-1 means pooled over both windows cancel out in the input data. A sign
    chosen once for all windows therefore cannot orient both of them
    (assuming both windows are projected onto the same axes), so the
    assertions below only hold when the Y orientation is corrected per window.
    """

    def pv(base):
        # Float vector for one MP in one window.
        return np.array(base, dtype=float)

    # w_good: large left/right spread on dim-0, prog up (+Y), cons down (-Y)
    w_good = {
        # right / conservative
        "Wilders, G.": pv([-3.0, -1.0, 0.0]),
        "Rutte, M.": pv([-3.0, -0.9, 0.0]),
        "van der Staaij, K.": pv([-2.9, -0.95, 0.0]),
        "Omtzigt, P.": pv([-2.8, -0.85, 0.0]),
        # left / progressive
        "Marijnissen, L.": pv([3.0, 1.0, 0.0]),
        "Klever, A.": pv([3.0, 0.9, 0.0]),
        "Bromet, L.": pv([2.9, 0.95, 0.0]),
        "Nijboer, H.": pv([2.8, 0.85, 0.0]),
    }

    # w_bad: same left/right structure but Y is inverted relative to w_good
    # (conservative at +Y, progressive at -Y)
    w_bad = {
        "Wilders, G.": pv([-3.0, 1.0, 0.0]),  # cons at +Y
        "Rutte, M.": pv([-3.0, 0.9, 0.0]),
        "van der Staaij, K.": pv([-2.9, 0.95, 0.0]),
        "Omtzigt, P.": pv([-2.8, 0.85, 0.0]),
        "Marijnissen, L.": pv([3.0, -1.0, 0.0]),  # prog at -Y
        "Klever, A.": pv([3.0, -0.9, 0.0]),
        "Bromet, L.": pv([2.9, -0.95, 0.0]),
        "Nijboer, H.": pv([2.8, -0.85, 0.0]),
    }

    aligned = {"w_good": w_good, "w_bad": w_bad}

    # (mp_name, party) rows returned by the fake DuckDB connection below.
    mp_metadata = [
        ("Wilders, G.", "PVV"),
        ("Rutte, M.", "VVD"),
        ("van der Staaij, K.", "SGP"),
        ("Omtzigt, P.", "Nieuw Sociaal Contract"),
        ("Marijnissen, L.", "SP"),
        ("Klever, A.", "GroenLinks-PvdA"),
        ("Bromet, L.", "GroenLinks-PvdA"),
        ("Nijboer, H.", "SP"),
    ]

    fake_traj = _make_fake_traj(aligned)
    monkeypatch.setitem(sys.modules, "analysis.trajectory", fake_traj)

    # Any query executed on the fake connection yields the metadata rows.
    fake_conn = types.SimpleNamespace(
        execute=lambda q: types.SimpleNamespace(fetchall=lambda: mp_metadata),
        close=lambda: None,
    )
    import duckdb as _duckdb

    monkeypatch.setattr(_duckdb, "connect", lambda db_path, **kw: fake_conn)

    import importlib

    import analysis.political_axis as _ax

    # Reload so the module is re-executed with the patched sys.modules entry.
    importlib.reload(_ax)
    from analysis.political_axis import compute_2d_axes

    positions_by_window, _axis_def = compute_2d_axes(
        db_path="dummy", window_ids=["w_good", "w_bad"], method="pca"
    )

    # Tuples (not sets) keep the averaging order deterministic between runs.
    prog_mps = ("Marijnissen, L.", "Klever, A.", "Bromet, L.", "Nijboer, H.")
    cons_mps = ("Wilders, G.", "Rutte, M.", "van der Staaij, K.", "Omtzigt, P.")

    for wid in ("w_good", "w_bad"):
        pos = positions_by_window[wid]
        prog_y = np.mean([pos[mp][1] for mp in prog_mps if mp in pos])
        cons_y = np.mean([pos[mp][1] for mp in cons_mps if mp in pos])
        assert prog_y > cons_y, (
            f"Window '{wid}': expected prog_avg_y ({prog_y:.3f}) > cons_avg_y ({cons_y:.3f})"
        )


def test_pca_axis_orientation(monkeypatch):
    """Oriented PCA axes must rank right above left on X and progressive above conservative on Y.

    A single window is built in which:
    - right MPs (PVV, VVD) and left MPs (SP, GroenLinks-PvdA) sit at opposite
      ends of dim-0;
    - a progressive pair and a conservative pair sit at opposite ends of dim-1.

    Each group is placed on the side opposite to the one asserted at the end,
    so passing the input through unchanged would fail; compute_2d_axes is
    expected to choose the axis signs such that right_x > left_x and
    prog_y > cons_y.
    """
    # Dim-0 uses magnitude 3 and dim-1 magnitude 1, so dim-0 has the larger
    # spread: the intent is PC1 = left-right and PC2 = progressive-conservative.
    right_vec = np.array([-3.0, 0.0, 0.0])  # right group on the negative side
    left_vec = np.array([3.0, 0.0, 0.0])  # left group on the positive side
    prog_vec = np.array([0.0, -1.0, 0.0])  # progressive group on the negative side
    cons_vec = np.array([0.0, 1.0, 0.0])  # conservative group on the positive side

    # Small dim-2 offsets so the second member of each pair is not a duplicate.
    nudge_up = np.array([0.0, 0.0, 0.05])
    nudge_down = np.array([0.0, 0.0, -0.05])

    window_vectors = {
        # Right-leaning MPs
        "Wilders, G.": right_vec,
        "Rutte, M.": right_vec + nudge_up,
        # Left-leaning MPs
        "Marijnissen, L.": left_vec,
        "Klever, A.": left_vec + nudge_up,
        # Progressive MPs
        "Bromet, L.": prog_vec,
        "Nijboer, H.": prog_vec + nudge_down,
        # Conservative MPs
        "Segers, G.": cons_vec,
        "Omtzigt, P.": cons_vec + nudge_down,
    }
    aligned = {"w1": window_vectors}

    # (mp_name, party) rows handed out by the fake database connection.
    mp_metadata = [
        ("Wilders, G.", "PVV"),
        ("Rutte, M.", "VVD"),
        ("Marijnissen, L.", "SP"),
        ("Klever, A.", "GroenLinks-PvdA"),
        ("Bromet, L.", "GroenLinks-PvdA"),
        ("Nijboer, H.", "SP"),
        ("Segers, G.", "CDA"),
        ("Omtzigt, P.", "Nieuw Sociaal Contract"),
    ]

    monkeypatch.setitem(sys.modules, "analysis.trajectory", _make_fake_traj(aligned))

    # Fake DuckDB connection: every query returns the metadata rows above.
    def _execute(q):
        return types.SimpleNamespace(fetchall=lambda: mp_metadata)

    fake_conn = types.SimpleNamespace(execute=_execute, close=lambda: None)

    import duckdb as _duckdb

    monkeypatch.setattr(_duckdb, "connect", lambda db_path, **kw: fake_conn)

    # Re-execute the module so the patched sys.modules entry takes effect.
    import importlib

    import analysis.political_axis as _ax

    importlib.reload(_ax)
    from analysis.political_axis import compute_2d_axes

    positions_by_window, axis_def = compute_2d_axes(
        db_path="dummy", window_ids=["w1"], method="pca"
    )
    pos = positions_by_window["w1"]

    def _mean_coord(names, axis):
        # Average one output coordinate over the given MPs.
        return np.mean([pos[mp][axis] for mp in names])

    # X-axis: right parties should score higher than left parties
    right_x = _mean_coord(("Wilders, G.", "Rutte, M."), 0)
    left_x = _mean_coord(("Marijnissen, L.", "Klever, A."), 0)
    assert right_x > left_x, (
        f"Expected right parties (x={right_x:.3f}) > left parties (x={left_x:.3f}) on X-axis"
    )

    # Y-axis: progressive parties should score higher than conservative parties
    prog_y = _mean_coord(("Bromet, L.", "Nijboer, H."), 1)
    cons_y = _mean_coord(("Segers, G.", "Omtzigt, P."), 1)
    assert prog_y > cons_y, (
        f"Expected progressive parties (y={prog_y:.3f}) > conservative parties (y={cons_y:.3f}) on Y-axis"
    )


# ---------------------------------------------------------------------------
# Tests for compute_party_discipline
# ---------------------------------------------------------------------------


def _make_mp_votes_db():
    """Return an in-memory DuckDB connection holding an ``mp_votes`` fixture table.

    Contents: six motions (M001..M006, dated 2023-01-10 through 2023-11-05) and
    two parties with four MPs each.

    - SP: all four MPs vote "voor" on every motion.
    - VVD: unanimous "voor" on motions 1-4; on motions 5 and 6 the fourth MP
      votes "tegen". The mean per-motion majority share is therefore
      (4 + 4 + 4 + 4 + 3 + 3) / (6 * 4) ≈ 0.917.
    """
    import duckdb

    motion_dates = (
        "2023-01-10",
        "2023-03-15",
        "2023-05-20",
        "2023-07-25",
        "2023-09-30",
        "2023-11-05",
    )
    sp_members = ["Janssen, A.", "Pietersen, B.", "Willemsen, C.", "Hendriksen, D."]
    vvd_members = ["Adams, E.", "Bakker, F.", "Claassen, G.", "Dekker, H."]
    created_at = "2023-01-01"

    records = []
    for motion_no, motion_date in enumerate(motion_dates, start=1):
        motion_id = f"M{motion_no:03d}"
        id_base = motion_no * 10

        # SP: always unanimous.
        records.extend(
            (id_base + 1, motion_id, member, "SP", "voor", motion_date, created_at)
            for member in sp_members
        )

        # VVD: the last member breaks ranks from the fifth motion onwards.
        has_dissident = motion_no > 4
        loyal_members = vvd_members[:3] if has_dissident else vvd_members
        records.extend(
            (id_base + 2, motion_id, member, "VVD", "voor", motion_date, created_at)
            for member in loyal_members
        )
        if has_dissident:
            records.append(
                (id_base + 3, motion_id, vvd_members[3], "VVD", "tegen", motion_date, created_at)
            )

    db = duckdb.connect(":memory:")
    db.execute("""
        CREATE TABLE mp_votes (
            id INTEGER,
            motion_id VARCHAR,
            mp_name VARCHAR,
            party VARCHAR,
            vote VARCHAR,
            date DATE,
            created_at TIMESTAMP
        )
    """)
    db.executemany("INSERT INTO mp_votes VALUES (?, ?, ?, ?, ?, ?, ?)", records)
    return db


def test_compute_party_discipline_basic(monkeypatch):
    """compute_party_discipline returns correct Rice index for fixture data.

    Expected values follow from the ``_make_mp_votes_db`` fixture: SP is
    unanimous on all six motions (discipline 1.0); VVD is unanimous on four
    motions and splits 3-1 on two, giving (4 * 1.0 + 2 * 0.75) / 6.
    """
    import importlib

    import duckdb as _duckdb

    fixture_conn = _make_mp_votes_db()

    # Every duckdb.connect() call now hands out the in-memory fixture database.
    monkeypatch.setattr(_duckdb, "connect", lambda path, **kw: fixture_conn)

    if "streamlit" not in sys.modules:
        # Minimal stand-in so ``explorer`` can be imported without Streamlit:
        # ``cache_data(**kw)`` returns a decorator that leaves the function as-is.
        st_stub = types.ModuleType("streamlit")
        st_stub.cache_data = lambda **kw: lambda f: f
        # Registered through monkeypatch so the stub is removed again at
        # teardown instead of leaking into tests that run afterwards.
        monkeypatch.setitem(sys.modules, "streamlit", st_stub)

    import explorer as _explorer

    # Re-execute explorer so it binds to the patched modules.
    importlib.reload(_explorer)

    df = _explorer.compute_party_discipline(
        db_path="dummy",
        start_date="2023-01-01",
        end_date="2023-12-31",
    )

    assert not df.empty
    assert set(df.columns) >= {"party", "n_motions", "discipline"}

    sp_row = df[df["party"] == "SP"].iloc[0]
    vvd_row = df[df["party"] == "VVD"].iloc[0]

    assert sp_row["n_motions"] == 6
    assert sp_row["discipline"] == pytest.approx(1.0, abs=1e-6)

    assert vvd_row["n_motions"] == 6
    expected_vvd = (4 * 1.0 + 2 * 0.75) / 6
    assert vvd_row["discipline"] == pytest.approx(expected_vvd, abs=1e-4)

    # Discipline is a share, so it must stay within [0, 1] for every party.
    assert (df["discipline"] >= 0).all() and (df["discipline"] <= 1).all()


def test_compute_party_discipline_empty_range(monkeypatch):
    """Returns empty DataFrame when no motions fall in the date range.

    The fixture only contains motions dated in 2023; the queried range is the
    year 2000.
    """
    import importlib

    import duckdb as _duckdb

    fixture_conn = _make_mp_votes_db()

    # Every duckdb.connect() call now hands out the in-memory fixture database.
    monkeypatch.setattr(_duckdb, "connect", lambda path, **kw: fixture_conn)

    if "streamlit" not in sys.modules:
        # Minimal stand-in so ``explorer`` can be imported without Streamlit:
        # ``cache_data(**kw)`` returns a decorator that leaves the function as-is.
        st_stub = types.ModuleType("streamlit")
        st_stub.cache_data = lambda **kw: lambda f: f
        # Registered through monkeypatch so the stub is removed again at
        # teardown instead of leaking into tests that run afterwards.
        monkeypatch.setitem(sys.modules, "streamlit", st_stub)

    import explorer as _explorer

    # Re-execute explorer so it binds to the patched modules.
    importlib.reload(_explorer)

    df = _explorer.compute_party_discipline(
        db_path="dummy",
        start_date="2000-01-01",
        end_date="2000-12-31",
    )

    assert df.empty


# ---------------------------------------------------------------------------
# Tests for analysis.axis_classifier
# ---------------------------------------------------------------------------

import importlib


def _fresh_classifier(monkeypatch):
    """Return ``analysis.axis_classifier`` with its two module-level caches reset.

    ``_ideology_cache`` and ``_coalition_cache`` are set to ``None`` through
    ``monkeypatch``, so their previous values are restored at test teardown.
    """
    import analysis.axis_classifier as _cls

    for cache_attr in ("_ideology_cache", "_coalition_cache"):
        monkeypatch.setattr(_cls, cache_attr, None)
    return _cls


def test_axis_label_left_right(tmp_path, monkeypatch):
    """X positions equal to the left_right scores must be labelled 'Links–Rechts'."""
    classifier = _fresh_classifier(monkeypatch)

    # Reference ideology scores, one CSV row per party.
    ideology_lines = [
        "party,left_right,progressive",
        "VVD,0.65,0.10",
        "PvdA,-0.70,0.75",
        "SP,-0.90,0.50",
        "PVV,0.90,-0.50",
        "D66,-0.10,0.85",
        "CDA,0.25,-0.45",
    ]
    ideology_csv = tmp_path / "party_ideologies.csv"
    ideology_csv.write_text("\n".join(ideology_lines) + "\n")

    # Coalition file with a header only: no coalition data for any window.
    coalition_csv = tmp_path / "coalition_membership.csv"
    coalition_csv.write_text("window_id,party\n")

    # Each party's X coordinate is exactly its left_right score above, i.e.
    # the X axis correlates perfectly with the left-right reference.
    party_points = [
        ("VVD", 0.65, 0.10),
        ("PvdA", -0.70, 0.20),
        ("SP", -0.90, 0.30),
        ("PVV", 0.90, -0.10),
        ("D66", -0.10, 0.40),
        ("CDA", 0.25, -0.20),
    ]
    positions_by_window = {"2022": {party: (x, y) for party, x, y in party_points}}
    axes = {"x_axis": None, "y_axis": None, "method": "pca"}
    db_path = str(tmp_path / "motions.db")

    result = classifier.classify_axes(positions_by_window, axes, db_path)

    assert result["x_label"] == "Links\u2013Rechts"
    assert result["x_quality"]["2022"] >= 0.65


def test_axis_label_coalition_dominant(tmp_path, monkeypatch):
    """X positions that split coalition from opposition must be labelled 'Coalitie–Oppositie'."""
    classifier = _fresh_classifier(monkeypatch)

    # Reference ideology scores, one CSV row per party.
    ideology_lines = [
        "party,left_right,progressive",
        "VVD,0.65,0.10",
        "PvdA,-0.70,0.75",
        "SP,-0.90,0.50",
        "PVV,0.90,-0.50",
        "D66,-0.10,0.85",
        "CDA,0.25,-0.45",
    ]
    ideology_csv = tmp_path / "party_ideologies.csv"
    ideology_csv.write_text("\n".join(ideology_lines) + "\n")

    # 2016: Rutte II coalition = VVD + PvdA
    coalition_csv = tmp_path / "coalition_membership.csv"
    coalition_csv.write_text("window_id,party\n2016,VVD\n2016,PvdA\n")

    # The two coalition parties sit near x = +1 and all other parties near
    # x = -1. VVD (right) and PvdA (left) share a side, so X tracks coalition
    # membership rather than the left_right scores.
    party_points = [
        ("VVD", 0.95, 0.10),
        ("PvdA", 0.90, 0.20),
        ("SP", -0.85, 0.30),
        ("PVV", -0.95, -0.10),
        ("D66", -0.80, 0.40),
        ("CDA", -0.75, -0.20),
    ]
    positions_by_window = {"2016": {party: (x, y) for party, x, y in party_points}}
    axes = {"x_axis": None, "y_axis": None, "method": "pca"}
    db_path = str(tmp_path / "motions.db")

    result = classifier.classify_axes(positions_by_window, axes, db_path)

    assert result["x_label"] == "Coalitie\u2013Oppositie"
    interpretation_2016 = result["x_interpretation"]["2016"]
    assert "coalitie" in interpretation_2016.lower()


def test_axis_classifier_missing_csv(tmp_path, monkeypatch):
    """Without party_ideologies.csv, classify_axes returns the input axes dict untouched."""
    classifier = _fresh_classifier(monkeypatch)

    # tmp_path exists but deliberately contains no CSV files.
    db_path = str(tmp_path / "motions.db")
    axes = {"x_axis": None, "y_axis": None, "method": "pca"}
    positions_by_window = {
        "2022": {
            "VVD": (1.0, 0.5),
            "PvdA": (-1.0, 0.3),
        }
    }

    result = classifier.classify_axes(positions_by_window, axes, db_path)

    # No exception, the very same dict object comes back, and no label was added.
    assert result is axes
    assert "x_label" not in result