diff --git a/analysis/political_axis.py b/analysis/political_axis.py
index 8a8f30a..1e7efb6 100644
--- a/analysis/political_axis.py
+++ b/analysis/political_axis.py
@@ -257,38 +257,40 @@ def compute_2d_axes(
             "pca_residual_used": bool(pca_residual or evr1 > 0.85),
         }
 
+        # Canonical party sets used for axis orientation (global and per-window).
+        # Defined outside the try-block so they're always in scope.
+        right_parties = {
+            "PVV",
+            "VVD",
+            "FVD",
+            "BBB",
+            "JA21",
+            "Nieuw Sociaal Contract",
+        }
+        left_parties = {"SP", "PvdA", "GL", "GroenLinks", "GroenLinks-PvdA", "DENK"}
+        cons_parties = {
+            "PVV",
+            "VVD",
+            "FVD",
+            "CDA",
+            "SGP",
+            "BBB",
+            "JA21",
+            "Nieuw Sociaal Contract",
+        }
+        prog_parties = {
+            "GL",
+            "GroenLinks",
+            "PvdA",
+            "PvdD",
+            "SP",
+            "GroenLinks-PvdA",
+            "DENK",
+        }
+
         # Ensure consistent left/right and progressive/conservative orientation
         # by checking canonical party centroids and flipping axis signs if needed.
         try:
-            right_parties = {
-                "PVV",
-                "VVD",
-                "FVD",
-                "BBB",
-                "JA21",
-                "Nieuw Sociaal Contract",
-            }
-            left_parties = {"SP", "PvdA", "GL", "GroenLinks", "GroenLinks-PvdA", "DENK"}
-            cons_parties = {
-                "PVV",
-                "VVD",
-                "FVD",
-                "CDA",
-                "SGP",
-                "BBB",
-                "JA21",
-                "Nieuw Sociaal Contract",
-            }
-            prog_parties = {
-                "GL",
-                "GroenLinks",
-                "PvdA",
-                "PvdD",
-                "SP",
-                "GroenLinks-PvdA",
-                "DENK",
-            }
-
             # Build mapping of entity -> vector from stacked matrix M
             ent_to_vec = {ent: vec for (wid, ent), vec in zip(entity_index, M)}
 
@@ -367,6 +369,75 @@ def compute_2d_axes(
                 y = float(np.dot(v_centered, axes["y_axis"]))
                 positions_by_window[wid][ent] = (x, y)
 
+        # Per-window Y-axis correction: ensure "positive Y = progressive" holds
+        # for EACH window individually. The global orientation check above uses
+        # centroids averaged across all windows, so individual windows (e.g. an
+        # election year with few returning MPs) can still be inverted. We check
+        # each window and flip its Y values if conservative parties sit above
+        # progressive ones.
+        try:
+            # Fetch mp_metadata once for the per-window check.
+            _mp_meta_rows: List[Tuple[str, str]] = []
+            try:
+                conn = duckdb.connect(db_path)
+                try:
+                    _mp_meta_rows = conn.execute(
+                        "SELECT mp_name, party FROM mp_metadata"
+                    ).fetchall()
+                finally:
+                    # Close even when the query raises (e.g. the table is
+                    # missing) so the connection is not leaked.
+                    conn.close()
+            except Exception:
+                # Best effort: no DB available (e.g. unit tests without
+                # metadata). Only party-named entities are checked then.
+                _logger.debug(
+                    "mp_metadata lookup failed; per-window Y check uses "
+                    "party entities only",
+                    exc_info=True,
+                )
+
+            # Map mp_name -> party
+            _mp_party: Dict[str, str] = {r[0]: r[1] for r in _mp_meta_rows}
+
+            y_flipped_windows: set = set()
+            for wid, pos_dict in positions_by_window.items():
+                prog_ys = []
+                cons_ys = []
+                for ent, (x_val, y_val) in pos_dict.items():
+                    # Resolve each entity to ONE party label so it is never
+                    # counted twice: a direct party entity wins, otherwise
+                    # fall back to the MP's party from the metadata lookup.
+                    if ent in prog_parties or ent in cons_parties:
+                        party = ent
+                    else:
+                        party = _mp_party.get(ent)
+                    if party in prog_parties:
+                        prog_ys.append(y_val)
+                    elif party in cons_parties:
+                        cons_ys.append(y_val)
+
+                if prog_ys and cons_ys:
+                    prog_avg = float(np.mean(prog_ys))
+                    cons_avg = float(np.mean(cons_ys))
+                    if cons_avg > prog_avg:
+                        _logger.info(
+                            "Per-window Y flip for window %s: "
+                            "prog_avg_y=%.3f cons_avg_y=%.3f — negating Y",
+                            wid,
+                            prog_avg,
+                            cons_avg,
+                        )
+                        positions_by_window[wid] = {
+                            ent: (x_val, -y_val)
+                            for ent, (x_val, y_val) in pos_dict.items()
+                        }
+                        y_flipped_windows.add(wid)
+
+            axes["y_flipped_windows"] = y_flipped_windows
+
+        except Exception:
+            _logger.debug(
+                "Per-window Y orientation check failed; leaving per-window Y as-is",
+                exc_info=True,
+            )
+
         return positions_by_window, axes
 
     elif method == "anchor":
diff --git a/tests/test_political_compass.py b/tests/test_political_compass.py
index c67f6db..d5c3f28 100644
--- a/tests/test_political_compass.py
+++ b/tests/test_political_compass.py
@@ -57,6 +57,98 @@ def test_compute_2d_axes_pca_synthetic(monkeypatch):
     assert axis_def.get("method") == "pca"
 
 
+def test_per_window_y_orientation(monkeypatch):
+    """Per-window Y correction must ensure prog_avg_y > cons_avg_y in every window.
+
+    We construct two windows:
+    - w_good: progressive MPs at +Y, conservative MPs at -Y (already correct)
+    - w_bad: conservative MPs at +Y, progressive MPs at -Y (inverted)
+
+    Both windows contain the same eight MPs; w_bad only negates the second
+    embedding dimension, so one global sign choice cannot orient both windows.
+    The per-window correction must therefore flip the inverted window so both
+    end up with prog_avg_y > cons_avg_y.
+    """
+
+    # Helper: build a float vector from a plain list
+    def pv(base):
+        return np.array(base, dtype=float)
+
+    # w_good: large left/right spread on dim-0, prog up (+Y), cons down (-Y)
+    w_good = {
+        # right / conservative
+        "Wilders, G.": pv([-3.0, -1.0, 0.0]),
+        "Rutte, M.": pv([-3.0, -0.9, 0.0]),
+        "van der Staaij, K.": pv([-2.9, -0.95, 0.0]),
+        "Omtzigt, P.": pv([-2.8, -0.85, 0.0]),
+        # left / progressive
+        "Marijnissen, L.": pv([3.0, 1.0, 0.0]),
+        "Klever, A.": pv([3.0, 0.9, 0.0]),
+        "Bromet, L.": pv([2.9, 0.95, 0.0]),
+        "Nijboer, H.": pv([2.8, 0.85, 0.0]),
+    }
+
+    # w_bad: same left/right structure but Y is inverted relative to w_good
+    # (conservative at +Y, progressive at -Y)
+    w_bad = {
+        "Wilders, G.": pv([-3.0, 1.0, 0.0]),  # cons at +Y
+        "Rutte, M.": pv([-3.0, 0.9, 0.0]),
+        "van der Staaij, K.": pv([-2.9, 0.95, 0.0]),
+        "Omtzigt, P.": pv([-2.8, 0.85, 0.0]),
+        "Marijnissen, L.": pv([3.0, -1.0, 0.0]),  # prog at -Y
+        "Klever, A.": pv([3.0, -0.9, 0.0]),
+        "Bromet, L.": pv([2.9, -0.95, 0.0]),
+        "Nijboer, H.": pv([2.8, -0.85, 0.0]),
+    }
+
+    aligned = {"w_good": w_good, "w_bad": w_bad}
+
+    mp_metadata = [
+        ("Wilders, G.", "PVV"),
+        ("Rutte, M.", "VVD"),
+        ("van der Staaij, K.", "SGP"),
+        ("Omtzigt, P.", "Nieuw Sociaal Contract"),
+        ("Marijnissen, L.", "SP"),
+        ("Klever, A.", "GroenLinks-PvdA"),
+        ("Bromet, L.", "GroenLinks-PvdA"),
+        ("Nijboer, H.", "SP"),
+    ]
+
+    fake_traj = _make_fake_traj(aligned)
+    monkeypatch.setitem(sys.modules, "analysis.trajectory", fake_traj)
+
+    import types as _types
+
+    fake_conn = _types.SimpleNamespace(
+        execute=lambda q: _types.SimpleNamespace(fetchall=lambda: mp_metadata),
+        close=lambda: None,
+    )
+    import duckdb as _duckdb
+
+    monkeypatch.setattr(_duckdb, "connect", lambda db_path, **kw: fake_conn)
+
+    import importlib
+    import analysis.political_axis as _ax
+
+    importlib.reload(_ax)
+    from analysis.political_axis import compute_2d_axes
+
+    positions_by_window, axis_def = compute_2d_axes(
+        db_path="dummy", window_ids=["w_good", "w_bad"], method="pca"
+    )
+
+    prog_mps = {"Marijnissen, L.", "Klever, A.", "Bromet, L.", "Nijboer, H."}
+    cons_mps = {"Wilders, G.", "Rutte, M.", "van der Staaij, K.", "Omtzigt, P."}
+
+    for wid in ("w_good", "w_bad"):
+        pos = positions_by_window[wid]
+        prog_y = np.mean([pos[mp][1] for mp in prog_mps if mp in pos])
+        cons_y = np.mean([pos[mp][1] for mp in cons_mps if mp in pos])
+        assert prog_y > cons_y, (
+            f"Window '{wid}': expected prog_avg_y ({prog_y:.3f}) > cons_avg_y ({cons_y:.3f})"
+        )
+
+
 def test_pca_axis_orientation(monkeypatch):
     """PCA axes must be oriented so right parties score higher on X and
     progressive parties score higher on Y than their respective opposites.