fix: per-window Y-axis correction for political compass

The global orientation check using party centroids averaged across all
windows was insufficient — individual windows (notably 2023) could still
have conservative parties above progressive ones on the Y-axis.

Added a per-window flip in compute_2d_axes (PCA branch) that compares
prog_avg_y with cons_avg_y for each window independently and negates all
Y values in that window when cons_avg_y > prog_avg_y. Flipped window IDs
are stored in axis_def['y_flipped_windows'] for diagnostics.

Moved the canonical party set definitions outside the orientation try-
block so they are always in scope for the per-window correction.

Added test_per_window_y_orientation to cover the case where one window
is globally fine but locally inverted.
main
Sven Geboers 1 month ago
parent 6329d6a256
commit 064cd059d4
  1. 123
      analysis/political_axis.py
  2. 90
      tests/test_political_compass.py

@ -257,38 +257,40 @@ def compute_2d_axes(
"pca_residual_used": bool(pca_residual or evr1 > 0.85),
}
# Canonical party sets used for axis orientation (global and per-window).
# Defined outside the try-block so they're always in scope.
right_parties = {
"PVV",
"VVD",
"FVD",
"BBB",
"JA21",
"Nieuw Sociaal Contract",
}
left_parties = {"SP", "PvdA", "GL", "GroenLinks", "GroenLinks-PvdA", "DENK"}
cons_parties = {
"PVV",
"VVD",
"FVD",
"CDA",
"SGP",
"BBB",
"JA21",
"Nieuw Sociaal Contract",
}
prog_parties = {
"GL",
"GroenLinks",
"PvdA",
"PvdD",
"SP",
"GroenLinks-PvdA",
"DENK",
}
# Ensure consistent left/right and progressive/conservative orientation
# by checking canonical party centroids and flipping axis signs if needed.
try:
right_parties = {
"PVV",
"VVD",
"FVD",
"BBB",
"JA21",
"Nieuw Sociaal Contract",
}
left_parties = {"SP", "PvdA", "GL", "GroenLinks", "GroenLinks-PvdA", "DENK"}
cons_parties = {
"PVV",
"VVD",
"FVD",
"CDA",
"SGP",
"BBB",
"JA21",
"Nieuw Sociaal Contract",
}
prog_parties = {
"GL",
"GroenLinks",
"PvdA",
"PvdD",
"SP",
"GroenLinks-PvdA",
"DENK",
}
# Build mapping of entity -> vector from stacked matrix M
ent_to_vec = {ent: vec for (wid, ent), vec in zip(entity_index, M)}
@ -367,6 +369,69 @@ def compute_2d_axes(
y = float(np.dot(v_centered, axes["y_axis"]))
positions_by_window[wid][ent] = (x, y)
# Per-window Y-axis correction: ensure "positive Y = progressive" holds
# for EACH window individually. The global orientation check above uses
# centroids averaged across all windows, so individual windows (e.g. an
# election year with few returning MPs) can still be inverted. We check
# each window and flip its Y values if conservative parties sit above
# progressive ones.
try:
# Fetch mp_metadata once for the per-window check
_mp_meta_rows: List[Tuple[str, str]] = []
try:
conn = duckdb.connect(db_path)
_mp_meta_rows = conn.execute(
"SELECT mp_name, party FROM mp_metadata"
).fetchall()
conn.close()
except Exception:
pass # no DB available (e.g. unit tests without metadata)
# Map mp_name -> party
_mp_party: Dict[str, str] = {r[0]: r[1] for r in _mp_meta_rows}
y_flipped_windows: set = set()
for wid, pos_dict in positions_by_window.items():
prog_ys = []
cons_ys = []
for ent, (x_val, y_val) in pos_dict.items():
# direct party entity
if ent in prog_parties:
prog_ys.append(y_val)
elif ent in cons_parties:
cons_ys.append(y_val)
# individual MP via metadata lookup
party = _mp_party.get(ent)
if party is not None:
if party in prog_parties:
prog_ys.append(y_val)
elif party in cons_parties:
cons_ys.append(y_val)
if prog_ys and cons_ys:
prog_avg = float(np.mean(prog_ys))
cons_avg = float(np.mean(cons_ys))
if cons_avg > prog_avg:
_logger.info(
"Per-window Y flip for window %s: "
"prog_avg_y=%.3f cons_avg_y=%.3f — negating Y",
wid,
prog_avg,
cons_avg,
)
positions_by_window[wid] = {
ent: (x_val, -y_val)
for ent, (x_val, y_val) in pos_dict.items()
}
y_flipped_windows.add(wid)
axes["y_flipped_windows"] = y_flipped_windows
except Exception:
_logger.debug(
"Per-window Y orientation check failed; leaving per-window Y as-is"
)
return positions_by_window, axes
elif method == "anchor":

@ -57,6 +57,96 @@ def test_compute_2d_axes_pca_synthetic(monkeypatch):
assert axis_def.get("method") == "pca"
def test_per_window_y_orientation(monkeypatch):
    """Every window must end with progressives above conservatives on Y.

    Two windows are passed to ``compute_2d_axes`` with ``method="pca"``.
    Both contain the same eight MPs with identical X coordinates:

    - ``w_good``: progressive MPs at +Y, conservative MPs at -Y.
    - ``w_bad``: the mirror image (conservative at +Y, progressive at -Y).

    Whatever orientation the global check settles on, the two windows
    disagree with each other, so the per-window correction has to flip one
    of them for ``prog_avg_y > cons_avg_y`` to hold in both.

    ``analysis.trajectory`` is replaced by the module built by
    ``_make_fake_traj`` and ``duckdb.connect`` by a stub whose query result
    is the MP -> party table below.

    NOTE(review): both windows hold the same number of MPs, so neither
    window outweighs the other by MP count in a global centroid.
    """
    import importlib
    import types as _types

    # One row per MP: (name, party, x, y-in-w_good). The Y coordinate is
    # negated for w_bad; the third embedding dimension is always 0.
    roster = [
        # right / conservative
        ("Wilders, G.", "PVV", -3.0, -1.0),
        ("Rutte, M.", "VVD", -3.0, -0.9),
        ("van der Staaij, K.", "SGP", -2.9, -0.95),
        ("Omtzigt, P.", "Nieuw Sociaal Contract", -2.8, -0.85),
        # left / progressive
        ("Marijnissen, L.", "SP", 3.0, 1.0),
        ("Klever, A.", "GroenLinks-PvdA", 3.0, 0.9),
        ("Bromet, L.", "GroenLinks-PvdA", 2.9, 0.95),
        ("Nijboer, H.", "SP", 2.8, 0.85),
    ]

    w_good = {
        name: np.array([x, y, 0.0], dtype=float) for name, _party, x, y in roster
    }
    w_bad = {
        name: np.array([x, -y, 0.0], dtype=float) for name, _party, x, y in roster
    }
    aligned = {"w_good": w_good, "w_bad": w_bad}
    mp_metadata = [(name, party) for name, party, _x, _y in roster]

    # Swap in the fake trajectory module before the module under test is
    # (re)imported below.
    monkeypatch.setitem(sys.modules, "analysis.trajectory", _make_fake_traj(aligned))

    # Stub DB connection: any query returns the MP -> party rows.
    def _fake_execute(q):
        return _types.SimpleNamespace(fetchall=lambda: mp_metadata)

    fake_conn = _types.SimpleNamespace(execute=_fake_execute, close=lambda: None)

    def _fake_connect(db_path, **kw):
        return fake_conn

    import duckdb as _duckdb

    monkeypatch.setattr(_duckdb, "connect", _fake_connect)

    # Reload after the patches are in place, then call the reloaded module.
    import analysis.political_axis as _ax

    reloaded = importlib.reload(_ax)
    positions_by_window, axis_def = reloaded.compute_2d_axes(
        db_path="dummy", window_ids=["w_good", "w_bad"], method="pca"
    )

    prog_mps = {"Marijnissen, L.", "Klever, A.", "Bromet, L.", "Nijboer, H."}
    cons_mps = {"Wilders, G.", "Rutte, M.", "van der Staaij, K.", "Omtzigt, P."}

    for wid in ("w_good", "w_bad"):
        pos = positions_by_window[wid]
        prog_y = np.mean([pos[mp][1] for mp in prog_mps if mp in pos])
        cons_y = np.mean([pos[mp][1] for mp in cons_mps if mp in pos])
        assert prog_y > cons_y, (
            f"Window '{wid}': expected prog_avg_y ({prog_y:.3f}) > cons_avg_y ({cons_y:.3f})"
        )
def test_pca_axis_orientation(monkeypatch):
"""PCA axes must be oriented so right parties score higher on X and
progressive parties score higher on Y than their respective opposites.

Loading…
Cancel
Save