Compare commits
No commits in common. 'ee8ffea6e26c116d0f0b81df717eac0ee91aafef' and 'c0d1c59bd7d31e29156396f4f2b8125284b6d9c6' have entirely different histories.
ee8ffea6e2
...
c0d1c59bd7
@ -1,659 +0,0 @@ |
|||||||
"""Axis classifier: correlate per-party PCA positions against ideology reference data |
|
||||||
to assign honest, dynamic labels to political compass axes. |
|
||||||
|
|
||||||
Public API: classify_axes(positions_by_window, axes, db_path) -> dict |
|
||||||
""" |
|
||||||
|
|
||||||
import logging |
|
||||||
from collections import Counter |
|
||||||
from pathlib import Path |
|
||||||
from typing import Dict, List, Optional, Tuple |
|
||||||
|
|
||||||
import numpy as np |
|
||||||
import re |
|
||||||
import json |
|
||||||
|
|
||||||
from analysis.svd_labels import get_svd_label, get_fallback_labels |
|
||||||
|
|
||||||
_logger = logging.getLogger(__name__) |
|
||||||
|
|
||||||
# Module-level caches — loaded once per process lifetime. |
|
||||||
_ideology_cache: Optional[Dict[str, Dict[str, float]]] = None |
|
||||||
_coalition_cache: Optional[Dict[str, set]] = None |
|
||||||
|
|
||||||
# Correlation threshold above which we consider an axis "explained" by a dimension. |
|
||||||
_THRESHOLD = 0.65 |
|
||||||
|
|
||||||
_LABELS = { |
|
||||||
"lr": "Verzorgingsstaat–Marktwerking", |
|
||||||
"eu": "EU-integratie–Nationalisme", |
|
||||||
"pi": "Populistisch–Institutioneel", |
|
||||||
"co": "Coalitie–Oppositie", |
|
||||||
"pc": "Conservatief–Progressief", |
|
||||||
# When we have no interpretable classifier signal, fall back to the known |
|
||||||
# SVD component meanings rather than generic "As N" labels. |
|
||||||
"fallback_x": get_svd_label(1), |
|
||||||
"fallback_y": get_svd_label(2), |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
# Module-level helper: map internal/modal labels to user-facing labels. |
|
||||||
# Remove duplicate lower definition (keep the one at the top) |
|
||||||
|
|
||||||
|
|
||||||
def display_label_for_modal(modal_label: Optional[str], axis: str) -> str: |
|
||||||
"""Return a user-facing axis label for a modal/internal label. |
|
||||||
|
|
||||||
Maps numeric fallback names 'As 1' / 'Stempatroon As 1' to the |
|
||||||
semantic labels from SVD_THEMES. Any other label is returned unchanged. |
|
||||||
None is treated as the semantic fallback for the axis. |
|
||||||
""" |
|
||||||
if modal_label is None: |
|
||||||
# Fallback to component 1 (x) or 2 (y) |
|
||||||
comp = 1 if axis == "x" else 2 |
|
||||||
return get_svd_label(comp) |
|
||||||
|
|
||||||
# Map "As 1" / "As 2" to semantic labels |
|
||||||
if axis == "x" and modal_label in ("As 1", "Stempatroon As 1"): |
|
||||||
return get_svd_label(1) |
|
||||||
if axis == "y" and modal_label in ("As 2", "Stempatroon As 2"): |
|
||||||
return get_svd_label(2) |
|
||||||
|
|
||||||
return modal_label |
|
||||||
|
|
||||||
|
|
||||||
_INTERPRETATION_TEMPLATES = { |
|
||||||
"lr": "De {orientation} as weerspiegelt de economische tegenstelling tussen verzorgingsstaat en marktwerking.", |
|
||||||
"eu": "De {orientation} as weerspiegelt de tegenstelling tussen EU-integratie/internationalisme en nationalisme/soevereiniteit.", |
|
||||||
"pi": "De {orientation} as scheidt populistisch-nationalistische partijen van het institutioneel-parlementaire establishment.", |
|
||||||
"co": ( |
|
||||||
"De {orientation} as weerspiegelt stemgedrag van coalitie- versus " |
|
||||||
"oppositiepartijen (r={r:.2f}). Ideologische tegenstellingen zijn minder dominant dit jaar." |
|
||||||
), |
|
||||||
"pc": "De {orientation} as weerspiegelt de progressief-conservatieve tegenstelling.", |
|
||||||
} |
|
||||||
|
|
||||||
# Maps motion-path keyword labels to _INTERPRETATION_TEMPLATES keys. |
|
||||||
# Labels not present here fall back to "fallback". |
|
||||||
_MOTION_LABEL_TEMPLATE_KEY: Dict[str, str] = { |
|
||||||
"Verzorgingsstaat–Marktwerking": "lr", |
|
||||||
"EU-integratie–Nationalisme": "eu", |
|
||||||
"Populistisch–Institutioneel": "pi", |
|
||||||
"Progressief–Conservatief": "pc", |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
# Simple keyword-based classifier for motion titles (fallback signal) |
|
||||||
_KEYWORD_THRESHOLD = 0.4 |
|
||||||
|
|
||||||
_KEYWORDS: Dict[str, List[str]] = { |
|
||||||
"Verzorgingsstaat–Marktwerking": [ |
|
||||||
# economic / welfare state |
|
||||||
"belasting", |
|
||||||
"uitkering", |
|
||||||
"bijstand", |
|
||||||
"minimumloon", |
|
||||||
"cao", |
|
||||||
"vakbond", |
|
||||||
"bezuiniging", |
|
||||||
"privatisering", |
|
||||||
"subsidie", |
|
||||||
"pensioen", |
|
||||||
"aow", |
|
||||||
"zorg", |
|
||||||
"huur", |
|
||||||
"woning", |
|
||||||
"sociaal", |
|
||||||
"werkloos", |
|
||||||
"ww", |
|
||||||
"arbeidsongeschik", |
|
||||||
"wao", |
|
||||||
"gemeentefonds", |
|
||||||
], |
|
||||||
"EU-integratie–Nationalisme": [ |
|
||||||
# EU and international cooperation |
|
||||||
"europees", |
|
||||||
"europese", |
|
||||||
" eu ", |
|
||||||
"eu-", |
|
||||||
"verdrag", |
|
||||||
"intergouvernementeel", |
|
||||||
"samenwerking", |
|
||||||
"internationaal", |
|
||||||
"navo", |
|
||||||
"nato", |
|
||||||
" vn ", |
|
||||||
"vn-", |
|
||||||
"sancties", |
|
||||||
"israël", |
|
||||||
"vluchteling", |
|
||||||
"asiel", |
|
||||||
"soevereiniteit", |
|
||||||
"nationaal", |
|
||||||
], |
|
||||||
"Populistisch–Institutioneel": [ |
|
||||||
# Populist/nationalist themes |
|
||||||
"terugsturen", |
|
||||||
"syrië", |
|
||||||
"syrier", |
|
||||||
"grenzen dicht", |
|
||||||
"remigratie", |
|
||||||
"eigen volk", |
|
||||||
"nederland eerst", |
|
||||||
"corona", |
|
||||||
"vaccin", |
|
||||||
"ivermectine", |
|
||||||
"hydroxychloroquine", |
|
||||||
"complot", |
|
||||||
"deep state", |
|
||||||
"establishment", |
|
||||||
"elite", |
|
||||||
"herstelbetaling", |
|
||||||
"excuses", |
|
||||||
], |
|
||||||
"Progressief–Conservatief": [ |
|
||||||
# environment |
|
||||||
"klimaat", |
|
||||||
"stikstof", |
|
||||||
"duurzaam", |
|
||||||
"duurzaamheid", |
|
||||||
"co2", |
|
||||||
"energietransitie", |
|
||||||
"biodiversiteit", |
|
||||||
# social |
|
||||||
"euthanasie", |
|
||||||
"abortus", |
|
||||||
"lgbtq", |
|
||||||
"transgender", |
|
||||||
"diversiteit", |
|
||||||
"traditi", |
|
||||||
"gezin", |
|
||||||
"religie", |
|
||||||
"geloof", |
|
||||||
], |
|
||||||
} |
|
||||||
|
|
||||||
# Pre-compiled regexes for keyword matching. We escape keywords but do NOT add |
|
||||||
# word-boundaries because some keywords intentionally match substrings |
|
||||||
# (e.g. 'traditi' matching 'tradities'). re.IGNORECASE makes lowercasing |
|
||||||
# unnecessary during matching. |
|
||||||
_KEYWORD_REGEXES: Dict[str, "re.Pattern[str]"] = { |
|
||||||
cat: re.compile( |
|
||||||
"|".join(re.escape(kw.strip()) for kw in kws), |
|
||||||
re.IGNORECASE, |
|
||||||
) |
|
||||||
for cat, kws in _KEYWORDS.items() |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
def _classify_from_titles(titles: List[str]) -> Tuple[Optional[str], float]: |
|
||||||
"""Classify a list of motion titles into an axis category using keyword matching. |
|
||||||
|
|
||||||
Returns (category_label, confidence) where confidence = fraction of titles |
|
||||||
containing at least one keyword from the winning category. |
|
||||||
Returns (None, confidence) if confidence is below _KEYWORD_THRESHOLD. |
|
||||||
""" |
|
||||||
if not titles: |
|
||||||
return None, 0.0 |
|
||||||
|
|
||||||
counts: Dict[str, int] = {cat: 0 for cat in _KEYWORDS} |
|
||||||
for title in titles: |
|
||||||
for cat, rx in _KEYWORD_REGEXES.items(): |
|
||||||
if rx.search(title): |
|
||||||
counts[cat] += 1 |
|
||||||
|
|
||||||
# Determine the best category, but be deterministic on ties: if more than |
|
||||||
# one category has the top count, return None to indicate ambiguity. |
|
||||||
best_count = max(counts.values()) |
|
||||||
best_cats = [cat for cat, cnt in counts.items() if cnt == best_count] |
|
||||||
confidence = best_count / len(titles) |
|
||||||
|
|
||||||
if len(best_cats) != 1 or confidence < _KEYWORD_THRESHOLD: |
|
||||||
return None, confidence |
|
||||||
|
|
||||||
return best_cats[0], confidence |
|
||||||
|
|
||||||
|
|
||||||
def _load_motion_vectors(db_path: str, window_id: str) -> Dict[int, np.ndarray]: |
|
||||||
"""Load SVD motion vectors for a given window from DuckDB. |
|
||||||
|
|
||||||
Returns {motion_id: vector_array}. Returns {} on any error. |
|
||||||
""" |
|
||||||
try: |
|
||||||
import duckdb |
|
||||||
|
|
||||||
conn = duckdb.connect(db_path, read_only=True) |
|
||||||
try: |
|
||||||
rows = conn.execute( |
|
||||||
"SELECT entity_id, vector FROM svd_vectors " |
|
||||||
"WHERE entity_type = 'motion' AND window_id = ?", |
|
||||||
[window_id], |
|
||||||
).fetchall() |
|
||||||
finally: |
|
||||||
conn.close() |
|
||||||
result: Dict[int, np.ndarray] = {} |
|
||||||
for entity_id, vector_raw in rows: |
|
||||||
try: |
|
||||||
mid = int(entity_id) |
|
||||||
vec = np.array(json.loads(vector_raw), dtype=float) |
|
||||||
result[mid] = vec |
|
||||||
except Exception: |
|
||||||
continue |
|
||||||
return result |
|
||||||
except Exception as exc: |
|
||||||
_logger.debug("Failed to load motion vectors for window %s: %s", window_id, exc) |
|
||||||
return {} |
|
||||||
|
|
||||||
|
|
||||||
def _project_motions( |
|
||||||
motion_vecs: Dict[int, np.ndarray], |
|
||||||
x_axis: np.ndarray, |
|
||||||
y_axis: np.ndarray, |
|
||||||
global_mean: np.ndarray, |
|
||||||
) -> Dict[int, Tuple[float, float]]: |
|
||||||
"""Project motion vectors onto the PCA axes after centering by global_mean. |
|
||||||
|
|
||||||
Returns {motion_id: (x_score, y_score)}. |
|
||||||
""" |
|
||||||
try: |
|
||||||
projections: Dict[int, Tuple[float, float]] = {} |
|
||||||
for mid, vec in motion_vecs.items(): |
|
||||||
try: |
|
||||||
centered = vec - global_mean |
|
||||||
x_score = float(np.dot(centered, x_axis)) |
|
||||||
y_score = float(np.dot(centered, y_axis)) |
|
||||||
projections[mid] = (x_score, y_score) |
|
||||||
except Exception: |
|
||||||
continue |
|
||||||
return projections |
|
||||||
except Exception as exc: |
|
||||||
_logger.debug("Failed to project motions: %s", exc) |
|
||||||
return {} |
|
||||||
|
|
||||||
|
|
||||||
def _top_motion_ids( |
|
||||||
projections: Dict[int, Tuple[float, float]], |
|
||||||
axis: str, |
|
||||||
n: int = 5, |
|
||||||
) -> Dict[str, List[int]]: |
|
||||||
"""Return the top-n motion IDs at each pole of the given axis. |
|
||||||
|
|
||||||
axis: 'x' or 'y' |
|
||||||
Returns {'+': [motion_ids], '-': [motion_ids]} (highest positive first, |
|
||||||
most negative first in the '-' list). |
|
||||||
""" |
|
||||||
try: |
|
||||||
if axis not in ("x", "y"): |
|
||||||
raise ValueError("axis must be 'x' or 'y'") |
|
||||||
idx = 0 if axis == "x" else 1 |
|
||||||
sorted_ids = sorted(projections, key=lambda mid: projections[mid][idx]) |
|
||||||
neg_ids = sorted_ids[:n] |
|
||||||
pos_ids = sorted_ids[-n:][::-1] |
|
||||||
return {"+": pos_ids, "-": neg_ids} |
|
||||||
except Exception as exc: |
|
||||||
_logger.debug("Failed to compute top_motion_ids: %s", exc) |
|
||||||
return {"+": [], "-": []} |
|
||||||
|
|
||||||
|
|
||||||
def _fetch_motion_titles( |
|
||||||
db_path: str, |
|
||||||
motion_ids: List[int], |
|
||||||
) -> Dict[int, Tuple[str, str]]: |
|
||||||
"""Fetch (title, date) for a list of motion IDs from DuckDB. |
|
||||||
|
|
||||||
Returns {motion_id: (title, date_str)}. Missing IDs are omitted. |
|
||||||
Returns {} on any DB error. |
|
||||||
""" |
|
||||||
if not motion_ids: |
|
||||||
return {} |
|
||||||
try: |
|
||||||
import duckdb |
|
||||||
|
|
||||||
placeholders = ", ".join("?" for _ in motion_ids) |
|
||||||
conn = duckdb.connect(db_path, read_only=True) |
|
||||||
try: |
|
||||||
rows = conn.execute( |
|
||||||
f"SELECT id, title, date FROM motions WHERE id IN ({placeholders})", |
|
||||||
motion_ids, |
|
||||||
).fetchall() |
|
||||||
finally: |
|
||||||
conn.close() |
|
||||||
return {int(row[0]): (str(row[1]), str(row[2])) for row in rows} |
|
||||||
except Exception as exc: |
|
||||||
_logger.debug("Failed to fetch motion titles: %s", exc) |
|
||||||
return {} |
|
||||||
|
|
||||||
|
|
||||||
def _load_ideology(csv_path: Path) -> Dict[str, Dict[str, float]]: |
|
||||||
"""Load party ideology scores from CSV. |
|
||||||
|
|
||||||
Returns {party_name: {"left_right": float, "progressive": float}}. |
|
||||||
Returns {} on any error (caller should treat empty as 'skip classification'). |
|
||||||
""" |
|
||||||
global _ideology_cache |
|
||||||
if _ideology_cache is not None: |
|
||||||
return _ideology_cache |
|
||||||
result: Dict[str, Dict[str, float]] = {} |
|
||||||
try: |
|
||||||
with open(csv_path, encoding="utf-8") as fh: |
|
||||||
lines = fh.read().splitlines() |
|
||||||
header = [h.strip() for h in lines[0].split(",")] |
|
||||||
lr_idx = header.index("left_right") |
|
||||||
pc_idx = header.index("progressive") |
|
||||||
for line in lines[1:]: |
|
||||||
if not line.strip(): |
|
||||||
continue |
|
||||||
parts = [p.strip() for p in line.split(",")] |
|
||||||
if len(parts) <= max(lr_idx, pc_idx): |
|
||||||
continue |
|
||||||
result[parts[0]] = { |
|
||||||
"left_right": float(parts[lr_idx]), |
|
||||||
"progressive": float(parts[pc_idx]), |
|
||||||
} |
|
||||||
except FileNotFoundError: |
|
||||||
_logger.warning( |
|
||||||
"party_ideologies.csv not found at %s — axis labels will be generic", |
|
||||||
csv_path, |
|
||||||
) |
|
||||||
return {} |
|
||||||
except Exception as exc: |
|
||||||
_logger.warning("Failed to load party_ideologies.csv: %s", exc) |
|
||||||
return {} |
|
||||||
_ideology_cache = result |
|
||||||
return result |
|
||||||
|
|
||||||
|
|
||||||
def _load_coalition(csv_path: Path) -> Dict[str, set]: |
|
||||||
"""Load coalition membership from CSV. |
|
||||||
|
|
||||||
Returns {window_id: set_of_party_names}. |
|
||||||
Returns {} on any error (coalition dimension will be skipped). |
|
||||||
""" |
|
||||||
global _coalition_cache |
|
||||||
if _coalition_cache is not None: |
|
||||||
return _coalition_cache |
|
||||||
result: Dict[str, set] = {} |
|
||||||
try: |
|
||||||
with open(csv_path, encoding="utf-8") as fh: |
|
||||||
lines = fh.read().splitlines() |
|
||||||
for line in lines[1:]: |
|
||||||
if not line.strip(): |
|
||||||
continue |
|
||||||
parts = [p.strip() for p in line.split(",")] |
|
||||||
if len(parts) < 2: |
|
||||||
continue |
|
||||||
wid, party = parts[0], parts[1] |
|
||||||
result.setdefault(wid, set()).add(party) |
|
||||||
except FileNotFoundError: |
|
||||||
_logger.warning( |
|
||||||
"coalition_membership.csv not found at %s — coalition axis detection disabled", |
|
||||||
csv_path, |
|
||||||
) |
|
||||||
return {} |
|
||||||
except Exception as exc: |
|
||||||
_logger.warning("Failed to load coalition_membership.csv: %s", exc) |
|
||||||
return {} |
|
||||||
_coalition_cache = result |
|
||||||
return result |
|
||||||
|
|
||||||
|
|
||||||
def _window_year(window_id: str) -> Optional[str]: |
|
||||||
"""Extract year string from window_id. |
|
||||||
|
|
||||||
Returns None for 'current_parliament'. |
|
||||||
'2016' → '2016', '2016-Q3' → '2016'. |
|
||||||
""" |
|
||||||
if window_id == "current_parliament": |
|
||||||
return None |
|
||||||
return window_id.split("-")[0] |
|
||||||
|
|
||||||
|
|
||||||
def _pearsonr(x: List[float], y: List[float]) -> float: |
|
||||||
"""Pearson r; returns 0.0 for degenerate input (< 3 points or zero variance).""" |
|
||||||
if len(x) < 3: |
|
||||||
return 0.0 |
|
||||||
xa = np.array(x, dtype=float) |
|
||||||
ya = np.array(y, dtype=float) |
|
||||||
if xa.std() < 1e-12 or ya.std() < 1e-12: |
|
||||||
return 0.0 |
|
||||||
return float(np.corrcoef(xa, ya)[0, 1]) |
|
||||||
|
|
||||||
|
|
||||||
def _assign_label( |
|
||||||
r_lr: float, |
|
||||||
r_co: float, |
|
||||||
r_pc: float, |
|
||||||
axis: str, |
|
||||||
) -> Tuple[str, str, float]: |
|
||||||
"""Assign label, interpretation and quality score for one axis. |
|
||||||
|
|
||||||
Priority: left-right > coalition > progressive > fallback. |
|
||||||
Returns (label, interpretation_string, quality_score). |
|
||||||
""" |
|
||||||
orientation = "horizontale" if axis == "x" else "verticale" |
|
||||||
_x_fallback, _y_fallback = get_fallback_labels() |
|
||||||
fallback_label = _x_fallback if axis == "x" else _y_fallback |
|
||||||
quality = max(abs(r_lr), abs(r_co), abs(r_pc)) |
|
||||||
|
|
||||||
if abs(r_lr) >= _THRESHOLD: |
|
||||||
return ( |
|
||||||
_LABELS["lr"], |
|
||||||
_INTERPRETATION_TEMPLATES["lr"].format(orientation=orientation), |
|
||||||
quality, |
|
||||||
) |
|
||||||
if abs(r_co) >= _THRESHOLD: |
|
||||||
return ( |
|
||||||
_LABELS["co"], |
|
||||||
_INTERPRETATION_TEMPLATES["co"].format(orientation=orientation, r=r_co), |
|
||||||
quality, |
|
||||||
) |
|
||||||
if abs(r_pc) >= _THRESHOLD: |
|
||||||
return ( |
|
||||||
_LABELS["pc"], |
|
||||||
_INTERPRETATION_TEMPLATES["pc"].format(orientation=orientation), |
|
||||||
quality, |
|
||||||
) |
|
||||||
return ( |
|
||||||
fallback_label, |
|
||||||
"", # No interpretation for unclassified axes |
|
||||||
quality, |
|
||||||
) |
|
||||||
|
|
||||||
|
|
||||||
def classify_axes( |
|
||||||
positions_by_window: Dict[str, Dict[str, Tuple[float, float]]], |
|
||||||
axes: dict, |
|
||||||
db_path: str, |
|
||||||
) -> dict: |
|
||||||
"""Classify compass axes using motion projection (primary) and ideology CSV (fallback). |
|
||||||
|
|
||||||
Motion projection path: |
|
||||||
- Requires axes["global_mean"], axes["x_axis"], axes["y_axis"]. |
|
||||||
- Loads motion SVD vectors per window, projects onto PCA axes, |
|
||||||
ranks top 5+5 motions, applies keyword classifier -> label. |
|
||||||
|
|
||||||
Fallback path (unchanged): |
|
||||||
- Pearson-r against party_ideologies.csv (left_right, progressive). |
|
||||||
- Pearson-r against coalition_membership.csv dummy. |
|
||||||
|
|
||||||
Enriches axes with: |
|
||||||
x_label, y_label — global modal label across annual windows |
|
||||||
x_quality, y_quality — {window_id: float} max |r| |
|
||||||
x_interpretation — {window_id: str} |
|
||||||
y_interpretation — {window_id: str} |
|
||||||
x_top_motions, y_top_motions — {window_id: {'+': [(title, date), ...], '-': [...]}} |
|
||||||
x_label_confidence — {window_id: float} |
|
||||||
y_label_confidence — {window_id: float} |
|
||||||
""" |
|
||||||
data_dir = Path(db_path).parent |
|
||||||
ideology = _load_ideology(data_dir / "party_ideologies.csv") |
|
||||||
coalition = _load_coalition(data_dir / "coalition_membership.csv") |
|
||||||
|
|
||||||
# Determine whether motion projection is possible. |
|
||||||
global_mean = axes.get("global_mean") |
|
||||||
x_axis_arr = np.array(axes.get("x_axis", [])) |
|
||||||
y_axis_arr = np.array(axes.get("y_axis", [])) |
|
||||||
motion_path_available = ( |
|
||||||
global_mean is not None |
|
||||||
and x_axis_arr.ndim == 1 |
|
||||||
and x_axis_arr.size > 0 |
|
||||||
and y_axis_arr.size > 0 |
|
||||||
) |
|
||||||
|
|
||||||
# If we have neither ideology reference data nor motion vectors available, |
|
||||||
# there is nothing to classify. Previously an early-exit below could be |
|
||||||
# shadowed by a nested helper definition causing classify_axes to return |
|
||||||
# None. Ensure we return the original axes dict in this case. |
|
||||||
if not ideology and not motion_path_available: |
|
||||||
return axes |
|
||||||
|
|
||||||
x_quality: Dict[str, float] = {} |
|
||||||
y_quality: Dict[str, float] = {} |
|
||||||
x_interpretation: Dict[str, str] = {} |
|
||||||
y_interpretation: Dict[str, str] = {} |
|
||||||
x_top_motions: Dict[str, Dict] = {} |
|
||||||
y_top_motions: Dict[str, Dict] = {} |
|
||||||
x_label_confidence: Dict[str, float] = {} |
|
||||||
y_label_confidence: Dict[str, float] = {} |
|
||||||
annual_x_labels: List[str] = [] |
|
||||||
annual_y_labels: List[str] = [] |
|
||||||
|
|
||||||
for wid, pos_dict in positions_by_window.items(): |
|
||||||
year = _window_year(wid) |
|
||||||
is_annual = wid != "current_parliament" and "-" not in wid |
|
||||||
|
|
||||||
# ── Ideology / coalition Pearson-r (unchanged logic) ────────────────── |
|
||||||
x_lbl_fallback: Optional[str] = None |
|
||||||
y_lbl_fallback: Optional[str] = None |
|
||||||
x_q = 0.0 |
|
||||||
y_q = 0.0 |
|
||||||
x_int = "" |
|
||||||
y_int = "" |
|
||||||
|
|
||||||
if ideology: |
|
||||||
parties = [p for p in pos_dict if p in ideology] |
|
||||||
if len(parties) >= 5: |
|
||||||
party_x = [pos_dict[p][0] for p in parties] |
|
||||||
party_y = [pos_dict[p][1] for p in parties] |
|
||||||
ref_lr = [ideology[p]["left_right"] for p in parties] |
|
||||||
ref_pc = [ideology[p]["progressive"] for p in parties] |
|
||||||
|
|
||||||
if year and coalition and year in coalition: |
|
||||||
gov_set = coalition[year] |
|
||||||
ref_co = [1.0 if p in gov_set else -1.0 for p in parties] |
|
||||||
else: |
|
||||||
ref_co = [0.0] * len(parties) |
|
||||||
|
|
||||||
r_lr_x = _pearsonr(party_x, ref_lr) |
|
||||||
r_co_x = _pearsonr(party_x, ref_co) |
|
||||||
r_pc_x = _pearsonr(party_x, ref_pc) |
|
||||||
x_lbl_fallback, x_int, x_q = _assign_label(r_lr_x, r_co_x, r_pc_x, "x") |
|
||||||
|
|
||||||
r_lr_y = _pearsonr(party_y, ref_lr) |
|
||||||
r_co_y = _pearsonr(party_y, ref_co) |
|
||||||
r_pc_y = _pearsonr(party_y, ref_pc) |
|
||||||
y_lbl_fallback, y_int, y_q = _assign_label(r_lr_y, r_co_y, r_pc_y, "y") |
|
||||||
|
|
||||||
# ── Motion projection (primary) ──────────────────────────────────────── |
|
||||||
x_lbl = x_lbl_fallback |
|
||||||
y_lbl = y_lbl_fallback |
|
||||||
x_conf = 0.0 |
|
||||||
y_conf = 0.0 |
|
||||||
x_tops: Dict[str, List] = {"+": [], "-": []} |
|
||||||
y_tops: Dict[str, List] = {"+": [], "-": []} |
|
||||||
|
|
||||||
if motion_path_available: |
|
||||||
motion_vecs = _load_motion_vectors(db_path, wid) |
|
||||||
if motion_vecs: |
|
||||||
projections = _project_motions( |
|
||||||
motion_vecs, x_axis_arr, y_axis_arr, global_mean |
|
||||||
) |
|
||||||
x_ids = _top_motion_ids(projections, "x", n=5) |
|
||||||
y_ids = _top_motion_ids(projections, "y", n=5) |
|
||||||
|
|
||||||
all_x_ids = x_ids["+"] + x_ids["-"] |
|
||||||
all_y_ids = y_ids["+"] + y_ids["-"] |
|
||||||
titles_map = _fetch_motion_titles( |
|
||||||
db_path, list(set(all_x_ids + all_y_ids)) |
|
||||||
) |
|
||||||
|
|
||||||
x_title_list = [ |
|
||||||
titles_map[mid][0] for mid in all_x_ids if mid in titles_map |
|
||||||
] |
|
||||||
y_title_list = [ |
|
||||||
titles_map[mid][0] for mid in all_y_ids if mid in titles_map |
|
||||||
] |
|
||||||
|
|
||||||
x_kw_lbl, x_conf = _classify_from_titles(x_title_list) |
|
||||||
y_kw_lbl, y_conf = _classify_from_titles(y_title_list) |
|
||||||
|
|
||||||
if x_kw_lbl is not None: |
|
||||||
x_lbl = x_kw_lbl |
|
||||||
if not x_int: |
|
||||||
tkey = _MOTION_LABEL_TEMPLATE_KEY.get(x_kw_lbl, "fallback") |
|
||||||
x_int = _INTERPRETATION_TEMPLATES[tkey].format( |
|
||||||
orientation="horizontale" |
|
||||||
) |
|
||||||
if y_kw_lbl is not None: |
|
||||||
y_lbl = y_kw_lbl |
|
||||||
if not y_int: |
|
||||||
tkey = _MOTION_LABEL_TEMPLATE_KEY.get(y_kw_lbl, "fallback") |
|
||||||
y_int = _INTERPRETATION_TEMPLATES[tkey].format( |
|
||||||
orientation="verticale" |
|
||||||
) |
|
||||||
|
|
||||||
# Build display lists: [(title, date), ...] |
|
||||||
for pole, ids in x_ids.items(): |
|
||||||
x_tops[pole] = [titles_map[mid] for mid in ids if mid in titles_map] |
|
||||||
for pole, ids in y_ids.items(): |
|
||||||
y_tops[pole] = [titles_map[mid] for mid in ids if mid in titles_map] |
|
||||||
|
|
||||||
# ── Final label resolution ──────────────────────────────────────────── |
|
||||||
# If both motion and ideology paths produced nothing, use generic fallback. |
|
||||||
_x_fallback, _y_fallback = get_fallback_labels() |
|
||||||
if x_lbl is None: |
|
||||||
x_lbl = _x_fallback |
|
||||||
x_int = "" # No interpretation for unclassified axes |
|
||||||
if y_lbl is None: |
|
||||||
y_lbl = _y_fallback |
|
||||||
y_int = "" # No interpretation for unclassified axes |
|
||||||
|
|
||||||
x_quality[wid] = x_q |
|
||||||
y_quality[wid] = y_q |
|
||||||
x_interpretation[wid] = x_int |
|
||||||
y_interpretation[wid] = y_int |
|
||||||
x_top_motions[wid] = x_tops |
|
||||||
y_top_motions[wid] = y_tops |
|
||||||
x_label_confidence[wid] = x_conf |
|
||||||
y_label_confidence[wid] = y_conf |
|
||||||
|
|
||||||
if is_annual: |
|
||||||
annual_x_labels.append(x_lbl) |
|
||||||
annual_y_labels.append(y_lbl) |
|
||||||
|
|
||||||
def _modal(labels: List[str], fallback: str) -> str: |
|
||||||
if not labels: |
|
||||||
return fallback |
|
||||||
return Counter(labels).most_common(1)[0][0] |
|
||||||
|
|
||||||
# Use the module-level display_label_for_modal defined above. |
|
||||||
|
|
||||||
enriched = dict(axes) |
|
||||||
# Resolve modal label across annual windows. If the modal label is the |
|
||||||
# internal generic component name ("As 1"/"As 2" or legacy |
|
||||||
# "Stempatroon As N"), prefer a conventional short semantic fallback so the |
|
||||||
# UI doesn't display unhelpful "As N" strings to end users. |
|
||||||
modal_x = _modal(annual_x_labels, "Links\u2013Rechts") |
|
||||||
modal_y = _modal(annual_y_labels, "Progressief\u2013Conservatief") |
|
||||||
|
|
||||||
enriched["x_label"] = display_label_for_modal(modal_x, "x") |
|
||||||
enriched["y_label"] = display_label_for_modal(modal_y, "y") |
|
||||||
enriched["x_quality"] = x_quality |
|
||||||
enriched["y_quality"] = y_quality |
|
||||||
enriched["x_interpretation"] = x_interpretation |
|
||||||
enriched["y_interpretation"] = y_interpretation |
|
||||||
enriched["x_top_motions"] = x_top_motions |
|
||||||
enriched["y_top_motions"] = y_top_motions |
|
||||||
enriched["x_label_confidence"] = x_label_confidence |
|
||||||
enriched["y_label_confidence"] = y_label_confidence |
|
||||||
return enriched |
|
||||||
@ -1,175 +0,0 @@ |
|||||||
"""Unified SVD component labels and automatic flip direction computation. |
|
||||||
|
|
||||||
This module provides a single source of truth for SVD component labels, |
|
||||||
deriving them from SVD_THEMES in explorer.py. It also computes flip |
|
||||||
directions automatically based on party centroids. |
|
||||||
""" |
|
||||||
|
|
||||||
import logging |
|
||||||
from typing import Dict, List, Optional, Tuple |
|
||||||
|
|
||||||
_logger = logging.getLogger(__name__) |
|
||||||
|
|
||||||
# Canonical party sets for orientation |
|
||||||
# Right-wing parties that should appear on the right side of axes |
|
||||||
RIGHT_PARTIES = { |
|
||||||
"PVV", |
|
||||||
"VVD", |
|
||||||
"FVD", |
|
||||||
"BBB", |
|
||||||
"JA21", |
|
||||||
"Nieuw Sociaal Contract", |
|
||||||
"SGP", |
|
||||||
"CDA", |
|
||||||
"ChristenUnie", |
|
||||||
} |
|
||||||
|
|
||||||
# Left-wing parties that should appear on the left side of axes |
|
||||||
LEFT_PARTIES = { |
|
||||||
"SP", |
|
||||||
"PvdA", |
|
||||||
"GL", |
|
||||||
"GroenLinks", |
|
||||||
"GroenLinks-PvdA", |
|
||||||
"DENK", |
|
||||||
"PvdD", |
|
||||||
"Volt", |
|
||||||
} |
|
||||||
|
|
||||||
# Cache for SVD_THEMES to avoid repeated imports |
|
||||||
_svd_themes_cache: Optional[Dict[int, Dict[str, str]]] = None |
|
||||||
|
|
||||||
|
|
||||||
def _get_svd_themes() -> Dict[int, Dict[str, str]]: |
|
||||||
"""Import SVD_THEMES from explorer.py. |
|
||||||
|
|
||||||
Returns: |
|
||||||
Dict mapping component number to theme dict with keys: |
|
||||||
- label: Short label for the component |
|
||||||
- explanation: Detailed explanation |
|
||||||
- positive_pole: Description of positive pole |
|
||||||
- negative_pole: Description of negative pole |
|
||||||
- flip: Whether to flip the axis |
|
||||||
""" |
|
||||||
global _svd_themes_cache |
|
||||||
if _svd_themes_cache is not None: |
|
||||||
return _svd_themes_cache |
|
||||||
|
|
||||||
try: |
|
||||||
# Import SVD_THEMES from explorer at runtime to avoid circular imports |
|
||||||
# explorer.py now exports SVD_THEMES at module level |
|
||||||
import explorer |
|
||||||
|
|
||||||
_svd_themes_cache = explorer.SVD_THEMES |
|
||||||
return _svd_themes_cache |
|
||||||
except ImportError as e: |
|
||||||
_logger.warning("Could not import explorer.SVD_THEMES: %s", e) |
|
||||||
return {} |
|
||||||
except Exception as e: |
|
||||||
_logger.exception("Failed to load SVD_THEMES from explorer.py: %s", e) |
|
||||||
return {} |
|
||||||
|
|
||||||
|
|
||||||
def get_svd_label(component: int) -> str: |
|
||||||
"""Get short label for SVD component. |
|
||||||
|
|
||||||
Args: |
|
||||||
component: SVD component number (1-indexed) |
|
||||||
|
|
||||||
Returns: |
|
||||||
Short label string (e.g., 'EU-integratie–Nationalisme') |
|
||||||
|
|
||||||
Raises: |
|
||||||
ValueError: If component < 1 |
|
||||||
""" |
|
||||||
if component < 1: |
|
||||||
raise ValueError(f"Component must be >= 1, got {component}") |
|
||||||
|
|
||||||
themes = _get_svd_themes() |
|
||||||
if component in themes: |
|
||||||
return themes[component].get("label", f"As {component}") |
|
||||||
|
|
||||||
# Fallback labels for components 1-3 (most commonly used) |
|
||||||
fallback_labels = { |
|
||||||
1: "EU-integratie–Nationalisme", |
|
||||||
2: "Populistisch–Institutioneel", |
|
||||||
3: "Verzorgingsstaat–Marktwerking", |
|
||||||
} |
|
||||||
return fallback_labels.get(component, f"As {component}") |
|
||||||
|
|
||||||
|
|
||||||
def get_svd_theme(component: int) -> Dict[str, str]: |
|
||||||
"""Get full theme dict for SVD component. |
|
||||||
|
|
||||||
Args: |
|
||||||
component: SVD component number (1-indexed) |
|
||||||
|
|
||||||
Returns: |
|
||||||
Dict with keys: label, explanation, positive_pole, negative_pole, flip |
|
||||||
""" |
|
||||||
if component < 1: |
|
||||||
raise ValueError(f"Component must be >= 1, got {component}") |
|
||||||
|
|
||||||
themes = _get_svd_themes() |
|
||||||
if component in themes: |
|
||||||
return themes[component] |
|
||||||
|
|
||||||
# Return minimal fallback |
|
||||||
return { |
|
||||||
"label": get_svd_label(component), |
|
||||||
"explanation": "", |
|
||||||
"positive_pole": "", |
|
||||||
"negative_pole": "", |
|
||||||
"flip": False, |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
def compute_flip_direction( |
|
||||||
component: int, party_scores: Dict[str, List[float]] |
|
||||||
) -> bool: |
|
||||||
"""Compute flip direction so right parties appear on the right side. |
|
||||||
|
|
||||||
Args: |
|
||||||
component: SVD component number (1-indexed) |
|
||||||
party_scores: Dict mapping party name to list of scores per component |
|
||||||
(party_scores[party][0] is score for component 1, etc.) |
|
||||||
|
|
||||||
Returns: |
|
||||||
True if axis should be flipped so right parties are on right. |
|
||||||
False otherwise. |
|
||||||
""" |
|
||||||
if component < 1: |
|
||||||
return False |
|
||||||
|
|
||||||
idx = component - 1 # Convert to 0-indexed |
|
||||||
|
|
||||||
right_scores = [] |
|
||||||
left_scores = [] |
|
||||||
|
|
||||||
for party, scores in party_scores.items(): |
|
||||||
if len(scores) <= idx: |
|
||||||
continue |
|
||||||
|
|
||||||
score = scores[idx] |
|
||||||
if party in RIGHT_PARTIES: |
|
||||||
right_scores.append(score) |
|
||||||
elif party in LEFT_PARTIES: |
|
||||||
left_scores.append(score) |
|
||||||
|
|
||||||
if not right_scores or not left_scores: |
|
||||||
return False # Default: no flip if insufficient data |
|
||||||
|
|
||||||
right_mean = sum(right_scores) / len(right_scores) |
|
||||||
left_mean = sum(left_scores) / len(left_scores) |
|
||||||
|
|
||||||
# Flip if right parties have lower mean (they're on the left) |
|
||||||
return right_mean < left_mean |
|
||||||
|
|
||||||
|
|
||||||
def get_fallback_labels() -> Tuple[str, str]: |
|
||||||
"""Get fallback labels for x and y axes (components 1 and 2). |
|
||||||
|
|
||||||
Returns: |
|
||||||
Tuple of (x_label, y_label) |
|
||||||
""" |
|
||||||
return (get_svd_label(1), get_svd_label(2)) |
|
||||||
@ -1,6 +0,0 @@ |
|||||||
[defaults] |
|
||||||
inventory = inventory.ini |
|
||||||
remote_user = webapps |
|
||||||
|
|
||||||
[ssh_connection] |
|
||||||
ssh_args = -o ForwardAgent=yes -o ControlMaster=auto -o ControlPersist=60s |
|
||||||
@ -1 +1 @@ |
|||||||
sgeboers.nl ansible_user=webapps |
motief.sgeboers.nl |
||||||
|
|||||||
|
|
@ -1,787 +0,0 @@ |
|||||||
# Axis Classification Implementation Plan |
|
||||||
|
|
||||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. |
|
||||||
|
|
||||||
**Goal:** Add `analysis/axis_classifier.py` that dynamically labels the political compass axes by correlating per-party PCA positions against a party ideology reference CSV, replacing hardcoded "Links–Rechts" / "Progressief–Conservatief" labels. |
|
||||||
|
|
||||||
**Architecture:** A new pure module `classify_axes()` reads two static CSVs (`data/party_ideologies.csv`, `data/coalition_membership.csv`) and enriches the `axes` dict returned by `compute_2d_axes`. `load_positions()` in `explorer.py` calls it after PCA; the compass and trajectories renderers use the resulting `x_label`/`y_label` keys instead of hardcoded strings. CSVs are committed to git and baked into the Docker image. |
|
||||||
|
|
||||||
**Tech Stack:** Python stdlib (`pathlib`, `csv`-via-manual-parse), NumPy (already present), Streamlit (already present). No new runtime dependencies. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## File Map |
|
||||||
|
|
||||||
| Action | Path | Responsibility | |
|
||||||
|---|---|---| |
|
||||||
| Create | `data/party_ideologies.csv` | Party left_right + progressive reference scores | |
|
||||||
| Create | `data/coalition_membership.csv` | Per-year coalition party membership | |
|
||||||
| Create | `analysis/axis_classifier.py` | `classify_axes()` — correlate positions against reference | |
|
||||||
| Modify | `tests/test_political_compass.py` | Add 3 tests for classifier behaviour | |
|
||||||
| Modify | `explorer.py:194-209` | Call `classify_axes` inside `load_positions` | |
|
||||||
| Modify | `explorer.py:927-928` | Dynamic labels in party-level scatter | |
|
||||||
| Modify | `explorer.py:946` | Dynamic labels in MP-level scatter | |
|
||||||
| Modify | `explorer.py:1050` | Accept axis_def from `load_positions` in trajectories tab | |
|
||||||
| Modify | `explorer.py:1120-1121` | Dynamic titles in trajectories chart | |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### Task 1: Write the three failing tests |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `tests/test_political_compass.py` |
|
||||||
|
|
||||||
- [ ] **Step 1: Open `tests/test_political_compass.py` and append the three test functions below** |
|
||||||
|
|
||||||
Add this block at the end of the file: |
|
||||||
|
|
||||||
```python |
|
||||||
# --------------------------------------------------------------------------- |
|
||||||
# Tests for analysis.axis_classifier |
|
||||||
# --------------------------------------------------------------------------- |
|
||||||
|
|
||||||
import importlib |
|
||||||
|
|
||||||
|
|
||||||
def _fresh_classifier(monkeypatch): |
|
||||||
"""Import axis_classifier with cleared module-level caches.""" |
|
||||||
import analysis.axis_classifier as _cls |
|
||||||
monkeypatch.setattr(_cls, "_ideology_cache", None) |
|
||||||
monkeypatch.setattr(_cls, "_coalition_cache", None) |
|
||||||
return _cls |
|
||||||
|
|
||||||
|
|
||||||
def test_axis_label_left_right(tmp_path, monkeypatch): |
|
||||||
"""Positions that closely correlate with left_right scores → label 'Links–Rechts'.""" |
|
||||||
_cls = _fresh_classifier(monkeypatch) |
|
||||||
|
|
||||||
(tmp_path / "party_ideologies.csv").write_text( |
|
||||||
"party,left_right,progressive\n" |
|
||||||
"VVD,0.65,0.10\n" |
|
||||||
"PvdA,-0.70,0.75\n" |
|
||||||
"SP,-0.90,0.50\n" |
|
||||||
"PVV,0.90,-0.50\n" |
|
||||||
"D66,-0.10,0.85\n" |
|
||||||
"CDA,0.25,-0.45\n" |
|
||||||
) |
|
||||||
(tmp_path / "coalition_membership.csv").write_text("window_id,party\n") |
|
||||||
|
|
||||||
# X values are the party's left_right scores — perfect correlation |
|
||||||
positions_by_window = { |
|
||||||
"2022": { |
|
||||||
"VVD": (0.65, 0.10), |
|
||||||
"PvdA": (-0.70, 0.20), |
|
||||||
"SP": (-0.90, 0.30), |
|
||||||
"PVV": (0.90, -0.10), |
|
||||||
"D66": (-0.10, 0.40), |
|
||||||
"CDA": (0.25, -0.20), |
|
||||||
} |
|
||||||
} |
|
||||||
axes = {"x_axis": None, "y_axis": None, "method": "pca"} |
|
||||||
|
|
||||||
result = _cls.classify_axes( |
|
||||||
positions_by_window, axes, str(tmp_path / "motions.db") |
|
||||||
) |
|
||||||
|
|
||||||
assert result["x_label"] == "Links\u2013Rechts" |
|
||||||
assert result["x_quality"]["2022"] >= 0.65 |
|
||||||
|
|
||||||
|
|
||||||
def test_axis_label_coalition_dominant(tmp_path, monkeypatch): |
|
||||||
"""Positions that match coalition pattern but NOT left-right → 'Coalitie–Oppositie'.""" |
|
||||||
_cls = _fresh_classifier(monkeypatch) |
|
||||||
|
|
||||||
(tmp_path / "party_ideologies.csv").write_text( |
|
||||||
"party,left_right,progressive\n" |
|
||||||
"VVD,0.65,0.10\n" |
|
||||||
"PvdA,-0.70,0.75\n" |
|
||||||
"SP,-0.90,0.50\n" |
|
||||||
"PVV,0.90,-0.50\n" |
|
||||||
"D66,-0.10,0.85\n" |
|
||||||
"CDA,0.25,-0.45\n" |
|
||||||
) |
|
||||||
# 2016: Rutte II coalition = VVD + PvdA |
|
||||||
(tmp_path / "coalition_membership.csv").write_text( |
|
||||||
"window_id,party\n" |
|
||||||
"2016,VVD\n" |
|
||||||
"2016,PvdA\n" |
|
||||||
) |
|
||||||
|
|
||||||
# Coalition parties (VVD + PvdA) at x ≈ +1, opposition at x ≈ -1. |
|
||||||
# VVD (right) and PvdA (left) are both near +1 → low left_right correlation |
|
||||||
# but high coalition correlation. |
|
||||||
positions_by_window = { |
|
||||||
"2016": { |
|
||||||
"VVD": (0.95, 0.10), |
|
||||||
"PvdA": (0.90, 0.20), |
|
||||||
"SP": (-0.85, 0.30), |
|
||||||
"PVV": (-0.95, -0.10), |
|
||||||
"D66": (-0.80, 0.40), |
|
||||||
"CDA": (-0.75, -0.20), |
|
||||||
} |
|
||||||
} |
|
||||||
axes = {"x_axis": None, "y_axis": None, "method": "pca"} |
|
||||||
|
|
||||||
result = _cls.classify_axes( |
|
||||||
positions_by_window, axes, str(tmp_path / "motions.db") |
|
||||||
) |
|
||||||
|
|
||||||
assert result["x_label"] == "Coalitie\u2013Oppositie" |
|
||||||
assert "coalitie" in result["x_interpretation"]["2016"].lower() |
|
||||||
|
|
||||||
|
|
||||||
def test_axis_classifier_missing_csv(tmp_path, monkeypatch): |
|
||||||
"""Missing party_ideologies.csv → returns axes dict unchanged, no exception.""" |
|
||||||
_cls = _fresh_classifier(monkeypatch) |
|
||||||
|
|
||||||
# No CSVs written — directory exists but files do not |
|
||||||
positions_by_window = {"2022": {"VVD": (1.0, 0.5), "PvdA": (-1.0, 0.3)}} |
|
||||||
axes = {"x_axis": None, "y_axis": None, "method": "pca"} |
|
||||||
|
|
||||||
result = _cls.classify_axes( |
|
||||||
positions_by_window, axes, str(tmp_path / "motions.db") |
|
||||||
) |
|
||||||
|
|
||||||
# Must not crash and must return the original axes dict unchanged |
|
||||||
assert result is axes |
|
||||||
assert "x_label" not in result |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Run the tests to confirm they fail (module doesn't exist yet)** |
|
||||||
|
|
||||||
```bash |
|
||||||
uv run pytest tests/test_political_compass.py::test_axis_label_left_right tests/test_political_compass.py::test_axis_label_coalition_dominant tests/test_political_compass.py::test_axis_classifier_missing_csv -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: 3 failures like `ModuleNotFoundError: No module named 'analysis.axis_classifier'` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### Task 2: Create the reference data files |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Create: `data/party_ideologies.csv` |
|
||||||
- Create: `data/coalition_membership.csv` |
|
||||||
|
|
||||||
- [ ] **Step 1: Create `data/party_ideologies.csv`** |
|
||||||
|
|
||||||
``` |
|
||||||
party,left_right,progressive |
|
||||||
VVD,0.65,0.10 |
|
||||||
PvdA,-0.70,0.75 |
|
||||||
SP,-0.90,0.50 |
|
||||||
CDA,0.25,-0.45 |
|
||||||
D66,-0.10,0.85 |
|
||||||
GroenLinks,-0.70,0.90 |
|
||||||
GL,-0.70,0.90 |
|
||||||
GroenLinks-PvdA,-0.70,0.82 |
|
||||||
ChristenUnie,0.10,-0.55 |
|
||||||
SGP,0.35,-0.95 |
|
||||||
PVV,0.90,-0.50 |
|
||||||
DENK,-0.40,0.55 |
|
||||||
50Plus,-0.05,-0.10 |
|
||||||
FVD,0.90,-0.75 |
|
||||||
PvdD,-0.60,0.85 |
|
||||||
Volt,-0.20,0.80 |
|
||||||
JA21,0.70,-0.30 |
|
||||||
BBB,0.50,-0.35 |
|
||||||
NSC,0.20,-0.20 |
|
||||||
Nieuw Sociaal Contract,0.20,-0.20 |
|
||||||
BVNL,0.85,-0.55 |
|
||||||
Bij1,-0.90,0.90 |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Create `data/coalition_membership.csv`** |
|
||||||
|
|
||||||
``` |
|
||||||
window_id,party |
|
||||||
2012,VVD |
|
||||||
2012,PvdA |
|
||||||
2013,VVD |
|
||||||
2013,PvdA |
|
||||||
2014,VVD |
|
||||||
2014,PvdA |
|
||||||
2015,VVD |
|
||||||
2015,PvdA |
|
||||||
2016,VVD |
|
||||||
2016,PvdA |
|
||||||
2017,VVD |
|
||||||
2017,CDA |
|
||||||
2017,D66 |
|
||||||
2017,ChristenUnie |
|
||||||
2018,VVD |
|
||||||
2018,CDA |
|
||||||
2018,D66 |
|
||||||
2018,ChristenUnie |
|
||||||
2019,VVD |
|
||||||
2019,CDA |
|
||||||
2019,D66 |
|
||||||
2019,ChristenUnie |
|
||||||
2020,VVD |
|
||||||
2020,CDA |
|
||||||
2020,D66 |
|
||||||
2020,ChristenUnie |
|
||||||
2021,VVD |
|
||||||
2021,CDA |
|
||||||
2021,D66 |
|
||||||
2021,ChristenUnie |
|
||||||
2022,VVD |
|
||||||
2022,D66 |
|
||||||
2022,CDA |
|
||||||
2022,ChristenUnie |
|
||||||
2023,VVD |
|
||||||
2023,D66 |
|
||||||
2023,CDA |
|
||||||
2023,ChristenUnie |
|
||||||
2024,PVV |
|
||||||
2024,VVD |
|
||||||
2024,NSC |
|
||||||
2024,BBB |
|
||||||
2025,PVV |
|
||||||
2025,VVD |
|
||||||
2025,NSC |
|
||||||
2025,BBB |
|
||||||
2026,PVV |
|
||||||
2026,VVD |
|
||||||
2026,NSC |
|
||||||
2026,BBB |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 3: Verify the files are NOT excluded by .gitignore** |
|
||||||
|
|
||||||
```bash |
|
||||||
git check-ignore -v data/party_ideologies.csv data/coalition_membership.csv |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: no output (files are not ignored — `.gitignore` only excludes `data/*.db`, `data/*.bak`, `data/*.json`) |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### Task 3: Implement `analysis/axis_classifier.py` |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Create: `analysis/axis_classifier.py` |
|
||||||
|
|
||||||
- [ ] **Step 1: Create the file with this full implementation** |
|
||||||
|
|
||||||
```python |
|
||||||
"""Axis classifier: correlate per-party PCA positions against ideology reference data |
|
||||||
to assign honest, dynamic labels to political compass axes. |
|
||||||
|
|
||||||
Public API: classify_axes(positions_by_window, axes, db_path) -> dict |
|
||||||
""" |
|
||||||
import logging |
|
||||||
from collections import Counter |
|
||||||
from pathlib import Path |
|
||||||
from typing import Dict, List, Optional, Tuple |
|
||||||
|
|
||||||
import numpy as np |
|
||||||
|
|
||||||
_logger = logging.getLogger(__name__) |
|
||||||
|
|
||||||
# Module-level caches — loaded once per process lifetime. |
|
||||||
_ideology_cache: Optional[Dict[str, Dict[str, float]]] = None |
|
||||||
_coalition_cache: Optional[Dict[str, set]] = None |
|
||||||
|
|
||||||
# Correlation threshold above which we consider an axis "explained" by a dimension. |
|
||||||
_THRESHOLD = 0.65 |
|
||||||
|
|
||||||
_LABELS = { |
|
||||||
"lr": "Links\u2013Rechts", |
|
||||||
"co": "Coalitie\u2013Oppositie", |
|
||||||
"pc": "Progressief\u2013Conservatief", |
|
||||||
"fallback_x": "Stempatroon As 1", |
|
||||||
"fallback_y": "Stempatroon As 2", |
|
||||||
} |
|
||||||
|
|
||||||
_INTERPRETATION_TEMPLATES = { |
|
||||||
"lr": "De {orientation} as weerspiegelt de klassieke links-rechts tegenstelling.", |
|
||||||
"co": ( |
|
||||||
"De {orientation} as weerspiegelt stemgedrag van coalitie- versus " |
|
||||||
"oppositiepartijen (r={r:.2f}). Links-rechts is minder dominant dit jaar." |
|
||||||
), |
|
||||||
"pc": "De {orientation} as weerspiegelt de progressief-conservatieve tegenstelling.", |
|
||||||
"fallback": ( |
|
||||||
"De {orientation} as weerspiegelt een empirisch stempatroon " |
|
||||||
"zonder duidelijke ideologische richting." |
|
||||||
), |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
def _load_ideology(csv_path: Path) -> Dict[str, Dict[str, float]]: |
|
||||||
"""Load party ideology scores from CSV. |
|
||||||
|
|
||||||
Returns {party_name: {"left_right": float, "progressive": float}}. |
|
||||||
Returns {} on any error (caller should treat empty as 'skip classification'). |
|
||||||
""" |
|
||||||
global _ideology_cache |
|
||||||
if _ideology_cache is not None: |
|
||||||
return _ideology_cache |
|
||||||
result: Dict[str, Dict[str, float]] = {} |
|
||||||
try: |
|
||||||
with open(csv_path, encoding="utf-8") as fh: |
|
||||||
lines = fh.read().splitlines() |
|
||||||
header = [h.strip() for h in lines[0].split(",")] |
|
||||||
lr_idx = header.index("left_right") |
|
||||||
pc_idx = header.index("progressive") |
|
||||||
for line in lines[1:]: |
|
||||||
if not line.strip(): |
|
||||||
continue |
|
||||||
parts = [p.strip() for p in line.split(",")] |
|
||||||
if len(parts) <= max(lr_idx, pc_idx): |
|
||||||
continue |
|
||||||
result[parts[0]] = { |
|
||||||
"left_right": float(parts[lr_idx]), |
|
||||||
"progressive": float(parts[pc_idx]), |
|
||||||
} |
|
||||||
except FileNotFoundError: |
|
||||||
_logger.warning("party_ideologies.csv not found at %s — axis labels will be generic", csv_path) |
|
||||||
return {} |
|
||||||
except Exception as exc: |
|
||||||
_logger.warning("Failed to load party_ideologies.csv: %s", exc) |
|
||||||
return {} |
|
||||||
_ideology_cache = result |
|
||||||
return result |
|
||||||
|
|
||||||
|
|
||||||
def _load_coalition(csv_path: Path) -> Dict[str, set]: |
|
||||||
"""Load coalition membership from CSV. |
|
||||||
|
|
||||||
Returns {window_id: set_of_party_names}. |
|
||||||
Returns {} on any error (coalition dimension will be skipped). |
|
||||||
""" |
|
||||||
global _coalition_cache |
|
||||||
if _coalition_cache is not None: |
|
||||||
return _coalition_cache |
|
||||||
result: Dict[str, set] = {} |
|
||||||
try: |
|
||||||
with open(csv_path, encoding="utf-8") as fh: |
|
||||||
lines = fh.read().splitlines() |
|
||||||
for line in lines[1:]: |
|
||||||
if not line.strip(): |
|
||||||
continue |
|
||||||
parts = [p.strip() for p in line.split(",")] |
|
||||||
if len(parts) < 2: |
|
||||||
continue |
|
||||||
wid, party = parts[0], parts[1] |
|
||||||
result.setdefault(wid, set()).add(party) |
|
||||||
except FileNotFoundError: |
|
||||||
_logger.warning( |
|
||||||
"coalition_membership.csv not found at %s — coalition axis detection disabled", csv_path |
|
||||||
) |
|
||||||
return {} |
|
||||||
except Exception as exc: |
|
||||||
_logger.warning("Failed to load coalition_membership.csv: %s", exc) |
|
||||||
return {} |
|
||||||
_coalition_cache = result |
|
||||||
return result |
|
||||||
|
|
||||||
|
|
||||||
def _window_year(window_id: str) -> Optional[str]: |
|
||||||
"""Extract year string from window_id. |
|
||||||
|
|
||||||
Returns None for 'current_parliament'. |
|
||||||
'2016' → '2016', '2016-Q3' → '2016'. |
|
||||||
""" |
|
||||||
if window_id == "current_parliament": |
|
||||||
return None |
|
||||||
return window_id.split("-")[0] |
|
||||||
|
|
||||||
|
|
||||||
def _pearsonr(x: List[float], y: List[float]) -> float: |
|
||||||
"""Pearson r; returns 0.0 for degenerate input (< 3 points or zero variance).""" |
|
||||||
if len(x) < 3: |
|
||||||
return 0.0 |
|
||||||
xa = np.array(x, dtype=float) |
|
||||||
ya = np.array(y, dtype=float) |
|
||||||
if xa.std() < 1e-12 or ya.std() < 1e-12: |
|
||||||
return 0.0 |
|
||||||
return float(np.corrcoef(xa, ya)[0, 1]) |
|
||||||
|
|
||||||
|
|
||||||
def _assign_label( |
|
||||||
r_lr: float, |
|
||||||
r_co: float, |
|
||||||
r_pc: float, |
|
||||||
axis: str, |
|
||||||
) -> Tuple[str, str, float]: |
|
||||||
"""Assign label, interpretation and quality score for one axis. |
|
||||||
|
|
||||||
Priority: left-right > coalition > progressive > fallback. |
|
||||||
Returns (label, interpretation_string, quality_score). |
|
||||||
""" |
|
||||||
orientation = "horizontale" if axis == "x" else "verticale" |
|
||||||
fallback_label = _LABELS["fallback_x"] if axis == "x" else _LABELS["fallback_y"] |
|
||||||
quality = max(abs(r_lr), abs(r_co), abs(r_pc)) |
|
||||||
|
|
||||||
if abs(r_lr) >= _THRESHOLD: |
|
||||||
return ( |
|
||||||
_LABELS["lr"], |
|
||||||
_INTERPRETATION_TEMPLATES["lr"].format(orientation=orientation), |
|
||||||
quality, |
|
||||||
) |
|
||||||
if abs(r_co) >= _THRESHOLD: |
|
||||||
return ( |
|
||||||
_LABELS["co"], |
|
||||||
_INTERPRETATION_TEMPLATES["co"].format(orientation=orientation, r=r_co), |
|
||||||
quality, |
|
||||||
) |
|
||||||
if abs(r_pc) >= _THRESHOLD: |
|
||||||
return ( |
|
||||||
_LABELS["pc"], |
|
||||||
_INTERPRETATION_TEMPLATES["pc"].format(orientation=orientation), |
|
||||||
quality, |
|
||||||
) |
|
||||||
return ( |
|
||||||
fallback_label, |
|
||||||
_INTERPRETATION_TEMPLATES["fallback"].format(orientation=orientation), |
|
||||||
quality, |
|
||||||
) |
|
||||||
|
|
||||||
|
|
||||||
def classify_axes( |
|
||||||
positions_by_window: Dict[str, Dict[str, Tuple[float, float]]], |
|
||||||
axes: dict, |
|
||||||
db_path: str, |
|
||||||
) -> dict: |
|
||||||
"""Classify compass axes by correlating per-party positions against ideology reference data. |
|
||||||
|
|
||||||
Enriches ``axes`` with: |
|
||||||
x_label, y_label — global label (modal across annual windows) |
|
||||||
x_quality, y_quality — {window_id: float} max |r| for each window |
|
||||||
x_interpretation — {window_id: str} Dutch explanation per window |
|
||||||
y_interpretation — {window_id: str} Dutch explanation per window |
|
||||||
|
|
||||||
Returns the original ``axes`` dict unchanged if reference data is unavailable. |
|
||||||
""" |
|
||||||
data_dir = Path(db_path).parent |
|
||||||
ideology = _load_ideology(data_dir / "party_ideologies.csv") |
|
||||||
if not ideology: |
|
||||||
return axes # no reference data — preserve existing behaviour |
|
||||||
|
|
||||||
coalition = _load_coalition(data_dir / "coalition_membership.csv") |
|
||||||
|
|
||||||
x_quality: Dict[str, float] = {} |
|
||||||
y_quality: Dict[str, float] = {} |
|
||||||
x_interpretation: Dict[str, str] = {} |
|
||||||
y_interpretation: Dict[str, str] = {} |
|
||||||
annual_x_labels: List[str] = [] |
|
||||||
annual_y_labels: List[str] = [] |
|
||||||
|
|
||||||
for wid, pos_dict in positions_by_window.items(): |
|
||||||
year = _window_year(wid) |
|
||||||
is_current = wid == "current_parliament" |
|
||||||
is_annual = not is_current and "-" not in wid # e.g. "2016" not "2016-Q3" |
|
||||||
|
|
||||||
# Only use parties present in both the positions and the ideology reference. |
|
||||||
parties = [p for p in pos_dict if p in ideology] |
|
||||||
if len(parties) < 5: |
|
||||||
_logger.debug( |
|
||||||
"Skipping axis classification for %s: only %d reference parties (need 5)", |
|
||||||
wid, |
|
||||||
len(parties), |
|
||||||
) |
|
||||||
continue |
|
||||||
|
|
||||||
party_x = [pos_dict[p][0] for p in parties] |
|
||||||
party_y = [pos_dict[p][1] for p in parties] |
|
||||||
ref_lr = [ideology[p]["left_right"] for p in parties] |
|
||||||
ref_pc = [ideology[p]["progressive"] for p in parties] |
|
||||||
|
|
||||||
# Coalition dummy: +1 if in government that year, -1 otherwise. |
|
||||||
# current_parliament and windows with no coalition data use a neutral vector. |
|
||||||
if year and coalition and year in coalition: |
|
||||||
gov_set = coalition[year] |
|
||||||
ref_co = [1.0 if p in gov_set else -1.0 for p in parties] |
|
||||||
else: |
|
||||||
ref_co = [0.0] * len(parties) # neutral — will never exceed threshold |
|
||||||
|
|
||||||
r_lr_x = _pearsonr(party_x, ref_lr) |
|
||||||
r_co_x = _pearsonr(party_x, ref_co) |
|
||||||
r_pc_x = _pearsonr(party_x, ref_pc) |
|
||||||
x_lbl, x_int, x_q = _assign_label(r_lr_x, r_co_x, r_pc_x, "x") |
|
||||||
|
|
||||||
r_lr_y = _pearsonr(party_y, ref_lr) |
|
||||||
r_co_y = _pearsonr(party_y, ref_co) |
|
||||||
r_pc_y = _pearsonr(party_y, ref_pc) |
|
||||||
y_lbl, y_int, y_q = _assign_label(r_lr_y, r_co_y, r_pc_y, "y") |
|
||||||
|
|
||||||
x_quality[wid] = x_q |
|
||||||
y_quality[wid] = y_q |
|
||||||
x_interpretation[wid] = x_int |
|
||||||
y_interpretation[wid] = y_int |
|
||||||
|
|
||||||
# Only annual windows vote on the global label (not quarterly, not current_parliament). |
|
||||||
if is_annual: |
|
||||||
annual_x_labels.append(x_lbl) |
|
||||||
annual_y_labels.append(y_lbl) |
|
||||||
|
|
||||||
def _modal(labels: List[str], fallback: str) -> str: |
|
||||||
if not labels: |
|
||||||
return fallback |
|
||||||
return Counter(labels).most_common(1)[0][0] |
|
||||||
|
|
||||||
enriched = dict(axes) |
|
||||||
enriched["x_label"] = _modal(annual_x_labels, "Links\u2013Rechts") |
|
||||||
enriched["y_label"] = _modal(annual_y_labels, "Progressief\u2013Conservatief") |
|
||||||
enriched["x_quality"] = x_quality |
|
||||||
enriched["y_quality"] = y_quality |
|
||||||
enriched["x_interpretation"] = x_interpretation |
|
||||||
enriched["y_interpretation"] = y_interpretation |
|
||||||
return enriched |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Run the three new tests** |
|
||||||
|
|
||||||
```bash |
|
||||||
uv run pytest tests/test_political_compass.py::test_axis_label_left_right tests/test_political_compass.py::test_axis_label_coalition_dominant tests/test_political_compass.py::test_axis_classifier_missing_csv -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: all 3 PASS |
|
||||||
|
|
||||||
- [ ] **Step 3: Run the full test suite to confirm no regressions** |
|
||||||
|
|
||||||
```bash |
|
||||||
uv run pytest tests/test_political_compass.py -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: all tests PASS (5 original + 3 new = 8 total) |
|
||||||
|
|
||||||
- [ ] **Step 4: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add data/party_ideologies.csv data/coalition_membership.csv analysis/axis_classifier.py tests/test_political_compass.py |
|
||||||
git commit -m "feat: add axis classifier with party ideology reference data |
|
||||||
|
|
||||||
classify_axes() correlates per-party PCA positions against party_ideologies.csv |
|
||||||
to assign honest dynamic labels (Links-Rechts, Coalitie-Oppositie, etc.) |
|
||||||
instead of always assuming the first PCA axis is left-right." |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### Task 4: Wire classify_axes into load_positions |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `explorer.py:194-209` |
|
||||||
|
|
||||||
- [ ] **Step 1: In `load_positions()`, add the classify_axes call after `compute_2d_axes` returns** |
|
||||||
|
|
||||||
Find this block (lines 194–209): |
|
||||||
|
|
||||||
```python |
|
||||||
positions_by_window, axis_def = compute_2d_axes( |
|
||||||
db_path, |
|
||||||
window_ids=all_available, |
|
||||||
method="pca", |
|
||||||
pca_residual=True, |
|
||||||
normalize_vectors=True, |
|
||||||
) |
|
||||||
|
|
||||||
# Filter displayed windows by window_size AFTER PCA computation. |
|
||||||
if window_size == "annual": |
|
||||||
``` |
|
||||||
|
|
||||||
Replace with: |
|
||||||
|
|
||||||
```python |
|
||||||
positions_by_window, axis_def = compute_2d_axes( |
|
||||||
db_path, |
|
||||||
window_ids=all_available, |
|
||||||
method="pca", |
|
||||||
pca_residual=True, |
|
||||||
normalize_vectors=True, |
|
||||||
) |
|
||||||
|
|
||||||
try: |
|
||||||
from analysis.axis_classifier import classify_axes |
|
||||||
axis_def = classify_axes(positions_by_window, axis_def, db_path) |
|
||||||
except Exception: |
|
||||||
import logging |
|
||||||
logging.getLogger(__name__).exception("classify_axes failed; using generic axis labels") |
|
||||||
|
|
||||||
# Filter displayed windows by window_size AFTER PCA computation. |
|
||||||
if window_size == "annual": |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Run the full test suite** |
|
||||||
|
|
||||||
```bash |
|
||||||
uv run pytest tests/test_political_compass.py -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: all 8 tests PASS |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### Task 5: Use dynamic labels in the compass scatter plots |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `explorer.py:927-928` and `explorer.py:946` |
|
||||||
|
|
||||||
The `axis_def` variable is already in scope in `build_compass_tab` (it's returned by `load_positions` at line 817). |
|
||||||
|
|
||||||
- [ ] **Step 1: Add helper variables just before the first `px.scatter` call** |
|
||||||
|
|
||||||
Find the line `title=f"Politiek Kompas — {_window_label(window_idx)} (partijen)",` (around line 925) and locate the function `build_compass_tab`. Near the top of that function (just after `axis_def` becomes available at line 817), find a convenient spot before the first scatter plot is created. |
|
||||||
|
|
||||||
Look for the block that starts building the figure (the `if level == "Partijen":` branch). Add the two helper variables right before that `if`: |
|
||||||
|
|
||||||
```python |
|
||||||
_x_label = axis_def.get("x_label", "Links\u2013Rechts") |
|
||||||
_y_label = axis_def.get("y_label", "Progressief\u2013Conservatief") |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Replace the hardcoded label in the party-level scatter (around line 927–928)** |
|
||||||
|
|
||||||
Find: |
|
||||||
```python |
|
||||||
labels={ |
|
||||||
"x": "Links \u2190 \u2192 Rechts", |
|
||||||
"y": "Progressief / Conservatief", |
|
||||||
"n": "Kamerleden", |
|
||||||
}, |
|
||||||
``` |
|
||||||
|
|
||||||
Replace with: |
|
||||||
```python |
|
||||||
labels={ |
|
||||||
"x": _x_label, |
|
||||||
"y": _y_label, |
|
||||||
"n": "Kamerleden", |
|
||||||
}, |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 3: Replace the hardcoded label in the MP-level scatter (around line 946)** |
|
||||||
|
|
||||||
Find: |
|
||||||
```python |
|
||||||
labels={"x": "Links \u2190 \u2192 Rechts", "y": "Progressief / Conservatief"}, |
|
||||||
``` |
|
||||||
|
|
||||||
Replace with: |
|
||||||
```python |
|
||||||
labels={"x": _x_label, "y": _y_label}, |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 4: Add the per-year interpretation caption after the chart is rendered** |
|
||||||
|
|
||||||
Find (around line 955–959): |
|
||||||
```python |
|
||||||
_add_y_direction_annotations(fig) |
|
||||||
|
|
||||||
with col1: |
|
||||||
st.plotly_chart(fig, use_container_width=True) |
|
||||||
``` |
|
||||||
|
|
||||||
Replace with: |
|
||||||
```python |
|
||||||
_add_y_direction_annotations(fig) |
|
||||||
|
|
||||||
with col1: |
|
||||||
st.plotly_chart(fig, use_container_width=True) |
|
||||||
_x_interp = axis_def.get("x_interpretation", {}).get(window_idx, "") |
|
||||||
_y_interp = axis_def.get("y_interpretation", {}).get(window_idx, "") |
|
||||||
if _x_interp and axis_def.get("x_quality", {}).get(window_idx, 1.0) < _THRESHOLD: |
|
||||||
st.caption(_x_interp) |
|
||||||
if _y_interp and axis_def.get("y_quality", {}).get(window_idx, 1.0) < _THRESHOLD: |
|
||||||
st.caption(_y_interp) |
|
||||||
``` |
|
||||||
|
|
||||||
Also add the constant `_THRESHOLD = 0.65` near the top of `explorer.py`, with the other module-level constants (after the imports). Search for an existing `_SPARSE_YEARS` or similar constant to find the right location. If no suitable spot exists, add it right before `build_compass_tab`. |
|
||||||
|
|
||||||
- [ ] **Step 5: Run the full test suite** |
|
||||||
|
|
||||||
```bash |
|
||||||
uv run pytest tests/test_political_compass.py -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: all 8 tests PASS |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### Task 6: Update the trajectories chart labels |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `explorer.py:1050` and `explorer.py:1120-1121` |
|
||||||
|
|
||||||
- [ ] **Step 1: In `build_trajectories_tab`, capture `axis_def` from `load_positions`** |
|
||||||
|
|
||||||
Find (around line 1050): |
|
||||||
```python |
|
||||||
positions_by_window, _ = load_positions(db_path, window_size) |
|
||||||
``` |
|
||||||
|
|
||||||
Replace with: |
|
||||||
```python |
|
||||||
positions_by_window, axis_def = load_positions(db_path, window_size) |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Replace hardcoded axis titles in the trajectories chart (around line 1120–1121)** |
|
||||||
|
|
||||||
Find: |
|
||||||
```python |
|
||||||
xaxis_title="Links \u2190 \u2192 Rechts", |
|
||||||
yaxis_title="Progressief / Conservatief", |
|
||||||
``` |
|
||||||
|
|
||||||
Replace with: |
|
||||||
```python |
|
||||||
xaxis_title=axis_def.get("x_label", "Links\u2013Rechts"), |
|
||||||
yaxis_title=axis_def.get("y_label", "Progressief\u2013Conservatief"), |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 3: Run the full test suite one final time** |
|
||||||
|
|
||||||
```bash |
|
||||||
uv run pytest tests/test_political_compass.py -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: all 8 tests PASS |
|
||||||
|
|
||||||
- [ ] **Step 4: Final commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add explorer.py |
|
||||||
git commit -m "feat: use dynamic axis labels in compass and trajectories UI |
|
||||||
|
|
||||||
Replace hardcoded 'Links-Rechts' / 'Progressief-Conservatief' axis labels |
|
||||||
with values from classify_axes(). Add per-year interpretation caption when |
|
||||||
axis quality score is below the 0.65 correlation threshold." |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Self-Review |
|
||||||
|
|
||||||
### Spec coverage check |
|
||||||
|
|
||||||
| Spec requirement | Covered by | |
|
||||||
|---|---| |
|
||||||
| `analysis/axis_classifier.py` with `classify_axes()` | Task 3 | |
|
||||||
| CSV paths derived from `Path(db_path).parent` | Task 3 (line in implementation) | |
|
||||||
| Pearson r for left_right, progressive, coalition dimensions | Task 3 (`_pearsonr`, `_assign_label`) | |
|
||||||
| Priority: lr > coalition > progressive > fallback | Task 3 (`_assign_label`) | |
|
||||||
| Global label = modal across annual windows | Task 3 (`_modal`, `is_annual` flag) | |
|
||||||
| `current_parliament` excluded from modal vote | Task 3 (`is_current`, `is_annual` check) | |
|
||||||
| Quarterly windows excluded from modal vote | Task 3 (`is_annual` = no `-` in wid) | |
|
||||||
| Backward-compatible when CSVs missing | Task 3 (`_load_ideology` returns `{}`; `classify_axes` returns original `axes`) | |
|
||||||
| `data/party_ideologies.csv` committed to git | Task 2 | |
|
||||||
| `data/coalition_membership.csv` committed to git | Task 2 | |
|
||||||
| `load_positions` calls `classify_axes` | Task 4 | |
|
||||||
| Dynamic x/y labels in compass scatter | Task 5 Steps 2–3 | |
|
||||||
| Per-year caption when quality < 0.65 | Task 5 Step 4 | |
|
||||||
| Dynamic labels in trajectories chart | Task 6 | |
|
||||||
| 3 tests: left_right, coalition, missing CSV | Task 1 | |
|
||||||
|
|
||||||
All spec requirements covered. No gaps. |
|
||||||
|
|
||||||
### Placeholder scan |
|
||||||
|
|
||||||
No TBDs, TODOs, or vague steps present. |
|
||||||
|
|
||||||
### Type consistency |
|
||||||
|
|
||||||
- `classify_axes` returns `dict` with keys `x_label` (str), `y_label` (str), `x_quality` (dict), `y_quality` (dict), `x_interpretation` (dict), `y_interpretation` (dict) — consistent across Tasks 3, 4, 5, 6. |
|
||||||
- `_THRESHOLD` is used in Task 5 Step 4; the constant is introduced in that same step. |
|
||||||
- `axis_def.get("x_label", "Links–Rechts")` matches the key name `"x_label"` set in Task 3. |
|
||||||
---
|||||||
# Motion-Driven Axis Labeling Implementation Plan |
|
||||||
|
|
||||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. |
|
||||||
|
|
||||||
**Goal:** Replace static ideology-CSV axis labeling with motion-projection-based labeling, add axis swap when Y ends up as "Links–Rechts", and expose top motions per axis to the user. |
|
||||||
|
|
||||||
**Architecture:** `political_axis.py` exposes `global_mean` in the `axes` dict; `axis_classifier.py` gains motion-loading helpers and a keyword classifier as the primary label source (falling back to existing Pearson-r); `explorer.py` swaps axes when needed and renders a new expander showing the top motions. |
|
||||||
|
|
||||||
**Tech Stack:** Python, NumPy, DuckDB (stdlib only — no new deps), Streamlit, pytest |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## File Map |
|
||||||
|
|
||||||
| File | Change | |
|
||||||
|---|---| |
|
||||||
| `analysis/political_axis.py` | Add `axes["global_mean"] = global_mean` (one line) | |
|
||||||
| `analysis/axis_classifier.py` | Add `_KEYWORDS`, motion helpers, restructure `classify_axes` | |
|
||||||
| `explorer.py` | Add `_swap_axes`, `_should_swap_axes`, wire swap, add motion expander | |
|
||||||
| `tests/test_political_compass.py` | Add 5 new unit tests | |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 1: Expose `global_mean` from `compute_2d_axes` |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `analysis/political_axis.py` (line 362) |
|
||||||
|
|
||||||
- [ ] **Step 1: Write the failing test** |
|
||||||
|
|
||||||
Add this test at the bottom of `tests/test_political_compass.py`: |
|
||||||
|
|
||||||
```python |
|
||||||
def test_compute_2d_axes_exposes_global_mean(monkeypatch): |
|
||||||
"""axes dict returned by compute_2d_axes must contain 'global_mean'.""" |
|
||||||
fake_traj = types.SimpleNamespace() |
|
||||||
fake_traj._load_window_ids = lambda db: ["w1"] |
|
||||||
aligned = { |
|
||||||
"w1": { |
|
||||||
"Alice": np.array([1.0, 0.0, 0.0]), |
|
||||||
"Bob": np.array([-1.0, 0.5, 0.0]), |
|
||||||
} |
|
||||||
} |
|
||||||
fake_traj._load_mp_vectors_for_window = lambda db, w: aligned.get(w, {}) |
|
||||||
fake_traj._procrustes_align_windows = lambda x: aligned |
|
||||||
monkeypatch.setitem(sys.modules, "analysis.trajectory", fake_traj) |
|
||||||
|
|
||||||
from analysis.political_axis import compute_2d_axes |
|
||||||
|
|
||||||
_, axis_def = compute_2d_axes(db_path="dummy", window_ids=["w1"], method="pca") |
|
||||||
assert "global_mean" in axis_def |
|
||||||
assert isinstance(axis_def["global_mean"], np.ndarray) |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Run test to verify it fails** |
|
||||||
|
|
||||||
```bash |
|
||||||
pytest tests/test_political_compass.py::test_compute_2d_axes_exposes_global_mean -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: FAIL — `AssertionError: assert 'global_mean' in {…}` (key not yet present) |
|
||||||
|
|
||||||
- [ ] **Step 3: Add `global_mean` to axes dict in `political_axis.py`** |
|
||||||
|
|
||||||
In `analysis/political_axis.py`, the line at ~362 reads: |
|
||||||
```python |
|
||||||
global_mean = M.mean(axis=0) |
|
||||||
positions_by_window: Dict[str, Dict[str, Tuple[float, float]]] = { |
|
||||||
``` |
|
||||||
|
|
||||||
Add `axes["global_mean"] = global_mean` immediately after that assignment: |
|
||||||
```python |
|
||||||
global_mean = M.mean(axis=0) |
|
||||||
axes["global_mean"] = global_mean |
|
||||||
positions_by_window: Dict[str, Dict[str, Tuple[float, float]]] = { |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 4: Run test to verify it passes** |
|
||||||
|
|
||||||
```bash |
|
||||||
pytest tests/test_political_compass.py::test_compute_2d_axes_exposes_global_mean -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: PASS |
|
||||||
|
|
||||||
- [ ] **Step 5: Run full test suite to confirm no regressions** |
|
||||||
|
|
||||||
```bash |
|
||||||
pytest tests/test_political_compass.py -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: all previously passing tests still pass + new test passes. |
|
||||||
|
|
||||||
- [ ] **Step 6: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add analysis/political_axis.py tests/test_political_compass.py |
|
||||||
git commit -m "feat: expose global_mean in compute_2d_axes axes dict" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 2: Add keyword classifier helper `_classify_from_titles` |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `analysis/axis_classifier.py` |
|
||||||
- Test: `tests/test_political_compass.py` |
|
||||||
|
|
||||||
- [ ] **Step 1: Write the three failing tests** |
|
||||||
|
|
||||||
Add to `tests/test_political_compass.py`: |
|
||||||
|
|
||||||
```python |
|
||||||
def test_classify_from_titles_left_right(): |
|
||||||
"""Titles dominated by left-right keywords → 'Links–Rechts'.""" |
|
||||||
from analysis.axis_classifier import _classify_from_titles |
|
||||||
|
|
||||||
titles = [ |
|
||||||
"Motie over asielbeleid", |
|
||||||
"Motie over minimumloon verhoging", |
|
||||||
"Motie over vluchtelingen opvang", |
|
||||||
"Motie over belastingverlaging", |
|
||||||
"Motie over bijstandsuitkering", |
|
||||||
] |
|
||||||
label, confidence = _classify_from_titles(titles) |
|
||||||
assert label == "Links\u2013Rechts" |
|
||||||
assert confidence >= 0.4 |
|
||||||
|
|
||||||
|
|
||||||
def test_classify_from_titles_progressive(): |
|
||||||
"""Titles dominated by progressive/conservative keywords → 'Progressief–Conservatief'.""" |
|
||||||
from analysis.axis_classifier import _classify_from_titles |
|
||||||
|
|
||||||
titles = [ |
|
||||||
"Motie over klimaatdoelstellingen", |
|
||||||
"Motie over stikstofbeleid", |
|
||||||
"Motie over duurzame energie", |
|
||||||
"Motie over co2 uitstoot", |
|
||||||
"Motie over energietransitie", |
|
||||||
] |
|
||||||
label, confidence = _classify_from_titles(titles) |
|
||||||
assert label == "Progressief\u2013Conservatief" |
|
||||||
assert confidence >= 0.4 |
|
||||||
|
|
||||||
|
|
||||||
def test_classify_from_titles_low_confidence(): |
|
||||||
"""Mixed/irrelevant titles → None (fallback triggered).""" |
|
||||||
from analysis.axis_classifier import _classify_from_titles |
|
||||||
|
|
||||||
titles = [ |
|
||||||
"Motie over sportsubsidie", |
|
||||||
"Motie over bibliotheekregeling", |
|
||||||
"Motie over verkeersveiligheid", |
|
||||||
] |
|
||||||
label, confidence = _classify_from_titles(titles) |
|
||||||
assert label is None |
|
||||||
assert confidence < 0.4 |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Run tests to verify they fail** |
|
||||||
|
|
||||||
```bash |
|
||||||
pytest tests/test_political_compass.py::test_classify_from_titles_left_right tests/test_political_compass.py::test_classify_from_titles_progressive tests/test_political_compass.py::test_classify_from_titles_low_confidence -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: FAIL — `ImportError: cannot import name '_classify_from_titles'` |
|
||||||
|
|
||||||
- [ ] **Step 3: Add `_KEYWORDS` constant and `_classify_from_titles` to `axis_classifier.py`** |
|
||||||
|
|
||||||
Add after the `_INTERPRETATION_TEMPLATES` block (after line 42) and before `_load_ideology`: |
|
||||||
|
|
||||||
```python |
|
||||||
_KEYWORD_THRESHOLD = 0.4 |
|
||||||
|
|
||||||
_KEYWORDS: Dict[str, List[str]] = { |
|
||||||
"Links\u2013Rechts": [ |
|
||||||
# economic |
|
||||||
"belasting", |
|
||||||
"uitkering", |
|
||||||
"bijstand", |
|
||||||
"minimumloon", |
|
||||||
"cao", |
|
||||||
"vakbond", |
|
||||||
"bezuiniging", |
|
||||||
"privatisering", |
|
||||||
"subsidie", |
|
||||||
"pensioen", |
|
||||||
"aow", |
|
||||||
"zorg", |
|
||||||
# immigration |
|
||||||
"asiel", |
|
||||||
"asielaanvraag", |
|
||||||
"migratie", |
|
||||||
"vreemdeling", |
|
||||||
"vluchtelingen", |
|
||||||
"terugkeer", |
|
||||||
"grenzen", |
|
||||||
"opvang", |
|
||||||
"statushouder", |
|
||||||
], |
|
||||||
"Progressief\u2013Conservatief": [ |
|
||||||
# environment |
|
||||||
"klimaat", |
|
||||||
"stikstof", |
|
||||||
"duurzaam", |
|
||||||
"duurzaamheid", |
|
||||||
"co2", |
|
||||||
"energietransitie", |
|
||||||
"biodiversiteit", |
|
||||||
# social |
|
||||||
"euthanasie", |
|
||||||
"abortus", |
|
||||||
"lgbtq", |
|
||||||
"transgender", |
|
||||||
"diversiteit", |
|
||||||
"traditi", |
|
||||||
"gezin", |
|
||||||
"religie", |
|
||||||
"geloof", |
|
||||||
], |
|
||||||
"Nationaal\u2013Internationaal": [ |
|
||||||
"navo", |
|
||||||
"nato", |
|
||||||
"europees", |
|
||||||
"europese", |
|
||||||
" eu ", |
|
||||||
"verdrag", |
|
||||||
" vn ", |
|
||||||
"internationaal", |
|
||||||
], |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
def _classify_from_titles(titles: List[str]) -> Tuple[Optional[str], float]: |
|
||||||
"""Classify a list of motion titles into an axis category using keyword matching. |
|
||||||
|
|
||||||
Returns (category_label, confidence) where confidence = fraction of titles |
|
||||||
containing at least one keyword from the winning category. |
|
||||||
Returns (None, 0.0) if confidence is below _KEYWORD_THRESHOLD. |
|
||||||
""" |
|
||||||
if not titles: |
|
||||||
return None, 0.0 |
|
||||||
|
|
||||||
counts: Dict[str, int] = {cat: 0 for cat in _KEYWORDS} |
|
||||||
for title in titles: |
|
||||||
lower = title.lower() |
|
||||||
for cat, keywords in _KEYWORDS.items(): |
|
||||||
if any(kw in lower for kw in keywords): |
|
||||||
counts[cat] += 1 |
|
||||||
|
|
||||||
best_cat = max(counts, key=lambda c: counts[c]) |
|
||||||
best_count = counts[best_cat] |
|
||||||
confidence = best_count / len(titles) |
|
||||||
|
|
||||||
if confidence < _KEYWORD_THRESHOLD: |
|
||||||
return None, confidence |
|
||||||
|
|
||||||
return best_cat, confidence |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 4: Run the three tests to verify they pass** |
|
||||||
|
|
||||||
```bash |
|
||||||
pytest tests/test_political_compass.py::test_classify_from_titles_left_right tests/test_political_compass.py::test_classify_from_titles_progressive tests/test_political_compass.py::test_classify_from_titles_low_confidence -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: all 3 PASS |
|
||||||
|
|
||||||
- [ ] **Step 5: Run full suite to confirm no regressions** |
|
||||||
|
|
||||||
```bash |
|
||||||
pytest tests/test_political_compass.py -v |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 6: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add analysis/axis_classifier.py tests/test_political_compass.py |
|
||||||
git commit -m "feat: add _classify_from_titles keyword classifier to axis_classifier" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 3: Add motion-loading helpers to `axis_classifier.py` |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `analysis/axis_classifier.py` |
|
||||||
|
|
||||||
These helpers have DB dependencies so they don't get new unit tests here — they are exercised indirectly once `classify_axes` is wired up. Error handling is the main concern. |
|
||||||
|
|
||||||
- [ ] **Step 1: Add `import json` at top of `axis_classifier.py`** |
|
||||||
|
|
||||||
After `import numpy as np` (line 12), add: |
|
||||||
```python |
|
||||||
import json |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Add the four motion helpers after `_classify_from_titles`** |
|
||||||
|
|
||||||
```python |
|
||||||
def _load_motion_vectors(db_path: str, window_id: str) -> Dict[int, np.ndarray]: |
|
||||||
"""Load SVD motion vectors for a given window from DuckDB. |
|
||||||
|
|
||||||
Returns {motion_id: vector_array}. Returns {} on any error. |
|
||||||
""" |
|
||||||
try: |
|
||||||
import duckdb |
|
||||||
|
|
||||||
conn = duckdb.connect(db_path, read_only=True) |
|
||||||
rows = conn.execute( |
|
||||||
"SELECT entity_id, vector FROM svd_vectors " |
|
||||||
"WHERE entity_type = 'motion' AND window_id = ?", |
|
||||||
[window_id], |
|
||||||
).fetchall() |
|
||||||
conn.close() |
|
||||||
result = {} |
|
||||||
for entity_id, vector_raw in rows: |
|
||||||
try: |
|
||||||
mid = int(entity_id) |
|
||||||
vec = np.array(json.loads(vector_raw), dtype=float) |
|
||||||
result[mid] = vec |
|
||||||
except Exception: |
|
||||||
continue |
|
||||||
return result |
|
||||||
except Exception as exc: |
|
||||||
_logger.debug("Failed to load motion vectors for window %s: %s", window_id, exc) |
|
||||||
return {} |
|
||||||
|
|
||||||
|
|
||||||
def _project_motions( |
|
||||||
motion_vecs: Dict[int, np.ndarray], |
|
||||||
x_axis: np.ndarray, |
|
||||||
y_axis: np.ndarray, |
|
||||||
global_mean: np.ndarray, |
|
||||||
) -> Dict[int, Tuple[float, float]]: |
|
||||||
"""Project motion vectors onto the PCA axes after centering by global_mean. |
|
||||||
|
|
||||||
Returns {motion_id: (x_score, y_score)}. |
|
||||||
""" |
|
||||||
projections: Dict[int, Tuple[float, float]] = {} |
|
||||||
for mid, vec in motion_vecs.items(): |
|
||||||
try: |
|
||||||
centered = vec - global_mean |
|
||||||
x_score = float(np.dot(centered, x_axis)) |
|
||||||
y_score = float(np.dot(centered, y_axis)) |
|
||||||
projections[mid] = (x_score, y_score) |
|
||||||
except Exception: |
|
||||||
continue |
|
||||||
return projections |
|
||||||
|
|
||||||
|
|
||||||
def _top_motion_ids( |
|
||||||
projections: Dict[int, Tuple[float, float]], |
|
||||||
axis: str, |
|
||||||
n: int = 5, |
|
||||||
) -> Dict[str, List[int]]: |
|
||||||
"""Return the top-n motion IDs at each pole of the given axis. |
|
||||||
|
|
||||||
axis: 'x' or 'y' |
|
||||||
Returns {'+': [motion_ids], '-': [motion_ids]} (highest positive first, |
|
||||||
most negative first in the '-' list). |
|
||||||
""" |
|
||||||
idx = 0 if axis == "x" else 1 |
|
||||||
sorted_ids = sorted(projections, key=lambda mid: projections[mid][idx]) |
|
||||||
neg_ids = sorted_ids[:n] # most negative |
|
||||||
pos_ids = sorted_ids[-n:][::-1] # most positive |
|
||||||
return {"+": pos_ids, "-": neg_ids} |
|
||||||
|
|
||||||
|
|
||||||
def _fetch_motion_titles( |
|
||||||
db_path: str, |
|
||||||
motion_ids: List[int], |
|
||||||
) -> Dict[int, Tuple[str, str]]: |
|
||||||
"""Fetch (title, date) for a list of motion IDs from DuckDB. |
|
||||||
|
|
||||||
Returns {motion_id: (title, date_str)}. Missing IDs are omitted. |
|
||||||
Returns {} on any DB error. |
|
||||||
""" |
|
||||||
if not motion_ids: |
|
||||||
return {} |
|
||||||
try: |
|
||||||
import duckdb |
|
||||||
|
|
||||||
placeholders = ", ".join("?" * len(motion_ids)) |
|
||||||
conn = duckdb.connect(db_path, read_only=True) |
|
||||||
rows = conn.execute( |
|
||||||
f"SELECT id, title, date FROM motions WHERE id IN ({placeholders})", |
|
||||||
motion_ids, |
|
||||||
).fetchall() |
|
||||||
conn.close() |
|
||||||
return {int(row[0]): (str(row[1]), str(row[2])) for row in rows} |
|
||||||
except Exception as exc: |
|
||||||
_logger.debug("Failed to fetch motion titles: %s", exc) |
|
||||||
return {} |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 3: Run full test suite to confirm nothing broke** |
|
||||||
|
|
||||||
```bash |
|
||||||
pytest tests/test_political_compass.py -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: all previously passing tests still pass. |
|
||||||
|
|
||||||
- [ ] **Step 4: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add analysis/axis_classifier.py |
|
||||||
git commit -m "feat: add motion-loading helpers to axis_classifier" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 4: Restructure `classify_axes` to use motion projection as primary |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `analysis/axis_classifier.py` |
|
||||||
|
|
||||||
- [ ] **Step 1: Replace the body of `classify_axes`** |
|
||||||
|
|
||||||
Replace the entire function (lines 180–269 in the current file) with the version below. |
|
||||||
Key changes from the old version: |
|
||||||
- Remove the `if not ideology: return axes` early return (motion path doesn't need ideology). |
|
||||||
- New early return only if BOTH motion path AND ideology path are unavailable. |
|
||||||
- Motion classification runs first per window; keyword result overrides Pearson-r if confident. |
|
||||||
- New accumulators: `x_top_motions`, `y_top_motions`, `x_label_confidence`, `y_label_confidence`. |
|
||||||
|
|
||||||
```python |
|
||||||
def classify_axes( |
|
||||||
positions_by_window: Dict[str, Dict[str, Tuple[float, float]]], |
|
||||||
axes: dict, |
|
||||||
db_path: str, |
|
||||||
) -> dict: |
|
||||||
"""Classify compass axes using motion projection (primary) and ideology CSV (fallback). |
|
||||||
|
|
||||||
Motion projection path: |
|
||||||
- Requires axes["global_mean"], axes["x_axis"], axes["y_axis"]. |
|
||||||
- Loads motion SVD vectors per window, projects onto PCA axes, |
|
||||||
      ranks the top 5 motions at each pole (5+5 per axis), applies keyword classifier → label.
|
||||||
|
|
||||||
Fallback path (unchanged): |
|
||||||
- Pearson-r against party_ideologies.csv (left_right, progressive). |
|
||||||
- Pearson-r against coalition_membership.csv dummy. |
|
||||||
|
|
||||||
Enriches axes with: |
|
||||||
x_label, y_label — global modal label across annual windows |
|
||||||
x_quality, y_quality — {window_id: float} max |r| |
|
||||||
x_interpretation — {window_id: str} |
|
||||||
y_interpretation — {window_id: str} |
|
||||||
x_top_motions, y_top_motions — {window_id: {'+': [(title, date), ...], '-': [...]}} |
|
||||||
x_label_confidence — {window_id: float} |
|
||||||
y_label_confidence — {window_id: float} |
|
||||||
""" |
|
||||||
data_dir = Path(db_path).parent |
|
||||||
ideology = _load_ideology(data_dir / "party_ideologies.csv") |
|
||||||
coalition = _load_coalition(data_dir / "coalition_membership.csv") |
|
||||||
|
|
||||||
# Determine whether motion projection is possible. |
|
||||||
global_mean = axes.get("global_mean") |
|
||||||
x_axis_arr = np.array(axes.get("x_axis", [])) |
|
||||||
y_axis_arr = np.array(axes.get("y_axis", [])) |
|
||||||
motion_path_available = ( |
|
||||||
global_mean is not None |
|
||||||
and x_axis_arr.ndim == 1 |
|
||||||
and x_axis_arr.size > 0 |
|
||||||
and y_axis_arr.size > 0 |
|
||||||
) |
|
||||||
|
|
||||||
if not ideology and not motion_path_available: |
|
||||||
return axes # nothing to classify with |
|
||||||
|
|
||||||
x_quality: Dict[str, float] = {} |
|
||||||
y_quality: Dict[str, float] = {} |
|
||||||
x_interpretation: Dict[str, str] = {} |
|
||||||
y_interpretation: Dict[str, str] = {} |
|
||||||
x_top_motions: Dict[str, Dict] = {} |
|
||||||
y_top_motions: Dict[str, Dict] = {} |
|
||||||
x_label_confidence: Dict[str, float] = {} |
|
||||||
y_label_confidence: Dict[str, float] = {} |
|
||||||
annual_x_labels: List[str] = [] |
|
||||||
annual_y_labels: List[str] = [] |
|
||||||
|
|
||||||
for wid, pos_dict in positions_by_window.items(): |
|
||||||
year = _window_year(wid) |
|
||||||
is_annual = wid != "current_parliament" and "-" not in wid |
|
||||||
|
|
||||||
# ── Ideology / coalition Pearson-r (unchanged logic) ────────────────── |
|
||||||
x_lbl_fallback: Optional[str] = None |
|
||||||
y_lbl_fallback: Optional[str] = None |
|
||||||
x_q = 0.0 |
|
||||||
y_q = 0.0 |
|
||||||
x_int = "" |
|
||||||
y_int = "" |
|
||||||
|
|
||||||
if ideology: |
|
||||||
parties = [p for p in pos_dict if p in ideology] |
|
||||||
if len(parties) >= 5: |
|
||||||
party_x = [pos_dict[p][0] for p in parties] |
|
||||||
party_y = [pos_dict[p][1] for p in parties] |
|
||||||
ref_lr = [ideology[p]["left_right"] for p in parties] |
|
||||||
ref_pc = [ideology[p]["progressive"] for p in parties] |
|
||||||
|
|
||||||
if year and coalition and year in coalition: |
|
||||||
gov_set = coalition[year] |
|
||||||
ref_co = [1.0 if p in gov_set else -1.0 for p in parties] |
|
||||||
else: |
|
||||||
ref_co = [0.0] * len(parties) |
|
||||||
|
|
||||||
r_lr_x = _pearsonr(party_x, ref_lr) |
|
||||||
r_co_x = _pearsonr(party_x, ref_co) |
|
||||||
r_pc_x = _pearsonr(party_x, ref_pc) |
|
||||||
x_lbl_fallback, x_int, x_q = _assign_label(r_lr_x, r_co_x, r_pc_x, "x") |
|
||||||
|
|
||||||
r_lr_y = _pearsonr(party_y, ref_lr) |
|
||||||
r_co_y = _pearsonr(party_y, ref_co) |
|
||||||
r_pc_y = _pearsonr(party_y, ref_pc) |
|
||||||
y_lbl_fallback, y_int, y_q = _assign_label(r_lr_y, r_co_y, r_pc_y, "y") |
|
||||||
|
|
||||||
# ── Motion projection (primary) ──────────────────────────────────────── |
|
||||||
x_lbl = x_lbl_fallback |
|
||||||
y_lbl = y_lbl_fallback |
|
||||||
x_conf = 0.0 |
|
||||||
y_conf = 0.0 |
|
||||||
x_tops: Dict[str, List] = {"+": [], "-": []} |
|
||||||
y_tops: Dict[str, List] = {"+": [], "-": []} |
|
||||||
|
|
||||||
if motion_path_available: |
|
||||||
motion_vecs = _load_motion_vectors(db_path, wid) |
|
||||||
if motion_vecs: |
|
||||||
projections = _project_motions(motion_vecs, x_axis_arr, y_axis_arr, global_mean) |
|
||||||
x_ids = _top_motion_ids(projections, "x", n=5) |
|
||||||
y_ids = _top_motion_ids(projections, "y", n=5) |
|
||||||
|
|
||||||
all_x_ids = x_ids["+"] + x_ids["-"] |
|
||||||
all_y_ids = y_ids["+"] + y_ids["-"] |
|
||||||
titles_map = _fetch_motion_titles(db_path, list(set(all_x_ids + all_y_ids))) |
|
||||||
|
|
||||||
x_title_list = [ |
|
||||||
titles_map[mid][0] for mid in all_x_ids if mid in titles_map |
|
||||||
] |
|
||||||
y_title_list = [ |
|
||||||
titles_map[mid][0] for mid in all_y_ids if mid in titles_map |
|
||||||
] |
|
||||||
|
|
||||||
x_kw_lbl, x_conf = _classify_from_titles(x_title_list) |
|
||||||
y_kw_lbl, y_conf = _classify_from_titles(y_title_list) |
|
||||||
|
|
||||||
if x_kw_lbl is not None: |
|
||||||
x_lbl = x_kw_lbl |
|
||||||
if y_kw_lbl is not None: |
|
||||||
y_lbl = y_kw_lbl |
|
||||||
|
|
||||||
# Build display lists: [(title, date), ...] |
|
||||||
for pole, ids in x_ids.items(): |
|
||||||
x_tops[pole] = [ |
|
||||||
titles_map[mid] for mid in ids if mid in titles_map |
|
||||||
] |
|
||||||
for pole, ids in y_ids.items(): |
|
||||||
y_tops[pole] = [ |
|
||||||
titles_map[mid] for mid in ids if mid in titles_map |
|
||||||
] |
|
||||||
|
|
||||||
# ── Final label resolution ──────────────────────────────────────────── |
|
||||||
# If both motion and ideology paths produced nothing, use generic fallback. |
|
||||||
if x_lbl is None: |
|
||||||
x_lbl = _LABELS["fallback_x"] |
|
||||||
x_int = _INTERPRETATION_TEMPLATES["fallback"].format(orientation="horizontale") |
|
||||||
if y_lbl is None: |
|
||||||
y_lbl = _LABELS["fallback_y"] |
|
||||||
y_int = _INTERPRETATION_TEMPLATES["fallback"].format(orientation="verticale") |
|
||||||
|
|
||||||
x_quality[wid] = x_q |
|
||||||
y_quality[wid] = y_q |
|
||||||
x_interpretation[wid] = x_int |
|
||||||
y_interpretation[wid] = y_int |
|
||||||
x_top_motions[wid] = x_tops |
|
||||||
y_top_motions[wid] = y_tops |
|
||||||
x_label_confidence[wid] = x_conf |
|
||||||
y_label_confidence[wid] = y_conf |
|
||||||
|
|
||||||
if is_annual: |
|
||||||
annual_x_labels.append(x_lbl) |
|
||||||
annual_y_labels.append(y_lbl) |
|
||||||
|
|
||||||
def _modal(labels: List[str], fallback: str) -> str: |
|
||||||
if not labels: |
|
||||||
return fallback |
|
||||||
return Counter(labels).most_common(1)[0][0] |
|
||||||
|
|
||||||
enriched = dict(axes) |
|
||||||
enriched["x_label"] = _modal(annual_x_labels, "Links\u2013Rechts") |
|
||||||
enriched["y_label"] = _modal(annual_y_labels, "Progressief\u2013Conservatief") |
|
||||||
enriched["x_quality"] = x_quality |
|
||||||
enriched["y_quality"] = y_quality |
|
||||||
enriched["x_interpretation"] = x_interpretation |
|
||||||
enriched["y_interpretation"] = y_interpretation |
|
||||||
enriched["x_top_motions"] = x_top_motions |
|
||||||
enriched["y_top_motions"] = y_top_motions |
|
||||||
enriched["x_label_confidence"] = x_label_confidence |
|
||||||
enriched["y_label_confidence"] = y_label_confidence |
|
||||||
return enriched |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Run full test suite** |
|
||||||
|
|
||||||
```bash |
|
||||||
pytest tests/test_political_compass.py -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: all existing tests + all 4 tasks' new tests pass. Particularly verify the 3 classifier tests from Task 2 and the `test_compute_2d_axes_exposes_global_mean` from Task 1 still pass. |
|
||||||
|
|
||||||
- [ ] **Step 3: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add analysis/axis_classifier.py |
|
||||||
git commit -m "feat: restructure classify_axes — motion projection as primary label source" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 5: Add axis-swap logic and tests in `explorer.py` |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `explorer.py` |
|
||||||
- Test: `tests/test_political_compass.py` |
|
||||||
|
|
||||||
- [ ] **Step 1: Write the two failing tests** |
|
||||||
|
|
||||||
Add to `tests/test_political_compass.py`: |
|
||||||
|
|
||||||
```python |
|
||||||
def test_axis_swap_when_y_is_left_right(): |
|
||||||
"""When y_label is 'Links–Rechts' and x_label is not, positions must be swapped.""" |
|
||||||
from explorer import _swap_axes |
|
||||||
|
|
||||||
positions_by_window = { |
|
||||||
"2023": { |
|
||||||
"VVD": (0.5, 0.8), |
|
||||||
"PvdA": (-0.3, -0.6), |
|
||||||
} |
|
||||||
} |
|
||||||
axis_def = { |
|
||||||
"x_label": "Progressief\u2013Conservatief", |
|
||||||
"y_label": "Links\u2013Rechts", |
|
||||||
"x_quality": {"2023": 0.7}, |
|
||||||
"y_quality": {"2023": 0.8}, |
|
||||||
"x_interpretation": {"2023": "prog interpretation"}, |
|
||||||
"y_interpretation": {"2023": "lr interpretation"}, |
|
||||||
"x_top_motions": {"2023": {"+": [], "-": []}}, |
|
||||||
"y_top_motions": {"2023": {"+": [], "-": []}}, |
|
||||||
"x_label_confidence": {"2023": 0.5}, |
|
||||||
"y_label_confidence": {"2023": 0.7}, |
|
||||||
} |
|
||||||
|
|
||||||
new_pos, new_ax = _swap_axes(positions_by_window, axis_def) |
|
||||||
|
|
||||||
# Positions swapped: (x, y) → (y, x) |
|
||||||
assert new_pos["2023"]["VVD"] == (0.8, 0.5) |
|
||||||
assert new_pos["2023"]["PvdA"] == (-0.6, -0.3) |
|
||||||
|
|
||||||
# Labels swapped |
|
||||||
assert new_ax["x_label"] == "Links\u2013Rechts" |
|
||||||
assert new_ax["y_label"] == "Progressief\u2013Conservatief" |
|
||||||
|
|
||||||
# Quality swapped |
|
||||||
assert new_ax["x_quality"] == {"2023": 0.8} |
|
||||||
assert new_ax["y_quality"] == {"2023": 0.7} |
|
||||||
|
|
||||||
|
|
||||||
def test_axis_swap_not_applied_when_x_is_left_right(): |
|
||||||
"""When x_label is already 'Links–Rechts', no swap should occur.""" |
|
||||||
from explorer import _should_swap_axes |
|
||||||
|
|
||||||
axis_def = { |
|
||||||
"x_label": "Links\u2013Rechts", |
|
||||||
"y_label": "Progressief\u2013Conservatief", |
|
||||||
} |
|
||||||
assert _should_swap_axes(axis_def) is False |
|
||||||
|
|
||||||
axis_def2 = { |
|
||||||
"x_label": "Links\u2013Rechts", |
|
||||||
"y_label": "Links\u2013Rechts", # both LR — no swap |
|
||||||
} |
|
||||||
assert _should_swap_axes(axis_def2) is False |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Run tests to verify they fail** |
|
||||||
|
|
||||||
```bash |
|
||||||
pytest tests/test_political_compass.py::test_axis_swap_when_y_is_left_right tests/test_political_compass.py::test_axis_swap_not_applied_when_x_is_left_right -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: FAIL — `ImportError: cannot import name '_swap_axes'` / `'_should_swap_axes'` |
|
||||||
|
|
||||||
- [ ] **Step 3: Add `_swap_axes` and `_should_swap_axes` to `explorer.py`** |
|
||||||
|
|
||||||
Add these two functions near the top of `explorer.py`, just before `load_positions` (i.e. before the function that starts around line 184). A good place is after any existing module-level helpers. |
|
||||||
|
|
||||||
```python |
|
||||||
def _should_swap_axes(axis_def: dict) -> bool: |
|
||||||
"""Return True if the Y axis is 'Links–Rechts' and the X axis is not. |
|
||||||
|
|
||||||
When true, caller should swap x/y positions and metadata so left-right |
|
||||||
is conventionally on the horizontal axis. |
|
||||||
""" |
|
||||||
lr = "Links\u2013Rechts" |
|
||||||
return axis_def.get("y_label") == lr and axis_def.get("x_label") != lr |
|
||||||
|
|
||||||
|
|
||||||
def _swap_axes( |
|
||||||
positions_by_window: dict, |
|
||||||
axis_def: dict, |
|
||||||
) -> tuple: |
|
||||||
"""Swap x and y in all positions and axis metadata. |
|
||||||
|
|
||||||
Pure function — returns (new_positions_by_window, new_axis_def). |
|
||||||
""" |
|
||||||
new_positions: dict = {} |
|
||||||
for wid, pos_dict in positions_by_window.items(): |
|
||||||
new_positions[wid] = {ent: (y, x) for ent, (x, y) in pos_dict.items()} |
|
||||||
|
|
||||||
new_ax = dict(axis_def) |
|
||||||
# Swap paired scalar keys |
|
||||||
new_ax["x_label"] = axis_def.get("y_label") |
|
||||||
new_ax["y_label"] = axis_def.get("x_label") |
|
||||||
|
|
||||||
# Swap paired dict keys |
|
||||||
for x_key, y_key in [ |
|
||||||
("x_quality", "y_quality"), |
|
||||||
("x_interpretation", "y_interpretation"), |
|
||||||
("x_top_motions", "y_top_motions"), |
|
||||||
("x_label_confidence", "y_label_confidence"), |
|
||||||
]: |
|
||||||
new_ax[x_key] = axis_def.get(y_key) |
|
||||||
new_ax[y_key] = axis_def.get(x_key) |
|
||||||
|
|
||||||
return new_positions, new_ax |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 4: Wire the swap in `load_positions`** |
|
||||||
|
|
||||||
In `explorer.py`, after the `classify_axes` try/except block (currently lines 202–211, ending at `axis_def = classify_axes(...)`), add: |
|
||||||
|
|
||||||
```python |
|
||||||
if _should_swap_axes(axis_def): |
|
||||||
positions_by_window, axis_def = _swap_axes(positions_by_window, axis_def) |
|
||||||
``` |
|
||||||
|
|
||||||
Place this immediately before the `# Filter displayed windows by window_size` comment (currently ~line 213). |
|
||||||
|
|
||||||
- [ ] **Step 5: Run tests to verify they pass** |
|
||||||
|
|
||||||
```bash |
|
||||||
pytest tests/test_political_compass.py::test_axis_swap_when_y_is_left_right tests/test_political_compass.py::test_axis_swap_not_applied_when_x_is_left_right -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: both PASS |
|
||||||
|
|
||||||
- [ ] **Step 6: Run full suite** |
|
||||||
|
|
||||||
```bash |
|
||||||
pytest tests/test_political_compass.py -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: all tests pass. |
|
||||||
|
|
||||||
- [ ] **Step 7: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add explorer.py tests/test_political_compass.py |
|
||||||
git commit -m "feat: add axis swap — left-right goes on horizontal axis when detected" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 6: Add motion expander UI in `build_compass_tab` |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `explorer.py` |
|
||||||
|
|
||||||
No new unit tests for this task — it's pure Streamlit rendering and cannot be unit-tested without a browser. Verify visually after implementation. |
|
||||||
|
|
||||||
- [ ] **Step 1: Add the expander block after `st.plotly_chart`** |
|
||||||
|
|
||||||
In `explorer.py`, find the `st.plotly_chart` call (line ~974) inside `with col1:`. After the two `st.caption` calls (lines ~981–986), add: |
|
||||||
|
|
||||||
```python |
|
||||||
# Motion expander — show which motions define each axis for this window |
|
||||||
x_top = axis_def.get("x_top_motions", {}).get(window_idx, {}) |
|
||||||
y_top = axis_def.get("y_top_motions", {}).get(window_idx, {}) |
|
||||||
x_conf = axis_def.get("x_label_confidence", {}).get(window_idx) |
|
||||||
y_conf = axis_def.get("y_label_confidence", {}).get(window_idx) |
|
||||||
evr = axis_def.get("explained_variance_ratio", [None, None]) |
|
||||||
evr0 = evr[0] if evr else None |
|
||||||
|
|
||||||
_has_motion_data = bool( |
|
||||||
x_top.get("+") or x_top.get("-") or y_top.get("+") or y_top.get("-") |
|
||||||
) |
|
||||||
if _has_motion_data: |
|
||||||
with st.expander("\U0001f50d Wat bepaalt deze assen?"): |
|
||||||
x_conf_pct = f" (vertrouwen: {x_conf:.0%})" if x_conf is not None else "" |
|
||||||
y_conf_pct = f" (vertrouwen: {y_conf:.0%})" if y_conf is not None else "" |
|
||||||
|
|
||||||
st.markdown(f"**Horizontale as: {_x_label}**{x_conf_pct}") |
|
||||||
x_pos_titles = x_top.get("+", []) |
|
||||||
x_neg_titles = x_top.get("-", []) |
|
||||||
if x_pos_titles: |
|
||||||
labels_pos = " · ".join( |
|
||||||
f"{t} ({d})" for t, d in x_pos_titles[:3] |
|
||||||
) |
|
||||||
st.markdown(f" ➕ {labels_pos}") |
|
||||||
if x_neg_titles: |
|
||||||
labels_neg = " · ".join( |
|
||||||
f"{t} ({d})" for t, d in x_neg_titles[:3] |
|
||||||
) |
|
||||||
st.markdown(f" ➖ {labels_neg}") |
|
||||||
|
|
||||||
st.markdown(f"**Verticale as: {_y_label}**{y_conf_pct}") |
|
||||||
y_pos_titles = y_top.get("+", []) |
|
||||||
y_neg_titles = y_top.get("-", []) |
|
||||||
if y_pos_titles: |
|
||||||
labels_pos = " · ".join( |
|
||||||
f"{t} ({d})" for t, d in y_pos_titles[:3] |
|
||||||
) |
|
||||||
st.markdown(f" ➕ {labels_pos}") |
|
||||||
if y_neg_titles: |
|
||||||
labels_neg = " · ".join( |
|
||||||
f"{t} ({d})" for t, d in y_neg_titles[:3] |
|
||||||
) |
|
||||||
st.markdown(f" ➖ {labels_neg}") |
|
||||||
|
|
||||||
if evr0 is not None: |
|
||||||
st.caption( |
|
||||||
f"As 1 verklaart {evr0:.1%} van de variantie in stemgedrag." |
|
||||||
) |
|
||||||
``` |
|
||||||
|
|
||||||
Note: `_x_label` and `_y_label` are already defined earlier in `build_compass_tab` from `axis_def.get("x_label", …)`. `window_idx` is the currently selected window string. Confirm those variable names match the existing code before inserting. |
|
||||||
|
|
||||||
- [ ] **Step 2: Check that `explained_variance_ratio` is stored in `axis_def`** |
|
||||||
|
|
||||||
Search `analysis/political_axis.py` for where `axes["explained_variance_ratio"]` is set. If it isn't stored, add it: |
|
||||||
|
|
||||||
In `compute_2d_axes`, after `axes["global_mean"] = global_mean` (Task 1), find where `evr` is computed (it's the `explained_variance_ratio_` from sklearn PCA or numpy SVD). Add: |
|
||||||
|
|
||||||
```python |
|
||||||
axes["explained_variance_ratio"] = list(axes.get("explained_variance_ratio", [evr1, evr2])) |
|
||||||
``` |
|
||||||
|
|
||||||
If it's already stored under a different key, use that key in the expander code instead. |
|
||||||
|
|
||||||
- [ ] **Step 3: Run full test suite (sanity check)** |
|
||||||
|
|
||||||
```bash |
|
||||||
pytest tests/test_political_compass.py -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: all tests pass (expander is UI-only, no test required). |
|
||||||
|
|
||||||
- [ ] **Step 4: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add explorer.py |
|
||||||
git commit -m "feat: add motion expander to compass tab — shows top motions per axis" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Final Verification |
|
||||||
|
|
||||||
- [ ] **Run all tests one last time** |
|
||||||
|
|
||||||
```bash |
|
||||||
pytest tests/test_political_compass.py -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected output summary: 13+ tests passing (8 existing + 5 new), 0 failing. |
|
||||||
|
|
||||||
- [ ] **Smoke-test the app** (if DB is available) |
|
||||||
|
|
||||||
```bash |
|
||||||
streamlit run explorer.py |
|
||||||
``` |
|
||||||
|
|
||||||
Navigate to the compass tab, select a window, verify: |
|
||||||
1. Axis labels show e.g. "Links–Rechts" on X and "Progressief–Conservatief" on Y |
|
||||||
2. The "🔍 Wat bepaalt deze assen?" expander appears and shows motions |
|
||||||
3. No Python exceptions in the terminal |
|
||||||
|
|
||||||
- [ ] **Final commit (if any cleanup needed)** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add -u |
|
||||||
git commit -m "fix: address any issues found during smoke test" |
|
||||||
``` |
|
||||||
@ -1,879 +0,0 @@ |
|||||||
# SVD Label Unification Implementation Plan |
|
||||||
|
|
||||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. |
|
||||||
|
|
||||||
**Goal:** Unify SVD component labels into a single source of truth and automatically compute axis flip directions so right-wing parties consistently appear on the right side of all SVD component axes. |
|
||||||
|
|
||||||
**Architecture:** Create a new `analysis/svd_labels.py` module that imports `SVD_THEMES` from explorer.py at runtime and provides helper functions for label lookup and flip direction computation. Update `axis_classifier.py` to use this module instead of hardcoded labels. Add 1D party position charts for components 3-10 in the SVD Components tab. |
|
||||||
|
|
||||||
**Tech Stack:** Python, Streamlit, NumPy, DuckDB |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## File Structure |
|
||||||
|
|
||||||
| File | Responsibility | |
|
||||||
|------|---------------| |
|
||||||
| `analysis/svd_labels.py` | **NEW** - Unified label system with auto-flip computation | |
|
||||||
| `analysis/axis_classifier.py` | Remove `_LABELS`, import from svd_labels | |
|
||||||
| `explorer.py` | Remove fallback labels, add 1D charts for components 3-10 | |
|
||||||
| `tests/test_svd_labels.py` | **NEW** - Tests for svd_labels module | |
|
||||||
| `tests/test_axis_label_fallback.py` | Update to use new label system | |
|
||||||
| `tests/test_political_compass.py` | Update assertions for new labels | |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 1: Create `analysis/svd_labels.py` with core functions |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Create: `analysis/svd_labels.py` |
|
||||||
- Test: `tests/test_svd_labels.py` |
|
||||||
|
|
||||||
- [ ] **Step 1: Write the failing test for `get_svd_label`** |
|
||||||
|
|
||||||
```python |
|
||||||
# tests/test_svd_labels.py |
|
||||||
"""Tests for analysis/svd_labels module.""" |
|
||||||
|
|
||||||
def test_get_svd_label_returns_correct_label(): |
|
||||||
"""Test that get_svd_label returns the correct label for each component.""" |
|
||||||
from analysis.svd_labels import get_svd_label |
|
||||||
|
|
||||||
# Component 1 should return EU-integratie label |
|
||||||
label1 = get_svd_label(1) |
|
||||||
assert "EU-integratie" in label1 or "Nationalisme" in label1 |
|
||||||
|
|
||||||
# Component 2 should return Populistisch label |
|
||||||
label2 = get_svd_label(2) |
|
||||||
assert "Populistisch" in label2 or "Institutioneel" in label2 |
|
||||||
|
|
||||||
# Component 3 should return Verzorgingsstaat label |
|
||||||
label3 = get_svd_label(3) |
|
||||||
assert "Verzorgingsstaat" in label3 or "Marktwerking" in label3 |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Run test to verify it fails** |
|
||||||
|
|
||||||
Run: `uv run pytest tests/test_svd_labels.py::test_get_svd_label_returns_correct_label -v` |
|
||||||
Expected: FAIL with "ModuleNotFoundError: No module named 'analysis.svd_labels'" |
|
||||||
|
|
||||||
- [ ] **Step 3: Create `analysis/svd_labels.py` with `get_svd_label` function** |
|
||||||
|
|
||||||
```python |
|
||||||
# analysis/svd_labels.py |
|
||||||
"""Unified SVD component labels and automatic flip direction computation. |
|
||||||
|
|
||||||
This module provides a single source of truth for SVD component labels, |
|
||||||
deriving them from SVD_THEMES in explorer.py. It also computes flip |
|
||||||
directions automatically based on party centroids. |
|
||||||
""" |
|
||||||
|
|
||||||
import logging |
|
||||||
from typing import Dict, List, Optional, Tuple |
|
||||||
|
|
||||||
_logger = logging.getLogger(__name__) |
|
||||||
|
|
||||||
# Canonical party sets for orientation |
|
||||||
# Right-wing parties that should appear on the right side of axes |
|
||||||
RIGHT_PARTIES = { |
|
||||||
"PVV", "VVD", "FVD", "BBB", "JA21", |
|
||||||
"Nieuw Sociaal Contract", "SGP", "CDA", "ChristenUnie" |
|
||||||
} |
|
||||||
|
|
||||||
# Left-wing parties that should appear on the left side of axes |
|
||||||
LEFT_PARTIES = { |
|
||||||
"SP", "PvdA", "GL", "GroenLinks", "GroenLinks-PvdA", |
|
||||||
"DENK", "PvdD", "Volt" |
|
||||||
} |
|
||||||
|
|
||||||
# Cache for SVD_THEMES to avoid repeated imports |
|
||||||
_svd_themes_cache: Optional[Dict[int, Dict[str, str]]] = None |
|
||||||
|
|
||||||
|
|
||||||
def _get_svd_themes() -> Dict[int, Dict[str, str]]: |
|
||||||
"""Lazy import SVD_THEMES from explorer.py to avoid circular imports. |
|
||||||
|
|
||||||
Returns: |
|
||||||
Dict mapping component number to theme dict with keys: |
|
||||||
- label: Short label for the component |
|
||||||
- explanation: Detailed explanation |
|
||||||
- positive_pole: Description of positive pole |
|
||||||
- negative_pole: Description of negative pole |
|
||||||
- flip: Whether to flip the axis |
|
||||||
""" |
|
||||||
global _svd_themes_cache |
|
||||||
if _svd_themes_cache is not None: |
|
||||||
return _svd_themes_cache |
|
||||||
|
|
||||||
try: |
|
||||||
# Import at runtime to avoid circular dependency |
|
||||||
# explorer.py imports from analysis/ but we don't import from explorer.py |
|
||||||
# at module load time |
|
||||||
import importlib.util |
|
||||||
import os |
|
||||||
|
|
||||||
# Find explorer.py |
|
||||||
explorer_path = os.path.join(os.path.dirname(__file__), "..", "explorer.py") |
|
||||||
if not os.path.exists(explorer_path): |
|
||||||
_logger.warning("explorer.py not found at %s", explorer_path) |
|
||||||
return {} |
|
||||||
|
|
||||||
spec = importlib.util.spec_from_file_location("explorer", explorer_path) |
|
||||||
if spec is None or spec.loader is None: |
|
||||||
_logger.warning("Could not load spec from explorer.py") |
|
||||||
return {} |
|
||||||
|
|
||||||
explorer_module = importlib.util.module_from_spec(spec) |
|
||||||
spec.loader.exec_module(explorer_module) |
|
||||||
|
|
||||||
# Get SVD_THEMES from the build_svd_components_tab function's local scope |
|
||||||
# This is a bit hacky but avoids importing the entire Streamlit app |
|
||||||
# We'll need to refactor explorer.py to export SVD_THEMES at module level |
|
||||||
# For now, we'll define it here as a fallback |
|
||||||
|
|
||||||
# Fallback: define SVD_THEMES here if we can't import from explorer |
|
||||||
_svd_themes_cache = {} |
|
||||||
return _svd_themes_cache |
|
||||||
|
|
||||||
except Exception as e: |
|
||||||
_logger.exception("Failed to load SVD_THEMES from explorer.py: %s", e) |
|
||||||
return {} |
|
||||||
|
|
||||||
|
|
||||||
def get_svd_label(component: int) -> str: |
|
||||||
"""Get short label for SVD component. |
|
||||||
|
|
||||||
Args: |
|
||||||
component: SVD component number (1-indexed) |
|
||||||
|
|
||||||
Returns: |
|
||||||
Short label string (e.g., 'EU-integratie–Nationalisme') |
|
||||||
|
|
||||||
Raises: |
|
||||||
ValueError: If component < 1 |
|
||||||
""" |
|
||||||
if component < 1: |
|
||||||
raise ValueError(f"Component must be >= 1, got {component}") |
|
||||||
|
|
||||||
themes = _get_svd_themes() |
|
||||||
if component in themes: |
|
||||||
return themes[component].get("label", f"As {component}") |
|
||||||
|
|
||||||
# Fallback labels for components 1-3 (most commonly used) |
|
||||||
fallback_labels = { |
|
||||||
1: "EU-integratie–Nationalisme", |
|
||||||
2: "Populistisch–Institutioneel", |
|
||||||
3: "Verzorgingsstaat–Marktwerking", |
|
||||||
} |
|
||||||
return fallback_labels.get(component, f"As {component}") |
|
||||||
|
|
||||||
|
|
||||||
def get_svd_theme(component: int) -> Dict[str, str]: |
|
||||||
"""Get full theme dict for SVD component. |
|
||||||
|
|
||||||
Args: |
|
||||||
component: SVD component number (1-indexed) |
|
||||||
|
|
||||||
Returns: |
|
||||||
Dict with keys: label, explanation, positive_pole, negative_pole, flip |
|
||||||
""" |
|
||||||
if component < 1: |
|
||||||
raise ValueError(f"Component must be >= 1, got {component}") |
|
||||||
|
|
||||||
themes = _get_svd_themes() |
|
||||||
if component in themes: |
|
||||||
return themes[component] |
|
||||||
|
|
||||||
# Return minimal fallback |
|
||||||
return { |
|
||||||
"label": get_svd_label(component), |
|
||||||
"explanation": "", |
|
||||||
"positive_pole": "", |
|
||||||
"negative_pole": "", |
|
||||||
"flip": False, |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
def compute_flip_direction( |
|
||||||
component: int, |
|
||||||
party_scores: Dict[str, List[float]] |
|
||||||
) -> bool: |
|
||||||
"""Compute flip direction so right parties appear on the right side. |
|
||||||
|
|
||||||
Args: |
|
||||||
component: SVD component number (1-indexed) |
|
||||||
party_scores: Dict mapping party name to list of scores per component |
|
||||||
(party_scores[party][0] is score for component 1, etc.) |
|
||||||
|
|
||||||
Returns: |
|
||||||
True if axis should be flipped so right parties are on right. |
|
||||||
False otherwise. |
|
||||||
""" |
|
||||||
if component < 1: |
|
||||||
return False |
|
||||||
|
|
||||||
idx = component - 1 # Convert to 0-indexed |
|
||||||
|
|
||||||
right_scores = [] |
|
||||||
left_scores = [] |
|
||||||
|
|
||||||
for party, scores in party_scores.items(): |
|
||||||
if len(scores) <= idx: |
|
||||||
continue |
|
||||||
|
|
||||||
score = scores[idx] |
|
||||||
if party in RIGHT_PARTIES: |
|
||||||
right_scores.append(score) |
|
||||||
elif party in LEFT_PARTIES: |
|
||||||
left_scores.append(score) |
|
||||||
|
|
||||||
if not right_scores or not left_scores: |
|
||||||
return False # Default: no flip if insufficient data |
|
||||||
|
|
||||||
right_mean = sum(right_scores) / len(right_scores) |
|
||||||
left_mean = sum(left_scores) / len(left_scores) |
|
||||||
|
|
||||||
# Flip if right parties have lower mean (they're on the left) |
|
||||||
return right_mean < left_mean |
|
||||||
|
|
||||||
|
|
||||||
def get_fallback_labels() -> Tuple[str, str]: |
|
||||||
"""Get fallback labels for x and y axes (components 1 and 2). |
|
||||||
|
|
||||||
Returns: |
|
||||||
Tuple of (x_label, y_label) |
|
||||||
""" |
|
||||||
return (get_svd_label(1), get_svd_label(2)) |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 4: Run test to verify it passes** |
|
||||||
|
|
||||||
Run: `uv run pytest tests/test_svd_labels.py::test_get_svd_label_returns_correct_label -v` |
|
||||||
Expected: PASS |
|
||||||
|
|
||||||
- [ ] **Step 5: Write test for `compute_flip_direction`** |
|
||||||
|
|
||||||
```python |
|
||||||
# tests/test_svd_labels.py (append) |
|
||||||
|
|
||||||
def test_compute_flip_direction_right_on_left(): |
|
||||||
"""Test that flip is True when right parties are on the left.""" |
|
||||||
from analysis.svd_labels import compute_flip_direction |
|
||||||
|
|
||||||
# Right parties have negative scores (on left), left parties have positive |
|
||||||
party_scores = { |
|
||||||
"VVD": [-0.5, 0.0], # Right party, component 1 score = -0.5 |
|
||||||
"PVV": [-0.8, 0.0], # Right party |
|
||||||
"SP": [0.6, 0.0], # Left party, component 1 score = 0.6 |
|
||||||
"DENK": [0.4, 0.0], # Left party |
|
||||||
} |
|
||||||
|
|
||||||
# Component 1: right_mean = -0.65, left_mean = 0.5 |
|
||||||
# right_mean < left_mean, so flip = True |
|
||||||
assert compute_flip_direction(1, party_scores) is True |
|
||||||
|
|
||||||
|
|
||||||
def test_compute_flip_direction_right_on_right(): |
|
||||||
"""Test that flip is False when right parties are already on the right.""" |
|
||||||
from analysis.svd_labels import compute_flip_direction |
|
||||||
|
|
||||||
# Right parties have positive scores (on right), left parties have negative |
|
||||||
party_scores = { |
|
||||||
"VVD": [0.5, 0.0], # Right party, component 1 score = 0.5 |
|
||||||
"PVV": [0.8, 0.0], # Right party |
|
||||||
"SP": [-0.6, 0.0], # Left party |
|
||||||
"DENK": [-0.4, 0.0], # Left party |
|
||||||
} |
|
||||||
|
|
||||||
# Component 1: right_mean = 0.65, left_mean = -0.5 |
|
||||||
# right_mean > left_mean, so flip = False |
|
||||||
assert compute_flip_direction(1, party_scores) is False |
|
||||||
|
|
||||||
|
|
||||||
def test_compute_flip_direction_insufficient_data(): |
|
||||||
"""Test that flip is False when there's insufficient data.""" |
|
||||||
from analysis.svd_labels import compute_flip_direction |
|
||||||
|
|
||||||
# No right parties in data |
|
||||||
party_scores = { |
|
||||||
"SP": [0.6, 0.0], |
|
||||||
"DENK": [0.4, 0.0], |
|
||||||
} |
|
||||||
|
|
||||||
assert compute_flip_direction(1, party_scores) is False |
|
||||||
|
|
||||||
# No left parties in data |
|
||||||
party_scores = { |
|
||||||
"VVD": [0.5, 0.0], |
|
||||||
"PVV": [0.8, 0.0], |
|
||||||
} |
|
||||||
|
|
||||||
assert compute_flip_direction(1, party_scores) is False |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 6: Run tests for `compute_flip_direction`** |
|
||||||
|
|
||||||
Run: `uv run pytest tests/test_svd_labels.py -v` |
|
||||||
Expected: All tests PASS |
|
||||||
|
|
||||||
- [ ] **Step 7: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add analysis/svd_labels.py tests/test_svd_labels.py |
|
||||||
git commit -m "feat: add svd_labels module for unified label system" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 2: Refactor explorer.py to export SVD_THEMES at module level |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `explorer.py` (move SVD_THEMES to module level) |
|
||||||
- Modify: `analysis/svd_labels.py` (update to import from explorer) |
|
||||||
|
|
||||||
- [ ] **Step 1: Move SVD_THEMES from function to module level in explorer.py** |
|
||||||
|
|
||||||
Find the `SVD_THEMES` dict inside `build_svd_components_tab` function (around line 2459) and move it to module level (near the top of the file, after imports). |
|
||||||
|
|
||||||
```python |
|
||||||
# explorer.py (near top of file, after imports and constants) |
|
||||||
|
|
||||||
# Political polarisation themes per SVD component (1-indexed, window=2025) |
|
||||||
# Produced by per-axis analysis of all 10 unique top motions (zero cross-axis overlap). |
|
||||||
# This is the canonical source of truth for SVD component labels. |
|
||||||
SVD_THEMES: dict[int, dict[str, str]] = { |
|
||||||
1: { |
|
||||||
"label": "EU-integratie en internationalisme versus nationalisme", |
|
||||||
# ... rest of the dict |
|
||||||
}, |
|
||||||
# ... rest of components |
|
||||||
} |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Update `analysis/svd_labels.py` to import SVD_THEMES properly** |
|
||||||
|
|
||||||
```python |
|
||||||
# analysis/svd_labels.py (update _get_svd_themes function) |
|
||||||
|
|
||||||
def _get_svd_themes() -> Dict[int, Dict[str, str]]: |
|
||||||
"""Lazy import SVD_THEMES from explorer.py to avoid circular imports.""" |
|
||||||
global _svd_themes_cache |
|
||||||
if _svd_themes_cache is not None: |
|
||||||
return _svd_themes_cache |
|
||||||
|
|
||||||
try: |
|
||||||
# Import at runtime to avoid circular dependency at module load time |
|
||||||
# explorer.py imports from analysis/ but we delay our import |
|
||||||
import explorer |
|
||||||
_svd_themes_cache = explorer.SVD_THEMES |
|
||||||
return _svd_themes_cache |
|
||||||
except ImportError as e: |
|
||||||
_logger.warning("Could not import explorer.SVD_THEMES: %s", e) |
|
||||||
return {} |
|
||||||
except Exception as e: |
|
||||||
_logger.exception("Failed to load SVD_THEMES from explorer.py: %s", e) |
|
||||||
return {} |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 3: Run tests to verify nothing broke** |
|
||||||
|
|
||||||
Run: `uv run pytest tests/test_svd_labels.py -v` |
|
||||||
Expected: All tests PASS |
|
||||||
|
|
||||||
- [ ] **Step 4: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add explorer.py analysis/svd_labels.py |
|
||||||
git commit -m "refactor: move SVD_THEMES to module level for import" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 3: Update `axis_classifier.py` to use svd_labels |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `analysis/axis_classifier.py` |
|
||||||
- Modify: `tests/test_axis_label_fallback.py` |
|
||||||
|
|
||||||
- [ ] **Step 1: Write test for updated `display_label_for_modal`** |
|
||||||
|
|
||||||
```python |
|
||||||
# tests/test_axis_label_fallback.py (update existing tests) |
|
||||||
|
|
||||||
def test_display_label_for_modal_uses_svd_themes(): |
|
||||||
"""Test that display_label_for_modal uses SVD_THEMES for fallback labels.""" |
|
||||||
from analysis.axis_classifier import display_label_for_modal |
|
||||||
|
|
||||||
# None should return fallback from SVD_THEMES |
|
||||||
x_label = display_label_for_modal(None, "x") |
|
||||||
y_label = display_label_for_modal(None, "y") |
|
||||||
|
|
||||||
# Should return component 1 and 2 labels from SVD_THEMES |
|
||||||
assert "EU-integratie" in x_label or "Nationalisme" in x_label |
|
||||||
assert "Populistisch" in y_label or "Institutioneel" in y_label |
|
||||||
|
|
||||||
|
|
||||||
def test_display_label_for_modal_maps_as_labels(): |
|
||||||
"""Test that 'As 1' and 'As 2' are mapped to semantic labels.""" |
|
||||||
from analysis.axis_classifier import display_label_for_modal |
|
||||||
|
|
||||||
x_label = display_label_for_modal("As 1", "x") |
|
||||||
y_label = display_label_for_modal("As 2", "y") |
|
||||||
|
|
||||||
# Should return component 1 and 2 labels |
|
||||||
assert "EU-integratie" in x_label or "Nationalisme" in x_label |
|
||||||
assert "Populistisch" in y_label or "Institutioneel" in y_label |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Run test to verify it fails** |
|
||||||
|
|
||||||
Run: `uv run pytest tests/test_axis_label_fallback.py -v` |
|
||||||
Expected: Some tests FAIL (current implementation returns hardcoded labels) |
|
||||||
|
|
||||||
- [ ] **Step 3: Update `axis_classifier.py` to use svd_labels** |
|
||||||
|
|
||||||
```python |
|
||||||
# analysis/axis_classifier.py (update the module) |
|
||||||
|
|
||||||
# Remove the hardcoded _LABELS dict (lines 25-35) |
|
||||||
# Replace with imports from svd_labels |
|
||||||
|
|
||||||
# At the top of the file, after imports: |
|
||||||
from analysis.svd_labels import get_svd_label, get_fallback_labels |
|
||||||
|
|
||||||
# Remove _LABELS dict entirely |
|
||||||
|
|
||||||
# Update display_label_for_modal function (lines 42-55): |
|
||||||
def display_label_for_modal(modal_label: Optional[str], axis: str) -> str: |
|
||||||
"""Return a user-facing axis label for a modal/internal label. |
|
||||||
|
|
||||||
Maps numeric fallback names 'As 1' / 'Stempatroon As 1' to the |
|
||||||
semantic labels from SVD_THEMES. Any other label is returned unchanged. |
|
||||||
None is treated as the semantic fallback for the axis. |
|
||||||
""" |
|
||||||
if modal_label is None: |
|
||||||
# Fallback to component 1 (x) or 2 (y) |
|
||||||
comp = 1 if axis == "x" else 2 |
|
||||||
return get_svd_label(comp) |
|
||||||
|
|
||||||
# Map "As 1" / "As 2" to semantic labels |
|
||||||
if axis == "x" and modal_label in ("As 1", "Stempatroon As 1"): |
|
||||||
return get_svd_label(1) |
|
||||||
if axis == "y" and modal_label in ("As 2", "Stempatroon As 2"): |
|
||||||
return get_svd_label(2) |
|
||||||
|
|
||||||
return modal_label |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 4: Update `_INTERPRETATION_TEMPLATES` to use svd_labels** |
|
||||||
|
|
||||||
The interpretation templates still need to reference the correct labels. Keep them as-is for now since they're used for motion classification, not display. |
|
||||||
|
|
||||||
- [ ] **Step 5: Update `_MOTION_LABEL_TEMPLATE_KEY` to use svd_labels** |
|
||||||
|
|
||||||
Keep the mapping as-is for now since it's used for motion classification. |
|
||||||
|
|
||||||
- [ ] **Step 6: Update `_KEYWORDS` to use svd_labels** |
|
||||||
|
|
||||||
Keep the keywords as-is for now since they're used for motion classification. |
|
||||||
|
|
||||||
- [ ] **Step 7: Update fallback label references in classify_axes** |
|
||||||
|
|
||||||
Find lines 610 and 615 where `_LABELS["fallback_x"]` and `_LABELS["fallback_y"]` are used and replace: |
|
||||||
|
|
||||||
```python |
|
||||||
# Before: |
|
||||||
x_lbl = _LABELS["fallback_x"] |
|
||||||
y_lbl = _LABELS["fallback_y"] |
|
||||||
|
|
||||||
# After: |
|
||||||
x_lbl, y_lbl = get_fallback_labels() |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 8: Run tests to verify they pass** |
|
||||||
|
|
||||||
Run: `uv run pytest tests/test_axis_label_fallback.py tests/test_political_compass.py -v` |
|
||||||
Expected: All tests PASS |
|
||||||
|
|
||||||
- [ ] **Step 9: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add analysis/axis_classifier.py tests/test_axis_label_fallback.py |
|
||||||
git commit -m "refactor: use svd_labels for axis labels in axis_classifier" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 4: Update explorer.py to use svd_labels for fallback labels |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `explorer.py` |
|
||||||
|
|
||||||
- [ ] **Step 1: Find and update fallback label references in explorer.py** |
|
||||||
|
|
||||||
Find lines 1440-1441 where hardcoded fallback labels are used: |
|
||||||
|
|
||||||
```python |
|
||||||
# Before: |
|
||||||
_x_label = _raw_x or "EU-integratie–Nationalisme" |
|
||||||
_y_label = _raw_y or "Populistisch–Institutioneel" |
|
||||||
|
|
||||||
# After: |
|
||||||
from analysis.svd_labels import get_fallback_labels |
|
||||||
_x_fallback, _y_fallback = get_fallback_labels() |
|
||||||
_x_label = _raw_x or _x_fallback |
|
||||||
_y_label = _raw_y or _y_fallback |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Run tests to verify nothing broke** |
|
||||||
|
|
||||||
Run: `uv run pytest tests/test_political_compass.py -v` |
|
||||||
Expected: All tests PASS |
|
||||||
|
|
||||||
- [ ] **Step 3: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add explorer.py |
|
||||||
git commit -m "refactor: use svd_labels for fallback labels in explorer" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 5: Add 1D party position charts for components 3-10 |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `explorer.py` |
|
||||||
- Test: `tests/test_explorer_chart.py` |
|
||||||
|
|
||||||
- [ ] **Step 1: Write test for 1D party position chart** |
|
||||||
|
|
||||||
```python |
|
||||||
# tests/test_explorer_chart.py (append) |
|
||||||
|
|
||||||
def test_render_party_axis_chart_1d_renders(): |
|
||||||
"""Test that _render_party_axis_chart_1d renders a figure.""" |
|
||||||
from explorer import _render_party_axis_chart_1d |
|
||||||
|
|
||||||
party_coords = { |
|
||||||
"VVD": (0.5,), |
|
||||||
"SP": (-0.6,), |
|
||||||
"PVV": (0.8,), |
|
||||||
"DENK": (-0.4,), |
|
||||||
} |
|
||||||
|
|
||||||
theme = { |
|
||||||
"label": "Test Component", |
|
||||||
"positive_pole": "Positive", |
|
||||||
"negative_pole": "Negative", |
|
||||||
"flip": False, |
|
||||||
} |
|
||||||
|
|
||||||
fig = _render_party_axis_chart_1d(party_coords, 3, theme) |
|
||||||
assert fig is not None |
|
||||||
# Check that figure has traces |
|
||||||
assert len(fig.data) > 0 |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Run test to verify it fails** |
|
||||||
|
|
||||||
Run: `uv run pytest tests/test_explorer_chart.py::test_render_party_axis_chart_1d_renders -v` |
|
||||||
Expected: FAIL with "cannot import name '_render_party_axis_chart_1d'" |
|
||||||
|
|
||||||
- [ ] **Step 3: Implement `_render_party_axis_chart_1d` function in explorer.py** |
|
||||||
|
|
||||||
Add this function near `_render_party_axis_chart` (around line 2900): |
|
||||||
|
|
||||||
```python |
|
||||||
# explorer.py (add new function) |
|
||||||
|
|
||||||
def _render_party_axis_chart_1d(
    party_coords: dict,
    component: int,
    theme: dict,
    bootstrap_data: Optional[Dict] = None,
) -> go.Figure:
    """Render a 1D party position chart for a single SVD component.

    Draws a horizontal bar chart with one bar per party, sorted by score,
    plus a dashed reference line at zero separating the two poles.

    Args:
        party_coords: Dict mapping party name to a 1-tuple ``(score,)``
        component: SVD component number (1-indexed); only used for the
            fallback axis label when ``theme`` lacks a ``label``
        theme: Dict with ``label``, ``positive_pole``, ``negative_pole``
            and ``flip`` keys (all optional)
        bootstrap_data: Optional bootstrap confidence intervals.
            NOTE(review): currently unused — accepted only for interface
            parity with ``_render_party_axis_chart``; wire up error bars
            here once per-component bootstrap data is available.

    Returns:
        Plotly Figure object (an annotated empty figure when
        ``party_coords`` is empty)
    """
    import plotly.graph_objects as go

    # Empty input: return a figure carrying an explanatory annotation
    # instead of a blank chart, so the UI still shows something useful.
    if not party_coords:
        fig = go.Figure()
        fig.add_annotation(
            text="Geen partijposities beschikbaar",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=14)
        )
        return fig

    # Extract scores and parties
    parties = list(party_coords.keys())
    scores = [coords[0] for coords in party_coords.values()]

    # Negate scores when the theme asks for a flipped orientation, so the
    # positive pole always points in the labelled direction.
    flip = theme.get("flip", False)
    if flip:
        scores = [-s for s in scores]

    # Party colours, with a neutral grey fallback for unknown parties.
    party_colors = [PARTY_COLOURS.get(p, "#9E9E9E") for p in parties]

    # Create horizontal bar chart
    fig = go.Figure()

    # Sort by score for better visualization.  A stable stdlib sort is
    # sufficient here — no need to import numpy just for argsort.
    order = sorted(range(len(scores)), key=scores.__getitem__)
    sorted_parties = [parties[i] for i in order]
    sorted_scores = [scores[i] for i in order]
    sorted_colors = [party_colors[i] for i in order]

    fig.add_trace(go.Bar(
        y=sorted_parties,
        x=sorted_scores,
        orientation='h',
        marker_color=sorted_colors,
        text=[f"{s:.2f}" for s in sorted_scores],
        textposition='outside',
    ))

    # Update layout
    label = theme.get("label", f"As {component}")
    positive_pole = theme.get("positive_pole", "Positief")
    negative_pole = theme.get("negative_pole", "Negatief")

    fig.update_layout(
        title=f"Partijposities — {label}",
        xaxis_title=f"{negative_pole} ← → {positive_pole}",
        yaxis_title="",
        height=max(400, len(parties) * 25),  # Scale height with number of parties
        margin=dict(l=150),  # Extra margin for party names
        showlegend=False,
    )

    # Add vertical line at x=0
    fig.add_vline(x=0, line_dash="dash", line_color="gray", opacity=0.5)

    return fig
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 4: Update SVD Components tab to use 1D chart for components 3-10** |
|
||||||
|
|
||||||
Find the section in `build_svd_components_tab` where `_render_party_axis_chart` is called (around line 2823) and update: |
|
||||||
|
|
||||||
```python |
|
||||||
# explorer.py (update around line 2822-2830) |
|
||||||
|
|
||||||
# Before: |
|
||||||
if comp_sel in (1, 2): |
|
||||||
_render_party_axis_chart( |
|
||||||
party_coords, comp_sel, theme, bootstrap_data=bootstrap_data |
|
||||||
) |
|
||||||
else: |
|
||||||
st.caption( |
|
||||||
"_Partijposities zijn alleen beschikbaar voor de eerste twee SVD-assen._" |
|
||||||
) |
|
||||||
|
|
||||||
# After: |
|
||||||
if comp_sel in (1, 2): |
|
||||||
_render_party_axis_chart( |
|
||||||
party_coords, comp_sel, theme, bootstrap_data=bootstrap_data |
|
||||||
) |
|
||||||
else: |
|
||||||
# For components 3-10, show 1D party positions |
|
||||||
# Extract 1D scores for this component |
|
||||||
party_1d_coords = {} |
|
||||||
idx = comp_sel - 1 # Convert to 0-indexed |
|
||||||
for party, scores in party_scores.items(): |
|
||||||
if len(scores) > idx: |
|
||||||
party_1d_coords[party] = (scores[idx],) |
|
||||||
|
|
||||||
_render_party_axis_chart_1d( |
|
||||||
party_1d_coords, comp_sel, theme, bootstrap_data=None |
|
||||||
) |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 5: Run tests to verify they pass** |
|
||||||
|
|
||||||
Run: `uv run pytest tests/test_explorer_chart.py -v` |
|
||||||
Expected: All tests PASS |
|
||||||
|
|
||||||
- [ ] **Step 6: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add explorer.py tests/test_explorer_chart.py |
|
||||||
git commit -m "feat: add 1D party position charts for SVD components 3-10" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 6: Auto-compute flip directions for all components |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `explorer.py` |
|
||||||
- Modify: `analysis/svd_labels.py` |
|
||||||
- Test: `tests/test_svd_labels.py` |
|
||||||
|
|
||||||
- [ ] **Step 1: Write test for auto-computing flip in SVD Components tab** |
|
||||||
|
|
||||||
```python |
|
||||||
# tests/test_svd_labels.py (append) |
|
||||||
|
|
||||||
def test_auto_flip_computation_for_all_components():
    """Test that flip directions are computed correctly for all components."""
    from analysis.svd_labels import compute_flip_direction

    # Right-wing parties score positive on every component and left-wing
    # parties negative, so each axis already points the right way.
    rightward_scores = {
        "VVD": [0.5] * 10,   # Right party, positive on all components
        "PVV": [0.8] * 10,   # Right party
        "SP": [-0.6] * 10,   # Left party, negative on all components
        "DENK": [-0.4] * 10, # Left party
    }
    for component in range(1, 11):
        # right_mean > left_mean → no flip expected
        assert compute_flip_direction(component, rightward_scores) is False, (
            f"Component {component} should not flip"
        )

    # Mirror image: right parties on the negative side, left parties on
    # the positive side — every component must now be flipped.
    leftward_scores = {
        party: [-value for value in values]
        for party, values in rightward_scores.items()
    }
    for component in range(1, 11):
        # right_mean < left_mean → flip expected
        assert compute_flip_direction(component, leftward_scores) is True, (
            f"Component {component} should flip"
        )
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Run test to verify it passes** |
|
||||||
|
|
||||||
Run: `uv run pytest tests/test_svd_labels.py::test_auto_flip_computation_for_all_components -v` |
|
||||||
Expected: PASS (function already implemented in Task 1) |
|
||||||
|
|
||||||
- [ ] **Step 3: Update SVD Components tab to compute flip dynamically** |
|
||||||
|
|
||||||
In `build_svd_components_tab`, after loading party_scores, compute flip for each component: |
|
||||||
|
|
||||||
```python |
|
||||||
# explorer.py (in build_svd_components_tab, after loading party_scores) |
|
||||||
|
|
||||||
from analysis.svd_labels import compute_flip_direction |
|
||||||
|
|
||||||
# After party_scores is loaded (around line 2800) |
|
||||||
# Compute flip directions for all components |
|
||||||
computed_flips = {} |
|
||||||
for comp in range(1, 11): |
|
||||||
computed_flips[comp] = compute_flip_direction(comp, party_scores) |
|
||||||
|
|
||||||
# Update SVD_THEMES with computed flips |
|
||||||
for comp, flip in computed_flips.items(): |
|
||||||
if comp in SVD_THEMES: |
|
||||||
SVD_THEMES[comp]["flip"] = flip |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 4: Run tests to verify nothing broke** |
|
||||||
|
|
||||||
Run: `uv run pytest tests/test_svd_labels.py tests/test_explorer_chart.py -v` |
|
||||||
Expected: All tests PASS |
|
||||||
|
|
||||||
- [ ] **Step 5: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add explorer.py analysis/svd_labels.py tests/test_svd_labels.py |
|
||||||
git commit -m "feat: auto-compute flip directions for all SVD components" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 7: Update existing tests for new label system |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `tests/test_political_compass.py` |
|
||||||
- Modify: `tests/test_axis_label_fallback.py` |
|
||||||
|
|
||||||
- [ ] **Step 1: Run all tests to identify failures** |
|
||||||
|
|
||||||
Run: `uv run pytest tests/ -v --tb=short` |
|
||||||
Expected: Some tests may fail due to label changes |
|
||||||
|
|
||||||
- [ ] **Step 2: Fix any failing tests** |
|
||||||
|
|
||||||
Update test assertions to use new labels from SVD_THEMES: |
|
||||||
|
|
||||||
```python |
|
||||||
# tests/test_political_compass.py (update assertions) |
|
||||||
|
|
||||||
# Before: |
|
||||||
assert "Links–Rechts" in x_label |
|
||||||
|
|
||||||
# After: |
|
||||||
assert "EU-integratie" in x_label or "Nationalisme" in x_label |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 3: Run all tests to verify they pass** |
|
||||||
|
|
||||||
Run: `uv run pytest tests/ -v` |
|
||||||
Expected: All tests PASS |
|
||||||
|
|
||||||
- [ ] **Step 4: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add tests/ |
|
||||||
git commit -m "test: update tests for unified SVD label system" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 8: Final verification and cleanup |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- All modified files |
|
||||||
|
|
||||||
- [ ] **Step 1: Run full test suite** |
|
||||||
|
|
||||||
Run: `uv run pytest tests/ -v` |
|
||||||
Expected: All tests PASS |
|
||||||
|
|
||||||
- [ ] **Step 2: Run Streamlit app to verify UI** |
|
||||||
|
|
||||||
Run: `uv run streamlit run explorer.py` |
|
||||||
Expected: App starts without errors |
|
||||||
|
|
||||||
- [ ] **Step 3: Manually verify in UI** |
|
||||||
- Open SVD Components tab |
|
||||||
- Check that components 1-10 show correct labels |
|
||||||
- Check that party position charts render for all components |
|
||||||
- Check that right parties appear on the right side of axes |
|
||||||
|
|
||||||
- [ ] **Step 4: Final commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add -A |
|
||||||
git commit -m "feat: complete SVD label unification" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Summary |
|
||||||
|
|
||||||
This plan creates a unified label system for SVD components with: |
|
||||||
1. Single source of truth in `SVD_THEMES` |
|
||||||
2. Automatic flip direction computation based on party centroids |
|
||||||
3. 1D party position charts for components 3-10 |
|
||||||
4. Consistent labels across compass, trajectory, and SVD Components views |
|
||||||
@ -1,298 +0,0 @@ |
|||||||
--- |
|
||||||
date: 2026-03-29 |
|
||||||
topic: "Honest PCA Axis Classification" |
|
||||||
status: validated |
|
||||||
--- |
|
||||||
|
|
||||||
# Axis Classification Design |
|
||||||
|
|
||||||
## Problem Statement |
|
||||||
|
|
||||||
The political compass always labels its X-axis "Links–Rechts" and Y-axis "Progressief–Conservatief" |
|
||||||
regardless of what the PCA actually found. In coalition years, the first principal component captures |
|
||||||
**coalition membership**, not ideology. The dominant axis of voting variation in Rutte II (VVD+PvdA) |
|
||||||
and Rutte III/IV (VVD+CDA+D66+CU) is "are you in the governing coalition?" PvdA and PVV end up at the |
|
||||||
same position because both were in opposition — technically correct voting similarity, but the label |
|
||||||
"Links–Rechts" is a lie. |
|
||||||
|
|
||||||
The fix: after each PCA, validate what the axes actually capture by correlating party positions against |
|
||||||
a small reference dataset of known ideological scores. Assign labels honestly. |
|
||||||
|
|
||||||
## Constraints |
|
||||||
|
|
||||||
- No changes to the PCA computation itself (`compute_2d_axes` is unchanged) |
|
||||||
- No new runtime dependencies (scipy is already optional; pandas is already present) |
|
||||||
- `party_ideologies.csv` and `coalition_membership.csv` are static data files — not derived from the DB |
|
||||||
- Backward-compatible: the compass still renders even when reference files are missing (falls back to |
|
||||||
current hardcoded labels silently) |
|
||||||
|
|
||||||
## Approach |
|
||||||
|
|
||||||
Reference-validated PCA with dynamic labeling. For each time window, correlate the per-party PCA |
|
||||||
positions against known ideological scores. Assign a label based on which correlation is strongest. |
|
||||||
Surface the finding as a one-line caption in the UI when the axis diverges from "Links–Rechts". |
|
||||||
|
|
||||||
Rejected alternatives: |
|
||||||
- **Fixed anchor compass**: replaces honest complexity with comfortable fiction; loses behavioral |
|
||||||
information entirely |
|
||||||
- **Dual view (behavioral + ideological)**: too much UI complexity for V1; can be done later |
|
||||||
|
|
||||||
## Architecture Overview |
|
||||||
|
|
||||||
A thin axis classification layer sits between `compute_2d_axes` (unchanged) and the compass UI. |
|
||||||
|
|
||||||
``` |
|
||||||
compute_2d_axes() |
|
||||||
↓ |
|
||||||
positions_by_window + axes dict |
|
||||||
↓ |
|
||||||
classify_axes(positions_by_window, axes, db_path) |
|
||||||
↓ |
|
||||||
axes dict enriched with: |
|
||||||
- x_label, y_label (global, most-common label across annual windows) |
|
||||||
- x_quality (dict: window_id → float, max |r|) |
|
||||||
- y_quality (dict: window_id → float, max |r|) |
|
||||||
- x_interpretation (dict: window_id → Dutch str) |
|
||||||
- y_interpretation (dict: window_id → Dutch str) |
|
||||||
↓ |
|
||||||
compass renderer uses labels + per-year quality captions |
|
||||||
``` |
|
||||||
|
|
||||||
## Components |
|
||||||
|
|
||||||
### 1. Reference data files |
|
||||||
|
|
||||||
**`data/party_ideologies.csv`** |
|
||||||
|
|
||||||
One row per party. Party names must match entity IDs in the `svd_vectors` table exactly. |
|
||||||
|
|
||||||
``` |
|
||||||
party,left_right,progressive |
|
||||||
VVD,0.65,0.10 |
|
||||||
PvdA,-0.70,0.75 |
|
||||||
SP,-0.90,0.50 |
|
||||||
CDA,0.25,-0.45 |
|
||||||
D66,-0.10,0.85 |
|
||||||
GroenLinks,-0.70,0.90 |
|
||||||
GL,-0.70,0.90 |
|
||||||
GroenLinks-PvdA,-0.70,0.82 |
|
||||||
ChristenUnie,0.10,-0.55 |
|
||||||
SGP,0.35,-0.95 |
|
||||||
PVV,0.90,-0.50 |
|
||||||
DENK,-0.40,0.55 |
|
||||||
50Plus,-0.05,-0.10 |
|
||||||
FVD,0.90,-0.75 |
|
||||||
PvdD,-0.60,0.85 |
|
||||||
Volt,-0.20,0.80 |
|
||||||
JA21,0.70,-0.30 |
|
||||||
BBB,0.50,-0.35 |
|
||||||
NSC,0.20,-0.20 |
|
||||||
Nieuw Sociaal Contract,0.20,-0.20 |
|
||||||
BVNL,0.85,-0.55 |
|
||||||
Bij1,-0.90,0.90 |
|
||||||
``` |
|
||||||
|
|
||||||
Scores: left_right = −1 (far left) to +1 (far right). progressive = −1 (conservative) to +1 (progressive). |
|
||||||
These are expert judgments based on party programs and voting records, not derived algorithmically. |
|
||||||
|
|
||||||
**`data/coalition_membership.csv`** |
|
||||||
|
|
||||||
One row per (window_id, party) where that party held a government seat. Annual windows only; quarterly |
|
||||||
windows inherit from their year. |
|
||||||
|
|
||||||
``` |
|
||||||
window_id,party |
|
||||||
2012,VVD |
|
||||||
2012,PvdA |
|
||||||
2013,VVD |
|
||||||
2013,PvdA |
|
||||||
2014,VVD |
|
||||||
2014,PvdA |
|
||||||
2015,VVD |
|
||||||
2015,PvdA |
|
||||||
2016,VVD |
|
||||||
2016,PvdA |
|
||||||
2017,VVD |
|
||||||
2017,CDA |
|
||||||
2017,D66 |
|
||||||
2017,ChristenUnie |
|
||||||
2018,VVD |
|
||||||
2018,CDA |
|
||||||
2018,D66 |
|
||||||
2018,ChristenUnie |
|
||||||
2019,VVD |
|
||||||
2019,CDA |
|
||||||
2019,D66 |
|
||||||
2019,ChristenUnie |
|
||||||
2020,VVD |
|
||||||
2020,CDA |
|
||||||
2020,D66 |
|
||||||
2020,ChristenUnie |
|
||||||
2021,VVD |
|
||||||
2021,CDA |
|
||||||
2021,D66 |
|
||||||
2021,ChristenUnie |
|
||||||
2022,VVD |
|
||||||
2022,D66 |
|
||||||
2022,CDA |
|
||||||
2022,ChristenUnie |
|
||||||
2023,VVD |
|
||||||
2023,D66 |
|
||||||
2023,CDA |
|
||||||
2023,ChristenUnie |
|
||||||
2024,PVV |
|
||||||
2024,VVD |
|
||||||
2024,NSC |
|
||||||
2024,BBB |
|
||||||
2025,PVV |
|
||||||
2025,VVD |
|
||||||
2025,NSC |
|
||||||
2025,BBB |
|
||||||
2026,PVV |
|
||||||
2026,VVD |
|
||||||
2026,NSC |
|
||||||
2026,BBB |
|
||||||
``` |
|
||||||
|
|
||||||
### 2. `analysis/axis_classifier.py` (new module) |
|
||||||
|
|
||||||
Single public function: `classify_axes(positions_by_window, axes, db_path)`. |
|
||||||
|
|
||||||
The function is pure except for reading two CSV files (cached module-level after first load). |
|
||||||
|
|
||||||
CSV paths are derived from `db_path`: `Path(db_path).parent / "party_ideologies.csv"` and |
|
||||||
`Path(db_path).parent / "coalition_membership.csv"`. Both files live in the same `data/` directory |
|
||||||
as the database. |
|
||||||
|
|
||||||
**Algorithm per window:** |
|
||||||
|
|
||||||
1. Collect parties that appear in both `positions_by_window[window_id]` and `party_ideologies.csv`. |
|
||||||
Skip windows with fewer than 5 overlapping parties. |
|
||||||
2. Build vectors: |
|
||||||
- `party_x`: per-party X positions from this window |
|
||||||
- `party_y`: per-party Y positions from this window |
|
||||||
- `ref_lr`: left_right scores from CSV |
|
||||||
- `ref_pc`: progressive scores from CSV |
|
||||||
- `coalition_dummy`: +1 if party is in government for this window's year, −1 otherwise |
|
||||||
(quarterly windows: strip suffix to get year, e.g., `2016-Q3` → `2016`) |
|
||||||
3. Compute Pearson r for X against each reference dimension: |
|
||||||
- `r_lr_x = pearsonr(party_x, ref_lr)[0]` |
|
||||||
- `r_pc_x = pearsonr(party_x, ref_pc)[0]` |
|
||||||
- `r_co_x = pearsonr(party_x, coalition_dummy)[0]` |
|
||||||
4. Assign label and interpretation using priority order (first threshold that fires wins): |
|
||||||
- `|r_lr_x| ≥ 0.65` → label = `"Links–Rechts"`, flip sign if r < 0 |
|
||||||
- `|r_co_x| ≥ 0.65` → label = `"Coalitie–Oppositie"` |
|
||||||
- `|r_pc_x| ≥ 0.65` → label = `"Progressief–Conservatief"`, flip sign if r < 0 |
|
||||||
- fallback → label = `"Stempatroon As 1"` |
|
||||||
5. Quality score for this window's X-axis: `max(|r_lr_x|, |r_pc_x|, |r_co_x|)` |
|
||||||
6. Repeat steps 3–5 for Y-axis using `party_y`. |
|
||||||
7. After processing all windows, pick global X label = modal label across annual windows only |
|
||||||
(quarterly windows participate in quality tracking but not in the modal vote, to avoid |
|
||||||
over-weighting). The `current_parliament` window is excluded from modal voting entirely and |
|
||||||
from the coalition dimension (no year to look up); it still gets x_quality and x_interpretation |
|
||||||
based on the left_right and progressive correlations. |
|
||||||
|
|
||||||
**Interpretation strings (Dutch):** |
|
||||||
|
|
||||||
| label | interpretation | |
|
||||||
|---|---| |
|
||||||
| Links–Rechts | "De horizontale as weerspiegelt de klassieke links-rechts tegenstelling." | |
|
||||||
| Coalitie–Oppositie | "De horizontale as weerspiegelt stemgedrag van coalitie- versus oppositiepartijen (r={r:.2f}). Links-rechts is minder dominant dit jaar." | |
|
||||||
| Progressief–Conservatief | "De horizontale as weerspiegelt de progressief-conservatieve tegenstelling." | |
|
||||||
| Stempatroon As 1 | "De horizontale as weerspiegelt een empirisch stempatroon zonder duidelijke ideologische richting." | |
|
||||||
|
|
||||||
Y-axis interpretations follow the same template with "verticale" instead of "horizontale". |
|
||||||
|
|
||||||
**Return value:** the input `axes` dict with four new keys added: |
|
||||||
`x_label`, `y_label`, `x_quality` (dict), `y_quality` (dict), `x_interpretation` (dict), |
|
||||||
`y_interpretation` (dict). |
|
||||||
|
|
||||||
### 3. `explorer.py` changes |
|
||||||
|
|
||||||
**`load_positions()`** — after calling `compute_2d_axes`, call `classify_axes` and store the enriched |
|
||||||
axes dict. If `classify_axes` raises for any reason, catch and log; use the original axes dict. |
|
||||||
|
|
||||||
**Compass renderer** — two changes only: |
|
||||||
1. Replace hardcoded `"Links–Rechts"` / `"Progressief–Conservatief"` axis title strings with |
|
||||||
`axes.get("x_label", "Links–Rechts")` and `axes.get("y_label", "Progressief–Conservatief")`. |
|
||||||
2. Add a caption below the compass for the selected year. Show when either axis quality < 0.65: |
|
||||||
> *"In 2016 weerspiegelt de horizontale as coalitie–oppositie stemgedrag (r=0.71)."* |
|
||||||
|
|
||||||
Source: `axes["x_interpretation"].get(selected_window_id, "")`. |
|
||||||
|
|
||||||
No other UI changes. The compass layout is untouched. |
|
||||||
|
|
||||||
## Data Flow |
|
||||||
|
|
||||||
``` |
|
||||||
load_positions(db_path, window_size) |
|
||||||
→ compute_2d_axes(...) [unchanged; returns positions_by_window, axes] |
|
||||||
→ classify_axes( [new] |
|
||||||
positions_by_window, |
|
||||||
axes, |
|
||||||
db_path=db_path |
|
||||||
) |
|
||||||
reads: data/party_ideologies.csv (module-level cache) |
|
||||||
reads: data/coalition_membership.csv (module-level cache) |
|
||||||
uses: positions_by_window already in memory |
|
||||||
writes: new keys into axes dict (no mutation of positions) |
|
||||||
→ return positions_by_window, axes_enriched |
|
||||||
|
|
||||||
compass render (existing function) |
|
||||||
→ axes["x_label"] [was hardcoded "Links–Rechts"] |
|
||||||
→ axes["y_label"] [was hardcoded "Progressief–Conservatief"] |
|
||||||
→ axes["x_interpretation"][window_id] [new caption] |
|
||||||
``` |
|
||||||
|
|
||||||
No DB writes. No new DB queries. Pure in-memory correlation over data that's already loaded. |
|
||||||
CSV reads take at most a few milliseconds and are cached after the first call. |
|
||||||
|
|
||||||
## Error Handling |
|
||||||
|
|
||||||
| Failure | Behaviour | |
|
||||||
|---|---| |
|
||||||
| `data/party_ideologies.csv` missing | Log WARNING, return `axes` unchanged (current labels preserved) | |
|
||||||
| `data/coalition_membership.csv` missing | Log WARNING, coalition dimension skipped; other correlations still computed | |
|
||||||
| Party in positions but not in CSV | Skip silently; log once at DEBUG per session | |
|
||||||
| Window has fewer than 5 overlapping parties | Skip classification for that window; use fallback label | |
|
||||||
| All correlations < 0.65 | Fallback label is always safe; no crash | |
|
||||||
| Any unexpected exception in `classify_axes` | Caller (`load_positions`) catches, logs, returns original `axes` dict | |
|
||||||
|
|
||||||
## Testing Strategy |
|
||||||
|
|
||||||
Three new tests added to `tests/test_political_compass.py`: |
|
||||||
|
|
||||||
**`test_axis_label_left_right`** |
|
||||||
Construct synthetic per-party positions where X values correlate strongly (r > 0.8) with the left_right |
|
||||||
column of a minimal inline CSV. Assert that `classify_axes` returns `x_label == "Links–Rechts"` and |
|
||||||
`x_quality[window] > 0.65`. |
|
||||||
|
|
||||||
**`test_axis_label_coalition_dominant`** |
|
||||||
Construct synthetic positions where X values match coalition membership pattern but NOT left-right. |
|
||||||
(E.g., coalition parties [VVD, PvdA] cluster at x=+1, opposition [PVV, SP] at x=−1, which is |
|
||||||
historically coherent for 2016.) Assert `x_label == "Coalitie–Oppositie"` and that the interpretation |
|
||||||
string contains "coalitie". |
|
||||||
|
|
||||||
**`test_axis_classifier_missing_csv`** |
|
||||||
Call `classify_axes` with a db_path pointing to a nonexistent directory so CSV loading fails. Assert |
|
||||||
that the function returns the axes dict unchanged and does not raise. |
|
||||||
|
|
||||||
All three tests use monkeypatching to inject CSV content as in-memory StringIO, following the existing |
|
||||||
pattern in `tests/test_political_compass.py` of patching module-level imports. |
|
||||||
|
|
||||||
## Deployment |
|
||||||
|
|
||||||
The CSV files (`data/party_ideologies.csv` and `data/coalition_membership.csv`) are **static reference |
|
||||||
data committed to git**. They are baked into the Docker image at build time alongside the application |
|
||||||
code. No rsync or volume mount is needed. |
|
||||||
|
|
||||||
The `.gitignore` excludes `data/*.db`, `data/*.bak`, `data/*.json` but not `data/*.csv`, so they can |
|
||||||
be tracked without change to the ignore rules. The data volume mount (`DATA_DIR:/home/app/app/data`) |
|
||||||
only contains the database file and does not overwrite the baked-in CSVs. |
|
||||||
|
|
||||||
When party compositions change (e.g., a new party enters parliament), update the CSV, commit, and |
|
||||||
redeploy. Typical frequency: once per parliament formation (~4 years). |
|
||||||
|
|
||||||
## Open Questions |
|
||||||
|
|
||||||
None. |
|
||||||
@ -1,219 +0,0 @@ |
|||||||
--- |
|
||||||
date: 2026-03-29 |
|
||||||
topic: "Motion-Driven Axis Labeling for Political Compass" |
|
||||||
status: validated |
|
||||||
--- |
|
||||||
|
|
||||||
# Motion-Driven Axis Labeling |
|
||||||
|
|
||||||
## Problem Statement |
|
||||||
|
|
||||||
The current axis labeling in `analysis/axis_classifier.py` correlates per-party PCA |
|
||||||
positions against static scores from `data/party_ideologies.csv`. This has three |
|
||||||
failure modes: |
|
||||||
|
|
||||||
1. **Mislabeling**: When the dominant PCA axis is coalition/opposition rather than |
|
||||||
left-right, it gets labeled "Links–Rechts" anyway, making the compass look "rotated 90 degrees". |
|
||||||
2. **Static reference**: A fixed ideology CSV cannot reflect year-specific political |
|
||||||
dynamics (e.g., asylum being the main left-right issue in 2015 vs. housing in 2023). |
|
||||||
3. **No explainability**: Users cannot see *why* an axis got a particular label. |
|
||||||
|
|
||||||
The fix is to derive labels from the **actual motions** that most strongly split |
|
||||||
parliament on each PCA axis in a given year, and to expose those motions to users. |
|
||||||
|
|
||||||
## Constraints |
|
||||||
|
|
||||||
- Must not break existing 8 passing tests. |
|
||||||
- Must remain DuckDB-only for data access (no new external files for primary path). |
|
||||||
- `party_ideologies.csv` and `coalition_membership.csv` remain as fallbacks — not |
|
||||||
removed. |
|
||||||
- The labeling approximation (projecting motion vectors without full Procrustes |
|
||||||
alignment) is acceptable for v1. Proper alignment can be added later. |
|
||||||
- Labels must still be deterministic given the same DB state. |
|
||||||
|
|
||||||
## Approach |
|
||||||
|
|
||||||
**Primary**: For each window, load motion SVD vectors from the DB, project them onto |
|
||||||
the PCA axes, rank motions by projection score, apply a Dutch keyword classifier to the |
|
||||||
top motion titles, and derive a categorical label. |
|
||||||
|
|
||||||
**Fallback chain** (unchanged from today): |
|
||||||
1. Keyword classifier on top motions → categorical label |
|
||||||
2. Coalition correlation (existing `_pearsonr` against coalition dummy) |
|
||||||
3. Ideology CSV correlation (existing Pearson-r against `party_ideologies.csv`) |
|
||||||
4. "Stempatroon As N" (generic fallback) |
|
||||||
|
|
||||||
**Axis swap**: After classification, if Y-axis is "Links-Rechts" and X-axis is not, |
|
||||||
swap them (both positions and all axis metadata), so that left-right is conventionally |
|
||||||
on the horizontal axis when present. |
|
||||||
|
|
||||||
## Architecture |
|
||||||
|
|
||||||
### Changes by file |
|
||||||
|
|
||||||
#### `analysis/political_axis.py` (minimal) |
|
||||||
- Add `axes["global_mean"] = M.mean(axis=0)` before returning from `compute_2d_axes`. |
|
||||||
This lets `classify_axes` center motion vectors before projection without needing to |
|
||||||
re-access the stacked matrix. |
|
||||||
|
|
||||||
#### `analysis/axis_classifier.py` (major) |
|
||||||
|
|
||||||
New private helpers: |
|
||||||
- `_load_motion_vectors(db_path, window_id)` → `dict[int, np.ndarray]` |
|
||||||
- SELECT entity_id, vector FROM svd_vectors WHERE entity_type='motion' AND window_id=? |
|
||||||
- Returns {motion_id: vector}. Returns {} on any DB error. |
|
||||||
- `_project_motions(motion_vecs, x_axis, y_axis, global_mean)` → `dict[int, tuple[float, float]]` |
|
||||||
- For each motion: `x = dot(vec - global_mean, x_axis)`, `y = dot(vec - global_mean, y_axis)` |
|
||||||
- Returns {motion_id: (x_score, y_score)} |
|
||||||
- `_top_motion_ids(projections, axis, n=5)` → `{'+': [ids], '-': [ids]}` |
|
||||||
- Sorts by axis score, returns top n positive and n negative motion IDs |
|
||||||
- `_fetch_motion_titles(db_path, motion_ids)` → `dict[int, tuple[str, str]]` |
|
||||||
- SELECT id, title, date FROM motions WHERE id IN (...) |
|
||||||
- Returns {id: (title, date_str)} |
|
||||||
- `_classify_from_titles(titles)` → `str | None` |
|
||||||
- Applies keyword dict against concatenated titles of top motions |
|
||||||
- Returns category string or None if confidence below threshold (0.4) |
|
||||||
|
|
||||||
New module-level constant: |
|
||||||
- `_KEYWORDS: dict[str, list[str]]` — Dutch keyword → category mapping (see below) |
|
||||||
|
|
||||||
Modified `classify_axes`: |
|
||||||
1. Check if `axes` contains `global_mean`; if not, skip motion classification. |
|
||||||
2. For each window W: |
|
||||||
a. Load motion vectors |
|
||||||
b. Project onto x_axis, y_axis using global_mean |
|
||||||
c. Find top 5+5 motions per axis |
|
||||||
d. Fetch titles from motions table |
|
||||||
e. Apply keyword classifier → label candidate |
|
||||||
f. If None: fall through to existing Pearson-r approaches |
|
||||||
3. Store `x_top_motions` and `y_top_motions` per window in enriched dict |
|
||||||
4. Store `x_label_confidence` and `y_label_confidence` per window |
|
||||||
|
|
||||||
#### `explorer.py` (two changes) |
|
||||||
|
|
||||||
1. **Axis swap** in `load_positions`, after `classify_axes` returns: |
|
||||||
``` |
|
||||||
if axis_def.get("y_label") == "Links–Rechts" and axis_def.get("x_label") != "Links–Rechts": |
|
||||||
positions_by_window, axis_def = _swap_axes(positions_by_window, axis_def) |
|
||||||
``` |
|
||||||
`_swap_axes` transposes (x, y) in every entity position and swaps all x_*/y_* |
|
||||||
keys in axis_def. |
|
||||||
|
|
||||||
2. **Motion expander** in `build_compass_tab`, below `st.plotly_chart`: |
|
||||||
``` |
|
||||||
with st.expander("🔍 Wat bepaalt deze assen?"): |
|
||||||
# show top 3 +/- motions for x and y, with date |
|
||||||
# show confidence and explained variance for this window |
|
||||||
``` |
|
||||||
|
|
||||||
## Data Flow |
|
||||||
|
|
||||||
``` |
|
||||||
compute_2d_axes(db_path, windows) |
|
||||||
→ (positions_by_window, axes) # axes now contains global_mean |
|
||||||
|
|
||||||
classify_axes(positions_by_window, axes, db_path) |
|
||||||
→ axis_def # now contains x/y_top_motions, confidence |
|
||||||
|
|
||||||
load_positions (in explorer.py) |
|
||||||
→ swap axes if y_label == "Links–Rechts" |
|
||||||
→ return (positions_by_window, axis_def) |
|
||||||
|
|
||||||
build_compass_tab |
|
||||||
→ scatter chart (uses x_label, y_label — already wired) |
|
||||||
→ expander (uses x_top_motions, y_top_motions) |
|
||||||
``` |
|
||||||
|
|
||||||
## Keyword Dictionary |
|
||||||
|
|
||||||
Categories and representative terms (non-exhaustive; full dict in implementation): |
|
||||||
|
|
||||||
**Links-Rechts** |
|
||||||
- Economic: `belasting`, `uitkering`, `bijstand`, `minimumloon`, `cao`, `vakbond`, |
|
||||||
`bezuiniging`, `privatisering`, `subsidie`, `zorg`, `pensioen`, `AOW` |
|
||||||
- Immigration: `asiel`, `asielaanvraag`, `migratie`, `vreemdeling`, `vluchtelingen`, |
|
||||||
`terugkeer`, `grenzen`, `opvang`, `statushouder` |
|
||||||
|
|
||||||
**Progressief-Conservatief** |
|
||||||
- Environment: `klimaat`, `stikstof`, `duurzaam`, `duurzaamheid`, `co2`, |
|
||||||
`energietransitie`, `biodiversiteit` |
|
||||||
- Social: `euthanasie`, `abortus`, `lgbtq`, `transgender`, `diversiteit`, `traditi`, |
|
||||||
`gezin`, `religie`, `geloof` |
|
||||||
|
|
||||||
**Coalitie-Oppositie** (detected via coalition correlation, not keywords — keyword |
|
||||||
detection for this category is unreliable) |
|
||||||
|
|
||||||
**Nationaal-Internationaal** (optional, lower priority) |
|
||||||
- `navo`, `nato`, `europees`, `europese`, `eu`, `verdrag`, `vn`, `internationaal` |
|
||||||
|
|
||||||
Matching: case-insensitive substring match on lowercased title. Score = fraction of |
|
||||||
top-10 motions containing at least one keyword from the winning category. Threshold |
|
||||||
for acceptance = 0.4 (i.e., at least 4 out of 10 top motions match). |
|
||||||
|
|
||||||
## New `axis_def` Fields |
|
||||||
|
|
||||||
``` |
|
||||||
x_top_motions: {window_id: {'+': [(title, date), ...], '-': [(title, date), ...]}} |
|
||||||
y_top_motions: same structure |
|
||||||
x_label_confidence: {window_id: float} # 0.0–1.0 |
|
||||||
y_label_confidence: {window_id: float} |
|
||||||
global_mean: np.ndarray # stored in axes dict, not surfaced to UI |
|
||||||
``` |
|
||||||
|
|
||||||
Existing fields (`x_label`, `y_label`, `x_quality`, `y_quality`, `x_interpretation`, |
|
||||||
`y_interpretation`) are preserved. |
|
||||||
|
|
||||||
## UI Display (Option C) |
|
||||||
|
|
||||||
**Axis titles**: unchanged — already uses `axis_def.get("x_label")`. |
|
||||||
|
|
||||||
**New expander** (collapsed by default) below compass scatter: |
|
||||||
``` |
|
||||||
🔍 Wat bepaalt deze assen? |
|
||||||
|
|
||||||
Horizontale as: Links–Rechts (vertrouwen: 70%) |
|
||||||
Rechtspool: Motie over asielbeleid (2023-11-14) · Motie over belastingverlaging (2023-10-05) ... |
|
||||||
Linkspool: Motie over uitkeringen (2023-11-20) · Motie over minimumloon (2023-09-12) ... |
|
||||||
|
|
||||||
Verticale as: Progressief–Conservatief (vertrouwen: 55%) |
|
||||||
Progressief: Motie over klimaatdoelen (2023-12-01) ... |
|
||||||
Conservatief: Motie over tradities (2023-10-18) ... |
|
||||||
|
|
||||||
As 1 verklaart 11% van de variantie in stemgedrag. |
|
||||||
``` |
|
||||||
|
|
||||||
## Error Handling |
|
||||||
|
|
||||||
| Situation | Behavior | |
|
||||||
|---|---| |
|
||||||
| No motion vectors for window | Skip motion classification; fall through to ideology CSV | |
|
||||||
| Motion title fetch fails | Use motion IDs as placeholder; label falls back | |
|
||||||
| Keyword confidence below threshold | Fall through to coalition correlation | |
|
||||||
| Both motion and CSV classification fail | "Stempatroon As N" (existing) | |
|
||||||
| `global_mean` missing from axes | Skip motion projection entirely | |
|
||||||
|
|
||||||
## Testing Strategy |
|
||||||
|
|
||||||
New unit tests (in `tests/test_political_compass.py`): |
|
||||||
- `test_classify_from_titles_left_right` — mock titles with `asiel`/`belasting` → expect "Links–Rechts" |
|
||||||
- `test_classify_from_titles_progressive` — mock titles with `klimaat`/`stikstof` → expect "Progressief–Conservatief" |
|
||||||
- `test_classify_from_titles_low_confidence` — mixed keywords → expect None (fallback triggered) |
|
||||||
- `test_axis_swap_when_y_is_left_right` — positions (x,y) → (y,x), labels swapped |
|
||||||
- `test_axis_swap_not_applied_when_x_is_left_right` — no swap when already correct |
|
||||||
|
|
||||||
All 8 existing tests must continue to pass. |
|
||||||
|
|
||||||
## Out of Scope |
|
||||||
|
|
||||||
**Explained variance drop (18% → 11%)**: Observed but not addressed here. Likely |
|
||||||
reflects genuine fragmentation of the Schoof parliament (4 smaller coalition parties). |
|
||||||
Warrants a separate diagnostic session. The expander now surfaces the explained |
|
||||||
variance, making this visible to users. |
|
||||||
|
|
||||||
**Proper Procrustes alignment of motion vectors**: The projection approximation |
|
||||||
(ignoring per-window rotation) is acceptable for v1. If label instability is observed |
|
||||||
across windows, add rotation application as a follow-up. |
|
||||||
|
|
||||||
**Removing `party_ideologies.csv`**: Kept as fallback. Can be removed once motion |
|
||||||
classification has proven reliable over several parliament periods. |
|
||||||
@ -1,177 +0,0 @@ |
|||||||
# SVD Label Unification Design |
|
||||||
|
|
||||||
## Goal |
|
||||||
|
|
||||||
Unify SVD component labels into a single source of truth (`SVD_THEMES`) and automatically compute axis flip directions so right-wing parties consistently appear on the right side of all SVD component axes. |
|
||||||
|
|
||||||
## Background |
|
||||||
|
|
||||||
Currently there are two separate label systems: |
|
||||||
1. `SVD_THEMES` in `explorer.py` - defines labels for all 10 SVD components with detailed explanations |
|
||||||
2. `_LABELS` in `axis_classifier.py` - defines short labels for the classifier (lr, eu, pi, co, pc) |
|
||||||
|
|
||||||
This causes duplication and potential inconsistency. Additionally, flip values are hardcoded in `SVD_THEMES` rather than computed from actual party positions. |
|
||||||
|
|
||||||
## Design |
|
||||||
|
|
||||||
### Single Source of Truth |
|
||||||
|
|
||||||
`SVD_THEMES` in `explorer.py` remains the canonical definition for all SVD component labels. A new shared module `analysis/svd_labels.py` will: |
|
||||||
|
|
||||||
1. Import `SVD_THEMES` from explorer.py (at runtime to avoid circular imports) |
|
||||||
2. Provide helper functions to derive labels for any component |
|
||||||
3. Compute flip direction automatically based on party centroids |
|
||||||
|
|
||||||
### New Module: `analysis/svd_labels.py` |
|
||||||
|
|
||||||
```python |
|
||||||
"""Unified SVD component labels and automatic flip direction computation.""" |
|
||||||
|
|
||||||
# Canonical party sets for orientation |
|
||||||
RIGHT_PARTIES = { |
|
||||||
"PVV", "VVD", "FVD", "BBB", "JA21", |
|
||||||
"Nieuw Sociaal Contract", "SGP", "CDA", "ChristenUnie" |
|
||||||
} |
|
||||||
LEFT_PARTIES = { |
|
||||||
"SP", "PvdA", "GL", "GroenLinks", "GroenLinks-PvdA", |
|
||||||
"DENK", "PvdD", "Volt" |
|
||||||
} |
|
||||||
|
|
||||||
def get_svd_label(component: int) -> str: |
|
||||||
"""Get short label for SVD component (e.g., 'EU-integratie–Nationalisme').""" |
|
||||||
|
|
||||||
def get_svd_theme(component: int) -> dict: |
|
||||||
"""Get full theme dict for SVD component.""" |
|
||||||
|
|
||||||
def compute_flip_direction(component: int, party_scores: dict) -> bool: |
|
||||||
""" |
|
||||||
Compute flip so right parties appear on the right side. |
|
||||||
|
|
||||||
Args: |
|
||||||
component: SVD component number (1-indexed) |
|
||||||
party_scores: {party_name: [score_comp1, score_comp2, ...]} |
|
||||||
|
|
||||||
Returns: |
|
||||||
True if axis should be flipped so right parties are on right. |
|
||||||
""" |
|
||||||
# Get scores for this component (0-indexed internally) |
|
||||||
idx = component - 1 |
|
||||||
right_scores = [scores[idx] for party, scores in party_scores.items() |
|
||||||
if party in RIGHT_PARTIES and len(scores) > idx] |
|
||||||
left_scores = [scores[idx] for party, scores in party_scores.items() |
|
||||||
if party in LEFT_PARTIES and len(scores) > idx] |
|
||||||
|
|
||||||
if not right_scores or not left_scores: |
|
||||||
return False # Default: no flip if insufficient data |
|
||||||
|
|
||||||
right_mean = sum(right_scores) / len(right_scores) |
|
||||||
left_mean = sum(left_scores) / len(left_scores) |
|
||||||
|
|
||||||
# Flip if right parties have lower mean (they're on the left) |
|
||||||
return right_mean < left_mean |
|
||||||
|
|
||||||
def get_fallback_labels() -> tuple[str, str]: |
|
||||||
"""Get fallback labels for x and y axes (components 1 and 2).""" |
|
||||||
return (get_svd_label(1), get_svd_label(2)) |
|
||||||
``` |
|
||||||
|
|
||||||
### Changes to `analysis/axis_classifier.py` |
|
||||||
|
|
||||||
1. Remove hardcoded `_LABELS` dict |
|
||||||
2. Import from `svd_labels.py` at runtime (to avoid circular imports) |
|
||||||
3. Update `display_label_for_modal` to derive from SVD_THEMES |
|
||||||
|
|
||||||
```python |
|
||||||
# Remove: |
|
||||||
_LABELS = { |
|
||||||
"lr": "Verzorgingsstaat–Marktwerking", |
|
||||||
"eu": "EU-integratie–Nationalisme", |
|
||||||
... |
|
||||||
} |
|
||||||
|
|
||||||
# Add: |
|
||||||
def _get_svd_labels(): |
|
||||||
"""Lazy import to avoid circular dependency.""" |
|
||||||
from analysis.svd_labels import get_svd_label, get_fallback_labels |
|
||||||
return get_fallback_labels() |
|
||||||
|
|
||||||
def display_label_for_modal(modal_label: Optional[str], axis: str) -> str: |
|
||||||
"""Return a user-facing axis label for a modal/internal label.""" |
|
||||||
from analysis.svd_labels import get_svd_label |
|
||||||
|
|
||||||
if modal_label is None: |
|
||||||
# Fallback to component 1 (x) or 2 (y) |
|
||||||
comp = 1 if axis == "x" else 2 |
|
||||||
return get_svd_label(comp) |
|
||||||
|
|
||||||
# Map "As 1" / "As 2" to semantic labels |
|
||||||
if axis == "x" and modal_label in ("As 1", "Stempatroon As 1"): |
|
||||||
return get_svd_label(1) |
|
||||||
if axis == "y" and modal_label in ("As 2", "Stempatroon As 2"): |
|
||||||
return get_svd_label(2) |
|
||||||
|
|
||||||
return modal_label |
|
||||||
``` |
|
||||||
|
|
||||||
### Changes to `explorer.py` |
|
||||||
|
|
||||||
1. Keep `SVD_THEMES` as canonical source |
|
||||||
2. Remove hardcoded fallback labels in `_build_political_compass_figure` |
|
||||||
3. Use `svd_labels.py` for all label lookups |
|
||||||
4. Add party position charts for components 3-10 in SVD Components tab |
|
||||||
|
|
||||||
#### Party Position Charts for All Components |
|
||||||
|
|
||||||
Currently, `_render_party_axis_chart` only works for components 1 and 2 (which have 2D coords). For components 3-10, we need to show 1D party positions: |
|
||||||
|
|
||||||
```python |
|
||||||
def _render_party_axis_chart_1d(party_coords: dict, component: int, theme: dict, bootstrap_data: dict = None): |
|
||||||
"""Render a 1D party position chart for a single SVD component.""" |
|
||||||
# Extract scores for this component |
|
||||||
# Show parties on a horizontal axis |
|
||||||
# Use theme['label'] for axis title |
|
||||||
# Use theme['positive_pole'] and theme['negative_pole'] for annotations |
|
||||||
``` |
|
||||||
|
|
||||||
### Auto-compute Flip Values |
|
||||||
|
|
||||||
On app startup or when loading SVD data: |
|
||||||
1. Load party scores for all components |
|
||||||
2. Compute flip direction for each component using `compute_flip_direction` |
|
||||||
3. Update `SVD_THEMES[component]["flip"]` dynamically OR store precomputed values |
|
||||||
|
|
||||||
### Files Modified |
|
||||||
|
|
||||||
| File | Changes | |
|
||||||
|------|---------| |
|
||||||
| `analysis/svd_labels.py` | **NEW** - Unified label system with auto-flip | |
|
||||||
| `analysis/axis_classifier.py` | Remove `_LABELS`, import from svd_labels | |
|
||||||
| `explorer.py` | Remove fallback labels, add 1D charts for components 3-10 | |
|
||||||
| `tests/test_axis_label_fallback.py` | Update to use new label system | |
|
||||||
| `tests/test_political_compass.py` | Update assertions for new labels | |
|
||||||
|
|
||||||
### Test Plan |
|
||||||
|
|
||||||
1. **Unit tests for `svd_labels.py`**: |
|
||||||
- `get_svd_label(component)` returns correct label |
|
||||||
- `compute_flip_direction` returns correct flip based on party scores |
|
||||||
- `get_fallback_labels()` returns tuple of component 1 and 2 labels |
|
||||||
|
|
||||||
2. **Integration tests**: |
|
||||||
- Compass plot uses correct labels from SVD_THEMES |
|
||||||
- Trajectory plot uses correct labels from SVD_THEMES |
|
||||||
- SVD Components tab shows party positions for all components |
|
||||||
- Right parties appear on right side of all axes |
|
||||||
|
|
||||||
3. **Regression tests**: |
|
||||||
- Existing tests pass with new label system |
|
||||||
- No circular import errors |
|
||||||
|
|
||||||
## Implementation Order |
|
||||||
|
|
||||||
1. Create `analysis/svd_labels.py` with helper functions |
|
||||||
2. Update `axis_classifier.py` to use svd_labels |
|
||||||
3. Update `explorer.py` to use svd_labels for fallback labels |
|
||||||
4. Add 1D party position charts for components 3-10 |
|
||||||
5. Update tests |
|
||||||
6. Verify flip directions are correct for all components |
|
||||||
@ -1,97 +0,0 @@ |
|||||||
# Voting Discipline Analysis |
|
||||||
|
|
||||||
## What is Voting Discipline (Rice Index)? |
|
||||||
|
|
||||||
The Rice index measures party cohesion during roll-call votes. For each motion, it calculates the fraction of party MPs who vote with the party majority. A score of 100% means all MPs voted the same way; 50% means the party was evenly split. |
|
||||||
|
|
||||||
**Formula:** `Rice = (|votes_for_majority| - |votes_against_majority|) / (|total_votes|)` |
|
||||||
|
|
||||||
This implementation reports the simplified form `Rice = fraction of MPs voting with party majority`. Note that this is a rescaling of the classical formula above (classical Rice = 2 × fraction − 1), so the two values are related but not numerically identical. |
|
||||||
|
|
||||||
## Typical Patterns in Dutch Parliament |
|
||||||
|
|
||||||
Based on the Rice index methodology, here's what voting discipline typically reveals: |
|
||||||
|
|
||||||
### High Discipline Parties (>95% cohesion) |
|
||||||
|
|
||||||
These parties vote as a unified bloc: |
|
||||||
|
|
||||||
- **PVV** - Typically shows very high discipline due to strong party discipline from leadership |
|
||||||
- **SGP** - Historically disciplined, small homogeneous membership |
|
||||||
- **DENK** - Tight-knit group with clear ideological positions |
|
||||||
- **FvD** - High discipline when party leadership is stable |
|
||||||
|
|
||||||
**Interpretation:** High discipline indicates: |
|
||||||
- Strong party whips |
|
||||||
- Homogeneous membership |
|
||||||
- Clear ideological positions |
|
||||||
- Leadership control over voting behavior |
|
||||||
|
|
||||||
### Moderate Discipline Parties (85-95% cohesion) |
|
||||||
|
|
||||||
- **VVD** - Generally disciplined but allows some dissent on social issues |
|
||||||
- **CDA** - Moderate discipline, allows conscience votes on ethical issues |
|
||||||
- **D66** - Generally disciplined on progressive issues, some variation on economic policy |
|
||||||
- **GroenLinks** - High discipline on environmental issues, moderate on economic policy |
|
||||||
|
|
||||||
### Lower Discipline Parties (<85% cohesion) |
|
||||||
|
|
||||||
- **PvdA** - Historically shows internal divisions between left and centrist factions |
|
||||||
- **SP** - Can show splits between pragmatic and ideological wings |
|
||||||
- **ChristenUnie** - Allows conscience votes on ethical issues |
|
||||||
- **Volt** - Newer party, may show variation as positions solidify |
|
||||||
|
|
||||||
**Interpretation:** Lower discipline can indicate: |
|
||||||
- Internal factional divisions |
|
||||||
- Allowance for conscience votes |
|
||||||
- Broad ideological tent |
|
||||||
- Decentralized decision-making |
|
||||||
|
|
||||||
## What Voting Discipline Tells Us |
|
||||||
|
|
||||||
### 1. Party Cohesion vs. Democratic Deliberation |
|
||||||
|
|
||||||
High discipline isn't inherently "good" or "bad": |
|
||||||
- **Pro:** Clear voter mandate, predictable policy positions |
|
||||||
- **Con:** Limited internal debate, suppressed minority views within party |
|
||||||
|
|
||||||
### 2. Coalition Dynamics |
|
||||||
|
|
||||||
Discipline patterns reveal coalition mechanics: |
|
||||||
- **Coalition parties** often show temporary discipline drops when supporting unpopular government policies |
|
||||||
- **Opposition parties** can vote more freely without government responsibility |
|
||||||
|
|
||||||
### 3. Issue-Based Splits |
|
||||||
|
|
||||||
Certain issues cause predictable discipline drops: |
|
||||||
- **Ethical issues** (euthanasia, abortion) - conscience votes allowed |
|
||||||
- **European integration** - splits traditional left-right alignments |
|
||||||
- **Immigration** - creates internal tensions in center parties |
|
||||||
|
|
||||||
### 4. Party Health Indicators |
|
||||||
|
|
||||||
- **Rising discipline** over time may indicate centralization or leadership consolidation |
|
||||||
- **Falling discipline** may indicate internal conflict, leadership challenges, or ideological realignment |
|
||||||
|
|
||||||
## Methodological Notes |
|
||||||
|
|
||||||
### Data Source |
|
||||||
- Uses individual MP votes from `mp_votes` table |
|
||||||
- Only counts 'voor' and 'tegen' votes (excludes absent/abstain) |
|
||||||
- Requires minimum 5 motions per party for statistical reliability |
|
||||||
|
|
||||||
### Limitations |
|
||||||
- Roll-call votes are a subset of all votes (may not be representative) |
|
||||||
- Strategic absence is not captured (MPs may skip controversial votes) |
|
||||||
- Party discipline varies by topic - aggregate scores hide issue-specific patterns |
|
||||||
|
|
||||||
## Recommendations for Further Analysis |
|
||||||
|
|
||||||
1. **Topic-specific discipline:** Calculate Rice index per policy area to see where parties are unified vs. divided |
|
||||||
2. **Temporal trends:** Track discipline over time to identify party evolution |
|
||||||
3. **Dissent networks:** Map which MPs consistently vote against their party |
|
||||||
4. **Coalition effects:** Compare discipline during coalition vs. opposition periods |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
*This analysis is based on the Rice index methodology implemented in `compute_party_discipline()` in `explorer.py`.* |
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,317 +0,0 @@ |
|||||||
"""Helper utilities used by explorer.py. |
|
||||||
|
|
||||||
Primary export: |
|
||||||
- compute_party_coords: compute per-party (x_mean, y_mean) from positions_by_window. |
|
||||||
|
|
||||||
This module is intentionally free of Streamlit side-effects to be easy to unit test. |
|
||||||
""" |
|
||||||
|
|
||||||
from __future__ import annotations |
|
||||||
|
|
||||||
import logging |
|
||||||
import math |
|
||||||
import re |
|
||||||
from typing import Any, Dict, List, Optional, Set, Tuple |
|
||||||
|
|
||||||
import numpy as np |
|
||||||
|
|
||||||
logger = logging.getLogger(__name__) |
|
||||||
|
|
||||||
|
|
||||||
def normalize_positions(
    positions_by_window: Dict[str, Dict[str, Tuple[Any, Any]]],
    clamp_abs_value: float = 1e3,
    null_tokens: tuple = ("nan", "NaN", "None", "none", "null", ""),
) -> Dict[str, Dict[str, Tuple[float, float]]]:
    """Normalize a positions_by_window structure.

    - Coerce numeric strings to floats.
    - Treat common null tokens and None as np.nan.
    - Decode bytes/bytearray if necessary (best-effort).
    - Reject non-finite values and values whose absolute magnitude exceeds
      ``clamp_abs_value`` by replacing them with np.nan (they are NOT clamped
      to the boundary — out-of-range coordinates are treated as invalid).
    - Preserve entity keys; any uncoercible coords become (np.nan, np.nan).

    Args:
        positions_by_window: mapping window_id -> {entity: (x, y)}; coordinate
            values may be numbers, numeric strings, bytes, or None.
        clamp_abs_value: absolute-value threshold beyond which a coordinate is
            considered invalid and replaced with np.nan.
        null_tokens: string tokens that are treated as missing values.

    Returns:
        A new positions_by_window mapping with float or np.nan coordinates.
        Pure and import-safe (no IO).
    """

    def _coerce(val: Any) -> float:
        # Best-effort decode of raw bytes before string handling.
        if isinstance(val, (bytes, bytearray)):
            try:
                val = val.decode()
            except Exception:
                return float(np.nan)
        if isinstance(val, str):
            val = val.strip()
            if val in null_tokens:
                return float(np.nan)
        if val is None:
            return float(np.nan)
        try:
            v = float(val)
        except Exception:
            # Anything float() cannot interpret is treated as missing.
            return float(np.nan)
        # Non-finite or absurdly large magnitudes are rejected, not clamped.
        if math.isnan(v) or math.isinf(v) or abs(v) > clamp_abs_value:
            return float(np.nan)
        return v

    out: Dict[str, Dict[str, Tuple[float, float]]] = {}
    for wid, mapping in (positions_by_window or {}).items():
        win_map: Dict[str, Tuple[float, float]] = {}
        for ent, xy in (mapping or {}).items():
            try:
                # Tolerate short/odd tuples: missing components become None.
                x_raw = xy[0] if xy is not None and len(xy) > 0 else None
                y_raw = xy[1] if xy is not None and len(xy) > 1 else None
            except Exception:
                x_raw = y_raw = None
            win_map[ent] = (_coerce(x_raw), _coerce(y_raw))
        out[wid] = win_map
    return out
|
||||||
|
|
||||||
|
|
||||||
def _strip_paren(s: str) -> str: |
|
||||||
# helper used in plan to try to strip parenthetical variants |
|
||||||
return s.split("(")[0].strip() |
|
||||||
|
|
||||||
|
|
||||||
def inspect_positions_for_issues(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    party_map: Dict[str, str],
) -> Dict[str, Any]:
    """Inspect positions_by_window for simple issues/summary.

    Returns a dictionary with keys including the previous ones (windows_count,
    window_labels, mp_id_set, party_map_count, parties_with_centroid_counts,
    mismatched_mp_ids_sample) plus:
    - mp_positions_count: int (num unique MP ids seen)
    - mp_positions_sample: list[str] (sorted sample up to 10)
    - windows_with_no_positions: list[str]

    This helper remains pure and import-safe so unit tests can exercise it.
    """
    all_windows = list(positions_by_window.keys())

    seen_mp_ids: Set[str] = set()
    centroid_counts: Dict[str, int] = {}
    unmatched: Set[str] = set()
    empty_windows: List[str] = []

    for window_id, entity_positions in positions_by_window.items():
        if not entity_positions:
            empty_windows.append(window_id)
            continue
        window_parties: Set[str] = set()
        for entity in entity_positions:
            if not entity:
                continue
            seen_mp_ids.add(entity)
            resolved = party_map.get(entity)
            if resolved is None:
                # Retry with any parenthetical suffix stripped from the name.
                resolved = party_map.get(entity.split("(")[0].strip())
            if resolved:
                window_parties.add(resolved)
            else:
                unmatched.add(entity)
        # A party gets one centroid credit per window it appears in.
        for party in window_parties:
            centroid_counts[party] = centroid_counts.get(party, 0) + 1

    return {
        "windows_count": len(all_windows),
        "window_labels": sorted(all_windows)[:10],
        "mp_id_set": seen_mp_ids,
        "party_map_count": len(party_map),
        "parties_with_centroid_counts": centroid_counts,
        "mismatched_mp_ids_sample": sorted(unmatched)[:10],
        "mp_positions_sample": sorted(seen_mp_ids)[:10],
        "mp_positions_count": len(seen_mp_ids),
        "windows_with_no_positions": empty_windows,
    }
|
||||||
|
|
||||||
|
|
||||||
def compute_party_coords(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    party_map: Dict[str, str],
    window_id: str,
    fallback_party_scores: Optional[Dict[str, List[float]]] = None,
) -> Tuple[Dict[str, Tuple[float, float]], Set[str]]:
    """
    Compute per-party centroids (x_mean, y_mean) for a specific window.

    Args:
        positions_by_window: mapping window_id -> {entity_name: (x, y)}
        party_map: mapping mp_name -> party abbreviation (normalized)
        window_id: which window to compute centroids for (key into positions_by_window)
        fallback_party_scores: optional mapping party -> numeric vector (len>=2). When a
            party has no MPs in the window and fallback_party_scores contains an entry,
            the first two elements of that vector will be used as a fallback (x,y).

    Returns:
        (party_coords, fallback_used) where:
        - party_coords: {party: (x_mean, y_mean)} for parties with a computed coord or fallback.
        - fallback_used: set of party names where fallback_party_scores was used.

    Note:
        An MP whose x OR y coordinate is NaN/inf is excluded entirely, so
        x_mean and y_mean are always averaged over the same set of MPs.
    """
    pos = positions_by_window.get(window_id, {}) or {}

    per_party: Dict[str, List[Tuple[float, float]]] = {}
    for ent, xy in pos.items():
        if not ent or xy is None:
            continue
        try:
            x, y = float(xy[0]), float(xy[1])
        except Exception:
            # skip malformed coords
            continue
        # Drop the whole pair if either coordinate is non-finite.  Filtering
        # x and y independently would average the two axes over different MP
        # subsets and skew the centroid.
        if not (math.isfinite(x) and math.isfinite(y)):
            continue
        party = party_map.get(ent)
        if party is None:
            # try stripped name fallback
            party = party_map.get(_strip_paren(ent))
        if not party or party == "Unknown":
            continue
        per_party.setdefault(party, []).append((x, y))

    party_coords: Dict[str, Tuple[float, float]] = {}
    fallback_used: Set[str] = set()

    # Compute means for parties that have at least one valid MP coordinate.
    for party, coords in per_party.items():
        party_coords[party] = (
            float(np.mean([c[0] for c in coords])),
            float(np.mean([c[1] for c in coords])),
        )

    # Fallback: use supplied party vectors if a party has no MPs in this window.
    if fallback_party_scores:
        for party, vec in fallback_party_scores.items():
            # len() works for lists and numpy arrays alike; `if not vec` would
            # raise ValueError on a non-empty numpy array.
            if party in party_coords or vec is None or len(vec) < 2:
                continue
            try:
                x_f, y_f = float(vec[0]), float(vec[1])
            except Exception:
                continue
            if math.isfinite(x_f) and math.isfinite(y_f):
                party_coords[party] = (x_f, y_f)
                fallback_used.add(party)

    if fallback_used:
        logger.warning(
            "compute_party_coords used fallback for parties: %s",
            sorted(fallback_used),
        )

    return party_coords, fallback_used
|
||||||
|
|
||||||
|
|
||||||
def compute_party_centroids(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    party_map: Dict[str, str],
    windows: List[str],
) -> Tuple[Dict[str, List[Tuple[float, float]]], Dict[str, Any]]:
    """Compute per-party centroids across multiple windows.

    Returns (party_centroids, metadata)
    - party_centroids: mapping party -> list of (x,y) tuples of length len(windows).
      Entries without MPs are (np.nan, np.nan).
    - metadata: dict with keys 'per_party_counts', 'total_windows', 'parties',
      'windows_with_data_count', 'input_party_map_entries', 'windows_processed',
      'parties_with_positions', 'parties_all_nan'
    """
    # Diagnostics collected alongside the centroid series.
    metadata: Dict[str, Any] = {
        "windows_with_data_count": len(positions_by_window) if positions_by_window else 0,
        "input_party_map_entries": len(party_map) if party_map else 0,
        "windows_processed": 0,
        "parties_with_positions": set(),
        "parties_all_nan": [],
        "per_party_counts": {},
        "total_windows": len(windows),
        "parties": [],
    }

    # All known parties come from the values of party_map.
    parties = sorted(set(party_map.values()))
    metadata["parties"] = parties
    if not parties:
        # No parties known: empty centroids, but diagnostics are still returned.
        return {}, metadata

    party_centroids: Dict[str, List[Tuple[float, float]]] = {p: [] for p in parties}

    nan_pair = (float(np.nan), float(np.nan))
    for window in windows:
        window_coords, _ = compute_party_coords(positions_by_window or {}, party_map, window)
        metadata["windows_processed"] += 1
        for party in parties:
            xy = window_coords.get(party)
            if xy is None:
                party_centroids[party].append(nan_pair)
            else:
                party_centroids[party].append((float(xy[0]), float(xy[1])))
                metadata["parties_with_positions"].add(party)

    # Sets are not JSON serializable; hand back a sorted list instead.
    metadata["parties_with_positions"] = sorted(metadata["parties_with_positions"])

    for party, series in party_centroids.items():
        # Count windows where the party has a fully valid (x, y) pair.
        metadata["per_party_counts"][party] = sum(
            1 for x, y in series if not (np.isnan(x) or np.isnan(y))
        )
        # Flag parties that never got a real position in any window.
        if all(np.isnan(x) and np.isnan(y) for x, y in series):
            metadata["parties_all_nan"].append(party)

    return party_centroids, metadata
|
||||||
@ -1,234 +0,0 @@ |
|||||||
#!/usr/bin/env python3 |
|
||||||
# HL REV:NEWFILE |
|
||||||
""" |
|
||||||
Automated probes for Trajectories tab diagnostics. |
|
||||||
|
|
||||||
This script runs several simulated scenarios to diagnose the trajectories tab: |
|
||||||
1. NORMAL: Uses real data from data/motions.db |
|
||||||
2. EMPTY_POSITIONS: Simulates load_positions returning empty (realistic failure mode) |
|
||||||
3. EMPTY_PARTY_MAP: Simulates load_party_map returning empty (realistic failure mode) |
|
||||||
4. BOTH_EMPTY: Simulates both returning empty (severe failure mode) |
|
||||||
|
|
||||||
Run: python scripts/diagnose_trajectories_cli.py |
|
||||||
""" |
|
||||||
|
|
||||||
import os |
|
||||||
import importlib |
|
||||||
import traceback |
|
||||||
import sys |
|
||||||
import json |
|
||||||
from datetime import datetime |
|
||||||
|
|
||||||
# Real DB path relative to project root |
|
||||||
DB_PATH = os.path.abspath( |
|
||||||
os.path.join(os.path.dirname(__file__), "..", "data", "motions.db") |
|
||||||
) |
|
||||||
|
|
||||||
|
|
||||||
def run(): |
|
||||||
os.environ.setdefault("EXPLORER_DEBUG_TRAJECTORIES", "1") |
|
||||||
# Ensure project root is on sys.path so 'import explorer' finds the module |
|
||||||
root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) |
|
||||||
if root not in sys.path: |
|
||||||
sys.path.insert(0, root) |
|
||||||
|
|
||||||
# Import explorer fresh so env var reads take effect |
|
||||||
import explorer |
|
||||||
|
|
||||||
# Results collection for all scenarios |
|
||||||
all_results = { |
|
||||||
"generated_at": datetime.now().isoformat(), |
|
||||||
"db_path": DB_PATH, |
|
||||||
"db_exists": os.path.exists(DB_PATH), |
|
||||||
"scenarios": {}, |
|
||||||
} |
|
||||||
|
|
||||||
def run_scenario( |
|
||||||
name, |
|
||||||
load_positions_ret=None, |
|
||||||
load_party_map_ret=None, |
|
||||||
select_helper_behavior=None, |
|
||||||
use_real_data=False, |
|
||||||
): |
|
||||||
print("\n" + "=" * 80) |
|
||||||
print("SCENARIO:", name) |
|
||||||
print("=" * 80) |
|
||||||
|
|
||||||
scenario_result = { |
|
||||||
"name": name, |
|
||||||
"use_real_data": use_real_data, |
|
||||||
"monkeypatched": {}, |
|
||||||
} |
|
||||||
|
|
||||||
# Clear previous diagnostics |
|
||||||
explorer._last_trajectories_diagnostics.clear() |
|
||||||
|
|
||||||
# Backup originals |
|
||||||
orig_load_positions = getattr(explorer, "load_positions", None) |
|
||||||
orig_load_party_map = getattr(explorer, "load_party_map", None) |
|
||||||
orig_select_helper = getattr(explorer, "select_trajectory_plot_data", None) |
|
||||||
|
|
||||||
# If using real data, call the real functions once to get actual values |
|
||||||
if use_real_data and os.path.exists(DB_PATH): |
|
||||||
print(f"Loading real data from: {DB_PATH}") |
|
||||||
try: |
|
||||||
real_positions = explorer.load_positions(DB_PATH, "annual") |
|
||||||
real_party_map = explorer.load_party_map(DB_PATH) |
|
||||||
print( |
|
||||||
f" Real load_positions: {len(real_positions[0]) if real_positions[0] else 0} windows" |
|
||||||
) |
|
||||||
print(f" Real party_map: {len(real_party_map)} entries") |
|
||||||
scenario_result["real_data"] = { |
|
||||||
"positions_windows": len(real_positions[0]) |
|
||||||
if real_positions[0] |
|
||||||
else 0, |
|
||||||
"party_map_count": len(real_party_map), |
|
||||||
} |
|
||||||
except Exception as e: |
|
||||||
print(f" ERROR loading real data: {e}") |
|
||||||
scenario_result["real_data_error"] = str(e) |
|
||||||
|
|
||||||
if load_positions_ret is not None: |
|
||||||
explorer.load_positions = lambda db, ws: load_positions_ret |
|
||||||
scenario_result["monkeypatched"]["load_positions"] = "ARTIFICIAL_EMPTY" |
|
||||||
print( |
|
||||||
" Monkeypatched: load_positions -> ARTIFICIAL_EMPTY (for comparison)" |
|
||||||
) |
|
||||||
|
|
||||||
if load_party_map_ret is not None: |
|
||||||
explorer.load_party_map = lambda db: load_party_map_ret |
|
||||||
scenario_result["monkeypatched"]["load_party_map"] = "ARTIFICIAL_EMPTY" |
|
||||||
print( |
|
||||||
" Monkeypatched: load_party_map -> ARTIFICIAL_EMPTY (for comparison)" |
|
||||||
) |
|
||||||
|
|
||||||
if select_helper_behavior == "raise": |
|
||||||
|
|
||||||
def raising(*args, **kwargs): |
|
||||||
raise ValueError("simulated crash from select_trajectory_plot_data") |
|
||||||
|
|
||||||
explorer.select_trajectory_plot_data = raising |
|
||||||
scenario_result["monkeypatched"]["select_helper"] = "RAISE_ERROR" |
|
||||||
print(" Monkeypatched: select_trajectory_plot_data -> RAISE_ERROR") |
|
||||||
|
|
||||||
elif select_helper_behavior == "zero_traces": |
|
||||||
|
|
||||||
class DummyFig: |
|
||||||
def __init__(self): |
|
||||||
self.data = [] |
|
||||||
|
|
||||||
def zero(*args, **kwargs): |
|
||||||
return DummyFig(), 0, None |
|
||||||
|
|
||||||
explorer.select_trajectory_plot_data = zero |
|
||||||
scenario_result["monkeypatched"]["select_helper"] = "ZERO_TRACES" |
|
||||||
print(" Monkeypatched: select_trajectory_plot_data -> ZERO_TRACES") |
|
||||||
|
|
||||||
try: |
|
||||||
# Call the UI function; it's import-safe and uses a dummy st when streamlit is absent |
|
||||||
explorer.build_trajectories_tab(db_path=DB_PATH, window_size="annual") |
|
||||||
except Exception as e: |
|
||||||
print("build_trajectories_tab RAISED:", type(e), e) |
|
||||||
print(traceback.format_exc()) |
|
||||||
scenario_result["exception"] = str(e) |
|
||||||
scenario_result["traceback"] = traceback.format_exc() |
|
||||||
|
|
||||||
# Capture diagnostics |
|
||||||
diag = getattr(explorer, "_last_trajectories_diagnostics", None) |
|
||||||
scenario_result["diagnostics"] = dict(diag) if diag else {} |
|
||||||
print("module _last_trajectories_diagnostics:", diag) |
|
||||||
|
|
||||||
sh = None |
|
||||||
if hasattr(explorer, "select_trajectory_plot_data"): |
|
||||||
sh = getattr( |
|
||||||
explorer.select_trajectory_plot_data, "_last_diagnostics", None |
|
||||||
) |
|
||||||
scenario_result["select_helper_diagnostics"] = dict(sh) if sh else {} |
|
||||||
print("select_helper _last_diagnostics:", sh) |
|
||||||
|
|
||||||
# restore originals |
|
||||||
if orig_load_positions is not None: |
|
||||||
explorer.load_positions = orig_load_positions |
|
||||||
if orig_load_party_map is not None: |
|
||||||
explorer.load_party_map = orig_load_party_map |
|
||||||
if orig_select_helper is not None: |
|
||||||
explorer.select_trajectory_plot_data = orig_select_helper |
|
||||||
|
|
||||||
all_results["scenarios"][name] = scenario_result |
|
||||||
|
|
||||||
# Scenario 1: NORMAL - Uses real data |
|
||||||
print("\n" + "=" * 80) |
|
||||||
print("SCENARIO 1: NORMAL (using real data)") |
|
||||||
print("=" * 80) |
|
||||||
run_scenario("normal", use_real_data=True) |
|
||||||
|
|
||||||
# Scenario 2: Empty positions (ARTIFICIAL - for comparison) |
|
||||||
print("\n" + "=" * 80) |
|
||||||
print("SCENARIO 2: EMPTY_POSITIONS (ARTIFICIAL - for comparison)") |
|
||||||
print("=" * 80) |
|
||||||
run_scenario( |
|
||||||
"empty_positions_ARTIFICIAL", |
|
||||||
load_positions_ret=({}, {}), |
|
||||||
load_party_map_ret=None, # Use real party_map |
|
||||||
) |
|
||||||
|
|
||||||
# Scenario 3: Empty party_map (ARTIFICIAL - for comparison) |
|
||||||
print("\n" + "=" * 80) |
|
||||||
print("SCENARIO 3: EMPTY_PARTY_MAP (ARTIFICIAL - for comparison)") |
|
||||||
print("=" * 80) |
|
||||||
run_scenario( |
|
||||||
"empty_party_map_ARTIFICIAL", |
|
||||||
load_positions_ret=None, # Use real positions |
|
||||||
load_party_map_ret={}, |
|
||||||
) |
|
||||||
|
|
||||||
# Scenario 4: Both empty (ARTIFICIAL - for comparison) |
|
||||||
print("\n" + "=" * 80) |
|
||||||
print("SCENARIO 4: BOTH_EMPTY (ARTIFICIAL - for comparison)") |
|
||||||
print("=" * 80) |
|
||||||
run_scenario( |
|
||||||
"both_empty_ARTIFICIAL", load_positions_ret=({}, {}), load_party_map_ret={} |
|
||||||
) |
|
||||||
|
|
||||||
# Write results to diagnostics file |
|
||||||
output_dir = os.path.join(root, "thoughts", "shared", "diagnostics") |
|
||||||
os.makedirs(output_dir, exist_ok=True) |
|
||||||
output_path = os.path.join(output_dir, "2026-03-31-trajectories-diagnostics.json") |
|
||||||
|
|
||||||
with open(output_path, "w") as f: |
|
||||||
json.dump(all_results, f, indent=2, default=str) |
|
||||||
|
|
||||||
print("\n" + "=" * 80) |
|
||||||
print(f"DIAGNOSTICS SAVED TO: {output_path}") |
|
||||||
print("=" * 80) |
|
||||||
|
|
||||||
# Summary |
|
||||||
print("\n" + "=" * 80) |
|
||||||
print("SUMMARY") |
|
||||||
print("=" * 80) |
|
||||||
|
|
||||||
normal_scenario = all_results["scenarios"].get("normal", {}) |
|
||||||
real_data = normal_scenario.get("real_data", {}) |
|
||||||
party_map_count = real_data.get("party_map_count", 0) |
|
||||||
|
|
||||||
print(f"DB Path: {DB_PATH}") |
|
||||||
print(f"DB Exists: {all_results['db_exists']}") |
|
||||||
print(f"Real data party_map_count: {party_map_count}") |
|
||||||
|
|
||||||
if party_map_count > 0: |
|
||||||
print("\n✅ SUCCESS: Real data scenario shows party_map_count > 0") |
|
||||||
print(f" Found {party_map_count} party entries (expected ~1000+)") |
|
||||||
else: |
|
||||||
print("\n❌ ISSUE: Real data scenario shows party_map_count = 0") |
|
||||||
|
|
||||||
# Check diagnostics from normal scenario |
|
||||||
normal_diag = normal_scenario.get("diagnostics", {}) |
|
||||||
if normal_diag: |
|
||||||
print(f"\nNormal scenario diagnostics keys: {list(normal_diag.keys())}") |
|
||||||
print(f"Normal scenario stage: {normal_diag.get('stage', 'N/A')}") |
|
||||||
|
|
||||||
return all_results |
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__": |
|
||||||
results = run() |
|
||||||
@ -1,47 +0,0 @@ |
|||||||
<!-- |
|
||||||
Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025) |
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License"); |
|
||||||
you may not use this file except in compliance with the License. |
|
||||||
You may obtain a copy of the License at |
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0 |
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software |
|
||||||
distributed under the License is distributed on an "AS IS" BASIS, |
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
||||||
See the License for the specific language governing permissions and |
|
||||||
limitations under the License. |
|
||||||
--> |
|
||||||
|
|
||||||
<!DOCTYPE html> |
|
||||||
<html lang="en"> |
|
||||||
<head> |
|
||||||
<meta charset="UTF-8" /> |
|
||||||
<meta |
|
||||||
name="viewport" |
|
||||||
content="width=device-width, initial-scale=1, shrink-to-fit=no" |
|
||||||
/> |
|
||||||
<link rel="shortcut icon" href="./favicon.png" /> |
|
||||||
<link |
|
||||||
rel="preload" |
|
||||||
href="./static/media/SourceSansVF-Upright.ttf.BsWL4Kly.woff2" |
|
||||||
as="font" |
|
||||||
type="font/woff2" |
|
||||||
crossorigin |
|
||||||
/> |
|
||||||
|
|
||||||
<title>Streamlit</title> |
|
||||||
|
|
||||||
<!-- initialize window.prerenderReady to false and then set to true in React app when app is ready for indexing --> |
|
||||||
<script> |
|
||||||
window.prerenderReady = false |
|
||||||
</script> |
|
||||||
<script type="module" crossorigin src="./static/js/index.DvRPFfw6.js"></script> |
|
||||||
<link rel="stylesheet" crossorigin href="./static/css/index.CJVRHjQZ.css"> |
|
||||||
</head> |
|
||||||
<body> |
|
||||||
<noscript>You need to enable JavaScript to run this app.</noscript> |
|
||||||
<div id="root"></div> |
|
||||||
</body> |
|
||||||
</html> |
|
||||||
@ -1,91 +0,0 @@ |
|||||||
import pytest |
|
||||||
|
|
||||||
from analysis import axis_classifier |
|
||||||
|
|
||||||
|
|
||||||
def test_display_label_for_modal(): |
|
||||||
"""Test that display_label_for_modal uses SVD_THEMES for fallback labels.""" |
|
||||||
# None should return fallback from SVD_THEMES |
|
||||||
x_label = axis_classifier.display_label_for_modal(None, "x") |
|
||||||
y_label = axis_classifier.display_label_for_modal(None, "y") |
|
||||||
|
|
||||||
# Should return component 1 and 2 labels from SVD_THEMES |
|
||||||
assert "EU-integratie" in x_label or "Nationalisme" in x_label |
|
||||||
assert "Populistisch" in y_label or "Institutioneel" in y_label |
|
||||||
|
|
||||||
|
|
||||||
def test_display_label_for_modal_maps_as_labels(): |
|
||||||
"""Test that 'As 1' and 'As 2' are mapped to semantic labels.""" |
|
||||||
x_label = axis_classifier.display_label_for_modal("As 1", "x") |
|
||||||
y_label = axis_classifier.display_label_for_modal("As 2", "y") |
|
||||||
|
|
||||||
# Should return component 1 and 2 labels |
|
||||||
assert "EU-integratie" in x_label or "Nationalisme" in x_label |
|
||||||
assert "Populistisch" in y_label or "Institutioneel" in y_label |
|
||||||
|
|
||||||
|
|
||||||
def test_display_label_for_modal_stempatroon(): |
|
||||||
"""Test that 'Stempatroon As N' are mapped to semantic labels.""" |
|
||||||
x_label = axis_classifier.display_label_for_modal("Stempatroon As 1", "x") |
|
||||||
y_label = axis_classifier.display_label_for_modal("Stempatroon As 2", "y") |
|
||||||
|
|
||||||
# Should return component 1 and 2 labels |
|
||||||
assert "EU-integratie" in x_label or "Nationalisme" in x_label |
|
||||||
assert "Populistisch" in y_label or "Institutioneel" in y_label |
|
||||||
|
|
||||||
|
|
||||||
def test_classify_axes_modal_fallback(monkeypatch, tmp_path): |
|
||||||
# Prepare fake positions_by_window with sufficient parties |
|
||||||
positions_by_window = { |
|
||||||
"2021": { |
|
||||||
"P1": (0.0, 0.0), |
|
||||||
"P2": (1.0, 1.0), |
|
||||||
"P3": (2.0, 2.0), |
|
||||||
"P4": (3.0, 3.0), |
|
||||||
"P5": (4.0, 4.0), |
|
||||||
}, |
|
||||||
"2022": { |
|
||||||
"P1": (0.1, -0.1), |
|
||||||
"P2": (1.1, 0.9), |
|
||||||
"P3": (2.1, 2.2), |
|
||||||
"P4": (3.1, 3.2), |
|
||||||
"P5": (4.1, 4.3), |
|
||||||
}, |
|
||||||
} |
|
||||||
|
|
||||||
axes = {} |
|
||||||
|
|
||||||
# Monkeypatch internal helpers to avoid DB reads |
|
||||||
monkeypatch.setattr( |
|
||||||
axis_classifier, |
|
||||||
"_load_ideology", |
|
||||||
lambda path: { |
|
||||||
p: {"left_right": 0.0, "progressive": 0.0} |
|
||||||
for p in ["P1", "P2", "P3", "P4", "P5"] |
|
||||||
}, |
|
||||||
) |
|
||||||
|
|
||||||
def fake_assign(r_lr, r_co, r_pc, axis): |
|
||||||
if axis == "x": |
|
||||||
return ("As 1", "interp", 0.0) |
|
||||||
return ("As 2", "interp", 0.0) |
|
||||||
|
|
||||||
monkeypatch.setattr(axis_classifier, "_assign_label", fake_assign) |
|
||||||
|
|
||||||
enriched = axis_classifier.classify_axes( |
|
||||||
positions_by_window, axes, str(tmp_path / "dummy.db") |
|
||||||
) |
|
||||||
|
|
||||||
# In constrained test environments classify_axes may return an empty |
|
||||||
# or None result if fallback resources are unavailable. Guard for that |
|
||||||
# and fall back to asserting the underlying display helper behaviour. |
|
||||||
if not enriched or not isinstance(enriched, dict): |
|
||||||
pytest.skip("classify_axes returned no enrichment in this environment") |
|
||||||
|
|
||||||
# Should now return SVD component labels instead of hardcoded values |
|
||||||
assert ( |
|
||||||
"EU-integratie" in enriched["x_label"] or "Nationalisme" in enriched["x_label"] |
|
||||||
) |
|
||||||
assert ( |
|
||||||
"Populistisch" in enriched["y_label"] or "Institutioneel" in enriched["y_label"] |
|
||||||
) |
|
||||||
@ -1,61 +0,0 @@ |
|||||||
import os |
|
||||||
import numpy as np |
|
||||||
|
|
||||||
|
|
||||||
def test_select_trajectory_plot_data_with_party_centroids(): |
|
||||||
# Synthetic positions_by_window: two windows with MPs mapping to parties |
|
||||||
positions_by_window = { |
|
||||||
"2024-Q1": { |
|
||||||
"A": (0.1, 0.2), |
|
||||||
"B": (0.2, 0.25), |
|
||||||
}, |
|
||||||
"2024-Q2": { |
|
||||||
"A": (0.15, 0.22), |
|
||||||
"B": (0.21, 0.27), |
|
||||||
}, |
|
||||||
} |
|
||||||
|
|
||||||
party_map = {"A": "P1", "B": "P2"} |
|
||||||
windows = sorted(list(positions_by_window.keys())) |
|
||||||
selected_parties = ["P1", "P2"] |
|
||||||
|
|
||||||
from explorer import select_trajectory_plot_data |
|
||||||
|
|
||||||
fig, trace_count, banner = select_trajectory_plot_data( |
|
||||||
positions_by_window, party_map, windows, selected_parties, smooth_alpha=0.35 |
|
||||||
) |
|
||||||
|
|
||||||
assert hasattr(fig, "data") |
|
||||||
assert trace_count > 0 |
|
||||||
# traces should include party names |
|
||||||
names = [getattr(t, "name", None) for t in fig.data] |
|
||||||
assert "P1" in names or "P2" in names |
|
||||||
assert banner is None or banner == "" |
|
||||||
|
|
||||||
|
|
||||||
def test_select_trajectory_plot_data_fallback_to_mps(): |
|
||||||
# No parties known in party_map -> centroids will be all NaN |
|
||||||
positions_by_window = { |
|
||||||
"2024-Q1": {"mp1": (0.1, 0.2)}, |
|
||||||
"2024-Q2": {"mp2": (0.2, 0.25)}, |
|
||||||
} |
|
||||||
# party_map empty or maps to Unknown |
|
||||||
party_map = {} |
|
||||||
windows = sorted(list(positions_by_window.keys())) |
|
||||||
selected_parties = [] |
|
||||||
|
|
||||||
# make fallback threshold small for test |
|
||||||
os.environ.pop("EXPLORER_MP_FALLBACK_COUNT", None) |
|
||||||
|
|
||||||
from explorer import select_trajectory_plot_data |
|
||||||
|
|
||||||
fig, trace_count, banner = select_trajectory_plot_data( |
|
||||||
positions_by_window, party_map, windows, selected_parties, smooth_alpha=0.35 |
|
||||||
) |
|
||||||
|
|
||||||
assert hasattr(fig, "data") |
|
||||||
assert trace_count > 0 |
|
||||||
assert ( |
|
||||||
banner |
|
||||||
== "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback." |
|
||||||
) |
|
||||||
@ -1,42 +0,0 @@ |
|||||||
"""Small integration test: compute_party_coords vs centroids code-path used in trajectories tab. |
|
||||||
|
|
||||||
Builds a tiny synthetic positions_by_window and party_map and asserts that the centroids |
|
||||||
returned by compute_party_coords (x and y) match the centroids computed by the |
|
||||||
build_trajectories_tab logic (the same mean computations). |
|
||||||
""" |
|
||||||
|
|
||||||
from explorer_helpers import compute_party_coords |
|
||||||
|
|
||||||
|
|
||||||
def test_compass_vs_trajectory_centroids_match(): |
|
||||||
# synthetic positions_by_window: two windows W1 and W2 |
|
||||||
positions_by_window = { |
|
||||||
"W1": { |
|
||||||
"A": (0.1, 0.2), |
|
||||||
"B": (0.3, 0.4), |
|
||||||
"C": (-0.2, 0.0), |
|
||||||
}, |
|
||||||
"W2": { |
|
||||||
"A": (0.15, 0.25), |
|
||||||
"B": (0.35, 0.45), |
|
||||||
"C": (-0.25, 0.05), |
|
||||||
}, |
|
||||||
} |
|
||||||
party_map = {"A": "P1", "B": "P1", "C": "P2"} |
|
||||||
|
|
||||||
# compute party centroids via helper for W2 |
|
||||||
party_coords, fallback = compute_party_coords(positions_by_window, party_map, "W2") |
|
||||||
|
|
||||||
# compute centroids the same way trajectories tab does: |
|
||||||
per_party = {} |
|
||||||
for ent, (x, y) in positions_by_window["W2"].items(): |
|
||||||
p = party_map.get(ent) |
|
||||||
per_party.setdefault(p, []).append((x, y)) |
|
||||||
centroids = {} |
|
||||||
for p, coords in per_party.items(): |
|
||||||
xs = [c[0] for c in coords] |
|
||||||
ys = [c[1] for c in coords] |
|
||||||
centroids[p] = (sum(xs) / len(xs), sum(ys) / len(ys)) |
|
||||||
|
|
||||||
assert party_coords == centroids |
|
||||||
assert not fallback |
|
||||||
@ -1,58 +0,0 @@ |
|||||||
import numpy as np |
|
||||||
from explorer_helpers import compute_party_centroids |
|
||||||
|
|
||||||
|
|
||||||
def test_full_coverage(): |
|
||||||
windows = ["w1", "w2"] |
|
||||||
positions_by_window = { |
|
||||||
"w1": {"mp1": (0.0, 0.0), "mp2": (2.0, 0.0)}, |
|
||||||
"w2": {"mp1": (1.0, 1.0), "mp2": (3.0, 1.0)}, |
|
||||||
} |
|
||||||
party_map = {"mp1": "P1", "mp2": "P2"} |
|
||||||
|
|
||||||
centroids, meta = compute_party_centroids(positions_by_window, party_map, windows) |
|
||||||
|
|
||||||
# both parties present in both windows -> no nans and correct lengths |
|
||||||
assert set(centroids.keys()) == {"P1", "P2"} |
|
||||||
for vals in centroids.values(): |
|
||||||
assert len(vals) == len(windows) |
|
||||||
for x, y in vals: |
|
||||||
assert not (np.isnan(x) or np.isnan(y)) |
|
||||||
|
|
||||||
|
|
||||||
def test_partial_coverage(): |
|
||||||
windows = ["w1", "w2", "w3"] |
|
||||||
positions_by_window = { |
|
||||||
"w1": {"mp1": (0.0, 0.0), "mp2": (2.0, 0.0)}, |
|
||||||
"w2": {"mp1": (1.0, 1.0)}, |
|
||||||
"w3": {"mp2": (3.0, 1.0)}, |
|
||||||
} |
|
||||||
party_map = {"mp1": "P1", "mp2": "P2"} |
|
||||||
|
|
||||||
centroids, meta = compute_party_centroids(positions_by_window, party_map, windows) |
|
||||||
|
|
||||||
# Expect P1 present in w1,w2 but missing in w3 |
|
||||||
assert centroids["P1"][0] == (0.0, 0.0) |
|
||||||
assert centroids["P1"][1] == (1.0, 1.0) |
|
||||||
assert np.isnan(centroids["P1"][2][0]) and np.isnan(centroids["P1"][2][1]) |
|
||||||
|
|
||||||
# Expect P2 present in w1,w3 but missing in w2 |
|
||||||
assert centroids["P2"][0] == (2.0, 0.0) |
|
||||||
assert np.isnan(centroids["P2"][1][0]) and np.isnan(centroids["P2"][1][1]) |
|
||||||
assert centroids["P2"][2] == (3.0, 1.0) |
|
||||||
|
|
||||||
# metadata counts should reflect non-nan entries |
|
||||||
assert meta["per_party_counts"]["P1"] == 2 |
|
||||||
assert meta["per_party_counts"]["P2"] == 2 |
|
||||||
assert meta["total_windows"] == len(windows) |
|
||||||
|
|
||||||
|
|
||||||
def test_no_parties(): |
|
||||||
windows = ["w1", "w2"] |
|
||||||
positions_by_window = {} |
|
||||||
party_map = {} |
|
||||||
|
|
||||||
centroids, meta = compute_party_centroids(positions_by_window, party_map, windows) |
|
||||||
|
|
||||||
assert centroids == {} |
|
||||||
assert meta["total_windows"] == len(windows) |
|
||||||
@ -1,26 +0,0 @@ |
|||||||
import os |
|
||||||
|
|
||||||
import importlib |
|
||||||
|
|
||||||
|
|
||||||
def test_get_debug_trajectories_enabled_various_values(): |
|
||||||
# Import under test |
|
||||||
import explorer |
|
||||||
|
|
||||||
# Ensure default (unset) is False |
|
||||||
os.environ.pop("EXPLORER_DEBUG_TRAJECTORIES", None) |
|
||||||
assert explorer.get_debug_trajectories_enabled() is False |
|
||||||
|
|
||||||
# Truthy values |
|
||||||
for val in ("1", "true", "True"): |
|
||||||
os.environ["EXPLORER_DEBUG_TRAJECTORIES"] = val |
|
||||||
# reload not required as function reads env at call time |
|
||||||
assert explorer.get_debug_trajectories_enabled() is True |
|
||||||
|
|
||||||
# Falsy / unexpected values |
|
||||||
for val in ("0", "false", "False", "yes", "random"): |
|
||||||
os.environ["EXPLORER_DEBUG_TRAJECTORIES"] = val |
|
||||||
assert explorer.get_debug_trajectories_enabled() is False |
|
||||||
|
|
||||||
# Cleanup |
|
||||||
os.environ.pop("EXPLORER_DEBUG_TRAJECTORIES", None) |
|
||||||
@ -1,49 +0,0 @@ |
|||||||
import os |
|
||||||
import types |
|
||||||
|
|
||||||
import explorer |
|
||||||
|
|
||||||
|
|
||||||
def test_load_positions_empty_sets_diagnostics(monkeypatch): |
|
||||||
# Monkeypatch load_positions to return empty positions |
|
||||||
monkeypatch.setattr( |
|
||||||
explorer, "load_positions", lambda db_path, window_size: ({}, {}) |
|
||||||
) |
|
||||||
monkeypatch.setenv("EXPLORER_DEBUG_TRAJECTORIES", "1") |
|
||||||
|
|
||||||
# Call build_trajectories_tab; it should set diagnostics and return without exception |
|
||||||
explorer.build_trajectories_tab(db_path="unused", window_size="annual") |
|
||||||
|
|
||||||
assert ( |
|
||||||
explorer._last_trajectories_diagnostics.get("stage") == "load_positions_empty" |
|
||||||
) |
|
||||||
|
|
||||||
|
|
||||||
def test_select_helper_exception_is_captured(monkeypatch): |
|
||||||
# Provide a minimal non-empty positions_by_window |
|
||||||
positions = {"W1": {"mp1": (0.1, 0.2)}} |
|
||||||
|
|
||||||
def fake_load_positions(db_path, window_size): |
|
||||||
return positions, {} |
|
||||||
|
|
||||||
monkeypatch.setattr(explorer, "load_positions", fake_load_positions) |
|
||||||
# Ensure party_map maps the mp so centroids/path that invoke select_trajectory_plot_data |
|
||||||
monkeypatch.setattr(explorer, "load_party_map", lambda db_path: {"mp1": "P1"}) |
|
||||||
|
|
||||||
# Patch select_trajectory_plot_data to raise |
|
||||||
def bad_helper(*args, **kwargs): |
|
||||||
raise ValueError("boom") |
|
||||||
|
|
||||||
monkeypatch.setattr(explorer, "select_trajectory_plot_data", bad_helper) |
|
||||||
monkeypatch.setenv("EXPLORER_DEBUG_TRAJECTORIES", "1") |
|
||||||
|
|
||||||
explorer.build_trajectories_tab(db_path="unused", window_size="annual") |
|
||||||
|
|
||||||
# Ensure the helper function has diagnostics attached and module diagnostics updated |
|
||||||
assert getattr(explorer.select_trajectory_plot_data, "_last_diagnostics", None) |
|
||||||
assert "exception" in explorer.select_trajectory_plot_data._last_diagnostics |
|
||||||
assert ( |
|
||||||
explorer._last_trajectories_diagnostics.get("stage") |
|
||||||
== "select_helper_exception" |
|
||||||
) |
|
||||||
assert "ValueError" in explorer._last_trajectories_diagnostics.get("exception", "") |
|
||||||
@ -1,182 +0,0 @@ |
|||||||
"""Tests for scripts/download_past_year.py enhancements. |
|
||||||
|
|
||||||
Tests extract_besluit_id helper, update_existing_motions function, |
|
||||||
and --skip-details flag wiring. |
|
||||||
""" |
|
||||||
|
|
||||||
import sys |
|
||||||
import argparse |
|
||||||
from unittest.mock import MagicMock, patch, call |
|
||||||
|
|
||||||
import pytest |
|
||||||
|
|
||||||
sys.path.insert(0, ".") |
|
||||||
|
|
||||||
from scripts.download_past_year import extract_besluit_id |
|
||||||
|
|
||||||
|
|
||||||
# --- extract_besluit_id tests --- |
|
||||||
|
|
||||||
|
|
||||||
def test_extract_besluit_id_valid(): |
|
||||||
url = "https://www.tweedekamer.nl/kamerstukken/stemmingsuitslagen/abc123-def456-ghi789" |
|
||||||
assert extract_besluit_id(url) == "abc123-def456-ghi789" |
|
||||||
|
|
||||||
|
|
||||||
def test_extract_besluit_id_trailing_slash(): |
|
||||||
url = "https://www.tweedekamer.nl/kamerstukken/stemmingsuitslagen/abc123-def456-ghi789/" |
|
||||||
assert extract_besluit_id(url) == "abc123-def456-ghi789" |
|
||||||
|
|
||||||
|
|
||||||
def test_extract_besluit_id_invalid(): |
|
||||||
url = "https://example.com/not-a-motion-url" |
|
||||||
assert extract_besluit_id(url) is None |
|
||||||
|
|
||||||
|
|
||||||
def test_extract_besluit_id_empty(): |
|
||||||
assert extract_besluit_id("") is None |
|
||||||
|
|
||||||
|
|
||||||
# --- update_existing_motions tests --- |
|
||||||
|
|
||||||
|
|
||||||
def test_update_existing_motions_updates_body_text(tmp_path): |
|
||||||
"""Mock DuckDB + mock API, verify UPDATE is called with correct body_text.""" |
|
||||||
import duckdb |
|
||||||
from scripts.download_past_year import update_existing_motions |
|
||||||
|
|
||||||
db_path = str(tmp_path / "test.db") |
|
||||||
|
|
||||||
# Set up a real DuckDB database with the motions table |
|
||||||
conn = duckdb.connect(db_path) |
|
||||||
conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1") |
|
||||||
conn.execute(""" |
|
||||||
CREATE TABLE motions ( |
|
||||||
id INTEGER DEFAULT nextval('motions_id_seq'), |
|
||||||
title TEXT NOT NULL, |
|
||||||
description TEXT, |
|
||||||
date DATE, |
|
||||||
policy_area TEXT, |
|
||||||
voting_results JSON, |
|
||||||
winning_margin FLOAT, |
|
||||||
controversy_score FLOAT, |
|
||||||
layman_explanation TEXT, |
|
||||||
externe_identifier TEXT, |
|
||||||
body_text TEXT, |
|
||||||
url TEXT UNIQUE, |
|
||||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, |
|
||||||
PRIMARY KEY (id) |
|
||||||
) |
|
||||||
""") |
|
||||||
# Insert a motion with missing body_text |
|
||||||
conn.execute(""" |
|
||||||
INSERT INTO motions (title, description, date, url, body_text) |
|
||||||
VALUES ('Test Motion', 'desc', '2017-06-01', |
|
||||||
'https://www.tweedekamer.nl/kamerstukken/stemmingsuitslagen/besluit-id-123', |
|
||||||
NULL) |
|
||||||
""") |
|
||||||
conn.close() |
|
||||||
|
|
||||||
# Mock API that returns body_text |
|
||||||
mock_api = MagicMock() |
|
||||||
mock_api._get_motion_details.return_value = { |
|
||||||
"title": "Real Title", |
|
||||||
"description": "Real Description", |
|
||||||
"date": "2017-06-01", |
|
||||||
"externe_identifier": "kst-12345", |
|
||||||
"body_text": "constaterende dat de motie gaat over iets belangrijks", |
|
||||||
} |
|
||||||
|
|
||||||
updated, skipped = update_existing_motions( |
|
||||||
db_path=db_path, |
|
||||||
api=mock_api, |
|
||||||
start_date="2017-01-01", |
|
||||||
end_date="2017-12-31", |
|
||||||
delay=0.0, |
|
||||||
) |
|
||||||
|
|
||||||
assert updated == 1 |
|
||||||
assert skipped == 0 |
|
||||||
|
|
||||||
# Verify the body_text was actually written to the DB |
|
||||||
conn = duckdb.connect(db_path, read_only=True) |
|
||||||
row = conn.execute("SELECT body_text FROM motions WHERE id = 1").fetchone() |
|
||||||
conn.close() |
|
||||||
assert row[0] == "constaterende dat de motie gaat over iets belangrijks" |
|
||||||
|
|
||||||
# Verify the API was called with the correct besluit_id |
|
||||||
mock_api._get_motion_details.assert_called_once_with("besluit-id-123") |
|
||||||
|
|
||||||
|
|
||||||
def test_update_existing_motions_skips_when_no_besluit_id(tmp_path): |
|
||||||
"""URL without valid besluit_id is skipped.""" |
|
||||||
import duckdb |
|
||||||
from scripts.download_past_year import update_existing_motions |
|
||||||
|
|
||||||
db_path = str(tmp_path / "test.db") |
|
||||||
|
|
||||||
conn = duckdb.connect(db_path) |
|
||||||
conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1") |
|
||||||
conn.execute(""" |
|
||||||
CREATE TABLE motions ( |
|
||||||
id INTEGER DEFAULT nextval('motions_id_seq'), |
|
||||||
title TEXT NOT NULL, |
|
||||||
description TEXT, |
|
||||||
date DATE, |
|
||||||
policy_area TEXT, |
|
||||||
voting_results JSON, |
|
||||||
winning_margin FLOAT, |
|
||||||
controversy_score FLOAT, |
|
||||||
layman_explanation TEXT, |
|
||||||
externe_identifier TEXT, |
|
||||||
body_text TEXT, |
|
||||||
url TEXT UNIQUE, |
|
||||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, |
|
||||||
PRIMARY KEY (id) |
|
||||||
) |
|
||||||
""") |
|
||||||
# Insert a motion with a URL that won't parse to a besluit_id |
|
||||||
conn.execute(""" |
|
||||||
INSERT INTO motions (title, description, date, url, body_text) |
|
||||||
VALUES ('Bad URL Motion', 'desc', '2017-06-01', |
|
||||||
'https://example.com/not-a-valid-url', |
|
||||||
NULL) |
|
||||||
""") |
|
||||||
conn.close() |
|
||||||
|
|
||||||
mock_api = MagicMock() |
|
||||||
|
|
||||||
updated, skipped = update_existing_motions( |
|
||||||
db_path=db_path, |
|
||||||
api=mock_api, |
|
||||||
start_date="2017-01-01", |
|
||||||
end_date="2017-12-31", |
|
||||||
delay=0.0, |
|
||||||
) |
|
||||||
|
|
||||||
assert updated == 0 |
|
||||||
assert skipped == 1 |
|
||||||
# API should never have been called |
|
||||||
mock_api._get_motion_details.assert_not_called() |
|
||||||
|
|
||||||
|
|
||||||
def test_skip_details_flag_passed_to_api(): |
|
||||||
"""Verify the argparse flag is wired correctly by parsing args.""" |
|
||||||
from scripts.download_past_year import build_parser |
|
||||||
|
|
||||||
# Default: skip_details should be True |
|
||||||
parser = build_parser() |
|
||||||
args = parser.parse_args([]) |
|
||||||
assert args.skip_details is True |
|
||||||
|
|
||||||
# Explicitly set to false via --no-skip-details |
|
||||||
args = parser.parse_args(["--no-skip-details"]) |
|
||||||
assert args.skip_details is False |
|
||||||
|
|
||||||
# Explicitly set --update-existing |
|
||||||
args = parser.parse_args(["--update-existing"]) |
|
||||||
assert args.update_existing is True |
|
||||||
|
|
||||||
# Default: update_existing should be False |
|
||||||
args = parser.parse_args([]) |
|
||||||
assert args.update_existing is False |
|
||||||
@ -1,344 +0,0 @@ |
|||||||
"""Tests for _build_party_axis_figure and load_party_mp_vectors in explorer.py.""" |
|
||||||
|
|
||||||
import numpy as np |
|
||||||
import plotly.graph_objects as go |
|
||||||
import pytest |
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------- |
|
||||||
# Helpers |
|
||||||
# --------------------------------------------------------------------------- |
|
||||||
|
|
||||||
|
|
||||||
def _make_party_scores(n_parties=3, dim=50): |
|
||||||
"""Return a minimal party_scores dict for testing.""" |
|
||||||
rng = np.random.default_rng(0) |
|
||||||
names = [f"Party{i}" for i in range(n_parties)] |
|
||||||
return {name: rng.standard_normal(dim).tolist() for name in names} |
|
||||||
|
|
||||||
|
|
||||||
def _make_theme(flip=False): |
|
||||||
return { |
|
||||||
"label": "Test axis", |
|
||||||
"explanation": "A test axis.", |
|
||||||
"positive_pole": "Left", |
|
||||||
"negative_pole": "Right", |
|
||||||
"flip": flip, |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
def assert_figure_like(fig): |
|
||||||
"""Minimal duck-typed assertion for a Figure-like object. |
|
||||||
|
|
||||||
The code under test (explorer.py) provides a small fallback Figure-like |
|
||||||
object when plotly is not installed. Tests should not import plotly |
|
||||||
directly; instead verify the returned object supports the minimal |
|
||||||
attributes used by the tests (.data as a list-like container). |
|
||||||
""" |
|
||||||
assert hasattr(fig, "data"), "figure-like object must have .data" |
|
||||||
assert isinstance(fig.data, (list, tuple)), ".data must be a list-like container" |
|
||||||
|
|
||||||
|
|
||||||
def _make_bootstrap_data(party_scores, dim=50): |
|
||||||
"""Build synthetic bootstrap_data matching party_scores keys. |
|
||||||
|
|
||||||
Party0 gets n_mps=1 (single-MP party → diamond marker). |
|
||||||
Others get n_mps > 1 with a real CI spread. |
|
||||||
""" |
|
||||||
rng = np.random.default_rng(1) |
|
||||||
result = {} |
|
||||||
for i, party in enumerate(party_scores): |
|
||||||
centroid = np.array(party_scores[party]) |
|
||||||
if i == 0: |
|
||||||
# Single-MP party |
|
||||||
result[party] = { |
|
||||||
"centroid": centroid, |
|
||||||
"ci_lower": centroid.copy(), |
|
||||||
"ci_upper": centroid.copy(), |
|
||||||
"std": np.zeros(dim), |
|
||||||
"n_mps": 1, |
|
||||||
} |
|
||||||
else: |
|
||||||
spread = rng.uniform(0.01, 0.05, size=dim) |
|
||||||
result[party] = { |
|
||||||
"centroid": centroid, |
|
||||||
"ci_lower": centroid - spread, |
|
||||||
"ci_upper": centroid + spread, |
|
||||||
"std": spread / 2, |
|
||||||
"n_mps": 5 + i, |
|
||||||
} |
|
||||||
return result |
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------- |
|
||||||
# Tests |
|
||||||
# --------------------------------------------------------------------------- |
|
||||||
|
|
||||||
|
|
||||||
class TestBuildPartyAxisFigure: |
|
||||||
"""Tests for _build_party_axis_figure (pure Plotly figure construction).""" |
|
||||||
|
|
||||||
def test_returns_figure_without_bootstrap(self): |
|
||||||
"""Basic call without bootstrap → returns go.Figure with 2 traces.""" |
|
||||||
from explorer import _build_party_axis_figure |
|
||||||
|
|
||||||
party_scores = _make_party_scores() |
|
||||||
theme = _make_theme() |
|
||||||
fig = _build_party_axis_figure(party_scores, comp_sel=1, theme=theme) |
|
||||||
|
|
||||||
assert isinstance(fig, go.Figure) |
|
||||||
assert len(fig.data) == 2 # baseline + markers |
|
||||||
# First trace is the baseline line |
|
||||||
assert fig.data[0].mode == "lines" |
|
||||||
# Second trace is the marker scatter |
|
||||||
assert "markers" in fig.data[1].mode |
|
||||||
|
|
||||||
def test_returns_none_for_empty_scores(self): |
|
||||||
"""Empty party_scores returns None (no figure).""" |
|
||||||
from explorer import _build_party_axis_figure |
|
||||||
|
|
||||||
fig = _build_party_axis_figure({}, comp_sel=1, theme=_make_theme()) |
|
||||||
assert fig is None |
|
||||||
|
|
||||||
def test_with_bootstrap_has_diamonds_for_single_mp(self): |
|
||||||
"""bootstrap_data present → N=1 party gets diamond, others get circle. No error bars.""" |
|
||||||
from explorer import _build_party_axis_figure |
|
||||||
|
|
||||||
party_scores = _make_party_scores() |
|
||||||
theme = _make_theme() |
|
||||||
bootstrap_data = _make_bootstrap_data(party_scores) |
|
||||||
fig = _build_party_axis_figure( |
|
||||||
party_scores, |
|
||||||
comp_sel=1, |
|
||||||
theme=theme, |
|
||||||
bootstrap_data=bootstrap_data, |
|
||||||
) |
|
||||||
|
|
||||||
assert isinstance(fig, go.Figure) |
|
||||||
assert len(fig.data) == 2 |
|
||||||
|
|
||||||
marker_trace = fig.data[1] |
|
||||||
|
|
||||||
# No visual error bars — CIs are in hover text only |
|
||||||
assert ( |
|
||||||
marker_trace.error_x.array is None |
|
||||||
or marker_trace.error_x.visible is not True |
|
||||||
) |
|
||||||
|
|
||||||
# Marker symbols: first party (N=1) → diamond, others → circle |
|
||||||
symbols = list(marker_trace.marker.symbol) |
|
||||||
assert symbols[0] == "diamond" |
|
||||||
assert all(s == "circle" for s in symbols[1:]) |
|
||||||
|
|
||||||
def test_with_bootstrap_hover_includes_n_and_ci(self):
    """Every hover string carries the MP count and the 95%-BI interval."""
    from explorer import _build_party_axis_figure

    scores = _make_party_scores()
    fig = _build_party_axis_figure(
        scores,
        comp_sel=1,
        theme=_make_theme(),
        bootstrap_data=_make_bootstrap_data(scores),
    )

    for hover in fig.data[1].hovertext:
        assert "(N=" in hover
        assert "95%-BI" in hover
|
||||||
|
|
||||||
def test_flip_negates_scores(self):
    """flip=True mirrors the axis: every score is the negation of the
    score produced with flip=False."""
    from explorer import _build_party_axis_figure

    scores = _make_party_scores()
    boot = _make_bootstrap_data(scores)

    fig_plain = _build_party_axis_figure(
        scores,
        comp_sel=1,
        theme=_make_theme(flip=False),
        bootstrap_data=boot,
    )
    fig_mirrored = _build_party_axis_figure(
        scores,
        comp_sel=1,
        theme=_make_theme(flip=True),
        bootstrap_data=boot,
    )

    plain_xs = list(fig_plain.data[1].x)
    mirrored_xs = list(fig_mirrored.data[1].x)

    # Each flipped score must be the exact negation of the unflipped one.
    for plain, mirrored in zip(plain_xs, mirrored_xs):
        assert pytest.approx(plain) == -mirrored
|
||||||
|
|
||||||
def test_without_bootstrap_hover_is_score_only(self):
    """No bootstrap data -> hover shows only 'Party: score', no N or CI."""
    from explorer import _build_party_axis_figure

    fig = _build_party_axis_figure(
        _make_party_scores(), comp_sel=1, theme=_make_theme()
    )

    for hover in fig.data[1].hovertext:
        assert "95%-BI" not in hover
        assert "(N=" not in hover
|
||||||
|
|
||||||
|
|
||||||
class TestLoadPartyMpVectorsImportable:
    """Smoke check: the public MP-vector loader is exposed by the module."""

    def test_importable(self):
        """The symbol imports cleanly and is a callable."""
        from explorer import load_party_mp_vectors

        assert callable(load_party_mp_vectors)
|
||||||
|
|
||||||
|
|
||||||
def test_partial_party_traces():
    """The trajectory helper returns a figure whose hover template exposes
    raw coordinates even when a party is missing from some window."""
    from explorer import select_trajectory_plot_data

    # Alice has no position in w2, which should become a NaN gap.
    positions_by_window = {
        "w1": {"Alice": (0.1, 0.2), "Bob": (0.5, 0.6)},
        "w2": {"Bob": (0.6, 0.7)},
    }

    fig, trace_count, banner = select_trajectory_plot_data(
        positions_by_window,
        {"Alice": "P1", "Bob": "P2"},
        ["w1", "w2"],
        selected_parties=["P1", "P2"],
        smooth_alpha=1.0,
    )
    assert_figure_like(fig)
    assert trace_count >= 1

    # At least one trace must advertise the raw x value in its hover.
    assert any(
        "x (raw)" in (getattr(trace, "hovertemplate", None) or "")
        for trace in fig.data
    )
|
||||||
|
|
||||||
|
|
||||||
def test_partial_party_traces_with_nan_centroids():
    """Build a minimal trajectories figure from partial centroids and check
    that traces carry customdata of matching length and a hovertemplate
    mentioning raw values.

    Renamed from ``test_partial_party_traces``: this module already defines
    a test with that exact name, so the second definition silently shadowed
    the first and pytest only ever collected one of the two.
    """
    from explorer import select_trajectory_plot_data
    # Do not import plotly here; some test environments don't have it.
    # The module under test provides a minimal Figure-like fallback, so
    # duck-typing assertions are used instead.

    # Two parties with coverage on different windows (B missing in w1).
    positions_by_window = {
        "w1": {"A": (0.1, 0.2), "B": (np.nan, np.nan)},
        "w2": {"A": (0.15, 0.25), "B": (0.3, 0.4)},
    }
    party_map = {"A": "P1", "B": "P2"}
    windows = ["w1", "w2"]

    fig, trace_count, banner = select_trajectory_plot_data(
        positions_by_window,
        party_map,
        windows,
        selected_parties=["P1", "P2"],
        smooth_alpha=1.0,
    )
    assert_figure_like(fig)
    # Traces must exist for both parties despite partial coverage.
    assert len(fig.data) >= 2

    for tr in fig.data:
        x = list(tr.x) if hasattr(tr, "x") else []
        y = list(tr.y) if hasattr(tr, "y") else []
        cd = (
            list(tr.customdata)
            if hasattr(tr, "customdata") and tr.customdata is not None
            else []
        )
        # When customdata is present its length must match the coordinates.
        if cd:
            assert len(cd) == len(x) == len(y)

        # Hovertemplate should include raw marker fields like 'x (raw)'.
        if hasattr(tr, "hovertemplate") and tr.hovertemplate:
            assert "x (raw)" in tr.hovertemplate
|
||||||
|
|
||||||
|
|
||||||
def test_render_party_axis_chart_1d_renders():
    """_render_party_axis_chart_1d draws a baseline line plus a marker
    scatter — the same two-trace layout used for components 1-2.

    Fix: dropped the unused ``MagicMock`` import (only ``patch`` is used).
    """
    from unittest.mock import patch

    from explorer import _render_party_axis_chart_1d

    party_coords = {
        "VVD": (0.5,),
        "SP": (-0.6,),
        "PVV": (0.8,),
        "DENK": (-0.4,),
    }
    theme = {
        "label": "Test Component",
        "positive_pole": "Positive",
        "negative_pole": "Negative",
        "flip": False,
    }

    # Capture the figure handed to Streamlit instead of rendering it.
    with patch("explorer.st.plotly_chart") as mock_plotly_chart:
        _render_party_axis_chart_1d(party_coords, 3, theme)

    assert mock_plotly_chart.called, "plotly_chart should be called"

    fig = mock_plotly_chart.call_args[0][0]
    assert fig is not None, "Figure should not be None"
    assert len(fig.data) == 2, "Figure should have 2 traces (baseline + markers)"
    assert fig.data[0].mode == "lines", "First trace should be a line"
    assert "markers" in fig.data[1].mode, "Second trace should have markers"
|
||||||
|
|
||||||
|
|
||||||
def test_render_party_axis_chart_1d_empty_coords():
    """Empty party coords -> a caption is shown and nothing is plotted."""
    from unittest.mock import patch

    from explorer import _render_party_axis_chart_1d

    theme = {
        "label": "Test Component",
        "positive_pole": "Positive",
        "negative_pole": "Negative",
        "flip": False,
    }

    # Empty coords must route to st.caption, never st.plotly_chart.
    with patch("explorer.st.caption") as mock_caption:
        with patch("explorer.st.plotly_chart") as mock_plotly_chart:
            _render_party_axis_chart_1d({}, 3, theme)

    assert mock_caption.called, "Should show caption for empty data"
    assert not mock_plotly_chart.called, (
        "Should not call plotly_chart for empty data"
    )
|
||||||
@ -1,62 +0,0 @@ |
|||||||
import numpy as np |
|
||||||
from explorer_helpers import compute_party_coords, compute_party_centroids |
|
||||||
|
|
||||||
|
|
||||||
def test_compute_party_coords_basic():
    """Party coordinates are the per-party means of member MP positions."""
    # One synthetic window with three MPs in two parties.
    positions_by_window = {
        "2024": {
            "Alice": (0.1, 0.2),
            "Bob": (0.3, 0.4),
            "Carol": (0.5, -0.1),
        }
    }
    membership = {"Alice": "P1", "Bob": "P1", "Carol": "P2"}

    coords, fallback = compute_party_coords(positions_by_window, membership, "2024")

    assert "P1" in coords and "P2" in coords
    # P1 averages (0.1, 0.2) and (0.3, 0.4) -> (0.2, 0.3); P2 is Carol alone.
    expected = {"P1": (0.2, 0.3), "P2": (0.5, -0.1)}
    for party, (want_x, want_y) in expected.items():
        assert abs(coords[party][0] - want_x) < 1e-9
        assert abs(coords[party][1] - want_y) < 1e-9
    assert fallback == set()
|
||||||
|
|
||||||
|
|
||||||
def test_compute_party_coords_with_fallback():
    """Parties absent from the window fall back to the supplied scores and
    are reported in the fallback set."""
    coords, fallback = compute_party_coords(
        {"2024": {"Alice": (0.1, 0.1)}},
        {"Alice": "P1"},
        "2024",
        {"P2": [1.234, -0.987, 0.0]},
    )

    assert coords["P1"][0] == 0.1
    assert coords["P2"][0] == 1.234
    assert "P2" in fallback
|
||||||
|
|
||||||
|
|
||||||
def test_compute_party_centroids_nan_handling():
    """Windows without positions for a party are filled with (nan, nan).

    P1 has a centroid in 'w1' but nothing in 'w2', so its series must be
    [(x, y), (nan, nan)].
    """
    centroids, _metadata = compute_party_centroids(
        {"w1": {"Alice": (0.1, 0.2)}, "w2": {}},
        {"Alice": "P1"},
        ["w1", "w2"],
    )

    assert "P1" in centroids
    series = centroids["P1"]
    assert len(series) == 2
    first, second = series
    # w1 carries real coordinates; w2 is the NaN filler.
    assert not any(np.isnan(c) for c in first)
    assert all(np.isnan(c) for c in second)
|
||||||
@ -1,22 +0,0 @@ |
|||||||
import numpy as np |
|
||||||
from explorer_helpers import inspect_positions_for_issues |
|
||||||
|
|
||||||
|
|
||||||
def test_inspect_positions_for_issues_basic():
    """Diagnostics report window counts, MP id sets, and the newer
    per-position metrics."""
    diag = inspect_positions_for_issues(
        {
            "w1": {"mp1": (1.0, 2.0), "mp2": (float("nan"), float("nan"))},
            "w2": {},
        },
        {"mp1": "P1"},
    )

    # Long-standing keys are still present.
    assert diag["windows_count"] == 2
    assert isinstance(diag["mp_id_set"], set)

    # Newer diagnostics added alongside the basics.
    assert "mp_positions_count" in diag
    assert diag["mp_positions_count"] >= 1
    assert "mp_positions_sample" in diag
    assert isinstance(diag["mp_positions_sample"], list)
    assert "windows_with_no_positions" in diag
    assert isinstance(diag["windows_with_no_positions"], list)
|
||||||
@ -1,44 +0,0 @@ |
|||||||
import pytest |
|
||||||
|
|
||||||
from explorer_helpers import inspect_positions_for_issues |
|
||||||
|
|
||||||
|
|
||||||
def test_inspect_positions_for_issues_basic():
    """Centroid counts and id mismatches are derived from synthetic windows."""
    # Three windows; 'unknown_mp' never appears in the party map.
    positions_by_window = {
        "2021-01": {
            "mp_1": (0.1, 0.2),
            "mp_2 (Amsterdam)": (0.5, 0.6),
        },
        "2021-02": {
            "mp_2 (Amsterdam)": (0.4, 0.7),
            "mp_3": (0.9, 0.1),
        },
        "2021-03": {
            "mp_1": (0.2, 0.3),
            "unknown_mp": (0.0, 0.0),
        },
    }
    party_map = {"mp_1": "P1", "mp_2": "P2", "mp_3": "P3"}

    res = inspect_positions_for_issues(positions_by_window, party_map)

    assert res["windows_count"] == 3
    assert res["party_map_count"] == len(party_map)
    # Number of windows in which each party had at least one centroid:
    # P1 in 2021-01/2021-03, P2 in 2021-01/2021-02, P3 in 2021-02 only.
    for party, windows_seen in (("P1", 2), ("P2", 2), ("P3", 1)):
        assert res["parties_with_centroid_counts"].get(party) == windows_seen

    # Unmapped ids are surfaced, and every MP ever seen is in the id set.
    assert "unknown_mp" in res["mismatched_mp_ids_sample"]
    assert res["mp_id_set"] >= {"mp_1", "mp_2 (Amsterdam)", "mp_3", "unknown_mp"}
|
||||||
@ -1,121 +0,0 @@ |
|||||||
"""Tests for compute_party_bootstrap_cis in analysis.political_axis.""" |
|
||||||
|
|
||||||
import numpy as np |
|
||||||
|
|
||||||
from analysis.political_axis import compute_party_bootstrap_cis |
|
||||||
|
|
||||||
|
|
||||||
# ── Helpers ────────────────────────────────────────────────────────────────── |
|
||||||
|
|
||||||
|
|
||||||
def _make_party_vectors(n_mps: int, dim: int = 50, seed: int = 0) -> list: |
|
||||||
"""Generate a list of random MP vectors for a single party.""" |
|
||||||
rng = np.random.default_rng(seed) |
|
||||||
return [rng.standard_normal(dim) for _ in range(n_mps)] |
|
||||||
|
|
||||||
|
|
||||||
# ── Tests ──────────────────────────────────────────────────────────────────── |
|
||||||
|
|
||||||
|
|
||||||
class TestBootstrapDeterministic:
    """Bootstrapping must be fully reproducible under a fixed seed."""

    def test_same_seed_gives_identical_output(self):
        """Identical inputs and seed -> bit-identical statistics."""
        party_vectors = {"PartyA": _make_party_vectors(10, dim=5, seed=99)}

        first = compute_party_bootstrap_cis(party_vectors, n_boot=200, seed=42)
        second = compute_party_bootstrap_cis(party_vectors, n_boot=200, seed=42)

        # Every array-valued statistic must match exactly between runs.
        for key in ("centroid", "ci_lower", "ci_upper", "std"):
            np.testing.assert_array_equal(
                first["PartyA"][key], second["PartyA"][key]
            )
        assert first["PartyA"]["n_mps"] == second["PartyA"]["n_mps"]
|
||||||
|
|
||||||
|
|
||||||
class TestBootstrapSingleMP:
    """A single-member party has a degenerate (zero-width) interval."""

    def test_single_mp_collapses_ci(self):
        """One MP -> centroid == ci_lower == ci_upper and std == 0."""
        vec = np.array([1.0, 2.0, 3.0])

        entry = compute_party_bootstrap_cis({"Solo": [vec]}, n_boot=500)["Solo"]

        # Point estimate and both bounds collapse onto the lone vector.
        for key in ("centroid", "ci_lower", "ci_upper"):
            np.testing.assert_array_equal(entry[key], vec)
        np.testing.assert_array_equal(entry["std"], np.zeros_like(vec))
        assert entry["n_mps"] == 1
|
||||||
|
|
||||||
|
|
||||||
class TestBootstrapCIWidthScalesWithN:
    """Interval width should shrink as the party size grows."""

    def test_larger_party_has_narrower_ci(self):
        """3 MPs from a distribution give wider CIs than 30 MPs from it."""
        rng = np.random.default_rng(123)
        dim = 10
        # Same underlying distribution, different sample sizes.
        party_vectors = {
            "Small": [rng.standard_normal(dim) for _ in range(3)],
            "Large": [rng.standard_normal(dim) for _ in range(30)],
        }

        result = compute_party_bootstrap_cis(party_vectors, n_boot=2000, seed=42)

        def interval_width(name):
            return result[name]["ci_upper"] - result[name]["ci_lower"]

        # On average the small party's interval must be the wider one.
        assert np.mean(interval_width("Small")) > np.mean(interval_width("Large"))
|
||||||
|
|
||||||
|
|
||||||
class TestBootstrapEmptyParty:
    """Parties without any member vectors are dropped from the result."""

    def test_empty_list_excluded(self):
        """An empty vector list must not produce an output entry."""
        result = compute_party_bootstrap_cis(
            {"HasMPs": _make_party_vectors(5, dim=4), "Empty": []},
            n_boot=100,
        )

        assert "HasMPs" in result
        assert "Empty" not in result
|
||||||
|
|
||||||
|
|
||||||
class TestBootstrapCIContainsCentroid:
    """The point estimate must always lie inside its own interval."""

    def test_centroid_within_ci_bounds(self):
        """Element-wise: ci_lower <= centroid <= ci_upper."""
        entry = compute_party_bootstrap_cis(
            {"A": _make_party_vectors(15, dim=8, seed=7)}, n_boot=1000, seed=42
        )["A"]

        assert np.all(entry["ci_lower"] <= entry["centroid"])
        assert np.all(entry["centroid"] <= entry["ci_upper"])
|
||||||
|
|
||||||
|
|
||||||
class TestBootstrapCustomCILevel:
    """Higher confidence levels must produce wider intervals."""

    def test_wider_ci_at_higher_level(self):
        """ci=99 dominates ci=90 in width on every dimension."""
        party_vectors = {"X": _make_party_vectors(20, dim=6, seed=55)}

        widths = {}
        for level in (90.0, 99.0):
            res = compute_party_bootstrap_cis(
                party_vectors, n_boot=2000, ci=level, seed=42
            )
            widths[level] = res["X"]["ci_upper"] - res["X"]["ci_lower"]

        assert np.all(widths[99.0] >= widths[90.0])
|
||||||
@ -1,106 +0,0 @@ |
|||||||
"""Tests for analysis/svd_labels module.""" |
|
||||||
|
|
||||||
|
|
||||||
def test_get_svd_label_returns_correct_label():
    """Each SVD component maps to its known pole-pair label."""
    from analysis.svd_labels import get_svd_label

    # Component -> pole names, at least one of which must appear in the label.
    expectations = {
        1: ("EU-integratie", "Nationalisme"),
        2: ("Populistisch", "Institutioneel"),
        3: ("Verzorgingsstaat", "Marktwerking"),
    }
    for component, poles in expectations.items():
        label = get_svd_label(component)
        assert any(pole in label for pole in poles)
|
||||||
|
|
||||||
|
|
||||||
def test_compute_flip_direction_right_on_left():
    """Right-wing parties scoring left of left-wing parties -> flip=True."""
    from analysis.svd_labels import compute_flip_direction

    # Right parties carry negative component-1 scores, left parties positive:
    # right_mean = -0.65 < left_mean = 0.5, so the axis must flip.
    scores = {
        "VVD": [-0.5, 0.0],
        "PVV": [-0.8, 0.0],
        "SP": [0.6, 0.0],
        "DENK": [0.4, 0.0],
    }

    assert compute_flip_direction(1, scores) is True
|
||||||
|
|
||||||
|
|
||||||
def test_compute_flip_direction_right_on_right():
    """Right-wing parties already on the right -> flip=False."""
    from analysis.svd_labels import compute_flip_direction

    # Right parties carry positive component-1 scores, left parties negative:
    # right_mean = 0.65 > left_mean = -0.5, so no flip is needed.
    scores = {
        "VVD": [0.5, 0.0],
        "PVV": [0.8, 0.0],
        "SP": [-0.6, 0.0],
        "DENK": [-0.4, 0.0],
    }

    assert compute_flip_direction(1, scores) is False
|
||||||
|
|
||||||
|
|
||||||
def test_compute_flip_direction_insufficient_data():
    """Missing either bloc of reference parties -> flip defaults to False."""
    from analysis.svd_labels import compute_flip_direction

    only_left = {"SP": [0.6, 0.0], "DENK": [0.4, 0.0]}
    only_right = {"VVD": [0.5, 0.0], "PVV": [0.8, 0.0]}

    # No right-wing reference parties present.
    assert compute_flip_direction(1, only_left) is False
    # No left-wing reference parties present.
    assert compute_flip_direction(1, only_right) is False
|
||||||
|
|
||||||
|
|
||||||
def test_auto_flip_computation_for_all_components():
    """Flip direction is computed consistently for components 1 through 10."""
    from analysis.svd_labels import compute_flip_direction

    # Right parties positive on every component -> never flip.
    aligned = {
        "VVD": [0.5] * 10,
        "PVV": [0.8] * 10,
        "SP": [-0.6] * 10,
        "DENK": [-0.4] * 10,
    }
    for comp in range(1, 11):
        assert compute_flip_direction(comp, aligned) is False, (
            f"Component {comp} should not flip"
        )

    # Right parties negative on every component -> always flip.
    mirrored = {
        "VVD": [-0.5] * 10,
        "PVV": [-0.8] * 10,
        "SP": [0.6] * 10,
        "DENK": [0.4] * 10,
    }
    for comp in range(1, 11):
        assert compute_flip_direction(comp, mirrored) is True, (
            f"Component {comp} should flip"
        )
|
||||||
@ -1,102 +0,0 @@ |
|||||||
"""Integration test: full trajectory pipeline produces non-empty plot.""" |
|
||||||
|
|
||||||
import pytest |
|
||||||
|
|
||||||
from explorer import load_positions, load_party_map, select_trajectory_plot_data |
|
||||||
from explorer_helpers import compute_party_centroids |
|
||||||
|
|
||||||
|
|
||||||
def test_trajectory_pipeline_produces_traces():
    """Regression: the full pipeline must yield colored traces, not empty charts.

    Fixes: the original re-imported ``math`` inside the per-trace loop and
    never used it, relying on the cryptic ``v != v`` NaN trick instead; the
    NaN check now uses ``math.isnan`` via a small hoisted helper.
    """
    import math

    db_path = "data/motions.db"
    window_size = "annual"

    # Stage 1: load positions.
    positions_by_window, _ = load_positions(db_path, window_size)
    assert len(positions_by_window) > 0, "Expected at least one window"
    total_mps = sum(len(v) for v in positions_by_window.values())
    assert total_mps > 0, "Expected MPs in windows"

    # Stage 2: load party map.
    party_map = load_party_map(db_path)
    assert len(party_map) > 0, "Expected party map entries"

    # Stage 3: compute centroids.
    windows = list(positions_by_window.keys())
    centroids, mp_positions = compute_party_centroids(
        positions_by_window, party_map, windows
    )
    assert len(centroids) > 0, "Expected at least one party centroid"

    # Stage 4: select trajectory plot data with the app's default party
    # selection (CDA, D66, VVD when available, else the first three).
    default_parties = [p for p in ["CDA", "D66", "VVD"] if p in centroids]
    if not default_parties:
        default_parties = list(centroids.keys())[:3]

    fig, trace_count, banner = select_trajectory_plot_data(
        positions_by_window,
        party_map,
        windows,
        selected_parties=default_parties,
        smooth_alpha=0.35,
    )

    # Assertions on the resulting figure.
    assert trace_count > 0, (
        f"Expected traces but got trace_count={trace_count}, banner={banner}"
    )
    assert banner is None, f"Expected no fallback banner but got: {banner}"
    assert len(fig.data) == trace_count, (
        f"fig.data ({len(fig.data)}) should equal trace_count ({trace_count})"
    )

    def _real_count(values):
        """Count entries that are neither None nor NaN."""
        return sum(
            1
            for v in values
            if v is not None and not (isinstance(v, float) and math.isnan(v))
        )

    # Every trace needs coordinates, and at least some must be real numbers.
    for trace in fig.data:
        assert len(trace.x) > 0, f"Trace {trace.name} has no x values"
        assert len(trace.y) > 0, f"Trace {trace.name} has no y values"
        assert _real_count(trace.x) > 0, f"Trace {trace.name} has all NaN x values"
        assert _real_count(trace.y) > 0, f"Trace {trace.name} has all NaN y values"
|
||||||
|
|
||||||
|
|
||||||
def test_trajectory_helper_skips_second_loop():
    """Regression: when select_trajectory_plot_data succeeds, build_trajectories_tab
    should NOT add duplicate traces via the fallback loop.

    This verifies the helper produces clean output without relying on the
    second loop in build_trajectories_tab.
    """
    db_path = "data/motions.db"
    window_size = "annual"

    positions_by_window, _ = load_positions(db_path, window_size)
    party_map = load_party_map(db_path)
    windows = list(positions_by_window.keys())
    centroids, _ = compute_party_centroids(positions_by_window, party_map, windows)

    # Mirror the app's multiselect: up to six parties.
    selected = list(centroids.keys())[:6]

    fig, trace_count, banner = select_trajectory_plot_data(
        positions_by_window,
        party_map,
        windows,
        selected_parties=selected,
        smooth_alpha=0.35,
    )

    # At most one trace per selected party (all-NaN parties may be dropped).
    assert trace_count <= len(selected), (
        f"trace_count ({trace_count}) should not exceed selected ({len(selected)})"
    )
    assert banner is None, "No fallback should be needed with valid data"
    assert len(fig.data) == trace_count
|
||||||
@ -1,69 +0,0 @@ |
|||||||
import sys
import types

# Stub out heavy optional dependencies so the unit tests can import
# `explorer` without a full runtime environment.
for _mod in ("duckdb", "plotly", "plotly.express", "plotly.graph_objects"):
    if _mod not in sys.modules:
        sys.modules[_mod] = types.ModuleType(_mod)

# Minimal Streamlit shim: only the module-level API surface that `explorer`
# touches at import time (the cache_data decorator plus no-op placeholders).
# This keeps the real streamlit package out of CI.
if "streamlit" not in sys.modules:
    _st = types.SimpleNamespace()

    def _cache_data(*a, **k):
        # Stand-in for st.cache_data: return the function unchanged.
        def _decorator(f):
            return f

        return _decorator

    _st.cache_data = _cache_data
    for _name in ("info", "caption", "subheader", "warning", "plotly_chart"):
        setattr(_st, _name, lambda *a, **k: None)
    _st.columns = lambda *a, **k: (None, None)
    sys.modules["streamlit"] = _st
|
||||||
|
|
||||||
from explorer import choose_trajectory_title |
|
||||||
from analysis import axis_classifier |
|
||||||
|
|
||||||
|
|
||||||
def test_trajectory_label_confidence_below_threshold():
    """Below-threshold confidence falls back to the semantic modal label,
    never the literal 'As N' string."""
    low_confidence_x = {
        "x_label": "Links\u2013Rechts",
        "x_label_confidence": {"2020": 0.5, "2021": 0.6},
    }
    assert choose_trajectory_title(
        low_confidence_x, "x", threshold=0.65
    ) == axis_classifier.display_label_for_modal("As 1", "x")

    low_confidence_y = {
        "y_label": "Progressief\u2013Conservatief",
        "y_label_confidence": {"2020": 0.5, "2021": None},
    }
    assert choose_trajectory_title(
        low_confidence_y, "y", threshold=0.65
    ) == axis_classifier.display_label_for_modal("As 2", "y")
|
||||||
|
|
||||||
|
|
||||||
def test_trajectory_label_confidence_above_threshold():
    """At-or-above-threshold confidence keeps the classifier's own label."""
    confident_x = {
        "x_label": "Links\u2013Rechts",
        "x_label_confidence": {"2020": 0.7, "2021": 0.65},
    }
    assert (
        choose_trajectory_title(confident_x, "x", threshold=0.65)
        == "Links\u2013Rechts"
    )

    confident_y = {
        "y_label": "Progressief\u2013Conservatief",
        "y_label_confidence": {"2020": 0.8},
    }
    assert (
        choose_trajectory_title(confident_y, "y", threshold=0.65)
        == "Progressief\u2013Conservatief"
    )
|
||||||
@ -1,56 +0,0 @@ |
|||||||
""" |
|
||||||
Test that trajectory plot renders even with edge cases. |
|
||||||
""" |
|
||||||
|
|
||||||
import pytest |
|
||||||
import numpy as np |
|
||||||
from unittest.mock import MagicMock, patch |
|
||||||
|
|
||||||
# Import the functions to test |
|
||||||
import sys |
|
||||||
from pathlib import Path |
|
||||||
|
|
||||||
sys.path.insert(0, str(Path(__file__).parent.parent)) |
|
||||||
|
|
||||||
from explorer_helpers import compute_party_centroids |
|
||||||
|
|
||||||
|
|
||||||
class TestTrajectoryPlotRendering:
    """Trajectory plot must render (or degrade gracefully) in edge cases."""

    def test_compute_party_centroids_returns_diagnostics(self):
        """The helper returns a (centroids, diagnostics) pair with counts."""
        positions_by_window = {
            "2024-Q1": {"MP1": (1.0, 2.0), "MP2": (3.0, 4.0)},
            "2024-Q2": {"MP1": (1.5, 2.5), "MP2": (3.5, 4.5)},
        }

        centroids, diagnostics = compute_party_centroids(
            positions_by_window,
            {"MP1": "PartyA", "MP2": "PartyA"},
            ["2024-Q1", "2024-Q2"],
        )

        assert isinstance(centroids, dict)
        assert isinstance(diagnostics, dict)
        assert "windows_with_data_count" in diagnostics
        assert diagnostics["windows_with_data_count"] == 2

    def test_compute_party_centroids_detects_all_nan_parties(self):
        """Parties whose centroids are NaN in every window are flagged."""
        positions_by_window = {
            "2024-Q1": {"MP1": (np.nan, np.nan)},
            "2024-Q2": {"MP1": (np.nan, np.nan)},
        }

        _centroids, diagnostics = compute_party_centroids(
            positions_by_window, {"MP1": "PartyA"}, ["2024-Q1", "2024-Q2"]
        )

        assert "PartyA" in diagnostics.get("parties_all_nan", [])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__": |
|
||||||
pytest.main([__file__, "-v"]) |
|
||||||
@ -1,65 +0,0 @@ |
|||||||
# Integration tests: ensure UI helpers never expose raw "As N" strings |
|
||||||
import re |
|
||||||
|
|
||||||
import sys |
|
||||||
import types |
|
||||||
|
|
||||||
# Lightweight stubs for optional heavy deps to allow importing explorer in tests |
|
||||||
for _mod in ("duckdb", "plotly", "plotly.express", "plotly.graph_objects"): |
|
||||||
if _mod not in sys.modules: |
|
||||||
sys.modules[_mod] = types.ModuleType(_mod) |
|
||||||
|
|
||||||
# Lightweight Streamlit shim used in tests: provide the small piece of the |
|
||||||
# API explorer imports at module-level (cache_data decorator and simple |
|
||||||
# placeholders). This avoids importing the real streamlit package in CI. |
|
||||||
if "streamlit" not in sys.modules: |
|
||||||
_st = types.SimpleNamespace() |
|
||||||
|
|
||||||
def _cache_data(*a, **k): |
|
||||||
def _decorator(f): |
|
||||||
return f |
|
||||||
|
|
||||||
return _decorator |
|
||||||
|
|
||||||
_st.cache_data = _cache_data |
|
||||||
_st.info = lambda *a, **k: None |
|
||||||
_st.caption = lambda *a, **k: None |
|
||||||
_st.subheader = lambda *a, **k: None |
|
||||||
_st.warning = lambda *a, **k: None |
|
||||||
_st.plotly_chart = lambda *a, **k: None |
|
||||||
_st.columns = lambda *a, **k: (lambda *x: (None, None))() |
|
||||||
sys.modules["streamlit"] = _st |
|
||||||
|
|
||||||
from explorer import choose_trajectory_title |
|
||||||
from analysis import axis_classifier |
|
||||||
|
|
||||||
|
|
||||||
def test_choose_trajectory_title_never_returns_raw_as(): |
|
||||||
""" |
|
||||||
Integration check: choose_trajectory_title is used to set Plotly axis titles. |
|
||||||
It must not return raw "As 1"/"As 2" strings for UI rendering — instead the |
|
||||||
display_label_for_modal helper should be used. |
|
||||||
""" |
|
||||||
# Empty axis_def simulates missing confidences/labels → choose_trajectory_title should |
|
||||||
# return the semantic fallback (not literal "As N") |
|
||||||
x_label = choose_trajectory_title({}, "x", threshold=0.65) |
|
||||||
y_label = choose_trajectory_title({}, "y", threshold=0.65) |
|
||||||
assert not re.match(r"^As \d", x_label) |
|
||||||
assert not re.match(r"^As \d", y_label) |
|
||||||
|
|
||||||
|
|
||||||
def test_display_label_for_modal_maps_raw_as_to_semantic_labels(): |
|
||||||
""" |
|
||||||
Guard: display_label_for_modal must never return a literal "As N" for any of |
|
||||||
the known modal inputs (including legacy "Stempatroon As N" and None). |
|
||||||
""" |
|
||||||
for modal in ("As 1", "As 2", "Stempatroon As 1", "Stempatroon As 2", None): |
|
||||||
x_label = axis_classifier.display_label_for_modal(modal, "x") |
|
||||||
y_label = axis_classifier.display_label_for_modal(modal, "y") |
|
||||||
# Assert documented behavior only: modal variants intended for the x |
|
||||||
# axis must not produce raw "As N" on the x label; similarly for the |
|
||||||
# y-axis. None should map to semantic defaults for both axes. |
|
||||||
if modal in ("As 1", "Stempatroon As 1", None): |
|
||||||
assert not re.match(r"^As \d", x_label) |
|
||||||
if modal in ("As 2", "Stempatroon As 2", None): |
|
||||||
assert not re.match(r"^As \d", y_label) |
|
||||||
@ -1,327 +0,0 @@ |
|||||||
# SVD Axis Analysis — Nederlandse Tweede Kamer |
|
||||||
**Datum:** 29 maart 2026 (herzien na review) |
|
||||||
**Window:** `current_parliament` (Schoof-kabinet, 2024–heden) |
|
||||||
**Methode:** Procrustes-uitgelijnd multi-window SVD (EVR), enkelvoudige window SVD (partijscores) |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Methode |
|
||||||
|
|
||||||
### Databronnen |
|
||||||
|
|
||||||
De analyse combineert twee databronnen die op verschillende manieren zijn gegenereerd: |
|
||||||
|
|
||||||
1. **Motiedata per as** — gegenereerd via `scripts/generate_svd_json.py` voor het venster `current_parliament`. Dit script laadt de SVD-vectoren van moties direct uit de `svd_vectors`-tabel en selecteert per component de 5 meest positief- en 5 meest negatief-ladende moties. Dit zijn **ruwe, enkelvoudige-window SVD-vectoren** — niet Procrustes-uitgelijnd. |
|
||||||
|
|
||||||
2. **Partijenscores per as** — berekend via `load_party_axis_scores` in `explorer.py`. Deze functie laadt de SVD-vectoren van individuele Kamerleden voor **uitsluitend** het venster `current_parliament`, matcht deze op naam met `mp_metadata` (filter: `tot_en_met IS NULL` voor huidige Kamerleden), groepeert per partij, en middelt. Dit zijn eveneens **ruwe, enkelvoudige-window SVD-vectoren**. |
|
||||||
|
|
||||||
> **Belangrijk:** Motiescores en partijscores zijn intern consistent — ze komen beide uit dezelfde enkelvoudige SVD-decompositie van het venster `current_parliament`. De componentassen zijn dezelfde. |
|
||||||
|
|
||||||
### SVD-pipeline en verklaard-variantieratios |
|
||||||
|
|
||||||
De **verklaard-variantie-ratios (EVR)** elders in de app (screeplot, compass) komen uit een *andere* pipeline: `compute_svd_spectrum` in `analysis/political_axis.py`. Deze functie laadt MP-vectoren uit **alle 41 tijdvensters** (11 jaarlijks: 2016–2026, 29 kwartaalvensters: 2019-Q1 t/m 2026-Q1, plus `current_parliament`), lijnt ze uit via orthogonale Procrustes-rotatie (sequentieel, elk venster op het vorige), stapelt ze in één matrix en past **globale centrering** toe (`Mc = M - M.mean(axis=0)`, niet per-window centrering). De SVD op deze gecombineerde matrix levert de EVR-percentages. |
|
||||||
|
|
||||||
**Mismatch:** De EVR-waarden (PC1 = 24.1%, etc.) beschrijven een ándere decompositie dan degene waaruit de partij- en motiescores komen. De componenten in de Procrustes-ruimte kunnen geroteerd zijn t.o.v. de enkelvoudige-window ruimte. In de praktijk is de correlatie vermoedelijk hoog voor de eerste componenten (de dominante structuur is stabiel over vensters), maar voor de kleinere componenten (PC7+) kunnen de assen afwijken. **De EVR-percentages moeten daarom als richtinggevend worden gelezen, niet als exacte maat voor de enkelvoudige-window componenten.** |
|
||||||
|
|
||||||
EVR (Procrustes-pipeline): PC1 = 24.1%, PC2 = 10.4%, PC3 = 7.9%, PC4 = 5.7%, PC5 = 4.3%, PC6 = 3.9%, PC7 = 3.4%, PC8 = 2.8%, PC9 = 2.6%, PC10 = 2.3%. Samen = 67.4%. |
|
||||||
|
|
||||||
### Labelmethode |
|
||||||
|
|
||||||
Voor elk van de 10 assen werd gekeken naar: |
|
||||||
- De 5 moties met de hoogste positieve lading en hun inhoud + indiener |
|
||||||
- De 5 moties met de hoogste negatieve lading en hun inhoud + indiener |
|
||||||
- De partijcentroïden gesorteerd hoog→laag op de betreffende SVD-dimensie |
|
||||||
|
|
||||||
Uit deze combinatie werd een label, uitleg, en poolbeschrijving afgeleid. Dit is een **interpretatieve** stap: de computer levert de structuur, de analist de betekenis. |
|
||||||
|
|
||||||
### Partijgroottes |
|
||||||
|
|
||||||
De `current_parliament`-window bevat SVD-vectoren voor 427 Kamerleden (iedereen die in deze parlementaire periode heeft gestemd). Daarvan matchen 150 met `mp_metadata` als huidig actief (`tot_en_met IS NULL`). De grootte per partij is cruciaal voor de betrouwbaarheid van centroïden: |
|
||||||
|
|
||||||
| Partij | N Kamerleden | Betrouwbaarheid | |
|
||||||
|--------|-------------|-----------------| |
|
||||||
| D66 | 26 | Hoog | |
|
||||||
| VVD | 22 | Hoog | |
|
||||||
| GroenLinks-PvdA | 20 | Hoog | |
|
||||||
| PVV | 19 | Hoog | |
|
||||||
| CDA | 18 | Hoog | |
|
||||||
| JA21 | 9 | Matig | |
|
||||||
| FVD | 7 | Matig | |
|
||||||
| Groep Markuszower | 7 | Matig | |
|
||||||
| SP | 3 | Laag — centroïde gebaseerd op 3 leden | |
|
||||||
| BBB | 3 | Laag | |
|
||||||
| ChristenUnie | 3 | Laag | |
|
||||||
| SGP | 3 | Laag | |
|
||||||
| PvdD | 3 | Laag | |
|
||||||
| DENK | 3 | Laag | |
|
||||||
| 50PLUS | 2 | Zeer laag — 2 leden | |
|
||||||
| Volt | 1 | **Onbetrouwbaar** — 1 lid | |
|
||||||
| Lid Keijzer | 1 | **Onbetrouwbaar** — 1 lid | |
|
||||||
|
|
||||||
> **Let op:** Hoge absolute scores bij kleine partijen (bijv. Volt −77 op PC2, SP −67 op PC2) kunnen het gevolg zijn van één of enkele Kamerleden met extreem stemgedrag. Zonder bootstrap-betrouwbaarheidsintervallen kunnen we niet beoordelen of deze centroïden stabiel zijn. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Bevindingen per as |
|
||||||
|
|
||||||
### PC1 — Links-rechts hoofdas (EVR: ~24%) |
|
||||||
|
|
||||||
**Positieve pool (hoog):** PVV (+54.9, N=19), SGP (+51.8, N=3), VVD (+35.2, N=22), ChristenUnie (+31.0, N=3), CDA (+11.2, N=18) |
|
||||||
**Negatieve pool (laag):** PvdD (−23.2, N=3), SP (−13.4, N=3), DENK (−11.0, N=3), GroenLinks-PvdA (−7.5, N=20) |
|
||||||
**Midden:** JA21 (+10.5, N=9), FVD (+5.0, N=7), D66 (+4.4, N=26), 50PLUS (+3.3, N=2), Groep Markuszower (+3.2, N=7), BBB (+2.9, N=3), Volt (−3.5, N=1), Lid Keijzer (+1.3, N=1) |
|
||||||
|
|
||||||
**Bevinding:** Dit is de klassieke links-rechts tegenstelling. Positieve motiethema's: defensie-uitbreiding (drones, NAVO-verplichtingen, Wet financiële defensieverplichtingen), gaswinning en energiecontinuïteit. Negatieve motiethema's: Israël-boycots, huurverlaging, zorgbuurthuizen, veroordeling van antipersoneelslandmijnen. |
|
||||||
|
|
||||||
**Opvallend:** PVV heeft de hoogste score van alle partijen (+54.9), boven SGP (+51.8). Dit weerspiegelt PVV's stemgedrag als kabinetsdragende partij: consequent meestemmen met defensie-uitbreiding, energiebeleid en het blokkeren van sociale uitbreidingsmoties. Let wel: dit meet *stemgedrag*, niet ideologische positie — PVV kan rechts stemmen als coalitiepartij terwijl de SGP als kleine oppositiepartij selectiever stemt. |
|
||||||
|
|
||||||
**Sterkte:** De EVR (~24%) is hoog en stabiel over meerdere vensters. De partijscores zijn ook consistent met bekende posities. Dit is de meest robuuste as. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### PC2 — Populistisch nationalisme vs. institutioneel progressivisme (EVR: ~10%) |
|
||||||
|
|
||||||
**Positieve pool (hoog):** PVV (+17.7, N=19), FVD (+3.8, N=7), Groep Markuszower (+2.1, N=7), BBB (+0.2, N=3) |
|
||||||
**Midden (~0):** Lid Keijzer (+0.05, N=1), JA21 (−0.4, N=9), 50PLUS (−6.5, N=2) |
|
||||||
**Negatieve pool (laag):** Volt (−77.4, N=1), SP (−67.1, N=3), PvdD (−66.6, N=3), GroenLinks-PvdA (−63.3, N=20), DENK (−59.9, N=3), ChristenUnie (−58.5, N=3), SGP (−24.9, N=3), D66 (−23.3, N=26), VVD (−15.1, N=22), CDA (−13.5, N=18) |
|
||||||
|
|
||||||
**Bevinding:** Deze as is orthogonaal aan PC1 en toont een fundamenteel andere scheidslijn dan links-rechts. De positieve pool is klein: alleen PVV, FVD, Groep Markuszower en BBB scoren positief — het populistisch-nationalistische bloc. Alle overige partijen scoren negatief, inclusief de traditioneel-rechtse VVD (−15.1), CDA (−13.5) en SGP (−24.9). ChristenUnie scoort zelfs sterk negatief (−58.5). |
|
||||||
|
|
||||||
Positieve motiethema's: artsen vrijpleiten voor hydroxychloroquine/ivermectine (anti-institutioneel wetenschapsnarratief), Syriërs terugsturen, geen geld aan Jordanië, richtlijn tijdelijke bescherming Oekraïne weigeren. Negatieve motiethema's: digitale toegankelijkheid Caribisch Nederland, ethiekprogramma Defensie, zorg voor slachtoffers bombardement Hawija, zorgkwaliteitsstandaarden. |
|
||||||
|
|
||||||
**Interpretatie:** PC2 scheidt het populistisch-nationalistische bloc (PVV/FVD/Groep Markuszower/BBB) van het **volledige overige parlement**. Dit is geen links-rechts verdeling maar een nativistisch-populistisch vs. institutioneel onderscheid. VVD, CDA en SGP staan ondanks hun rechtse PC1-posities stevig aan de negatieve kant — ze delen stemgedrag met GroenLinks-PvdA en D66 op thema's als internationale verantwoordelijkheid, institutionele zorg en wetenschappelijk beleid. |
|
||||||
|
|
||||||
**Score-asymmetrie:** De negatieve scores zijn veel groter dan de positieve (Volt −77 vs. PVV +18). Dit heeft twee mogelijke verklaringen: (1) het populistische bloc is klein en compact (weinig variatie), terwijl het anti-populistische bloc breed en gespreid is, waardoor het zwaartepunt van de as dichter bij de positieve pool ligt; (2) Volt (N=1), SP (N=3), PvdD (N=3) en ChristenUnie (N=3) hebben kleine fracties — hun extreme negatieve scores kunnen worden gedreven door het individuele stemgedrag van slechts 1–3 Kamerleden. Bootstrap-analyse zou nodig zijn om te bepalen welke verklaring domineert. |
|
||||||
|
|
||||||
**Twijfel:** Het vorige label "maatschappelijke verantwoordelijkheid" voor de negatieve pool was normatief geladen — het impliceerde dat de positieve pool onverantwoordelijk is. Het huidige label "institutioneel progressivisme" is neutraler maar nog steeds een versimpeling van een pool die SGP, VVD en SP samen omvat. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### PC3 — Verzorgingsstaat vs. bezuinigingen en marktwerking (EVR: ~8%) |
|
||||||
|
|
||||||
**Positieve pool:** PVV (+29.5, N=19), SP (+27.9, N=3), PvdD (+27.9, N=3), DENK (+25.3, N=3), GroenLinks-PvdA (+20.1, N=20), Volt (+12.2, N=1) |
|
||||||
**Negatieve pool:** VVD (−25.7, N=22), SGP (−13.1, N=3), CDA (−12.8, N=18), D66 (−6.3, N=26), ChristenUnie (−5.3, N=3), JA21 (−4.1, N=9) |
|
||||||
**Midden:** FVD (+3.0, N=7), Groep Markuszower (+0.5, N=7), Lid Keijzer (−0.4, N=1), BBB (−1.0, N=3), 50PLUS (−2.1, N=2) |
|
||||||
|
|
||||||
**Bevinding:** De meest opvallende as. PVV staat hier naast SP, PvdD en GroenLinks-PvdA — partijen die op PC1 aan tegengestelde kanten staan. De motiethema's verklaren dit: bezuinigingen op zorg schrappen (SP/Dijk, dossier 36 760 "Kabinetscrisis 2025"), gemeentefonds niet korten (SP/Dobbe), winstuitkeringen in zorg verbieden (SP/Dijk), veto bij ziekenhuisfusies (SP/Dobbe). Aan de negatieve kant: private investeerders in zorg (Jansen/D66), fiscale bedrijfsopvolgingsfaciliteiten (Van Oostenbruggen/VVD), doorgaan met besturen na kabinetscrisis (Yeşilgöz/VVD), defensie-uitgavengroeipad naar 3.5% bbp. |
|
||||||
|
|
||||||
**Interpretatie:** Dit is de **economische interventie-as**: sociale bescherming van publieke voorzieningen vs. marktliberalisme en bezuinigingen. PVV's positieve score weerspiegelt stemgedrag: PVV stemde mee tegen bezuinigingen op zorg en gemeenten, consistent met de achterban die sterk afhankelijk is van publieke voorzieningen. Let wel: SVD meet stempatronen, niet motivaties — of PVV dit doet vanuit sociale bewogenheid of anti-establishment populisme is uit de data niet af te leiden. |
|
||||||
|
|
||||||
**Kabinetscrisis:** Meerdere moties verwijzen naar dossier 36 760 ("Kabinetscrisis 2025"). De scheidslijn op PC3 (wie bezuinigt vs. wie beschermt) correleert met de breuklijn van die crisis. Dit is een correlatie, geen bewezen causaal verband — zonder een voor/na-vergelijking (SVD exclusief post-crisis moties) kunnen we niet vaststellen of de crisis de as *veroorzaakte* of dat de as een pre-existente spanning weergeeft die tot de crisis leidde. |
|
||||||
|
|
||||||
**Flip:** `flip: True` in de code. In de ruwe SVD-data scoren pro-verzorgingsstaatpartijen positief, maar in de compass-visualisatie wordt de as omgekeerd. Visuele validatie is nodig om te controleren of labels en weergave overeenkomen. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### PC4 — Pragmatisch centrisme vs. ideologische radicaliteit (EVR: ~6%) |
|
||||||
|
|
||||||
**Positieve pool:** D66 (+23.5, N=26), CDA (+23.1, N=18), 50PLUS (+16.6, N=2), VVD (+13.9, N=22), JA21 (+9.9, N=9), GroenLinks-PvdA (+8.9, N=20), Volt (+7.1, N=1), BBB (+5.0, N=3) |
|
||||||
**Midden:** PvdD (+3.1, N=3), PVV (+2.7, N=19), Lid Keijzer (+2.4, N=1), DENK (+1.6, N=3), Groep Markuszower (+1.3, N=7) |
|
||||||
**Negatieve pool:** FVD (−1.8, N=7), SP (−0.7, N=3), ChristenUnie (+0.6, N=3) |
|
||||||
|
|
||||||
**Bevinding:** De scheidslijn loopt tussen gevestigde centrumpartijen enerzijds en partijen die meer ideologisch gedreven stemmen anderzijds. Positieve motiethema's: openbare toiletten, vaderbetrokkenheid, samenwerking met Australië/Canada/VK, postcovid-expertisecentra. Dit zijn pragmatische, institutionele beleidsposities. Negatieve motiethema's: kinderen in pleeggezinnen van hetzelfde geslacht (FVD, anti-LGBTQ), migratiesaldo-cap van 60.000, verlaten van de WHO (FVD/Van Houwelingen), bescherming artikel 23 Grondwet in onderwijscurriculum (DENK/Ergin). |
|
||||||
|
|
||||||
**Opvallend:** De scores zijn klein vergeleken met PC1–PC3. Het verschil tussen D66 (+23.5) en FVD (−1.8) is slechts ~25 punten, terwijl op PC1 het verschil PVV→PvdD ~78 punten bedraagt. Dit maakt PC4 een subtielere dimensie waar de onderscheidende kracht beperkter is. |
|
||||||
|
|
||||||
**Flip:** `flip: True` in de code. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### PC5 — Christelijk-sociaal communitarisme (EVR: ~4%) |
|
||||||
|
|
||||||
**Positieve pool:** ChristenUnie (+23.5, N=3), SGP (+21.7, N=3), CDA (+12.1, N=18), D66 (+11.7, N=26), 50PLUS (+5.9, N=2), DENK (+4.9, N=3), JA21 (+4.3, N=9) |
|
||||||
**Negatieve pool:** VVD (−10.1, N=22), GroenLinks-PvdA (−5.5, N=20), PvdD (−4.2, N=3), Volt (+3.1, N=1) |
|
||||||
**Midden:** BBB (+1.4, N=3), Lid Keijzer (+1.1, N=1), PVV (−0.6, N=19), SP (+0.3, N=3), FVD (−0.04, N=7), Groep Markuszower (+0.2, N=7) |
|
||||||
|
|
||||||
**Bevinding:** ChristenUnie, SGP en CDA scoren positief — maar ook D66. Positieve motiethema's: schuldhulpverlening via vrijwilligersorganisaties, maatschappelijke diensttijd voor kansarme jongeren, gastouderopvang verbeteren, WW-hervorming alleen met betere omscholing. Negatieve motiethema's: wettelijke erkenning van meerouderschap, abortusrecht in EU-Handvest, poverty fund niet voor regulering, zwemdiplomastandaarden. |
|
||||||
|
|
||||||
**Interpretatie:** De as scheidt een *gemeenschapsgerichte* benadering (informele hulp, dienstplicht, sociale cohesie) van een *individueel-rechten* benadering (meerouderschap, abortus als grondrecht, deregulering). ChristenUnie/SGP/CDA stemmen hier vanuit christelijk-sociaal gedachtegoed; D66 scoort positief vanuit steun voor de sociaal-institutionele kant (WW, schuldhulp), ondanks verschillende waardenposities. |
|
||||||
|
|
||||||
D66 scoort dus positief op zowel PC4 (+23.5) als PC5 (+11.7). Dat is geen tegenstrijdigheid: PC4 en PC5 zijn orthogonale assen die verschillende dingen meten. Een partij kan tegelijk pragmatisch-centristisch (PC4) én pro-maatschappelijke infrastructuur (PC5) stemmen. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### PC6 — Klimaat, energie en culturele integratie (EVR: ~4%) |
|
||||||
|
|
||||||
**Positieve pool:** SGP (+29.7, N=3), JA21 (+24.6, N=9), FVD (+22.1, N=7), PVV (+19.4, N=19), Groep Markuszower (+19.1, N=7), ChristenUnie (+18.5, N=3), BBB (+11.6, N=3) |
|
||||||
**Negatieve pool:** GroenLinks-PvdA (−8.3, N=20), PvdD (−7.5, N=3), D66 (−7.3, N=26), Volt (−2.3, N=1) |
|
||||||
**Midden:** Lid Keijzer (+5.9, N=1), 50PLUS (+6.6, N=2), DENK (+3.3, N=3), CDA (+2.8, N=18), VVD (+2.3, N=22), SP (−0.7, N=3) |
|
||||||
|
|
||||||
**Bevinding:** Duidelijke energiepolarisatie. Positieve motiethema's: LNG-reserve als alternatief voor vulgraadverplichtingen, kernenergie erkennen op COP30, inventarisatie discriminatie/inclusiemeldpunten (Nanninga). Negatieve motiethema's: fossiele industrie weren van klimaatconferentie, overleg met moslimgemeenschappen bij integratiebeleid (Ergin/DENK), veroordeling aanvallen Israël op Libanon. |
|
||||||
|
|
||||||
**Interpretatie:** Twee thema's kruisen: (1) energiebeleid — fossiel/kernenergie vs. klimaat/hernieuwbaar, en (2) cultureel-politieke polarisatie — integratiebeleid en Midden-Oostenpolitiek. JA21/FVD/SGP/PVV stemmen zowel pro-fossiel als anti-progressieve inclusie; GroenLinks/Volt/D66 stemmen pro-klimaat én pro-inclusie. De combinatie is empirisch in de Nederlandse politiek: deze beleidsdomeinen correleren in stemgedrag, ook al zijn ze inhoudelijk onderscheiden. Een varimax-rotatie zou deze twee thema's mogelijk scheiden in afzonderlijke assen — dat is niet getest. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### PC7 — Bestuurlijk pragmatisme en implementatie (EVR: ~3%) *(indicatief label)* |
|
||||||
|
|
||||||
**Positieve pool:** ChristenUnie (+10.6, N=3), Volt (+4.7, N=1), DENK (+4.4, N=3), SP (+3.7, N=3), SGP (+2.9, N=3) |
|
||||||
**Negatieve pool:** JA21 (−4.4, N=9), FVD (−4.3, N=7), VVD (−4.0, N=22), GroenLinks-PvdA (−3.2, N=20) |
|
||||||
**Midden:** PVV (−1.1, N=19), BBB (−2.0, N=3), CDA (+0.7, N=18), PvdD (+0.03, N=3), D66 (+1.2, N=26), 50PLUS (−1.7, N=2), Lid Keijzer (−0.7, N=1), Groep Markuszower (−2.6, N=7) |
|
||||||
|
|
||||||
**Bevinding:** De minst interpreteerbare as. Veel motiethema's komen uit 2024 (vorige parlementaire periode), wat temporele ruis suggereert. Positieve motiethema's: kostenoverzicht producten eigen bodem (BBB/CU), papieren schoolboeken, invoeringstoets minimumloonverhoging mkb, A2-snelwegalternatief. Negatieve motiethema's: landelijk stookverbod (PvdD), strafbaarstelling verbranding religieuze geschriften (El Abassi/DENK), chroom-6 schadevergoedingen (SP/D66), tegenhouden nieuwe gaswinning. |
|
||||||
|
|
||||||
**Twijfel:** De scores zijn klein (max ~11 punten). GroenLinks-PvdA (−3.2) en SP (+3.7) staan tegenover elkaar ondanks hun verwantschap. Het `current_parliament`-venster bevat data uit twee parlementen, wat transitie-effecten kan veroorzaken. Dit label moet als **indicatief** worden gelezen — de as is te fragiel voor een definitieve interpretatie. |
|
||||||
|
|
||||||
**Flip:** `flip: True` in de code. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### PC8 — Europese defensie-integratie (EVR: ~3%) *(indicatief label)* |
|
||||||
|
|
||||||
**Positieve pool:** Volt (+8.8, N=1), D66 (+4.9, N=26) |
|
||||||
**Negatieve pool:** SP (−39.1, N=3), DENK (−35.1, N=3), PvdD (−26.2, N=3), 50PLUS (−9.7, N=2), SGP (−5.7, N=3), ChristenUnie (−4.8, N=3), VVD (−4.3, N=22) |
|
||||||
**Midden:** PVV (−0.4, N=19), CDA (−1.8, N=18), FVD (−2.0, N=7), JA21 (−2.0, N=9), BBB (−2.5, N=3), Groep Markuszower (−1.3, N=7), Lid Keijzer (−1.1, N=1), GroenLinks-PvdA (−1.7, N=20) |
|
||||||
|
|
||||||
**Bevinding:** Duidelijke thematische coherentie. Positieve motiethema's: militaire mobiliteit als topprioriteit EU/NAVO, militair Schengengebied, 35% Europese defensie-inkoop, Europees defensie-R&D-instituut (Volt/D66/Dassen). Negatieve motiethema's: ketenverantwoordelijkheid bij toeslagen (El Abassi/DENK), budgetrecht Presidium (Van Hijum), energiecontract flexibiliteit (Kops/PVV), corona-oversterfte (Agema/PVV). |
|
||||||
|
|
||||||
**Interpretatie:** De positieve pool (Volt, D66) is pro-Europese militaire integratie. De negatieve pool omvat SP (historisch anti-NAVO, anti-EU-militarisering), DENK (focus op binnenlandse rechtvaardigheid) en PvdD (pacifistisch). SP's sterk negatieve score (−39.1) weerspiegelt actief tégen deze moties stemmen, niet simpelweg het ontbreken van focus op dit thema. Bij een score van nul zou "geen focus" passen; een score van −39 betekent consequent andersom stemmen dan Volt/D66. DENK's −35.1 volgt dezelfde logica. |
|
||||||
|
|
||||||
**Twijfel:** Volt (N=1) domineert de positieve pool met slechts één Kamerlid. De betrouwbaarheid van die centroïde is nihil. D66 (N=26) is betrouwbaarder maar scoort slechts +4.9. De negatieve pool wordt gedomineerd door fracties met elk 3 leden. Deze as is thematisch coherent maar statistisch fragiel. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### PC9 — Decentraal bestuur en gemeenschapswaarden (EVR: ~3%) *(indicatief label)* |
|
||||||
|
|
||||||
**Positieve pool:** SGP (+35.9, N=3), ChristenUnie (+26.0, N=3), DENK (+7.0, N=3), SP (+3.6, N=3) |
|
||||||
**Negatieve pool:** PVV (−8.5, N=19), JA21 (−6.7, N=9), D66 (−6.7, N=26), Groep Markuszower (−5.7, N=7), FVD (−4.5, N=7), VVD (−4.4, N=22) |
|
||||||
**Midden:** CDA (+1.0, N=18), GroenLinks-PvdA (+0.8, N=20), PvdD (+0.2, N=3), BBB (−3.5, N=3), 50PLUS (−2.7, N=2), Lid Keijzer (−1.7, N=1), Volt (−0.4, N=1) |
|
||||||
|
|
||||||
**Bevinding:** Positieve motiethema's: naleving Financiële-verhoudingswet (gemeentefinanciën), beperking EU-arbeidsmigratie, tandartsopleiding Rotterdam (regionale toegankelijkheid), actieplan hallucinerende geneesmiddelen, milieuoplossing Bonaire. Negatieve motiethema's: moratorium geitenstallen (PvdD/Kostić), verbod gokadvertenties, verduidelijking voorlopige hechtenis (Sneller/D66), leegstandbelasting woningen, bescherming end-to-end-encryptie. |
|
||||||
|
|
||||||
**Twijfel:** SGP (+35.9) en ChristenUnie (+26.0) domineren sterk, maar beide hebben slechts 3 Kamerleden. De hoge scores kunnen individueel stemgedrag van enkele Kamerleden reflecteren. De combinatie SGP + ChristenUnie + DENK + SP is ideologisch divers en kan wijzen op een toevallig patroon bij lage EVR. |
|
||||||
|
|
||||||
**Flip:** `flip: True` in de code. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### PC10 — Institutioneel toezicht en handhaving (EVR: ~2%) *(indicatief label)* |
|
||||||
|
|
||||||
**Positieve pool:** DENK (+25.1, N=3), SP (+25.1, N=3), PvdD (+9.9, N=3), Volt (+9.3, N=1), D66 (+5.6, N=26) |
|
||||||
**Negatieve pool:** GroenLinks-PvdA (−14.0, N=20), SGP (−11.1, N=3), CDA (−6.0, N=18), 50PLUS (−4.5, N=2), JA21 (−2.9, N=9) |
|
||||||
**Midden:** FVD (+2.2, N=7), PVV (+1.3, N=19), BBB (−1.3, N=3), VVD (−1.0, N=22), ChristenUnie (−2.8, N=3), Lid Keijzer (−0.5, N=1), Groep Markuszower (−0.2, N=7) |
|
||||||
|
|
||||||
**Bevinding:** Positieve motiethema's: minder tijdsintensieve schoolinspecties, "integratie geldt voor nieuwkomers, niet voor gevestigde Nederlanders" (Ergin/DENK), toeslagenouders recht op persoonlijk dossier (Dijk/SP), tegemoetkoming arbeidsongeschikten behouden, verlaging leeftijdsdrempel kindgesprekken. Negatieve motiethema's: aangifteplicht scholen bij veiligheidsincidenten, rookverbod auto's met kinderen, gelijkstelling braakliggend landbouwgrond, verhoogd beloningsgeld tipgevers, terrorismebewustzijn anti-radicaliseringsorganisaties. |
|
||||||
|
|
||||||
**Opvallend:** GroenLinks-PvdA (−14.0) staat tegenover SP (+25.1) en DENK (+25.1). Drie ideologisch verwante partijen, toch tegenover elkaar. De negatieve motiethema's (aangifteplicht, rookverbod, handhaving) zijn regulerende maatregelen die GroenLinks-PvdA steunt. SP en DENK zijn sceptischer over toezicht dat kwetsbare groepen disproportioneel kan raken. |
|
||||||
|
|
||||||
**Twijfel:** Bij een EVR van ~2% is dit de zwakste as. SP en DENK (elk N=3) domineren de positieve pool. Dit label is **hooguit indicatief**. |
|
||||||
|
|
||||||
**Flip:** `flip: True` in de code. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Twijfels en beperkingen |
|
||||||
|
|
||||||
### 1. Twee vectorruimtes, één verhaal |
|
||||||
|
|
||||||
De **belangrijkste methodologische beperking** is dat deze analyse twee verschillende SVD-decompositie combineert in één narratief: |
|
||||||
|
|
||||||
| Datapunt | Bron | Vectorruimte | |
|
||||||
|----------|------|-------------| |
|
||||||
| Partijscores | `load_party_axis_scores` | Enkelvoudig venster `current_parliament` | |
|
||||||
| Motiescores | `generate_svd_json.py` | Enkelvoudig venster `current_parliament` | |
|
||||||
| EVR-percentages | `compute_svd_spectrum` | Procrustes-uitgelijnd, 41 vensters | |
|
||||||
| Compass-posities | `compute_2d_axes` | Procrustes-uitgelijnd, 41 vensters | |
|
||||||
|
|
||||||
Motiescores en partijscores zijn onderling consistent (dezelfde SVD), maar de EVR-percentages beschrijven een andere decompositie. De "24.1%" bij PC1 verwijst naar de Procrustes-ruimte, niet naar de ruimte waaruit de scores komen. Voor PC1–PC3 maakt dit waarschijnlijk weinig verschil (de dominante structuur is stabiel), maar voor PC7+ kunnen de assen significant afwijken. |
|
||||||
|
|
||||||
**Aanbeveling:** Bereken EVR-percentages voor de enkelvoudige `current_parliament`-decompositie en vergelijk met de Procrustes-EVR. Als ze sterk afwijken, zijn de labels voor de hogere componenten onbetrouwbaar. |
|
||||||
|
|
||||||
### 2. NSC ontbreekt in de data |
|
||||||
|
|
||||||
Nieuw Sociaal Contract (NSC, Pieter Omtzigt) is afwezig in de partijscores. De `mp_metadata`-tabel bevat geen NSC-vermelding — vermoedelijk omdat partijaanduidingen uit een oudere dataset stammen. NSC is een coalitiedeelnemer (Schoof-kabinet: PVV + VVD + NSC + BBB) met ~20 zetels. Zonder NSC missen we ~13% van de Kamer en een sleutelpartij voor de interpretatie van PC3 (kabinetscrisis), PC4 (centrisme) en PC5 (communitarisme). |
|
||||||
|
|
||||||
### 3. Kleine fracties domineren de analyse |
|
||||||
|
|
||||||
Van de 17 partijen hebben 9 partijen ≤3 Kamerleden in de data. Partijen met N=3 (SP, BBB, CU, SGP, PvdD, DENK) en N=1–2 (Volt, Lid Keijzer, 50PLUS) leveren centroïden die extreem gevoelig zijn voor individueel stemgedrag. Volt's −77.4 op PC2 is het stemgedrag van één persoon, niet van een partij. |
|
||||||
|
|
||||||
**Aanbeveling:** Bereken bootstrap-betrouwbaarheidsintervallen (resample Kamerleden per partij met teruglegging, herbereken centroïden, rapporteer 95%-interval). Rapporteer ook standaarddeviaties binnen partijen om te laten zien of partijen homogeen stemmen of intern verdeeld zijn. |
|
||||||
|
|
||||||
### 4. Flip-waarden niet visueel gevalideerd |
|
||||||
|
|
||||||
Vijf van de tien assen hebben `flip: True`: |
|
||||||
|
|
||||||
| As | Flip | Betekenis | |
|
||||||
|----|------|-----------| |
|
||||||
| PC1 | False | — | |
|
||||||
| PC2 | False | — | |
|
||||||
| PC3 | **True** | Pro-verzorgingsstaat partijen (positief in SVD) worden visueel omgekeerd | |
|
||||||
| PC4 | **True** | Centrumpartijen (positief in SVD) worden visueel omgekeerd | |
|
||||||
| PC5 | False | — | |
|
||||||
| PC6 | False | — | |
|
||||||
| PC7 | **True** | CU/Volt/DENK (positief in SVD) worden visueel omgekeerd | |
|
||||||
| PC8 | False | — | |
|
||||||
| PC9 | **True** | SGP/CU (positief in SVD) worden visueel omgekeerd | |
|
||||||
| PC10 | **True** | DENK/SP (positief in SVD) worden visueel omgekeerd | |
|
||||||
|
|
||||||
De flip-logica in de code keert scores om en wisselt pool-labels. Geen van deze is visueel gevalideerd in de lopende Streamlit-app. |
|
||||||
|
|
||||||
**Risico:** Als een flip-waarde verkeerd staat, ziet de gebruiker partijen aan de verkeerde kant van de as t.o.v. het label. Dit moet worden gecontroleerd. |
|
||||||
|
|
||||||
### 5. PC7–PC10 zijn fragiel en mogelijk ruis |
|
||||||
|
|
||||||
Met EVR ~2–3% verklaren PC7–PC10 weinig variantie. De scores zijn klein (max ~35 punten bij partijen met N=3), de partijcombinaties zijn ideologisch divers, en de motiethema's zijn soms temporeel heterogeen (PC7). Het is niet vastgesteld dat deze assen boven het ruisniveau liggen. |
|
||||||
|
|
||||||
**Aanbeveling:** Voer een parallelle analyse of scree-test uit om te bepalen hoeveel componenten significant boven ruis liggen. Als dat 6 componenten is, zijn PC7–PC10 beter te verwijderen dan ze met een indicatief label te presenteren. |
|
||||||
|
|
||||||
### 6. Geen rotatie toegepast |
|
||||||
|
|
||||||
De analyse gebruikt ruwe SVD-componenten die **variantie maximaliseren**, niet interpreteerbaarheid. Varimax- of promax-rotatie kan helpen om componenten te ontmengen. Specifiek PC6 (klimaat + culturele integratie) zou onder varimax mogelijk in twee afzonderlijke assen uiteenvallen. Dit is niet getest. |
|
||||||
|
|
||||||
### 7. Geen externe validatie |
|
||||||
|
|
||||||
De interpretaties zijn niet gevalideerd tegen externe bronnen zoals de Chapel Hill Expert Survey (CHES), Kieskompas, of DW-NOMINATE-stijl ideaalpuntschattingen. Een correlatieanalyse tussen PC1 en CHES links-rechts scores zou de interpretatie substantieel versterken. Zonder externe validatie is het label "links-rechts" een empirisch onderbouwde maar niet geverifieerde claim. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Sterktes |
|
||||||
|
|
||||||
### 1. Enkelvoudige-window consistentie |
|
||||||
|
|
||||||
Hoewel er een mismatch is met de Procrustes-EVR, zijn de partijscores en motiescores wél intern consistent: ze komen uit dezelfde SVD-decompositie. De labels worden niet geïnterpreteerd op basis van ongerelateerde vectorruimtes. |
|
||||||
|
|
||||||
### 2. Partijcentroïden op basis van individueel stemgedrag |
|
||||||
|
|
||||||
De partijscores zijn niet geïmputeerd of geschat maar berekend uit de SVD-vectoren van individuele Kamerleden die daadwerkelijk hebben gestemd. Dit geeft (voor grote fracties) een nauwkeuriger beeld dan expert-surveys. |
|
||||||
|
|
||||||
### 3. Motie-inhoud als interpretatiehulp |
|
||||||
|
|
||||||
Door de meest ladende motiethema's te inspecteren is de analyse verankerd in concrete politieke uitspraken. Dit maakt de labels falsifieerbaar. |
|
||||||
|
|
||||||
### 4. Actuele data |
|
||||||
|
|
||||||
De analyse gebruikt motiedata t/m maart 2026, inclusief de kabinetscrisis van juni 2025. Dat maakt de labels representatief voor de huidige politieke situatie. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Conclusies |
|
||||||
|
|
||||||
### Wat de assen ons vertellen over de Nederlandse politiek (2024–2026) |
|
||||||
|
|
||||||
**Dominante structuur:** De Nederlandse politiek kent een klassieke links-rechts tegenstelling (PC1, ~24% EVR) die het meeste stemgedrag verklaart. De tweede dimensie is een **populisme-as** (PC2, ~10%) die het blok PVV/FVD/BBB/Groep Markuszower isoleert van het volledige overige parlement — inclusief VVD, CDA en SGP. |
|
||||||
|
|
||||||
**De verzorgingsstaat als onverwachte coalitie:** PC3 laat zien dat PVV en SP gelijk stemmen bij bezuinigingsdebatten. PVV-kiezers zijn kwetsbaar voor bezuinigingen en PVV stemde consequent tegen bezuinigingsmaatregelen. De motiedata bevat directe verwijzingen naar de kabinetscrisis van 2025. |
|
||||||
|
|
||||||
**Christelijk-sociale partijen zijn intern coherent:** ChristenUnie en SGP scoren consistent hoog op meerdere assen (PC5, PC9) die gemeenschapsoriëntatie meten. Let wel: beide fracties tellen slechts 3 Kamerleden — de consistentie kan individueel zijn. |
|
||||||
|
|
||||||
**GroenLinks-PvdA en SP zijn niet hetzelfde:** Op PC10 staan ze tegenover elkaar. GroenLinks-PvdA steunt institutionele regulering en handhaving; SP en DENK zijn sceptischer over overheidstoezicht op kwetsbare groepen. |
|
||||||
|
|
||||||
**Kleine assen zijn fragiel:** PC7–PC10 vangen echte maar kleine variatie op. Labels voor deze assen zijn indicatief. Formele dimensionaliteitstoetsing is nodig om te bepalen hoeveel assen inhoudelijk betekenisvol zijn. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Vervolgstappen |
|
||||||
|
|
||||||
1. **NSC identificeren:** Zoek of NSC-Kamerleden in `mp_metadata` staan onder een oudere partijnaam of zonder partijaanduiding. Ca. 20 Kamerleden; hun afwezigheid is een gat. |
|
||||||
|
|
||||||
2. **Flip-waarden valideren:** Alle 5 assen met `flip: True` (PC3, PC4, PC7, PC9, PC10) visueel controleren in de Streamlit-compass. |
|
||||||
|
|
||||||
3. **Enkelvoudige-window EVR berekenen:** Voer SVD uit op alleen het `current_parliament`-venster en vergelijk EVR met de Procrustes-EVR. Dit bepaalt hoe sterk de twee ruimtes afwijken. |
|
||||||
|
|
||||||
4. **Bootstrap-betrouwbaarheidsintervallen:** Bereken per partij een 95%-interval op de centroïde via resampling van Kamerleden. Rapporteer standaarddeviaties. |
|
||||||
|
|
||||||
5. **Dimensionaliteitstoetsing:** Parallelle analyse of scree-test om te bepalen hoeveel componenten boven ruis liggen. Overweeg PC7–PC10 te verwijderen als ze niet significant zijn. |
|
||||||
|
|
||||||
6. **Varimax-rotatie testen:** Pas varimax toe op de eerste 6 componenten en controleer of PC6 uiteenvalt in afzonderlijke klimaat- en integratie-assen. |
|
||||||
|
|
||||||
7. **Externe validatie:** Correleer PC1-scores met CHES links-rechts expert-oordelen voor dezelfde partijen. Dit versterkt (of weerlegt) de links-rechts interpretatie. |
|
||||||
File diff suppressed because one or more lines are too long
@ -1,55 +1,51 @@ |
|||||||
# format: <line>#<hash>#<anchor>|<content> |
# Session: continuity-ledger |
||||||
# use refs exactly as shown in hashline edit/patch tools |
Updated: 2026-03-28T12:00:00Z |
||||||
#HL REV:C4181A89 |
|
||||||
#HL 1#AD2#963|# Session: continuity-ledger |
## Goal |
||||||
#HL 2#625#EA0|Updated: 2026-03-31T12:00:00Z |
Preserve the essential session context and state for the stemwijzer project so work can resume seamlessly after context clears. |
||||||
#HL 3#DA3#29F| |
|
||||||
#HL 4#3B8#9B2|## Goal |
## Constraints |
||||||
#HL 5#49D#054|Preserve the essential session context and state for the stemwijzer project so work can resume seamlessly after context clears. |
- Keep the ledger concise; only essential information is recorded. |
||||||
#HL 6#DA3#B25| |
- Focus on WHAT and WHY, not HOW. |
||||||
#HL 7#3CD#7E4|## Constraints |
- Mark uncertain information explicitly as UNCONFIRMED. |
||||||
#HL 8#343#88A|- Keep the ledger concise; only essential information is recorded. |
- Include current git branch and key file paths. |
||||||
#HL 9#C8A#AD0|- Focus on WHAT and WHY, not HOW. |
- Never store secrets or values from .env files. |
||||||
#HL 10#7DD#B90|- Mark uncertain information explicitly as UNCONFIRMED. |
|
||||||
#HL 11#04E#272|- Include current git branch and key file paths. |
## Progress |
||||||
#HL 12#CCD#F02|- Never store secrets or values from .env files. |
### Done |
||||||
#HL 13#DA3#A4D| |
- [x] Determine need for a continuity ledger and file location. |
||||||
#HL 14#E5A#9FA|## Progress |
- [x] Create and add this continuity ledger file to the repository (this file). UNCONFIRMED: whether committed/pushed to remote. |
||||||
#HL 15#E30#F0C|### Done |
|
||||||
#HL 16#829#1C2|- [x] Determine need for a continuity ledger and file location. |
### In Progress |
||||||
#HL 17#906#394|- [x] Create and add this continuity ledger file to the repository (this file). UNCONFIRMED: whether committed/pushed to remote. |
- [ ] Monitor and merge subsequent ledger updates when provided (ongoing). |
||||||
#HL 18#B2A#001|- [x] Monitor and merge subsequent ledger updates when provided (inspected other CONTINUITY_* ledgers on 2026-03-31T12:00:00Z). (UNCONFIRMED: whether merged/committed) |
|
||||||
#HL 19#DA3#387| |
### Blocked |
||||||
#HL 20#AC7#256|### In Progress |
- None |
||||||
#HL 21#405#F17|- [ ] Short QA: sample similarity lookups (N=20-50) to validate fused vectors (see CONTINUITY_stemwijzer.md). Estimated effort: 30–60 minutes. (UNCONFIRMED assignment) |
|
||||||
#HL 22#DA3#77C| |
## Key Decisions |
||||||
#HL 23#8B6#828|### Blocked |
- **Store concise session state in thoughts/ledgers/**: keeps context portable and easy to merge. |
||||||
#HL 24#2A1#2DC|- None |
- **Minimal fields only (goal, constraints, progress, decisions, next steps, file ops, context)**: reduces noise and maintenance. |
||||||
#HL 25#DA3#C2F| |
|
||||||
#HL 26#7A9#773|## Key Decisions |
## Next Steps |
||||||
#HL 27#20F#D99|- **Store concise session state in thoughts/ledgers/**: keeps context portable and easy to merge. |
1. Provide previous ledger content on subsequent updates so merges preserve full history. |
||||||
#HL 28#4B6#2BB|- **Minimal fields only (goal, constraints, progress, decisions, next steps, file ops, context)**: reduces noise and maintenance. |
2. Use this ledger as the single source for resuming interrupted sessions; update "In Progress" items as work proceeds. |
||||||
#HL 29#DA3#F5B| |
3. Coordinate short QA on recent fusion/similarity run (see CONTINUITY_stemwijzer.md) in a separate session if needed. |
||||||
#HL 30#62A#B91|## Next Steps |
|
||||||
#HL 31#22B#0CD|1. Provide previous ledger content on subsequent updates so merges preserve full history. |
## File Operations |
||||||
#HL 32#E49#DA8|2. Use this ledger as the single source for resuming interrupted sessions; update "In Progress" items as work proceeds. |
### Read |
||||||
#HL 33#4B7#4A5|3. Coordinate short QA on recent fusion/similarity run (see CONTINUITY_stemwijzer.md) in a separate session if needed. |
- `README.md` |
||||||
#HL 34#DA3#1D0| |
- `thoughts/ledgers/CONTINUITY_stemwijzer.md` (INSPECTED) |
||||||
#HL 35#1CA#DCD|## File Operations |
- `thoughts/ledgers/CONTINUITY_fusion_similarity_run.md` (INSPECTED) |
||||||
#HL 36#0F3#F62|### Read |
|
||||||
#HL 37#256#5B3|- `README.md` |
### Modified |
||||||
#HL 38#A0D#268|- `thoughts/ledgers/CONTINUITY_stemwijzer.md` (INSPECTED) |
- `thoughts/ledgers/CONTINUITY_continuity-ledger.md` (this file) |
||||||
#HL 39#AC9#FE0|- `thoughts/ledgers/CONTINUITY_fusion_similarity_run.md` (INSPECTED) |
|
||||||
#HL 40#DA3#081| |
## Critical Context |
||||||
#HL 41#455#EBF|### Modified |
- Repository root: /home/sgeboers/Projects/stemwijzer |
||||||
#HL 42#3F4#1DD|- `thoughts/ledgers/CONTINUITY_continuity-ledger.md` (this file) |
- Current git branch: `main` |
||||||
#HL 43#DA3#C78| |
- Other existing continuity ledgers: `CONTINUITY_stemwijzer.md`, `CONTINUITY_fusion_similarity_run.md` |
||||||
#HL 44#2BA#352|## Critical Context |
- UNCONFIRMED: whether this file has been committed/pushed to remote. |
||||||
#HL 45#112#C18|- Repository root: /home/sgeboers/Projects/stemwijzer |
|
||||||
#HL 46#9CD#0EE|- Current git branch: `main` (UNCONFIRMED: local workspace branch) |
## Working Set |
||||||
#HL 47#DEF#90F|- Other existing continuity ledgers: `CONTINUITY_stemwijzer.md`, `CONTINUITY_fusion_similarity_run.md` |
- Branch: `main` |
||||||
#HL 48#2D0#620|- UNCONFIRMED: whether this file has been committed/pushed to remote. |
- Key files: `README.md`, `thoughts/ledgers/CONTINUITY_continuity-ledger.md`, `thoughts/ledgers/CONTINUITY_stemwijzer.md`, `thoughts/ledgers/CONTINUITY_fusion_similarity_run.md` |
||||||
#HL 49#DA3#373| |
|
||||||
#HL 50#7C4#A51|## Working Set |
|
||||||
#HL 51#381#266|- Branch: `main` |
|
||||||
#HL 52#BD8#51B|- Key files: `README.md`, `thoughts/ledgers/CONTINUITY_continuity-ledger.md`, `thoughts/ledgers/CONTINUITY_stemwijzer.md`, `thoughts/ledgers/CONTINUITY_fusion_similarity_run.md` |
|
||||||
|
|||||||
@ -1,96 +0,0 @@ |
|||||||
--- |
|
||||||
date: 2026-03-28 |
|
||||||
topic: "Compass UI Improvements" |
|
||||||
status: validated |
|
||||||
--- |
|
||||||
|
|
||||||
# Compass UI Improvements |
|
||||||
|
|
||||||
## Problem Statement |
|
||||||
|
|
||||||
Three separate issues degrade the political compass UI: |
|
||||||
|
|
||||||
1. **SVD axis descriptions (axes 3–5) are outdated.** The current `label`, `explanation`, `positive_pole`, and `negative_pole` strings in `SVD_THEMES` were written for an earlier dataset and no longer match the structural patterns the axes actually capture. Using single-year (2024) centroid snapshots to verify this was misleading — the fix must be grounded in multi-year averages and motion-level content. |
|
||||||
|
|
||||||
2. **Y-axis direction indicators are broken.** The current `"Progressief ↑ / Conservatief ↓"` string is passed as the Plotly Y-axis title. Plotly rotates axis titles 90° counter-clockwise, so the arrows end up pointing sideways instead of up/down. This appears in the two compass scatter plots and in the trajectories tab. |
|
||||||
|
|
||||||
3. **No voting discipline context.** The compass shows where parties sit spatially but gives no sense of whether a party votes as a bloc. This context would make the compass more interpretable. |
|
||||||
|
|
||||||
## Constraints |
|
||||||
|
|
||||||
- No new DB tables or schema changes. |
|
||||||
- `compute_party_discipline` reads `mp_votes` where `mp_name LIKE '%,%'` (individual MP rows only — party-aggregate rows are excluded). |
|
||||||
- Skip discipline section if fewer than 5 roll-call motions in the selected window. |
|
||||||
- Follow existing patterns in `explorer.py` and `database.py`. |
|
||||||
- Tests use `uv run pytest`. |
|
||||||
|
|
||||||
## Approach |
|
||||||
|
|
||||||
**Change 1 — Axis descriptions:** Derive corrected descriptions from multi-year party centroid averages (all annual windows, not just 2024) and from the motion-level content that loads high/low on each axis. Update only `label`, `explanation`, `positive_pole`, `negative_pole` in `SVD_THEMES` entries for axes 3, 4, and 5. The `flip` boolean is not changed. |
|
||||||
|
|
||||||
**Change 2 — Y-axis arrows:** Remove the ↑/↓ characters from the axis title string (set to plain `"Progressief / Conservatief"`). Add two `fig.add_annotation` calls per chart: top-center `"▲ Progressief"` and bottom-center `"▼ Conservatief"`, using `xref="paper", yref="paper"`, `showarrow=False`, styled to be subtle (small font, muted color). Apply to both compass scatter plots and the trajectories chart. |
|
||||||
|
|
||||||
**Change 3 — Voting discipline:** Add a `compute_party_discipline(db_path, start_date, end_date)` function in `explorer.py` that queries individual MP votes, computes per-party Rice index (fraction voting with party majority), and returns a DataFrame with columns `[party, n_motions, discipline]`. In `build_compass_tab`, after rendering the compass chart, call this function with the window's date range, and render: (a) a horizontal bar chart sorted ascending (least disciplined at top), and (b) a small table showing the three most and three least disciplined parties. If fewer than 5 motions, show a brief explanatory message instead. |
|
||||||
|
|
||||||
## Architecture |
|
||||||
|
|
||||||
All changes are confined to `explorer.py`. No changes to `analysis/`, `database.py`, or test files (the discipline function is a read-only helper, not shared infrastructure). |
|
||||||
|
|
||||||
## Components |
|
||||||
|
|
||||||
**`SVD_THEMES` dict (explorer.py ~line 1156)** |
|
||||||
- Entries for axes 3, 4, 5 updated in-place. |
|
||||||
- New text is based on multi-year patterns (see Data Flow below). |
|
||||||
|
|
||||||
**Y-axis annotation helper (explorer.py)** |
|
||||||
- Small inline helper or inline code block that adds the two direction annotations to any given `fig`. |
|
||||||
- Called once after each `px.scatter(...)` and once after the trajectories `fig` is built. |
|
||||||
|
|
||||||
**`compute_party_discipline(db_path, start_date, end_date)` (explorer.py)** |
|
||||||
- Connects to DuckDB read-only. |
|
||||||
- Queries `mp_votes` filtered to individual MPs (`mp_name LIKE '%,%'`) and date range. |
|
||||||
- Groups by `(motion_id, party)`, counts votes per token, determines majority token, computes Rice index per motion per party. |
|
||||||
- Averages Rice index across motions per party. |
|
||||||
- Returns `pd.DataFrame(columns=["party", "n_motions", "discipline"])` or empty DataFrame. |
|
||||||
|
|
||||||
**`build_compass_tab` additions (explorer.py ~line 841+)** |
|
||||||
- After `st.plotly_chart(fig, ...)`, map the current `window_idx` to a `(start_date, end_date)` range. |
|
||||||
- Call `compute_party_discipline(...)`. |
|
||||||
- If result has ≥ 5 motions: render bar chart + extremes table under a `st.subheader("Stemgedrag cohesie")`. |
|
||||||
- If not: `st.caption("Te weinig hoofdelijke stemmingen voor cohesieanalyse.")`. |
|
||||||
|
|
||||||
## Data Flow |
|
||||||
|
|
||||||
**Axis description research (prior to implementation):** |
|
||||||
Multi-year centroid averages are computed by averaging each party's SVD vector across all annual windows in which it appears. The axis 3/4/5 descriptions are updated to reflect these stable patterns rather than any single year's snapshot. |
|
||||||
|
|
||||||
**Discipline computation:** |
|
||||||
``` |
|
||||||
mp_votes (individual MPs, date range) |
|
||||||
→ GROUP BY (motion_id, party, vote) → vote counts |
|
||||||
→ determine majority_vote per (motion_id, party) |
|
||||||
→ Rice index = (count voting with majority) / (total voting) per motion per party |
|
||||||
→ average Rice index across motions → per-party score |
|
||||||
→ return DataFrame |
|
||||||
``` |
|
||||||
|
|
||||||
## Error Handling |
|
||||||
|
|
||||||
- `compute_party_discipline` returns an empty DataFrame on any DB exception (logged, not raised), following the pattern of other read helpers in `explorer.py`. |
|
||||||
- Empty DataFrame → show the "too few motions" caption (same path as < 5 motions). |
|
||||||
- The Y-axis annotation is purely visual — no error paths needed. |
|
||||||
- Axis description changes are static strings — no runtime risk. |
|
||||||
|
|
||||||
## Testing Strategy |
|
||||||
|
|
||||||
- The `compute_party_discipline` function is tested with a small in-memory DuckDB fixture in `tests/test_political_compass.py`: |
|
||||||
- Construct a fixture with 6 motions, 2 parties, each with varying vote splits. |
|
||||||
- Assert returned DataFrame has correct columns and that discipline scores are in [0, 1]. |
|
||||||
- Assert empty DataFrame is returned when date range has 0 motions. |
|
||||||
- Y-axis annotation: no unit test needed (visual only, trivially correct). |
|
||||||
- Axis description changes: no unit test needed (static strings). |
|
||||||
- Run all tests with `uv run pytest tests/test_political_compass.py -v` after each change. |
|
||||||
|
|
||||||
## Open Questions |
|
||||||
|
|
||||||
None. All design decisions are resolved. |
|
||||||
@ -1,168 +0,0 @@ |
|||||||
--- |
|
||||||
date: 2026-03-29 |
|
||||||
topic: "Bootstrap confidence intervals and data enrichment" |
|
||||||
status: validated |
|
||||||
--- |
|
||||||
|
|
||||||
# Bootstrap Confidence Intervals & Data Enrichment |
|
||||||
|
|
||||||
## Problem Statement |
|
||||||
|
|
||||||
The SVD axis charts show party centroid scores as point estimates with no indication of reliability. Volt (N=1) and D66 (N=49) look equally confident. Additionally: |
|
||||||
- 2016–2018 motions lack body text, weakening embedding quality for those windows |
|
||||||
- `party_svd_scores.json` is a stale ad-hoc file missing NSC — should be deleted |
|
||||||
|
|
||||||
## Constraints |
|
||||||
|
|
||||||
- No re-SVD per bootstrap replicate — too expensive, only centroid uncertainty needed |
|
||||||
- Single-window bootstrap only — party scores come from `current_parliament` raw SVD vectors, not the Procrustes pipeline |
|
||||||
- Functional Python, using existing patterns (uv, duckdb, numpy) |
|
||||||
- Don't break existing Streamlit rendering — error bars are additive |
|
||||||
- Fixed random seed for reproducibility |
|
||||||
|
|
||||||
## Approach |
|
||||||
|
|
||||||
**Single-window centroid bootstrap.** For each party, resample its N MPs with replacement 1000×, recompute centroid per replicate, take percentile CIs. Cheap (no re-SVD needed), directly answers "how reliable is this score?". |
|
||||||
|
|
||||||
Rejected alternatives: |
|
||||||
- Multi-window Procrustes bootstrap: 1000× SVD cost, requires orientation canonicalization. Overkill. |
|
||||||
- Analytical SE (std/sqrt(N)): assumes normality, misses skewed distributions. |
|
||||||
|
|
||||||
## Components |
|
||||||
|
|
||||||
### A. Download Script Enhancement (`scripts/download_past_year.py`) |
|
||||||
|
|
||||||
Add two CLI flags: |
|
||||||
- `--skip-details` (default: `True`, matching current hardcoded behavior) — when `False`, fetches body text via `_get_motion_details` → `_fetch_body_text` |
|
||||||
- `--update-existing` (default: `False`) — when `True`, re-processes motions already in DB to fetch missing body_text and update the record |
|
||||||
|
|
||||||
The update-existing flow: |
|
||||||
1. Query motions table for rows WHERE date BETWEEN start_date AND end_date AND (body_text IS NULL OR body_text = '') |
|
||||||
2. Extract besluit_id from the URL column (format: `https://www.tweedekamer.nl/kamerstukken/stemmingsuitslagen/{besluit_id}` — take last path segment) |
|
||||||
3. For each such motion, call `api._get_motion_details(besluit_id)` to fetch body_text |
|
||||||
4. UPDATE the motions row with the new body_text (and title/description if also missing) |
|
||||||
|
|
||||||
Note: the motions table has no `besluit_id` column — it's only embedded in the URL. The update flow must parse it from the URL. |
|
||||||
|
|
||||||
Run once after implementation: `--start-date 2016-01-01 --end-date 2018-12-31 --update-existing` |
|
||||||
(No need for `--skip-details` when using `--update-existing` — it always fetches details for the targeted rows.) |
|
||||||
|
|
||||||
### B. Bootstrap Computation (`analysis/political_axis.py`) |
|
||||||
|
|
||||||
New function: |
|
||||||
``` |
|
||||||
compute_party_bootstrap_cis( |
|
||||||
party_vectors: Dict[str, List[np.ndarray]], |
|
||||||
n_boot: int = 1000, |
|
||||||
ci: float = 95.0, |
|
||||||
seed: int = 42 |
|
||||||
) -> Dict[str, Dict] |
|
||||||
``` |
|
||||||
|
|
||||||
Input: `party_vectors` is a dict mapping party name → list of individual MP vectors (each a numpy array of length 50). The caller (explorer.py) builds this from DB queries using existing mp→party mapping logic. |
|
||||||
|
|
||||||
Returns per-party: |
|
||||||
``` |
|
||||||
{ |
|
||||||
"PVV": { |
|
||||||
"centroid": [50 floats], |
|
||||||
"ci_lower": [50 floats], |
|
||||||
"ci_upper": [50 floats], |
|
||||||
"std": [50 floats], |
|
||||||
"n_mps": 19 |
|
||||||
}, |
|
||||||
... |
|
||||||
} |
|
||||||
``` |
|
||||||
|
|
||||||
Algorithm: |
|
||||||
1. Receive pre-grouped `party_vectors` from caller |
|
||||||
2. For each party with N >= 2: |
|
||||||
- Create numpy Generator with fixed seed |
|
||||||
- For each of n_boot replicates: sample N indices with replacement, compute mean vector |
|
||||||
- Compute percentile CIs (alpha/2, 100-alpha/2) and std across replicates per dimension |
|
||||||
3. For parties with N = 1: set ci_lower == ci_upper == centroid, std = 0, flag n_mps = 1 |
|
||||||
|
|
||||||
Dependencies: numpy, duckdb (read_only), json. |
|
||||||
|
|
||||||
**Import issue**: `_PARTY_NORMALIZE` and `CURRENT_PARLIAMENT_PARTIES` live in `explorer.py` (a Streamlit app). The bootstrap function in `analysis/political_axis.py` can't import from there. Solution: the bootstrap function accepts `party_vectors: Dict[str, List[np.ndarray]]` as input — the caller (explorer.py) handles the mp→party mapping and passes grouped vectors in. This keeps the analysis module independent of Streamlit app constants and avoids duplicating the normalization logic. |
|
||||||
|
|
||||||
Alternatively, the caller can pass the already-computed `party_scores` dict from `load_party_axis_scores` plus raw per-party MP vector lists. The simplest approach: add a helper in explorer.py that loads grouped MP vectors per party (reusing existing mapping logic) and pass that to the bootstrap function. |
|
||||||
|
|
||||||
### C. Chart Enhancement (`explorer.py`) |
|
||||||
|
|
||||||
Modify `_render_party_axis_chart` to accept optional `bootstrap_data: Dict[str, Dict] = None`. |
|
||||||
|
|
||||||
When bootstrap_data is provided: |
|
||||||
- For each party, compute error magnitude: `(ci_upper[axis_idx] - ci_lower[axis_idx]) / 2` |
|
||||||
- When flip is True, error magnitude stays the same (symmetric around the negated centroid) |
|
||||||
- Add `error_x=dict(type="data", array=error_array, visible=True)` to the party marker Scatter trace |
|
||||||
- Parties with N=1: render with a distinct marker (diamond shape instead of circle) as visual unreliability warning |
|
||||||
- Add `N={n_mps}` to hover text for all parties |
|
||||||
|
|
||||||
The bootstrap computation should be cached alongside party scores using `@st.cache_data`. |
|
||||||
|
|
||||||
### D. Delete Stale JSON File |
|
||||||
|
|
||||||
Remove `thoughts/explorer/party_svd_scores.json`. The app never reads this file — `load_party_axis_scores` always computes live from the DB. The file was generated ad-hoc during analysis and is missing NSC. |
|
||||||
|
|
||||||
Also remove `thoughts/explorer/axis_analysis_data.json` — same situation, ad-hoc analysis artifact not used by the app. |
|
||||||
|
|
||||||
## Data Flow |
|
||||||
|
|
||||||
``` |
|
||||||
DB (svd_vectors, mp_metadata) |
|
||||||
│ |
|
||||||
├──→ load_party_axis_scores() |
|
||||||
│ returns Dict[str, List[float]] (party → 50-dim centroid) |
|
||||||
│ |
|
||||||
└──→ load_party_mp_vectors() [NEW helper in explorer.py] |
|
||||||
returns Dict[str, List[np.ndarray]] (party → list of individual MP vectors) |
|
||||||
reuses same mp→party mapping as load_party_axis_scores |
|
||||||
│ |
|
||||||
↓ |
|
||||||
compute_party_bootstrap_cis(party_vectors, n_boot=1000, ci=95, seed=42) |
|
||||||
│ returns Dict[str, Dict] (party → {centroid, ci_lower, ci_upper, std, n_mps}) |
|
||||||
↓ |
|
||||||
_render_party_axis_chart(party_scores, comp_sel, theme, bootstrap_data=None) |
|
||||||
│ indexes [comp_sel - 1] from centroid and CIs |
|
||||||
│ applies flip (negate score AND CI bounds) |
|
||||||
│ adds error_x to Plotly Scatter trace |
|
||||||
↓ |
|
||||||
Streamlit renders chart with error bars |
|
||||||
``` |
|
||||||
|
|
||||||
Both functions cached via `@st.cache_data` with same TTL. |
|
||||||
|
|
||||||
## Error Handling |
|
||||||
|
|
||||||
- **N=1 parties (Volt, Lid Keijzer)**: Return centroid as both CI bounds, std=0. Chart renders diamond marker. Hover says "N=1, geen betrouwbaarheidsinterval". |
|
||||||
- **N=2 parties (50PLUS)**: CIs will be wide — that's correct, let data speak. |
|
||||||
- **SVD vector parsing failures**: Skip MP, log warning (same as existing pattern). |
|
||||||
- **Download/scraping failures**: Per-chunk try/except already handles this. `_fetch_body_text` returns None on failure (existing behavior). |
|
||||||
- **update-existing with no besluit_id**: Skip motion, log. Not all motions have a besluit_id traceable to body text. |
|
||||||
|
|
||||||
## Testing Strategy |
|
||||||
|
|
||||||
### Unit Tests |
|
||||||
- `test_bootstrap_fixed_seed`: Synthetic data (5 parties, varying N), fixed seed. Verify: |
|
||||||
- Output shape matches expected structure |
|
||||||
- CI bounds bracket centroid for all parties |
|
||||||
- N=1 party has ci_lower == ci_upper == centroid |
|
||||||
- Same seed produces identical output |
|
||||||
- Larger N produces narrower CIs |
|
||||||
|
|
||||||
### Integration Tests |
|
||||||
- `test_bootstrap_real_db`: Run against actual DB, verify: |
|
||||||
- Returns data for all 17 current parliament parties (+NSC) |
|
||||||
- n_mps values match known party sizes |
|
||||||
- CI width for D66 (N=49) << CI width for SP (N=3) |
|
||||||
|
|
||||||
### Visual Validation |
|
||||||
- Run Streamlit app, verify error bars appear on SVD axis charts |
|
||||||
- Verify N=1 parties have distinct marker style |
|
||||||
- Verify hover text includes party size |
|
||||||
|
|
||||||
## Open Questions |
|
||||||
|
|
||||||
None — design is straightforward. The only future enhancement would be multi-window bootstrap for axis stability testing, but that's a separate project. |
|
||||||
@ -1,117 +0,0 @@ |
|||||||
--- |
|
||||||
date: 2026-03-30 |
|
||||||
topic: "compass-trajectory-consistency" |
|
||||||
status: validated |
|
||||||
--- |
|
||||||
|
|
||||||
## Problem Statement |
|
||||||
|
|
||||||
What we're solving and why |
|
||||||
|
|
||||||
We must ensure the political compass (single-window snapshot) and the Explorer trajectories use the same numeric coordinate frame for the first two SVD axes so the compass numbers match the trajectory centroids exactly. |
|
||||||
|
|
||||||
**Key issue:** Component 1 already matched, but component 2 shows persistent mismatches due to an API/shape ambiguity and occasional fallback logic differences. Fixing this prevents confusing, inconsistent numbers in the UI. |
|
||||||
|
|
||||||
|
|
||||||
## Constraints |
|
||||||
|
|
||||||
Non-negotiables and limitations |
|
||||||
|
|
||||||
- The canonical coordinate frame is the Procrustes-aligned output of **compute_2d_axes** (the repo artifact that produces **positions_by_window**). |
|
||||||
- Keep UI responsiveness and existing cache usage (@st.cache_data where present). |
|
||||||
- Minimal, focused changes: only update Explorer call sites and the compass renderer API. Do not change the SVD pipeline outputs. |
|
||||||
- Use the **first chronological party vector** as the fallback when a party has no MPs in a window (user decision). |
|
||||||
|
|
||||||
|
|
||||||
## Approach |
|
||||||
|
|
||||||
Chosen approach and why |
|
||||||
|
|
||||||
We will adopt an explicit API for the compass renderer: pass per-party 2D projected coordinates (party → (x,y)) computed from **positions_by_window** for the target window. This eliminates shape/indexing ambiguity and guarantees numeric equality with trajectory centroids. |
|
||||||
|
|
||||||
**Why:** |
|
||||||
- Simpler and less error-prone than synthesizing k-dimensional vectors or changing compute_2d_axes. |
|
||||||
- Keeps the canonical data source unchanged (positions_by_window) and makes intent explicit at the Explorer surface. |
|
||||||
- Easy to test: we can assert numeric equality directly on the 2D coordinates. |
|
||||||
|
|
||||||
|
|
||||||
## Architecture |
|
||||||
|
|
||||||
High-level structure of the change |
|
||||||
|
|
||||||
**Key pieces:** |
|
||||||
- **compute_2d_axes** (unchanged): produces **positions_by_window** which is the canonical frame. |
|
||||||
- **Explorer: party centroid helper:** new helper that computes per-party (x, y) means from positions_by_window for a window. |
|
||||||
- **_build_party_axis_figure (changed API):** now accepts **party_coords: Dict[str, Tuple[float,float]]** and a selected component index (1 or 2) and uses the explicit coordinate values for plotting. |
|
||||||
- **Call-site updates:** update all places that previously passed party SVD vectors to instead compute and pass party_coords (use first-chronological party vector only when no MPs are present for that party in the window). |
|
||||||
|
|
||||||
|
|
||||||
## Components |
|
||||||
|
|
||||||
Key pieces and responsibilities |
|
||||||
|
|
||||||
- **compute_party_coords(positions_by_window, party_map, window_id):** |
|
||||||
- Input: positions_by_window, party->MP mapping (load_party_map or similar), window id. |
|
||||||
- Output: party -> (x_mean, y_mean). If no MPs for a party, returns None or uses fallback loader. |
|
||||||
|
|
||||||
- **_build_party_axis_figure(party_coords, comp_sel, ...):** |
|
||||||
- Input: explicit 2D coords; **comp_sel** ∈ {1,2}. |
|
||||||
- Behavior: uses party_coords[p][comp_sel-1] as the axis value, constructs hover text, CIs, and plots. No indexing into long SVD vectors. |
|
||||||
|
|
||||||
- **Fallback loader:** existing **load_party_axis_scores** (unchanged). When compute_party_coords finds no MPs, we will use the party's first chronological vector from load_party_axis_scores(window) as fallback and indicate fallback in hover text. |
|
||||||
|
|
||||||
- **Callers to update:** |
|
||||||
- build_svd_components_tab |
|
||||||
- any other explorer function that previously passed party-axis vectors into _build_party_axis_figure |
|
||||||
|
|
||||||
|
|
||||||
## Data Flow |
|
||||||
|
|
||||||
How data moves through the updated code path |
|
||||||
|
|
||||||
1. UI requests compass for window W and component C. |
|
||||||
2. Explorer calls load_positions(db_path) → gets positions_by_window. |
|
||||||
3. compute_party_coords builds per-party (x,y) means from positions_by_window[W]. |
|
||||||
4. For parties with zero MPs in W, call load_party_axis_scores(window) and take the **first chronological** party vector as fallback; annotate hover that a fallback is used. |
|
||||||
5. Pass party_coords to _build_party_axis_figure which reads comp_sel and uses the explicit coordinate at index 0 or 1. |
|
||||||
6. Explorer trajectories tab already computes the same centroids from positions_by_window; therefore numbers match exactly. |
|
||||||
|
|
||||||
|
|
||||||
## Error Handling |
|
||||||
|
|
||||||
Strategy for failures and edge cases |
|
||||||
|
|
||||||
- If positions_by_window is missing or corrupted: surface a clear diagnostic message in the UI recommending running the SVD recompute pipeline, and avoid attempting to plot mismatched values. |
|
||||||
- If a party has no MPs and load_party_axis_scores also returns no data: omit that party from the compass and add a tooltip note in the UI explaining why. |
|
||||||
- If any coordinate is NaN/inf: skip plotting that party and log a debug message with the party id and window. |
|
||||||
- Log a WARN when a fallback is used so we can find parties with no MPs across windows. |
|
||||||
|
|
||||||
|
|
||||||
## Testing Strategy |
|
||||||
|
|
||||||
How we will verify correctness |
|
||||||
|
|
||||||
- Unit tests |
|
||||||
- Synthetic positions_by_window: build a small fake positions_by_window with known MP coordinates and party→MP mappings. Assert compute_party_coords outputs expected means and that _build_party_axis_figure uses those exact numbers for components 1 and 2. |
|
||||||
- Fallback behavior: create a window with a party that has no MPs and assert load_party_axis_scores is called and its first chronological vector is used. |
|
||||||
|
|
||||||
- Integration tests |
|
||||||
- Run against a small real DB snapshot used in prior verification. Assert for a representative set of parties across several windows that compass numbers equal the trajectory centroids for components 1 and 2. |
|
||||||
|
|
||||||
- CI |
|
||||||
- Run full test suite. Known pre-existing failures unrelated to this change may persist; document them separately but do not block this change on them. |
|
||||||
|
|
||||||
- Manual QA |
|
||||||
- Run Explorer locally and spot-check compass tooltips vs trajectory hover values for multiple parties and windows. |
|
||||||
|
|
||||||
|
|
||||||
## Open Questions |
|
||||||
|
|
||||||
Unresolved items (minor) |
|
||||||
|
|
||||||
- None critical: the user selected the fallback preference (first chronological party vector) and agreed to update all callers without backward compatibility. |
|
||||||
|
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
I'm proceeding to create the implementation plan. Interrupt if you want changes to this design. |
|
||||||
@ -1,96 +0,0 @@ |
|||||||
--- |
|
||||||
date: 2026-03-30 |
|
||||||
topic: "diagnose-no-plot-trajectories" |
|
||||||
status: draft |
|
||||||
--- |
|
||||||
|
|
||||||
## Problem Statement |
|
||||||
|
|
||||||
The Trajectories tab currently shows **no Plotly chart at all** (not just an empty chart). We need a low-risk way to determine exactly which runtime gate or swallowed exception is preventing any plot from being rendered and fix it so the chart appears or we surface a clear error message. |
|
||||||
|
|
||||||
**Key observation:** upstream code contains multiple early-returns (no data), and broad except/pass handlers that can silently swallow exceptions — either can cause the UI to skip calling st.plotly_chart entirely. |
|
||||||
|
|
||||||
## Constraints |
|
||||||
|
|
||||||
- Keep changes small and reversible. |
|
||||||
- Do not change user-facing defaults unless gated by an explicit debug toggle or environment variable. |
|
||||||
- Prefer adding diagnostics and logging over big refactors; short-term changes must be removable after diagnosis. |
|
||||||
- Preserve public function locations and names used by other code/tests. |
|
||||||
|
|
||||||
## Chosen approach (what I'll do) |
|
||||||
|
|
||||||
I'm choosing a focused instrumentation strategy: add a temporary, opt-in **debug mode** that surfaces the exact runtime decisions and any exceptions taken along the Trajectories rendering path, and un-silence key broad excepts so we can observe stack traces. |
|
||||||
|
|
||||||
**Why:** It's the fastest, lowest-risk way to get definitive evidence of why the plot doesn't render, and it avoids changing production logic except under an explicit debug toggle. |
|
||||||
|
|
||||||
**High-level changes:** |
|
||||||
- Add a **DEBUG toggle** (UI checkbox + env var EXPLORER_DEBUG_TRAJECTORIES) that enables verbose diagnostics in the Trajectories UI. |
|
||||||
- When debug is enabled, show step-by-step status for each early-return gate: result of load_positions, axis_def presence, length of positions_by_window, centroids size, mp_positions size, helper returns (fig/trace_count) and any exception tracebacks. |
|
||||||
- Replace the helper-call swallow (`except Exception: pass`) around select_trajectory_plot_data with a handler that logs and displays the exception (only when debug is enabled) and increments a visible diagnostic counter. |
|
||||||
- Add compact, structured diagnostics to the existing DEBUG expander (windows_count, party_map_count, centroids_sample, mp_positions_sample, helper_trace_count, helper_exception_string). |
|
||||||
|
|
||||||
## Alternatives considered (brief) |
|
||||||
|
|
||||||
1. Force-show MP fallback unconditionally. Pros: quickly confirm plotting plumbing works. Cons: noisy, may mask root cause and changes production behaviour. |
|
||||||
2. Heavy refactor to move pure plotting logic into an import-safe separate module and run offline tests. Pros: clean separation and easier tests. Cons: slower and higher-risk for this urgent diagnosis. |
|
||||||
|
|
||||||
I rejected both for immediate work because they are heavier than necessary to learn the root cause. |
|
||||||
|
|
||||||
## Architecture (where changes live) |
|
||||||
|
|
||||||
- Explorer UI (explorer.py) — add debug checkbox and diagnostic panel wiring inside build_trajectories_tab. |
|
||||||
- Diagnostics collector (small helper in explorer_helpers.py or local helper) — produce structured status dicts (counts, samples) used by the UI. |
|
||||||
- Error surfacer — modify the select_trajectory_plot_data call-site to log exceptions (logger.exception) and, when debug enabled, call st.exception(...) or st.text_area(...) with the traceback. |
|
||||||
|
|
||||||
## Components and responsibilities |
|
||||||
|
|
||||||
- **Debug toggle UI:** checkbox + env var binding; enables/disables verbose diagnostics. |
|
||||||
- **Diagnostic collector:** pure helper that inspects positions_by_window, party_map, centroids, mp_positions and returns compact samples and counts. |
|
||||||
- **Exception handler change:** convert broad `except: pass` at the helper boundary into `except Exception as e: logger.exception(e); diagnostic['select_helper_exception']=traceback; if debug: st.exception(e)`. |
|
||||||
- **Temporary UX:** display a compact, clearly labeled diagnostics block inside the DEBUG expander. Make it obvious this is a temporary troubleshooting aid. |
|
||||||
|
|
||||||
## Data flow (quick) |
|
||||||
|
|
||||||
- load_positions(db) -> positions_by_window, axis_def |
|
||||||
- diagnostic collector inspects positions_by_window and party_map |
|
||||||
- build_trajectories_tab calls select_trajectory_plot_data(...) inside a try/except |
|
||||||
- on success: use returned fig and trace_count to decide whether to call st.plotly_chart |
|
||||||
- on exception: diagnostic collector records traceback and UI shows it if debug enabled |
|
||||||
|
|
||||||
## Error handling strategy |
|
||||||
|
|
||||||
- Do not swallow exceptions silently at the helper boundary. Always log with logger.exception(...). |
|
||||||
- Only surface full tracebacks to the Streamlit UI when **debug mode** is enabled. |
|
||||||
- Keep production behaviour unchanged when debug mode is off. |
|
||||||
|
|
||||||
## Testing approach |
|
||||||
|
|
||||||
- Unit tests for the diagnostic collector with synthetic positions_by_window covering: empty data, partial centroids, and full centroids. |
|
||||||
- Unit test that simulates the helper raising an exception (monkeypatch) and asserts that the exception is logged and (when debug enabled) that the diagnostics struct contains the exception string. |
|
||||||
- Manual reproduction: run Streamlit locally with EXPLORER_DEBUG_TRAJECTORIES=1 and the same DB used in production to capture the diagnostics panel and fix the underlying issue. |
|
||||||
|
|
||||||
## Open questions |
|
||||||
|
|
||||||
- Can you reproduce the issue locally (same DB and same command to start Streamlit)? I assume yes and will base debug advice on that. |
|
||||||
- Are we allowed to enable a short-lived debug toggle in production logs if needed, or will you only run this locally? |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
I'm proceeding to create the design doc. Interrupt if you want changes. |
|
||||||
## Environment management (use uv, not pip) |
|
||||||
|
|
||||||
We will not use pip directly. Use the project's `uv` tool to manage dependencies and run scripts so the environment is reproducible and follows local project conventions. |
|
||||||
|
|
||||||
Recommended commands: |
|
||||||
|
|
||||||
- Add duckdb to the project virtual environment: |
|
||||||
- `uv add duckdb` |
|
||||||
- Run the diagnostic CLI with debug enabled: |
|
||||||
- `EXPLORER_DEBUG_TRAJECTORIES=1 uv run python scripts/diagnose_trajectories_cli.py` |
|
||||||
- Start Streamlit inside the uv-managed environment (example): |
|
||||||
- `uv run streamlit run pages/2_Explorer.py` |
|
||||||
|
|
||||||
Notes: |
|
||||||
|
|
||||||
- If the planner or any follow-up steps need to install or run packages, they should use `uv add` and `uv run` rather than `pip install` or direct interpreter calls. |
|
||||||
- If `uv` is not on PATH in a particular environment, prefer `python -m uv` or consult the project README/ARCHITECTURE.md for local developer environment instructions. |
|
||||||
@ -1,102 +0,0 @@ |
|||||||
--- |
|
||||||
date: 2026-03-30 |
|
||||||
topic: "fix-missing-trajectories" |
|
||||||
status: draft |
|
||||||
--- |
|
||||||
|
|
||||||
## Problem Statement |
|
||||||
|
|
||||||
We're seeing empty/absent party trajectories in the Explorer "Partij Trajectories" tab despite compute_2d_axes producing windows and many parties having centroids. The UI shows no visible traces for selected parties in some runs, making the feature unreliable for end users. |
|
||||||
|
|
||||||
**Root hypothesis:** either (A) selected parties have only missing/None centroid values at plot time, (B) a runtime exception (e.g. float(None)) aborts trace creation silently, or (C) label/party normalization mismatch filters out traces. We need a low-risk, diagnostic-first fix to reveal which of these is happening and restore visible traces quickly. |
|
||||||
|
|
||||||
## Constraints |
|
||||||
|
|
||||||
- Preserve public function names and locations: **compute_2d_axes, classify_axes, load_positions, _build_party_axis_figure, build_trajectories_tab, build_compass_tab, _spline_smooth**. |
|
||||||
- Avoid large refactors; prefer reversible, minimal changes that surface diagnostics. |
|
||||||
- Do not expose internal modal tokens ("As 1"/"As 2") to end users; use axis_classifier.display_label_for_modal(...) or choose_trajectory_title() where appropriate. |
|
||||||
- Visual traces should remain smoothed; hover must include raw centroid values for auditability. |
|
||||||
|
|
||||||
## Chosen Approach (what we'll implement) |
|
||||||
|
|
||||||
I'm choosing a **minimal triage-first approach**: add precise diagnostics and defensive conversions around plotting, so we either restore visible traces immediately or produce deterministic diagnostics that reveal the real data mismatch. |
|
||||||
|
|
||||||
**Why:** low risk, fastest feedback loop. This will either fix simple runtime errors (safe float conversion, exceptions while adding traces) or provide clear evidence that deeper normalization changes are required. |
|
||||||
|
|
||||||
**Key changes:** |
|
||||||
- Add a small helper: **safe_float(x)** — converts numeric-like values to floats, maps None/NaN/invalid -> float('nan') without raising. |
|
||||||
- In build_trajectories_tab/_build_party_axis_figure: |
|
||||||
- Wrap per-party fig.add_trace(...) in try/except and log the exception with party id/name to the DEBUG expander instead of aborting the whole plot. |
|
||||||
- Emit per-selected-party diagnostics into the existing DEBUG expander: number of raw centroids, counts of non-NaN coordinates, example first 5 raw xs/ys, and lengths per window. |
|
||||||
- Replace direct float(...) casts on raw centroid values used in hover/customdata with safe_float. |
|
||||||
- Ensure per-MP fallback plotting path still exists and can be forced via EXPLORER_FORCE_SHOW_TRAJECTORIES for diagnosis. |
|
||||||
- Add unit tests for safe_float and targeted integration tests that assert traces are created when centroids contain NaNs and when party_map exists. |
|
||||||
|
|
||||||
## Alternatives Considered |
|
||||||
|
|
||||||
1) Full normalization sweep: align party centroids to global windows (fill missing with NaN) and accept parties with at least one non-NaN value. |
|
||||||
- Pros: robust long-term fix, canonical data shape. |
|
||||||
- Cons: larger change surface, higher risk, slower to validate in production data. |
|
||||||
|
|
||||||
2) Refactor plotting pipeline to use a normalized DataFrame (rows=windows, cols=parties) and build traces from that canonical shape. |
|
||||||
- Pros: clearer data flow, easier testing. |
|
||||||
- Cons: larger refactor, touches many modules. |
|
||||||
|
|
||||||
I considered both but rejected them for immediate work because we need quick deterministic diagnostics to determine if these larger efforts are warranted. |
|
||||||
|
|
||||||
## Architecture (high-level) |
|
||||||
|
|
||||||
**Inputs:** positions_by_window (from compute_2d_axes), party_map, selected_parties. |
|
||||||
|
|
||||||
**Flow:** |
|
||||||
- compute_2d_axes -> positions_by_window |
|
||||||
- load_positions / helpers -> party-centroid dicts keyed by party |
|
||||||
- build_trajectories_tab calls _build_party_axis_figure to build per-party traces |
|
||||||
- _build_party_axis_figure uses smoothing helpers (_spline_smooth) to produce visible traces and also builds hover customdata with raw centroid values (smoothed coords for the trace, raw values in customdata) |
|
||||||
|
|
||||||
**Intervention points:** build_trajectories_tab and _build_party_axis_figure (small helper additions and safe conversion), plus tests and diagnostic output in the DEBUG expander. |
|
||||||
|
|
||||||
## Components and Responsibilities |
|
||||||
|
|
||||||
- **safe_float helper:** convert inputs to float or return float('nan') safely. Centralized to avoid repeated float(None) errors. |
|
||||||
- **Diagnostic emitter:** small utility used by build_trajectories_tab to format and write per-party diagnostic rows to the DEBUG expander. |
|
||||||
- **Plotly trace wrapper:** per-party try/except around fig.add_trace that writes exception details to diagnostics instead of failing silently. |
|
||||||
- **Unit + integration tests:** verify hover customdata creation, safe_float behaviour, trajectories rendered with partial centroids, and UI label mapping does not emit "As 1"/"As 2". |
|
||||||
|
|
||||||
## Data Flow (detailed) |
|
||||||
|
|
||||||
- compute_2d_axes produces windows (time labels) and canonical positions_by_window. |
|
||||||
- load_positions consumes positions_by_window and returns a mapping party -> list of centroids (one per window) where centroids may contain None/NaN for missing windows. |
|
||||||
- build_trajectories_tab selects parties and for each party calls _build_party_axis_figure which: |
|
||||||
- extracts raw xs_raw, ys_raw arrays aligned to windows |
|
||||||
- computes smoothed xs_plot, ys_plot via _spline_smooth |
|
||||||
- builds Plotly trace using xs_plot/ys_plot for the line and includes xs_raw/ys_raw in customdata with safe_float conversion |
|
||||||
- adds the trace inside a try/except and emits any exception + raw samples to debug |
|
||||||
|
|
||||||
## Error Handling |
|
||||||
|
|
||||||
- Use safe_float to prevent float(None) and similar runtime TypeErrors when building hover/customdata. |
|
||||||
- Use per-party try/except to avoid a single-party failure blanking the whole chart; log the error and continue plotting other parties. |
|
||||||
- Show structured diagnostics in the existing DEBUG expander with these fields: party name, windows_count, raw_centroid_count, non_nan_count, sample_raw_xs, sample_raw_ys, exception (if any). |
|
||||||
|
|
||||||
## Testing Strategy |
|
||||||
|
|
||||||
- Unit tests: |
|
||||||
- safe_float: None -> nan, '1.23' -> 1.23 (if strings are expected), invalid -> nan |
|
||||||
- UI label helpers: axis_classifier.display_label_for_modal(...) and choose_trajectory_title() do not return raw "As 1"/"As 2" |
|
||||||
|
|
||||||
- Integration tests (lightweight): |
|
||||||
- Build a synthetic positions_by_window with some None / NaN holes and assert _build_party_axis_figure returns a Plotly trace object (or equivalent structure) and that customdata contains numeric/NaN values not exceptions. |
|
||||||
- Test that build_trajectories_tab's DEBUG expander receives the expected diagnostic entries for a party with missing centroids. |
|
||||||
|
|
||||||
- Manual verification steps (later): run full Streamlit with duckdb/plotly installed and open Explorer -> Trajectories to confirm traces are visible for typical parties and inspect the DEBUG expander. |
|
||||||
|
|
||||||
## Open Questions |
|
||||||
|
|
||||||
- Are there other UI locations still exposing raw modal labels? We should sweep the repo; the tests already added help with this, but they may not be exhaustive. |
|
||||||
- Do we want safe_float to try to coerce numeric strings? My proposal is **no coercion** (only pass-through numeric types and map others -> nan) unless tests show string encodings exist in centroid data. |
|
||||||
- If diagnostics show that many parties are missing centroids entirely, we'll need the full normalization sweep (alternative #1). |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
I'm proceeding to create the design doc. Interrupt if you want changes. |
|
||||||
@ -1,113 +0,0 @@ |
|||||||
--- |
|
||||||
date: 2026-03-31 |
|
||||||
topic: "diagnose-no-plot-trajectories" |
|
||||||
status: draft |
|
||||||
--- |
|
||||||
|
|
||||||
## Problem Statement |
|
||||||
|
|
||||||
We need to restore visible party trajectories in the Explorer "Partij Trajectories" tab so the Plotly chart shows non-empty traces for realistic windows, and provide opt-in diagnostics that explain why traces are missing. |
|
||||||
|
|
||||||
**Why:** Users see an empty chart in some environments/windows. This could be caused by upstream data gaps, malformed coordinates, strict filtering in helpers, or unhandled exceptions in the plotting helper. We must gather evidence, fix the actual cause, and avoid changing production behavior unless debug is explicitly enabled. |
|
||||||
|
|
||||||
|
|
||||||
## Constraints |
|
||||||
|
|
||||||
- Keep changes minimal and reversible; prefer instrumentation and small helper fixes over large refactors. |
|
||||||
- Diagnostics must be opt-in (EXPLORER_DEBUG_TRAJECTORIES env var and UI checkbox). |
|
||||||
- Helpers must be import-safe and pure so unit tests run without heavy GUI/DB dependencies. |
|
||||||
- Use project's environment management (uv) for local runs and CI — do not call pip directly. |
|
||||||
|
|
||||||
|
|
||||||
## Approach (chosen) |
|
||||||
|
|
||||||
I recommend a **diagnostic-first** approach followed by targeted small fixes. Steps: |
|
||||||
|
|
||||||
- Add a small, dedicated diagnostic writer script that emits a structured JSON diagnostics artifact for representative windows from data/motions.db. |
|
||||||
- Improve input validation and normalization in load_positions / compute_2d_axes (coerce numeric strings, treat 'nan'/'None' consistently, ignore out-of-range coords) so helpers are robust to malformed rows. |
|
||||||
- Keep current gates that avoid plotting when inputs are invalid, but record precise diagnostics into module-level _last_trajectories_diagnostics and the CLI JSON output. |
|
||||||
- Add unit tests for the normalization logic and for inspector behaviors; add a small integration diagnostic test that runs via uv and checks trace_count > 0 for a known-good sample window. |
|
||||||
|
|
||||||
Reasoning: we already have instrumentation capturing stages (load_positions_empty, no_mp_positions, select_helper_exception, trace_count). Gathering structured evidence will let us pick a minimal fix (data normalization or filter tweak) without risky behaviour changes. |
|
||||||
|
|
||||||
|
|
||||||
## Alternatives considered |
|
||||||
|
|
||||||
- Aggressive fallback rendering: render approximated centroids when traces are empty. Rejected because it may mask data quality issues and mislead users. |
|
||||||
- Upstream data repair: fix svd pipeline / DB rows before Explorer. Good long-term, but requires cross-team coordination and longer cycle — we should diagnose first. |
|
||||||
|
|
||||||
|
|
||||||
## Architecture |
|
||||||
|
|
||||||
**High-level:** The Explorer plotting pipeline remains the same; we add a diagnostics writer and a small normalization layer. |
|
||||||
|
|
||||||
- Data source: data/motions.db (svd_vectors and party maps) |
|
||||||
- Pipeline: get_uniform_dim_windows -> compute_2d_axes -> load_positions -> inspect_positions_for_issues -> compute_party_centroids -> select_trajectory_plot_data -> Plotly fig |
|
||||||
- Diagnostics: module-level _last_trajectories_diagnostics plus a CLI script that runs representative windows and writes JSON artifacts to thoughts/shared/diagnostics/YYYY-MM-DD-trajectories-diagnostics.json |
|
||||||
|
|
||||||
|
|
||||||
## Components and responsibilities |
|
||||||
|
|
||||||
- Diagnostic CLI (scripts/save_trajectories_diagnostics.py): |
|
||||||
- Run a configurable sample of windows, call compute_2d_axes, load_positions, inspect_positions_for_issues, select_trajectory_plot_data. |
|
||||||
- Emit structured JSON with per-window diagnostics and aggregated summary. |
|
||||||
|
|
||||||
- Normalization helpers (explorer_helpers.normalize_positions): |
|
||||||
- Coerce numeric strings to floats, coerce common null tokens to NaN, clamp improbable values, and return a normalized positions_by_window structure. |
|
||||||
- Pure, import-safe, and covered by unit tests. |
|
||||||
|
|
||||||
- Instrumentation (explorer._last_trajectories_diagnostics): |
|
||||||
- Record stage, window id, counts (n_windows, n_entities per window), mp_positions_count, any helper exceptions/tracebacks, and sample rows. |
|
||||||
|
|
||||||
- UI changes (pages/2_Explorer.py): |
|
||||||
- Add an opt-in debug checkbox that enables detailed diagnostics in the UI when checked (or when EXPLORER_DEBUG_TRAJECTORIES=1). |
|
||||||
- Do not change default plotting or filtering behavior when debug is disabled. |
|
||||||
|
|
||||||
- Tests |
|
||||||
- Unit tests for normalization and inspector. |
|
||||||
- Diagnostic integration test run via uv (non-flaky, uses a small sample or DB fixture). |
|
||||||
|
|
||||||
|
|
||||||
## Data Flow |
|
||||||
|
|
||||||
1. Caller requests trajectories tab (build_trajectories_tab). |
|
||||||
2. call get_uniform_dim_windows(DB) -> returns window descriptors. |
|
||||||
3. For each sampled window, compute_2d_axes(window) -> returns raw positions_by_window (possibly malformed). |
|
||||||
4. normalize_positions(positions_by_window) -> cleaned positions_by_window. |
|
||||||
5. inspect_positions_for_issues(positions_by_window) -> returns diagnostics (missing coords, string values, NaNs, zero-length paths). |
|
||||||
6. compute_party_centroids(positions_by_window) -> party centroids and mp_positions. |
|
||||||
7. select_trajectory_plot_data(centroids, mp_positions, options) -> returns fig, trace_count, banner_text. On exception capture diagnostics. |
|
||||||
8. If trace_count == 0 -> do not call st.plotly_chart; show friendly message and, if debug enabled, show the collected diagnostics and link to the saved JSON artifact. |
|
||||||
|
|
||||||
|
|
||||||
## Error Handling |
|
||||||
|
|
||||||
- Capture exceptions at helper boundaries and record to select_trajectory_plot_data._last_diagnostics and module _last_trajectories_diagnostics. Do not raise to Streamlit UI unless debug is enabled. |
|
||||||
- Normalize inputs proactively to reduce exception surface (avoid type errors from strings/None). |
|
||||||
- If a helper raises, return a safe empty fig and banner that suggests enabling diagnostics. |
|
||||||
- JSON diagnostics writer writes atomically (write to a .tmp file then rename) to avoid partial files being consumed. |
|
||||||
|
|
||||||
|
|
||||||
## Testing Strategy |
|
||||||
|
|
||||||
- Unit tests (fast, import-safe): |
|
||||||
- normalize_positions handles strings, 'nan', None, and clamps extremes. |
|
||||||
- inspect_positions_for_issues detects empty windows, NaNs-only windows, and malformed coordinate types. |
|
||||||
- select_trajectory_plot_data returns (fig, trace_count>0) for a known-good small sample and sets diagnostics correctly when trace_count==0. |
|
||||||
|
|
||||||
- Integration tests (run under uv in CI or locally): |
|
||||||
- Diagnostic CLI can be executed via uv run and creates a JSON diagnostic artifact for a small sample; test asserts artifact exists and is valid JSON with expected fields. |
|
||||||
|
|
||||||
- Manual verification: |
|
||||||
- Run EXPLORER_DEBUG_TRAJECTORIES=1 uv run python scripts/save_trajectories_diagnostics.py --db data/motions.db --out thoughts/shared/diagnostics/<date>.json |
|
||||||
- Open the Explorer locally and reproduce an empty-chart scenario; enable debug checkbox and view diagnostics. |
|
||||||
|
|
||||||
|
|
||||||
## Open Questions |
|
||||||
|
|
||||||
1. Do we prefer automatic normalization (silently fixing data) or conservative behavior (report and require upstream fix)? My recommendation: auto-normalize common, unambiguous issues (strings -> numbers, common null tokens) and surface anything ambiguous in diagnostics. |
|
||||||
2. Where should diagnostic artifacts live long-term? thoughts/shared/diagnostics is fine for short-term; consider a single diagnostics/ bucket for CI artifacts. |
|
||||||
3. Which windows should the diagnostics CLI sample by default? I propose sampling: 1) first 10 windows, 2) 10 windows evenly spaced, and 3) one window that previously produced empty result if known. |
|
||||||
|
|
||||||
|
|
||||||
I'm proceeding to create the design doc. Interrupt if you want changes. |
|
||||||
File diff suppressed because one or more lines are too long
@ -1,89 +0,0 @@ |
|||||||
--- |
|
||||||
date: 2026-03-30 |
|
||||||
topic: "compass-trajectory-consistency" |
|
||||||
status: draft |
|
||||||
--- |
|
||||||
|
|
||||||
# Implementation Plan — Compass ↔ Trajectory Consistency |
|
||||||
|
|
||||||
This plan implements the validated design (thoughts/shared/designs/2026-03-30-compass-trajectory-consistency-design.md) with the following firm constraints from the user: |
|
||||||
- Use per-window MP-centroid party coordinates as the canonical source for components 1 & 2 |
|
||||||
- When a party has no MPs in a window, use the first chronological party vector as fallback |
|
||||||
- **Update all callers** to the new explicit API; do NOT keep backward compatibility shims |
|
||||||
|
|
||||||
|
|
||||||
## Goal |
|
||||||
|
|
||||||
Make the political compass numeric values identical to trajectory centroids for SVD components 1 and 2 by passing explicit per-party (x,y) coordinates (computed from positions_by_window) to the compass renderer and updating all callers to use that API. |
|
||||||
|
|
||||||
|
|
||||||
## Micro-tasks (ordered, small, actionable) |
|
||||||
|
|
||||||
All tasks assume a development branch and running tests locally. Each task should be one commit. |
|
||||||
|
|
||||||
1) Add explorer_helpers.py (pure helper) |
|
||||||
- Create compute_party_coords(positions_by_window, party_map, window_id, fallback_party_scores=None) |
|
||||||
- Returns (party_coords: Dict[str,(x,y)], fallback_used: Set[str]) |
|
||||||
- Unit tests: tests/test_explorer_helpers.py |
|
||||||
- Estimate: 2.0h |
|
||||||
|
|
||||||
2) Update explorer.py to the new strict API |
|
||||||
- Replace _build_party_axis_figure to accept only explicit party_coords for comp_sel 1 & 2. |
|
||||||
- Remove old polymorphic/legacy path; callers must pass party_coords or raise a clear error. |
|
||||||
- Update rendering glue to call _build_party_axis_figure with explicit party_coords. |
|
||||||
- Ensure hover text shows fallback notes for parties where fallback_used contains the party. |
|
||||||
- Update/clean Streamlit caption behavior when no coords available. |
|
||||||
- Tests: modify tests/test_explorer_chart.py to supply party_coords shape and assert behavior. |
|
||||||
- Estimate: 4.5h |
|
||||||
|
|
||||||
3) Update all callers across repo to pass explicit party_coords |
|
||||||
- Grep for places that previously passed party vectors into _build_party_axis_figure or used load_party_axis_scores for compass rendering. |
|
||||||
- Update each call site to compute party_coords via compute_party_coords, passing the fallback_party_scores (first-chronological vector) when needed. |
|
||||||
- Caller list (non-exhaustive — verify with repo search): |
|
||||||
- explorer.build_svd_components_tab |
|
||||||
- explorer._render_party_axis_chart (if present) |
|
||||||
- any scripts or tests that directly call _build_party_axis_figure |
|
||||||
- Update tests referencing legacy vector shape. |
|
||||||
- Estimate: 3.0h |
|
||||||
|
|
||||||
4) Add integration consistency test |
|
||||||
- tests/test_compass_trajectory_consistency.py — synthetic positions_by_window and party_map to assert compute_party_coords equals centroid computations used by trajectories. |
|
||||||
- Estimate: 1.0h |
|
||||||
|
|
||||||
5) Run full test suite and fix regressions |
|
||||||
- Run pytest; address failures introduced by strict API change. |
|
||||||
- If other modules relied on old shape in ways not covered by tests, update them to use compute_party_coords. |
|
||||||
- Estimate: 1.5h |
|
||||||
|
|
||||||
6) Manual QA |
|
||||||
- Run streamlit run explorer.py and visually verify compass tooltips and trajectories hover values match (comps 1 & 2) for several parties and windows. |
|
||||||
- Verify fallback tooltip and logger WARN when a party uses fallback vector. |
|
||||||
- Estimate: 1.0h |
|
||||||
|
|
||||||
7) Commit and push (or open PR) with description: |
|
||||||
"feat(explorer): use explicit per-party (x,y) coords from positions_by_window for compass (components 1 & 2); update callers and add tests" |
|
||||||
- Estimate: 0.5h |
|
||||||
|
|
||||||
|
|
||||||
## Verification commands |
|
||||||
|
|
||||||
- Unit tests: |
|
||||||
- python -m pytest tests/test_explorer_helpers.py |
|
||||||
- python -m pytest tests/test_explorer_chart.py |
|
||||||
- python -m pytest tests/test_compass_trajectory_consistency.py |
|
||||||
- Full test suite: |
|
||||||
- python -m pytest |
|
||||||
- Manual UI: |
|
||||||
- streamlit run explorer.py |
|
||||||
|
|
||||||
|
|
||||||
## Rollback and mitigation |
|
||||||
|
|
||||||
- If the strict API uncovers many call sites, revert to a temporary feature branch, document call sites, and migrate them in smaller patches. |
|
||||||
- Keep commits small and self-contained to ease review. |
|
||||||
|
|
||||||
|
|
||||||
## Notes |
|
||||||
|
|
||||||
- This plan follows the user's instruction to update all callers and to use the first chronological party vector as fallback. |
|
||||||
- The helper is pure Python to keep tests simple; callers may cache if needed. |
|
||||||
@ -1,383 +0,0 @@ |
|||||||
# Diagnose no-plot trajectories Implementation Plan |
|
||||||
|
|
||||||
**Goal:** Add an opt-in debug mode for the Trajectories tab that surfaces runtime early-returns and swallowed exceptions so we can diagnose why no Plotly chart is shown. |
|
||||||
|
|
||||||
**Architecture:** Minimal, reversible instrumentation inside explorer.py and explorer_helpers.py. Add an opt-in UI toggle (checkbox + EXPLORER_DEBUG_TRAJECTORIES env var), extend the existing diagnostics/inspector helper to surface additional samples/counts, un-silence broad excepts to log exceptions and capture tracebacks into a diagnostics object accessible to tests and the UI (when debug enabled). |
|
||||||
|
|
||||||
**Design:** thoughts/shared/designs/2026-03-30-diagnose-no-plot-trajectories-design.md |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Dependency Graph |
|
||||||
|
|
||||||
``` |
|
||||||
Batch 1 (parallel): 1.1, 1.2 [foundation - no deps] |
|
||||||
Batch 2 (parallel): 2.1 [core - depends on batch 1] |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Batch 1: Foundation (parallel - 2 implementers) |
|
||||||
|
|
||||||
All tasks in this batch have NO dependencies and run simultaneously. |
|
||||||
|
|
||||||
### Task 1.1: Extend diagnostics inspector |
|
||||||
**File:** `explorer_helpers.py` (modify function `inspect_positions_for_issues`) |
|
||||||
**Test:** `tests/test_explorer_helpers_diagnostics.py` |
|
||||||
**Depends:** none |
|
||||||
|
|
||||||
Purpose: add compact, structured diagnostics (mp_positions_sample, mp_positions_count, windows_with_no_positions) to the existing inspector output so both UI and tests can consume them. |
|
||||||
|
|
||||||
Implementation decisions (gap-filling): |
|
||||||
- Keep the function import-safe and pure (no Streamlit calls). Return additional keys under the same dict. |
|
||||||
- Provide small, deterministic samples (sorted keys limited to 10) so tests are stable. |
|
||||||
|
|
||||||
Estimate: 45-90 minutes |
|
||||||
|
|
||||||
Verify: `pytest -q tests/test_explorer_helpers_diagnostics.py` |
|
||||||
|
|
||||||
```python |
|
||||||
# COMPLETE test code - tests/test_explorer_helpers_diagnostics.py |
|
||||||
import numpy as np |
|
||||||
from explorer_helpers import inspect_positions_for_issues |
|
||||||
|
|
||||||
|
|
||||||
def test_inspect_positions_for_issues_basic(): |
|
||||||
positions_by_window = { |
|
||||||
"w1": {"mp1": (1.0, 2.0), "mp2": (float('nan'), float('nan'))}, |
|
||||||
"w2": {}, |
|
||||||
} |
|
||||||
party_map = {"mp1": "P1"} |
|
||||||
d = inspect_positions_for_issues(positions_by_window, party_map) |
|
||||||
|
|
||||||
# basic keys still present |
|
||||||
assert d["windows_count"] == 2 |
|
||||||
assert isinstance(d["mp_id_set"], set) |
|
||||||
# new diagnostics |
|
||||||
assert "mp_positions_count" in d |
|
||||||
assert d["mp_positions_count"] >= 1 |
|
||||||
assert "mp_positions_sample" in d |
|
||||||
assert isinstance(d["mp_positions_sample"], list) |
|
||||||
assert "windows_with_no_positions" in d |
|
||||||
assert isinstance(d["windows_with_no_positions"], list) |
|
||||||
|
|
||||||
``` |
|
||||||
|
|
||||||
```python |
|
||||||
# COMPLETE implementation - explorer_helpers.py (function replacement) |
|
||||||
def inspect_positions_for_issues( |
|
||||||
positions_by_window: Dict[str, Dict[str, Tuple[float, float]]], |
|
||||||
party_map: Dict[str, str], |
|
||||||
) -> Dict[str, Any]: |
|
||||||
"""Inspect positions_by_window for simple issues/summary. |
|
||||||
|
|
||||||
Returns a dictionary with keys including the previous ones (windows_count, |
|
||||||
window_labels, mp_id_set, party_map_count, parties_with_centroid_counts, |
|
||||||
mismatched_mp_ids_sample) plus: |
|
||||||
- mp_positions_count: int (num unique MP ids seen) |
|
||||||
- mp_positions_sample: list[str] (sorted sample up to 10) |
|
||||||
- windows_with_no_positions: list[str] |
|
||||||
|
|
||||||
This helper remains pure and import-safe so unit tests can exercise it. |
|
||||||
""" |
|
||||||
windows = list(positions_by_window.keys()) |
|
||||||
windows_count = len(windows) |
|
||||||
window_labels = sorted(windows)[:10] |
|
||||||
|
|
||||||
mp_id_set: Set[str] = set() |
|
||||||
parties_with_centroid_counts: Dict[str, int] = {} |
|
||||||
mismatched: Set[str] = set() |
|
||||||
windows_with_no_positions: List[str] = [] |
|
||||||
|
|
||||||
for win, pos in positions_by_window.items(): |
|
||||||
if not pos: |
|
||||||
windows_with_no_positions.append(win) |
|
||||||
continue |
|
||||||
present_parties: Set[str] = set() |
|
||||||
for ent in pos.keys(): |
|
||||||
if not ent: |
|
||||||
continue |
|
||||||
mp_id_set.add(ent) |
|
||||||
party = party_map.get(ent) |
|
||||||
if party is None: |
|
||||||
# try stripping paren variant |
|
||||||
party = party_map.get(_strip_paren(ent)) |
|
||||||
if party: |
|
||||||
present_parties.add(party) |
|
||||||
else: |
|
||||||
mismatched.add(ent) |
|
||||||
|
|
||||||
for p in present_parties: |
|
||||||
parties_with_centroid_counts[p] = parties_with_centroid_counts.get(p, 0) + 1 |
|
||||||
|
|
||||||
mismatched_mp_ids_sample = sorted(list(mismatched))[:10] |
|
||||||
|
|
||||||
mp_positions_sample = sorted(list(mp_id_set))[:10] |
|
||||||
mp_positions_count = len(mp_id_set) |
|
||||||
|
|
||||||
return { |
|
||||||
"windows_count": windows_count, |
|
||||||
"window_labels": window_labels, |
|
||||||
"mp_id_set": mp_id_set, |
|
||||||
"party_map_count": len(party_map), |
|
||||||
"parties_with_centroid_counts": parties_with_centroid_counts, |
|
||||||
"mismatched_mp_ids_sample": mismatched_mp_ids_sample, |
|
||||||
"mp_positions_sample": mp_positions_sample, |
|
||||||
"mp_positions_count": mp_positions_count, |
|
||||||
"windows_with_no_positions": windows_with_no_positions, |
|
||||||
} |
|
||||||
|
|
||||||
``` |
|
||||||
|
|
||||||
Commit: `feat(explorer): extend diagnostic inspector to surface mp samples/counts` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
### Task 1.2: Add tests and small helper for reading debug env var |
|
||||||
**File:** `explorer.py` (add function `get_debug_trajectories_enabled`) — logically part of the Batch 2 core work, but small and dependency-free, so it is scheduled here in Batch 1. |
|
||||||
**Test:** `tests/test_debug_flag.py` |
|
||||||
**Depends:** none |
|
||||||
|
|
||||||
Purpose: provide a single, testable helper that reads EXPLORER_DEBUG_TRAJECTORIES env var and returns a boolean. We use this consistently in UI code so tests can manipulate debug mode via env var. |
|
||||||
|
|
||||||
Decision: implement conservative parsing ("1", "true", "True") as truthy. This function will be used by build_trajectories_tab and tests. |
|
||||||
|
|
||||||
Estimate: 15-30 minutes |
|
||||||
|
|
||||||
Verify: `pytest -q tests/test_debug_flag.py` |
|
||||||
|
|
||||||
```python |
|
||||||
# COMPLETE test code - tests/test_debug_flag.py |
|
||||||
import os |
|
||||||
import importlib |
|
||||||
|
|
||||||
def test_get_debug_flag_on(monkeypatch): |
|
||||||
monkeypatch.setenv("EXPLORER_DEBUG_TRAJECTORIES", "1") |
|
||||||
import explorer |
|
||||||
importlib.reload(explorer) |
|
||||||
assert explorer.get_debug_trajectories_enabled() is True |
|
||||||
|
|
||||||
|
|
||||||
def test_get_debug_flag_off(monkeypatch): |
|
||||||
monkeypatch.delenv("EXPLORER_DEBUG_TRAJECTORIES", raising=False) |
|
||||||
import explorer |
|
||||||
importlib.reload(explorer) |
|
||||||
assert explorer.get_debug_trajectories_enabled() is False |
|
||||||
|
|
||||||
``` |
|
||||||
|
|
||||||
```python |
|
||||||
# COMPLETE implementation to add into explorer.py |
|
||||||
def get_debug_trajectories_enabled() -> bool: |
|
||||||
"""Return whether the Trajectories debug mode is enabled via env var. |
|
||||||
|
|
||||||
Truthy values: "1", "true", "True". Default False. |
|
||||||
""" |
|
||||||
val = os.getenv("EXPLORER_DEBUG_TRAJECTORIES", "") |
|
||||||
return val in ("1", "true", "True") |
|
||||||
|
|
||||||
``` |
|
||||||
|
|
||||||
Commit message: `chore(explorer): add get_debug_trajectories_enabled helper` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Batch 2: Core Modules (parallel - 1 implementer) |
|
||||||
|
|
||||||
These tasks depend on changes in Batch 1 (inspector additions and debug-flag helper). All tasks in this batch modify `explorer.py` (single-file microtask) and have a single test file. |
|
||||||
|
|
||||||
### Task 2.1: Instrument trajectories UI and un-silence exceptions |
|
||||||
**File:** `explorer.py` (update `select_trajectory_plot_data` exception handling, update `build_trajectories_tab` early-return instrumentation and try/except, add module-level diagnostics capture) |
|
||||||
**Test:** `tests/test_diagnose_no_plot_trajectories.py` |
|
||||||
**Depends:** 1.1, 1.2 |
|
||||||
|
|
||||||
Purpose: (A) Add opt-in debug UI binding to env var via checkbox and a DEBUG expander; (B) change helper-call swallow to log exceptions and include traceback in diagnostics; (C) instrument early-return gates (no positions, no mp_positions) to capture the reason and attach it to module-level diagnostics; (D) expose diagnostics to tests via attributes so tests can assert they were produced. |
|
||||||
|
|
||||||
Decisions / gap-fills: |
|
||||||
- Do not change public function signatures. To expose diagnostics to tests without changing signatures, set attributes on the function and module: |
|
||||||
- select_trajectory_plot_data._last_diagnostics -> last inspector summary |
|
||||||
- explorer._last_diagnostics -> diagnostics captured by build_trajectories_tab (early-returns or exceptions) |
|
||||||
- Always call logger.exception(...) when an exception happens to preserve logs. |
|
||||||
- Only call Streamlit UI functions to display tracebacks when debug mode is enabled. |
|
||||||
|
|
||||||
Estimate: 2-4 hours |
|
||||||
|
|
||||||
Verify: `pytest -q tests/test_diagnose_no_plot_trajectories.py` |
|
||||||
|
|
||||||
```python |
|
||||||
# COMPLETE test code - tests/test_diagnose_no_plot_trajectories.py |
|
||||||
import traceback |
|
||||||
import importlib |
|
||||||
import explorer |
|
||||||
from types import SimpleNamespace |
|
||||||
|
|
||||||
|
|
||||||
def test_select_helper_exception_is_captured(monkeypatch): |
|
||||||
# Force the inspector to raise and ensure diagnostics capture the traceback |
|
||||||
def _boom(*a, **k): |
|
||||||
raise RuntimeError("boom-inspector") |
|
||||||
|
|
||||||
monkeypatch.setattr("explorer_helpers.inspect_positions_for_issues", _boom) |
|
||||||
# call helper |
|
||||||
fig, count, banner = explorer.select_trajectory_plot_data({}, {}, [], []) |
|
||||||
# diagnostics should be attached to the function |
|
||||||
d = getattr(explorer.select_trajectory_plot_data, "_last_diagnostics", None) |
|
||||||
assert d is not None |
|
||||||
assert "inspector_exception" in d |
|
||||||
assert "boom-inspector" in d["inspector_exception"] |
|
||||||
|
|
||||||
|
|
||||||
def test_build_trajectories_tab_early_return_sets_diagnostics(monkeypatch): |
|
||||||
# Make load_positions return empty positions to trigger early return |
|
||||||
monkeypatch.setattr(explorer, "load_positions", lambda db, ws: ({}, None)) |
|
||||||
# Ensure debug mode enabled via env var |
|
||||||
monkeypatch.setenv("EXPLORER_DEBUG_TRAJECTORIES", "1") |
|
||||||
importlib.reload(explorer) |
|
||||||
# Call the tab builder (uses dummy Streamlit in tests) |
|
||||||
explorer.build_trajectories_tab("/fake.db", "2025") |
|
||||||
d = getattr(explorer, "_last_diagnostics", None) |
|
||||||
assert d is not None |
|
||||||
assert d.get("reason") == "no_positions" |
|
||||||
|
|
||||||
``` |
|
||||||
|
|
||||||
```python |
|
||||||
# COMPLETE implementation snippets to apply to explorer.py |
|
||||||
import traceback |
|
||||||
|
|
||||||
# Add near top-level (after imports in explorer.py) |
|
||||||
_last_diagnostics: Optional[dict] = None |
|
||||||
|
|
||||||
|
|
||||||
def get_debug_trajectories_enabled() -> bool: |
|
||||||
val = os.getenv("EXPLORER_DEBUG_TRAJECTORIES", "") |
|
||||||
return val in ("1", "true", "True") |
|
||||||
|
|
||||||
|
|
||||||
# Replace the small inspector try/except in select_trajectory_plot_data with the |
|
||||||
# following (complete function shown below replaces the existing select_trajectory_plot_data |
|
||||||
# definition in explorer.py): |
|
||||||
def select_trajectory_plot_data( |
|
||||||
positions_by_window: Dict[str, Dict[str, Tuple[float, float]]], |
|
||||||
party_map: Dict[str, str], |
|
||||||
windows: List[str], |
|
||||||
selected_parties: List[str], |
|
||||||
smooth_alpha: float = 0.35, |
|
||||||
mp_fallback_count: Optional[int] = None, |
|
||||||
) -> Tuple[go.Figure, int, Optional[str]]: |
|
||||||
"""Return (fig, trace_count, banner_text). |
|
||||||
|
|
||||||
Helper used by build_trajectories_tab. Does not call Streamlit. |
|
||||||
""" |
|
||||||
if mp_fallback_count is None: |
|
||||||
try: |
|
||||||
mp_fallback_count = int(os.getenv("EXPLORER_MP_FALLBACK_COUNT", "20")) |
|
||||||
except Exception: |
|
||||||
mp_fallback_count = 20 |
|
||||||
|
|
||||||
# Compute per-party centroids aligned to windows |
|
||||||
party_centroids, meta = compute_party_centroids( |
|
||||||
positions_by_window, party_map, windows |
|
||||||
) |
|
||||||
|
|
||||||
# Use inspector to collect diagnostics (import-safe, pure helper). |
|
||||||
try: |
|
||||||
inspector_summary = inspect_positions_for_issues(positions_by_window, party_map) |
|
||||||
except Exception as e: |
|
||||||
# Do not silently swallow: log and capture traceback text so tests / UI |
|
||||||
# can inspect it. Keep function import-safe (no Streamlit here). |
|
||||||
tb = traceback.format_exc() |
|
||||||
logger.exception("inspect_positions_for_issues failed: %s", e) |
|
||||||
inspector_summary = {"inspector_exception": tb} |
|
||||||
|
|
||||||
# expose diagnostics for tests without changing function signature |
|
||||||
setattr(select_trajectory_plot_data, "_last_diagnostics", inspector_summary) |
|
||||||
logger.debug("select_trajectory_plot_data inspector summary: %s", inspector_summary) |
|
||||||
|
|
||||||
# ... rest of the original function remains unchanged (build fig/trace_count) |
|
||||||
# (Implementation note: keep the rest identical to existing function.) |
|
||||||
|
|
||||||
|
|
||||||
# Now update the call-site in build_trajectories_tab (replace the try/except around |
|
||||||
# select_trajectory_plot_data invocation with the following snippet): |
|
||||||
try: |
|
||||||
fig2, trace_count2, banner_text = select_trajectory_plot_data( |
|
||||||
positions_by_window, party_map, windows, selected_parties, smooth_alpha |
|
||||||
) |
|
||||||
if fig2 is not None: |
|
||||||
fig = fig2 |
|
||||||
trace_count = trace_count2 |
|
||||||
if banner_text: |
|
||||||
st.caption(banner_text) |
|
||||||
except Exception as e: |
|
||||||
# Do not silently pass. Log, capture traceback and (when debug enabled) |
|
||||||
# surface to Streamlit. |
|
||||||
tb = traceback.format_exc() |
|
||||||
logger.exception("select_trajectory_plot_data raised: %s", e) |
|
||||||
global _last_diagnostics |
|
||||||
_last_diagnostics = {"build_exception": tb} |
|
||||||
if get_debug_trajectories_enabled(): |
|
||||||
try: |
|
||||||
st.exception(e) |
|
||||||
except Exception: |
|
||||||
# Streamlit may not be available in test env; fall back to text_area |
|
||||||
try: |
|
||||||
st.text_area("Trajectories exception", tb) |
|
||||||
except Exception: |
|
||||||
pass |
|
||||||
|
|
||||||
|
|
||||||
# Instrument early-return gates (example: when positions_by_window is empty) by |
|
||||||
# setting _last_diagnostics before returning. Replace the current block: |
|
||||||
if not positions_by_window: |
|
||||||
st.warning("Geen positiedata beschikbaar.") |
|
||||||
global _last_diagnostics |
|
||||||
_last_diagnostics = {"reason": "no_positions", "inspector": {}} |
|
||||||
if get_debug_trajectories_enabled(): |
|
||||||
# call inspector and attach diagnostics when debug enabled |
|
||||||
try: |
|
||||||
_last_diagnostics["inspector"] = inspect_positions_for_issues(positions_by_window, {}) |
|
||||||
except Exception: |
|
||||||
_last_diagnostics["inspector"] = {"error": "inspector_failed"} |
|
||||||
return |
|
||||||
|
|
||||||
# Note: make similar instrumentation for the `if not mp_positions:` early return |
|
||||||
# inside the per-MP fallback path: set _last_diagnostics = {"reason": "no_mp_positions"} |
|
||||||
|
|
||||||
``` |
|
||||||
|
|
||||||
Notes for implementer: |
|
||||||
- Insert the two helper functions and the try/except replacement in the appropriate places of explorer.py. The select_trajectory_plot_data replacement above should replace the function body; keep the unchanged plotting logic intact after the diagnostic area. |
|
||||||
- Add the module-level _last_diagnostics variable near the top of explorer.py (after imports). |
|
||||||
|
|
||||||
Commit: `feat(explorer): instrument trajectories with debug diagnostics and un-silence helper exceptions` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Verification & Manual checks |
|
||||||
|
|
||||||
- Run unit tests for the modified files: |
|
||||||
- pytest -q tests/test_explorer_helpers_diagnostics.py |
|
||||||
- pytest -q tests/test_debug_flag.py |
|
||||||
- pytest -q tests/test_diagnose_no_plot_trajectories.py |
|
||||||
- Manual: run Streamlit locally with EXPLORER_DEBUG_TRAJECTORIES=1 and inspect the "DEBUG" expander in the Trajectories tab to see the diagnostics block and any surfaced tracebacks. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Rollback plan |
|
||||||
|
|
||||||
- All changes gated behind debug env var and small: revert the two modified files (explorer.py, explorer_helpers.py) to previous commit to remove instrumentation. |
|
||||||
- Because public signatures are unchanged, rollout/revert is safe. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Appendix — quick implementer checklist |
|
||||||
|
|
||||||
1. Implement inspector changes (explorer_helpers.py) and run its tests. |
|
||||||
2. Add get_debug_trajectories_enabled helper and tests. |
|
||||||
3. Modify explorer.py: add _last_diagnostics, update select_trajectory_plot_data try/except, update build_trajectories_tab try/except and early-return instrumentation, add debug checkbox wiring in UI. |
|
||||||
4. Add tests that monkeypatch inspector and load_positions and assert diagnostics are created. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
Written: thoughts/shared/plans/2026-03-30-diagnose-no-plot-trajectories.md |
|
||||||
@ -1,254 +0,0 @@ |
|||||||
# Fix Missing Trajectories — Implementation Plan |
|
||||||
|
|
||||||
This implementation plan was created using the writing-plans skill. |
|
||||||
|
|
||||||
Goal: Restore visible party trajectories in the Explorer "Partij Trajectories" tab by adding validation/inspection helpers, making centroid computation tolerant of missing windows (emit NaN gaps), and adding an automatic MP-level fallback (top-K) with a debug expander and hover raw-values preserved. |
|
||||||
|
|
||||||
Design: thoughts/shared/designs/2026-03-30-fix-missing-trajectories-design.md |
|
||||||
|
|
||||||
Architecture: Small, focused changes in explorer_helpers.py (pure helpers + unit tests) and explorer.py (UI wiring and plotting policy). Keep helper logic independent of Streamlit so tests run in CI without heavy deps. Provide a graceful MP fallback and compact diagnostics exposed behind a collapsed expander. |
|
||||||
|
|
||||||
Tech Stack: Python 3.x, pytest, Streamlit (manual UI verification), Plotly (already used). Tests must run in CI with duckdb / streamlit optional — unit tests only use pure Python/numpy. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Dependency Graph |
|
||||||
|
|
||||||
``` |
|
||||||
Batch 1 (parallel): 1.1, 1.2 [foundation - no deps] |
|
||||||
Batch 2 (parallel): 2.1, 2.2 [core - depends on batch 1] |
|
||||||
Batch 3 (parallel): 3.1, 3.2 [integration - depends on batch 2] |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Decisions / gap-filling (explicit) |
|
||||||
- EXPLORER_MP_FALLBACK_COUNT environment variable: integer, default 20. Used to choose top-K MPs when party centroids are absent. |
|
||||||
- Top-K definition: rank MPs by seat_count when it is available in mp_metadata; otherwise fall back to party axis activity (mean magnitude) obtained via load_party_axis_scores. |
|
||||||
- Validation rules (inspect_positions_for_issues): detect empty positions_by_window, windows_count mismatch across MPs, sample of mismatched mp ids, parties_with_centroid_counts dictionary. Reason: these are the most likely causes of empty traces. |
|
||||||
- compute_party_centroids behavior: returns per-party arrays aligned to windows (list of floats or np.nan), metadata per-party containing counts and missing indices. Guarantees empty lists (never None). |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Batch 1: Foundation (parallel - 2 implementers) |
|
||||||
|
|
||||||
All tasks in this batch have NO dependencies and can run simultaneously. |
|
||||||
|
|
||||||
### Task 1.1: Add inspector helper |
|
||||||
**File:** `explorer_helpers.py` |
|
||||||
**Test:** `tests/test_inspect_positions_for_issues.py` |
|
||||||
**Depends:** none |
|
||||||
|
|
||||||
Helpers to add (names only): |
|
||||||
- inspect_positions_for_issues(positions_by_window: Dict[str, Dict[str, Tuple[float,float]]], party_map: Dict[str,str]) -> Dict[str, Any] |
|
||||||
|
|
||||||
What it returns (documented in test expectations): |
|
||||||
- windows_count: int |
|
||||||
- window_labels: list[str] (sorted sample of window keys) |
|
||||||
- mp_id_set: set[str] (set of entity ids seen across windows) |
|
||||||
- party_map_count: int (len(party_map)) |
|
||||||
- parties_with_centroid_counts: Dict[str, int] (mapping party -> number of windows with a centroid) |
|
||||||
- mismatched_mp_ids_sample: list[str] (sample of ids present in positions but not in party_map, up to 10) |
|
||||||
|
|
||||||
Tests to add (exact assertions): |
|
||||||
- tests/test_inspect_positions_for_issues.py (unit): |
|
||||||
- Construct synthetic positions_by_window with 3 windows, with some MPs missing in some windows and some mp ids that aren't in party_map. Assert returned windows_count == 3, party_map_count equals len(party_map), parties_with_centroid_counts entries for expected parties, and mismatched_mp_ids_sample contains the expected missing keys. |
|
||||||
|
|
||||||
Verify: |
|
||||||
- Run: `pytest tests/test_inspect_positions_for_issues.py -q` |
|
||||||
- Expected: PASS |
|
||||||
|
|
||||||
Commit message: `feat(explorer): add inspect_positions_for_issues helper + test` |
|
||||||
|
|
||||||
### Task 1.2: Add compute_party_centroids (per-window aligned arrays) |
|
||||||
**File:** `explorer_helpers.py` (same file; add new function) |
|
||||||
**Test:** `tests/test_compute_party_centroids.py` |
|
||||||
**Depends:** none |
|
||||||
|
|
||||||
Helper to add (name only): |
|
||||||
- compute_party_centroids(positions_by_window: Dict[str, Dict[str, Tuple[float,float]]], party_map: Dict[str,str], windows: List[str]) -> Tuple[Dict[str, List[float]], Dict[str, Any]] |
|
||||||
|
|
||||||
Behavior contract (for implementer): |
|
||||||
- Return party_centroids: dict[party -> list[float|np.nan]] aligned to the provided windows order. For a party and window where no MPs present, insert np.nan at that index. |
|
||||||
- Return metadata: {"per_party_counts": {party: int}, "total_windows": int, "parties": sorted_list} |
|
||||||
- Guarantees: never return None; party lists can be empty list but must have length == len(windows) for parties present in `parties` list. |
|
||||||
|
|
||||||
Tests to add (exact assertions): |
|
||||||
- tests/test_compute_party_centroids.py (unit): |
|
||||||
- Case A: full coverage — every party has coords in every window -> assert no np.nan and lengths equal windows count. |
|
||||||
- Case B: partial coverage -> assert np.nan present at expected indices and metadata.per_party_counts match counts. |
|
||||||
- Case C: no parties (empty positions_by_window) -> party_centroids == {} and metadata.total_windows == len(windows) |
|
||||||
|
|
||||||
Verify: |
|
||||||
- Run: `pytest tests/test_compute_party_centroids.py -q` |
|
||||||
- Expected: PASS |
|
||||||
|
|
||||||
Commit message: `feat(explorer): add compute_party_centroids to produce aligned per-party arrays` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Batch 2: Core Modules (parallel - 2 implementers) |
|
||||||
All tasks depend on Batch 1. |
|
||||||
|
|
||||||
### Task 2.1: Modify explorer.py to use helpers and add MP fallback |
|
||||||
**File:** `explorer.py` (modify function build_trajectories_tab only) |
|
||||||
**Test:** `tests/test_build_trajectories_tab_fallback.py` |
|
||||||
**Depends:** 1.1, 1.2 |
|
||||||
|
|
||||||
Changes to make (high-level, exact function to modify): |
|
||||||
- modify build_trajectories_tab(db_path: str, window_size: str) to: |
|
||||||
- early: call inspect_positions_for_issues(positions_by_window, party_map) and render the compact DEBUG expander content (same keys as the inspector returns). Keep the expander collapsed by default. |
|
||||||
- replace existing per-window centroid construction with compute_party_centroids(...) which returns aligned arrays containing np.nan placeholders. |
|
||||||
- relax party-selection filtering: treat a party as plottable if it has >= 1 non-nan centroid (previous code required full coverage). This ensures partial traces still render with gaps. |
|
||||||
- preserve hover customdata to include raw centroid values (already present in code) — ensure when centroids contain np.nan for raw values we still populate customdata with (np.nan, np.nan). |
|
||||||
- If no party centroids (empty dict or all-party centroid vectors are entirely nan), trigger MP fallback: plot top-K MPs (EXPLORER_MP_FALLBACK_COUNT, default 20) as per design. This fallback must show a small banner message in Dutch: "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback." and provide a toggle (st.checkbox) to expand to show the full top-K list. |
|
||||||
|
|
||||||
Notes / gap-filling decisions (explicit): |
|
||||||
- EXPLORER_MP_FALLBACK_COUNT: implement read via int(os.getenv("EXPLORER_MP_FALLBACK_COUNT", "20")) |
|
||||||
- For selecting top-K MPs: use seat_count if present in mp_metadata (query `mp_metadata` for a seat_count-like field). If unavailable, choose MPs with most non-empty positions across windows. Implementer decision: compute activity = number of windows with a valid (non-None) position and sort descending. |
|
||||||
|
|
||||||
Tests to add (integration, shims-friendly): |
|
||||||
- tests/test_build_trajectories_tab_fallback.py |
|
||||||
- Scenario 1 (party centroids present): Provide a fake positions_by_window and party_map fixture with at least one party having centroids in multiple windows and assert that when build_trajectories_tab is invoked (call the internal plotting branch with a test harness) it adds at least one trace (fig.data length > 0) and trace names match selected parties. |
|
||||||
- Scenario 2 (no party centroids): Provide positions_by_window where party_map is empty or all MPs map to Unknown; assert the MP fallback path is chosen (method returns or builds fig with MPs) and that the banner message string appears in returned metadata or printed UI stub. Since Streamlit is not easily invoked in unit tests, structure the UI branch so the plotting logic returns fig when called from tests — write the test to import a small internal helper (e.g., build_trajectories_figure_for_test) if necessary. If refactor needed, keep it minimal: extract plotting assembly to a private helper _assemble_trajectories_figure(...) that returns (fig, trace_count, banner_text) so tests can assert fig traces without needing Streamlit. |
|
||||||
|
|
||||||
Verify (unit/integration): |
|
||||||
- Run: `pytest tests/test_build_trajectories_tab_fallback.py -q` |
|
||||||
- Expected: PASS |
|
||||||
|
|
||||||
Commit message: `feat(explorer): use inspector & compute_party_centroids; add MP top-K fallback and debug expander` |
|
||||||
|
|
||||||
### Task 2.2: Add/adjust unit tests for hover/raw values and NaN handling |
|
||||||
**File:** `tests/test_explorer_helpers.py` (update) and `tests/test_explorer_chart.py` (add test) |
|
||||||
**Depends:** 1.2 |
|
||||||
|
|
||||||
Changes/tests to add (exact tests): |
|
||||||
- tests/test_explorer_helpers.py: add a test verifying compute_party_centroids produces np.nan for missing windows and that hover customdata creation uses (float, float) or (np.nan, np.nan) consistently. |
|
||||||
- tests/test_explorer_chart.py: add a small unit test that constructs a go.Figure via the new plotting helper (see 2.1) and asserts: |
|
||||||
- traces exist when parties have partial coverage |
|
||||||
- customdata arrays length equals x/y arrays length |
|
||||||
- hovertemplate contains both smoothed and raw placeholder markers (strings like 'x (raw)') |
|
||||||
|
|
||||||
Verify: |
|
||||||
- Run: `pytest tests/test_explorer_helpers.py::test_compute_party_centroids_nan_handling -q` |
|
||||||
- Run: `pytest tests/test_explorer_chart.py::test_partial_party_traces -q` |
|
||||||
- Expected: PASS |
|
||||||
|
|
||||||
Commit message: `test(explorer): add tests for NaN gaps and hover customdata preservation` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Batch 3: Integration & Manual UI checks (parallel - 2 implementers) |
|
||||||
Depends on Batch 2 |
|
||||||
|
|
||||||
### Task 3.1: Integration test (shim-friendly) for three scenarios |
|
||||||
**File:** `tests/integration/test_trajectories_ui_integration.py` |
|
||||||
**Test:** the file above |
|
||||||
**Depends:** 2.1, 2.2 |
|
||||||
|
|
||||||
Tests to add (exact scenarios): |
|
||||||
- Scenario A (full party centroids): positions_by_window with full coverage — assert plot built uses party traces; simulate user selection to include at least one party; assert fig.data length >= 1. |
|
||||||
- Scenario B (party centroids missing): party_map empty — assert MP fallback chosen and number of plotted MP traces == EXPLORER_MP_FALLBACK_COUNT or the available MPs if fewer. |
|
||||||
- Scenario C (partial centroids): party centroids partial across windows — assert traces exist and customdata shows np.nan at missing indices. |
|
||||||
|
|
||||||
Test harness notes: tests should import small pure helpers from explorer.py that assemble figures without calling st.plotly_chart or other Streamlit side-effects. If necessary, add a small refactor in explorer.py: `_assemble_trajectory_figure_for_tests(positions_by_window, party_centroids, selected_parties, windows, smooth_alpha, ...) -> go.Figure, metadata` and call that from build_trajectories_tab. Tests then call this helper. Keep the helper private and minimal. |
|
||||||
|
|
||||||
Verify: |
|
||||||
- Run: `pytest tests/integration/test_trajectories_ui_integration.py -q` |
|
||||||
- Expected: PASS |
|
||||||
|
|
||||||
Commit message: `test(integration): trajectories UI integration scenarios (full/partial/missing)` |
|
||||||
|
|
||||||
### Task 3.2: Manual Streamlit verification steps (documented) |
|
||||||
**File:** none (manual steps below); include in PR description. |
|
||||||
**Depends:** 2.1 |
|
||||||
|
|
||||||
Manual verification (Streamlit): |
|
||||||
1. Start Streamlit: `streamlit run explorer.py --server.headless true` (or run locally with a test DB path) |
|
||||||
2. Open the app in browser (usually http://localhost:8501). Go to tab "Partij Trajectories". |
|
||||||
3. Scenario: normal DB with party centroids |
|
||||||
- Select a recent window_size (e.g., quarterly or annual as appropriate) |
|
||||||
- Ensure default parties (CDA, D66, VVD) appear and trajectories are visible. |
|
||||||
- Hover on a trace point: verify hover shows both smoothed and raw centroid values (x (smoothed), x (raw)). |
|
||||||
- Open the DEBUG expander (collapsed by default) and confirm it shows `windows (count)`, `windows sample`, `party_map entries`, `parties with centroids`, `sample centroid window counts per party`. |
|
||||||
4. Scenario: simulate missing party centroids (set party_map to {} or use a DB snapshot with missing mp_metadata) |
|
||||||
- The app should show the fallback banner: "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback." and render MP trajectories (top-K). There should be a checkbox to expand the top-K list. |
|
||||||
5. Scenario: partial centroids |
|
||||||
- For a party missing centroids in some windows, its trace should appear but with gaps (line discontinuity where NaNs present). Hover customdata at gap points should show raw value `nan` or a placeholder. |
|
||||||
|
|
||||||
Streamlit-specific acceptance criteria: |
|
||||||
- traces drawn when at least one party has >=1 centroid |
|
||||||
- MP fallback automatically displayed (banner + plotted MP traces) when no party centroids |
|
||||||
- DEBUG expander shows diagnostics described above |
|
||||||
- Hover shows raw centroid values even when smoothing is applied |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Files to create / modify (one-file-per-task mapping) |
|
||||||
|
|
||||||
Batch 1 |
|
||||||
- Modify: `explorer_helpers.py` — add functions: |
|
||||||
- inspect_positions_for_issues |
|
||||||
- compute_party_centroids |
|
||||||
- Add test: `tests/test_inspect_positions_for_issues.py` |
|
||||||
- Add test: `tests/test_compute_party_centroids.py` |
|
||||||
|
|
||||||
Batch 2 |
|
||||||
- Modify: `explorer.py` — function build_trajectories_tab; optional small private helper `_assemble_trajectory_figure_for_tests` (single-file change) |
|
||||||
- Add test: `tests/test_build_trajectories_tab_fallback.py` |
|
||||||
- Update/add tests: `tests/test_explorer_helpers.py` (augment), `tests/test_explorer_chart.py` |
|
||||||
|
|
||||||
Batch 3 |
|
||||||
- Add test: `tests/integration/test_trajectories_ui_integration.py` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Verification commands (unit & CI) |
|
||||||
- Unit test single file: `pytest tests/test_inspect_positions_for_issues.py -q` |
|
||||||
- Unit test compute party centroids: `pytest tests/test_compute_party_centroids.py -q` |
|
||||||
- Trajectories fallback unit tests: `pytest tests/test_build_trajectories_tab_fallback.py -q` |
|
||||||
- Integration tests (shim-friendly): `pytest tests/integration/test_trajectories_ui_integration.py -q` |
|
||||||
- Run full test suite: `pytest -q` |
|
||||||
|
|
||||||
Manual Streamlit checks: follow steps in Task 3.2 above. Recommended quick dev workflow: |
|
||||||
- Start streamlit: `streamlit run explorer.py --server.headless true` |
|
||||||
- Use the URL printed in console (usually http://localhost:8501) and perform the manual steps. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Blocked / Unblocked checklist |
|
||||||
|
|
||||||
- [ ] Blocker: Access to a representative DB fixture (small DuckDB or JSON fixture) that contains windows, svd_vectors and mp_metadata. Without it, integration/manual checks are limited. (Mitigation: tests use synthetic positions_by_window and party_map fixtures — unblocked for unit tests.) |
|
||||||
- [ ] Blocker: If MP seat_count is required from DB and not present in test fixtures, fallback selection will use activity-based ranking. (Mitigation: implement activity fallback.) |
|
||||||
- [x] Unblocked: Adding pure helpers in explorer_helpers.py (unit tests cover behavior without Streamlit/duckdb) |
|
||||||
- [x] Unblocked: Modifying build_trajectories_tab to call helpers and add banner + expander (code-local change) |
|
||||||
- [ ] Optional: Agree on EXPLORER_MP_FALLBACK_COUNT envvar default (I set default 20). If you want a different default, tell me now. |
|
||||||
|
|
||||||
If any of the above blockers remain, proceed with unit tests and open a PR discussion for integration DB fixtures. |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Estimated timeline (hours) |
|
||||||
|
|
||||||
- Task 1.1 (inspect_positions_for_issues + unit test): 1.5 h |
|
||||||
- Task 1.2 (compute_party_centroids + unit tests): 3.0 h |
|
||||||
- Task 2.1 (explorer.py changes: wiring, MP fallback, debug expander): 4.0 h |
|
||||||
- Task 2.2 (tests for hover/NaN handling): 2.0 h |
|
||||||
- Task 3.1 (integration tests / small refactor helper): 2.5 h |
|
||||||
- Task 3.2 (manual Streamlit QA and documentation): 1.5 h |
|
||||||
- PR polish, CI tweaks, and addressing review comments: 2.0 h |
|
||||||
|
|
||||||
Total: 16.5 hours (approx) |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## PR checklist / deliverables |
|
||||||
- [ ] Unit tests for inspector and centroids pass |
|
||||||
- [ ] build_trajectories_tab updated with debug expander and fallback |
|
||||||
- [ ] Integration tests for three scenarios pass (or documented reason for partial coverage) |
|
||||||
- [ ] Manual Streamlit QA steps documented in PR and verified locally |
|
||||||
- [ ] Add mention of EXPLORER_MP_FALLBACK_COUNT to README or environment docs (optional follow-up) |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
If you'd like, I can now (A) produce the concrete test contents and minimal helper implementations as separate micro-tasks (one file + one test per task) ready for implementers, or (B) proceed to create and apply the code changes in this repo. Which do you prefer? |
|
||||||
@ -1,288 +0,0 @@ |
|||||||
# Trajectory Plots Not Showing — Debugging Plan |
|
||||||
|
|
||||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. |
|
||||||
|
|
||||||
**Goal:** Identify why trajectory plots are invisible or empty in the Streamlit Explorer UI, then fix the root cause. |
|
||||||
|
|
||||||
**Architecture:** Systematic step-by-step pipeline trace from UI → DB. Each stage has explicit "what should I see" checkpoints so we can pinpoint exactly where data becomes invisible. |
|
||||||
|
|
||||||
**Tech Stack:** Streamlit, Plotly, DuckDB, Python ≥3.13, uv |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Debugging Pipeline (Stage-by-Stage Checkpoints) |
|
||||||
|
|
||||||
``` |
|
||||||
┌─────────────────────────────────────────────────────────────────────────┐ |
|
||||||
│ STAGE 0: UI Layer — what does the user actually see? │ |
|
||||||
│ explorer.py → build_trajectories_tab() │ |
|
||||||
│ → Is the tab visible? Empty chart? Error message? No chart at all? │ |
|
||||||
└─────────────────────────────────────────────────────────────────────────┘ |
|
||||||
↓ |
|
||||||
┌─────────────────────────────────────────────────────────────────────────┐ |
|
||||||
│ STAGE 1: positions_by_window — are MP positions loaded? │ |
|
||||||
│ load_positions(db_path, "annual") │ |
|
||||||
│ → Expected: 12 windows, ~150-200 MPs per window │ |
|
||||||
│ → Check: _last_trajectories_diagnostics["stage"] │ |
|
||||||
└─────────────────────────────────────────────────────────────────────────┘ |
|
||||||
↓ |
|
||||||
┌─────────────────────────────────────────────────────────────────────────┐ |
|
||||||
│ STAGE 2: party_map — are MP→party mappings loaded? │ |
|
||||||
│ load_party_map(db_path) │ |
|
||||||
│ → Expected: ~1036 entries │ |
|
||||||
│ → Check: party_map is non-empty dict │ |
|
||||||
└─────────────────────────────────────────────────────────────────────────┘ |
|
||||||
↓ |
|
||||||
┌─────────────────────────────────────────────────────────────────────────┐ |
|
||||||
│ STAGE 3: party centroids — are party means computed? │ |
|
||||||
│ compute_party_centroids() / compute_party_coords() │ |
|
||||||
│ → Expected: CDA, D66, VVD, PVV, SP, GroenLinks-PvdA centroids exist │ |
|
||||||
│ → Check: plottable_parties > 0 │ |
|
||||||
└─────────────────────────────────────────────────────────────────────────┘ |
|
||||||
↓ |
|
||||||
┌─────────────────────────────────────────────────────────────────────────┐ |
|
||||||
│ STAGE 4: select_trajectory_plot_data — does it return traces? │ |
|
||||||
│ → Expected: fig with 3-6 colored scatter traces, trace_count > 0 │ |
|
||||||
│ → Check: banner_text is None (no fallback), trace_count ≥ 3 │ |
|
||||||
└─────────────────────────────────────────────────────────────────────────┘ |
|
||||||
↓ |
|
||||||
┌─────────────────────────────────────────────────────────────────────────┐ |
|
||||||
│ STAGE 5: Plotly render — is the figure rendered in the browser? │ |
|
||||||
│ st.plotly_chart(fig, use_container_width=True) │ |
|
||||||
│ → Expected: visible chart with colored party lines │ |
|
||||||
│ → Check: browser DOM, no JS errors │ |
|
||||||
└─────────────────────────────────────────────────────────────────────────┘ |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 1: Instrument the app to print real-time pipeline state |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Modify: `explorer.py` (add print statements at each stage) |
|
||||||
- Test: Run `uv run streamlit run explorer.py` with `EXPLORER_DEBUG_TRAJECTORIES=1` |
|
||||||
|
|
||||||
- [ ] **Step 1: Add stage-0 checkpoint at top of `build_trajectories_tab`** |
|
||||||
|
|
||||||
Read `explorer.py` lines 1601-1650. Add a print statement at the start of `build_trajectories_tab`: |
|
||||||
|
|
||||||
```python |
|
||||||
print(f"[TRAJ DEBUG] build_trajectories_tab called — db_path={db_path}, window_size={window_size}") |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Add stage-1 checkpoint after `load_positions`** |
|
||||||
|
|
||||||
Read `explorer.py` lines 1605-1610. After the call to `load_positions`, add: |
|
||||||
|
|
||||||
```python |
|
||||||
positions_by_window, axis_def = load_positions(db_path, window_size) |
|
||||||
print(f"[TRAJ DEBUG] load_positions → {len(positions_by_window)} windows, " |
|
||||||
f"total MPs={sum(len(v) for v in positions_by_window.values())}") |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 3: Add stage-2 checkpoint after `load_party_map`** |
|
||||||
|
|
||||||
Read `explorer.py` lines 1638-1642. After the call to `load_party_map`, add: |
|
||||||
|
|
||||||
```python |
|
||||||
party_map = load_party_map(db_path) |
|
||||||
print(f"[TRAJ DEBUG] load_party_map → {len(party_map)} entries, " |
|
||||||
f"sample={list(party_map.items())[:3]}") |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 4: Add stage-3 checkpoint after centroid computation** |
|
||||||
|
|
||||||
Read `explorer.py` lines 1641-1670. After the inline centroid loop, add: |
|
||||||
|
|
||||||
```python |
|
||||||
all_parties = sorted(set(party_map.get(mp) for mps in positions_by_window.values() for mp in mps) - {None, "Unknown"}) |
|
||||||
print(f"[TRAJ DEBUG] all_parties (raw from party_map) → {len(all_parties)} parties: {all_parties[:10]}") |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 5: Add stage-4 checkpoint before `st.plotly_chart`** |
|
||||||
|
|
||||||
Read `explorer.py` around line 2105. Before the `st.plotly_chart` call, add: |
|
||||||
|
|
||||||
```python |
|
||||||
print(f"[TRAJ DEBUG] About to render plotly chart — trace_count={trace_count}, " |
|
||||||
f"banner={banner_text}, fig has {len(fig.data)} traces") |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 6: Run the app and capture all debug output** |
|
||||||
|
|
||||||
```bash |
|
||||||
EXPLORER_DEBUG_TRAJECTORIES=1 uv run streamlit run explorer.py 2>&1 | grep TRAJ |
|
||||||
``` |
|
||||||
|
|
||||||
Expected output (all stages should print): |
|
||||||
``` |
|
||||||
[TRAJ DEBUG] build_trajectories_tab called — db_path=..., window_size=annual |
|
||||||
[TRAJ DEBUG] load_positions → 12 windows, total MPs=... |
|
||||||
[TRAJ DEBUG] load_party_map → 1036 entries, sample=[(...), (...), (...)] |
|
||||||
[TRAJ DEBUG] all_parties (raw from party_map) → N parties: [...] |
|
||||||
[TRAJ DEBUG] About to render plotly chart — trace_count=N, banner=None, fig has N traces |
|
||||||
``` |
|
||||||
|
|
||||||
**If any stage is missing or shows 0/empty, that's the bug location. Document which stage fails and proceed to the corresponding fix task.** |
|
||||||
|
|
||||||
- [ ] **Step 7: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add explorer.py |
|
||||||
git commit -m "chore: add TRAJ DEBUG print checkpoints to build_trajectories_tab" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 2: Fix each failure mode |
|
||||||
|
|
||||||
Based on Task 1 output, one of these will be the culprit: |
|
||||||
|
|
||||||
### Failure Mode A: `positions_by_window` is empty (Stage 1) |
|
||||||
|
|
||||||
**Symptom:** `load_positions → 0 windows` |
|
||||||
|
|
||||||
**Root causes to check:** |
|
||||||
- `get_uniform_dim_windows` returns `[]` (no dim-50 windows in DB) |
|
||||||
- `compute_2d_axes` silently fails on all windows |
|
||||||
- DB path is wrong or `data/motions.db` is missing |
|
||||||
|
|
||||||
**Fix:** |
|
||||||
- [ ] Run: `uv run python -c "from explorer import get_uniform_dim_windows; print(get_uniform_dim_windows('data/motions.db'))"` |
|
||||||
- [ ] If empty: query DB directly — `uv run duckdb data/motions.db "SELECT COUNT(*) FROM svd_vectors WHERE entity_type='mp'"` and check dimension distribution |
|
||||||
- [ ] If `compute_2d_axes` fails: add try/except with print at `explorer.py:584` |
|
||||||
- [ ] If DB path wrong: fix `run_app()` to resolve relative path |
|
||||||
|
|
||||||
### Failure Mode B: `party_map` is empty (Stage 2) |
|
||||||
|
|
||||||
**Symptom:** `load_party_map → 0 entries` |
|
||||||
|
|
||||||
**Root causes:** |
|
||||||
- `mp_metadata` and `mp_votes` tables are empty or missing |
|
||||||
- DuckDB connection fails |
|
||||||
- DB path points to wrong file |
|
||||||
|
|
||||||
**Fix:** |
|
||||||
- [ ] Run: `uv run python -c "from analysis.visualize import _load_party_map; print(len(_load_party_map('data/motions.db')))"` |
|
||||||
- [ ] If 0: query `SELECT COUNT(*) FROM mp_metadata`, `SELECT COUNT(*) FROM mp_votes` |
|
||||||
- [ ] If tables missing: run data pipeline to populate them |
|
||||||
- [ ] If DuckDB fails to import: check `pip install duckdb` in the uv environment |
|
||||||
|
|
||||||
### Failure Mode C: `all_parties` is empty (Stage 3) |
|
||||||
|
|
||||||
**Symptom:** `all_parties (raw from party_map) → 0 parties` |
|
||||||
|
|
||||||
**Root causes:** |
|
||||||
- All MP names in `positions_by_window` have no match in `party_map` (name mismatch) |
|
||||||
- Every MP maps to `"Unknown"` or `None` |
|
||||||
|
|
||||||
**Fix:** |
|
||||||
- [ ] Run: `uv run python -c "from explorer import load_positions, load_party_map; pw = load_positions('data/motions.db', 'annual')[0]; pm = load_party_map('data/motions.db'); sample_mps = list(pw[list(pw.keys())[0]].keys())[:5]; print({mp: pm.get(mp, 'NO MATCH') for mp in sample_mps})"` |
|
||||||
- [ ] If name mismatches: investigate `_strip_paren` fallback logic in `compute_party_coords` (explorer_helpers.py:165-170) |
|
||||||
- [ ] If too many mismatches: add name normalization (strip titles, standardize suffixes) |
|
||||||
- [ ] Commit fix with test |
|
||||||
|
|
||||||
### Failure Mode D: `trace_count == 0` (Stage 4) |
|
||||||
|
|
||||||
**Symptom:** `About to render plotly chart — trace_count=0` or `banner != None` |
|
||||||
|
|
||||||
**Root causes:** |
|
||||||
- All party centroids are NaN (every MP position is NaN) |
|
||||||
- `compute_party_coords` filters out all parties (NaN/Inf in all positions) |
|
||||||
- `select_trajectory_plot_data` falls back to MP trajectories but MP fallback also fails |
|
||||||
|
|
||||||
**Fix:** |
|
||||||
- [ ] Add debug print inside `compute_party_coords`: `print(f"[TRAJ DEBUG] compute_party_coords window={window_id} → {len(party_coords)} parties: {list(party_coords.keys())[:5]}")` |
|
||||||
- [ ] Check if NaN comes from `compute_2d_axes` output (PCA on svd_vectors) |
|
||||||
- [ ] Run: `uv run python -c "from explorer import load_positions; pw = load_positions('data/motions.db', 'annual')[0]; win = list(pw.values())[0]; sample = list(win.items())[:3]; print({k: v for k, v in sample})"` — if all values are `(nan, nan)`, the PCA step is producing NaN |
|
||||||
- [ ] If PCA produces NaN: check `analysis/political_axis.py:compute_2d_axes` for the specific window's SVD vectors |
|
||||||
|
|
||||||
### Failure Mode E: Chart not visible in browser (Stage 5) |
|
||||||
|
|
||||||
**Symptom:** All stages pass but chart is blank in browser |
|
||||||
|
|
||||||
**Root causes:** |
|
||||||
- Plotly `fig` is empty (no traces added to figure) |
|
||||||
- Streamlit `st.plotly_chart` suppressed by CSS/JS error |
|
||||||
- Container width is 0 (layout issue) |
|
||||||
|
|
||||||
**Fix:** |
|
||||||
- [ ] Add debug print: `print(f"[TRAJ DEBUG] st.plotly_chart called with fig.data={[(t.mode, len(t.x), len(t.y)) for t in fig.data]}")` |
|
||||||
- [ ] Check browser console for JavaScript errors (Plotly.js errors) |
|
||||||
- [ ] Check if `use_container_width=True` causes issues — try `use_container_width=False` |
|
||||||
- [ ] Add `st.write(fig)` as alternative to `st.plotly_chart` for debugging |
|
||||||
|
|
||||||
### Failure Mode F: All stages pass, chart still shows blank |
|
||||||
|
|
||||||
**Symptom:** `trace_count > 0` but chart looks empty to user |
|
||||||
|
|
||||||
**Root causes:** |
|
||||||
- All traces are transparent/white-on-white |
|
||||||
- X/Y axes have huge range and all data is in a tiny corner |
|
||||||
- Party lines overlap completely (all parties at same position) |
|
||||||
|
|
||||||
**Fix:** |
|
||||||
- [ ] Print axis ranges: `print(f"[TRAJ DEBUG] xaxis range={fig.layout.xaxis.range or 'auto'}, yaxis range={fig.layout.yaxis.range or 'auto'}")` |
|
||||||
- [ ] Check if centroids are all at `(0, 0)` — run: `uv run python -c "from explorer import load_positions, load_party_map; from explorer_helpers import compute_party_coords; ..."` |
|
||||||
- [ ] Check if PARTY_COLOURS assignment is broken (all traces same color) |
|
||||||
- [ ] Verify window ordering is correct (chronological left-to-right) |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Task 3: Write regression test |
|
||||||
|
|
||||||
**Files:** |
|
||||||
- Create: `tests/test_trajectories_pipeline_integration.py` |
|
||||||
|
|
||||||
- [ ] **Step 1: Write integration test** |
|
||||||
|
|
||||||
```python |
|
||||||
"""Integration test: full trajectory pipeline produces non-empty plot.""" |
|
||||||
from explorer import load_positions, load_party_map |
|
||||||
from explorer_helpers import compute_party_centroids |
|
||||||
from explorer import select_trajectory_plot_data |
|
||||||
|
|
||||||
def test_trajectory_pipeline_produces_traces(): |
|
||||||
db_path = "data/motions.db" |
|
||||||
window_size = "annual" |
|
||||||
|
|
||||||
positions_by_window, _ = load_positions(db_path, window_size) |
|
||||||
party_map = load_party_map(db_path) |
|
||||||
windows = list(positions_by_window.keys()) |
|
||||||
|
|
||||||
centroids, mp_positions = compute_party_centroids(positions_by_window, party_map, windows) |
|
||||||
fig, trace_count, banner = select_trajectory_plot_data( |
|
||||||
positions_by_window, party_map, windows, |
|
||||||
selected_parties=list(centroids.keys())[:6], |
|
||||||
smooth_alpha=0.35, |
|
||||||
) |
|
||||||
|
|
||||||
assert trace_count > 0, f"Expected traces but got trace_count={trace_count}, banner={banner}" |
|
||||||
assert banner is None, f"Expected no fallback banner but got: {banner}" |
|
||||||
assert len(fig.data) == trace_count |
|
||||||
``` |
|
||||||
|
|
||||||
- [ ] **Step 2: Run the test** |
|
||||||
|
|
||||||
```bash |
|
||||||
uv run pytest tests/test_trajectories_pipeline_integration.py -v |
|
||||||
``` |
|
||||||
|
|
||||||
Expected: PASS |
|
||||||
|
|
||||||
- [ ] **Step 3: Commit** |
|
||||||
|
|
||||||
```bash |
|
||||||
git add tests/test_trajectories_pipeline_integration.py |
|
||||||
git commit -m "test: add trajectory pipeline integration test" |
|
||||||
``` |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
## Execution Order |
|
||||||
|
|
||||||
1. **Task 1 first** — Run the instrumented app and capture which stage fails |
|
||||||
2. **Task 2** — Fix the specific failure mode based on Task 1 output |
|
||||||
3. **Task 3** — Write regression test once the fix is confirmed |
|
||||||
|
|
||||||
**Estimated time:** 15-30 minutes for Task 1 (identifying the stage), 10-30 minutes for Task 2 fix (depends on which mode), 5 minutes for Task 3. |
|
||||||
@ -1,4 +0,0 @@ |
|||||||
# Placeholder list of files containing 'As 1' or 'As 2' |
|
||||||
explorer.py: (several locations) |
|
||||||
analysis/axis_classifier.py: (fallbacks) |
|
||||||
templates/ui.md: (example) |
|
||||||
Loading…
Reference in new issue