|
|
|
@ -0,0 +1,269 @@ |
|
|
|
|
|
|
|
"""Axis classifier: correlate per-party PCA positions against ideology reference data |
|
|
|
|
|
|
|
to assign honest, dynamic labels to political compass axes. |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Public API: classify_axes(positions_by_window, axes, db_path) -> dict |
|
|
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import csv
import logging
from collections import Counter
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_logger = logging.getLogger(__name__)

# Process-wide memo slots for the two reference CSVs. ``None`` means "not
# loaded yet"; the loader functions in this module fill them after the first
# successful read and return the stored value on later calls.
_ideology_cache: Optional[Dict[str, Dict[str, float]]] = None
_coalition_cache: Optional[Dict[str, set]] = None

# Minimum |Pearson r| at which an axis counts as "explained" by a dimension.
_THRESHOLD = 0.65

# Axis display labels (Dutch), keyed by dimension code. The two ``fallback_*``
# entries are used when no dimension reaches ``_THRESHOLD``.
_LABELS = dict(
    lr="Links\u2013Rechts",
    co="Coalitie\u2013Oppositie",
    pc="Progressief\u2013Conservatief",
    fallback_x="Stempatroon As 1",
    fallback_y="Stempatroon As 2",
)

# Per-dimension explanation templates (Dutch). Every template takes
# ``{orientation}``; only the coalition template also takes ``{r}``.
_INTERPRETATION_TEMPLATES = {
    "lr": "De {orientation} as weerspiegelt de klassieke links-rechts tegenstelling.",
    "co": (
        "De {orientation} as weerspiegelt stemgedrag van coalitie- versus "
        "oppositiepartijen (r={r:.2f}). Links-rechts is minder dominant dit jaar."
    ),
    "pc": "De {orientation} as weerspiegelt de progressief-conservatieve tegenstelling.",
    "fallback": (
        "De {orientation} as weerspiegelt een empirisch stempatroon "
        "zonder duidelijke ideologische richting."
    ),
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _load_ideology(csv_path: Path) -> Dict[str, Dict[str, float]]:
    """Load per-party ideology scores from a CSV file, memoised per process.

    The header row must contain ``left_right`` and ``progressive`` columns;
    the first column of each data row is taken as the party name. Fields are
    parsed with the ``csv`` module, so quoted values may contain commas.

    Args:
        csv_path: Location of the ideology CSV.

    Returns:
        ``{party_name: {"left_right": float, "progressive": float}}``.
        Returns ``{}`` when the file is missing, unreadable, empty, or lacks
        a required column (callers should treat empty as "skip
        classification"). A failed load is not cached, so a later call
        retries the read.

    Blank rows and rows too short to hold both score columns are skipped
    silently. Rows whose score cells are not numeric are skipped with a
    warning, so one bad cell does not discard the whole reference set.
    """
    global _ideology_cache
    if _ideology_cache is not None:
        return _ideology_cache

    scores: Dict[str, Dict[str, float]] = {}
    try:
        # newline="" is what the csv module expects for correct handling of
        # line endings inside quoted fields.
        with open(csv_path, encoding="utf-8", newline="") as fh:
            reader = csv.reader(fh, skipinitialspace=True)
            # An empty file yields an empty header, which makes the
            # ``index`` calls below raise ValueError with a clear message.
            header = [cell.strip() for cell in next(reader, [])]
            lr_idx = header.index("left_right")
            pc_idx = header.index("progressive")
            last_needed = max(lr_idx, pc_idx)

            for row in reader:
                cells = [cell.strip() for cell in row]
                if not any(cells) or len(cells) <= last_needed:
                    continue
                try:
                    left_right = float(cells[lr_idx])
                    progressive = float(cells[pc_idx])
                except ValueError:
                    _logger.warning(
                        "Skipping malformed row %d in party_ideologies.csv: %r",
                        reader.line_num,
                        row,
                    )
                    continue
                scores[cells[0]] = {
                    "left_right": left_right,
                    "progressive": progressive,
                }
    except FileNotFoundError:
        _logger.warning(
            "party_ideologies.csv not found at %s — axis labels will be generic",
            csv_path,
        )
        return {}
    except Exception as exc:
        # Deliberate best-effort boundary: any other failure degrades to
        # generic axis labels rather than propagating to the caller.
        _logger.warning("Failed to load party_ideologies.csv: %s", exc)
        return {}

    _ideology_cache = scores
    return scores
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _load_coalition(csv_path: Path) -> Dict[str, set]:
    """Load coalition membership from a CSV file, memoised per process.

    The first row is treated as a header and ignored. In every other row the
    first column is the window identifier and the second the party name.
    Fields are parsed with the ``csv`` module, so quoted values may contain
    commas.

    Args:
        csv_path: Location of the coalition membership CSV.

    Returns:
        ``{window_id: set_of_party_names}``. Returns ``{}`` when the file is
        missing or unreadable (the coalition dimension is then skipped). A
        failed load is not cached, so a later call retries the read.

    Rows with fewer than two columns, or with an empty window or party cell,
    are skipped.
    """
    global _coalition_cache
    if _coalition_cache is not None:
        return _coalition_cache

    members: Dict[str, set] = {}
    try:
        # newline="" is what the csv module expects for correct handling of
        # line endings inside quoted fields.
        with open(csv_path, encoding="utf-8", newline="") as fh:
            reader = csv.reader(fh, skipinitialspace=True)
            next(reader, None)  # discard the header row, if there is one
            for row in reader:
                cells = [cell.strip() for cell in row]
                if len(cells) < 2 or not cells[0] or not cells[1]:
                    continue
                members.setdefault(cells[0], set()).add(cells[1])
    except FileNotFoundError:
        _logger.warning(
            "coalition_membership.csv not found at %s — coalition axis detection disabled",
            csv_path,
        )
        return {}
    except Exception as exc:
        # Deliberate best-effort boundary: any other failure only disables
        # the coalition dimension.
        _logger.warning("Failed to load coalition_membership.csv: %s", exc)
        return {}

    _coalition_cache = members
    return members
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _window_year(window_id: str) -> Optional[str]:
    """Return the text before the first ``-`` in ``window_id``.

    ``'2016'`` gives ``'2016'`` and ``'2016-Q3'`` gives ``'2016'``. The
    special window ``'current_parliament'`` has no year and gives ``None``.
    """
    if window_id != "current_parliament":
        year_part, _sep, _rest = window_id.partition("-")
        return year_part
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _pearsonr(x: List[float], y: List[float]) -> float:
    """Return the Pearson correlation coefficient of ``x`` and ``y``.

    Degenerate input yields ``0.0`` instead of NaN so that callers can
    compare and rank the result safely. Degenerate means any of:

    * fewer than three points in ``x``;
    * a NaN or infinite value in either sequence;
    * (near-)zero variance in either sequence;
    * a non-finite coefficient, e.g. from overflow on extreme magnitudes.

    Args:
        x: First sample.
        y: Second sample, expected to have the same length as ``x``.

    Returns:
        The correlation as a Python float, or ``0.0`` for degenerate input.
    """
    if len(x) < 3:
        return 0.0

    xa = np.asarray(x, dtype=float)
    ya = np.asarray(y, dtype=float)

    # NaN/inf would otherwise propagate into a NaN coefficient, which makes
    # any later ``max``/threshold comparison on it unreliable.
    if not (np.isfinite(xa).all() and np.isfinite(ya).all()):
        return 0.0

    if xa.std() < 1e-12 or ya.std() < 1e-12:
        return 0.0

    r = float(np.corrcoef(xa, ya)[0, 1])
    return r if np.isfinite(r) else 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _assign_label(
    r_lr: float,
    r_co: float,
    r_pc: float,
    axis: str,
) -> Tuple[str, str, float]:
    """Pick the label, explanation and quality score for a single axis.

    The dimensions are tried in fixed priority order (left-right, then
    coalition, then progressive). The first one whose ``|r|`` reaches
    ``_THRESHOLD`` wins; if none does, the generic per-axis fallback is used.

    Args:
        r_lr: Correlation of the axis with the left-right reference.
        r_co: Correlation of the axis with the coalition dummy.
        r_pc: Correlation of the axis with the progressive reference.
        axis: ``"x"`` for the horizontal axis; any other value is treated as
            the vertical axis.

    Returns:
        ``(label, interpretation_string, quality_score)``. The quality score
        is the largest ``|r|`` over all three dimensions, whichever dimension
        supplied the label.
    """
    horizontal = axis == "x"
    orientation = "horizontale" if horizontal else "verticale"
    quality = max(abs(r_lr), abs(r_co), abs(r_pc))

    # (dimension key, correlation, extra template fields) in priority order.
    # Only the coalition template interpolates the correlation value.
    ranked = (
        ("lr", r_lr, {}),
        ("co", r_co, {"r": r_co}),
        ("pc", r_pc, {}),
    )
    for key, corr, extra in ranked:
        if abs(corr) >= _THRESHOLD:
            text = _INTERPRETATION_TEMPLATES[key].format(
                orientation=orientation, **extra
            )
            return _LABELS[key], text, quality

    fallback_key = "fallback_x" if horizontal else "fallback_y"
    text = _INTERPRETATION_TEMPLATES["fallback"].format(orientation=orientation)
    return _LABELS[fallback_key], text, quality
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def classify_axes(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    axes: dict,
    db_path: str,
) -> dict:
    """Label the compass axes by correlating party positions with reference data.

    For each window, the positions of parties that also appear in the
    ideology reference are correlated with three reference vectors:
    left-right, progressive, and a coalition dummy. Windows with fewer than
    five such parties are skipped and get no quality or interpretation entry.

    Args:
        positions_by_window: ``{window_id: {party: (x, y)}}``.
        axes: Existing axes metadata; it is copied, not mutated.
        db_path: Database path. The reference CSVs ``party_ideologies.csv``
            and ``coalition_membership.csv`` are read from its parent
            directory.

    Returns:
        A shallow copy of ``axes`` with these keys added:

        * ``x_label`` / ``y_label``: the most common label across annual
          windows (window ids without ``-``, excluding
          ``current_parliament``).
        * ``x_quality`` / ``y_quality``: ``{window_id: float}``, the max
          ``|r|`` for each window.
        * ``x_interpretation`` / ``y_interpretation``: ``{window_id: str}``,
          a Dutch explanation for each window.

        If no ideology reference data could be loaded, the original ``axes``
        object is returned unchanged.
    """
    reference_dir = Path(db_path).parent
    ideology = _load_ideology(reference_dir / "party_ideologies.csv")
    if not ideology:
        return axes  # no reference data — preserve existing behaviour

    coalition = _load_coalition(reference_dir / "coalition_membership.csv")

    quality: Dict[str, Dict[str, float]] = {"x": {}, "y": {}}
    interpretation: Dict[str, Dict[str, str]] = {"x": {}, "y": {}}
    annual_votes: Dict[str, List[str]] = {"x": [], "y": []}

    for wid, pos_dict in positions_by_window.items():
        year = _window_year(wid)
        # Annual ids look like "2016"; quarterly ones like "2016-Q3".
        is_annual = wid != "current_parliament" and "-" not in wid

        # Restrict to parties that have both a position and a reference score.
        known = [party for party in pos_dict if party in ideology]
        if len(known) < 5:
            _logger.debug(
                "Skipping axis classification for %s: only %d reference parties (need 5)",
                wid,
                len(known),
            )
            continue

        ref_lr = [ideology[party]["left_right"] for party in known]
        ref_pc = [ideology[party]["progressive"] for party in known]

        # Coalition dummy: +1 for parties listed for this year, -1 otherwise.
        # Without coalition data for the year, a constant vector is used,
        # which has zero variance and so correlates as 0.0.
        if year and coalition and year in coalition:
            in_government = coalition[year]
            ref_co = [1.0 if party in in_government else -1.0 for party in known]
        else:
            ref_co = [0.0] * len(known)

        outcome = {}
        for axis, column in (("x", 0), ("y", 1)):
            coords = [pos_dict[party][column] for party in known]
            outcome[axis] = _assign_label(
                _pearsonr(coords, ref_lr),
                _pearsonr(coords, ref_co),
                _pearsonr(coords, ref_pc),
                axis,
            )

        for axis in ("x", "y"):
            label, text, score = outcome[axis]
            quality[axis][wid] = score
            interpretation[axis][wid] = text
            # Quarterly windows and current_parliament do not vote on the
            # global label.
            if is_annual:
                annual_votes[axis].append(label)

    x_votes = annual_votes["x"]
    y_votes = annual_votes["y"]

    enriched = dict(axes)
    enriched["x_label"] = (
        Counter(x_votes).most_common(1)[0][0] if x_votes else "Links\u2013Rechts"
    )
    enriched["y_label"] = (
        Counter(y_votes).most_common(1)[0][0]
        if y_votes
        else "Progressief\u2013Conservatief"
    )
    enriched["x_quality"] = quality["x"]
    enriched["y_quality"] = quality["y"]
    enriched["x_interpretation"] = interpretation["x"]
    enriched["y_interpretation"] = interpretation["y"]
    return enriched