You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
motief/analysis/svd_labels.py

172 lines
5.1 KiB

"""Unified SVD component labels and automatic flip direction computation.
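This module provides a single source of truth for SVD component labels,
deriving them from SVD_THEMES in analysis.config and falling back to
explorer.py only when the config module does not supply them. It also
computes flip directions automatically from the mean scores of the
canonical left and right parties.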
"""
import logging
from typing import Dict, List, Optional, Tuple
from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT
# Module-level logger, named after this module's import path.
_logger = logging.getLogger(__name__)
# Aliases for the canonical right/left party collections imported from
# analysis.config; compute_flip_direction() tests party names for membership
# in these.
RIGHT_PARTIES = CANONICAL_RIGHT
LEFT_PARTIES = CANONICAL_LEFT
# Cache for SVD_THEMES to avoid repeated imports. None means "nothing stored
# yet"; _get_svd_themes() assigns it and afterwards returns the stored value.
_svd_themes_cache: Optional[Dict[int, Dict[str, str]]] = None
def _get_svd_themes() -> Dict[int, Dict[str, str]]:
    """Return the SVD theme table, loading and caching it on first use.

    Lookup order:
      1. ``analysis.config.SVD_THEMES`` (preferred; lightweight).
      2. ``explorer.SVD_THEMES`` (last resort; explorer may pull in heavy
         dependencies such as duckdb/plotly).

    Returns:
        Dict mapping component number to a theme dict with keys:
        - label: Short label for the component
        - explanation: Detailed explanation
        - positive_pole: Description of positive pole
        - negative_pole: Description of negative pole
        - flip: Whether to flip the axis
        An empty dict is returned when neither source yields themes.

    Note:
        An empty table read from the config module is still stored in the
        cache. If the explorer fallback then fails, later calls return that
        cached empty dict without retrying. When the config lookup itself
        raises and explorer also fails, nothing is cached and the next call
        retries both sources.
    """
    global _svd_themes_cache

    if _svd_themes_cache is not None:
        return _svd_themes_cache

    # First choice: the canonical, dependency-free config module.
    try:
        from analysis import config as _cfg

        from_config = getattr(_cfg, "SVD_THEMES", {}) or {}
        _svd_themes_cache = from_config
        if from_config:
            return from_config
    except Exception:
        _logger.exception(
            "Could not import analysis.config or read SVD_THEMES; falling back to explorer"
        )

    # Last resort: explorer, imported lazily because of its heavy imports.
    try:
        import explorer

        from_explorer = getattr(explorer, "SVD_THEMES", {}) or {}
    except ImportError as e:
        _logger.warning("Could not import explorer.SVD_THEMES: %s", e)
        return {}
    except Exception as e:
        _logger.exception("Failed to load SVD_THEMES from explorer.py: %s", e)
        return {}

    _svd_themes_cache = from_explorer
    return from_explorer
def get_svd_label(component: int) -> str:
    """Return the short display label for an SVD component.

    The label is taken from the loaded theme table when the component is
    present there. Otherwise a built-in label is used for components 1-3,
    and the generic ``"As <n>"`` for anything else.

    Args:
        component: SVD component number (1-indexed).

    Returns:
        Short label string (e.g., 'EU-integratie–Nationalisme').

    Raises:
        ValueError: If component < 1.
    """
    if component < 1:
        raise ValueError(f"Component must be >= 1, got {component}")

    generic_label = f"As {component}"
    themes = _get_svd_themes()

    if component not in themes:
        # Built-in labels for components 1-3 (most commonly used).
        builtin_labels = {
            1: "EU-integratie–Nationalisme",
            2: "Populistisch–Institutioneel",
            3: "Verzorgingsstaat–Marktwerking",
        }
        return builtin_labels.get(component, generic_label)

    return themes[component].get("label", generic_label)
def get_svd_theme(component: int) -> Dict[str, str]:
    """Return the full theme dict for an SVD component.

    When the component is present in the loaded theme table, the stored dict
    itself is returned (not a copy). Otherwise a minimal placeholder is built
    with the label from ``get_svd_label`` and empty descriptions.

    Args:
        component: SVD component number (1-indexed).

    Returns:
        Dict with keys: label, explanation, positive_pole, negative_pole,
        flip. In the placeholder, ``flip`` is the boolean ``False``.

    Raises:
        ValueError: If component < 1.
    """
    if component < 1:
        raise ValueError(f"Component must be >= 1, got {component}")

    themes = _get_svd_themes()
    if component not in themes:
        # Minimal placeholder for components without a configured theme.
        placeholder = {
            "label": get_svd_label(component),
            "explanation": "",
            "positive_pole": "",
            "negative_pole": "",
            "flip": False,
        }
        return placeholder

    return themes[component]
def compute_flip_direction(
    component: int,
    party_scores: Dict[str, List[float]],
) -> bool:
    """Decide whether an axis must be flipped so right parties end up right.

    The mean score of the parties in RIGHT_PARTIES is compared with the mean
    score of the parties in LEFT_PARTIES on the requested component. Parties
    in neither collection, and parties whose score list is too short to
    contain the component, are ignored. A party found in RIGHT_PARTIES is
    counted as right without consulting LEFT_PARTIES.

    Args:
        component: SVD component number (1-indexed).
        party_scores: Dict mapping party name to per-component scores.
            party_scores[party][0] is the score for component 1 (x-axis),
            party_scores[party][1] is the score for component 2 (y-axis).

    Returns:
        True if the right-party mean is lower than the left-party mean (the
        axis should be flipped). False otherwise, including when
        component < 1 or when either group has no usable scores.
    """
    if component < 1:
        return False

    position = component - 1  # 0-based index into each score list
    right_values: List[float] = []
    left_values: List[float] = []

    for name, values in party_scores.items():
        if len(values) > position:
            value = values[position]
            if name in RIGHT_PARTIES:
                right_values.append(value)
            elif name in LEFT_PARTIES:
                left_values.append(value)

    if right_values and left_values:
        right_mean = sum(right_values) / len(right_values)
        left_mean = sum(left_values) / len(left_values)
        # Right parties sitting lower than left parties means they are
        # currently drawn on the left, so the axis needs flipping.
        return right_mean < left_mean

    # Default: no flip if either group is missing.
    return False
def get_fallback_labels() -> Tuple[str, str]:
    """Return the labels for the x and y axes (components 1 and 2).

    Both labels are resolved through ``get_svd_label``.

    Returns:
        Tuple of (x_label, y_label).
    """
    x_label = get_svd_label(1)
    y_label = get_svd_label(2)
    return x_label, y_label