You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
motief/analysis/projections.py

128 lines
4.1 KiB

"""SVD projection utilities for the parliamentary explorer.
Pure computation functions for projecting motions and entities onto ideological axes.
No IO or external dependencies - fully testable without Streamlit or DuckDB.
"""
from __future__ import annotations
import math
from typing import Any, Dict, List, Tuple
# Public API: the names exported by a star-import of this module.
__all__ = [
"should_swap_axes",
"swap_axes",
"project_motion_scores",
"normalize_coordinates",
]
def should_swap_axes(axis_def: dict) -> bool:
    """Decide whether the economic dimension needs to move to the X axis.

    Args:
        axis_def: Axis metadata; only its ``"x_label"`` and ``"y_label"``
            entries are consulted (missing entries read as ``None``).

    Returns:
        True when the Y label is one of the economic left-right labels
        (welfare vs market) while the X label is not, i.e. when the caller
        should swap positions and metadata so that dimension ends up
        horizontal. False in every other case.
    """
    economic_axes = {"Verzorgingsstaat–Marktwerking", "Links–Rechts"}

    # Nothing to move unless the vertical axis carries the economic label.
    if axis_def.get("y_label") not in economic_axes:
        return False

    # If both axes are economic there is no point in swapping either.
    return axis_def.get("x_label") not in economic_axes
def swap_axes(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    axis_def: dict,
) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], dict]:
    """Exchange the X and Y roles in every position and in the axis metadata.

    Neither argument is modified; fresh containers are built and returned.

    Args:
        positions_by_window: Mapping of window id to a mapping of entity to
            an ``(x, y)`` pair.
        axis_def: Axis metadata. Each ``x_*`` / ``y_*`` key pair listed in the
            body is exchanged; all other keys are copied through unchanged.

    Returns:
        ``(new_positions_by_window, new_axis_def)``. Every listed key is
        present in the new metadata; a key whose counterpart was missing
        from ``axis_def`` is set to ``None``.
    """
    # Metadata keys that describe one axis and have a twin for the other axis.
    mirrored_keys = (
        ("x_label", "y_label"),
        ("x_quality", "y_quality"),
        ("x_interpretation", "y_interpretation"),
        ("x_top_motions", "y_top_motions"),
        ("x_label_confidence", "y_label_confidence"),
        ("x_axis", "y_axis"),
    )

    flipped_positions: Dict[str, Dict[str, Tuple[float, float]]] = {
        window_id: {
            entity: (vertical, horizontal)
            for entity, (horizontal, vertical) in entities.items()
        }
        for window_id, entities in positions_by_window.items()
    }

    flipped_axis = dict(axis_def)
    for horizontal_key, vertical_key in mirrored_keys:
        # Always read from the untouched input so that the first assignment
        # cannot leak into the second.
        flipped_axis[horizontal_key] = axis_def.get(vertical_key)
        flipped_axis[vertical_key] = axis_def.get(horizontal_key)

    return flipped_positions, flipped_axis
def project_motion_scores(
    motion_scores: Dict[int, float], top_n: int = 5
) -> Tuple[List[Tuple[int, float]], List[Tuple[int, float]]]:
    """Pick the motions at the two extremes of an axis.

    The motions are ranked by score. The poles are purely rank-based: they
    are not filtered by sign, and when there are fewer than ``2 * top_n``
    motions the same motion can appear in both poles.

    Args:
        motion_scores: Dict mapping motion_id to loading score.
        top_n: Number of top motions per pole. Zero or a negative value
            yields two empty poles.

    Returns:
        Tuple of ``(positive_pole, negative_pole)``; each is a list of
        ``(motion_id, score)``. The positive pole is ordered from the highest
        score downwards, the negative pole from the lowest score upwards.
    """
    # Guard needed because ``seq[-0:]`` is ``seq[0:]``: without it top_n == 0
    # would return *every* motion as the negative pole, and a negative top_n
    # would slice from the wrong ends.
    if top_n <= 0:
        return [], []

    ranked = sorted(motion_scores.items(), key=lambda item: item[1], reverse=True)
    positive_pole = ranked[:top_n]
    # Take the tail of the descending ranking and flip it so the most
    # negative score comes first.
    negative_pole = ranked[-top_n:][::-1]
    return positive_pole, negative_pole
def normalize_coordinates(
    positions: Dict[str, Tuple[float, float]],
    clamp_abs_value: float = 1e3,
    null_tokens: Tuple[str, ...] = ("nan", "NaN", "None", "none", "null", ""),
) -> Dict[str, Tuple[float, float]]:
    """Sanitize coordinate values, turning anything unusable into NaN.

    Each coordinate is converted to a float. A coordinate becomes NaN when it
    is ``None``, a null token, an unparsable string, NaN or infinite, of a
    type other than float/int/str, or larger in magnitude than
    ``clamp_abs_value``. Note that out-of-range values are *replaced by NaN*,
    not clamped to the limit.

    Args:
        positions: Dict mapping entity names to (x, y) coordinates.
        clamp_abs_value: Maximum absolute coordinate value that is kept.
        null_tokens: Strings to treat as null; a string matches either as-is
            or after stripping surrounding whitespace.

    Returns:
        New dict with the same keys and ``(x, y)`` tuples of floats.
    """

    def _coerce(val: Any) -> float:
        """Return ``val`` as a finite in-range float, or NaN if it is not one."""
        if isinstance(val, str):
            if val in null_tokens or val.strip() in null_tokens:
                return float("nan")
        elif not isinstance(val, (float, int)):
            # Covers None as well as any unsupported type.
            return float("nan")

        try:
            number = float(val)
        except (ValueError, OverflowError):
            # ValueError: unparsable string. OverflowError: an int too large
            # to be represented as a float (e.g. 10**400).
            return float("nan")

        if not math.isfinite(number) or abs(number) > clamp_abs_value:
            return float("nan")
        return number

    return {entity: (_coerce(x), _coerce(y)) for entity, (x, y) in positions.items()}