You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
128 lines
4.1 KiB
128 lines
4.1 KiB
"""SVD projection utilities for the parliamentary explorer.
|
|
|
|
Pure computation functions for projecting motions and entities onto ideological axes.
|
|
No IO or external dependencies - fully testable without Streamlit or DuckDB.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import math
|
|
from typing import Any, Dict, List, Tuple
|
|
|
|
# Public API of this module; everything else is an implementation detail.
__all__ = ["should_swap_axes", "swap_axes", "project_motion_scores", "normalize_coordinates"]
|
|
|
|
|
|
def should_swap_axes(axis_def: dict) -> bool:
    """Tell whether the economic dimension sits on Y instead of X.

    The check looks only at the ``"y_label"`` and ``"x_label"`` entries of
    ``axis_def``. It returns True when the Y label is one of the known
    economic left-right labels while the X label is not. In that case the
    caller is expected to swap x/y positions and metadata so the economic
    dimension (welfare vs market) ends up on the horizontal axis.

    Args:
        axis_def: Axis metadata dict; missing labels are treated as None.

    Returns:
        True if a swap is advisable, False otherwise.
    """
    economic = frozenset({"Verzorgingsstaat–Marktwerking", "Links–Rechts"})

    # Y is not economic: nothing to move onto the horizontal axis.
    if axis_def.get("y_label") not in economic:
        return False

    # Y is economic; swap only when X is not already economic as well.
    return axis_def.get("x_label") not in economic
|
|
|
|
|
|
def swap_axes(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    axis_def: dict,
) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], dict]:
    """Exchange the x and y roles in positions and in the axis metadata.

    Neither input is modified; fresh containers are returned.

    Args:
        positions_by_window: Mapping of window id to a mapping of entity to
            an ``(x, y)`` pair.
        axis_def: Axis metadata. Each paired ``x_*`` / ``y_*`` key listed
            below is exchanged; all other keys are copied unchanged.

    Returns:
        Tuple of ``(new_positions_by_window, new_axis_def)``. Every paired
        key is present in the new axis dict; a side that was missing in
        ``axis_def`` ends up as None.
    """
    flipped_positions: Dict[str, Dict[str, Tuple[float, float]]] = {
        window_id: {
            entity: (vertical, horizontal)
            for entity, (horizontal, vertical) in entities.items()
        }
        for window_id, entities in positions_by_window.items()
    }

    paired_keys = (
        ("x_label", "y_label"),
        ("x_quality", "y_quality"),
        ("x_interpretation", "y_interpretation"),
        ("x_top_motions", "y_top_motions"),
        ("x_label_confidence", "y_label_confidence"),
        ("x_axis", "y_axis"),
    )

    flipped_axis_def = dict(axis_def)
    for x_key, y_key in paired_keys:
        # Always read from the untouched input so both sides see old values.
        former_x = axis_def.get(x_key)
        former_y = axis_def.get(y_key)
        flipped_axis_def[x_key] = former_y
        flipped_axis_def[y_key] = former_x

    return flipped_positions, flipped_axis_def
|
|
|
|
|
|
def project_motion_scores(
    motion_scores: Dict[int, float], top_n: int = 5
) -> Tuple[List[Tuple[int, float]], List[Tuple[int, float]]]:
    """Split motion scores into positive and negative poles.

    Motions are ranked by score. Selection is purely rank based: the sign of
    a score is not inspected, so when fewer than ``2 * top_n`` motions are
    supplied the two poles overlap.

    Args:
        motion_scores: Dict mapping motion_id to loading score
        top_n: Number of top motions per pole. Zero or negative values yield
            two empty poles.

    Returns:
        Tuple of (positive_pole, negative_pole) where each is a list of
        (motion_id, score). The positive pole is ordered from highest score
        downwards, the negative pole from lowest score upwards.
    """
    # ``seq[-0:]`` is the whole sequence, so without this guard a ``top_n``
    # of 0 would return every motion in the negative pole; negative values
    # would produce equally meaningless slices.
    if top_n <= 0:
        return [], []

    ranked = sorted(motion_scores.items(), key=lambda item: item[1], reverse=True)

    positive_pole = ranked[:top_n]
    # Tail of the descending ranking, flipped so the lowest score comes first.
    negative_pole = ranked[-top_n:][::-1]

    return positive_pole, negative_pole
|
|
|
|
|
|
def normalize_coordinates(
    positions: Dict[str, Tuple[float, float]],
    clamp_abs_value: float = 1e3,
    null_tokens: Tuple[str, ...] = ("nan", "NaN", "None", "none", "null", ""),
) -> Dict[str, Tuple[float, float]]:
    """Normalize coordinate values.

    Pure function that converts every coordinate to a float and replaces
    anything unusable with NaN. Despite the parameter name, out-of-range
    values are not clipped to the limit; they are replaced with NaN.

    A coordinate becomes NaN when it is:
      * None, or of a type other than int, float or str;
      * a string equal to a null token (before or after stripping
        whitespace), or a string that cannot be parsed as a float;
      * NaN or infinite;
      * larger in absolute value than ``clamp_abs_value``;
      * an integer too large to be represented as a float.

    Args:
        positions: Dict mapping entity names to (x, y) coordinates
        clamp_abs_value: Maximum absolute coordinate value that is kept
        null_tokens: String values to treat as null

    Returns:
        Dict with normalized coordinates, one (x, y) float pair per entity
    """

    def _coerce(val: Any) -> float:
        """Return ``val`` as a finite in-range float, or NaN if unusable."""
        if isinstance(val, str):
            if val in null_tokens or val.strip() in null_tokens:
                return float("nan")
        elif not isinstance(val, (float, int)):
            # Covers None as well as any other unsupported type.
            return float("nan")

        try:
            number = float(val)
        except (ValueError, OverflowError):
            # ValueError: unparsable string. OverflowError: int beyond the
            # float range, which previously escaped as an exception.
            return float("nan")

        if not math.isfinite(number) or abs(number) > clamp_abs_value:
            return float("nan")
        return number

    return {
        entity: (_coerce(x), _coerce(y)) for entity, (x, y) in positions.items()
    }
|
|
|