# motief/analysis/projections.py

"""SVD projection utilities for the parliamentary explorer.

Pure computation functions for projecting motions and entities onto ideological axes.
No IO or external dependencies - fully testable without Streamlit or DuckDB.
"""

from __future__ import annotations

import math
import numbers
from typing import Any, Dict, List, Tuple

# Public API of this module: the names exported by a star-import.
__all__ = [
    "should_swap_axes",
    "swap_axes",
    "project_motion_scores",
    "normalize_coordinates",
]


def should_swap_axes(axis_def: dict) -> bool:
    """Decide whether the economic dimension needs to move to the X axis.

    The answer is True only when ``axis_def["y_label"]`` is one of the known
    economic left-right labels while ``axis_def["x_label"]`` is not. In that
    case the caller is expected to swap x/y positions and metadata so the
    economic dimension (welfare vs market) ends up on the horizontal axis.

    Args:
        axis_def: Axis metadata; only the "x_label" and "y_label" keys are
            read, and a missing key is treated as ``None``.

    Returns:
        True if the axes should be swapped, False otherwise.
    """
    economic = frozenset({"Verzorgingsstaat–Marktwerking", "Links–Rechts"})

    # Nothing to do unless the vertical axis carries the economic dimension.
    if axis_def.get("y_label") not in economic:
        return False

    # If X is already economic as well, leave the layout alone.
    return axis_def.get("x_label") not in economic


def swap_axes(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    axis_def: dict,
) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], dict]:
    """Exchange the X and Y roles of every position and of the axis metadata.

    Neither input is mutated; fresh containers are built and returned.

    Args:
        positions_by_window: Mapping of window id to a mapping of entity to
            its ``(x, y)`` pair.
        axis_def: Axis metadata. Each known ``x_*`` / ``y_*`` key pair is
            exchanged; every other key is copied through untouched.

    Returns:
        Tuple ``(new_positions_by_window, new_axis_def)``. A paired key that
        is absent from ``axis_def`` shows up as ``None`` in the result.
    """
    # Metadata keys that exist once per axis and therefore must trade places.
    paired_keys = (
        ("x_label", "y_label"),
        ("x_quality", "y_quality"),
        ("x_interpretation", "y_interpretation"),
        ("x_top_motions", "y_top_motions"),
        ("x_label_confidence", "y_label_confidence"),
        ("x_axis", "y_axis"),
    )

    flipped_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
    for window_id, entities in positions_by_window.items():
        flipped: Dict[str, Tuple[float, float]] = {}
        for entity, (x, y) in entities.items():
            flipped[entity] = (y, x)
        flipped_positions[window_id] = flipped

    # Shallow copy keeps unrelated metadata and leaves the caller's dict alone.
    flipped_axis = dict(axis_def)
    for x_key, y_key in paired_keys:
        # Always read from the original so an earlier write cannot leak in.
        flipped_axis[x_key] = axis_def.get(y_key)
        flipped_axis[y_key] = axis_def.get(x_key)

    return flipped_positions, flipped_axis


def project_motion_scores(
    motion_scores: Dict[int, float], top_n: int = 5
) -> Tuple[List[Tuple[int, float]], List[Tuple[int, float]]]:
    """Pick the highest- and lowest-scoring motions as the two poles of an axis.

    Poles are selected by rank, not by sign: the "positive" pole is the
    ``top_n`` largest scores and the "negative" pole is the ``top_n`` smallest.
    When fewer than ``2 * top_n`` motions are supplied, the same motion can
    appear in both poles.

    Args:
        motion_scores: Dict mapping motion_id to loading score.
        top_n: Number of top motions per pole. Values <= 0 yield empty poles.

    Returns:
        Tuple of (positive_pole, negative_pole) where each is a list of
        (motion_id, score). The positive pole is ordered from highest score
        downward, the negative pole from lowest score upward.
    """
    # Guard explicitly: ``seq[-0:]`` is the *whole* sequence, so without this
    # check top_n == 0 would return every motion in the negative pole.
    if top_n <= 0:
        return [], []

    ranked = sorted(motion_scores.items(), key=lambda item: item[1], reverse=True)

    positive_pole = ranked[:top_n]
    # Tail of the descending ranking, flipped so the most negative comes first.
    negative_pole = ranked[-top_n:][::-1]

    return positive_pole, negative_pole


def normalize_coordinates(
    positions: Dict[str, Tuple[float, float]],
    clamp_abs_value: float = 1e3,
    null_tokens: Tuple[str, ...] = ("nan", "NaN", "None", "none", "null", ""),
) -> Dict[str, Tuple[float, float]]:
    """Sanitize coordinate pairs, replacing unusable values with NaN.

    Pure function. Each coordinate is converted to ``float``; anything that is
    missing, non-numeric, non-finite, or larger in magnitude than
    ``clamp_abs_value`` becomes NaN. Out-of-range values are *not* clipped to
    the limit; they are discarded as NaN.

    Args:
        positions: Dict mapping entity names to (x, y) coordinates. Individual
            coordinates may be real numbers, numeric strings, or ``None``.
        clamp_abs_value: Maximum absolute coordinate value that is kept.
        null_tokens: Strings (compared both raw and whitespace-stripped) that
            are treated as a missing value.

    Returns:
        New dict with the same keys, each mapped to a ``(float, float)`` pair
        in which every unusable coordinate is NaN.
    """

    def _coerce(val: Any) -> float:
        """Return ``val`` as a finite in-range float, or NaN if unusable."""
        if isinstance(val, str):
            if val in null_tokens or val.strip() in null_tokens:
                return float("nan")
        elif not isinstance(val, numbers.Real):
            # Covers None and every non-numeric type. numbers.Real admits
            # int/float plus other real scalars such as fractions.Fraction.
            return float("nan")

        try:
            number = float(val)
        except (ValueError, OverflowError):
            # ValueError: unparsable string. OverflowError: an integer too
            # large for a float, which is out of range by definition.
            return float("nan")

        if not math.isfinite(number) or abs(number) > clamp_abs_value:
            return float("nan")
        return number

    return {
        entity: (_coerce(x), _coerce(y)) for entity, (x, y) in positions.items()
    }