From 3a6710091a3a65a6ea2b08df38e3179d90321793 Mon Sep 17 00:00:00 2001 From: Sven Geboers Date: Mon, 13 Apr 2026 23:25:48 +0200 Subject: [PATCH] Use aligned PCA scores for time trajectory view - Add _get_aligned_trajectory_scores() helper for multi-window aligned scores - Update trajectory call to use compute_nd_axes instead of raw SVD scores - Simplify _render_svd_time_trajectory by removing per-window flip computation --- explorer.py | 67 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/explorer.py b/explorer.py index 6d49c7a..a300826 100644 --- a/explorer.py +++ b/explorer.py @@ -626,6 +626,50 @@ def _load_mp_vectors_by_window(db_path: str, window: str) -> Dict[str, np.ndarra return explorer_data.load_mp_vectors_by_window(db_path, window) +def _get_aligned_trajectory_scores( + db_path: str, windows: List[str], n_components: int = 10 +) -> Dict[str, Dict[str, List[float]]]: + """Get aligned PCA scores for all windows as {window: {party: [scores per component]}}. + + Uses compute_nd_axes to get PCA-projected, flip-corrected scores across all windows, + ensuring consistency with the single-window SVD components view. + """ + from analysis.political_axis import compute_nd_axes + + # Get aligned scores for all windows via PCA + scores_by_window, _ = compute_nd_axes(db_path, n_components=n_components) + if not scores_by_window: + return {} + + # Load party map to convert MP names to parties + party_map = load_party_map(db_path) + + # Aggregate MP scores to party centroids per window + result: Dict[str, Dict[str, List[float]]] = {} + for window in windows: + window_scores = scores_by_window.get(window, {}) + if not window_scores: + continue + + # Aggregate MP scores to party averages + party_vecs: Dict[str, List[np.ndarray]] = {} + for mp_name, scores in window_scores.items(): + party = party_map.get( + mp_name, party_map.get(mp_name.split("(")[0].strip(), None) + ) + if party: + party_vecs.setdefault(party, []).append(scores[:n_components]) + + # Compute mean scores per party + result[window] = { + party: np.mean(np.vstack(score_list), axis=0).tolist() + for party, score_list in party_vecs.items() + if score_list + } + + return result + + @st.cache_data(show_spinner="SVD scores met Procrustes-uitlijning laden…") def load_party_scores_all_windows_aligned( db_path: str, windows: List[str] @@ -1117,10 +1161,9 @@ def _render_svd_time_trajectory( idx = comp_sel - 1 # Convert to 0-indexed - # Import flip computation for per-window alignment - from analysis.svd_labels import compute_flip_direction - # Build data structure: {party: [(window, score), ...]} + # Scores are already aligned and flip-corrected via compute_nd_axes, + # so no per-window flip computation needed. party_trajectories: Dict[str, List[Tuple[str, float]]] = {} # Sort windows: current_parliament first, then chronological @@ -1134,26 +1177,13 @@ def _render_svd_time_trajectory( ) sorted_windows.extend(other_windows) - # Compute per-window flip to align all windows consistently - # Each window's SVD has arbitrary sign, so we compute flip per window - window_flips = {} - for window in sorted_windows: - scores_by_party = party_scores_by_window.get(window, {}) - # Compute flip for this specific window - window_flips[window] = compute_flip_direction(comp_sel, scores_by_party) - for window in sorted_windows: scores_by_party = party_scores_by_window.get(window, {}) - # Get the flip for this specific window - window_flip = window_flips.get(window, False) for party in selected_parties: scores = scores_by_party.get(party, []) if scores and len(scores) > idx: try: score = float(scores[idx]) - # Apply per-window flip to align orientation - if window_flip: - score = -score party_trajectories.setdefault(party, []).append((window, score)) except (ValueError, TypeError): continue @@ -2693,9 +2723,8 @@ def build_svd_components_tab(db_path: str) -> None: has_current = "current_parliament" in available_windows all_windows = year_windows + (["current_parliament"] if has_current else []) - # TODO: For full consistency, this should also use aligned PCA scores for all windows. - # Currently uses raw SVD scores for trajectory - single-window view uses aligned scores. - party_scores_by_window = load_party_scores_all_windows(db_path, all_windows) + # Use aligned PCA scores for all windows (consistent with single-window view) + party_scores_by_window = _get_aligned_trajectory_scores(db_path, all_windows) _render_svd_time_trajectory( party_scores_by_window,