"""SVD Components tab for the parliamentary explorer.""" from __future__ import annotations import datetime as _dt import logging import os from typing import Dict, List, Tuple import numpy as np from analysis import config import analysis.explorer_data as explorer_data from analysis.tabs._rendering import ( _render_party_axis_chart_1d, _render_scree_plot, _render_svd_time_trajectory, _render_voting_results, st, ) try: import duckdb except Exception: duckdb = None # type: ignore SVD_THEMES = config.SVD_THEMES KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES logger = logging.getLogger(__name__) def build_svd_components_tab(db_path: str) -> None: """New tab: show top motions contributing to top SVD components. Reads thoughts/explorer/top_svd_top_motions.json and displays a selector for components 1..10 with theme labels/explanations and a detail pane per motion. Components 1-2 use aligned PCA positions (consistent with compass). Components 3-10 use raw SVD scores. """ st.subheader("đŸ”Ŧ SVD Assen — politieke polarisatiethema's") st.markdown( "Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen " "van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren " "het spanningsveld dat de as beschrijft." ) scree_importances = explorer_data.load_scree_data(db_path) if scree_importances: st.markdown( "**Scree-plot** — het relatieve gewicht van elke SVD-as. " "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; " "latere assen (7+) zijn fragiel en mogelijk niet boven ruisniveau." ) _render_scree_plot(scree_importances) json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json") if not os.path.exists(json_path): st.warning( f"Top-SVD data not found at {json_path}. Run the importance job to generate it." ) return try: import json with open(json_path, "r", encoding="utf-8") as fh: j = json.load(fh) except Exception as e: st.error(f"Failed to load SVD importance JSON: {e}") return window = j.get("window") rows = j.get("rows", []) if not rows: st.info("Geen top-moties in dataset") return st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**") comp_map: dict[int, list] = {} for r in rows: comp = int(r.get("component", 0)) bucket = comp_map.setdefault(comp, []) existing_ids = {m.get("motion_id") for m in bucket} if r.get("motion_id") not in existing_ids: bucket.append(r) comp_options = sorted(comp_map.keys()) def _comp_label(c: int) -> str: theme = SVD_THEMES.get(c, {}) lbl = theme.get("label", "") return f"As {c} — {lbl}" if lbl else f"As {c}" comp_display = [_comp_label(c) for c in comp_options] party_scores_default = explorer_data.load_party_axis_scores(db_path) party_mp_vectors = explorer_data.load_party_mp_vectors(db_path) bootstrap_data = None if party_mp_vectors: try: from analysis.political_axis import compute_party_bootstrap_cis bootstrap_data = compute_party_bootstrap_cis(party_mp_vectors) except Exception: pass col1, col2 = st.columns([2, 1]) view_mode = "Enkel venster" selected_parties_for_trajectory: list = [] with col2: comp_sel_idx = st.selectbox( "Selecteer SVD-as", options=list(range(len(comp_options))), format_func=lambda i: comp_display[i], index=0, ) comp_sel = comp_options[comp_sel_idx] min_mps = st.number_input( "Min. Kamerleden per partij", min_value=1, max_value=20, value=1, step=1, help="Partijen met minder dan dit aantal Kamerleden worden niet weergegeven.", ) view_mode = st.radio( "Weergave", options=["Enkel venster", "Tijdtraject"], index=0, help="Enkel venster: toont posities voor ÊÊn tijdsvenster. Tijdtraject: toont hoe partijen over tijd bewegen op deze as.", ) selected_parties_for_trajectory = [] if view_mode == "Tijdtraject": all_parties = ( sorted(party_scores_default.keys()) if party_scores_default else [] ) default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties][:8] selected_parties_for_trajectory = st.multiselect( "Partijen om te tonen", options=all_parties, default=default_parties, help="Selecteer de partijen die je wilt zien in het tijdtraject.", ) theme = SVD_THEMES.get(comp_sel, {}) if theme: st.info(f"**{theme['label']}** — {theme['explanation']}") motions = comp_map.get(comp_sel, []) _current_year = str(_dt.date.today().year) available_windows = explorer_data.get_uniform_dim_windows(db_path) year_windows = sorted( w for w in available_windows if w != "current_parliament" and w != _current_year ) has_current = "current_parliament" in available_windows svd_windows = year_windows + (["current_parliament"] if has_current else []) def _svd_window_label(w: str) -> str: if w == "current_parliament": return "Huidig parliament" return w with col1: svd_window = st.selectbox( "Jaar", options=svd_windows, index=len(svd_windows) - 1, format_func=_svd_window_label, key=f"svd_window_{comp_sel}", ) if svd_window == "current_parliament": party_scores = party_scores_default else: party_scores = explorer_data.load_party_axis_scores_for_window(db_path, svd_window) party_mp_counts = ( {p: len(v) for p, v in party_mp_vectors.items()} if party_mp_vectors else {} ) def _get_aligned_party_coords(window: str) -> Dict[str, Tuple[float, float]]: """Get party (x, y) coordinates from aligned PCA positions for a window.""" positions_by_window, _ = explorer_data.load_positions(db_path, "annual") window_pos = positions_by_window.get(window, {}) if not window_pos: return {} _party_map = explorer_data.load_party_map(db_path) party_coords: Dict[str, List[Tuple[float, float]]] = {} for mp_name, (x, y) in window_pos.items(): party = _party_map.get( mp_name, _party_map.get(mp_name.split("(")[0].strip(), None) ) if party: party_coords.setdefault(party, []).append((x, y)) return { party: ( float(np.mean([c[0] for c in coords])), float(np.mean([c[1] for c in coords])), ) for party, coords in party_coords.items() if coords } active_mps = ( explorer_data.load_active_mps(db_path) if svd_window == "current_parliament" else None ) aligned_all_scores = explorer_data.get_aligned_party_scores( db_path, svd_window, active_mps ) party_1d_coords: dict = {} for party, all_scores in aligned_all_scores.items(): idx = comp_sel - 1 if idx < len(all_scores): party_1d_coords[party] = (float(all_scores[idx]),) computed_flips: Dict[int, bool] = {} try: from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT for comp_idx in range(10): right_scores = [] left_scores = [] for party, scores in aligned_all_scores.items(): if party in CANONICAL_RIGHT: right_scores.append(scores[comp_idx]) elif party in CANONICAL_LEFT: left_scores.append(scores[comp_idx]) if right_scores and left_scores: right_avg = np.mean(right_scores) left_avg = np.mean(left_scores) computed_flips[comp_idx + 1] = right_avg < left_avg else: computed_flips[comp_idx + 1] = False except Exception: pass theme_with_flip = { **theme, "flip": computed_flips.get(comp_sel, theme.get("flip", False)), } if min_mps > 1 and party_mp_counts: valid_parties = {p for p, count in party_mp_counts.items() if count >= min_mps} party_1d_coords = { p: coords for p, coords in party_1d_coords.items() if p in valid_parties } if view_mode == "Tijdtraject" and selected_parties_for_trajectory: available_windows = explorer_data.get_uniform_dim_windows(db_path) year_windows = sorted( w for w in available_windows if w != "current_parliament" and w != _current_year ) has_current = "current_parliament" in available_windows all_windows = year_windows + (["current_parliament"] if has_current else []) party_scores_by_window = explorer_data._get_aligned_trajectory_scores( db_path, all_windows ) _render_svd_time_trajectory( party_scores_by_window, comp_sel, theme_with_flip, selected_parties_for_trajectory, ) else: _render_party_axis_chart_1d(party_1d_coords, comp_sel, theme_with_flip) motion_ids = [m.get("motion_id") for m in motions if m.get("motion_id") is not None] motion_details: Dict[int, tuple] = {} if motion_ids: ids_int: List[int] = [] for mid in motion_ids: try: ids_int.append(int(mid)) except Exception: logger.warning("Skipping invalid motion id in SVD batch fetch: %r", mid) if ids_int and duckdb is not None: con = None try: placeholders = ", ".join("?" for _ in ids_int) con = duckdb.connect(database=db_path, read_only=True) db_rows = con.execute( f"SELECT id, title, date, policy_area, url, body_text, voting_results " f"FROM motions WHERE id IN ({placeholders})", ids_int, ).fetchall() motion_details = {r[0]: r for r in db_rows} except Exception: logger.exception("Failed to batch-fetch motion details") finally: if con: con.close() pos_motions = [m for m in motions if float(m.get("score", 0.0)) >= 0] neg_motions = [m for m in motions if float(m.get("score", 0.0)) < 0] flip = theme_with_flip.get("flip", False) if theme_with_flip else False pos_pole = theme_with_flip.get("positive_pole", "") if theme_with_flip else "" neg_pole = theme_with_flip.get("negative_pole", "") if theme_with_flip else "" if flip: left_pole, right_pole = pos_pole, neg_pole left_motions, right_motions = pos_motions, neg_motions left_arrow, right_arrow = "▲", "â–ŧ" else: left_pole, right_pole = neg_pole, pos_pole left_motions, right_motions = neg_motions, pos_motions left_arrow, right_arrow = "â–ŧ", "▲" lcol, rcol = st.columns(2) with lcol: st.markdown(f"**← {left_pole}**") for m in left_motions: mid = m.get("motion_id") raw_title = m.get("title") or f"Motie #{mid}" with st.expander(f"{left_arrow} {raw_title}"): row = motion_details.get(int(mid)) if mid is not None else None if row: try: date_str = str(row[2])[:10] except Exception: date_str = "?" st.caption(f"📅 {date_str} | {row[3] or '—'}") if row[4] and str(row[4]).startswith("http"): st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})") if row[5]: with st.expander("Toon volledige tekst"): st.write(row[5]) _render_voting_results(row[6]) else: st.caption("_Geen metadata beschikbaar_") with rcol: st.markdown(f"**{right_pole} →**") for m in right_motions: mid = m.get("motion_id") raw_title = m.get("title") or f"Motie #{mid}" with st.expander(f"{right_arrow} {raw_title}"): row = motion_details.get(int(mid)) if mid is not None else None if row: try: date_str = str(row[2])[:10] except Exception: date_str = "?" st.caption(f"📅 {date_str} | {row[3] or '—'}") if row[4] and str(row[4]).startswith("http"): st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})") if row[5]: with st.expander("Toon volledige tekst"): st.write(row[5]) _render_voting_results(row[6]) else: st.caption("_Geen metadata beschikbaar_")