You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
motief/analysis/tabs/components.py

374 lines
13 KiB

"""SVD Components tab for the parliamentary explorer."""
from __future__ import annotations
import datetime as _dt
import logging
import os
from typing import Dict, List, Tuple
import numpy as np
from analysis import config
import analysis.explorer_data as explorer_data
from analysis.tabs._rendering import (
_render_party_axis_chart_1d,
_render_scree_plot,
_render_svd_time_trajectory,
_render_voting_results,
st,
)
try:
import duckdb
except Exception:
duckdb = None # type: ignore
# Per-component theme metadata, looked up by SVD component number. This module
# reads the "label", "explanation", "flip", "positive_pole" and "negative_pole"
# entries of each theme dict.
SVD_THEMES = config.SVD_THEMES
# Party names iterated in order to pick the default selection of the
# time-trajectory party multiselect.
KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def _coerce_motion_id(raw_id) -> "int | None":
    """Return *raw_id* as an ``int``, or ``None`` when it is missing or not convertible."""
    if raw_id is None:
        return None
    try:
        return int(raw_id)
    except (TypeError, ValueError, OverflowError):
        return None


def _motion_score(motion: dict) -> float:
    """Return the motion's ``"score"`` as a float; missing or unparsable scores count as 0.0."""
    try:
        return float(motion.get("score", 0.0))
    except (TypeError, ValueError):
        return 0.0


def _group_rows_by_component(rows: list) -> Dict[int, list]:
    """Group JSON rows by their ``"component"``, keeping the first row per motion id.

    Rows without a ``"component"`` key land in bucket 0. Within one component,
    later rows that repeat an already-seen ``"motion_id"`` are dropped.
    """
    comp_map: Dict[int, list] = {}
    seen: set = set()
    for row in rows:
        comp = int(row.get("component", 0))
        bucket = comp_map.setdefault(comp, [])
        key = (comp, row.get("motion_id"))
        if key not in seen:
            seen.add(key)
            bucket.append(row)
    return comp_map


def _compute_axis_flip(aligned_scores: dict, comp_sel: int, default: bool) -> bool:
    """Decide whether the axis for component *comp_sel* should be drawn flipped.

    The axis is flipped when the mean score of the canonical right parties is
    lower than the mean score of the canonical left parties on that component.

    Args:
        aligned_scores: Mapping of party name to its sequence of component scores.
        comp_sel: 1-based component number.
        default: Value returned when the orientation cannot be computed (the
            component is missing from a canonical party's scores, the canonical
            party lists cannot be imported, or the computation fails).

    Returns:
        ``True`` to flip, ``False`` otherwise. ``False`` is also returned when
        the data contains no canonical party on one of the two sides.
    """
    idx = comp_sel - 1
    if idx < 0:
        # A non-positive component number would silently index from the end.
        return default
    try:
        from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT

        right_scores: list = []
        left_scores: list = []
        for party, scores in aligned_scores.items():
            if party in CANONICAL_RIGHT:
                side = right_scores
            elif party in CANONICAL_LEFT:
                side = left_scores
            else:
                continue
            if idx >= len(scores):
                # This component is not available for a canonical party.
                return default
            side.append(scores[idx])
        if right_scores and left_scores:
            return bool(np.mean(right_scores) < np.mean(left_scores))
        return False
    except Exception:
        # Best effort: orientation is cosmetic, so never break the tab over it.
        logger.warning(
            "Could not compute flip for SVD component %s", comp_sel, exc_info=True
        )
        return default


def _fetch_motion_details(db_path: str, motions: list) -> Dict[int, tuple]:
    """Batch-fetch metadata rows for *motions* from the ``motions`` table.

    Returns a mapping of motion id to the row tuple
    ``(id, title, date, policy_area, url, body_text, voting_results)``.
    Returns an empty mapping when there are no usable ids, when ``duckdb`` is
    unavailable, or when the query fails (the failure is logged).
    """
    ids_int: List[int] = []
    for motion in motions:
        raw_id = motion.get("motion_id")
        if raw_id is None:
            continue
        motion_id = _coerce_motion_id(raw_id)
        if motion_id is None:
            logger.warning("Skipping invalid motion id in SVD batch fetch: %r", raw_id)
        else:
            ids_int.append(motion_id)
    # Drop duplicate ids (e.g. "12" and 12) while keeping order.
    ids_int = list(dict.fromkeys(ids_int))
    if not ids_int or duckdb is None:
        return {}

    con = None
    try:
        # Only "?" placeholders are interpolated; the ids are bound parameters.
        placeholders = ", ".join("?" for _ in ids_int)
        con = duckdb.connect(database=db_path, read_only=True)
        db_rows = con.execute(
            f"SELECT id, title, date, policy_area, url, body_text, voting_results "
            f"FROM motions WHERE id IN ({placeholders})",
            ids_int,
        ).fetchall()
        return {r[0]: r for r in db_rows}
    except Exception:
        logger.exception("Failed to batch-fetch motion details")
        return {}
    finally:
        if con is not None:
            con.close()


def _render_motion_column(
    column,
    heading: str,
    arrow: str,
    motions: list,
    motion_details: Dict[int, tuple],
) -> None:
    """Render one pole's motions as expanders inside the Streamlit *column*.

    Args:
        column: Streamlit column container to draw into.
        heading: Markdown heading shown above the motions.
        arrow: Prefix placed before each motion title in the expander label.
        motions: Motion dicts (from the top-motions JSON) for this pole.
        motion_details: Mapping of motion id to its database row, as returned
            by ``_fetch_motion_details``.
    """
    with column:
        st.markdown(heading)
        for motion in motions:
            raw_id = motion.get("motion_id")
            raw_title = motion.get("title") or f"Motie #{raw_id}"
            with st.expander(f"{arrow} {raw_title}"):
                # A malformed id simply has no metadata; it must not crash the tab.
                motion_id = _coerce_motion_id(raw_id)
                row = motion_details.get(motion_id) if motion_id is not None else None
                if row:
                    date_str = str(row[2])[:10] if row[2] is not None else "?"
                    st.caption(f"📅 {date_str} | {row[3] or ''}")
                    if row[4] and str(row[4]).startswith("http"):
                        st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
                    if row[5]:
                        with st.expander("Toon volledige tekst"):
                            st.write(row[5])
                    _render_voting_results(row[6])
                else:
                    st.caption("_Geen metadata beschikbaar_")


def build_svd_components_tab(db_path: str) -> None:
    """Render the "SVD components" tab of the parliamentary explorer.

    Reads ``thoughts/explorer/top_svd_top_motions.json`` and shows, for the
    selected SVD component, its theme label/explanation, the party positions
    on that axis (for a single window or as a trajectory over all windows),
    and the top contributing motions split over the two poles of the axis.
    Party positions for every component come from
    ``explorer_data.get_aligned_party_scores``.

    Args:
        db_path: Path of the DuckDB database, passed to the ``explorer_data``
            loaders and used to fetch motion metadata.

    Returns:
        None. Returns early (after showing a message) when the JSON file is
        missing, cannot be loaded, or contains no rows.
    """
    import json  # only needed here; not imported at module level

    st.subheader("🔬 SVD Assen — politieke polarisatiethema's")
    st.markdown(
        "Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen "
        "van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren "
        "het spanningsveld dat de as beschrijft."
    )

    scree_importances = explorer_data.load_scree_data(db_path)
    if scree_importances:
        st.markdown(
            "**Scree-plot** — het relatieve gewicht van elke SVD-as. "
            "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; "
            "latere assen (7+) zijn fragiel en mogelijk niet boven ruisniveau."
        )
        _render_scree_plot(scree_importances)

    json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
    if not os.path.exists(json_path):
        st.warning(
            f"Top-SVD data not found at {json_path}. Run the importance job to generate it."
        )
        return
    try:
        with open(json_path, "r", encoding="utf-8") as fh:
            payload = json.load(fh)
        if not isinstance(payload, dict):
            # Anything but an object would fail on .get() below.
            raise ValueError("top-level JSON value is not an object")
    except Exception as e:
        st.error(f"Failed to load SVD importance JSON: {e}")
        return

    window = payload.get("window")
    rows = payload.get("rows", [])
    if not rows:
        st.info("Geen top-moties in dataset")
        return
    st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**")

    comp_map = _group_rows_by_component(rows)
    comp_options = sorted(comp_map)

    def _comp_label(c: int) -> str:
        lbl = SVD_THEMES.get(c, {}).get("label", "")
        return f"As {c}{lbl}" if lbl else f"As {c}"

    comp_display = [_comp_label(c) for c in comp_options]

    party_scores_default = explorer_data.load_party_axis_scores(db_path)
    party_mp_vectors = explorer_data.load_party_mp_vectors(db_path)
    # NOTE(review): bootstrap_data is computed but not consumed anywhere in this
    # function; kept so the computation still runs as before. Confirm whether
    # it can be dropped.
    bootstrap_data = None
    if party_mp_vectors:
        try:
            from analysis.political_axis import compute_party_bootstrap_cis

            bootstrap_data = compute_party_bootstrap_cis(party_mp_vectors)
        except Exception:
            logger.warning(
                "Could not compute party bootstrap confidence intervals",
                exc_info=True,
            )

    col1, col2 = st.columns([2, 1])
    view_mode = "Enkel venster"
    selected_parties_for_trajectory: list = []
    with col2:
        comp_sel_idx = st.selectbox(
            "Selecteer SVD-as",
            options=list(range(len(comp_options))),
            format_func=lambda i: comp_display[i],
            index=0,
        )
        comp_sel = comp_options[comp_sel_idx]
        min_mps = st.number_input(
            "Min. Kamerleden per partij",
            min_value=1,
            max_value=20,
            value=1,
            step=1,
            help="Partijen met minder dan dit aantal Kamerleden worden niet weergegeven.",
        )
        view_mode = st.radio(
            "Weergave",
            options=["Enkel venster", "Tijdtraject"],
            index=0,
            help="Enkel venster: toont posities voor één tijdsvenster. Tijdtraject: toont hoe partijen over tijd bewegen op deze as.",
        )
        if view_mode == "Tijdtraject":
            all_parties = (
                sorted(party_scores_default.keys()) if party_scores_default else []
            )
            default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties][:8]
            selected_parties_for_trajectory = st.multiselect(
                "Partijen om te tonen",
                options=all_parties,
                default=default_parties,
                help="Selecteer de partijen die je wilt zien in het tijdtraject.",
            )

    theme = SVD_THEMES.get(comp_sel, {})
    if theme:
        st.info(f"**{theme['label']}** — {theme['explanation']}")
    motions = comp_map.get(comp_sel, [])

    # Selectable windows: every stored window except the current calendar year,
    # sorted, with "current_parliament" (when present) last.
    _current_year = str(_dt.date.today().year)
    available_windows = explorer_data.get_uniform_dim_windows(db_path)
    year_windows = sorted(
        w for w in available_windows if w not in ("current_parliament", _current_year)
    )
    has_current = "current_parliament" in available_windows
    svd_windows = year_windows + (["current_parliament"] if has_current else [])

    def _svd_window_label(w: str) -> str:
        if w == "current_parliament":
            return "Huidig parliament"
        return w

    with col1:
        svd_window = st.selectbox(
            "Jaar",
            options=svd_windows,
            # Default to the last window; clamp so an empty list does not yield -1.
            index=max(len(svd_windows) - 1, 0),
            format_func=_svd_window_label,
            key=f"svd_window_{comp_sel}",
        )

    # NOTE(review): party_scores is loaded but not consumed below; the call is
    # kept so the loader is still invoked as before. Confirm whether it can go.
    if svd_window == "current_parliament":
        party_scores = party_scores_default
    else:
        party_scores = explorer_data.load_party_axis_scores_for_window(
            db_path, svd_window
        )

    party_mp_counts = (
        {p: len(v) for p, v in party_mp_vectors.items()} if party_mp_vectors else {}
    )

    active_mps = (
        explorer_data.load_active_mps(db_path)
        if svd_window == "current_parliament"
        else None
    )
    aligned_all_scores = explorer_data.get_aligned_party_scores(
        db_path, svd_window, active_mps
    )

    # 1-tuple per party: its score on the selected component, when available.
    score_idx = comp_sel - 1
    party_1d_coords: dict = {
        party: (float(all_scores[score_idx]),)
        for party, all_scores in aligned_all_scores.items()
        if 0 <= score_idx < len(all_scores)
    }

    theme_with_flip = {
        **theme,
        "flip": _compute_axis_flip(
            aligned_all_scores, comp_sel, theme.get("flip", False)
        ),
    }

    if min_mps > 1 and party_mp_counts:
        valid_parties = {p for p, count in party_mp_counts.items() if count >= min_mps}
        party_1d_coords = {
            p: coords for p, coords in party_1d_coords.items() if p in valid_parties
        }

    if view_mode == "Tijdtraject" and selected_parties_for_trajectory:
        # The trajectory spans the same windows offered in the "Jaar" selector.
        party_scores_by_window = explorer_data._get_aligned_trajectory_scores(
            db_path, svd_windows
        )
        _render_svd_time_trajectory(
            party_scores_by_window,
            comp_sel,
            theme_with_flip,
            selected_parties_for_trajectory,
        )
    else:
        _render_party_axis_chart_1d(party_1d_coords, comp_sel, theme_with_flip)

    motion_details = _fetch_motion_details(db_path, motions)

    # Split motions by the sign of their score (zero counts as positive).
    pos_motions: list = []
    neg_motions: list = []
    for motion in motions:
        score = _motion_score(motion)
        if score >= 0:
            pos_motions.append(motion)
        elif score < 0:
            neg_motions.append(motion)

    pos_pole = theme_with_flip.get("positive_pole", "")
    neg_pole = theme_with_flip.get("negative_pole", "")
    if theme_with_flip["flip"]:
        # Flipped axis: the positive pole is drawn on the left.
        left_pole, right_pole = pos_pole, neg_pole
        left_motions, right_motions = pos_motions, neg_motions
    else:
        left_pole, right_pole = neg_pole, pos_pole
        left_motions, right_motions = neg_motions, pos_motions
    # Both title prefixes are currently empty strings.
    left_arrow, right_arrow = "", ""

    lcol, rcol = st.columns(2)
    _render_motion_column(
        lcol, f"**← {left_pole}**", left_arrow, left_motions, motion_details
    )
    _render_motion_column(
        rcol, f"**{right_pole} →**", right_arrow, right_motions, motion_details
    )