You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
372 lines
13 KiB
372 lines
13 KiB
"""SVD Components tab for the parliamentary explorer."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import datetime as _dt
|
|
import logging
|
|
import os
|
|
from typing import Dict, List, Tuple
|
|
|
|
import numpy as np
|
|
|
|
from analysis import config
|
|
import analysis.explorer_data as explorer_data
|
|
from analysis.tabs._rendering import (
|
|
_render_party_axis_chart_1d,
|
|
_render_scree_plot,
|
|
_render_svd_time_trajectory,
|
|
_render_voting_results,
|
|
st,
|
|
)
|
|
|
|
# duckdb is treated as optional: if importing it fails for any reason the name
# is bound to None, and code that queries the database checks for None first.
try:
    import duckdb
except Exception:
    duckdb = None  # type: ignore

# Module-level aliases for the values defined in analysis.config.
SVD_THEMES = config.SVD_THEMES
KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES

logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _safe_int(value):
    """Return ``int(value)``, or ``None`` when *value* cannot be converted."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return None


def _motion_score(motion: dict) -> float:
    """Return the motion's ``score`` as a float; missing or invalid scores count as 0.0."""
    try:
        return float(motion.get("score") or 0.0)
    except (TypeError, ValueError):
        return 0.0


def _group_rows_by_component(rows: List[dict]) -> Dict[int, List[dict]]:
    """Group rows by their ``component`` key, keeping the first row per ``motion_id``.

    Row order within a component is preserved. A row without a ``component``
    key is filed under component 0.
    """
    grouped: Dict[int, List[dict]] = {}
    seen_ids: Dict[int, set] = {}
    for row in rows:
        component = int(row.get("component", 0))
        motion_id = row.get("motion_id")
        # One running set per component avoids rebuilding it for every row.
        already_seen = seen_ids.setdefault(component, set())
        if motion_id in already_seen:
            continue
        already_seen.add(motion_id)
        grouped.setdefault(component, []).append(row)
    return grouped


def _order_windows(available, current_year: str) -> List[str]:
    """Return the selectable windows: sorted year windows, then ``current_parliament``.

    The window named after *current_year* is left out, and
    ``current_parliament`` is appended last when it is available.
    """
    ordered = sorted(
        w for w in available if w not in ("current_parliament", current_year)
    )
    if "current_parliament" in available:
        ordered.append("current_parliament")
    return ordered


def _compute_axis_flip(
    scores_by_party,
    component: int,
    left_parties,
    right_parties,
    default,
    max_components: int = 10,
):
    """Decide whether axis *component* (1-based) must be mirrored for display.

    The axis is flipped when the mean score of the parties in *right_parties*
    is lower than the mean score of the parties in *left_parties*.

    Returns:
        ``True``/``False`` when the orientation can be computed; ``False`` when
        either side has no party present; *default* when *component* is outside
        ``1..max_components`` or a canonical party has no score for this axis.
    """
    if not 1 <= component <= max_components:
        return default
    idx = component - 1

    right_scores: List[float] = []
    left_scores: List[float] = []
    for party, scores in scores_by_party.items():
        if party in right_parties:
            target = right_scores
        elif party in left_parties:
            target = left_scores
        else:
            continue
        if idx >= len(scores):
            # Explicit bounds check instead of relying on a swallowed IndexError.
            return default
        target.append(scores[idx])

    if right_scores and left_scores:
        return bool(np.mean(right_scores) < np.mean(left_scores))
    return False


def _split_motions_by_pole(
    motions: List[dict], flip: bool
) -> Tuple[List[dict], List[dict]]:
    """Split *motions* into ``(left, right)`` display lists.

    Motions with a score >= 0 belong to the positive pole. The positive pole is
    shown on the right unless *flip* is true, in which case it is shown on the
    left.
    """
    positive = [m for m in motions if _motion_score(m) >= 0]
    negative = [m for m in motions if _motion_score(m) < 0]
    return (positive, negative) if flip else (negative, positive)


def _fetch_motion_details(db_path: str, motions: List[dict]) -> Dict[int, tuple]:
    """Fetch metadata rows for *motions* from the ``motions`` table in one query.

    Returns a dict keyed by motion id whose values are
    ``(id, title, date, policy_area, url, body_text, voting_results)`` rows.
    Returns an empty dict when there is nothing to fetch, duckdb is
    unavailable, or the query fails (the failure is logged).
    """
    motion_ids: List[int] = []
    for motion in motions:
        raw_id = motion.get("motion_id")
        if raw_id is None:
            continue
        motion_id = _safe_int(raw_id)
        if motion_id is None:
            logger.warning("Skipping invalid motion id in SVD batch fetch: %r", raw_id)
            continue
        motion_ids.append(motion_id)

    if not motion_ids or duckdb is None:
        return {}

    # Only "?" placeholders are interpolated; the ids are bound as parameters.
    placeholders = ", ".join("?" for _ in motion_ids)
    con = None
    try:
        con = duckdb.connect(database=db_path, read_only=True)
        db_rows = con.execute(
            f"SELECT id, title, date, policy_area, url, body_text, voting_results "
            f"FROM motions WHERE id IN ({placeholders})",
            motion_ids,
        ).fetchall()
        return {r[0]: r for r in db_rows}
    except Exception:
        logger.exception("Failed to batch-fetch motion details")
        return {}
    finally:
        if con is not None:
            con.close()


def _render_motion_column(
    heading: str, motions: List[dict], motion_details: Dict[int, tuple]
) -> None:
    """Render *heading* followed by one expander per motion in the current container."""
    st.markdown(heading)
    for motion in motions:
        raw_id = motion.get("motion_id")
        title = motion.get("title") or f"Motie #{raw_id}"
        with st.expander(title):
            # A non-integer id is treated as "no metadata" instead of raising.
            motion_id = _safe_int(raw_id)
            row = motion_details.get(motion_id) if motion_id is not None else None
            if not row:
                st.caption("_Geen metadata beschikbaar_")
                continue

            date_str = str(row[2])[:10] if row[2] is not None else "?"
            st.caption(f"{date_str} | {row[3] or '—'}")
            if row[4] and str(row[4]).startswith("http"):
                st.markdown(f"[Bekijk op Tweede Kamer]({row[4]})")
            if row[5]:
                with st.expander("Toon volledige tekst"):
                    st.write(row[5])
            _render_voting_results(row[6])


def build_svd_components_tab(db_path: str) -> None:
    """Render the SVD components tab of the explorer.

    Reads ``thoughts/explorer/top_svd_top_motions.json`` and shows, for the
    component chosen by the user:

    * a scree plot (when scree data is available),
    * the theme label/explanation configured in ``SVD_THEMES``,
    * either a 1-D party chart for one window or a trajectory over all windows,
      both based on the aligned party scores from ``explorer_data``,
    * the component's top motions, split over the two poles of the axis, each
      with metadata fetched from the ``motions`` table.

    Args:
        db_path: Path of the DuckDB database passed on to the data loaders.
    """
    import json

    st.subheader("SVD Assen — politieke polarisatiethema's")
    st.markdown(
        "Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen "
        "van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren "
        "het spanningsveld dat de as beschrijft."
    )

    scree_importances = explorer_data.load_scree_data(db_path)
    if scree_importances:
        st.markdown(
            "**Scree-plot** — het relatieve gewicht van elke SVD-as. "
            "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; "
            "latere assen (7+) zijn fragiel en mogelijk niet boven ruisniveau."
        )
        _render_scree_plot(scree_importances)

    json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
    try:
        with open(json_path, "r", encoding="utf-8") as fh:
            payload = json.load(fh)
    except FileNotFoundError:
        st.warning(
            f"Top-SVD data not found at {json_path}. Run the importance job to generate it."
        )
        return
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        st.error(f"Failed to load SVD importance JSON: {exc}")
        return
    if not isinstance(payload, dict):
        st.error(
            "Failed to load SVD importance JSON: "
            f"expected an object, got {type(payload).__name__}"
        )
        return

    window = payload.get("window")
    rows = payload.get("rows", [])
    if not rows:
        st.info("Geen top-moties in dataset")
        return

    st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**")

    comp_map = _group_rows_by_component(rows)
    comp_options = sorted(comp_map)

    def _comp_label(c: int) -> str:
        lbl = SVD_THEMES.get(c, {}).get("label", "")
        return f"As {c} — {lbl}" if lbl else f"As {c}"

    comp_display = [_comp_label(c) for c in comp_options]

    party_scores_default = explorer_data.load_party_axis_scores(db_path)
    party_mp_vectors = explorer_data.load_party_mp_vectors(db_path)

    # NOTE(review): which statements sit inside the two column containers was
    # reconstructed (controls in col2, window selector in col1, everything else
    # at full width) — confirm against the intended page layout.
    col1, col2 = st.columns([2, 1])

    with col2:
        comp_sel_idx = st.selectbox(
            "Selecteer SVD-as",
            options=list(range(len(comp_options))),
            format_func=lambda i: comp_display[i],
            index=0,
        )
        comp_sel = comp_options[comp_sel_idx]

        min_mps = st.number_input(
            "Min. Kamerleden per partij",
            min_value=1,
            max_value=20,
            value=1,
            step=1,
            help="Partijen met minder dan dit aantal Kamerleden worden niet weergegeven.",
        )

        view_mode = st.radio(
            "Weergave",
            options=["Enkel venster", "Tijdtraject"],
            index=0,
            help="Enkel venster: toont posities voor één tijdsvenster. Tijdtraject: toont hoe partijen over tijd bewegen op deze as.",
        )

        selected_parties_for_trajectory: list = []
        if view_mode == "Tijdtraject":
            all_parties = sorted(party_scores_default) if party_scores_default else []
            default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties][:8]
            selected_parties_for_trajectory = st.multiselect(
                "Partijen om te tonen",
                options=all_parties,
                default=default_parties,
                help="Selecteer de partijen die je wilt zien in het tijdtraject.",
            )

    theme = SVD_THEMES.get(comp_sel, {})
    if theme:
        st.info(f"**{theme.get('label', '')}** — {theme.get('explanation', '')}")

    motions = comp_map.get(comp_sel, [])

    # The same window list feeds the selector and the trajectory view.
    current_year = str(_dt.date.today().year)
    svd_windows = _order_windows(
        explorer_data.get_uniform_dim_windows(db_path), current_year
    )

    def _svd_window_label(w: str) -> str:
        return "Huidig parlement" if w == "current_parliament" else w

    with col1:
        svd_window = st.selectbox(
            "Jaar",
            options=svd_windows,
            index=len(svd_windows) - 1,
            format_func=_svd_window_label,
            key=f"svd_window_{comp_sel}",
        )

    party_mp_counts = (
        {p: len(v) for p, v in party_mp_vectors.items()} if party_mp_vectors else {}
    )

    # Only the current-parliament window is restricted to active MPs.
    active_mps = (
        explorer_data.load_active_mps(db_path)
        if svd_window == "current_parliament"
        else None
    )
    aligned_all_scores = explorer_data.get_aligned_party_scores(
        db_path, svd_window, active_mps
    )

    # Components are 1-based; the lower bound stops component 0 from wrapping
    # around to the last axis through a negative index.
    axis_idx = comp_sel - 1
    party_1d_coords: dict = {
        party: (float(all_scores[axis_idx]),)
        for party, all_scores in aligned_all_scores.items()
        if 0 <= axis_idx < len(all_scores)
    }

    configured_flip = theme.get("flip", False)
    try:
        from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT
    except ImportError:
        logger.warning(
            "CANONICAL_LEFT/CANONICAL_RIGHT unavailable; using configured flip for axis %s",
            comp_sel,
        )
        flip = configured_flip
    else:
        flip = _compute_axis_flip(
            aligned_all_scores,
            comp_sel,
            CANONICAL_LEFT,
            CANONICAL_RIGHT,
            configured_flip,
        )

    theme_with_flip = {**theme, "flip": flip}

    if min_mps > 1 and party_mp_counts:
        valid_parties = {p for p, count in party_mp_counts.items() if count >= min_mps}
        party_1d_coords = {
            p: coords for p, coords in party_1d_coords.items() if p in valid_parties
        }

    if view_mode == "Tijdtraject" and selected_parties_for_trajectory:
        party_scores_by_window = explorer_data._get_aligned_trajectory_scores(
            db_path, svd_windows
        )
        _render_svd_time_trajectory(
            party_scores_by_window,
            comp_sel,
            theme_with_flip,
            selected_parties_for_trajectory,
        )
    else:
        _render_party_axis_chart_1d(party_1d_coords, comp_sel, theme_with_flip)

    motion_details = _fetch_motion_details(db_path, motions)

    pos_pole = theme_with_flip.get("positive_pole", "")
    neg_pole = theme_with_flip.get("negative_pole", "")
    left_motions, right_motions = _split_motions_by_pole(motions, bool(flip))
    left_pole, right_pole = (pos_pole, neg_pole) if flip else (neg_pole, pos_pole)

    lcol, rcol = st.columns(2)
    with lcol:
        _render_motion_column(f"**← {left_pole}**", left_motions, motion_details)
    with rcol:
        _render_motion_column(f"**{right_pole} →**", right_motions, motion_details)
|
|
|