|
|
"""Parlement Explorer — Streamlit data analysis app.
|
|
|
|
|
|
Three tabs:
1. Politiek Kompas — 2D scatter of MPs/parties, window slider
2. Partij Trajectories — party centroid lines over time
3. SVD Components — component themes, scree plot, party positions

Run with: streamlit run explorer.py

Import-safe: heavy computation is behind @st.cache_data and only runs at UI time.
All DuckDB connections are read_only=True so the app can run alongside the pipeline.
|
|
|
"""
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
import json
|
|
|
import logging
|
|
|
import os
|
|
|
import re
|
|
|
import traceback
|
|
|
from datetime import datetime
|
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
|
|
try:
|
|
|
import duckdb
|
|
|
|
|
|
_DUCKDB_AVAILABLE = True
|
|
|
except Exception:
|
|
|
duckdb = None
|
|
|
_DUCKDB_AVAILABLE = False
|
|
|
import numpy as np
|
|
|
import pandas as pd
|
|
|
|
|
|
from analysis import config
|
|
|
from analysis import explorer_data
|
|
|
from analysis import projections
|
|
|
from analysis import trajectory
|
|
|
|
|
|
# Backwards-compatible re-export used by tests
|
|
|
choose_trajectory_title = trajectory.choose_trajectory_title
|
|
|
|
|
|
try:
    import plotly.express as px
    import plotly.graph_objects as go
except Exception:
    # Plotly is optional: when it cannot be imported, expose a tiny stand-in
    # for the parts of ``go`` this module relies on (Figure, Scatter), so the
    # module stays importable. ``px`` has no stand-in and is set to None.
    px = None
    import types

    class _DummyTrace:
        """Plain record holding the trace fields kept by the fallback figure."""

        # Only these keyword arguments are retained; everything else is dropped.
        _FIELDS = ("name", "x", "y", "text", "customdata")

        def __init__(self, **kwargs):
            for field in self._FIELDS:
                setattr(self, field, kwargs.get(field))

    class _DummyFigure:
        """Fallback for ``go.Figure`` that only collects traces in ``data``."""

        def __init__(self):
            self.data = []

        def add_trace(self, trace):
            """Append *trace* to ``data``, converting it to a ``_DummyTrace``.

            Accepts a ``_DummyTrace``, any object exposing the trace fields as
            attributes, or a mapping-like object exposing them through
            ``.get``. Fields found in neither place are stored as ``None``.
            """
            if isinstance(trace, _DummyTrace):
                self.data.append(trace)
                return

            # ``getattr`` with a default never raises, so a try/except cannot
            # be used to detect mappings; fall back to ``.get`` explicitly
            # whenever the attribute lookup yields nothing.
            has_get = hasattr(trace, "get")
            values = {}
            for field in _DummyTrace._FIELDS:
                value = getattr(trace, field, None)
                if value is None and has_get:
                    value = trace.get(field)
                values[field] = value
            self.data.append(_DummyTrace(**values))

        def add_annotation(self, *args, **kwargs):
            """Accept and ignore annotations."""
            return None

    go = types.SimpleNamespace(
        Figure=_DummyFigure, Scatter=lambda **kwargs: _DummyTrace(**kwargs)
    )
|
|
|
|
|
|
try:
    import streamlit as st
except Exception:
    # Streamlit is optional at import time: the stand-in below makes the
    # ``@st.cache_data`` decorators and the widget calls used by this module
    # harmless no-ops so the module can still be imported.

    class _DummyContext:
        """No-op context manager returned by the fallback ``expander``."""

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            # Never suppress exceptions raised inside the ``with`` body.
            return False

    class _DummyColumn(_DummyContext):
        """Fallback column: ignores all rendering calls, usable in ``with``."""

        def markdown(self, *a, **k):
            return None

        def metric(self, *a, **k):
            return None

        def dataframe(self, *a, **k):
            return None

    class _DummySt:
        """Minimal stand-in for the ``streamlit`` module."""

        def cache_data(self, *args, **kwargs):
            """Return a decorator that leaves the function untouched."""

            def _decorator(func):
                return func

            return _decorator

        # --- display calls: accepted and ignored -------------------------
        def markdown(self, *args, **kwargs):
            return None

        def subheader(self, *args, **kwargs):
            return None

        def plotly_chart(self, *args, **kwargs):
            return None

        def caption(self, *args, **kwargs):
            return None

        def text_area(self, *args, **kwargs):
            return None

        def json(self, *args, **kwargs):
            return None

        def warning(self, *args, **kwargs):
            return None

        def info(self, *args, **kwargs):
            return None

        # --- input widgets: return a deterministic default ----------------
        @staticmethod
        def _resolve_options(args, kwargs):
            """Pick the options from ``options=`` or the second positional arg."""
            options = kwargs.get("options")
            if options is None:
                options = args[1] if len(args) > 1 else []
            return options

        def checkbox(self, *args, **kwargs):
            return kwargs.get("value", False)

        def selectbox(self, *args, **kwargs):
            """Return the first option, or None when there are no options."""
            opts = self._resolve_options(args, kwargs)
            return opts[0] if opts else None

        def multiselect(self, *args, **kwargs):
            """Return ``default`` when given, else up to the first six options."""
            opts = self._resolve_options(args, kwargs)
            default = kwargs.get("default")
            if default is not None:
                return default
            return opts[:6] if opts else []

        def number_input(self, *args, **kwargs):
            return kwargs.get("value") if "value" in kwargs else 1

        def slider(self, *args, **kwargs):
            return kwargs.get("value") if "value" in kwargs else 0.35

        # --- layout -------------------------------------------------------
        def expander(self, *args, **kwargs):
            return _DummyContext()

        def columns(self, *args, **kwargs):
            """Return one fallback column per requested column.

            The spec may be an integer count or a sequence of widths. Calling
            ``len()`` on an integer spec used to raise ``TypeError``.
            """
            spec = args[0] if args else kwargs.get("spec", 1)
            n = spec if isinstance(spec, int) else len(spec)
            return tuple(_DummyColumn() for _ in range(n))

    st = _DummySt()
|
|
|
|
|
|
# Re-export trajectories diagnostics for backwards compatibility
|
|
|
from analysis.tabs.trajectories import (
|
|
|
_last_diagnostics,
|
|
|
_last_trajectories_diagnostics,
|
|
|
get_debug_trajectories_enabled,
|
|
|
)
|
|
|
|
|
|
|
|
|
def select_trajectory_plot_data(*args, **kwargs):
    """Forward all arguments to ``analysis.tabs.trajectories.select_trajectory_plot_data``.

    The target is imported on each call rather than at module import time.
    """
    from analysis.tabs.trajectories import (
        select_trajectory_plot_data as delegate,
    )

    outcome = delegate(*args, **kwargs)
    return outcome
|
|
|
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Aliases for the shared tables defined in ``analysis.config`` so they can be
# referenced through this module's namespace.
PARTY_COLOURS: Dict[str, str] = config.PARTY_COLOURS
SVD_THEMES: dict[int, dict[str, str]] = config.SVD_THEMES
KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES
CURRENT_PARLIAMENT_PARTIES = config.CURRENT_PARLIAMENT_PARTIES
_PARTY_NORMALIZE = config._PARTY_NORMALIZE
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Cached loaders
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Beschikbare tijdsvensters laden…")
def get_available_windows(db_path: str) -> List[str]:
    """Cached pass-through to ``explorer_data.get_available_windows``.

    Described by the original docstring as the sorted list of distinct
    window_ids from svd_vectors.
    """
    window_ids = explorer_data.get_available_windows(db_path)
    return window_ids
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner=False)
def get_uniform_dim_windows(db_path: str) -> List[str]:
    """Cached pass-through to ``explorer_data.get_uniform_dim_windows``.

    Described by the original docstring as only those windows whose dominant
    MP-vector dimension is >= 25.
    """
    window_ids = explorer_data.get_uniform_dim_windows(db_path)
    return window_ids
|
|
|
|
|
|
|
|
|
def _should_swap_axes(axis_def: dict) -> bool:
    """Delegate to ``projections.should_swap_axes``.

    Per the original docstring: True if the Y axis is economic left-right
    and the X axis is not.
    """
    decision = projections.should_swap_axes(axis_def)
    return decision
|
|
|
|
|
|
|
|
|
def _swap_axes(positions_by_window: dict, axis_def: dict) -> tuple:
    """Delegate to ``projections.swap_axes``.

    Per the original docstring: swaps x and y in all positions and in the
    axis metadata.
    """
    swapped = projections.swap_axes(positions_by_window, axis_def)
    return swapped
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="2D posities berekenen (kan even duren)…")
def load_positions(
    db_path: str,
    window_size: str = "annual",
) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict]:
    """Cached pass-through to ``explorer_data.load_positions``.

    Per the original docstring: 2D positions per window, computed using PCA
    on aligned SVD vectors.
    """
    positions_and_axes = explorer_data.load_positions(db_path, window_size)
    return positions_and_axes
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Partijkaart laden…")
def load_party_map(db_path: str) -> Dict[str, str]:
    """Cached pass-through to ``explorer_data.load_party_map``.

    Per the original docstring: a {mp_name: party} mapping with party names
    normalised to abbreviations.
    """
    mp_to_party = explorer_data.load_party_map(db_path)
    return mp_to_party
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Actieve Kamerleden laden…")
def load_active_mps(db_path: str) -> set:
    """Cached pass-through to ``explorer_data.load_active_mps``.

    Per the original docstring: the set of mp_name values currently seated
    in parliament.
    """
    seated = explorer_data.load_active_mps(db_path)
    return seated
|
|
|
|
|
|
|
|
|
def get_aligned_party_scores(
    db_path: str,
    window: str,
    active_mps: set | None = None,
) -> Dict[str, np.ndarray]:
    """Delegate to ``explorer_data.get_aligned_party_scores``.

    Per the original docstring: party scores for all N components, taken
    from aligned PCA positions. All three arguments are forwarded unchanged.
    """
    party_scores = explorer_data.get_aligned_party_scores(
        db_path, window, active_mps
    )
    return party_scores
|
|
|
|
|
|
|
|
|
def compute_party_discipline(
    db_path: str, start_date: str, end_date: str
) -> pd.DataFrame:
    """Delegate to ``explorer_data.compute_party_discipline``.

    Per the original docstring: per-party voting discipline (Rice index) for
    roll-call votes in a date range.
    """
    discipline = explorer_data.compute_party_discipline(
        db_path, start_date, end_date
    )
    return discipline
|
|
|
|
|
|
|
|
|
def _load_mp_vectors_by_party(db_path: str) -> Dict[str, List[np.ndarray]]:
    """Delegate to ``explorer_data.load_mp_vectors_by_party``.

    Per the original docstring: individual MP SVD vectors grouped by party
    for current_parliament.
    """
    grouped = explorer_data.load_mp_vectors_by_party(db_path)
    return grouped
|
|
|
|
|
|
|
|
|
def _load_mp_vectors_by_party_for_window(
    db_path: str,
    window: str,
) -> Dict[str, List[np.ndarray]]:
    """Delegate to ``explorer_data.load_mp_vectors_by_party_for_window``.

    Per the original docstring: individual MP SVD vectors grouped by party
    for one specific window.
    """
    grouped = explorer_data.load_mp_vectors_by_party_for_window(db_path, window)
    return grouped
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Partijposities op SVD-assen laden…")
def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
    """Return per-party SVD vectors built from the individual MP vectors.

    Feeds ``explorer_data.load_mp_vectors_by_party`` into
    ``explorer_data.compute_party_axis_scores``. Any exception is logged with
    its traceback and an empty dict is returned instead.
    """
    try:
        vectors_by_party = explorer_data.load_mp_vectors_by_party(db_path)
        axis_scores = explorer_data.compute_party_axis_scores(vectors_by_party)
    except Exception:
        logger.exception("Failed to load party axis scores")
        return {}
    return axis_scores
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Partijposities voor jaar laden…")
def load_party_axis_scores_for_window(
    db_path: str, window: str
) -> Dict[str, List[float]]:
    """Return per-party SVD vectors for a specific window.

    Feeds ``explorer_data.load_mp_vectors_by_party_for_window`` into
    ``explorer_data.compute_party_axis_scores``.

    Args:
        db_path: Database path, forwarded to the loader.
        window: Window identifier, forwarded to the loader.

    Returns:
        The computed per-party scores, or an empty dict if anything raised
        (the failure is logged with its traceback).
    """
    try:
        return explorer_data.compute_party_axis_scores(
            explorer_data.load_mp_vectors_by_party_for_window(db_path, window)
        )
    except Exception:
        # Lazy %-formatting: the logging framework interpolates the message
        # itself instead of receiving a pre-built f-string.
        logger.exception("Failed to load party axis scores for window %s", window)
        return {}
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="SVD scores voor alle vensters laden…")
def load_party_scores_all_windows(
    db_path: str, windows: List[str]
) -> Dict[str, Dict[str, List[float]]]:
    """Collect party SVD scores for every requested window.

    The special window ``"current_parliament"`` is served by
    ``load_party_axis_scores``; every other window goes through
    ``load_party_axis_scores_for_window``. The result is keyed by window.
    """
    return {
        window_id: (
            load_party_axis_scores(db_path)
            if window_id == "current_parliament"
            else load_party_axis_scores_for_window(db_path, window_id)
        )
        for window_id in windows
    }
|
|
|
|
|
|
|
|
|
def _load_mp_vectors_by_window(db_path: str, window: str) -> Dict[str, np.ndarray]:
    """Delegate to ``explorer_data.load_mp_vectors_by_window``.

    Per the original docstring: individual MP SVD vectors for one window.
    """
    vectors = explorer_data.load_mp_vectors_by_window(db_path, window)
    return vectors
|
|
|
|
|
|
|
|
|
def _get_aligned_trajectory_scores(
    db_path: str,
    windows: List[str],
    n_components: int = 10,
) -> Dict[str, Dict[str, List[float]]]:
    """Delegate to ``explorer_data._get_aligned_trajectory_scores``.

    Per the original docstring: aligned PCA scores for all windows, shaped as
    {window: {party: [scores per component]}}.
    """
    aligned_scores = explorer_data._get_aligned_trajectory_scores(
        db_path, windows, n_components
    )
    return aligned_scores
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="SVD scores met Procrustes-uitlijning laden…")
def load_party_scores_all_windows_aligned(
    db_path: str, windows: List[str]
) -> Dict[str, Dict[str, List[float]]]:
    """Build per-window party mean vectors from Procrustes-aligned MP vectors.

    Steps:
      1. Load the MP vectors of every requested window; windows for which
         the loader returns nothing are left out.
      2. Pass the collected windows through
         ``analysis.trajectory._procrustes_align_windows``.
      3. For each requested window present in the aligned result, group the
         MP vectors by party (MPs without a party in the party map are
         skipped) and store the element-wise mean per party as a list.
    """
    from analysis.trajectory import _procrustes_align_windows

    mp_to_party = load_party_map(db_path)

    # Step 1: raw vectors, only for windows that actually have data.
    vectors_per_window: Dict[str, Dict[str, np.ndarray]] = {}
    for window_id in windows:
        loaded = _load_mp_vectors_by_window(db_path, window_id)
        if loaded:
            vectors_per_window[window_id] = loaded

    # Step 2: alignment across windows.
    aligned = _procrustes_align_windows(vectors_per_window)

    # Step 3: party means per window, in the order the windows were requested.
    scores: Dict[str, Dict[str, List[float]]] = {}
    for window_id in windows:
        if window_id not in aligned:
            continue

        members_by_party: Dict[str, List[np.ndarray]] = {}
        for mp_name, vector in aligned[window_id].items():
            party = mp_to_party.get(mp_name)
            if party:
                members_by_party.setdefault(party, []).append(vector)

        scores[window_id] = {
            party: np.mean(members, axis=0).tolist()
            for party, members in members_by_party.items()
        }

    return scores
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Partij-MP vectoren laden…")
def load_party_mp_vectors(db_path: str) -> Dict[str, List[np.ndarray]]:
    """Return per-party lists of individual MP SVD vectors.

    Wraps ``explorer_data.load_mp_vectors_by_party``; on any exception the
    failure is logged with its traceback and an empty dict is returned.
    """
    try:
        vectors_by_party = explorer_data.load_mp_vectors_by_party(db_path)
    except Exception:
        logger.exception("Failed to load party MP vectors")
        return {}
    return vectors_by_party
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Bootstrap CI berekenen…")
def _cached_bootstrap_cis(
    party_mp_vectors: Dict[str, List[np.ndarray]],
) -> Dict[str, Dict]:
    """Cached front for ``analysis.political_axis.compute_party_bootstrap_cis``.

    The implementation is imported on each call and receives
    ``party_mp_vectors`` unchanged.
    """
    from analysis.political_axis import compute_party_bootstrap_cis as compute_cis

    intervals = compute_cis(party_mp_vectors)
    return intervals
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Scree-plot laden…")
def load_scree_data(db_path: str) -> List[float]:
    """Return the SVD spectrum from ``analysis.political_axis.compute_svd_spectrum``.

    Per the original docstring these are explained variance ratios (%) for
    all SVD components, sorted descending. If the import or the computation
    raises, the failure is logged and an empty list is returned.
    """
    try:
        from analysis.political_axis import compute_svd_spectrum as spectrum_of

        spectrum = spectrum_of(db_path)
    except Exception:
        logger.exception("Failed to load scree data")
        return []
    return spectrum
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Moties laden…")
def load_motions_df(db_path: str) -> pd.DataFrame:
    """Cached pass-through to ``explorer_data.load_motions_df``.

    Per the original docstring: the full motions table as a pandas DataFrame,
    loaded read-only.
    """
    motions = explorer_data.load_motions_df(db_path)
    return motions
|
|
|
|
|
|
|
|
|
def build_svd_components_tab(*args, **kwargs):
    """Render the SVD Components tab.

    Forwards every argument to
    ``analysis.tabs.components.build_svd_components_tab``, which is imported
    on each call rather than at module import time.
    """
    from analysis.tabs.components import build_svd_components_tab as render_tab

    outcome = render_tab(*args, **kwargs)
    return outcome
|
|
|
|
|
|
|
|
|
def build_mp_quiz_tab(*args, **kwargs):
    """Render the MP Quiz tab.

    Forwards every argument to ``analysis.tabs.quiz.build_mp_quiz_tab``,
    which is imported on each call rather than at module import time.
    """
    from analysis.tabs.quiz import build_mp_quiz_tab as render_tab

    outcome = render_tab(*args, **kwargs)
    return outcome
|
|
|
|
|
|
|
|
|
def run_app(db_path: str = "data/motions.db", window_size: str = "annual") -> None:
    """Render the Streamlit page: header, sidebar info panel and the tabs.

    Args:
        db_path: Path of the DuckDB database. Defaults to the previously
            hard-coded ``"data/motions.db"``.
        window_size: Window granularity passed to the compass and
            trajectories tabs. Defaults to the previously hard-coded
            ``"annual"``.

    The sidebar panel opens the database read-only just to count the motions.
    Any failure there is shown as a warning instead of aborting the page.
    """
    st.set_page_config(
        layout="wide",
        page_title="Parlement Explorer",
        page_icon="🏛️",
    )
    st.title("🏛️ Parlement Explorer")

    st.sidebar.title("Instellingen")

    with st.sidebar.expander("ℹ️ Over", expanded=False):
        try:
            if _DUCKDB_AVAILABLE:
                con = duckdb.connect(database=db_path, read_only=True)
                try:
                    n_motions = con.execute(
                        "SELECT COUNT(*) FROM motions"
                    ).fetchone()[0]
                finally:
                    # Release the connection even when the query fails.
                    con.close()
                st.markdown(
                    f"**Moties:** {n_motions:,} \n"
                    "**Vensters:** per jaar + huidig parlement"
                )
            else:
                st.warning(
                    "DuckDB niet beschikbaar in deze Python-omgeving; DB diagnostics zijn niet beschikbaar."
                )
        except Exception as e:
            st.warning(f"DB niet bereikbaar: {e}")

    tab_labels = [
        "🧭 Politiek Kompas",
        "📈 Trajectories",
        "🔬 SVD Components",
    ]

    # NOTE(review): build_compass_tab and build_trajectories_tab are not
    # defined or imported anywhere in this module as shown — confirm they are
    # provided, otherwise these calls raise NameError at render time.
    # The lambdas defer name lookup until the tab is actually rendered.
    builders = {
        tab_labels[0]: lambda: build_compass_tab(db_path, window_size),
        tab_labels[1]: lambda: build_trajectories_tab(db_path, window_size),
        tab_labels[2]: lambda: build_svd_components_tab(db_path),
    }

    if callable(getattr(st, "tabs", None)):
        for container, label in zip(st.tabs(tab_labels), tab_labels):
            with container:
                builders[label]()
    else:
        # No ``st.tabs`` available: fall back to a radio selector. Any
        # unexpected selection renders the last tab, as before.
        selection = st.radio("Tab", tab_labels)
        builders.get(selection, builders[tab_labels[-1]])()
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: set up INFO-level logging with timestamps, then
    # render the app.
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(message)s",
        level=logging.INFO,
    )
    run_app()
|
|
|
|