|
|
"""Parlement Explorer — Streamlit data analysis app.
|
|
|
|
|
|
Four tabs:
|
|
|
1. Politiek Kompas — 2D scatter of MPs/parties, window slider
|
|
|
2. Partij Trajectories — party centroid lines over time
|
|
|
3. Motie Zoeken — text search + similarity lookup
|
|
|
4. Motie Browser — sortable table + detail panel
|
|
|
|
|
|
Run with: streamlit run explorer.py
|
|
|
|
|
|
Import-safe: heavy computation is behind @st.cache_data and only runs at UI time.
|
|
|
All DuckDB connections are read_only=True so the app can run alongside the pipeline.
|
|
|
"""
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
import json
|
|
|
import logging
|
|
|
import os
|
|
|
import re
|
|
|
import traceback
|
|
|
from datetime import datetime
|
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
|
|
# DuckDB is optional at import time: record whether it could be imported so
# callers can check the flag instead of re-attempting the import.
try:
    import duckdb
except Exception:
    duckdb = None
    _DUCKDB_AVAILABLE = False
else:
    _DUCKDB_AVAILABLE = True
|
|
|
import numpy as np
|
|
|
import pandas as pd
|
|
|
|
|
|
from analysis import config
|
|
|
from analysis import explorer_data
|
|
|
from analysis import projections
|
|
|
from analysis import trajectory
|
|
|
|
|
|
try:
    import plotly.express as px
    import plotly.graph_objects as go
except Exception:
    # Plotly may be unavailable in lightweight test environments. Provide a
    # tiny local fallback that exposes a Figure-like object with `.data`,
    # `add_trace()` and `add_annotation()` so unit tests can run without
    # installing plotly.
    px = None
    import types
    from collections.abc import Mapping

    # Attributes of a trace that the fallback preserves.
    _DUMMY_TRACE_FIELDS = ("name", "x", "y", "text", "customdata")

    class _DummyTrace:
        """Minimal stand-in for a plotly trace; keeps only a few attributes."""

        def __init__(self, **kwargs):
            # Preserve commonly-used attributes accessed by tests; any other
            # keyword (mode, line, marker, ...) is accepted and ignored.
            self.name = kwargs.get("name")
            self.x = kwargs.get("x")
            self.y = kwargs.get("y")
            self.text = kwargs.get("text")
            self.customdata = kwargs.get("customdata")

    class _DummyFigure:
        """Minimal stand-in for ``plotly.graph_objects.Figure``."""

        def __init__(self):
            self.data = []

        def add_trace(self, trace):
            """Append *trace* to ``self.data`` as a ``_DummyTrace``.

            Accepts a ``_DummyTrace`` (stored as-is), a mapping of trace
            properties, or any object exposing the trace fields as
            attributes. Missing fields become ``None``.
            """
            if isinstance(trace, _DummyTrace):
                self.data.append(trace)
                return
            if isinstance(trace, Mapping):
                # Mappings must be read by key: getattr() on a dict silently
                # yields the default and would drop every field.
                values = {key: trace.get(key) for key in _DUMMY_TRACE_FIELDS}
            else:
                values = {
                    key: getattr(trace, key, None) for key in _DUMMY_TRACE_FIELDS
                }
            self.data.append(_DummyTrace(**values))

        def add_annotation(self, *args, **kwargs):
            # noop for tests that don't import full plotly
            return None

    go = types.SimpleNamespace(Figure=_DummyFigure, Scatter=_DummyTrace)
|
|
|
try:
    import streamlit as st
except Exception:
    # Minimal dummy replacement for Streamlit used during tests / import-time.
    # We only need a tiny subset so unit tests can import explorer without
    # installing streamlit. All functions here are no-ops or simple fallbacks.
    class _DummySt:
        """No-op stand-in for the subset of the Streamlit API used here."""

        def cache_data(self, *args, **kwargs):
            """Identity decorator; works both bare and with arguments."""
            # Bare usage (`@st.cache_data`) passes the function directly.
            if len(args) == 1 and callable(args[0]) and not kwargs:
                return args[0]

            def _decorator(func):
                return func

            return _decorator

        def markdown(self, *args, **kwargs):
            return None

        def subheader(self, *args, **kwargs):
            return None

        def plotly_chart(self, *args, **kwargs):
            return None

        def caption(self, *args, **kwargs):
            return None

        def text_area(self, *args, **kwargs):
            return None

        def json(self, *args, **kwargs):
            return None

        def checkbox(self, *args, **kwargs):
            # default to False unless value provided
            return kwargs.get("value", False)

        def warning(self, *args, **kwargs):
            return None

        def info(self, *args, **kwargs):
            return None

        @staticmethod
        def _resolve_options(args, kwargs):
            """Return the options passed by keyword or as 2nd positional arg."""
            options = kwargs.get("options")
            if options is not None:
                return options
            return args[1] if len(args) > 1 else []

        def selectbox(self, *args, **kwargs):
            # return first option if options provided
            opts = self._resolve_options(args, kwargs)
            return opts[0] if opts else None

        def multiselect(self, *args, **kwargs):
            opts = self._resolve_options(args, kwargs)
            default = kwargs.get("default")
            if default is not None:
                return default
            return opts[:6] if opts else []

        def number_input(self, *args, **kwargs):
            return kwargs.get("value") if "value" in kwargs else 1

        def slider(self, *args, **kwargs):
            return kwargs.get("value") if "value" in kwargs else 0.35

        def expander(self, *args, **kwargs):
            class _Ctx:
                def __enter__(self_inner):
                    return self_inner

                def __exit__(self_inner, exc_type, exc, tb):
                    # Never suppress exceptions raised inside the block.
                    return False

            return _Ctx()

        def columns(self, *args, **kwargs):
            """Return a tuple of no-op column objects.

            The spec may be an int (number of columns) or a sequence of
            relative widths; with no spec a single column is returned.
            """

            # Simple objects exposing the methods used in the UI.
            class _Col:
                def markdown(self, *a, **k):
                    return None

                def metric(self, *a, **k):
                    return None

                def dataframe(self, *a, **k):
                    return None

            spec = args[0] if args else kwargs.get("spec", 1)
            # An int spec has no len(); a sequence contributes one column
            # per entry.
            n = spec if isinstance(spec, int) else len(list(spec))
            return tuple(_Col() for _ in range(n))

    st = _DummySt()
|
|
|
# Temporary diagnostics for Trajectories plotting. Intended to be small,
# opt-in (see EXPLORER_DEBUG_TRAJECTORIES) and reversible once the root cause
# is found. In this file it is updated by select_trajectory_plot_data() when
# the position inspector raises.
_last_trajectories_diagnostics: dict = {}
# Backwards/alternate name used by instrumentation: a second module-level
# reference to the SAME dict object, so callers/tests can look for either
# name and in-place updates are visible through both.
_last_diagnostics = _last_trajectories_diagnostics
|
|
|
|
|
|
|
|
|
def get_debug_trajectories_enabled() -> bool:
    """Return True when EXPLORER_DEBUG_TRAJECTORIES requests debug mode.

    The value is compared case-insensitively and ignoring surrounding
    whitespace against ``"1"`` and ``"true"`` (so ``'1'``, ``'true'``,
    ``'True'`` and ``'TRUE'`` all enable it). An unset variable or any other
    value yields False. Used as default for a per-tab checkbox.
    """
    raw = os.getenv("EXPLORER_DEBUG_TRAJECTORIES", "")
    return raw.strip().lower() in ("1", "true")
|
|
|
|
|
|
|
|
|
from explorer_helpers import (
|
|
|
inspect_positions_for_issues,
|
|
|
compute_party_centroids,
|
|
|
)
|
|
|
|
|
|
|
|
|
def select_trajectory_plot_data(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    party_map: Dict[str, str],
    windows: List[str],
    selected_parties: List[str],
    smooth_alpha: float = 0.35,
    mp_fallback_count: Optional[int] = None,
) -> Tuple[go.Figure, int, Optional[str]]:
    """Return (fig, trace_count, banner_text).

    Helper used by build_trajectories_tab. Does not call Streamlit.

    Party centroids are computed with ``compute_party_centroids``. When at
    least one party has a usable centroid, one trace per party is drawn for
    the parties in ``selected_parties`` that are plottable (all plottable
    parties when that intersection is empty) and ``banner_text`` is None.
    Otherwise the function falls back to one grey trace per MP for the MPs
    present in the most windows, and ``banner_text`` explains the fallback.

    Args:
        positions_by_window: {window_id: {entity_name: (x, y)}}.
        party_map: {mp_name: party}, passed through to the helpers.
        windows: window ids; used for centroid alignment and as hover text.
        selected_parties: parties the caller wants to see.
        smooth_alpha: EMA factor; values >= 1.0 disable smoothing.
        mp_fallback_count: max number of MP traces in fallback mode. When
            None, read from EXPLORER_MP_FALLBACK_COUNT (default 20).

    Returns:
        The figure, the number of traces added, and the banner text or None.
        The raw (unsmoothed) coordinates are attached as ``customdata``.
    """
    log = logging.getLogger(__name__)

    def _is_missing(value) -> bool:
        """True for None and for any numeric NaN (incl. numpy scalars)."""
        if value is None:
            return True
        try:
            return bool(np.isnan(value))
        except (TypeError, ValueError):
            # Not something isnan understands; let float() decide later.
            return False

    def _ema_smooth(values: List[float], alpha: float) -> List[float]:
        """Exponential moving average that skips over missing points."""
        if not values or alpha >= 1.0:
            return values
        smoothed: List[float] = []
        prev = None
        for v in values:
            if _is_missing(v):
                # Keep the gap, and do not let it poison the running value.
                smoothed.append(float(np.nan))
                continue
            v = float(v)
            prev = v if prev is None else alpha * v + (1 - alpha) * prev
            smoothed.append(float(prev))
        return smoothed

    def _raw_pair(x, y) -> Tuple[float, float]:
        """Raw coordinates as floats, with missing values mapped to NaN."""
        return (
            float(np.nan) if _is_missing(x) else float(x),
            float(np.nan) if _is_missing(y) else float(y),
        )

    # Use env var default if not provided
    if mp_fallback_count is None:
        try:
            mp_fallback_count = int(os.getenv("EXPLORER_MP_FALLBACK_COUNT", "20"))
        except (TypeError, ValueError):
            mp_fallback_count = 20

    # Compute per-party centroids aligned to windows
    party_centroids, _meta = compute_party_centroids(
        positions_by_window, party_map, windows
    )

    # Use inspector to collect diagnostics (import-safe, pure helper). Do not
    # call Streamlit from here so the function remains import-safe for tests.
    try:
        inspector_summary = inspect_positions_for_issues(positions_by_window, party_map)
    except Exception:
        # Capture traceback diagnostics so callers (and tests) can inspect
        # what went wrong; the inspector is advisory and must not abort
        # plotting.
        tb = traceback.format_exc()
        inspector_summary = {}
        diagnostics = {"stage": "inspector_exception", "exception": tb}
        try:
            # Exposed both on the function object and in the module-level
            # dict so the UI can show a compact summary when debugging.
            select_trajectory_plot_data._last_diagnostics = dict(diagnostics)
            _last_trajectories_diagnostics.update(diagnostics)
        except Exception:
            # best-effort only
            log.debug("could not record trajectory diagnostics", exc_info=True)
    log.debug("select_trajectory_plot_data inspector summary: %s", inspector_summary)

    # A party is plottable when at least one window has a complete centroid
    # (both coordinates present and not NaN).
    plottable_parties = [
        party
        for party, vals in party_centroids.items()
        if any(not (_is_missing(x) or _is_missing(y)) for x, y in vals)
    ]

    log.debug(
        "[TRAJ DEBUG] plottable_parties: %d parties, sample=%s",
        len(plottable_parties),
        (plottable_parties[:5] if plottable_parties else "empty"),
    )
    log.debug(
        "[TRAJ DEBUG] party_centroids keys: %s",
        list(party_centroids.keys())[:10],
    )
    if party_centroids:
        sample_party = next(iter(party_centroids))
        log.debug(
            "[TRAJ DEBUG] Sample party '%s' centroids: %s...",
            sample_party,
            party_centroids[sample_party][:3],
        )

    fig = go.Figure()
    trace_count = 0

    # If no plottable parties, fallback to MP trajectories
    if not plottable_parties:
        # Build mp_positions across windows
        mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
        for wid in windows:
            for mp_name, xy in positions_by_window.get(wid, {}).items():
                try:
                    x, y = float(xy[0]), float(xy[1])
                except Exception:
                    # Malformed position entry: skip this MP for this window.
                    continue
                mp_positions.setdefault(mp_name, {})[wid] = (x, y)

        # Rank MPs by activity (number of windows with positions). sorted()
        # is stable, so ties keep first-seen order.
        ranked_mps = sorted(
            mp_positions, key=lambda mp: len(mp_positions[mp]), reverse=True
        )
        # Clamp so a negative count cannot slice from the wrong end.
        top_mps = ranked_mps[: max(mp_fallback_count, 0)]

        for mp in top_mps:
            wids_sorted = sorted(mp_positions[mp])
            if not wids_sorted:
                continue
            xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
            ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
            fig.add_trace(
                go.Scatter(
                    x=_ema_smooth(xs_raw, smooth_alpha),
                    y=_ema_smooth(ys_raw, smooth_alpha),
                    mode="lines+markers",
                    name=mp,
                    text=wids_sorted,
                    customdata=[_raw_pair(rx, ry) for rx, ry in zip(xs_raw, ys_raw)],
                    line=dict(color="#888888", shape="spline", smoothing=1.3),
                    marker=dict(color="#888888", size=6),
                )
            )
            trace_count += 1

        banner_text = "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback."
        log.debug(
            "[TRAJ DEBUG] Fallback to MP trajectories: trace_count=%d, top_mps=%d",
            trace_count,
            len(top_mps),
        )
        return fig, trace_count, banner_text

    # Otherwise plot party centroids for selected parties intersecting plottable
    to_plot = [p for p in selected_parties if p in plottable_parties]
    # If none selected, default to all plottable
    if not to_plot:
        to_plot = plottable_parties

    for party in to_plot:
        vals = party_centroids.get(party, [])
        if not vals:
            continue
        xs_raw = [v[0] for v in vals]
        ys_raw = [v[1] for v in vals]
        colour = PARTY_COLOURS.get(party, "#9E9E9E")
        fig.add_trace(
            go.Scatter(
                x=_ema_smooth(xs_raw, smooth_alpha),
                y=_ema_smooth(ys_raw, smooth_alpha),
                mode="lines+markers",
                name=party,
                text=windows,
                # customdata keeps the unsmoothed values, NaNs preserved
                customdata=[_raw_pair(x, y) for x, y in zip(xs_raw, ys_raw)],
                line=dict(color=colour, shape="spline", smoothing=1.3),
                marker=dict(color=colour, size=8),
            )
        )
        trace_count += 1

    log.debug(
        "[TRAJ DEBUG] Final trace_count=%d, plottable_parties=%d, to_plot=%s",
        trace_count,
        len(plottable_parties),
        len(to_plot),
    )
    return fig, trace_count, None
|
|
|
|
|
|
|
|
|
# Module-level logger shared by the loaders and helpers in this file.
logger = logging.getLogger(__name__)

# The constants below are re-exported from analysis.config so the rest of
# this module can refer to them by their short names.

# Party colour palette (consistent across tabs)
PARTY_COLOURS: Dict[str, str] = config.PARTY_COLOURS

# Per-component theme metadata, keyed by int.
# NOTE(review): presumably keyed by SVD component number — confirm in config.
SVD_THEMES: dict[int, dict[str, str]] = config.SVD_THEMES

KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES

CURRENT_PARLIAMENT_PARTIES = config.CURRENT_PARLIAMENT_PARTIES

# NOTE(review): presumably the party-name normalisation table — confirm in config.
_PARTY_NORMALIZE = config._PARTY_NORMALIZE
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Cached loaders
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Beschikbare tijdsvensters laden…")
def get_available_windows(db_path: str) -> List[str]:
    """Cached pass-through to ``explorer_data.get_available_windows``.

    Yields the sorted, distinct window_ids found in svd_vectors.
    """
    window_ids = explorer_data.get_available_windows(db_path)
    return window_ids
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner=False)
def get_uniform_dim_windows(db_path: str) -> List[str]:
    """List the windows whose dominant MP-vector dimension is at least 25.

    A window can hold vectors of mixed length when the pipeline ran more than
    once (e.g. 2016 has both dim=1 and dim=50 rows). The most common dimension
    per window decides: only windows where it is >= 25 are kept. Windows with
    fewer than 10 dim-25+ entities are dropped as well, so PCA never receives
    degenerate input.

    The selection itself is delegated to
    ``explorer_data.get_uniform_dim_windows``; this wrapper only adds caching.
    """
    usable_windows = explorer_data.get_uniform_dim_windows(db_path)
    return usable_windows
|
|
|
|
|
|
|
|
|
def _should_swap_axes(axis_def: dict) -> bool:
    """Tell whether the Y axis is economic left-right while the X axis is not.

    Delegates the decision to ``projections.should_swap_axes``.
    """
    swap_needed = projections.should_swap_axes(axis_def)
    return swap_needed
|
|
|
|
|
|
|
|
|
def _swap_axes(positions_by_window: dict, axis_def: dict) -> tuple:
    """Exchange x and y in every position and in the axis metadata.

    Delegates to ``projections.swap_axes`` and returns its result unchanged.
    """
    swapped = projections.swap_axes(positions_by_window, axis_def)
    return swapped
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="2D posities berekenen (kan even duren)…")
def load_positions(
    db_path: str, window_size: str = "annual"
) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict]:
    """Compute 2D positions per window using PCA on aligned SVD vectors.

    Args:
        db_path: Path to the DuckDB database.
        window_size: When ``"annual"`` (the default), windows whose id
            contains ``"-Q"`` are dropped from the result after the PCA has
            been computed. Any other value returns all computed windows.

    Returns:
        positions_by_window: {window_id: {entity_name: (x, y)}}
        axis_def: dict with x_axis, y_axis, method keys

        Both are empty dicts when no uniform-dimension windows are available.
    """
    # Imported lazily so importing this module stays cheap.
    from analysis.political_axis import compute_2d_axes

    # Only windows with a uniform, large-enough vector dimension are usable.
    all_available = get_uniform_dim_windows(db_path)
    if not all_available:
        return {}, {}

    positions_by_window, axis_def = compute_2d_axes(
        db_path,
        window_ids=all_available,
        method="pca",
        pca_residual=True,
        normalize_vectors=True,
    )

    try:
        from analysis.axis_classifier import classify_axes

        axis_def = classify_axes(positions_by_window, axis_def, db_path)
    except Exception:
        # The classifier is optional: on any failure keep the axis_def from
        # compute_2d_axes. Uses the module-level `logging` import; a local
        # re-import here would needlessly shadow it.
        logging.getLogger(__name__).exception(
            "classify_axes failed; using generic axis labels"
        )

    # Axis orientation is guaranteed by compute_2d_axes via canonical party
    # anchors (Procrustes alignment + sign-fixing). We do NOT forcibly override
    # axis labels here so the classifier output (if available) can be surfaced
    # conditionally in the UI based on per-window confidence. Label selection
    # is performed at render time in the tabs so we can show fallback labels
    # while still surfacing the classifier interpretation and confidence when
    # informative.

    # Filter displayed windows by window_size AFTER PCA computation.
    if window_size == "annual":
        annual_keys = {w for w in all_available if "-Q" not in w}
        positions_by_window = {
            w: v for w, v in positions_by_window.items() if w in annual_keys
        }

    return positions_by_window, axis_def
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Partijkaart laden…")
def load_party_map(db_path: str) -> Dict[str, str]:
    """Cached lookup of the {mp_name: party} mapping.

    Party names are normalised to abbreviations; the work is done by
    ``explorer_data.load_party_map``.
    """
    mp_to_party = explorer_data.load_party_map(db_path)
    return mp_to_party
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Actieve Kamerleden laden…")
def load_active_mps(db_path: str) -> set:
    """Cached set of mp_name values for MPs currently seated in parliament.

    "Active" means the MP's mp_metadata row has tot_en_met IS NULL, i.e. no
    end date is recorded for the current seat. The query lives in
    ``explorer_data.load_active_mps``.
    """
    active_names = explorer_data.load_active_mps(db_path)
    return active_names
|
|
|
|
|
|
|
|
|
def compute_party_discipline(
    db_path: str,
    start_date: str,
    end_date: str,
) -> pd.DataFrame:
    """Per-party voting discipline (Rice index) for roll-call votes in a date range.

    Thin wrapper: the computation is done by
    ``trajectory.compute_party_discipline``.

    Only individual MP vote rows take part (mp_name LIKE '%,%'), and only
    'voor' and 'tegen' votes are counted; absent and abstaining MPs are left
    out of the calculation.

    For each motion and party, the Rice index is the fraction of the party's
    MPs voting with the party majority. A party's score is the mean of that
    index over all motions in the date range.

    Returns:
        DataFrame with columns [party, n_motions, discipline], sorted by
        discipline ascending. Empty when fewer than 1 qualifying motion
        exists or on any DB error.
    """
    discipline_df = trajectory.compute_party_discipline(db_path, start_date, end_date)
    return discipline_df
|
|
|
|
|
|
|
|
|
def _load_mp_vectors_by_party(db_path: str) -> Dict[str, List[np.ndarray]]:
    """Fetch individual MP SVD vectors, grouped per party, for current_parliament.

    Delegates to ``explorer_data.load_mp_vectors_by_party``.
    """
    vectors_by_party = explorer_data.load_mp_vectors_by_party(db_path)
    return vectors_by_party
|
|
|
|
|
|
|
|
|
def _load_mp_vectors_by_party_for_window(
    db_path: str, window: str
) -> Dict[str, List[np.ndarray]]:
    """Fetch individual MP SVD vectors, grouped per party, for one window.

    Delegates to ``explorer_data.load_mp_vectors_by_party_for_window``.
    """
    vectors_by_party = explorer_data.load_mp_vectors_by_party_for_window(
        db_path, window
    )
    return vectors_by_party
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Partijposities op SVD-assen laden…")
def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
    """Per-party SVD vectors, each the mean of that party's MP vectors.

    Any failure while loading or aggregating is logged with its traceback and
    an empty dict is returned instead.
    """
    try:
        vectors_by_party = explorer_data.load_mp_vectors_by_party(db_path)
        party_scores = explorer_data.compute_party_axis_scores(vectors_by_party)
    except Exception:
        logger.exception("Failed to load party axis scores")
        return {}
    return party_scores
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Partijposities voor jaar laden…")
def load_party_axis_scores_for_window(
    db_path: str, window: str
) -> Dict[str, List[float]]:
    """Per-party SVD vectors restricted to a single window.

    Any failure while loading or aggregating is logged with its traceback and
    an empty dict is returned instead.
    """
    try:
        vectors_by_party = explorer_data.load_mp_vectors_by_party_for_window(
            db_path, window
        )
        party_scores = explorer_data.compute_party_axis_scores(vectors_by_party)
    except Exception:
        logger.exception(f"Failed to load party axis scores for window {window}")
        return {}
    return party_scores
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="SVD scores voor alle vensters laden…")
def load_party_scores_all_windows(
    db_path: str, windows: List[str]
) -> Dict[str, Dict[str, List[float]]]:
    """Collect party SVD scores for each requested window.

    The special window id ``"current_parliament"`` is served by
    ``load_party_axis_scores``; every other id goes through
    ``load_party_axis_scores_for_window``.

    Args:
        db_path: Path to DuckDB database
        windows: List of window IDs to load

    Returns:
        {window_id: {party_name: [float * k]}} — scores per party per window
    """

    def _scores_for(window_id: str) -> Dict[str, List[float]]:
        # Route the special id to the loader that takes no window argument.
        if window_id != "current_parliament":
            return load_party_axis_scores_for_window(db_path, window_id)
        return load_party_axis_scores(db_path)

    return {window_id: _scores_for(window_id) for window_id in windows}
|
|
|
|
|
|
|
|
|
def _load_mp_vectors_by_window(db_path: str, window: str) -> Dict[str, np.ndarray]:
    """Fetch the individual MP SVD vectors stored for one window.

    Delegates to ``explorer_data.load_mp_vectors_by_window``.
    """
    vectors_by_mp = explorer_data.load_mp_vectors_by_window(db_path, window)
    return vectors_by_mp
|
|
|
|
|
|
|
|
|
def _get_aligned_trajectory_scores(
    db_path: str, windows: List[str], n_components: int = 10
) -> Dict[str, Dict[str, List[float]]]:
    """Aligned PCA scores as {window: {party: [score per component]}}.

    MP-level scores come from ``compute_nd_axes`` (PCA-projected and
    flip-corrected across all windows), which keeps this view consistent with
    the single-window SVD components view. They are then averaged per party.

    Windows without scores are omitted from the result. MPs that cannot be
    mapped to a party are ignored.
    """
    from analysis.political_axis import compute_nd_axes

    # MP-level aligned scores for every window.
    mp_scores_by_window, _ = compute_nd_axes(db_path, n_components=n_components)
    if not mp_scores_by_window:
        return {}

    # MP name -> party, used to fold MP scores into party centroids.
    mp_to_party = load_party_map(db_path)

    centroids: Dict[str, Dict[str, List[float]]] = {}
    for window_id in windows:
        mp_scores = mp_scores_by_window.get(window_id, {})
        if not mp_scores:
            continue

        grouped: Dict[str, List[np.ndarray]] = {}
        for mp_name, mp_vec in mp_scores.items():
            if mp_name in mp_to_party:
                party = mp_to_party[mp_name]
            else:
                # Retry with everything from the first "(" stripped off.
                party = mp_to_party.get(mp_name.split("(")[0].strip(), None)
            if not party:
                continue
            if party not in grouped:
                grouped[party] = []
            grouped[party].append(mp_vec[:n_components])

        # Mean over each party's stacked MP vectors.
        per_party: Dict[str, List[float]] = {}
        for party, members in grouped.items():
            if members:
                per_party[party] = np.mean(np.vstack(members), axis=0).tolist()
        centroids[window_id] = per_party

    return centroids
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="SVD scores met Procrustes-uitlijning laden…")
def load_party_scores_all_windows_aligned(
    db_path: str, windows: List[str]
) -> Dict[str, Dict[str, List[float]]]:
    """Party SVD scores for all windows after Procrustes alignment.

    Raw MP vectors are loaded per window, aligned with
    ``_procrustes_align_windows`` so orientation stays consistent across
    years (similar to how components 1-2 are aligned), and then averaged per
    party. Windows without vectors, or missing from the aligned output, are
    omitted. MPs absent from the party map are ignored.

    Args:
        db_path: Path to DuckDB database
        windows: List of window IDs to load

    Returns:
        {window_id: {party_name: [float * k]}} — aligned scores per party per window
    """
    from analysis.trajectory import _procrustes_align_windows

    mp_to_party = load_party_map(db_path)

    # Raw MP vectors, keeping only windows that actually have data.
    raw_by_window: Dict[str, Dict[str, np.ndarray]] = {}
    for window_id in windows:
        loaded = _load_mp_vectors_by_window(db_path, window_id)
        if loaded:
            raw_by_window[window_id] = loaded

    aligned_by_window = _procrustes_align_windows(raw_by_window)

    # Fold aligned MP vectors into per-party means.
    party_scores: Dict[str, Dict[str, List[float]]] = {}
    for window_id in windows:
        if window_id not in aligned_by_window:
            continue

        grouped: Dict[str, List[np.ndarray]] = {}
        for mp_name, mp_vec in aligned_by_window[window_id].items():
            party = mp_to_party.get(mp_name)
            if party:
                grouped.setdefault(party, []).append(mp_vec)

        party_scores[window_id] = {
            party: np.mean(members, axis=0).tolist()
            for party, members in grouped.items()
            if members
        }

    return party_scores
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Partij-MP vectoren laden…")
def load_party_mp_vectors(db_path: str) -> Dict[str, List[np.ndarray]]:
    """Per-party lists of individual MP SVD vectors.

    Any failure is logged with its traceback and an empty dict is returned.
    """
    try:
        vectors_by_party = explorer_data.load_mp_vectors_by_party(db_path)
    except Exception:
        logger.exception("Failed to load party MP vectors")
        return {}
    return vectors_by_party
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Bootstrap CI berekenen…")
def _cached_bootstrap_cis(
    party_mp_vectors: Dict[str, List[np.ndarray]],
) -> Dict[str, Dict]:
    """Caching shim in front of ``compute_party_bootstrap_cis``."""
    # Imported lazily so importing this module stays cheap.
    from analysis.political_axis import compute_party_bootstrap_cis

    bootstrap_cis = compute_party_bootstrap_cis(party_mp_vectors)
    return bootstrap_cis
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Scree-plot laden…")
def load_scree_data(db_path: str) -> List[float]:
    """Explained variance ratios (%) for all SVD components, sorted descending.

    The spectrum is computed on the same Procrustes-aligned multi-window
    matrix as the compass axes, so the scree plot matches what the compass
    actually uses. Any failure (including a failed import) is logged with its
    traceback and an empty list is returned.
    """
    try:
        from analysis.political_axis import compute_svd_spectrum

        spectrum = compute_svd_spectrum(db_path)
    except Exception:
        logger.exception("Failed to load scree data")
        return []
    return spectrum
|
|
|
|
|
|
|
|
|
def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
    """Render a scree plot showing relative SVD component importance.

    Highlighted bars for the top-2 components (used in the compass); muted bars
    for the rest. A cumulative-variance dashed line on the same y-axis helps
    spot the elbow. A 50 % cumulative threshold line is drawn for reference.

    Nothing is rendered when there is nothing to show, i.e. when
    ``importances`` is empty or ``n_show`` selects no components.

    Args:
        importances: List of importance values sorted descending (from load_scree_data).
        n_show: How many components to display (default: first 15).
    """
    if not importances:
        return

    # importances are already EVR percentages summing to ~100 over all components.
    # Slice to n_show for display; cumulative line shows how much variance is covered.
    data = list(importances[:n_show])
    if not data:
        # e.g. n_show == 0: max() over an empty cumulative list would raise.
        return
    ranks = list(range(1, len(data) + 1))

    # Cumulative variance for the dashed overlay line
    cumsum = []
    running = 0.0
    for v in data:
        running += v
        cumsum.append(running)

    # Colour: first 2 bars highlighted (compass axes), rest muted
    n_highlight = 2
    bar_colours = [
        "#1565C0" if i < n_highlight else "#90CAF9" for i in range(len(data))
    ]

    fig = go.Figure()

    # Bars
    fig.add_trace(
        go.Bar(
            x=ranks,
            y=data,
            marker_color=bar_colours,
            hovertemplate="As %{x}<br><b>%{y:.1f}%</b> verklaarde variantie<extra></extra>",
            showlegend=False,
        )
    )

    # Cumulative variance line (dashed, warm amber)
    fig.add_trace(
        go.Scatter(
            x=ranks,
            y=cumsum,
            mode="lines+markers",
            line={"color": "#F57C00", "width": 2, "dash": "dot"},
            marker={"size": 5, "color": "#F57C00"},
            hovertemplate="As %{x}<br>Cumulatief: <b>%{y:.1f}%</b><extra></extra>",
            name="Cumulatief",
            showlegend=True,
        )
    )

    # 50 % reference line
    fig.add_hline(
        y=50,
        line_dash="dash",
        line_color="#BDBDBD",
        line_width=1,
        annotation_text="50%",
        annotation_position="right",
        annotation_font_color="#9E9E9E",
        annotation_font_size=11,
    )

    # Annotations on the top-2 bars showing their % value
    for rank, value in list(zip(ranks, data))[:n_highlight]:
        fig.add_annotation(
            x=rank,
            y=value + 0.3,  # small offset so the label sits above the bar
            text=f"{value:.1f}%",
            showarrow=False,
            font={"size": 11, "color": "#1565C0"},
            yanchor="bottom",
        )

    fig.update_layout(
        height=280,
        margin={"l": 10, "r": 50, "t": 30, "b": 40},
        title={
            "text": "Belang per SVD-as",
            "font": {"size": 13, "color": "#555555"},
            "x": 0.02,
            "xanchor": "left",
        },
        legend={
            "orientation": "h",
            "x": 0.5,
            "xanchor": "center",
            "y": 1.08,
            "font": {"size": 11},
        },
        xaxis={
            "title": {"text": "As (rang)", "font": {"size": 11}},
            "tickmode": "linear",
            "tick0": 1,
            "dtick": 1,
            "showline": False,
            "showgrid": False,
        },
        yaxis={
            "title": {"text": "% van totale variantie", "font": {"size": 11}},
            "showline": False,
            "showgrid": True,
            "gridcolor": "#eeeeee",
            "ticksuffix": "%",
            # Headroom above the cumulative line, which is the tallest series.
            "range": [0, max(cumsum) * 1.08],
        },
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        bargap=0.25,
    )
    st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
|
|
|
def _build_party_axis_figure(
    party_coords: Dict[str, Tuple[float, float]],
    comp_sel: int,
    theme: dict,
    bootstrap_data: Optional[Dict[str, Dict]] = None,
) -> Optional[go.Figure]:
    """Build a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`.

    Each party is drawn as a labelled marker on a grey horizontal baseline.

    Args:
        party_coords: Mapping of party name to its coordinates. A value of
            length 2 is treated as an explicit ``(x, y)`` pair; any other
            indexable value is treated as a longer vector and element
            ``comp_sel - 1`` is used. Entries that cannot be converted to a
            float are skipped.
        comp_sel: 1-indexed component number; only 1 and 2 are accepted.
        theme: Dict read for ``flip`` (negate all scores when truthy),
            ``negative_pole`` (left axis label) and ``positive_pole``
            (right axis label).
        bootstrap_data: Optional mapping of party name to a dict that may hold
            ``n_mps``, ``ci_lower`` and ``ci_upper``. When given, the hover
            text includes N (and the interval when both bounds are readable)
            and parties with ``n_mps == 1`` use a diamond marker.

    Returns:
        The figure, or None when `party_coords` is empty or no entry yields a
        usable score.

    Raises:
        ValueError: If `comp_sel` is not 1 or 2.
    """
    if not party_coords:
        return None

    if comp_sel not in (1, 2):
        raise ValueError(
            "_build_party_axis_figure only supports comp_sel 1 or 2 when using explicit coords"
        )

    axis_idx = comp_sel - 1
    flip = theme.get("flip", False)

    parties = []
    scores = []
    colours = []

    for party, val in party_coords.items():
        try:
            if hasattr(val, "__len__") and len(val) == 2:
                # explicit (x, y)
                x, y = val
                score = float(x if axis_idx == 0 else y)
            else:
                # sequence/array-like of a full vector
                score = float(val[axis_idx])
        except Exception:
            # Deliberate best-effort: malformed entries are skipped silently.
            continue

        if flip:
            score = -score

        parties.append(party)
        scores.append(score)
        colours.append(PARTY_COLOURS.get(party, "#9E9E9E"))

    if not scores:
        return None

    # Hover text (and marker symbols) depend on whether bootstrap data exists.
    if bootstrap_data:
        hover = []
        symbols = []
        for p, s in zip(parties, scores):
            bd = bootstrap_data.get(p)
            if not bd:
                hover.append(f"{p}: {s:.3f}")
                symbols.append("circle")
                continue

            n_mps = bd.get("n_mps", "?")
            # NOTE(review): the interval bounds are shown as stored, while the
            # score above is negated when theme["flip"] is set. If the
            # bootstrap data is not already flip-corrected upstream, the
            # interval will not bracket the displayed score — confirm.
            try:
                ci_low = float(bd["ci_lower"][axis_idx])
                ci_high = float(bd["ci_upper"][axis_idx])
            except Exception:
                ci_low = ci_high = None

            if ci_low is not None and ci_high is not None:
                hover.append(
                    f"{p}: {s:.3f} (N={n_mps}, 95%-BI: [{ci_low:.3f}, {ci_high:.3f}])"
                )
            else:
                hover.append(f"{p}: {s:.3f} (N={n_mps})")
            symbols.append("diamond" if n_mps == 1 else "circle")
        marker_kwargs = {"size": 14, "color": colours, "symbol": symbols}
    else:
        hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]
        marker_kwargs = {"size": 14, "color": colours}

    # Baseline extent: pad each end 15% *away* from the data. Multiplying the
    # raw min/max by 1.15 only does that when min < 0 < max; for same-signed
    # scores it pulled one end inwards, past the outermost marker.
    lo, hi = min(scores), max(scores)
    if lo == hi:
        x_min, x_max = lo - 1, hi + 1
    else:
        x_min = lo - 0.15 * abs(lo)
        x_max = hi + 0.15 * abs(hi)

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=[x_min, x_max],
            y=[0, 0],
            mode="lines",
            line={"color": "#cccccc", "width": 1},
            hoverinfo="skip",
            showlegend=False,
        )
    )

    scatter_kwargs = {
        "x": scores,
        "y": [0] * len(scores),
        "mode": "markers+text",
        "text": parties,
        "textposition": "top center",
        "marker": marker_kwargs,
        "hovertext": hover,
        "hoverinfo": "text",
        "showlegend": False,
    }
    fig.add_trace(go.Scatter(**scatter_kwargs))

    # Labels always come from the poles: negative_pole = LEFT, positive_pole = RIGHT.
    left_label = theme.get("negative_pole", "")
    right_label = theme.get("positive_pole", "")

    fig.update_layout(
        height=160,
        margin={"l": 10, "r": 10, "t": 10, "b": 30},
        xaxis={
            "title": f"← {left_label} | {right_label} →",
            "showticklabels": False,
            "showline": False,
            "showgrid": False,
            "zeroline": False,
        },
        yaxis={"visible": False, "range": [-1, 2]},
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
    )
    return fig
|
|
|
|
|
|
|
|
|
def _render_party_axis_chart(
    party_coords: Dict[str, Tuple[float, float]],
    comp_sel: int,
    theme: dict,
    bootstrap_data: Optional[Dict[str, Dict]] = None,
) -> None:
    """Draw the 1D party-position chart for SVD axis `comp_sel` in Streamlit.

    The figure is produced by `_build_party_axis_figure` with the same
    arguments. When that returns None a short caption is shown instead of a
    chart.
    """
    figure = _build_party_axis_figure(party_coords, comp_sel, theme, bootstrap_data)
    if figure is not None:
        st.plotly_chart(figure, use_container_width=True)
        return
    # No figure could be built: tell the user rather than render nothing.
    st.caption("_Partijdata niet beschikbaar voor deze as._")
|
|
|
|
|
|
|
|
|
def _render_party_axis_chart_1d(
    party_coords: Dict[str, Tuple[float, ...]],
    comp_sel: int,
    theme: dict,
) -> None:
    """Render a 1D horizontal scatter of party positions for one SVD component.

    Parties are drawn as labelled markers on a grey horizontal baseline, with
    the axis title showing both poles with arrows.

    Args:
        party_coords: Mapping of party name to an indexable value whose first
            element is the score to plot. Entries whose first element cannot
            be converted to a float are skipped.
        comp_sel: SVD component number (1-indexed). Not read by this function;
            the score is always taken from ``coords[0]``.
        theme: Dict read for ``flip`` (negate all scores when truthy),
            ``negative_pole`` (left label) and ``positive_pole`` (right label).
    """
    if not party_coords:
        st.caption("_Partijdata niet beschikbaar voor deze as._")
        return

    parties = []
    scores = []
    colours = []
    for party, coords in party_coords.items():
        try:
            score = float(coords[0])
        except Exception:
            # Deliberate best-effort: skip entries without a usable score.
            continue
        parties.append(party)
        scores.append(score)
        colours.append(PARTY_COLOURS.get(party, "#9E9E9E"))

    if not scores:
        st.caption("_Partijdata niet beschikbaar voor deze as._")
        return

    # Mirror the axis when the theme asks for it.
    if theme.get("flip", False):
        scores = [-s for s in scores]

    hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]

    # Baseline extent: pad each end 15% *away* from the data. Multiplying the
    # raw min/max by 1.15 only does that when min < 0 < max; for same-signed
    # scores it pulled one end inwards, past the outermost marker.
    lo, hi = min(scores), max(scores)
    if lo == hi:
        x_min, x_max = lo - 1, hi + 1
    else:
        x_min = lo - 0.15 * abs(lo)
        x_max = hi + 0.15 * abs(hi)

    # Uses the module-level `go` (as the sibling chart builders do) instead of
    # re-importing plotly inside the function.
    fig = go.Figure()

    # Horizontal baseline
    fig.add_trace(
        go.Scatter(
            x=[x_min, x_max],
            y=[0, 0],
            mode="lines",
            line={"color": "#cccccc", "width": 1},
            hoverinfo="skip",
            showlegend=False,
        )
    )

    # Party markers
    fig.add_trace(
        go.Scatter(
            x=scores,
            y=[0] * len(scores),
            mode="markers+text",
            text=parties,
            textposition="top center",
            marker={"size": 14, "color": colours},
            hovertext=hover,
            hoverinfo="text",
            showlegend=False,
        )
    )

    # Labels always come from the poles (independent of flip):
    # negative_pole = LEFT, positive_pole = RIGHT.
    left_label = theme.get("negative_pole", "")
    right_label = theme.get("positive_pole", "")

    fig.update_layout(
        height=160,
        margin={"l": 10, "r": 10, "t": 10, "b": 30},
        xaxis={
            "title": f"← {left_label} | {right_label} →",
            "showticklabels": False,
            "showline": False,
            "showgrid": False,
            "zeroline": False,
        },
        yaxis={"visible": False, "range": [-1, 2]},
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
    )

    st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
|
|
|
def _render_svd_time_trajectory(
    party_scores_by_window: Dict[str, Dict[str, List[float]]],
    comp_sel: int,
    theme: dict,
    selected_parties: List[str],
) -> None:
    """Render party positions on one SVD component across time windows.

    Each window is a horizontal grey line (one row per window); each selected
    party is a coloured poly-line with labelled markers connecting its score
    in every window where it has one.

    Args:
        party_scores_by_window: ``{window_id: {party_name: [scores]}}``.
        comp_sel: SVD component number (1-indexed); element ``comp_sel - 1`` of
            each score list is plotted.
        theme: Dict read for ``negative_pole`` (left label) and
            ``positive_pole`` (right label). ``flip`` is not applied here.
        selected_parties: Party names to display.
    """
    if not party_scores_by_window or not selected_parties:
        st.caption("_Geen data beschikbaar voor tijdtraject._")
        return

    idx = comp_sel - 1  # Convert to 0-indexed

    # Row order: "current_parliament" first, then the remaining window ids in
    # descending sort order (newest first for year-like ids).
    sorted_windows = []
    if "current_parliament" in party_scores_by_window:
        sorted_windows.append("current_parliament")
    sorted_windows.extend(
        sorted(
            (w for w in party_scores_by_window if w != "current_parliament"),
            reverse=True,
        )
    )

    # {party: [(window, score), ...]} in row order.
    # Per the original note, scores are expected to arrive already aligned and
    # flip-corrected upstream, so no per-window flip is computed here.
    party_trajectories: Dict[str, List[Tuple[str, float]]] = {}
    for window in sorted_windows:
        scores_by_party = party_scores_by_window.get(window, {})
        for party in selected_parties:
            scores = scores_by_party.get(party, [])
            if scores and len(scores) > idx:
                try:
                    score = float(scores[idx])
                except (ValueError, TypeError):
                    continue
                party_trajectories.setdefault(party, []).append((window, score))

    if not party_trajectories:
        st.caption("_Geen data beschikbaar voor geselecteerde partijen._")
        return

    # Every trajectory holds at least one point (entries are only created on a
    # successful append), so the score list below is never empty.
    all_scores = [s for traj in party_trajectories.values() for _, s in traj]

    # Visible x-range: pad each end 15% *away* from the data. This range is
    # applied to the axis explicitly, so the former `min * 1.15, max * 1.15`
    # clipped the extreme markers whenever all scores shared a sign.
    lo, hi = min(all_scores), max(all_scores)
    if lo == hi:
        x_min, x_max = lo - 1, hi + 1
    else:
        x_min = lo - 0.15 * abs(lo)
        x_max = hi + 0.15 * abs(hi)

    # Row index per window; the y-axis is reversed below so index 0 is on top.
    window_to_y = {w: i for i, w in enumerate(sorted_windows)}

    fig = go.Figure()

    # One grey baseline per window (matching the single-window chart style).
    for window in sorted_windows:
        y_pos = window_to_y[window]
        fig.add_trace(
            go.Scatter(
                x=[x_min, x_max],
                y=[y_pos, y_pos],
                mode="lines",
                line={"color": "#cccccc", "width": 1},
                hoverinfo="skip",
                showlegend=False,
            )
        )

    # One connecting line plus one marker trace per party.
    for party in selected_parties:
        traj = party_trajectories.get(party)
        if not traj:
            continue

        x_vals = [score for _, score in traj]
        y_vals = [window_to_y[window] for window, _ in traj]
        color = PARTY_COLOURS.get(party, "#9E9E9E")

        fig.add_trace(
            go.Scatter(
                x=x_vals,
                y=y_vals,
                mode="lines",
                line={"color": color, "width": 2},
                hoverinfo="skip",
                showlegend=False,
            )
        )

        hover_texts = [f"{party}<br>{window}: {score:.3f}" for window, score in traj]
        fig.add_trace(
            go.Scatter(
                x=x_vals,
                y=y_vals,
                mode="markers+text",
                text=[party] * len(traj),
                textposition="top center",
                marker={"size": 12, "color": color},
                hovertext=hover_texts,
                hoverinfo="text",
                showlegend=False,
            )
        )

    # Labels always come from the poles: negative_pole = LEFT, positive_pole = RIGHT.
    left_label = theme.get("negative_pole", "")
    right_label = theme.get("positive_pole", "")

    # Y-axis tick labels: the current parliament gets a friendly name.
    y_labels = {
        window_to_y[window]: ("Huidig" if window == "current_parliament" else window)
        for window in sorted_windows
    }

    fig.update_layout(
        height=max(400, len(sorted_windows) * 60 + 100),
        margin={"l": 80, "r": 10, "t": 10, "b": 30},
        xaxis={
            "title": f"← {left_label} | {right_label} →",
            "range": [x_min, x_max],
            "showticklabels": False,
            "showline": False,
            "showgrid": True,
            "gridcolor": "rgba(0,0,0,0.1)",
            "zeroline": True,
            "zerolinecolor": "rgba(0,0,0,0.2)",
        },
        yaxis={
            "tickvals": list(y_labels.keys()),
            "ticktext": list(y_labels.values()),
            "tickmode": "array",
            "autorange": "reversed",  # Top to bottom
            "showgrid": False,
        },
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
    )

    st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Moties laden…")
def load_motions_df(db_path: str) -> pd.DataFrame:
    """Return the motions table for `db_path` as a pandas DataFrame.

    Thin wrapper around `explorer_data.load_motions_df`; the result is cached
    through `st.cache_data`, which shows the spinner text while loading.
    """
    motions = explorer_data.load_motions_df(db_path)
    return motions
|
|
|
|
|
|
|
|
|
def query_similar(
    db_path: str,
    source_motion_id: int,
    vector_type: str = "fused",
    top_k: int = 10,
) -> pd.DataFrame:
    """Return the `top_k` motions most similar to `source_motion_id`.

    Thin wrapper: all arguments are forwarded unchanged, in order, to
    `explorer_data.query_similar`.
    """
    similar = explorer_data.query_similar(
        db_path,
        source_motion_id,
        vector_type,
        top_k,
    )
    return similar
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Shared rendering helpers
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def _render_voting_results(voting_results_json) -> None:
    """Render a voting_results blob as one markdown line per vote value.

    The blob maps ``{party_or_mp: vote}``. Actors are grouped by their
    lower-cased, stripped vote and rendered in the fixed order voor, tegen,
    onthouden, afwezig, followed by any other vote values encountered.

    Args:
        voting_results_json: Either a JSON string or an already-decoded dict.
            Falsy input, input that does not decode to a non-empty dict, and
            input that fails to decode all result in nothing being rendered.

    Rendering is best-effort: any exception is logged at debug level and
    suppressed so a malformed record never breaks the surrounding page.
    """
    if not voting_results_json:
        return
    try:
        vdata = (
            json.loads(voting_results_json)
            if isinstance(voting_results_json, str)
            else voting_results_json
        )
        if not isinstance(vdata, dict) or not vdata:
            return

        # Group {vote: [actor, ...]}
        by_vote: Dict[str, List[str]] = {}
        for actor, vote in vdata.items():
            vote_str = str(vote).lower().strip()
            by_vote.setdefault(vote_str, []).append(str(actor))

        # Known votes first, in fixed order; unexpected vote values afterwards.
        vote_order = ["voor", "tegen", "onthouden", "afwezig"]
        vote_emoji = {"voor": "✅", "tegen": "❌", "onthouden": "🟡", "afwezig": "⬜"}
        # vdata is non-empty here, so at least one group always renders; no
        # separate "nothing to show" fallback is needed.
        for v in vote_order + [k for k in by_vote if k not in vote_order]:
            actors = by_vote.get(v)
            if not actors:
                continue
            emoji = vote_emoji.get(v, "▪️")
            st.markdown(
                f"**{emoji} {v.capitalize()}** ({len(actors)}): {', '.join(sorted(actors))}"
            )
    except Exception:
        # Still best-effort, but leave a trace instead of swallowing silently.
        logging.getLogger(__name__).debug(
            "Could not render voting results", exc_info=True
        )
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Tab 1: Politiek Kompas
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def _add_y_direction_annotations(fig: go.Figure) -> None:
    """Annotate `fig` with ▲ Progressief / ▼ Conservatief direction labels.

    Both labels are positioned in paper coordinates at x=-0.07: one just above
    the plot area (y=1.02) and one just below it (y=-0.06). The figure is
    modified in place.
    """
    label_font = dict(size=11, color="#666666")
    direction_labels = (
        (1.02, "▲ Progressief"),
        (-0.06, "▼ Conservatief"),
    )
    for y_pos, label in direction_labels:
        fig.add_annotation(
            xref="paper",
            yref="paper",
            x=-0.07,
            y=y_pos,
            text=label,
            xanchor="center",
            showarrow=False,
            font=label_font,
        )
|
|
|
|
|
|
|
|
|
def _window_to_dates(window_id: str) -> tuple[str, str]:
    """Return the (start_date, end_date) pair for `window_id`.

    Thin wrapper around `trajectory.window_to_dates`.
    """
    date_range = trajectory.window_to_dates(window_id)
    return date_range
|
|
|
|
|
|
|
|
|
def build_compass_tab(db_path: str, window_size: str) -> None:
    """Render the "Politiek Kompas" tab: a 2D scatter of MPs or party centroids.

    Steps: load annual positions, let the user pick a window / display level /
    minimum party size, restrict the current parliament to active MPs, merge
    name variants that differ only by a parenthetical, drop small and unknown
    parties, then draw the scatter with axis labels and optional
    low-confidence interpretation captions.

    Args:
        db_path: Database path forwarded to the loader helpers.
        window_size: Accepted for signature parity with the other tab
            builders; not used, because the compass always loads "annual"
            windows.
    """
    st.subheader("Politiek Kompas")
    st.markdown(
        "2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)."
    )

    # Compass always uses annual windows regardless of the sidebar window_size setting.
    positions_by_window, axis_def = load_positions(db_path, "annual")
    # load_positions may return None for axis_def; normalise so the
    # axis_def.get(...) calls below cannot fail.
    if axis_def is None:
        axis_def = {}
    if not positions_by_window:
        st.warning(
            "Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid."
        )
        return

    party_map = load_party_map(db_path)
    active_mps = load_active_mps(db_path)

    # Sort windows: year windows first (ascending), current_parliament last.
    year_windows = sorted(w for w in positions_by_window if w != "current_parliament")
    has_current = "current_parliament" in positions_by_window
    windows = year_windows + (["current_parliament"] if has_current else [])

    # Sparse years get a warning label in the selector.
    _SPARSE_YEARS = {"2016", "2017", "2018"}
    # Interpretation captions are shown only for axes whose quality is below this.
    _THRESHOLD = 0.65

    def _window_label(w: str) -> str:
        if w == "current_parliament":
            return "Huidig parlement"
        if w in _SPARSE_YEARS:
            return f"{w} ⚠️"
        return w

    col1, col2 = st.columns([3, 1])
    with col2:
        window_idx = st.selectbox(
            "Jaar",
            options=windows,
            index=len(windows) - 1,  # default: last entry (current_parliament when present)
            format_func=_window_label,
        )
        level = st.radio(
            "Weergave",
            options=["Kamerleden", "Partijen"],
            index=0,
            horizontal=True,
        )
        min_mps = st.number_input(
            "Min. Kamerleden per partij",
            min_value=1,
            max_value=20,
            value=3,
            step=1,
            help="Partijen met minder dan dit aantal zetels worden niet weergegeven.",
        )

    pos = positions_by_window.get(window_idx, {})
    if not pos:
        st.info(f"Geen data voor venster {window_idx}")
        return

    # For current_parliament, restrict to MPs present in active_mps.
    # Historical windows are not restricted.
    if window_idx == "current_parliament":
        pos = {mp: xy for mp, xy in pos.items() if mp in active_mps}

    # Deduplicate MPs whose names appear both with and without a parenthetical
    # first name, e.g. "Dijk, J.P." and "Dijk, J.P. (Jimmy)". Keep the stripped
    # name and average the positions when both variants are present.
    def _strip_paren(name: str) -> str:
        return re.sub(r"\s*\([^)]*\)", "", name).strip()

    deduped: Dict[str, Tuple[float, float]] = {}
    for name, (x, y) in pos.items():
        base = _strip_paren(name)
        if base in deduped:
            ox, oy = deduped[base]
            deduped[base] = ((ox + x) / 2, (oy + y) / 2)
        else:
            deduped[base] = (x, y)
    pos = deduped

    rows = []
    for name, (x, y) in pos.items():
        party = party_map.get(name) or party_map.get(_strip_paren(name), "Unknown")
        rows.append({"name": name, "x": x, "y": y, "party": party})

    # Explicit columns: with zero rows (e.g. the active-MP filter removed
    # everyone) a bare DataFrame has no "party" column and the filtering below
    # would raise KeyError instead of reaching the friendly message.
    df_pos = pd.DataFrame(rows, columns=["name", "x", "y", "party"])

    # Drop unknown parties and parties below the minimum MP threshold.
    party_counts = df_pos[df_pos["party"] != "Unknown"]["party"].value_counts()
    valid_parties = set(party_counts[party_counts >= min_mps].index)
    df_pos = df_pos[df_pos["party"].isin(valid_parties)]

    if df_pos.empty:
        st.info("Geen partijen met genoeg Kamerleden voor dit venster.")
        return

    # Axis titles are always shown on the compass. Because Plotly rotates the
    # vertical title, explicit direction annotations are added further down to
    # make the polarity unambiguous.
    _raw_x = axis_def.get("x_label")
    _raw_y = axis_def.get("y_label")

    # Map raw labels to display labels via the classifier helper. Imported at
    # call time; if that fails for any reason, fall back to the defaults from
    # analysis.svd_labels.
    try:
        from analysis.axis_classifier import display_label_for_modal

        _x_label = display_label_for_modal(_raw_x, "x")
        _y_label = display_label_for_modal(_raw_y, "y")
    except Exception:
        from analysis.svd_labels import get_fallback_labels

        _x_fallback, _y_fallback = get_fallback_labels()
        _x_label = _raw_x or _x_fallback
        _y_label = _raw_y or _y_fallback

    if level == "Partijen":
        # Aggregate to party centroids
        df_party = df_pos.groupby("party", as_index=False).agg(
            x=("x", "mean"), y=("y", "mean"), n=("name", "count")
        )
        df_party["name"] = df_party["party"]
        colour_map = {
            p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_party["party"].unique()
        }
        fig = px.scatter(
            df_party,
            x="x",
            y="y",
            color="party",
            text="party",
            hover_name="party",
            hover_data={"party": False, "x": ":.3f", "y": ":.3f", "n": True},
            color_discrete_map=colour_map,
            title=f"Politiek Kompas — {_window_label(window_idx)} (partijen)",
            labels={
                "x": _x_label,
                "y": _y_label,
                "n": "Kamerleden",
            },
        )
        fig.update_traces(textposition="top center", marker_size=14)
    else:
        colour_map = {
            p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique()
        }
        fig = px.scatter(
            df_pos,
            x="x",
            y="y",
            color="party",
            hover_name="name",
            hover_data={"party": True, "x": ":.3f", "y": ":.3f"},
            color_discrete_map=colour_map,
            title=f"Politiek Kompas — {_window_label(window_idx)}",
            labels={"x": _x_label, "y": _y_label},
        )

    fig.update_layout(
        height=600,
        legend_title_text="Partij",
        xaxis={"range": [-1, 1]},
        yaxis={"range": [-0.6, 0.6]},
    )
    _add_y_direction_annotations(fig)

    with col1:
        st.plotly_chart(fig, use_container_width=True)
        # Show an interpretation caption only for axes below the quality threshold.
        _x_interp = axis_def.get("x_interpretation", {}).get(window_idx, "")
        _y_interp = axis_def.get("y_interpretation", {}).get(window_idx, "")
        if (
            _x_interp
            and axis_def.get("x_quality", {}).get(window_idx, 1.0) < _THRESHOLD
        ):
            st.caption(_x_interp)
        if (
            _y_interp
            and axis_def.get("y_quality", {}).get(window_idx, 1.0) < _THRESHOLD
        ):
            st.caption(_y_interp)

    # Voting discipline analysis
    st.markdown("---")
    st.markdown(
        "**Stemdiscipline analyse:** De Rice-index meet hoe eensgezind partijen stemmen "
        "tijdens hoofdelijke stemmingen. Een score van 100% betekent dat alle MPs van "
        "een partij hetzelfde stemden; 50% wijst op een gelijke splitsing binnen de partij. "
        "Partijen met hoge discipline (>95%) zoals PVV en SGP stemmen als een blok, wat "
        "wijst op sterke partijdiscipline en homogene membership. Lagere discipline (<85%) "
        "bij partijen als PvdA of SP kan duiden op interne factiestrijd, gewetensvragen "
        "bij ethische thema's, of een brede ideologische koers die ruimte laat voor "
        "afwijkende meningen. De discipline varieert ook per onderwerp — ethische kwesties "
        "tonen vaak meer interne verschillen dan economische thema's."
    )
|
|
|
)
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Tab 2: Partij Trajectories
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def choose_trajectory_title(axis_def: dict, axis: str, threshold: float = 0.65) -> str:
    """Return the trajectory axis title for `axis`.

    Thin wrapper: the arguments are forwarded unchanged, in order, to
    `trajectory.choose_trajectory_title`.
    """
    title = trajectory.choose_trajectory_title(axis_def, axis, threshold)
    return title
|
|
|
|
|
|
|
|
|
def build_trajectories_tab(db_path: str, window_size: str) -> None:
|
|
|
logging.getLogger(__name__).debug(
|
|
|
"[TRAJ DEBUG] build_trajectories_tab called — db_path=%s, window_size=%s",
|
|
|
db_path,
|
|
|
window_size,
|
|
|
)
|
|
|
st.subheader("Partij Trajectories")
|
|
|
st.markdown("Hoe bewegen partijen over de tijdsvensters heen?")
|
|
|
|
|
|
positions_by_window, axis_def = load_positions(db_path, window_size)
|
|
|
logging.getLogger(__name__).debug(
|
|
|
"[TRAJ DEBUG] load_positions → %d windows, total MPs=%d",
|
|
|
len(positions_by_window),
|
|
|
sum(len(v) for v in positions_by_window.values()),
|
|
|
)
|
|
|
if axis_def is None:
|
|
|
axis_def = {}
|
|
|
if not positions_by_window:
|
|
|
# Instrumentation: record why trajectories tab aborted early
|
|
|
try:
|
|
|
_last_trajectories_diagnostics.update(
|
|
|
{
|
|
|
"stage": "load_positions_empty",
|
|
|
"positions_by_window_len": len(positions_by_window),
|
|
|
}
|
|
|
)
|
|
|
except Exception:
|
|
|
pass
|
|
|
try:
|
|
|
st.warning("Geen positiedata beschikbaar.")
|
|
|
except Exception:
|
|
|
pass
|
|
|
# If debug enabled, show diagnostics in UI (best-effort)
|
|
|
try:
|
|
|
if get_debug_trajectories_enabled():
|
|
|
try:
|
|
|
st.text_area(
|
|
|
"Trajectories diagnostics",
|
|
|
json.dumps(_last_trajectories_diagnostics, default=str),
|
|
|
height=160,
|
|
|
)
|
|
|
except Exception:
|
|
|
pass
|
|
|
except Exception:
|
|
|
pass
|
|
|
return
|
|
|
|
|
|
party_map = load_party_map(db_path)
|
|
|
logging.getLogger(__name__).debug(
|
|
|
"[TRAJ DEBUG] load_party_map → %d entries, sample=%s",
|
|
|
len(party_map),
|
|
|
list(party_map.items())[:3],
|
|
|
)
|
|
|
|
|
|
# Add name normalization to improve matching
|
|
|
def normalize_mp_name(name):
|
|
|
"""Normalize MP name for better matching between data sources."""
|
|
|
if not name:
|
|
|
return ""
|
|
|
# Remove extra whitespace
|
|
|
name = name.strip()
|
|
|
# Ensure consistent spacing after comma
|
|
|
if "," in name and ", " not in name:
|
|
|
name = name.replace(",", ", ")
|
|
|
return name
|
|
|
|
|
|
# Normalize party_map keys
|
|
|
party_map = {normalize_mp_name(k): v for k, v in party_map.items()}
|
|
|
|
|
|
# Also normalize MP names in positions_by_window
|
|
|
normalized_positions = {}
|
|
|
for window, positions in positions_by_window.items():
|
|
|
normalized_positions[window] = {
|
|
|
normalize_mp_name(k): v for k, v in positions.items()
|
|
|
}
|
|
|
positions_by_window = normalized_positions
|
|
|
|
|
|
# After normalization, log the match rate
|
|
|
all_mp_names = set()
|
|
|
for positions in positions_by_window.values():
|
|
|
all_mp_names.update(positions.keys())
|
|
|
|
|
|
matched_names = sum(1 for mp in all_mp_names if mp in party_map)
|
|
|
if all_mp_names:
|
|
|
logger.info(
|
|
|
f"MP name matching: {matched_names}/{len(all_mp_names)} matched ({100 * matched_names / len(all_mp_names):.1f}%)"
|
|
|
)
|
|
|
else:
|
|
|
logger.info("MP name matching: no MPs found in positions data")
|
|
|
|
|
|
if matched_names == 0 and len(all_mp_names) > 0:
|
|
|
logger.warning("No MP names matched between positions and party_map!")
|
|
|
logger.warning(f"Sample positions names: {list(all_mp_names)[:5]}")
|
|
|
logger.warning(f"Sample party_map names: {list(party_map.keys())[:5]}")
|
|
|
|
|
|
windows = sorted(positions_by_window.keys())
|
|
|
|
|
|
# Compute party centroids per window
|
|
|
centroids: Dict[str, Dict[str, Tuple[float, float]]] = {}
|
|
|
all_parties: set = set()
|
|
|
|
|
|
# Helper to normalise MP names (strip parenthetical first names) to match
|
|
|
# entries in the party_map. This mirrors the behaviour used in the compass
|
|
|
# tab so both tabs resolve parties the same way.
|
|
|
def _strip_paren(name: str) -> str:
|
|
|
return re.sub(r"\s*\([^)]*\)", "", name).strip()
|
|
|
|
|
|
for wid in windows:
|
|
|
pos = positions_by_window.get(wid, {})
|
|
|
per_party: Dict[str, List[Tuple[float, float]]] = {}
|
|
|
for mp_name, (x, y) in pos.items():
|
|
|
# Try exact match first, then stripped-name match to handle
|
|
|
# variants like "Dijk, J.P. (Jimmy)" -> "Dijk, J.P." used in mp_metadata
|
|
|
party = party_map.get(mp_name) or party_map.get(
|
|
|
_strip_paren(mp_name), "Unknown"
|
|
|
)
|
|
|
if party == "Unknown":
|
|
|
continue
|
|
|
per_party.setdefault(party, []).append((x, y))
|
|
|
for party, coords in per_party.items():
|
|
|
all_parties.add(party)
|
|
|
xs = [c[0] for c in coords]
|
|
|
ys = [c[1] for c in coords]
|
|
|
centroids.setdefault(party, {})[wid] = (
|
|
|
float(np.mean(xs)),
|
|
|
float(np.mean(ys)),
|
|
|
)
|
|
|
|
|
|
all_parties = sorted(
|
|
|
set(party_map.get(mp) for MPs in positions_by_window.values() for mp in MPs)
|
|
|
- {None, "Unknown"}
|
|
|
)
|
|
|
logging.getLogger(__name__).debug(
|
|
|
"[TRAJ DEBUG] all_parties (raw from party_map) → %d parties: %s",
|
|
|
len(all_parties),
|
|
|
all_parties[:10],
|
|
|
)
|
|
|
all_parties_sorted = sorted(all_parties)
|
|
|
|
|
|
# If no parties were found after mapping MPs to parties, show a helpful
|
|
|
# message instead of rendering an empty chart. This commonly happens when
|
|
|
# the party map failed to load (DB error) or the min_mps threshold filtered
|
|
|
# out all parties.
|
|
|
if not all_parties_sorted:
|
|
|
st.info(
|
|
|
"Geen partijen beschikbaar om trajecten te tekenen. Controleer of de party mapping is geladen (mp_metadata) en of de minimum Kamerleden-instelling te hoog staat."
|
|
|
)
|
|
|
try:
|
|
|
st.caption(f"Bekende partijen in party_map: {len(party_map)}")
|
|
|
except Exception:
|
|
|
pass
|
|
|
# Do not return here: allow per-MP fallback plotting below when no
|
|
|
# party-level centroids are available so the user still sees trajectories.
|
|
|
|
|
|
# Default: show CDA, D66, VVD — the three parties that span the political centre
|
|
|
default_parties = [p for p in ["CDA", "D66", "VVD"] if p in all_parties]
|
|
|
if not default_parties:
|
|
|
default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties]
|
|
|
if not default_parties:
|
|
|
default_parties = all_parties_sorted[:6]
|
|
|
|
|
|
selected_parties = st.multiselect(
|
|
|
"Selecteer partijen",
|
|
|
options=all_parties_sorted,
|
|
|
default=default_parties,
|
|
|
)
|
|
|
|
|
|
# Ensure EMA smoothing helper is available for per-MP fallback plotting which
|
|
|
# appears earlier in the function. Define here so calls above won't fail.
|
|
|
def _ema_smooth(values: List[float], alpha: float) -> List[float]:
|
|
|
if not values or alpha >= 1.0:
|
|
|
return values
|
|
|
smoothed = [values[0]]
|
|
|
for v in values[1:]:
|
|
|
smoothed.append(alpha * v + (1 - alpha) * smoothed[-1])
|
|
|
return smoothed
|
|
|
|
|
|
# default smoothing alpha used for inline per-MP plotting; may be overridden
|
|
|
# by the smoothing controls shown later in the UI.
|
|
|
smooth_alpha = 0.35
|
|
|
|
|
|
# If no party-level centroids were computed, fall back to per-MP trajectories
|
|
|
# so the user still sees a plot even when the party_map is missing or empty.
|
|
|
if not centroids:
|
|
|
# Fallback: plot individual MP trajectories
|
|
|
st.info(
|
|
|
"Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback."
|
|
|
)
|
|
|
|
|
|
# Build per-MP time series from positions_by_window
|
|
|
mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
|
|
|
for wid in windows:
|
|
|
pos = positions_by_window.get(wid, {})
|
|
|
for mp_name, xy in pos.items():
|
|
|
# Defensive conversion: skip malformed coordinates instead of raising
|
|
|
try:
|
|
|
x, y = float(xy[0]), float(xy[1])
|
|
|
except Exception:
|
|
|
# skip malformed entries silently (diagnostics will show counts)
|
|
|
continue
|
|
|
mp_positions.setdefault(mp_name, {})[wid] = (x, y)
|
|
|
|
|
|
# Filter to MPs with at least 2 windows and not all NaN
|
|
|
mp_positions = {
|
|
|
mp: pos
|
|
|
for mp, pos in mp_positions.items()
|
|
|
if len(pos) >= 2
|
|
|
and not all(np.isnan(x) and np.isnan(y) for x, y in pos.values())
|
|
|
}
|
|
|
|
|
|
if not mp_positions:
|
|
|
st.warning("Geen positiedata beschikbaar voor trajectplotten.")
|
|
|
_last_trajectories_diagnostics.update(
|
|
|
{
|
|
|
"stage": "no_mp_positions",
|
|
|
"mp_positions_count": 0,
|
|
|
}
|
|
|
)
|
|
|
# show diagnostics when debug enabled
|
|
|
try:
|
|
|
if get_debug_trajectories_enabled():
|
|
|
try:
|
|
|
st.text_area(
|
|
|
"Trajectories diagnostics",
|
|
|
json.dumps(_last_trajectories_diagnostics, default=str),
|
|
|
height=160,
|
|
|
)
|
|
|
except Exception:
|
|
|
pass
|
|
|
except Exception:
|
|
|
pass
|
|
|
return
|
|
|
|
|
|
# Store for later use
|
|
|
st.session_state["_trajectory_mp_positions"] = mp_positions
|
|
|
|
|
|
mp_list = sorted(mp_positions.keys())
|
|
|
default_mps = mp_list[:6]
|
|
|
selected_mps = st.multiselect(
|
|
|
"Selecteer Kamerleden (fallback)", options=mp_list, default=default_mps
|
|
|
)
|
|
|
|
|
|
# Plot per-MP trajectories
|
|
|
fig = go.Figure()
|
|
|
trace_count = 0
|
|
|
for mp in selected_mps:
|
|
|
wids_sorted = sorted(mp_positions[mp].keys())
|
|
|
xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
|
|
|
ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
|
|
|
xs = _ema_smooth(xs_raw, smooth_alpha)
|
|
|
ys = _ema_smooth(ys_raw, smooth_alpha)
|
|
|
custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)]
|
|
|
fig.add_trace(
|
|
|
go.Scatter(
|
|
|
x=xs,
|
|
|
y=ys,
|
|
|
mode="lines+markers",
|
|
|
name=mp,
|
|
|
text=wids_sorted,
|
|
|
customdata=custom_raw,
|
|
|
line=dict(color="#888888", shape="spline", smoothing=1.3),
|
|
|
marker=dict(color="#888888", size=6),
|
|
|
hovertemplate=(
|
|
|
f"<b>{mp}</b><br>"
|
|
|
"venster: %{text}<br>"
|
|
|
"x (smoothed): %{x:.3f}<br>"
|
|
|
"x (raw): %{customdata[0]:.3f}<br>"
|
|
|
"y (smoothed): %{y:.3f}<br>"
|
|
|
"y (raw): %{customdata[1]:.3f}<extra></extra>"
|
|
|
),
|
|
|
)
|
|
|
)
|
|
|
trace_count += 1
|
|
|
|
|
|
_add_y_direction_annotations(fig)
|
|
|
if trace_count == 0:
|
|
|
st.info(
|
|
|
"Geen trajecten getekend: geen geselecteerde Kamerleden met voldoende data."
|
|
|
)
|
|
|
else:
|
|
|
st.plotly_chart(fig, use_container_width=True)
|
|
|
return
|
|
|
|
|
|
# Developer override: if EXPLORER_FORCE_SHOW_TRAJECTORIES=1 in the
|
|
|
# environment, bypass party filtering and show the first MPs' trajectories
|
|
|
# directly (helps diagnose production environments where party mapping
|
|
|
# or filtering prevents any traces from appearing). This is safe to keep
|
|
|
# in main because it only triggers when explicitly enabled.
|
|
|
if os.getenv("EXPLORER_FORCE_SHOW_TRAJECTORIES") in ("1", "true", "True"):
|
|
|
# Build per-MP time series from positions_by_window and plot first 6 MPs
|
|
|
mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
|
|
|
for wid in windows:
|
|
|
pos = positions_by_window.get(wid, {})
|
|
|
for mp_name, (x, y) in pos.items():
|
|
|
mp_positions.setdefault(mp_name, {})[wid] = (float(x), float(y))
|
|
|
|
|
|
mp_list = sorted(mp_positions.keys())
|
|
|
if not mp_list:
|
|
|
st.info("Geen MP-positiegegevens beschikbaar om te tonen.")
|
|
|
return
|
|
|
|
|
|
sample_mps = mp_list[:6]
|
|
|
fig = go.Figure()
|
|
|
for mp in sample_mps:
|
|
|
wids_sorted = sorted(mp_positions[mp].keys())
|
|
|
xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
|
|
|
ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
|
|
|
xs = _ema_smooth(xs_raw, 0.35)
|
|
|
ys = _ema_smooth(ys_raw, 0.35)
|
|
|
custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)]
|
|
|
fig.add_trace(
|
|
|
go.Scatter(
|
|
|
x=xs,
|
|
|
y=ys,
|
|
|
mode="lines+markers",
|
|
|
name=mp,
|
|
|
text=wids_sorted,
|
|
|
customdata=custom_raw,
|
|
|
line=dict(color="#444444", shape="spline", smoothing=1.3),
|
|
|
marker=dict(color="#444444", size=6),
|
|
|
hovertemplate=(
|
|
|
f"<b>{mp}</b><br>"
|
|
|
"venster: %{text}<br>"
|
|
|
"x (smoothed): %{x:.3f}<br>"
|
|
|
"x (raw): %{customdata[0]:.3f}<br>"
|
|
|
"y (smoothed): %{y:.3f}<br>"
|
|
|
"y (raw): %{customdata[1]:.3f}<extra></extra>"
|
|
|
),
|
|
|
)
|
|
|
)
|
|
|
_add_y_direction_annotations(fig)
|
|
|
st.plotly_chart(fig, use_container_width=True)
|
|
|
return
|
|
|
|
|
|
# Debug expander: show data used to build trajectories so we can diagnose
|
|
|
# why no traces are appearing. Leave this collapsed by default in normal
|
|
|
# runs; when troubleshooting it will show counts and small samples.
|
|
|
try:
|
|
|
# Add a little opt-in checkbox in the UI to enable debug diagnostic output
|
|
|
debug_checkbox = False
|
|
|
try:
|
|
|
debug_checkbox = st.checkbox(
|
|
|
"Enable trajectories diagnostics (show extra info)",
|
|
|
value=get_debug_trajectories_enabled(),
|
|
|
)
|
|
|
except Exception:
|
|
|
debug_checkbox = get_debug_trajectories_enabled()
|
|
|
if debug_checkbox:
|
|
|
try:
|
|
|
with st.expander(
|
|
|
"DEBUG: Trajectories data (showing diagnostics)", expanded=False
|
|
|
):
|
|
|
st.write("windows (count):", len(windows))
|
|
|
st.write("windows sample:", windows[:10])
|
|
|
st.write("party_map entries:", len(party_map))
|
|
|
st.write("parties with centroids:", len(all_parties_sorted))
|
|
|
st.write("default_parties:", default_parties)
|
|
|
st.write("selected_parties:", selected_parties)
|
|
|
st.write("min_mps setting:", min_mps)
|
|
|
# sample centroid counts per party
|
|
|
sample = {
|
|
|
p: len(centroids.get(p, {}))
|
|
|
for p in list(all_parties_sorted)[:8]
|
|
|
}
|
|
|
st.write("sample centroid window counts per party:", sample)
|
|
|
except Exception:
|
|
|
pass
|
|
|
except Exception:
|
|
|
# Don't crash UI if st isn't available or expander fails
|
|
|
pass
|
|
|
|
|
|
# Smoothing controls
|
|
|
smoothing_method = st.selectbox(
|
|
|
"Smoothing methode",
|
|
|
options=["EMA", "Spline", "None"],
|
|
|
index=0,
|
|
|
help="EMA = exponential moving average; Spline = low-degree polynomial spline fit; None = raw centroids",
|
|
|
)
|
|
|
|
|
|
# EMA alpha only shown/used when EMA is selected
|
|
|
smooth_alpha = 1.0
|
|
|
if smoothing_method == "EMA":
|
|
|
smooth_alpha = st.slider(
|
|
|
"Glad maken (EMA-\u03b1)",
|
|
|
min_value=0.1,
|
|
|
max_value=1.0,
|
|
|
value=0.35,
|
|
|
step=0.05,
|
|
|
help=(
|
|
|
"\u03b1=1.0 toont de ruwe data; lagere waarden maken de lijn gladder. "
|
|
|
"Standaard 0.35 voor een goed evenwicht tussen detail en ruis."
|
|
|
),
|
|
|
)
|
|
|
|
|
|
def _ema_smooth(values: List[float], alpha: float) -> List[float]:
|
|
|
"""Apply exponential moving average; alpha=1.0 means no smoothing."""
|
|
|
if not values or alpha >= 1.0:
|
|
|
return values
|
|
|
smoothed = [values[0]]
|
|
|
for v in values[1:]:
|
|
|
smoothed.append(alpha * v + (1 - alpha) * smoothed[-1])
|
|
|
return smoothed
|
|
|
|
|
|
def _spline_smooth(values: List[float]) -> List[float]:
|
|
|
"""Perform a basic low-degree polynomial fit over index -> value and evaluate at indices.
|
|
|
|
|
|
This provides a simple spline-like smoothing without adding scipy as a dependency.
|
|
|
For very small N this returns the raw values.
|
|
|
"""
|
|
|
n = len(values)
|
|
|
if n <= 2:
|
|
|
return values
|
|
|
deg = min(3, n - 1)
|
|
|
try:
|
|
|
idx = np.arange(n, dtype=float)
|
|
|
coeffs = np.polyfit(idx, np.array(values, dtype=float), deg=deg)
|
|
|
smooth = np.polyval(coeffs, idx)
|
|
|
return [float(v) for v in smooth]
|
|
|
except Exception:
|
|
|
return values
|
|
|
|
|
|
fig = go.Figure()
|
|
|
trace_count = 0
|
|
|
helper_succeeded = False
|
|
|
# New: delegate plotting selection to helper for testability
|
|
|
# Note: select_trajectory_plot_data returns (fig, trace_count, banner_text)
|
|
|
try:
|
|
|
fig2, trace_count2, banner_text = select_trajectory_plot_data(
|
|
|
positions_by_window, party_map, windows, selected_parties, smooth_alpha
|
|
|
)
|
|
|
# If helper returned a figure, replace
|
|
|
if fig2 is not None:
|
|
|
fig = fig2
|
|
|
trace_count = trace_count2
|
|
|
helper_succeeded = True
|
|
|
if banner_text:
|
|
|
try:
|
|
|
st.caption(banner_text)
|
|
|
except Exception:
|
|
|
pass
|
|
|
try:
|
|
|
_last_trajectories_diagnostics.update({"banner_text": banner_text})
|
|
|
except Exception:
|
|
|
pass
|
|
|
except Exception as e:
|
|
|
tb = traceback.format_exc()
|
|
|
# attach diagnostics to the helper and module
|
|
|
try:
|
|
|
select_trajectory_plot_data._last_diagnostics = {"exception": tb}
|
|
|
except Exception:
|
|
|
pass
|
|
|
try:
|
|
|
_last_trajectories_diagnostics.update(
|
|
|
{"stage": "select_helper_exception", "exception": tb}
|
|
|
)
|
|
|
except Exception:
|
|
|
pass
|
|
|
logger.exception("select_trajectory_plot_data failed")
|
|
|
debug_enabled = get_debug_trajectories_enabled()
|
|
|
if debug_enabled:
|
|
|
try:
|
|
|
st.text_area("select_trajectory_plot_data traceback", tb, height=240)
|
|
|
except Exception:
|
|
|
pass
|
|
|
logging.getLogger(__name__).debug(
|
|
|
"[TRAJ DEBUG] helper_succeeded=%s", helper_succeeded
|
|
|
)
|
|
|
if not helper_succeeded:
|
|
|
for party in selected_parties:
|
|
|
if party not in centroids:
|
|
|
continue
|
|
|
wids_sorted = sorted(centroids[party].keys())
|
|
|
xs_raw = [centroids[party][w][0] for w in wids_sorted]
|
|
|
ys_raw = [centroids[party][w][1] for w in wids_sorted]
|
|
|
xs = _ema_smooth(xs_raw, smooth_alpha)
|
|
|
ys = _ema_smooth(ys_raw, smooth_alpha)
|
|
|
# Preserve raw (unsmoothed) values per-point so hover can show both raw and smoothed
|
|
|
custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)]
|
|
|
colour = PARTY_COLOURS.get(party, "#9E9E9E")
|
|
|
fig.add_trace(
|
|
|
go.Scatter(
|
|
|
x=xs,
|
|
|
y=ys,
|
|
|
mode="lines+markers",
|
|
|
name=party,
|
|
|
text=wids_sorted, # full window ID for hover
|
|
|
customdata=custom_raw,
|
|
|
line=dict(color=colour, shape="spline", smoothing=1.3),
|
|
|
marker=dict(color=colour, size=8),
|
|
|
hovertemplate=(
|
|
|
f"<b>{party}</b><br>"
|
|
|
"venster: %{text}<br>"
|
|
|
"x (smoothed): %{x:.3f}<br>"
|
|
|
"x (raw): %{customdata[0]:.3f}<br>"
|
|
|
"y (smoothed): %{y:.3f}<br>"
|
|
|
"y (raw): %{customdata[1]:.3f}<extra></extra>"
|
|
|
),
|
|
|
)
|
|
|
)
|
|
|
trace_count += 1
|
|
|
|
|
|
# For trajectories, the chart spans multiple windows. Use the classifier's
|
|
|
# per-window confidences aggregated (mean) to decide whether to use the
|
|
|
# classifier label or fall back to the conventional short label.
|
|
|
_THRESHOLD = 0.65
|
|
|
x_conf_map = axis_def.get("x_label_confidence", {}) or {}
|
|
|
y_conf_map = axis_def.get("y_label_confidence", {}) or {}
|
|
|
|
|
|
def _mean_conf(m: dict) -> Optional[float]:
|
|
|
vals = [v for v in m.values() if v is not None]
|
|
|
if not vals:
|
|
|
return None
|
|
|
return float(sum(vals) / len(vals))
|
|
|
|
|
|
x_mean = _mean_conf(x_conf_map)
|
|
|
y_mean = _mean_conf(y_conf_map)
|
|
|
|
|
|
x_title = choose_trajectory_title(axis_def, "x", threshold=_THRESHOLD)
|
|
|
y_title = choose_trajectory_title(axis_def, "y", threshold=_THRESHOLD)
|
|
|
|
|
|
fig.update_layout(
|
|
|
title="Partij trajectories",
|
|
|
xaxis_title=x_title,
|
|
|
yaxis_title=y_title,
|
|
|
height=600,
|
|
|
legend_title_text="Partij",
|
|
|
)
|
|
|
_add_y_direction_annotations(fig)
|
|
|
# If no traces were added to the figure, show a diagnostic message so the
|
|
|
# user knows why the plot is empty.
|
|
|
try:
|
|
|
_last_trajectories_diagnostics.update({"trace_count": trace_count})
|
|
|
except Exception:
|
|
|
pass
|
|
|
debug_enabled = get_debug_trajectories_enabled()
|
|
|
# Add detailed diagnostics to understand why trace_count is 0
|
|
|
if trace_count == 0:
|
|
|
_last_trajectories_diagnostics.update(
|
|
|
{
|
|
|
"stage": "zero_traces",
|
|
|
"positions_count": sum(len(pos) for pos in positions_by_window.values())
|
|
|
if positions_by_window
|
|
|
else 0,
|
|
|
"party_map_count": len(party_map) if party_map else 0,
|
|
|
"centroids_count": len(centroids) if centroids else 0,
|
|
|
"selected_parties_count": len(selected_parties)
|
|
|
if selected_parties
|
|
|
else 0,
|
|
|
"timestamp": datetime.now().isoformat(),
|
|
|
}
|
|
|
)
|
|
|
# Check if there are positions but no centroids (name mismatch)
|
|
|
if positions_by_window and party_map and not centroids:
|
|
|
# Sample some MP names from positions
|
|
|
sample_mps = []
|
|
|
for window, positions in list(positions_by_window.items())[:1]:
|
|
|
sample_mps = list(positions.keys())[:5]
|
|
|
break
|
|
|
# Check if these MPs are in party_map
|
|
|
matched = sum(1 for mp in sample_mps if mp in party_map)
|
|
|
_last_trajectories_diagnostics["name_match_check"] = {
|
|
|
"sample_mps": sample_mps,
|
|
|
"matched_in_party_map": matched,
|
|
|
"sample_size": len(sample_mps),
|
|
|
}
|
|
|
if trace_count == 0:
|
|
|
st.info("📊 **Geen trajecten getekend**")
|
|
|
|
|
|
# Show diagnostic information
|
|
|
with st.expander("🔍 Diagnostische informatie"):
|
|
|
st.write("**Data status:**")
|
|
|
st.write(
|
|
|
f"- Positie vensters: {len(positions_by_window) if positions_by_window else 0}"
|
|
|
)
|
|
|
st.write(f"- Party mappings: {len(party_map) if party_map else 0}")
|
|
|
st.write(
|
|
|
f"- Geselecteerde partijen: {len(selected_parties) if selected_parties else 0}"
|
|
|
)
|
|
|
|
|
|
if "centroid_diagnostics" in locals():
|
|
|
st.write("**Centroid berekening:**")
|
|
|
st.write(
|
|
|
f"- Partijen met posities: {len(centroid_diagnostics.get('parties_with_positions', []))}"
|
|
|
)
|
|
|
st.write(
|
|
|
f"- Partijen met alleen NaN: {len(centroid_diagnostics.get('parties_all_nan', []))}"
|
|
|
)
|
|
|
|
|
|
st.write("\n**Mogelijke oorzaken:**")
|
|
|
st.write("1. Geen SVD vectoren berekend voor de geselecteerde vensters")
|
|
|
st.write("2. MP namen in posities komen niet overeen met party_map")
|
|
|
st.write("3. Alle geselecteerde partijen hebben te weinig MPs (< 5)")
|
|
|
|
|
|
# Add a button to run diagnostics
|
|
|
if st.button("🔧 Database diagnostiek uitvoeren"):
|
|
|
with st.spinner("Bezig met diagnostiek..."):
|
|
|
# Import and run diagnostics
|
|
|
from scripts.diagnose_trajectories_cli import (
|
|
|
run as diagnose_trajectories,
|
|
|
)
|
|
|
|
|
|
results = diagnose_trajectories(db_path)
|
|
|
st.json(results)
|
|
|
else:
|
|
|
# DEBUG: show trace_count and figure data size before rendering
|
|
|
try:
|
|
|
st.info(
|
|
|
f"[DEBUG] trace_count={trace_count}, fig data count={len(fig.data)}, layout title={fig.layout.title.text if fig.layout.title else 'none'}"
|
|
|
)
|
|
|
except Exception:
|
|
|
pass
|
|
|
try:
|
|
|
logging.getLogger(__name__).debug(
|
|
|
"[TRAJ DEBUG] About to render plotly chart — trace_count=%d, banner=%s, fig has %d traces",
|
|
|
trace_count,
|
|
|
banner_text,
|
|
|
len(fig.data),
|
|
|
)
|
|
|
st.plotly_chart(fig, use_container_width=True)
|
|
|
except Exception as e:
|
|
|
st.error(f"Trajectories rendering failed: {e}")
|
|
|
# Always show diagnostics when rendering fails, regardless of trace_count
|
|
|
if get_debug_trajectories_enabled():
|
|
|
try:
|
|
|
st.json(_last_trajectories_diagnostics)
|
|
|
except Exception:
|
|
|
st.text_area(
|
|
|
"Trajectories diagnostics (JSON failed)",
|
|
|
json.dumps(_last_trajectories_diagnostics, default=str),
|
|
|
height=240,
|
|
|
)
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Tab 3: Motie Zoeken
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def build_search_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Zoeken" tab: title search plus year/controversy filters.

    The motions table is loaded through ``load_motions_df`` and filtered in
    memory. The 50 most controversial matches are rendered as expanders, each
    showing key metrics, the voting breakdown, a link to the source and up to
    five similar motions obtained from ``query_similar``.

    Args:
        db_path: Database path forwarded to ``load_motions_df`` and
            ``query_similar``.
        show_rejected: When False, rows whose stripped title equals
            ``"Verworpen."`` are hidden before any other filter is applied.
    """

    def _label(value: object, fallback: str) -> str:
        """Return *value* as text, or *fallback* when it is None, NaN or empty."""
        if isinstance(value, str):
            return value or fallback
        # NaN is truthy, so a plain ``value or fallback`` would render "nan".
        if value is None or pd.isna(value):
            return fallback
        return str(value)

    def _score(value: object) -> float:
        """Coerce *value* to float; None, NaN and non-numeric values become 0.0."""
        try:
            number = float(value)  # type: ignore[arg-type]
        except (TypeError, ValueError):
            return 0.0
        return 0.0 if np.isnan(number) else number

    st.subheader("Motie Zoeken")

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        df = df[df["title"].fillna("").str.strip() != "Verworpen."]

    # Controls (widget creation order is kept stable on purpose).
    col1, col2, col3 = st.columns([2, 1, 1])
    with col1:
        query = st.text_input(
            "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen"
        )
    with col2:
        years = sorted(df["year"].dropna().astype(int).unique().tolist())
        if years:
            year_range = st.select_slider(
                "Jaar", options=years, value=(years[0], years[-1])
            )
        else:
            # No usable year values: fall back to a fixed range.
            year_range = (2019, 2024)
    with col3:
        min_controversy = st.slider(
            "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05
        )

    # Apply filters in memory. Boolean indexing already yields a new frame, so
    # no defensive copy of ``df`` is needed.
    working = df[(df["year"] >= year_range[0]) & (df["year"] <= year_range[1])]
    if min_controversy > 0:
        working = working[working["controversy_score"] >= min_controversy]
    if query:
        needle = query.lower()
        titles = working["title"].fillna("").str.lower()
        # regex=False: the user's text is matched literally.
        working = working[titles.str.contains(needle, regex=False)]

    working = working.sort_values(by="controversy_score", ascending=False)
    st.caption(f"{len(working)} resultaten (top 50 getoond)")

    for _, row in working.head(50).iterrows():
        title = _label(row.get("title"), f"Motie #{row['id']}")
        date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
        controversy = _score(row.get("controversy_score"))
        margin = _score(row.get("winning_margin"))

        with st.expander(f"**{title}** — {date_str} — 🔥 {controversy:.2f}"):
            cols = st.columns(3)
            cols[0].metric("Controverse", f"{controversy:.2f}")
            cols[1].metric("Marge", f"{margin:.2f}")
            cols[2].metric("Jaar", int(row["year"]) if pd.notna(row["year"]) else "?")

            # Voting breakdown
            _render_voting_results(row.get("voting_results"))

            # Link to original motion (only for absolute http/https URLs)
            url = row.get("url")
            if url and str(url).startswith("http"):
                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

            # Similar motions
            sim = query_similar(db_path, int(row["id"]), top_k=5)
            if sim.empty:
                st.caption("_Nog geen vergelijkbare moties beschikbaar_")
                continue

            st.markdown("**Vergelijkbare moties:**")
            for _, s in sim.iterrows():
                s_date = (
                    pd.to_datetime(s["date"]).strftime("%Y")
                    if pd.notna(s.get("date"))
                    else ""
                )
                s_title = _label(s.get("title"), "Onbekend")
                st.markdown(f"- {s_title} *(score: {s['score']:.3f}, {s_date})*")
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Tab 4: Motie Browser
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def build_browser_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Browser" tab: a sortable table plus a detail panel.

    The motions table is loaded through ``load_motions_df``, optionally
    filtered by year and minimum controversy, sorted by the chosen key and
    shown as a dataframe. Below it, a numeric input selects one motion ID for
    which the title, date, voting breakdown, source link and up to ten similar
    motions (from ``query_similar``) are displayed.

    Args:
        db_path: Database path forwarded to ``load_motions_df`` and
            ``query_similar``.
        show_rejected: When False, rows whose stripped title equals
            ``"Verworpen."`` are hidden before any other filter is applied.
    """

    def _label(value: object, fallback: str) -> str:
        """Return *value* as text, or *fallback* when it is None, NaN or empty."""
        if isinstance(value, str):
            return value or fallback
        # NaN is truthy, so a plain ``value or fallback`` would render "nan".
        if value is None or pd.isna(value):
            return fallback
        return str(value)

    def _score(value: object) -> float:
        """Coerce *value* to float; None, NaN and non-numeric values become 0.0."""
        try:
            number = float(value)  # type: ignore[arg-type]
        except (TypeError, ValueError):
            return 0.0
        return 0.0 if np.isnan(number) else number

    st.subheader("Motie Browser")

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        df = df[df["title"].fillna("").str.strip() != "Verworpen."]

    # Controls (widget creation order is kept stable on purpose).
    col1, col2, col3 = st.columns(3)
    with col1:
        years = sorted(df["year"].dropna().astype(int).unique().tolist())
        year_filter = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years])
    with col2:
        min_controversy_b = st.slider(
            "Min. controverse",
            min_value=0.0,
            max_value=1.0,
            value=0.0,
            step=0.05,
            key="browser_controversy",
        )
    with col3:
        sort_by = st.selectbox("Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"])

    # Filter. ``df`` is never mutated below, so no defensive copy is needed.
    working = df
    if year_filter != "(Alle)":
        working = working[working["year"] == int(year_filter)]
    if min_controversy_b > 0:
        working = working[working["controversy_score"] >= min_controversy_b]

    # Sort option -> (column, ascending)
    sort_map = {
        "Datum (nieuw)": ("date", False),
        "Controverse": ("controversy_score", False),
        "Marge": ("winning_margin", True),
    }
    sort_col, sort_asc = sort_map[sort_by]
    # Same tolerance as the display-column guard below: skip sorting rather
    # than raise KeyError when the chosen column is absent.
    if sort_col in working.columns:
        working = working.sort_values(by=sort_col, ascending=sort_asc)

    # Display table (only the columns that are actually present)
    display_cols = ["id", "title", "date", "controversy_score", "winning_margin"]
    available_display = [c for c in display_cols if c in working.columns]
    st.dataframe(
        working[available_display].reset_index(drop=True),
        use_container_width=True,
        height=350,
    )

    st.divider()

    # Detail panel: bounds and default follow the filtered table.
    st.markdown("**Detail weergave** — vul een motie-ID in:")
    has_rows = not working.empty
    sel_id = st.number_input(
        "Motie ID",
        min_value=int(working["id"].min()) if has_rows else 1,
        max_value=int(working["id"].max()) if has_rows else 99999,
        value=int(working["id"].iloc[0]) if has_rows else 1,
        step=1,
    )

    # The lookup deliberately uses ``df`` (not ``working``), as before.
    motion_row = df[df["id"] == sel_id]
    if motion_row.empty:
        # Any integer within the bounds is accepted by the input, so tell the
        # user explicitly instead of leaving the panel blank.
        st.info("Geen motie gevonden met dit ID.")
        return

    row = motion_row.iloc[0]
    st.markdown(f"### {_label(row.get('title'), 'Onbekend')}")
    date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
    st.caption(
        f"📅 {date_str} | 🔥 Controverse: {_score(row.get('controversy_score')):.2f}"
    )

    # Link to original source (only for absolute http/https URLs)
    url = row.get("url")
    if url and str(url).startswith("http"):
        st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

    # Voting breakdown
    st.markdown("**Stemuitslag:**")
    _render_voting_results(row.get("voting_results"))

    # Similar motions
    sim = query_similar(db_path, int(sel_id), top_k=10)
    if sim.empty:
        st.caption("_Nog geen vergelijkbare moties beschikbaar voor deze motie_")
        return

    st.markdown("**Vergelijkbare moties:**")
    sim_cols = ["title", "score", "date", "policy_area"]
    # Guard against a result frame that lacks one of the optional columns.
    available_sim = [c for c in sim_cols if c in sim.columns]
    st.dataframe(sim[available_sim], use_container_width=True)
|
|
|
|
|
|
|
|
|
def build_svd_components_tab(db_path: str) -> None:
|
|
|
"""New tab: show top motions contributing to top SVD components.
|
|
|
|
|
|
Reads thoughts/explorer/top_svd_top_motions.json and displays a selector
|
|
|
for components 1..10 with theme labels/explanations and a detail pane per motion.
|
|
|
|
|
|
Components 1-2 use aligned PCA positions (consistent with compass).
|
|
|
Components 3-10 use raw SVD scores.
|
|
|
"""
|
|
|
st.subheader("🔬 SVD Assen — politieke polarisatiethema's")
|
|
|
st.markdown(
|
|
|
"Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen "
|
|
|
"van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren "
|
|
|
"het spanningsveld dat de as beschrijft."
|
|
|
)
|
|
|
|
|
|
# Scree plot: relative importance of each SVD component
|
|
|
scree_importances = load_scree_data(db_path)
|
|
|
if scree_importances:
|
|
|
st.markdown(
|
|
|
"**Scree-plot** — het relatieve gewicht van elke SVD-as. "
|
|
|
"De eerste assen verklaren het meeste van de stemverschillen in de Kamer; "
|
|
|
"latere assen (7+) zijn fragiel en mogelijk niet boven ruisniveau."
|
|
|
)
|
|
|
_render_scree_plot(scree_importances)
|
|
|
|
|
|
json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
|
|
|
if not os.path.exists(json_path):
|
|
|
st.warning(
|
|
|
f"Top-SVD data not found at {json_path}. Run the importance job to generate it."
|
|
|
)
|
|
|
return
|
|
|
|
|
|
try:
|
|
|
with open(json_path, "r", encoding="utf-8") as fh:
|
|
|
j = json.load(fh)
|
|
|
except Exception as e:
|
|
|
st.error(f"Failed to load SVD importance JSON: {e}")
|
|
|
return
|
|
|
|
|
|
window = j.get("window")
|
|
|
rows = j.get("rows", [])
|
|
|
if not rows:
|
|
|
st.info("Geen top-moties in dataset")
|
|
|
return
|
|
|
|
|
|
st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**")
|
|
|
|
|
|
# Build mapping component -> list of motions (deduplicate by motion_id per component)
|
|
|
comp_map: dict[int, list] = {}
|
|
|
for r in rows:
|
|
|
comp = int(r.get("component", 0))
|
|
|
bucket = comp_map.setdefault(comp, [])
|
|
|
existing_ids = {m.get("motion_id") for m in bucket}
|
|
|
if r.get("motion_id") not in existing_ids:
|
|
|
bucket.append(r)
|
|
|
|
|
|
comp_options = sorted(comp_map.keys())
|
|
|
|
|
|
# Build display labels for selectbox: "As 1 — Regulering vs. status-quo"
|
|
|
def _comp_label(c: int) -> str:
|
|
|
theme = SVD_THEMES.get(c, {})
|
|
|
lbl = theme.get("label", "")
|
|
|
return f"As {c} — {lbl}" if lbl else f"As {c}"
|
|
|
|
|
|
comp_display = [_comp_label(c) for c in comp_options]
|
|
|
|
|
|
# Load default party scores early (needed for sidebar controls)
|
|
|
party_scores_default = load_party_axis_scores(db_path)
|
|
|
party_mp_vectors = load_party_mp_vectors(db_path)
|
|
|
bootstrap_data = (
|
|
|
_cached_bootstrap_cis(party_mp_vectors) if party_mp_vectors else None
|
|
|
)
|
|
|
|
|
|
# Sidebar controls for window selection and minimum MPs filter
|
|
|
col1, col2 = st.columns([2, 1])
|
|
|
|
|
|
# Initialize view mode (will be set in col2 if render succeeds)
|
|
|
view_mode = "Enkel venster"
|
|
|
selected_parties_for_trajectory: list = []
|
|
|
|
|
|
with col2:
|
|
|
comp_sel_idx = st.selectbox(
|
|
|
"Selecteer SVD-as",
|
|
|
options=list(range(len(comp_options))),
|
|
|
format_func=lambda i: comp_display[i],
|
|
|
index=0,
|
|
|
)
|
|
|
comp_sel = comp_options[comp_sel_idx]
|
|
|
|
|
|
# Minimum MPs filter (only relevant for components 1-2 which use party centroids)
|
|
|
min_mps = st.number_input(
|
|
|
"Min. Kamerleden per partij",
|
|
|
min_value=1,
|
|
|
max_value=20,
|
|
|
value=1,
|
|
|
step=1,
|
|
|
help="Partijen met minder dan dit aantal Kamerleden worden niet weergegeven.",
|
|
|
)
|
|
|
|
|
|
# View selector for party axis display
|
|
|
view_mode = st.radio(
|
|
|
"Weergave",
|
|
|
options=["Enkel venster", "Tijdtraject"],
|
|
|
index=0,
|
|
|
help="Enkel venster: toont posities voor één tijdsvenster. Tijdtraject: toont hoe partijen over tijd bewegen op deze as.",
|
|
|
)
|
|
|
|
|
|
# Party multi-select for time trajectory view
|
|
|
selected_parties_for_trajectory = []
|
|
|
if view_mode == "Tijdtraject":
|
|
|
# Get list of parties with scores
|
|
|
all_parties = (
|
|
|
sorted(party_scores_default.keys()) if party_scores_default else []
|
|
|
)
|
|
|
default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties][:8]
|
|
|
selected_parties_for_trajectory = st.multiselect(
|
|
|
"Partijen om te tonen",
|
|
|
options=all_parties,
|
|
|
default=default_parties,
|
|
|
help="Selecteer de partijen die je wilt zien in het tijdtraject.",
|
|
|
)
|
|
|
|
|
|
# Show theme explanation
|
|
|
theme = SVD_THEMES.get(comp_sel, {})
|
|
|
if theme:
|
|
|
st.info(f"**{theme['label']}** — {theme['explanation']}")
|
|
|
|
|
|
motions = comp_map.get(comp_sel, [])
|
|
|
|
|
|
# Party axis chart
|
|
|
# Default party scores already loaded earlier for sidebar controls.
|
|
|
# ALL components 1-10 use raw (non-aligned) SVD vectors.
|
|
|
# The compass uses Procrustes-aligned PCA — separate visualization.
|
|
|
# Get available windows from svd_vectors
|
|
|
available_windows = get_uniform_dim_windows(db_path)
|
|
|
year_windows = sorted(w for w in available_windows if w != "current_parliament")
|
|
|
has_current = "current_parliament" in available_windows
|
|
|
svd_windows = year_windows + (["current_parliament"] if has_current else [])
|
|
|
|
|
|
def _svd_window_label(w: str) -> str:
|
|
|
if w == "current_parliament":
|
|
|
return "Huidig parliament"
|
|
|
return w
|
|
|
|
|
|
with col1:
|
|
|
svd_window = st.selectbox(
|
|
|
"Jaar",
|
|
|
options=svd_windows,
|
|
|
index=len(svd_windows) - 1, # default: current_parliament
|
|
|
format_func=_svd_window_label,
|
|
|
key=f"svd_window_{comp_sel}",
|
|
|
)
|
|
|
|
|
|
# Load party scores for the selected window (used for components 3-10)
|
|
|
if svd_window == "current_parliament":
|
|
|
party_scores = party_scores_default
|
|
|
else:
|
|
|
party_scores = load_party_axis_scores_for_window(db_path, svd_window)
|
|
|
|
|
|
# Compute MP counts from party_mp_vectors
|
|
|
party_mp_counts = (
|
|
|
{p: len(v) for p, v in party_mp_vectors.items()} if party_mp_vectors else {}
|
|
|
)
|
|
|
|
|
|
# For components 1-2, use aligned positions from load_positions (same as compass)
|
|
|
# for consistency. For components 3-10, use raw SVD scores.
|
|
|
def _get_aligned_party_coords(window: str) -> Dict[str, Tuple[float, float]]:
|
|
|
"""Get party (x, y) coordinates from aligned PCA positions for a window."""
|
|
|
positions_by_window, _ = load_positions(db_path, "annual")
|
|
|
window_pos = positions_by_window.get(window, {})
|
|
|
if not window_pos:
|
|
|
return {}
|
|
|
|
|
|
# Load party map to convert MP names to parties
|
|
|
_party_map = load_party_map(db_path)
|
|
|
|
|
|
# Aggregate MP positions to party centroids
|
|
|
party_coords: Dict[str, List[Tuple[float, float]]] = {}
|
|
|
for mp_name, (x, y) in window_pos.items():
|
|
|
party = _party_map.get(
|
|
|
mp_name, _party_map.get(mp_name.split("(")[0].strip(), None)
|
|
|
)
|
|
|
if party:
|
|
|
party_coords.setdefault(party, []).append((x, y))
|
|
|
|
|
|
# Compute mean position per party
|
|
|
return {
|
|
|
party: (
|
|
|
float(np.mean([c[0] for c in coords])),
|
|
|
float(np.mean([c[1] for c in coords])),
|
|
|
)
|
|
|
for party, coords in party_coords.items()
|
|
|
if coords
|
|
|
}
|
|
|
|
|
|
# Load aligned scores for ALL components 1-10 using PCA on aligned vectors.
|
|
|
# This ensures consistency between compass and SVD components tab.
|
|
|
def _get_aligned_party_scores(window: str) -> Dict[str, np.ndarray]:
|
|
|
"""Get party scores for all N components from aligned PCA positions."""
|
|
|
from analysis.political_axis import compute_nd_axes
|
|
|
|
|
|
scores_by_window, _ = compute_nd_axes(db_path, n_components=10)
|
|
|
window_scores = scores_by_window.get(window, {})
|
|
|
if not window_scores:
|
|
|
return {}
|
|
|
|
|
|
# Load party map to convert MP names to parties
|
|
|
_party_map = load_party_map(db_path)
|
|
|
|
|
|
# Aggregate MP scores to party centroids per component
|
|
|
n_comps = 10
|
|
|
party_scores_agg: Dict[str, List[np.ndarray]] = {}
|
|
|
for mp_name, scores in window_scores.items():
|
|
|
party = _party_map.get(
|
|
|
mp_name, _party_map.get(mp_name.split("(")[0].strip(), None)
|
|
|
)
|
|
|
if party:
|
|
|
party_scores_agg.setdefault(party, []).append(scores[:n_comps])
|
|
|
|
|
|
# Compute mean scores per party for each component
|
|
|
return {
|
|
|
party: np.mean(np.vstack(score_list), axis=0)
|
|
|
for party, score_list in party_scores_agg.items()
|
|
|
if score_list
|
|
|
}
|
|
|
|
|
|
# Extract 1D scores for this component using aligned PCA scores
|
|
|
party_1d_coords: dict = {}
|
|
|
aligned_all_scores = _get_aligned_party_scores(svd_window)
|
|
|
for party, all_scores in aligned_all_scores.items():
|
|
|
idx = comp_sel - 1 # 0-indexed
|
|
|
if idx < len(all_scores):
|
|
|
party_1d_coords[party] = (float(all_scores[idx]),)
|
|
|
|
|
|
# Auto-compute flip directions for ALL components 1-10 based on aligned party centroids.
|
|
|
# Since we now use aligned PCA scores for all components, compute flip directly from
|
|
|
# aligned scores to ensure canonical right parties (PVV, FVD, JA21, SGP) appear on RIGHT.
|
|
|
computed_flips: Dict[int, bool] = {}
|
|
|
try:
|
|
|
from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT
|
|
|
|
|
|
# Compute flip for each component based on aligned party scores
|
|
|
for comp_idx in range(10):
|
|
|
right_scores = []
|
|
|
left_scores = []
|
|
|
for party, scores in aligned_all_scores.items():
|
|
|
if party in CANONICAL_RIGHT:
|
|
|
right_scores.append(scores[comp_idx])
|
|
|
elif party in CANONICAL_LEFT:
|
|
|
left_scores.append(scores[comp_idx])
|
|
|
|
|
|
if right_scores and left_scores:
|
|
|
right_avg = np.mean(right_scores)
|
|
|
left_avg = np.mean(left_scores)
|
|
|
# Flip if right parties score lower than left (we want RIGHT > LEFT)
|
|
|
computed_flips[comp_idx + 1] = right_avg < left_avg
|
|
|
else:
|
|
|
computed_flips[comp_idx + 1] = False
|
|
|
except Exception:
|
|
|
# If flip computation fails, keep existing flip values from SVD_THEMES
|
|
|
pass
|
|
|
|
|
|
# Build theme override with computed flip for this component
|
|
|
# (avoids mutating SVD_THEMES which persists stale values across Streamlit reruns)
|
|
|
theme_with_flip = {
|
|
|
**theme,
|
|
|
"flip": computed_flips.get(comp_sel, theme.get("flip", False)),
|
|
|
}
|
|
|
|
|
|
# Filter parties by minimum MP count
|
|
|
if min_mps > 1 and party_mp_counts:
|
|
|
valid_parties = {p for p, count in party_mp_counts.items() if count >= min_mps}
|
|
|
party_1d_coords = {
|
|
|
p: coords for p, coords in party_1d_coords.items() if p in valid_parties
|
|
|
}
|
|
|
|
|
|
# Render party axis chart (single window or time trajectory)
|
|
|
if view_mode == "Tijdtraject" and selected_parties_for_trajectory:
|
|
|
# Load party scores for all windows and render time trajectory
|
|
|
available_windows = get_uniform_dim_windows(db_path)
|
|
|
year_windows = sorted(w for w in available_windows if w != "current_parliament")
|
|
|
has_current = "current_parliament" in available_windows
|
|
|
all_windows = year_windows + (["current_parliament"] if has_current else [])
|
|
|
|
|
|
# Use aligned PCA scores for all windows (consistent with single-window view)
|
|
|
party_scores_by_window = _get_aligned_trajectory_scores(db_path, all_windows)
|
|
|
|
|
|
_render_svd_time_trajectory(
|
|
|
party_scores_by_window,
|
|
|
comp_sel,
|
|
|
theme_with_flip,
|
|
|
selected_parties_for_trajectory,
|
|
|
)
|
|
|
else:
|
|
|
# Single-window view: render 1D party axis chart
|
|
|
_render_party_axis_chart_1d(party_1d_coords, comp_sel, theme_with_flip)
|
|
|
|
|
|
# Batch-fetch motion details (title, date, policy_area, url, body_text, voting_results)
|
|
|
motion_ids = [m.get("motion_id") for m in motions if m.get("motion_id") is not None]
|
|
|
motion_details: Dict[int, tuple] = {}
|
|
|
if motion_ids:
|
|
|
# Defensively convert motion_ids to integers, skipping invalid values
|
|
|
ids_int: List[int] = []
|
|
|
for mid in motion_ids:
|
|
|
try:
|
|
|
ids_int.append(int(mid))
|
|
|
except Exception:
|
|
|
logger.warning("Skipping invalid motion id in SVD batch fetch: %r", mid)
|
|
|
|
|
|
# If no valid ids remain, skip the DB query
|
|
|
if ids_int:
|
|
|
con = None
|
|
|
try:
|
|
|
placeholders = ", ".join("?" for _ in ids_int)
|
|
|
con = duckdb.connect(database=db_path, read_only=True)
|
|
|
db_rows = con.execute(
|
|
|
f"SELECT id, title, date, policy_area, url, body_text, voting_results "
|
|
|
f"FROM motions WHERE id IN ({placeholders})",
|
|
|
ids_int,
|
|
|
).fetchall()
|
|
|
motion_details = {r[0]: r for r in db_rows}
|
|
|
except Exception:
|
|
|
logger.exception("Failed to batch-fetch motion details")
|
|
|
finally:
|
|
|
if con:
|
|
|
con.close()
|
|
|
|
|
|
# Split motions by pole sign
|
|
|
pos_motions = [m for m in motions if float(m.get("score", 0.0)) >= 0]
|
|
|
neg_motions = [m for m in motions if float(m.get("score", 0.0)) < 0]
|
|
|
|
|
|
flip = theme_with_flip.get("flip", False) if theme_with_flip else False
|
|
|
pos_pole = theme_with_flip.get("positive_pole", "") if theme_with_flip else ""
|
|
|
neg_pole = theme_with_flip.get("negative_pole", "") if theme_with_flip else ""
|
|
|
|
|
|
# Derive left/right labels from flip direction
|
|
|
# flip=True: positive_pole on left, negative_pole on right
|
|
|
# flip=False: negative_pole on left, positive_pole on right
|
|
|
if flip:
|
|
|
left_pole, right_pole = pos_pole, neg_pole
|
|
|
left_motions, right_motions = pos_motions, neg_motions
|
|
|
left_arrow, right_arrow = "▲", "▼"
|
|
|
else:
|
|
|
left_pole, right_pole = neg_pole, pos_pole
|
|
|
left_motions, right_motions = neg_motions, pos_motions
|
|
|
left_arrow, right_arrow = "▼", "▲"
|
|
|
|
|
|
lcol, rcol = st.columns(2)
|
|
|
|
|
|
with lcol:
|
|
|
st.markdown(f"**← {left_pole}**")
|
|
|
for m in left_motions:
|
|
|
mid = m.get("motion_id")
|
|
|
raw_title = m.get("title") or f"Motie #{mid}"
|
|
|
with st.expander(f"{left_arrow} {raw_title}"):
|
|
|
row = motion_details.get(int(mid)) if mid is not None else None
|
|
|
if row:
|
|
|
try:
|
|
|
date_str = str(row[2])[:10]
|
|
|
except Exception:
|
|
|
date_str = "?"
|
|
|
st.caption(f"📅 {date_str} | {row[3] or '—'}")
|
|
|
if row[4] and str(row[4]).startswith("http"):
|
|
|
st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
|
|
|
if row[5]:
|
|
|
with st.expander("Toon volledige tekst"):
|
|
|
st.write(row[5])
|
|
|
_render_voting_results(row[6])
|
|
|
else:
|
|
|
st.caption("_Geen metadata beschikbaar_")
|
|
|
|
|
|
with rcol:
|
|
|
st.markdown(f"**{right_pole} →**")
|
|
|
for m in right_motions:
|
|
|
mid = m.get("motion_id")
|
|
|
raw_title = m.get("title") or f"Motie #{mid}"
|
|
|
with st.expander(f"{right_arrow} {raw_title}"):
|
|
|
row = motion_details.get(int(mid)) if mid is not None else None
|
|
|
if row:
|
|
|
try:
|
|
|
date_str = str(row[2])[:10]
|
|
|
except Exception:
|
|
|
date_str = "?"
|
|
|
st.caption(f"📅 {date_str} | {row[3] or '—'}")
|
|
|
if row[4] and str(row[4]).startswith("http"):
|
|
|
st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
|
|
|
if row[5]:
|
|
|
with st.expander("Toon volledige tekst"):
|
|
|
st.write(row[5])
|
|
|
_render_voting_results(row[6])
|
|
|
else:
|
|
|
st.caption("_Geen metadata beschikbaar_")
|
|
|
|
|
|
|
|
|
def build_mp_quiz_tab(db_path: str) -> None:
    """Render the interactive "which MP are you?" quiz.

    What happens on a single render:

    - Seed questions come from
      ``MotionDatabase.get_motions_with_individual_votes`` (``SEED_MOTIONS``
      of them).
    - One question is shown per render, inside a form so that changing the
      radio selection does not trigger a rerun. The answer is stored in
      ``st.session_state["mp_quiz_votes"]`` (keyed by ``str(motion_id)``) and
      the motion id is appended to ``st.session_state["mp_quiz_asked"]``.
    - Once every seed motion has an entry, the next question is chosen with
      ``MotionDatabase.choose_discriminating_motions`` for the MPs returned by
      ``MotionDatabase.match_mps_for_votes``.
    - No further question is offered once ``MAX_QUESTIONS`` questions have
      been answered.
    - The current MP ranking (top 10) and a match status message are rendered
      below the question.

    Args:
        db_path: Database path handed to ``MotionDatabase`` and
            ``load_motions_df``.
    """
    log = logging.getLogger(__name__)

    st.subheader("🧑⚖️ Welk tweede kamerlid ben jij?")
    st.markdown(
        "Beantwoord een paar eenvoudige ja/nee/onthoud vragen over moties om te zien welk Kamerlid het meest op jou lijkt."
    )

    SEED_MOTIONS = 8
    MAX_QUESTIONS = 20

    # initialize session state
    if "mp_quiz_votes" not in st.session_state:
        st.session_state["mp_quiz_votes"] = {}
    if "mp_quiz_asked" not in st.session_state:
        st.session_state["mp_quiz_asked"] = []

    from database import MotionDatabase as _MotionDatabase

    db_inst = _MotionDatabase(db_path)

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar om de quiz te starten.")
        return

    # seed from motions that actually have individual MP vote records
    seed_ids = db_inst.get_motions_with_individual_votes(k=SEED_MOTIONS)
    if not seed_ids:
        st.warning("Geen individuele stemdata beschikbaar voor de quiz.")
        return

    def _votes_by_motion_id() -> dict:
        # Session keys are str(motion_id); the database API is called with ints.
        return {int(k): v for k, v in st.session_state["mp_quiz_votes"].items()}

    def _next_motion_id():
        """Return the id of the next motion to ask, or None if there is none."""
        recorded = st.session_state["mp_quiz_votes"]

        # prefer seed motions that have no recorded answer yet
        for mid in seed_ids:
            if str(mid) not in recorded:
                return mid

        # otherwise pick a motion that discriminates between the remaining
        # candidate MPs
        try:
            ranked = db_inst.match_mps_for_votes(_votes_by_motion_id(), limit=200)
        except Exception:
            log.exception("MP quiz: could not rank MPs while selecting next question")
            ranked = []

        candidates = [r["mp_name"] for r in ranked]
        if not candidates:
            return None

        try:
            excluded = [int(k) for k in recorded]
            next_ids = db_inst.choose_discriminating_motions(candidates, excluded, k=1)
            return next_ids[0] if next_ids else None
        except Exception:
            log.exception("MP quiz: could not choose a discriminating motion")
            return None

    # Count questions the user actually answered. Motions that were skipped
    # automatically (see below) get a vote entry but are not in "mp_quiz_asked",
    # so this is the counter the MAX_QUESTIONS limit is measured against.
    answered_count = len(st.session_state["mp_quiz_asked"])
    limit_reached = answered_count >= MAX_QUESTIONS

    # show progress and controls
    _, controls_col = st.columns([3, 1])
    with controls_col:
        st.caption(f"Vragen beantwoord: {answered_count}/{MAX_QUESTIONS}")
        if st.button("Reset quiz"):
            st.session_state["mp_quiz_votes"] = {}
            st.session_state["mp_quiz_asked"] = []
            st.rerun()

    # main question section (single question per render, wrapped in a form to
    # avoid premature reruns when the user changes the radio selection)
    if limit_reached:
        # Enforce the question limit: do not offer another question.
        next_mid = None
    else:
        next_mid = _next_motion_id()
        if next_mid is None:
            st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.")

    if next_mid is not None:
        motion_rows = df[df["id"] == next_mid]
        if motion_rows.empty:
            # motion has votes but isn't in the motions DataFrame — record a
            # neutral answer so it is not selected again, then start over
            st.session_state["mp_quiz_votes"][str(next_mid)] = "Geen stem"
            st.rerun()
            return

        motion_row = motion_rows.iloc[0]
        st.markdown(f"### {motion_row.get('title') or f'Motie #{next_mid}'}")
        if motion_row.get("layman_explanation"):
            st.info(motion_row.get("layman_explanation"))

        with st.form(key=f"mp_quiz_form_{next_mid}"):
            choice = st.radio(
                "Wat zou jij stemmen?",
                options=["Voor", "Tegen", "Onthouden", "Geen stem"],
                index=3,
            )
            submitted = st.form_submit_button("Beantwoord en verder")

        if submitted:
            st.session_state["mp_quiz_votes"][str(next_mid)] = choice
            st.session_state["mp_quiz_asked"].append(next_mid)
            st.rerun()

    # display current ranking
    try:
        ranking = db_inst.match_mps_for_votes(_votes_by_motion_id(), limit=50)
    except Exception:
        log.exception("MP quiz: could not compute the MP ranking")
        ranking = []

    if not ranking:
        st.info("Nog geen antwoorden of geen overlapping met bestaande stemdata.")
        return

    st.markdown("**Top kandidaten**")
    st.dataframe(pd.DataFrame(ranking).head(10), use_container_width=True)

    # check uniqueness: exactly one MP holds the top agreement percentage
    top_pct = ranking[0]["agreement_pct"]
    top_matches = [r for r in ranking if r["agreement_pct"] == top_pct]
    if len(top_matches) == 1 and top_matches[0]["overlap"] > 0:
        st.success(
            f"Unieke match gevonden: {top_matches[0]['mp_name']} ({top_matches[0]['party']})"
        )
    elif limit_reached:
        st.warning(
            "Maximaal aantal vragen beantwoord. Je hebt meerdere vergelijkbare kandidaten."
        )
    else:
        st.info("Nog geen unieke match — vraag meer om verder te verfijnen.")
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# App entry
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def run_app() -> None:
    """Configure the page, render the sidebar and build the main tabs.

    The sidebar holds the "Toon verworpen moties" checkbox and an "Over"
    expander with row counts of the ``motions``, ``fused_embeddings`` and
    ``similarity_cache`` tables (read through a read-only DuckDB connection).
    The main area shows five tabs; when ``st.tabs`` is not available a radio
    selector is used instead and only the selected view is built.
    """
    st.set_page_config(
        layout="wide",
        page_title="Parlement Explorer",
        page_icon="🏛️",
    )
    st.title("🏛️ Parlement Explorer")

    # Sidebar
    st.sidebar.title("Instellingen")
    db_path = "data/motions.db"
    window_size = "annual"
    show_rejected = st.sidebar.checkbox("Toon verworpen moties", value=False)

    # About section
    with st.sidebar.expander("ℹ️ Over", expanded=False):
        if not _DUCKDB_AVAILABLE:
            st.warning(
                "DuckDB niet beschikbaar in deze Python-omgeving; DB diagnostics zijn niet beschikbaar."
            )
        else:
            try:
                con = duckdb.connect(database=db_path, read_only=True)
                try:
                    n_motions = con.execute("SELECT COUNT(*) FROM motions").fetchone()[0]
                    n_fused = con.execute(
                        "SELECT COUNT(*) FROM fused_embeddings"
                    ).fetchone()[0]
                    n_sim = con.execute(
                        "SELECT COUNT(*) FROM similarity_cache"
                    ).fetchone()[0]
                finally:
                    # Close even when one of the queries fails (e.g. a table
                    # is missing) so the connection is not leaked.
                    con.close()
                st.markdown(
                    f"**Moties:** {n_motions:,}  \n"
                    f"**Fused embeddings:** {n_fused:,}  \n"
                    f"**Similarity cache:** {n_sim:,}"
                )
            except Exception as e:
                st.warning(f"DB niet bereikbaar: {e}")

    # Main tabs: one (label, builder) pair per view, shared by both the
    # st.tabs layout and the radio fallback.
    tab_specs = [
        ("🧭 Politiek Kompas", lambda: build_compass_tab(db_path, window_size)),
        ("📈 Trajectories", lambda: build_trajectories_tab(db_path, window_size)),
        ("🔍 Motie Zoeken", lambda: build_search_tab(db_path, show_rejected)),
        ("📋 Motie Browser", lambda: build_browser_tab(db_path, show_rejected)),
        ("🔬 SVD Components", lambda: build_svd_components_tab(db_path)),
    ]
    tab_labels = [label for label, _ in tab_specs]

    # Streamlit tabs compatibility: some older/newer Streamlit builds expose different APIs.
    if callable(getattr(st, "tabs", None)):
        for tab, (_, build) in zip(st.tabs(tab_labels), tab_specs):
            with tab:
                build()
    else:
        # Fallback for environments where `st.tabs` is not available: use a radio selector
        selection = st.radio("Tab", tab_labels)
        builders = dict(tab_specs)
        # Any selection that is not one of the first four labels falls through
        # to the last view, as before.
        builders.get(selection, tab_specs[-1][1])()
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point (e.g. `streamlit run explorer.py`): enable INFO-level
    # logging with timestamped records, then start the app.
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(message)s",
        level=logging.INFO,
    )
    run_app()
|
|
|
|