diff --git a/analysis/explorer_data.py b/analysis/explorer_data.py
index 8100604..728faa9 100644
--- a/analysis/explorer_data.py
+++ b/analysis/explorer_data.py
@@ -23,6 +23,7 @@ from analysis.config import CURRENT_PARLIAMENT_PARTIES, _PARTY_NORMALIZE
__all__ = [
"get_available_windows",
"get_uniform_dim_windows",
+ "load_positions",
"load_party_map",
"load_active_mps",
"load_mp_vectors_by_window",
@@ -37,6 +38,9 @@ __all__ = [
"load_motions_df",
"query_similar",
"compute_party_axis_scores",
+ "get_aligned_party_scores",
+ "compute_party_discipline",
+ "_get_aligned_trajectory_scores",
]
logger = logging.getLogger(__name__)
@@ -567,3 +571,139 @@ def compute_party_axis_scores(
except Exception:
logger.exception("Failed to compute party axis scores")
return {}
+
+
+def load_positions(
+    db_path: str, window_size: str = "annual"
+) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict]:
+    """Compute 2D positions per window via ``compute_2d_axes`` (PCA method).
+
+    Args:
+        db_path: Database path, forwarded to the data/axis helpers.
+        window_size: When equal to ``"annual"`` the result is restricted to
+            windows whose id does not contain ``"-Q"``; any other value
+            returns every computed window.
+
+    Returns:
+        ``(positions_by_window, axis_def)`` where ``positions_by_window`` maps
+        ``window_id -> {entity_name: (x, y)}`` and ``axis_def`` is the axis
+        definition returned by ``compute_2d_axes``, replaced by the output of
+        ``classify_axes`` when that call succeeds. ``({}, {})`` is returned
+        when no uniform-dimension windows are available.
+    """
+    from analysis.political_axis import compute_2d_axes
+
+    window_ids = get_uniform_dim_windows(db_path)
+    if not window_ids:
+        return {}, {}
+
+    positions, axes = compute_2d_axes(
+        db_path,
+        window_ids=window_ids,
+        method="pca",
+        pca_residual=True,
+        normalize_vectors=True,
+    )
+
+    # Axis classification is best-effort: on any failure the unclassified
+    # axis definition is kept and the error is logged.
+    try:
+        from analysis.axis_classifier import classify_axes
+
+        axes = classify_axes(positions, axes, db_path)
+    except Exception:
+        logger.exception("classify_axes failed; using generic axis labels")
+
+    if window_size != "annual":
+        return positions, axes
+
+    # Keep only windows that were requested and are not quarterly ("-Q").
+    non_quarterly = {wid for wid in window_ids if "-Q" not in wid}
+    yearly = {}
+    for wid, coords in positions.items():
+        if wid in non_quarterly:
+            yearly[wid] = coords
+    return yearly, axes
+
+
+def get_aligned_party_scores(
+    db_path: str,
+    window: str,
+    active_mps: set | None = None,
+    n_components: int = 10,
+) -> Dict[str, np.ndarray]:
+    """Return the mean aligned score vector per party for one window.
+
+    Scores come from ``compute_nd_axes`` run over every uniform-dimension
+    window; each party's vector is the mean of its members' vectors,
+    truncated to ``n_components`` entries.
+
+    Args:
+        db_path: Database path, forwarded to the data/axis helpers.
+        window: Window id whose scores are wanted.
+        active_mps: Optional set of MP names. Only applied when ``window`` is
+            ``"current_parliament"``; other windows keep all MPs.
+        n_components: Number of components to request and keep per MP.
+            Defaults to 10, the value that was previously hard-coded.
+
+    Returns:
+        ``{party: mean_score_vector}``; empty when the window has no scores.
+        MPs without a party in the party map are skipped.
+    """
+    from analysis.political_axis import compute_nd_axes
+
+    window_ids = get_uniform_dim_windows(db_path)
+    scores_by_window, _ = compute_nd_axes(
+        db_path, window_ids=window_ids, n_components=n_components
+    )
+    if not scores_by_window:
+        return {}
+
+    window_scores = scores_by_window.get(window, {})
+    if not window_scores:
+        return {}
+
+    if window == "current_parliament" and active_mps is not None:
+        window_scores = {
+            mp: sc for mp, sc in window_scores.items() if mp in active_mps
+        }
+
+    party_map = load_party_map(db_path)
+
+    per_party: Dict[str, List[np.ndarray]] = {}
+    for mp_name, scores in window_scores.items():
+        # Fall back to the name with any "(...)" suffix removed.
+        party = party_map.get(
+            mp_name, party_map.get(mp_name.split("(")[0].strip(), None)
+        )
+        if party:
+            per_party.setdefault(party, []).append(scores[:n_components])
+
+    return {
+        party: np.mean(np.vstack(score_list), axis=0)
+        for party, score_list in per_party.items()
+        if score_list
+    }
+
+
+def compute_party_discipline(
+ db_path: str,
+ start_date: str,
+ end_date: str,
+) -> pd.DataFrame:
+ """Compute per-party voting discipline (Rice index) for roll-call votes in a date range.
+
+ Thin wrapper: the arguments are passed unchanged to
+ ``analysis.trajectory.compute_party_discipline`` and its result is returned as-is.
+
+ The properties below describe the delegate as documented here; they are not
+ enforced or verified by this wrapper:
+
+ - Only individual MP vote rows are used (mp_name LIKE '%,%').
+ - The result has columns [party, n_motions, discipline], sorted by discipline ascending.
+ - An empty DataFrame is returned when no qualifying motion exists or on any DB error.
+ """
+ # Function-scope import; presumably to avoid an import cycle or import-time cost
+ # (reason not visible in this file -- confirm).
+ from analysis import trajectory
+
+ return trajectory.compute_party_discipline(db_path, start_date, end_date)
+
+
+def _get_aligned_trajectory_scores(
+    db_path: str, windows: List[str], n_components: int = 10
+) -> Dict[str, Dict[str, List[float]]]:
+    """Return per-party mean scores for each window as plain lists.
+
+    Scores are obtained from ``compute_nd_axes`` for the given windows. For
+    each window with data, MP vectors (truncated to ``n_components``) are
+    grouped by party and averaged.
+
+    Returns:
+        ``{window: {party: [score per component]}}``. Windows without scores
+        are omitted; the whole result is empty when ``compute_nd_axes``
+        yields nothing.
+    """
+    from analysis.political_axis import compute_nd_axes
+
+    aligned, _ = compute_nd_axes(
+        db_path, window_ids=windows, n_components=n_components
+    )
+    if not aligned:
+        return {}
+
+    mp_to_party = load_party_map(db_path)
+
+    def _party_of(mp_name: str):
+        # Exact name first, then the name with any "(...)" suffix removed.
+        bare_name = mp_name.split("(")[0].strip()
+        return mp_to_party.get(mp_name, mp_to_party.get(bare_name, None))
+
+    trajectories: Dict[str, Dict[str, List[float]]] = {}
+    for window_id in windows:
+        mp_scores = aligned.get(window_id, {})
+        if not mp_scores:
+            continue
+
+        grouped: Dict[str, List[np.ndarray]] = {}
+        for mp_name, vec in mp_scores.items():
+            party = _party_of(mp_name)
+            if not party:
+                continue
+            grouped.setdefault(party, []).append(vec[:n_components])
+
+        averaged = {}
+        for party, vecs in grouped.items():
+            if vecs:
+                averaged[party] = np.mean(np.vstack(vecs), axis=0).tolist()
+        trajectories[window_id] = averaged
+
+    return trajectories
diff --git a/analysis/tabs/_rendering.py b/analysis/tabs/_rendering.py
new file mode 100644
index 0000000..131a63b
--- /dev/null
+++ b/analysis/tabs/_rendering.py
@@ -0,0 +1,796 @@
+"""Rendering helpers for explorer tabs.
+
+This module contains all Plotly/Streamlit rendering functions extracted from
+explorer.py. It is import-safe: plotly and streamlit are optional.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from typing import Dict, List, Optional, Tuple
+
+try:
+    import plotly.express as px
+    import plotly.graph_objects as go
+except Exception:
+    # Plotly is optional: fall back to minimal stand-ins so this module can
+    # still be imported. ``px`` has no stand-in and is left as None.
+    px = None
+    import types
+
+    # Trace fields the stand-ins keep; everything else is discarded.
+    _TRACE_FIELDS = ("name", "x", "y", "text", "customdata")
+
+    class _DummyTrace:
+        """Record of the trace fields listed in ``_TRACE_FIELDS``."""
+
+        def __init__(self, **kwargs):
+            self.name = kwargs.get("name")
+            self.x = kwargs.get("x")
+            self.y = kwargs.get("y")
+            self.text = kwargs.get("text")
+            self.customdata = kwargs.get("customdata")
+
+    class _DummyFigure:
+        """Figure stand-in that only collects traces in ``self.data``."""
+
+        def __init__(self):
+            self.data = []
+
+        def add_trace(self, trace):
+            """Store ``trace`` as a ``_DummyTrace``.
+
+            Accepts a ``_DummyTrace``, any object exposing the fields as
+            attributes, or a mapping-like object exposing them via ``get``.
+            """
+            if isinstance(trace, _DummyTrace):
+                self.data.append(trace)
+                return
+
+            values = {}
+            for field in _TRACE_FIELDS:
+                value = getattr(trace, field, None)
+                # getattr with a default never raises, so mapping-style
+                # traces must be handled explicitly rather than in an
+                # ``except`` branch (which could never run).
+                if value is None and hasattr(trace, "get"):
+                    value = trace.get(field)
+                values[field] = value
+            self.data.append(_DummyTrace(**values))
+
+        def add_annotation(self, *args, **kwargs):
+            return None
+
+        def update_layout(self, **kwargs):
+            return None
+
+        def update_traces(self, **kwargs):
+            return None
+
+        def add_hline(self, **kwargs):
+            return None
+
+    go = types.SimpleNamespace(
+        Figure=_DummyFigure,
+        Scatter=lambda **kwargs: _DummyTrace(**kwargs),
+        Bar=lambda **kwargs: _DummyTrace(**kwargs),
+    )
+
+try:
+    import streamlit as st
+except Exception:
+    # Streamlit is optional: provide a stand-in whose widgets return their
+    # default value (or a fixed placeholder) and whose display calls do nothing.
+
+    class _NullContext:
+        """Context manager that does nothing; stands in for Streamlit containers."""
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc, tb):
+            # Never suppress an exception raised inside the ``with`` body.
+            return False
+
+    class _DummyColumn(_NullContext):
+        """Stand-in for one ``columns()`` entry; usable as ``with col:``."""
+
+        def _noop(self, *args, **kwargs):
+            return None
+
+        markdown = metric = dataframe = write = text_input = _noop
+
+    def _widget_options(args, kwargs):
+        """Return the ``options`` keyword, else the second positional argument, else []."""
+        if kwargs.get("options") is not None:
+            return kwargs.get("options")
+        return args[1] if len(args) > 1 else []
+
+    class _DummySt(_NullContext):
+        """Import-safe replacement for the ``streamlit`` module object.
+
+        It is itself a no-op context manager so the objects returned by
+        ``tabs()`` (which are this instance) can be used in ``with`` blocks.
+        NOTE(review): ``sidebar`` is a method here; confirm callers do not
+        rely on attribute-style access such as ``st.sidebar.selectbox``.
+        """
+
+        def _noop(self, *args, **kwargs):
+            return None
+
+        def _false(self, *args, **kwargs):
+            return False
+
+        # Display-only calls: accept anything, return None.
+        markdown = subheader = plotly_chart = caption = text_area = json = _noop
+        warning = info = error = success = _noop
+        rerun = divider = write = dataframe = set_page_config = title = _noop
+
+        # Buttons are never "clicked".
+        form_submit_button = button = _false
+
+        def cache_data(self, *args, **kwargs):
+            def _decorator(func):
+                return func
+
+            return _decorator
+
+        def checkbox(self, *args, **kwargs):
+            return kwargs.get("value", False)
+
+        def selectbox(self, *args, **kwargs):
+            opts = _widget_options(args, kwargs)
+            return opts[0] if opts else None
+
+        def multiselect(self, *args, **kwargs):
+            opts = _widget_options(args, kwargs)
+            default = kwargs.get("default")
+            if default is not None:
+                return default
+            return opts[:6] if opts else []
+
+        def number_input(self, *args, **kwargs):
+            return kwargs.get("value", 1)
+
+        def slider(self, *args, **kwargs):
+            return kwargs.get("value", 0.35)
+
+        def select_slider(self, *args, **kwargs):
+            return kwargs.get("value", (None, None))
+
+        def radio(self, *args, **kwargs):
+            return kwargs.get("value", None)
+
+        def text_input(self, *args, **kwargs):
+            return kwargs.get("value", "")
+
+        def expander(self, *args, **kwargs):
+            return _NullContext()
+
+        def form(self, *args, **kwargs):
+            return _NullContext()
+
+        def spinner(self, *args, **kwargs):
+            return _NullContext()
+
+        def columns(self, *args, **kwargs):
+            # The first argument may be a column count or a sequence of
+            # relative widths; both forms are used by the tab modules.
+            spec = args[0] if args else kwargs.get("spec", 1)
+            n = spec if isinstance(spec, int) else len(spec)
+            return tuple(_DummyColumn() for _ in range(n))
+
+        def sidebar(self, *args, **kwargs):
+            return self
+
+        def tabs(self, *args, **kwargs):
+            n = len(args[0]) if args else 1
+            return [self for _ in range(n)]
+
+        @property
+        def session_state(self):
+            # Created lazily on first access and then reused.
+            if not hasattr(self, "_session_state"):
+                self._session_state = {}
+            return self._session_state
+
+    st = _DummySt()
+
+from analysis.config import PARTY_COLOURS
+
+logger = logging.getLogger(__name__)
+
+
+def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
+    """Render a scree plot of relative SVD component importance.
+
+    The first two bars use a darker colour and carry a value annotation; the
+    remaining bars are muted. A dotted cumulative line shares the y-axis, and
+    a dashed reference line is drawn at y=50.
+
+    Args:
+        importances: Importance values, expected sorted descending and
+            expressed in percent (values are shown with a ``%`` suffix).
+        n_show: Maximum number of leading components to display.
+
+    Nothing is rendered when there is no data to show.
+    """
+    if not importances:
+        return
+    data = list(importances[:n_show])
+    if not data:
+        # n_show <= 0 leaves nothing to plot (and max() below would raise).
+        return
+    ranks = list(range(1, len(data) + 1))
+
+    cumsum = []
+    running = 0.0
+    for v in data:
+        running += v
+        cumsum.append(running)
+
+    n_highlight = 2
+    bar_colours = [
+        "#1565C0" if i < n_highlight else "#90CAF9" for i in range(len(data))
+    ]
+
+    fig = go.Figure()
+
+    # NOTE(review): the line break inside both hover templates is written as
+    # "<br>"; the literals were split across physical lines, which is not
+    # valid Python. Confirm the exact template text against the upstream file.
+    fig.add_trace(
+        go.Bar(
+            x=ranks,
+            y=data,
+            marker_color=bar_colours,
+            hovertemplate="As %{x}<br>%{y:.1f}% verklaarde variantie",
+            showlegend=False,
+        )
+    )
+
+    fig.add_trace(
+        go.Scatter(
+            x=ranks,
+            y=cumsum,
+            mode="lines+markers",
+            line={"color": "#F57C00", "width": 2, "dash": "dot"},
+            marker={"size": 5, "color": "#F57C00"},
+            hovertemplate="As %{x}<br>Cumulatief: %{y:.1f}%",
+            name="Cumulatief",
+            showlegend=True,
+        )
+    )
+
+    fig.add_hline(
+        y=50,
+        line_dash="dash",
+        line_color="#BDBDBD",
+        line_width=1,
+        annotation_text="50%",
+        annotation_position="right",
+        annotation_font_color="#9E9E9E",
+        annotation_font_size=11,
+    )
+
+    # Value labels just above the highlighted bars.
+    for i in range(min(n_highlight, len(data))):
+        fig.add_annotation(
+            x=ranks[i],
+            y=data[i] + 0.3,
+            text=f"{data[i]:.1f}%",
+            showarrow=False,
+            font={"size": 11, "color": "#1565C0"},
+            yanchor="bottom",
+        )
+
+    fig.update_layout(
+        height=280,
+        margin={"l": 10, "r": 50, "t": 30, "b": 40},
+        title={
+            "text": "Belang per SVD-as",
+            "font": {"size": 13, "color": "#555555"},
+            "x": 0.02,
+            "xanchor": "left",
+        },
+        legend={
+            "orientation": "h",
+            "x": 0.5,
+            "xanchor": "center",
+            "y": 1.08,
+            "font": {"size": 11},
+        },
+        xaxis={
+            "title": {"text": "As (rang)", "font": {"size": 11}},
+            "tickmode": "linear",
+            "tick0": 1,
+            "dtick": 1,
+            "showline": False,
+            "showgrid": False,
+        },
+        yaxis={
+            "title": {"text": "% van totale variantie", "font": {"size": 11}},
+            "showline": False,
+            "showgrid": True,
+            "gridcolor": "#eeeeee",
+            "ticksuffix": "%",
+            # Leave headroom above the highest cumulative value.
+            "range": [0, max(cumsum) * 1.08],
+        },
+        plot_bgcolor="rgba(0,0,0,0)",
+        paper_bgcolor="rgba(0,0,0,0)",
+        bargap=0.25,
+    )
+    st.plotly_chart(fig, use_container_width=True)
+
+
+def _build_party_axis_figure(
+    party_coords: Dict[str, Tuple[float, float]],
+    comp_sel: int,
+    theme: dict,
+    bootstrap_data: Optional[Dict[str, Dict]] = None,
+) -> Optional[go.Figure]:
+    """Build a 1D horizontal scatter of party positions on axis ``comp_sel``.
+
+    Args:
+        party_coords: ``{party: (x, y)}``; ``comp_sel == 1`` selects x and
+            ``comp_sel == 2`` selects y. Entries that cannot be converted to
+            a float are skipped.
+        comp_sel: 1 or 2.
+        theme: Dict read for ``flip`` (negate scores), ``positive_pole`` and
+            ``negative_pole`` (axis title).
+        bootstrap_data: Optional ``{party: {"n_mps", "ci_lower", "ci_upper"}}``.
+            When given, hover text includes N and, if available, the
+            interval; parties with ``n_mps == 1`` get a diamond marker.
+
+    Returns:
+        The figure, or None when there is nothing to plot.
+
+    Raises:
+        ValueError: If ``comp_sel`` is not 1 or 2.
+    """
+    if not party_coords:
+        return None
+
+    if comp_sel not in (1, 2):
+        raise ValueError(
+            "_build_party_axis_figure only supports comp_sel 1 or 2 when using explicit coords"
+        )
+
+    axis_idx = comp_sel - 1
+    flip = theme.get("flip", False)
+
+    parties = []
+    scores = []
+    colours = []
+
+    for party, val in party_coords.items():
+        try:
+            if hasattr(val, "__len__") and len(val) == 2:
+                x, y = val
+                score = float(x if axis_idx == 0 else y)
+            else:
+                score = float(val[axis_idx])
+        except Exception:
+            # Malformed coordinates: leave this party out of the chart.
+            continue
+
+        if flip:
+            score = -score
+
+        parties.append(party)
+        scores.append(score)
+        colours.append(PARTY_COLOURS.get(party, "#9E9E9E"))
+
+    if not scores:
+        return None
+
+    hover = []
+    symbols = []
+    if bootstrap_data:
+        for p, s in zip(parties, scores):
+            bd = bootstrap_data.get(p)
+            if not bd:
+                hover.append(f"{p}: {s:.3f}")
+                symbols.append("circle")
+                continue
+
+            n_mps = bd.get("n_mps", "?")
+            ci_low = None
+            ci_high = None
+            try:
+                low = float(bd["ci_lower"][axis_idx])
+                high = float(bd["ci_upper"][axis_idx])
+            except Exception:
+                pass
+            else:
+                # Negating the score mirrors the interval as well, so the
+                # bounds swap roles; otherwise the interval would not match
+                # the displayed score.
+                ci_low, ci_high = (-high, -low) if flip else (low, high)
+
+            if ci_low is not None and ci_high is not None:
+                hover.append(
+                    f"{p}: {s:.3f} (N={n_mps}, 95%-BI: [{ci_low:.3f}, {ci_high:.3f}])"
+                )
+            else:
+                hover.append(f"{p}: {s:.3f} (N={n_mps})")
+            symbols.append("diamond" if n_mps == 1 else "circle")
+        marker_kwargs = {"size": 14, "color": colours, "symbol": symbols}
+    else:
+        hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]
+        marker_kwargs = {"size": 14, "color": colours}
+
+    fig = go.Figure()
+
+    # Pad the baseline 15% outward on each side. Scaling by 1.15 alone moves
+    # a positive minimum (or a negative maximum) inward, leaving markers off
+    # the baseline, so the padding is based on the magnitude instead.
+    lo, hi = min(scores), max(scores)
+    if lo == hi:
+        x_min, x_max = lo - 1, hi + 1
+    else:
+        x_min = lo - 0.15 * abs(lo)
+        x_max = hi + 0.15 * abs(hi)
+
+    fig.add_trace(
+        go.Scatter(
+            x=[x_min, x_max],
+            y=[0, 0],
+            mode="lines",
+            line={"color": "#cccccc", "width": 1},
+            hoverinfo="skip",
+            showlegend=False,
+        )
+    )
+
+    scatter_kwargs = {
+        "x": scores,
+        "y": [0] * len(scores),
+        "mode": "markers+text",
+        "text": parties,
+        "textposition": "top center",
+        "marker": marker_kwargs,
+        "hovertext": hover,
+        "hoverinfo": "text",
+        "showlegend": False,
+    }
+    fig.add_trace(go.Scatter(**scatter_kwargs))
+
+    left_label = theme.get("negative_pole", "")
+    right_label = theme.get("positive_pole", "")
+
+    fig.update_layout(
+        height=160,
+        margin={"l": 10, "r": 10, "t": 10, "b": 30},
+        xaxis={
+            "title": f"← {left_label} | {right_label} →",
+            "showticklabels": False,
+            "showline": False,
+            "showgrid": False,
+            "zeroline": False,
+        },
+        yaxis={"visible": False, "range": [-1, 2]},
+        plot_bgcolor="rgba(0,0,0,0)",
+        paper_bgcolor="rgba(0,0,0,0)",
+    )
+    return fig
+
+
+def _render_party_axis_chart(
+    party_coords: Dict[str, Tuple[float, float]],
+    comp_sel: int,
+    theme: dict,
+    bootstrap_data: Optional[Dict[str, Dict]] = None,
+) -> None:
+    """Draw the 1D party-position chart for component 1 or 2.
+
+    Builds the figure with ``_build_party_axis_figure`` and hands it to
+    Streamlit; when no figure could be built a short caption is shown
+    instead.
+    """
+    figure = _build_party_axis_figure(party_coords, comp_sel, theme, bootstrap_data)
+    if figure is not None:
+        st.plotly_chart(figure, use_container_width=True)
+        return
+    st.caption("_Partijdata niet beschikbaar voor deze as._")
+
+
+def _render_party_axis_chart_1d(
+    party_coords: Dict[str, Tuple[float, ...]],
+    comp_sel: int,
+    theme: dict,
+) -> None:
+    """Render a 1D horizontal scatter of party positions on one SVD component.
+
+    Args:
+        party_coords: ``{party: (score, ...)}``; only the first element of
+            each tuple is plotted. Entries that cannot be converted to a
+            float are skipped.
+        comp_sel: SVD component number (1-indexed). Not used by the drawing
+            code itself; kept for a signature parallel to the 2D variant.
+        theme: Dict read for ``flip`` (negate scores), ``positive_pole`` and
+            ``negative_pole`` (axis title).
+
+    Shows a caption instead of a chart when no usable scores exist.
+    """
+    if not party_coords:
+        st.caption("_Partijdata niet beschikbaar voor deze as._")
+        return
+
+    parties = []
+    scores = []
+    colours = []
+
+    for party, coords in party_coords.items():
+        try:
+            score = float(coords[0])
+        except Exception:
+            continue
+        parties.append(party)
+        scores.append(score)
+        colours.append(PARTY_COLOURS.get(party, "#9E9E9E"))
+
+    if not scores:
+        st.caption("_Partijdata niet beschikbaar voor deze as._")
+        return
+
+    if theme.get("flip", False):
+        scores = [-s for s in scores]
+
+    hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]
+
+    fig = go.Figure()
+
+    # Pad the baseline 15% outward on each side. Scaling by 1.15 alone moves
+    # a positive minimum (or a negative maximum) inward, leaving markers off
+    # the baseline, so the padding is based on the magnitude instead.
+    lo, hi = min(scores), max(scores)
+    if lo == hi:
+        x_min, x_max = lo - 1, hi + 1
+    else:
+        x_min = lo - 0.15 * abs(lo)
+        x_max = hi + 0.15 * abs(hi)
+
+    fig.add_trace(
+        go.Scatter(
+            x=[x_min, x_max],
+            y=[0, 0],
+            mode="lines",
+            line={"color": "#cccccc", "width": 1},
+            hoverinfo="skip",
+            showlegend=False,
+        )
+    )
+
+    fig.add_trace(
+        go.Scatter(
+            x=scores,
+            y=[0] * len(scores),
+            mode="markers+text",
+            text=parties,
+            textposition="top center",
+            marker={"size": 14, "color": colours},
+            hovertext=hover,
+            hoverinfo="text",
+            showlegend=False,
+        )
+    )
+
+    left_label = theme.get("negative_pole", "")
+    right_label = theme.get("positive_pole", "")
+
+    fig.update_layout(
+        height=160,
+        margin={"l": 10, "r": 10, "t": 10, "b": 30},
+        xaxis={
+            "title": f"← {left_label} | {right_label} →",
+            "showticklabels": False,
+            "showline": False,
+            "showgrid": False,
+            "zeroline": False,
+        },
+        yaxis={"visible": False, "range": [-1, 2]},
+        plot_bgcolor="rgba(0,0,0,0)",
+        paper_bgcolor="rgba(0,0,0,0)",
+    )
+
+    st.plotly_chart(fig, use_container_width=True)
+
+
+def _render_svd_time_trajectory(
+    party_scores_by_window: Dict[str, Dict[str, List[float]]],
+    comp_sel: int,
+    theme: dict,
+    selected_parties: List[str],
+) -> None:
+    """Render party positions over time on one SVD component.
+
+    Each window is a horizontal row (``current_parliament`` first, then the
+    other window ids in descending order); each selected party is drawn as a
+    line with labelled markers connecting its score in every window where it
+    has one.
+
+    Args:
+        party_scores_by_window: ``{window_id: {party_name: [scores]}}``.
+        comp_sel: SVD component number (1-indexed) selecting the score.
+        theme: Dict read for ``positive_pole`` and ``negative_pole``.
+            NOTE(review): ``flip`` is not applied here, unlike
+            ``_render_party_axis_chart_1d`` -- confirm this is intended.
+        selected_parties: Party names to display.
+
+    Shows a caption instead of a chart when there is nothing to plot.
+    """
+    if not party_scores_by_window or not selected_parties:
+        st.caption("_Geen data beschikbaar voor tijdtraject._")
+        return
+
+    idx = comp_sel - 1
+
+    all_windows = list(party_scores_by_window.keys())
+    sorted_windows = []
+    if "current_parliament" in all_windows:
+        sorted_windows.append("current_parliament")
+    sorted_windows.extend(
+        sorted((w for w in all_windows if w != "current_parliament"), reverse=True)
+    )
+
+    party_trajectories: Dict[str, List[Tuple[str, float]]] = {}
+    for window in sorted_windows:
+        scores_by_party = party_scores_by_window.get(window, {})
+        for party in selected_parties:
+            scores = scores_by_party.get(party, [])
+            if scores and len(scores) > idx:
+                try:
+                    score = float(scores[idx])
+                except (ValueError, TypeError):
+                    continue
+                party_trajectories.setdefault(party, []).append((window, score))
+
+    if not party_trajectories:
+        st.caption("_Geen data beschikbaar voor geselecteerde partijen._")
+        return
+
+    all_scores = [s for traj in party_trajectories.values() for _, s in traj]
+    if not all_scores:
+        st.caption("_Geen scores beschikbaar._")
+        return
+
+    fig = go.Figure()
+
+    # Pad 15% outward on each side. These bounds are also the explicit x-axis
+    # range, so scaling by 1.15 alone would clip markers whenever every score
+    # has the same sign (a positive minimum would move right, not left).
+    lo, hi = min(all_scores), max(all_scores)
+    if lo == hi:
+        x_min, x_max = lo - 1, hi + 1
+    else:
+        x_min = lo - 0.15 * abs(lo)
+        x_max = hi + 0.15 * abs(hi)
+
+    window_to_y = {w: i for i, w in enumerate(sorted_windows)}
+
+    # One grey guide line per window row.
+    for window in sorted_windows:
+        y_pos = window_to_y[window]
+        fig.add_trace(
+            go.Scatter(
+                x=[x_min, x_max],
+                y=[y_pos, y_pos],
+                mode="lines",
+                line={"color": "#cccccc", "width": 1},
+                hoverinfo="skip",
+                showlegend=False,
+            )
+        )
+
+    for party in selected_parties:
+        traj = party_trajectories.get(party)
+        if not traj:
+            continue
+
+        x_vals = [score for _, score in traj]
+        y_vals = [window_to_y[window] for window, _ in traj]
+        color = PARTY_COLOURS.get(party, "#9E9E9E")
+
+        fig.add_trace(
+            go.Scatter(
+                x=x_vals,
+                y=y_vals,
+                mode="lines",
+                line={"color": color, "width": 2},
+                hoverinfo="skip",
+                showlegend=False,
+            )
+        )
+
+        # NOTE(review): the line break is written as "<br>"; the literal was
+        # split across physical lines, which is not valid Python. Confirm the
+        # exact text against the upstream file.
+        hover_texts = [f"{party}<br>{window}: {score:.3f}" for window, score in traj]
+        fig.add_trace(
+            go.Scatter(
+                x=x_vals,
+                y=y_vals,
+                mode="markers+text",
+                text=[party] * len(traj),
+                textposition="top center",
+                marker={"size": 12, "color": color},
+                hovertext=hover_texts,
+                hoverinfo="text",
+                showlegend=False,
+            )
+        )
+
+    left_label = theme.get("negative_pole", "")
+    right_label = theme.get("positive_pole", "")
+
+    y_labels = {
+        window_to_y[window]: ("Huidig" if window == "current_parliament" else window)
+        for window in sorted_windows
+    }
+
+    fig.update_layout(
+        height=max(400, len(sorted_windows) * 60 + 100),
+        margin={"l": 80, "r": 10, "t": 10, "b": 30},
+        xaxis={
+            "title": f"← {left_label} | {right_label} →",
+            "range": [x_min, x_max],
+            "showticklabels": False,
+            "showline": False,
+            "showgrid": True,
+            "gridcolor": "rgba(0,0,0,0.1)",
+            "zeroline": True,
+            "zerolinecolor": "rgba(0,0,0,0.2)",
+        },
+        yaxis={
+            "tickvals": list(y_labels.keys()),
+            "ticktext": list(y_labels.values()),
+            "tickmode": "array",
+            "autorange": "reversed",
+            "showgrid": False,
+        },
+        plot_bgcolor="rgba(0,0,0,0)",
+        paper_bgcolor="rgba(0,0,0,0)",
+    )
+
+    st.plotly_chart(fig, use_container_width=True)
+
+
+def _render_voting_results(voting_results_json) -> None:
+    """Render a voting-results blob as one markdown line per vote value.
+
+    Args:
+        voting_results_json: Either a JSON string or an already-decoded dict
+            of ``{party_or_mp: vote}``. Votes are lower-cased and stripped
+            before grouping. The known values 'voor', 'tegen', 'onthouden'
+            and 'afwezig' are listed first, in that order, followed by any
+            other values encountered.
+
+    Rendering is best-effort: empty input, non-dict data and undecodable
+    JSON render nothing. Failures are logged rather than raised so a bad
+    blob cannot break the surrounding page.
+    """
+    if not voting_results_json:
+        return
+
+    if isinstance(voting_results_json, str):
+        try:
+            vdata = json.loads(voting_results_json)
+        except ValueError:
+            # json.JSONDecodeError is a ValueError subclass.
+            logger.warning("voting_results is not valid JSON; skipping render")
+            return
+    else:
+        vdata = voting_results_json
+
+    if not isinstance(vdata, dict) or not vdata:
+        return
+
+    try:
+        by_vote: Dict[str, List[str]] = {}
+        for actor, vote in vdata.items():
+            vote_str = str(vote).lower().strip()
+            by_vote.setdefault(vote_str, []).append(str(actor))
+
+        vote_order = ["voor", "tegen", "onthouden", "afwezig"]
+        vote_emoji = {"voor": "✅", "tegen": "❌", "onthouden": "🟡", "afwezig": "⬜"}
+        for v in vote_order + [k for k in by_vote if k not in vote_order]:
+            actors = by_vote.get(v)
+            if not actors:
+                continue
+            emoji = vote_emoji.get(v, "▪️")
+            st.markdown(
+                f"**{emoji} {v.capitalize()}** ({len(actors)}): {', '.join(sorted(actors))}"
+            )
+    except Exception:
+        # Keep the page alive, but leave a trace instead of failing silently.
+        logger.exception("Failed to render voting results")
+
+
+def _add_y_direction_annotations(fig: go.Figure) -> None:
+    """Add "▲ Progressief" / "▼ Conservatief" labels at the top and bottom of the Y axis.
+
+    Both labels are placed in paper coordinates at x=-0.07, without arrows.
+    """
+    placements = ((1.02, "▲ Progressief"), (-0.06, "▼ Conservatief"))
+    for y_pos, label in placements:
+        fig.add_annotation(
+            xref="paper",
+            yref="paper",
+            x=-0.07,
+            showarrow=False,
+            font={"size": 11, "color": "#666666"},
+            y=y_pos,
+            text=label,
+            xanchor="center",
+        )
diff --git a/analysis/tabs/browser.py b/analysis/tabs/browser.py
index e240fd0..91fe8b6 100644
--- a/analysis/tabs/browser.py
+++ b/analysis/tabs/browser.py
@@ -1,18 +1,95 @@
-"""Browser tab for the parliamentary explorer.
-
-This module will contain the browser tab implementation.
-Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
-"""
+"""Browser tab for the parliamentary explorer."""
from __future__ import annotations
+import pandas as pd
+
+import analysis.explorer_data as explorer_data
+from analysis.tabs._rendering import _render_voting_results, st
+
def build_browser_tab(db_path: str, show_rejected: bool) -> None:
- """Build the Motie Browser tab.
+ """Build the Motie Browser tab."""
+ # Flow: load motions -> optional "Verworpen." filter -> year/controversy filters
+ # and sorting -> overview table -> detail pane for one motion ID.
+ st.subheader("Motie Browser")
+
+ df = explorer_data.load_motions_df(db_path)
+ if df.empty:
+ st.warning("Geen moties beschikbaar.")
+ return
+
+ # Unless rejected motions are requested, drop rows whose stripped title is
+ # exactly "Verworpen.".
+ if not show_rejected:
+ df = df[df["title"].fillna("").str.strip() != "Verworpen."]
+
+ # Three side-by-side filter/sort controls.
+ col1, col2, col3 = st.columns(3)
+ with col1:
+ years = sorted(df["year"].dropna().astype(int).unique().tolist())
+ year_filter = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years])
+ with col2:
+ min_controversy_b = st.slider(
+ "Min. controverse",
+ min_value=0.0,
+ max_value=1.0,
+ value=0.0,
+ step=0.05,
+ key="browser_controversy",
+ )
+ with col3:
+ sort_by = st.selectbox("Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"])
+
+ # `working` is the filtered view shown in the table; `df` is kept for the
+ # detail lookup below.
+ working = df.copy()
+ if year_filter != "(Alle)":
+ working = working[working["year"] == int(year_filter)]
+ if min_controversy_b > 0:
+ working = working[working["controversy_score"] >= min_controversy_b]
+
+ # Sort label -> (column, ascending).
+ sort_map = {
+ "Datum (nieuw)": ("date", False),
+ "Controverse": ("controversy_score", False),
+ "Marge": ("winning_margin", True),
+ }
+ sort_col, sort_asc = sort_map[sort_by]
+ working = working.sort_values(by=sort_col, ascending=sort_asc)
+
+ # Only show the display columns that actually exist in the frame.
+ display_cols = ["id", "title", "date", "controversy_score", "winning_margin"]
+ available_display = [c for c in display_cols if c in working.columns]
+ st.dataframe(
+ working[available_display].reset_index(drop=True),
+ use_container_width=True,
+ height=350,
+ )
+
+ st.divider()
+
+ st.markdown("**Detail weergave** — vul een motie-ID in:")
+ # Bounds and default come from the filtered view; fixed fallbacks are used
+ # when the filters leave no rows.
+ sel_id = st.number_input(
+ "Motie ID",
+ min_value=int(working["id"].min()) if not working.empty else 1,
+ max_value=int(working["id"].max()) if not working.empty else 99999,
+ value=int(working["id"].iloc[0]) if not working.empty else 1,
+ step=1,
+ )
+ # The lookup uses `df`, not `working`, so an ID within the bounds resolves even
+ # if that motion is excluded by the year/controversy filters.
+ motion_row = df[df["id"] == sel_id]
+ if not motion_row.empty:
+ row = motion_row.iloc[0]
+ st.markdown(f"### {row.get('title') or 'Onbekend'}")
+ # NOTE(review): assumes `date` holds datetime-like values (strftime) -- confirm
+ # against load_motions_df.
+ date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
+ st.caption(
+ f"📅 {date_str} | 🔥 Controverse: {row.get('controversy_score', 0):.2f}"
+ )
+
+ # Only link values that look like http(s) URLs.
+ url = row.get("url")
+ if url and str(url).startswith("http"):
+ st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")
- Currently delegates to explorer.py implementation.
- Will be extracted when rendering logic is decoupled from Streamlit.
- """
- import explorer
+ st.markdown("**Stemuitslag:**")
+ _render_voting_results(row.get("voting_results"))
- explorer.build_browser_tab(db_path, show_rejected)
+ # NOTE(review): assumes query_similar returns the four columns selected
+ # below whenever its result is non-empty -- confirm.
+ sim = explorer_data.query_similar(db_path, int(sel_id), top_k=10)
+ if not sim.empty:
+ st.markdown("**Vergelijkbare moties:**")
+ st.dataframe(
+ sim[["title", "score", "date", "policy_area"]],
+ use_container_width=True,
+ )
+ else:
+ st.caption("_Nog geen vergelijkbare moties beschikbaar voor deze motie_")
diff --git a/analysis/tabs/compass.py b/analysis/tabs/compass.py
index 3ca9199..66c5a16 100644
--- a/analysis/tabs/compass.py
+++ b/analysis/tabs/compass.py
@@ -1,20 +1,200 @@
-"""Compass tab for the parliamentary explorer.
-
-This module will contain the compass tab implementation.
-Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
-"""
+"""Compass tab for the parliamentary explorer."""
from __future__ import annotations
-from typing import List
+import datetime as _dt
+import re
+from typing import Dict, Tuple
+
+import numpy as np
+import pandas as pd
+
+from analysis import config
+import analysis.explorer_data as explorer_data
+from analysis.tabs._rendering import px, st
+
+PARTY_COLOURS = config.PARTY_COLOURS
def build_compass_tab(db_path: str, window_size: str) -> None:
- """Build the Politiek Kompas tab.
+ """Build the Politiek Kompas tab."""
+ # NOTE(review): `window_size` is not read in this function; positions are
+ # always loaded with "annual" (see the comment below).
+ st.subheader("Politiek Kompas")
+ st.markdown(
+ "2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)."
+ )
+
+ # Compass always uses annual windows regardless of the sidebar window_size setting.
+ positions_by_window, axis_def = explorer_data.load_positions(db_path, "annual")
+ if axis_def is None:
+ axis_def = {}
+ if not positions_by_window:
+ st.warning(
+ "Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid."
+ )
+ return
+
+ party_map = explorer_data.load_party_map(db_path)
+ active_mps = explorer_data.load_active_mps(db_path)
+
+ # Selectable windows: all windows except "current_parliament" and the current
+ # calendar year, sorted, with "current_parliament" appended last when present.
+ _current_year = str(_dt.date.today().year)
+ year_windows = sorted(
+ w
+ for w in positions_by_window
+ if w != "current_parliament" and w != _current_year
+ )
+ has_current = "current_parliament" in positions_by_window
+ windows = year_windows + (["current_parliament"] if has_current else [])
+
+ # Years in _SPARSE_YEARS get a warning sign in their label; _THRESHOLD is the
+ # x_quality value below which the x-axis interpretation caption is shown.
+ _SPARSE_YEARS = {"2016", "2017", "2018"}
+ _THRESHOLD = 0.65
+
+ def _window_label(w: str) -> str:
+ if w == "current_parliament":
+ return "Huidig parlement"
+ if w in _SPARSE_YEARS:
+ return f"{w} ⚠️"
+ return w
+
+ # col1 receives the chart (rendered further down); col2 holds the controls.
+ col1, col2 = st.columns([3, 1])
+ with col2:
+ # Defaults to the last entry of `windows`.
+ window_idx = st.selectbox(
+ "Jaar",
+ options=windows,
+ index=len(windows) - 1,
+ format_func=_window_label,
+ )
+ level = st.radio(
+ "Weergave",
+ options=["Kamerleden", "Partijen"],
+ index=0,
+ horizontal=True,
+ )
+ min_mps = st.number_input(
+ "Min. Kamerleden per partij",
+ min_value=1,
+ max_value=20,
+ value=3,
+ step=1,
+ help="Partijen met minder dan dit aantal zetels worden niet weergegeven.",
+ )
+
+ pos = positions_by_window.get(window_idx, {})
+ if not pos:
+ st.info(f"Geen data voor venster {window_idx}")
+ return
+
+ # For the current parliament, keep only MPs listed as active.
+ if window_idx == "current_parliament":
+ pos = {mp: xy for mp, xy in pos.items() if mp in active_mps}
+
+ def _strip_paren(name: str) -> str:
+ return re.sub(r"\s*\([^)]*\)", "", name).strip()
+
+ # Merge entries whose names differ only by parenthesised parts.
+ # NOTE(review): merging averages pairwise as entries arrive, so with three or
+ # more duplicates the result is not the arithmetic mean -- confirm intended.
+ deduped: Dict[str, Tuple[float, float]] = {}
+ for name, (x, y) in pos.items():
+ base = _strip_paren(name)
+ if base in deduped:
+ ox, oy = deduped[base]
+ deduped[base] = ((ox + x) / 2, (oy + y) / 2)
+ else:
+ deduped[base] = (x, y)
+ pos = deduped
+
+ # Attach a party to every position; names without a match become "Unknown".
+ rows = []
+ for name, (x, y) in pos.items():
+ party = party_map.get(name) or party_map.get(_strip_paren(name), "Unknown")
+ rows.append({"name": name, "x": x, "y": y, "party": party})
+
+ df_pos = pd.DataFrame(rows)
+
+ # Keep only known parties that have at least `min_mps` members in this window.
+ party_counts = df_pos[df_pos["party"] != "Unknown"]["party"].value_counts()
+ valid_parties = set(party_counts[party_counts >= min_mps].index)
+ df_pos = df_pos[df_pos["party"].isin(valid_parties)]
+
+ if df_pos.empty:
+ st.info("Geen partijen met genoeg Kamerleden voor dit venster.")
+ return
+
+ _raw_x = axis_def.get("x_label")
+ _raw_y = axis_def.get("y_label")
+
+ # Prefer the axis classifier's display labels; if that import or call fails,
+ # fall back to the raw labels, then to the svd_labels fallback labels.
+ try:
+ from analysis.axis_classifier import display_label_for_modal
+
+ _x_label = display_label_for_modal(_raw_x, "x")
+ _y_label = display_label_for_modal(_raw_y, "y")
+ except Exception:
+ from analysis.svd_labels import get_fallback_labels
+
+ _x_fallback, _y_fallback = get_fallback_labels()
+ _x_label = _raw_x or _x_fallback
+ _y_label = _raw_y or _y_fallback
+
+ # NOTE(review): `px` is None when plotly could not be imported in _rendering,
+ # in which case the px.scatter calls below raise -- confirm this tab is only
+ # built with plotly installed.
+ if level == "Partijen":
+ # One point per party: mean position of its members plus a member count.
+ df_party = df_pos.groupby("party", as_index=False).agg(
+ x=("x", "mean"), y=("y", "mean"), n=("name", "count")
+ )
+ df_party["name"] = df_party["party"]
+ colour_map = {
+ p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_party["party"].unique()
+ }
+ fig = px.scatter(
+ df_party,
+ x="x",
+ y="y",
+ color="party",
+ text="party",
+ hover_name="party",
+ hover_data={"party": False, "x": ":.3f", "y": ":.3f", "n": True},
+ color_discrete_map=colour_map,
+ title=f"Politiek Kompas — {_window_label(window_idx)} (partijen)",
+ labels={
+ "x": _x_label,
+ "y": _y_label,
+ "n": "Kamerleden",
+ },
+ )
+ fig.update_traces(textposition="top center", marker_size=14)
+ else:
+ # One point per MP, coloured by party.
+ colour_map = {
+ p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique()
+ }
+ fig = px.scatter(
+ df_pos,
+ x="x",
+ y="y",
+ color="party",
+ hover_name="name",
+ hover_data={"party": True, "x": ":.3f", "y": ":.3f"},
+ color_discrete_map=colour_map,
+ title=f"Politiek Kompas — {_window_label(window_idx)}",
+ labels={"x": _x_label, "y": _y_label},
+ )
- Currently delegates to explorer.py implementation.
- Will be extracted when rendering logic is decoupled from Streamlit.
- """
- import explorer
+ # Fixed axis ranges, applied to both views.
+ fig.update_layout(
+ height=600,
+ legend_title_text="Partij",
+ xaxis={"range": [-1, 1]},
+ yaxis={"range": [-0.6, 0.6]},
+ )
+ with col1:
+ st.plotly_chart(fig, use_container_width=True)
+ # Show the x-axis interpretation only when this window's x_quality is below
+ # _THRESHOLD (a missing quality value counts as 1.0, i.e. no caption).
+ _x_interp = axis_def.get("x_interpretation", {}).get(window_idx, "")
+ if (
+ _x_interp
+ and axis_def.get("x_quality", {}).get(window_idx, 1.0) < _THRESHOLD
+ ):
+ st.caption(_x_interp)
- explorer.build_compass_tab(db_path, window_size)
+ # Voting discipline analysis
+ # Static explanatory text only: no discipline figures are computed or rendered
+ # in this function.
+ st.markdown("---")
+ st.markdown(
+ "**Stemdiscipline analyse:** De Rice-index meet hoe eensgezind partijen stemmen "
+ "tijdens hoofdelijke stemmingen. Een score van 100% betekent dat alle MPs van "
+ "een partij hetzelfde stemden; 50% wijst op een gelijke splitsing binnen de partij. "
+ "Partijen met hoge discipline (>95%) zoals PVV en SGP stemmen als een blok, wat "
+ "wijst op sterke partijdiscipline en homogene membership. Lagere discipline (<85%) "
+ "bij partijen als PvdA of SP kan duiden op interne factiestrijd, gewetensvragen "
+ "bij ethische thema's, of een brede ideologische koers die ruimte laat voor "
+ "afwijkende meningen. De discipline varieert ook per onderwerp — ethische kwesties "
+ "tonen vaak meer interne verschillen dan economische thema's."
+ )
diff --git a/analysis/tabs/components.py b/analysis/tabs/components.py
index 8dc806f..89e94bd 100644
--- a/analysis/tabs/components.py
+++ b/analysis/tabs/components.py
@@ -1,18 +1,374 @@
-"""SVD Components tab for the parliamentary explorer.
-
-This module will contain the SVD components tab implementation.
-Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
-"""
+"""SVD Components tab for the parliamentary explorer."""
from __future__ import annotations
+import datetime as _dt
+import logging
+import os
+from typing import Dict, List, Tuple
+
+import numpy as np
+
+from analysis import config
+import analysis.explorer_data as explorer_data
+from analysis.tabs._rendering import (
+ _render_party_axis_chart_1d,
+ _render_scree_plot,
+ _render_svd_time_trajectory,
+ _render_voting_results,
+ st,
+)
+
+try:
+ import duckdb
+except Exception:
+ duckdb = None # type: ignore
+
+SVD_THEMES = config.SVD_THEMES
+KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES
+
+logger = logging.getLogger(__name__)
+
def build_svd_components_tab(db_path: str) -> None:
-    """Build the SVD Components tab.
+    """Render the SVD components tab: axis picker, party chart and top motions.
-    Currently delegates to explorer.py implementation.
-    Will be extracted when rendering logic is decoupled from Streamlit.
+
+    Reads thoughts/explorer/top_svd_top_motions.json and shows a selector for
+    the components found in that file, with theme labels/explanations taken
+    from ``config.SVD_THEMES`` and a detail pane per motion.
+
+    Party scores for every component come from
+    ``explorer_data.get_aligned_party_scores``; the "Tijdtraject" view uses
+    ``explorer_data._get_aligned_trajectory_scores`` over the same windows that
+    are offered in the window selector.
+
+    Args:
+        db_path: Database path handed to the ``explorer_data`` loaders and to
+            ``duckdb.connect`` for the motion metadata lookup.
     """
-    import explorer
-    explorer.build_svd_components_tab(db_path)
+    st.subheader("🔬 SVD Assen — politieke polarisatiethema's")
+    st.markdown(
+        "Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen "
+        "van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren "
+        "het spanningsveld dat de as beschrijft."
+    )
+
+    scree_importances = explorer_data.load_scree_data(db_path)
+    if scree_importances:
+        st.markdown(
+            "**Scree-plot** — het relatieve gewicht van elke SVD-as. "
+            "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; "
+            "latere assen (7+) zijn fragiel en mogelijk niet boven ruisniveau."
+        )
+        _render_scree_plot(scree_importances)
+
+    json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
+    if not os.path.exists(json_path):
+        st.warning(
+            f"Top-SVD data not found at {json_path}. Run the importance job to generate it."
+        )
+        return
+
+    try:
+        import json
+
+        with open(json_path, "r", encoding="utf-8") as fh:
+            j = json.load(fh)
+    except Exception as e:
+        # UI boundary: show the problem to the user instead of crashing the tab.
+        st.error(f"Failed to load SVD importance JSON: {e}")
+        return
+
+    window = j.get("window")
+    rows = j.get("rows", [])
+    if not rows:
+        st.info("Geen top-moties in dataset")
+        return
+
+    st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**")
+
+    # Group rows per component, keeping only the first row for each motion id.
+    # A per-component set of seen ids avoids rescanning the bucket for every row.
+    comp_map: dict[int, list] = {}
+    seen_ids: dict[int, set] = {}
+    for r in rows:
+        comp = int(r.get("component", 0))
+        bucket = comp_map.setdefault(comp, [])
+        seen = seen_ids.setdefault(comp, set())
+        motion_id = r.get("motion_id")
+        if motion_id not in seen:
+            seen.add(motion_id)
+            bucket.append(r)
+
+    comp_options = sorted(comp_map.keys())
+
+    def _comp_label(c: int) -> str:
+        theme = SVD_THEMES.get(c, {})
+        lbl = theme.get("label", "")
+        return f"As {c} — {lbl}" if lbl else f"As {c}"
+
+    comp_display = [_comp_label(c) for c in comp_options]
+
+    party_scores_default = explorer_data.load_party_axis_scores(db_path)
+    party_mp_vectors = explorer_data.load_party_mp_vectors(db_path)
+
+    col1, col2 = st.columns([2, 1])
+
+    with col2:
+        comp_sel_idx = st.selectbox(
+            "Selecteer SVD-as",
+            options=list(range(len(comp_options))),
+            format_func=lambda i: comp_display[i],
+            index=0,
+        )
+        comp_sel = comp_options[comp_sel_idx]
+
+        min_mps = st.number_input(
+            "Min. Kamerleden per partij",
+            min_value=1,
+            max_value=20,
+            value=1,
+            step=1,
+            help="Partijen met minder dan dit aantal Kamerleden worden niet weergegeven.",
+        )
+
+        view_mode = st.radio(
+            "Weergave",
+            options=["Enkel venster", "Tijdtraject"],
+            index=0,
+            help="Enkel venster: toont posities voor één tijdsvenster. Tijdtraject: toont hoe partijen over tijd bewegen op deze as.",
+        )
+
+        selected_parties_for_trajectory: list = []
+        if view_mode == "Tijdtraject":
+            all_parties = (
+                sorted(party_scores_default.keys()) if party_scores_default else []
+            )
+            default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties][:8]
+            selected_parties_for_trajectory = st.multiselect(
+                "Partijen om te tonen",
+                options=all_parties,
+                default=default_parties,
+                help="Selecteer de partijen die je wilt zien in het tijdtraject.",
+            )
+
+    theme = SVD_THEMES.get(comp_sel, {})
+    if theme:
+        st.info(f"**{theme['label']}** — {theme['explanation']}")
+
+    motions = comp_map.get(comp_sel, [])
+
+    # Offer every uniform-dimension window except the running calendar year,
+    # with "current_parliament" (when present) as the last entry.
+    _current_year = str(_dt.date.today().year)
+    available_windows = explorer_data.get_uniform_dim_windows(db_path)
+    year_windows = sorted(
+        w for w in available_windows if w not in ("current_parliament", _current_year)
+    )
+    has_current = "current_parliament" in available_windows
+    svd_windows = year_windows + (["current_parliament"] if has_current else [])
+
+    def _svd_window_label(w: str) -> str:
+        if w == "current_parliament":
+            return "Huidig parliament"
+        return w
+
+    with col1:
+        svd_window = st.selectbox(
+            "Jaar",
+            options=svd_windows,
+            # Clamp so an empty window list never yields index -1.
+            index=max(len(svd_windows) - 1, 0),
+            format_func=_svd_window_label,
+            key=f"svd_window_{comp_sel}",
+        )
+
+    party_mp_counts = (
+        {p: len(v) for p, v in party_mp_vectors.items()} if party_mp_vectors else {}
+    )
+
+    active_mps = (
+        explorer_data.load_active_mps(db_path)
+        if svd_window == "current_parliament"
+        else None
+    )
+    aligned_all_scores = explorer_data.get_aligned_party_scores(
+        db_path, svd_window, active_mps
+    )
+
+    # Components are 1-based in the JSON; guard both ends so component 0 does
+    # not silently wrap around to the last score via a negative index.
+    comp_idx = comp_sel - 1
+    party_1d_coords: dict = {
+        party: (float(all_scores[comp_idx]),)
+        for party, all_scores in aligned_all_scores.items()
+        if 0 <= comp_idx < len(all_scores)
+    }
+
+    # Orient each axis from the data: flip when the canonical right-wing
+    # parties average below the canonical left-wing parties.
+    computed_flips: Dict[int, bool] = {}
+    try:
+        from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT
+    except ImportError:
+        logger.warning(
+            "CANONICAL_LEFT/CANONICAL_RIGHT unavailable; using configured axis flips"
+        )
+    else:
+        for idx in range(10):
+            right_scores = [
+                scores[idx]
+                for party, scores in aligned_all_scores.items()
+                if party in CANONICAL_RIGHT and idx < len(scores)
+            ]
+            left_scores = [
+                scores[idx]
+                for party, scores in aligned_all_scores.items()
+                if party not in CANONICAL_RIGHT
+                and party in CANONICAL_LEFT
+                and idx < len(scores)
+            ]
+            computed_flips[idx + 1] = bool(
+                right_scores
+                and left_scores
+                and float(np.mean(right_scores)) < float(np.mean(left_scores))
+            )
+
+    theme_with_flip = {
+        **theme,
+        "flip": computed_flips.get(comp_sel, theme.get("flip", False)),
+    }
+
+    if min_mps > 1 and party_mp_counts:
+        valid_parties = {p for p, count in party_mp_counts.items() if count >= min_mps}
+        party_1d_coords = {
+            p: coords for p, coords in party_1d_coords.items() if p in valid_parties
+        }
+
+    if view_mode == "Tijdtraject" and selected_parties_for_trajectory:
+        # The trajectory covers exactly the windows offered in the selector.
+        party_scores_by_window = explorer_data._get_aligned_trajectory_scores(
+            db_path, svd_windows
+        )
+        _render_svd_time_trajectory(
+            party_scores_by_window,
+            comp_sel,
+            theme_with_flip,
+            selected_parties_for_trajectory,
+        )
+    else:
+        _render_party_axis_chart_1d(party_1d_coords, comp_sel, theme_with_flip)
+
+    # Batch-fetch metadata for all motions of the selected component.
+    motion_details: Dict[int, tuple] = {}
+    ids_int: List[int] = []
+    for m in motions:
+        raw_id = m.get("motion_id")
+        if raw_id is None:
+            continue
+        motion_id = _coerce_motion_id(raw_id)
+        if motion_id is None:
+            logger.warning("Skipping invalid motion id in SVD batch fetch: %r", raw_id)
+        else:
+            ids_int.append(motion_id)
+
+    if ids_int and duckdb is not None:
+        con = None
+        try:
+            # Only the "?" placeholders are interpolated; ids are bound parameters.
+            placeholders = ", ".join("?" for _ in ids_int)
+            con = duckdb.connect(database=db_path, read_only=True)
+            db_rows = con.execute(
+                f"SELECT id, title, date, policy_area, url, body_text, voting_results "
+                f"FROM motions WHERE id IN ({placeholders})",
+                ids_int,
+            ).fetchall()
+            motion_details = {r[0]: r for r in db_rows}
+        except Exception:
+            logger.exception("Failed to batch-fetch motion details")
+        finally:
+            if con:
+                con.close()
+
+    def _score(m: dict) -> float:
+        return float(m.get("score") or 0.0)
+
+    pos_motions = [m for m in motions if _score(m) >= 0]
+    neg_motions = [m for m in motions if _score(m) < 0]
+
+    pos_pole = theme_with_flip.get("positive_pole", "")
+    neg_pole = theme_with_flip.get("negative_pole", "")
+
+    if theme_with_flip["flip"]:
+        left_pole, right_pole = pos_pole, neg_pole
+        left_motions, right_motions = pos_motions, neg_motions
+        left_arrow, right_arrow = "▲", "▼"
+    else:
+        left_pole, right_pole = neg_pole, pos_pole
+        left_motions, right_motions = neg_motions, pos_motions
+        left_arrow, right_arrow = "▼", "▲"
+
+    lcol, rcol = st.columns(2)
+    with lcol:
+        _render_motion_column(
+            f"**← {left_pole}**", left_arrow, left_motions, motion_details
+        )
+    with rcol:
+        _render_motion_column(
+            f"**{right_pole} →**", right_arrow, right_motions, motion_details
+        )
+
+
+def _coerce_motion_id(raw) -> int | None:
+    """Return *raw* as an int, or None when it is missing or not integer-like."""
+    if raw is None:
+        return None
+    try:
+        return int(raw)
+    except (TypeError, ValueError, OverflowError):
+        return None
+
+
+def _render_motion_column(
+    header: str, arrow: str, motions: list, motion_details: Dict[int, tuple]
+) -> None:
+    """Render one pole's motions as expanders inside the active Streamlit container.
+
+    ``motion_details`` maps motion id to a row of
+    (id, title, date, policy_area, url, body_text, voting_results).
+    """
+    st.markdown(header)
+    for m in motions:
+        mid = m.get("motion_id")
+        raw_title = m.get("title") or f"Motie #{mid}"
+        with st.expander(f"{arrow} {raw_title}"):
+            # Ids that cannot be converted simply have no metadata row.
+            key = _coerce_motion_id(mid)
+            row = motion_details.get(key) if key is not None else None
+            if not row:
+                st.caption("_Geen metadata beschikbaar_")
+                continue
+            date_str = str(row[2])[:10]
+            st.caption(f"📅 {date_str} | {row[3] or '—'}")
+            if row[4] and str(row[4]).startswith("http"):
+                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
+            if row[5]:
+                with st.expander("Toon volledige tekst"):
+                    st.write(row[5])
+            _render_voting_results(row[6])
diff --git a/analysis/tabs/quiz.py b/analysis/tabs/quiz.py
index 5c7bc9f..253fb33 100644
--- a/analysis/tabs/quiz.py
+++ b/analysis/tabs/quiz.py
@@ -1,18 +1,132 @@
-"""MP Quiz tab for the parliamentary explorer.
-
-This module will contain the MP quiz tab implementation.
-Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
-"""
+"""MP Quiz tab for the parliamentary explorer."""
from __future__ import annotations
+import pandas as pd
+
+import analysis.explorer_data as explorer_data
+from analysis.tabs._rendering import st
+
def build_mp_quiz_tab(db_path: str) -> None:
-    """Build the MP Quiz tab.
+    """Interactive quiz: narrow MPs by asking motion vote questions.
-    Currently delegates to explorer.py implementation.
-    Will be extracted when rendering logic is decoupled from Streamlit.
+
+    Flow:
+    - seed with SEED_MOTIONS motions that have individual votes
+    - present one question at a time; answers are stored in
+      st.session_state['mp_quiz_votes'], keyed by str(motion id)
+    - after each answer MotionDatabase.match_mps_for_votes ranks the MPs
+    - once the seed is exhausted, choose_discriminating_motions picks the
+      next question for the remaining candidates
+    - no further question is shown once MAX_QUESTIONS have been answered or
+      no discriminating motion remains
+
+    Args:
+        db_path: Database path passed to MotionDatabase and load_motions_df.
     """
-    import explorer
-    explorer.build_mp_quiz_tab(db_path)
+    import logging
+
+    log = logging.getLogger(__name__)
+
+    st.subheader("🧑⚖️ Welk tweede kamerlid ben jij?")
+    st.markdown(
+        "Beantwoord een paar eenvoudige ja/nee/onthoud vragen over moties om te zien welk Kamerlid het meest op jou lijkt."
+    )
+
+    SEED_MOTIONS = 8
+    MAX_QUESTIONS = 20
+
+    if "mp_quiz_votes" not in st.session_state:
+        st.session_state["mp_quiz_votes"] = {}
+    if "mp_quiz_asked" not in st.session_state:
+        st.session_state["mp_quiz_asked"] = []
+
+    from database import MotionDatabase as _MotionDatabase
+
+    db_inst = _MotionDatabase(db_path)
+
+    df = explorer_data.load_motions_df(db_path)
+    if df.empty:
+        st.warning("Geen moties beschikbaar om de quiz te starten.")
+        return
+
+    seed_ids = db_inst.get_motions_with_individual_votes(k=SEED_MOTIONS)
+    if not seed_ids:
+        st.warning("Geen individuele stemdata beschikbaar voor de quiz.")
+        return
+
+    def _user_votes() -> dict:
+        """Stored answers with the session's string keys converted to int ids."""
+        return {int(k): v for k, v in st.session_state["mp_quiz_votes"].items()}
+
+    def _text(value) -> str:
+        """Return *value* as display text; None and NaN become an empty string."""
+        if value is None or (isinstance(value, float) and value != value):
+            return ""
+        return str(value)
+
+    def _next_motion_id():
+        # Unanswered seed motions come first, in seed order.
+        for mid in seed_ids:
+            if str(mid) not in st.session_state["mp_quiz_votes"]:
+                return mid
+        try:
+            ranked = db_inst.match_mps_for_votes(_user_votes(), limit=200)
+        except Exception:
+            log.exception("match_mps_for_votes failed while picking the next question")
+            return None
+
+        candidates = [r["mp_name"] for r in ranked]
+        if not candidates:
+            return None
+        excluded = [int(k) for k in st.session_state["mp_quiz_votes"].keys()]
+        try:
+            next_ids = db_inst.choose_discriminating_motions(candidates, excluded, k=1)
+        except Exception:
+            log.exception("choose_discriminating_motions failed")
+            return None
+        return next_ids[0] if next_ids else None
+
+    _, col2 = st.columns([3, 1])
+    with col2:
+        # Count explicitly answered questions; auto-skipped motions are stored
+        # in mp_quiz_votes but were never asked.
+        st.caption(
+            f"Vragen beantwoord: {len(st.session_state['mp_quiz_asked'])}/{MAX_QUESTIONS}"
+        )
+        if st.button("Reset quiz"):
+            st.session_state["mp_quiz_votes"] = {}
+            st.session_state["mp_quiz_asked"] = []
+            st.rerun()
+
+    # Enforce the question cap: past the limit only the ranking is shown.
+    limit_reached = len(st.session_state["mp_quiz_asked"]) >= MAX_QUESTIONS
+    next_mid = None if limit_reached else _next_motion_id()
+    if limit_reached:
+        pass
+    elif next_mid is None:
+        st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.")
+    else:
+        motion_rows = df[df["id"] == next_mid]
+        if motion_rows.empty:
+            # The motion is missing from the dataframe: record a neutral
+            # answer so it is not selected again, then move on.
+            st.session_state["mp_quiz_votes"][str(next_mid)] = "Geen stem"
+            st.rerun()
+            return
+        motion_row = motion_rows.iloc[0]
+        st.markdown(f"### {_text(motion_row.get('title')) or f'Motie #{next_mid}'}")
+        explanation = _text(motion_row.get("layman_explanation"))
+        if explanation:
+            st.info(explanation)
+
+        with st.form(key=f"mp_quiz_form_{next_mid}"):
+            choice = st.radio(
+                "Wat zou jij stemmen?",
+                options=["Voor", "Tegen", "Onthouden", "Geen stem"],
+                index=3,
+            )
+            submitted = st.form_submit_button("Beantwoord en verder")
+
+        if submitted:
+            st.session_state["mp_quiz_votes"][str(next_mid)] = choice
+            st.session_state["mp_quiz_asked"].append(next_mid)
+            st.rerun()
+
+    try:
+        ranking = db_inst.match_mps_for_votes(_user_votes(), limit=50)
+    except Exception:
+        log.exception("match_mps_for_votes failed while ranking candidates")
+        ranking = []
+
+    if not ranking:
+        st.info("Nog geen antwoorden of geen overlapping met bestaande stemdata.")
+        return
+
+    st.markdown("**Top kandidaten**")
+    rdf = pd.DataFrame(ranking)
+    st.dataframe(rdf.head(10), use_container_width=True)
+
+    # A match is unique when exactly one MP holds the best agreement
+    # percentage and shares at least one vote with the user.
+    top_pct = ranking[0]["agreement_pct"]
+    top_matches = [r for r in ranking if r["agreement_pct"] == top_pct]
+    if len(top_matches) == 1 and top_matches[0]["overlap"] > 0:
+        st.success(
+            f"Unieke match gevonden: {top_matches[0]['mp_name']} ({top_matches[0]['party']})"
+        )
+    elif limit_reached:
+        st.warning(
+            "Maximaal aantal vragen beantwoord. Je hebt meerdere vergelijkbare kandidaten."
+        )
+    else:
+        st.info("Nog geen unieke match — vraag meer om verder te verfijnen.")
diff --git a/analysis/tabs/search.py b/analysis/tabs/search.py
index 2821bf7..de0fb23 100644
--- a/analysis/tabs/search.py
+++ b/analysis/tabs/search.py
@@ -1,18 +1,84 @@
-"""Search tab for the parliamentary explorer.
-
-This module will contain the search tab implementation.
-Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
-"""
+"""Search tab for the parliamentary explorer."""
from __future__ import annotations
+import pandas as pd
+
+import analysis.explorer_data as explorer_data
+from analysis.tabs._rendering import _render_voting_results, st
+
def build_search_tab(db_path: str, show_rejected: bool) -> None:
-    """Build the Motie Zoeken tab.
-    Currently delegates to explorer.py implementation.
-    Will be extracted when rendering logic is decoupled from Streamlit.
-    """
-    import explorer
-    explorer.build_search_tab(db_path, show_rejected)
+    """Build the Motie Zoeken tab.
+
+    Shows title, year-range and minimum-controversy filters, then lists the
+    50 most controversial matching motions with their vote breakdown, a link
+    and up to five similar motions each.
+
+    Args:
+        db_path: Database path passed to the ``explorer_data`` loaders.
+        show_rejected: When False, motions whose title is exactly
+            "Verworpen." are hidden.
+    """
+
+    def _number(value, default: float = 0.0) -> float:
+        """Return *value* as a float; None, NaN and non-numeric values give *default*."""
+        try:
+            number = float(value)
+        except (TypeError, ValueError):
+            return default
+        return default if number != number else number
+
+    st.subheader("Motie Zoeken")
+
+    df = explorer_data.load_motions_df(db_path)
+    if df.empty:
+        st.warning("Geen moties beschikbaar.")
+        return
+
+    if not show_rejected:
+        df = df[df["title"].fillna("").str.strip() != "Verworpen."]
+
+    col1, col2, col3 = st.columns([2, 1, 1])
+    with col1:
+        query = st.text_input(
+            "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen"
+        )
+    with col2:
+        years = sorted(df["year"].dropna().astype(int).unique().tolist())
+        # Without any known year there is nothing to slide over; skip the year
+        # filter instead of applying a fixed range that hides every row.
+        year_range = (
+            st.select_slider("Jaar", options=years, value=(years[0], years[-1]))
+            if years
+            else None
+        )
+    with col3:
+        min_controversy = st.slider(
+            "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05
+        )
+
+    # Every filter below builds a new frame, so df itself is never mutated.
+    working = df
+    if year_range is not None:
+        working = working[working["year"].between(year_range[0], year_range[1])]
+    if min_controversy > 0:
+        working = working[working["controversy_score"] >= min_controversy]
+    if query:
+        q = query.lower()
+        mask = working["title"].fillna("").str.lower().str.contains(q, regex=False)
+        working = working[mask]
+
+    working = working.sort_values(by="controversy_score", ascending=False)
+    st.caption(f"{len(working)} resultaten (top 50 getoond)")
+
+    for _, row in working.head(50).iterrows():
+        title = row.get("title") or f"Motie #{row['id']}"
+        date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
+        controversy = _number(row.get("controversy_score"))
+        with st.expander(f"**{title}** — {date_str} — 🔥 {controversy:.2f}"):
+            cols = st.columns(3)
+            cols[0].metric("Controverse", f"{controversy:.2f}")
+            cols[1].metric("Marge", f"{_number(row.get('winning_margin')):.2f}")
+            cols[2].metric("Jaar", int(row["year"]) if pd.notna(row["year"]) else "?")
+
+            _render_voting_results(row.get("voting_results"))
+
+            url = row.get("url")
+            if url and str(url).startswith("http"):
+                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")
+
+            sim = explorer_data.query_similar(db_path, int(row["id"]), top_k=5)
+            if sim.empty:
+                st.caption("_Nog geen vergelijkbare moties beschikbaar_")
+                continue
+            st.markdown("**Vergelijkbare moties:**")
+            for _, s in sim.iterrows():
+                s_date = (
+                    pd.to_datetime(s["date"]).strftime("%Y")
+                    if pd.notna(s.get("date"))
+                    else ""
+                )
+                st.markdown(
+                    f"- {s.get('title', 'Onbekend')} *(score: {s['score']:.3f}, {s_date})*"
+                )
diff --git a/analysis/tabs/trajectories.py b/analysis/tabs/trajectories.py
index 18e39fd..863e19b 100644
--- a/analysis/tabs/trajectories.py
+++ b/analysis/tabs/trajectories.py
@@ -1,20 +1,774 @@
-"""Trajectories tab for the parliamentary explorer.
-
-This module will contain the trajectories tab implementation.
-Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
-"""
+"""Trajectories tab for the parliamentary explorer."""
from __future__ import annotations
-from typing import List
+import json
+import logging
+import os
+import re
+import traceback
+from datetime import datetime
+from typing import Dict, List, Optional, Tuple
+import numpy as np
-def build_trajectories_tab(db_path: str, window_size: str) -> None:
- """Build the Partij Trajectories tab.
+from analysis import config
+import analysis.explorer_data as explorer_data
+from analysis import trajectory
+from analysis.tabs._rendering import (
+ PARTY_COLOURS,
+ _add_y_direction_annotations,
+ go,
+ st,
+)
+from explorer_helpers import compute_party_centroids, inspect_positions_for_issues
+
+KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES
+
+logger = logging.getLogger(__name__)
+
+_last_trajectories_diagnostics: dict = {}
+_last_diagnostics = _last_trajectories_diagnostics
+
+
+def get_debug_trajectories_enabled() -> bool:
+    """Tell whether trajectory debugging is switched on via the environment.
+
+    Debug mode is on only when EXPLORER_DEBUG_TRAJECTORIES is exactly
+    "1", "true" or "True"; an unset variable or any other value means off.
+    """
+    enabled_values = ("1", "true", "True")
+    return os.environ.get("EXPLORER_DEBUG_TRAJECTORIES", "") in enabled_values
+
+
+def select_trajectory_plot_data(
+    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
+    party_map: Dict[str, str],
+    windows: List[str],
+    selected_parties: List[str],
+    smooth_alpha: float = 0.35,
+    mp_fallback_count: Optional[int] = None,
+) -> Tuple[go.Figure, int, Optional[str]]:
+    """Return (fig, trace_count, banner_text).
-    Currently delegates to explorer.py implementation.
-    Will be extracted when rendering logic is decoupled from Streamlit.
+
+    Helper used by build_trajectories_tab. Does not call Streamlit.
+
+    Party centroids come from ``compute_party_centroids``. When no party has
+    a usable centroid, the figure falls back to individual MP trajectories
+    for the MPs present in the most windows.
+
+    Args:
+        positions_by_window: {window_id: {mp_name: (x, y)}}.
+        party_map: Name-to-party mapping, forwarded to the centroid helper.
+        windows: Window ids to plot; also used as hover text for party traces
+            (presumably one centroid per window, in this order; verify
+            against compute_party_centroids).
+        selected_parties: Parties to draw; if none of them is plottable,
+            every plottable party is drawn instead.
+        smooth_alpha: EMA weight of the newest value; values >= 1.0 disable
+            smoothing.
+        mp_fallback_count: Number of MPs drawn in fallback mode. Defaults to
+            the EXPLORER_MP_FALLBACK_COUNT environment variable, or 20.
+
+    Returns:
+        The figure, the number of traces added, and a banner text that is
+        set only when the MP fallback was used.
     """
-    import explorer
+
+    def _is_missing(value) -> bool:
+        """True for None and NaN; values np.isnan cannot handle count as present."""
+        if value is None:
+            return True
+        try:
+            return bool(np.isnan(value))
+        except TypeError:
+            return False
+
+    def _as_float(value) -> float:
+        return float("nan") if _is_missing(value) else float(value)
+
+    def _ema_smooth(values: List[float], alpha: float) -> List[float]:
+        """Exponentially smooth *values*; missing entries stay NaN and are skipped."""
+        if not values or alpha >= 1.0:
+            return values
+        smoothed: List[float] = []
+        prev = None
+        for v in values:
+            if _is_missing(v):
+                smoothed.append(float("nan"))
+                continue
+            v = float(v)
+            prev = v if prev is None else alpha * v + (1 - alpha) * prev
+            smoothed.append(float(prev))
+        return smoothed
+
+    if mp_fallback_count is None:
+        try:
+            mp_fallback_count = int(os.getenv("EXPLORER_MP_FALLBACK_COUNT", "20"))
+        except (TypeError, ValueError):
+            mp_fallback_count = 20
+
+    party_centroids, _meta = compute_party_centroids(
+        positions_by_window, party_map, windows
+    )
+
+    try:
+        inspector_summary = inspect_positions_for_issues(positions_by_window, party_map)
+    except Exception:
+        # The inspector is diagnostic only: record the failure where the debug
+        # UI can find it and carry on with plotting.
+        tb = traceback.format_exc()
+        inspector_summary = {}
+        select_trajectory_plot_data._last_diagnostics = {
+            "stage": "inspector_exception",
+            "exception": tb,
+        }
+        _last_trajectories_diagnostics.update(
+            {"stage": "inspector_exception", "exception": tb}
+        )
+        logger.exception("inspect_positions_for_issues failed")
+    logger.debug("select_trajectory_plot_data inspector summary: %s", inspector_summary)
+
+    # A party is plottable when at least one centroid has a usable coordinate.
+    plottable_parties = [
+        party
+        for party, vals in party_centroids.items()
+        if any(not (_is_missing(x) and _is_missing(y)) for x, y in vals)
+    ]
+
+    logger.debug(
+        "[TRAJ DEBUG] plottable_parties: %d parties, sample=%s",
+        len(plottable_parties),
+        (plottable_parties[:5] if plottable_parties else "empty"),
+    )
+    logger.debug(
+        "[TRAJ DEBUG] party_centroids keys: %s",
+        list(party_centroids.keys())[:10],
+    )
+    if party_centroids:
+        sample_party = next(iter(party_centroids))
+        logger.debug(
+            "[TRAJ DEBUG] Sample party '%s' centroids: %s...",
+            sample_party,
+            party_centroids[sample_party][:3],
+        )
+
+    fig = go.Figure()
+    trace_count = 0
+
+    if not plottable_parties:
+        # Fallback: plot the MPs that appear in the most windows.
+        mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
+        for wid in windows:
+            for mp_name, xy in positions_by_window.get(wid, {}).items():
+                try:
+                    x, y = float(xy[0]), float(xy[1])
+                except Exception:
+                    continue
+                mp_positions.setdefault(mp_name, {})[wid] = (x, y)
+
+        mp_activity = sorted(
+            ((mp, len(wdict)) for mp, wdict in mp_positions.items()),
+            key=lambda t: t[1],
+            reverse=True,
+        )
+        top_mps = [mp for mp, _ in mp_activity[:mp_fallback_count]]
+
+        for mp in top_mps:
+            wids_sorted = sorted(mp_positions[mp])
+            xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
+            ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
+            fig.add_trace(
+                go.Scatter(
+                    x=_ema_smooth(xs_raw, smooth_alpha),
+                    y=_ema_smooth(ys_raw, smooth_alpha),
+                    mode="lines+markers",
+                    name=mp,
+                    text=wids_sorted,
+                    # Raw (unsmoothed) coordinates for hover display.
+                    customdata=list(zip(xs_raw, ys_raw)),
+                    line=dict(color="#888888", shape="spline", smoothing=1.3),
+                    marker=dict(color="#888888", size=6),
+                )
+            )
+            trace_count += 1
+
+        banner_text = "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback."
+        logger.debug(
+            "[TRAJ DEBUG] Fallback to MP trajectories: trace_count=%d, top_mps=%d",
+            trace_count,
+            len(top_mps),
+        )
+        return fig, trace_count, banner_text
+
+    to_plot = [p for p in selected_parties if p in plottable_parties]
+    if not to_plot:
+        to_plot = plottable_parties
+
+    for party in to_plot:
+        vals = party_centroids.get(party, [])
+        if not vals:
+            continue
+        xs_raw = [v[0] for v in vals]
+        ys_raw = [v[1] for v in vals]
+        colour = PARTY_COLOURS.get(party, "#9E9E9E")
+        fig.add_trace(
+            go.Scatter(
+                x=_ema_smooth(xs_raw, smooth_alpha),
+                y=_ema_smooth(ys_raw, smooth_alpha),
+                mode="lines+markers",
+                name=party,
+                text=windows,
+                # Raw (unsmoothed) coordinates for hover display.
+                customdata=[(_as_float(x), _as_float(y)) for x, y in zip(xs_raw, ys_raw)],
+                line=dict(color=colour, shape="spline", smoothing=1.3),
+                marker=dict(color=colour, size=8),
+            )
+        )
+        trace_count += 1
+
+    logger.debug(
+        "[TRAJ DEBUG] Final trace_count=%d, plottable_parties=%d, to_plot=%s",
+        trace_count,
+        len(plottable_parties),
+        len(to_plot),
+    )
+    return fig, trace_count, None
+
+
+def build_trajectories_tab(db_path: str, window_size: str) -> None:
+ """Build the Partij Trajectories tab."""
+ logging.getLogger(__name__).debug(
+ "[TRAJ DEBUG] build_trajectories_tab called — db_path=%s, window_size=%s",
+ db_path,
+ window_size,
+ )
+ st.subheader("Partij Trajectories")
+ st.markdown("Hoe bewegen partijen over de tijdsvensters heen?")
+
+ positions_by_window, axis_def = explorer_data.load_positions(db_path, window_size)
+ logging.getLogger(__name__).debug(
+ "[TRAJ DEBUG] load_positions → %d windows, total MPs=%d",
+ len(positions_by_window),
+ sum(len(v) for v in positions_by_window.values()),
+ )
+ if axis_def is None:
+ axis_def = {}
+ if not positions_by_window:
+ try:
+ _last_trajectories_diagnostics.update(
+ {
+ "stage": "load_positions_empty",
+ "positions_by_window_len": len(positions_by_window),
+ }
+ )
+ except Exception:
+ pass
+ try:
+ st.warning("Geen positiedata beschikbaar.")
+ except Exception:
+ pass
+ try:
+ if get_debug_trajectories_enabled():
+ try:
+ st.text_area(
+ "Trajectories diagnostics",
+ json.dumps(_last_trajectories_diagnostics, default=str),
+ height=160,
+ )
+ except Exception:
+ pass
+ except Exception:
+ pass
+ return
+
+ party_map = explorer_data.load_party_map(db_path)
+ logging.getLogger(__name__).debug(
+ "[TRAJ DEBUG] load_party_map → %d entries, sample=%s",
+ len(party_map),
+ list(party_map.items())[:3],
+ )
+
+ def normalize_mp_name(name):
+ """Normalize MP name for better matching between data sources."""
+ if not name:
+ return ""
+ name = name.strip()
+ if "," in name and ", " not in name:
+ name = name.replace(",", ", ")
+ return name
+
+ party_map = {normalize_mp_name(k): v for k, v in party_map.items()}
+
+ normalized_positions = {}
+ for window, positions in positions_by_window.items():
+ normalized_positions[window] = {
+ normalize_mp_name(k): v for k, v in positions.items()
+ }
+ positions_by_window = normalized_positions
+
+ all_mp_names = set()
+ for positions in positions_by_window.values():
+ all_mp_names.update(positions.keys())
+
+ matched_names = sum(1 for mp in all_mp_names if mp in party_map)
+ if all_mp_names:
+ logger.info(
+ f"MP name matching: {matched_names}/{len(all_mp_names)} matched ({100 * matched_names / len(all_mp_names):.1f}%)"
+ )
+ else:
+ logger.info("MP name matching: no MPs found in positions data")
+
+ if matched_names == 0 and len(all_mp_names) > 0:
+ logger.warning("No MP names matched between positions and party_map!")
+ logger.warning(f"Sample positions names: {list(all_mp_names)[:5]}")
+ logger.warning(f"Sample party_map names: {list(party_map.keys())[:5]}")
+
+ windows = sorted(positions_by_window.keys())
+
+ centroids: Dict[str, Dict[str, Tuple[float, float]]] = {}
+ all_parties: set = set()
+
+ def _strip_paren(name: str) -> str:
+ return re.sub(r"\s*\([^)]*\)", "", name).strip()
+
+ for wid in windows:
+ pos = positions_by_window.get(wid, {})
+ per_party: Dict[str, List[Tuple[float, float]]] = {}
+ for mp_name, (x, y) in pos.items():
+ party = party_map.get(mp_name) or party_map.get(
+ _strip_paren(mp_name), "Unknown"
+ )
+ if party == "Unknown":
+ continue
+ per_party.setdefault(party, []).append((x, y))
+ for party, coords in per_party.items():
+ all_parties.add(party)
+ xs = [c[0] for c in coords]
+ ys = [c[1] for c in coords]
+ centroids.setdefault(party, {})[wid] = (
+ float(np.mean(xs)),
+ float(np.mean(ys)),
+ )
+
+ all_parties = sorted(
+ set(party_map.get(mp) for MPs in positions_by_window.values() for mp in MPs)
+ - {None, "Unknown"}
+ )
+ logging.getLogger(__name__).debug(
+ "[TRAJ DEBUG] all_parties (raw from party_map) → %d parties: %s",
+ len(all_parties),
+ all_parties[:10],
+ )
+ all_parties_sorted = sorted(all_parties)
+
+ if not all_parties_sorted:
+ st.info(
+ "Geen partijen beschikbaar om trajecten te tekenen. Controleer of de party mapping is geladen (mp_metadata) en of de minimum Kamerleden-instelling te hoog staat."
+ )
+ try:
+ st.caption(f"Bekende partijen in party_map: {len(party_map)}")
+ except Exception:
+ pass
+
+ default_parties = [p for p in ["CDA", "D66", "VVD"] if p in all_parties]
+ if not default_parties:
+ default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties]
+ if not default_parties:
+ default_parties = all_parties_sorted[:6]
+
+ selected_parties = st.multiselect(
+ "Selecteer partijen",
+ options=all_parties_sorted,
+ default=default_parties,
+ )
+
+ def _ema_smooth(values: List[float], alpha: float) -> List[float]:
+ if not values or alpha >= 1.0:
+ return values
+ smoothed = [values[0]]
+ for v in values[1:]:
+ smoothed.append(alpha * v + (1 - alpha) * smoothed[-1])
+ return smoothed
+
+ smooth_alpha = 0.35
+
+ if not centroids:
+ st.info(
+ "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback."
+ )
+
+ mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
+ for wid in windows:
+ pos = positions_by_window.get(wid, {})
+ for mp_name, xy in pos.items():
+ try:
+ x, y = float(xy[0]), float(xy[1])
+ except Exception:
+ continue
+ mp_positions.setdefault(mp_name, {})[wid] = (x, y)
+
+ mp_positions = {
+ mp: pos
+ for mp, pos in mp_positions.items()
+ if len(pos) >= 2
+ and not all(np.isnan(x) and np.isnan(y) for x, y in pos.values())
+ }
+
+ if not mp_positions:
+ st.warning("Geen positiedata beschikbaar voor trajectplotten.")
+ _last_trajectories_diagnostics.update(
+ {
+ "stage": "no_mp_positions",
+ "mp_positions_count": 0,
+ }
+ )
+ try:
+ if get_debug_trajectories_enabled():
+ try:
+ st.text_area(
+ "Trajectories diagnostics",
+ json.dumps(_last_trajectories_diagnostics, default=str),
+ height=160,
+ )
+ except Exception:
+ pass
+ except Exception:
+ pass
+ return
+
+ st.session_state["_trajectory_mp_positions"] = mp_positions
+
+ mp_list = sorted(mp_positions.keys())
+ default_mps = mp_list[:6]
+ selected_mps = st.multiselect(
+ "Selecteer Kamerleden (fallback)", options=mp_list, default=default_mps
+ )
+
+ fig = go.Figure()
+ trace_count = 0
+ for mp in selected_mps:
+ wids_sorted = sorted(mp_positions[mp].keys())
+ xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
+ ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
+ xs = _ema_smooth(xs_raw, smooth_alpha)
+ ys = _ema_smooth(ys_raw, smooth_alpha)
+ custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)]
+ fig.add_trace(
+ go.Scatter(
+ x=xs,
+ y=ys,
+ mode="lines+markers",
+ name=mp,
+ text=wids_sorted,
+ customdata=custom_raw,
+ line=dict(color="#888888", shape="spline", smoothing=1.3),
+ marker=dict(color="#888888", size=6),
+ hovertemplate=(
+ f"{mp}<br>"
+ "venster: %{text}<br>"
+ "x (smoothed): %{x:.3f}<br>"
+ "x (raw): %{customdata[0]:.3f}<br>"
+ "y (smoothed): %{y:.3f}<br>"
+ "y (raw): %{customdata[1]:.3f}"
+ ),
+ )
+ )
+ trace_count += 1
+
+ _add_y_direction_annotations(fig)
+ if trace_count == 0:
+ st.info(
+ "Geen trajecten getekend: geen geselecteerde Kamerleden met voldoende data."
+ )
+ else:
+ st.plotly_chart(fig, use_container_width=True)
+ return
+
+ if os.getenv("EXPLORER_FORCE_SHOW_TRAJECTORIES") in ("1", "true", "True"):
+ mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
+ for wid in windows:
+ pos = positions_by_window.get(wid, {})
+ for mp_name, (x, y) in pos.items():
+ mp_positions.setdefault(mp_name, {})[wid] = (float(x), float(y))
+
+ mp_list = sorted(mp_positions.keys())
+ if not mp_list:
+ st.info("Geen MP-positiegegevens beschikbaar om te tonen.")
+ return
+
+ sample_mps = mp_list[:6]
+ fig = go.Figure()
+ for mp in sample_mps:
+ wids_sorted = sorted(mp_positions[mp].keys())
+ xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
+ ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
+ xs = _ema_smooth(xs_raw, 0.35)
+ ys = _ema_smooth(ys_raw, 0.35)
+ custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)]
+ fig.add_trace(
+ go.Scatter(
+ x=xs,
+ y=ys,
+ mode="lines+markers",
+ name=mp,
+ text=wids_sorted,
+ customdata=custom_raw,
+ line=dict(color="#444444", shape="spline", smoothing=1.3),
+ marker=dict(color="#444444", size=6),
+ hovertemplate=(
+ f"{mp}<br>"
+ "venster: %{text}<br>"
+ "x (smoothed): %{x:.3f}<br>"
+ "x (raw): %{customdata[0]:.3f}<br>"
+ "y (smoothed): %{y:.3f}<br>"
+ "y (raw): %{customdata[1]:.3f}"
+ ),
+ )
+ )
+ _add_y_direction_annotations(fig)
+ st.plotly_chart(fig, use_container_width=True)
+ return
+
+ try:
+ debug_checkbox = False
+ try:
+ debug_checkbox = st.checkbox(
+ "Enable trajectories diagnostics (show extra info)",
+ value=get_debug_trajectories_enabled(),
+ )
+ except Exception:
+ debug_checkbox = get_debug_trajectories_enabled()
+ if debug_checkbox:
+ try:
+ with st.expander(
+ "DEBUG: Trajectories data (showing diagnostics)", expanded=False
+ ):
+ st.write("windows (count):", len(windows))
+ st.write("windows sample:", windows[:10])
+ st.write("party_map entries:", len(party_map))
+ st.write("parties with centroids:", len(all_parties_sorted))
+ st.write("default_parties:", default_parties)
+ st.write("selected_parties:", selected_parties)
+ st.write("min_mps setting:", 3)
+ sample = {
+ p: len(centroids.get(p, {}))
+ for p in list(all_parties_sorted)[:8]
+ }
+ st.write("sample centroid window counts per party:", sample)
+ except Exception:
+ pass
+ except Exception:
+ pass
+
+ smoothing_method = st.selectbox(
+ "Smoothing methode",
+ options=["EMA", "Spline", "None"],
+ index=0,
+ help="EMA = exponential moving average; Spline = low-degree polynomial spline fit; None = raw centroids",
+ )
+
+ smooth_alpha = 1.0
+ if smoothing_method == "EMA":
+ smooth_alpha = st.slider(
+ "Glad maken (EMA-\u03b1)",
+ min_value=0.1,
+ max_value=1.0,
+ value=0.35,
+ step=0.05,
+ help=(
+ "\u03b1=1.0 toont de ruwe data; lagere waarden maken de lijn gladder. "
+ "Standaard 0.35 voor een goed evenwicht tussen detail en ruis."
+ ),
+ )
+
+    def _spline_smooth(values: List[float]) -> List[float]:
+        """Smooth ``values`` with a least-squares polynomial over their indices.
+
+        A polynomial of degree ``min(3, len(values) - 1)`` is fitted with
+        ``np.polyfit`` against the positions ``0..n-1`` and evaluated at those
+        same positions. With four or fewer samples the polynomial has as many
+        coefficients as there are points, so the fit reproduces the input up
+        to rounding.
+
+        Returns:
+            The input list itself when it has two or fewer samples or when the
+            fit raises; otherwise a new list of Python floats.
+        """
+        count = len(values)
+        if count < 3:
+            return values
+        degree = min(3, count - 1)
+        try:
+            positions = np.arange(count, dtype=float)
+            samples = np.array(values, dtype=float)
+            fitted = np.polyval(np.polyfit(positions, samples, deg=degree), positions)
+            return list(map(float, fitted))
+        except Exception:
+            # Best effort: any fitting failure falls back to the raw series.
+            return values
+
+ fig = go.Figure()
+ trace_count = 0
+ helper_succeeded = False
+ try:
+ fig2, trace_count2, banner_text = select_trajectory_plot_data(
+ positions_by_window, party_map, windows, selected_parties, smooth_alpha
+ )
+ if fig2 is not None:
+ fig = fig2
+ trace_count = trace_count2
+ helper_succeeded = True
+ if banner_text:
+ try:
+ st.caption(banner_text)
+ except Exception:
+ pass
+ try:
+ _last_trajectories_diagnostics.update({"banner_text": banner_text})
+ except Exception:
+ pass
+ except Exception as e:
+ tb = traceback.format_exc()
+ try:
+ select_trajectory_plot_data._last_diagnostics = {"exception": tb}
+ except Exception:
+ pass
+ try:
+ _last_trajectories_diagnostics.update(
+ {"stage": "select_helper_exception", "exception": tb}
+ )
+ except Exception:
+ pass
+ logger.exception("select_trajectory_plot_data failed")
+ debug_enabled = get_debug_trajectories_enabled()
+ if debug_enabled:
+ try:
+ st.text_area("select_trajectory_plot_data traceback", tb, height=240)
+ except Exception:
+ pass
+ logging.getLogger(__name__).debug(
+ "[TRAJ DEBUG] helper_succeeded=%s", helper_succeeded
+ )
+ if not helper_succeeded:
+ for party in selected_parties:
+ if party not in centroids:
+ continue
+ wids_sorted = sorted(centroids[party].keys())
+ xs_raw = [centroids[party][w][0] for w in wids_sorted]
+ ys_raw = [centroids[party][w][1] for w in wids_sorted]
+ xs = _ema_smooth(xs_raw, smooth_alpha)
+ ys = _ema_smooth(ys_raw, smooth_alpha)
+ custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)]
+ colour = PARTY_COLOURS.get(party, "#9E9E9E")
+ fig.add_trace(
+ go.Scatter(
+ x=xs,
+ y=ys,
+ mode="lines+markers",
+ name=party,
+ text=wids_sorted,
+ customdata=custom_raw,
+ line=dict(color=colour, shape="spline", smoothing=1.3),
+ marker=dict(color=colour, size=8),
+ hovertemplate=(
+ f"{party}<br>"
+ "venster: %{text}<br>"
+ "x (smoothed): %{x:.3f}<br>"
+ "x (raw): %{customdata[0]:.3f}<br>"
+ "y (smoothed): %{y:.3f}<br>"
+ "y (raw): %{customdata[1]:.3f}"
+ ),
+ )
+ )
+ trace_count += 1
+
+ _THRESHOLD = 0.65
+ x_conf_map = axis_def.get("x_label_confidence", {}) or {}
+ y_conf_map = axis_def.get("y_label_confidence", {}) or {}
+
+    def _mean_conf(m: dict) -> Optional[float]:
+        """Return the arithmetic mean of the non-``None`` values in ``m``.
+
+        Returns ``None`` when ``m`` is empty or every value is ``None``.
+        """
+        present = [score for score in m.values() if score is not None]
+        return float(sum(present) / len(present)) if present else None
+
+ x_mean = _mean_conf(x_conf_map)
+ y_mean = _mean_conf(y_conf_map)
+
+ x_title = trajectory.choose_trajectory_title(axis_def, "x", threshold=_THRESHOLD)
+ y_title = trajectory.choose_trajectory_title(axis_def, "y", threshold=_THRESHOLD)
+
+ fig.update_layout(
+ title="Partij trajectories",
+ xaxis_title=x_title,
+ yaxis_title=y_title,
+ height=600,
+ legend_title_text="Partij",
+ )
+ _add_y_direction_annotations(fig)
+ try:
+ _last_trajectories_diagnostics.update({"trace_count": trace_count})
+ except Exception:
+ pass
+ debug_enabled = get_debug_trajectories_enabled()
+ if trace_count == 0:
+ _last_trajectories_diagnostics.update(
+ {
+ "stage": "zero_traces",
+ "positions_count": sum(len(pos) for pos in positions_by_window.values())
+ if positions_by_window
+ else 0,
+ "party_map_count": len(party_map) if party_map else 0,
+ "centroids_count": len(centroids) if centroids else 0,
+ "selected_parties_count": len(selected_parties)
+ if selected_parties
+ else 0,
+ "timestamp": datetime.now().isoformat(),
+ }
+ )
+ if positions_by_window and party_map and not centroids:
+ sample_mps = []
+ for window, positions in list(positions_by_window.items())[:1]:
+ sample_mps = list(positions.keys())[:5]
+ break
+ matched = sum(1 for mp in sample_mps if mp in party_map)
+ _last_trajectories_diagnostics["name_match_check"] = {
+ "sample_mps": sample_mps,
+ "matched_in_party_map": matched,
+ "sample_size": len(sample_mps),
+ }
+ if trace_count == 0:
+ st.info("📊 **Geen trajecten getekend**")
+
+ with st.expander("🔍 Diagnostische informatie"):
+ st.write("**Data status:**")
+ st.write(
+ f"- Positie vensters: {len(positions_by_window) if positions_by_window else 0}"
+ )
+ st.write(f"- Party mappings: {len(party_map) if party_map else 0}")
+ st.write(
+ f"- Geselecteerde partijen: {len(selected_parties) if selected_parties else 0}"
+ )
+
+ if "centroid_diagnostics" in locals():
+ st.write("**Centroid berekening:**")
+ st.write(
+ f"- Partijen met posities: {len(centroid_diagnostics.get('parties_with_positions', []))}"
+ )
+ st.write(
+ f"- Partijen met alleen NaN: {len(centroid_diagnostics.get('parties_all_nan', []))}"
+ )
+
+ st.write("\n**Mogelijke oorzaken:**")
+ st.write("1. Geen SVD vectoren berekend voor de geselecteerde vensters")
+ st.write("2. MP namen in posities komen niet overeen met party_map")
+ st.write("3. Alle geselecteerde partijen hebben te weinig MPs (< 5)")
+
+ if st.button("🔧 Database diagnostiek uitvoeren"):
+ with st.spinner("Bezig met diagnostiek..."):
+ from scripts.diagnose_trajectories_cli import (
+ run as diagnose_trajectories,
+ )
- explorer.build_trajectories_tab(db_path, window_size)
+ results = diagnose_trajectories(db_path)
+ st.json(results)
+ else:
+ try:
+ st.info(
+ f"[DEBUG] trace_count={trace_count}, fig data count={len(fig.data)}, layout title={fig.layout.title.text if fig.layout.title else 'none'}"
+ )
+ except Exception:
+ pass
+ try:
+ logging.getLogger(__name__).debug(
+ "[TRAJ DEBUG] About to render plotly chart — trace_count=%d, banner=%s, fig has %d traces",
+ trace_count,
+ banner_text,
+ len(fig.data),
+ )
+ st.plotly_chart(fig, use_container_width=True)
+ except Exception as e:
+ st.error(f"Trajectories rendering failed: {e}")
+ if get_debug_trajectories_enabled():
+ try:
+ st.json(_last_trajectories_diagnostics)
+ except Exception:
+ st.text_area(
+ "Trajectories diagnostics (JSON failed)",
+ json.dumps(_last_trajectories_diagnostics, default=str),
+ height=240,
+ )
diff --git a/explorer.py b/explorer.py
index 89fb348..7369c85 100644
--- a/explorer.py
+++ b/explorer.py
@@ -37,19 +37,18 @@ from analysis import explorer_data
from analysis import projections
from analysis import trajectory
+# Backwards-compatible re-export used by tests
+choose_trajectory_title = trajectory.choose_trajectory_title
+
try:
import plotly.express as px
import plotly.graph_objects as go
except Exception:
- # Plotly may be unavailable in lightweight test environments. Provide a tiny
- # local fallback that exposes a Figure-like object with `.data` and
- # `add_trace()` so unit tests can run without installing plotly.
px = None
import types
class _DummyTrace:
def __init__(self, **kwargs):
- # Preserve commonly-used attributes accessed by tests
self.name = kwargs.get("name")
self.x = kwargs.get("x")
self.y = kwargs.get("y")
@@ -61,23 +60,16 @@ except Exception:
self.data = []
def add_trace(self, trace):
- # plotly passes a Scatter object; our tests only inspect `.data`
- # elements for `.name` and `.customdata`. Accept both our
- # _DummyTrace and dict-like kwargs.
if isinstance(trace, _DummyTrace):
self.data.append(trace)
else:
- # Some code may call go.Scatter(...) which returns an object;
- # if a mapping is passed here instead, coerce to _DummyTrace.
try:
- # attempt attribute access
name = getattr(trace, "name", None)
x = getattr(trace, "x", None)
y = getattr(trace, "y", None)
text = getattr(trace, "text", None)
customdata = getattr(trace, "customdata", None)
except Exception:
- # Last resort: treat as mapping
name = trace.get("name") if hasattr(trace, "get") else None
x = trace.get("x") if hasattr(trace, "get") else None
y = trace.get("y") if hasattr(trace, "get") else None
@@ -90,23 +82,19 @@ except Exception:
)
def add_annotation(self, *args, **kwargs):
- # noop for tests that don't import full plotly
return None
go = types.SimpleNamespace(
Figure=_DummyFigure, Scatter=lambda **kwargs: _DummyTrace(**kwargs)
)
+
try:
import streamlit as st
except Exception:
- # Minimal dummy replacement for Streamlit used during tests / import-time.
- # We only need a tiny subset so unit tests can import explorer without
- # installing streamlit. All functions here are no-ops or simple fallbacks.
class _DummySt:
def cache_data(self, *args, **kwargs):
def _decorator(func):
return func
-
return _decorator
def markdown(self, *args, **kwargs):
@@ -128,7 +116,6 @@ except Exception:
return None
def checkbox(self, *args, **kwargs):
- # default to False unless value provided
return kwargs.get("value", False)
def warning(self, *args, **kwargs):
@@ -138,7 +125,6 @@ except Exception:
return None
def selectbox(self, *args, **kwargs):
- # return first option if options provided
opts = (
kwargs.get("options")
if kwargs.get("options") is not None
@@ -170,11 +156,9 @@ except Exception:
def __exit__(self_inner, exc_type, exc, tb):
return False
-
return _Ctx()
def columns(self, *args, **kwargs):
- # Return a tuple of simple objects with the methods used in the UI
class _Col:
def markdown(self, *a, **k):
return None
@@ -184,251 +168,34 @@ except Exception:
def dataframe(self, *a, **k):
return None
-
n = len(args[0]) if args else 1
return tuple(_Col() for _ in range(n))
st = _DummySt()
-# Temporary diagnostics for Trajectories plotting — set by instrumentation when
-# EXPLORER_DEBUG_TRAJECTORIES is enabled. This is intended to be small, opt-in and
-# reversible once root cause is found.
-_last_trajectories_diagnostics: dict = {}
-# Backwards/alternate name used by instrumentation: keep a second module-level
-# reference so callers/tests can look for either name.
-_last_diagnostics = _last_trajectories_diagnostics
-
-
-def get_debug_trajectories_enabled() -> bool:
- """Return True when EXPLORER_DEBUG_TRAJECTORIES env var indicates debug mode.
- Accepts '1', 'true', 'True'. Used as default for a per-tab checkbox.
- """
- v = os.getenv("EXPLORER_DEBUG_TRAJECTORIES")
- return str(v) in ("1", "true", "True")
-
-
-from explorer_helpers import (
- inspect_positions_for_issues,
- compute_party_centroids,
+# Re-export trajectories diagnostics for backwards compatibility
+from analysis.tabs.trajectories import (
+ _last_diagnostics,
+ _last_trajectories_diagnostics,
+ get_debug_trajectories_enabled,
)
-def select_trajectory_plot_data(
- positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
- party_map: Dict[str, str],
- windows: List[str],
- selected_parties: List[str],
- smooth_alpha: float = 0.35,
- mp_fallback_count: Optional[int] = None,
-) -> Tuple[go.Figure, int, Optional[str]]:
- """Return (fig, trace_count, banner_text).
-
- Helper used by build_trajectories_tab. Does not call Streamlit.
- """
- # Use env var default if not provided
- if mp_fallback_count is None:
- try:
- mp_fallback_count = int(os.getenv("EXPLORER_MP_FALLBACK_COUNT", "20"))
- except Exception:
- mp_fallback_count = 20
-
- # Compute per-party centroids aligned to windows
- party_centroids, meta = compute_party_centroids(
- positions_by_window, party_map, windows
+def select_trajectory_plot_data(*args, **kwargs):
+ """Lazy wrapper around analysis.tabs.trajectories.select_trajectory_plot_data.
+
+ The implementation is imported when this function is called, and all
+ positional and keyword arguments are forwarded to it unchanged; whatever
+ the implementation returns is returned as-is.
+
+ NOTE(review): this module also imports names from
+ analysis.tabs.trajectories at top level, so the deferred import here may
+ not be needed to avoid an import cycle -- confirm.
+ NOTE(review): this wrapper is a distinct function object from the
+ implementation, so attributes stored on the implementation (for example
+ ``_last_diagnostics``) are not visible on this name -- verify that no
+ caller or test reads them from here.
+ """
+ from analysis.tabs.trajectories import (
+ select_trajectory_plot_data as _impl,
+ )
- # Use inspector to collect diagnostics (import-safe, pure helper). Keep this
- # call local to the helper to ensure the inspector is exercised and the
- # diagnostics are available for logging/debugging. Do not call Streamlit
- # from here so the function remains import-safe for tests.
- try:
- inspector_summary = inspect_positions_for_issues(positions_by_window, party_map)
- except Exception:
- # Capture traceback diagnostics so callers (and tests) can inspect what went wrong.
- tb = traceback.format_exc()
- inspector_summary = {}
- try:
- # Attach diagnostics to the helper function for callers that want to inspect
- # the last error directly on the function object.
- select_trajectory_plot_data._last_diagnostics = {
- "stage": "inspector_exception",
- "exception": tb,
- }
- except Exception:
- # best-effort only
- pass
- try:
- # Also update the module-level trajectories diagnostics so the UI can show
- # a compact summary when debugging is enabled.
- _last_trajectories_diagnostics.update(
- {"stage": "inspector_exception", "exception": tb}
- )
- except Exception:
- pass
- logger.debug("select_trajectory_plot_data inspector summary: %s", inspector_summary)
-
- # Determine which parties have at least one non-nan centroid
- plottable_parties = []
- for p, vals in party_centroids.items():
- has_valid = any(not (np.isnan(x) and np.isnan(y)) for x, y in vals)
- if has_valid:
- plottable_parties.append(p)
-
- # DEBUG: Show plottable_parties status (use logger.debug instead of print)
- logging.getLogger(__name__).debug(
- "[TRAJ DEBUG] plottable_parties: %d parties, sample=%s",
- len(plottable_parties),
- (plottable_parties[:5] if plottable_parties else "empty"),
- )
- logging.getLogger(__name__).debug(
- "[TRAJ DEBUG] party_centroids keys: %s",
- list(party_centroids.keys())[:10],
- )
- if party_centroids:
- sample_party = list(party_centroids.keys())[0]
- sample_vals = party_centroids[sample_party]
- logging.getLogger(__name__).debug(
- "[TRAJ DEBUG] Sample party '%s' centroids: %s...",
- sample_party,
- sample_vals[:3],
- )
-
- fig = go.Figure()
- trace_count = 0
- banner_text: Optional[str] = None
-
- def _ema_smooth(values: List[float], alpha: float) -> List[float]:
- if not values or alpha >= 1.0:
- return values
- smoothed: List[float] = []
- prev = None
- for v in values:
- if v is None or (isinstance(v, float) and np.isnan(v)):
- smoothed.append(float(np.nan))
- continue
- v = float(v)
- if prev is None:
- prev = v
- else:
- prev = alpha * v + (1 - alpha) * prev
- smoothed.append(float(prev))
- return smoothed
-
- # If no plottable parties, fallback to MP trajectories
- if not plottable_parties:
- # Build mp_positions across windows
- mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
- for wid in windows:
- pos = positions_by_window.get(wid, {})
- for mp_name, xy in pos.items():
- try:
- x, y = float(xy[0]), float(xy[1])
- except Exception:
- continue
- mp_positions.setdefault(mp_name, {})[wid] = (x, y)
-
- # Rank MPs by activity (number of windows with positions)
- mp_activity = sorted(
- [(mp, len(wdict)) for mp, wdict in mp_positions.items()],
- key=lambda t: t[1],
- reverse=True,
- )
- top_mps = [mp for mp, _ in mp_activity[:mp_fallback_count]]
-
- for mp in top_mps:
- wids_sorted = sorted(mp_positions.get(mp, {}).keys())
- if not wids_sorted:
- continue
- xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
- ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
- xs = _ema_smooth(xs_raw, smooth_alpha)
- ys = _ema_smooth(ys_raw, smooth_alpha)
- custom_raw = [
- (
- float(rx) if rx is not None else float(np.nan),
- float(ry) if ry is not None else float(np.nan),
- )
- for rx, ry in zip(xs_raw, ys_raw)
- ]
- fig.add_trace(
- go.Scatter(
- x=xs,
- y=ys,
- mode="lines+markers",
- name=mp,
- text=wids_sorted,
- customdata=custom_raw,
- line=dict(color="#888888", shape="spline", smoothing=1.3),
- marker=dict(color="#888888", size=6),
- )
- )
- trace_count += 1
-
- banner_text = "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback."
- logging.getLogger(__name__).debug(
- "[TRAJ DEBUG] Fallback to MP trajectories: trace_count=%d, top_mps=%d",
- trace_count,
- len(top_mps),
- )
- return fig, trace_count, banner_text
-
- # Otherwise plot party centroids for selected parties intersecting plottable
- to_plot = [p for p in selected_parties if p in plottable_parties]
- # If none selected, default to all plottable
- if not to_plot:
- to_plot = plottable_parties
-
- for party in to_plot:
- vals = party_centroids.get(party, [])
- if not vals:
- continue
- xs_raw = [v[0] for v in vals]
- ys_raw = [v[1] for v in vals]
- xs = _ema_smooth(xs_raw, smooth_alpha)
- ys = _ema_smooth(ys_raw, smooth_alpha)
- # Ensure customdata preserves NaNs
- custom_raw = [
- (
- float(x) if (x is not None and not np.isnan(x)) else float(np.nan),
- float(y) if (y is not None and not np.isnan(y)) else float(np.nan),
- )
- for x, y in zip(xs_raw, ys_raw)
- ]
- colour = PARTY_COLOURS.get(party, "#9E9E9E")
- fig.add_trace(
- go.Scatter(
- x=xs,
- y=ys,
- mode="lines+markers",
- name=party,
- text=windows,
- customdata=custom_raw,
- line=dict(color=colour, shape="spline", smoothing=1.3),
- marker=dict(color=colour, size=8),
- )
- )
- trace_count += 1
-
- logging.getLogger(__name__).debug(
- "[TRAJ DEBUG] Final trace_count=%d, plottable_parties=%d, to_plot=%s",
- trace_count,
- len(plottable_parties),
- (len(to_plot) if "to_plot" in dir() else "N/A"),
- )
- return fig, trace_count, None
+ return _impl(*args, **kwargs)
logger = logging.getLogger(__name__)
-# Party colour palette (consistent across tabs)
PARTY_COLOURS: Dict[str, str] = config.PARTY_COLOURS
-
SVD_THEMES: dict[int, dict[str, str]] = config.SVD_THEMES
-
KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES
-
CURRENT_PARLIAMENT_PARTIES = config.CURRENT_PARLIAMENT_PARTIES
-
_PARTY_NORMALIZE = config._PARTY_NORMALIZE
@@ -445,14 +212,7 @@ def get_available_windows(db_path: str) -> List[str]:
@st.cache_data(show_spinner=False)
def get_uniform_dim_windows(db_path: str) -> List[str]:
- """Return only windows whose dominant MP-vector dimension is >= 25.
-
- Some windows contain a mix of vector lengths due to multiple pipeline runs
- (e.g. 2016 has both dim=1 and dim=50 rows). We find the most common dimension
- per window and include only windows where that dominant dim >= 25.
- Windows with too few dim-25+ entities (< 10) are also excluded to avoid
- degenerate PCA inputs.
- """
+ """Return only windows whose dominant MP-vector dimension is >= 25."""
return explorer_data.get_uniform_dim_windows(db_path)
@@ -473,53 +233,8 @@ def _swap_axes(
def load_positions(
db_path: str, window_size: str = "annual"
) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict]:
- """Compute 2D positions per window using PCA on aligned SVD vectors.
-
- Returns:
- positions_by_window: {window_id: {entity_name: (x, y)}}
- axis_def: dict with x_axis, y_axis, method keys
- """
- from analysis.political_axis import compute_2d_axes
-
- all_available = get_uniform_dim_windows(db_path)
-
- if not all_available:
- return {}, {}
-
- positions_by_window, axis_def = compute_2d_axes(
- db_path,
- window_ids=all_available,
- method="pca",
- pca_residual=True,
- normalize_vectors=True,
- )
-
- try:
- from analysis.axis_classifier import classify_axes
-
- axis_def = classify_axes(positions_by_window, axis_def, db_path)
- except Exception:
- import logging
-
- logging.getLogger(__name__).exception(
- "classify_axes failed; using generic axis labels"
- )
-
- # Axis orientation is guaranteed by compute_2d_axes via canonical party anchors
- # (Procrustes alignment + sign-fixing). We do NOT forcibly override axis labels
- # here so the classifier output (if available) can be surfaced conditionally in
- # the UI based on per-window confidence. Label selection is performed at render
- # time in the tabs so we can show fallback labels while still surfacing the
- # classifier interpretation and confidence when informative.
-
- # Filter displayed windows by window_size AFTER PCA computation.
- if window_size == "annual":
- annual_keys = set(w for w in all_available if "-Q" not in w)
- positions_by_window = {
- w: v for w, v in positions_by_window.items() if w in annual_keys
- }
-
- return positions_by_window, axis_def
+ """Compute 2D positions per window using PCA on aligned SVD vectors."""
+ return explorer_data.load_positions(db_path, window_size)
@st.cache_data(show_spinner="Partijkaart laden…")
@@ -530,62 +245,15 @@ def load_party_map(db_path: str) -> Dict[str, str]:
@st.cache_data(show_spinner="Actieve Kamerleden laden…")
def load_active_mps(db_path: str) -> set:
- """Return the set of mp_name values that are currently seated in parliament.
-
- An MP is considered active if their mp_metadata row has tot_en_met IS NULL,
- meaning they have no recorded end date for their current seat.
- """
+ """Return the set of mp_name values that are currently seated in parliament."""
return explorer_data.load_active_mps(db_path)
def get_aligned_party_scores(
db_path: str, window: str, active_mps: set | None = None
) -> Dict[str, np.ndarray]:
- """Get party scores for all N components from aligned PCA positions.
-
- For current_parliament, pass active_mps to filter to only seated MPs
- (matching the compass behaviour). Historical windows include all MPs.
-
- Args:
- db_path: Path to DuckDB database
- window: Window identifier (e.g. 'current_parliament', '2025')
- active_mps: Set of active MP names to filter current_parliament by.
- Required when window is 'current_parliament' to match compass.
- """
- from analysis.political_axis import compute_nd_axes
-
- annual_windows = get_uniform_dim_windows(db_path)
- scores_by_window, _ = compute_nd_axes(
- db_path, window_ids=annual_windows, n_components=10
- )
- window_scores = scores_by_window.get(window, {})
- if not window_scores:
- return {}
-
- # For current_parliament, filter to active MPs (still seated) to match compass.
- # Historical windows include all MPs active at the time — no restriction needed.
- if window == "current_parliament" and active_mps is not None:
- window_scores = {mp: sc for mp, sc in window_scores.items() if mp in active_mps}
-
- # Load party map to convert MP names to parties
- _party_map = load_party_map(db_path)
-
- # Aggregate MP scores to party centroids per component
- n_comps = 10
- party_scores_agg: Dict[str, List[np.ndarray]] = {}
- for mp_name, scores in window_scores.items():
- party = _party_map.get(
- mp_name, _party_map.get(mp_name.split("(")[0].strip(), None)
- )
- if party:
- party_scores_agg.setdefault(party, []).append(scores[:n_comps])
-
- # Compute mean scores per party for each component
- return {
- party: np.mean(np.vstack(score_list), axis=0)
- for party, score_list in party_scores_agg.items()
- if score_list
- }
+ """Get party scores for all N components from aligned PCA positions."""
+ return explorer_data.get_aligned_party_scores(db_path, window, active_mps)
def compute_party_discipline(
@@ -593,18 +261,8 @@ def compute_party_discipline(
start_date: str,
end_date: str,
) -> pd.DataFrame:
- """Compute per-party voting discipline (Rice index) for roll-call votes in a date range.
-
- Only individual MP vote rows are used (mp_name LIKE '%,%').
- Returns a DataFrame with columns [party, n_motions, discipline] sorted by discipline ascending.
- Returns an empty DataFrame if fewer than 1 qualifying motion exists or on any DB error.
-
- Rice index per motion per party = fraction of party MPs voting with the party majority.
- The per-party score is the average Rice index across all motions in the date range.
- Only 'voor' and 'tegen' votes are counted; absent and abstaining MPs are excluded from the
- Rice index calculation.
- """
- return trajectory.compute_party_discipline(db_path, start_date, end_date)
+ """Compute per-party voting discipline (Rice index) for roll-call votes in a date range."""
+ return explorer_data.compute_party_discipline(db_path, start_date, end_date)
def _load_mp_vectors_by_party(db_path: str) -> Dict[str, List[np.ndarray]]:
@@ -649,15 +307,7 @@ def load_party_axis_scores_for_window(
def load_party_scores_all_windows(
db_path: str, windows: List[str]
) -> Dict[str, Dict[str, List[float]]]:
- """Load party SVD scores for all specified windows.
-
- Args:
- db_path: Path to DuckDB database
- windows: List of window IDs to load
-
- Returns:
- {window_id: {party_name: [float * k]}} — scores per party per window
- """
+ """Load party SVD scores for all specified windows."""
result: Dict[str, Dict[str, List[float]]] = {}
for window in windows:
if window == "current_parliament":
@@ -675,68 +325,17 @@ def _load_mp_vectors_by_window(db_path: str, window: str) -> Dict[str, np.ndarra
def _get_aligned_trajectory_scores(
db_path: str, windows: List[str], n_components: int = 10
) -> Dict[str, Dict[str, List[float]]]:
- """Get aligned PCA scores for all windows as {window: {party: [scores per component]}}.
-
- Uses compute_nd_axes to get PCA-projected, flip-corrected scores across all windows,
- ensuring consistency with the single-window SVD components view.
- """
- from analysis.political_axis import compute_nd_axes
-
- # Get aligned scores for the requested windows via PCA (annual-only, no quarterly)
- scores_by_window, _ = compute_nd_axes(
- db_path, window_ids=windows, n_components=n_components
- )
- if not scores_by_window:
- return {}
-
- # Load party map to convert MP names to parties
- party_map = load_party_map(db_path)
-
- # Aggregate MP scores to party centroids per window
- result: Dict[str, Dict[str, List[float]]] = {}
- for window in windows:
- window_scores = scores_by_window.get(window, {})
- if not window_scores:
- continue
-
- # Aggregate MP scores to party averages
- party_vecs: Dict[str, List[np.ndarray]] = {}
- for mp_name, scores in window_scores.items():
- party = party_map.get(
- mp_name, party_map.get(mp_name.split("(")[0].strip(), None)
- )
- if party:
- party_vecs.setdefault(party, []).append(scores[:n_components])
-
- # Compute mean scores per party
- result[window] = {
- party: np.mean(np.vstack(score_list), axis=0).tolist()
- for party, score_list in party_vecs.items()
- if score_list
- }
-
- return result
+ """Get aligned PCA scores for all windows as {window: {party: [scores per component]}}."""
+ return explorer_data._get_aligned_trajectory_scores(db_path, windows, n_components)
@st.cache_data(show_spinner="SVD scores met Procrustes-uitlijning laden…")
def load_party_scores_all_windows_aligned(
db_path: str, windows: List[str]
) -> Dict[str, Dict[str, List[float]]]:
- """Load party SVD scores for all windows with Procrustes alignment.
-
- This ensures consistent orientation across years by aligning SVD vectors
- using Procrustes rotation, similar to how components 1-2 are aligned.
-
- Args:
- db_path: Path to DuckDB database
- windows: List of window IDs to load
-
- Returns:
- {window_id: {party_name: [float * k]}} — aligned scores per party per window
- """
+ """Load party SVD scores for all windows with Procrustes alignment."""
from analysis.trajectory import _procrustes_align_windows
- # Load raw MP vectors for each window
raw_window_vecs: Dict[str, Dict[str, np.ndarray]] = {}
party_map = load_party_map(db_path)
@@ -745,10 +344,8 @@ def load_party_scores_all_windows_aligned(
if mp_vecs:
raw_window_vecs[window] = mp_vecs
- # Apply Procrustes alignment
aligned_window_vecs = _procrustes_align_windows(raw_window_vecs)
- # Convert MP vectors to party averages
result: Dict[str, Dict[str, List[float]]] = {}
for window in windows:
if window not in aligned_window_vecs:
@@ -764,7 +361,6 @@ def load_party_scores_all_windows_aligned(
party_vecs[party] = []
party_vecs[party].append(vec)
- # Average per party
result[window] = {}
for party, vecs in party_vecs.items():
if vecs:
@@ -796,11 +392,7 @@ def _cached_bootstrap_cis(
@st.cache_data(show_spinner="Scree-plot laden…")
def load_scree_data(db_path: str) -> List[float]:
- """Return explained variance ratios (%) for all SVD components, sorted descending.
-
- Uses the same Procrustes-aligned multi-window matrix as the compass axes so the
- scree plot is consistent with what the compass actually uses.
- """
+ """Return explained variance ratios (%) for all SVD components, sorted descending."""
try:
from analysis.political_axis import compute_svd_spectrum
@@ -810,554 +402,6 @@ def load_scree_data(db_path: str) -> List[float]:
return []
-def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
- """Render a scree plot showing relative SVD component importance.
-
- Highlighted bars for the top-2 components (used in the compass); muted bars
- for the rest. A cumulative-variance dashed line on the same y-axis helps
- spot the elbow. A 50 % cumulative threshold line is drawn for reference.
-
- Args:
- importances: List of importance values sorted descending (from load_scree_data).
- n_show: How many components to display (default: first 15).
- """
- if not importances:
- return
- # importances are already EVR percentages summing to ~100 over all components.
- # Slice to n_show for display; cumulative line shows how much variance is covered.
- data = list(importances[:n_show])
- ranks = list(range(1, len(data) + 1))
-
- # Cumulative variance for the dashed overlay line
- cumsum = []
- running = 0.0
- for v in data:
- running += v
- cumsum.append(running)
-
- # Colour: first 2 bars highlighted (compass axes), rest muted
- n_highlight = 2
- bar_colours = [
- "#1565C0" if i < n_highlight else "#90CAF9" for i in range(len(data))
- ]
-
- fig = go.Figure()
-
- # Bars
- fig.add_trace(
- go.Bar(
- x=ranks,
- y=data,
- marker_color=bar_colours,
- hovertemplate="As %{x}
%{y:.1f}% verklaarde variantie",
- showlegend=False,
- )
- )
-
- # Cumulative variance line (dashed, warm amber)
- fig.add_trace(
- go.Scatter(
- x=ranks,
- y=cumsum,
- mode="lines+markers",
- line={"color": "#F57C00", "width": 2, "dash": "dot"},
- marker={"size": 5, "color": "#F57C00"},
- hovertemplate="As %{x}
Cumulatief: %{y:.1f}%",
- name="Cumulatief",
- showlegend=True,
- )
- )
-
- # 50 % reference line
- fig.add_hline(
- y=50,
- line_dash="dash",
- line_color="#BDBDBD",
- line_width=1,
- annotation_text="50%",
- annotation_position="right",
- annotation_font_color="#9E9E9E",
- annotation_font_size=11,
- )
-
- # Annotations on the top-2 bars showing their % value
- for i in range(min(n_highlight, len(data))):
- fig.add_annotation(
- x=ranks[i],
- y=data[i] + 0.3,
- text=f"{data[i]:.1f}%",
- showarrow=False,
- font={"size": 11, "color": "#1565C0"},
- yanchor="bottom",
- )
-
- fig.update_layout(
- height=280,
- margin={"l": 10, "r": 50, "t": 30, "b": 40},
- title={
- "text": "Belang per SVD-as",
- "font": {"size": 13, "color": "#555555"},
- "x": 0.02,
- "xanchor": "left",
- },
- legend={
- "orientation": "h",
- "x": 0.5,
- "xanchor": "center",
- "y": 1.08,
- "font": {"size": 11},
- },
- xaxis={
- "title": {"text": "As (rang)", "font": {"size": 11}},
- "tickmode": "linear",
- "tick0": 1,
- "dtick": 1,
- "showline": False,
- "showgrid": False,
- },
- yaxis={
- "title": {"text": "% van totale variantie", "font": {"size": 11}},
- "showline": False,
- "showgrid": True,
- "gridcolor": "#eeeeee",
- "ticksuffix": "%",
- "range": [0, max(cumsum) * 1.08],
- },
- plot_bgcolor="rgba(0,0,0,0)",
- paper_bgcolor="rgba(0,0,0,0)",
- bargap=0.25,
- )
- st.plotly_chart(fig, use_container_width=True)
-
-
-def _build_party_axis_figure(
- party_coords: Dict[str, Tuple[float, float]],
- comp_sel: int,
- theme: dict,
- bootstrap_data: Optional[Dict[str, Dict]] = None,
-) -> Optional[go.Figure]:
- """Build a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`.
-
- Accepts explicit per-party 2D coordinates (x,y) and uses the component selection to
- pick the value (comp_sel==1 -> x, comp_sel==2 -> y). This makes the API explicit and
- avoids indexing into long SVD vectors.
-
- Returns go.Figure or None if no data available.
- """
- if not party_coords:
- return None
-
- if comp_sel not in (1, 2):
- raise ValueError(
- "_build_party_axis_figure only supports comp_sel 1 or 2 when using explicit coords"
- )
-
- axis_idx = comp_sel - 1
- flip = theme.get("flip", False)
-
- parties = []
- scores = []
- colours = []
-
- # Support two shapes for party_coords:
- # - explicit 2D coords: (x, y)
- # - full SVD vectors (len>2) where we should pick the axis_idx element
- for party, val in party_coords.items():
- try:
- # explicit (x, y)
- if hasattr(val, "__len__") and len(val) == 2:
- x, y = val
- score = float(x if axis_idx == 0 else y)
- else:
- # treat as sequence/array-like of full SVD vector
- score = float(val[axis_idx])
-
- if flip:
- score = -score
- except Exception:
- # skip malformed entries silently
- continue
-
- parties.append(party)
- scores.append(score)
- colours.append(PARTY_COLOURS.get(party, "#9E9E9E"))
-
- if not scores:
- return None
-
- # Build hover text: include N when bootstrap data available
- hover = []
- symbols = []
- if bootstrap_data:
- for p, s in zip(parties, scores):
- bd = bootstrap_data.get(p)
- if bd:
- n_mps = bd.get("n_mps", "?")
- ci_low = None
- ci_high = None
- try:
- ci_low = float(bd["ci_lower"][axis_idx])
- ci_high = float(bd["ci_upper"][axis_idx])
- except Exception:
- pass
- if ci_low is not None and ci_high is not None:
- hover.append(
- f"{p}: {s:.3f} (N={n_mps}, 95%-BI: [{ci_low:.3f}, {ci_high:.3f}])"
- )
- else:
- hover.append(f"{p}: {s:.3f} (N={n_mps})")
- symbols.append("diamond" if n_mps == 1 else "circle")
- else:
- hover.append(f"{p}: {s:.3f}")
- symbols.append("circle")
- marker_kwargs = {"size": 14, "color": colours, "symbol": symbols}
- else:
- hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]
- marker_kwargs = {"size": 14, "color": colours}
-
- fig = go.Figure()
- x_min, x_max = min(scores) * 1.15, max(scores) * 1.15
- if x_min == x_max:
- x_min, x_max = x_min - 1, x_max + 1
- fig.add_trace(
- go.Scatter(
- x=[x_min, x_max],
- y=[0, 0],
- mode="lines",
- line={"color": "#cccccc", "width": 1},
- hoverinfo="skip",
- showlegend=False,
- )
- )
-
- scatter_kwargs = {
- "x": scores,
- "y": [0] * len(scores),
- "mode": "markers+text",
- "text": parties,
- "textposition": "top center",
- "marker": marker_kwargs,
- "hovertext": hover,
- "hoverinfo": "text",
- "showlegend": False,
- }
- fig.add_trace(go.Scatter(**scatter_kwargs))
-
- pos_pole = theme.get("positive_pole", "")
- neg_pole = theme.get("negative_pole", "")
- # Labels always from poles: negative_pole = LEFT, positive_pole = RIGHT
- left_label = neg_pole
- right_label = pos_pole
-
- fig.update_layout(
- height=160,
- margin={"l": 10, "r": 10, "t": 10, "b": 30},
- xaxis={
- "title": f"← {left_label} | {right_label} →",
- "showticklabels": False,
- "showline": False,
- "showgrid": False,
- "zeroline": False,
- },
- yaxis={"visible": False, "range": [-1, 2]},
- plot_bgcolor="rgba(0,0,0,0)",
- paper_bgcolor="rgba(0,0,0,0)",
- )
- return fig
-
-
-def _render_party_axis_chart(
- party_coords: Dict[str, Tuple[float, float]],
- comp_sel: int,
- theme: dict,
- bootstrap_data: Optional[Dict[str, Dict]] = None,
-) -> None:
- """Render a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`.
-
- Expects explicit per-party coords mapping (party -> (x,y)) for components 1 & 2.
- """
- fig = _build_party_axis_figure(party_coords, comp_sel, theme, bootstrap_data)
- if fig is None:
- st.caption("_Partijdata niet beschikbaar voor deze as._")
- return
- st.plotly_chart(fig, use_container_width=True)
-
-
-def _render_party_axis_chart_1d(
- party_coords: Dict[str, Tuple[float, ...]],
- comp_sel: int,
- theme: dict,
-) -> None:
- """Render a 1D horizontal scatter of party positions on SVD component `comp_sel`.
-
- Uses the same format as components 1-2: parties as markers on a horizontal line
- with axis title showing poles with arrows.
-
- Args:
- party_coords: Dict mapping party name to tuple of scores (score_for_comp,)
- comp_sel: SVD component number (1-indexed)
- theme: Dict with label, positive_pole, negative_pole, flip
- """
- import plotly.graph_objects as go
-
- if not party_coords:
- st.caption("_Partijdata niet beschikbaar voor deze as._")
- return
-
- # Extract scores and parties
- parties = []
- scores = []
- colours = []
-
- for party, coords in party_coords.items():
- try:
- score = float(coords[0])
- parties.append(party)
- scores.append(score)
- colours.append(PARTY_COLOURS.get(party, "#9E9E9E"))
- except Exception:
- continue
-
- if not scores:
- st.caption("_Partijdata niet beschikbaar voor deze as._")
- return
-
- # Apply flip if needed (ensures right parties appear on right side)
- flip = theme.get("flip", False)
- if flip:
- scores = [-s for s in scores]
-
- # Build hover text
- hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]
-
- # Create figure with same format as components 1-2
- fig = go.Figure()
- x_min, x_max = min(scores) * 1.15, max(scores) * 1.15
- if x_min == x_max:
- x_min, x_max = x_min - 1, x_max + 1
-
- # Add horizontal axis line
- fig.add_trace(
- go.Scatter(
- x=[x_min, x_max],
- y=[0, 0],
- mode="lines",
- line={"color": "#cccccc", "width": 1},
- hoverinfo="skip",
- showlegend=False,
- )
- )
-
- # Add party markers
- fig.add_trace(
- go.Scatter(
- x=scores,
- y=[0] * len(scores),
- mode="markers+text",
- text=parties,
- textposition="top center",
- marker={"size": 14, "color": colours},
- hovertext=hover,
- hoverinfo="text",
- showlegend=False,
- )
- )
-
- # Determine pole labels based on flip
- pos_pole = theme.get("positive_pole", "")
- neg_pole = theme.get("negative_pole", "")
- # Labels always from poles: negative_pole = LEFT, positive_pole = RIGHT
- left_label = neg_pole
- right_label = pos_pole
-
- # Update layout with same format as components 1-2
- fig.update_layout(
- height=160,
- margin={"l": 10, "r": 10, "t": 10, "b": 30},
- xaxis={
- "title": f"← {left_label} | {right_label} →",
- "showticklabels": False,
- "showline": False,
- "showgrid": False,
- "zeroline": False,
- },
- yaxis={"visible": False, "range": [-1, 2]},
- plot_bgcolor="rgba(0,0,0,0)",
- paper_bgcolor="rgba(0,0,0,0)",
- )
-
- st.plotly_chart(fig, use_container_width=True)
-
-
-def _render_svd_time_trajectory(
- party_scores_by_window: Dict[str, Dict[str, List[float]]],
- comp_sel: int,
- theme: dict,
- selected_parties: List[str],
-) -> None:
- """Render a time trajectory plot showing party positions over time on an SVD component.
-
- Args:
- party_scores_by_window: {window_id: {party_name: [scores]}}
- comp_sel: SVD component number (1-indexed)
- theme: Theme dict with label, positive_pole, negative_pole, flip
- selected_parties: List of party names to display
- """
- if not party_scores_by_window or not selected_parties:
- st.caption("_Geen data beschikbaar voor tijdtraject._")
- return
-
- idx = comp_sel - 1 # Convert to 0-indexed
-
- # Build data structure: {party: [(window, score), ...]}
- # Scores are already aligned and flip-corrected via compute_nd_axes,
- # so no per-window flip computation needed.
- party_trajectories: Dict[str, List[Tuple[str, float]]] = {}
-
- # Sort windows: current_parliament first, then chronological
- all_windows = list(party_scores_by_window.keys())
- sorted_windows = []
- if "current_parliament" in all_windows:
- sorted_windows.append("current_parliament")
- # Add other windows in reverse chronological order (newest first)
- other_windows = sorted(
- [w for w in all_windows if w != "current_parliament"], reverse=True
- )
- sorted_windows.extend(other_windows)
-
- for window in sorted_windows:
- scores_by_party = party_scores_by_window.get(window, {})
- for party in selected_parties:
- scores = scores_by_party.get(party, [])
- if scores and len(scores) > idx:
- try:
- score = float(scores[idx])
- party_trajectories.setdefault(party, []).append((window, score))
- except (ValueError, TypeError):
- continue
-
- if not party_trajectories:
- st.caption("_Geen data beschikbaar voor geselecteerde partijen._")
- return
-
- # Create figure
- fig = go.Figure()
-
- # Find score range for x-axis
- all_scores = []
- for traj in party_trajectories.values():
- all_scores.extend([s for _, s in traj])
-
- if not all_scores:
- st.caption("_Geen scores beschikbaar._")
- return
-
- x_min, x_max = min(all_scores) * 1.15, max(all_scores) * 1.15
- if x_min == x_max:
- x_min, x_max = x_min - 1, x_max + 1
-
- # Y positions: current at top (y=0), earlier below
- window_to_y = {w: i for i, w in enumerate(sorted_windows)}
-
- # Add horizontal grey axis lines at y=0 for each year (like single-year chart)
- for window in sorted_windows:
- y_pos = window_to_y[window]
- # Horizontal grey line at y=0 for this year (matching single-year chart style)
- fig.add_trace(
- go.Scatter(
- x=[x_min, x_max],
- y=[y_pos, y_pos],
- mode="lines",
- line={"color": "#cccccc", "width": 1},
- hoverinfo="skip",
- showlegend=False,
- )
- )
-
- # Add traces for each party
- for party in selected_parties:
- if party not in party_trajectories:
- continue
-
- traj = party_trajectories[party]
- if len(traj) < 1:
- continue
-
- x_vals = [score for _, score in traj]
- y_vals = [window_to_y[window] for window, _ in traj]
- color = PARTY_COLOURS.get(party, "#9E9E9E")
-
- # Add connecting line
- fig.add_trace(
- go.Scatter(
- x=x_vals,
- y=y_vals,
- mode="lines",
- line={"color": color, "width": 2},
- hoverinfo="skip",
- showlegend=False,
- )
- )
-
- # Add markers with hover
- hover_texts = [f"{party}
{window}: {score:.3f}" for window, score in traj]
- fig.add_trace(
- go.Scatter(
- x=x_vals,
- y=y_vals,
- mode="markers+text",
- text=[party] * len(traj),
- textposition="top center",
- marker={"size": 12, "color": color},
- hovertext=hover_texts,
- hoverinfo="text",
- showlegend=False,
- )
- )
-
- # Determine pole labels based on theme (use reference flip from current_parliament)
- pos_pole = theme.get("positive_pole", "")
- neg_pole = theme.get("negative_pole", "")
- # Labels always from poles: negative_pole = LEFT, positive_pole = RIGHT
- left_label = neg_pole
- right_label = pos_pole
-
- # Y-axis labels
- y_labels = {}
- for window in sorted_windows:
- if window == "current_parliament":
- y_labels[window_to_y[window]] = "Huidig"
- else:
- y_labels[window_to_y[window]] = window
-
- # Update layout
- fig.update_layout(
- height=max(400, len(sorted_windows) * 60 + 100),
- margin={"l": 80, "r": 10, "t": 10, "b": 30},
- xaxis={
- "title": f"← {left_label} | {right_label} →",
- "range": [x_min, x_max],
- "showticklabels": False,
- "showline": False,
- "showgrid": True,
- "gridcolor": "rgba(0,0,0,0.1)",
- "zeroline": True,
- "zerolinecolor": "rgba(0,0,0,0.2)",
- },
- yaxis={
- "tickvals": list(y_labels.keys()),
- "ticktext": list(y_labels.values()),
- "tickmode": "array",
- "autorange": "reversed", # Top to bottom
- "showgrid": False,
- },
- plot_bgcolor="rgba(0,0,0,0)",
- paper_bgcolor="rgba(0,0,0,0)",
- )
-
- st.plotly_chart(fig, use_container_width=True)
-
-
@st.cache_data(show_spinner="Moties laden…")
def load_motions_df(db_path: str) -> pd.DataFrame:
"""Load the full motions table as a pandas DataFrame (read-only)."""
@@ -1374,1644 +418,51 @@ def query_similar(
return explorer_data.query_similar(db_path, source_motion_id, vector_type, top_k)
-# ---------------------------------------------------------------------------
-# Shared rendering helpers
-# ---------------------------------------------------------------------------
-
-
-def _render_voting_results(voting_results_json) -> None:
- """Render a voting_results JSON blob as a grouped voor/tegen/onthouden table.
-
- The JSON is stored as {party_or_mp: vote} where vote is one of
- 'voor', 'tegen', 'onthouden', 'afwezig'. We group by vote for readability.
- """
- if not voting_results_json:
- return
- try:
- vdata = (
- json.loads(voting_results_json)
- if isinstance(voting_results_json, str)
- else voting_results_json
- )
- if not isinstance(vdata, dict) or not vdata:
- return
- # Group {vote: [actor, ...]}
- by_vote: Dict[str, List[str]] = {}
- for actor, vote in vdata.items():
- vote_str = str(vote).lower().strip()
- by_vote.setdefault(vote_str, []).append(str(actor))
- # Render in fixed order
- vote_order = ["voor", "tegen", "onthouden", "afwezig"]
- vote_emoji = {"voor": "✅", "tegen": "❌", "onthouden": "🟡", "afwezig": "⬜"}
- rows_shown = False
- for v in vote_order + [k for k in by_vote if k not in vote_order]:
- actors = by_vote.get(v)
- if not actors:
- continue
- emoji = vote_emoji.get(v, "▪️")
- st.markdown(
- f"**{emoji} {v.capitalize()}** ({len(actors)}): {', '.join(sorted(actors))}"
- )
- rows_shown = True
- if not rows_shown:
- st.caption("_Geen stemuitslag beschikbaar_")
- except Exception:
- pass
-
-
-# ---------------------------------------------------------------------------
-# Tab 1: Politiek Kompas
-# ---------------------------------------------------------------------------
-
-
-def _add_y_direction_annotations(fig: go.Figure) -> None:
- """Add ▲ Progressief / ▼ Conservatief labels above and below the Y axis."""
- common = dict(
- xref="paper",
- yref="paper",
- x=-0.07,
- showarrow=False,
- font=dict(size=11, color="#666666"),
- )
- fig.add_annotation(**common, y=1.02, text="▲ Progressief", xanchor="center")
- fig.add_annotation(**common, y=-0.06, text="▼ Conservatief", xanchor="center")
-
-
def _window_to_dates(window_id: str) -> tuple[str, str]:
"""Return (start_date, end_date) ISO strings for a given window_id."""
return trajectory.window_to_dates(window_id)
-def build_compass_tab(db_path: str, window_size: str) -> None:
- st.subheader("Politiek Kompas")
- st.markdown(
- "2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)."
- )
+def build_compass_tab(*args, **kwargs):
+ """Build the Politiek Kompas tab."""
+ from analysis.tabs.compass import build_compass_tab as _impl
- # Compass always uses annual windows regardless of the sidebar window_size setting.
- positions_by_window, axis_def = load_positions(db_path, "annual")
- # load_positions may return None for axis_def when resources are missing
- # (e.g. classifier fallback or failed enrichment). Guard so UI rendering
- # code doesn't crash on axis_def.get calls.
- if axis_def is None:
- axis_def = {}
- if not positions_by_window:
- st.warning(
- "Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid."
- )
- return
+ return _impl(*args, **kwargs)
- party_map = load_party_map(db_path)
- active_mps = load_active_mps(db_path)
-
- # Sort windows: year windows first (ascending), current_parliament last.
- # Exclude the current calendar year — it is already fully covered by current_parliament
- # and showing both creates confusion (2026 ⊂ current_parliament).
- import datetime as _dt
-
- _current_year = str(_dt.date.today().year)
- year_windows = sorted(
- w
- for w in positions_by_window
- if w != "current_parliament" and w != _current_year
- )
- has_current = "current_parliament" in positions_by_window
- windows = year_windows + (["current_parliament"] if has_current else [])
-
- # Motion counts per year — sparse years get a warning label.
- _SPARSE_YEARS = {"2016", "2017", "2018"}
- _THRESHOLD = 0.65
-
- def _window_label(w: str) -> str:
- if w == "current_parliament":
- return "Huidig parlement"
- if w in _SPARSE_YEARS:
- return f"{w} ⚠️"
- return w
-
- col1, col2 = st.columns([3, 1])
- with col2:
- window_idx = st.selectbox(
- "Jaar",
- options=windows,
- index=len(windows) - 1, # default: current_parliament
- format_func=_window_label,
- )
- level = st.radio(
- "Weergave",
- options=["Kamerleden", "Partijen"],
- index=0,
- horizontal=True,
- )
- min_mps = st.number_input(
- "Min. Kamerleden per partij",
- min_value=1,
- max_value=20,
- value=3,
- step=1,
- help="Partijen met minder dan dit aantal zetels worden niet weergegeven.",
- )
- pos = positions_by_window.get(window_idx, {})
- if not pos:
- st.info(f"Geen data voor venster {window_idx}")
- return
-
- # For current_parliament, restrict to MPs who are still seated (tot_en_met IS NULL).
- # Historical windows include all MPs active at the time — no restriction needed.
- if window_idx == "current_parliament":
- pos = {mp: xy for mp, xy in pos.items() if mp in active_mps}
-
- # Deduplicate MPs whose names appear both with and without a parenthetical first name,
- # e.g. "Dijk, J.P." and "Dijk, J.P. (Jimmy)". Keep the canonical (stripped) name and
- # average positions if both variants are present.
- def _strip_paren(name: str) -> str:
- return re.sub(r"\s*\([^)]*\)", "", name).strip()
-
- deduped: Dict[str, Tuple[float, float]] = {}
- for name, (x, y) in pos.items():
- base = _strip_paren(name)
- if base in deduped:
- ox, oy = deduped[base]
- deduped[base] = ((ox + x) / 2, (oy + y) / 2)
- else:
- deduped[base] = (x, y)
- pos = deduped
-
- rows = []
- for name, (x, y) in pos.items():
- party = party_map.get(name) or party_map.get(_strip_paren(name), "Unknown")
- rows.append({"name": name, "x": x, "y": y, "party": party})
-
- df_pos = pd.DataFrame(rows)
-
- # Drop parties below the minimum MP threshold (unreliable centroids).
- party_counts = df_pos[df_pos["party"] != "Unknown"]["party"].value_counts()
- valid_parties = set(party_counts[party_counts >= min_mps].index)
- df_pos = df_pos[df_pos["party"].isin(valid_parties)]
-
- if df_pos.empty:
- st.info("Geen partijen met genoeg Kamerleden voor dit venster.")
- return
-
- # The first two SVD axes are clear, interpretable axes for our dataset.
- # Show the classifier-provided full labels on the compass unconditionally
- # so users see the canonical interpretation. We keep the confidence-based
- # captions/interpretations in the expander but do not hide the axis titles
- # for the compass. Note: the vertical axis title is rotated by Plotly —
- # this can make "Progressief–Conservatief" look reversed because the word
- # "Progressief" appears at the top when rendered; we therefore add explicit
- # directional annotations to make the polarity unambiguous.
- # Prefer classifier-provided labels for the first two axes. However, the
- # classifier sometimes returns the concise numeric fallbacks "As 1"/"As 2"
- # when it couldn't find an interpretable label. For the compass we prefer
- # conventional semantic defaults instead of the generic "As N" strings so
- # the chart remains readable.
- _raw_x = axis_def.get("x_label")
- _raw_y = axis_def.get("y_label")
-
- # Use the classifier helper to map internal/modal labels (e.g. "As 1") to
- # user-facing labels. Import at function-time to avoid module import cycles
- # and keep explorer lightweight. If the helper is unavailable fall back to
- # labels from the unified svd_labels module.
- try:
- from analysis.axis_classifier import display_label_for_modal
+def build_trajectories_tab(*args, **kwargs):
+ """Build the Partij Trajectories tab."""
+ from analysis.tabs.trajectories import build_trajectories_tab as _impl
- _x_label = display_label_for_modal(_raw_x, "x")
- _y_label = display_label_for_modal(_raw_y, "y")
- except Exception:
- from analysis.svd_labels import get_fallback_labels
+ return _impl(*args, **kwargs)
- _x_fallback, _y_fallback = get_fallback_labels()
- _x_label = _raw_x or _x_fallback
- _y_label = _raw_y or _y_fallback
- if level == "Partijen":
- # Aggregate to party centroids
- df_party = df_pos.groupby("party", as_index=False).agg(
- x=("x", "mean"), y=("y", "mean"), n=("name", "count")
- )
- df_party["name"] = df_party["party"]
- colour_map = {
- p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_party["party"].unique()
- }
- fig = px.scatter(
- df_party,
- x="x",
- y="y",
- color="party",
- text="party",
- hover_name="party",
- hover_data={"party": False, "x": ":.3f", "y": ":.3f", "n": True},
- color_discrete_map=colour_map,
- title=f"Politiek Kompas — {_window_label(window_idx)} (partijen)",
- labels={
- "x": _x_label,
- "y": _y_label,
- "n": "Kamerleden",
- },
- )
- fig.update_traces(textposition="top center", marker_size=14)
- else:
- colour_map = {
- p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique()
- }
- fig = px.scatter(
- df_pos,
- x="x",
- y="y",
- color="party",
- hover_name="name",
- hover_data={"party": True, "x": ":.3f", "y": ":.3f"},
- color_discrete_map=colour_map,
- title=f"Politiek Kompas — {_window_label(window_idx)}",
- labels={"x": _x_label, "y": _y_label},
- )
+def build_search_tab(*args, **kwargs):
+ """Build the Motie Zoeken tab."""
+ from analysis.tabs.search import build_search_tab as _impl
- fig.update_layout(
- height=600,
- legend_title_text="Partij",
- xaxis={"range": [-1, 1]},
- yaxis={"range": [-0.6, 0.6]},
- )
- with col1:
- st.plotly_chart(fig, use_container_width=True)
- _x_interp = axis_def.get("x_interpretation", {}).get(window_idx, "")
- if (
- _x_interp
- and axis_def.get("x_quality", {}).get(window_idx, 1.0) < _THRESHOLD
- ):
- st.caption(_x_interp)
-
- # Voting discipline analysis
- st.markdown("---")
- st.markdown(
- "**Stemdiscipline analyse:** De Rice-index meet hoe eensgezind partijen stemmen "
- "tijdens hoofdelijke stemmingen. Een score van 100% betekent dat alle MPs van "
- "een partij hetzelfde stemden; 50% wijst op een gelijke splitsing binnen de partij. "
- "Partijen met hoge discipline (>95%) zoals PVV en SGP stemmen als een blok, wat "
- "wijst op sterke partijdiscipline en homogene membership. Lagere discipline (<85%) "
- "bij partijen als PvdA of SP kan duiden op interne factiestrijd, gewetensvragen "
- "bij ethische thema's, of een brede ideologische koers die ruimte laat voor "
- "afwijkende meningen. De discipline varieert ook per onderwerp — ethische kwesties "
- "tonen vaak meer interne verschillen dan economische thema's."
- )
+ return _impl(*args, **kwargs)
-# ---------------------------------------------------------------------------
-# Tab 2: Partij Trajectories
-# ---------------------------------------------------------------------------
-
-
-def choose_trajectory_title(axis_def: dict, axis: str, threshold: float = 0.65) -> str:
- """Choose a short trajectory axis title based on aggregated confidence."""
- return trajectory.choose_trajectory_title(axis_def, axis, threshold)
-
-
-def build_trajectories_tab(db_path: str, window_size: str) -> None:
- logging.getLogger(__name__).debug(
- "[TRAJ DEBUG] build_trajectories_tab called — db_path=%s, window_size=%s",
- db_path,
- window_size,
- )
- st.subheader("Partij Trajectories")
- st.markdown("Hoe bewegen partijen over de tijdsvensters heen?")
-
- positions_by_window, axis_def = load_positions(db_path, window_size)
- logging.getLogger(__name__).debug(
- "[TRAJ DEBUG] load_positions → %d windows, total MPs=%d",
- len(positions_by_window),
- sum(len(v) for v in positions_by_window.values()),
- )
- if axis_def is None:
- axis_def = {}
- if not positions_by_window:
- # Instrumentation: record why trajectories tab aborted early
- try:
- _last_trajectories_diagnostics.update(
- {
- "stage": "load_positions_empty",
- "positions_by_window_len": len(positions_by_window),
- }
- )
- except Exception:
- pass
- try:
- st.warning("Geen positiedata beschikbaar.")
- except Exception:
- pass
- # If debug enabled, show diagnostics in UI (best-effort)
- try:
- if get_debug_trajectories_enabled():
- try:
- st.text_area(
- "Trajectories diagnostics",
- json.dumps(_last_trajectories_diagnostics, default=str),
- height=160,
- )
- except Exception:
- pass
- except Exception:
- pass
- return
-
- party_map = load_party_map(db_path)
- logging.getLogger(__name__).debug(
- "[TRAJ DEBUG] load_party_map → %d entries, sample=%s",
- len(party_map),
- list(party_map.items())[:3],
- )
-
- # Add name normalization to improve matching
- def normalize_mp_name(name):
- """Normalize MP name for better matching between data sources."""
- if not name:
- return ""
- # Remove extra whitespace
- name = name.strip()
- # Ensure consistent spacing after comma
- if "," in name and ", " not in name:
- name = name.replace(",", ", ")
- return name
-
- # Normalize party_map keys
- party_map = {normalize_mp_name(k): v for k, v in party_map.items()}
-
- # Also normalize MP names in positions_by_window
- normalized_positions = {}
- for window, positions in positions_by_window.items():
- normalized_positions[window] = {
- normalize_mp_name(k): v for k, v in positions.items()
- }
- positions_by_window = normalized_positions
-
- # After normalization, log the match rate
- all_mp_names = set()
- for positions in positions_by_window.values():
- all_mp_names.update(positions.keys())
-
- matched_names = sum(1 for mp in all_mp_names if mp in party_map)
- if all_mp_names:
- logger.info(
- f"MP name matching: {matched_names}/{len(all_mp_names)} matched ({100 * matched_names / len(all_mp_names):.1f}%)"
- )
- else:
- logger.info("MP name matching: no MPs found in positions data")
-
- if matched_names == 0 and len(all_mp_names) > 0:
- logger.warning("No MP names matched between positions and party_map!")
- logger.warning(f"Sample positions names: {list(all_mp_names)[:5]}")
- logger.warning(f"Sample party_map names: {list(party_map.keys())[:5]}")
-
- windows = sorted(positions_by_window.keys())
-
- # Compute party centroids per window
- centroids: Dict[str, Dict[str, Tuple[float, float]]] = {}
- all_parties: set = set()
-
- # Helper to normalise MP names (strip parenthetical first names) to match
- # entries in the party_map. This mirrors the behaviour used in the compass
- # tab so both tabs resolve parties the same way.
- def _strip_paren(name: str) -> str:
- return re.sub(r"\s*\([^)]*\)", "", name).strip()
-
- for wid in windows:
- pos = positions_by_window.get(wid, {})
- per_party: Dict[str, List[Tuple[float, float]]] = {}
- for mp_name, (x, y) in pos.items():
- # Try exact match first, then stripped-name match to handle
- # variants like "Dijk, J.P. (Jimmy)" -> "Dijk, J.P." used in mp_metadata
- party = party_map.get(mp_name) or party_map.get(
- _strip_paren(mp_name), "Unknown"
- )
- if party == "Unknown":
- continue
- per_party.setdefault(party, []).append((x, y))
- for party, coords in per_party.items():
- all_parties.add(party)
- xs = [c[0] for c in coords]
- ys = [c[1] for c in coords]
- centroids.setdefault(party, {})[wid] = (
- float(np.mean(xs)),
- float(np.mean(ys)),
- )
-
- all_parties = sorted(
- set(party_map.get(mp) for MPs in positions_by_window.values() for mp in MPs)
- - {None, "Unknown"}
- )
- logging.getLogger(__name__).debug(
- "[TRAJ DEBUG] all_parties (raw from party_map) → %d parties: %s",
- len(all_parties),
- all_parties[:10],
- )
- all_parties_sorted = sorted(all_parties)
-
- # If no parties were found after mapping MPs to parties, show a helpful
- # message instead of rendering an empty chart. This commonly happens when
- # the party map failed to load (DB error) or the min_mps threshold filtered
- # out all parties.
- if not all_parties_sorted:
- st.info(
- "Geen partijen beschikbaar om trajecten te tekenen. Controleer of de party mapping is geladen (mp_metadata) en of de minimum Kamerleden-instelling te hoog staat."
- )
- try:
- st.caption(f"Bekende partijen in party_map: {len(party_map)}")
- except Exception:
- pass
- # Do not return here: allow per-MP fallback plotting below when no
- # party-level centroids are available so the user still sees trajectories.
-
- # Default: show CDA, D66, VVD — the three parties that span the political centre
- default_parties = [p for p in ["CDA", "D66", "VVD"] if p in all_parties]
- if not default_parties:
- default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties]
- if not default_parties:
- default_parties = all_parties_sorted[:6]
-
- selected_parties = st.multiselect(
- "Selecteer partijen",
- options=all_parties_sorted,
- default=default_parties,
- )
-
- # Ensure EMA smoothing helper is available for per-MP fallback plotting which
- # appears earlier in the function. Define here so calls above won't fail.
- def _ema_smooth(values: List[float], alpha: float) -> List[float]:
- if not values or alpha >= 1.0:
- return values
- smoothed = [values[0]]
- for v in values[1:]:
- smoothed.append(alpha * v + (1 - alpha) * smoothed[-1])
- return smoothed
-
- # default smoothing alpha used for inline per-MP plotting; may be overridden
- # by the smoothing controls shown later in the UI.
- smooth_alpha = 0.35
-
- # If no party-level centroids were computed, fall back to per-MP trajectories
- # so the user still sees a plot even when the party_map is missing or empty.
- if not centroids:
- # Fallback: plot individual MP trajectories
- st.info(
- "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback."
- )
-
- # Build per-MP time series from positions_by_window
- mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
- for wid in windows:
- pos = positions_by_window.get(wid, {})
- for mp_name, xy in pos.items():
- # Defensive conversion: skip malformed coordinates instead of raising
- try:
- x, y = float(xy[0]), float(xy[1])
- except Exception:
- # skip malformed entries silently (diagnostics will show counts)
- continue
- mp_positions.setdefault(mp_name, {})[wid] = (x, y)
-
- # Filter to MPs with at least 2 windows and not all NaN
- mp_positions = {
- mp: pos
- for mp, pos in mp_positions.items()
- if len(pos) >= 2
- and not all(np.isnan(x) and np.isnan(y) for x, y in pos.values())
- }
-
- if not mp_positions:
- st.warning("Geen positiedata beschikbaar voor trajectplotten.")
- _last_trajectories_diagnostics.update(
- {
- "stage": "no_mp_positions",
- "mp_positions_count": 0,
- }
- )
- # show diagnostics when debug enabled
- try:
- if get_debug_trajectories_enabled():
- try:
- st.text_area(
- "Trajectories diagnostics",
- json.dumps(_last_trajectories_diagnostics, default=str),
- height=160,
- )
- except Exception:
- pass
- except Exception:
- pass
- return
-
- # Store for later use
- st.session_state["_trajectory_mp_positions"] = mp_positions
-
- mp_list = sorted(mp_positions.keys())
- default_mps = mp_list[:6]
- selected_mps = st.multiselect(
- "Selecteer Kamerleden (fallback)", options=mp_list, default=default_mps
- )
-
- # Plot per-MP trajectories
- fig = go.Figure()
- trace_count = 0
- for mp in selected_mps:
- wids_sorted = sorted(mp_positions[mp].keys())
- xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
- ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
- xs = _ema_smooth(xs_raw, smooth_alpha)
- ys = _ema_smooth(ys_raw, smooth_alpha)
- custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)]
- fig.add_trace(
- go.Scatter(
- x=xs,
- y=ys,
- mode="lines+markers",
- name=mp,
- text=wids_sorted,
- customdata=custom_raw,
- line=dict(color="#888888", shape="spline", smoothing=1.3),
- marker=dict(color="#888888", size=6),
- hovertemplate=(
- f"{mp}
"
- "venster: %{text}
"
- "x (smoothed): %{x:.3f}
"
- "x (raw): %{customdata[0]:.3f}
"
- "y (smoothed): %{y:.3f}
"
- "y (raw): %{customdata[1]:.3f}"
- ),
- )
- )
- trace_count += 1
-
- _add_y_direction_annotations(fig)
- if trace_count == 0:
- st.info(
- "Geen trajecten getekend: geen geselecteerde Kamerleden met voldoende data."
- )
- else:
- st.plotly_chart(fig, use_container_width=True)
- return
-
- # Developer override: if EXPLORER_FORCE_SHOW_TRAJECTORIES=1 in the
- # environment, bypass party filtering and show the first MPs' trajectories
- # directly (helps diagnose production environments where party mapping
- # or filtering prevents any traces from appearing). This is safe to keep
- # in main because it only triggers when explicitly enabled.
- if os.getenv("EXPLORER_FORCE_SHOW_TRAJECTORIES") in ("1", "true", "True"):
- # Build per-MP time series from positions_by_window and plot first 6 MPs
- mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
- for wid in windows:
- pos = positions_by_window.get(wid, {})
- for mp_name, (x, y) in pos.items():
- mp_positions.setdefault(mp_name, {})[wid] = (float(x), float(y))
-
- mp_list = sorted(mp_positions.keys())
- if not mp_list:
- st.info("Geen MP-positiegegevens beschikbaar om te tonen.")
- return
-
- sample_mps = mp_list[:6]
- fig = go.Figure()
- for mp in sample_mps:
- wids_sorted = sorted(mp_positions[mp].keys())
- xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
- ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
- xs = _ema_smooth(xs_raw, 0.35)
- ys = _ema_smooth(ys_raw, 0.35)
- custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)]
- fig.add_trace(
- go.Scatter(
- x=xs,
- y=ys,
- mode="lines+markers",
- name=mp,
- text=wids_sorted,
- customdata=custom_raw,
- line=dict(color="#444444", shape="spline", smoothing=1.3),
- marker=dict(color="#444444", size=6),
- hovertemplate=(
- f"{mp}
"
- "venster: %{text}
"
- "x (smoothed): %{x:.3f}
"
- "x (raw): %{customdata[0]:.3f}
"
- "y (smoothed): %{y:.3f}
"
- "y (raw): %{customdata[1]:.3f}"
- ),
- )
- )
- _add_y_direction_annotations(fig)
- st.plotly_chart(fig, use_container_width=True)
- return
-
- # Debug expander: show data used to build trajectories so we can diagnose
- # why no traces are appearing. Leave this collapsed by default in normal
- # runs; when troubleshooting it will show counts and small samples.
- try:
- # Add a little opt-in checkbox in the UI to enable debug diagnostic output
- debug_checkbox = False
- try:
- debug_checkbox = st.checkbox(
- "Enable trajectories diagnostics (show extra info)",
- value=get_debug_trajectories_enabled(),
- )
- except Exception:
- debug_checkbox = get_debug_trajectories_enabled()
- if debug_checkbox:
- try:
- with st.expander(
- "DEBUG: Trajectories data (showing diagnostics)", expanded=False
- ):
- st.write("windows (count):", len(windows))
- st.write("windows sample:", windows[:10])
- st.write("party_map entries:", len(party_map))
- st.write("parties with centroids:", len(all_parties_sorted))
- st.write("default_parties:", default_parties)
- st.write("selected_parties:", selected_parties)
- st.write("min_mps setting:", min_mps)
- # sample centroid counts per party
- sample = {
- p: len(centroids.get(p, {}))
- for p in list(all_parties_sorted)[:8]
- }
- st.write("sample centroid window counts per party:", sample)
- except Exception:
- pass
- except Exception:
- # Don't crash UI if st isn't available or expander fails
- pass
-
- # Smoothing controls
- smoothing_method = st.selectbox(
- "Smoothing methode",
- options=["EMA", "Spline", "None"],
- index=0,
- help="EMA = exponential moving average; Spline = low-degree polynomial spline fit; None = raw centroids",
- )
-
- # EMA alpha only shown/used when EMA is selected
- smooth_alpha = 1.0
- if smoothing_method == "EMA":
- smooth_alpha = st.slider(
- "Glad maken (EMA-\u03b1)",
- min_value=0.1,
- max_value=1.0,
- value=0.35,
- step=0.05,
- help=(
- "\u03b1=1.0 toont de ruwe data; lagere waarden maken de lijn gladder. "
- "Standaard 0.35 voor een goed evenwicht tussen detail en ruis."
- ),
- )
-
- def _ema_smooth(values: List[float], alpha: float) -> List[float]:
- """Apply exponential moving average; alpha=1.0 means no smoothing."""
- if not values or alpha >= 1.0:
- return values
- smoothed = [values[0]]
- for v in values[1:]:
- smoothed.append(alpha * v + (1 - alpha) * smoothed[-1])
- return smoothed
-
- def _spline_smooth(values: List[float]) -> List[float]:
- """Perform a basic low-degree polynomial fit over index -> value and evaluate at indices.
-
- This provides a simple spline-like smoothing without adding scipy as a dependency.
- For very small N this returns the raw values.
- """
- n = len(values)
- if n <= 2:
- return values
- deg = min(3, n - 1)
- try:
- idx = np.arange(n, dtype=float)
- coeffs = np.polyfit(idx, np.array(values, dtype=float), deg=deg)
- smooth = np.polyval(coeffs, idx)
- return [float(v) for v in smooth]
- except Exception:
- return values
-
- fig = go.Figure()
- trace_count = 0
- helper_succeeded = False
- # New: delegate plotting selection to helper for testability
- # Note: select_trajectory_plot_data returns (fig, trace_count, banner_text)
- try:
- fig2, trace_count2, banner_text = select_trajectory_plot_data(
- positions_by_window, party_map, windows, selected_parties, smooth_alpha
- )
- # If helper returned a figure, replace
- if fig2 is not None:
- fig = fig2
- trace_count = trace_count2
- helper_succeeded = True
- if banner_text:
- try:
- st.caption(banner_text)
- except Exception:
- pass
- try:
- _last_trajectories_diagnostics.update({"banner_text": banner_text})
- except Exception:
- pass
- except Exception as e:
- tb = traceback.format_exc()
- # attach diagnostics to the helper and module
- try:
- select_trajectory_plot_data._last_diagnostics = {"exception": tb}
- except Exception:
- pass
- try:
- _last_trajectories_diagnostics.update(
- {"stage": "select_helper_exception", "exception": tb}
- )
- except Exception:
- pass
- logger.exception("select_trajectory_plot_data failed")
- debug_enabled = get_debug_trajectories_enabled()
- if debug_enabled:
- try:
- st.text_area("select_trajectory_plot_data traceback", tb, height=240)
- except Exception:
- pass
- logging.getLogger(__name__).debug(
- "[TRAJ DEBUG] helper_succeeded=%s", helper_succeeded
- )
- if not helper_succeeded:
- for party in selected_parties:
- if party not in centroids:
- continue
- wids_sorted = sorted(centroids[party].keys())
- xs_raw = [centroids[party][w][0] for w in wids_sorted]
- ys_raw = [centroids[party][w][1] for w in wids_sorted]
- xs = _ema_smooth(xs_raw, smooth_alpha)
- ys = _ema_smooth(ys_raw, smooth_alpha)
- # Preserve raw (unsmoothed) values per-point so hover can show both raw and smoothed
- custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)]
- colour = PARTY_COLOURS.get(party, "#9E9E9E")
- fig.add_trace(
- go.Scatter(
- x=xs,
- y=ys,
- mode="lines+markers",
- name=party,
- text=wids_sorted, # full window ID for hover
- customdata=custom_raw,
- line=dict(color=colour, shape="spline", smoothing=1.3),
- marker=dict(color=colour, size=8),
- hovertemplate=(
- f"{party}
"
- "venster: %{text}
"
- "x (smoothed): %{x:.3f}
"
- "x (raw): %{customdata[0]:.3f}
"
- "y (smoothed): %{y:.3f}
"
- "y (raw): %{customdata[1]:.3f}"
- ),
- )
- )
- trace_count += 1
-
- # For trajectories, the chart spans multiple windows. Use the classifier's
- # per-window confidences aggregated (mean) to decide whether to use the
- # classifier label or fall back to the conventional short label.
- _THRESHOLD = 0.65
- x_conf_map = axis_def.get("x_label_confidence", {}) or {}
- y_conf_map = axis_def.get("y_label_confidence", {}) or {}
-
- def _mean_conf(m: dict) -> Optional[float]:
- vals = [v for v in m.values() if v is not None]
- if not vals:
- return None
- return float(sum(vals) / len(vals))
-
- x_mean = _mean_conf(x_conf_map)
- y_mean = _mean_conf(y_conf_map)
-
- x_title = choose_trajectory_title(axis_def, "x", threshold=_THRESHOLD)
- y_title = choose_trajectory_title(axis_def, "y", threshold=_THRESHOLD)
-
- fig.update_layout(
- title="Partij trajectories",
- xaxis_title=x_title,
- yaxis_title=y_title,
- height=600,
- legend_title_text="Partij",
- )
- _add_y_direction_annotations(fig)
- # If no traces were added to the figure, show a diagnostic message so the
- # user knows why the plot is empty.
- try:
- _last_trajectories_diagnostics.update({"trace_count": trace_count})
- except Exception:
- pass
- debug_enabled = get_debug_trajectories_enabled()
- # Add detailed diagnostics to understand why trace_count is 0
- if trace_count == 0:
- _last_trajectories_diagnostics.update(
- {
- "stage": "zero_traces",
- "positions_count": sum(len(pos) for pos in positions_by_window.values())
- if positions_by_window
- else 0,
- "party_map_count": len(party_map) if party_map else 0,
- "centroids_count": len(centroids) if centroids else 0,
- "selected_parties_count": len(selected_parties)
- if selected_parties
- else 0,
- "timestamp": datetime.now().isoformat(),
- }
- )
- # Check if there are positions but no centroids (name mismatch)
- if positions_by_window and party_map and not centroids:
- # Sample some MP names from positions
- sample_mps = []
- for window, positions in list(positions_by_window.items())[:1]:
- sample_mps = list(positions.keys())[:5]
- break
- # Check if these MPs are in party_map
- matched = sum(1 for mp in sample_mps if mp in party_map)
- _last_trajectories_diagnostics["name_match_check"] = {
- "sample_mps": sample_mps,
- "matched_in_party_map": matched,
- "sample_size": len(sample_mps),
- }
- if trace_count == 0:
- st.info("📊 **Geen trajecten getekend**")
-
- # Show diagnostic information
- with st.expander("🔍 Diagnostische informatie"):
- st.write("**Data status:**")
- st.write(
- f"- Positie vensters: {len(positions_by_window) if positions_by_window else 0}"
- )
- st.write(f"- Party mappings: {len(party_map) if party_map else 0}")
- st.write(
- f"- Geselecteerde partijen: {len(selected_parties) if selected_parties else 0}"
- )
-
- if "centroid_diagnostics" in locals():
- st.write("**Centroid berekening:**")
- st.write(
- f"- Partijen met posities: {len(centroid_diagnostics.get('parties_with_positions', []))}"
- )
- st.write(
- f"- Partijen met alleen NaN: {len(centroid_diagnostics.get('parties_all_nan', []))}"
- )
-
- st.write("\n**Mogelijke oorzaken:**")
- st.write("1. Geen SVD vectoren berekend voor de geselecteerde vensters")
- st.write("2. MP namen in posities komen niet overeen met party_map")
- st.write("3. Alle geselecteerde partijen hebben te weinig MPs (< 5)")
-
- # Add a button to run diagnostics
- if st.button("🔧 Database diagnostiek uitvoeren"):
- with st.spinner("Bezig met diagnostiek..."):
- # Import and run diagnostics
- from scripts.diagnose_trajectories_cli import (
- run as diagnose_trajectories,
- )
-
- results = diagnose_trajectories(db_path)
- st.json(results)
- else:
- # DEBUG: show trace_count and figure data size before rendering
- try:
- st.info(
- f"[DEBUG] trace_count={trace_count}, fig data count={len(fig.data)}, layout title={fig.layout.title.text if fig.layout.title else 'none'}"
- )
- except Exception:
- pass
- try:
- logging.getLogger(__name__).debug(
- "[TRAJ DEBUG] About to render plotly chart — trace_count=%d, banner=%s, fig has %d traces",
- trace_count,
- banner_text,
- len(fig.data),
- )
- st.plotly_chart(fig, use_container_width=True)
- except Exception as e:
- st.error(f"Trajectories rendering failed: {e}")
- # Always show diagnostics when rendering fails, regardless of trace_count
- if get_debug_trajectories_enabled():
- try:
- st.json(_last_trajectories_diagnostics)
- except Exception:
- st.text_area(
- "Trajectories diagnostics (JSON failed)",
- json.dumps(_last_trajectories_diagnostics, default=str),
- height=240,
- )
-
-
-# ---------------------------------------------------------------------------
-# Tab 3: Motie Zoeken
-# ---------------------------------------------------------------------------
-
-
-def build_search_tab(db_path: str, show_rejected: bool) -> None:
- st.subheader("Motie Zoeken")
-
- df = load_motions_df(db_path)
- if df.empty:
- st.warning("Geen moties beschikbaar.")
- return
+def build_browser_tab(*args, **kwargs):
+ """Build the Motie Browser tab by forwarding all arguments to analysis.tabs.browser.build_browser_tab."""
+ from analysis.tabs.browser import build_browser_tab as _impl  # NOTE(review): function-scope import, presumably to defer loading or avoid an import cycle; confirm
- if not show_rejected:
- df = df[df["title"].fillna("").str.strip() != "Verworpen."]
+ return _impl(*args, **kwargs)
- # Controls
- col1, col2, col3 = st.columns([2, 1, 1])
- with col1:
- query = st.text_input(
- "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen"
- )
- with col2:
- years = sorted(df["year"].dropna().astype(int).unique().tolist())
- if years:
- year_range = st.select_slider(
- "Jaar", options=years, value=(years[0], years[-1])
- )
- else:
- year_range = (2019, 2024)
- with col3:
- min_controversy = st.slider(
- "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05
- )
-
- # Apply filters in-memory
- working = df.copy()
- working = working[
- (working["year"] >= year_range[0]) & (working["year"] <= year_range[1])
- ]
- if min_controversy > 0:
- working = working[working["controversy_score"] >= min_controversy]
- if query:
- q = query.lower()
- mask = working["title"].fillna("").str.lower().str.contains(q, regex=False)
- working = working[mask]
-
- working = working.sort_values(by="controversy_score", ascending=False)
- st.caption(f"{len(working)} resultaten (top 50 getoond)")
-
- for _, row in working.head(50).iterrows():
- title = row.get("title") or f"Motie #{row['id']}"
- date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
- controversy = row.get("controversy_score") or 0
- with st.expander(f"**{title}** — {date_str} — 🔥 {controversy:.2f}"):
- cols = st.columns(3)
- cols[0].metric("Controverse", f"{controversy:.2f}")
- cols[1].metric("Marge", f"{row.get('winning_margin', 0):.2f}")
- cols[2].metric("Jaar", int(row["year"]) if pd.notna(row["year"]) else "?")
-
- # Voting breakdown
- _render_voting_results(row.get("voting_results"))
-
- # Link to original motion
- url = row.get("url")
- if url and str(url).startswith("http"):
- st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")
-
- # Similar motions
- sim = query_similar(db_path, int(row["id"]), top_k=5)
- if not sim.empty:
- st.markdown("**Vergelijkbare moties:**")
- for _, s in sim.iterrows():
- s_date = (
- pd.to_datetime(s["date"]).strftime("%Y")
- if pd.notna(s.get("date"))
- else ""
- )
- st.markdown(
- f"- {s.get('title', 'Onbekend')} *(score: {s['score']:.3f}, {s_date})*"
- )
- else:
- st.caption("_Nog geen vergelijkbare moties beschikbaar_")
-
-
-# ---------------------------------------------------------------------------
-# Tab 4: Motie Browser
-# ---------------------------------------------------------------------------
-
-
-def build_browser_tab(db_path: str, show_rejected: bool) -> None:
- st.subheader("Motie Browser")
-
- df = load_motions_df(db_path)
- if df.empty:
- st.warning("Geen moties beschikbaar.")
- return
-
- if not show_rejected:
- df = df[df["title"].fillna("").str.strip() != "Verworpen."]
-
- # Controls
- col1, col2, col3 = st.columns(3)
- with col1:
- years = sorted(df["year"].dropna().astype(int).unique().tolist())
- year_filter = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years])
- with col2:
- min_controversy_b = st.slider(
- "Min. controverse",
- min_value=0.0,
- max_value=1.0,
- value=0.0,
- step=0.05,
- key="browser_controversy",
- )
- with col3:
- sort_by = st.selectbox("Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"])
-
- # Filter
- working = df.copy()
- if year_filter != "(Alle)":
- working = working[working["year"] == int(year_filter)]
- if min_controversy_b > 0:
- working = working[working["controversy_score"] >= min_controversy_b]
-
- sort_map = {
- "Datum (nieuw)": ("date", False),
- "Controverse": ("controversy_score", False),
- "Marge": ("winning_margin", True),
- }
- sort_col, sort_asc = sort_map[sort_by]
- working = working.sort_values(by=sort_col, ascending=sort_asc)
-
- # Display table
- display_cols = ["id", "title", "date", "controversy_score", "winning_margin"]
- available_display = [c for c in display_cols if c in working.columns]
- st.dataframe(
- working[available_display].reset_index(drop=True),
- use_container_width=True,
- height=350,
- )
-
- st.divider()
-
- # Detail panel
- st.markdown("**Detail weergave** — vul een motie-ID in:")
- sel_id = st.number_input(
- "Motie ID",
- min_value=int(working["id"].min()) if not working.empty else 1,
- max_value=int(working["id"].max()) if not working.empty else 99999,
- value=int(working["id"].iloc[0]) if not working.empty else 1,
- step=1,
- )
- motion_row = df[df["id"] == sel_id]
- if not motion_row.empty:
- row = motion_row.iloc[0]
- st.markdown(f"### {row.get('title') or 'Onbekend'}")
- date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
- st.caption(
- f"📅 {date_str} | 🔥 Controverse: {row.get('controversy_score', 0):.2f}"
- )
-
- # Link to original source
- url = row.get("url")
- if url and str(url).startswith("http"):
- st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")
-
- # Voting breakdown
- st.markdown("**Stemuitslag:**")
- _render_voting_results(row.get("voting_results"))
-
- # Similar motions
- sim = query_similar(db_path, int(sel_id), top_k=10)
- if not sim.empty:
- st.markdown("**Vergelijkbare moties:**")
- st.dataframe(
- sim[["title", "score", "date", "policy_area"]],
- use_container_width=True,
- )
- else:
- st.caption("_Nog geen vergelijkbare moties beschikbaar voor deze motie_")
-
-
-def build_svd_components_tab(db_path: str) -> None:
- """New tab: show top motions contributing to top SVD components.
-
- Reads thoughts/explorer/top_svd_top_motions.json and displays a selector
- for components 1..10 with theme labels/explanations and a detail pane per motion.
-
- Components 1-2 use aligned PCA positions (consistent with compass).
- Components 3-10 use raw SVD scores.
- """
- st.subheader("🔬 SVD Assen — politieke polarisatiethema's")
- st.markdown(
- "Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen "
- "van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren "
- "het spanningsveld dat de as beschrijft."
- )
-
- # Scree plot: relative importance of each SVD component
- scree_importances = load_scree_data(db_path)
- if scree_importances:
- st.markdown(
- "**Scree-plot** — het relatieve gewicht van elke SVD-as. "
- "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; "
- "latere assen (7+) zijn fragiel en mogelijk niet boven ruisniveau."
- )
- _render_scree_plot(scree_importances)
-
- json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
- if not os.path.exists(json_path):
- st.warning(
- f"Top-SVD data not found at {json_path}. Run the importance job to generate it."
- )
- return
-
- try:
- with open(json_path, "r", encoding="utf-8") as fh:
- j = json.load(fh)
- except Exception as e:
- st.error(f"Failed to load SVD importance JSON: {e}")
- return
-
- window = j.get("window")
- rows = j.get("rows", [])
- if not rows:
- st.info("Geen top-moties in dataset")
- return
-
- st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**")
-
- # Build mapping component -> list of motions (deduplicate by motion_id per component)
- comp_map: dict[int, list] = {}
- for r in rows:
- comp = int(r.get("component", 0))
- bucket = comp_map.setdefault(comp, [])
- existing_ids = {m.get("motion_id") for m in bucket}
- if r.get("motion_id") not in existing_ids:
- bucket.append(r)
-
- comp_options = sorted(comp_map.keys())
-
- # Build display labels for selectbox: "As 1 — Regulering vs. status-quo"
- def _comp_label(c: int) -> str:
- theme = SVD_THEMES.get(c, {})
- lbl = theme.get("label", "")
- return f"As {c} — {lbl}" if lbl else f"As {c}"
-
- comp_display = [_comp_label(c) for c in comp_options]
-
- # Load default party scores early (needed for sidebar controls)
- party_scores_default = load_party_axis_scores(db_path)
- party_mp_vectors = load_party_mp_vectors(db_path)
- bootstrap_data = (
- _cached_bootstrap_cis(party_mp_vectors) if party_mp_vectors else None
- )
-
- # Sidebar controls for window selection and minimum MPs filter
- col1, col2 = st.columns([2, 1])
-
- # Initialize view mode (will be set in col2 if render succeeds)
- view_mode = "Enkel venster"
- selected_parties_for_trajectory: list = []
-
- with col2:
- comp_sel_idx = st.selectbox(
- "Selecteer SVD-as",
- options=list(range(len(comp_options))),
- format_func=lambda i: comp_display[i],
- index=0,
- )
- comp_sel = comp_options[comp_sel_idx]
-
- # Minimum MPs filter (only relevant for components 1-2 which use party centroids)
- min_mps = st.number_input(
- "Min. Kamerleden per partij",
- min_value=1,
- max_value=20,
- value=1,
- step=1,
- help="Partijen met minder dan dit aantal Kamerleden worden niet weergegeven.",
- )
-
- # View selector for party axis display
- view_mode = st.radio(
- "Weergave",
- options=["Enkel venster", "Tijdtraject"],
- index=0,
- help="Enkel venster: toont posities voor één tijdsvenster. Tijdtraject: toont hoe partijen over tijd bewegen op deze as.",
- )
-
- # Party multi-select for time trajectory view
- selected_parties_for_trajectory = []
- if view_mode == "Tijdtraject":
- # Get list of parties with scores
- all_parties = (
- sorted(party_scores_default.keys()) if party_scores_default else []
- )
- default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties][:8]
- selected_parties_for_trajectory = st.multiselect(
- "Partijen om te tonen",
- options=all_parties,
- default=default_parties,
- help="Selecteer de partijen die je wilt zien in het tijdtraject.",
- )
-
- # Show theme explanation
- theme = SVD_THEMES.get(comp_sel, {})
- if theme:
- st.info(f"**{theme['label']}** — {theme['explanation']}")
-
- motions = comp_map.get(comp_sel, [])
-
- # Party axis chart
- # Default party scores already loaded earlier for sidebar controls.
- # ALL components 1-10 use raw (non-aligned) SVD vectors.
- # The compass uses Procrustes-aligned PCA — separate visualization.
- # Get available windows from svd_vectors; exclude current year (covered by current_parliament)
- import datetime as _dt
- _current_year = str(_dt.date.today().year)
- available_windows = get_uniform_dim_windows(db_path)
- year_windows = sorted(
- w for w in available_windows if w != "current_parliament" and w != _current_year
- )
- has_current = "current_parliament" in available_windows
- svd_windows = year_windows + (["current_parliament"] if has_current else [])
-
- def _svd_window_label(w: str) -> str:
- if w == "current_parliament":
- return "Huidig parliament"
- return w
-
- with col1:
- svd_window = st.selectbox(
- "Jaar",
- options=svd_windows,
- index=len(svd_windows) - 1, # default: current_parliament
- format_func=_svd_window_label,
- key=f"svd_window_{comp_sel}",
- )
-
- # Load party scores for the selected window (used for components 3-10)
- if svd_window == "current_parliament":
- party_scores = party_scores_default
- else:
- party_scores = load_party_axis_scores_for_window(db_path, svd_window)
-
- # Compute MP counts from party_mp_vectors
- party_mp_counts = (
- {p: len(v) for p, v in party_mp_vectors.items()} if party_mp_vectors else {}
- )
-
- # For components 1-2, use aligned positions from load_positions (same as compass)
- # for consistency. For components 3-10, use raw SVD scores.
- def _get_aligned_party_coords(window: str) -> Dict[str, Tuple[float, float]]:
- """Get party (x, y) coordinates from aligned PCA positions for a window."""
- positions_by_window, _ = load_positions(db_path, "annual")
- window_pos = positions_by_window.get(window, {})
- if not window_pos:
- return {}
-
- # Load party map to convert MP names to parties
- _party_map = load_party_map(db_path)
-
- # Aggregate MP positions to party centroids
- party_coords: Dict[str, List[Tuple[float, float]]] = {}
- for mp_name, (x, y) in window_pos.items():
- party = _party_map.get(
- mp_name, _party_map.get(mp_name.split("(")[0].strip(), None)
- )
- if party:
- party_coords.setdefault(party, []).append((x, y))
-
- # Compute mean position per party
- return {
- party: (
- float(np.mean([c[0] for c in coords])),
- float(np.mean([c[1] for c in coords])),
- )
- for party, coords in party_coords.items()
- if coords
- }
-
- # Load aligned scores for ALL components 1-10 using PCA on aligned vectors.
- # This ensures consistency between compass and SVD components tab.
- def _get_aligned_party_scores(window: str) -> Dict[str, np.ndarray]:
- """Get party scores for all N components from aligned PCA positions."""
- active_mps = (
- load_active_mps(db_path) if window == "current_parliament" else None
- )
- return get_aligned_party_scores(db_path, window, active_mps)
-
- # Extract 1D scores for this component using Procrustes-aligned PCA scores.
- # All 10 components use _get_aligned_party_scores (compute_nd_axes with annual-only
- # windows). This is mathematically identical to the compass x/y positions for
- # components 1 and 2, and consistently uses the same aligned data for 3-10.
- party_1d_coords: dict = {}
- aligned_all_scores = _get_aligned_party_scores(svd_window)
- for party, all_scores in aligned_all_scores.items():
- idx = comp_sel - 1 # 0-indexed
- if idx < len(all_scores):
- party_1d_coords[party] = (float(all_scores[idx]),)
-
- # Auto-compute flip directions for ALL components 1-10 based on aligned party centroids.
- # Since we now use aligned PCA scores for all components, compute flip directly from
- # aligned scores to ensure canonical right parties (PVV, FVD, JA21, SGP) appear on RIGHT.
- computed_flips: Dict[int, bool] = {}
- try:
- from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT
-
- # Compute flip for each component based on aligned party scores
- for comp_idx in range(10):
- right_scores = []
- left_scores = []
- for party, scores in aligned_all_scores.items():
- if party in CANONICAL_RIGHT:
- right_scores.append(scores[comp_idx])
- elif party in CANONICAL_LEFT:
- left_scores.append(scores[comp_idx])
-
- if right_scores and left_scores:
- right_avg = np.mean(right_scores)
- left_avg = np.mean(left_scores)
- # Flip if right parties score lower than left (we want RIGHT > LEFT)
- computed_flips[comp_idx + 1] = right_avg < left_avg
- else:
- computed_flips[comp_idx + 1] = False
- except Exception:
- # If flip computation fails, keep existing flip values from SVD_THEMES
- pass
-
- # Build theme override with computed flip for this component
- # (avoids mutating SVD_THEMES which persists stale values across Streamlit reruns)
- theme_with_flip = {
- **theme,
- "flip": computed_flips.get(comp_sel, theme.get("flip", False)),
- }
-
- # Filter parties by minimum MP count
- if min_mps > 1 and party_mp_counts:
- valid_parties = {p for p, count in party_mp_counts.items() if count >= min_mps}
- party_1d_coords = {
- p: coords for p, coords in party_1d_coords.items() if p in valid_parties
- }
-
- # Render party axis chart (single window or time trajectory)
- if view_mode == "Tijdtraject" and selected_parties_for_trajectory:
- # Load party scores for all windows and render time trajectory
- available_windows = get_uniform_dim_windows(db_path)
- year_windows = sorted(
- w
- for w in available_windows
- if w != "current_parliament" and w != _current_year
- )
- has_current = "current_parliament" in available_windows
- all_windows = year_windows + (["current_parliament"] if has_current else [])
-
- # Use aligned PCA scores for all windows (consistent with single-window view)
- party_scores_by_window = _get_aligned_trajectory_scores(db_path, all_windows)
-
- _render_svd_time_trajectory(
- party_scores_by_window,
- comp_sel,
- theme_with_flip,
- selected_parties_for_trajectory,
- )
- else:
- # Single-window view: render 1D party axis chart
- _render_party_axis_chart_1d(party_1d_coords, comp_sel, theme_with_flip)
-
- # Batch-fetch motion details (title, date, policy_area, url, body_text, voting_results)
- motion_ids = [m.get("motion_id") for m in motions if m.get("motion_id") is not None]
- motion_details: Dict[int, tuple] = {}
- if motion_ids:
- # Defensively convert motion_ids to integers, skipping invalid values
- ids_int: List[int] = []
- for mid in motion_ids:
- try:
- ids_int.append(int(mid))
- except Exception:
- logger.warning("Skipping invalid motion id in SVD batch fetch: %r", mid)
-
- # If no valid ids remain, skip the DB query
- if ids_int:
- con = None
- try:
- placeholders = ", ".join("?" for _ in ids_int)
- con = duckdb.connect(database=db_path, read_only=True)
- db_rows = con.execute(
- f"SELECT id, title, date, policy_area, url, body_text, voting_results "
- f"FROM motions WHERE id IN ({placeholders})",
- ids_int,
- ).fetchall()
- motion_details = {r[0]: r for r in db_rows}
- except Exception:
- logger.exception("Failed to batch-fetch motion details")
- finally:
- if con:
- con.close()
-
- # Split motions by pole sign
- pos_motions = [m for m in motions if float(m.get("score", 0.0)) >= 0]
- neg_motions = [m for m in motions if float(m.get("score", 0.0)) < 0]
-
- flip = theme_with_flip.get("flip", False) if theme_with_flip else False
- pos_pole = theme_with_flip.get("positive_pole", "") if theme_with_flip else ""
- neg_pole = theme_with_flip.get("negative_pole", "") if theme_with_flip else ""
-
- # Derive left/right labels from flip direction
- # flip=True: positive_pole on left, negative_pole on right
- # flip=False: negative_pole on left, positive_pole on right
- if flip:
- left_pole, right_pole = pos_pole, neg_pole
- left_motions, right_motions = pos_motions, neg_motions
- left_arrow, right_arrow = "▲", "▼"
- else:
- left_pole, right_pole = neg_pole, pos_pole
- left_motions, right_motions = neg_motions, pos_motions
- left_arrow, right_arrow = "▼", "▲"
-
- lcol, rcol = st.columns(2)
-
- with lcol:
- st.markdown(f"**← {left_pole}**")
- for m in left_motions:
- mid = m.get("motion_id")
- raw_title = m.get("title") or f"Motie #{mid}"
- with st.expander(f"{left_arrow} {raw_title}"):
- row = motion_details.get(int(mid)) if mid is not None else None
- if row:
- try:
- date_str = str(row[2])[:10]
- except Exception:
- date_str = "?"
- st.caption(f"📅 {date_str} | {row[3] or '—'}")
- if row[4] and str(row[4]).startswith("http"):
- st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
- if row[5]:
- with st.expander("Toon volledige tekst"):
- st.write(row[5])
- _render_voting_results(row[6])
- else:
- st.caption("_Geen metadata beschikbaar_")
-
- with rcol:
- st.markdown(f"**{right_pole} →**")
- for m in right_motions:
- mid = m.get("motion_id")
- raw_title = m.get("title") or f"Motie #{mid}"
- with st.expander(f"{right_arrow} {raw_title}"):
- row = motion_details.get(int(mid)) if mid is not None else None
- if row:
- try:
- date_str = str(row[2])[:10]
- except Exception:
- date_str = "?"
- st.caption(f"📅 {date_str} | {row[3] or '—'}")
- if row[4] and str(row[4]).startswith("http"):
- st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
- if row[5]:
- with st.expander("Toon volledige tekst"):
- st.write(row[5])
- _render_voting_results(row[6])
- else:
- st.caption("_Geen metadata beschikbaar_")
-
-
-def build_mp_quiz_tab(db_path: str) -> None:
- """Interactive quiz: narrow MPs by asking motion vote questions.
-
- Minimal viable flow:
- - seed with top-N controversial motions (SEED_MOTIONS)
- - present one question at a time, store answers in st.session_state['mp_quiz_votes']
- - after each answer call MotionDatabase.match_mps_for_votes to rank MPs
- - if multiple candidates remain, call choose_discriminating_motions to pick next question
- - stop when unique MP found or no discriminating motions remain
- """
- st.subheader("🧑⚖️ Welk tweede kamerlid ben jij?")
- st.markdown(
- "Beantwoord een paar eenvoudige ja/nee/onthoud vragen over moties om te zien welk Kamerlid het meest op jou lijkt."
- )
-
- SEED_MOTIONS = 8
- MAX_QUESTIONS = 20
-
- # initialize session state
- if "mp_quiz_votes" not in st.session_state:
- st.session_state["mp_quiz_votes"] = {}
- if "mp_quiz_asked" not in st.session_state:
- st.session_state["mp_quiz_asked"] = []
+def build_svd_components_tab(*args, **kwargs):
+ """Build the SVD Components tab."""
+ from analysis.tabs.components import build_svd_components_tab as _impl
- from database import MotionDatabase as _MotionDatabase
+ return _impl(*args, **kwargs)
- db_inst = _MotionDatabase(db_path)
- df = load_motions_df(db_path)
- if df.empty:
- st.warning("Geen moties beschikbaar om de quiz te starten.")
- return
+def build_mp_quiz_tab(*args, **kwargs):
+ """Build the MP Quiz tab."""
+ from analysis.tabs.quiz import build_mp_quiz_tab as _impl
- # seed from motions that actually have individual MP vote records
- seed_ids = db_inst.get_motions_with_individual_votes(k=SEED_MOTIONS)
- if not seed_ids:
- st.warning("Geen individuele stemdata beschikbaar voor de quiz.")
- return
-
- # Determine next motion to ask
- def _next_motion_id():
- # prefer seed motions not yet asked
- for mid in seed_ids:
- if str(mid) not in st.session_state["mp_quiz_votes"]:
- return mid
- # otherwise ask discriminating motion based on remaining candidate MPs
- # compute current candidate set
-
- try:
- user_votes = {
- int(k): v for k, v in st.session_state["mp_quiz_votes"].items()
- }
- ranked = db_inst.match_mps_for_votes(user_votes, limit=200)
- except Exception:
- ranked = []
-
- candidates = [r["mp_name"] for r in ranked]
- excluded = [int(k) for k in st.session_state["mp_quiz_votes"].keys()]
- if not candidates:
- return None
- try:
- next_ids = db_inst.choose_discriminating_motions(candidates, excluded, k=1)
- return next_ids[0] if next_ids else None
- except Exception:
- return None
-
- # show progress and controls
- col1, col2 = st.columns([3, 1])
- with col2:
- st.caption(
- f"Vragen beantwoord: {len(st.session_state['mp_quiz_votes'])}/{MAX_QUESTIONS}"
- )
- if st.button("Reset quiz"):
- st.session_state["mp_quiz_votes"] = {}
- st.session_state["mp_quiz_asked"] = []
- st.rerun()
-
- # main question loop (single question per render, wrapped in a form to avoid
- # premature reruns when the user changes the radio selection)
- next_mid = _next_motion_id()
- if next_mid is None:
- st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.")
- else:
- motion_rows = df[df["id"] == next_mid]
- if motion_rows.empty:
- # motion has votes but isn't in the motions DataFrame — skip it
- st.session_state["mp_quiz_votes"][str(next_mid)] = "Geen stem"
- st.rerun()
- return
- motion_row = motion_rows.iloc[0]
- st.markdown(f"### {motion_row.get('title') or f'Motie #{next_mid}'}")
- if motion_row.get("layman_explanation"):
- st.info(motion_row.get("layman_explanation"))
-
- with st.form(key=f"mp_quiz_form_{next_mid}"):
- choice = st.radio(
- "Wat zou jij stemmen?",
- options=["Voor", "Tegen", "Onthouden", "Geen stem"],
- index=3,
- )
- submitted = st.form_submit_button("Beantwoord en verder")
-
- if submitted:
- st.session_state["mp_quiz_votes"][str(next_mid)] = choice
- st.session_state["mp_quiz_asked"].append(next_mid)
- st.rerun()
-
- # display current ranking
- try:
- user_votes = {int(k): v for k, v in st.session_state["mp_quiz_votes"].items()}
- ranking = db_inst.match_mps_for_votes(user_votes, limit=50)
- except Exception:
- ranking = []
-
- if ranking:
- st.markdown("**Top kandidaten**")
- # show as table
- import pandas as pd
-
- rdf = pd.DataFrame(ranking)
- st.dataframe(rdf.head(10), use_container_width=True)
-
- # check uniqueness
- top_pct = ranking[0]["agreement_pct"] if ranking else 0.0
- top_matches = [r for r in ranking if r["agreement_pct"] == top_pct]
- if len(top_matches) == 1 and top_matches[0]["overlap"] > 0:
- st.success(
- f"Unieke match gevonden: {top_matches[0]['mp_name']} ({top_matches[0]['party']})"
- )
- else:
- if len(st.session_state["mp_quiz_asked"]) >= MAX_QUESTIONS:
- st.warning(
- "Maximaal aantal vragen beantwoord. Je hebt meerdere vergelijkbare kandidaten."
- )
- else:
- st.info("Nog geen unieke match — vraag meer om verder te verfijnen.")
- else:
- st.info("Nog geen antwoorden of geen overlapping met bestaande stemdata.")
-
-
-# ---------------------------------------------------------------------------
-# App entry
-# ---------------------------------------------------------------------------
+ return _impl(*args, **kwargs)
def run_app() -> None:
@@ -3022,13 +473,11 @@ def run_app() -> None:
)
st.title("🏛️ Parlement Explorer")
- # Sidebar
st.sidebar.title("Instellingen")
db_path = "data/motions.db"
window_size = "annual"
show_rejected = st.sidebar.checkbox("Toon verworpen moties", value=False)
- # About section
with st.sidebar.expander("ℹ️ Over", expanded=False):
try:
if _DUCKDB_AVAILABLE:
@@ -3053,8 +502,6 @@ def run_app() -> None:
except Exception as e:
st.warning(f"DB niet bereikbaar: {e}")
- # Main tabs
- # Streamlit tabs compatibility: some older/newer Streamlit builds expose different APIs.
tab_labels = [
"🧭 Politiek Kompas",
"📈 Trajectories",
@@ -3076,7 +523,6 @@ def run_app() -> None:
with tab5:
build_svd_components_tab(db_path)
else:
- # Fallback for environments where `st.tabs` is not available: use a radio selector
selection = st.radio("Tab", tab_labels)
if selection == tab_labels[0]:
build_compass_tab(db_path, window_size)
diff --git a/scheduler.py b/scheduler.py
new file mode 100644
index 0000000..0024654
--- /dev/null
+++ b/scheduler.py
@@ -0,0 +1,170 @@
+"""Automated pipeline scheduling.
+
+Runs the parliamentary embedding pipeline and motion summarization
+on a configurable schedule using the `schedule` library.
+
+Usage:
+ uv run python scheduler.py # start scheduler loop
+ uv run python scheduler.py --once # run once and exit
+ uv run python scheduler.py --pipeline-time 03:00 --summarizer-every 6
+"""
+
+from __future__ import annotations
+
+import argparse
+import logging
+import signal
+import sys
+import time
+from typing import Callable
+
+import schedule
+
+from config import config
+import argparse
+
+from pipeline.run_pipeline import run as run_pipeline
+from summarizer import summarizer
+
+_logger = logging.getLogger(__name__)
+
+
+class PipelineScheduler:
+    """Runs the embedding pipeline and the summarizer, once or on a schedule."""
+
+    def __init__(self, db_path: str = "data/motions.db"):
+        self.db_path = db_path  # handed to the pipeline as ``args.db_path``
+        self._running = False  # loop flag: set by start(), cleared by stop()
+
+    def run_pipeline(self) -> int:
+        """Run the full embedding pipeline; return its exit code (1 on error)."""
+        _logger.info("Starting scheduled pipeline run")
+        try:
+            args = argparse.Namespace(
+                db_path=self.db_path,
+                window_size="annual",
+                start_date=None,
+                end_date=None,
+                svd_k=50,
+                svd_workers=None,
+                text_model=None,
+                text_batch_size=200,
+                skip_metadata=False,
+                skip_extract=False,
+                skip_svd=False,
+                skip_text=False,
+                skip_fusion=False,
+                dry_run=False,
+            )
+            result = run_pipeline(args)
+            _logger.info("Pipeline run completed with code %s", result)
+            return result if isinstance(result, int) else 0  # non-int -> 0
+        except Exception:
+            # Log instead of raising so a failed run cannot end the loop.
+            _logger.exception("Pipeline run failed")
+            return 1
+
+    def run_summarizer(self) -> None:
+        """Run motion summarization; errors are logged rather than raised."""
+        _logger.info("Starting scheduled summarizer run")
+        try:
+            summarizer.update_motion_summaries()
+            _logger.info("Summarizer run completed")
+        except Exception:
+            _logger.exception("Summarizer run failed")
+
+    def schedule_daily(self, time_str: str = "02:00") -> None:
+        """Schedule the pipeline to run daily at *time_str* (``HH:MM``)."""
+        _logger.info("Scheduling daily pipeline run at %s", time_str)
+        schedule.every().day.at(time_str).do(self.run_pipeline)
+
+    def schedule_summarizer(self, every_n_hours: int = 6) -> None:
+        """Schedule the summarizer to run every *every_n_hours* hours."""
+        _logger.info("Scheduling summarizer every %s hours", every_n_hours)
+        schedule.every(every_n_hours).hours.do(self.run_summarizer)
+
+    def _signal_handler(self, signum, frame) -> None:
+        """Signal callback: log *signum* and request a graceful stop."""
+        _logger.info("Received signal %s, shutting down", signum)
+        self.stop()
+
+    def start(self) -> None:
+        """Poll for due jobs once per second until :meth:`stop` is called.
+
+        SIGTERM and SIGINT trigger a graceful stop. When called outside the
+        main thread no handlers can be installed and the loop runs without.
+        """
+        self._running = True
+        try:
+            signal.signal(signal.SIGTERM, self._signal_handler)
+            signal.signal(signal.SIGINT, self._signal_handler)
+        except ValueError:
+            # signal.signal() only works in the main thread; keep running.
+            _logger.warning("Not in main thread; signal handlers not installed")
+        _logger.info("Scheduler started")
+        while self._running:
+            schedule.run_pending()
+            time.sleep(1)
+        _logger.info("Scheduler stopped")
+
+    def stop(self) -> None:
+        """Stop the scheduler loop."""
+        self._running = False
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Create the scheduler's CLI parser; ``--help`` shows option defaults."""
+    cli = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description="Automated pipeline scheduler",
+    )
+
+    # Flag -> add_argument keyword options, registered in help order.
+    option_table = {
+        "--db-path": {"default": "data/motions.db", "help": "Path to the DuckDB file"},
+        "--pipeline-time": {
+            "default": "02:00",
+            "help": "Daily pipeline run time (HH:MM)",
+        },
+        "--summarizer-every": {
+            "type": int,
+            "default": 6,
+            "help": "Run summarizer every N hours",
+        },
+        "--once": {
+            "action": "store_true",
+            "help": "Run pipeline + summarizer once and exit (no scheduling loop)",
+        },
+    }
+    for flag, options in option_table.items():
+        cli.add_argument(flag, **options)
+    return cli
+
+
+def main(argv: list[str] | None = None) -> int:
+    """CLI entry point: run once with ``--once``, otherwise loop on a schedule.
+
+    Returns the pipeline exit code in single-shot mode, ``0`` otherwise.
+    """
+    options = build_parser().parse_args(argv)
+    logging.basicConfig(
+        format="%(asctime)s %(levelname)s %(name)s %(message)s",
+        level=logging.INFO,
+    )
+    runner = PipelineScheduler(db_path=options.db_path)
+    if not options.once:
+        runner.schedule_daily(options.pipeline_time)
+        # A non-positive interval leaves the summarizer unscheduled.
+        if options.summarizer_every > 0:
+            runner.schedule_summarizer(options.summarizer_every)
+        runner.start()
+        return 0
+
+    _logger.info("Running in single-shot mode")
+    exit_code = runner.run_pipeline()
+    runner.run_summarizer()
+    return exit_code
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/tests/test_explorer_decomposition.py b/tests/test_explorer_decomposition.py
new file mode 100644
index 0000000..c8574b5
--- /dev/null
+++ b/tests/test_explorer_decomposition.py
@@ -0,0 +1,95 @@
+"""Tests for explorer.py decomposition (P3-001).
+
+Acceptance criteria:
+- explorer.py must be under 1500 lines.
+- Tab modules must define their build functions locally (not re-export from explorer).
+- No circular imports between explorer.py and analysis.tabs.
+"""
+
+import ast
+import inspect
+import pathlib
+
+
+class TestExplorerDecomposition:
+    """Structural checks that explorer.py was split into analysis/tabs modules."""
+
+    # Tab module path -> name of the build function it must define itself.
+    _TABS = {
+        "analysis/tabs/compass.py": "build_compass_tab",
+        "analysis/tabs/trajectories.py": "build_trajectories_tab",
+        "analysis/tabs/search.py": "build_search_tab",
+        "analysis/tabs/browser.py": "build_browser_tab",
+        "analysis/tabs/components.py": "build_svd_components_tab",
+        "analysis/tabs/quiz.py": "build_mp_quiz_tab",
+    }
+
+    @staticmethod
+    def _imports_explorer(source: str) -> bool:
+        """True if *source* imports ``explorer`` itself, not e.g. ``explorer_data``."""
+        for node in ast.walk(ast.parse(source)):
+            if isinstance(node, ast.Import):
+                modules = [alias.name for alias in node.names]
+            elif isinstance(node, ast.ImportFrom) and node.level == 0:
+                modules = [node.module or ""]
+            else:
+                continue
+            if any(name.split(".")[0] == "explorer" for name in modules):
+                return True
+        return False
+
+    def test_explorer_line_count_under_1500(self):
+        path = pathlib.Path("explorer.py")
+        lines = path.read_text(encoding="utf-8").splitlines()
+        assert len(lines) < 1500, (
+            f"explorer.py has {len(lines)} lines; target is < 1500. "
+            f"Extract tab functions and rendering helpers into analysis/tabs/."
+        )
+
+    def test_tab_modules_define_functions_locally(self):
+        """Each tab module must define its build_*_tab without delegating to explorer."""
+        for module_path, func_name in self._TABS.items():
+            tree = ast.parse(pathlib.Path(module_path).read_text(encoding="utf-8"))
+            func_def = None
+            for node in ast.walk(tree):
+                if isinstance(node, ast.FunctionDef) and node.name == func_name:
+                    func_def = node
+                    break
+            assert func_def is not None, f"{module_path} must define {func_name}"
+            # A delegating wrapper has only a few top-level statements.
+            assert len(func_def.body) > 3, (
+                f"{module_path}.{func_name} looks like a stub "
+                f"({len(func_def.body)} statements). "
+                f"Extract the real implementation from explorer.py."
+            )
+
+    def test_rendering_helpers_extracted(self):
+        """Rendering helpers should not live in explorer.py."""
+        helpers = [
+            "_render_scree_plot",
+            "_build_party_axis_figure",
+            "_render_party_axis_chart",
+            "_render_party_axis_chart_1d",
+            "_render_svd_time_trajectory",
+            "_render_voting_results",
+            "_add_y_direction_annotations",
+        ]
+        source = pathlib.Path("explorer.py").read_text(encoding="utf-8")
+        tree = ast.parse(source)
+        defined = {node.name for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)}
+        for helper in helpers:
+            assert helper not in defined, (
+                f"{helper} should be extracted from explorer.py "
+                f"into analysis/tabs/_rendering.py"
+            )
+
+    def test_no_circular_import_tabs_to_explorer(self):
+        """Tab modules must not import from explorer."""
+        for module_path in [*self._TABS, "analysis/tabs/_rendering.py"]:
+            if not pathlib.Path(module_path).exists():
+                continue
+            source = pathlib.Path(module_path).read_text(encoding="utf-8")
+            assert not self._imports_explorer(source), (
+                f"{module_path} imports explorer — "
+                f"move shared helpers to explorer_data.py or _rendering.py instead"
+            )
diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py
new file mode 100644
index 0000000..3c16787
--- /dev/null
+++ b/tests/test_scheduler.py
@@ -0,0 +1,159 @@
+"""Tests for scheduler.py — automated pipeline scheduling.
+
+TDD: write failing test, implement, refactor.
+"""
+
+from __future__ import annotations
+
+import signal
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+class TestPipelineSchedulerInit:
+    """Constructor state of PipelineScheduler."""
+
+    def test_default_db_path(self):
+        import scheduler
+        created = scheduler.PipelineScheduler()
+        assert not created._running
+        assert created.db_path == "data/motions.db"
+
+    def test_custom_db_path(self):
+        import scheduler
+        created = scheduler.PipelineScheduler(db_path="/tmp/test.db")
+        assert created.db_path == "/tmp/test.db"
+
+
+class TestPipelineSchedulerRunPipeline:
+    """run_pipeline forwards db_path and converts failures into exit code 1."""
+
+    def test_calls_pipeline_run_with_db_path(self):
+        from scheduler import PipelineScheduler
+
+        runner = PipelineScheduler(db_path="/tmp/test.db")
+        with patch("scheduler.run_pipeline", return_value=0) as fake_run:
+            runner.run_pipeline()
+            fake_run.assert_called_once()
+            # The Namespace is the first positional argument of the call.
+            namespace = fake_run.call_args.args[0]
+            assert namespace.db_path == "/tmp/test.db"
+
+    def test_logs_error_on_pipeline_failure(self):
+        from scheduler import PipelineScheduler
+
+        boom = RuntimeError("pipeline failed")
+        with patch("scheduler.run_pipeline", side_effect=boom), patch(
+            "scheduler._logger"
+        ) as fake_logger:
+            assert PipelineScheduler().run_pipeline() == 1
+            fake_logger.exception.assert_called_once()
+
+
+class TestPipelineSchedulerRunSummarizer:
+    """run_summarizer delegates to the summarizer and logs its failures."""
+
+    def test_calls_summarizer_update(self):
+        from scheduler import PipelineScheduler
+
+        with patch("scheduler.summarizer") as fake_summarizer:
+            PipelineScheduler().run_summarizer()
+        fake_summarizer.update_motion_summaries.assert_called_once()
+
+    def test_logs_error_on_summarizer_failure(self):
+        from scheduler import PipelineScheduler
+
+        failure = RuntimeError("summarizer failed")
+        with patch("scheduler.summarizer") as fake_summarizer, patch(
+            "scheduler._logger"
+        ) as fake_logger:
+            fake_summarizer.update_motion_summaries.side_effect = failure
+            PipelineScheduler().run_summarizer()
+        fake_logger.exception.assert_called_once()
+
+
+class TestPipelineSchedulerSchedule:
+    """Jobs must be registered with the right interval, time and callback."""
+
+    def test_schedule_daily_adds_job(self):
+        from scheduler import PipelineScheduler
+        sched = PipelineScheduler()
+        with patch("scheduler.schedule") as mock_schedule:
+            sched.schedule_daily("02:00")
+            at = mock_schedule.every.return_value.day.at
+            at.assert_called_once_with("02:00")
+            at.return_value.do.assert_called_once_with(sched.run_pipeline)
+
+    def test_schedule_summarizer_adds_job(self):
+        from scheduler import PipelineScheduler
+        sched = PipelineScheduler()
+        with patch("scheduler.schedule") as mock_schedule:
+            sched.schedule_summarizer(every_n_hours=6)
+            mock_schedule.every.assert_called_once_with(6)
+            do = mock_schedule.every.return_value.hours.do
+            do.assert_called_once_with(sched.run_summarizer)
+
+
+class TestPipelineSchedulerLoop:
+    """Start/stop behaviour of the polling loop and its signal hook."""
+
+    def test_start_runs_pending_jobs(self):
+        from scheduler import PipelineScheduler
+        loop = PipelineScheduler()
+        sleeps = []
+
+        def stop_on_third_sleep(*args, **kwargs):
+            sleeps.append(args)
+            if len(sleeps) >= 3:
+                loop.stop()
+
+        with patch("scheduler.schedule.run_pending") as fake_pending, patch(
+            "scheduler.time.sleep", side_effect=stop_on_third_sleep
+        ), patch("scheduler.signal.signal"):
+            loop.start()
+            assert fake_pending.called
+            assert not loop._running
+
+    def test_stop_sets_running_false(self):
+        from scheduler import PipelineScheduler
+
+        loop = PipelineScheduler()
+        loop._running = True
+        loop.stop()
+        assert not loop._running
+
+    def test_signal_handler_stops_scheduler(self):
+        from scheduler import PipelineScheduler
+
+        loop = PipelineScheduler()
+        loop._running = True
+        with patch.object(loop, "stop") as fake_stop:
+            loop._signal_handler(signal.SIGINT, None)
+        fake_stop.assert_called_once()
+
+
+class TestSchedulerCLI:
+    """main() wiring: argument parsing, scheduling mode and --once mode."""
+
+    def test_main_parses_args(self):
+        from scheduler import main
+
+        with patch("scheduler.PipelineScheduler") as factory:
+            instance = factory.return_value = MagicMock()
+            exit_code = main(["--pipeline-time", "03:00"])
+        assert exit_code == 0
+        factory.assert_called_once_with(db_path="data/motions.db")
+        instance.schedule_daily.assert_called_once_with("03:00")
+        instance.start.assert_called_once()
+
+    def test_main_custom_db_path(self):
+        from scheduler import main
+
+        with patch("scheduler.PipelineScheduler") as factory:
+            instance = factory.return_value = MagicMock()
+            instance.run_pipeline.return_value = 0
+            exit_code = main(["--db-path", "/tmp/test.db", "--once"])
+        assert exit_code == 0
+        factory.assert_called_once_with(db_path="/tmp/test.db")
+        instance.run_pipeline.assert_called_once()