refactor: decompose explorer.py into analysis/tabs/ and add scheduler

- Extract 6 tab functions from explorer.py (3097 → 543 lines)
- Create analysis/tabs/_rendering.py with shared plotly helpers
- Move data logic to analysis/explorer_data.py
- Add lazy-import wrappers in explorer.py for backward compat
- Add scheduler.py with PipelineScheduler for daily pipeline runs
- Add test_explorer_decomposition.py (5 tests, all pass)
- Add test_scheduler.py (13 tests, all pass)
- Full test suite: 222 passed, 2 skipped
main
Sven Geboers 4 weeks ago
parent 203ae178ca
commit 3bdb43f162
  1. 140
      analysis/explorer_data.py
  2. 796
      analysis/tabs/_rendering.py
  3. 99
      analysis/tabs/browser.py
  4. 204
      analysis/tabs/compass.py
  5. 376
      analysis/tabs/components.py
  6. 134
      analysis/tabs/quiz.py
  7. 88
      analysis/tabs/search.py
  8. 776
      analysis/tabs/trajectories.py
  9. 2656
      explorer.py
  10. 170
      scheduler.py
  11. 95
      tests/test_explorer_decomposition.py
  12. 159
      tests/test_scheduler.py

@ -23,6 +23,7 @@ from analysis.config import CURRENT_PARLIAMENT_PARTIES, _PARTY_NORMALIZE
__all__ = [ __all__ = [
"get_available_windows", "get_available_windows",
"get_uniform_dim_windows", "get_uniform_dim_windows",
"load_positions",
"load_party_map", "load_party_map",
"load_active_mps", "load_active_mps",
"load_mp_vectors_by_window", "load_mp_vectors_by_window",
@ -37,6 +38,9 @@ __all__ = [
"load_motions_df", "load_motions_df",
"query_similar", "query_similar",
"compute_party_axis_scores", "compute_party_axis_scores",
"get_aligned_party_scores",
"compute_party_discipline",
"_get_aligned_trajectory_scores",
] ]
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -567,3 +571,139 @@ def compute_party_axis_scores(
except Exception: except Exception:
logger.exception("Failed to compute party axis scores") logger.exception("Failed to compute party axis scores")
return {} return {}
def load_positions(
    db_path: str, window_size: str = "annual"
) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict]:
    """Build per-window 2D positions via PCA on the aligned SVD vectors.

    Args:
        db_path: Database path; forwarded to the window lookup, the axis
            computation and the axis classifier.
        window_size: When equal to ``"annual"``, windows whose id contains
            ``"-Q"`` are dropped from the result. Any other value keeps
            every computed window.

    Returns:
        ``(positions_by_window, axis_def)`` where ``positions_by_window`` is
        ``{window_id: {entity_name: (x, y)}}`` and ``axis_def`` is the axis
        definition dict produced by ``compute_2d_axes`` (and, when it
        succeeds, refined by ``classify_axes``). ``({}, {})`` is returned
        when no uniform-dimension windows are available.
    """
    from analysis.political_axis import compute_2d_axes

    window_ids = get_uniform_dim_windows(db_path)
    if not window_ids:
        return {}, {}

    positions_by_window, axis_def = compute_2d_axes(
        db_path,
        window_ids=window_ids,
        method="pca",
        pca_residual=True,
        normalize_vectors=True,
    )

    # Axis classification is best-effort: on failure the unclassified
    # axis definition is returned as-is and the error is logged.
    try:
        from analysis.axis_classifier import classify_axes

        axis_def = classify_axes(positions_by_window, axis_def, db_path)
    except Exception:
        logger.exception("classify_axes failed; using generic axis labels")

    if window_size != "annual":
        return positions_by_window, axis_def

    # Annual view: keep only window ids without a quarter marker.
    annual_ids = {wid for wid in window_ids if "-Q" not in wid}
    annual_positions = {
        wid: coords
        for wid, coords in positions_by_window.items()
        if wid in annual_ids
    }
    return annual_positions, axis_def
def get_aligned_party_scores(
    db_path: str,
    window: str,
    active_mps: set | None = None,
    n_components: int = 10,
) -> Dict[str, np.ndarray]:
    """Return the mean aligned component scores per party for one window.

    Scores are computed for every uniform-dimension window in one call to
    ``compute_nd_axes`` and the requested window is then picked from the
    result.

    Args:
        db_path: Database path, forwarded to the helpers.
        window: Window id whose scores are returned.
        active_mps: Optional set of MP names. It is applied only when
            ``window == "current_parliament"``; other windows keep all MPs.
        n_components: Number of leading components to compute and keep per
            MP. Defaults to 10, the value that was previously hard-coded.

    Returns:
        ``{party: mean_score_vector}``. Empty when the window has no scores.
        MPs that cannot be mapped to a party are ignored.
    """
    from analysis.political_axis import compute_nd_axes

    window_ids = get_uniform_dim_windows(db_path)
    scores_by_window, _ = compute_nd_axes(
        db_path, window_ids=window_ids, n_components=n_components
    )

    window_scores = scores_by_window.get(window, {})
    if not window_scores:
        return {}

    if window == "current_parliament" and active_mps is not None:
        window_scores = {
            mp: sc for mp, sc in window_scores.items() if mp in active_mps
        }

    party_map = load_party_map(db_path)

    scores_per_party: Dict[str, List[np.ndarray]] = {}
    for mp_name, scores in window_scores.items():
        # Fall back to the name without any "(...)" suffix when the full
        # name is not a key of the party map.
        party = party_map.get(
            mp_name, party_map.get(mp_name.split("(")[0].strip(), None)
        )
        if party:
            scores_per_party.setdefault(party, []).append(scores[:n_components])

    return {
        party: np.mean(np.vstack(score_list), axis=0)
        for party, score_list in scores_per_party.items()
        if score_list
    }
def compute_party_discipline(
    db_path: str,
    start_date: str,
    end_date: str,
) -> pd.DataFrame:
    """Return per-party voting discipline for a date range.

    This is a thin wrapper: the work is done by
    ``analysis.trajectory.compute_party_discipline``, which is imported
    lazily on each call and receives the arguments unchanged.

    The delegate is documented as computing a Rice index over individual MP
    vote rows (``mp_name LIKE '%,%'``) and returning columns
    ``[party, n_motions, discipline]`` sorted by discipline ascending, or an
    empty DataFrame when there is no qualifying motion or on a DB error.
    NOTE(review): that contract lives in the delegate, not in this wrapper.
    """
    from analysis import trajectory as _trajectory

    discipline = _trajectory.compute_party_discipline(db_path, start_date, end_date)
    return discipline
def _get_aligned_trajectory_scores(
    db_path: str, windows: List[str], n_components: int = 10
) -> Dict[str, Dict[str, List[float]]]:
    """Return mean aligned component scores per party for each window.

    Per-MP scores come from ``compute_nd_axes`` for all requested windows at
    once; they are then grouped by party and averaged.

    Args:
        db_path: Database path, forwarded to the helpers.
        windows: Window ids to compute, in the order they should be visited.
        n_components: Number of leading components to compute and keep.

    Returns:
        ``{window: {party: [score per component]}}``. Windows without scores
        are omitted; MPs without a party mapping are ignored.
    """
    from analysis.political_axis import compute_nd_axes

    scores_by_window, _ = compute_nd_axes(
        db_path, window_ids=windows, n_components=n_components
    )
    if not scores_by_window:
        return {}

    party_of = load_party_map(db_path)

    aligned: Dict[str, Dict[str, List[float]]] = {}
    for window_id in windows:
        mp_scores = scores_by_window.get(window_id, {})
        if not mp_scores:
            continue

        grouped: Dict[str, List[np.ndarray]] = {}
        for mp_name, vector in mp_scores.items():
            # Try the full name first, then the name without a "(...)" suffix.
            fallback = party_of.get(mp_name.split("(")[0].strip(), None)
            party = party_of.get(mp_name, fallback)
            if not party:
                continue
            if party not in grouped:
                grouped[party] = []
            grouped[party].append(vector[:n_components])

        per_party: Dict[str, List[float]] = {}
        for party, vectors in grouped.items():
            if vectors:
                per_party[party] = np.mean(np.vstack(vectors), axis=0).tolist()
        aligned[window_id] = per_party

    return aligned

@ -0,0 +1,796 @@
"""Rendering helpers for explorer tabs.
This module contains all Plotly/Streamlit rendering functions extracted from
explorer.py. It is import-safe: plotly and streamlit are optional.
"""
from __future__ import annotations
import json
import logging
from typing import Dict, List, Optional, Tuple
try:
    import plotly.express as px
    import plotly.graph_objects as go
except Exception:
    # Plotly is optional. When it cannot be imported, `px` is None and `go`
    # is replaced by a tiny stand-in that only records trace data, so the
    # figure-building helpers can still be imported and exercised.
    px = None
    import types

    class _DummyTrace:
        """Record of the trace fields the helpers care about."""

        def __init__(self, **kwargs):
            self.name = kwargs.get("name")
            self.x = kwargs.get("x")
            self.y = kwargs.get("y")
            self.text = kwargs.get("text")
            self.customdata = kwargs.get("customdata")

    class _DummyFigure:
        """Minimal figure: stores traces in ``data``, ignores layout calls."""

        _TRACE_FIELDS = ("name", "x", "y", "text", "customdata")

        def __init__(self):
            self.data = []

        def add_trace(self, trace):
            """Append ``trace``, converting foreign objects to `_DummyTrace`.

            Attribute access is tried first; when an attribute is missing and
            the object offers ``.get`` (e.g. a dict), the value is looked up
            by key instead. Previously the key lookup was unreachable, so
            dict traces lost all their fields.
            """
            if isinstance(trace, _DummyTrace):
                self.data.append(trace)
                return

            is_mapping_like = hasattr(trace, "get")
            values = {}
            for field in self._TRACE_FIELDS:
                value = getattr(trace, field, None)
                if value is None and is_mapping_like:
                    value = trace.get(field)
                values[field] = value
            self.data.append(_DummyTrace(**values))

        def add_annotation(self, *args, **kwargs):
            return None

        def update_layout(self, **kwargs):
            return None

        def update_traces(self, **kwargs):
            return None

        def add_hline(self, **kwargs):
            return None

    go = types.SimpleNamespace(
        Figure=_DummyFigure,
        Scatter=lambda **kwargs: _DummyTrace(**kwargs),
        Bar=lambda **kwargs: _DummyTrace(**kwargs),
    )
try:
    import streamlit as st
except Exception:
    # Streamlit is optional. The stand-in below accepts the calls the tab
    # modules make and returns inert defaults so they can run headless.

    class _NullContext:
        """Context manager that does nothing and does not suppress errors."""

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

    class _DummyColumn(_NullContext):
        """Stand-in for a column; usable both as ``with col:`` and ``col.x()``."""

        def _noop(self, *a, **k):
            return None

        markdown = metric = dataframe = write = text_input = _noop

    def _container_count(args, kwargs, key):
        """Number of containers requested by a ``columns``/``tabs`` call.

        Accepts an int (``columns(3)``) or a sized spec (``columns([3, 1])``),
        given positionally or under keyword ``key``. Defaults to 1.
        """
        spec = args[0] if args else kwargs.get(key)
        if spec is None:
            return 1
        if isinstance(spec, int):
            return spec
        return len(spec)

    def _choice_options(args, kwargs):
        """Options of a selectbox/multiselect call (keyword or 2nd positional)."""
        options = kwargs.get("options")
        if options is None:
            options = args[1] if len(args) > 1 else []
        return options

    class _DummySt(_NullContext):
        """Inert replacement for the ``streamlit`` module object.

        The instance is itself a context manager because ``tabs`` and
        ``sidebar`` hand it back and callers may use those in ``with``.
        """

        def _noop(self, *args, **kwargs):
            return None

        # Pure output calls: nothing to display, nothing to return.
        markdown = subheader = plotly_chart = caption = text_area = json = _noop
        warning = info = error = success = _noop
        rerun = divider = write = dataframe = set_page_config = title = _noop

        def __call__(self, *args, **kwargs):
            # Keeps the older ``st.sidebar()`` call form working now that
            # ``sidebar`` is an attribute-style property.
            return self

        def cache_data(self, *args, **kwargs):
            def _decorator(func):
                return func

            return _decorator

        def checkbox(self, *args, **kwargs):
            return kwargs.get("value", False)

        def selectbox(self, *args, **kwargs):
            opts = _choice_options(args, kwargs)
            return opts[0] if opts else None

        def multiselect(self, *args, **kwargs):
            opts = _choice_options(args, kwargs)
            default = kwargs.get("default")
            if default is not None:
                return default
            return opts[:6] if opts else []

        def number_input(self, *args, **kwargs):
            return kwargs.get("value", 1)

        def slider(self, *args, **kwargs):
            return kwargs.get("value", 0.35)

        def select_slider(self, *args, **kwargs):
            return kwargs.get("value", (None, None))

        def radio(self, *args, **kwargs):
            return kwargs.get("value", None)

        def text_input(self, *args, **kwargs):
            return kwargs.get("value", "")

        def form_submit_button(self, *args, **kwargs):
            return False

        def button(self, *args, **kwargs):
            return False

        def expander(self, *args, **kwargs):
            return _NullContext()

        def form(self, *args, **kwargs):
            return _NullContext()

        def spinner(self, *args, **kwargs):
            return _NullContext()

        def columns(self, *args, **kwargs):
            count = _container_count(args, kwargs, "spec")
            return tuple(_DummyColumn() for _ in range(count))

        def tabs(self, *args, **kwargs):
            count = _container_count(args, kwargs, "tabs")
            return [self for _ in range(count)]

        @property
        def sidebar(self):
            # Attribute access, so ``st.sidebar.selectbox(...)`` and
            # ``with st.sidebar:`` both work.
            return self

        @property
        def session_state(self):
            if not hasattr(self, "_session_state"):
                self._session_state = {}
            return self._session_state

    st = _DummySt()
from analysis.config import PARTY_COLOURS
logger = logging.getLogger(__name__)
def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
    """Draw a scree chart for the leading SVD component importances.

    The first two bars use a darker blue than the rest. A dotted line traces
    the running total of the plotted values on the same y-axis, and a dashed
    horizontal reference line is placed at y = 50. Nothing is rendered when
    ``importances`` is empty.

    Args:
        importances: Importance values; only the first ``n_show`` are plotted,
            in the order given (expected to be sorted descending).
        n_show: Maximum number of components to plot.
    """
    if not importances:
        return

    values = list(importances[:n_show])
    positions = list(range(1, len(values) + 1))

    # Running total of the plotted values, used for the cumulative line and
    # for the upper bound of the y-axis.
    running_totals = []
    total = 0.0
    for value in values:
        total += value
        running_totals.append(total)

    top_n = 2
    colours = []
    for position in positions:
        colours.append("#1565C0" if position <= top_n else "#90CAF9")

    bars = go.Bar(
        x=positions,
        y=values,
        marker_color=colours,
        hovertemplate="As %{x}<br><b>%{y:.1f}%</b> verklaarde variantie<extra></extra>",
        showlegend=False,
    )
    cumulative_line = go.Scatter(
        x=positions,
        y=running_totals,
        mode="lines+markers",
        line={"color": "#F57C00", "width": 2, "dash": "dot"},
        marker={"size": 5, "color": "#F57C00"},
        hovertemplate="As %{x}<br>Cumulatief: <b>%{y:.1f}%</b><extra></extra>",
        name="Cumulatief",
        showlegend=True,
    )

    fig = go.Figure()
    fig.add_trace(bars)
    fig.add_trace(cumulative_line)
    fig.add_hline(
        y=50,
        line_dash="dash",
        line_color="#BDBDBD",
        line_width=1,
        annotation_text="50%",
        annotation_position="right",
        annotation_font_color="#9E9E9E",
        annotation_font_size=11,
    )

    # Value labels just above the highlighted bars.
    for position, value in list(zip(positions, values))[:top_n]:
        fig.add_annotation(
            x=position,
            y=value + 0.3,
            text=f"{value:.1f}%",
            showarrow=False,
            font={"size": 11, "color": "#1565C0"},
            yanchor="bottom",
        )

    title_cfg = {
        "text": "Belang per SVD-as",
        "font": {"size": 13, "color": "#555555"},
        "x": 0.02,
        "xanchor": "left",
    }
    legend_cfg = {
        "orientation": "h",
        "x": 0.5,
        "xanchor": "center",
        "y": 1.08,
        "font": {"size": 11},
    }
    xaxis_cfg = {
        "title": {"text": "As (rang)", "font": {"size": 11}},
        "tickmode": "linear",
        "tick0": 1,
        "dtick": 1,
        "showline": False,
        "showgrid": False,
    }
    yaxis_cfg = {
        "title": {"text": "% van totale variantie", "font": {"size": 11}},
        "showline": False,
        "showgrid": True,
        "gridcolor": "#eeeeee",
        "ticksuffix": "%",
        "range": [0, max(running_totals) * 1.08],
    }
    fig.update_layout(
        height=280,
        margin={"l": 10, "r": 50, "t": 30, "b": 40},
        title=title_cfg,
        legend=legend_cfg,
        xaxis=xaxis_cfg,
        yaxis=yaxis_cfg,
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        bargap=0.25,
    )
    st.plotly_chart(fig, use_container_width=True)
def _build_party_axis_figure(
    party_coords: Dict[str, Tuple[float, float]],
    comp_sel: int,
    theme: dict,
    bootstrap_data: Optional[Dict[str, Dict]] = None,
) -> Optional[go.Figure]:
    """Build a 1D horizontal scatter of party positions on component ``comp_sel``.

    Args:
        party_coords: ``{party: (x, y)}``. The x value is used for
            ``comp_sel == 1`` and the y value for ``comp_sel == 2``. Entries
            that cannot be converted to a float are skipped.
        comp_sel: Component to show; must be 1 or 2.
        theme: Dict read for ``flip`` (negate scores when truthy),
            ``positive_pole`` and ``negative_pole`` (axis title).
        bootstrap_data: Optional ``{party: {...}}`` with ``n_mps``,
            ``ci_lower`` and ``ci_upper``. When given, hover text includes
            N and, if both bounds are readable, the interval; parties with
            ``n_mps == 1`` are drawn as diamonds.

    Returns:
        The figure, or ``None`` when ``party_coords`` is empty or no entry
        yields a usable score.

    Raises:
        ValueError: If ``party_coords`` is non-empty and ``comp_sel`` is not
            1 or 2.
    """
    if not party_coords:
        return None

    if comp_sel not in (1, 2):
        raise ValueError(
            "_build_party_axis_figure only supports comp_sel 1 or 2 when using explicit coords"
        )

    axis_idx = comp_sel - 1
    flip = theme.get("flip", False)

    parties = []
    scores = []
    colours = []
    for party, val in party_coords.items():
        try:
            if hasattr(val, "__len__") and len(val) == 2:
                x, y = val
                score = float(x if axis_idx == 0 else y)
            else:
                score = float(val[axis_idx])
            if flip:
                score = -score
        except Exception:
            # Malformed entry: leave the party out rather than fail the chart.
            continue
        parties.append(party)
        scores.append(score)
        colours.append(PARTY_COLOURS.get(party, "#9E9E9E"))

    if not scores:
        return None

    hover = []
    symbols = []
    if bootstrap_data:
        for p, s in zip(parties, scores):
            bd = bootstrap_data.get(p)
            if bd:
                n_mps = bd.get("n_mps", "?")
                ci_low = None
                ci_high = None
                try:
                    ci_low = float(bd["ci_lower"][axis_idx])
                    ci_high = float(bd["ci_upper"][axis_idx])
                except Exception:
                    # Interval missing or unreadable: fall back to N only.
                    pass
                if ci_low is not None and ci_high is not None:
                    hover.append(
                        f"{p}: {s:.3f} (N={n_mps}, 95%-BI: [{ci_low:.3f}, {ci_high:.3f}])"
                    )
                else:
                    hover.append(f"{p}: {s:.3f} (N={n_mps})")
                symbols.append("diamond" if n_mps == 1 else "circle")
            else:
                hover.append(f"{p}: {s:.3f}")
                symbols.append("circle")
        marker_kwargs = {"size": 14, "color": colours, "symbol": symbols}
    else:
        hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]
        marker_kwargs = {"size": 14, "color": colours}

    # Pad each bound outward by 15% of its magnitude. Multiplying by 1.15
    # (the previous approach) moves a positive minimum or a negative maximum
    # inward, so the baseline no longer spanned all markers when every score
    # had the same sign. For mixed-sign data the result is unchanged.
    lowest, highest = min(scores), max(scores)
    x_min = lowest - abs(lowest) * 0.15
    x_max = highest + abs(highest) * 0.15
    if x_min == x_max:
        x_min, x_max = x_min - 1, x_max + 1

    fig = go.Figure()

    # Grey baseline the markers sit on.
    fig.add_trace(
        go.Scatter(
            x=[x_min, x_max],
            y=[0, 0],
            mode="lines",
            line={"color": "#cccccc", "width": 1},
            hoverinfo="skip",
            showlegend=False,
        )
    )

    scatter_kwargs = {
        "x": scores,
        "y": [0] * len(scores),
        "mode": "markers+text",
        "text": parties,
        "textposition": "top center",
        "marker": marker_kwargs,
        "hovertext": hover,
        "hoverinfo": "text",
        "showlegend": False,
    }
    fig.add_trace(go.Scatter(**scatter_kwargs))

    left_label = theme.get("negative_pole", "")
    right_label = theme.get("positive_pole", "")

    fig.update_layout(
        height=160,
        margin={"l": 10, "r": 10, "t": 10, "b": 30},
        xaxis={
            "title": f"{left_label} | {right_label}",
            "showticklabels": False,
            "showline": False,
            "showgrid": False,
            "zeroline": False,
        },
        yaxis={"visible": False, "range": [-1, 2]},
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
    )
    return fig
def _render_party_axis_chart(
    party_coords: Dict[str, Tuple[float, float]],
    comp_sel: int,
    theme: dict,
    bootstrap_data: Optional[Dict[str, Dict]] = None,
) -> None:
    """Show the party-axis figure for component ``comp_sel`` in Streamlit.

    The figure is produced by ``_build_party_axis_figure`` from the same
    arguments. When that returns ``None`` a short "no data" caption is shown
    instead of a chart.
    """
    figure = _build_party_axis_figure(party_coords, comp_sel, theme, bootstrap_data)
    if figure is not None:
        st.plotly_chart(figure, use_container_width=True)
        return
    st.caption("_Partijdata niet beschikbaar voor deze as._")
def _render_party_axis_chart_1d(
    party_coords: Dict[str, Tuple[float, ...]],
    comp_sel: int,
    theme: dict,
) -> None:
    """Render a 1D horizontal scatter of party positions on one SVD component.

    Parties are drawn as labelled markers on a horizontal baseline; the axis
    title is built from the theme's negative and positive pole labels.

    Args:
        party_coords: ``{party: (score, ...)}``; only the first element of
            each tuple is plotted. Entries that cannot be converted to a
            float are skipped.
        comp_sel: SVD component number (1-indexed). Not used for indexing
            here, since the caller already selected the component.
        theme: Dict read for ``flip`` (negate scores when truthy),
            ``positive_pole`` and ``negative_pole``.
    """
    if not party_coords:
        st.caption("_Partijdata niet beschikbaar voor deze as._")
        return

    parties = []
    scores = []
    colours = []
    for party, coords in party_coords.items():
        try:
            score = float(coords[0])
        except Exception:
            # Malformed entry: leave the party out rather than fail the chart.
            continue
        parties.append(party)
        scores.append(score)
        colours.append(PARTY_COLOURS.get(party, "#9E9E9E"))

    if not scores:
        st.caption("_Partijdata niet beschikbaar voor deze as._")
        return

    if theme.get("flip", False):
        scores = [-s for s in scores]

    hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]

    # Pad each bound outward by 15% of its magnitude. Multiplying by 1.15
    # (the previous approach) moves a positive minimum or a negative maximum
    # inward, so the baseline did not span all markers when every score had
    # the same sign. For mixed-sign data the result is unchanged.
    lowest, highest = min(scores), max(scores)
    x_min = lowest - abs(lowest) * 0.15
    x_max = highest + abs(highest) * 0.15
    if x_min == x_max:
        x_min, x_max = x_min - 1, x_max + 1

    fig = go.Figure()

    # Grey baseline the markers sit on.
    fig.add_trace(
        go.Scatter(
            x=[x_min, x_max],
            y=[0, 0],
            mode="lines",
            line={"color": "#cccccc", "width": 1},
            hoverinfo="skip",
            showlegend=False,
        )
    )

    fig.add_trace(
        go.Scatter(
            x=scores,
            y=[0] * len(scores),
            mode="markers+text",
            text=parties,
            textposition="top center",
            marker={"size": 14, "color": colours},
            hovertext=hover,
            hoverinfo="text",
            showlegend=False,
        )
    )

    left_label = theme.get("negative_pole", "")
    right_label = theme.get("positive_pole", "")

    fig.update_layout(
        height=160,
        margin={"l": 10, "r": 10, "t": 10, "b": 30},
        xaxis={
            "title": f"{left_label} | {right_label}",
            "showticklabels": False,
            "showline": False,
            "showgrid": False,
            "zeroline": False,
        },
        yaxis={"visible": False, "range": [-1, 2]},
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
    )

    st.plotly_chart(fig, use_container_width=True)
def _render_svd_time_trajectory(
    party_scores_by_window: Dict[str, Dict[str, List[float]]],
    comp_sel: int,
    theme: dict,
    selected_parties: List[str],
) -> None:
    """Render party positions on one SVD component across time windows.

    Each window is a horizontal row (``current_parliament`` first, then the
    remaining window ids in descending order); each selected party is a line
    with labelled markers connecting its score in every window where it has
    one.

    Args:
        party_scores_by_window: ``{window_id: {party_name: [scores]}}``.
        comp_sel: SVD component number (1-indexed) to read from each score
            list.
        theme: Dict read for ``positive_pole`` and ``negative_pole`` (axis
            title). NOTE(review): ``flip`` is not applied here, unlike the
            single-window charts; presumably the scores arrive already
            flip-corrected, to be confirmed against the caller.
        selected_parties: Party names to display.
    """
    if not party_scores_by_window or not selected_parties:
        st.caption("_Geen data beschikbaar voor tijdtraject._")
        return

    idx = comp_sel - 1

    # Row order: current parliament on top, then newest window id first.
    sorted_windows = []
    if "current_parliament" in party_scores_by_window:
        sorted_windows.append("current_parliament")
    sorted_windows.extend(
        sorted(
            (w for w in party_scores_by_window if w != "current_parliament"),
            reverse=True,
        )
    )

    party_trajectories: Dict[str, List[Tuple[str, float]]] = {}
    for window in sorted_windows:
        scores_by_party = party_scores_by_window.get(window, {})
        for party in selected_parties:
            scores = scores_by_party.get(party, [])
            if scores and len(scores) > idx:
                try:
                    score = float(scores[idx])
                except (ValueError, TypeError):
                    continue
                party_trajectories.setdefault(party, []).append((window, score))

    if not party_trajectories:
        st.caption("_Geen data beschikbaar voor geselecteerde partijen._")
        return

    all_scores = [s for traj in party_trajectories.values() for _, s in traj]
    if not all_scores:
        st.caption("_Geen scores beschikbaar._")
        return

    # Pad each bound outward by 15% of its magnitude. The x-axis range is
    # forced to [x_min, x_max] below, so the previous `min * 1.15` /
    # `max * 1.15` padding pushed same-sign extremes outside the visible
    # range. For mixed-sign data the result is unchanged.
    lowest, highest = min(all_scores), max(all_scores)
    x_min = lowest - abs(lowest) * 0.15
    x_max = highest + abs(highest) * 0.15
    if x_min == x_max:
        x_min, x_max = x_min - 1, x_max + 1

    window_to_y = {w: i for i, w in enumerate(sorted_windows)}

    fig = go.Figure()

    # One grey guide line per window row.
    for window in sorted_windows:
        y_pos = window_to_y[window]
        fig.add_trace(
            go.Scatter(
                x=[x_min, x_max],
                y=[y_pos, y_pos],
                mode="lines",
                line={"color": "#cccccc", "width": 1},
                hoverinfo="skip",
                showlegend=False,
            )
        )

    for party in selected_parties:
        traj = party_trajectories.get(party)
        if not traj:
            continue

        x_vals = [score for _, score in traj]
        y_vals = [window_to_y[window] for window, _ in traj]
        color = PARTY_COLOURS.get(party, "#9E9E9E")

        # Connecting line first, then the labelled markers on top of it.
        fig.add_trace(
            go.Scatter(
                x=x_vals,
                y=y_vals,
                mode="lines",
                line={"color": color, "width": 2},
                hoverinfo="skip",
                showlegend=False,
            )
        )

        hover_texts = [f"{party}<br>{window}: {score:.3f}" for window, score in traj]
        fig.add_trace(
            go.Scatter(
                x=x_vals,
                y=y_vals,
                mode="markers+text",
                text=[party] * len(traj),
                textposition="top center",
                marker={"size": 12, "color": color},
                hovertext=hover_texts,
                hoverinfo="text",
                showlegend=False,
            )
        )

    left_label = theme.get("negative_pole", "")
    right_label = theme.get("positive_pole", "")

    y_labels = {
        window_to_y[window]: ("Huidig" if window == "current_parliament" else window)
        for window in sorted_windows
    }

    fig.update_layout(
        height=max(400, len(sorted_windows) * 60 + 100),
        margin={"l": 80, "r": 10, "t": 10, "b": 30},
        xaxis={
            "title": f"{left_label} | {right_label}",
            "range": [x_min, x_max],
            "showticklabels": False,
            "showline": False,
            "showgrid": True,
            "gridcolor": "rgba(0,0,0,0.1)",
            "zeroline": True,
            "zerolinecolor": "rgba(0,0,0,0.2)",
        },
        yaxis={
            "tickvals": list(y_labels.keys()),
            "ticktext": list(y_labels.values()),
            "tickmode": "array",
            "autorange": "reversed",
            "showgrid": False,
        },
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
    )

    st.plotly_chart(fig, use_container_width=True)
def _render_voting_results(voting_results_json) -> None:
    """Render a voting-results blob as one markdown line per vote value.

    Args:
        voting_results_json: Either a JSON string or an already-parsed dict
            of the form ``{party_or_mp: vote}``. Falsy input, non-dict data
            and empty dicts render nothing.

    Votes are lower-cased and stripped, then listed in the order voor, tegen,
    onthouden, afwezig, followed by any other values encountered. Rendering
    is best-effort: failures are logged and never raised to the caller.
    """
    if not voting_results_json:
        return

    try:
        vdata = (
            json.loads(voting_results_json)
            if isinstance(voting_results_json, str)
            else voting_results_json
        )
    except (TypeError, ValueError):
        # Malformed JSON in the stored row; previously swallowed silently.
        logger.warning("Could not parse voting_results JSON", exc_info=True)
        return

    if not isinstance(vdata, dict) or not vdata:
        return

    try:
        by_vote: Dict[str, List[str]] = {}
        for actor, vote in vdata.items():
            vote_str = str(vote).lower().strip()
            by_vote.setdefault(vote_str, []).append(str(actor))

        vote_order = ["voor", "tegen", "onthouden", "afwezig"]
        # NOTE(review): three of these values are empty strings; they look
        # like emoji lost in transit - confirm against the repository.
        vote_emoji = {"voor": "", "tegen": "", "onthouden": "🟡", "afwezig": ""}

        rows_shown = False
        for v in vote_order + [k for k in by_vote if k not in vote_order]:
            actors = by_vote.get(v)
            if not actors:
                continue
            emoji = vote_emoji.get(v, "")
            st.markdown(
                f"**{emoji} {v.capitalize()}** ({len(actors)}): {', '.join(sorted(actors))}"
            )
            rows_shown = True

        if not rows_shown:
            st.caption("_Geen stemuitslag beschikbaar_")
    except Exception:
        # Keep the page alive, but leave a trace instead of hiding the error.
        logger.exception("Failed to render voting results")
def _add_y_direction_annotations(fig: go.Figure) -> None:
    """Annotate the Y axis with "▲ Progressief" on top and "▼ Conservatief" below.

    Both labels are positioned in paper coordinates at x = -0.07, without an
    arrow, in a small grey font. ``fig`` is modified in place.
    """
    shared = {
        "xref": "paper",
        "yref": "paper",
        "x": -0.07,
        "showarrow": False,
        "font": {"size": 11, "color": "#666666"},
    }
    labels = ((1.02, "▲ Progressief"), (-0.06, "▼ Conservatief"))
    for y_pos, label in labels:
        fig.add_annotation(y=y_pos, text=label, xanchor="center", **shared)

@ -1,18 +1,95 @@
"""Browser tab for the parliamentary explorer. """Browser tab for the parliamentary explorer."""
This module will contain the browser tab implementation.
Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
"""
from __future__ import annotations from __future__ import annotations
import pandas as pd
import analysis.explorer_data as explorer_data
from analysis.tabs._rendering import _render_voting_results, st
def build_browser_tab(db_path: str, show_rejected: bool) -> None:
    """Build the Motie Browser tab.

    Shows a filterable, sortable table of motions and, below it, a detail
    view for the motion id entered by the user (title, date, controversy,
    link, voting result and similar motions).

    Args:
        db_path: Database path, forwarded to the data loaders.
        show_rejected: When false, rows whose stripped title equals
            ``"Verworpen."`` are hidden.
    """
    st.subheader("Motie Browser")

    motions = explorer_data.load_motions_df(db_path)
    if motions.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        is_rejected = motions["title"].fillna("").str.strip() == "Verworpen."
        motions = motions[~is_rejected]

    # --- Filter / sort controls -------------------------------------------
    year_col, controversy_col, sort_col_ui = st.columns(3)
    with year_col:
        year_values = sorted(motions["year"].dropna().astype(int).unique().tolist())
        year_choice = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in year_values])
    with controversy_col:
        controversy_floor = st.slider(
            "Min. controverse",
            min_value=0.0,
            max_value=1.0,
            value=0.0,
            step=0.05,
            key="browser_controversy",
        )
    with sort_col_ui:
        sort_choice = st.selectbox(
            "Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"]
        )

    # --- Apply filters and ordering ---------------------------------------
    filtered = motions.copy()
    if year_choice != "(Alle)":
        filtered = filtered[filtered["year"] == int(year_choice)]
    if controversy_floor > 0:
        filtered = filtered[filtered["controversy_score"] >= controversy_floor]

    # label -> (column, ascending)
    ordering = {
        "Datum (nieuw)": ("date", False),
        "Controverse": ("controversy_score", False),
        "Marge": ("winning_margin", True),
    }
    order_column, ascending = ordering[sort_choice]
    filtered = filtered.sort_values(by=order_column, ascending=ascending)

    wanted = ["id", "title", "date", "controversy_score", "winning_margin"]
    shown_columns = [col for col in wanted if col in filtered.columns]
    st.dataframe(
        filtered[shown_columns].reset_index(drop=True),
        use_container_width=True,
        height=350,
    )

    # --- Detail view ------------------------------------------------------
    st.divider()
    st.markdown("**Detail weergave** — vul een motie-ID in:")

    has_rows = not filtered.empty
    sel_id = st.number_input(
        "Motie ID",
        min_value=int(filtered["id"].min()) if has_rows else 1,
        max_value=int(filtered["id"].max()) if has_rows else 99999,
        value=int(filtered["id"].iloc[0]) if has_rows else 1,
        step=1,
    )

    # The lookup runs against the unfiltered-by-year frame, so an id outside
    # the current table can still be shown.
    matches = motions[motions["id"] == sel_id]
    if matches.empty:
        return

    row = matches.iloc[0]
    st.markdown(f"### {row.get('title') or 'Onbekend'}")
    date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
    st.caption(
        f"📅 {date_str} | 🔥 Controverse: {row.get('controversy_score', 0):.2f}"
    )

    url = row.get("url")
    if url and str(url).startswith("http"):
        st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

    st.markdown("**Stemuitslag:**")
    _render_voting_results(row.get("voting_results"))

    sim = explorer_data.query_similar(db_path, int(sel_id), top_k=10)
    if sim.empty:
        st.caption("_Nog geen vergelijkbare moties beschikbaar voor deze motie_")
        return

    st.markdown("**Vergelijkbare moties:**")
    st.dataframe(
        sim[["title", "score", "date", "policy_area"]],
        use_container_width=True,
    )

@ -1,20 +1,200 @@
"""Compass tab for the parliamentary explorer. """Compass tab for the parliamentary explorer."""
This module will contain the compass tab implementation.
Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
"""
from __future__ import annotations from __future__ import annotations
from typing import List import datetime as _dt
import re
from typing import Dict, Tuple
import numpy as np
import pandas as pd
from analysis import config
import analysis.explorer_data as explorer_data
from analysis.tabs._rendering import px, st
PARTY_COLOURS = config.PARTY_COLOURS
def build_compass_tab(db_path: str, window_size: str) -> None:
    """Build the Politiek Kompas tab.

    Plots MPs (or party means) of the selected window as a 2D scatter and
    appends an explanatory text about voting discipline.

    Args:
        db_path: Database path, forwarded to the data loaders.
        window_size: Accepted for interface compatibility but not used:
            positions are always loaded with ``"annual"``.
    """
    st.subheader("Politiek Kompas")
    st.markdown(
        "2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)."
    )

    # Compass always uses annual windows regardless of the sidebar window_size setting.
    positions_by_window, axis_def = explorer_data.load_positions(db_path, "annual")
    if axis_def is None:
        axis_def = {}

    if not positions_by_window:
        st.warning(
            "Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid."
        )
        return

    party_map = explorer_data.load_party_map(db_path)
    active_mps = explorer_data.load_active_mps(db_path)

    # Offer every year except the running calendar year, plus the current
    # parliament (listed last) when it is available.
    _current_year = str(_dt.date.today().year)
    year_windows = sorted(
        w
        for w in positions_by_window
        if w != "current_parliament" and w != _current_year
    )
    has_current = "current_parliament" in positions_by_window
    windows = year_windows + (["current_parliament"] if has_current else [])

    _SPARSE_YEARS = {"2016", "2017", "2018"}
    _THRESHOLD = 0.65

    def _window_label(w: str) -> str:
        if w == "current_parliament":
            return "Huidig parlement"
        if w in _SPARSE_YEARS:
            # NOTE(review): returns the bare id, same as the default branch;
            # looks like a marker suffix was intended - confirm.
            return f"{w}"
        return w

    col1, col2 = st.columns([3, 1])

    with col2:
        window_idx = st.selectbox(
            "Jaar",
            options=windows,
            index=len(windows) - 1,
            format_func=_window_label,
        )
        level = st.radio(
            "Weergave",
            options=["Kamerleden", "Partijen"],
            index=0,
            horizontal=True,
        )
        min_mps = st.number_input(
            "Min. Kamerleden per partij",
            min_value=1,
            max_value=20,
            value=3,
            step=1,
            help="Partijen met minder dan dit aantal zetels worden niet weergegeven.",
        )

    pos = positions_by_window.get(window_idx, {})
    if not pos:
        st.info(f"Geen data voor venster {window_idx}")
        return

    if window_idx == "current_parliament":
        pos = {mp: xy for mp, xy in pos.items() if mp in active_mps}

    def _strip_paren(name: str) -> str:
        return re.sub(r"\s*\([^)]*\)", "", name).strip()

    # Merge name variants that differ only by a "(...)" suffix. All variants
    # are collected first and averaged once; the previous pairwise running
    # average weighted later variants more heavily when there were 3 or more.
    variants: Dict[str, list] = {}
    for name, (x, y) in pos.items():
        variants.setdefault(_strip_paren(name), []).append((x, y))
    deduped: Dict[str, Tuple[float, float]] = {
        base: (
            sum(px_ for px_, _ in points) / len(points),
            sum(py_ for _, py_ in points) / len(points),
        )
        for base, points in variants.items()
    }
    pos = deduped

    rows = []
    for name, (x, y) in pos.items():
        party = party_map.get(name) or party_map.get(_strip_paren(name), "Unknown")
        rows.append({"name": name, "x": x, "y": y, "party": party})

    df_pos = pd.DataFrame(rows)

    # Hide parties with fewer than `min_mps` members as well as unmapped MPs.
    party_counts = df_pos[df_pos["party"] != "Unknown"]["party"].value_counts()
    valid_parties = set(party_counts[party_counts >= min_mps].index)
    df_pos = df_pos[df_pos["party"].isin(valid_parties)]

    if df_pos.empty:
        st.info("Geen partijen met genoeg Kamerleden voor dit venster.")
        return

    _raw_x = axis_def.get("x_label")
    _raw_y = axis_def.get("y_label")
    try:
        from analysis.axis_classifier import display_label_for_modal

        _x_label = display_label_for_modal(_raw_x, "x")
        _y_label = display_label_for_modal(_raw_y, "y")
    except Exception:
        from analysis.svd_labels import get_fallback_labels

        _x_fallback, _y_fallback = get_fallback_labels()
        _x_label = _raw_x or _x_fallback
        _y_label = _raw_y or _y_fallback

    if px is None:
        # The rendering module sets `px` to None when plotly cannot be
        # imported; report that instead of failing on `px.scatter`.
        st.error("Plotly is niet beschikbaar; het kompas kan niet worden getekend.")
        return

    if level == "Partijen":
        df_party = df_pos.groupby("party", as_index=False).agg(
            x=("x", "mean"), y=("y", "mean"), n=("name", "count")
        )
        df_party["name"] = df_party["party"]
        colour_map = {
            p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_party["party"].unique()
        }
        fig = px.scatter(
            df_party,
            x="x",
            y="y",
            color="party",
            text="party",
            hover_name="party",
            hover_data={"party": False, "x": ":.3f", "y": ":.3f", "n": True},
            color_discrete_map=colour_map,
            title=f"Politiek Kompas — {_window_label(window_idx)} (partijen)",
            labels={
                "x": _x_label,
                "y": _y_label,
                "n": "Kamerleden",
            },
        )
        fig.update_traces(textposition="top center", marker_size=14)
    else:
        colour_map = {
            p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique()
        }
        fig = px.scatter(
            df_pos,
            x="x",
            y="y",
            color="party",
            hover_name="name",
            hover_data={"party": True, "x": ":.3f", "y": ":.3f"},
            color_discrete_map=colour_map,
            title=f"Politiek Kompas — {_window_label(window_idx)}",
            labels={"x": _x_label, "y": _y_label},
        )

    fig.update_layout(
        height=600,
        legend_title_text="Partij",
        xaxis={"range": [-1, 1]},
        yaxis={"range": [-0.6, 0.6]},
    )

    with col1:
        st.plotly_chart(fig, use_container_width=True)

        # Show the x-axis interpretation only when its quality score for
        # this window is below the threshold.
        _x_interp = axis_def.get("x_interpretation", {}).get(window_idx, "")
        if (
            _x_interp
            and axis_def.get("x_quality", {}).get(window_idx, 1.0) < _THRESHOLD
        ):
            st.caption(_x_interp)

    # Voting discipline analysis
    st.markdown("---")
    st.markdown(
        "**Stemdiscipline analyse:** De Rice-index meet hoe eensgezind partijen stemmen "
        "tijdens hoofdelijke stemmingen. Een score van 100% betekent dat alle MPs van "
        "een partij hetzelfde stemden; 50% wijst op een gelijke splitsing binnen de partij. "
        "Partijen met hoge discipline (>95%) zoals PVV en SGP stemmen als een blok, wat "
        "wijst op sterke partijdiscipline en homogene membership. Lagere discipline (<85%) "
        "bij partijen als PvdA of SP kan duiden op interne factiestrijd, gewetensvragen "
        "bij ethische thema's, of een brede ideologische koers die ruimte laat voor "
        "afwijkende meningen. De discipline varieert ook per onderwerp — ethische kwesties "
        "tonen vaak meer interne verschillen dan economische thema's."
    )

@ -1,18 +1,374 @@
"""SVD Components tab for the parliamentary explorer. """SVD Components tab for the parliamentary explorer."""
This module will contain the SVD components tab implementation.
Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
"""
from __future__ import annotations from __future__ import annotations
import datetime as _dt
import logging
import os
from typing import Dict, List, Tuple
import numpy as np
from analysis import config
import analysis.explorer_data as explorer_data
from analysis.tabs._rendering import (
_render_party_axis_chart_1d,
_render_scree_plot,
_render_svd_time_trajectory,
_render_voting_results,
st,
)
try:
import duckdb
except Exception:
duckdb = None # type: ignore
SVD_THEMES = config.SVD_THEMES
KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES
logger = logging.getLogger(__name__)
def _svd_group_motions(rows: list) -> Dict[int, list]:
    """Bucket motion rows by SVD component, keeping the first row per motion id."""
    grouped: Dict[int, list] = {}
    seen_ids: Dict[int, set] = {}
    for row in rows:
        comp = int(row.get("component", 0))
        bucket = grouped.setdefault(comp, [])
        seen = seen_ids.setdefault(comp, set())
        motion_id = row.get("motion_id")
        if motion_id in seen:
            continue
        seen.add(motion_id)
        bucket.append(row)
    return grouped


def _svd_axis_flips(party_scores: dict, n_components: int = 10) -> Dict[int, bool]:
    """Decide per component (1-based key) whether the axis must be mirrored.

    An axis is flipped when the canonical right-wing parties average lower than
    the canonical left-wing parties on it. Components for which either group has
    no score are reported as not flipped.
    """
    try:
        from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT
    except Exception:
        # Best effort: without the canonical party lists the caller falls back
        # to the flip configured on the theme.
        logger.warning("Canonical party lists unavailable", exc_info=True)
        return {}

    flips: Dict[int, bool] = {}
    for comp_idx in range(n_components):
        right_scores = []
        left_scores = []
        for party, scores in party_scores.items():
            # Skip parties whose score vector is shorter than this component
            # instead of aborting the whole computation on an IndexError.
            if comp_idx >= len(scores):
                continue
            if party in CANONICAL_RIGHT:
                right_scores.append(scores[comp_idx])
            elif party in CANONICAL_LEFT:
                left_scores.append(scores[comp_idx])
        flips[comp_idx + 1] = bool(
            right_scores
            and left_scores
            and np.mean(right_scores) < np.mean(left_scores)
        )
    return flips


def _svd_fetch_motion_details(db_path: str, motions: list) -> Dict[int, tuple]:
    """Batch-fetch motion rows from DuckDB, keyed by integer motion id.

    Returns an empty dict when duckdb is not installed, no usable ids are
    present, or the query fails (the failure is logged).
    """
    ids_int: List[int] = []
    for motion in motions:
        mid = motion.get("motion_id")
        if mid is None:
            continue
        try:
            ids_int.append(int(mid))
        except (TypeError, ValueError, OverflowError):
            logger.warning("Skipping invalid motion id in SVD batch fetch: %r", mid)
    if not ids_int or duckdb is None:
        return {}

    con = None
    try:
        # Only "?" placeholders are interpolated; the ids are bound parameters.
        placeholders = ", ".join("?" for _ in ids_int)
        con = duckdb.connect(database=db_path, read_only=True)
        db_rows = con.execute(
            f"SELECT id, title, date, policy_area, url, body_text, voting_results "
            f"FROM motions WHERE id IN ({placeholders})",
            ids_int,
        ).fetchall()
        return {r[0]: r for r in db_rows}
    except Exception:
        logger.exception("Failed to batch-fetch motion details")
        return {}
    finally:
        if con:
            con.close()


def _svd_motion_score(motion: dict) -> float:
    """Return the motion's score as a float, treating missing/invalid as 0.0."""
    try:
        return float(motion.get("score") or 0.0)
    except (TypeError, ValueError):
        return 0.0


def _svd_render_motion_column(
    header: str, arrow: str, motions: list, motion_details: Dict[int, tuple]
) -> None:
    """Render one pole's motions as expanders in the current Streamlit container."""
    st.markdown(header)
    for motion in motions:
        mid = motion.get("motion_id")
        raw_title = motion.get("title") or f"Motie #{mid}"
        with st.expander(f"{arrow} {raw_title}"):
            try:
                row = motion_details.get(int(mid)) if mid is not None else None
            except (TypeError, ValueError, OverflowError):
                # Same ids that the batch fetch skipped: no metadata for them.
                row = None
            if not row:
                st.caption("_Geen metadata beschikbaar_")
                continue
            # Row layout: id, title, date, policy_area, url, body_text, voting_results
            date_str = str(row[2])[:10] if row[2] is not None else "?"
            st.caption(f"📅 {date_str} | {row[3] or ''}")
            if row[4] and str(row[4]).startswith("http"):
                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
            if row[5]:
                with st.expander("Toon volledige tekst"):
                    st.write(row[5])
            _render_voting_results(row[6])


def build_svd_components_tab(db_path: str) -> None:
    """Build the SVD components tab.

    Reads thoughts/explorer/top_svd_top_motions.json, lets the user pick a
    component and a window, draws either the 1-D party chart for that window or
    the party trajectory over all windows, and lists the component's top
    motions split by the sign of their score.

    Party positions for every component come from
    ``explorer_data.get_aligned_party_scores``.

    Args:
        db_path: Path to the DuckDB database passed to the data loaders.
    """
    # NOTE(review): the original indentation was lost; which widgets live inside
    # ``with col1`` / ``with col2`` was reconstructed — confirm the layout.
    st.subheader("🔬 SVD Assen — politieke polarisatiethema's")
    st.markdown(
        "Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen "
        "van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren "
        "het spanningsveld dat de as beschrijft."
    )

    scree_importances = explorer_data.load_scree_data(db_path)
    if scree_importances:
        st.markdown(
            "**Scree-plot** — het relatieve gewicht van elke SVD-as. "
            "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; "
            "latere assen (7+) zijn fragiel en mogelijk niet boven ruisniveau."
        )
        _render_scree_plot(scree_importances)

    json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
    if not os.path.exists(json_path):
        st.warning(
            f"Top-SVD data not found at {json_path}. Run the importance job to generate it."
        )
        return
    try:
        import json

        with open(json_path, "r", encoding="utf-8") as fh:
            payload = json.load(fh)
        if not isinstance(payload, dict):
            raise ValueError("expected a JSON object at top level")
    except Exception as e:
        st.error(f"Failed to load SVD importance JSON: {e}")
        return

    window = payload.get("window")
    rows = payload.get("rows", [])
    if not rows:
        st.info("Geen top-moties in dataset")
        return
    st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**")

    comp_map = _svd_group_motions(rows)
    comp_options = sorted(comp_map)

    def _comp_label(c: int) -> str:
        lbl = SVD_THEMES.get(c, {}).get("label", "")
        return f"As {c}{lbl}" if lbl else f"As {c}"

    comp_display = [_comp_label(c) for c in comp_options]

    party_scores_default = explorer_data.load_party_axis_scores(db_path)
    party_mp_vectors = explorer_data.load_party_mp_vectors(db_path)

    col1, col2 = st.columns([2, 1])
    with col2:
        comp_sel_idx = st.selectbox(
            "Selecteer SVD-as",
            options=list(range(len(comp_options))),
            format_func=lambda i: comp_display[i],
            index=0,
        )
        comp_sel = comp_options[comp_sel_idx]
        min_mps = st.number_input(
            "Min. Kamerleden per partij",
            min_value=1,
            max_value=20,
            value=1,
            step=1,
            help="Partijen met minder dan dit aantal Kamerleden worden niet weergegeven.",
        )
        view_mode = st.radio(
            "Weergave",
            options=["Enkel venster", "Tijdtraject"],
            index=0,
            help="Enkel venster: toont posities voor één tijdsvenster. Tijdtraject: toont hoe partijen over tijd bewegen op deze as.",
        )
        selected_parties_for_trajectory: list = []
        if view_mode == "Tijdtraject":
            all_parties = (
                sorted(party_scores_default.keys()) if party_scores_default else []
            )
            default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties][:8]
            selected_parties_for_trajectory = st.multiselect(
                "Partijen om te tonen",
                options=all_parties,
                default=default_parties,
                help="Selecteer de partijen die je wilt zien in het tijdtraject.",
            )

    theme = SVD_THEMES.get(comp_sel, {})
    if theme:
        st.info(f"**{theme.get('label', '')}** — {theme.get('explanation', '')}")
    motions = comp_map.get(comp_sel, [])

    # Year windows first (excluding the still-running current year), then the
    # "current_parliament" pseudo-window last so it becomes the default choice.
    current_year = str(_dt.date.today().year)
    available_windows = explorer_data.get_uniform_dim_windows(db_path)
    svd_windows = sorted(
        w for w in available_windows if w != "current_parliament" and w != current_year
    )
    if "current_parliament" in available_windows:
        svd_windows.append("current_parliament")

    def _svd_window_label(w: str) -> str:
        return "Huidig parliament" if w == "current_parliament" else w

    with col1:
        svd_window = st.selectbox(
            "Jaar",
            options=svd_windows,
            index=len(svd_windows) - 1,
            format_func=_svd_window_label,
            key=f"svd_window_{comp_sel}",
        )

    party_mp_counts = (
        {p: len(v) for p, v in party_mp_vectors.items()} if party_mp_vectors else {}
    )

    active_mps = (
        explorer_data.load_active_mps(db_path)
        if svd_window == "current_parliament"
        else None
    )
    aligned_all_scores = explorer_data.get_aligned_party_scores(
        db_path, svd_window, active_mps
    )

    # Component numbers are 1-based; reject idx < 0 so a component "0" cannot
    # silently read the last score via negative indexing.
    idx = comp_sel - 1
    party_1d_coords: dict = {
        party: (float(all_scores[idx]),)
        for party, all_scores in aligned_all_scores.items()
        if 0 <= idx < len(all_scores)
    }

    computed_flips = _svd_axis_flips(aligned_all_scores)
    theme_with_flip = {
        **theme,
        "flip": computed_flips.get(comp_sel, theme.get("flip", False)),
    }

    if min_mps > 1 and party_mp_counts:
        valid_parties = {p for p, count in party_mp_counts.items() if count >= min_mps}
        party_1d_coords = {
            p: coords for p, coords in party_1d_coords.items() if p in valid_parties
        }

    if view_mode == "Tijdtraject" and selected_parties_for_trajectory:
        party_scores_by_window = explorer_data._get_aligned_trajectory_scores(
            db_path, svd_windows
        )
        _render_svd_time_trajectory(
            party_scores_by_window,
            comp_sel,
            theme_with_flip,
            selected_parties_for_trajectory,
        )
    else:
        _render_party_axis_chart_1d(party_1d_coords, comp_sel, theme_with_flip)

    motion_details = _svd_fetch_motion_details(db_path, motions)

    pos_motions = [m for m in motions if _svd_motion_score(m) >= 0]
    neg_motions = [m for m in motions if _svd_motion_score(m) < 0]
    pos_pole = theme_with_flip.get("positive_pole", "")
    neg_pole = theme_with_flip.get("negative_pole", "")
    # When the axis is flipped the positive pole is shown on the left.
    if theme_with_flip.get("flip", False):
        left_pole, right_pole = pos_pole, neg_pole
        left_motions, right_motions = pos_motions, neg_motions
    else:
        left_pole, right_pole = neg_pole, pos_pole
        left_motions, right_motions = neg_motions, pos_motions
    left_arrow, right_arrow = "", ""

    lcol, rcol = st.columns(2)
    with lcol:
        _svd_render_motion_column(
            f"**← {left_pole}**", left_arrow, left_motions, motion_details
        )
    with rcol:
        _svd_render_motion_column(
            f"**{right_pole} →**", right_arrow, right_motions, motion_details
        )

@ -1,18 +1,132 @@
"""MP Quiz tab for the parliamentary explorer. """MP Quiz tab for the parliamentary explorer."""
This module will contain the MP quiz tab implementation.
Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
"""
from __future__ import annotations from __future__ import annotations
import pandas as pd
import analysis.explorer_data as explorer_data
from analysis.tabs._rendering import st
def build_mp_quiz_tab(db_path: str) -> None:
    """Interactive quiz: narrow MPs by asking motion vote questions.

    Flow:
    - seed with SEED_MOTIONS motions that have individual votes
    - present one question at a time; answers are stored in
      st.session_state['mp_quiz_votes'] keyed by str(motion id)
    - once the seeds are answered, rank MPs with
      MotionDatabase.match_mps_for_votes and ask
      choose_discriminating_motions for the next question
    - after the question, show the current top candidates

    Args:
        db_path: Path to the database handed to MotionDatabase and the loaders.
    """
    import logging

    log = logging.getLogger(__name__)

    st.subheader("🧑 Welk tweede kamerlid ben jij?")
    st.markdown(
        "Beantwoord een paar eenvoudige ja/nee/onthoud vragen over moties om te zien welk Kamerlid het meest op jou lijkt."
    )
    SEED_MOTIONS = 8
    MAX_QUESTIONS = 20

    if "mp_quiz_votes" not in st.session_state:
        st.session_state["mp_quiz_votes"] = {}
    if "mp_quiz_asked" not in st.session_state:
        st.session_state["mp_quiz_asked"] = []

    from database import MotionDatabase as _MotionDatabase

    db_inst = _MotionDatabase(db_path)
    df = explorer_data.load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar om de quiz te starten.")
        return
    seed_ids = db_inst.get_motions_with_individual_votes(k=SEED_MOTIONS)
    if not seed_ids:
        st.warning("Geen individuele stemdata beschikbaar voor de quiz.")
        return

    def _rank_mps(limit: int) -> list:
        """Rank MPs against the stored answers; empty list on any failure."""
        try:
            user_votes = {
                int(k): v for k, v in st.session_state["mp_quiz_votes"].items()
            }
            return db_inst.match_mps_for_votes(user_votes, limit=limit) or []
        except Exception:
            log.exception("MP quiz: ranking MPs failed")
            return []

    def _next_motion_id():
        """Return the next motion id to ask, or None when nothing is left."""
        answered = st.session_state["mp_quiz_votes"]
        for mid in seed_ids:
            if str(mid) not in answered:
                return mid
        candidates = [r["mp_name"] for r in _rank_mps(200)]
        if not candidates:
            return None
        try:
            excluded = [int(k) for k in answered]
            next_ids = db_inst.choose_discriminating_motions(candidates, excluded, k=1)
        except Exception:
            log.exception("MP quiz: choosing a discriminating motion failed")
            return None
        return next_ids[0] if next_ids else None

    col1, col2 = st.columns([3, 1])
    with col2:
        st.caption(
            f"Vragen beantwoord: {len(st.session_state['mp_quiz_votes'])}/{MAX_QUESTIONS}"
        )
        if st.button("Reset quiz"):
            st.session_state["mp_quiz_votes"] = {}
            st.session_state["mp_quiz_asked"] = []
            st.rerun()

    next_mid = _next_motion_id()
    if next_mid is None:
        st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.")
    else:
        motion_rows = df[df["id"] == next_mid]
        if motion_rows.empty:
            # The motion is unknown to the motions frame: record a neutral
            # answer so it is not proposed again, then move on.
            st.session_state["mp_quiz_votes"][str(next_mid)] = "Geen stem"
            st.rerun()
            return
        motion_row = motion_rows.iloc[0]

        # Missing values arrive from pandas as NaN, which is truthy; only
        # accept real, non-empty strings.
        title = motion_row.get("title")
        if not isinstance(title, str) or not title:
            title = f"Motie #{next_mid}"
        st.markdown(f"### {title}")
        explanation = motion_row.get("layman_explanation")
        if isinstance(explanation, str) and explanation:
            st.info(explanation)

        with st.form(key=f"mp_quiz_form_{next_mid}"):
            choice = st.radio(
                "Wat zou jij stemmen?",
                options=["Voor", "Tegen", "Onthouden", "Geen stem"],
                index=3,
            )
            submitted = st.form_submit_button("Beantwoord en verder")
        if submitted:
            st.session_state["mp_quiz_votes"][str(next_mid)] = choice
            st.session_state["mp_quiz_asked"].append(next_mid)
            st.rerun()

    ranking = _rank_mps(50)
    if not ranking:
        st.info("Nog geen antwoorden of geen overlapping met bestaande stemdata.")
        return

    st.markdown("**Top kandidaten**")
    st.dataframe(pd.DataFrame(ranking).head(10), use_container_width=True)

    top_pct = ranking[0]["agreement_pct"]
    top_matches = [r for r in ranking if r["agreement_pct"] == top_pct]
    if len(top_matches) == 1 and top_matches[0]["overlap"] > 0:
        st.success(
            f"Unieke match gevonden: {top_matches[0]['mp_name']} ({top_matches[0]['party']})"
        )
    elif len(st.session_state["mp_quiz_asked"]) >= MAX_QUESTIONS:
        st.warning(
            "Maximaal aantal vragen beantwoord. Je hebt meerdere vergelijkbare kandidaten."
        )
    else:
        st.info("Nog geen unieke match — vraag meer om verder te verfijnen.")

@ -1,18 +1,84 @@
"""Search tab for the parliamentary explorer. """Search tab for the parliamentary explorer."""
This module will contain the search tab implementation.
Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
"""
from __future__ import annotations from __future__ import annotations
import pandas as pd
import analysis.explorer_data as explorer_data
from analysis.tabs._rendering import _render_voting_results, st
def build_search_tab(db_path: str, show_rejected: bool) -> None:
    """Build the Motie Zoeken tab.

    Loads the motions frame, offers title / year-range / minimum-controversy
    filters, and lists the 50 most controversial matches, each with its vote
    breakdown, source link and similar motions.

    Args:
        db_path: Path to the database handed to the data loaders.
        show_rejected: When False, rows whose title is exactly "Verworpen."
            are hidden.
    """

    def _as_float(value, default: float = 0.0) -> float:
        """Coerce a cell to float, mapping None/NaN/unparseable to *default*."""
        if value is None:
            return default
        try:
            number = float(value)
        except (TypeError, ValueError):
            return default
        # NaN is the only float that differs from itself.
        return default if number != number else number

    st.subheader("Motie Zoeken")
    df = explorer_data.load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return
    if not show_rejected:
        df = df[df["title"].fillna("").str.strip() != "Verworpen."]

    col1, col2, col3 = st.columns([2, 1, 1])
    with col1:
        query = st.text_input(
            "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen"
        )
    with col2:
        years = sorted(df["year"].dropna().astype(int).unique().tolist())
        # Without any year information there is nothing to slide over; skip
        # the year filter entirely instead of filtering on a made-up range.
        year_range = (
            st.select_slider("Jaar", options=years, value=(years[0], years[-1]))
            if years
            else None
        )
    with col3:
        min_controversy = st.slider(
            "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05
        )

    working = df
    if year_range is not None:
        in_range = working["year"].between(year_range[0], year_range[1])
        # Rows without a year stay visible while the slider spans everything;
        # narrowing the range excludes them.
        if year_range[0] == years[0] and year_range[1] == years[-1]:
            in_range = in_range | working["year"].isna()
        working = working[in_range]
    if min_controversy > 0:
        working = working[working["controversy_score"] >= min_controversy]
    if query:
        q = query.lower()
        mask = working["title"].fillna("").str.lower().str.contains(q, regex=False)
        working = working[mask]
    working = working.sort_values(by="controversy_score", ascending=False)

    st.caption(f"{len(working)} resultaten (top 50 getoond)")
    for _, row in working.head(50).iterrows():
        title = row.get("title")
        if not isinstance(title, str) or not title:
            title = f"Motie #{row['id']}"
        date_val = pd.to_datetime(row.get("date"), errors="coerce")
        date_str = date_val.strftime("%d %b %Y") if pd.notna(date_val) else "?"
        controversy = _as_float(row.get("controversy_score"))
        margin = _as_float(row.get("winning_margin"))
        year = row.get("year")

        with st.expander(f"**{title}** — {date_str} — 🔥 {controversy:.2f}"):
            cols = st.columns(3)
            cols[0].metric("Controverse", f"{controversy:.2f}")
            cols[1].metric("Marge", f"{margin:.2f}")
            cols[2].metric("Jaar", int(year) if pd.notna(year) else "?")
            _render_voting_results(row.get("voting_results"))

            url = row.get("url")
            if url and str(url).startswith("http"):
                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

            # NOTE(review): this runs for every listed motion on each rerun,
            # also for collapsed expanders — confirm query_similar is cached.
            sim = explorer_data.query_similar(db_path, int(row["id"]), top_k=5)
            if sim.empty:
                st.caption("_Nog geen vergelijkbare moties beschikbaar_")
                continue
            st.markdown("**Vergelijkbare moties:**")
            for _, s in sim.iterrows():
                s_date = (
                    pd.to_datetime(s["date"]).strftime("%Y")
                    if pd.notna(s.get("date"))
                    else ""
                )
                st.markdown(
                    f"- {s.get('title', 'Onbekend')} *(score: {s['score']:.3f}, {s_date})*"
                )

@ -1,20 +1,774 @@
"""Trajectories tab for the parliamentary explorer. """Trajectories tab for the parliamentary explorer."""
This module will contain the trajectories tab implementation. from __future__ import annotations
Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
import json
import logging
import os
import re
import traceback
from datetime import datetime
from typing import Dict, List, Optional, Tuple
import numpy as np
from analysis import config
import analysis.explorer_data as explorer_data
from analysis import trajectory
from analysis.tabs._rendering import (
PARTY_COLOURS,
_add_y_direction_annotations,
go,
st,
)
from explorer_helpers import compute_party_centroids, inspect_positions_for_issues
KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES
logger = logging.getLogger(__name__)
_last_trajectories_diagnostics: dict = {}
_last_diagnostics = _last_trajectories_diagnostics
def get_debug_trajectories_enabled() -> bool:
    """Return True when EXPLORER_DEBUG_TRAJECTORIES requests debug mode.

    The variable enables debug mode when, ignoring case and surrounding
    whitespace, it equals "1" or "true". Unset or any other value disables it.
    """
    raw = os.getenv("EXPLORER_DEBUG_TRAJECTORIES", "")
    return raw.strip().lower() in ("1", "true")
def select_trajectory_plot_data(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    party_map: Dict[str, str],
    windows: List[str],
    selected_parties: List[str],
    smooth_alpha: float = 0.35,
    mp_fallback_count: Optional[int] = None,
) -> Tuple[go.Figure, int, Optional[str]]:
    """Build the trajectory figure without touching Streamlit.

    Party centroids come from ``compute_party_centroids``. Each selected party
    that has at least one non-missing centroid becomes one EMA-smoothed trace;
    if none of the selected parties is plottable, all plottable parties are
    drawn. When no party is plottable at all, the MPs present in the most
    windows are drawn instead and a banner text is returned.

    Args:
        positions_by_window: window id -> {MP name -> (x, y)}.
        party_map: MP name -> party.
        windows: Window ids, used as trace point labels for party traces.
        selected_parties: Parties the user asked for.
        smooth_alpha: EMA factor; values >= 1.0 disable smoothing.
        mp_fallback_count: Maximum number of MP traces in fallback mode.
            Defaults to EXPLORER_MP_FALLBACK_COUNT, or 20.

    Returns:
        (figure, number of traces, banner text or None). Raw (unsmoothed)
        coordinates are attached to each trace as ``customdata``.
    """
    if mp_fallback_count is None:
        try:
            mp_fallback_count = int(os.getenv("EXPLORER_MP_FALLBACK_COUNT", "20"))
        except ValueError:
            mp_fallback_count = 20
    # A negative count would slice from the wrong end of the ranking.
    mp_fallback_count = max(mp_fallback_count, 0)

    def _is_missing(value) -> bool:
        """True for None and for NaN of any float type."""
        if value is None:
            return True
        try:
            return bool(np.isnan(value))
        except TypeError:
            return False

    def _ema_smooth(values: List[float], alpha: float) -> List[float]:
        """Exponentially smooth *values*; missing points stay NaN and are skipped."""
        if not values or alpha >= 1.0:
            return values
        smoothed: List[float] = []
        prev = None
        for v in values:
            if _is_missing(v):
                smoothed.append(float(np.nan))
                continue
            v = float(v)
            prev = v if prev is None else alpha * v + (1 - alpha) * prev
            smoothed.append(float(prev))
        return smoothed

    party_centroids, _meta = compute_party_centroids(
        positions_by_window, party_map, windows
    )

    # The inspector is diagnostic only: a failure is recorded, never raised.
    try:
        inspector_summary = inspect_positions_for_issues(positions_by_window, party_map)
    except Exception:
        inspector_summary = {}
        diagnostics = {
            "stage": "inspector_exception",
            "exception": traceback.format_exc(),
        }
        select_trajectory_plot_data._last_diagnostics = dict(diagnostics)
        _last_trajectories_diagnostics.update(diagnostics)
    logger.debug("select_trajectory_plot_data inspector summary: %s", inspector_summary)

    # A party is plottable when at least one of its centroids has a coordinate.
    plottable_parties = [
        party
        for party, vals in party_centroids.items()
        if any(not (_is_missing(x) and _is_missing(y)) for x, y in vals)
    ]
    logger.debug(
        "[TRAJ DEBUG] plottable_parties: %d parties, sample=%s",
        len(plottable_parties),
        (plottable_parties[:5] if plottable_parties else "empty"),
    )
    logger.debug(
        "[TRAJ DEBUG] party_centroids keys: %s",
        list(party_centroids.keys())[:10],
    )
    if party_centroids:
        sample_party = next(iter(party_centroids))
        logger.debug(
            "[TRAJ DEBUG] Sample party '%s' centroids: %s...",
            sample_party,
            party_centroids[sample_party][:3],
        )

    fig = go.Figure()
    trace_count = 0

    if not plottable_parties:
        # Fallback: per-MP trajectories for the MPs seen in the most windows.
        mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
        for wid in windows:
            for mp_name, xy in positions_by_window.get(wid, {}).items():
                try:
                    x, y = float(xy[0]), float(xy[1])
                except (TypeError, ValueError, IndexError, KeyError):
                    continue
                mp_positions.setdefault(mp_name, {})[wid] = (x, y)

        ranked_mps = sorted(
            mp_positions, key=lambda mp: len(mp_positions[mp]), reverse=True
        )
        top_mps = ranked_mps[:mp_fallback_count]
        for mp in top_mps:
            wids_sorted = sorted(mp_positions[mp])
            xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
            ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
            fig.add_trace(
                go.Scatter(
                    x=_ema_smooth(xs_raw, smooth_alpha),
                    y=_ema_smooth(ys_raw, smooth_alpha),
                    mode="lines+markers",
                    name=mp,
                    text=wids_sorted,
                    customdata=list(zip(xs_raw, ys_raw)),
                    line=dict(color="#888888", shape="spline", smoothing=1.3),
                    marker=dict(color="#888888", size=6),
                )
            )
            trace_count += 1
        banner_text = "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback."
        logger.debug(
            "[TRAJ DEBUG] Fallback to MP trajectories: trace_count=%d, top_mps=%d",
            trace_count,
            len(top_mps),
        )
        return fig, trace_count, banner_text

    to_plot = [p for p in selected_parties if p in plottable_parties]
    if not to_plot:
        to_plot = plottable_parties

    for party in to_plot:
        vals = party_centroids.get(party, [])
        if not vals:
            continue
        xs_raw = [v[0] for v in vals]
        ys_raw = [v[1] for v in vals]
        custom_raw = [
            (
                float(np.nan) if _is_missing(x) else float(x),
                float(np.nan) if _is_missing(y) else float(y),
            )
            for x, y in zip(xs_raw, ys_raw)
        ]
        colour = PARTY_COLOURS.get(party, "#9E9E9E")
        fig.add_trace(
            go.Scatter(
                x=_ema_smooth(xs_raw, smooth_alpha),
                y=_ema_smooth(ys_raw, smooth_alpha),
                mode="lines+markers",
                name=party,
                text=windows,
                customdata=custom_raw,
                line=dict(color=colour, shape="spline", smoothing=1.3),
                marker=dict(color=colour, size=8),
            )
        )
        trace_count += 1

    logger.debug(
        "[TRAJ DEBUG] Final trace_count=%d, plottable_parties=%d, to_plot=%s",
        trace_count,
        len(plottable_parties),
        len(to_plot),
    )
    return fig, trace_count, None
def build_trajectories_tab(db_path: str, window_size: str) -> None: def build_trajectories_tab(db_path: str, window_size: str) -> None:
"""Build the Partij Trajectories tab. """Build the Partij Trajectories tab."""
logging.getLogger(__name__).debug(
"[TRAJ DEBUG] build_trajectories_tab called — db_path=%s, window_size=%s",
db_path,
window_size,
)
st.subheader("Partij Trajectories")
st.markdown("Hoe bewegen partijen over de tijdsvensters heen?")
Currently delegates to explorer.py implementation. positions_by_window, axis_def = explorer_data.load_positions(db_path, window_size)
Will be extracted when rendering logic is decoupled from Streamlit. logging.getLogger(__name__).debug(
""" "[TRAJ DEBUG] load_positions → %d windows, total MPs=%d",
import explorer len(positions_by_window),
sum(len(v) for v in positions_by_window.values()),
)
if axis_def is None:
axis_def = {}
if not positions_by_window:
try:
_last_trajectories_diagnostics.update(
{
"stage": "load_positions_empty",
"positions_by_window_len": len(positions_by_window),
}
)
except Exception:
pass
try:
st.warning("Geen positiedata beschikbaar.")
except Exception:
pass
try:
if get_debug_trajectories_enabled():
try:
st.text_area(
"Trajectories diagnostics",
json.dumps(_last_trajectories_diagnostics, default=str),
height=160,
)
except Exception:
pass
except Exception:
pass
return
party_map = explorer_data.load_party_map(db_path)
logging.getLogger(__name__).debug(
"[TRAJ DEBUG] load_party_map → %d entries, sample=%s",
len(party_map),
list(party_map.items())[:3],
)
def normalize_mp_name(name):
"""Normalize MP name for better matching between data sources."""
if not name:
return ""
name = name.strip()
if "," in name and ", " not in name:
name = name.replace(",", ", ")
return name
party_map = {normalize_mp_name(k): v for k, v in party_map.items()}
normalized_positions = {}
for window, positions in positions_by_window.items():
normalized_positions[window] = {
normalize_mp_name(k): v for k, v in positions.items()
}
positions_by_window = normalized_positions
all_mp_names = set()
for positions in positions_by_window.values():
all_mp_names.update(positions.keys())
matched_names = sum(1 for mp in all_mp_names if mp in party_map)
if all_mp_names:
logger.info(
f"MP name matching: {matched_names}/{len(all_mp_names)} matched ({100 * matched_names / len(all_mp_names):.1f}%)"
)
else:
logger.info("MP name matching: no MPs found in positions data")
if matched_names == 0 and len(all_mp_names) > 0:
logger.warning("No MP names matched between positions and party_map!")
logger.warning(f"Sample positions names: {list(all_mp_names)[:5]}")
logger.warning(f"Sample party_map names: {list(party_map.keys())[:5]}")
windows = sorted(positions_by_window.keys())
centroids: Dict[str, Dict[str, Tuple[float, float]]] = {}
all_parties: set = set()
def _strip_paren(name: str) -> str:
return re.sub(r"\s*\([^)]*\)", "", name).strip()
for wid in windows:
pos = positions_by_window.get(wid, {})
per_party: Dict[str, List[Tuple[float, float]]] = {}
for mp_name, (x, y) in pos.items():
party = party_map.get(mp_name) or party_map.get(
_strip_paren(mp_name), "Unknown"
)
if party == "Unknown":
continue
per_party.setdefault(party, []).append((x, y))
for party, coords in per_party.items():
all_parties.add(party)
xs = [c[0] for c in coords]
ys = [c[1] for c in coords]
centroids.setdefault(party, {})[wid] = (
float(np.mean(xs)),
float(np.mean(ys)),
)
all_parties = sorted(
set(party_map.get(mp) for MPs in positions_by_window.values() for mp in MPs)
- {None, "Unknown"}
)
logging.getLogger(__name__).debug(
"[TRAJ DEBUG] all_parties (raw from party_map) → %d parties: %s",
len(all_parties),
all_parties[:10],
)
all_parties_sorted = sorted(all_parties)
if not all_parties_sorted:
st.info(
"Geen partijen beschikbaar om trajecten te tekenen. Controleer of de party mapping is geladen (mp_metadata) en of de minimum Kamerleden-instelling te hoog staat."
)
try:
st.caption(f"Bekende partijen in party_map: {len(party_map)}")
except Exception:
pass
default_parties = [p for p in ["CDA", "D66", "VVD"] if p in all_parties]
if not default_parties:
default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties]
if not default_parties:
default_parties = all_parties_sorted[:6]
selected_parties = st.multiselect(
"Selecteer partijen",
options=all_parties_sorted,
default=default_parties,
)
def _ema_smooth(values: List[float], alpha: float) -> List[float]:
if not values or alpha >= 1.0:
return values
smoothed = [values[0]]
for v in values[1:]:
smoothed.append(alpha * v + (1 - alpha) * smoothed[-1])
return smoothed
smooth_alpha = 0.35
if not centroids:
st.info(
"Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback."
)
mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
for wid in windows:
pos = positions_by_window.get(wid, {})
for mp_name, xy in pos.items():
try:
x, y = float(xy[0]), float(xy[1])
except Exception:
continue
mp_positions.setdefault(mp_name, {})[wid] = (x, y)
mp_positions = {
mp: pos
for mp, pos in mp_positions.items()
if len(pos) >= 2
and not all(np.isnan(x) and np.isnan(y) for x, y in pos.values())
}
if not mp_positions:
st.warning("Geen positiedata beschikbaar voor trajectplotten.")
_last_trajectories_diagnostics.update(
{
"stage": "no_mp_positions",
"mp_positions_count": 0,
}
)
try:
if get_debug_trajectories_enabled():
try:
st.text_area(
"Trajectories diagnostics",
json.dumps(_last_trajectories_diagnostics, default=str),
height=160,
)
except Exception:
pass
except Exception:
pass
return
st.session_state["_trajectory_mp_positions"] = mp_positions
mp_list = sorted(mp_positions.keys())
default_mps = mp_list[:6]
selected_mps = st.multiselect(
"Selecteer Kamerleden (fallback)", options=mp_list, default=default_mps
)
fig = go.Figure()
trace_count = 0
for mp in selected_mps:
wids_sorted = sorted(mp_positions[mp].keys())
xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
xs = _ema_smooth(xs_raw, smooth_alpha)
ys = _ema_smooth(ys_raw, smooth_alpha)
custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)]
fig.add_trace(
go.Scatter(
x=xs,
y=ys,
mode="lines+markers",
name=mp,
text=wids_sorted,
customdata=custom_raw,
line=dict(color="#888888", shape="spline", smoothing=1.3),
marker=dict(color="#888888", size=6),
hovertemplate=(
f"<b>{mp}</b><br>"
"venster: %{text}<br>"
"x (smoothed): %{x:.3f}<br>"
"x (raw): %{customdata[0]:.3f}<br>"
"y (smoothed): %{y:.3f}<br>"
"y (raw): %{customdata[1]:.3f}<extra></extra>"
),
)
)
trace_count += 1
_add_y_direction_annotations(fig)
if trace_count == 0:
st.info(
"Geen trajecten getekend: geen geselecteerde Kamerleden met voldoende data."
)
else:
st.plotly_chart(fig, use_container_width=True)
return
if os.getenv("EXPLORER_FORCE_SHOW_TRAJECTORIES") in ("1", "true", "True"):
mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
for wid in windows:
pos = positions_by_window.get(wid, {})
for mp_name, (x, y) in pos.items():
mp_positions.setdefault(mp_name, {})[wid] = (float(x), float(y))
mp_list = sorted(mp_positions.keys())
if not mp_list:
st.info("Geen MP-positiegegevens beschikbaar om te tonen.")
return
sample_mps = mp_list[:6]
fig = go.Figure()
for mp in sample_mps:
wids_sorted = sorted(mp_positions[mp].keys())
xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
xs = _ema_smooth(xs_raw, 0.35)
ys = _ema_smooth(ys_raw, 0.35)
custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)]
fig.add_trace(
go.Scatter(
x=xs,
y=ys,
mode="lines+markers",
name=mp,
text=wids_sorted,
customdata=custom_raw,
line=dict(color="#444444", shape="spline", smoothing=1.3),
marker=dict(color="#444444", size=6),
hovertemplate=(
f"<b>{mp}</b><br>"
"venster: %{text}<br>"
"x (smoothed): %{x:.3f}<br>"
"x (raw): %{customdata[0]:.3f}<br>"
"y (smoothed): %{y:.3f}<br>"
"y (raw): %{customdata[1]:.3f}<extra></extra>"
),
)
)
_add_y_direction_annotations(fig)
st.plotly_chart(fig, use_container_width=True)
return
try:
debug_checkbox = False
try:
debug_checkbox = st.checkbox(
"Enable trajectories diagnostics (show extra info)",
value=get_debug_trajectories_enabled(),
)
except Exception:
debug_checkbox = get_debug_trajectories_enabled()
if debug_checkbox:
try:
with st.expander(
"DEBUG: Trajectories data (showing diagnostics)", expanded=False
):
st.write("windows (count):", len(windows))
st.write("windows sample:", windows[:10])
st.write("party_map entries:", len(party_map))
st.write("parties with centroids:", len(all_parties_sorted))
st.write("default_parties:", default_parties)
st.write("selected_parties:", selected_parties)
st.write("min_mps setting:", 3)
sample = {
p: len(centroids.get(p, {}))
for p in list(all_parties_sorted)[:8]
}
st.write("sample centroid window counts per party:", sample)
except Exception:
pass
except Exception:
pass
smoothing_method = st.selectbox(
"Smoothing methode",
options=["EMA", "Spline", "None"],
index=0,
help="EMA = exponential moving average; Spline = low-degree polynomial spline fit; None = raw centroids",
)
smooth_alpha = 1.0
if smoothing_method == "EMA":
smooth_alpha = st.slider(
"Glad maken (EMA-\u03b1)",
min_value=0.1,
max_value=1.0,
value=0.35,
step=0.05,
help=(
"\u03b1=1.0 toont de ruwe data; lagere waarden maken de lijn gladder. "
"Standaard 0.35 voor een goed evenwicht tussen detail en ruis."
),
)
def _spline_smooth(values: List[float]) -> List[float]:
n = len(values)
if n <= 2:
return values
deg = min(3, n - 1)
try:
idx = np.arange(n, dtype=float)
coeffs = np.polyfit(idx, np.array(values, dtype=float), deg=deg)
smooth = np.polyval(coeffs, idx)
return [float(v) for v in smooth]
except Exception:
return values
fig = go.Figure()
trace_count = 0
helper_succeeded = False
try:
fig2, trace_count2, banner_text = select_trajectory_plot_data(
positions_by_window, party_map, windows, selected_parties, smooth_alpha
)
if fig2 is not None:
fig = fig2
trace_count = trace_count2
helper_succeeded = True
if banner_text:
try:
st.caption(banner_text)
except Exception:
pass
try:
_last_trajectories_diagnostics.update({"banner_text": banner_text})
except Exception:
pass
except Exception as e:
tb = traceback.format_exc()
try:
select_trajectory_plot_data._last_diagnostics = {"exception": tb}
except Exception:
pass
try:
_last_trajectories_diagnostics.update(
{"stage": "select_helper_exception", "exception": tb}
)
except Exception:
pass
logger.exception("select_trajectory_plot_data failed")
debug_enabled = get_debug_trajectories_enabled()
if debug_enabled:
try:
st.text_area("select_trajectory_plot_data traceback", tb, height=240)
except Exception:
pass
logging.getLogger(__name__).debug(
"[TRAJ DEBUG] helper_succeeded=%s", helper_succeeded
)
if not helper_succeeded:
for party in selected_parties:
if party not in centroids:
continue
wids_sorted = sorted(centroids[party].keys())
xs_raw = [centroids[party][w][0] for w in wids_sorted]
ys_raw = [centroids[party][w][1] for w in wids_sorted]
xs = _ema_smooth(xs_raw, smooth_alpha)
ys = _ema_smooth(ys_raw, smooth_alpha)
custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)]
colour = PARTY_COLOURS.get(party, "#9E9E9E")
fig.add_trace(
go.Scatter(
x=xs,
y=ys,
mode="lines+markers",
name=party,
text=wids_sorted,
customdata=custom_raw,
line=dict(color=colour, shape="spline", smoothing=1.3),
marker=dict(color=colour, size=8),
hovertemplate=(
f"<b>{party}</b><br>"
"venster: %{text}<br>"
"x (smoothed): %{x:.3f}<br>"
"x (raw): %{customdata[0]:.3f}<br>"
"y (smoothed): %{y:.3f}<br>"
"y (raw): %{customdata[1]:.3f}<extra></extra>"
),
)
)
trace_count += 1
_THRESHOLD = 0.65
x_conf_map = axis_def.get("x_label_confidence", {}) or {}
y_conf_map = axis_def.get("y_label_confidence", {}) or {}
def _mean_conf(m: dict) -> Optional[float]:
vals = [v for v in m.values() if v is not None]
if not vals:
return None
return float(sum(vals) / len(vals))
x_mean = _mean_conf(x_conf_map)
y_mean = _mean_conf(y_conf_map)
x_title = trajectory.choose_trajectory_title(axis_def, "x", threshold=_THRESHOLD)
y_title = trajectory.choose_trajectory_title(axis_def, "y", threshold=_THRESHOLD)
fig.update_layout(
title="Partij trajectories",
xaxis_title=x_title,
yaxis_title=y_title,
height=600,
legend_title_text="Partij",
)
_add_y_direction_annotations(fig)
try:
_last_trajectories_diagnostics.update({"trace_count": trace_count})
except Exception:
pass
debug_enabled = get_debug_trajectories_enabled()
if trace_count == 0:
_last_trajectories_diagnostics.update(
{
"stage": "zero_traces",
"positions_count": sum(len(pos) for pos in positions_by_window.values())
if positions_by_window
else 0,
"party_map_count": len(party_map) if party_map else 0,
"centroids_count": len(centroids) if centroids else 0,
"selected_parties_count": len(selected_parties)
if selected_parties
else 0,
"timestamp": datetime.now().isoformat(),
}
)
if positions_by_window and party_map and not centroids:
sample_mps = []
for window, positions in list(positions_by_window.items())[:1]:
sample_mps = list(positions.keys())[:5]
break
matched = sum(1 for mp in sample_mps if mp in party_map)
_last_trajectories_diagnostics["name_match_check"] = {
"sample_mps": sample_mps,
"matched_in_party_map": matched,
"sample_size": len(sample_mps),
}
if trace_count == 0:
st.info("📊 **Geen trajecten getekend**")
with st.expander("🔍 Diagnostische informatie"):
st.write("**Data status:**")
st.write(
f"- Positie vensters: {len(positions_by_window) if positions_by_window else 0}"
)
st.write(f"- Party mappings: {len(party_map) if party_map else 0}")
st.write(
f"- Geselecteerde partijen: {len(selected_parties) if selected_parties else 0}"
)
if "centroid_diagnostics" in locals():
st.write("**Centroid berekening:**")
st.write(
f"- Partijen met posities: {len(centroid_diagnostics.get('parties_with_positions', []))}"
)
st.write(
f"- Partijen met alleen NaN: {len(centroid_diagnostics.get('parties_all_nan', []))}"
)
st.write("\n**Mogelijke oorzaken:**")
st.write("1. Geen SVD vectoren berekend voor de geselecteerde vensters")
st.write("2. MP namen in posities komen niet overeen met party_map")
st.write("3. Alle geselecteerde partijen hebben te weinig MPs (< 5)")
if st.button("🔧 Database diagnostiek uitvoeren"):
with st.spinner("Bezig met diagnostiek..."):
from scripts.diagnose_trajectories_cli import (
run as diagnose_trajectories,
)
explorer.build_trajectories_tab(db_path, window_size) results = diagnose_trajectories(db_path)
st.json(results)
else:
try:
st.info(
f"[DEBUG] trace_count={trace_count}, fig data count={len(fig.data)}, layout title={fig.layout.title.text if fig.layout.title else 'none'}"
)
except Exception:
pass
try:
logging.getLogger(__name__).debug(
"[TRAJ DEBUG] About to render plotly chart — trace_count=%d, banner=%s, fig has %d traces",
trace_count,
banner_text,
len(fig.data),
)
st.plotly_chart(fig, use_container_width=True)
except Exception as e:
st.error(f"Trajectories rendering failed: {e}")
if get_debug_trajectories_enabled():
try:
st.json(_last_trajectories_diagnostics)
except Exception:
st.text_area(
"Trajectories diagnostics (JSON failed)",
json.dumps(_last_trajectories_diagnostics, default=str),
height=240,
)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,170 @@
"""Automated pipeline scheduling.
Runs the parliamentary embedding pipeline and motion summarization
on a configurable schedule using the `schedule` library.
Usage:
uv run python scheduler.py # start scheduler loop
uv run python scheduler.py --once # run once and exit
uv run python scheduler.py --pipeline-time 03:00 --summarizer-every 6
"""
from __future__ import annotations
import argparse
import logging
import signal
import sys
import time
from typing import Callable
import schedule
from config import config
import argparse
from pipeline.run_pipeline import run as run_pipeline
from summarizer import summarizer
_logger = logging.getLogger(__name__)
class PipelineScheduler:
    """Schedules and runs pipeline jobs.

    Jobs are registered through the module-level ``schedule`` API and executed
    by :meth:`start`, which calls ``schedule.run_pending()`` once per second
    until :meth:`stop` is called.
    """

    def __init__(self, db_path: str = "data/motions.db"):
        """Remember *db_path* for pipeline runs; the loop starts out stopped."""
        self.db_path = db_path
        self._running = False

    def run_pipeline(self) -> int:
        """Run the full embedding pipeline.

        The pipeline is invoked with a fixed ``argparse.Namespace`` (annual
        windows, ``svd_k=50``, no stage skipped) that points at
        ``self.db_path``.

        Returns the exit code from the pipeline run: the pipeline's own return
        value when it is an ``int``, ``0`` for any other return value, and
        ``1`` when the run raised (the exception is logged, not propagated).
        """
        _logger.info("Starting scheduled pipeline run")
        try:
            args = argparse.Namespace(
                db_path=self.db_path,
                window_size="annual",
                start_date=None,
                end_date=None,
                svd_k=50,
                svd_workers=None,
                text_model=None,
                text_batch_size=200,
                skip_metadata=False,
                skip_extract=False,
                skip_svd=False,
                skip_text=False,
                skip_fusion=False,
                dry_run=False,
            )
            result = run_pipeline(args)
            _logger.info("Pipeline run completed with code %s", result)
            return result if isinstance(result, int) else 0
        except Exception:
            # A failed run must not take the scheduler loop down with it.
            _logger.exception("Pipeline run failed")
            return 1

    def run_summarizer(self) -> None:
        """Run motion summarization for missing explanations.

        Exceptions are logged and swallowed so the scheduler loop keeps running.
        """
        _logger.info("Starting scheduled summarizer run")
        try:
            summarizer.update_motion_summaries()
            _logger.info("Summarizer run completed")
        except Exception:
            _logger.exception("Summarizer run failed")

    def schedule_daily(self, time_str: str = "02:00") -> None:
        """Schedule the pipeline to run daily at *time_str*."""
        _logger.info("Scheduling daily pipeline run at %s", time_str)
        schedule.every().day.at(time_str).do(self.run_pipeline)

    def schedule_summarizer(self, every_n_hours: int = 6) -> None:
        """Schedule the summarizer to run every *every_n_hours* hours."""
        _logger.info("Scheduling summarizer every %s hours", every_n_hours)
        schedule.every(every_n_hours).hours.do(self.run_summarizer)

    def _signal_handler(self, signum, frame) -> None:
        """Handle shutdown signals gracefully by asking the loop to stop."""
        _logger.info("Received signal %s, shutting down", signum)
        self.stop()

    def start(self) -> None:
        """Start the scheduler loop.

        Blocks until :meth:`stop` is called or a signal is received. SIGTERM
        and SIGINT handlers are installed for the duration of the loop and the
        previous handlers are put back when the loop ends, so a host process
        that keeps running afterwards gets its normal signal behaviour back.
        """
        self._running = True
        previous_handlers = {}
        # Register signal handlers for graceful shutdown.
        for signum in (signal.SIGTERM, signal.SIGINT):
            try:
                previous_handlers[signum] = signal.signal(
                    signum, self._signal_handler
                )
            except ValueError:
                # signal.signal() may only be called from the main thread; when
                # started elsewhere the loop still works and stop() ends it.
                _logger.warning(
                    "Cannot install handler for signal %s outside the main thread",
                    signum,
                )
        _logger.info("Scheduler started")
        try:
            while self._running:
                schedule.run_pending()
                time.sleep(1)
        finally:
            self._running = False
            for signum, handler in previous_handlers.items():
                # None means the old handler was not installed from Python and
                # cannot be re-installed through signal.signal().
                if handler is not None:
                    signal.signal(signum, handler)
            _logger.info("Scheduler stopped")

    def stop(self) -> None:
        """Stop the scheduler loop after its current iteration."""
        self._running = False
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the scheduler.

    Defines ``--db-path``, ``--pipeline-time``, ``--summarizer-every`` and
    ``--once``; defaults are shown in ``--help`` through
    ``ArgumentDefaultsHelpFormatter``.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Automated pipeline scheduler",
    )
    # (flag, add_argument keyword arguments), in the order shown by --help.
    option_specs = (
        ("--db-path", {"default": "data/motions.db", "help": "Path to the DuckDB file"}),
        (
            "--pipeline-time",
            {"default": "02:00", "help": "Daily pipeline run time (HH:MM)"},
        ),
        (
            "--summarizer-every",
            {"type": int, "default": 6, "help": "Run summarizer every N hours"},
        ),
        (
            "--once",
            {
                "action": "store_true",
                "help": "Run pipeline + summarizer once and exit (no scheduling loop)",
            },
        ),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Parses *argv*, configures INFO-level logging and builds a
    ``PipelineScheduler`` for the requested database. With ``--once`` the
    pipeline and then the summarizer run a single time and the pipeline's
    return code is returned. Otherwise the daily pipeline job (and the
    summarizer job, when ``--summarizer-every`` is positive) is registered,
    the blocking loop is started, and ``0`` is returned once it ends.
    """
    options = build_parser().parse_args(argv)
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(name)s %(message)s",
        level=logging.INFO,
    )
    runner = PipelineScheduler(db_path=options.db_path)

    if not options.once:
        runner.schedule_daily(options.pipeline_time)
        if options.summarizer_every > 0:
            runner.schedule_summarizer(options.summarizer_every)
        runner.start()
        return 0

    _logger.info("Running in single-shot mode")
    exit_code = runner.run_pipeline()
    runner.run_summarizer()
    return exit_code
if __name__ == "__main__":
sys.exit(main())

@ -0,0 +1,95 @@
"""Tests for explorer.py decomposition (P3-001).
Acceptance criteria:
- explorer.py must be under 1500 lines.
- Tab modules must define their build functions locally (not re-export from explorer).
- No circular imports between explorer.py and analysis.tabs.
"""
import ast
import inspect
import pathlib
class TestExplorerDecomposition:
    """Structural checks for the split of explorer.py into analysis/tabs/."""

    # Anchor every path at the repository root so the checks do not depend on
    # pytest's working directory. Assumes this file sits in a directory
    # directly below the root (tests/) — adjust if the test is moved.
    _REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent

    @classmethod
    def _read(cls, relative_path: str) -> str:
        """Return the UTF-8 text of *relative_path* under the repository root."""
        return (cls._REPO_ROOT / relative_path).read_text(encoding="utf-8")

    @staticmethod
    def _explorer_import_lines(source: str) -> list:
        """Return the sorted line numbers where *source* imports ``explorer``.

        Only real, absolute import statements of the top-level ``explorer``
        module (or a submodule of it) count. Names that merely start with
        "explorer" such as ``explorer_data``, relative imports, comments and
        string literals are not reported.
        """
        hits = []
        for node in ast.walk(ast.parse(source)):
            if isinstance(node, ast.Import):
                imported = [alias.name for alias in node.names]
            elif isinstance(node, ast.ImportFrom) and node.level == 0:
                imported = [node.module or ""]
            else:
                continue
            if any(n == "explorer" or n.startswith("explorer.") for n in imported):
                hits.append(node.lineno)
        return sorted(hits)

    def test_explorer_line_count_under_1500(self):
        """explorer.py must stay under 1500 lines."""
        lines = self._read("explorer.py").splitlines()
        assert len(lines) < 1500, (
            f"explorer.py has {len(lines)} lines; target is < 1500. "
            f"Extract tab functions and rendering helpers into analysis/tabs/."
        )

    def test_tab_modules_define_functions_locally(self):
        """Each tab module must define its build_*_tab without delegating to explorer."""
        tabs = [
            ("analysis/tabs/compass.py", "build_compass_tab"),
            ("analysis/tabs/trajectories.py", "build_trajectories_tab"),
            ("analysis/tabs/search.py", "build_search_tab"),
            ("analysis/tabs/browser.py", "build_browser_tab"),
            ("analysis/tabs/components.py", "build_svd_components_tab"),
            ("analysis/tabs/quiz.py", "build_mp_quiz_tab"),
        ]
        for module_path, func_name in tabs:
            tree = ast.parse(self._read(module_path))
            func_def = None
            for node in ast.walk(tree):
                if isinstance(node, ast.FunctionDef) and node.name == func_name:
                    func_def = node
                    break
            assert func_def is not None, (
                f"{module_path} must define {func_name}"
            )
            # Ensure it's not a short stub that imports from explorer. The
            # measure is the number of top-level statements in the function
            # body (a docstring counts as one), not source lines.
            body = func_def.body
            assert len(body) > 3, (
                f"{module_path}.{func_name} looks like a stub "
                f"({len(body)} top-level statements). "
                f"Extract the real implementation from explorer.py."
            )

    def test_rendering_helpers_extracted(self):
        """Rendering helpers should not live in explorer.py."""
        helpers = [
            "_render_scree_plot",
            "_build_party_axis_figure",
            "_render_party_axis_chart",
            "_render_party_axis_chart_1d",
            "_render_svd_time_trajectory",
            "_render_voting_results",
            "_add_y_direction_annotations",
        ]
        tree = ast.parse(self._read("explorer.py"))
        defined = {
            node.name for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)
        }
        for helper in helpers:
            assert helper not in defined, (
                f"{helper} should be extracted from explorer.py "
                f"into analysis/tabs/_rendering.py"
            )

    def test_no_circular_import_tabs_to_explorer(self):
        """Tab modules must not import from explorer."""
        tab_modules = [
            "analysis/tabs/compass.py",
            "analysis/tabs/trajectories.py",
            "analysis/tabs/search.py",
            "analysis/tabs/browser.py",
            "analysis/tabs/components.py",
            "analysis/tabs/quiz.py",
            "analysis/tabs/_rendering.py",
        ]
        for module_path in tab_modules:
            # Missing files are skipped here. The six tab files are read
            # unconditionally by test_tab_modules_define_functions_locally,
            # so only _rendering.py is effectively optional.
            if not (self._REPO_ROOT / module_path).exists():
                continue
            # Parse the imports instead of substring-matching the text, so
            # "import explorer_data", comments and strings are not flagged.
            offending = self._explorer_import_lines(self._read(module_path))
            assert not offending, (
                f"{module_path} imports explorer.py (line(s) {offending}) — "
                f"move shared helpers to explorer_data.py or _rendering.py instead"
            )

@ -0,0 +1,159 @@
"""Tests for scheduler.py — automated pipeline scheduling.
TDD: write failing test, implement, refactor.
"""
from __future__ import annotations
import signal
from unittest.mock import MagicMock, patch
import pytest
class TestPipelineSchedulerInit:
    """Constructor defaults and overrides of ``PipelineScheduler``."""

    def test_default_db_path(self):
        """Without arguments the scheduler targets data/motions.db and is not running."""
        from scheduler import PipelineScheduler

        scheduler = PipelineScheduler()

        assert scheduler.db_path == "data/motions.db"
        assert not scheduler._running

    def test_custom_db_path(self):
        """An explicit db_path is stored unchanged."""
        from scheduler import PipelineScheduler

        custom_path = "/tmp/test.db"

        assert PipelineScheduler(db_path=custom_path).db_path == custom_path
class TestPipelineSchedulerRunPipeline:
    """Behaviour of ``PipelineScheduler.run_pipeline`` with the pipeline patched out."""

    def test_calls_pipeline_run_with_db_path(self):
        """The pipeline is called once with a Namespace carrying the scheduler's db_path."""
        from scheduler import PipelineScheduler

        scheduler = PipelineScheduler(db_path="/tmp/test.db")
        with patch("scheduler.run_pipeline", return_value=0) as pipeline_mock:
            scheduler.run_pipeline()

        pipeline_mock.assert_called_once()
        # The pipeline receives its options as the first positional argument.
        positional, _keywords = pipeline_mock.call_args
        assert positional[0].db_path == "/tmp/test.db"

    def test_logs_error_on_pipeline_failure(self):
        """A raising pipeline yields exit code 1 and exactly one logged exception."""
        from scheduler import PipelineScheduler

        scheduler = PipelineScheduler()
        failure = RuntimeError("pipeline failed")
        with patch("scheduler.run_pipeline", side_effect=failure), patch(
            "scheduler._logger"
        ) as logger_mock:
            exit_code = scheduler.run_pipeline()

        assert exit_code == 1
        logger_mock.exception.assert_called_once()
class TestPipelineSchedulerRunSummarizer:
    """Behaviour of ``PipelineScheduler.run_summarizer`` with the summarizer patched out."""

    def test_calls_summarizer_update(self):
        """One run triggers exactly one update_motion_summaries() call."""
        from scheduler import PipelineScheduler

        scheduler = PipelineScheduler()
        with patch("scheduler.summarizer") as summarizer_mock:
            scheduler.run_summarizer()

        summarizer_mock.update_motion_summaries.assert_called_once()

    def test_logs_error_on_summarizer_failure(self):
        """A raising summarizer is swallowed and logged exactly once."""
        from scheduler import PipelineScheduler

        scheduler = PipelineScheduler()
        with patch("scheduler.summarizer") as summarizer_mock, patch(
            "scheduler._logger"
        ) as logger_mock:
            summarizer_mock.update_motion_summaries.side_effect = RuntimeError(
                "summarizer failed"
            )
            scheduler.run_summarizer()

        logger_mock.exception.assert_called_once()
class TestPipelineSchedulerSchedule:
    """Job registration on the (patched) ``schedule`` module."""

    def test_schedule_daily_adds_job(self):
        """schedule_daily registers run_pipeline as a daily job at the given time."""
        from scheduler import PipelineScheduler

        sched = PipelineScheduler()
        with patch("scheduler.schedule") as mock_schedule:
            sched.schedule_daily("02:00")

        # Check the whole every().day.at(time).do(job) chain, so a wrong time
        # or a wrong callable is caught, not merely that every() was called.
        mock_schedule.every.assert_called_once_with()
        at_mock = mock_schedule.every.return_value.day.at
        at_mock.assert_called_once_with("02:00")
        at_mock.return_value.do.assert_called_once_with(sched.run_pipeline)

    def test_schedule_summarizer_adds_job(self):
        """schedule_summarizer registers run_summarizer at the given hourly interval."""
        from scheduler import PipelineScheduler

        sched = PipelineScheduler()
        with patch("scheduler.schedule") as mock_schedule:
            sched.schedule_summarizer(every_n_hours=6)

        # The implementation uses the plural `.hours` unit of every(n).
        mock_schedule.every.assert_called_once_with(6)
        do_mock = mock_schedule.every.return_value.hours.do
        do_mock.assert_called_once_with(sched.run_summarizer)
class TestPipelineSchedulerLoop:
    """Start/stop behaviour of the blocking scheduler loop."""

    def test_start_runs_pending_jobs(self):
        """start() polls run_pending() until stop() is called from inside the loop."""
        from scheduler import PipelineScheduler

        scheduler = PipelineScheduler()
        sleep_calls = []

        def _stop_on_third_sleep(*_args, **_kwargs):
            # Stands in for time.sleep: ends the loop once it has run three times.
            sleep_calls.append(None)
            if len(sleep_calls) >= 3:
                scheduler.stop()

        with patch("scheduler.schedule.run_pending") as run_pending_mock, patch(
            "scheduler.time.sleep", side_effect=_stop_on_third_sleep
        ), patch("scheduler.signal.signal"):
            scheduler.start()

        assert run_pending_mock.called
        assert not scheduler._running

    def test_stop_sets_running_false(self):
        """stop() clears the running flag."""
        from scheduler import PipelineScheduler

        scheduler = PipelineScheduler()
        scheduler._running = True

        scheduler.stop()

        assert not scheduler._running

    def test_signal_handler_stops_scheduler(self):
        """The signal handler delegates to stop() exactly once."""
        from scheduler import PipelineScheduler

        scheduler = PipelineScheduler()
        scheduler._running = True
        with patch.object(scheduler, "stop") as stop_mock:
            scheduler._signal_handler(signal.SIGINT, None)

        stop_mock.assert_called_once()
class TestSchedulerCLI:
    """Command-line entry point, exercised with ``PipelineScheduler`` patched out."""

    def test_main_parses_args(self):
        """--pipeline-time reaches schedule_daily and the loop is started."""
        from scheduler import main

        with patch("scheduler.PipelineScheduler") as scheduler_cls:
            instance = MagicMock()
            scheduler_cls.return_value = instance
            exit_code = main(["--pipeline-time", "03:00"])

        assert exit_code == 0
        scheduler_cls.assert_called_once_with(db_path="data/motions.db")
        instance.schedule_daily.assert_called_once_with("03:00")
        instance.start.assert_called_once()

    def test_main_custom_db_path(self):
        """--db-path is passed to the scheduler; --once runs the pipeline a single time."""
        from scheduler import main

        with patch("scheduler.PipelineScheduler") as scheduler_cls:
            instance = MagicMock()
            instance.run_pipeline.return_value = 0
            scheduler_cls.return_value = instance
            exit_code = main(["--db-path", "/tmp/test.db", "--once"])

        assert exit_code == 0
        scheduler_cls.assert_called_once_with(db_path="/tmp/test.db")
        instance.run_pipeline.assert_called_once()
Loading…
Cancel
Save