diff --git a/analysis/explorer_data.py b/analysis/explorer_data.py index 8100604..728faa9 100644 --- a/analysis/explorer_data.py +++ b/analysis/explorer_data.py @@ -23,6 +23,7 @@ from analysis.config import CURRENT_PARLIAMENT_PARTIES, _PARTY_NORMALIZE __all__ = [ "get_available_windows", "get_uniform_dim_windows", + "load_positions", "load_party_map", "load_active_mps", "load_mp_vectors_by_window", @@ -37,6 +38,9 @@ __all__ = [ "load_motions_df", "query_similar", "compute_party_axis_scores", + "get_aligned_party_scores", + "compute_party_discipline", + "_get_aligned_trajectory_scores", ] logger = logging.getLogger(__name__) @@ -567,3 +571,139 @@ def compute_party_axis_scores( except Exception: logger.exception("Failed to compute party axis scores") return {} + + +def load_positions( + db_path: str, window_size: str = "annual" +) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict]: + """Compute 2D positions per window using PCA on aligned SVD vectors. + + Returns: + positions_by_window: {window_id: {entity_name: (x, y)}} + axis_def: dict with x_axis, y_axis, method keys + """ + from analysis.political_axis import compute_2d_axes + + all_available = get_uniform_dim_windows(db_path) + + if not all_available: + return {}, {} + + positions_by_window, axis_def = compute_2d_axes( + db_path, + window_ids=all_available, + method="pca", + pca_residual=True, + normalize_vectors=True, + ) + + try: + from analysis.axis_classifier import classify_axes + + axis_def = classify_axes(positions_by_window, axis_def, db_path) + except Exception: + logger.exception("classify_axes failed; using generic axis labels") + + if window_size == "annual": + annual_keys = set(w for w in all_available if "-Q" not in w) + positions_by_window = { + w: v for w, v in positions_by_window.items() if w in annual_keys + } + + return positions_by_window, axis_def + + +def get_aligned_party_scores( + db_path: str, window: str, active_mps: set | None = None +) -> Dict[str, np.ndarray]: + """Get party 
scores for all N components from aligned PCA positions. + + For current_parliament, pass active_mps to filter to only seated MPs + (matching the compass behaviour). Historical windows include all MPs. + """ + from analysis.political_axis import compute_nd_axes + + annual_windows = get_uniform_dim_windows(db_path) + scores_by_window, _ = compute_nd_axes( + db_path, window_ids=annual_windows, n_components=10 + ) + window_scores = scores_by_window.get(window, {}) + if not window_scores: + return {} + + if window == "current_parliament" and active_mps is not None: + window_scores = {mp: sc for mp, sc in window_scores.items() if mp in active_mps} + + _party_map = load_party_map(db_path) + + n_comps = 10 + party_scores_agg: Dict[str, List[np.ndarray]] = {} + for mp_name, scores in window_scores.items(): + party = _party_map.get( + mp_name, _party_map.get(mp_name.split("(")[0].strip(), None) + ) + if party: + party_scores_agg.setdefault(party, []).append(scores[:n_comps]) + + return { + party: np.mean(np.vstack(score_list), axis=0) + for party, score_list in party_scores_agg.items() + if score_list + } + + +def compute_party_discipline( + db_path: str, + start_date: str, + end_date: str, +) -> pd.DataFrame: + """Compute per-party voting discipline (Rice index) for roll-call votes in a date range. + + Only individual MP vote rows are used (mp_name LIKE '%,%'). + Returns a DataFrame with columns [party, n_motions, discipline] sorted by discipline ascending. + Returns an empty DataFrame if fewer than 1 qualifying motion exists or on any DB error. + """ + from analysis import trajectory + + return trajectory.compute_party_discipline(db_path, start_date, end_date) + + +def _get_aligned_trajectory_scores( + db_path: str, windows: List[str], n_components: int = 10 +) -> Dict[str, Dict[str, List[float]]]: + """Get aligned PCA scores for all windows as {window: {party: [scores per component]}}. 
+ + Uses compute_nd_axes to get PCA-projected, flip-corrected scores across all windows, + ensuring consistency with the single-window SVD components view. + """ + from analysis.political_axis import compute_nd_axes + + scores_by_window, _ = compute_nd_axes( + db_path, window_ids=windows, n_components=n_components + ) + if not scores_by_window: + return {} + + party_map = load_party_map(db_path) + + result: Dict[str, Dict[str, List[float]]] = {} + for window in windows: + window_scores = scores_by_window.get(window, {}) + if not window_scores: + continue + + party_vecs: Dict[str, List[np.ndarray]] = {} + for mp_name, scores in window_scores.items(): + party = party_map.get( + mp_name, party_map.get(mp_name.split("(")[0].strip(), None) + ) + if party: + party_vecs.setdefault(party, []).append(scores[:n_components]) + + result[window] = { + party: np.mean(np.vstack(score_list), axis=0).tolist() + for party, score_list in party_vecs.items() + if score_list + } + + return result diff --git a/analysis/tabs/_rendering.py b/analysis/tabs/_rendering.py new file mode 100644 index 0000000..131a63b --- /dev/null +++ b/analysis/tabs/_rendering.py @@ -0,0 +1,796 @@ +"""Rendering helpers for explorer tabs. + +This module contains all Plotly/Streamlit rendering functions extracted from +explorer.py. It is import-safe: plotly and streamlit are optional. 
+""" + +from __future__ import annotations + +import json +import logging +from typing import Dict, List, Optional, Tuple + +try: + import plotly.express as px + import plotly.graph_objects as go +except Exception: + px = None + import types + + class _DummyTrace: + def __init__(self, **kwargs): + self.name = kwargs.get("name") + self.x = kwargs.get("x") + self.y = kwargs.get("y") + self.text = kwargs.get("text") + self.customdata = kwargs.get("customdata") + + class _DummyFigure: + def __init__(self): + self.data = [] + + def add_trace(self, trace): + if isinstance(trace, _DummyTrace): + self.data.append(trace) + else: + try: + name = getattr(trace, "name", None) + x = getattr(trace, "x", None) + y = getattr(trace, "y", None) + text = getattr(trace, "text", None) + customdata = getattr(trace, "customdata", None) + except Exception: + name = trace.get("name") if hasattr(trace, "get") else None + x = trace.get("x") if hasattr(trace, "get") else None + y = trace.get("y") if hasattr(trace, "get") else None + text = trace.get("text") if hasattr(trace, "get") else None + customdata = ( + trace.get("customdata") if hasattr(trace, "get") else None + ) + self.data.append( + _DummyTrace(name=name, x=x, y=y, text=text, customdata=customdata) + ) + + def add_annotation(self, *args, **kwargs): + return None + + def update_layout(self, **kwargs): + return None + + def update_traces(self, **kwargs): + return None + + def add_hline(self, **kwargs): + return None + + go = types.SimpleNamespace( + Figure=_DummyFigure, + Scatter=lambda **kwargs: _DummyTrace(**kwargs), + Bar=lambda **kwargs: _DummyTrace(**kwargs), + ) + +try: + import streamlit as st +except Exception: + + class _DummySt: + def cache_data(self, *args, **kwargs): + def _decorator(func): + return func + + return _decorator + + def markdown(self, *args, **kwargs): + return None + + def subheader(self, *args, **kwargs): + return None + + def plotly_chart(self, *args, **kwargs): + return None + + def caption(self, *args, 
**kwargs): + return None + + def text_area(self, *args, **kwargs): + return None + + def json(self, *args, **kwargs): + return None + + def checkbox(self, *args, **kwargs): + return kwargs.get("value", False) + + def warning(self, *args, **kwargs): + return None + + def info(self, *args, **kwargs): + return None + + def error(self, *args, **kwargs): + return None + + def success(self, *args, **kwargs): + return None + + def selectbox(self, *args, **kwargs): + opts = ( + kwargs.get("options") + if kwargs.get("options") is not None + else (args[1] if len(args) > 1 else []) + ) + return opts[0] if opts else None + + def multiselect(self, *args, **kwargs): + opts = ( + kwargs.get("options") + if kwargs.get("options") is not None + else (args[1] if len(args) > 1 else []) + ) + default = kwargs.get("default") + if default is not None: + return default + return opts[:6] if opts else [] + + def number_input(self, *args, **kwargs): + return kwargs.get("value") if "value" in kwargs else 1 + + def slider(self, *args, **kwargs): + return kwargs.get("value") if "value" in kwargs else 0.35 + + def select_slider(self, *args, **kwargs): + return kwargs.get("value") if "value" in kwargs else (None, None) + + def expander(self, *args, **kwargs): + class _Ctx: + def __enter__(self_inner): + return self_inner + + def __exit__(self_inner, exc_type, exc, tb): + return False + + return _Ctx() + + def columns(self, *args, **kwargs): + class _Col: + def markdown(self, *a, **k): + return None + + def metric(self, *a, **k): + return None + + def dataframe(self, *a, **k): + return None + + def write(self, *a, **k): + return None + + def text_input(self, *a, **k): + return None + + n = len(args[0]) if args else 1 + return tuple(_Col() for _ in range(n)) + + def form(self, *args, **kwargs): + class _Ctx: + def __enter__(self_inner): + return self_inner + + def __exit__(self_inner, exc_type, exc, tb): + return False + + return _Ctx() + + def form_submit_button(self, *args, **kwargs): + return 
False + + def button(self, *args, **kwargs): + return False + + def rerun(self, *args, **kwargs): + return None + + def divider(self, *args, **kwargs): + return None + + def spinner(self, *args, **kwargs): + class _Ctx: + def __enter__(self_inner): + return self_inner + + def __exit__(self_inner, exc_type, exc, tb): + return False + + return _Ctx() + + def write(self, *args, **kwargs): + return None + + def dataframe(self, *args, **kwargs): + return None + + def set_page_config(self, *args, **kwargs): + return None + + def title(self, *args, **kwargs): + return None + + def sidebar(self, *args, **kwargs): + return self + + def radio(self, *args, **kwargs): + return kwargs.get("value") if "value" in kwargs else None + + def text_input(self, *args, **kwargs): + return kwargs.get("value", "") + + def tabs(self, *args, **kwargs): + n = len(args[0]) if args else 1 + return [self for _ in range(n)] + + @property + def session_state(self): + if not hasattr(self, "_session_state"): + self._session_state = {} + return self._session_state + + st = _DummySt() + +from analysis.config import PARTY_COLOURS + +logger = logging.getLogger(__name__) + + +def _render_scree_plot(importances: List[float], n_show: int = 15) -> None: + """Render a scree plot showing relative SVD component importance. + + Highlighted bars for the top-2 components (used in the compass); muted bars + for the rest. A cumulative-variance dashed line on the same y-axis helps + spot the elbow. A 50 % cumulative threshold line is drawn for reference. + + Args: + importances: List of importance values sorted descending (from load_scree_data). + n_show: How many components to display (default: first 15). 
+ """ + if not importances: + return + data = list(importances[:n_show]) + ranks = list(range(1, len(data) + 1)) + + cumsum = [] + running = 0.0 + for v in data: + running += v + cumsum.append(running) + + n_highlight = 2 + bar_colours = [ + "#1565C0" if i < n_highlight else "#90CAF9" for i in range(len(data)) + ] + + fig = go.Figure() + + fig.add_trace( + go.Bar( + x=ranks, + y=data, + marker_color=bar_colours, + hovertemplate="As %{x}
%{y:.1f}% verklaarde variantie", + showlegend=False, + ) + ) + + fig.add_trace( + go.Scatter( + x=ranks, + y=cumsum, + mode="lines+markers", + line={"color": "#F57C00", "width": 2, "dash": "dot"}, + marker={"size": 5, "color": "#F57C00"}, + hovertemplate="As %{x}
Cumulatief: %{y:.1f}%", + name="Cumulatief", + showlegend=True, + ) + ) + + fig.add_hline( + y=50, + line_dash="dash", + line_color="#BDBDBD", + line_width=1, + annotation_text="50%", + annotation_position="right", + annotation_font_color="#9E9E9E", + annotation_font_size=11, + ) + + for i in range(min(n_highlight, len(data))): + fig.add_annotation( + x=ranks[i], + y=data[i] + 0.3, + text=f"{data[i]:.1f}%", + showarrow=False, + font={"size": 11, "color": "#1565C0"}, + yanchor="bottom", + ) + + fig.update_layout( + height=280, + margin={"l": 10, "r": 50, "t": 30, "b": 40}, + title={ + "text": "Belang per SVD-as", + "font": {"size": 13, "color": "#555555"}, + "x": 0.02, + "xanchor": "left", + }, + legend={ + "orientation": "h", + "x": 0.5, + "xanchor": "center", + "y": 1.08, + "font": {"size": 11}, + }, + xaxis={ + "title": {"text": "As (rang)", "font": {"size": 11}}, + "tickmode": "linear", + "tick0": 1, + "dtick": 1, + "showline": False, + "showgrid": False, + }, + yaxis={ + "title": {"text": "% van totale variantie", "font": {"size": 11}}, + "showline": False, + "showgrid": True, + "gridcolor": "#eeeeee", + "ticksuffix": "%", + "range": [0, max(cumsum) * 1.08], + }, + plot_bgcolor="rgba(0,0,0,0)", + paper_bgcolor="rgba(0,0,0,0)", + bargap=0.25, + ) + st.plotly_chart(fig, use_container_width=True) + + +def _build_party_axis_figure( + party_coords: Dict[str, Tuple[float, float]], + comp_sel: int, + theme: dict, + bootstrap_data: Optional[Dict[str, Dict]] = None, +) -> Optional[go.Figure]: + """Build a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`. + + Accepts explicit per-party 2D coordinates (x,y) and uses the component selection to + pick the value (comp_sel==1 -> x, comp_sel==2 -> y). This makes the API explicit and + avoids indexing into long SVD vectors. + + Returns go.Figure or None if no data available. 
+ """ + if not party_coords: + return None + + if comp_sel not in (1, 2): + raise ValueError( + "_build_party_axis_figure only supports comp_sel 1 or 2 when using explicit coords" + ) + + axis_idx = comp_sel - 1 + flip = theme.get("flip", False) + + parties = [] + scores = [] + colours = [] + + for party, val in party_coords.items(): + try: + if hasattr(val, "__len__") and len(val) == 2: + x, y = val + score = float(x if axis_idx == 0 else y) + else: + score = float(val[axis_idx]) + + if flip: + score = -score + except Exception: + continue + + parties.append(party) + scores.append(score) + colours.append(PARTY_COLOURS.get(party, "#9E9E9E")) + + if not scores: + return None + + hover = [] + symbols = [] + if bootstrap_data: + for p, s in zip(parties, scores): + bd = bootstrap_data.get(p) + if bd: + n_mps = bd.get("n_mps", "?") + ci_low = None + ci_high = None + try: + ci_low = float(bd["ci_lower"][axis_idx]) + ci_high = float(bd["ci_upper"][axis_idx]) + except Exception: + pass + if ci_low is not None and ci_high is not None: + hover.append( + f"{p}: {s:.3f} (N={n_mps}, 95%-BI: [{ci_low:.3f}, {ci_high:.3f}])" + ) + else: + hover.append(f"{p}: {s:.3f} (N={n_mps})") + symbols.append("diamond" if n_mps == 1 else "circle") + else: + hover.append(f"{p}: {s:.3f}") + symbols.append("circle") + marker_kwargs = {"size": 14, "color": colours, "symbol": symbols} + else: + hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)] + marker_kwargs = {"size": 14, "color": colours} + + fig = go.Figure() + x_min, x_max = min(scores) * 1.15, max(scores) * 1.15 + if x_min == x_max: + x_min, x_max = x_min - 1, x_max + 1 + fig.add_trace( + go.Scatter( + x=[x_min, x_max], + y=[0, 0], + mode="lines", + line={"color": "#cccccc", "width": 1}, + hoverinfo="skip", + showlegend=False, + ) + ) + + scatter_kwargs = { + "x": scores, + "y": [0] * len(scores), + "mode": "markers+text", + "text": parties, + "textposition": "top center", + "marker": marker_kwargs, + "hovertext": hover, + 
"hoverinfo": "text", + "showlegend": False, + } + fig.add_trace(go.Scatter(**scatter_kwargs)) + + pos_pole = theme.get("positive_pole", "") + neg_pole = theme.get("negative_pole", "") + left_label = neg_pole + right_label = pos_pole + + fig.update_layout( + height=160, + margin={"l": 10, "r": 10, "t": 10, "b": 30}, + xaxis={ + "title": f"← {left_label} | {right_label} →", + "showticklabels": False, + "showline": False, + "showgrid": False, + "zeroline": False, + }, + yaxis={"visible": False, "range": [-1, 2]}, + plot_bgcolor="rgba(0,0,0,0)", + paper_bgcolor="rgba(0,0,0,0)", + ) + return fig + + +def _render_party_axis_chart( + party_coords: Dict[str, Tuple[float, float]], + comp_sel: int, + theme: dict, + bootstrap_data: Optional[Dict[str, Dict]] = None, +) -> None: + """Render a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`. + + Expects explicit per-party coords mapping (party -> (x,y)) for components 1 & 2. + """ + fig = _build_party_axis_figure(party_coords, comp_sel, theme, bootstrap_data) + if fig is None: + st.caption("_Partijdata niet beschikbaar voor deze as._") + return + st.plotly_chart(fig, use_container_width=True) + + +def _render_party_axis_chart_1d( + party_coords: Dict[str, Tuple[float, ...]], + comp_sel: int, + theme: dict, +) -> None: + """Render a 1D horizontal scatter of party positions on SVD component `comp_sel`. + + Uses the same format as components 1-2: parties as markers on a horizontal line + with axis title showing poles with arrows. 
+ + Args: + party_coords: Dict mapping party name to tuple of scores (score_for_comp,) + comp_sel: SVD component number (1-indexed) + theme: Dict with label, positive_pole, negative_pole, flip + """ + if not party_coords: + st.caption("_Partijdata niet beschikbaar voor deze as._") + return + + parties = [] + scores = [] + colours = [] + + for party, coords in party_coords.items(): + try: + score = float(coords[0]) + parties.append(party) + scores.append(score) + colours.append(PARTY_COLOURS.get(party, "#9E9E9E")) + except Exception: + continue + + if not scores: + st.caption("_Partijdata niet beschikbaar voor deze as._") + return + + flip = theme.get("flip", False) + if flip: + scores = [-s for s in scores] + + hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)] + + fig = go.Figure() + x_min, x_max = min(scores) * 1.15, max(scores) * 1.15 + if x_min == x_max: + x_min, x_max = x_min - 1, x_max + 1 + + fig.add_trace( + go.Scatter( + x=[x_min, x_max], + y=[0, 0], + mode="lines", + line={"color": "#cccccc", "width": 1}, + hoverinfo="skip", + showlegend=False, + ) + ) + + fig.add_trace( + go.Scatter( + x=scores, + y=[0] * len(scores), + mode="markers+text", + text=parties, + textposition="top center", + marker={"size": 14, "color": colours}, + hovertext=hover, + hoverinfo="text", + showlegend=False, + ) + ) + + pos_pole = theme.get("positive_pole", "") + neg_pole = theme.get("negative_pole", "") + left_label = neg_pole + right_label = pos_pole + + fig.update_layout( + height=160, + margin={"l": 10, "r": 10, "t": 10, "b": 30}, + xaxis={ + "title": f"← {left_label} | {right_label} →", + "showticklabels": False, + "showline": False, + "showgrid": False, + "zeroline": False, + }, + yaxis={"visible": False, "range": [-1, 2]}, + plot_bgcolor="rgba(0,0,0,0)", + paper_bgcolor="rgba(0,0,0,0)", + ) + + st.plotly_chart(fig, use_container_width=True) + + +def _render_svd_time_trajectory( + party_scores_by_window: Dict[str, Dict[str, List[float]]], + comp_sel: int, + theme: 
dict, + selected_parties: List[str], +) -> None: + """Render a time trajectory plot showing party positions over time on an SVD component. + + Args: + party_scores_by_window: {window_id: {party_name: [scores]}} + comp_sel: SVD component number (1-indexed) + theme: Theme dict with label, positive_pole, negative_pole, flip + selected_parties: List of party names to display + """ + if not party_scores_by_window or not selected_parties: + st.caption("_Geen data beschikbaar voor tijdtraject._") + return + + idx = comp_sel - 1 + + party_trajectories: Dict[str, List[Tuple[str, float]]] = {} + + all_windows = list(party_scores_by_window.keys()) + sorted_windows = [] + if "current_parliament" in all_windows: + sorted_windows.append("current_parliament") + other_windows = sorted( + [w for w in all_windows if w != "current_parliament"], reverse=True + ) + sorted_windows.extend(other_windows) + + for window in sorted_windows: + scores_by_party = party_scores_by_window.get(window, {}) + for party in selected_parties: + scores = scores_by_party.get(party, []) + if scores and len(scores) > idx: + try: + score = float(scores[idx]) + party_trajectories.setdefault(party, []).append((window, score)) + except (ValueError, TypeError): + continue + + if not party_trajectories: + st.caption("_Geen data beschikbaar voor geselecteerde partijen._") + return + + fig = go.Figure() + + all_scores = [] + for traj in party_trajectories.values(): + all_scores.extend([s for _, s in traj]) + + if not all_scores: + st.caption("_Geen scores beschikbaar._") + return + + x_min, x_max = min(all_scores) * 1.15, max(all_scores) * 1.15 + if x_min == x_max: + x_min, x_max = x_min - 1, x_max + 1 + + window_to_y = {w: i for i, w in enumerate(sorted_windows)} + + for window in sorted_windows: + y_pos = window_to_y[window] + fig.add_trace( + go.Scatter( + x=[x_min, x_max], + y=[y_pos, y_pos], + mode="lines", + line={"color": "#cccccc", "width": 1}, + hoverinfo="skip", + showlegend=False, + ) + ) + + for party 
in selected_parties: + if party not in party_trajectories: + continue + + traj = party_trajectories[party] + if len(traj) < 1: + continue + + x_vals = [score for _, score in traj] + y_vals = [window_to_y[window] for window, _ in traj] + color = PARTY_COLOURS.get(party, "#9E9E9E") + + fig.add_trace( + go.Scatter( + x=x_vals, + y=y_vals, + mode="lines", + line={"color": color, "width": 2}, + hoverinfo="skip", + showlegend=False, + ) + ) + + hover_texts = [f"{party}
{window}: {score:.3f}" for window, score in traj] + fig.add_trace( + go.Scatter( + x=x_vals, + y=y_vals, + mode="markers+text", + text=[party] * len(traj), + textposition="top center", + marker={"size": 12, "color": color}, + hovertext=hover_texts, + hoverinfo="text", + showlegend=False, + ) + ) + + pos_pole = theme.get("positive_pole", "") + neg_pole = theme.get("negative_pole", "") + left_label = neg_pole + right_label = pos_pole + + y_labels = {} + for window in sorted_windows: + if window == "current_parliament": + y_labels[window_to_y[window]] = "Huidig" + else: + y_labels[window_to_y[window]] = window + + fig.update_layout( + height=max(400, len(sorted_windows) * 60 + 100), + margin={"l": 80, "r": 10, "t": 10, "b": 30}, + xaxis={ + "title": f"← {left_label} | {right_label} →", + "range": [x_min, x_max], + "showticklabels": False, + "showline": False, + "showgrid": True, + "gridcolor": "rgba(0,0,0,0.1)", + "zeroline": True, + "zerolinecolor": "rgba(0,0,0,0.2)", + }, + yaxis={ + "tickvals": list(y_labels.keys()), + "ticktext": list(y_labels.values()), + "tickmode": "array", + "autorange": "reversed", + "showgrid": False, + }, + plot_bgcolor="rgba(0,0,0,0)", + paper_bgcolor="rgba(0,0,0,0)", + ) + + st.plotly_chart(fig, use_container_width=True) + + +def _render_voting_results(voting_results_json) -> None: + """Render a voting_results JSON blob as a grouped voor/tegen/onthouden table. + + The JSON is stored as {party_or_mp: vote} where vote is one of + 'voor', 'tegen', 'onthouden', 'afwezig'. We group by vote for readability. 
+ """ + if not voting_results_json: + return + try: + vdata = ( + json.loads(voting_results_json) + if isinstance(voting_results_json, str) + else voting_results_json + ) + if not isinstance(vdata, dict) or not vdata: + return + by_vote: Dict[str, List[str]] = {} + for actor, vote in vdata.items(): + vote_str = str(vote).lower().strip() + by_vote.setdefault(vote_str, []).append(str(actor)) + vote_order = ["voor", "tegen", "onthouden", "afwezig"] + vote_emoji = {"voor": "✅", "tegen": "❌", "onthouden": "🟡", "afwezig": "⬜"} + rows_shown = False + for v in vote_order + [k for k in by_vote if k not in vote_order]: + actors = by_vote.get(v) + if not actors: + continue + emoji = vote_emoji.get(v, "▪️") + st.markdown( + f"**{emoji} {v.capitalize()}** ({len(actors)}): {', '.join(sorted(actors))}" + ) + rows_shown = True + if not rows_shown: + st.caption("_Geen stemuitslag beschikbaar_") + except Exception: + pass + + +def _add_y_direction_annotations(fig: go.Figure) -> None: + """Add ▲ Progressief / ▼ Conservatief labels above and below the Y axis.""" + common = dict( + xref="paper", + yref="paper", + x=-0.07, + showarrow=False, + font=dict(size=11, color="#666666"), + ) + fig.add_annotation(**common, y=1.02, text="▲ Progressief", xanchor="center") + fig.add_annotation(**common, y=-0.06, text="▼ Conservatief", xanchor="center") diff --git a/analysis/tabs/browser.py b/analysis/tabs/browser.py index e240fd0..91fe8b6 100644 --- a/analysis/tabs/browser.py +++ b/analysis/tabs/browser.py @@ -1,18 +1,95 @@ -"""Browser tab for the parliamentary explorer. - -This module will contain the browser tab implementation. -Currently: Tab logic remains in explorer.py pending Streamlit decoupling. 
-""" +"""Browser tab for the parliamentary explorer.""" from __future__ import annotations +import pandas as pd + +import analysis.explorer_data as explorer_data +from analysis.tabs._rendering import _render_voting_results, st + def build_browser_tab(db_path: str, show_rejected: bool) -> None: - """Build the Motie Browser tab. + """Build the Motie Browser tab.""" + st.subheader("Motie Browser") + + df = explorer_data.load_motions_df(db_path) + if df.empty: + st.warning("Geen moties beschikbaar.") + return + + if not show_rejected: + df = df[df["title"].fillna("").str.strip() != "Verworpen."] + + col1, col2, col3 = st.columns(3) + with col1: + years = sorted(df["year"].dropna().astype(int).unique().tolist()) + year_filter = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years]) + with col2: + min_controversy_b = st.slider( + "Min. controverse", + min_value=0.0, + max_value=1.0, + value=0.0, + step=0.05, + key="browser_controversy", + ) + with col3: + sort_by = st.selectbox("Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"]) + + working = df.copy() + if year_filter != "(Alle)": + working = working[working["year"] == int(year_filter)] + if min_controversy_b > 0: + working = working[working["controversy_score"] >= min_controversy_b] + + sort_map = { + "Datum (nieuw)": ("date", False), + "Controverse": ("controversy_score", False), + "Marge": ("winning_margin", True), + } + sort_col, sort_asc = sort_map[sort_by] + working = working.sort_values(by=sort_col, ascending=sort_asc) + + display_cols = ["id", "title", "date", "controversy_score", "winning_margin"] + available_display = [c for c in display_cols if c in working.columns] + st.dataframe( + working[available_display].reset_index(drop=True), + use_container_width=True, + height=350, + ) + + st.divider() + + st.markdown("**Detail weergave** — vul een motie-ID in:") + sel_id = st.number_input( + "Motie ID", + min_value=int(working["id"].min()) if not working.empty else 1, + max_value=int(working["id"].max()) 
if not working.empty else 99999, + value=int(working["id"].iloc[0]) if not working.empty else 1, + step=1, + ) + motion_row = df[df["id"] == sel_id] + if not motion_row.empty: + row = motion_row.iloc[0] + st.markdown(f"### {row.get('title') or 'Onbekend'}") + date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?" + st.caption( + f"📅 {date_str} | 🔥 Controverse: {row.get('controversy_score', 0):.2f}" + ) + + url = row.get("url") + if url and str(url).startswith("http"): + st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})") - Currently delegates to explorer.py implementation. - Will be extracted when rendering logic is decoupled from Streamlit. - """ - import explorer + st.markdown("**Stemuitslag:**") + _render_voting_results(row.get("voting_results")) - explorer.build_browser_tab(db_path, show_rejected) + sim = explorer_data.query_similar(db_path, int(sel_id), top_k=10) + if not sim.empty: + st.markdown("**Vergelijkbare moties:**") + st.dataframe( + sim[["title", "score", "date", "policy_area"]], + use_container_width=True, + ) + else: + st.caption("_Nog geen vergelijkbare moties beschikbaar voor deze motie_") diff --git a/analysis/tabs/compass.py b/analysis/tabs/compass.py index 3ca9199..66c5a16 100644 --- a/analysis/tabs/compass.py +++ b/analysis/tabs/compass.py @@ -1,20 +1,200 @@ -"""Compass tab for the parliamentary explorer. - -This module will contain the compass tab implementation. -Currently: Tab logic remains in explorer.py pending Streamlit decoupling. -""" +"""Compass tab for the parliamentary explorer.""" from __future__ import annotations -from typing import List +import datetime as _dt +import re +from typing import Dict, Tuple + +import numpy as np +import pandas as pd + +from analysis import config +import analysis.explorer_data as explorer_data +from analysis.tabs._rendering import px, st + +PARTY_COLOURS = config.PARTY_COLOURS def build_compass_tab(db_path: str, window_size: str) -> None: - """Build the Politiek Kompas tab. 
+ """Build the Politiek Kompas tab.""" + st.subheader("Politiek Kompas") + st.markdown( + "2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)." + ) + + # Compass always uses annual windows regardless of the sidebar window_size setting. + positions_by_window, axis_def = explorer_data.load_positions(db_path, "annual") + if axis_def is None: + axis_def = {} + if not positions_by_window: + st.warning( + "Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid." + ) + return + + party_map = explorer_data.load_party_map(db_path) + active_mps = explorer_data.load_active_mps(db_path) + + _current_year = str(_dt.date.today().year) + year_windows = sorted( + w + for w in positions_by_window + if w != "current_parliament" and w != _current_year + ) + has_current = "current_parliament" in positions_by_window + windows = year_windows + (["current_parliament"] if has_current else []) + + _SPARSE_YEARS = {"2016", "2017", "2018"} + _THRESHOLD = 0.65 + + def _window_label(w: str) -> str: + if w == "current_parliament": + return "Huidig parlement" + if w in _SPARSE_YEARS: + return f"{w} ⚠️" + return w + + col1, col2 = st.columns([3, 1]) + with col2: + window_idx = st.selectbox( + "Jaar", + options=windows, + index=len(windows) - 1, + format_func=_window_label, + ) + level = st.radio( + "Weergave", + options=["Kamerleden", "Partijen"], + index=0, + horizontal=True, + ) + min_mps = st.number_input( + "Min. 
Kamerleden per partij", + min_value=1, + max_value=20, + value=3, + step=1, + help="Partijen met minder dan dit aantal zetels worden niet weergegeven.", + ) + + pos = positions_by_window.get(window_idx, {}) + if not pos: + st.info(f"Geen data voor venster {window_idx}") + return + + if window_idx == "current_parliament": + pos = {mp: xy for mp, xy in pos.items() if mp in active_mps} + + def _strip_paren(name: str) -> str: + return re.sub(r"\s*\([^)]*\)", "", name).strip() + + deduped: Dict[str, Tuple[float, float]] = {} + for name, (x, y) in pos.items(): + base = _strip_paren(name) + if base in deduped: + ox, oy = deduped[base] + deduped[base] = ((ox + x) / 2, (oy + y) / 2) + else: + deduped[base] = (x, y) + pos = deduped + + rows = [] + for name, (x, y) in pos.items(): + party = party_map.get(name) or party_map.get(_strip_paren(name), "Unknown") + rows.append({"name": name, "x": x, "y": y, "party": party}) + + df_pos = pd.DataFrame(rows) + + party_counts = df_pos[df_pos["party"] != "Unknown"]["party"].value_counts() + valid_parties = set(party_counts[party_counts >= min_mps].index) + df_pos = df_pos[df_pos["party"].isin(valid_parties)] + + if df_pos.empty: + st.info("Geen partijen met genoeg Kamerleden voor dit venster.") + return + + _raw_x = axis_def.get("x_label") + _raw_y = axis_def.get("y_label") + + try: + from analysis.axis_classifier import display_label_for_modal + + _x_label = display_label_for_modal(_raw_x, "x") + _y_label = display_label_for_modal(_raw_y, "y") + except Exception: + from analysis.svd_labels import get_fallback_labels + + _x_fallback, _y_fallback = get_fallback_labels() + _x_label = _raw_x or _x_fallback + _y_label = _raw_y or _y_fallback + + if level == "Partijen": + df_party = df_pos.groupby("party", as_index=False).agg( + x=("x", "mean"), y=("y", "mean"), n=("name", "count") + ) + df_party["name"] = df_party["party"] + colour_map = { + p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_party["party"].unique() + } + fig = px.scatter( + 
df_party, + x="x", + y="y", + color="party", + text="party", + hover_name="party", + hover_data={"party": False, "x": ":.3f", "y": ":.3f", "n": True}, + color_discrete_map=colour_map, + title=f"Politiek Kompas — {_window_label(window_idx)} (partijen)", + labels={ + "x": _x_label, + "y": _y_label, + "n": "Kamerleden", + }, + ) + fig.update_traces(textposition="top center", marker_size=14) + else: + colour_map = { + p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique() + } + fig = px.scatter( + df_pos, + x="x", + y="y", + color="party", + hover_name="name", + hover_data={"party": True, "x": ":.3f", "y": ":.3f"}, + color_discrete_map=colour_map, + title=f"Politiek Kompas — {_window_label(window_idx)}", + labels={"x": _x_label, "y": _y_label}, + ) - Currently delegates to explorer.py implementation. - Will be extracted when rendering logic is decoupled from Streamlit. - """ - import explorer + fig.update_layout( + height=600, + legend_title_text="Partij", + xaxis={"range": [-1, 1]}, + yaxis={"range": [-0.6, 0.6]}, + ) + with col1: + st.plotly_chart(fig, use_container_width=True) + _x_interp = axis_def.get("x_interpretation", {}).get(window_idx, "") + if ( + _x_interp + and axis_def.get("x_quality", {}).get(window_idx, 1.0) < _THRESHOLD + ): + st.caption(_x_interp) - explorer.build_compass_tab(db_path, window_size) + # Voting discipline analysis + st.markdown("---") + st.markdown( + "**Stemdiscipline analyse:** De Rice-index meet hoe eensgezind partijen stemmen " + "tijdens hoofdelijke stemmingen. Een score van 100% betekent dat alle MPs van " + "een partij hetzelfde stemden; 50% wijst op een gelijke splitsing binnen de partij. " + "Partijen met hoge discipline (>95%) zoals PVV en SGP stemmen als een blok, wat " + "wijst op sterke partijdiscipline en homogene membership. 
def _motion_id_as_int(raw_id: object) -> int | None:
    """Coerce a motion id from the JSON payload to ``int``.

    Returns ``None`` when the id is missing or cannot be converted, so callers
    can skip the motion instead of raising.
    """
    if raw_id is None:
        return None
    try:
        return int(raw_id)
    except (TypeError, ValueError, OverflowError):
        return None


def _render_motion_column(
    header: str, arrow: str, motions: list, motion_details: Dict[int, tuple]
) -> None:
    """Render one pole's motions as expanders, enriched with database metadata.

    Rows in ``motion_details`` follow the SELECT order used by
    ``build_svd_components_tab``:
    (id, title, date, policy_area, url, body_text, voting_results).
    """
    st.markdown(header)
    for motion in motions:
        mid = motion.get("motion_id")
        title = motion.get("title") or f"Motie #{mid}"
        with st.expander(f"{arrow} {title}"):
            # Ids that cannot be coerced were never fetched; .get(None) is a miss.
            row = motion_details.get(_motion_id_as_int(mid))
            if not row:
                st.caption("_Geen metadata beschikbaar_")
                continue
            date_str = str(row[2])[:10] if row[2] is not None else "?"
            st.caption(f"📅 {date_str} | {row[3] or '—'}")
            if row[4] and str(row[4]).startswith("http"):
                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
            if row[5]:
                with st.expander("Toon volledige tekst"):
                    st.write(row[5])
            _render_voting_results(row[6])


def build_svd_components_tab(db_path: str) -> None:
    """Render the SVD components tab: party positions and top motions per axis.

    Reads ``thoughts/explorer/top_svd_top_motions.json`` and offers a selector
    over the components found in it, with the theme label/explanation from
    ``SVD_THEMES``. For the selected axis it shows either a 1D party chart for
    one window or a time trajectory, followed by the top motions split over the
    two poles of the axis.

    Party positions for every axis come from
    ``explorer_data.get_aligned_party_scores``. The axis orientation ("flip")
    is derived from the mean scores of ``CANONICAL_RIGHT`` versus
    ``CANONICAL_LEFT`` parties; the theme's own ``flip`` is only used when that
    cannot be computed for the selected component.

    Args:
        db_path: Path to the DuckDB database, opened read-only for motion details.
    """
    st.subheader("🔬 SVD Assen — politieke polarisatiethema's")
    st.markdown(
        "Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen "
        "van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren "
        "het spanningsveld dat de as beschrijft."
    )

    scree_importances = explorer_data.load_scree_data(db_path)
    if scree_importances:
        st.markdown(
            "**Scree-plot** — het relatieve gewicht van elke SVD-as. "
            "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; "
            "latere assen (7+) zijn fragiel en mogelijk niet boven ruisniveau."
        )
        _render_scree_plot(scree_importances)

    json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
    if not os.path.exists(json_path):
        st.warning(
            f"Top-SVD data not found at {json_path}. Run the importance job to generate it."
        )
        return

    try:
        import json

        with open(json_path, "r", encoding="utf-8") as fh:
            payload = json.load(fh)
    except Exception as e:
        st.error(f"Failed to load SVD importance JSON: {e}")
        return

    window = payload.get("window")
    rows = payload.get("rows", [])
    if not rows:
        st.info("Geen top-moties in dataset")
        return

    st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**")

    # Group rows per component, keeping the first occurrence of each motion id.
    comp_map: dict[int, list] = {}
    seen_ids: dict[int, set] = {}
    for r in rows:
        comp = int(r.get("component") or 0)
        motion_id = r.get("motion_id")
        seen = seen_ids.setdefault(comp, set())
        bucket = comp_map.setdefault(comp, [])
        if motion_id in seen:
            continue
        seen.add(motion_id)
        bucket.append(r)

    comp_options = sorted(comp_map)

    def _comp_label(c: int) -> str:
        lbl = SVD_THEMES.get(c, {}).get("label", "")
        return f"As {c} — {lbl}" if lbl else f"As {c}"

    comp_display = [_comp_label(c) for c in comp_options]

    party_scores_default = explorer_data.load_party_axis_scores(db_path)
    party_mp_vectors = explorer_data.load_party_mp_vectors(db_path)

    col1, col2 = st.columns([2, 1])

    with col2:
        comp_sel_idx = st.selectbox(
            "Selecteer SVD-as",
            options=list(range(len(comp_options))),
            format_func=lambda i: comp_display[i],
            index=0,
        )
        comp_sel = comp_options[comp_sel_idx]

        min_mps = st.number_input(
            "Min. Kamerleden per partij",
            min_value=1,
            max_value=20,
            value=1,
            step=1,
            help="Partijen met minder dan dit aantal Kamerleden worden niet weergegeven.",
        )

        view_mode = st.radio(
            "Weergave",
            options=["Enkel venster", "Tijdtraject"],
            index=0,
            help="Enkel venster: toont posities voor één tijdsvenster. Tijdtraject: toont hoe partijen over tijd bewegen op deze as.",
        )

        selected_parties_for_trajectory: list = []
        if view_mode == "Tijdtraject":
            all_parties = (
                sorted(party_scores_default.keys()) if party_scores_default else []
            )
            default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties][:8]
            selected_parties_for_trajectory = st.multiselect(
                "Partijen om te tonen",
                options=all_parties,
                default=default_parties,
                help="Selecteer de partijen die je wilt zien in het tijdtraject.",
            )

    theme = SVD_THEMES.get(comp_sel, {})
    if theme:
        st.info(f"**{theme.get('label', '')}** — {theme.get('explanation', '')}")

    motions = comp_map.get(comp_sel, [])

    # The running calendar year is excluded from the window choices.
    _current_year = str(_dt.date.today().year)
    available_windows = explorer_data.get_uniform_dim_windows(db_path)
    year_windows = sorted(
        w for w in available_windows if w != "current_parliament" and w != _current_year
    )
    has_current = "current_parliament" in available_windows
    svd_windows = year_windows + (["current_parliament"] if has_current else [])

    def _svd_window_label(w: str) -> str:
        if w == "current_parliament":
            return "Huidig parlement"
        return w

    with col1:
        svd_window = st.selectbox(
            "Jaar",
            options=svd_windows,
            index=max(len(svd_windows) - 1, 0),
            format_func=_svd_window_label,
            key=f"svd_window_{comp_sel}",
        )

    party_mp_counts = (
        {p: len(v) for p, v in party_mp_vectors.items()} if party_mp_vectors else {}
    )

    # Only the current parliament is restricted to seated MPs.
    active_mps = (
        explorer_data.load_active_mps(db_path)
        if svd_window == "current_parliament"
        else None
    )
    aligned_all_scores = explorer_data.get_aligned_party_scores(
        db_path, svd_window, active_mps
    )

    # Components are 1-based in the JSON; guard against 0/negative so we never
    # wrap around to the last score via negative indexing.
    comp_idx = comp_sel - 1
    party_1d_coords: dict = {}
    if comp_idx >= 0:
        for party, all_scores in aligned_all_scores.items():
            if comp_idx < len(all_scores):
                party_1d_coords[party] = (float(all_scores[comp_idx]),)

    # Orient the axis so canonical right-wing parties end up on the right.
    flip = bool(theme.get("flip", False))
    if 0 <= comp_idx < 10:
        try:
            from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT
        except ImportError:
            logger.exception(
                "Canonical party lists unavailable; using theme flip for axis %s",
                comp_sel,
            )
        else:
            right_scores = [
                float(scores[comp_idx])
                for party, scores in aligned_all_scores.items()
                if party in CANONICAL_RIGHT and comp_idx < len(scores)
            ]
            left_scores = [
                float(scores[comp_idx])
                for party, scores in aligned_all_scores.items()
                if party not in CANONICAL_RIGHT
                and party in CANONICAL_LEFT
                and comp_idx < len(scores)
            ]
            flip = bool(
                right_scores
                and left_scores
                and np.mean(right_scores) < np.mean(left_scores)
            )

    theme_with_flip = {**theme, "flip": flip}

    if min_mps > 1 and party_mp_counts:
        valid_parties = {p for p, count in party_mp_counts.items() if count >= min_mps}
        party_1d_coords = {
            p: coords for p, coords in party_1d_coords.items() if p in valid_parties
        }

    if view_mode == "Tijdtraject" and selected_parties_for_trajectory:
        party_scores_by_window = explorer_data._get_aligned_trajectory_scores(
            db_path, svd_windows
        )
        _render_svd_time_trajectory(
            party_scores_by_window,
            comp_sel,
            theme_with_flip,
            selected_parties_for_trajectory,
        )
    else:
        _render_party_axis_chart_1d(party_1d_coords, comp_sel, theme_with_flip)

    # Batch-fetch metadata for all motions of this axis in one query.
    ids_int: List[int] = []
    for m in motions:
        raw_id = m.get("motion_id")
        if raw_id is None:
            continue
        coerced = _motion_id_as_int(raw_id)
        if coerced is None:
            logger.warning("Skipping invalid motion id in SVD batch fetch: %r", raw_id)
        else:
            ids_int.append(coerced)

    motion_details: Dict[int, tuple] = {}
    if ids_int and duckdb is not None:
        con = None
        try:
            placeholders = ", ".join("?" for _ in ids_int)
            con = duckdb.connect(database=db_path, read_only=True)
            db_rows = con.execute(
                f"SELECT id, title, date, policy_area, url, body_text, voting_results "
                f"FROM motions WHERE id IN ({placeholders})",
                ids_int,
            ).fetchall()
            motion_details = {r[0]: r for r in db_rows}
        except Exception:
            logger.exception("Failed to batch-fetch motion details")
        finally:
            if con:
                con.close()

    def _score(m: dict) -> float:
        # A missing or null score counts as 0, i.e. the positive side.
        return float(m.get("score") or 0.0)

    pos_motions = [m for m in motions if _score(m) >= 0]
    neg_motions = [m for m in motions if _score(m) < 0]

    pos_pole = theme_with_flip.get("positive_pole", "")
    neg_pole = theme_with_flip.get("negative_pole", "")

    if flip:
        left_pole, right_pole = pos_pole, neg_pole
        left_motions, right_motions = pos_motions, neg_motions
        left_arrow, right_arrow = "▲", "▼"
    else:
        left_pole, right_pole = neg_pole, pos_pole
        left_motions, right_motions = neg_motions, pos_motions
        left_arrow, right_arrow = "▼", "▲"

    lcol, rcol = st.columns(2)
    with lcol:
        _render_motion_column(
            f"**← {left_pole}**", left_arrow, left_motions, motion_details
        )
    with rcol:
        _render_motion_column(
            f"**{right_pole} →**", right_arrow, right_motions, motion_details
        )
+ Minimal viable flow: + - seed with top-N controversial motions (SEED_MOTIONS) + - present one question at a time, store answers in st.session_state['mp_quiz_votes'] + - after each answer call MotionDatabase.match_mps_for_votes to rank MPs + - if multiple candidates remain, call choose_discriminating_motions to pick next question + - stop when unique MP found or no discriminating motions remain """ - import explorer + st.subheader("🧑‍⚖️ Welk tweede kamerlid ben jij?") + st.markdown( + "Beantwoord een paar eenvoudige ja/nee/onthoud vragen over moties om te zien welk Kamerlid het meest op jou lijkt." + ) + + SEED_MOTIONS = 8 + MAX_QUESTIONS = 20 + + if "mp_quiz_votes" not in st.session_state: + st.session_state["mp_quiz_votes"] = {} + if "mp_quiz_asked" not in st.session_state: + st.session_state["mp_quiz_asked"] = [] + + from database import MotionDatabase as _MotionDatabase + + db_inst = _MotionDatabase(db_path) + + df = explorer_data.load_motions_df(db_path) + if df.empty: + st.warning("Geen moties beschikbaar om de quiz te starten.") + return + + seed_ids = db_inst.get_motions_with_individual_votes(k=SEED_MOTIONS) + if not seed_ids: + st.warning("Geen individuele stemdata beschikbaar voor de quiz.") + return + + def _next_motion_id(): + for mid in seed_ids: + if str(mid) not in st.session_state["mp_quiz_votes"]: + return mid + try: + user_votes = { + int(k): v for k, v in st.session_state["mp_quiz_votes"].items() + } + ranked = db_inst.match_mps_for_votes(user_votes, limit=200) + except Exception: + ranked = [] + + candidates = [r["mp_name"] for r in ranked] + excluded = [int(k) for k in st.session_state["mp_quiz_votes"].keys()] + if not candidates: + return None + try: + next_ids = db_inst.choose_discriminating_motions(candidates, excluded, k=1) + return next_ids[0] if next_ids else None + except Exception: + return None + + col1, col2 = st.columns([3, 1]) + with col2: + st.caption( + f"Vragen beantwoord: 
{len(st.session_state['mp_quiz_votes'])}/{MAX_QUESTIONS}" + ) + if st.button("Reset quiz"): + st.session_state["mp_quiz_votes"] = {} + st.session_state["mp_quiz_asked"] = [] + st.rerun() + + next_mid = _next_motion_id() + if next_mid is None: + st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.") + else: + motion_rows = df[df["id"] == next_mid] + if motion_rows.empty: + st.session_state["mp_quiz_votes"][str(next_mid)] = "Geen stem" + st.rerun() + return + motion_row = motion_rows.iloc[0] + st.markdown(f"### {motion_row.get('title') or f'Motie #{next_mid}'}") + if motion_row.get("layman_explanation"): + st.info(motion_row.get("layman_explanation")) + + with st.form(key=f"mp_quiz_form_{next_mid}"): + choice = st.radio( + "Wat zou jij stemmen?", + options=["Voor", "Tegen", "Onthouden", "Geen stem"], + index=3, + ) + submitted = st.form_submit_button("Beantwoord en verder") + + if submitted: + st.session_state["mp_quiz_votes"][str(next_mid)] = choice + st.session_state["mp_quiz_asked"].append(next_mid) + st.rerun() + + try: + user_votes = {int(k): v for k, v in st.session_state["mp_quiz_votes"].items()} + ranking = db_inst.match_mps_for_votes(user_votes, limit=50) + except Exception: + ranking = [] + + if ranking: + st.markdown("**Top kandidaten**") + rdf = pd.DataFrame(ranking) + st.dataframe(rdf.head(10), use_container_width=True) - explorer.build_mp_quiz_tab(db_path) + top_pct = ranking[0]["agreement_pct"] if ranking else 0.0 + top_matches = [r for r in ranking if r["agreement_pct"] == top_pct] + if len(top_matches) == 1 and top_matches[0]["overlap"] > 0: + st.success( + f"Unieke match gevonden: {top_matches[0]['mp_name']} ({top_matches[0]['party']})" + ) + else: + if len(st.session_state["mp_quiz_asked"]) >= MAX_QUESTIONS: + st.warning( + "Maximaal aantal vragen beantwoord. Je hebt meerdere vergelijkbare kandidaten." 
def build_search_tab(db_path: str, show_rejected: bool) -> None:
    """Build the "Motie Zoeken" tab: filter motions and list the top matches.

    Motions are loaded via ``explorer_data.load_motions_df`` and filtered on a
    title substring, a year range and a minimum controversy score. The 50 most
    controversial matches are shown as expanders with metrics, voting results,
    a link, and similar motions from ``explorer_data.query_similar``.

    Args:
        db_path: Path to the motions database.
        show_rejected: When False, rows whose title is exactly "Verworpen."
            are hidden.
    """

    def _num(value: object, default: float = 0.0) -> float:
        """Return ``value`` as float, or ``default`` for None/NaN/non-numeric."""
        try:
            number = float(value)  # type: ignore[arg-type]
        except (TypeError, ValueError):
            return default
        return default if pd.isna(number) else number

    st.subheader("Motie Zoeken")

    df = explorer_data.load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        df = df[df["title"].fillna("").str.strip() != "Verworpen."]

    col1, col2, col3 = st.columns([2, 1, 1])
    with col1:
        query = st.text_input(
            "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen"
        )
    with col2:
        years = sorted(df["year"].dropna().astype(int).unique().tolist())
        # Without any known year there is nothing to select; skip the year
        # filter instead of applying an arbitrary range that matches no row.
        year_range = (
            st.select_slider("Jaar", options=years, value=(years[0], years[-1]))
            if years
            else None
        )
    with col3:
        min_controversy = st.slider(
            "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05
        )

    working = df
    if year_range is not None:
        working = working[
            (working["year"] >= year_range[0]) & (working["year"] <= year_range[1])
        ]
    if min_controversy > 0:
        working = working[working["controversy_score"] >= min_controversy]
    needle = (query or "").strip().lower()
    if needle:
        mask = working["title"].fillna("").str.lower().str.contains(needle, regex=False)
        working = working[mask]

    working = working.sort_values(by="controversy_score", ascending=False)
    st.caption(f"{len(working)} resultaten (top 50 getoond)")

    for _, row in working.head(50).iterrows():
        title = row.get("title") or f"Motie #{row['id']}"
        date_value = pd.to_datetime(row.get("date"), errors="coerce")
        date_str = date_value.strftime("%d %b %Y") if pd.notna(date_value) else "?"
        controversy = _num(row.get("controversy_score"))
        with st.expander(f"**{title}** — {date_str} — 🔥 {controversy:.2f}"):
            cols = st.columns(3)
            cols[0].metric("Controverse", f"{controversy:.2f}")
            cols[1].metric("Marge", f"{_num(row.get('winning_margin')):.2f}")
            cols[2].metric("Jaar", int(row["year"]) if pd.notna(row["year"]) else "?")

            _render_voting_results(row.get("voting_results"))

            url = row.get("url")
            if url and str(url).startswith("http"):
                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

            sim = explorer_data.query_similar(db_path, int(row["id"]), top_k=5)
            if sim.empty:
                st.caption("_Nog geen vergelijkbare moties beschikbaar_")
                continue

            st.markdown("**Vergelijkbare moties:**")
            for _, s in sim.iterrows():
                s_date = (
                    pd.to_datetime(s["date"]).strftime("%Y")
                    if pd.notna(s.get("date"))
                    else ""
                )
                st.markdown(
                    f"- {s.get('title') or 'Onbekend'} "
                    f"*(score: {_num(s.get('score')):.3f}, {s_date})*"
                )
def select_trajectory_plot_data(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    party_map: Dict[str, str],
    windows: List[str],
    selected_parties: List[str],
    smooth_alpha: float = 0.35,
    mp_fallback_count: Optional[int] = None,
) -> Tuple[go.Figure, int, Optional[str]]:
    """Build the trajectory figure without touching Streamlit.

    Party centroids from ``compute_party_centroids`` are plotted as one
    EMA-smoothed line per party. When no party has a usable centroid, the MPs
    present in the most windows are plotted individually instead.

    Args:
        positions_by_window: ``{window_id: {mp_name: (x, y)}}``.
        party_map: ``{mp_name: party}``.
        windows: Window ids to plot; also used as the hover text of party traces.
        selected_parties: Parties to plot. If none of them is plottable, all
            plottable parties are shown.
        smooth_alpha: EMA weight of the newest value; ``>= 1.0`` disables smoothing.
        mp_fallback_count: Maximum number of MPs in the fallback plot. Defaults
            to the ``EXPLORER_MP_FALLBACK_COUNT`` environment variable, or 20.

    Returns:
        ``(fig, trace_count, banner_text)``. ``banner_text`` is set only when
        the MP fallback was used, otherwise ``None``.
    """
    if mp_fallback_count is None:
        try:
            mp_fallback_count = int(os.getenv("EXPLORER_MP_FALLBACK_COUNT", "20"))
        except ValueError:
            mp_fallback_count = 20
    # A negative count would slice from the wrong end of the activity ranking.
    mp_fallback_count = max(0, mp_fallback_count)

    party_centroids, meta = compute_party_centroids(
        positions_by_window, party_map, windows
    )

    # The inspector is diagnostic only; a failure must not block plotting.
    try:
        inspector_summary = inspect_positions_for_issues(positions_by_window, party_map)
    except Exception:
        inspector_summary = {}
        diagnostics = {
            "stage": "inspector_exception",
            "exception": traceback.format_exc(),
        }
        select_trajectory_plot_data._last_diagnostics = diagnostics
        _last_trajectories_diagnostics.update(diagnostics)
        logger.warning("inspect_positions_for_issues failed", exc_info=True)
    logger.debug("select_trajectory_plot_data inspector summary: %s", inspector_summary)

    def _as_coord(value) -> float:
        """Map ``None`` to NaN so coordinates can be tested and plotted uniformly."""
        return float(np.nan) if value is None else float(value)

    def _ema_smooth(values: List[float], alpha: float) -> List[float]:
        """Exponentially smooth ``values``; gaps stay NaN and do not reset the state."""
        if not values or alpha >= 1.0:
            return values
        smoothed: List[float] = []
        prev = None
        for raw in values:
            v = _as_coord(raw)
            if np.isnan(v):
                smoothed.append(float(np.nan))
                continue
            prev = v if prev is None else alpha * v + (1 - alpha) * prev
            smoothed.append(float(prev))
        return smoothed

    # A party is plottable when at least one window has a non-missing coordinate.
    plottable_parties = [
        party
        for party, vals in party_centroids.items()
        if any(
            not (np.isnan(_as_coord(x)) and np.isnan(_as_coord(y))) for x, y in vals
        )
    ]

    logger.debug(
        "[TRAJ DEBUG] plottable_parties: %d parties, sample=%s",
        len(plottable_parties),
        (plottable_parties[:5] if plottable_parties else "empty"),
    )
    logger.debug(
        "[TRAJ DEBUG] party_centroids keys: %s",
        list(party_centroids.keys())[:10],
    )
    if party_centroids:
        sample_party = next(iter(party_centroids))
        logger.debug(
            "[TRAJ DEBUG] Sample party '%s' centroids: %s...",
            sample_party,
            party_centroids[sample_party][:3],
        )

    fig = go.Figure()
    trace_count = 0

    if not plottable_parties:
        # Fallback: individual trajectories of the MPs seen in the most windows.
        mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
        for wid in windows:
            for mp_name, xy in positions_by_window.get(wid, {}).items():
                try:
                    x, y = float(xy[0]), float(xy[1])
                except Exception:
                    continue
                mp_positions.setdefault(mp_name, {})[wid] = (x, y)

        mp_activity = sorted(
            ((mp, len(wdict)) for mp, wdict in mp_positions.items()),
            key=lambda t: t[1],
            reverse=True,
        )
        top_mps = [mp for mp, _ in mp_activity[:mp_fallback_count]]

        for mp in top_mps:
            wids_sorted = sorted(mp_positions.get(mp, {}))
            if not wids_sorted:
                continue
            xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
            ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
            fig.add_trace(
                go.Scatter(
                    x=_ema_smooth(xs_raw, smooth_alpha),
                    y=_ema_smooth(ys_raw, smooth_alpha),
                    mode="lines+markers",
                    name=mp,
                    text=wids_sorted,
                    # Unsmoothed coordinates travel along for hover/inspection.
                    customdata=[
                        (_as_coord(rx), _as_coord(ry))
                        for rx, ry in zip(xs_raw, ys_raw)
                    ],
                    line=dict(color="#888888", shape="spline", smoothing=1.3),
                    marker=dict(color="#888888", size=6),
                )
            )
            trace_count += 1

        banner_text = "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback."
        logger.debug(
            "[TRAJ DEBUG] Fallback to MP trajectories: trace_count=%d, top_mps=%d",
            trace_count,
            len(top_mps),
        )
        return fig, trace_count, banner_text

    to_plot = [p for p in selected_parties if p in plottable_parties]
    if not to_plot:
        to_plot = plottable_parties

    for party in to_plot:
        vals = party_centroids.get(party, [])
        if not vals:
            continue
        xs_raw = [v[0] for v in vals]
        ys_raw = [v[1] for v in vals]
        colour = PARTY_COLOURS.get(party, "#9E9E9E")
        fig.add_trace(
            go.Scatter(
                x=_ema_smooth(xs_raw, smooth_alpha),
                y=_ema_smooth(ys_raw, smooth_alpha),
                mode="lines+markers",
                name=party,
                text=windows,
                customdata=[
                    (_as_coord(x), _as_coord(y)) for x, y in zip(xs_raw, ys_raw)
                ],
                line=dict(color=colour, shape="spline", smoothing=1.3),
                marker=dict(color=colour, size=8),
            )
        )
        trace_count += 1

    logger.debug(
        "[TRAJ DEBUG] Final trace_count=%d, plottable_parties=%d, to_plot=%s",
        trace_count,
        len(plottable_parties),
        len(to_plot),
    )
    return fig, trace_count, None
_last_trajectories_diagnostics.update( + { + "stage": "load_positions_empty", + "positions_by_window_len": len(positions_by_window), + } + ) + except Exception: + pass + try: + st.warning("Geen positiedata beschikbaar.") + except Exception: + pass + try: + if get_debug_trajectories_enabled(): + try: + st.text_area( + "Trajectories diagnostics", + json.dumps(_last_trajectories_diagnostics, default=str), + height=160, + ) + except Exception: + pass + except Exception: + pass + return + + party_map = explorer_data.load_party_map(db_path) + logging.getLogger(__name__).debug( + "[TRAJ DEBUG] load_party_map → %d entries, sample=%s", + len(party_map), + list(party_map.items())[:3], + ) + + def normalize_mp_name(name): + """Normalize MP name for better matching between data sources.""" + if not name: + return "" + name = name.strip() + if "," in name and ", " not in name: + name = name.replace(",", ", ") + return name + + party_map = {normalize_mp_name(k): v for k, v in party_map.items()} + + normalized_positions = {} + for window, positions in positions_by_window.items(): + normalized_positions[window] = { + normalize_mp_name(k): v for k, v in positions.items() + } + positions_by_window = normalized_positions + + all_mp_names = set() + for positions in positions_by_window.values(): + all_mp_names.update(positions.keys()) + + matched_names = sum(1 for mp in all_mp_names if mp in party_map) + if all_mp_names: + logger.info( + f"MP name matching: {matched_names}/{len(all_mp_names)} matched ({100 * matched_names / len(all_mp_names):.1f}%)" + ) + else: + logger.info("MP name matching: no MPs found in positions data") + + if matched_names == 0 and len(all_mp_names) > 0: + logger.warning("No MP names matched between positions and party_map!") + logger.warning(f"Sample positions names: {list(all_mp_names)[:5]}") + logger.warning(f"Sample party_map names: {list(party_map.keys())[:5]}") + + windows = sorted(positions_by_window.keys()) + + centroids: Dict[str, Dict[str, Tuple[float, 
float]]] = {} + all_parties: set = set() + + def _strip_paren(name: str) -> str: + return re.sub(r"\s*\([^)]*\)", "", name).strip() + + for wid in windows: + pos = positions_by_window.get(wid, {}) + per_party: Dict[str, List[Tuple[float, float]]] = {} + for mp_name, (x, y) in pos.items(): + party = party_map.get(mp_name) or party_map.get( + _strip_paren(mp_name), "Unknown" + ) + if party == "Unknown": + continue + per_party.setdefault(party, []).append((x, y)) + for party, coords in per_party.items(): + all_parties.add(party) + xs = [c[0] for c in coords] + ys = [c[1] for c in coords] + centroids.setdefault(party, {})[wid] = ( + float(np.mean(xs)), + float(np.mean(ys)), + ) + + all_parties = sorted( + set(party_map.get(mp) for MPs in positions_by_window.values() for mp in MPs) + - {None, "Unknown"} + ) + logging.getLogger(__name__).debug( + "[TRAJ DEBUG] all_parties (raw from party_map) → %d parties: %s", + len(all_parties), + all_parties[:10], + ) + all_parties_sorted = sorted(all_parties) + + if not all_parties_sorted: + st.info( + "Geen partijen beschikbaar om trajecten te tekenen. Controleer of de party mapping is geladen (mp_metadata) en of de minimum Kamerleden-instelling te hoog staat." 
+ ) + try: + st.caption(f"Bekende partijen in party_map: {len(party_map)}") + except Exception: + pass + + default_parties = [p for p in ["CDA", "D66", "VVD"] if p in all_parties] + if not default_parties: + default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties] + if not default_parties: + default_parties = all_parties_sorted[:6] + + selected_parties = st.multiselect( + "Selecteer partijen", + options=all_parties_sorted, + default=default_parties, + ) + + def _ema_smooth(values: List[float], alpha: float) -> List[float]: + if not values or alpha >= 1.0: + return values + smoothed = [values[0]] + for v in values[1:]: + smoothed.append(alpha * v + (1 - alpha) * smoothed[-1]) + return smoothed + + smooth_alpha = 0.35 + + if not centroids: + st.info( + "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback." + ) + + mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {} + for wid in windows: + pos = positions_by_window.get(wid, {}) + for mp_name, xy in pos.items(): + try: + x, y = float(xy[0]), float(xy[1]) + except Exception: + continue + mp_positions.setdefault(mp_name, {})[wid] = (x, y) + + mp_positions = { + mp: pos + for mp, pos in mp_positions.items() + if len(pos) >= 2 + and not all(np.isnan(x) and np.isnan(y) for x, y in pos.values()) + } + + if not mp_positions: + st.warning("Geen positiedata beschikbaar voor trajectplotten.") + _last_trajectories_diagnostics.update( + { + "stage": "no_mp_positions", + "mp_positions_count": 0, + } + ) + try: + if get_debug_trajectories_enabled(): + try: + st.text_area( + "Trajectories diagnostics", + json.dumps(_last_trajectories_diagnostics, default=str), + height=160, + ) + except Exception: + pass + except Exception: + pass + return + + st.session_state["_trajectory_mp_positions"] = mp_positions + + mp_list = sorted(mp_positions.keys()) + default_mps = mp_list[:6] + selected_mps = st.multiselect( + "Selecteer Kamerleden (fallback)", options=mp_list, default=default_mps + ) + + 
fig = go.Figure() + trace_count = 0 + for mp in selected_mps: + wids_sorted = sorted(mp_positions[mp].keys()) + xs_raw = [mp_positions[mp][w][0] for w in wids_sorted] + ys_raw = [mp_positions[mp][w][1] for w in wids_sorted] + xs = _ema_smooth(xs_raw, smooth_alpha) + ys = _ema_smooth(ys_raw, smooth_alpha) + custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)] + fig.add_trace( + go.Scatter( + x=xs, + y=ys, + mode="lines+markers", + name=mp, + text=wids_sorted, + customdata=custom_raw, + line=dict(color="#888888", shape="spline", smoothing=1.3), + marker=dict(color="#888888", size=6), + hovertemplate=( + f"{mp}
" + "venster: %{text}
" + "x (smoothed): %{x:.3f}
" + "x (raw): %{customdata[0]:.3f}
" + "y (smoothed): %{y:.3f}
" + "y (raw): %{customdata[1]:.3f}" + ), + ) + ) + trace_count += 1 + + _add_y_direction_annotations(fig) + if trace_count == 0: + st.info( + "Geen trajecten getekend: geen geselecteerde Kamerleden met voldoende data." + ) + else: + st.plotly_chart(fig, use_container_width=True) + return + + if os.getenv("EXPLORER_FORCE_SHOW_TRAJECTORIES") in ("1", "true", "True"): + mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {} + for wid in windows: + pos = positions_by_window.get(wid, {}) + for mp_name, (x, y) in pos.items(): + mp_positions.setdefault(mp_name, {})[wid] = (float(x), float(y)) + + mp_list = sorted(mp_positions.keys()) + if not mp_list: + st.info("Geen MP-positiegegevens beschikbaar om te tonen.") + return + + sample_mps = mp_list[:6] + fig = go.Figure() + for mp in sample_mps: + wids_sorted = sorted(mp_positions[mp].keys()) + xs_raw = [mp_positions[mp][w][0] for w in wids_sorted] + ys_raw = [mp_positions[mp][w][1] for w in wids_sorted] + xs = _ema_smooth(xs_raw, 0.35) + ys = _ema_smooth(ys_raw, 0.35) + custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)] + fig.add_trace( + go.Scatter( + x=xs, + y=ys, + mode="lines+markers", + name=mp, + text=wids_sorted, + customdata=custom_raw, + line=dict(color="#444444", shape="spline", smoothing=1.3), + marker=dict(color="#444444", size=6), + hovertemplate=( + f"{mp}
" + "venster: %{text}
" + "x (smoothed): %{x:.3f}
" + "x (raw): %{customdata[0]:.3f}
" + "y (smoothed): %{y:.3f}
" + "y (raw): %{customdata[1]:.3f}" + ), + ) + ) + _add_y_direction_annotations(fig) + st.plotly_chart(fig, use_container_width=True) + return + + try: + debug_checkbox = False + try: + debug_checkbox = st.checkbox( + "Enable trajectories diagnostics (show extra info)", + value=get_debug_trajectories_enabled(), + ) + except Exception: + debug_checkbox = get_debug_trajectories_enabled() + if debug_checkbox: + try: + with st.expander( + "DEBUG: Trajectories data (showing diagnostics)", expanded=False + ): + st.write("windows (count):", len(windows)) + st.write("windows sample:", windows[:10]) + st.write("party_map entries:", len(party_map)) + st.write("parties with centroids:", len(all_parties_sorted)) + st.write("default_parties:", default_parties) + st.write("selected_parties:", selected_parties) + st.write("min_mps setting:", 3) + sample = { + p: len(centroids.get(p, {})) + for p in list(all_parties_sorted)[:8] + } + st.write("sample centroid window counts per party:", sample) + except Exception: + pass + except Exception: + pass + + smoothing_method = st.selectbox( + "Smoothing methode", + options=["EMA", "Spline", "None"], + index=0, + help="EMA = exponential moving average; Spline = low-degree polynomial spline fit; None = raw centroids", + ) + + smooth_alpha = 1.0 + if smoothing_method == "EMA": + smooth_alpha = st.slider( + "Glad maken (EMA-\u03b1)", + min_value=0.1, + max_value=1.0, + value=0.35, + step=0.05, + help=( + "\u03b1=1.0 toont de ruwe data; lagere waarden maken de lijn gladder. " + "Standaard 0.35 voor een goed evenwicht tussen detail en ruis." 
+ ), + ) + + def _spline_smooth(values: List[float]) -> List[float]: + n = len(values) + if n <= 2: + return values + deg = min(3, n - 1) + try: + idx = np.arange(n, dtype=float) + coeffs = np.polyfit(idx, np.array(values, dtype=float), deg=deg) + smooth = np.polyval(coeffs, idx) + return [float(v) for v in smooth] + except Exception: + return values + + fig = go.Figure() + trace_count = 0 + helper_succeeded = False + try: + fig2, trace_count2, banner_text = select_trajectory_plot_data( + positions_by_window, party_map, windows, selected_parties, smooth_alpha + ) + if fig2 is not None: + fig = fig2 + trace_count = trace_count2 + helper_succeeded = True + if banner_text: + try: + st.caption(banner_text) + except Exception: + pass + try: + _last_trajectories_diagnostics.update({"banner_text": banner_text}) + except Exception: + pass + except Exception as e: + tb = traceback.format_exc() + try: + select_trajectory_plot_data._last_diagnostics = {"exception": tb} + except Exception: + pass + try: + _last_trajectories_diagnostics.update( + {"stage": "select_helper_exception", "exception": tb} + ) + except Exception: + pass + logger.exception("select_trajectory_plot_data failed") + debug_enabled = get_debug_trajectories_enabled() + if debug_enabled: + try: + st.text_area("select_trajectory_plot_data traceback", tb, height=240) + except Exception: + pass + logging.getLogger(__name__).debug( + "[TRAJ DEBUG] helper_succeeded=%s", helper_succeeded + ) + if not helper_succeeded: + for party in selected_parties: + if party not in centroids: + continue + wids_sorted = sorted(centroids[party].keys()) + xs_raw = [centroids[party][w][0] for w in wids_sorted] + ys_raw = [centroids[party][w][1] for w in wids_sorted] + xs = _ema_smooth(xs_raw, smooth_alpha) + ys = _ema_smooth(ys_raw, smooth_alpha) + custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)] + colour = PARTY_COLOURS.get(party, "#9E9E9E") + fig.add_trace( + go.Scatter( + x=xs, + y=ys, + 
mode="lines+markers", + name=party, + text=wids_sorted, + customdata=custom_raw, + line=dict(color=colour, shape="spline", smoothing=1.3), + marker=dict(color=colour, size=8), + hovertemplate=( + f"{party}
" + "venster: %{text}
" + "x (smoothed): %{x:.3f}
" + "x (raw): %{customdata[0]:.3f}
" + "y (smoothed): %{y:.3f}
" + "y (raw): %{customdata[1]:.3f}" + ), + ) + ) + trace_count += 1 + + _THRESHOLD = 0.65 + x_conf_map = axis_def.get("x_label_confidence", {}) or {} + y_conf_map = axis_def.get("y_label_confidence", {}) or {} + + def _mean_conf(m: dict) -> Optional[float]: + vals = [v for v in m.values() if v is not None] + if not vals: + return None + return float(sum(vals) / len(vals)) + + x_mean = _mean_conf(x_conf_map) + y_mean = _mean_conf(y_conf_map) + + x_title = trajectory.choose_trajectory_title(axis_def, "x", threshold=_THRESHOLD) + y_title = trajectory.choose_trajectory_title(axis_def, "y", threshold=_THRESHOLD) + + fig.update_layout( + title="Partij trajectories", + xaxis_title=x_title, + yaxis_title=y_title, + height=600, + legend_title_text="Partij", + ) + _add_y_direction_annotations(fig) + try: + _last_trajectories_diagnostics.update({"trace_count": trace_count}) + except Exception: + pass + debug_enabled = get_debug_trajectories_enabled() + if trace_count == 0: + _last_trajectories_diagnostics.update( + { + "stage": "zero_traces", + "positions_count": sum(len(pos) for pos in positions_by_window.values()) + if positions_by_window + else 0, + "party_map_count": len(party_map) if party_map else 0, + "centroids_count": len(centroids) if centroids else 0, + "selected_parties_count": len(selected_parties) + if selected_parties + else 0, + "timestamp": datetime.now().isoformat(), + } + ) + if positions_by_window and party_map and not centroids: + sample_mps = [] + for window, positions in list(positions_by_window.items())[:1]: + sample_mps = list(positions.keys())[:5] + break + matched = sum(1 for mp in sample_mps if mp in party_map) + _last_trajectories_diagnostics["name_match_check"] = { + "sample_mps": sample_mps, + "matched_in_party_map": matched, + "sample_size": len(sample_mps), + } + if trace_count == 0: + st.info("📊 **Geen trajecten getekend**") + + with st.expander("🔍 Diagnostische informatie"): + st.write("**Data status:**") + st.write( + f"- Positie vensters: 
{len(positions_by_window) if positions_by_window else 0}" + ) + st.write(f"- Party mappings: {len(party_map) if party_map else 0}") + st.write( + f"- Geselecteerde partijen: {len(selected_parties) if selected_parties else 0}" + ) + + if "centroid_diagnostics" in locals(): + st.write("**Centroid berekening:**") + st.write( + f"- Partijen met posities: {len(centroid_diagnostics.get('parties_with_positions', []))}" + ) + st.write( + f"- Partijen met alleen NaN: {len(centroid_diagnostics.get('parties_all_nan', []))}" + ) + + st.write("\n**Mogelijke oorzaken:**") + st.write("1. Geen SVD vectoren berekend voor de geselecteerde vensters") + st.write("2. MP namen in posities komen niet overeen met party_map") + st.write("3. Alle geselecteerde partijen hebben te weinig MPs (< 5)") + + if st.button("🔧 Database diagnostiek uitvoeren"): + with st.spinner("Bezig met diagnostiek..."): + from scripts.diagnose_trajectories_cli import ( + run as diagnose_trajectories, + ) - explorer.build_trajectories_tab(db_path, window_size) + results = diagnose_trajectories(db_path) + st.json(results) + else: + try: + st.info( + f"[DEBUG] trace_count={trace_count}, fig data count={len(fig.data)}, layout title={fig.layout.title.text if fig.layout.title else 'none'}" + ) + except Exception: + pass + try: + logging.getLogger(__name__).debug( + "[TRAJ DEBUG] About to render plotly chart — trace_count=%d, banner=%s, fig has %d traces", + trace_count, + banner_text, + len(fig.data), + ) + st.plotly_chart(fig, use_container_width=True) + except Exception as e: + st.error(f"Trajectories rendering failed: {e}") + if get_debug_trajectories_enabled(): + try: + st.json(_last_trajectories_diagnostics) + except Exception: + st.text_area( + "Trajectories diagnostics (JSON failed)", + json.dumps(_last_trajectories_diagnostics, default=str), + height=240, + ) diff --git a/explorer.py b/explorer.py index 89fb348..7369c85 100644 --- a/explorer.py +++ b/explorer.py @@ -37,19 +37,18 @@ from analysis import 
explorer_data from analysis import projections from analysis import trajectory +# Backwards-compatible re-export used by tests +choose_trajectory_title = trajectory.choose_trajectory_title + try: import plotly.express as px import plotly.graph_objects as go except Exception: - # Plotly may be unavailable in lightweight test environments. Provide a tiny - # local fallback that exposes a Figure-like object with `.data` and - # `add_trace()` so unit tests can run without installing plotly. px = None import types class _DummyTrace: def __init__(self, **kwargs): - # Preserve commonly-used attributes accessed by tests self.name = kwargs.get("name") self.x = kwargs.get("x") self.y = kwargs.get("y") @@ -61,23 +60,16 @@ except Exception: self.data = [] def add_trace(self, trace): - # plotly passes a Scatter object; our tests only inspect `.data` - # elements for `.name` and `.customdata`. Accept both our - # _DummyTrace and dict-like kwargs. if isinstance(trace, _DummyTrace): self.data.append(trace) else: - # Some code may call go.Scatter(...) which returns an object; - # if a mapping is passed here instead, coerce to _DummyTrace. try: - # attempt attribute access name = getattr(trace, "name", None) x = getattr(trace, "x", None) y = getattr(trace, "y", None) text = getattr(trace, "text", None) customdata = getattr(trace, "customdata", None) except Exception: - # Last resort: treat as mapping name = trace.get("name") if hasattr(trace, "get") else None x = trace.get("x") if hasattr(trace, "get") else None y = trace.get("y") if hasattr(trace, "get") else None @@ -90,23 +82,19 @@ except Exception: ) def add_annotation(self, *args, **kwargs): - # noop for tests that don't import full plotly return None go = types.SimpleNamespace( Figure=_DummyFigure, Scatter=lambda **kwargs: _DummyTrace(**kwargs) ) + try: import streamlit as st except Exception: - # Minimal dummy replacement for Streamlit used during tests / import-time. 
- # We only need a tiny subset so unit tests can import explorer without - # installing streamlit. All functions here are no-ops or simple fallbacks. class _DummySt: def cache_data(self, *args, **kwargs): def _decorator(func): return func - return _decorator def markdown(self, *args, **kwargs): @@ -128,7 +116,6 @@ except Exception: return None def checkbox(self, *args, **kwargs): - # default to False unless value provided return kwargs.get("value", False) def warning(self, *args, **kwargs): @@ -138,7 +125,6 @@ except Exception: return None def selectbox(self, *args, **kwargs): - # return first option if options provided opts = ( kwargs.get("options") if kwargs.get("options") is not None @@ -170,11 +156,9 @@ except Exception: def __exit__(self_inner, exc_type, exc, tb): return False - return _Ctx() def columns(self, *args, **kwargs): - # Return a tuple of simple objects with the methods used in the UI class _Col: def markdown(self, *a, **k): return None @@ -184,251 +168,34 @@ except Exception: def dataframe(self, *a, **k): return None - n = len(args[0]) if args else 1 return tuple(_Col() for _ in range(n)) st = _DummySt() -# Temporary diagnostics for Trajectories plotting — set by instrumentation when -# EXPLORER_DEBUG_TRAJECTORIES is enabled. This is intended to be small, opt-in and -# reversible once root cause is found. -_last_trajectories_diagnostics: dict = {} -# Backwards/alternate name used by instrumentation: keep a second module-level -# reference so callers/tests can look for either name. -_last_diagnostics = _last_trajectories_diagnostics - - -def get_debug_trajectories_enabled() -> bool: - """Return True when EXPLORER_DEBUG_TRAJECTORIES env var indicates debug mode. - Accepts '1', 'true', 'True'. Used as default for a per-tab checkbox. 
- """ - v = os.getenv("EXPLORER_DEBUG_TRAJECTORIES") - return str(v) in ("1", "true", "True") - - -from explorer_helpers import ( - inspect_positions_for_issues, - compute_party_centroids, +# Re-export trajectories diagnostics for backwards compatibility +from analysis.tabs.trajectories import ( + _last_diagnostics, + _last_trajectories_diagnostics, + get_debug_trajectories_enabled, ) -def select_trajectory_plot_data( - positions_by_window: Dict[str, Dict[str, Tuple[float, float]]], - party_map: Dict[str, str], - windows: List[str], - selected_parties: List[str], - smooth_alpha: float = 0.35, - mp_fallback_count: Optional[int] = None, -) -> Tuple[go.Figure, int, Optional[str]]: - """Return (fig, trace_count, banner_text). - - Helper used by build_trajectories_tab. Does not call Streamlit. - """ - # Use env var default if not provided - if mp_fallback_count is None: - try: - mp_fallback_count = int(os.getenv("EXPLORER_MP_FALLBACK_COUNT", "20")) - except Exception: - mp_fallback_count = 20 - - # Compute per-party centroids aligned to windows - party_centroids, meta = compute_party_centroids( - positions_by_window, party_map, windows +def select_trajectory_plot_data(*args, **kwargs): + """Lazy wrapper around analysis.tabs.trajectories.select_trajectory_plot_data.""" + from analysis.tabs.trajectories import ( + select_trajectory_plot_data as _impl, ) - # Use inspector to collect diagnostics (import-safe, pure helper). Keep this - # call local to the helper to ensure the inspector is exercised and the - # diagnostics are available for logging/debugging. Do not call Streamlit - # from here so the function remains import-safe for tests. - try: - inspector_summary = inspect_positions_for_issues(positions_by_window, party_map) - except Exception: - # Capture traceback diagnostics so callers (and tests) can inspect what went wrong. 
- tb = traceback.format_exc() - inspector_summary = {} - try: - # Attach diagnostics to the helper function for callers that want to inspect - # the last error directly on the function object. - select_trajectory_plot_data._last_diagnostics = { - "stage": "inspector_exception", - "exception": tb, - } - except Exception: - # best-effort only - pass - try: - # Also update the module-level trajectories diagnostics so the UI can show - # a compact summary when debugging is enabled. - _last_trajectories_diagnostics.update( - {"stage": "inspector_exception", "exception": tb} - ) - except Exception: - pass - logger.debug("select_trajectory_plot_data inspector summary: %s", inspector_summary) - - # Determine which parties have at least one non-nan centroid - plottable_parties = [] - for p, vals in party_centroids.items(): - has_valid = any(not (np.isnan(x) and np.isnan(y)) for x, y in vals) - if has_valid: - plottable_parties.append(p) - - # DEBUG: Show plottable_parties status (use logger.debug instead of print) - logging.getLogger(__name__).debug( - "[TRAJ DEBUG] plottable_parties: %d parties, sample=%s", - len(plottable_parties), - (plottable_parties[:5] if plottable_parties else "empty"), - ) - logging.getLogger(__name__).debug( - "[TRAJ DEBUG] party_centroids keys: %s", - list(party_centroids.keys())[:10], - ) - if party_centroids: - sample_party = list(party_centroids.keys())[0] - sample_vals = party_centroids[sample_party] - logging.getLogger(__name__).debug( - "[TRAJ DEBUG] Sample party '%s' centroids: %s...", - sample_party, - sample_vals[:3], - ) - - fig = go.Figure() - trace_count = 0 - banner_text: Optional[str] = None - - def _ema_smooth(values: List[float], alpha: float) -> List[float]: - if not values or alpha >= 1.0: - return values - smoothed: List[float] = [] - prev = None - for v in values: - if v is None or (isinstance(v, float) and np.isnan(v)): - smoothed.append(float(np.nan)) - continue - v = float(v) - if prev is None: - prev = v - else: - prev = 
alpha * v + (1 - alpha) * prev - smoothed.append(float(prev)) - return smoothed - - # If no plottable parties, fallback to MP trajectories - if not plottable_parties: - # Build mp_positions across windows - mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {} - for wid in windows: - pos = positions_by_window.get(wid, {}) - for mp_name, xy in pos.items(): - try: - x, y = float(xy[0]), float(xy[1]) - except Exception: - continue - mp_positions.setdefault(mp_name, {})[wid] = (x, y) - - # Rank MPs by activity (number of windows with positions) - mp_activity = sorted( - [(mp, len(wdict)) for mp, wdict in mp_positions.items()], - key=lambda t: t[1], - reverse=True, - ) - top_mps = [mp for mp, _ in mp_activity[:mp_fallback_count]] - - for mp in top_mps: - wids_sorted = sorted(mp_positions.get(mp, {}).keys()) - if not wids_sorted: - continue - xs_raw = [mp_positions[mp][w][0] for w in wids_sorted] - ys_raw = [mp_positions[mp][w][1] for w in wids_sorted] - xs = _ema_smooth(xs_raw, smooth_alpha) - ys = _ema_smooth(ys_raw, smooth_alpha) - custom_raw = [ - ( - float(rx) if rx is not None else float(np.nan), - float(ry) if ry is not None else float(np.nan), - ) - for rx, ry in zip(xs_raw, ys_raw) - ] - fig.add_trace( - go.Scatter( - x=xs, - y=ys, - mode="lines+markers", - name=mp, - text=wids_sorted, - customdata=custom_raw, - line=dict(color="#888888", shape="spline", smoothing=1.3), - marker=dict(color="#888888", size=6), - ) - ) - trace_count += 1 - - banner_text = "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback." 
- logging.getLogger(__name__).debug( - "[TRAJ DEBUG] Fallback to MP trajectories: trace_count=%d, top_mps=%d", - trace_count, - len(top_mps), - ) - return fig, trace_count, banner_text - - # Otherwise plot party centroids for selected parties intersecting plottable - to_plot = [p for p in selected_parties if p in plottable_parties] - # If none selected, default to all plottable - if not to_plot: - to_plot = plottable_parties - - for party in to_plot: - vals = party_centroids.get(party, []) - if not vals: - continue - xs_raw = [v[0] for v in vals] - ys_raw = [v[1] for v in vals] - xs = _ema_smooth(xs_raw, smooth_alpha) - ys = _ema_smooth(ys_raw, smooth_alpha) - # Ensure customdata preserves NaNs - custom_raw = [ - ( - float(x) if (x is not None and not np.isnan(x)) else float(np.nan), - float(y) if (y is not None and not np.isnan(y)) else float(np.nan), - ) - for x, y in zip(xs_raw, ys_raw) - ] - colour = PARTY_COLOURS.get(party, "#9E9E9E") - fig.add_trace( - go.Scatter( - x=xs, - y=ys, - mode="lines+markers", - name=party, - text=windows, - customdata=custom_raw, - line=dict(color=colour, shape="spline", smoothing=1.3), - marker=dict(color=colour, size=8), - ) - ) - trace_count += 1 - - logging.getLogger(__name__).debug( - "[TRAJ DEBUG] Final trace_count=%d, plottable_parties=%d, to_plot=%s", - trace_count, - len(plottable_parties), - (len(to_plot) if "to_plot" in dir() else "N/A"), - ) - return fig, trace_count, None + return _impl(*args, **kwargs) logger = logging.getLogger(__name__) -# Party colour palette (consistent across tabs) PARTY_COLOURS: Dict[str, str] = config.PARTY_COLOURS - SVD_THEMES: dict[int, dict[str, str]] = config.SVD_THEMES - KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES - CURRENT_PARLIAMENT_PARTIES = config.CURRENT_PARLIAMENT_PARTIES - _PARTY_NORMALIZE = config._PARTY_NORMALIZE @@ -445,14 +212,7 @@ def get_available_windows(db_path: str) -> List[str]: @st.cache_data(show_spinner=False) def get_uniform_dim_windows(db_path: str) -> List[str]: 
- """Return only windows whose dominant MP-vector dimension is >= 25. - - Some windows contain a mix of vector lengths due to multiple pipeline runs - (e.g. 2016 has both dim=1 and dim=50 rows). We find the most common dimension - per window and include only windows where that dominant dim >= 25. - Windows with too few dim-25+ entities (< 10) are also excluded to avoid - degenerate PCA inputs. - """ + """Return only windows whose dominant MP-vector dimension is >= 25.""" return explorer_data.get_uniform_dim_windows(db_path) @@ -473,53 +233,8 @@ def _swap_axes( def load_positions( db_path: str, window_size: str = "annual" ) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict]: - """Compute 2D positions per window using PCA on aligned SVD vectors. - - Returns: - positions_by_window: {window_id: {entity_name: (x, y)}} - axis_def: dict with x_axis, y_axis, method keys - """ - from analysis.political_axis import compute_2d_axes - - all_available = get_uniform_dim_windows(db_path) - - if not all_available: - return {}, {} - - positions_by_window, axis_def = compute_2d_axes( - db_path, - window_ids=all_available, - method="pca", - pca_residual=True, - normalize_vectors=True, - ) - - try: - from analysis.axis_classifier import classify_axes - - axis_def = classify_axes(positions_by_window, axis_def, db_path) - except Exception: - import logging - - logging.getLogger(__name__).exception( - "classify_axes failed; using generic axis labels" - ) - - # Axis orientation is guaranteed by compute_2d_axes via canonical party anchors - # (Procrustes alignment + sign-fixing). We do NOT forcibly override axis labels - # here so the classifier output (if available) can be surfaced conditionally in - # the UI based on per-window confidence. Label selection is performed at render - # time in the tabs so we can show fallback labels while still surfacing the - # classifier interpretation and confidence when informative. 
- - # Filter displayed windows by window_size AFTER PCA computation. - if window_size == "annual": - annual_keys = set(w for w in all_available if "-Q" not in w) - positions_by_window = { - w: v for w, v in positions_by_window.items() if w in annual_keys - } - - return positions_by_window, axis_def + """Compute 2D positions per window using PCA on aligned SVD vectors.""" + return explorer_data.load_positions(db_path, window_size) @st.cache_data(show_spinner="Partijkaart laden…") @@ -530,62 +245,15 @@ def load_party_map(db_path: str) -> Dict[str, str]: @st.cache_data(show_spinner="Actieve Kamerleden laden…") def load_active_mps(db_path: str) -> set: - """Return the set of mp_name values that are currently seated in parliament. - - An MP is considered active if their mp_metadata row has tot_en_met IS NULL, - meaning they have no recorded end date for their current seat. - """ + """Return the set of mp_name values that are currently seated in parliament.""" return explorer_data.load_active_mps(db_path) def get_aligned_party_scores( db_path: str, window: str, active_mps: set | None = None ) -> Dict[str, np.ndarray]: - """Get party scores for all N components from aligned PCA positions. - - For current_parliament, pass active_mps to filter to only seated MPs - (matching the compass behaviour). Historical windows include all MPs. - - Args: - db_path: Path to DuckDB database - window: Window identifier (e.g. 'current_parliament', '2025') - active_mps: Set of active MP names to filter current_parliament by. - Required when window is 'current_parliament' to match compass. - """ - from analysis.political_axis import compute_nd_axes - - annual_windows = get_uniform_dim_windows(db_path) - scores_by_window, _ = compute_nd_axes( - db_path, window_ids=annual_windows, n_components=10 - ) - window_scores = scores_by_window.get(window, {}) - if not window_scores: - return {} - - # For current_parliament, filter to active MPs (still seated) to match compass. 
- # Historical windows include all MPs active at the time — no restriction needed. - if window == "current_parliament" and active_mps is not None: - window_scores = {mp: sc for mp, sc in window_scores.items() if mp in active_mps} - - # Load party map to convert MP names to parties - _party_map = load_party_map(db_path) - - # Aggregate MP scores to party centroids per component - n_comps = 10 - party_scores_agg: Dict[str, List[np.ndarray]] = {} - for mp_name, scores in window_scores.items(): - party = _party_map.get( - mp_name, _party_map.get(mp_name.split("(")[0].strip(), None) - ) - if party: - party_scores_agg.setdefault(party, []).append(scores[:n_comps]) - - # Compute mean scores per party for each component - return { - party: np.mean(np.vstack(score_list), axis=0) - for party, score_list in party_scores_agg.items() - if score_list - } + """Get party scores for all N components from aligned PCA positions.""" + return explorer_data.get_aligned_party_scores(db_path, window, active_mps) def compute_party_discipline( @@ -593,18 +261,8 @@ def compute_party_discipline( start_date: str, end_date: str, ) -> pd.DataFrame: - """Compute per-party voting discipline (Rice index) for roll-call votes in a date range. - - Only individual MP vote rows are used (mp_name LIKE '%,%'). - Returns a DataFrame with columns [party, n_motions, discipline] sorted by discipline ascending. - Returns an empty DataFrame if fewer than 1 qualifying motion exists or on any DB error. - - Rice index per motion per party = fraction of party MPs voting with the party majority. - The per-party score is the average Rice index across all motions in the date range. - Only 'voor' and 'tegen' votes are counted; absent and abstaining MPs are excluded from the - Rice index calculation. 
- """ - return trajectory.compute_party_discipline(db_path, start_date, end_date) + """Compute per-party voting discipline (Rice index) for roll-call votes in a date range.""" + return explorer_data.compute_party_discipline(db_path, start_date, end_date) def _load_mp_vectors_by_party(db_path: str) -> Dict[str, List[np.ndarray]]: @@ -649,15 +307,7 @@ def load_party_axis_scores_for_window( def load_party_scores_all_windows( db_path: str, windows: List[str] ) -> Dict[str, Dict[str, List[float]]]: - """Load party SVD scores for all specified windows. - - Args: - db_path: Path to DuckDB database - windows: List of window IDs to load - - Returns: - {window_id: {party_name: [float * k]}} — scores per party per window - """ + """Load party SVD scores for all specified windows.""" result: Dict[str, Dict[str, List[float]]] = {} for window in windows: if window == "current_parliament": @@ -675,68 +325,17 @@ def _load_mp_vectors_by_window(db_path: str, window: str) -> Dict[str, np.ndarra def _get_aligned_trajectory_scores( db_path: str, windows: List[str], n_components: int = 10 ) -> Dict[str, Dict[str, List[float]]]: - """Get aligned PCA scores for all windows as {window: {party: [scores per component]}}. - - Uses compute_nd_axes to get PCA-projected, flip-corrected scores across all windows, - ensuring consistency with the single-window SVD components view. 
- """ - from analysis.political_axis import compute_nd_axes - - # Get aligned scores for the requested windows via PCA (annual-only, no quarterly) - scores_by_window, _ = compute_nd_axes( - db_path, window_ids=windows, n_components=n_components - ) - if not scores_by_window: - return {} - - # Load party map to convert MP names to parties - party_map = load_party_map(db_path) - - # Aggregate MP scores to party centroids per window - result: Dict[str, Dict[str, List[float]]] = {} - for window in windows: - window_scores = scores_by_window.get(window, {}) - if not window_scores: - continue - - # Aggregate MP scores to party averages - party_vecs: Dict[str, List[np.ndarray]] = {} - for mp_name, scores in window_scores.items(): - party = party_map.get( - mp_name, party_map.get(mp_name.split("(")[0].strip(), None) - ) - if party: - party_vecs.setdefault(party, []).append(scores[:n_components]) - - # Compute mean scores per party - result[window] = { - party: np.mean(np.vstack(score_list), axis=0).tolist() - for party, score_list in party_vecs.items() - if score_list - } - - return result + """Get aligned PCA scores for all windows as {window: {party: [scores per component]}}.""" + return explorer_data._get_aligned_trajectory_scores(db_path, windows, n_components) @st.cache_data(show_spinner="SVD scores met Procrustes-uitlijning laden…") def load_party_scores_all_windows_aligned( db_path: str, windows: List[str] ) -> Dict[str, Dict[str, List[float]]]: - """Load party SVD scores for all windows with Procrustes alignment. - - This ensures consistent orientation across years by aligning SVD vectors - using Procrustes rotation, similar to how components 1-2 are aligned. 
- - Args: - db_path: Path to DuckDB database - windows: List of window IDs to load - - Returns: - {window_id: {party_name: [float * k]}} — aligned scores per party per window - """ + """Load party SVD scores for all windows with Procrustes alignment.""" from analysis.trajectory import _procrustes_align_windows - # Load raw MP vectors for each window raw_window_vecs: Dict[str, Dict[str, np.ndarray]] = {} party_map = load_party_map(db_path) @@ -745,10 +344,8 @@ def load_party_scores_all_windows_aligned( if mp_vecs: raw_window_vecs[window] = mp_vecs - # Apply Procrustes alignment aligned_window_vecs = _procrustes_align_windows(raw_window_vecs) - # Convert MP vectors to party averages result: Dict[str, Dict[str, List[float]]] = {} for window in windows: if window not in aligned_window_vecs: @@ -764,7 +361,6 @@ def load_party_scores_all_windows_aligned( party_vecs[party] = [] party_vecs[party].append(vec) - # Average per party result[window] = {} for party, vecs in party_vecs.items(): if vecs: @@ -796,11 +392,7 @@ def _cached_bootstrap_cis( @st.cache_data(show_spinner="Scree-plot laden…") def load_scree_data(db_path: str) -> List[float]: - """Return explained variance ratios (%) for all SVD components, sorted descending. - - Uses the same Procrustes-aligned multi-window matrix as the compass axes so the - scree plot is consistent with what the compass actually uses. - """ + """Return explained variance ratios (%) for all SVD components, sorted descending.""" try: from analysis.political_axis import compute_svd_spectrum @@ -810,554 +402,6 @@ def load_scree_data(db_path: str) -> List[float]: return [] -def _render_scree_plot(importances: List[float], n_show: int = 15) -> None: - """Render a scree plot showing relative SVD component importance. - - Highlighted bars for the top-2 components (used in the compass); muted bars - for the rest. A cumulative-variance dashed line on the same y-axis helps - spot the elbow. A 50 % cumulative threshold line is drawn for reference. 
- - Args: - importances: List of importance values sorted descending (from load_scree_data). - n_show: How many components to display (default: first 15). - """ - if not importances: - return - # importances are already EVR percentages summing to ~100 over all components. - # Slice to n_show for display; cumulative line shows how much variance is covered. - data = list(importances[:n_show]) - ranks = list(range(1, len(data) + 1)) - - # Cumulative variance for the dashed overlay line - cumsum = [] - running = 0.0 - for v in data: - running += v - cumsum.append(running) - - # Colour: first 2 bars highlighted (compass axes), rest muted - n_highlight = 2 - bar_colours = [ - "#1565C0" if i < n_highlight else "#90CAF9" for i in range(len(data)) - ] - - fig = go.Figure() - - # Bars - fig.add_trace( - go.Bar( - x=ranks, - y=data, - marker_color=bar_colours, - hovertemplate="As %{x}
%{y:.1f}% verklaarde variantie", - showlegend=False, - ) - ) - - # Cumulative variance line (dashed, warm amber) - fig.add_trace( - go.Scatter( - x=ranks, - y=cumsum, - mode="lines+markers", - line={"color": "#F57C00", "width": 2, "dash": "dot"}, - marker={"size": 5, "color": "#F57C00"}, - hovertemplate="As %{x}
Cumulatief: %{y:.1f}%", - name="Cumulatief", - showlegend=True, - ) - ) - - # 50 % reference line - fig.add_hline( - y=50, - line_dash="dash", - line_color="#BDBDBD", - line_width=1, - annotation_text="50%", - annotation_position="right", - annotation_font_color="#9E9E9E", - annotation_font_size=11, - ) - - # Annotations on the top-2 bars showing their % value - for i in range(min(n_highlight, len(data))): - fig.add_annotation( - x=ranks[i], - y=data[i] + 0.3, - text=f"{data[i]:.1f}%", - showarrow=False, - font={"size": 11, "color": "#1565C0"}, - yanchor="bottom", - ) - - fig.update_layout( - height=280, - margin={"l": 10, "r": 50, "t": 30, "b": 40}, - title={ - "text": "Belang per SVD-as", - "font": {"size": 13, "color": "#555555"}, - "x": 0.02, - "xanchor": "left", - }, - legend={ - "orientation": "h", - "x": 0.5, - "xanchor": "center", - "y": 1.08, - "font": {"size": 11}, - }, - xaxis={ - "title": {"text": "As (rang)", "font": {"size": 11}}, - "tickmode": "linear", - "tick0": 1, - "dtick": 1, - "showline": False, - "showgrid": False, - }, - yaxis={ - "title": {"text": "% van totale variantie", "font": {"size": 11}}, - "showline": False, - "showgrid": True, - "gridcolor": "#eeeeee", - "ticksuffix": "%", - "range": [0, max(cumsum) * 1.08], - }, - plot_bgcolor="rgba(0,0,0,0)", - paper_bgcolor="rgba(0,0,0,0)", - bargap=0.25, - ) - st.plotly_chart(fig, use_container_width=True) - - -def _build_party_axis_figure( - party_coords: Dict[str, Tuple[float, float]], - comp_sel: int, - theme: dict, - bootstrap_data: Optional[Dict[str, Dict]] = None, -) -> Optional[go.Figure]: - """Build a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`. - - Accepts explicit per-party 2D coordinates (x,y) and uses the component selection to - pick the value (comp_sel==1 -> x, comp_sel==2 -> y). This makes the API explicit and - avoids indexing into long SVD vectors. - - Returns go.Figure or None if no data available. 
- """ - if not party_coords: - return None - - if comp_sel not in (1, 2): - raise ValueError( - "_build_party_axis_figure only supports comp_sel 1 or 2 when using explicit coords" - ) - - axis_idx = comp_sel - 1 - flip = theme.get("flip", False) - - parties = [] - scores = [] - colours = [] - - # Support two shapes for party_coords: - # - explicit 2D coords: (x, y) - # - full SVD vectors (len>2) where we should pick the axis_idx element - for party, val in party_coords.items(): - try: - # explicit (x, y) - if hasattr(val, "__len__") and len(val) == 2: - x, y = val - score = float(x if axis_idx == 0 else y) - else: - # treat as sequence/array-like of full SVD vector - score = float(val[axis_idx]) - - if flip: - score = -score - except Exception: - # skip malformed entries silently - continue - - parties.append(party) - scores.append(score) - colours.append(PARTY_COLOURS.get(party, "#9E9E9E")) - - if not scores: - return None - - # Build hover text: include N when bootstrap data available - hover = [] - symbols = [] - if bootstrap_data: - for p, s in zip(parties, scores): - bd = bootstrap_data.get(p) - if bd: - n_mps = bd.get("n_mps", "?") - ci_low = None - ci_high = None - try: - ci_low = float(bd["ci_lower"][axis_idx]) - ci_high = float(bd["ci_upper"][axis_idx]) - except Exception: - pass - if ci_low is not None and ci_high is not None: - hover.append( - f"{p}: {s:.3f} (N={n_mps}, 95%-BI: [{ci_low:.3f}, {ci_high:.3f}])" - ) - else: - hover.append(f"{p}: {s:.3f} (N={n_mps})") - symbols.append("diamond" if n_mps == 1 else "circle") - else: - hover.append(f"{p}: {s:.3f}") - symbols.append("circle") - marker_kwargs = {"size": 14, "color": colours, "symbol": symbols} - else: - hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)] - marker_kwargs = {"size": 14, "color": colours} - - fig = go.Figure() - x_min, x_max = min(scores) * 1.15, max(scores) * 1.15 - if x_min == x_max: - x_min, x_max = x_min - 1, x_max + 1 - fig.add_trace( - go.Scatter( - x=[x_min, x_max], - 
y=[0, 0], - mode="lines", - line={"color": "#cccccc", "width": 1}, - hoverinfo="skip", - showlegend=False, - ) - ) - - scatter_kwargs = { - "x": scores, - "y": [0] * len(scores), - "mode": "markers+text", - "text": parties, - "textposition": "top center", - "marker": marker_kwargs, - "hovertext": hover, - "hoverinfo": "text", - "showlegend": False, - } - fig.add_trace(go.Scatter(**scatter_kwargs)) - - pos_pole = theme.get("positive_pole", "") - neg_pole = theme.get("negative_pole", "") - # Labels always from poles: negative_pole = LEFT, positive_pole = RIGHT - left_label = neg_pole - right_label = pos_pole - - fig.update_layout( - height=160, - margin={"l": 10, "r": 10, "t": 10, "b": 30}, - xaxis={ - "title": f"← {left_label} | {right_label} →", - "showticklabels": False, - "showline": False, - "showgrid": False, - "zeroline": False, - }, - yaxis={"visible": False, "range": [-1, 2]}, - plot_bgcolor="rgba(0,0,0,0)", - paper_bgcolor="rgba(0,0,0,0)", - ) - return fig - - -def _render_party_axis_chart( - party_coords: Dict[str, Tuple[float, float]], - comp_sel: int, - theme: dict, - bootstrap_data: Optional[Dict[str, Dict]] = None, -) -> None: - """Render a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`. - - Expects explicit per-party coords mapping (party -> (x,y)) for components 1 & 2. - """ - fig = _build_party_axis_figure(party_coords, comp_sel, theme, bootstrap_data) - if fig is None: - st.caption("_Partijdata niet beschikbaar voor deze as._") - return - st.plotly_chart(fig, use_container_width=True) - - -def _render_party_axis_chart_1d( - party_coords: Dict[str, Tuple[float, ...]], - comp_sel: int, - theme: dict, -) -> None: - """Render a 1D horizontal scatter of party positions on SVD component `comp_sel`. - - Uses the same format as components 1-2: parties as markers on a horizontal line - with axis title showing poles with arrows. 
- - Args: - party_coords: Dict mapping party name to tuple of scores (score_for_comp,) - comp_sel: SVD component number (1-indexed) - theme: Dict with label, positive_pole, negative_pole, flip - """ - import plotly.graph_objects as go - - if not party_coords: - st.caption("_Partijdata niet beschikbaar voor deze as._") - return - - # Extract scores and parties - parties = [] - scores = [] - colours = [] - - for party, coords in party_coords.items(): - try: - score = float(coords[0]) - parties.append(party) - scores.append(score) - colours.append(PARTY_COLOURS.get(party, "#9E9E9E")) - except Exception: - continue - - if not scores: - st.caption("_Partijdata niet beschikbaar voor deze as._") - return - - # Apply flip if needed (ensures right parties appear on right side) - flip = theme.get("flip", False) - if flip: - scores = [-s for s in scores] - - # Build hover text - hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)] - - # Create figure with same format as components 1-2 - fig = go.Figure() - x_min, x_max = min(scores) * 1.15, max(scores) * 1.15 - if x_min == x_max: - x_min, x_max = x_min - 1, x_max + 1 - - # Add horizontal axis line - fig.add_trace( - go.Scatter( - x=[x_min, x_max], - y=[0, 0], - mode="lines", - line={"color": "#cccccc", "width": 1}, - hoverinfo="skip", - showlegend=False, - ) - ) - - # Add party markers - fig.add_trace( - go.Scatter( - x=scores, - y=[0] * len(scores), - mode="markers+text", - text=parties, - textposition="top center", - marker={"size": 14, "color": colours}, - hovertext=hover, - hoverinfo="text", - showlegend=False, - ) - ) - - # Determine pole labels based on flip - pos_pole = theme.get("positive_pole", "") - neg_pole = theme.get("negative_pole", "") - # Labels always from poles: negative_pole = LEFT, positive_pole = RIGHT - left_label = neg_pole - right_label = pos_pole - - # Update layout with same format as components 1-2 - fig.update_layout( - height=160, - margin={"l": 10, "r": 10, "t": 10, "b": 30}, - xaxis={ - 
"title": f"← {left_label} | {right_label} →", - "showticklabels": False, - "showline": False, - "showgrid": False, - "zeroline": False, - }, - yaxis={"visible": False, "range": [-1, 2]}, - plot_bgcolor="rgba(0,0,0,0)", - paper_bgcolor="rgba(0,0,0,0)", - ) - - st.plotly_chart(fig, use_container_width=True) - - -def _render_svd_time_trajectory( - party_scores_by_window: Dict[str, Dict[str, List[float]]], - comp_sel: int, - theme: dict, - selected_parties: List[str], -) -> None: - """Render a time trajectory plot showing party positions over time on an SVD component. - - Args: - party_scores_by_window: {window_id: {party_name: [scores]}} - comp_sel: SVD component number (1-indexed) - theme: Theme dict with label, positive_pole, negative_pole, flip - selected_parties: List of party names to display - """ - if not party_scores_by_window or not selected_parties: - st.caption("_Geen data beschikbaar voor tijdtraject._") - return - - idx = comp_sel - 1 # Convert to 0-indexed - - # Build data structure: {party: [(window, score), ...]} - # Scores are already aligned and flip-corrected via compute_nd_axes, - # so no per-window flip computation needed. 
- party_trajectories: Dict[str, List[Tuple[str, float]]] = {} - - # Sort windows: current_parliament first, then chronological - all_windows = list(party_scores_by_window.keys()) - sorted_windows = [] - if "current_parliament" in all_windows: - sorted_windows.append("current_parliament") - # Add other windows in reverse chronological order (newest first) - other_windows = sorted( - [w for w in all_windows if w != "current_parliament"], reverse=True - ) - sorted_windows.extend(other_windows) - - for window in sorted_windows: - scores_by_party = party_scores_by_window.get(window, {}) - for party in selected_parties: - scores = scores_by_party.get(party, []) - if scores and len(scores) > idx: - try: - score = float(scores[idx]) - party_trajectories.setdefault(party, []).append((window, score)) - except (ValueError, TypeError): - continue - - if not party_trajectories: - st.caption("_Geen data beschikbaar voor geselecteerde partijen._") - return - - # Create figure - fig = go.Figure() - - # Find score range for x-axis - all_scores = [] - for traj in party_trajectories.values(): - all_scores.extend([s for _, s in traj]) - - if not all_scores: - st.caption("_Geen scores beschikbaar._") - return - - x_min, x_max = min(all_scores) * 1.15, max(all_scores) * 1.15 - if x_min == x_max: - x_min, x_max = x_min - 1, x_max + 1 - - # Y positions: current at top (y=0), earlier below - window_to_y = {w: i for i, w in enumerate(sorted_windows)} - - # Add horizontal grey axis lines at y=0 for each year (like single-year chart) - for window in sorted_windows: - y_pos = window_to_y[window] - # Horizontal grey line at y=0 for this year (matching single-year chart style) - fig.add_trace( - go.Scatter( - x=[x_min, x_max], - y=[y_pos, y_pos], - mode="lines", - line={"color": "#cccccc", "width": 1}, - hoverinfo="skip", - showlegend=False, - ) - ) - - # Add traces for each party - for party in selected_parties: - if party not in party_trajectories: - continue - - traj = 
party_trajectories[party] - if len(traj) < 1: - continue - - x_vals = [score for _, score in traj] - y_vals = [window_to_y[window] for window, _ in traj] - color = PARTY_COLOURS.get(party, "#9E9E9E") - - # Add connecting line - fig.add_trace( - go.Scatter( - x=x_vals, - y=y_vals, - mode="lines", - line={"color": color, "width": 2}, - hoverinfo="skip", - showlegend=False, - ) - ) - - # Add markers with hover - hover_texts = [f"{party}
{window}: {score:.3f}" for window, score in traj] - fig.add_trace( - go.Scatter( - x=x_vals, - y=y_vals, - mode="markers+text", - text=[party] * len(traj), - textposition="top center", - marker={"size": 12, "color": color}, - hovertext=hover_texts, - hoverinfo="text", - showlegend=False, - ) - ) - - # Determine pole labels based on theme (use reference flip from current_parliament) - pos_pole = theme.get("positive_pole", "") - neg_pole = theme.get("negative_pole", "") - # Labels always from poles: negative_pole = LEFT, positive_pole = RIGHT - left_label = neg_pole - right_label = pos_pole - - # Y-axis labels - y_labels = {} - for window in sorted_windows: - if window == "current_parliament": - y_labels[window_to_y[window]] = "Huidig" - else: - y_labels[window_to_y[window]] = window - - # Update layout - fig.update_layout( - height=max(400, len(sorted_windows) * 60 + 100), - margin={"l": 80, "r": 10, "t": 10, "b": 30}, - xaxis={ - "title": f"← {left_label} | {right_label} →", - "range": [x_min, x_max], - "showticklabels": False, - "showline": False, - "showgrid": True, - "gridcolor": "rgba(0,0,0,0.1)", - "zeroline": True, - "zerolinecolor": "rgba(0,0,0,0.2)", - }, - yaxis={ - "tickvals": list(y_labels.keys()), - "ticktext": list(y_labels.values()), - "tickmode": "array", - "autorange": "reversed", # Top to bottom - "showgrid": False, - }, - plot_bgcolor="rgba(0,0,0,0)", - paper_bgcolor="rgba(0,0,0,0)", - ) - - st.plotly_chart(fig, use_container_width=True) - - @st.cache_data(show_spinner="Moties laden…") def load_motions_df(db_path: str) -> pd.DataFrame: """Load the full motions table as a pandas DataFrame (read-only).""" @@ -1374,1644 +418,51 @@ def query_similar( return explorer_data.query_similar(db_path, source_motion_id, vector_type, top_k) -# --------------------------------------------------------------------------- -# Shared rendering helpers -# --------------------------------------------------------------------------- - - -def 
_render_voting_results(voting_results_json) -> None: - """Render a voting_results JSON blob as a grouped voor/tegen/onthouden table. - - The JSON is stored as {party_or_mp: vote} where vote is one of - 'voor', 'tegen', 'onthouden', 'afwezig'. We group by vote for readability. - """ - if not voting_results_json: - return - try: - vdata = ( - json.loads(voting_results_json) - if isinstance(voting_results_json, str) - else voting_results_json - ) - if not isinstance(vdata, dict) or not vdata: - return - # Group {vote: [actor, ...]} - by_vote: Dict[str, List[str]] = {} - for actor, vote in vdata.items(): - vote_str = str(vote).lower().strip() - by_vote.setdefault(vote_str, []).append(str(actor)) - # Render in fixed order - vote_order = ["voor", "tegen", "onthouden", "afwezig"] - vote_emoji = {"voor": "✅", "tegen": "❌", "onthouden": "🟡", "afwezig": "⬜"} - rows_shown = False - for v in vote_order + [k for k in by_vote if k not in vote_order]: - actors = by_vote.get(v) - if not actors: - continue - emoji = vote_emoji.get(v, "▪️") - st.markdown( - f"**{emoji} {v.capitalize()}** ({len(actors)}): {', '.join(sorted(actors))}" - ) - rows_shown = True - if not rows_shown: - st.caption("_Geen stemuitslag beschikbaar_") - except Exception: - pass - - -# --------------------------------------------------------------------------- -# Tab 1: Politiek Kompas -# --------------------------------------------------------------------------- - - -def _add_y_direction_annotations(fig: go.Figure) -> None: - """Add ▲ Progressief / ▼ Conservatief labels above and below the Y axis.""" - common = dict( - xref="paper", - yref="paper", - x=-0.07, - showarrow=False, - font=dict(size=11, color="#666666"), - ) - fig.add_annotation(**common, y=1.02, text="▲ Progressief", xanchor="center") - fig.add_annotation(**common, y=-0.06, text="▼ Conservatief", xanchor="center") - - def _window_to_dates(window_id: str) -> tuple[str, str]: """Return (start_date, end_date) ISO strings for a given window_id.""" 
return trajectory.window_to_dates(window_id) -def build_compass_tab(db_path: str, window_size: str) -> None: - st.subheader("Politiek Kompas") - st.markdown( - "2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)." - ) +def build_compass_tab(*args, **kwargs): + """Build the Politiek Kompas tab.""" + from analysis.tabs.compass import build_compass_tab as _impl - # Compass always uses annual windows regardless of the sidebar window_size setting. - positions_by_window, axis_def = load_positions(db_path, "annual") - # load_positions may return None for axis_def when resources are missing - # (e.g. classifier fallback or failed enrichment). Guard so UI rendering - # code doesn't crash on axis_def.get calls. - if axis_def is None: - axis_def = {} - if not positions_by_window: - st.warning( - "Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid." - ) - return + return _impl(*args, **kwargs) - party_map = load_party_map(db_path) - active_mps = load_active_mps(db_path) - - # Sort windows: year windows first (ascending), current_parliament last. - # Exclude the current calendar year — it is already fully covered by current_parliament - # and showing both creates confusion (2026 ⊂ current_parliament). - import datetime as _dt - - _current_year = str(_dt.date.today().year) - year_windows = sorted( - w - for w in positions_by_window - if w != "current_parliament" and w != _current_year - ) - has_current = "current_parliament" in positions_by_window - windows = year_windows + (["current_parliament"] if has_current else []) - - # Motion counts per year — sparse years get a warning label. 
- _SPARSE_YEARS = {"2016", "2017", "2018"} - _THRESHOLD = 0.65 - - def _window_label(w: str) -> str: - if w == "current_parliament": - return "Huidig parlement" - if w in _SPARSE_YEARS: - return f"{w} ⚠️" - return w - - col1, col2 = st.columns([3, 1]) - with col2: - window_idx = st.selectbox( - "Jaar", - options=windows, - index=len(windows) - 1, # default: current_parliament - format_func=_window_label, - ) - level = st.radio( - "Weergave", - options=["Kamerleden", "Partijen"], - index=0, - horizontal=True, - ) - min_mps = st.number_input( - "Min. Kamerleden per partij", - min_value=1, - max_value=20, - value=3, - step=1, - help="Partijen met minder dan dit aantal zetels worden niet weergegeven.", - ) - pos = positions_by_window.get(window_idx, {}) - if not pos: - st.info(f"Geen data voor venster {window_idx}") - return - - # For current_parliament, restrict to MPs who are still seated (tot_en_met IS NULL). - # Historical windows include all MPs active at the time — no restriction needed. - if window_idx == "current_parliament": - pos = {mp: xy for mp, xy in pos.items() if mp in active_mps} - - # Deduplicate MPs whose names appear both with and without a parenthetical first name, - # e.g. "Dijk, J.P." and "Dijk, J.P. (Jimmy)". Keep the canonical (stripped) name and - # average positions if both variants are present. - def _strip_paren(name: str) -> str: - return re.sub(r"\s*\([^)]*\)", "", name).strip() - - deduped: Dict[str, Tuple[float, float]] = {} - for name, (x, y) in pos.items(): - base = _strip_paren(name) - if base in deduped: - ox, oy = deduped[base] - deduped[base] = ((ox + x) / 2, (oy + y) / 2) - else: - deduped[base] = (x, y) - pos = deduped - - rows = [] - for name, (x, y) in pos.items(): - party = party_map.get(name) or party_map.get(_strip_paren(name), "Unknown") - rows.append({"name": name, "x": x, "y": y, "party": party}) - - df_pos = pd.DataFrame(rows) - - # Drop parties below the minimum MP threshold (unreliable centroids). 
- party_counts = df_pos[df_pos["party"] != "Unknown"]["party"].value_counts() - valid_parties = set(party_counts[party_counts >= min_mps].index) - df_pos = df_pos[df_pos["party"].isin(valid_parties)] - - if df_pos.empty: - st.info("Geen partijen met genoeg Kamerleden voor dit venster.") - return - - # The first two SVD axes are clear, interpretable axes for our dataset. - # Show the classifier-provided full labels on the compass unconditionally - # so users see the canonical interpretation. We keep the confidence-based - # captions/interpretations in the expander but do not hide the axis titles - # for the compass. Note: the vertical axis title is rotated by Plotly — - # this can make "Progressief–Conservatief" look reversed because the word - # "Progressief" appears at the top when rendered; we therefore add explicit - # directional annotations to make the polarity unambiguous. - # Prefer classifier-provided labels for the first two axes. However, the - # classifier sometimes returns the concise numeric fallbacks "As 1"/"As 2" - # when it couldn't find an interpretable label. For the compass we prefer - # conventional semantic defaults instead of the generic "As N" strings so - # the chart remains readable. - _raw_x = axis_def.get("x_label") - _raw_y = axis_def.get("y_label") - - # Use the classifier helper to map internal/modal labels (e.g. "As 1") to - # user-facing labels. Import at function-time to avoid module import cycles - # and keep explorer lightweight. If the helper is unavailable fall back to - # labels from the unified svd_labels module. 
- try: - from analysis.axis_classifier import display_label_for_modal +def build_trajectories_tab(*args, **kwargs): + """Build the Partij Trajectories tab.""" + from analysis.tabs.trajectories import build_trajectories_tab as _impl - _x_label = display_label_for_modal(_raw_x, "x") - _y_label = display_label_for_modal(_raw_y, "y") - except Exception: - from analysis.svd_labels import get_fallback_labels + return _impl(*args, **kwargs) - _x_fallback, _y_fallback = get_fallback_labels() - _x_label = _raw_x or _x_fallback - _y_label = _raw_y or _y_fallback - if level == "Partijen": - # Aggregate to party centroids - df_party = df_pos.groupby("party", as_index=False).agg( - x=("x", "mean"), y=("y", "mean"), n=("name", "count") - ) - df_party["name"] = df_party["party"] - colour_map = { - p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_party["party"].unique() - } - fig = px.scatter( - df_party, - x="x", - y="y", - color="party", - text="party", - hover_name="party", - hover_data={"party": False, "x": ":.3f", "y": ":.3f", "n": True}, - color_discrete_map=colour_map, - title=f"Politiek Kompas — {_window_label(window_idx)} (partijen)", - labels={ - "x": _x_label, - "y": _y_label, - "n": "Kamerleden", - }, - ) - fig.update_traces(textposition="top center", marker_size=14) - else: - colour_map = { - p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique() - } - fig = px.scatter( - df_pos, - x="x", - y="y", - color="party", - hover_name="name", - hover_data={"party": True, "x": ":.3f", "y": ":.3f"}, - color_discrete_map=colour_map, - title=f"Politiek Kompas — {_window_label(window_idx)}", - labels={"x": _x_label, "y": _y_label}, - ) +def build_search_tab(*args, **kwargs): + """Build the Motie Zoeken tab.""" + from analysis.tabs.search import build_search_tab as _impl - fig.update_layout( - height=600, - legend_title_text="Partij", - xaxis={"range": [-1, 1]}, - yaxis={"range": [-0.6, 0.6]}, - ) - with col1: - st.plotly_chart(fig, use_container_width=True) - _x_interp 
= axis_def.get("x_interpretation", {}).get(window_idx, "") - if ( - _x_interp - and axis_def.get("x_quality", {}).get(window_idx, 1.0) < _THRESHOLD - ): - st.caption(_x_interp) - - # Voting discipline analysis - st.markdown("---") - st.markdown( - "**Stemdiscipline analyse:** De Rice-index meet hoe eensgezind partijen stemmen " - "tijdens hoofdelijke stemmingen. Een score van 100% betekent dat alle MPs van " - "een partij hetzelfde stemden; 50% wijst op een gelijke splitsing binnen de partij. " - "Partijen met hoge discipline (>95%) zoals PVV en SGP stemmen als een blok, wat " - "wijst op sterke partijdiscipline en homogene membership. Lagere discipline (<85%) " - "bij partijen als PvdA of SP kan duiden op interne factiestrijd, gewetensvragen " - "bij ethische thema's, of een brede ideologische koers die ruimte laat voor " - "afwijkende meningen. De discipline varieert ook per onderwerp — ethische kwesties " - "tonen vaak meer interne verschillen dan economische thema's." - ) + return _impl(*args, **kwargs) -# --------------------------------------------------------------------------- -# Tab 2: Partij Trajectories -# --------------------------------------------------------------------------- - - -def choose_trajectory_title(axis_def: dict, axis: str, threshold: float = 0.65) -> str: - """Choose a short trajectory axis title based on aggregated confidence.""" - return trajectory.choose_trajectory_title(axis_def, axis, threshold) - - -def build_trajectories_tab(db_path: str, window_size: str) -> None: - logging.getLogger(__name__).debug( - "[TRAJ DEBUG] build_trajectories_tab called — db_path=%s, window_size=%s", - db_path, - window_size, - ) - st.subheader("Partij Trajectories") - st.markdown("Hoe bewegen partijen over de tijdsvensters heen?") - - positions_by_window, axis_def = load_positions(db_path, window_size) - logging.getLogger(__name__).debug( - "[TRAJ DEBUG] load_positions → %d windows, total MPs=%d", - len(positions_by_window), - sum(len(v) for v in 
positions_by_window.values()), - ) - if axis_def is None: - axis_def = {} - if not positions_by_window: - # Instrumentation: record why trajectories tab aborted early - try: - _last_trajectories_diagnostics.update( - { - "stage": "load_positions_empty", - "positions_by_window_len": len(positions_by_window), - } - ) - except Exception: - pass - try: - st.warning("Geen positiedata beschikbaar.") - except Exception: - pass - # If debug enabled, show diagnostics in UI (best-effort) - try: - if get_debug_trajectories_enabled(): - try: - st.text_area( - "Trajectories diagnostics", - json.dumps(_last_trajectories_diagnostics, default=str), - height=160, - ) - except Exception: - pass - except Exception: - pass - return - - party_map = load_party_map(db_path) - logging.getLogger(__name__).debug( - "[TRAJ DEBUG] load_party_map → %d entries, sample=%s", - len(party_map), - list(party_map.items())[:3], - ) - - # Add name normalization to improve matching - def normalize_mp_name(name): - """Normalize MP name for better matching between data sources.""" - if not name: - return "" - # Remove extra whitespace - name = name.strip() - # Ensure consistent spacing after comma - if "," in name and ", " not in name: - name = name.replace(",", ", ") - return name - - # Normalize party_map keys - party_map = {normalize_mp_name(k): v for k, v in party_map.items()} - - # Also normalize MP names in positions_by_window - normalized_positions = {} - for window, positions in positions_by_window.items(): - normalized_positions[window] = { - normalize_mp_name(k): v for k, v in positions.items() - } - positions_by_window = normalized_positions - - # After normalization, log the match rate - all_mp_names = set() - for positions in positions_by_window.values(): - all_mp_names.update(positions.keys()) - - matched_names = sum(1 for mp in all_mp_names if mp in party_map) - if all_mp_names: - logger.info( - f"MP name matching: {matched_names}/{len(all_mp_names)} matched ({100 * matched_names / 
len(all_mp_names):.1f}%)" - ) - else: - logger.info("MP name matching: no MPs found in positions data") - - if matched_names == 0 and len(all_mp_names) > 0: - logger.warning("No MP names matched between positions and party_map!") - logger.warning(f"Sample positions names: {list(all_mp_names)[:5]}") - logger.warning(f"Sample party_map names: {list(party_map.keys())[:5]}") - - windows = sorted(positions_by_window.keys()) - - # Compute party centroids per window - centroids: Dict[str, Dict[str, Tuple[float, float]]] = {} - all_parties: set = set() - - # Helper to normalise MP names (strip parenthetical first names) to match - # entries in the party_map. This mirrors the behaviour used in the compass - # tab so both tabs resolve parties the same way. - def _strip_paren(name: str) -> str: - return re.sub(r"\s*\([^)]*\)", "", name).strip() - - for wid in windows: - pos = positions_by_window.get(wid, {}) - per_party: Dict[str, List[Tuple[float, float]]] = {} - for mp_name, (x, y) in pos.items(): - # Try exact match first, then stripped-name match to handle - # variants like "Dijk, J.P. (Jimmy)" -> "Dijk, J.P." used in mp_metadata - party = party_map.get(mp_name) or party_map.get( - _strip_paren(mp_name), "Unknown" - ) - if party == "Unknown": - continue - per_party.setdefault(party, []).append((x, y)) - for party, coords in per_party.items(): - all_parties.add(party) - xs = [c[0] for c in coords] - ys = [c[1] for c in coords] - centroids.setdefault(party, {})[wid] = ( - float(np.mean(xs)), - float(np.mean(ys)), - ) - - all_parties = sorted( - set(party_map.get(mp) for MPs in positions_by_window.values() for mp in MPs) - - {None, "Unknown"} - ) - logging.getLogger(__name__).debug( - "[TRAJ DEBUG] all_parties (raw from party_map) → %d parties: %s", - len(all_parties), - all_parties[:10], - ) - all_parties_sorted = sorted(all_parties) - - # If no parties were found after mapping MPs to parties, show a helpful - # message instead of rendering an empty chart. 
This commonly happens when - # the party map failed to load (DB error) or the min_mps threshold filtered - # out all parties. - if not all_parties_sorted: - st.info( - "Geen partijen beschikbaar om trajecten te tekenen. Controleer of de party mapping is geladen (mp_metadata) en of de minimum Kamerleden-instelling te hoog staat." - ) - try: - st.caption(f"Bekende partijen in party_map: {len(party_map)}") - except Exception: - pass - # Do not return here: allow per-MP fallback plotting below when no - # party-level centroids are available so the user still sees trajectories. - - # Default: show CDA, D66, VVD — the three parties that span the political centre - default_parties = [p for p in ["CDA", "D66", "VVD"] if p in all_parties] - if not default_parties: - default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties] - if not default_parties: - default_parties = all_parties_sorted[:6] - - selected_parties = st.multiselect( - "Selecteer partijen", - options=all_parties_sorted, - default=default_parties, - ) - - # Ensure EMA smoothing helper is available for per-MP fallback plotting which - # appears earlier in the function. Define here so calls above won't fail. - def _ema_smooth(values: List[float], alpha: float) -> List[float]: - if not values or alpha >= 1.0: - return values - smoothed = [values[0]] - for v in values[1:]: - smoothed.append(alpha * v + (1 - alpha) * smoothed[-1]) - return smoothed - - # default smoothing alpha used for inline per-MP plotting; may be overridden - # by the smoothing controls shown later in the UI. - smooth_alpha = 0.35 - - # If no party-level centroids were computed, fall back to per-MP trajectories - # so the user still sees a plot even when the party_map is missing or empty. - if not centroids: - # Fallback: plot individual MP trajectories - st.info( - "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback." 
- ) - - # Build per-MP time series from positions_by_window - mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {} - for wid in windows: - pos = positions_by_window.get(wid, {}) - for mp_name, xy in pos.items(): - # Defensive conversion: skip malformed coordinates instead of raising - try: - x, y = float(xy[0]), float(xy[1]) - except Exception: - # skip malformed entries silently (diagnostics will show counts) - continue - mp_positions.setdefault(mp_name, {})[wid] = (x, y) - - # Filter to MPs with at least 2 windows and not all NaN - mp_positions = { - mp: pos - for mp, pos in mp_positions.items() - if len(pos) >= 2 - and not all(np.isnan(x) and np.isnan(y) for x, y in pos.values()) - } - - if not mp_positions: - st.warning("Geen positiedata beschikbaar voor trajectplotten.") - _last_trajectories_diagnostics.update( - { - "stage": "no_mp_positions", - "mp_positions_count": 0, - } - ) - # show diagnostics when debug enabled - try: - if get_debug_trajectories_enabled(): - try: - st.text_area( - "Trajectories diagnostics", - json.dumps(_last_trajectories_diagnostics, default=str), - height=160, - ) - except Exception: - pass - except Exception: - pass - return - - # Store for later use - st.session_state["_trajectory_mp_positions"] = mp_positions - - mp_list = sorted(mp_positions.keys()) - default_mps = mp_list[:6] - selected_mps = st.multiselect( - "Selecteer Kamerleden (fallback)", options=mp_list, default=default_mps - ) - - # Plot per-MP trajectories - fig = go.Figure() - trace_count = 0 - for mp in selected_mps: - wids_sorted = sorted(mp_positions[mp].keys()) - xs_raw = [mp_positions[mp][w][0] for w in wids_sorted] - ys_raw = [mp_positions[mp][w][1] for w in wids_sorted] - xs = _ema_smooth(xs_raw, smooth_alpha) - ys = _ema_smooth(ys_raw, smooth_alpha) - custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)] - fig.add_trace( - go.Scatter( - x=xs, - y=ys, - mode="lines+markers", - name=mp, - text=wids_sorted, - customdata=custom_raw, - 
line=dict(color="#888888", shape="spline", smoothing=1.3), - marker=dict(color="#888888", size=6), - hovertemplate=( - f"{mp}
" - "venster: %{text}
" - "x (smoothed): %{x:.3f}
" - "x (raw): %{customdata[0]:.3f}
" - "y (smoothed): %{y:.3f}
" - "y (raw): %{customdata[1]:.3f}" - ), - ) - ) - trace_count += 1 - - _add_y_direction_annotations(fig) - if trace_count == 0: - st.info( - "Geen trajecten getekend: geen geselecteerde Kamerleden met voldoende data." - ) - else: - st.plotly_chart(fig, use_container_width=True) - return - - # Developer override: if EXPLORER_FORCE_SHOW_TRAJECTORIES=1 in the - # environment, bypass party filtering and show the first MPs' trajectories - # directly (helps diagnose production environments where party mapping - # or filtering prevents any traces from appearing). This is safe to keep - # in main because it only triggers when explicitly enabled. - if os.getenv("EXPLORER_FORCE_SHOW_TRAJECTORIES") in ("1", "true", "True"): - # Build per-MP time series from positions_by_window and plot first 6 MPs - mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {} - for wid in windows: - pos = positions_by_window.get(wid, {}) - for mp_name, (x, y) in pos.items(): - mp_positions.setdefault(mp_name, {})[wid] = (float(x), float(y)) - - mp_list = sorted(mp_positions.keys()) - if not mp_list: - st.info("Geen MP-positiegegevens beschikbaar om te tonen.") - return - - sample_mps = mp_list[:6] - fig = go.Figure() - for mp in sample_mps: - wids_sorted = sorted(mp_positions[mp].keys()) - xs_raw = [mp_positions[mp][w][0] for w in wids_sorted] - ys_raw = [mp_positions[mp][w][1] for w in wids_sorted] - xs = _ema_smooth(xs_raw, 0.35) - ys = _ema_smooth(ys_raw, 0.35) - custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)] - fig.add_trace( - go.Scatter( - x=xs, - y=ys, - mode="lines+markers", - name=mp, - text=wids_sorted, - customdata=custom_raw, - line=dict(color="#444444", shape="spline", smoothing=1.3), - marker=dict(color="#444444", size=6), - hovertemplate=( - f"{mp}
" - "venster: %{text}
" - "x (smoothed): %{x:.3f}
" - "x (raw): %{customdata[0]:.3f}
" - "y (smoothed): %{y:.3f}
" - "y (raw): %{customdata[1]:.3f}" - ), - ) - ) - _add_y_direction_annotations(fig) - st.plotly_chart(fig, use_container_width=True) - return - - # Debug expander: show data used to build trajectories so we can diagnose - # why no traces are appearing. Leave this collapsed by default in normal - # runs; when troubleshooting it will show counts and small samples. - try: - # Add a little opt-in checkbox in the UI to enable debug diagnostic output - debug_checkbox = False - try: - debug_checkbox = st.checkbox( - "Enable trajectories diagnostics (show extra info)", - value=get_debug_trajectories_enabled(), - ) - except Exception: - debug_checkbox = get_debug_trajectories_enabled() - if debug_checkbox: - try: - with st.expander( - "DEBUG: Trajectories data (showing diagnostics)", expanded=False - ): - st.write("windows (count):", len(windows)) - st.write("windows sample:", windows[:10]) - st.write("party_map entries:", len(party_map)) - st.write("parties with centroids:", len(all_parties_sorted)) - st.write("default_parties:", default_parties) - st.write("selected_parties:", selected_parties) - st.write("min_mps setting:", min_mps) - # sample centroid counts per party - sample = { - p: len(centroids.get(p, {})) - for p in list(all_parties_sorted)[:8] - } - st.write("sample centroid window counts per party:", sample) - except Exception: - pass - except Exception: - # Don't crash UI if st isn't available or expander fails - pass - - # Smoothing controls - smoothing_method = st.selectbox( - "Smoothing methode", - options=["EMA", "Spline", "None"], - index=0, - help="EMA = exponential moving average; Spline = low-degree polynomial spline fit; None = raw centroids", - ) - - # EMA alpha only shown/used when EMA is selected - smooth_alpha = 1.0 - if smoothing_method == "EMA": - smooth_alpha = st.slider( - "Glad maken (EMA-\u03b1)", - min_value=0.1, - max_value=1.0, - value=0.35, - step=0.05, - help=( - "\u03b1=1.0 toont de ruwe data; lagere waarden maken de lijn gladder. 
" - "Standaard 0.35 voor een goed evenwicht tussen detail en ruis." - ), - ) - - def _ema_smooth(values: List[float], alpha: float) -> List[float]: - """Apply exponential moving average; alpha=1.0 means no smoothing.""" - if not values or alpha >= 1.0: - return values - smoothed = [values[0]] - for v in values[1:]: - smoothed.append(alpha * v + (1 - alpha) * smoothed[-1]) - return smoothed - - def _spline_smooth(values: List[float]) -> List[float]: - """Perform a basic low-degree polynomial fit over index -> value and evaluate at indices. - - This provides a simple spline-like smoothing without adding scipy as a dependency. - For very small N this returns the raw values. - """ - n = len(values) - if n <= 2: - return values - deg = min(3, n - 1) - try: - idx = np.arange(n, dtype=float) - coeffs = np.polyfit(idx, np.array(values, dtype=float), deg=deg) - smooth = np.polyval(coeffs, idx) - return [float(v) for v in smooth] - except Exception: - return values - - fig = go.Figure() - trace_count = 0 - helper_succeeded = False - # New: delegate plotting selection to helper for testability - # Note: select_trajectory_plot_data returns (fig, trace_count, banner_text) - try: - fig2, trace_count2, banner_text = select_trajectory_plot_data( - positions_by_window, party_map, windows, selected_parties, smooth_alpha - ) - # If helper returned a figure, replace - if fig2 is not None: - fig = fig2 - trace_count = trace_count2 - helper_succeeded = True - if banner_text: - try: - st.caption(banner_text) - except Exception: - pass - try: - _last_trajectories_diagnostics.update({"banner_text": banner_text}) - except Exception: - pass - except Exception as e: - tb = traceback.format_exc() - # attach diagnostics to the helper and module - try: - select_trajectory_plot_data._last_diagnostics = {"exception": tb} - except Exception: - pass - try: - _last_trajectories_diagnostics.update( - {"stage": "select_helper_exception", "exception": tb} - ) - except Exception: - pass - 
logger.exception("select_trajectory_plot_data failed") - debug_enabled = get_debug_trajectories_enabled() - if debug_enabled: - try: - st.text_area("select_trajectory_plot_data traceback", tb, height=240) - except Exception: - pass - logging.getLogger(__name__).debug( - "[TRAJ DEBUG] helper_succeeded=%s", helper_succeeded - ) - if not helper_succeeded: - for party in selected_parties: - if party not in centroids: - continue - wids_sorted = sorted(centroids[party].keys()) - xs_raw = [centroids[party][w][0] for w in wids_sorted] - ys_raw = [centroids[party][w][1] for w in wids_sorted] - xs = _ema_smooth(xs_raw, smooth_alpha) - ys = _ema_smooth(ys_raw, smooth_alpha) - # Preserve raw (unsmoothed) values per-point so hover can show both raw and smoothed - custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)] - colour = PARTY_COLOURS.get(party, "#9E9E9E") - fig.add_trace( - go.Scatter( - x=xs, - y=ys, - mode="lines+markers", - name=party, - text=wids_sorted, # full window ID for hover - customdata=custom_raw, - line=dict(color=colour, shape="spline", smoothing=1.3), - marker=dict(color=colour, size=8), - hovertemplate=( - f"{party}
" - "venster: %{text}
" - "x (smoothed): %{x:.3f}
" - "x (raw): %{customdata[0]:.3f}
" - "y (smoothed): %{y:.3f}
" - "y (raw): %{customdata[1]:.3f}" - ), - ) - ) - trace_count += 1 - - # For trajectories, the chart spans multiple windows. Use the classifier's - # per-window confidences aggregated (mean) to decide whether to use the - # classifier label or fall back to the conventional short label. - _THRESHOLD = 0.65 - x_conf_map = axis_def.get("x_label_confidence", {}) or {} - y_conf_map = axis_def.get("y_label_confidence", {}) or {} - - def _mean_conf(m: dict) -> Optional[float]: - vals = [v for v in m.values() if v is not None] - if not vals: - return None - return float(sum(vals) / len(vals)) - - x_mean = _mean_conf(x_conf_map) - y_mean = _mean_conf(y_conf_map) - - x_title = choose_trajectory_title(axis_def, "x", threshold=_THRESHOLD) - y_title = choose_trajectory_title(axis_def, "y", threshold=_THRESHOLD) - - fig.update_layout( - title="Partij trajectories", - xaxis_title=x_title, - yaxis_title=y_title, - height=600, - legend_title_text="Partij", - ) - _add_y_direction_annotations(fig) - # If no traces were added to the figure, show a diagnostic message so the - # user knows why the plot is empty. 
- try: - _last_trajectories_diagnostics.update({"trace_count": trace_count}) - except Exception: - pass - debug_enabled = get_debug_trajectories_enabled() - # Add detailed diagnostics to understand why trace_count is 0 - if trace_count == 0: - _last_trajectories_diagnostics.update( - { - "stage": "zero_traces", - "positions_count": sum(len(pos) for pos in positions_by_window.values()) - if positions_by_window - else 0, - "party_map_count": len(party_map) if party_map else 0, - "centroids_count": len(centroids) if centroids else 0, - "selected_parties_count": len(selected_parties) - if selected_parties - else 0, - "timestamp": datetime.now().isoformat(), - } - ) - # Check if there are positions but no centroids (name mismatch) - if positions_by_window and party_map and not centroids: - # Sample some MP names from positions - sample_mps = [] - for window, positions in list(positions_by_window.items())[:1]: - sample_mps = list(positions.keys())[:5] - break - # Check if these MPs are in party_map - matched = sum(1 for mp in sample_mps if mp in party_map) - _last_trajectories_diagnostics["name_match_check"] = { - "sample_mps": sample_mps, - "matched_in_party_map": matched, - "sample_size": len(sample_mps), - } - if trace_count == 0: - st.info("📊 **Geen trajecten getekend**") - - # Show diagnostic information - with st.expander("🔍 Diagnostische informatie"): - st.write("**Data status:**") - st.write( - f"- Positie vensters: {len(positions_by_window) if positions_by_window else 0}" - ) - st.write(f"- Party mappings: {len(party_map) if party_map else 0}") - st.write( - f"- Geselecteerde partijen: {len(selected_parties) if selected_parties else 0}" - ) - - if "centroid_diagnostics" in locals(): - st.write("**Centroid berekening:**") - st.write( - f"- Partijen met posities: {len(centroid_diagnostics.get('parties_with_positions', []))}" - ) - st.write( - f"- Partijen met alleen NaN: {len(centroid_diagnostics.get('parties_all_nan', []))}" - ) - - st.write("\n**Mogelijke 
oorzaken:**") - st.write("1. Geen SVD vectoren berekend voor de geselecteerde vensters") - st.write("2. MP namen in posities komen niet overeen met party_map") - st.write("3. Alle geselecteerde partijen hebben te weinig MPs (< 5)") - - # Add a button to run diagnostics - if st.button("🔧 Database diagnostiek uitvoeren"): - with st.spinner("Bezig met diagnostiek..."): - # Import and run diagnostics - from scripts.diagnose_trajectories_cli import ( - run as diagnose_trajectories, - ) - - results = diagnose_trajectories(db_path) - st.json(results) - else: - # DEBUG: show trace_count and figure data size before rendering - try: - st.info( - f"[DEBUG] trace_count={trace_count}, fig data count={len(fig.data)}, layout title={fig.layout.title.text if fig.layout.title else 'none'}" - ) - except Exception: - pass - try: - logging.getLogger(__name__).debug( - "[TRAJ DEBUG] About to render plotly chart — trace_count=%d, banner=%s, fig has %d traces", - trace_count, - banner_text, - len(fig.data), - ) - st.plotly_chart(fig, use_container_width=True) - except Exception as e: - st.error(f"Trajectories rendering failed: {e}") - # Always show diagnostics when rendering fails, regardless of trace_count - if get_debug_trajectories_enabled(): - try: - st.json(_last_trajectories_diagnostics) - except Exception: - st.text_area( - "Trajectories diagnostics (JSON failed)", - json.dumps(_last_trajectories_diagnostics, default=str), - height=240, - ) - - -# --------------------------------------------------------------------------- -# Tab 3: Motie Zoeken -# --------------------------------------------------------------------------- - - -def build_search_tab(db_path: str, show_rejected: bool) -> None: - st.subheader("Motie Zoeken") - - df = load_motions_df(db_path) - if df.empty: - st.warning("Geen moties beschikbaar.") - return +def build_browser_tab(*args, **kwargs): + """Build the Motie Browser tab.""" + from analysis.tabs.browser import build_browser_tab as _impl - if not show_rejected: - 
df = df[df["title"].fillna("").str.strip() != "Verworpen."] + return _impl(*args, **kwargs) - # Controls - col1, col2, col3 = st.columns([2, 1, 1]) - with col1: - query = st.text_input( - "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen" - ) - with col2: - years = sorted(df["year"].dropna().astype(int).unique().tolist()) - if years: - year_range = st.select_slider( - "Jaar", options=years, value=(years[0], years[-1]) - ) - else: - year_range = (2019, 2024) - with col3: - min_controversy = st.slider( - "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05 - ) - - # Apply filters in-memory - working = df.copy() - working = working[ - (working["year"] >= year_range[0]) & (working["year"] <= year_range[1]) - ] - if min_controversy > 0: - working = working[working["controversy_score"] >= min_controversy] - if query: - q = query.lower() - mask = working["title"].fillna("").str.lower().str.contains(q, regex=False) - working = working[mask] - - working = working.sort_values(by="controversy_score", ascending=False) - st.caption(f"{len(working)} resultaten (top 50 getoond)") - - for _, row in working.head(50).iterrows(): - title = row.get("title") or f"Motie #{row['id']}" - date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?" 
- controversy = row.get("controversy_score") or 0 - with st.expander(f"**{title}** — {date_str} — 🔥 {controversy:.2f}"): - cols = st.columns(3) - cols[0].metric("Controverse", f"{controversy:.2f}") - cols[1].metric("Marge", f"{row.get('winning_margin', 0):.2f}") - cols[2].metric("Jaar", int(row["year"]) if pd.notna(row["year"]) else "?") - - # Voting breakdown - _render_voting_results(row.get("voting_results")) - - # Link to original motion - url = row.get("url") - if url and str(url).startswith("http"): - st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})") - - # Similar motions - sim = query_similar(db_path, int(row["id"]), top_k=5) - if not sim.empty: - st.markdown("**Vergelijkbare moties:**") - for _, s in sim.iterrows(): - s_date = ( - pd.to_datetime(s["date"]).strftime("%Y") - if pd.notna(s.get("date")) - else "" - ) - st.markdown( - f"- {s.get('title', 'Onbekend')} *(score: {s['score']:.3f}, {s_date})*" - ) - else: - st.caption("_Nog geen vergelijkbare moties beschikbaar_") - - -# --------------------------------------------------------------------------- -# Tab 4: Motie Browser -# --------------------------------------------------------------------------- - - -def build_browser_tab(db_path: str, show_rejected: bool) -> None: - st.subheader("Motie Browser") - - df = load_motions_df(db_path) - if df.empty: - st.warning("Geen moties beschikbaar.") - return - - if not show_rejected: - df = df[df["title"].fillna("").str.strip() != "Verworpen."] - - # Controls - col1, col2, col3 = st.columns(3) - with col1: - years = sorted(df["year"].dropna().astype(int).unique().tolist()) - year_filter = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years]) - with col2: - min_controversy_b = st.slider( - "Min. 
controverse", - min_value=0.0, - max_value=1.0, - value=0.0, - step=0.05, - key="browser_controversy", - ) - with col3: - sort_by = st.selectbox("Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"]) - - # Filter - working = df.copy() - if year_filter != "(Alle)": - working = working[working["year"] == int(year_filter)] - if min_controversy_b > 0: - working = working[working["controversy_score"] >= min_controversy_b] - - sort_map = { - "Datum (nieuw)": ("date", False), - "Controverse": ("controversy_score", False), - "Marge": ("winning_margin", True), - } - sort_col, sort_asc = sort_map[sort_by] - working = working.sort_values(by=sort_col, ascending=sort_asc) - - # Display table - display_cols = ["id", "title", "date", "controversy_score", "winning_margin"] - available_display = [c for c in display_cols if c in working.columns] - st.dataframe( - working[available_display].reset_index(drop=True), - use_container_width=True, - height=350, - ) - - st.divider() - - # Detail panel - st.markdown("**Detail weergave** — vul een motie-ID in:") - sel_id = st.number_input( - "Motie ID", - min_value=int(working["id"].min()) if not working.empty else 1, - max_value=int(working["id"].max()) if not working.empty else 99999, - value=int(working["id"].iloc[0]) if not working.empty else 1, - step=1, - ) - motion_row = df[df["id"] == sel_id] - if not motion_row.empty: - row = motion_row.iloc[0] - st.markdown(f"### {row.get('title') or 'Onbekend'}") - date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?" 
- st.caption( - f"📅 {date_str} | 🔥 Controverse: {row.get('controversy_score', 0):.2f}" - ) - - # Link to original source - url = row.get("url") - if url and str(url).startswith("http"): - st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})") - - # Voting breakdown - st.markdown("**Stemuitslag:**") - _render_voting_results(row.get("voting_results")) - - # Similar motions - sim = query_similar(db_path, int(sel_id), top_k=10) - if not sim.empty: - st.markdown("**Vergelijkbare moties:**") - st.dataframe( - sim[["title", "score", "date", "policy_area"]], - use_container_width=True, - ) - else: - st.caption("_Nog geen vergelijkbare moties beschikbaar voor deze motie_") - - -def build_svd_components_tab(db_path: str) -> None: - """New tab: show top motions contributing to top SVD components. - - Reads thoughts/explorer/top_svd_top_motions.json and displays a selector - for components 1..10 with theme labels/explanations and a detail pane per motion. - - Components 1-2 use aligned PCA positions (consistent with compass). - Components 3-10 use raw SVD scores. - """ - st.subheader("🔬 SVD Assen — politieke polarisatiethema's") - st.markdown( - "Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen " - "van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren " - "het spanningsveld dat de as beschrijft." - ) - - # Scree plot: relative importance of each SVD component - scree_importances = load_scree_data(db_path) - if scree_importances: - st.markdown( - "**Scree-plot** — het relatieve gewicht van elke SVD-as. " - "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; " - "latere assen (7+) zijn fragiel en mogelijk niet boven ruisniveau." - ) - _render_scree_plot(scree_importances) - - json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json") - if not os.path.exists(json_path): - st.warning( - f"Top-SVD data not found at {json_path}. Run the importance job to generate it." 
- ) - return - - try: - with open(json_path, "r", encoding="utf-8") as fh: - j = json.load(fh) - except Exception as e: - st.error(f"Failed to load SVD importance JSON: {e}") - return - - window = j.get("window") - rows = j.get("rows", []) - if not rows: - st.info("Geen top-moties in dataset") - return - - st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**") - - # Build mapping component -> list of motions (deduplicate by motion_id per component) - comp_map: dict[int, list] = {} - for r in rows: - comp = int(r.get("component", 0)) - bucket = comp_map.setdefault(comp, []) - existing_ids = {m.get("motion_id") for m in bucket} - if r.get("motion_id") not in existing_ids: - bucket.append(r) - - comp_options = sorted(comp_map.keys()) - - # Build display labels for selectbox: "As 1 — Regulering vs. status-quo" - def _comp_label(c: int) -> str: - theme = SVD_THEMES.get(c, {}) - lbl = theme.get("label", "") - return f"As {c} — {lbl}" if lbl else f"As {c}" - - comp_display = [_comp_label(c) for c in comp_options] - - # Load default party scores early (needed for sidebar controls) - party_scores_default = load_party_axis_scores(db_path) - party_mp_vectors = load_party_mp_vectors(db_path) - bootstrap_data = ( - _cached_bootstrap_cis(party_mp_vectors) if party_mp_vectors else None - ) - - # Sidebar controls for window selection and minimum MPs filter - col1, col2 = st.columns([2, 1]) - - # Initialize view mode (will be set in col2 if render succeeds) - view_mode = "Enkel venster" - selected_parties_for_trajectory: list = [] - - with col2: - comp_sel_idx = st.selectbox( - "Selecteer SVD-as", - options=list(range(len(comp_options))), - format_func=lambda i: comp_display[i], - index=0, - ) - comp_sel = comp_options[comp_sel_idx] - - # Minimum MPs filter (only relevant for components 1-2 which use party centroids) - min_mps = st.number_input( - "Min. 
Kamerleden per partij", - min_value=1, - max_value=20, - value=1, - step=1, - help="Partijen met minder dan dit aantal Kamerleden worden niet weergegeven.", - ) - - # View selector for party axis display - view_mode = st.radio( - "Weergave", - options=["Enkel venster", "Tijdtraject"], - index=0, - help="Enkel venster: toont posities voor één tijdsvenster. Tijdtraject: toont hoe partijen over tijd bewegen op deze as.", - ) - - # Party multi-select for time trajectory view - selected_parties_for_trajectory = [] - if view_mode == "Tijdtraject": - # Get list of parties with scores - all_parties = ( - sorted(party_scores_default.keys()) if party_scores_default else [] - ) - default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties][:8] - selected_parties_for_trajectory = st.multiselect( - "Partijen om te tonen", - options=all_parties, - default=default_parties, - help="Selecteer de partijen die je wilt zien in het tijdtraject.", - ) - - # Show theme explanation - theme = SVD_THEMES.get(comp_sel, {}) - if theme: - st.info(f"**{theme['label']}** — {theme['explanation']}") - - motions = comp_map.get(comp_sel, []) - - # Party axis chart - # Default party scores already loaded earlier for sidebar controls. - # ALL components 1-10 use raw (non-aligned) SVD vectors. - # The compass uses Procrustes-aligned PCA — separate visualization. 
- # Get available windows from svd_vectors; exclude current year (covered by current_parliament) - import datetime as _dt - _current_year = str(_dt.date.today().year) - available_windows = get_uniform_dim_windows(db_path) - year_windows = sorted( - w for w in available_windows if w != "current_parliament" and w != _current_year - ) - has_current = "current_parliament" in available_windows - svd_windows = year_windows + (["current_parliament"] if has_current else []) - - def _svd_window_label(w: str) -> str: - if w == "current_parliament": - return "Huidig parliament" - return w - - with col1: - svd_window = st.selectbox( - "Jaar", - options=svd_windows, - index=len(svd_windows) - 1, # default: current_parliament - format_func=_svd_window_label, - key=f"svd_window_{comp_sel}", - ) - - # Load party scores for the selected window (used for components 3-10) - if svd_window == "current_parliament": - party_scores = party_scores_default - else: - party_scores = load_party_axis_scores_for_window(db_path, svd_window) - - # Compute MP counts from party_mp_vectors - party_mp_counts = ( - {p: len(v) for p, v in party_mp_vectors.items()} if party_mp_vectors else {} - ) - - # For components 1-2, use aligned positions from load_positions (same as compass) - # for consistency. For components 3-10, use raw SVD scores. 
- def _get_aligned_party_coords(window: str) -> Dict[str, Tuple[float, float]]: - """Get party (x, y) coordinates from aligned PCA positions for a window.""" - positions_by_window, _ = load_positions(db_path, "annual") - window_pos = positions_by_window.get(window, {}) - if not window_pos: - return {} - - # Load party map to convert MP names to parties - _party_map = load_party_map(db_path) - - # Aggregate MP positions to party centroids - party_coords: Dict[str, List[Tuple[float, float]]] = {} - for mp_name, (x, y) in window_pos.items(): - party = _party_map.get( - mp_name, _party_map.get(mp_name.split("(")[0].strip(), None) - ) - if party: - party_coords.setdefault(party, []).append((x, y)) - - # Compute mean position per party - return { - party: ( - float(np.mean([c[0] for c in coords])), - float(np.mean([c[1] for c in coords])), - ) - for party, coords in party_coords.items() - if coords - } - - # Load aligned scores for ALL components 1-10 using PCA on aligned vectors. - # This ensures consistency between compass and SVD components tab. - def _get_aligned_party_scores(window: str) -> Dict[str, np.ndarray]: - """Get party scores for all N components from aligned PCA positions.""" - active_mps = ( - load_active_mps(db_path) if window == "current_parliament" else None - ) - return get_aligned_party_scores(db_path, window, active_mps) - - # Extract 1D scores for this component using Procrustes-aligned PCA scores. - # All 10 components use _get_aligned_party_scores (compute_nd_axes with annual-only - # windows). This is mathematically identical to the compass x/y positions for - # components 1 and 2, and consistently uses the same aligned data for 3-10. 
- party_1d_coords: dict = {} - aligned_all_scores = _get_aligned_party_scores(svd_window) - for party, all_scores in aligned_all_scores.items(): - idx = comp_sel - 1 # 0-indexed - if idx < len(all_scores): - party_1d_coords[party] = (float(all_scores[idx]),) - - # Auto-compute flip directions for ALL components 1-10 based on aligned party centroids. - # Since we now use aligned PCA scores for all components, compute flip directly from - # aligned scores to ensure canonical right parties (PVV, FVD, JA21, SGP) appear on RIGHT. - computed_flips: Dict[int, bool] = {} - try: - from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT - - # Compute flip for each component based on aligned party scores - for comp_idx in range(10): - right_scores = [] - left_scores = [] - for party, scores in aligned_all_scores.items(): - if party in CANONICAL_RIGHT: - right_scores.append(scores[comp_idx]) - elif party in CANONICAL_LEFT: - left_scores.append(scores[comp_idx]) - - if right_scores and left_scores: - right_avg = np.mean(right_scores) - left_avg = np.mean(left_scores) - # Flip if right parties score lower than left (we want RIGHT > LEFT) - computed_flips[comp_idx + 1] = right_avg < left_avg - else: - computed_flips[comp_idx + 1] = False - except Exception: - # If flip computation fails, keep existing flip values from SVD_THEMES - pass - - # Build theme override with computed flip for this component - # (avoids mutating SVD_THEMES which persists stale values across Streamlit reruns) - theme_with_flip = { - **theme, - "flip": computed_flips.get(comp_sel, theme.get("flip", False)), - } - - # Filter parties by minimum MP count - if min_mps > 1 and party_mp_counts: - valid_parties = {p for p, count in party_mp_counts.items() if count >= min_mps} - party_1d_coords = { - p: coords for p, coords in party_1d_coords.items() if p in valid_parties - } - - # Render party axis chart (single window or time trajectory) - if view_mode == "Tijdtraject" and selected_parties_for_trajectory: - # 
Load party scores for all windows and render time trajectory - available_windows = get_uniform_dim_windows(db_path) - year_windows = sorted( - w - for w in available_windows - if w != "current_parliament" and w != _current_year - ) - has_current = "current_parliament" in available_windows - all_windows = year_windows + (["current_parliament"] if has_current else []) - - # Use aligned PCA scores for all windows (consistent with single-window view) - party_scores_by_window = _get_aligned_trajectory_scores(db_path, all_windows) - - _render_svd_time_trajectory( - party_scores_by_window, - comp_sel, - theme_with_flip, - selected_parties_for_trajectory, - ) - else: - # Single-window view: render 1D party axis chart - _render_party_axis_chart_1d(party_1d_coords, comp_sel, theme_with_flip) - - # Batch-fetch motion details (title, date, policy_area, url, body_text, voting_results) - motion_ids = [m.get("motion_id") for m in motions if m.get("motion_id") is not None] - motion_details: Dict[int, tuple] = {} - if motion_ids: - # Defensively convert motion_ids to integers, skipping invalid values - ids_int: List[int] = [] - for mid in motion_ids: - try: - ids_int.append(int(mid)) - except Exception: - logger.warning("Skipping invalid motion id in SVD batch fetch: %r", mid) - - # If no valid ids remain, skip the DB query - if ids_int: - con = None - try: - placeholders = ", ".join("?" 
for _ in ids_int) - con = duckdb.connect(database=db_path, read_only=True) - db_rows = con.execute( - f"SELECT id, title, date, policy_area, url, body_text, voting_results " - f"FROM motions WHERE id IN ({placeholders})", - ids_int, - ).fetchall() - motion_details = {r[0]: r for r in db_rows} - except Exception: - logger.exception("Failed to batch-fetch motion details") - finally: - if con: - con.close() - - # Split motions by pole sign - pos_motions = [m for m in motions if float(m.get("score", 0.0)) >= 0] - neg_motions = [m for m in motions if float(m.get("score", 0.0)) < 0] - - flip = theme_with_flip.get("flip", False) if theme_with_flip else False - pos_pole = theme_with_flip.get("positive_pole", "") if theme_with_flip else "" - neg_pole = theme_with_flip.get("negative_pole", "") if theme_with_flip else "" - - # Derive left/right labels from flip direction - # flip=True: positive_pole on left, negative_pole on right - # flip=False: negative_pole on left, positive_pole on right - if flip: - left_pole, right_pole = pos_pole, neg_pole - left_motions, right_motions = pos_motions, neg_motions - left_arrow, right_arrow = "▲", "▼" - else: - left_pole, right_pole = neg_pole, pos_pole - left_motions, right_motions = neg_motions, pos_motions - left_arrow, right_arrow = "▼", "▲" - - lcol, rcol = st.columns(2) - - with lcol: - st.markdown(f"**← {left_pole}**") - for m in left_motions: - mid = m.get("motion_id") - raw_title = m.get("title") or f"Motie #{mid}" - with st.expander(f"{left_arrow} {raw_title}"): - row = motion_details.get(int(mid)) if mid is not None else None - if row: - try: - date_str = str(row[2])[:10] - except Exception: - date_str = "?" 
- st.caption(f"📅 {date_str} | {row[3] or '—'}") - if row[4] and str(row[4]).startswith("http"): - st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})") - if row[5]: - with st.expander("Toon volledige tekst"): - st.write(row[5]) - _render_voting_results(row[6]) - else: - st.caption("_Geen metadata beschikbaar_") - - with rcol: - st.markdown(f"**{right_pole} →**") - for m in right_motions: - mid = m.get("motion_id") - raw_title = m.get("title") or f"Motie #{mid}" - with st.expander(f"{right_arrow} {raw_title}"): - row = motion_details.get(int(mid)) if mid is not None else None - if row: - try: - date_str = str(row[2])[:10] - except Exception: - date_str = "?" - st.caption(f"📅 {date_str} | {row[3] or '—'}") - if row[4] and str(row[4]).startswith("http"): - st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})") - if row[5]: - with st.expander("Toon volledige tekst"): - st.write(row[5]) - _render_voting_results(row[6]) - else: - st.caption("_Geen metadata beschikbaar_") - - -def build_mp_quiz_tab(db_path: str) -> None: - """Interactive quiz: narrow MPs by asking motion vote questions. - - Minimal viable flow: - - seed with top-N controversial motions (SEED_MOTIONS) - - present one question at a time, store answers in st.session_state['mp_quiz_votes'] - - after each answer call MotionDatabase.match_mps_for_votes to rank MPs - - if multiple candidates remain, call choose_discriminating_motions to pick next question - - stop when unique MP found or no discriminating motions remain - """ - st.subheader("🧑‍⚖️ Welk tweede kamerlid ben jij?") - st.markdown( - "Beantwoord een paar eenvoudige ja/nee/onthoud vragen over moties om te zien welk Kamerlid het meest op jou lijkt." 
- ) - - SEED_MOTIONS = 8 - MAX_QUESTIONS = 20 - - # initialize session state - if "mp_quiz_votes" not in st.session_state: - st.session_state["mp_quiz_votes"] = {} - if "mp_quiz_asked" not in st.session_state: - st.session_state["mp_quiz_asked"] = [] +def build_svd_components_tab(*args, **kwargs): + """Build the SVD Components tab.""" + from analysis.tabs.components import build_svd_components_tab as _impl - from database import MotionDatabase as _MotionDatabase + return _impl(*args, **kwargs) - db_inst = _MotionDatabase(db_path) - df = load_motions_df(db_path) - if df.empty: - st.warning("Geen moties beschikbaar om de quiz te starten.") - return +def build_mp_quiz_tab(*args, **kwargs): + """Build the MP Quiz tab.""" + from analysis.tabs.quiz import build_mp_quiz_tab as _impl - # seed from motions that actually have individual MP vote records - seed_ids = db_inst.get_motions_with_individual_votes(k=SEED_MOTIONS) - if not seed_ids: - st.warning("Geen individuele stemdata beschikbaar voor de quiz.") - return - - # Determine next motion to ask - def _next_motion_id(): - # prefer seed motions not yet asked - for mid in seed_ids: - if str(mid) not in st.session_state["mp_quiz_votes"]: - return mid - # otherwise ask discriminating motion based on remaining candidate MPs - # compute current candidate set - - try: - user_votes = { - int(k): v for k, v in st.session_state["mp_quiz_votes"].items() - } - ranked = db_inst.match_mps_for_votes(user_votes, limit=200) - except Exception: - ranked = [] - - candidates = [r["mp_name"] for r in ranked] - excluded = [int(k) for k in st.session_state["mp_quiz_votes"].keys()] - if not candidates: - return None - try: - next_ids = db_inst.choose_discriminating_motions(candidates, excluded, k=1) - return next_ids[0] if next_ids else None - except Exception: - return None - - # show progress and controls - col1, col2 = st.columns([3, 1]) - with col2: - st.caption( - f"Vragen beantwoord: 
{len(st.session_state['mp_quiz_votes'])}/{MAX_QUESTIONS}" - ) - if st.button("Reset quiz"): - st.session_state["mp_quiz_votes"] = {} - st.session_state["mp_quiz_asked"] = [] - st.rerun() - - # main question loop (single question per render, wrapped in a form to avoid - # premature reruns when the user changes the radio selection) - next_mid = _next_motion_id() - if next_mid is None: - st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.") - else: - motion_rows = df[df["id"] == next_mid] - if motion_rows.empty: - # motion has votes but isn't in the motions DataFrame — skip it - st.session_state["mp_quiz_votes"][str(next_mid)] = "Geen stem" - st.rerun() - return - motion_row = motion_rows.iloc[0] - st.markdown(f"### {motion_row.get('title') or f'Motie #{next_mid}'}") - if motion_row.get("layman_explanation"): - st.info(motion_row.get("layman_explanation")) - - with st.form(key=f"mp_quiz_form_{next_mid}"): - choice = st.radio( - "Wat zou jij stemmen?", - options=["Voor", "Tegen", "Onthouden", "Geen stem"], - index=3, - ) - submitted = st.form_submit_button("Beantwoord en verder") - - if submitted: - st.session_state["mp_quiz_votes"][str(next_mid)] = choice - st.session_state["mp_quiz_asked"].append(next_mid) - st.rerun() - - # display current ranking - try: - user_votes = {int(k): v for k, v in st.session_state["mp_quiz_votes"].items()} - ranking = db_inst.match_mps_for_votes(user_votes, limit=50) - except Exception: - ranking = [] - - if ranking: - st.markdown("**Top kandidaten**") - # show as table - import pandas as pd - - rdf = pd.DataFrame(ranking) - st.dataframe(rdf.head(10), use_container_width=True) - - # check uniqueness - top_pct = ranking[0]["agreement_pct"] if ranking else 0.0 - top_matches = [r for r in ranking if r["agreement_pct"] == top_pct] - if len(top_matches) == 1 and top_matches[0]["overlap"] > 0: - st.success( - f"Unieke match gevonden: {top_matches[0]['mp_name']} ({top_matches[0]['party']})" - ) - else: - if 
len(st.session_state["mp_quiz_asked"]) >= MAX_QUESTIONS: - st.warning( - "Maximaal aantal vragen beantwoord. Je hebt meerdere vergelijkbare kandidaten." - ) - else: - st.info("Nog geen unieke match — vraag meer om verder te verfijnen.") - else: - st.info("Nog geen antwoorden of geen overlapping met bestaande stemdata.") - - -# --------------------------------------------------------------------------- -# App entry -# --------------------------------------------------------------------------- + return _impl(*args, **kwargs) def run_app() -> None: @@ -3022,13 +473,11 @@ def run_app() -> None: ) st.title("🏛️ Parlement Explorer") - # Sidebar st.sidebar.title("Instellingen") db_path = "data/motions.db" window_size = "annual" show_rejected = st.sidebar.checkbox("Toon verworpen moties", value=False) - # About section with st.sidebar.expander("ℹ️ Over", expanded=False): try: if _DUCKDB_AVAILABLE: @@ -3053,8 +502,6 @@ def run_app() -> None: except Exception as e: st.warning(f"DB niet bereikbaar: {e}") - # Main tabs - # Streamlit tabs compatibility: some older/newer Streamlit builds expose different APIs. tab_labels = [ "🧭 Politiek Kompas", "📈 Trajectories", @@ -3076,7 +523,6 @@ def run_app() -> None: with tab5: build_svd_components_tab(db_path) else: - # Fallback for environments where `st.tabs` is not available: use a radio selector selection = st.radio("Tab", tab_labels) if selection == tab_labels[0]: build_compass_tab(db_path, window_size) diff --git a/scheduler.py b/scheduler.py new file mode 100644 index 0000000..0024654 --- /dev/null +++ b/scheduler.py @@ -0,0 +1,170 @@ +"""Automated pipeline scheduling. + +Runs the parliamentary embedding pipeline and motion summarization +on a configurable schedule using the `schedule` library. 
+ +Usage: + uv run python scheduler.py # start scheduler loop + uv run python scheduler.py --once # run once and exit + uv run python scheduler.py --pipeline-time 03:00 --summarizer-every 6 +""" + +from __future__ import annotations + +import argparse +import logging +import signal +import sys +import time +from typing import Callable + +import schedule + +from config import config +import argparse + +from pipeline.run_pipeline import run as run_pipeline +from summarizer import summarizer + +_logger = logging.getLogger(__name__) + + +class PipelineScheduler: + """Schedules and runs pipeline jobs.""" + + def __init__(self, db_path: str = "data/motions.db"): + self.db_path = db_path + self._running = False + + def run_pipeline(self) -> int: + """Run the full embedding pipeline. + + Returns the exit code from the pipeline run. + """ + _logger.info("Starting scheduled pipeline run") + try: + args = argparse.Namespace( + db_path=self.db_path, + window_size="annual", + start_date=None, + end_date=None, + svd_k=50, + svd_workers=None, + text_model=None, + text_batch_size=200, + skip_metadata=False, + skip_extract=False, + skip_svd=False, + skip_text=False, + skip_fusion=False, + dry_run=False, + ) + result = run_pipeline(args) + _logger.info("Pipeline run completed with code %s", result) + return result if isinstance(result, int) else 0 + except Exception: + _logger.exception("Pipeline run failed") + return 1 + + def run_summarizer(self) -> None: + """Run motion summarization for missing explanations.""" + _logger.info("Starting scheduled summarizer run") + try: + summarizer.update_motion_summaries() + _logger.info("Summarizer run completed") + except Exception: + _logger.exception("Summarizer run failed") + + def schedule_daily(self, time_str: str = "02:00") -> None: + """Schedule the pipeline to run daily at *time_str*.""" + _logger.info("Scheduling daily pipeline run at %s", time_str) + schedule.every().day.at(time_str).do(self.run_pipeline) + + def 
schedule_summarizer(self, every_n_hours: int = 6) -> None: + """Schedule the summarizer to run every *every_n_hours* hours.""" + _logger.info("Scheduling summarizer every %s hours", every_n_hours) + schedule.every(every_n_hours).hours.do(self.run_summarizer) + + def _signal_handler(self, signum, frame) -> None: + """Handle shutdown signals gracefully.""" + _logger.info("Received signal %s, shutting down", signum) + self.stop() + + def start(self) -> None: + """Start the scheduler loop. + + Blocks until :meth:`stop` is called or a signal is received. + """ + self._running = True + + # Register signal handlers for graceful shutdown + signal.signal(signal.SIGTERM, self._signal_handler) + signal.signal(signal.SIGINT, self._signal_handler) + + _logger.info("Scheduler started") + while self._running: + schedule.run_pending() + time.sleep(1) + _logger.info("Scheduler stopped") + + def stop(self) -> None: + """Stop the scheduler loop.""" + self._running = False + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Automated pipeline scheduler", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--db-path", + default="data/motions.db", + help="Path to the DuckDB file", + ) + parser.add_argument( + "--pipeline-time", + default="02:00", + help="Daily pipeline run time (HH:MM)", + ) + parser.add_argument( + "--summarizer-every", + type=int, + default=6, + help="Run summarizer every N hours", + ) + parser.add_argument( + "--once", + action="store_true", + help="Run pipeline + summarizer once and exit (no scheduling loop)", + ) + return parser + + +def main(argv: list[str] | None = None) -> int: + parser = build_parser() + args = parser.parse_args(argv) + + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s %(message)s", + ) + + sched = PipelineScheduler(db_path=args.db_path) + + if args.once: + _logger.info("Running in single-shot mode") + pipeline_rc = 
sched.run_pipeline() + sched.run_summarizer() + return pipeline_rc + + sched.schedule_daily(args.pipeline_time) + if args.summarizer_every > 0: + sched.schedule_summarizer(args.summarizer_every) + + sched.start() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/test_explorer_decomposition.py b/tests/test_explorer_decomposition.py new file mode 100644 index 0000000..c8574b5 --- /dev/null +++ b/tests/test_explorer_decomposition.py @@ -0,0 +1,95 @@ +"""Tests for explorer.py decomposition (P3-001). + +Acceptance criteria: +- explorer.py must be under 1500 lines. +- Tab modules must define their build functions locally (not re-export from explorer). +- No circular imports between explorer.py and analysis.tabs. +""" + +import ast +import inspect +import pathlib + + +class TestExplorerDecomposition: + """RED test: explorer.py must be under 1500 lines.""" + + def test_explorer_line_count_under_1500(self): + path = pathlib.Path("explorer.py") + lines = path.read_text(encoding="utf-8").splitlines() + assert len(lines) < 1500, ( + f"explorer.py has {len(lines)} lines; target is < 1500. " + f"Extract tab functions and rendering helpers into analysis/tabs/." 
+ ) + + def test_tab_modules_define_functions_locally(self): + """Each tab module must define its build_*_tab without delegating to explorer.""" + tabs = [ + ("analysis/tabs/compass.py", "build_compass_tab"), + ("analysis/tabs/trajectories.py", "build_trajectories_tab"), + ("analysis/tabs/search.py", "build_search_tab"), + ("analysis/tabs/browser.py", "build_browser_tab"), + ("analysis/tabs/components.py", "build_svd_components_tab"), + ("analysis/tabs/quiz.py", "build_mp_quiz_tab"), + ] + for module_path, func_name in tabs: + source = pathlib.Path(module_path).read_text(encoding="utf-8") + tree = ast.parse(source) + func_def = None + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef) and node.name == func_name: + func_def = node + break + assert func_def is not None, ( + f"{module_path} must define {func_name}" + ) + # Ensure it's not a one-liner stub that imports from explorer + body = func_def.body + assert len(body) > 3, ( + f"{module_path}.{func_name} looks like a stub ({len(body)} lines). " + f"Extract the real implementation from explorer.py." 
+ ) + + def test_rendering_helpers_extracted(self): + """Rendering helpers should not live in explorer.py.""" + helpers = [ + "_render_scree_plot", + "_build_party_axis_figure", + "_render_party_axis_chart", + "_render_party_axis_chart_1d", + "_render_svd_time_trajectory", + "_render_voting_results", + "_add_y_direction_annotations", + ] + source = pathlib.Path("explorer.py").read_text(encoding="utf-8") + tree = ast.parse(source) + defined = {node.name for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)} + for helper in helpers: + assert helper not in defined, ( + f"{helper} should be extracted from explorer.py " + f"into analysis/tabs/_rendering.py" + ) + + def test_no_circular_import_tabs_to_explorer(self): + """Tab modules must not import from explorer.""" + tab_modules = [ + "analysis/tabs/compass.py", + "analysis/tabs/trajectories.py", + "analysis/tabs/search.py", + "analysis/tabs/browser.py", + "analysis/tabs/components.py", + "analysis/tabs/quiz.py", + "analysis/tabs/_rendering.py", + ] + for module_path in tab_modules: + if not pathlib.Path(module_path).exists(): + continue + source = pathlib.Path(module_path).read_text(encoding="utf-8") + assert "from explorer import" not in source, ( + f"{module_path} imports from explorer.py — " + f"move shared helpers to explorer_data.py or _rendering.py instead" + ) + assert "import explorer" not in source, ( + f"{module_path} imports explorer module — " + f"move shared helpers to explorer_data.py or _rendering.py instead" + ) diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py new file mode 100644 index 0000000..3c16787 --- /dev/null +++ b/tests/test_scheduler.py @@ -0,0 +1,159 @@ +"""Tests for scheduler.py — automated pipeline scheduling. + +TDD: write failing test, implement, refactor. 
+""" + +from __future__ import annotations + +import signal +from unittest.mock import MagicMock, patch + +import pytest + + +class TestPipelineSchedulerInit: + def test_default_db_path(self): + from scheduler import PipelineScheduler + + sched = PipelineScheduler() + assert sched.db_path == "data/motions.db" + assert not sched._running + + def test_custom_db_path(self): + from scheduler import PipelineScheduler + + sched = PipelineScheduler(db_path="/tmp/test.db") + assert sched.db_path == "/tmp/test.db" + + +class TestPipelineSchedulerRunPipeline: + def test_calls_pipeline_run_with_db_path(self): + from scheduler import PipelineScheduler + + sched = PipelineScheduler(db_path="/tmp/test.db") + with patch("scheduler.run_pipeline") as mock_run: + mock_run.return_value = 0 + sched.run_pipeline() + mock_run.assert_called_once() + # Verify args contain db_path via Namespace + args = mock_run.call_args[0][0] + assert args.db_path == "/tmp/test.db" + + def test_logs_error_on_pipeline_failure(self): + from scheduler import PipelineScheduler + + sched = PipelineScheduler() + with patch("scheduler.run_pipeline") as mock_run: + mock_run.side_effect = RuntimeError("pipeline failed") + with patch("scheduler._logger") as mock_logger: + result = sched.run_pipeline() + assert result == 1 + mock_logger.exception.assert_called_once() + + +class TestPipelineSchedulerRunSummarizer: + def test_calls_summarizer_update(self): + from scheduler import PipelineScheduler + + sched = PipelineScheduler() + with patch("scheduler.summarizer") as mock_summarizer: + sched.run_summarizer() + mock_summarizer.update_motion_summaries.assert_called_once() + + def test_logs_error_on_summarizer_failure(self): + from scheduler import PipelineScheduler + + sched = PipelineScheduler() + with patch("scheduler.summarizer") as mock_summarizer: + mock_summarizer.update_motion_summaries.side_effect = RuntimeError( + "summarizer failed" + ) + with patch("scheduler._logger") as mock_logger: + 
sched.run_summarizer() + mock_logger.exception.assert_called_once() + + +class TestPipelineSchedulerSchedule: + def test_schedule_daily_adds_job(self): + from scheduler import PipelineScheduler + + sched = PipelineScheduler() + with patch("scheduler.schedule") as mock_schedule: + mock_job = MagicMock() + mock_schedule.every.return_value.day.at.return_value.do = mock_job + sched.schedule_daily("02:00") + mock_schedule.every.assert_called_once() + + def test_schedule_summarizer_adds_job(self): + from scheduler import PipelineScheduler + + sched = PipelineScheduler() + with patch("scheduler.schedule") as mock_schedule: + mock_job = MagicMock() + mock_schedule.every.return_value.hour.do = mock_job + sched.schedule_summarizer(every_n_hours=6) + mock_schedule.every.assert_called_once() + + +class TestPipelineSchedulerLoop: + def test_start_runs_pending_jobs(self): + from scheduler import PipelineScheduler + + sched = PipelineScheduler() + call_count = 0 + + def _stop_after_first(*args, **kwargs): + nonlocal call_count + call_count += 1 + if call_count >= 3: + sched.stop() + + with patch("scheduler.schedule.run_pending") as mock_run_pending: + with patch("scheduler.time.sleep", side_effect=_stop_after_first): + with patch("scheduler.signal.signal"): + sched.start() + assert mock_run_pending.called + assert not sched._running + + def test_stop_sets_running_false(self): + from scheduler import PipelineScheduler + + sched = PipelineScheduler() + sched._running = True + sched.stop() + assert not sched._running + + def test_signal_handler_stops_scheduler(self): + from scheduler import PipelineScheduler + + sched = PipelineScheduler() + sched._running = True + with patch.object(sched, "stop") as mock_stop: + sched._signal_handler(signal.SIGINT, None) + mock_stop.assert_called_once() + + +class TestSchedulerCLI: + def test_main_parses_args(self): + from scheduler import main + + with patch("scheduler.PipelineScheduler") as mock_sched_class: + mock_sched = MagicMock() + 
mock_sched_class.return_value = mock_sched + rc = main(["--pipeline-time", "03:00"]) + assert rc == 0 + mock_sched_class.assert_called_once_with(db_path="data/motions.db") + mock_sched.schedule_daily.assert_called_once_with("03:00") + mock_sched.start.assert_called_once() + + def test_main_custom_db_path(self): + from scheduler import main + + with patch("scheduler.PipelineScheduler") as mock_sched_class: + mock_sched = MagicMock() + mock_sched.run_pipeline.return_value = 0 + mock_sched_class.return_value = mock_sched + rc = main(["--db-path", "/tmp/test.db", "--once"]) + assert rc == 0 + mock_sched_class.assert_called_once_with(db_path="/tmp/test.db") + mock_sched.run_pipeline.assert_called_once()