"""Parlement Explorer — Streamlit data analysis app. Four tabs: 1. Politiek Kompas — 2D scatter of MPs/parties, window slider 2. Partij Trajectories — party centroid lines over time 3. Motie Zoeken — text search + similarity lookup 4. Motie Browser — sortable table + detail panel Run with: streamlit run explorer.py Import-safe: heavy computation is behind @st.cache_data and only runs at UI time. All DuckDB connections are read_only=True so the app can run alongside the pipeline. """ from __future__ import annotations import json import logging import os import re import traceback from typing import Dict, List, Optional, Tuple try: import duckdb _DUCKDB_AVAILABLE = True except Exception: duckdb = None _DUCKDB_AVAILABLE = False import numpy as np import pandas as pd try: import plotly.express as px import plotly.graph_objects as go except Exception: # Plotly may be unavailable in lightweight test environments. Provide a tiny # local fallback that exposes a Figure-like object with `.data` and # `add_trace()` so unit tests can run without installing plotly. px = None import types class _DummyTrace: def __init__(self, **kwargs): # Preserve commonly-used attributes accessed by tests self.name = kwargs.get("name") self.x = kwargs.get("x") self.y = kwargs.get("y") self.text = kwargs.get("text") self.customdata = kwargs.get("customdata") class _DummyFigure: def __init__(self): self.data = [] def add_trace(self, trace): # plotly passes a Scatter object; our tests only inspect `.data` # elements for `.name` and `.customdata`. Accept both our # _DummyTrace and dict-like kwargs. if isinstance(trace, _DummyTrace): self.data.append(trace) else: # Some code may call go.Scatter(...) which returns an object; # if a mapping is passed here instead, coerce to _DummyTrace. try: # attempt attribute access name = getattr(trace, "name", None) x = getattr(trace, "x", None) y = getattr(trace, "y", None) text = getattr(trace, "text", None) customdata = getattr(trace, "customdata", None) except Exception: # Last resort: treat as mapping name = trace.get("name") if hasattr(trace, "get") else None x = trace.get("x") if hasattr(trace, "get") else None y = trace.get("y") if hasattr(trace, "get") else None text = trace.get("text") if hasattr(trace, "get") else None customdata = ( trace.get("customdata") if hasattr(trace, "get") else None ) self.data.append( _DummyTrace(name=name, x=x, y=y, text=text, customdata=customdata) ) def add_annotation(self, *args, **kwargs): # noop for tests that don't import full plotly return None go = types.SimpleNamespace( Figure=_DummyFigure, Scatter=lambda **kwargs: _DummyTrace(**kwargs) ) try: import streamlit as st except Exception: # Minimal dummy replacement for Streamlit used during tests / import-time. # We only need a tiny subset so unit tests can import explorer without # installing streamlit. All functions here are no-ops or simple fallbacks. class _DummySt: def cache_data(self, *args, **kwargs): def _decorator(func): return func return _decorator def markdown(self, *args, **kwargs): return None def subheader(self, *args, **kwargs): return None def plotly_chart(self, *args, **kwargs): return None def caption(self, *args, **kwargs): return None def text_area(self, *args, **kwargs): return None def json(self, *args, **kwargs): return None def checkbox(self, *args, **kwargs): # default to False unless value provided return kwargs.get("value", False) def warning(self, *args, **kwargs): return None def info(self, *args, **kwargs): return None def selectbox(self, *args, **kwargs): # return first option if options provided opts = ( kwargs.get("options") if kwargs.get("options") is not None else (args[1] if len(args) > 1 else []) ) return opts[0] if opts else None def multiselect(self, *args, **kwargs): opts = ( kwargs.get("options") if kwargs.get("options") is not None else (args[1] if len(args) > 1 else []) ) default = kwargs.get("default") if default is not None: return default return opts[:6] if opts else [] def number_input(self, *args, **kwargs): return kwargs.get("value") if "value" in kwargs else 1 def slider(self, *args, **kwargs): return kwargs.get("value") if "value" in kwargs else 0.35 def expander(self, *args, **kwargs): class _Ctx: def __enter__(self_inner): return self_inner def __exit__(self_inner, exc_type, exc, tb): return False return _Ctx() def columns(self, *args, **kwargs): # Return a tuple of simple objects with the methods used in the UI class _Col: def markdown(self, *a, **k): return None def metric(self, *a, **k): return None def dataframe(self, *a, **k): return None n = len(args[0]) if args else 1 return tuple(_Col() for _ in range(n)) st = _DummySt() # Temporary diagnostics for Trajectories plotting — set by instrumentation when # EXPLORER_DEBUG_TRAJECTORIES is enabled. This is intended to be small, opt-in and # reversible once root cause is found. _last_trajectories_diagnostics: dict = {} # Backwards/alternate name used by instrumentation: keep a second module-level # reference so callers/tests can look for either name. _last_diagnostics = _last_trajectories_diagnostics def get_debug_trajectories_enabled() -> bool: """Return True when EXPLORER_DEBUG_TRAJECTORIES env var indicates debug mode. Accepts '1', 'true', 'True'. Used as default for a per-tab checkbox. """ v = os.getenv("EXPLORER_DEBUG_TRAJECTORIES") return str(v) in ("1", "true", "True") from explorer_helpers import ( inspect_positions_for_issues, compute_party_centroids, ) def select_trajectory_plot_data( positions_by_window: Dict[str, Dict[str, Tuple[float, float]]], party_map: Dict[str, str], windows: List[str], selected_parties: List[str], smooth_alpha: float = 0.35, mp_fallback_count: Optional[int] = None, ) -> Tuple[go.Figure, int, Optional[str]]: """Return (fig, trace_count, banner_text). Helper used by build_trajectories_tab. Does not call Streamlit. """ # Use env var default if not provided if mp_fallback_count is None: try: mp_fallback_count = int(os.getenv("EXPLORER_MP_FALLBACK_COUNT", "20")) except Exception: mp_fallback_count = 20 # Compute per-party centroids aligned to windows party_centroids, meta = compute_party_centroids( positions_by_window, party_map, windows ) # Use inspector to collect diagnostics (import-safe, pure helper). Keep this # call local to the helper to ensure the inspector is exercised and the # diagnostics are available for logging/debugging. Do not call Streamlit # from here so the function remains import-safe for tests. try: inspector_summary = inspect_positions_for_issues(positions_by_window, party_map) except Exception: # Capture traceback diagnostics so callers (and tests) can inspect what went wrong. tb = traceback.format_exc() inspector_summary = {} try: # Attach diagnostics to the helper function for callers that want to inspect # the last error directly on the function object. select_trajectory_plot_data._last_diagnostics = { "stage": "inspector_exception", "exception": tb, } except Exception: # best-effort only pass try: # Also update the module-level trajectories diagnostics so the UI can show # a compact summary when debugging is enabled. _last_trajectories_diagnostics.update( {"stage": "inspector_exception", "exception": tb} ) except Exception: pass logger.debug("select_trajectory_plot_data inspector summary: %s", inspector_summary) # Determine which parties have at least one non-nan centroid plottable_parties = [] for p, vals in party_centroids.items(): has_valid = any(not (np.isnan(x) and np.isnan(y)) for x, y in vals) if has_valid: plottable_parties.append(p) fig = go.Figure() trace_count = 0 banner_text: Optional[str] = None def _ema_smooth(values: List[float], alpha: float) -> List[float]: if not values or alpha >= 1.0: return values smoothed: List[float] = [] prev = None for v in values: if v is None or (isinstance(v, float) and np.isnan(v)): smoothed.append(float(np.nan)) continue v = float(v) if prev is None: prev = v else: prev = alpha * v + (1 - alpha) * prev smoothed.append(float(prev)) return smoothed # If no plottable parties, fallback to MP trajectories if not plottable_parties: # Build mp_positions across windows mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {} for wid in windows: pos = positions_by_window.get(wid, {}) for mp_name, xy in pos.items(): try: x, y = float(xy[0]), float(xy[1]) except Exception: continue mp_positions.setdefault(mp_name, {})[wid] = (x, y) # Rank MPs by activity (number of windows with positions) mp_activity = sorted( [(mp, len(wdict)) for mp, wdict in mp_positions.items()], key=lambda t: t[1], reverse=True, ) top_mps = [mp for mp, _ in mp_activity[:mp_fallback_count]] for mp in top_mps: wids_sorted = sorted(mp_positions.get(mp, {}).keys()) if not wids_sorted: continue xs_raw = [mp_positions[mp][w][0] for w in wids_sorted] ys_raw = [mp_positions[mp][w][1] for w in wids_sorted] xs = _ema_smooth(xs_raw, smooth_alpha) ys = _ema_smooth(ys_raw, smooth_alpha) custom_raw = [ ( float(rx) if rx is not None else float(np.nan), float(ry) if ry is not None else float(np.nan), ) for rx, ry in zip(xs_raw, ys_raw) ] fig.add_trace( go.Scatter( x=xs, y=ys, mode="lines+markers", name=mp, text=wids_sorted, customdata=custom_raw, line=dict(color="#888888", shape="spline", smoothing=1.3), marker=dict(color="#888888", size=6), ) ) trace_count += 1 banner_text = "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback." return fig, trace_count, banner_text # Otherwise plot party centroids for selected parties intersecting plottable to_plot = [p for p in selected_parties if p in plottable_parties] # If none selected, default to all plottable if not to_plot: to_plot = plottable_parties for party in to_plot: vals = party_centroids.get(party, []) if not vals: continue xs_raw = [v[0] for v in vals] ys_raw = [v[1] for v in vals] xs = _ema_smooth(xs_raw, smooth_alpha) ys = _ema_smooth(ys_raw, smooth_alpha) # Ensure customdata preserves NaNs custom_raw = [ ( float(x) if (x is not None and not np.isnan(x)) else float(np.nan), float(y) if (y is not None and not np.isnan(y)) else float(np.nan), ) for x, y in zip(xs_raw, ys_raw) ] colour = PARTY_COLOURS.get(party, "#9E9E9E") fig.add_trace( go.Scatter( x=xs, y=ys, mode="lines+markers", name=party, text=windows, customdata=custom_raw, line=dict(color=colour, shape="spline", smoothing=1.3), marker=dict(color=colour, size=8), ) ) trace_count += 1 return fig, trace_count, None logger = logging.getLogger(__name__) # Party colour palette (consistent across tabs) PARTY_COLOURS: Dict[str, str] = { "VVD": "#1E73BE", "PVV": "#002366", "D66": "#00A36C", "CDA": "#4CAF50", "SP": "#E53935", "PvdA": "#D32F2F", "GroenLinks": "#388E3C", "GroenLinks-PvdA": "#2E7D32", "CU": "#0288D1", "SGP": "#F4511E", "PvdD": "#43A047", "FVD": "#6A1B9A", "JA21": "#7B1FA2", "BBB": "#8D6E63", "NSC": "#FF8F00", "Nieuw Sociaal Contract": "#FF8F00", # alias used in mp_metadata "DENK": "#00897B", "50PLUS": "#7E57C2", "Volt": "#572AB7", "ChristenUnie": "#0288D1", "Unknown": "#9E9E9E", } # Ordered list of well-known parties for trajectory default selection. # Keeps the chart readable without overwhelming users with all parties. KNOWN_MAJOR_PARTIES = [ "VVD", "PVV", "D66", "GroenLinks-PvdA", "GroenLinks", "PvdA", "CDA", "SP", "NSC", "CU", "BBB", ] # Parties currently seated in the Tweede Kamer (2023 election cycle). # Deze zijn de entity_ids zoals opgeslagen in svd_vectors voor window='2025'. CURRENT_PARLIAMENT_PARTIES: frozenset[str] = frozenset( { "PVV", "VVD", "NSC", "BBB", "D66", "GroenLinks-PvdA", "CDA", "SP", "ChristenUnie", "SGP", "Volt", "DENK", "PvdD", "JA21", "FVD", } ) # Normalize variant party names to canonical display names in CURRENT_PARLIAMENT_PARTIES _PARTY_NORMALIZE: dict[str, str] = { "Nieuw Sociaal Contract": "NSC", "CU": "ChristenUnie", "GL": "GroenLinks-PvdA", "GroenLinks": "GroenLinks-PvdA", "PvdA": "GroenLinks-PvdA", "Gündoğan": "Volt", # confirmed Volt, left parliament 2023-12-05 "Lid Keijzer": "BBB", # Keijzer left CDA, joined BBB cabinet "Groep Markuszower": "PVV", # Markuszower sits with PVV faction } # --------------------------------------------------------------------------- # Cached loaders # --------------------------------------------------------------------------- @st.cache_data(show_spinner="Beschikbare tijdsvensters laden…") def get_available_windows(db_path: str) -> List[str]: """Return sorted list of distinct window_ids from svd_vectors.""" con = duckdb.connect(database=db_path, read_only=True) try: rows = con.execute( "SELECT DISTINCT window_id FROM svd_vectors ORDER BY window_id" ).fetchall() return [r[0] for r in rows] except Exception: logger.exception("Failed to query available windows") return [] finally: con.close() @st.cache_data(show_spinner=False) def get_uniform_dim_windows(db_path: str) -> List[str]: """Return only windows whose dominant MP-vector dimension is 50. Some windows contain a mix of vector lengths due to multiple pipeline runs (e.g. 2016 has both dim=1 and dim=50 rows). We find the most common dimension per window and include only windows where that dominant dim equals 50. Windows with too few dim-50 entities (< 10) are also excluded to avoid degenerate PCA inputs. """ con = duckdb.connect(database=db_path, read_only=True) try: rows = con.execute( """ WITH vec_dims AS ( SELECT window_id, json_array_length(vector) AS dim FROM svd_vectors WHERE entity_type = 'mp' ), window_dim_counts AS ( SELECT window_id, dim, COUNT(*) AS cnt FROM vec_dims GROUP BY window_id, dim ), dominant AS ( SELECT DISTINCT ON (window_id) window_id, dim, cnt FROM window_dim_counts ORDER BY window_id, cnt DESC, dim DESC ) SELECT window_id FROM dominant WHERE dim >= 25 AND cnt >= 10 ORDER BY window_id """ ).fetchall() return [r[0] for r in rows] except Exception: logger.exception("Failed to query uniform-dim windows") return [] finally: con.close() def _should_swap_axes(axis_def: dict) -> bool: """Return True if the Y axis is 'Links–Rechts' and the X axis is not. When true, caller should swap x/y positions and metadata so left-right is conventionally on the horizontal axis. """ lr = "Links\u2013Rechts" return axis_def.get("y_label") == lr and axis_def.get("x_label") != lr def _swap_axes( positions_by_window: dict, axis_def: dict, ) -> tuple: """Swap x and y in all positions and axis metadata. Pure function — returns (new_positions_by_window, new_axis_def). """ new_positions: dict = {} for wid, pos_dict in positions_by_window.items(): new_positions[wid] = {ent: (y, x) for ent, (x, y) in pos_dict.items()} new_ax = dict(axis_def) # Non-paired keys pass through unchanged # Swap paired scalar keys new_ax["x_label"] = axis_def.get("y_label") new_ax["y_label"] = axis_def.get("x_label") # Swap paired dict keys for x_key, y_key in [ ("x_quality", "y_quality"), ("x_interpretation", "y_interpretation"), ("x_top_motions", "y_top_motions"), ("x_label_confidence", "y_label_confidence"), ("x_axis", "y_axis"), ]: new_ax[x_key] = axis_def.get(y_key) new_ax[y_key] = axis_def.get(x_key) return new_positions, new_ax def _render_axis_motions(label: str, conf_pct: str, top: dict) -> None: st.markdown(f"**{label}**{conf_pct}") for sign, icon in (("+", "➕"), ("-", "➖")): titles = top.get(sign, []) if titles: st.markdown( "  " + icon + " " + " · ".join(f"{t} ({d})" for t, d in titles[:3]) ) @st.cache_data(show_spinner="2D posities berekenen (kan even duren)…") def load_positions( db_path: str, window_size: str = "quarterly" ) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict]: """Compute 2D positions per window using PCA on aligned SVD vectors. Returns: positions_by_window: {window_id: {entity_name: (x, y)}} axis_def: dict with x_axis, y_axis, method keys """ from analysis.political_axis import compute_2d_axes # Always compute PCA on ALL uniform-dim windows (quarterly + annual) so that # the principal components are determined by the full temporal spread of data. # Using only annual windows (11) causes PC1 to capture cross-temporal drift # instead of left-right ideology, resulting in a ~90° rotation. all_available = get_uniform_dim_windows(db_path) if not all_available: return {}, {} positions_by_window, axis_def = compute_2d_axes( db_path, window_ids=all_available, method="pca", pca_residual=True, normalize_vectors=True, ) try: from analysis.axis_classifier import classify_axes axis_def = classify_axes(positions_by_window, axis_def, db_path) except Exception: import logging logging.getLogger(__name__).exception( "classify_axes failed; using generic axis labels" ) # Axis orientation is guaranteed by compute_2d_axes via canonical party anchors # (Procrustes alignment + sign-fixing). We do NOT forcibly override axis labels # here so the classifier output (if available) can be surfaced conditionally in # the UI based on per-window confidence. Label selection is performed at render # time in the tabs so we can show fallback labels while still surfacing the # classifier interpretation and confidence when informative. # Filter displayed windows by window_size AFTER PCA computation. if window_size == "annual": annual_keys = set(w for w in all_available if "-Q" not in w) positions_by_window = { w: v for w, v in positions_by_window.items() if w in annual_keys } return positions_by_window, axis_def @st.cache_data(show_spinner="Partijkaart laden…") def load_party_map(db_path: str) -> Dict[str, str]: """Return {mp_name: party} mapping, with party names normalised to abbreviations.""" from analysis.visualize import _load_party_map _PARTY_ALIASES: Dict[str, str] = { "Nieuw Sociaal Contract": "NSC", } try: raw = _load_party_map(db_path) return {mp: _PARTY_ALIASES.get(party, party) for mp, party in raw.items()} except Exception: logger.exception("Failed to load party map") return {} @st.cache_data(show_spinner="Actieve Kamerleden laden…") def load_active_mps(db_path: str) -> set: """Return the set of mp_name values that are currently seated in parliament. An MP is considered active if their mp_metadata row has tot_en_met IS NULL, meaning they have no recorded end date for their current seat. """ try: con = duckdb.connect(database=db_path, read_only=True) rows = con.execute( "SELECT mp_name FROM mp_metadata WHERE tot_en_met IS NULL" ).fetchall() con.close() return {r[0] for r in rows} except Exception: logger.exception("Failed to load active MPs") return set() def compute_party_discipline( db_path: str, start_date: str, end_date: str, ) -> pd.DataFrame: """Compute per-party voting discipline (Rice index) for roll-call votes in a date range. Only individual MP vote rows are used (mp_name LIKE '%,%'). Returns a DataFrame with columns [party, n_motions, discipline] sorted by discipline ascending. Returns an empty DataFrame if fewer than 1 qualifying motion exists or on any DB error. Rice index per motion per party = fraction of party MPs voting with the party majority. The per-party score is the average Rice index across all motions in the date range. Only 'voor' and 'tegen' votes are counted; absent and abstaining MPs are excluded from the Rice index calculation. """ conn = None try: conn = duckdb.connect(db_path, read_only=True) result = conn.execute( """ WITH individual_votes AS ( SELECT motion_id, party, LOWER(vote) AS vote FROM mp_votes WHERE mp_name LIKE '%,%' AND date >= CAST(? AS DATE) AND date <= CAST(? AS DATE) AND vote IN ('voor', 'tegen') ), vote_counts AS ( SELECT motion_id, party, vote, COUNT(*) AS cnt FROM individual_votes GROUP BY motion_id, party, vote ), majority_vote AS ( SELECT motion_id, party, FIRST(vote ORDER BY cnt DESC, vote ASC) AS maj_vote, SUM(cnt) AS total_mp_votes FROM vote_counts GROUP BY motion_id, party ), rice_per_motion AS ( SELECT mv.motion_id, mv.party, SUM(CASE WHEN vc.vote = mv.maj_vote THEN vc.cnt ELSE 0 END) * 1.0 / mv.total_mp_votes AS rice FROM majority_vote mv JOIN vote_counts vc ON mv.motion_id = vc.motion_id AND mv.party = vc.party GROUP BY mv.motion_id, mv.party, mv.total_mp_votes ) SELECT party, COUNT(DISTINCT motion_id) AS n_motions, AVG(rice) AS discipline FROM rice_per_motion GROUP BY party ORDER BY discipline ASC """, [start_date, end_date], ).fetchdf() return result except Exception as exc: logger.warning("compute_party_discipline failed: %s", exc) return pd.DataFrame(columns=["party", "n_motions", "discipline"]) finally: if conn is not None: try: conn.close() except Exception: pass def _load_mp_vectors_by_party(db_path: str) -> Dict[str, List[np.ndarray]]: """Load individual MP SVD vectors grouped by party. Queries mp_metadata for the mp→party mapping (latest assignment during the current parliament), normalises party names, loads SVD vectors from the ``current_parliament`` window, and filters to CURRENT_PARLIAMENT_PARTIES. Returns: {party_name: [np.ndarray(50,), ...]} — one array per MP. """ con = duckdb.connect(database=db_path, read_only=True) try: # Build mp → party mapping. ORDER BY van ASC so latest assignment wins # via last-write-wins when an MP switched party. meta_rows = con.execute( "SELECT mp_name, party FROM mp_metadata " "WHERE van >= '2023-11-22' OR tot_en_met IS NULL OR tot_en_met >= '2023-11-22' " "ORDER BY van ASC" ).fetchall() mp_party: Dict[str, str] = {} for mp_name, party in meta_rows: if mp_name and party: mp_party[mp_name] = _PARTY_NORMALIZE.get(party, party) # Individual MP vectors from current_parliament rows = con.execute( "SELECT entity_id, vector FROM svd_vectors " "WHERE entity_type='mp' AND window_id='current_parliament'" ).fetchall() party_vecs: Dict[str, List[np.ndarray]] = {} for entity_id, raw_vec in rows: party = mp_party.get(entity_id) if party is None or party not in CURRENT_PARLIAMENT_PARTIES: continue if isinstance(raw_vec, str): vec = json.loads(raw_vec) elif isinstance(raw_vec, (bytes, bytearray)): vec = json.loads(raw_vec.decode()) elif isinstance(raw_vec, list): vec = raw_vec else: try: vec = list(raw_vec) except Exception: continue fvec = np.array([float(v) if v is not None else 0.0 for v in vec]) party_vecs.setdefault(party, []).append(fvec) return party_vecs finally: try: con.close() except Exception: pass @st.cache_data(show_spinner="Partijposities op SVD-assen laden…") def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]: """Return per-party SVD vectors, computed as mean of individual MP vectors. Loads individual MP rows from window='current_parliament', assigns each MP their party, then averages SVD vectors per party. Returns: {party_name: [float * k]} — k = 50, mean over all MPs in that party. """ try: party_vecs = _load_mp_vectors_by_party(db_path) return { party: np.array(vecs).mean(axis=0).tolist() for party, vecs in party_vecs.items() } except Exception: logger.exception("Failed to load party axis scores") return {} @st.cache_data(show_spinner="Partij-MP vectoren laden…") def load_party_mp_vectors(db_path: str) -> Dict[str, List[np.ndarray]]: """Return per-party lists of individual MP SVD vectors. Same MP→party mapping as load_party_axis_scores(), suitable for bootstrap CI computation. Returns: {party_name: [np.ndarray(50,), ...]} — one array per MP. """ try: return _load_mp_vectors_by_party(db_path) except Exception: logger.exception("Failed to load party MP vectors") return {} @st.cache_data(show_spinner="Bootstrap CI berekenen…") def _cached_bootstrap_cis( party_mp_vectors: Dict[str, List[np.ndarray]], ) -> Dict[str, Dict]: """Thin caching wrapper around compute_party_bootstrap_cis.""" from analysis.political_axis import compute_party_bootstrap_cis return compute_party_bootstrap_cis(party_mp_vectors) @st.cache_data(show_spinner="Scree-plot laden…") def load_scree_data(db_path: str) -> List[float]: """Return explained variance ratios (%) for all SVD components, sorted descending. Uses the same Procrustes-aligned multi-window matrix as the compass axes so the scree plot is consistent with what the compass actually uses. """ try: from analysis.political_axis import compute_svd_spectrum return compute_svd_spectrum(db_path) except Exception: logger.exception("Failed to load scree data") return [] def _render_scree_plot(importances: List[float], n_show: int = 15) -> None: """Render a scree plot showing relative SVD component importance. Highlighted bars for the top-2 components (used in the compass); muted bars for the rest. A cumulative-variance dashed line on the same y-axis helps spot the elbow. A 50 % cumulative threshold line is drawn for reference. Args: importances: List of importance values sorted descending (from load_scree_data). n_show: How many components to display (default: first 15). """ if not importances: return # importances are already EVR percentages summing to ~100 over all components. # Slice to n_show for display; cumulative line shows how much variance is covered. data = list(importances[:n_show]) ranks = list(range(1, len(data) + 1)) # Cumulative variance for the dashed overlay line cumsum = [] running = 0.0 for v in data: running += v cumsum.append(running) # Colour: first 2 bars highlighted (compass axes), rest muted n_highlight = 2 bar_colours = [ "#1565C0" if i < n_highlight else "#90CAF9" for i in range(len(data)) ] fig = go.Figure() # Bars fig.add_trace( go.Bar( x=ranks, y=data, marker_color=bar_colours, hovertemplate="As %{x}
%{y:.1f}% verklaarde variantie", showlegend=False, ) ) # Cumulative variance line (dashed, warm amber) fig.add_trace( go.Scatter( x=ranks, y=cumsum, mode="lines+markers", line={"color": "#F57C00", "width": 2, "dash": "dot"}, marker={"size": 5, "color": "#F57C00"}, hovertemplate="As %{x}
Cumulatief: %{y:.1f}%", name="Cumulatief", showlegend=True, ) ) # 50 % reference line fig.add_hline( y=50, line_dash="dash", line_color="#BDBDBD", line_width=1, annotation_text="50%", annotation_position="right", annotation_font_color="#9E9E9E", annotation_font_size=11, ) # Annotations on the top-2 bars showing their % value for i in range(min(n_highlight, len(data))): fig.add_annotation( x=ranks[i], y=data[i] + 0.3, text=f"{data[i]:.1f}%", showarrow=False, font={"size": 11, "color": "#1565C0"}, yanchor="bottom", ) fig.update_layout( height=280, margin={"l": 10, "r": 50, "t": 30, "b": 40}, title={ "text": "Belang per SVD-as", "font": {"size": 13, "color": "#555555"}, "x": 0.02, "xanchor": "left", }, legend={ "orientation": "h", "x": 0.5, "xanchor": "center", "y": 1.08, "font": {"size": 11}, }, xaxis={ "title": {"text": "As (rang)", "font": {"size": 11}}, "tickmode": "linear", "tick0": 1, "dtick": 1, "showline": False, "showgrid": False, }, yaxis={ "title": {"text": "% van totale variantie", "font": {"size": 11}}, "showline": False, "showgrid": True, "gridcolor": "#eeeeee", "ticksuffix": "%", "range": [0, max(cumsum) * 1.08], }, plot_bgcolor="rgba(0,0,0,0)", paper_bgcolor="rgba(0,0,0,0)", bargap=0.25, ) st.plotly_chart(fig, use_container_width=True) def _build_party_axis_figure( party_coords: Dict[str, Tuple[float, float]], comp_sel: int, theme: dict, bootstrap_data: Optional[Dict[str, Dict]] = None, ) -> Optional[go.Figure]: """Build a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`. Accepts explicit per-party 2D coordinates (x,y) and uses the component selection to pick the value (comp_sel==1 -> x, comp_sel==2 -> y). This makes the API explicit and avoids indexing into long SVD vectors. Returns go.Figure or None if no data available. """ if not party_coords: return None if comp_sel not in (1, 2): raise ValueError( "_build_party_axis_figure only supports comp_sel 1 or 2 when using explicit coords" ) axis_idx = comp_sel - 1 flip = theme.get("flip", False) parties = [] scores = [] colours = [] # Support two shapes for party_coords: # - explicit 2D coords: (x, y) # - full SVD vectors (len>2) where we should pick the axis_idx element for party, val in party_coords.items(): try: # explicit (x, y) if hasattr(val, "__len__") and len(val) == 2: x, y = val score = float(x if axis_idx == 0 else y) else: # treat as sequence/array-like of full SVD vector score = float(val[axis_idx]) if flip: score = -score except Exception: # skip malformed entries silently continue parties.append(party) scores.append(score) colours.append(PARTY_COLOURS.get(party, "#9E9E9E")) if not scores: return None # Build hover text: include N when bootstrap data available hover = [] symbols = [] if bootstrap_data: for p, s in zip(parties, scores): bd = bootstrap_data.get(p) if bd: n_mps = bd.get("n_mps", "?") ci_low = None ci_high = None try: ci_low = float(bd["ci_lower"][axis_idx]) ci_high = float(bd["ci_upper"][axis_idx]) except Exception: pass if ci_low is not None and ci_high is not None: hover.append( f"{p}: {s:.3f} (N={n_mps}, 95%-BI: [{ci_low:.3f}, {ci_high:.3f}])" ) else: hover.append(f"{p}: {s:.3f} (N={n_mps})") symbols.append("diamond" if n_mps == 1 else "circle") else: hover.append(f"{p}: {s:.3f}") symbols.append("circle") marker_kwargs = {"size": 14, "color": colours, "symbol": symbols} else: hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)] marker_kwargs = {"size": 14, "color": colours} fig = go.Figure() x_min, x_max = min(scores) * 1.15, max(scores) * 1.15 if x_min == x_max: x_min, x_max = x_min - 1, x_max + 1 fig.add_trace( go.Scatter( x=[x_min, x_max], y=[0, 0], mode="lines", line={"color": "#cccccc", "width": 1}, hoverinfo="skip", showlegend=False, ) ) scatter_kwargs = { "x": scores, "y": [0] * len(scores), "mode": "markers+text", "text": parties, "textposition": "top center", "marker": marker_kwargs, "hovertext": hover, "hoverinfo": "text", "showlegend": False, } fig.add_trace(go.Scatter(**scatter_kwargs)) pos_pole = theme.get("positive_pole", "") neg_pole = theme.get("negative_pole", "") left_label = pos_pole if flip else neg_pole right_label = neg_pole if flip else pos_pole fig.update_layout( height=160, margin={"l": 10, "r": 10, "t": 10, "b": 30}, xaxis={ "title": f"← {left_label} | {right_label} →", "showticklabels": False, "showline": False, "showgrid": False, "zeroline": False, }, yaxis={"visible": False, "range": [-1, 2]}, plot_bgcolor="rgba(0,0,0,0)", paper_bgcolor="rgba(0,0,0,0)", ) return fig def _render_party_axis_chart( party_coords: Dict[str, Tuple[float, float]], comp_sel: int, theme: dict, bootstrap_data: Optional[Dict[str, Dict]] = None, ) -> None: """Render a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`. Expects explicit per-party coords mapping (party -> (x,y)) for components 1 & 2. """ fig = _build_party_axis_figure(party_coords, comp_sel, theme, bootstrap_data) if fig is None: st.caption("_Partijdata niet beschikbaar voor deze as._") return st.plotly_chart(fig, use_container_width=True) @st.cache_data(show_spinner="Moties laden…") def load_motions_df(db_path: str) -> pd.DataFrame: """Load the full motions table as a pandas DataFrame (read-only).""" con = duckdb.connect(database=db_path, read_only=True) try: df = con.execute( """ SELECT id, title, description, date, policy_area, voting_results, layman_explanation, winning_margin, controversy_score, url FROM motions """ ).fetchdf() df["date"] = pd.to_datetime(df["date"], errors="coerce") df["year"] = df["date"].dt.year return df except Exception: logger.exception("Failed to load motions") return pd.DataFrame() finally: con.close() def query_similar( db_path: str, source_motion_id: int, vector_type: str = "fused", top_k: int = 10, ) -> pd.DataFrame: """Return top-k similar motions from similarity_cache (read-only).""" con = duckdb.connect(database=db_path, read_only=True) try: rows = con.execute( """ SELECT sc.target_motion_id, sc.score, sc.window_id, m.title, m.date, m.policy_area FROM similarity_cache sc JOIN motions m ON m.id = sc.target_motion_id WHERE sc.source_motion_id = ? AND sc.vector_type = ? ORDER BY sc.score DESC LIMIT ? """, [source_motion_id, vector_type, top_k], ).fetchdf() return rows except Exception: logger.exception( "Failed to query similarity cache for motion %s", source_motion_id ) return pd.DataFrame() finally: con.close() # --------------------------------------------------------------------------- # Shared rendering helpers # --------------------------------------------------------------------------- def _render_voting_results(voting_results_json) -> None: """Render a voting_results JSON blob as a grouped voor/tegen/onthouden table. The JSON is stored as {party_or_mp: vote} where vote is one of 'voor', 'tegen', 'onthouden', 'afwezig'. We group by vote for readability. """ if not voting_results_json: return try: vdata = ( json.loads(voting_results_json) if isinstance(voting_results_json, str) else voting_results_json ) if not isinstance(vdata, dict) or not vdata: return # Group {vote: [actor, ...]} by_vote: Dict[str, List[str]] = {} for actor, vote in vdata.items(): vote_str = str(vote).lower().strip() by_vote.setdefault(vote_str, []).append(str(actor)) # Render in fixed order vote_order = ["voor", "tegen", "onthouden", "afwezig"] vote_emoji = {"voor": "✅", "tegen": "❌", "onthouden": "🟡", "afwezig": "⬜"} rows_shown = False for v in vote_order + [k for k in by_vote if k not in vote_order]: actors = by_vote.get(v) if not actors: continue emoji = vote_emoji.get(v, "▪️") st.markdown( f"**{emoji} {v.capitalize()}** ({len(actors)}): {', '.join(sorted(actors))}" ) rows_shown = True if not rows_shown: st.caption("_Geen stemuitslag beschikbaar_") except Exception: pass # --------------------------------------------------------------------------- # Tab 1: Politiek Kompas # --------------------------------------------------------------------------- def _add_y_direction_annotations(fig: go.Figure) -> None: """Add ▲ Progressief / ▼ Conservatief labels above and below the Y axis.""" common = dict( xref="paper", yref="paper", x=-0.07, showarrow=False, font=dict(size=11, color="#666666"), ) fig.add_annotation(**common, y=1.02, text="▲ Progressief", xanchor="center") fig.add_annotation(**common, y=-0.06, text="▼ Conservatief", xanchor="center") def _window_to_dates(window_id: str) -> tuple[str, str]: """Return (start_date, end_date) ISO strings for a given window_id. Annual windows like '2024' → ('2024-01-01', '2024-12-31'). 'current_parliament' → ('2023-11-22', '2099-12-31') (2023 formation date, open end). Unknown formats → ('2000-01-01', '2099-12-31') (effectively all time). """ if window_id == "current_parliament": return ("2023-11-22", "2099-12-31") if re.fullmatch(r"\d{4}", window_id): return (f"{window_id}-01-01", f"{window_id}-12-31") m = re.fullmatch(r"(\d{4})-Q([1-4])", window_id) if m: year, q = int(m.group(1)), int(m.group(2)) starts = {1: "01-01", 2: "04-01", 3: "07-01", 4: "10-01"} ends = {1: "03-31", 2: "06-30", 3: "09-30", 4: "12-31"} return (f"{year}-{starts[q]}", f"{year}-{ends[q]}") return ("2000-01-01", "2099-12-31") def build_compass_tab(db_path: str, window_size: str) -> None: st.subheader("Politiek Kompas") st.markdown( "2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)." ) # Compass always uses annual windows regardless of the sidebar window_size setting. positions_by_window, axis_def = load_positions(db_path, "annual") # load_positions may return None for axis_def when resources are missing # (e.g. classifier fallback or failed enrichment). Guard so UI rendering # code doesn't crash on axis_def.get calls. if axis_def is None: axis_def = {} if not positions_by_window: st.warning( "Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid." ) return party_map = load_party_map(db_path) active_mps = load_active_mps(db_path) # Sort windows: year windows first (ascending), current_parliament last. year_windows = sorted(w for w in positions_by_window if w != "current_parliament") has_current = "current_parliament" in positions_by_window windows = year_windows + (["current_parliament"] if has_current else []) # Motion counts per year — sparse years get a warning label. _SPARSE_YEARS = {"2016", "2017", "2018"} _THRESHOLD = 0.65 def _window_label(w: str) -> str: if w == "current_parliament": return "Huidig parlement" if w in _SPARSE_YEARS: return f"{w} ⚠️" return w col1, col2 = st.columns([3, 1]) with col2: window_idx = st.selectbox( "Jaar", options=windows, index=len(windows) - 1, # default: current_parliament format_func=_window_label, ) level = st.radio( "Weergave", options=["Kamerleden", "Partijen"], index=0, horizontal=True, ) min_mps = st.number_input( "Min. Kamerleden per partij", min_value=1, max_value=20, value=3, step=1, help="Partijen met minder dan dit aantal zetels worden niet weergegeven.", ) pos = positions_by_window.get(window_idx, {}) if not pos: st.info(f"Geen data voor venster {window_idx}") return # For current_parliament, restrict to MPs who are still seated (tot_en_met IS NULL). # Historical windows include all MPs active at the time — no restriction needed. if window_idx == "current_parliament": pos = {mp: xy for mp, xy in pos.items() if mp in active_mps} # Deduplicate MPs whose names appear both with and without a parenthetical first name, # e.g. "Dijk, J.P." and "Dijk, J.P. (Jimmy)". Keep the canonical (stripped) name and # average positions if both variants are present. def _strip_paren(name: str) -> str: return re.sub(r"\s*\([^)]*\)", "", name).strip() deduped: Dict[str, Tuple[float, float]] = {} for name, (x, y) in pos.items(): base = _strip_paren(name) if base in deduped: ox, oy = deduped[base] deduped[base] = ((ox + x) / 2, (oy + y) / 2) else: deduped[base] = (x, y) pos = deduped rows = [] for name, (x, y) in pos.items(): party = party_map.get(name) or party_map.get(_strip_paren(name), "Unknown") rows.append({"name": name, "x": x, "y": y, "party": party}) df_pos = pd.DataFrame(rows) # Drop parties below the minimum MP threshold (unreliable centroids). party_counts = df_pos[df_pos["party"] != "Unknown"]["party"].value_counts() valid_parties = set(party_counts[party_counts >= min_mps].index) df_pos = df_pos[df_pos["party"].isin(valid_parties)] if df_pos.empty: st.info("Geen partijen met genoeg Kamerleden voor dit venster.") return # The first two SVD axes are clear, interpretable axes for our dataset. # Show the classifier-provided full labels on the compass unconditionally # so users see the canonical interpretation. We keep the confidence-based # captions/interpretations in the expander but do not hide the axis titles # for the compass. Note: the vertical axis title is rotated by Plotly — # this can make "Progressief–Conservatief" look reversed because the word # "Progressief" appears at the top when rendered; we therefore add explicit # directional annotations to make the polarity unambiguous. # Prefer classifier-provided labels for the first two axes. However, the # classifier sometimes returns the concise numeric fallbacks "As 1"/"As 2" # when it couldn't find an interpretable label. For the compass we prefer # conventional semantic defaults instead of the generic "As N" strings so # the chart remains readable. _raw_x = axis_def.get("x_label") _raw_y = axis_def.get("y_label") # Use the classifier helper to map internal/modal labels (e.g. "As 1") to # user-facing labels. Import at function-time to avoid module import cycles # and keep explorer lightweight. If the helper is unavailable fall back to # conventional semantic defaults so the UI remains readable. try: from analysis.axis_classifier import display_label_for_modal _x_label = display_label_for_modal(_raw_x, "x") _y_label = display_label_for_modal(_raw_y, "y") except Exception: _x_label = _raw_x or "Links\u2013Rechts" _y_label = _raw_y or "Progressief\u2013Conservatief" if level == "Partijen": # Aggregate to party centroids df_party = df_pos.groupby("party", as_index=False).agg( x=("x", "mean"), y=("y", "mean"), n=("name", "count") ) df_party["name"] = df_party["party"] colour_map = { p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_party["party"].unique() } fig = px.scatter( df_party, x="x", y="y", color="party", text="party", hover_name="party", hover_data={"party": False, "x": ":.3f", "y": ":.3f", "n": True}, color_discrete_map=colour_map, title=f"Politiek Kompas — {_window_label(window_idx)} (partijen)", labels={ "x": _x_label, "y": _y_label, "n": "Kamerleden", }, ) fig.update_traces(textposition="top center", marker_size=14) else: colour_map = { p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique() } fig = px.scatter( df_pos, x="x", y="y", color="party", hover_name="name", hover_data={"party": True, "x": ":.3f", "y": ":.3f"}, color_discrete_map=colour_map, title=f"Politiek Kompas — {_window_label(window_idx)}", labels={"x": _x_label, "y": _y_label}, ) fig.update_layout( height=600, legend_title_text="Partij", xaxis={"range": [-1, 1]}, yaxis={"range": [-0.6, 0.6]}, ) _add_y_direction_annotations(fig) with col1: st.plotly_chart(fig, use_container_width=True) _x_interp = axis_def.get("x_interpretation", {}).get(window_idx, "") _y_interp = axis_def.get("y_interpretation", {}).get(window_idx, "") if ( _x_interp and axis_def.get("x_quality", {}).get(window_idx, 1.0) < _THRESHOLD ): st.caption(_x_interp) if ( _y_interp and axis_def.get("y_quality", {}).get(window_idx, 1.0) < _THRESHOLD ): st.caption(_y_interp) # Motion expander — show which motions define each axis for this window x_top = axis_def.get("x_top_motions", {}).get(window_idx, {}) y_top = axis_def.get("y_top_motions", {}).get(window_idx, {}) x_conf = axis_def.get("x_label_confidence", {}).get(window_idx) y_conf = axis_def.get("y_label_confidence", {}).get(window_idx) evr = axis_def.get("explained_variance_ratio", [None, None]) evr0 = evr[0] if evr else None _has_motion_data = bool( x_top.get("+") or x_top.get("-") or y_top.get("+") or y_top.get("-") ) if _has_motion_data: with st.expander("🔍 Wat bepaalt deze assen?"): x_conf_pct = ( f" (vertrouwen: {x_conf:.0%})" if x_conf is not None else "" ) y_conf_pct = ( f" (vertrouwen: {y_conf:.0%})" if y_conf is not None else "" ) _render_axis_motions(f"Horizontale as: {_x_label}", x_conf_pct, x_top) _render_axis_motions(f"Verticale as: {_y_label}", y_conf_pct, y_top) if evr0 is not None: st.caption( f"De sterkste component verklaart {evr0:.1%} van de variantie in stemgedrag." ) # --- Voting discipline section --- _MIN_MOTIONS_FOR_DISCIPLINE = 5 start_date, end_date = _window_to_dates(window_idx) disc_df = compute_party_discipline(db_path, start_date, end_date) st.subheader("Stemgedrag cohesie") if disc_df.empty: st.caption( "Te weinig hoofdelijke stemmingen in dit venster voor een cohesieanalyse." ) else: disc_df = disc_df[disc_df["n_motions"] >= _MIN_MOTIONS_FOR_DISCIPLINE].copy() if disc_df.empty: st.caption( "Te weinig hoofdelijke stemmingen in dit venster voor een cohesieanalyse." ) else: compass_parties = set(df_pos["party"].unique()) disc_df = disc_df[disc_df["party"].isin(compass_parties)].copy() if disc_df.empty: st.caption("Geen overlappende partijen tussen kompas en stemmingsdata.") else: disc_df["discipline_pct"] = (disc_df["discipline"] * 100).round(1) disc_df["party_label"] = disc_df.apply( lambda r: f"{r['party']} ({int(r['n_motions'])} moties)", axis=1 ) bar_fig = px.bar( disc_df.sort_values("discipline"), x="discipline_pct", y="party_label", orientation="h", color="discipline_pct", color_continuous_scale="RdYlGn", range_color=[80, 100], labels={"discipline_pct": "Cohesie (%)", "party_label": "Partij"}, title="Cohesie bij hoofdelijke stemmingen", ) bar_fig.update_layout( height=max(300, len(disc_df) * 35 + 80), showlegend=False, coloraxis_showscale=False, yaxis_title="", ) st.plotly_chart(bar_fig, use_container_width=True) top3 = disc_df.nlargest(3, "discipline")[ ["party", "discipline_pct", "n_motions"] ] bot3 = disc_df.nsmallest(3, "discipline")[ ["party", "discipline_pct", "n_motions"] ] col_a, col_b = st.columns(2) with col_a: st.markdown("**Meest eensgezind**") st.dataframe( top3.rename( columns={ "party": "Partij", "discipline_pct": "Cohesie (%)", "n_motions": "Moties", } ), hide_index=True, use_container_width=True, ) with col_b: st.markdown("**Meest verdeeld**") st.dataframe( bot3.rename( columns={ "party": "Partij", "discipline_pct": "Cohesie (%)", "n_motions": "Moties", } ), hide_index=True, use_container_width=True, ) # --------------------------------------------------------------------------- # Tab 2: Partij Trajectories # --------------------------------------------------------------------------- def build_trajectories_tab(db_path: str, window_size: str) -> None: st.subheader("Partij Trajectories") st.markdown("Hoe bewegen partijen over de tijdsvensters heen?") positions_by_window, axis_def = load_positions(db_path, window_size) if axis_def is None: axis_def = {} if not positions_by_window: # Instrumentation: record why trajectories tab aborted early try: _last_trajectories_diagnostics.update( { "stage": "load_positions_empty", "positions_by_window_len": len(positions_by_window), } ) except Exception: pass try: st.warning("Geen positiedata beschikbaar.") except Exception: pass # If debug enabled, show diagnostics in UI (best-effort) try: if get_debug_trajectories_enabled(): try: st.text_area( "Trajectories diagnostics", json.dumps(_last_trajectories_diagnostics, default=str), height=160, ) except Exception: pass except Exception: pass return party_map = load_party_map(db_path) windows = sorted(positions_by_window.keys()) # Compute party centroids per window centroids: Dict[str, Dict[str, Tuple[float, float]]] = {} all_parties: set = set() # Helper to normalise MP names (strip parenthetical first names) to match # entries in the party_map. This mirrors the behaviour used in the compass # tab so both tabs resolve parties the same way. def _strip_paren(name: str) -> str: return re.sub(r"\s*\([^)]*\)", "", name).strip() for wid in windows: pos = positions_by_window.get(wid, {}) per_party: Dict[str, List[Tuple[float, float]]] = {} for mp_name, (x, y) in pos.items(): # Try exact match first, then stripped-name match to handle # variants like "Dijk, J.P. (Jimmy)" -> "Dijk, J.P." used in mp_metadata party = party_map.get(mp_name) or party_map.get( _strip_paren(mp_name), "Unknown" ) if party == "Unknown": continue per_party.setdefault(party, []).append((x, y)) for party, coords in per_party.items(): all_parties.add(party) xs = [c[0] for c in coords] ys = [c[1] for c in coords] centroids.setdefault(party, {})[wid] = ( float(np.mean(xs)), float(np.mean(ys)), ) all_parties_sorted = sorted(all_parties) # If no parties were found after mapping MPs to parties, show a helpful # message instead of rendering an empty chart. This commonly happens when # the party map failed to load (DB error) or the min_mps threshold filtered # out all parties. if not all_parties_sorted: st.info( "Geen partijen beschikbaar om trajecten te tekenen. Controleer of de party mapping is geladen (mp_metadata) en of de minimum Kamerleden-instelling te hoog staat." ) try: st.caption(f"Bekende partijen in party_map: {len(party_map)}") except Exception: pass # Do not return here: allow per-MP fallback plotting below when no # party-level centroids are available so the user still sees trajectories. # Default: show CDA, D66, VVD — the three parties that span the political centre default_parties = [p for p in ["CDA", "D66", "VVD"] if p in all_parties] if not default_parties: default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties] if not default_parties: default_parties = all_parties_sorted[:6] selected_parties = st.multiselect( "Selecteer partijen", options=all_parties_sorted, default=default_parties, ) # Ensure EMA smoothing helper is available for per-MP fallback plotting which # appears earlier in the function. Define here so calls above won't fail. def _ema_smooth(values: List[float], alpha: float) -> List[float]: if not values or alpha >= 1.0: return values smoothed = [values[0]] for v in values[1:]: smoothed.append(alpha * v + (1 - alpha) * smoothed[-1]) return smoothed # default smoothing alpha used for inline per-MP plotting; may be overridden # by the smoothing controls shown later in the UI. smooth_alpha = 0.35 # If no party-level centroids were computed, fall back to per-MP trajectories # so the user still sees a plot even when the party_map is missing or empty. if not centroids: # Build per-MP time series from positions_by_window mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {} for wid in windows: pos = positions_by_window.get(wid, {}) for mp_name, xy in pos.items(): # Defensive conversion: skip malformed coordinates instead of raising try: x, y = float(xy[0]), float(xy[1]) except Exception: # skip malformed entries silently (diagnostics will show counts) continue mp_positions.setdefault(mp_name, {})[wid] = (x, y) if not mp_positions: try: _last_trajectories_diagnostics.update( { "stage": "no_mp_positions", "mp_positions_count": len(mp_positions), } ) except Exception: pass try: st.info("Geen positiedata beschikbaar voor trajectplotten.") except Exception: pass # show diagnostics when debug enabled try: if get_debug_trajectories_enabled(): try: st.text_area( "Trajectories diagnostics", json.dumps(_last_trajectories_diagnostics, default=str), height=160, ) except Exception: pass except Exception: pass return mp_list = sorted(mp_positions.keys()) default_mps = mp_list[:6] selected_mps = st.multiselect( "Selecteer Kamerleden (fallback)", options=mp_list, default=default_mps ) # Plot per-MP trajectories fig = go.Figure() trace_count = 0 for mp in selected_mps: wids_sorted = sorted(mp_positions[mp].keys()) xs_raw = [mp_positions[mp][w][0] for w in wids_sorted] ys_raw = [mp_positions[mp][w][1] for w in wids_sorted] xs = _ema_smooth(xs_raw, smooth_alpha) ys = _ema_smooth(ys_raw, smooth_alpha) custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)] fig.add_trace( go.Scatter( x=xs, y=ys, mode="lines+markers", name=mp, text=wids_sorted, customdata=custom_raw, line=dict(color="#888888", shape="spline", smoothing=1.3), marker=dict(color="#888888", size=6), hovertemplate=( f"{mp}
" "venster: %{text}
" "x (smoothed): %{x:.3f}
" "x (raw): %{customdata[0]:.3f}
" "y (smoothed): %{y:.3f}
" "y (raw): %{customdata[1]:.3f}" ), ) ) trace_count += 1 _add_y_direction_annotations(fig) if trace_count == 0: st.info( "Geen trajecten getekend: geen geselecteerde Kamerleden met voldoende data." ) else: st.plotly_chart(fig, use_container_width=True) return # Developer override: if EXPLORER_FORCE_SHOW_TRAJECTORIES=1 in the # environment, bypass party filtering and show the first MPs' trajectories # directly (helps diagnose production environments where party mapping # or filtering prevents any traces from appearing). This is safe to keep # in main because it only triggers when explicitly enabled. if os.getenv("EXPLORER_FORCE_SHOW_TRAJECTORIES") in ("1", "true", "True"): # Build per-MP time series from positions_by_window and plot first 6 MPs mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {} for wid in windows: pos = positions_by_window.get(wid, {}) for mp_name, (x, y) in pos.items(): mp_positions.setdefault(mp_name, {})[wid] = (float(x), float(y)) mp_list = sorted(mp_positions.keys()) if not mp_list: st.info("Geen MP-positiegegevens beschikbaar om te tonen.") return sample_mps = mp_list[:6] fig = go.Figure() for mp in sample_mps: wids_sorted = sorted(mp_positions[mp].keys()) xs_raw = [mp_positions[mp][w][0] for w in wids_sorted] ys_raw = [mp_positions[mp][w][1] for w in wids_sorted] xs = _ema_smooth(xs_raw, 0.35) ys = _ema_smooth(ys_raw, 0.35) custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)] fig.add_trace( go.Scatter( x=xs, y=ys, mode="lines+markers", name=mp, text=wids_sorted, customdata=custom_raw, line=dict(color="#444444", shape="spline", smoothing=1.3), marker=dict(color="#444444", size=6), hovertemplate=( f"{mp}
" "venster: %{text}
" "x (smoothed): %{x:.3f}
" "x (raw): %{customdata[0]:.3f}
" "y (smoothed): %{y:.3f}
" "y (raw): %{customdata[1]:.3f}" ), ) ) _add_y_direction_annotations(fig) st.plotly_chart(fig, use_container_width=True) return # Debug expander: show data used to build trajectories so we can diagnose # why no traces are appearing. Leave this collapsed by default in normal # runs; when troubleshooting it will show counts and small samples. try: # Add a little opt-in checkbox in the UI to enable debug diagnostic output debug_checkbox = False try: debug_checkbox = st.checkbox( "Enable trajectories diagnostics (show extra info)", value=get_debug_trajectories_enabled(), ) except Exception: debug_checkbox = get_debug_trajectories_enabled() if debug_checkbox: try: with st.expander( "DEBUG: Trajectories data (showing diagnostics)", expanded=False ): st.write("windows (count):", len(windows)) st.write("windows sample:", windows[:10]) st.write("party_map entries:", len(party_map)) st.write("parties with centroids:", len(all_parties_sorted)) st.write("default_parties:", default_parties) st.write("selected_parties:", selected_parties) st.write("min_mps setting:", min_mps) # sample centroid counts per party sample = { p: len(centroids.get(p, {})) for p in list(all_parties_sorted)[:8] } st.write("sample centroid window counts per party:", sample) except Exception: pass except Exception: # Don't crash UI if st isn't available or expander fails pass # Smoothing controls smoothing_method = st.selectbox( "Smoothing methode", options=["EMA", "Spline", "None"], index=0, help="EMA = exponential moving average; Spline = low-degree polynomial spline fit; None = raw centroids", ) # EMA alpha only shown/used when EMA is selected smooth_alpha = 1.0 if smoothing_method == "EMA": smooth_alpha = st.slider( "Glad maken (EMA-\u03b1)", min_value=0.1, max_value=1.0, value=0.35, step=0.05, help=( "\u03b1=1.0 toont de ruwe data; lagere waarden maken de lijn gladder. " "Standaard 0.35 voor een goed evenwicht tussen detail en ruis." ), ) def _ema_smooth(values: List[float], alpha: float) -> List[float]: """Apply exponential moving average; alpha=1.0 means no smoothing.""" if not values or alpha >= 1.0: return values smoothed = [values[0]] for v in values[1:]: smoothed.append(alpha * v + (1 - alpha) * smoothed[-1]) return smoothed def _spline_smooth(values: List[float]) -> List[float]: """Perform a basic low-degree polynomial fit over index -> value and evaluate at indices. This provides a simple spline-like smoothing without adding scipy as a dependency. For very small N this returns the raw values. """ n = len(values) if n <= 2: return values deg = min(3, n - 1) try: idx = np.arange(n, dtype=float) coeffs = np.polyfit(idx, np.array(values, dtype=float), deg=deg) smooth = np.polyval(coeffs, idx) return [float(v) for v in smooth] except Exception: return values fig = go.Figure() trace_count = 0 # New: delegate plotting selection to helper for testability # Note: select_trajectory_plot_data returns (fig, trace_count, banner_text) try: fig2, trace_count2, banner_text = select_trajectory_plot_data( positions_by_window, party_map, windows, selected_parties, smooth_alpha ) # If helper returned a figure, replace if fig2 is not None: fig = fig2 trace_count = trace_count2 if banner_text: try: st.caption(banner_text) except Exception: pass try: _last_trajectories_diagnostics.update({"banner_text": banner_text}) except Exception: pass except Exception as e: tb = traceback.format_exc() # attach diagnostics to the helper and module try: select_trajectory_plot_data._last_diagnostics = {"exception": tb} except Exception: pass try: _last_trajectories_diagnostics.update( {"stage": "select_helper_exception", "exception": tb} ) except Exception: pass logger.exception("select_trajectory_plot_data failed") debug_enabled = get_debug_trajectories_enabled() if debug_enabled: try: st.text_area("select_trajectory_plot_data traceback", tb, height=240) except Exception: pass for party in selected_parties: if party not in centroids: continue wids_sorted = sorted(centroids[party].keys()) xs_raw = [centroids[party][w][0] for w in wids_sorted] ys_raw = [centroids[party][w][1] for w in wids_sorted] xs = _ema_smooth(xs_raw, smooth_alpha) ys = _ema_smooth(ys_raw, smooth_alpha) # Preserve raw (unsmoothed) values per-point so hover can show both raw and smoothed custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)] colour = PARTY_COLOURS.get(party, "#9E9E9E") fig.add_trace( go.Scatter( x=xs, y=ys, mode="lines+markers", name=party, text=wids_sorted, # full window ID for hover customdata=custom_raw, line=dict(color=colour, shape="spline", smoothing=1.3), marker=dict(color=colour, size=8), hovertemplate=( f"{party}
" "venster: %{text}
" "x (smoothed): %{x:.3f}
" "x (raw): %{customdata[0]:.3f}
" "y (smoothed): %{y:.3f}
" "y (raw): %{customdata[1]:.3f}" ), ) ) trace_count += 1 # For trajectories, the chart spans multiple windows. Use the classifier's # per-window confidences aggregated (mean) to decide whether to use the # classifier label or fall back to the conventional short label. _THRESHOLD = 0.65 x_conf_map = axis_def.get("x_label_confidence", {}) or {} y_conf_map = axis_def.get("y_label_confidence", {}) or {} def _mean_conf(m: dict) -> Optional[float]: vals = [v for v in m.values() if v is not None] if not vals: return None return float(sum(vals) / len(vals)) x_mean = _mean_conf(x_conf_map) y_mean = _mean_conf(y_conf_map) def choose_trajectory_title(axis_def: dict, axis: str, threshold: float = 0.65) -> str: """Choose a short trajectory axis title based on aggregated confidence. axis: 'x' or 'y'. Returns axis_def label when its mean confidence >= threshold, otherwise returns the compact fallback 'As 1' / 'As 2'. Matches previous logic. """ _TH = threshold conf_map = axis_def.get(f"{axis}_label_confidence", {}) or {} vals = [v for v in conf_map.values() if v is not None] mean = float(sum(vals) / len(vals)) if vals else None label = axis_def.get(f"{axis}_label") if mean is not None and mean >= _TH and label: return label # Prefer the user-facing semantic fallback via the classifier helper try: from analysis.axis_classifier import display_label_for_modal fallback_modal = "As 1" if axis == "x" else "As 2" return display_label_for_modal(fallback_modal, axis) except Exception: return "As 1" if axis == "x" else "As 2" x_title = choose_trajectory_title(axis_def, "x", threshold=_THRESHOLD) y_title = choose_trajectory_title(axis_def, "y", threshold=_THRESHOLD) fig.update_layout( title="Partij trajectories", xaxis_title=x_title, yaxis_title=y_title, height=600, legend_title_text="Partij", ) _add_y_direction_annotations(fig) # If no traces were added to the figure, show a diagnostic message so the # user knows why the plot is empty. try: _last_trajectories_diagnostics.update({"trace_count": trace_count}) except Exception: pass debug_enabled = get_debug_trajectories_enabled() if trace_count == 0: try: st.info( "Geen trajecten getekend: geen geselecteerde partijen met voldoende data. Controleer de partijselectie en de 'Min. Kamerleden per partij' instelling." ) except Exception: pass if debug_enabled: try: st.text_area( "Trajectories diagnostics", json.dumps(_last_trajectories_diagnostics, default=str), height=240, ) except Exception: try: st.json(_last_trajectories_diagnostics) except Exception: pass else: # DEBUG: show trace_count and figure data size before rendering try: st.info( f"[DEBUG] trace_count={trace_count}, fig data count={len(fig.data)}, layout title={fig.layout.title.text if fig.layout.title else 'none'}" ) except Exception: pass try: st.plotly_chart(fig, use_container_width=True) except Exception as e: st.error(f"Trajectories rendering failed: {e}") # Always show diagnostics when rendering fails, regardless of trace_count if get_debug_trajectories_enabled(): try: st.json(_last_trajectories_diagnostics) except Exception: st.text_area( "Trajectories diagnostics (JSON failed)", json.dumps(_last_trajectories_diagnostics, default=str), height=240, ) # --------------------------------------------------------------------------- # Tab 3: Motie Zoeken # --------------------------------------------------------------------------- def build_search_tab(db_path: str, show_rejected: bool) -> None: st.subheader("Motie Zoeken") df = load_motions_df(db_path) if df.empty: st.warning("Geen moties beschikbaar.") return if not show_rejected: df = df[df["title"].fillna("").str.strip() != "Verworpen."] # Controls col1, col2, col3 = st.columns([2, 1, 1]) with col1: query = st.text_input( "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen" ) with col2: years = sorted(df["year"].dropna().astype(int).unique().tolist()) if years: year_range = st.select_slider( "Jaar", options=years, value=(years[0], years[-1]) ) else: year_range = (2019, 2024) with col3: min_controversy = st.slider( "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05 ) # Apply filters in-memory working = df.copy() working = working[ (working["year"] >= year_range[0]) & (working["year"] <= year_range[1]) ] if min_controversy > 0: working = working[working["controversy_score"] >= min_controversy] if query: q = query.lower() mask = working["title"].fillna("").str.lower().str.contains(q, regex=False) working = working[mask] working = working.sort_values(by="controversy_score", ascending=False) st.caption(f"{len(working)} resultaten (top 50 getoond)") for _, row in working.head(50).iterrows(): title = row.get("title") or f"Motie #{row['id']}" date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?" controversy = row.get("controversy_score") or 0 with st.expander(f"**{title}** — {date_str} — 🔥 {controversy:.2f}"): cols = st.columns(3) cols[0].metric("Controverse", f"{controversy:.2f}") cols[1].metric("Marge", f"{row.get('winning_margin', 0):.2f}") cols[2].metric("Jaar", int(row["year"]) if pd.notna(row["year"]) else "?") # Voting breakdown _render_voting_results(row.get("voting_results")) # Link to original motion url = row.get("url") if url and str(url).startswith("http"): st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})") # Similar motions sim = query_similar(db_path, int(row["id"]), top_k=5) if not sim.empty: st.markdown("**Vergelijkbare moties:**") for _, s in sim.iterrows(): s_date = ( pd.to_datetime(s["date"]).strftime("%Y") if pd.notna(s.get("date")) else "" ) st.markdown( f"- {s.get('title', 'Onbekend')} *(score: {s['score']:.3f}, {s_date})*" ) else: st.caption("_Nog geen vergelijkbare moties beschikbaar_") # --------------------------------------------------------------------------- # Tab 4: Motie Browser # --------------------------------------------------------------------------- def build_browser_tab(db_path: str, show_rejected: bool) -> None: st.subheader("Motie Browser") df = load_motions_df(db_path) if df.empty: st.warning("Geen moties beschikbaar.") return if not show_rejected: df = df[df["title"].fillna("").str.strip() != "Verworpen."] # Controls col1, col2, col3 = st.columns(3) with col1: years = sorted(df["year"].dropna().astype(int).unique().tolist()) year_filter = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years]) with col2: min_controversy_b = st.slider( "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05, key="browser_controversy", ) with col3: sort_by = st.selectbox("Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"]) # Filter working = df.copy() if year_filter != "(Alle)": working = working[working["year"] == int(year_filter)] if min_controversy_b > 0: working = working[working["controversy_score"] >= min_controversy_b] sort_map = { "Datum (nieuw)": ("date", False), "Controverse": ("controversy_score", False), "Marge": ("winning_margin", True), } sort_col, sort_asc = sort_map[sort_by] working = working.sort_values(by=sort_col, ascending=sort_asc) # Display table display_cols = ["id", "title", "date", "controversy_score", "winning_margin"] available_display = [c for c in display_cols if c in working.columns] st.dataframe( working[available_display].reset_index(drop=True), use_container_width=True, height=350, ) st.divider() # Detail panel st.markdown("**Detail weergave** — vul een motie-ID in:") sel_id = st.number_input( "Motie ID", min_value=int(working["id"].min()) if not working.empty else 1, max_value=int(working["id"].max()) if not working.empty else 99999, value=int(working["id"].iloc[0]) if not working.empty else 1, step=1, ) motion_row = df[df["id"] == sel_id] if not motion_row.empty: row = motion_row.iloc[0] st.markdown(f"### {row.get('title') or 'Onbekend'}") date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?" st.caption( f"📅 {date_str} | 🔥 Controverse: {row.get('controversy_score', 0):.2f}" ) # Link to original source url = row.get("url") if url and str(url).startswith("http"): st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})") # Voting breakdown st.markdown("**Stemuitslag:**") _render_voting_results(row.get("voting_results")) # Similar motions sim = query_similar(db_path, int(sel_id), top_k=10) if not sim.empty: st.markdown("**Vergelijkbare moties:**") st.dataframe( sim[["title", "score", "date", "policy_area"]], use_container_width=True, ) else: st.caption("_Nog geen vergelijkbare moties beschikbaar voor deze motie_") def build_svd_components_tab(db_path: str) -> None: """New tab: show top motions contributing to top SVD components. Reads thoughts/explorer/top_svd_top_motions.json and displays a selector for components 1..10 with theme labels/explanations and a detail pane per motion. """ # Political polarisation themes per SVD component (1-indexed, window=2025) # Produced by per-axis analysis of all 10 unique top motions (zero cross-axis overlap). SVD_THEMES: dict[int, dict[str, str]] = { 1: { "label": "Links-rechts hoofdas", "explanation": ( "De dominante dimensie van het parlement: de klassieke links-rechts tegenstelling " "die het meeste verschil in stemgedrag verklaart. Aan de rechterkant (PVV, SGP, VVD, " "ChristenUnie) staan moties over defensie-uitbreiding, NAVO-verplichtingen, " "juridische ruimte voor drones en gaswinning. Aan de linkerkant (PvdD, SP, DENK, " "GroenLinks-PvdA) staan moties over huurverlaging, het veroordelen van " "antipersoneelslandmijnen, het opzeggen van het militaire verdrag met Israël en het " "oprichten van zorgbuurthuizen. De scheidslijn loopt dwars door thema's als " "veiligheid, economie, internationaal recht en sociale bescherming." ), "positive_pole": "Nationalistisch-conservatief: PVV, SGP, VVD, ChristenUnie", "negative_pole": "Progressief-links: PvdD, SP, DENK, GroenLinks-PvdA", "flip": False, }, 2: { "label": "Populistisch nationalisme versus institutioneel progressivisme", "explanation": ( "Deze as scheidt het populistisch-nationalistische bloc (PVV, FVD, Groep Markuszower, " "BBB) van het volledige overige parlement. Alleen PVV (+18), FVD (+4) en Groep " "Markuszower (+2) scoren positief; alle andere partijen scoren negatief, inclusief " "VVD (−15), CDA (−14), SGP (−25) en ChristenUnie (−59). Positieve moties: artsen " "vrijpleiten voor hydroxychloroquine/ivermectine, Syriërs terugsturen, geen geld " "aan Jordanië, tijdelijke bescherming Oekraïne beëindigen. Negatieve moties: " "digitale toegankelijkheid Caribisch Nederland, ethiekprogramma Defensie, zorg voor " "slachtoffers bombardement Hawija, zorgkwaliteitsstandaarden. Dit is geen links-rechts " "verdeling maar een nativistisch-populistisch vs. institutioneel onderscheid." ), "positive_pole": "Populistisch-nationalistisch: PVV, FVD, Groep Markuszower, BBB", "negative_pole": "Institutioneel: alle overige partijen — van VVD en SGP tot GroenLinks-PvdA en Volt", "flip": False, }, 3: { "label": "Verzorgingsstaat versus bezuinigingen en marktwerking", "explanation": ( "Deze as weerspiegelt de spanning tussen staatsingrijpen en marktliberalisme, " "aangescherpt door de kabinetscrisis van 2025. Aan de positieve kant staan moties " "die bezuinigingen op zorg en het gemeentefonds willen terugdraaien, winstuitkeringen " "in de zorg verbieden en publieke controle over ziekenhuisfusies eisen. SP, PvdD, " "GroenLinks-PvdA en PVV stemmen hier gelijk — ondanks hun tegengestelde PC1-posities. " "Aan de negatieve kant staan moties " "over marktwerking in de zorg, fiscale bedrijfsopvolgingsfaciliteiten (VVD), " "doorgaan met besturen ondanks de kabinetscrisis (VVD/Yeşilgöz) en defensie-" "uitgaven van 3,5% bbp." ), "positive_pole": "Pro-verzorgingsstaat: SP, PvdD, GroenLinks-PvdA, PVV (anti-bezuinigingen)", "negative_pole": "Marktliberaal en fiscaal conservatief: VVD, D66, CDA, SGP", "flip": True, }, 4: { "label": "Pragmatisch centrisme versus ideologische radicaliteit", "explanation": ( "De gevestigde centrumpartijen (D66, CDA, VVD, 50PLUS) staan tegenover zowel " "rechts-radicale als identiteitspolitieke posities. Aan de positieve kant staan " "moties over openbare toiletten, vaderbetrokkenheid bij opvoeding, internationale " "samenwerking met Australië en Canada, en long covid-expertise. Dit zijn pragmatische, " "institutionele beleidsposities. Aan de negatieve kant staan moties over een " "migratiesaldo-cap van 60.000, het verlaten van de WHO, kinderen in pleeggezinnen " "van hetzelfde geslacht (FVD) en de bescherming van religieuze schoolidentiteit " "via artikel 23. De negatieve pool combineert populistisch-rechts met " "identiteitsgerichte posities van zowel rechts als links." ), "positive_pole": "Constructief centrum: D66, CDA, VVD, 50PLUS — pragmatisch en internationaal", "negative_pole": "Radicaal-ideologisch: FVD, Groep Markuszower (rechts), ChristenUnie, DENK (religieus/identiteit)", "flip": True, }, 5: { "label": "Christelijk-sociaal communitarisme", "explanation": ( "Deze as scheidt partijen die gemeenschapszorg, burgerplicht en informele " "ondersteuningsstructuren benadrukken van partijen die individuele vrijheden en " "progressieve maatschappelijke hervorming voorstaan. Aan de positieve kant staan " "moties over schuldhulpverlening via vrijwilligersorganisaties, de maatschappelijke " "diensttijd voor jongeren met een afstand tot de arbeidsmarkt, en de gastouderopvang. " "ChristenUnie, SGP en CDA voeren hier de toon; ook D66 scoort positief door steun " "aan sociaal beleid. Aan de negatieve kant staan moties over wettelijke erkenning " "van meerouderschap, abortusrecht in het EU-Handvest, armoedebeleid en " "buitenlandse beïnvloeding. PvdD, GroenLinks-PvdA en VVD scoren hier negatief." ), "positive_pole": "Gemeenschapsgericht: ChristenUnie, SGP, CDA, D66 — vrijwilligers, diensttijd, zorgsystemen", "negative_pole": "Individualistisch-progressief: PvdD, GroenLinks-PvdA, VVD, PVV", "flip": False, }, 6: { "label": "Klimaat, energie en culturele integratie", "explanation": ( "Aan de positieve kant staan moties die LNG-capaciteit prefereren als alternatief " "voor strenge vulgraadverplichtingen, kernenergie als volwaardig CO₂-arm onderdeel " "van de energiemix willen erkennen op COP30, en discriminatie- en inclusiemeldpunten " "willen inventariseren. SGP, JA21, FVD en PVV scoren sterk positief. Aan de " "negatieve kant staan moties die fossiele-industrie-vertegenwoordigers willen weren " "van klimaatconferenties, structureel overleg met moslimgemeenschappen willen bij " "integratiebeleid, en aanvallen van Israël op Libanon veroordelen. " "PvdD, GroenLinks-PvdA, Volt en D66 scoren negatief. " "Deze as combineert energieideologie met culturele polarisatie rondom klimaat, " "integratie en buitenlandspolitiek." ), "positive_pole": "Pro-fossiel, nationaal energiebeleid: SGP, JA21, FVD, PVV", "negative_pole": "Klimaatgericht en inclusief: PvdD, GroenLinks-PvdA, Volt, D66", "flip": False, }, 7: { "label": "Bestuurlijk pragmatisme en implementatie (indicatief)", "explanation": ( "Een residuele as die overwegend beleidsdossiers uit 2024 (vorige parlementaire " "periode) omvat. De scores zijn smal (max ~11 punten) en de partijcombinaties " "ideologisch divers — dit label is indicatief. Aan de positieve kant staan " "pragmatische bestuursmoties: een compleet kostenoverzicht van producten van eigen " "bodem, papieren schoolboeken voor basisvaardigheden, een invoeringstoets voor het " "minimumloon en de A2-snelwegplanning. ChristenUnie, Volt, DENK en SP scoren " "positief. Aan de negatieve kant staan meer ideologisch geladen moties: een " "landelijk stookverbod (PvdD), het strafbaar stellen van verbranding van religieuze " "geschriften (DENK), chroom-6 schadevergoedingen en tegenhouden van nieuwe " "gaswinning. GroenLinks-PvdA, VVD, FVD en JA21 scoren negatief." ), "positive_pole": "Praktisch-bestuurlijk: ChristenUnie, Volt, SGP, DENK, SP", "negative_pole": "Ideologisch-principieel: GroenLinks-PvdA, VVD, FVD, JA21", "flip": True, }, 8: { "label": "Europese defensie-integratie (indicatief)", "explanation": ( "Aan de positieve kant staan moties die pleiten voor militaire mobiliteit als " "topprioriteit in EU/NAVO-verband en toewerken naar een militair Schengengebied, " "35% van defensiematerieel Europees inkopen en een Europees defensie-R&D-instituut " "oprichten. Ook het Nationaal Groeifonds en gewasbeschermingsonderzoek vallen " "positief. Volt en D66 scoren sterk positief. Aan de negatieve kant staan moties " "over ketenverantwoordelijkheid bij toeslagen (DENK), het coronaoversterfte-onderzoek " "(PVV/BBB), energiecontracten en huisvestingsregulering. SP (−39), DENK (−35) en " "PvdD (−26) scoren sterk negatief — dit betekent dat zij actief tégen deze " "EU-defensiemoties stemmen, niet simpelweg het thema negeren. Volt (N=1) domineert " "de positieve pool maar is als centroïde van één Kamerlid statistisch onbetrouwbaar." ), "positive_pole": "Pro-EU defensie en innovatie: Volt, D66", "negative_pole": "Nationaal/pacifistisch of binnenlandsgericht: SP, DENK, PvdD, 50PLUS", "flip": False, }, 9: { "label": "Decentraal bestuur en gemeenschapswaarden (indicatief)", "explanation": ( "Aan de positieve kant staan moties over naleving van de Financiële-verhoudingswet " "voor gemeenten, beperking van arbeidsmigratie binnen de EU, een nieuwe " "tandartsopleiding in Rotterdam, een actieplan tegen misbruik van hallucinerende " "geneesmiddelen en een oplossing voor milieuproblemen op Bonaire. SGP en " "ChristenUnie scoren sterk positief; ook DENK en SP. Aan de negatieve kant staan " "moties over een moratorium op geitenstallen, een verbod op gokadvertenties, " "verduidelijking van gronden voor voorlopige hechtenis, een leegstandbelasting voor " "woningen en end-to-end-encryptie. D66, JA21 en PVV scoren negatief. Deze as " "scheidt een nadruk op decentrale dienstverlening en gemeenschapsregulering van " "progressieve systeem- en rechtshervorming." ), "positive_pole": "Lokaal en gemeenschapsgericht: SGP, ChristenUnie, DENK, SP", "negative_pole": "Progressieve systemen en rechten: D66, JA21, PVV", "flip": True, }, 10: { "label": "Institutioneel toezicht en handhaving (indicatief)", "explanation": ( "De tiende as vangt resterende variantie op en scheidt partijen die sceptisch zijn " "over staatstoezicht van partijen die strikte regulering en handhaving steunen. " "Aan de positieve kant staan moties over minder tijdsintensieve schoolinspecties, " "het recht van toeslagenouders op hun persoonlijk dossier, behoud van de " "tegemoetkoming voor arbeidsongeschikten en een verlaging van de leeftijdsdrempel " "voor kindgesprekken. DENK, SP en PvdD scoren positief. Aan de negatieve kant " "staan moties over een aangifteplicht voor scholen bij veiligheidsincidenten, een " "rookverbod in auto's met kinderen, braakliggende landbouwgrond en verhoogd " "beloningsgeld voor tipgevers. GroenLinks-PvdA scoort opvallend sterk negatief, " "waarmee het zich onderscheidt van SP en DENK op handhavingsthema's." ), "positive_pole": "Kritisch op overheidstoezicht: DENK, SP, PvdD, Volt — minder inspectielast", "negative_pole": "Pro-handhaving en regulering: GroenLinks-PvdA, CDA, SGP — veiligheid en naleving", "flip": True, }, } st.subheader("🔬 SVD Assen — politieke polarisatiethema's") st.markdown( "Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen " "van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren " "het spanningsveld dat de as beschrijft." ) # Scree plot: relative importance of each SVD component scree_importances = load_scree_data(db_path) if scree_importances: st.markdown( "**Scree-plot** — het relatieve gewicht van elke SVD-as. " "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; " "latere assen (7+) zijn fragiel en mogelijk niet boven ruisniveau." ) _render_scree_plot(scree_importances) json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json") if not os.path.exists(json_path): st.warning( f"Top-SVD data not found at {json_path}. Run the importance job to generate it." ) return try: with open(json_path, "r", encoding="utf-8") as fh: j = json.load(fh) except Exception as e: st.error(f"Failed to load SVD importance JSON: {e}") return window = j.get("window") rows = j.get("rows", []) if not rows: st.info("Geen top-moties in dataset") return st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**") # Build mapping component -> list of motions (deduplicate by motion_id per component) comp_map: dict[int, list] = {} for r in rows: comp = int(r.get("component", 0)) bucket = comp_map.setdefault(comp, []) existing_ids = {m.get("motion_id") for m in bucket} if r.get("motion_id") not in existing_ids: bucket.append(r) comp_options = sorted(comp_map.keys()) # Build display labels for selectbox: "As 1 — Regulering vs. status-quo" def _comp_label(c: int) -> str: theme = SVD_THEMES.get(c, {}) lbl = theme.get("label", "") return f"As {c} — {lbl}" if lbl else f"As {c}" comp_display = [_comp_label(c) for c in comp_options] comp_sel_idx = st.selectbox( "Selecteer SVD-as", options=list(range(len(comp_options))), format_func=lambda i: comp_display[i], index=0, ) comp_sel = comp_options[comp_sel_idx] # Show theme explanation theme = SVD_THEMES.get(comp_sel, {}) if theme: st.info(f"**{theme['label']}** — {theme['explanation']}") motions = comp_map.get(comp_sel, []) # Party axis chart # Default party scores (single-window mean vectors) as a fallback party_scores_default = load_party_axis_scores(db_path) party_mp_vectors = load_party_mp_vectors(db_path) bootstrap_data = ( _cached_bootstrap_cis(party_mp_vectors) if party_mp_vectors else None ) # For components 1 and 2, prefer MP-centroid values from the Procrustes-aligned # positions_by_window so the compass matches the trajectories (MP-mean centroids). if comp_sel in (1, 2): try: positions_by_window, axis_def = load_positions(db_path) if axis_def is None: axis_def = {} # choose the current parliament window if present window = ( "current_parliament" if "current_parliament" in positions_by_window else sorted(positions_by_window.keys())[-1] ) pos = positions_by_window.get(window, {}) # build party -> list of MP x/y coords party_map = load_party_map(db_path) per_party_coords: dict = {} for ent, (x, y) in pos.items(): party = party_map.get(ent) if party is None: continue per_party_coords.setdefault(party, []).append((x, y)) # construct party_scores mapping: prefer MP centroid [x,y], fallback to default vector party_scores = {} for party in set( list(per_party_coords.keys()) + list(party_scores_default.keys()) ): coords = per_party_coords.get(party) if coords: xs = [c[0] for c in coords] ys = [c[1] for c in coords] party_scores[party] = [float(np.mean(xs)), float(np.mean(ys))] else: # fallback: use the default single-window SVD mean vector party_scores[party] = party_scores_default.get(party, []) except Exception: # On any error, fall back to the old behaviour logger.exception( "Failed to derive party centroids from positions_by_window; falling back to load_party_axis_scores" ) party_scores = party_scores_default else: party_scores = party_scores_default # Convert party_scores (possibly [x,y] lists or legacy vectors) into explicit (x,y) coords party_coords: dict = {} for p, v in party_scores.items(): try: if v and len(v) >= 2: party_coords[p] = (float(v[0]), float(v[1])) except Exception: continue _render_party_axis_chart( party_coords, comp_sel, theme, bootstrap_data=bootstrap_data ) # Batch-fetch motion details (title, date, policy_area, url, body_text, voting_results) motion_ids = [m.get("motion_id") for m in motions if m.get("motion_id") is not None] motion_details: Dict[int, tuple] = {} if motion_ids: # Defensively convert motion_ids to integers, skipping invalid values ids_int: List[int] = [] for mid in motion_ids: try: ids_int.append(int(mid)) except Exception: logger.warning("Skipping invalid motion id in SVD batch fetch: %r", mid) # If no valid ids remain, skip the DB query if ids_int: con = None try: placeholders = ", ".join("?" for _ in ids_int) con = duckdb.connect(database=db_path, read_only=True) db_rows = con.execute( f"SELECT id, title, date, policy_area, url, body_text, voting_results " f"FROM motions WHERE id IN ({placeholders})", ids_int, ).fetchall() motion_details = {r[0]: r for r in db_rows} except Exception: logger.exception("Failed to batch-fetch motion details") finally: if con: con.close() # Split motions by pole sign pos_motions = [m for m in motions if float(m.get("score", 0.0)) >= 0] neg_motions = [m for m in motions if float(m.get("score", 0.0)) < 0] flip = theme.get("flip", False) if theme else False pos_pole = theme.get("positive_pole", "") if theme else "" neg_pole = theme.get("negative_pole", "") if theme else "" # Determine which pole goes left (progressive) and which goes right if flip: left_pole, right_pole = pos_pole, neg_pole left_motions, right_motions = pos_motions, neg_motions left_arrow, right_arrow = "▲", "▼" else: left_pole, right_pole = neg_pole, pos_pole left_motions, right_motions = neg_motions, pos_motions left_arrow, right_arrow = "▼", "▲" lcol, rcol = st.columns(2) with lcol: st.markdown(f"**← {left_pole}**") for m in left_motions: mid = m.get("motion_id") raw_title = m.get("title") or f"Motie #{mid}" with st.expander(f"{left_arrow} {raw_title}"): row = motion_details.get(int(mid)) if mid is not None else None if row: try: date_str = str(row[2])[:10] except Exception: date_str = "?" st.caption(f"📅 {date_str} | {row[3] or '—'}") if row[4] and str(row[4]).startswith("http"): st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})") if row[5]: with st.expander("Toon volledige tekst"): st.write(row[5]) _render_voting_results(row[6]) else: st.caption("_Geen metadata beschikbaar_") with rcol: st.markdown(f"**{right_pole} →**") for m in right_motions: mid = m.get("motion_id") raw_title = m.get("title") or f"Motie #{mid}" with st.expander(f"{right_arrow} {raw_title}"): row = motion_details.get(int(mid)) if mid is not None else None if row: try: date_str = str(row[2])[:10] except Exception: date_str = "?" st.caption(f"📅 {date_str} | {row[3] or '—'}") if row[4] and str(row[4]).startswith("http"): st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})") if row[5]: with st.expander("Toon volledige tekst"): st.write(row[5]) _render_voting_results(row[6]) else: st.caption("_Geen metadata beschikbaar_") def build_mp_quiz_tab(db_path: str) -> None: """Interactive quiz: narrow MPs by asking motion vote questions. Minimal viable flow: - seed with top-N controversial motions (SEED_MOTIONS) - present one question at a time, store answers in st.session_state['mp_quiz_votes'] - after each answer call MotionDatabase.match_mps_for_votes to rank MPs - if multiple candidates remain, call choose_discriminating_motions to pick next question - stop when unique MP found or no discriminating motions remain """ st.subheader("🧑‍⚖️ Welk tweede kamerlid ben jij?") st.markdown( "Beantwoord een paar eenvoudige ja/nee/onthoud vragen over moties om te zien welk Kamerlid het meest op jou lijkt." ) SEED_MOTIONS = 8 MAX_QUESTIONS = 20 # initialize session state if "mp_quiz_votes" not in st.session_state: st.session_state["mp_quiz_votes"] = {} if "mp_quiz_asked" not in st.session_state: st.session_state["mp_quiz_asked"] = [] from database import MotionDatabase as _MotionDatabase db_inst = _MotionDatabase(db_path) df = load_motions_df(db_path) if df.empty: st.warning("Geen moties beschikbaar om de quiz te starten.") return # seed from motions that actually have individual MP vote records seed_ids = db_inst.get_motions_with_individual_votes(k=SEED_MOTIONS) if not seed_ids: st.warning("Geen individuele stemdata beschikbaar voor de quiz.") return # Determine next motion to ask def _next_motion_id(): # prefer seed motions not yet asked for mid in seed_ids: if str(mid) not in st.session_state["mp_quiz_votes"]: return mid # otherwise ask discriminating motion based on remaining candidate MPs # compute current candidate set try: user_votes = { int(k): v for k, v in st.session_state["mp_quiz_votes"].items() } ranked = db_inst.match_mps_for_votes(user_votes, limit=200) except Exception: ranked = [] candidates = [r["mp_name"] for r in ranked] excluded = [int(k) for k in st.session_state["mp_quiz_votes"].keys()] if not candidates: return None try: next_ids = db_inst.choose_discriminating_motions(candidates, excluded, k=1) return next_ids[0] if next_ids else None except Exception: return None # show progress and controls col1, col2 = st.columns([3, 1]) with col2: st.caption( f"Vragen beantwoord: {len(st.session_state['mp_quiz_votes'])}/{MAX_QUESTIONS}" ) if st.button("Reset quiz"): st.session_state["mp_quiz_votes"] = {} st.session_state["mp_quiz_asked"] = [] st.rerun() # main question loop (single question per render, wrapped in a form to avoid # premature reruns when the user changes the radio selection) next_mid = _next_motion_id() if next_mid is None: st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.") else: motion_rows = df[df["id"] == next_mid] if motion_rows.empty: # motion has votes but isn't in the motions DataFrame — skip it st.session_state["mp_quiz_votes"][str(next_mid)] = "Geen stem" st.rerun() return motion_row = motion_rows.iloc[0] st.markdown(f"### {motion_row.get('title') or f'Motie #{next_mid}'}") if motion_row.get("layman_explanation"): st.info(motion_row.get("layman_explanation")) with st.form(key=f"mp_quiz_form_{next_mid}"): choice = st.radio( "Wat zou jij stemmen?", options=["Voor", "Tegen", "Onthouden", "Geen stem"], index=3, ) submitted = st.form_submit_button("Beantwoord en verder") if submitted: st.session_state["mp_quiz_votes"][str(next_mid)] = choice st.session_state["mp_quiz_asked"].append(next_mid) st.rerun() # display current ranking try: user_votes = {int(k): v for k, v in st.session_state["mp_quiz_votes"].items()} ranking = db_inst.match_mps_for_votes(user_votes, limit=50) except Exception: ranking = [] if ranking: st.markdown("**Top kandidaten**") # show as table import pandas as pd rdf = pd.DataFrame(ranking) st.dataframe(rdf.head(10), use_container_width=True) # check uniqueness top_pct = ranking[0]["agreement_pct"] if ranking else 0.0 top_matches = [r for r in ranking if r["agreement_pct"] == top_pct] if len(top_matches) == 1 and top_matches[0]["overlap"] > 0: st.success( f"Unieke match gevonden: {top_matches[0]['mp_name']} ({top_matches[0]['party']})" ) else: if len(st.session_state["mp_quiz_asked"]) >= MAX_QUESTIONS: st.warning( "Maximaal aantal vragen beantwoord. Je hebt meerdere vergelijkbare kandidaten." ) else: st.info("Nog geen unieke match — vraag meer om verder te verfijnen.") else: st.info("Nog geen antwoorden of geen overlapping met bestaande stemdata.") # --------------------------------------------------------------------------- # App entry # --------------------------------------------------------------------------- def run_app() -> None: st.set_page_config( layout="wide", page_title="Parlement Explorer", page_icon="🏛️", ) st.title("🏛️ Parlement Explorer") # Sidebar st.sidebar.title("Instellingen") db_path = "data/motions.db" window_size = "annual" show_rejected = st.sidebar.checkbox("Toon verworpen moties", value=False) # About section with st.sidebar.expander("ℹ️ Over", expanded=False): try: if _DUCKDB_AVAILABLE: con = duckdb.connect(database=db_path, read_only=True) n_motions = con.execute("SELECT COUNT(*) FROM motions").fetchone()[0] n_fused = con.execute( "SELECT COUNT(*) FROM fused_embeddings" ).fetchone()[0] n_sim = con.execute("SELECT COUNT(*) FROM similarity_cache").fetchone()[ 0 ] con.close() st.markdown( f"**Moties:** {n_motions:,} \n" f"**Fused embeddings:** {n_fused:,} \n" f"**Similarity cache:** {n_sim:,}" ) else: st.warning( "DuckDB niet beschikbaar in deze Python-omgeving; DB diagnostics zijn niet beschikbaar." ) except Exception as e: st.warning(f"DB niet bereikbaar: {e}") # Main tabs # Streamlit tabs compatibility: some older/newer Streamlit builds expose different APIs. tab_labels = [ "🧭 Politiek Kompas", "📈 Trajectories", "🔍 Motie Zoeken", "📋 Motie Browser", "🔬 SVD Components", ] if hasattr(st, "tabs") and callable(getattr(st, "tabs")): tab1, tab2, tab3, tab4, tab5 = st.tabs(tab_labels) with tab1: build_compass_tab(db_path, window_size) with tab2: build_trajectories_tab(db_path, window_size) with tab3: build_search_tab(db_path, show_rejected) with tab4: build_browser_tab(db_path, show_rejected) with tab5: build_svd_components_tab(db_path) else: # Fallback for environments where `st.tabs` is not available: use a radio selector selection = st.radio("Tab", tab_labels) if selection == tab_labels[0]: build_compass_tab(db_path, window_size) elif selection == tab_labels[1]: build_trajectories_tab(db_path, window_size) elif selection == tab_labels[2]: build_search_tab(db_path, show_rejected) elif selection == tab_labels[3]: build_browser_tab(db_path, show_rejected) else: build_svd_components_tab(db_path) if __name__ == "__main__": logging.basicConfig( level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s" ) run_app()