You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
motief/explorer.py

2046 lines
78 KiB

"""Parlement Explorer — Streamlit data analysis app.
Four tabs:
1. Politiek Kompas — 2D scatter of MPs/parties, window slider
2. Partij Trajectories — party centroid lines over time
3. Motie Zoeken — text search + similarity lookup
4. Motie Browser — sortable table + detail panel
Run with: streamlit run explorer.py
Import-safe: heavy computation is behind @st.cache_data and only runs at UI time.
All DuckDB connections are read_only=True so the app can run alongside the pipeline.
"""
from __future__ import annotations
import json
import logging
import os
import re
from typing import Dict, List, Optional, Tuple
import duckdb
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
logger = logging.getLogger(__name__)
# Party colour palette (consistent across tabs).
# Maps a party name to a hex colour string.  Some parties are listed under two
# spellings that share one colour ("NSC" / "Nieuw Sociaal Contract",
# "CU" / "ChristenUnie").  "Unknown" is the grey entry; lookups elsewhere in
# this file fall back to the same grey ("#9E9E9E") for unlisted parties.
PARTY_COLOURS: Dict[str, str] = {
    "VVD": "#1E73BE",
    "PVV": "#002366",
    "D66": "#00A36C",
    "CDA": "#4CAF50",
    "SP": "#E53935",
    "PvdA": "#D32F2F",
    "GroenLinks": "#388E3C",
    "GroenLinks-PvdA": "#2E7D32",
    "CU": "#0288D1",
    "SGP": "#F4511E",
    "PvdD": "#43A047",
    "FVD": "#6A1B9A",
    "JA21": "#7B1FA2",
    "BBB": "#8D6E63",
    "NSC": "#FF8F00",
    "Nieuw Sociaal Contract": "#FF8F00",  # alias used in mp_metadata
    "DENK": "#00897B",
    "50PLUS": "#7E57C2",
    "Volt": "#572AB7",
    "ChristenUnie": "#0288D1",
    "Unknown": "#9E9E9E",
}
# Ordered list of well-known parties for trajectory default selection.
# Keeps the chart readable without overwhelming users with all parties.
KNOWN_MAJOR_PARTIES = [
    "VVD",
    "PVV",
    "D66",
    "GroenLinks-PvdA",
    "GroenLinks",
    "PvdA",
    "CDA",
    "SP",
    "NSC",
    "CU",
    "BBB",
]
# Parties currently seated in the Tweede Kamer (2023 election cycle).
# These are the entity_ids as stored in svd_vectors for window='2025'.
CURRENT_PARLIAMENT_PARTIES: frozenset[str] = frozenset(
    {
        "PVV",
        "VVD",
        "NSC",
        "BBB",
        "D66",
        "GroenLinks-PvdA",
        "CDA",
        "SP",
        "ChristenUnie",
        "SGP",
        "Volt",
        "DENK",
        "PvdD",
        "JA21",
        "FVD",
    }
)
# Normalize variant party names to canonical display names in CURRENT_PARLIAMENT_PARTIES.
# Names that are not a key here are used unchanged by the lookup code.
_PARTY_NORMALIZE: dict[str, str] = {
    "Nieuw Sociaal Contract": "NSC",
    "CU": "ChristenUnie",
    "GL": "GroenLinks-PvdA",
    "GroenLinks": "GroenLinks-PvdA",
    "PvdA": "GroenLinks-PvdA",
    "Gündoğan": "Volt",  # confirmed Volt, left parliament 2023-12-05
    "Lid Keijzer": "BBB",  # Keijzer left CDA, joined BBB cabinet
    "Groep Markuszower": "PVV",  # Markuszower sits with PVV faction
}
# ---------------------------------------------------------------------------
# Cached loaders
# ---------------------------------------------------------------------------
@st.cache_data(show_spinner="Beschikbare tijdsvensters laden…")
def get_available_windows(db_path: str) -> List[str]:
    """List every distinct ``window_id`` found in ``svd_vectors``, ascending.

    The database is opened read-only and the connection is always closed.
    A failing query is logged and reported as an empty list.
    """
    query = "SELECT DISTINCT window_id FROM svd_vectors ORDER BY window_id"
    con = duckdb.connect(database=db_path, read_only=True)
    try:
        window_ids = [record[0] for record in con.execute(query).fetchall()]
    except Exception:
        logger.exception("Failed to query available windows")
        window_ids = []
    finally:
        con.close()
    return window_ids
@st.cache_data(show_spinner=False)
def get_uniform_dim_windows(db_path: str) -> List[str]:
    """Return the windows whose dominant MP-vector dimension is large enough.

    Some windows contain a mix of vector lengths due to multiple pipeline runs
    (e.g. 2016 has both dim=1 and dim=50 rows).  For every window the most
    common vector length among ``entity_type = 'mp'`` rows is determined
    (ties are broken in favour of the larger dimension).  A window is kept
    when that dominant dimension is at least 25 and at least 10 MP vectors
    have it, which avoids degenerate PCA inputs.

    NOTE(review): earlier documentation stated the dominant dimension must
    *equal* 50; the query accepts any dominant dimension >= 25.  Confirm which
    of the two is intended.

    Args:
        db_path: Path to the DuckDB database (opened read-only).

    Returns:
        Qualifying window ids in ascending order, or an empty list when the
        query fails (the failure is logged).
    """
    query = """
        WITH vec_dims AS (
            SELECT window_id, json_array_length(vector) AS dim
            FROM svd_vectors
            WHERE entity_type = 'mp'
        ),
        window_dim_counts AS (
            SELECT window_id, dim, COUNT(*) AS cnt
            FROM vec_dims
            GROUP BY window_id, dim
        ),
        dominant AS (
            SELECT DISTINCT ON (window_id) window_id, dim, cnt
            FROM window_dim_counts
            ORDER BY window_id, cnt DESC, dim DESC
        )
        SELECT window_id
        FROM dominant
        WHERE dim >= 25 AND cnt >= 10
        ORDER BY window_id
    """
    con = duckdb.connect(database=db_path, read_only=True)
    try:
        return [row[0] for row in con.execute(query).fetchall()]
    except Exception:
        logger.exception("Failed to query uniform-dim windows")
        return []
    finally:
        con.close()
def _should_swap_axes(axis_def: dict) -> bool:
"""Return True if the Y axis is 'Links–Rechts' and the X axis is not.
When true, caller should swap x/y positions and metadata so left-right
is conventionally on the horizontal axis.
"""
lr = "Links\u2013Rechts"
return axis_def.get("y_label") == lr and axis_def.get("x_label") != lr
def _swap_axes(
positions_by_window: dict,
axis_def: dict,
) -> tuple:
"""Swap x and y in all positions and axis metadata.
Pure function — returns (new_positions_by_window, new_axis_def).
"""
new_positions: dict = {}
for wid, pos_dict in positions_by_window.items():
new_positions[wid] = {ent: (y, x) for ent, (x, y) in pos_dict.items()}
new_ax = dict(axis_def)
# Non-paired keys pass through unchanged
# Swap paired scalar keys
new_ax["x_label"] = axis_def.get("y_label")
new_ax["y_label"] = axis_def.get("x_label")
# Swap paired dict keys
for x_key, y_key in [
("x_quality", "y_quality"),
("x_interpretation", "y_interpretation"),
("x_top_motions", "y_top_motions"),
("x_label_confidence", "y_label_confidence"),
("x_axis", "y_axis"),
]:
new_ax[x_key] = axis_def.get(y_key)
new_ax[y_key] = axis_def.get(x_key)
return new_positions, new_ax
def _render_axis_motions(label: str, conf_pct: str, top: dict) -> None:
    """Write an axis heading followed by its defining motions.

    ``top`` is looked up under the keys ``"+"`` and ``"-"``; each non-empty
    value yields one markdown line with at most three entries, every entry
    being a two-item sequence rendered as ``first (second)``.
    """
    st.markdown(f"**{label}**{conf_pct}")
    icons = {"+": "", "-": ""}
    for sign in ("+", "-"):
        titles = top.get(sign, [])
        if not titles:
            continue
        summary = " · ".join(f"{t} ({d})" for t, d in titles[:3])
        st.markdown("&nbsp;&nbsp;" + icons[sign] + " " + summary)
@st.cache_data(show_spinner="2D posities berekenen (kan even duren)…")
def load_positions(
    db_path: str, window_size: str = "quarterly"
) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict]:
    """Compute 2D positions per window using PCA on aligned SVD vectors.

    Args:
        db_path: Path to the DuckDB database.
        window_size: ``"annual"`` keeps only windows whose id does not contain
            ``"-Q"``; any other value returns every computed window.

    Returns:
        positions_by_window: {window_id: {entity_name: (x, y)}}
        axis_def: dict with x_axis, y_axis, method keys (plus whatever
            ``classify_axes`` adds when it succeeds).
        Both are empty dicts when no usable window exists.
    """
    from analysis.political_axis import compute_2d_axes

    # Always compute PCA on ALL uniform-dim windows (quarterly + annual) so that
    # the principal components are determined by the full temporal spread of data.
    # Using only annual windows (11) causes PC1 to capture cross-temporal drift
    # instead of left-right ideology, resulting in a ~90° rotation.
    all_available = get_uniform_dim_windows(db_path)
    if not all_available:
        return {}, {}

    positions_by_window, axis_def = compute_2d_axes(
        db_path,
        window_ids=all_available,
        method="pca",
        pca_residual=True,
        normalize_vectors=True,
    )

    # Axis labelling is best-effort: on failure the unlabelled axis_def from
    # compute_2d_axes is used as-is.
    try:
        from analysis.axis_classifier import classify_axes

        axis_def = classify_axes(positions_by_window, axis_def, db_path)
    except Exception:
        logger.exception("classify_axes failed; using generic axis labels")

    if _should_swap_axes(axis_def):
        positions_by_window, axis_def = _swap_axes(positions_by_window, axis_def)

    # Filter displayed windows by window_size AFTER PCA computation.
    if window_size == "annual":
        annual_keys = {w for w in all_available if "-Q" not in w}
        positions_by_window = {
            w: v for w, v in positions_by_window.items() if w in annual_keys
        }
    return positions_by_window, axis_def
@st.cache_data(show_spinner="Partijkaart laden…")
def load_party_map(db_path: str) -> Dict[str, str]:
    """Build the ``{mp_name: party}`` lookup used by the tabs.

    The raw mapping comes from ``analysis.visualize._load_party_map``; the
    long name "Nieuw Sociaal Contract" is replaced by "NSC", every other party
    name is kept as delivered.  Any failure while loading is logged and an
    empty dict is returned.
    """
    from analysis.visualize import _load_party_map

    _PARTY_ALIASES: Dict[str, str] = {
        "Nieuw Sociaal Contract": "NSC",
    }
    try:
        normalised: Dict[str, str] = {}
        for mp, party in _load_party_map(db_path).items():
            normalised[mp] = _PARTY_ALIASES.get(party, party)
        return normalised
    except Exception:
        logger.exception("Failed to load party map")
        return {}
@st.cache_data(show_spinner="Actieve Kamerleden laden…")
def load_active_mps(db_path: str) -> set:
    """Return the set of mp_name values that are currently seated in parliament.

    An MP is considered active if their mp_metadata row has tot_en_met IS NULL,
    meaning they have no recorded end date for their current seat.

    Args:
        db_path: Path to the DuckDB database (opened read-only).

    Returns:
        Set of ``mp_name`` values; an empty set when the database cannot be
        opened or queried (the failure is logged).
    """
    try:
        con = duckdb.connect(database=db_path, read_only=True)
        try:
            rows = con.execute(
                "SELECT mp_name FROM mp_metadata WHERE tot_en_met IS NULL"
            ).fetchall()
        finally:
            # Close even when the query raises, so the handle is not leaked.
            con.close()
        return {r[0] for r in rows}
    except Exception:
        logger.exception("Failed to load active MPs")
        return set()
def compute_party_discipline(
    db_path: str,
    start_date: str,
    end_date: str,
) -> pd.DataFrame:
    """Compute per-party voting discipline for roll-call votes in a date range.

    Only individual MP vote rows are used (mp_name LIKE '%,%').

    The score called "Rice index" here is, per motion and party, the fraction
    of that party's voting MPs who voted with the party majority (ties go to
    the alphabetically first vote).  The per-party score is the average of
    that fraction over all motions in the date range.  Only 'voor' and 'tegen'
    votes are counted, compared case-insensitively; absent and abstaining MPs
    are excluded from the calculation.

    Args:
        db_path: Path to the DuckDB database (opened read-only).
        start_date: Inclusive lower bound, castable to DATE by DuckDB.
        end_date: Inclusive upper bound, castable to DATE by DuckDB.

    Returns:
        DataFrame with columns [party, n_motions, discipline] sorted by
        discipline ascending.  An empty DataFrame with those columns is
        returned on any database error (logged as a warning); a range without
        qualifying votes yields whatever empty result the query produces.
    """
    query = """
        WITH individual_votes AS (
            SELECT
                motion_id,
                party,
                LOWER(vote) AS vote
            FROM mp_votes
            WHERE mp_name LIKE '%,%'
              AND date >= CAST(? AS DATE)
              AND date <= CAST(? AS DATE)
              -- Filter on the lower-cased value so the filter and the
              -- projected column agree regardless of stored casing.
              AND LOWER(vote) IN ('voor', 'tegen')
        ),
        vote_counts AS (
            SELECT
                motion_id,
                party,
                vote,
                COUNT(*) AS cnt
            FROM individual_votes
            GROUP BY motion_id, party, vote
        ),
        majority_vote AS (
            SELECT
                motion_id,
                party,
                FIRST(vote ORDER BY cnt DESC, vote ASC) AS maj_vote,
                SUM(cnt) AS total_mp_votes
            FROM vote_counts
            GROUP BY motion_id, party
        ),
        rice_per_motion AS (
            SELECT
                mv.motion_id,
                mv.party,
                SUM(CASE WHEN vc.vote = mv.maj_vote THEN vc.cnt ELSE 0 END)
                    * 1.0 / mv.total_mp_votes AS rice
            FROM majority_vote mv
            JOIN vote_counts vc
              ON mv.motion_id = vc.motion_id AND mv.party = vc.party
            GROUP BY mv.motion_id, mv.party, mv.total_mp_votes
        )
        SELECT
            party,
            COUNT(DISTINCT motion_id) AS n_motions,
            AVG(rice) AS discipline
        FROM rice_per_motion
        GROUP BY party
        ORDER BY discipline ASC
    """
    conn = None
    try:
        conn = duckdb.connect(db_path, read_only=True)
        return conn.execute(query, [start_date, end_date]).fetchdf()
    except Exception as exc:
        logger.warning("compute_party_discipline failed: %s", exc)
        return pd.DataFrame(columns=["party", "n_motions", "discipline"])
    finally:
        if conn is not None:
            try:
                conn.close()
            except Exception:
                # Closing is best-effort; the result (or fallback) is already set.
                pass
def _load_mp_vectors_by_party(db_path: str) -> Dict[str, List[np.ndarray]]:
    """Collect the SVD vectors of individual MPs, grouped per party.

    The mp→party mapping is read from ``mp_metadata`` (rows that start on or
    after 2023-11-22, or that are open-ended, or that end on or after that
    date), with party names passed through ``_PARTY_NORMALIZE``.  Vectors come
    from ``svd_vectors`` rows of type 'mp' in the ``current_parliament``
    window.  MPs without a mapped party, or whose party is not in
    ``CURRENT_PARLIAMENT_PARTIES``, are left out.

    Returns:
        {party_name: [np.ndarray, ...]} — one 1-D float array per MP.
    """
    con = duckdb.connect(database=db_path, read_only=True)
    try:
        # Rows arrive ordered by ``van`` ascending, so for an MP with several
        # rows the last one read (the latest assignment) overwrites the rest.
        meta_rows = con.execute(
            "SELECT mp_name, party FROM mp_metadata "
            "WHERE van >= '2023-11-22' OR tot_en_met IS NULL OR tot_en_met >= '2023-11-22' "
            "ORDER BY van ASC"
        ).fetchall()
        mp_party: Dict[str, str] = {
            mp_name: _PARTY_NORMALIZE.get(party, party)
            for mp_name, party in meta_rows
            if mp_name and party
        }

        vector_rows = con.execute(
            "SELECT entity_id, vector FROM svd_vectors "
            "WHERE entity_type='mp' AND window_id='current_parliament'"
        ).fetchall()

        grouped: Dict[str, List[np.ndarray]] = {}
        for entity_id, raw_vec in vector_rows:
            party = mp_party.get(entity_id)
            if party is None or party not in CURRENT_PARLIAMENT_PARTIES:
                continue

            # The stored vector may be JSON text, JSON bytes, a list, or some
            # other iterable; anything that cannot be iterated is skipped.
            if isinstance(raw_vec, str):
                components = json.loads(raw_vec)
            elif isinstance(raw_vec, (bytes, bytearray)):
                components = json.loads(raw_vec.decode())
            elif isinstance(raw_vec, list):
                components = raw_vec
            else:
                try:
                    components = list(raw_vec)
                except Exception:
                    continue

            # Missing components count as 0.0.
            as_floats = np.array(
                [0.0 if v is None else float(v) for v in components]
            )
            grouped.setdefault(party, []).append(as_floats)
        return grouped
    finally:
        try:
            con.close()
        except Exception:
            pass
@st.cache_data(show_spinner="Partijposities op SVD-assen laden…")
def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
    """Return one mean SVD vector per party.

    The per-MP vectors come from ``_load_mp_vectors_by_party``; for every
    party they are stacked and averaged component-wise.

    Returns:
        {party_name: [float, ...]} — the mean over all MPs of that party, or
        an empty dict when loading or averaging fails (the failure is logged).
    """
    try:
        averaged: Dict[str, List[float]] = {}
        for party, vecs in _load_mp_vectors_by_party(db_path).items():
            averaged[party] = np.array(vecs).mean(axis=0).tolist()
        return averaged
    except Exception:
        logger.exception("Failed to load party axis scores")
        return {}
@st.cache_data(show_spinner="Partij-MP vectoren laden…")
def load_party_mp_vectors(db_path: str) -> Dict[str, List[np.ndarray]]:
    """Cached access to the per-party lists of individual MP SVD vectors.

    Delegates to ``_load_mp_vectors_by_party``, i.e. the same MP→party mapping
    that ``load_party_axis_scores`` averages over; intended as input for the
    bootstrap CI computation.

    Returns:
        {party_name: [np.ndarray, ...]} — one array per MP; an empty dict when
        loading fails (the failure is logged).
    """
    try:
        vectors = _load_mp_vectors_by_party(db_path)
    except Exception:
        logger.exception("Failed to load party MP vectors")
        return {}
    return vectors
@st.cache_data(show_spinner="Bootstrap CI berekenen…")
def _cached_bootstrap_cis(
    party_mp_vectors: Dict[str, List[np.ndarray]],
) -> Dict[str, Dict]:
    """Streamlit-cached pass-through to ``compute_party_bootstrap_cis``.

    The project import happens at call time, so importing this module does not
    pull in ``analysis.political_axis``.
    """
    from analysis.political_axis import compute_party_bootstrap_cis

    bootstrap_result = compute_party_bootstrap_cis(party_mp_vectors)
    return bootstrap_result
@st.cache_data(show_spinner="Scree-plot laden…")
def load_scree_data(db_path: str) -> List[float]:
    """Fetch the values for the scree plot from ``compute_svd_spectrum``.

    Per the original author's note, these are explained variance ratios (%)
    for all SVD components, sorted descending, based on the same
    Procrustes-aligned multi-window matrix as the compass axes.
    NOTE(review): that contract lives in ``analysis.political_axis`` and is
    not visible here — confirm there.

    Returns an empty list when the import or the computation fails (the
    failure is logged).
    """
    try:
        from analysis.political_axis import compute_svd_spectrum

        spectrum = compute_svd_spectrum(db_path)
    except Exception:
        logger.exception("Failed to load scree data")
        return []
    return spectrum
def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
    """Render a scree plot showing relative SVD component importance.

    Highlighted bars for the top-2 components (used in the compass); muted bars
    for the rest.  A cumulative dotted line on the same y-axis helps spot the
    elbow, and a 50 % reference line is drawn.

    Nothing is rendered when ``importances`` is empty or when the slice
    selected by ``n_show`` is empty (e.g. ``n_show=0``); the latter used to
    fail on ``max()`` of an empty sequence.

    Args:
        importances: Importance values sorted descending (from load_scree_data);
            treated as percentages for labelling.
        n_show: How many components to display (default: first 15).
    """
    if not importances:
        return

    # Slice to n_show for display; the cumulative line covers the shown part only.
    data = list(importances[:n_show])
    if not data:
        return
    ranks = list(range(1, len(data) + 1))

    # Running total for the overlay line.
    cumsum = []
    running = 0.0
    for value in data:
        running += value
        cumsum.append(running)

    # Colour: first 2 bars highlighted (compass axes), rest muted.
    n_highlight = 2
    bar_colours = [
        "#1565C0" if i < n_highlight else "#90CAF9" for i in range(len(data))
    ]

    fig = go.Figure()
    fig.add_trace(
        go.Bar(
            x=ranks,
            y=data,
            marker_color=bar_colours,
            hovertemplate="As %{x}<br><b>%{y:.1f}%</b> verklaarde variantie<extra></extra>",
            showlegend=False,
        )
    )
    # Cumulative line (dotted, warm amber).
    fig.add_trace(
        go.Scatter(
            x=ranks,
            y=cumsum,
            mode="lines+markers",
            line={"color": "#F57C00", "width": 2, "dash": "dot"},
            marker={"size": 5, "color": "#F57C00"},
            hovertemplate="As %{x}<br>Cumulatief: <b>%{y:.1f}%</b><extra></extra>",
            name="Cumulatief",
            showlegend=True,
        )
    )
    # 50 % reference line.
    fig.add_hline(
        y=50,
        line_dash="dash",
        line_color="#BDBDBD",
        line_width=1,
        annotation_text="50%",
        annotation_position="right",
        annotation_font_color="#9E9E9E",
        annotation_font_size=11,
    )
    # Value labels just above the highlighted bars.
    for rank, value in list(zip(ranks, data))[:n_highlight]:
        fig.add_annotation(
            x=rank,
            y=value + 0.3,
            text=f"{value:.1f}%",
            showarrow=False,
            font={"size": 11, "color": "#1565C0"},
            yanchor="bottom",
        )
    fig.update_layout(
        height=280,
        margin={"l": 10, "r": 50, "t": 30, "b": 40},
        title={
            "text": "Belang per SVD-as",
            "font": {"size": 13, "color": "#555555"},
            "x": 0.02,
            "xanchor": "left",
        },
        legend={
            "orientation": "h",
            "x": 0.5,
            "xanchor": "center",
            "y": 1.08,
            "font": {"size": 11},
        },
        xaxis={
            "title": {"text": "As (rang)", "font": {"size": 11}},
            "tickmode": "linear",
            "tick0": 1,
            "dtick": 1,
            "showline": False,
            "showgrid": False,
        },
        yaxis={
            "title": {"text": "% van totale variantie", "font": {"size": 11}},
            "showline": False,
            "showgrid": True,
            "gridcolor": "#eeeeee",
            "ticksuffix": "%",
            # Leave headroom above the highest cumulative point.
            "range": [0, max(cumsum) * 1.08],
        },
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        bargap=0.25,
    )
    st.plotly_chart(fig, use_container_width=True)
def _build_party_axis_figure(
    party_scores: Dict[str, List[float]],
    comp_sel: int,
    theme: dict,
    bootstrap_data: Optional[Dict[str, Dict]] = None,
) -> Optional[go.Figure]:
    """Build a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`.

    Pure function that returns a go.Figure (no Streamlit calls).

    Args:
        party_scores: {party_name: [float, ...]} — mean SVD vectors per party.
        comp_sel: 1-indexed SVD axis number.  Parties whose vector has no such
            component are skipped; values below 1 select nothing (previously
            they silently indexed from the end of the vector).
        theme: dict; the keys ``positive_pole``, ``negative_pole`` and ``flip``
            are read here.  With ``flip`` truthy the scores are negated and the
            pole labels exchanged.
        bootstrap_data: optional output from compute_party_bootstrap_cis —
            {party: {centroid, ci_lower, ci_upper, std, n_mps}}.  When given,
            error bars of half the CI width are drawn, the hover text shows N,
            and parties with a single MP get a diamond marker.

    Returns:
        go.Figure, or None if no data available.
    """
    if not party_scores:
        return None

    axis_idx = comp_sel - 1  # 0-based index into the party vector
    flip = theme.get("flip", False)

    parties: List[str] = []
    scores: List[float] = []
    for party, vec in party_scores.items():
        if 0 <= axis_idx < len(vec):
            score = vec[axis_idx]
            parties.append(party)
            scores.append(-score if flip else score)
    if not scores:
        return None

    colours = [PARTY_COLOURS.get(p, "#9E9E9E") for p in parties]

    # Hover text: include N when bootstrap data is available.
    if bootstrap_data:
        hover = []
        for p, s in zip(parties, scores):
            bd = bootstrap_data.get(p)
            n_mps = bd["n_mps"] if bd else "?"
            hover.append(f"{p}: {s:.3f} (N={n_mps})")
    else:
        hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]

    # With flip the scores were negated, so the positive pole ends up on the left.
    pos_pole = theme.get("positive_pole", "")
    neg_pole = theme.get("negative_pole", "")
    left_label = pos_pole if flip else neg_pole
    right_label = neg_pole if flip else pos_pole

    fig = go.Figure()

    # Baseline: extend 15 % of each extreme's magnitude *outward*.  Multiplying
    # by 1.15 only moves outward for a negative minimum / positive maximum and
    # would pull the line inside the data when all scores share one sign.
    lowest, highest = min(scores), max(scores)
    x_min = lowest - 0.15 * abs(lowest)
    x_max = highest + 0.15 * abs(highest)
    if x_min == x_max:
        x_min, x_max = x_min - 1, x_max + 1
    fig.add_trace(
        go.Scatter(
            x=[x_min, x_max],
            y=[0, 0],
            mode="lines",
            line={"color": "#cccccc", "width": 1},
            hoverinfo="skip",
            showlegend=False,
        )
    )

    # Marker settings — bootstrap data adds error bars and diamond markers.
    marker_kwargs: dict = {"size": 18, "color": colours}
    error_x_kwargs: Optional[dict] = None
    if bootstrap_data:
        error_array = []
        symbols = []
        for p in parties:
            bd = bootstrap_data.get(p)
            if bd:
                # Symmetric bar: half the CI width on the selected component.
                err = (bd["ci_upper"][axis_idx] - bd["ci_lower"][axis_idx]) / 2
                error_array.append(abs(float(err)))
                symbols.append("diamond" if bd["n_mps"] == 1 else "circle")
            else:
                error_array.append(0.0)
                symbols.append("circle")
        marker_kwargs["symbol"] = symbols
        error_x_kwargs = {"type": "data", "array": error_array, "visible": True}

    # Party markers.
    scatter_kwargs: dict = {
        "x": scores,
        "y": [0] * len(scores),
        "mode": "markers+text",
        "text": parties,
        "textposition": "top center",
        "marker": marker_kwargs,
        "hovertext": hover,
        "hoverinfo": "text",
        "showlegend": False,
    }
    if error_x_kwargs is not None:
        scatter_kwargs["error_x"] = error_x_kwargs
    fig.add_trace(go.Scatter(**scatter_kwargs))

    fig.update_layout(
        height=160,
        margin={"l": 10, "r": 10, "t": 10, "b": 30},
        xaxis={
            "title": f"{left_label} | {right_label}",
            "showticklabels": False,
            "showline": False,
            "showgrid": False,
            "zeroline": False,
        },
        yaxis={"visible": False, "range": [-1, 2]},
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
    )
    return fig
def _render_party_axis_chart(
    party_scores: Dict[str, List[float]],
    comp_sel: int,
    theme: dict,
    bootstrap_data: Optional[Dict[str, Dict]] = None,
) -> None:
    """Show the 1D party-position chart for SVD axis `comp_sel` in Streamlit.

    The figure is built by _build_party_axis_figure; when that yields no
    figure a short caption is shown instead of a chart.
    """
    fig = _build_party_axis_figure(party_scores, comp_sel, theme, bootstrap_data)
    if fig is not None:
        st.plotly_chart(fig, use_container_width=True)
        return
    st.caption("_Partijdata niet beschikbaar voor deze as._")
@st.cache_data(show_spinner="Moties laden…")
def load_motions_df(db_path: str) -> pd.DataFrame:
    """Read the motions table into a DataFrame (read-only connection).

    ``date`` is converted with ``pd.to_datetime`` (unparseable values become
    NaT) and a ``year`` column is derived from it.  On any failure the error
    is logged and an empty DataFrame is returned.
    """
    query = (
        "\n"
        "SELECT id, title, description, date, policy_area,\n"
        "voting_results, layman_explanation,\n"
        "winning_margin, controversy_score, url\n"
        "FROM motions\n"
    )
    con = duckdb.connect(database=db_path, read_only=True)
    try:
        motions = con.execute(query).fetchdf()
        motions["date"] = pd.to_datetime(motions["date"], errors="coerce")
        motions["year"] = motions["date"].dt.year
    except Exception:
        logger.exception("Failed to load motions")
        motions = pd.DataFrame()
    finally:
        con.close()
    return motions
def query_similar(
    db_path: str,
    source_motion_id: int,
    vector_type: str = "fused",
    top_k: int = 10,
) -> pd.DataFrame:
    """Look up the ``top_k`` highest-scoring neighbours of a motion.

    Reads ``similarity_cache`` joined to ``motions`` for the given source
    motion and vector type, best score first.  On any query failure the error
    is logged and an empty DataFrame is returned.
    """
    query = (
        "\n"
        "SELECT sc.target_motion_id, sc.score, sc.window_id,\n"
        "m.title, m.date, m.policy_area\n"
        "FROM similarity_cache sc\n"
        "JOIN motions m ON m.id = sc.target_motion_id\n"
        "WHERE sc.source_motion_id = ?\n"
        "AND sc.vector_type = ?\n"
        "ORDER BY sc.score DESC\n"
        "LIMIT ?\n"
    )
    params = [source_motion_id, vector_type, top_k]
    con = duckdb.connect(database=db_path, read_only=True)
    try:
        neighbours = con.execute(query, params).fetchdf()
    except Exception:
        logger.exception(
            "Failed to query similarity cache for motion %s", source_motion_id
        )
        neighbours = pd.DataFrame()
    finally:
        con.close()
    return neighbours
# ---------------------------------------------------------------------------
# Shared rendering helpers
# ---------------------------------------------------------------------------
def _render_voting_results(voting_results_json) -> None:
    """Render a voting_results JSON blob as lines grouped by vote.

    The JSON is stored as {party_or_mp: vote} where vote is one of
    'voor', 'tegen', 'onthouden', 'afwezig'.  Actors are grouped by the
    lower-cased, stripped vote; the four known votes are shown first in that
    order, any other vote values after them.

    Args:
        voting_results_json: A JSON string or an already-decoded object.
            Falsy input, an empty dict, or anything that is not a dict
            renders nothing.

    Rendering is best-effort: any error (e.g. malformed JSON) is logged and
    swallowed so one bad row cannot break the page.
    """
    if not voting_results_json:
        return
    try:
        vdata = (
            json.loads(voting_results_json)
            if isinstance(voting_results_json, str)
            else voting_results_json
        )
        if not isinstance(vdata, dict) or not vdata:
            return

        # Group {vote: [actor, ...]}
        by_vote: Dict[str, List[str]] = {}
        for actor, vote in vdata.items():
            vote_str = str(vote).lower().strip()
            by_vote.setdefault(vote_str, []).append(str(actor))

        # Render in fixed order, then whatever other vote values occurred.
        vote_order = ["voor", "tegen", "onthouden", "afwezig"]
        vote_emoji = {"voor": "", "tegen": "", "onthouden": "🟡", "afwezig": ""}
        other_votes = [k for k in by_vote if k not in vote_order]
        # vdata is a non-empty dict here, so at least one group is rendered;
        # no "nothing to show" fallback is needed after the loop.
        for v in vote_order + other_votes:
            actors = by_vote.get(v)
            if not actors:
                continue
            # Strip so an empty emoji does not leave a space right after the
            # opening "**", which would stop Markdown from rendering bold.
            heading = f"{vote_emoji.get(v, '')} {v.capitalize()}".strip()
            st.markdown(
                f"**{heading}** ({len(actors)}): {', '.join(sorted(actors))}"
            )
    except Exception:
        logger.exception("Failed to render voting results")
# ---------------------------------------------------------------------------
# Tab 1: Politiek Kompas
# ---------------------------------------------------------------------------
def _add_y_direction_annotations(fig: go.Figure) -> None:
"""Add ▲ Progressief / ▼ Conservatief labels above and below the Y axis."""
common = dict(
xref="paper",
yref="paper",
x=-0.07,
showarrow=False,
font=dict(size=11, color="#666666"),
)
fig.add_annotation(**common, y=1.02, text="▲ Progressief", xanchor="center")
fig.add_annotation(**common, y=-0.06, text="▼ Conservatief", xanchor="center")
def _window_to_dates(window_id: str) -> tuple[str, str]:
"""Return (start_date, end_date) ISO strings for a given window_id.
Annual windows like '2024' → ('2024-01-01', '2024-12-31').
'current_parliament' → ('2023-11-22', '2099-12-31') (2023 formation date, open end).
Unknown formats → ('2000-01-01', '2099-12-31') (effectively all time).
"""
if window_id == "current_parliament":
return ("2023-11-22", "2099-12-31")
if re.fullmatch(r"\d{4}", window_id):
return (f"{window_id}-01-01", f"{window_id}-12-31")
m = re.fullmatch(r"(\d{4})-Q([1-4])", window_id)
if m:
year, q = int(m.group(1)), int(m.group(2))
starts = {1: "01-01", 2: "04-01", 3: "07-01", 4: "10-01"}
ends = {1: "03-31", 2: "06-30", 3: "09-30", 4: "12-31"}
return (f"{year}-{starts[q]}", f"{year}-{ends[q]}")
return ("2000-01-01", "2099-12-31")
def build_compass_tab(db_path: str, window_size: str) -> None:
    """Render the "Politiek Kompas" tab.

    Steps, in render order:
    1. Load annual 2D positions, the MP→party map and the set of active MPs.
    2. Show the controls (window, MP/party level, minimum MPs per party).
    3. Filter and de-duplicate the MPs of the chosen window, attach parties,
       drop parties below the minimum size, and draw the scatter plot.
    4. Show axis interpretation captions and the "what defines these axes"
       expander when the axis metadata provides them.
    5. Show the voting-cohesion bar chart and top/bottom-3 tables for the
       date range of the chosen window.

    Args:
        db_path: Path to the DuckDB database.
        window_size: Not used by this tab; positions are always loaded with
            "annual".

    NOTE(review): the source this was reconstructed from had lost its
    indentation.  Which statements sit inside ``with col1:`` / ``with col2:``
    is a best guess — confirm against the intended layout.
    """
    st.subheader("Politiek Kompas")
    st.markdown(
        "2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)."
    )
    # Compass always uses annual windows regardless of the sidebar window_size setting.
    positions_by_window, axis_def = load_positions(db_path, "annual")
    if not positions_by_window:
        st.warning(
            "Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid."
        )
        return
    party_map = load_party_map(db_path)
    active_mps = load_active_mps(db_path)
    # Sort windows: year windows first (ascending), current_parliament last.
    year_windows = sorted(w for w in positions_by_window if w != "current_parliament")
    has_current = "current_parliament" in positions_by_window
    windows = year_windows + (["current_parliament"] if has_current else [])
    # Years flagged as sparse, and the axis-quality value below which the
    # interpretation captions are shown.
    _SPARSE_YEARS = {"2016", "2017", "2018"}
    _THRESHOLD = 0.65

    def _window_label(w: str) -> str:
        # Display label for a window id in the selectbox and chart titles.
        if w == "current_parliament":
            return "Huidig parlement"
        if w in _SPARSE_YEARS:
            # NOTE(review): this branch returns the id unchanged, so sparse
            # years currently carry no visible marker.
            return f"{w}"
        return w

    col1, col2 = st.columns([3, 1])
    with col2:
        window_idx = st.selectbox(
            "Jaar",
            options=windows,
            index=len(windows) - 1,  # default: last entry (current_parliament when present)
            format_func=_window_label,
        )
        level = st.radio(
            "Weergave",
            options=["Kamerleden", "Partijen"],
            index=0,
            horizontal=True,
        )
        min_mps = st.number_input(
            "Min. Kamerleden per partij",
            min_value=1,
            max_value=20,
            value=3,
            step=1,
            help="Partijen met minder dan dit aantal zetels worden niet weergegeven.",
        )
    pos = positions_by_window.get(window_idx, {})
    if not pos:
        st.info(f"Geen data voor venster {window_idx}")
        return
    # For current_parliament, restrict to MPs who are still seated (tot_en_met IS NULL).
    # Historical windows include all MPs active at the time — no restriction needed.
    if window_idx == "current_parliament":
        pos = {mp: xy for mp, xy in pos.items() if mp in active_mps}

    # Deduplicate MPs whose names appear both with and without a parenthetical first name,
    # e.g. "Dijk, J.P." and "Dijk, J.P. (Jimmy)". Keep the canonical (stripped) name and
    # average positions if both variants are present.
    def _strip_paren(name: str) -> str:
        # Remove every "(...)" group plus the whitespace in front of it.
        return re.sub(r"\s*\([^)]*\)", "", name).strip()

    deduped: Dict[str, Tuple[float, float]] = {}
    for name, (x, y) in pos.items():
        base = _strip_paren(name)
        if base in deduped:
            # Pairwise average with the value stored so far.
            ox, oy = deduped[base]
            deduped[base] = ((ox + x) / 2, (oy + y) / 2)
        else:
            deduped[base] = (x, y)
    pos = deduped
    rows = []
    for name, (x, y) in pos.items():
        # Try the name as-is first, then its stripped form; otherwise "Unknown".
        party = party_map.get(name) or party_map.get(_strip_paren(name), "Unknown")
        rows.append({"name": name, "x": x, "y": y, "party": party})
    df_pos = pd.DataFrame(rows)
    # Drop parties below the minimum MP threshold (unreliable centroids).
    # "Unknown" is never counted, so MPs without a party are dropped as well.
    party_counts = df_pos[df_pos["party"] != "Unknown"]["party"].value_counts()
    valid_parties = set(party_counts[party_counts >= min_mps].index)
    df_pos = df_pos[df_pos["party"].isin(valid_parties)]
    if df_pos.empty:
        st.info("Geen partijen met genoeg Kamerleden voor dit venster.")
        return
    _x_label = axis_def.get("x_label", "Links\u2013Rechts")
    _y_label = axis_def.get("y_label", "Progressief\u2013Conservatief")
    if level == "Partijen":
        # Aggregate to party centroids
        df_party = df_pos.groupby("party", as_index=False).agg(
            x=("x", "mean"), y=("y", "mean"), n=("name", "count")
        )
        df_party["name"] = df_party["party"]
        colour_map = {
            p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_party["party"].unique()
        }
        fig = px.scatter(
            df_party,
            x="x",
            y="y",
            color="party",
            text="party",
            hover_name="party",
            hover_data={"party": False, "x": ":.3f", "y": ":.3f", "n": True},
            color_discrete_map=colour_map,
            title=f"Politiek Kompas — {_window_label(window_idx)} (partijen)",
            labels={
                "x": _x_label,
                "y": _y_label,
                "n": "Kamerleden",
            },
        )
        fig.update_traces(textposition="top center", marker_size=14)
    else:
        # One marker per MP, coloured by party.
        colour_map = {
            p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique()
        }
        fig = px.scatter(
            df_pos,
            x="x",
            y="y",
            color="party",
            hover_name="name",
            hover_data={"party": True, "x": ":.3f", "y": ":.3f"},
            color_discrete_map=colour_map,
            title=f"Politiek Kompas — {_window_label(window_idx)}",
            labels={"x": _x_label, "y": _y_label},
        )
    # Fixed axis ranges, identical for every window.
    fig.update_layout(
        height=600,
        legend_title_text="Partij",
        xaxis={"range": [-1, 1]},
        yaxis={"range": [-0.6, 0.6]},
    )
    _add_y_direction_annotations(fig)
    with col1:
        st.plotly_chart(fig, use_container_width=True)
        # Interpretation captions are shown only when an interpretation exists
        # and the axis quality for this window is below _THRESHOLD (a missing
        # quality counts as 1.0, i.e. no caption).
        _x_interp = axis_def.get("x_interpretation", {}).get(window_idx, "")
        _y_interp = axis_def.get("y_interpretation", {}).get(window_idx, "")
        if (
            _x_interp
            and axis_def.get("x_quality", {}).get(window_idx, 1.0) < _THRESHOLD
        ):
            st.caption(_x_interp)
        if (
            _y_interp
            and axis_def.get("y_quality", {}).get(window_idx, 1.0) < _THRESHOLD
        ):
            st.caption(_y_interp)
        # Motion expander — show which motions define each axis for this window
        x_top = axis_def.get("x_top_motions", {}).get(window_idx, {})
        y_top = axis_def.get("y_top_motions", {}).get(window_idx, {})
        x_conf = axis_def.get("x_label_confidence", {}).get(window_idx)
        y_conf = axis_def.get("y_label_confidence", {}).get(window_idx)
        evr = axis_def.get("explained_variance_ratio", [None, None])
        evr0 = evr[0] if evr else None
        _has_motion_data = bool(
            x_top.get("+") or x_top.get("-") or y_top.get("+") or y_top.get("-")
        )
        if _has_motion_data:
            with st.expander("🔍 Wat bepaalt deze assen?"):
                x_conf_pct = (
                    f" (vertrouwen: {x_conf:.0%})" if x_conf is not None else ""
                )
                y_conf_pct = (
                    f" (vertrouwen: {y_conf:.0%})" if y_conf is not None else ""
                )
                _render_axis_motions(f"Horizontale as: {_x_label}", x_conf_pct, x_top)
                _render_axis_motions(f"Verticale as: {_y_label}", y_conf_pct, y_top)
                if evr0 is not None:
                    st.caption(
                        f"De sterkste component verklaart {evr0:.1%} van de variantie in stemgedrag."
                    )
    # --- Voting discipline section ---
    # Parties with fewer qualifying motions than this are left out of the chart.
    _MIN_MOTIONS_FOR_DISCIPLINE = 5
    start_date, end_date = _window_to_dates(window_idx)
    disc_df = compute_party_discipline(db_path, start_date, end_date)
    st.subheader("Stemgedrag cohesie")
    if disc_df.empty:
        st.caption(
            "Te weinig hoofdelijke stemmingen in dit venster voor een cohesieanalyse."
        )
    else:
        disc_df = disc_df[disc_df["n_motions"] >= _MIN_MOTIONS_FOR_DISCIPLINE].copy()
        if disc_df.empty:
            st.caption(
                "Te weinig hoofdelijke stemmingen in dit venster voor een cohesieanalyse."
            )
        else:
            # Only show parties that are also present in the compass above.
            compass_parties = set(df_pos["party"].unique())
            disc_df = disc_df[disc_df["party"].isin(compass_parties)].copy()
            if disc_df.empty:
                st.caption("Geen overlappende partijen tussen kompas en stemmingsdata.")
            else:
                disc_df["discipline_pct"] = (disc_df["discipline"] * 100).round(1)
                disc_df["party_label"] = disc_df.apply(
                    lambda r: f"{r['party']} ({int(r['n_motions'])} moties)", axis=1
                )
                bar_fig = px.bar(
                    disc_df.sort_values("discipline"),
                    x="discipline_pct",
                    y="party_label",
                    orientation="h",
                    color="discipline_pct",
                    color_continuous_scale="RdYlGn",
                    range_color=[80, 100],
                    labels={"discipline_pct": "Cohesie (%)", "party_label": "Partij"},
                    title="Cohesie bij hoofdelijke stemmingen",
                )
                # Chart height grows with the number of parties.
                bar_fig.update_layout(
                    height=max(300, len(disc_df) * 35 + 80),
                    showlegend=False,
                    coloraxis_showscale=False,
                    yaxis_title="",
                )
                st.plotly_chart(bar_fig, use_container_width=True)
                # Three most and three least cohesive parties, side by side.
                top3 = disc_df.nlargest(3, "discipline")[
                    ["party", "discipline_pct", "n_motions"]
                ]
                bot3 = disc_df.nsmallest(3, "discipline")[
                    ["party", "discipline_pct", "n_motions"]
                ]
                col_a, col_b = st.columns(2)
                with col_a:
                    st.markdown("**Meest eensgezind**")
                    st.dataframe(
                        top3.rename(
                            columns={
                                "party": "Partij",
                                "discipline_pct": "Cohesie (%)",
                                "n_motions": "Moties",
                            }
                        ),
                        hide_index=True,
                        use_container_width=True,
                    )
                with col_b:
                    st.markdown("**Meest verdeeld**")
                    st.dataframe(
                        bot3.rename(
                            columns={
                                "party": "Partij",
                                "discipline_pct": "Cohesie (%)",
                                "n_motions": "Moties",
                            }
                        ),
                        hide_index=True,
                        use_container_width=True,
                    )
# ---------------------------------------------------------------------------
# Tab 2: Partij Trajectories
# ---------------------------------------------------------------------------
def build_trajectories_tab(db_path: str, window_size: str) -> None:
    """Render the "Partij Trajectories" tab.

    Loads per-window positions via ``load_positions``, averages them per
    party (MPs mapped to "Unknown" are left out) and draws one spline per
    selected party through its per-window centroids.

    Args:
        db_path: Path handed to ``load_positions`` / ``load_party_map``.
        window_size: Window granularity handed to ``load_positions``.
    """
    st.subheader("Partij Trajectories")
    st.markdown("Hoe bewegen partijen over de tijdsvensters heen?")

    positions_by_window, axis_def = load_positions(db_path, window_size)
    if not positions_by_window:
        st.warning("Geen positiedata beschikbaar.")
        return

    party_map = load_party_map(db_path)

    # centroids[party][window_id] -> (mean x, mean y) of that party's MPs.
    centroids: Dict[str, Dict[str, Tuple[float, float]]] = {}
    for window_id in sorted(positions_by_window.keys()):
        grouped: Dict[str, List[Tuple[float, float]]] = {}
        for mp_name, (x, y) in positions_by_window.get(window_id, {}).items():
            party_name = party_map.get(mp_name, "Unknown")
            if party_name != "Unknown":
                grouped.setdefault(party_name, []).append((x, y))
        for party_name, points in grouped.items():
            mean_x = float(np.mean([pt[0] for pt in points]))
            mean_y = float(np.mean([pt[1] for pt in points]))
            centroids.setdefault(party_name, {})[window_id] = (mean_x, mean_y)

    # Every party that produced at least one centroid is selectable.
    all_parties = set(centroids)
    all_parties_sorted = sorted(all_parties)

    # Default selection, first non-empty candidate list wins:
    # CDA/D66/VVD (the three parties that span the political centre),
    # then the well-known major parties, then simply the first six.
    default_parties: List[str] = []
    for candidates in (["CDA", "D66", "VVD"], KNOWN_MAJOR_PARTIES):
        default_parties = [p for p in candidates if p in all_parties]
        if default_parties:
            break
    else:
        default_parties = all_parties_sorted[:6]

    selected_parties = st.multiselect(
        "Selecteer partijen",
        options=all_parties_sorted,
        default=default_parties,
    )

    fig = go.Figure()
    for party_name in selected_parties:
        per_window = centroids.get(party_name)
        if per_window is None:
            continue
        ordered_windows = sorted(per_window.keys())
        colour = PARTY_COLOURS.get(party_name, "#9E9E9E")
        trace = go.Scatter(
            x=[per_window[w][0] for w in ordered_windows],
            y=[per_window[w][1] for w in ordered_windows],
            mode="lines+markers",
            name=party_name,
            text=ordered_windows,  # full window ID for hover
            line=dict(color=colour, shape="spline", smoothing=1.3),
            marker=dict(color=colour, size=8),
            hovertemplate=(
                f"<b>{party_name}</b><br>"
                "venster: %{text}<br>"
                "x: %{x:.3f}<br>y: %{y:.3f}<extra></extra>"
            ),
        )
        fig.add_trace(trace)

    fig.update_layout(
        title="Partij trajectories",
        xaxis_title=axis_def.get("x_label", "Links\u2013Rechts"),
        yaxis_title=axis_def.get("y_label", "Progressief\u2013Conservatief"),
        height=600,
        legend_title_text="Partij",
    )
    _add_y_direction_annotations(fig)
    st.plotly_chart(fig, use_container_width=True)
# ---------------------------------------------------------------------------
# Tab 3: Motie Zoeken
# ---------------------------------------------------------------------------
def build_search_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Zoeken" tab.

    Filters the motions frame in memory by title substring, year range and
    minimum controversy, then shows the 50 most controversial hits as
    expanders with metrics, the voting breakdown, a source link and the
    similar motions returned by ``query_similar``.

    Args:
        db_path: Path handed to ``load_motions_df`` / ``query_similar``.
        show_rejected: When False, rows whose title is exactly "Verworpen."
            are hidden.
    """
    st.subheader("Motie Zoeken")

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        is_rejected = df["title"].fillna("").str.strip() == "Verworpen."
        df = df[~is_rejected]

    # Controls
    query_col, year_col, controversy_col = st.columns([2, 1, 1])
    query = query_col.text_input(
        "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen"
    )
    years = sorted(df["year"].dropna().astype(int).unique().tolist())
    if years:
        year_range = year_col.select_slider(
            "Jaar", options=years, value=(years[0], years[-1])
        )
    else:
        # No usable years in the data: fall back to a fixed range.
        year_range = (2019, 2024)
    min_controversy = controversy_col.slider(
        "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05
    )

    # Apply filters in-memory
    first_year, last_year = year_range[0], year_range[1]
    working = df.copy()
    working = working[(working["year"] >= first_year) & (working["year"] <= last_year)]
    if min_controversy > 0:
        working = working[working["controversy_score"] >= min_controversy]
    if query:
        needle = query.lower()
        lowered_titles = working["title"].fillna("").str.lower()
        working = working[lowered_titles.str.contains(needle, regex=False)]
    working = working.sort_values(by="controversy_score", ascending=False)

    st.caption(f"{len(working)} resultaten (top 50 getoond)")

    for _, hit in working.head(50).iterrows():
        title = hit.get("title") or f"Motie #{hit['id']}"
        if pd.notna(hit["date"]):
            date_str = hit["date"].strftime("%d %b %Y")
        else:
            date_str = "?"
        controversy = hit.get("controversy_score") or 0

        with st.expander(f"**{title}** — {date_str} — 🔥 {controversy:.2f}"):
            controversy_cell, margin_cell, year_cell = st.columns(3)
            controversy_cell.metric("Controverse", f"{controversy:.2f}")
            margin_cell.metric("Marge", f"{hit.get('winning_margin', 0):.2f}")
            year_cell.metric(
                "Jaar", int(hit["year"]) if pd.notna(hit["year"]) else "?"
            )

            # Voting breakdown
            _render_voting_results(hit.get("voting_results"))

            # Link to original motion
            url = hit.get("url")
            if url and str(url).startswith("http"):
                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

            # Similar motions
            similar = query_similar(db_path, int(hit["id"]), top_k=5)
            if similar.empty:
                st.caption("_Nog geen vergelijkbare moties beschikbaar_")
                continue
            st.markdown("**Vergelijkbare moties:**")
            for _, match in similar.iterrows():
                if pd.notna(match.get("date")):
                    s_date = pd.to_datetime(match["date"]).strftime("%Y")
                else:
                    s_date = ""
                st.markdown(
                    f"- {match.get('title', 'Onbekend')} *(score: {match['score']:.3f}, {s_date})*"
                )
# ---------------------------------------------------------------------------
# Tab 4: Motie Browser
# ---------------------------------------------------------------------------
def build_browser_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Browser" tab.

    Shows a filterable, sortable table of motions and, below it, a detail
    panel for the motion whose ID is entered in the number input.

    Args:
        db_path: Path handed to ``load_motions_df`` / ``query_similar``.
        show_rejected: When False, rows whose title is exactly "Verworpen."
            are hidden.
    """
    st.subheader("Motie Browser")

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        is_rejected = df["title"].fillna("").str.strip() == "Verworpen."
        df = df[~is_rejected]

    # Controls
    year_col, controversy_col, sort_col_widget = st.columns(3)
    years = sorted(df["year"].dropna().astype(int).unique().tolist())
    year_filter = year_col.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years])
    min_controversy_b = controversy_col.slider(
        "Min. controverse",
        min_value=0.0,
        max_value=1.0,
        value=0.0,
        step=0.05,
        key="browser_controversy",
    )
    sort_by = sort_col_widget.selectbox(
        "Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"]
    )

    # Filter
    working = df.copy()
    if year_filter != "(Alle)":
        working = working[working["year"] == int(year_filter)]
    if min_controversy_b > 0:
        working = working[working["controversy_score"] >= min_controversy_b]

    # Sort option -> (column, ascending)
    sort_column, ascending = {
        "Datum (nieuw)": ("date", False),
        "Controverse": ("controversy_score", False),
        "Marge": ("winning_margin", True),
    }[sort_by]
    working = working.sort_values(by=sort_column, ascending=ascending)

    # Display table (only the columns that are actually present)
    wanted_cols = ("id", "title", "date", "controversy_score", "winning_margin")
    available_display = [c for c in wanted_cols if c in working.columns]
    st.dataframe(
        working[available_display].reset_index(drop=True),
        use_container_width=True,
        height=350,
    )

    st.divider()

    # Detail panel
    st.markdown("**Detail weergave** — vul een motie-ID in:")
    if working.empty:
        id_min, id_max, id_default = 1, 99999, 1
    else:
        id_series = working["id"]
        id_min = int(id_series.min())
        id_max = int(id_series.max())
        id_default = int(id_series.iloc[0])
    sel_id = st.number_input(
        "Motie ID",
        min_value=id_min,
        max_value=id_max,
        value=id_default,
        step=1,
    )

    # Look the ID up in the unfiltered-by-controls frame.
    motion_row = df[df["id"] == sel_id]
    if motion_row.empty:
        return

    row = motion_row.iloc[0]
    st.markdown(f"### {row.get('title') or 'Onbekend'}")
    if pd.notna(row["date"]):
        date_str = row["date"].strftime("%d %b %Y")
    else:
        date_str = "?"
    st.caption(
        f"📅 {date_str} | 🔥 Controverse: {row.get('controversy_score', 0):.2f}"
    )

    # Link to original source
    url = row.get("url")
    if url and str(url).startswith("http"):
        st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

    # Voting breakdown
    st.markdown("**Stemuitslag:**")
    _render_voting_results(row.get("voting_results"))

    # Similar motions
    similar = query_similar(db_path, int(sel_id), top_k=10)
    if similar.empty:
        st.caption("_Nog geen vergelijkbare moties beschikbaar voor deze motie_")
    else:
        st.markdown("**Vergelijkbare moties:**")
        st.dataframe(
            similar[["title", "score", "date", "policy_area"]],
            use_container_width=True,
        )
def _svd_motion_row(motion_details: Dict[int, tuple], motion_id) -> Optional[tuple]:
    """Return the fetched DB row for *motion_id*, or None if absent or not an integer."""
    if motion_id is None:
        return None
    try:
        return motion_details.get(int(motion_id))
    except (TypeError, ValueError, OverflowError):
        # Same ids that the batch fetch skipped; render them without metadata.
        return None


def _render_svd_motion_column(
    heading: str,
    motions: list,
    motion_details: Dict[int, tuple],
    arrow: str,
) -> None:
    """Render one pole column: a heading plus one expander per motion."""
    st.markdown(heading)
    for m in motions:
        mid = m.get("motion_id")
        raw_title = m.get("title") or f"Motie #{mid}"
        with st.expander(f"{arrow} {raw_title}"):
            # row layout: (id, title, date, policy_area, url, body_text, voting_results)
            row = _svd_motion_row(motion_details, mid)
            if not row:
                st.caption("_Geen metadata beschikbaar_")
                continue
            date_str = str(row[2])[:10] if row[2] is not None else "?"
            st.caption(f"📅 {date_str} | {row[3] or ''}")
            if row[4] and str(row[4]).startswith("http"):
                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
            if row[5]:
                # NOTE(review): this nests an expander inside another one; some
                # Streamlit releases reject nested expanders — confirm the
                # deployed version supports it.
                with st.expander("Toon volledige tekst"):
                    st.write(row[5])
            _render_voting_results(row[6])


def build_svd_components_tab(db_path: str) -> None:
    """Render the SVD components tab.

    Reads ``thoughts/explorer/top_svd_top_motions.json`` and shows a selector
    over the components found in it, the theme label/explanation for the
    selected component, a party axis chart, and the top motions split over
    two columns by the sign of their score.

    Args:
        db_path: DuckDB database path; opened read-only to fetch motion
            details and handed to the scree/party-score loaders.
    """
    # Political polarisation themes per SVD component (1-indexed, window=2025)
    # Produced by per-axis analysis of all 10 unique top motions (zero cross-axis overlap).
    SVD_THEMES: dict[int, dict[str, str]] = {
        1: {
            "label": "Links-rechts hoofdas",
            "explanation": (
                "De dominante dimensie van het parlement: de klassieke links-rechts tegenstelling "
                "die het meeste verschil in stemgedrag verklaart. Aan de rechterkant (PVV, SGP, VVD, "
                "ChristenUnie) staan moties over defensie-uitbreiding, NAVO-verplichtingen, "
                "juridische ruimte voor drones en gaswinning. Aan de linkerkant (PvdD, SP, DENK, "
                "GroenLinks-PvdA) staan moties over huurverlaging, het veroordelen van "
                "antipersoneelslandmijnen, het opzeggen van het militaire verdrag met Israël en het "
                "oprichten van zorgbuurthuizen. De scheidslijn loopt dwars door thema's als "
                "veiligheid, economie, internationaal recht en sociale bescherming."
            ),
            "positive_pole": "Nationalistisch-conservatief: PVV, SGP, VVD, ChristenUnie",
            "negative_pole": "Progressief-links: PvdD, SP, DENK, GroenLinks-PvdA",
            "flip": False,
        },
        2: {
            "label": "Populistisch nationalisme versus institutioneel progressivisme",
            "explanation": (
                "Deze as scheidt het populistisch-nationalistische bloc (PVV, FVD, Groep Markuszower, "
                "BBB) van het volledige overige parlement. Alleen PVV (+18), FVD (+4) en Groep "
                "Markuszower (+2) scoren positief; alle andere partijen scoren negatief, inclusief "
                "VVD (−15), CDA (−14), SGP (−25) en ChristenUnie (−59). Positieve moties: artsen "
                "vrijpleiten voor hydroxychloroquine/ivermectine, Syriërs terugsturen, geen geld "
                "aan Jordanië, tijdelijke bescherming Oekraïne beëindigen. Negatieve moties: "
                "digitale toegankelijkheid Caribisch Nederland, ethiekprogramma Defensie, zorg voor "
                "slachtoffers bombardement Hawija, zorgkwaliteitsstandaarden. Dit is geen links-rechts "
                "verdeling maar een nativistisch-populistisch vs. institutioneel onderscheid."
            ),
            "positive_pole": "Populistisch-nationalistisch: PVV, FVD, Groep Markuszower, BBB",
            "negative_pole": "Institutioneel: alle overige partijen — van VVD en SGP tot GroenLinks-PvdA en Volt",
            "flip": False,
        },
        3: {
            "label": "Verzorgingsstaat versus bezuinigingen en marktwerking",
            "explanation": (
                "Deze as weerspiegelt de spanning tussen staatsingrijpen en marktliberalisme, "
                "aangescherpt door de kabinetscrisis van 2025. Aan de positieve kant staan moties "
                "die bezuinigingen op zorg en het gemeentefonds willen terugdraaien, winstuitkeringen "
                "in de zorg verbieden en publieke controle over ziekenhuisfusies eisen. SP, PvdD, "
                "GroenLinks-PvdA en PVV stemmen hier gelijk — ondanks hun tegengestelde PC1-posities. "
                "Aan de negatieve kant staan moties "
                "over marktwerking in de zorg, fiscale bedrijfsopvolgingsfaciliteiten (VVD), "
                "doorgaan met besturen ondanks de kabinetscrisis (VVD/Yeşilgöz) en defensie-"
                "uitgaven van 3,5% bbp."
            ),
            "positive_pole": "Pro-verzorgingsstaat: SP, PvdD, GroenLinks-PvdA, PVV (anti-bezuinigingen)",
            "negative_pole": "Marktliberaal en fiscaal conservatief: VVD, D66, CDA, SGP",
            "flip": True,
        },
        4: {
            "label": "Pragmatisch centrisme versus ideologische radicaliteit",
            "explanation": (
                "De gevestigde centrumpartijen (D66, CDA, VVD, 50PLUS) staan tegenover zowel "
                "rechts-radicale als identiteitspolitieke posities. Aan de positieve kant staan "
                "moties over openbare toiletten, vaderbetrokkenheid bij opvoeding, internationale "
                "samenwerking met Australië en Canada, en long covid-expertise. Dit zijn pragmatische, "
                "institutionele beleidsposities. Aan de negatieve kant staan moties over een "
                "migratiesaldo-cap van 60.000, het verlaten van de WHO, kinderen in pleeggezinnen "
                "van hetzelfde geslacht (FVD) en de bescherming van religieuze schoolidentiteit "
                "via artikel 23. De negatieve pool combineert populistisch-rechts met "
                "identiteitsgerichte posities van zowel rechts als links."
            ),
            "positive_pole": "Constructief centrum: D66, CDA, VVD, 50PLUS — pragmatisch en internationaal",
            "negative_pole": "Radicaal-ideologisch: FVD, Groep Markuszower (rechts), ChristenUnie, DENK (religieus/identiteit)",
            "flip": True,
        },
        5: {
            "label": "Christelijk-sociaal communitarisme",
            "explanation": (
                "Deze as scheidt partijen die gemeenschapszorg, burgerplicht en informele "
                "ondersteuningsstructuren benadrukken van partijen die individuele vrijheden en "
                "progressieve maatschappelijke hervorming voorstaan. Aan de positieve kant staan "
                "moties over schuldhulpverlening via vrijwilligersorganisaties, de maatschappelijke "
                "diensttijd voor jongeren met een afstand tot de arbeidsmarkt, en de gastouderopvang. "
                "ChristenUnie, SGP en CDA voeren hier de toon; ook D66 scoort positief door steun "
                "aan sociaal beleid. Aan de negatieve kant staan moties over wettelijke erkenning "
                "van meerouderschap, abortusrecht in het EU-Handvest, armoedebeleid en "
                "buitenlandse beïnvloeding. PvdD, GroenLinks-PvdA en VVD scoren hier negatief."
            ),
            "positive_pole": "Gemeenschapsgericht: ChristenUnie, SGP, CDA, D66 — vrijwilligers, diensttijd, zorgsystemen",
            "negative_pole": "Individualistisch-progressief: PvdD, GroenLinks-PvdA, VVD, PVV",
            "flip": False,
        },
        6: {
            "label": "Klimaat, energie en culturele integratie",
            "explanation": (
                "Aan de positieve kant staan moties die LNG-capaciteit prefereren als alternatief "
                "voor strenge vulgraadverplichtingen, kernenergie als volwaardig CO₂-arm onderdeel "
                "van de energiemix willen erkennen op COP30, en discriminatie- en inclusiemeldpunten "
                "willen inventariseren. SGP, JA21, FVD en PVV scoren sterk positief. Aan de "
                "negatieve kant staan moties die fossiele-industrie-vertegenwoordigers willen weren "
                "van klimaatconferenties, structureel overleg met moslimgemeenschappen willen bij "
                "integratiebeleid, en aanvallen van Israël op Libanon veroordelen. "
                "PvdD, GroenLinks-PvdA, Volt en D66 scoren negatief. "
                "Deze as combineert energieideologie met culturele polarisatie rondom klimaat, "
                "integratie en buitenlandspolitiek."
            ),
            "positive_pole": "Pro-fossiel, nationaal energiebeleid: SGP, JA21, FVD, PVV",
            "negative_pole": "Klimaatgericht en inclusief: PvdD, GroenLinks-PvdA, Volt, D66",
            "flip": False,
        },
        7: {
            "label": "Bestuurlijk pragmatisme en implementatie (indicatief)",
            "explanation": (
                "Een residuele as die overwegend beleidsdossiers uit 2024 (vorige parlementaire "
                "periode) omvat. De scores zijn smal (max ~11 punten) en de partijcombinaties "
                "ideologisch divers — dit label is indicatief. Aan de positieve kant staan "
                "pragmatische bestuursmoties: een compleet kostenoverzicht van producten van eigen "
                "bodem, papieren schoolboeken voor basisvaardigheden, een invoeringstoets voor het "
                "minimumloon en de A2-snelwegplanning. ChristenUnie, Volt, DENK en SP scoren "
                "positief. Aan de negatieve kant staan meer ideologisch geladen moties: een "
                "landelijk stookverbod (PvdD), het strafbaar stellen van verbranding van religieuze "
                "geschriften (DENK), chroom-6 schadevergoedingen en tegenhouden van nieuwe "
                "gaswinning. GroenLinks-PvdA, VVD, FVD en JA21 scoren negatief."
            ),
            "positive_pole": "Praktisch-bestuurlijk: ChristenUnie, Volt, SGP, DENK, SP",
            "negative_pole": "Ideologisch-principieel: GroenLinks-PvdA, VVD, FVD, JA21",
            "flip": True,
        },
        8: {
            "label": "Europese defensie-integratie (indicatief)",
            "explanation": (
                "Aan de positieve kant staan moties die pleiten voor militaire mobiliteit als "
                "topprioriteit in EU/NAVO-verband en toewerken naar een militair Schengengebied, "
                "35% van defensiematerieel Europees inkopen en een Europees defensie-R&D-instituut "
                "oprichten. Ook het Nationaal Groeifonds en gewasbeschermingsonderzoek vallen "
                "positief. Volt en D66 scoren sterk positief. Aan de negatieve kant staan moties "
                "over ketenverantwoordelijkheid bij toeslagen (DENK), het coronaoversterfte-onderzoek "
                "(PVV/BBB), energiecontracten en huisvestingsregulering. SP (−39), DENK (−35) en "
                "PvdD (−26) scoren sterk negatief — dit betekent dat zij actief tégen deze "
                "EU-defensiemoties stemmen, niet simpelweg het thema negeren. Volt (N=1) domineert "
                "de positieve pool maar is als centroïde van één Kamerlid statistisch onbetrouwbaar."
            ),
            "positive_pole": "Pro-EU defensie en innovatie: Volt, D66",
            "negative_pole": "Nationaal/pacifistisch of binnenlandsgericht: SP, DENK, PvdD, 50PLUS",
            "flip": False,
        },
        9: {
            "label": "Decentraal bestuur en gemeenschapswaarden (indicatief)",
            "explanation": (
                "Aan de positieve kant staan moties over naleving van de Financiële-verhoudingswet "
                "voor gemeenten, beperking van arbeidsmigratie binnen de EU, een nieuwe "
                "tandartsopleiding in Rotterdam, een actieplan tegen misbruik van hallucinerende "
                "geneesmiddelen en een oplossing voor milieuproblemen op Bonaire. SGP en "
                "ChristenUnie scoren sterk positief; ook DENK en SP. Aan de negatieve kant staan "
                "moties over een moratorium op geitenstallen, een verbod op gokadvertenties, "
                "verduidelijking van gronden voor voorlopige hechtenis, een leegstandbelasting voor "
                "woningen en end-to-end-encryptie. D66, JA21 en PVV scoren negatief. Deze as "
                "scheidt een nadruk op decentrale dienstverlening en gemeenschapsregulering van "
                "progressieve systeem- en rechtshervorming."
            ),
            "positive_pole": "Lokaal en gemeenschapsgericht: SGP, ChristenUnie, DENK, SP",
            "negative_pole": "Progressieve systemen en rechten: D66, JA21, PVV",
            "flip": True,
        },
        10: {
            "label": "Institutioneel toezicht en handhaving (indicatief)",
            "explanation": (
                "De tiende as vangt resterende variantie op en scheidt partijen die sceptisch zijn "
                "over staatstoezicht van partijen die strikte regulering en handhaving steunen. "
                "Aan de positieve kant staan moties over minder tijdsintensieve schoolinspecties, "
                "het recht van toeslagenouders op hun persoonlijk dossier, behoud van de "
                "tegemoetkoming voor arbeidsongeschikten en een verlaging van de leeftijdsdrempel "
                "voor kindgesprekken. DENK, SP en PvdD scoren positief. Aan de negatieve kant "
                "staan moties over een aangifteplicht voor scholen bij veiligheidsincidenten, een "
                "rookverbod in auto's met kinderen, braakliggende landbouwgrond en verhoogd "
                "beloningsgeld voor tipgevers. GroenLinks-PvdA scoort opvallend sterk negatief, "
                "waarmee het zich onderscheidt van SP en DENK op handhavingsthema's."
            ),
            "positive_pole": "Kritisch op overheidstoezicht: DENK, SP, PvdD, Volt — minder inspectielast",
            "negative_pole": "Pro-handhaving en regulering: GroenLinks-PvdA, CDA, SGP — veiligheid en naleving",
            "flip": True,
        },
    }

    st.subheader("🔬 SVD Assen — politieke polarisatiethema's")
    st.markdown(
        "Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen "
        "van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren "
        "het spanningsveld dat de as beschrijft."
    )

    # Scree plot: relative importance of each SVD component
    scree_importances = load_scree_data(db_path)
    if scree_importances:
        st.markdown(
            "**Scree-plot** — het relatieve gewicht van elke SVD-as. "
            "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; "
            "latere assen (7+) zijn fragiel en mogelijk niet boven ruisniveau."
        )
        _render_scree_plot(scree_importances)

    json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
    if not os.path.exists(json_path):
        st.warning(
            f"Top-SVD data not found at {json_path}. Run the importance job to generate it."
        )
        return
    try:
        with open(json_path, "r", encoding="utf-8") as fh:
            j = json.load(fh)
    except Exception as e:
        st.error(f"Failed to load SVD importance JSON: {e}")
        return

    window = j.get("window")
    rows = j.get("rows", [])
    if not rows:
        st.info("Geen top-moties in dataset")
        return
    st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**")

    # Build mapping component -> list of motions, keeping only the first row
    # per motion_id within a component. A per-component "seen" set avoids
    # rebuilding the id set for every row.
    comp_map: dict[int, list] = {}
    seen_ids: dict[int, set] = {}
    for r in rows:
        comp = int(r.get("component", 0))
        mid = r.get("motion_id")
        seen = seen_ids.setdefault(comp, set())
        if mid in seen:
            continue
        seen.add(mid)
        comp_map.setdefault(comp, []).append(r)
    comp_options = sorted(comp_map.keys())

    # Build display labels for selectbox: "As 1 — Regulering vs. status-quo"
    def _comp_label(c: int) -> str:
        lbl = SVD_THEMES.get(c, {}).get("label", "")
        return f"As {c} — {lbl}" if lbl else f"As {c}"

    comp_sel = st.selectbox(
        "Selecteer SVD-as",
        options=comp_options,
        format_func=_comp_label,
        index=0,
    )

    # Show theme explanation
    theme = SVD_THEMES.get(comp_sel, {})
    if theme:
        st.info(f"**{theme['label']}** — {theme['explanation']}")
    motions = comp_map.get(comp_sel, [])

    # Party axis chart
    party_scores = load_party_axis_scores(db_path)
    party_mp_vectors = load_party_mp_vectors(db_path)
    bootstrap_data = (
        _cached_bootstrap_cis(party_mp_vectors) if party_mp_vectors else None
    )
    _render_party_axis_chart(
        party_scores, comp_sel, theme, bootstrap_data=bootstrap_data
    )

    # Batch-fetch motion details (title, date, policy_area, url, body_text, voting_results)
    motion_details: Dict[int, tuple] = {}
    # Defensively convert motion ids to integers, skipping invalid values.
    ids_int: List[int] = []
    for m in motions:
        mid = m.get("motion_id")
        if mid is None:
            continue
        try:
            ids_int.append(int(mid))
        except (TypeError, ValueError, OverflowError):
            logger.warning("Skipping invalid motion id in SVD batch fetch: %r", mid)
    # If no valid ids remain, skip the DB query
    if ids_int:
        con = None
        try:
            # Only "?" placeholders are interpolated; the ids are bound parameters.
            placeholders = ", ".join("?" for _ in ids_int)
            con = duckdb.connect(database=db_path, read_only=True)
            db_rows = con.execute(
                f"SELECT id, title, date, policy_area, url, body_text, voting_results "
                f"FROM motions WHERE id IN ({placeholders})",
                ids_int,
            ).fetchall()
            motion_details = {r[0]: r for r in db_rows}
        except Exception:
            # Best effort: the motions still render, just without metadata.
            logger.exception("Failed to batch-fetch motion details")
        finally:
            if con:
                con.close()

    # Split motions by pole sign (a missing/None score counts as 0 -> positive side)
    pos_motions: list = []
    neg_motions: list = []
    for m in motions:
        score = float(m.get("score") or 0.0)
        (pos_motions if score >= 0 else neg_motions).append(m)

    pos_pole = theme.get("positive_pole", "")
    neg_pole = theme.get("negative_pole", "")

    # Determine which pole goes left (progressive) and which goes right
    if theme.get("flip", False):
        left_pole, right_pole = pos_pole, neg_pole
        left_motions, right_motions = pos_motions, neg_motions
    else:
        left_pole, right_pole = neg_pole, pos_pole
        left_motions, right_motions = neg_motions, pos_motions
    # Both markers are empty strings in either orientation.
    left_arrow, right_arrow = "", ""

    lcol, rcol = st.columns(2)
    with lcol:
        _render_svd_motion_column(
            f"**← {left_pole}**", left_motions, motion_details, left_arrow
        )
    with rcol:
        _render_svd_motion_column(
            f"**{right_pole} →**", right_motions, motion_details, right_arrow
        )
def build_mp_quiz_tab(db_path: str) -> None:
    """Interactive quiz: narrow MPs by asking motion vote questions.

    Flow, one question per render:
    - seed with motions returned by ``get_motions_with_individual_votes``
      (``SEED_MOTIONS`` of them)
    - store answers in ``st.session_state['mp_quiz_votes']`` keyed by the
      motion id as a string
    - once the seeds are answered, rank MPs with ``match_mps_for_votes`` and
      ask ``choose_discriminating_motions`` for the next question
    - always show the current ranking below the question

    Args:
        db_path: Database path handed to ``MotionDatabase`` and
            ``load_motions_df``.
    """
    st.subheader("🧑 Welk tweede kamerlid ben jij?")
    st.markdown(
        "Beantwoord een paar eenvoudige ja/nee/onthoud vragen over moties om te zien welk Kamerlid het meest op jou lijkt."
    )
    SEED_MOTIONS = 8
    # NOTE(review): MAX_QUESTIONS only feeds the progress caption and the
    # warning below; the quiz keeps asking past it — confirm that is intended.
    MAX_QUESTIONS = 20

    # initialize session state
    if "mp_quiz_votes" not in st.session_state:
        st.session_state["mp_quiz_votes"] = {}
    if "mp_quiz_asked" not in st.session_state:
        st.session_state["mp_quiz_asked"] = []

    # Imported here rather than at module level, so importing this module
    # does not pull in the database module.
    from database import MotionDatabase as _MotionDatabase

    db_inst = _MotionDatabase(db_path)
    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar om de quiz te starten.")
        return

    # seed from motions that actually have individual MP vote records
    seed_ids = db_inst.get_motions_with_individual_votes(k=SEED_MOTIONS)
    if not seed_ids:
        st.warning("Geen individuele stemdata beschikbaar voor de quiz.")
        return

    def _votes_by_motion_id() -> Dict[int, str]:
        """Return the stored answers with their keys converted back to int."""
        return {int(k): v for k, v in st.session_state["mp_quiz_votes"].items()}

    def _next_motion_id():
        """Pick the next motion to ask, or None when nothing is left."""
        # prefer seed motions not yet asked
        for mid in seed_ids:
            if str(mid) not in st.session_state["mp_quiz_votes"]:
                return mid
        # otherwise ask a discriminating motion based on the remaining candidates
        try:
            user_votes = _votes_by_motion_id()
            ranked = db_inst.match_mps_for_votes(user_votes, limit=200)
        except Exception:
            logger.exception("MP quiz: ranking MPs for the next question failed")
            return None
        candidates = [r["mp_name"] for r in ranked]
        if not candidates:
            return None
        excluded = list(user_votes)
        try:
            next_ids = db_inst.choose_discriminating_motions(candidates, excluded, k=1)
        except Exception:
            logger.exception("MP quiz: choosing a discriminating motion failed")
            return None
        return next_ids[0] if next_ids else None

    # show progress and controls (left column is an intentionally empty spacer)
    _, status_col = st.columns([3, 1])
    with status_col:
        st.caption(
            f"Vragen beantwoord: {len(st.session_state['mp_quiz_votes'])}/{MAX_QUESTIONS}"
        )
        if st.button("Reset quiz"):
            st.session_state["mp_quiz_votes"] = {}
            st.session_state["mp_quiz_asked"] = []
            st.rerun()

    # main question loop (single question per render, wrapped in a form to avoid
    # premature reruns when the user changes the radio selection)
    next_mid = _next_motion_id()
    if next_mid is None:
        st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.")
    else:
        motion_rows = df[df["id"] == next_mid]
        if motion_rows.empty:
            # motion has votes but isn't in the motions DataFrame — skip it
            st.session_state["mp_quiz_votes"][str(next_mid)] = "Geen stem"
            st.rerun()
            return
        motion_row = motion_rows.iloc[0]
        st.markdown(f"### {motion_row.get('title') or f'Motie #{next_mid}'}")
        explanation = motion_row.get("layman_explanation")
        # NaN is truthy, so check for missing values explicitly before showing it.
        if explanation is not None and pd.notna(explanation) and explanation:
            st.info(explanation)
        with st.form(key=f"mp_quiz_form_{next_mid}"):
            choice = st.radio(
                "Wat zou jij stemmen?",
                options=["Voor", "Tegen", "Onthouden", "Geen stem"],
                index=3,
            )
            submitted = st.form_submit_button("Beantwoord en verder")
            if submitted:
                st.session_state["mp_quiz_votes"][str(next_mid)] = choice
                st.session_state["mp_quiz_asked"].append(next_mid)
                st.rerun()

    # display current ranking
    try:
        ranking = db_inst.match_mps_for_votes(_votes_by_motion_id(), limit=50)
    except Exception:
        logger.exception("MP quiz: ranking MPs for display failed")
        ranking = []
    if not ranking:
        st.info("Nog geen antwoorden of geen overlapping met bestaande stemdata.")
        return

    st.markdown("**Top kandidaten**")
    # show as table
    st.dataframe(pd.DataFrame(ranking).head(10), use_container_width=True)

    # check uniqueness: exactly one MP holds the best agreement percentage
    top_pct = ranking[0]["agreement_pct"]
    top_matches = [r for r in ranking if r["agreement_pct"] == top_pct]
    if len(top_matches) == 1 and top_matches[0]["overlap"] > 0:
        st.success(
            f"Unieke match gevonden: {top_matches[0]['mp_name']} ({top_matches[0]['party']})"
        )
    elif len(st.session_state["mp_quiz_asked"]) >= MAX_QUESTIONS:
        st.warning(
            "Maximaal aantal vragen beantwoord. Je hebt meerdere vergelijkbare kandidaten."
        )
    else:
        st.info("Nog geen unieke match — vraag meer om verder te verfijnen.")
# ---------------------------------------------------------------------------
# App entry
# ---------------------------------------------------------------------------
def run_app() -> None:
    """Configure the Streamlit page, draw the sidebar and render the tabs.

    The sidebar's "Over" section reports row counts from the database; the
    connection is opened read-only and is always closed, also when one of
    the count queries fails.
    """
    st.set_page_config(
        layout="wide",
        page_title="Parlement Explorer",
        page_icon="🏛",
    )
    st.title("🏛 Parlement Explorer")

    # Sidebar
    st.sidebar.title("Instellingen")
    db_path = "data/motions.db"
    window_size = "annual"
    show_rejected = st.sidebar.checkbox("Toon verworpen moties", value=False)

    # About section
    with st.sidebar.expander(" Over", expanded=False):
        con = None
        try:
            con = duckdb.connect(database=db_path, read_only=True)
            n_motions = con.execute("SELECT COUNT(*) FROM motions").fetchone()[0]
            n_fused = con.execute("SELECT COUNT(*) FROM fused_embeddings").fetchone()[0]
            n_sim = con.execute("SELECT COUNT(*) FROM similarity_cache").fetchone()[0]
        except Exception as e:
            st.warning(f"DB niet bereikbaar: {e}")
        else:
            # Two trailing spaces before "\n" make Markdown emit a line break,
            # so each statistic lands on its own line.
            st.markdown(
                f"**Moties:** {n_motions:,}  \n"
                f"**Fused embeddings:** {n_fused:,}  \n"
                f"**Similarity cache:** {n_sim:,}"
            )
        finally:
            # Release the connection even when a query above raised.
            if con is not None:
                con.close()

    # Main tabs: one (label, renderer) pair per tab, shared by both code paths.
    tab_renderers = [
        ("🧭 Politiek Kompas", lambda: build_compass_tab(db_path, window_size)),
        ("📈 Trajectories", lambda: build_trajectories_tab(db_path, window_size)),
        ("🔍 Motie Zoeken", lambda: build_search_tab(db_path, show_rejected)),
        ("📋 Motie Browser", lambda: build_browser_tab(db_path, show_rejected)),
        ("🔬 SVD Components", lambda: build_svd_components_tab(db_path)),
    ]
    tab_labels = [label for label, _ in tab_renderers]

    # Streamlit tabs compatibility: some older/newer Streamlit builds expose different APIs.
    if callable(getattr(st, "tabs", None)):
        for tab, (_, render) in zip(st.tabs(tab_labels), tab_renderers):
            with tab:
                render()
    else:
        # Fallback for environments where `st.tabs` is not available: use a radio selector.
        # Any selection that matches no label falls through to the last (SVD) tab.
        selection = st.radio("Tab", tab_labels)
        render = dict(tab_renderers).get(selection, tab_renderers[-1][1])
        render()
if __name__ == "__main__":
    # Script entry point: configure INFO-level logging with a timestamped
    # format, then start the app.
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(message)s",
        level=logging.INFO,
    )
    run_app()