You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
motief/explorer.py

1329 lines
52 KiB

"""Parlement Explorer — Streamlit data analysis app.
Tabs:
1. Politiek Kompas — 2D scatter of MPs/parties, window slider
2. Partij Trajectories — party centroid lines over time
3. Motie Zoeken — text search + similarity lookup
4. Motie Browser — sortable table + detail panel
5. SVD Assen — top motions and party positions per SVD component
   (rendered by build_svd_components_tab)
Run with: streamlit run explorer.py
Import-safe: heavy computation is behind @st.cache_data and only runs at UI time.
All DuckDB connections are read_only=True so the app can run alongside the pipeline.
"""
from __future__ import annotations
import json
import logging
import os
from typing import Dict, List, Optional, Tuple
import duckdb
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
logger = logging.getLogger(__name__)
# Hex colour per party label, shared by every chart so a party keeps the same
# colour across tabs. Both the abbreviation and the long-form name are listed
# where the data uses either spelling.
PARTY_COLOURS: Dict[str, str] = {
    "VVD": "#1E73BE",
    "PVV": "#002366",
    "D66": "#00A36C",
    "CDA": "#4CAF50",
    "SP": "#E53935",
    "PvdA": "#D32F2F",
    "GroenLinks": "#388E3C",
    "GroenLinks-PvdA": "#2E7D32",
    "CU": "#0288D1",
    "SGP": "#F4511E",
    "PvdD": "#43A047",
    "FVD": "#6A1B9A",
    "JA21": "#7B1FA2",
    "BBB": "#8D6E63",
    "NSC": "#FF8F00",
    # Long-form alias of NSC as it appears in mp_metadata; same colour.
    "Nieuw Sociaal Contract": "#FF8F00",
    "DENK": "#00897B",
    "50PLUS": "#7E57C2",
    "Volt": "#572AB7",
    # Long-form alias of CU; same colour.
    "ChristenUnie": "#0288D1",
    # Neutral grey for entities without a known party.
    "Unknown": "#9E9E9E",
}
# Well-known parties, in display order, used as the default selection of the
# trajectories chart so it stays readable instead of plotting every party.
KNOWN_MAJOR_PARTIES = [
    "VVD", "PVV", "D66",
    "GroenLinks-PvdA", "GroenLinks", "PvdA",
    "CDA", "SP", "NSC", "CU", "BBB",
]
# Parties currently seated in the Tweede Kamer (2023 election cycle).
# These are the entity_ids as stored in svd_vectors for window='2025'.
CURRENT_PARLIAMENT_PARTIES: frozenset[str] = frozenset(
    (
        "PVV",
        "VVD",
        "NSC",
        "BBB",
        "D66",
        "GroenLinks-PvdA",
        "CDA",
        "SP",
        "ChristenUnie",
        "CU",  # alias for ChristenUnie
        "SGP",
        "Volt",
        "DENK",
        "PvdD",
        "JA21",
        "FVD",
    )
)
# ---------------------------------------------------------------------------
# Cached loaders
# ---------------------------------------------------------------------------
@st.cache_data(show_spinner="Beschikbare tijdsvensters laden…")
def get_available_windows(db_path: str) -> List[str]:
    """List every distinct ``window_id`` found in ``svd_vectors``, in sorted order.

    A read-only DuckDB connection to ``db_path`` is opened for the query and
    always closed afterwards. If the query fails the error is logged and an
    empty list is returned instead of raising.
    """
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        cursor = connection.execute(
            "SELECT DISTINCT window_id FROM svd_vectors ORDER BY window_id"
        )
        window_ids = []
        for record in cursor.fetchall():
            # Each record is a one-column row; keep only the window id.
            window_ids.append(record[0])
        return window_ids
    except Exception:
        logger.exception("Failed to query available windows")
        return []
    finally:
        connection.close()
@st.cache_data(show_spinner=False)
def get_uniform_dim_windows(db_path: str) -> List[str]:
    """Return the windows whose vector length equals the most common length.

    The query samples one 'mp' row per window (``DISTINCT ON``), takes the JSON
    array length of its vector as that window's dimension, picks the dimension
    shared by the most windows (ties resolved towards the larger dimension) and
    returns the ids of the windows at that dimension, sorted by ``window_id``.

    The purpose is to keep mixed-length vectors away from ``compute_2d_axes``,
    which stacks them with ``np.vstack`` and therefore needs equal shapes.

    On a query failure the error is logged and an empty list is returned. The
    read-only connection is always closed.
    """
    # Kept verbatim: the SQL text is part of runtime behaviour.
    dominant_dim_query = """
WITH window_dims AS (
SELECT DISTINCT ON (window_id)
window_id,
json_array_length(vector) AS dim
FROM svd_vectors
WHERE entity_type = 'mp'
ORDER BY window_id
),
dim_counts AS (
SELECT dim, COUNT(*) AS cnt FROM window_dims GROUP BY dim
),
dominant AS (
SELECT dim FROM dim_counts ORDER BY cnt DESC, dim DESC LIMIT 1
)
SELECT wd.window_id
FROM window_dims wd
JOIN dominant d ON wd.dim = d.dim
ORDER BY wd.window_id
"""
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        records = connection.execute(dominant_dim_query).fetchall()
        window_ids = []
        for record in records:
            window_ids.append(record[0])
        return window_ids
    except Exception:
        logger.exception("Failed to query uniform-dim windows")
        return []
    finally:
        connection.close()
@st.cache_data(show_spinner="2D posities berekenen (kan even duren)…")
def load_positions(
    db_path: str, window_size: str = "quarterly"
) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict]:
    """Compute 2D positions per window via PCA on the stored SVD vectors.

    Args:
        db_path: Path of the DuckDB database, passed on to the helpers.
        window_size: ``"annual"`` keeps only windows whose id ends in ``-Q4``
            (one representative window per year); any other value keeps all
            uniform-dimension windows.

    Returns:
        ``(positions_by_window, axis_def)`` as produced by ``compute_2d_axes``:
        positions are ``{window_id: {entity_name: (x, y)}}`` and ``axis_def`` is
        a dict describing the axes. ``({}, {})`` when no window qualifies.
    """
    # Imported lazily so importing this module stays cheap.
    from analysis.political_axis import compute_2d_axes

    # Restrict to windows sharing one vector dimension; mixed dimensions make
    # np.vstack fail inside compute_2d_axes.
    window_ids = get_uniform_dim_windows(db_path)
    if window_size == "annual":
        window_ids = [wid for wid in window_ids if wid.endswith("-Q4")]

    if window_ids:
        computed = compute_2d_axes(
            db_path,
            window_ids=window_ids,
            method="pca",
            pca_residual=True,
            normalize_vectors=True,
        )
        positions, axes = computed
        return positions, axes
    return {}, {}
@st.cache_data(show_spinner="Partijkaart laden…")
def load_party_map(db_path: str) -> Dict[str, str]:
    """Return ``{mp_name: party}`` with long party names mapped to abbreviations.

    The raw mapping comes from ``analysis.visualize._load_party_map``. Party
    names present in the local alias table are replaced by their abbreviation;
    all other names pass through unchanged. If loading or normalising fails,
    the error is logged and an empty dict is returned.
    """
    from analysis.visualize import _load_party_map

    # Long-form name -> abbreviation used elsewhere in the app.
    aliases: Dict[str, str] = {"Nieuw Sociaal Contract": "NSC"}
    try:
        normalised = {}
        for mp_name, party_name in _load_party_map(db_path).items():
            normalised[mp_name] = aliases.get(party_name, party_name)
        return normalised
    except Exception:
        logger.exception("Failed to load party map")
        return {}
@st.cache_data(show_spinner="Partijposities op SVD-assen laden…")
def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
    """Return per-party SVD vectors for window='2025'.

    Queries ``svd_vectors`` for rows with ``entity_type='mp'``,
    ``window_id='2025'`` and an ``entity_id`` contained in
    ``CURRENT_PARLIAMENT_PARTIES``.

    Vectors may arrive as a JSON string, JSON bytes, or an already-decoded
    sequence. ``None`` components become ``0.0``. A row whose vector cannot be
    decoded is skipped with a warning, so one bad row no longer discards the
    scores of every other party.

    Returns:
        ``{party_name: [float, ...]}``. The vector length is whatever is stored
        for that window (described upstream as 50 — not verified here). When
        several rows exist for one party the last one wins. ``{}`` if the
        connection or query fails (the error is logged).
    """
    # Bound before the try so the finally clause can test it safely even when
    # duckdb.connect() itself raises.
    con = None
    try:
        con = duckdb.connect(database=db_path, read_only=True)
        party_list = sorted(CURRENT_PARLIAMENT_PARTIES)
        placeholders = ", ".join("?" for _ in party_list)
        rows = con.execute(
            f"SELECT entity_id, vector FROM svd_vectors "
            f"WHERE entity_type='mp' AND window_id='2025' "
            f"AND entity_id IN ({placeholders})",
            party_list,
        ).fetchall()
        result: Dict[str, List[float]] = {}
        for entity_id, raw_vec in rows:
            try:
                if isinstance(raw_vec, (bytes, bytearray)):
                    raw_vec = raw_vec.decode()
                if isinstance(raw_vec, str):
                    vec = json.loads(raw_vec)
                else:
                    vec = list(raw_vec)
                result[entity_id] = [
                    float(v) if v is not None else 0.0 for v in vec
                ]
            except (TypeError, ValueError):
                # Covers invalid JSON, undecodable bytes, non-iterable values
                # and non-numeric components.
                logger.warning(
                    "Skipping undecodable SVD vector for entity %r", entity_id
                )
        return result
    except Exception:
        logger.exception("Failed to load party axis scores")
        return {}
    finally:
        if con is not None:
            try:
                con.close()
            except Exception:
                # Closing is best-effort; never mask the result or the
                # original error with a close failure.
                logger.debug("Failed to close DuckDB connection", exc_info=True)
def _render_party_axis_chart(
    party_scores: Dict[str, List[float]], comp_sel: int, theme: dict
) -> None:
    """Render a 1D horizontal Plotly scatter of party positions on SVD axis ``comp_sel``.

    Each party is drawn as a labelled marker at its score on a single
    horizontal line (y=0).

    Args:
        party_scores: ``{party: vector}``; the score used is
            ``vector[comp_sel - 1]``. Parties whose vector has no such
            component are left out.
        comp_sel: 1-based component number. Values below 1 select nothing
            (previously they wrapped around to the end of the vector).
        theme: Dict read via ``.get`` for ``flip``, ``positive_pole`` and
            ``negative_pole``. When ``flip`` is truthy the scores are negated
            and the pole labels swap sides accordingly.

    Shows a caption instead of a chart when there is no party data or no
    party has a score for the requested component.
    """
    if not party_scores:
        st.caption("_Partijdata niet beschikbaar voor deze as._")
        return

    axis_idx = comp_sel - 1  # comp_sel is 1-based, vectors are 0-based
    flip = theme.get("flip", False)

    parties: List[str] = []
    scores: List[float] = []
    for party, vec in party_scores.items():
        # The lower bound matters: a negative index would silently read a
        # component from the end of the vector.
        if 0 <= axis_idx < len(vec):
            score = vec[axis_idx]
            parties.append(party)
            scores.append(-score if flip else score)
    if not scores:
        st.caption("_Geen partijscores voor deze as._")
        return

    colours = [PARTY_COLOURS.get(p, "#9E9E9E") for p in parties]
    hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]

    # After an optional flip the positive pole sits on the left, otherwise on
    # the right.
    pos_pole = theme.get("positive_pole", "")
    neg_pole = theme.get("negative_pole", "")
    left_label = pos_pole if flip else neg_pole
    right_label = neg_pole if flip else pos_pole

    # Extend the baseline 15% beyond the outermost markers. Padding by the
    # magnitude (rather than multiplying by 1.15) always widens the range,
    # also when every score has the same sign.
    lowest, highest = min(scores), max(scores)
    x_min = lowest - 0.15 * abs(lowest)
    x_max = highest + 0.15 * abs(highest)
    if x_min == x_max:
        x_min, x_max = x_min - 1, x_max + 1

    fig = go.Figure()
    # Baseline
    fig.add_trace(
        go.Scatter(
            x=[x_min, x_max],
            y=[0, 0],
            mode="lines",
            line={"color": "#cccccc", "width": 1},
            hoverinfo="skip",
            showlegend=False,
        )
    )
    # Party markers
    fig.add_trace(
        go.Scatter(
            x=scores,
            y=[0] * len(scores),
            mode="markers+text",
            text=parties,
            textposition="top center",
            marker={"size": 18, "color": colours},
            hovertext=hover,
            hoverinfo="text",
            showlegend=False,
        )
    )
    fig.update_layout(
        height=160,
        margin={"l": 10, "r": 10, "t": 10, "b": 30},
        xaxis={
            "title": f"{left_label} | {right_label}",
            "zeroline": True,
            "zerolinecolor": "#aaaaaa",
        },
        yaxis={"visible": False, "range": [-1, 2]},
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
    )
    st.plotly_chart(fig, use_container_width=True)
@st.cache_data(show_spinner="Moties laden…")
def load_motions_df(db_path: str) -> pd.DataFrame:
    """Read the motions table into a DataFrame over a read-only connection.

    The ``date`` column is converted with ``pd.to_datetime`` (unparseable
    values become ``NaT``) and a derived ``year`` column is added. If anything
    fails the error is logged and an empty DataFrame is returned. The
    connection is always closed.
    """
    # Kept verbatim: the SQL text is part of runtime behaviour.
    select_motions = """
SELECT id, title, description, date, policy_area,
voting_results, layman_explanation,
winning_margin, controversy_score, url
FROM motions
"""
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        motions = connection.execute(select_motions).fetchdf()
        parsed_dates = pd.to_datetime(motions["date"], errors="coerce")
        motions["date"] = parsed_dates
        motions["year"] = motions["date"].dt.year
        return motions
    except Exception:
        logger.exception("Failed to load motions")
        return pd.DataFrame()
    finally:
        connection.close()
def query_similar(
    db_path: str,
    source_motion_id: int,
    vector_type: str = "fused",
    top_k: int = 10,
) -> pd.DataFrame:
    """Look up the ``top_k`` most similar motions in ``similarity_cache``.

    Rows are matched on ``source_motion_id`` and ``vector_type``, joined to
    ``motions`` for title, date and policy area, and ordered by descending
    score. The connection is read-only and always closed.

    Returns:
        DataFrame with columns ``target_motion_id, score, window_id, title,
        date, policy_area``; an empty DataFrame if the query fails (the error
        is logged).
    """
    # Kept verbatim: the SQL text is part of runtime behaviour.
    similar_sql = """
SELECT sc.target_motion_id, sc.score, sc.window_id,
m.title, m.date, m.policy_area
FROM similarity_cache sc
JOIN motions m ON m.id = sc.target_motion_id
WHERE sc.source_motion_id = ?
AND sc.vector_type = ?
ORDER BY sc.score DESC
LIMIT ?
"""
    params = [source_motion_id, vector_type, top_k]
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        similar = connection.execute(similar_sql, params).fetchdf()
        return similar
    except Exception:
        logger.exception(
            "Failed to query similarity cache for motion %s", source_motion_id
        )
        return pd.DataFrame()
    finally:
        connection.close()
# ---------------------------------------------------------------------------
# Shared rendering helpers
# ---------------------------------------------------------------------------
def _render_voting_results(voting_results_json) -> None:
    """Render a voting_results blob as one markdown line per vote value.

    The input is either a JSON string or an already-decoded dict of the form
    ``{party_or_mp: vote}``. Votes are lower-cased and stripped, actors are
    grouped per vote, and groups are shown in the fixed order
    voor / tegen / onthouden / afwezig followed by any other vote values in
    first-seen order. Actors within a group are sorted alphabetically.

    When there is nothing to show (missing value, empty dict, non-dict
    payload, or unparseable JSON) the "Geen stemuitslag beschikbaar" caption is
    displayed. Unexpected errors are logged instead of silently discarded and
    never propagate to the caller.
    """
    try:
        vdata = voting_results_json
        if isinstance(vdata, str):
            try:
                vdata = json.loads(vdata) if vdata.strip() else None
            except ValueError:
                logger.warning("Could not parse voting_results JSON", exc_info=True)
                vdata = None
        if not isinstance(vdata, dict) or not vdata:
            st.caption("_Geen stemuitslag beschikbaar_")
            return

        # Group {vote: [actor, ...]}
        by_vote: Dict[str, List[str]] = {}
        for actor, vote in vdata.items():
            vote_str = str(vote).lower().strip()
            by_vote.setdefault(vote_str, []).append(str(actor))

        # Render in fixed order, then whatever else occurs in the data.
        vote_order = ["voor", "tegen", "onthouden", "afwezig"]
        # An empty string means "no emoji prefix" for that vote.
        vote_emoji = {"voor": "", "tegen": "", "onthouden": "🟡", "afwezig": ""}
        for v in vote_order + [k for k in by_vote if k not in vote_order]:
            actors = by_vote.get(v)
            if not actors:
                continue
            # Join only the non-empty parts: a bold span that starts with a
            # space ("** Voor**") is not rendered as bold by Markdown.
            parts = [p for p in (vote_emoji.get(v, ""), v.capitalize()) if p]
            label = " ".join(parts) or "Onbekend"
            st.markdown(
                f"**{label}** ({len(actors)}): {', '.join(sorted(actors))}"
            )
    except Exception:
        # Rendering votes is best-effort and must not break the surrounding
        # tab, but the failure should still be visible in the logs.
        logger.exception("Failed to render voting results")
# ---------------------------------------------------------------------------
# Tab 1: Politiek Kompas
# ---------------------------------------------------------------------------
def build_compass_tab(db_path: str, window_size: str) -> None:
    """Render the "Politiek Kompas" tab: a 2D scatter of MP positions for one window.

    Positions come from ``load_positions`` and parties from ``load_party_map``
    (MPs missing from the map are labelled "Unknown"). The user chooses the
    window, whether to draw MP names, and the minimum number of MPs a party
    needs in that window to be plotted.

    Args:
        db_path: Path of the DuckDB database, passed on to the loaders.
        window_size: Window granularity, passed on to ``load_positions``.
    """
    st.subheader("Politiek Kompas")
    st.markdown(
        "2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)."
    )
    positions_by_window, axis_def = load_positions(db_path, window_size)
    if not positions_by_window:
        st.warning(
            "Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid."
        )
        return
    party_map = load_party_map(db_path)
    windows = sorted(positions_by_window.keys())

    col1, col2 = st.columns([3, 1])
    with col2:
        selected_window = st.select_slider(
            "Tijdsvenster", options=windows, value=windows[-1]
        )
        show_names = st.checkbox("Toon namen", value=False)
        min_size = st.slider("Min. MPs per partij", 0, 20, 3)

    pos = positions_by_window.get(selected_window, {})
    if not pos:
        st.info(f"Geen data voor venster {selected_window}")
        return

    df_pos = pd.DataFrame(
        [
            {"name": name, "x": x, "y": y, "party": party_map.get(name, "Unknown")}
            for name, (x, y) in pos.items()
        ]
    )
    # Filter to parties with enough MPs
    party_counts = df_pos["party"].value_counts()
    valid_parties = party_counts[party_counts >= min_size].index
    df_pos = df_pos[df_pos["party"].isin(valid_parties)]

    colour_map = {p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique()}
    fig = px.scatter(
        df_pos,
        x="x",
        y="y",
        color="party",
        # Let Plotly Express attach the names per trace. With color="party"
        # there is one trace per party, so pushing the full name column into
        # every trace afterwards would mislabel the points (and not draw text,
        # since the traces would stay in marker-only mode).
        text="name" if show_names else None,
        hover_name="name",
        hover_data={"party": True, "x": ":.3f", "y": ":.3f"},
        color_discrete_map=colour_map,
        title=f"Politiek Kompas — {selected_window}",
        labels={"x": "Links ← → Rechts", "y": "Progressief ↑ / Conservatief ↓"},
    )
    if show_names:
        fig.update_traces(textposition="top center")
    fig.update_layout(height=600, legend_title_text="Partij")
    with col1:
        st.plotly_chart(fig, use_container_width=True)

    # Axis info
    if axis_def:
        evr = axis_def.get("explained_variance_ratio")
        # len() instead of truthiness: the ratio may be a NumPy array (whose
        # truth value is ambiguous), and both components are needed below.
        if evr is not None and len(evr) >= 2:
            st.caption(
                f"PCA verklaarde variantie: as 1 = {evr[0] * 100:.1f}%, as 2 = {evr[1] * 100:.1f}%"
            )
# ---------------------------------------------------------------------------
# Tab 2: Partij Trajectories
# ---------------------------------------------------------------------------
def build_trajectories_tab(db_path: str, window_size: str) -> None:
    """Render the "Partij Trajectories" tab: party centroid paths over the windows.

    For every window the mean (x, y) of each party's MPs is computed; MPs
    without a known party are ignored. The user selects which parties to plot,
    and each selected party becomes one spline line through its centroids in
    window order.

    Args:
        db_path: Path of the DuckDB database, passed on to the loaders.
        window_size: Window granularity, passed on to ``load_positions``.
    """
    st.subheader("Partij Trajectories")
    st.markdown("Hoe bewegen partijen over de tijdsvensters heen?")
    positions_by_window, _axis_def = load_positions(db_path, window_size)
    if not positions_by_window:
        st.warning("Geen positiedata beschikbaar.")
        return
    party_of = load_party_map(db_path)

    # centroids[party][window_id] = mean (x, y) of that party's MPs.
    centroids: Dict[str, Dict[str, Tuple[float, float]]] = {}
    for window_id in sorted(positions_by_window):
        members: Dict[str, List[Tuple[float, float]]] = {}
        for mp_name, (x, y) in positions_by_window.get(window_id, {}).items():
            party = party_of.get(mp_name, "Unknown")
            if party != "Unknown":
                members.setdefault(party, []).append((x, y))
        for party, points in members.items():
            mean_x = float(np.mean([pt[0] for pt in points]))
            mean_y = float(np.mean([pt[1] for pt in points]))
            centroids.setdefault(party, {})[window_id] = (mean_x, mean_y)

    party_options = sorted(centroids)
    # Default to the known major parties rather than an "appears in most
    # windows" heuristic, which would drop newer parties such as NSC.
    default_parties = [
        p for p in KNOWN_MAJOR_PARTIES if p in centroids
    ] or party_options[:6]
    selected_parties = st.multiselect(
        "Selecteer partijen",
        options=party_options,
        default=default_parties,
    )

    fig = go.Figure()
    for party in selected_parties:
        path = centroids.get(party)
        if path is None:
            continue
        ordered_windows = sorted(path.keys())
        colour = PARTY_COLOURS.get(party, "#9E9E9E")
        trace = go.Scatter(
            x=[path[w][0] for w in ordered_windows],
            y=[path[w][1] for w in ordered_windows],
            mode="lines+markers",
            name=party,
            text=ordered_windows,  # full window ID for hover
            line={"color": colour, "shape": "spline", "smoothing": 1.3},
            marker={"color": colour, "size": 8},
            hovertemplate=(
                f"<b>{party}</b><br>"
                "venster: %{text}<br>"
                "x: %{x:.3f}<br>y: %{y:.3f}<extra></extra>"
            ),
        )
        fig.add_trace(trace)

    fig.update_layout(
        title="Partij trajectories",
        xaxis_title="Links ← → Rechts",
        yaxis_title="Progressief ↑ / Conservatief ↓",
        height=600,
        legend_title_text="Partij",
    )
    st.plotly_chart(fig, use_container_width=True)
# ---------------------------------------------------------------------------
# Tab 3: Motie Zoeken
# ---------------------------------------------------------------------------
def build_search_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Zoeken" tab: title search with year and controversy filters.

    Motions are filtered in memory, sorted by descending controversy score,
    and the first 50 are shown as expanders containing metrics, the voting
    breakdown, a source link and up to five similar motions.

    Args:
        db_path: Path of the DuckDB database, passed on to the loaders.
        show_rejected: When false, motions whose stripped title is exactly
            "Verworpen." are hidden.
    """
    st.subheader("Motie Zoeken")
    motions = load_motions_df(db_path)
    if motions.empty:
        st.warning("Geen moties beschikbaar.")
        return
    if not show_rejected:
        stripped_titles = motions["title"].fillna("").str.strip()
        motions = motions[stripped_titles != "Verworpen."]

    # Controls (widget creation order is significant to Streamlit).
    search_col, year_col, controversy_col = st.columns([2, 1, 1])
    with search_col:
        query = st.text_input(
            "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen"
        )
    with year_col:
        years = sorted(motions["year"].dropna().astype(int).unique().tolist())
        if not years:
            year_range = (2019, 2024)
        else:
            year_range = st.select_slider(
                "Jaar", options=years, value=(years[0], years[-1])
            )
    with controversy_col:
        min_controversy = st.slider(
            "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05
        )

    # Apply filters in-memory, one after the other.
    working = motions.copy()
    not_too_early = working["year"] >= year_range[0]
    not_too_late = working["year"] <= year_range[1]
    working = working[not_too_early & not_too_late]
    if min_controversy > 0:
        working = working[working["controversy_score"] >= min_controversy]
    if query:
        needle = query.lower()
        lowered_titles = working["title"].fillna("").str.lower()
        working = working[lowered_titles.str.contains(needle, regex=False)]
    working = working.sort_values(by="controversy_score", ascending=False)

    st.caption(f"{len(working)} resultaten (top 50 getoond)")
    for _, motion in working.head(50).iterrows():
        title = motion.get("title") or f"Motie #{motion['id']}"
        if pd.notna(motion["date"]):
            date_str = motion["date"].strftime("%d %b %Y")
        else:
            date_str = "?"
        controversy = motion.get("controversy_score") or 0
        with st.expander(f"**{title}** — {date_str} — 🔥 {controversy:.2f}"):
            metric_cols = st.columns(3)
            metric_cols[0].metric("Controverse", f"{controversy:.2f}")
            metric_cols[1].metric("Marge", f"{motion.get('winning_margin', 0):.2f}")
            metric_cols[2].metric(
                "Jaar", int(motion["year"]) if pd.notna(motion["year"]) else "?"
            )

            # Voting breakdown
            _render_voting_results(motion.get("voting_results"))

            # Link to original motion
            url = motion.get("url")
            if url and str(url).startswith("http"):
                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

            # Similar motions
            similar = query_similar(db_path, int(motion["id"]), top_k=5)
            if similar.empty:
                st.caption("_Nog geen vergelijkbare moties beschikbaar_")
                continue
            st.markdown("**Vergelijkbare moties:**")
            for _, hit in similar.iterrows():
                if pd.notna(hit.get("date")):
                    hit_year = pd.to_datetime(hit["date"]).strftime("%Y")
                else:
                    hit_year = ""
                st.markdown(
                    f"- {hit.get('title', 'Onbekend')} *(score: {hit['score']:.3f}, {hit_year})*"
                )
# ---------------------------------------------------------------------------
# Tab 4: Motie Browser
# ---------------------------------------------------------------------------
def build_browser_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Browser" tab: a filterable, sortable table plus a detail panel.

    The table lists motions after the year and controversy filters, sorted by
    the chosen criterion. Below it, a motion id can be entered to see that
    motion's title, date, controversy score, source link, voting breakdown and
    up to ten similar motions. The id lookup runs against all loaded motions
    (after the "Verworpen." filter), not just the filtered table.

    Args:
        db_path: Path of the DuckDB database, passed on to the loaders.
        show_rejected: When false, motions whose stripped title is exactly
            "Verworpen." are hidden.
    """
    st.subheader("Motie Browser")
    motions = load_motions_df(db_path)
    if motions.empty:
        st.warning("Geen moties beschikbaar.")
        return
    if not show_rejected:
        stripped_titles = motions["title"].fillna("").str.strip()
        motions = motions[stripped_titles != "Verworpen."]

    # Controls (widget creation order is significant to Streamlit).
    year_col, controversy_col, sort_col_ui = st.columns(3)
    with year_col:
        years = sorted(motions["year"].dropna().astype(int).unique().tolist())
        year_filter = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years])
    with controversy_col:
        min_controversy_b = st.slider(
            "Min. controverse",
            min_value=0.0,
            max_value=1.0,
            value=0.0,
            step=0.05,
            key="browser_controversy",
        )
    with sort_col_ui:
        sort_by = st.selectbox("Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"])

    # Filter
    working = motions.copy()
    if year_filter != "(Alle)":
        working = working[working["year"] == int(year_filter)]
    if min_controversy_b > 0:
        working = working[working["controversy_score"] >= min_controversy_b]

    # Sort: choice -> (column, ascending?)
    sort_rules = {
        "Datum (nieuw)": ("date", False),
        "Controverse": ("controversy_score", False),
        "Marge": ("winning_margin", True),
    }
    sort_column, ascending = sort_rules[sort_by]
    working = working.sort_values(by=sort_column, ascending=ascending)

    # Display table, limited to the columns that are actually present.
    wanted_columns = ["id", "title", "date", "controversy_score", "winning_margin"]
    shown_columns = [col for col in wanted_columns if col in working.columns]
    st.dataframe(
        working[shown_columns].reset_index(drop=True),
        use_container_width=True,
        height=350,
    )
    st.divider()

    # Detail panel: bounds and default follow the filtered table when it has
    # rows, otherwise fixed fallbacks are used.
    st.markdown("**Detail weergave** — vul een motie-ID in:")
    if working.empty:
        id_min, id_max, id_default = 1, 99999, 1
    else:
        id_min = int(working["id"].min())
        id_max = int(working["id"].max())
        id_default = int(working["id"].iloc[0])
    sel_id = st.number_input(
        "Motie ID",
        min_value=id_min,
        max_value=id_max,
        value=id_default,
        step=1,
    )

    matches = motions[motions["id"] == sel_id]
    if matches.empty:
        return
    row = matches.iloc[0]
    st.markdown(f"### {row.get('title') or 'Onbekend'}")
    if pd.notna(row["date"]):
        date_str = row["date"].strftime("%d %b %Y")
    else:
        date_str = "?"
    st.caption(
        f"📅 {date_str} | 🔥 Controverse: {row.get('controversy_score', 0):.2f}"
    )

    # Link to original source
    url = row.get("url")
    if url and str(url).startswith("http"):
        st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

    # Voting breakdown
    st.markdown("**Stemuitslag:**")
    _render_voting_results(row.get("voting_results"))

    # Similar motions
    similar = query_similar(db_path, int(sel_id), top_k=10)
    if similar.empty:
        st.caption("_Nog geen vergelijkbare moties beschikbaar voor deze motie_")
    else:
        st.markdown("**Vergelijkbare moties:**")
        st.dataframe(
            similar[["title", "score", "date", "policy_area"]],
            use_container_width=True,
        )
def build_svd_components_tab(db_path: str) -> None:
"""New tab: show top motions contributing to top SVD components.
Reads thoughts/explorer/top_svd_top_motions.json and displays a selector
for components 1..10 with theme labels/explanations and a detail pane per motion.
"""
# Political polarisation themes per SVD component (1-indexed, window=2025)
# Produced by per-axis analysis of all 10 unique top motions (zero cross-axis overlap).
SVD_THEMES: dict[int, dict[str, str]] = {
1: {
"label": "Constructief coalitiebeleid versus radicale PVV-migratiepolitiek",
"explanation": (
"Deze as scheidt brede, constructieve coalitiemoties (CDA, VVD, NSC, CU, D66, GL-PvdA) "
"over uiteenlopende beleidsterreinen — van zorg en defensie tot multilateralisme en "
"digitale inclusie — van de harde PVV-motie die onmiddellijke uitzetting van Syriërs "
"en Oekraïense mannen eist. De positieve pool vertegenwoordigt het brede parlementaire "
"midden dat procedureel en inhoudelijk samenwerkt, terwijl de negatieve pool de "
"isolationistische en radicaal-nationalistische positie van de PVV symboliseert. "
"Deze dimensie is politiek betekenisvol omdat zij de kloof blootlegt tussen mainstream "
"coalitiepolitiek en populistisch-nationalistisch obstructionisme op het vlak van migratie."
),
"positive_pole": "Breed coalitiebeleid: zorg, defensie, multilateralisme, inclusie",
"negative_pole": "Radicale PVV-eis tot onmiddellijke uitzetting migranten",
"flip": True,
},
2: {
"label": "Nationalistisch migratiebeleid versus progressief internationaal solidariteitsdenken",
"explanation": (
"Deze as weerspiegelt de spanning tussen een nationalistisch-conservatieve koers "
"(PVV, BBB, JA21, NSC) die inzet op asielbeperking en nationaal belang, versus een "
"links-progressieve koers (SP, PvdD, DENK, GroenLinks) die internationale solidariteit, "
"reproductieve rechten, LHBT+-zorg en bescherming van minderheden centraal stelt. "
"Moties aan de positieve kant beperken asielzoekers en verdedigen nationaal belang "
"tegenover EU-druk, terwijl moties aan de negatieve kant Israëlisch optreden veroordelen, "
"Gazaanse slachtoffers helpen, abortus en PrEP vergoeden, en moslims beschermen tegen "
"discriminatie. Dit is een van de meest fundamentele breuken in de huidige Nederlandse politiek."
),
"positive_pole": "Asielbeperking, nationaal belang, restrictief migratiebeleid",
"negative_pole": "Pro-Palestina, progressieve zorgrechten, anti-discriminatie minderheden",
"flip": False,
},
3: {
"label": "Humanitaire solidariteit en inclusie versus nationalistische handhaving en deregulering",
"explanation": (
"Deze as scheidt partijen die inzetten op internationale humanitaire solidariteit "
"(sanctieverlichting Syrië, medische evacuatie Gazaanse kinderen), sociale inclusie "
"(gelijke financieringstoegang) en pragmatische justitie (slimmer straffen) van partijen "
"die pleiten voor strikte strafuitvoering, deregulering via afschaffing nationale koppen, "
"beperking van ngo's in het migratiebeleid en bescherming van eigendomsvrijheid zonder "
"discriminatieregels. De uitzonderlijk grote scoremagnitudes (±11,7) duiden op een "
"bijzonder scherpe polarisatie, waarbij links-progressieve en christelijk-sociale partijen "
"(SP, D66, GL, DENK, CU, PvdA, CDA, NSC) tegenover rechts-nationalistisch georiënteerde "
"partijen (PVV, BBB, VVD-rechtsflank, JA21) staan."
),
"positive_pole": "Internationale solidariteit, inclusie en pragmatische overheidsinterventie",
"negative_pole": "Strikte handhaving, deregulering en nationalistisch eigenbelang boven humanitaire verplichtingen",
"flip": True,
},
4: {
"label": "Publieke voorzieningen beschermen versus liberale marktwerking",
"explanation": (
"Deze as weerspiegelt de klassieke sociaal-economische tegenstelling tussen links en "
"liberaal-economisch rechts. Aan de positieve kant staan moties van SP en DENK die "
"pleiten voor betaalbare zorg, lage treintarieven, bescherming van politiepersoneel en "
"regionale brandweerposten — allemaal gericht op het beschermen van publieke voorzieningen "
"voor gewone burgers. Aan de negatieve kant staan moties van VVD, D66, Volt en NSC die "
"pleiten voor het EU-Mercosur vrijhandelsverdrag en een flexibele kennismigrantenregeling "
"ten behoeve van het economisch verdienvermogen. Deze dimensie is politiek betekenisvol "
"omdat hij de fundamentele vraag raakt of de staat actief moet ingrijpen om collectieve "
"voorzieningen betaalbaar en toegankelijk te houden, of dat vrije markt en open handel "
"leidend moeten zijn."
),
"positive_pole": "Vrije handel, open economie en marktgerichte arbeidsmigratie",
"negative_pole": "Staatsbescherming van betaalbare publieke voorzieningen voor iedereen",
"flip": False,
},
5: {
"label": "Christelijk-conservatief sociaal beleid versus seculier progressief",
"explanation": (
"Deze as reflecteert de politieke tegenstelling tussen christelijk-conservatieve "
"partijen (ChristenUnie en SGP) en seculier-progressieve partijen. Aan de positieve "
"pool staan moties over bescherming van het gezin, kritische houding tegenover euthanasie "
"bij jongeren, ruimte voor kerkgemeenschappen, bescherming van de agrarische sector en "
"een zakelijker onderwijscurriculum. Deze as is politiek betekenisvol omdat de SGP en CU "
"consistent vanuit een christelijk-sociale visie stemmen tegenover partijen als D66, "
"GroenLinks-PvdA en SP die een seculier-progressief beleid voorstaan."
),
"positive_pole": "Seculier-progressief: individuele autonomie, progressieve sociale rechten",
"negative_pole": "Christelijk-conservatief: gezin, kerk, leven, traditionele waarden",
"flip": True,
},
6: {
"label": "Christelijk-sociaal beschermingsbeleid versus links-progressieve systeemkritiek",
"explanation": (
"Deze as scheidt centrum-rechtse, christelijk-sociale partijen (CU, SGP, BBB, VVD) die "
"via pragmatisch overheidsbeleid specifieke kwetsbare groepen beschermen — zoals "
"pgb-budgethouders, christenen in Syrisch terugkeerbeleid en kinderen online — van "
"links-progressieve partijen (SP, GroenLinks) die structurele systeemkritiek leveren op "
"arbeidsmigratiemisstanden, winstbejag in de zorg en internationale solidariteit met "
"gemarginaliseerde groepen. De politieke tegenstelling gaat over de oriëntatie van sociaal "
"beleid: doelgroepgericht en institutioneel versus structureel-hervormend en solidair."
),
"positive_pole": "Christelijk-sociaal beschermingsbeleid voor pgb, kinderen en geloofsgroepen",
"negative_pole": "Links-progressieve systeemkritiek op zorg, arbeid en internationale solidariteit",
"flip": False,
},
7: {
"label": "Liberaal investeren en defensie versus linkse bescherming en controle",
"explanation": (
"Deze as scheidt partijen als D66 en VVD (positief), die inzetten op gerichte "
"investeringen in sport, wetenschap, defensie en slachtofferhulp, van partijen als SP, "
"DENK en NSC (negatief), die nadruk leggen op collectieve sociale bescherming, "
"parlementaire controle over militaire inzet en weren van marktwerking uit publieke "
"sectoren zoals de zorg. De spanning draait om de vraag of de overheid via gerichte "
"liberale investeringen of via collectivistische regulering en bescherming moet opereren."
),
"positive_pole": "Gerichte liberale investeringen in sport, wetenschap en defensie",
"negative_pole": "Collectieve bescherming, parlementaire controle en anti-marktwerking in zorg",
"flip": False,
},
8: {
"label": "Confessioneel-sociaal coalitiebeleid versus procedurele blokkade en handhaving",
"explanation": (
"De positieve pool groepeert moties van CU, NSC, SGP en D66 die concreet beleid bepleiten "
"op uiteenlopende terreinen: kostendelersnorm afschaffen ten gunste van bijstandsgerechtigden, "
"arbeidskorting terugdraaien om gepensioneerden te ontzien, Dutch Dome voor vitale "
"infrastructuurbescherming, stikstofmaatwerk in Regio Foodvalley, versnelling van "
"bouwvergunningverlening en het expliciet stellen van abortusvermindering als beleidsdoel. "
"De negatieve pool bevat het controversieel verklaren van het coffeeshopketen-experiment "
"(procedurele blokkade), een handhavingsdoelstelling voor illegaal vuurwerk en een "
"deregulerende uitzondering voor de topsportsector."
),
"positive_pole": "Coalitie christelijk-sociaal beleid: defensie, stikstofmaatwerk, bouw en ethiek",
"negative_pole": "Procedurele blokkade coffeeshop, handhavingsdoelstelling en topsportderegulering",
"flip": False,
},
9: {
"label": "Brede coalitiemeerderheid versus links marktingrijpen zorg",
"explanation": (
"Deze as onderscheidt moties die steun kregen van een brede, rechtse-tot-centrumlinkse "
"meerderheid (positieve pool) van een specifieke linkse motie (SP/GL-PvdA) die strenge "
"inkomenslimieten voor zorgbestuurders en aandeelhouders wil afdwingen (negatieve pool). "
"De positieve moties omvatten uiteenlopende onderwerpen — defensie, onderwijs, asiel, "
"belastingconstructies, natuur — ingediend door partijen als CU, NSC, JA21, D66 en "
"PvdA/GL, die gezamenlijk door de meerderheid werden gesteund. De negatieve pool "
"vertegenwoordigt een socialistische marktinterventie (WNT-normen in de zorg) die door "
"de rechtse regeringsmeerderheid werd weggestemd."
),
"positive_pole": "Breed gedragen beleid door centrum-rechts meerderheidsstemmen",
"negative_pole": "Socialistische inkomensregulering en marktingrijpen in de zorg",
"flip": False,
},
10: {
"label": "Gereguleerde kennismigratie en natuur-landbouwtransitie versus institutionele veiligheid",
"explanation": (
"De positieve pool groepeert moties die kennismigratie willen beperken tot sectoren met "
"aantoonbaar tekort (blauwe kaart), de kalverhouderij willen inkrimpen via dierrechten, "
"intensieve teelt op vrijgekochte boerengrond willen voorkomen, en humanitaire druk op "
"Israël inzake Gaza willen uitoefenen. Indieners zijn een brede coalitie van NSC, SP, "
"D66, CU en GL-PvdA. De negatieve pool bevat moties over institutionele coördinatie van "
"veiligheidsbeleid (Veilig Thuis), economische soevereiniteit (vitale productie "
"terughalen naar Nederland) en beperking van anonimiteit bij Woo-verzoeken, ingediend "
"door NSC, SGP en SP."
),
"positive_pole": "Beperkte kennismigratie, natuur-landbouwtransitie en Gaza-humanitair",
"negative_pole": "Institutionele veiligheidssturing, economisch nationalisme en Woo-beperking",
"flip": True,
},
}
st.subheader("🔬 SVD Assen — politieke polarisatiethema's")
st.markdown(
"Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen "
"van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren "
"het spanningsveld dat de as beschrijft."
)
json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
if not os.path.exists(json_path):
st.warning(
f"Top-SVD data not found at {json_path}. Run the importance job to generate it."
)
return
try:
with open(json_path, "r", encoding="utf-8") as fh:
j = json.load(fh)
except Exception as e:
st.error(f"Failed to load SVD importance JSON: {e}")
return
window = j.get("window")
rows = j.get("rows", [])
if not rows:
st.info("Geen top-moties in dataset")
return
st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**")
# Build mapping component -> list of motions (deduplicate by motion_id per component)
comp_map: dict[int, list] = {}
for r in rows:
comp = int(r.get("component", 0))
bucket = comp_map.setdefault(comp, [])
existing_ids = {m.get("motion_id") for m in bucket}
if r.get("motion_id") not in existing_ids:
bucket.append(r)
comp_options = sorted(comp_map.keys())
# Build display labels for selectbox: "As 1 — Regulering vs. status-quo"
def _comp_label(c: int) -> str:
    """Return the selectbox label for SVD component ``c``.

    Yields ``"As <c> — <label>"`` when ``SVD_THEMES`` holds a non-empty
    ``label`` for the component, and plain ``"As <c>"`` otherwise.
    """
    lbl = SVD_THEMES.get(c, {}).get("label", "")
    # The separator was missing, so entries rendered as "As 1Regulering ...".
    return f"As {c} — {lbl}" if lbl else f"As {c}"
comp_display = [_comp_label(c) for c in comp_options]
comp_sel_idx = st.selectbox(
"Selecteer SVD-as",
options=list(range(len(comp_options))),
format_func=lambda i: comp_display[i],
index=0,
)
comp_sel = comp_options[comp_sel_idx]
# Show theme explanation
theme = SVD_THEMES.get(comp_sel, {})
if theme:
st.info(f"**{theme['label']}** — {theme['explanation']}")
motions = comp_map.get(comp_sel, [])
# Party axis chart
party_scores = load_party_axis_scores(db_path)
_render_party_axis_chart(party_scores, comp_sel, theme)
# Batch-fetch motion details (title, date, policy_area, url, body_text, voting_results)
motion_ids = [m.get("motion_id") for m in motions if m.get("motion_id") is not None]
motion_details: Dict[int, tuple] = {}
if motion_ids:
# Defensively convert motion_ids to integers, skipping invalid values
ids_int: List[int] = []
for mid in motion_ids:
try:
ids_int.append(int(mid))
except Exception:
logger.warning("Skipping invalid motion id in SVD batch fetch: %r", mid)
# If no valid ids remain, skip the DB query
if ids_int:
con = None
try:
placeholders = ", ".join("?" for _ in ids_int)
con = duckdb.connect(database=db_path, read_only=True)
db_rows = con.execute(
f"SELECT id, title, date, policy_area, url, body_text, voting_results "
f"FROM motions WHERE id IN ({placeholders})",
ids_int,
).fetchall()
motion_details = {r[0]: r for r in db_rows}
except Exception:
logger.exception("Failed to batch-fetch motion details")
finally:
if con:
con.close()
# Split motions by pole sign
pos_motions = [m for m in motions if float(m.get("score", 0.0)) >= 0]
neg_motions = [m for m in motions if float(m.get("score", 0.0)) < 0]
flip = theme.get("flip", False) if theme else False
pos_pole = theme.get("positive_pole", "") if theme else ""
neg_pole = theme.get("negative_pole", "") if theme else ""
# Determine which pole goes left (progressive) and which goes right
if flip:
left_pole, right_pole = pos_pole, neg_pole
left_motions, right_motions = pos_motions, neg_motions
left_arrow, right_arrow = "", ""
else:
left_pole, right_pole = neg_pole, pos_pole
left_motions, right_motions = neg_motions, pos_motions
left_arrow, right_arrow = "", ""
lcol, rcol = st.columns(2)
with lcol:
st.markdown(f"**← {left_pole}**")
for m in left_motions:
mid = m.get("motion_id")
raw_title = m.get("title") or f"Motie #{mid}"
with st.expander(f"{left_arrow} {raw_title[:80]}"):
row = motion_details.get(int(mid)) if mid is not None else None
if row:
try:
date_str = str(row[2])[:10]
except Exception:
date_str = "?"
st.caption(f"📅 {date_str} | {row[3] or ''}")
if row[4] and str(row[4]).startswith("http"):
st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
if row[5]:
with st.expander("Toon volledige tekst"):
st.write(row[5])
_render_voting_results(row[6])
else:
st.caption("_Geen metadata beschikbaar_")
with rcol:
st.markdown(f"**{right_pole} →**")
for m in right_motions:
mid = m.get("motion_id")
raw_title = m.get("title") or f"Motie #{mid}"
with st.expander(f"{right_arrow} {raw_title[:80]}"):
row = motion_details.get(int(mid)) if mid is not None else None
if row:
try:
date_str = str(row[2])[:10]
except Exception:
date_str = "?"
st.caption(f"📅 {date_str} | {row[3] or ''}")
if row[4] and str(row[4]).startswith("http"):
st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
if row[5]:
with st.expander("Toon volledige tekst"):
st.write(row[5])
_render_voting_results(row[6])
else:
st.caption("_Geen metadata beschikbaar_")
def build_mp_quiz_tab(db_path: str) -> None:
    """Render the interactive "which MP are you?" quiz tab.

    A single question is shown per Streamlit render:

    - seed with motions returned by
      ``MotionDatabase.get_motions_with_individual_votes(k=SEED_MOTIONS)``
    - store each answer in ``st.session_state['mp_quiz_votes']`` (keyed by the
      motion id as a string) and the asked id in
      ``st.session_state['mp_quiz_asked']``
    - once every seed is answered, rank MPs with
      ``MotionDatabase.match_mps_for_votes`` and ask
      ``choose_discriminating_motions`` for the next question
    - stop asking once ``MAX_QUESTIONS`` questions were answered or no further
      motion is available; the current ranking is always shown below

    Args:
        db_path: Path to the DuckDB database file.
    """
    st.subheader("🧑 Welk tweede kamerlid ben jij?")
    st.markdown(
        "Beantwoord een paar eenvoudige ja/nee/onthoud vragen over moties om te zien welk Kamerlid het meest op jou lijkt."
    )
    SEED_MOTIONS = 8
    MAX_QUESTIONS = 20

    # Initialise session state on the first render of this tab.
    if "mp_quiz_votes" not in st.session_state:
        st.session_state["mp_quiz_votes"] = {}
    if "mp_quiz_asked" not in st.session_state:
        st.session_state["mp_quiz_asked"] = []

    # Imported lazily so that importing this module does not pull in `database`.
    from database import MotionDatabase as _MotionDatabase

    db_inst = _MotionDatabase(db_path)
    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar om de quiz te starten.")
        return

    # Seed from motions that actually have individual MP vote records.
    seed_ids = db_inst.get_motions_with_individual_votes(k=SEED_MOTIONS)
    if not seed_ids:
        st.warning("Geen individuele stemdata beschikbaar voor de quiz.")
        return

    def _rank_mps(limit: int) -> list:
        """Rank MPs against the answers given so far; empty list on failure."""
        try:
            user_votes = {
                int(k): v for k, v in st.session_state["mp_quiz_votes"].items()
            }
            return db_inst.match_mps_for_votes(user_votes, limit=limit)
        except Exception:
            # Best effort: the quiz stays usable, but the failure is recorded.
            logger.exception("Failed to rank MPs for the quiz answers")
            return []

    def _next_motion_id():
        """Return the id of the next motion to ask, or None when none is left."""
        # Prefer seed motions that have not been answered yet.
        for mid in seed_ids:
            if str(mid) not in st.session_state["mp_quiz_votes"]:
                return mid
        # Otherwise pick a motion that discriminates between remaining MPs.
        candidates = [r["mp_name"] for r in _rank_mps(limit=200)]
        if not candidates:
            return None
        try:
            excluded = [int(k) for k in st.session_state["mp_quiz_votes"]]
            next_ids = db_inst.choose_discriminating_motions(
                candidates, excluded, k=1
            )
        except Exception:
            logger.exception("Failed to choose a discriminating motion")
            return None
        return next_ids[0] if next_ids else None

    # Progress and controls. The counter uses the asked list (not the votes
    # dict) so auto-skipped motions do not count as answered questions and the
    # number agrees with the MAX_QUESTIONS check below.
    _, controls_col = st.columns([3, 1])
    with controls_col:
        st.caption(
            f"Vragen beantwoord: {len(st.session_state['mp_quiz_asked'])}/{MAX_QUESTIONS}"
        )
        if st.button("Reset quiz"):
            st.session_state["mp_quiz_votes"] = {}
            st.session_state["mp_quiz_asked"] = []
            st.rerun()

    # Enforce the question cap: previously MAX_QUESTIONS only changed a message
    # and the quiz kept presenting new questions indefinitely.
    limit_reached = len(st.session_state["mp_quiz_asked"]) >= MAX_QUESTIONS

    # Main question (a single question per render).
    if not limit_reached:
        next_mid = _next_motion_id()
        if next_mid is None:
            st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.")
        else:
            motion_rows = df[df["id"] == next_mid]
            if motion_rows.empty:
                # Motion has votes but is not in the motions DataFrame: record
                # a neutral answer so it is not selected again, then rerun.
                st.session_state["mp_quiz_votes"][str(next_mid)] = "Geen stem"
                st.rerun()
                return
            motion_row = motion_rows.iloc[0]
            st.markdown(f"### {motion_row.get('title') or f'Motie #{next_mid}'}")
            if motion_row.get("layman_explanation"):
                st.info(motion_row.get("layman_explanation"))
            choice = st.radio(
                "Wat zou jij stemmen?",
                options=["Voor", "Tegen", "Onthouden", "Geen stem"],
                index=3,
                key=f"mp_quiz_choice_{next_mid}",
            )
            if st.button("Beantwoord en verder", key=f"mp_quiz_submit_{next_mid}"):
                st.session_state["mp_quiz_votes"][str(next_mid)] = choice
                st.session_state["mp_quiz_asked"].append(next_mid)
                st.rerun()

    # Current ranking.
    ranking = _rank_mps(limit=50)
    if not ranking:
        st.info("Nog geen antwoorden of geen overlapping met bestaande stemdata.")
        return

    st.markdown("**Top kandidaten**")
    st.dataframe(pd.DataFrame(ranking).head(10), use_container_width=True)

    # A match is unique when exactly one MP holds the top agreement percentage
    # and shares at least one vote with the user.
    top_pct = ranking[0]["agreement_pct"]
    top_matches = [r for r in ranking if r["agreement_pct"] == top_pct]
    if len(top_matches) == 1 and top_matches[0]["overlap"] > 0:
        st.success(
            f"Unieke match gevonden: {top_matches[0]['mp_name']} ({top_matches[0]['party']})"
        )
    elif limit_reached:
        st.warning(
            "Maximaal aantal vragen beantwoord. Je hebt meerdere vergelijkbare kandidaten."
        )
    else:
        st.info("Nog geen unieke match — vraag meer om verder te verfijnen.")
# ---------------------------------------------------------------------------
# App entry
# ---------------------------------------------------------------------------
def run_app() -> None:
    """Configure the page, render the sidebar and dispatch the six tabs.

    The sidebar collects the DuckDB path, the window size and the
    "show rejected motions" flag, which are passed on to the tab builders.
    Its "Over" panel shows row counts for the ``motions``,
    ``fused_embeddings`` and ``similarity_cache`` tables, or a warning when
    the database cannot be queried.
    """
    st.set_page_config(
        layout="wide",
        page_title="Parlement Explorer",
        page_icon="🏛",
    )
    st.title("🏛 Parlement Explorer")

    # Sidebar
    st.sidebar.title("Instellingen")
    db_path = st.sidebar.text_input("DuckDB pad", value="data/motions.db")
    window_size = st.sidebar.radio("Venstergrootte", ["quarterly", "annual"], index=0)
    show_rejected = st.sidebar.checkbox("Toon verworpen moties", value=False)

    # About section
    with st.sidebar.expander(" Over", expanded=False):
        try:
            con = duckdb.connect(database=db_path, read_only=True)
            try:
                n_motions = con.execute("SELECT COUNT(*) FROM motions").fetchone()[0]
                n_fused = con.execute(
                    "SELECT COUNT(*) FROM fused_embeddings"
                ).fetchone()[0]
                n_sim = con.execute(
                    "SELECT COUNT(*) FROM similarity_cache"
                ).fetchone()[0]
            finally:
                # Close even when a query fails (e.g. a table is missing);
                # otherwise every Streamlit rerun would leak a connection.
                con.close()
            st.markdown(
                f"**Moties:** {n_motions:,}  \n"
                f"**Fused embeddings:** {n_fused:,}  \n"
                f"**Similarity cache:** {n_sim:,}"
            )
        except Exception as e:
            st.warning(f"DB niet bereikbaar: {e}")

    # Main tabs: a single label -> renderer table keeps the tab layout and the
    # radio fallback below in sync.
    def _render_svd() -> None:
        build_svd_components_tab(db_path)

    tab_renderers = [
        ("🧭 Politiek Kompas", lambda: build_compass_tab(db_path, window_size)),
        ("📈 Trajectories", lambda: build_trajectories_tab(db_path, window_size)),
        ("🔍 Motie Zoeken", lambda: build_search_tab(db_path, show_rejected)),
        ("📋 Motie Browser", lambda: build_browser_tab(db_path, show_rejected)),
        ("🧑 Welk tweede kamerlid ben jij?", lambda: build_mp_quiz_tab(db_path)),
        ("🔬 SVD Components", _render_svd),
    ]
    tab_labels = [label for label, _ in tab_renderers]

    # Streamlit tabs compatibility: some older/newer Streamlit builds expose
    # different APIs.
    if hasattr(st, "tabs") and callable(getattr(st, "tabs")):
        for tab, (_, render) in zip(st.tabs(tab_labels), tab_renderers):
            with tab:
                render()
    else:
        # Fallback for environments where `st.tabs` is not available: use a
        # radio selector. An unmatched selection falls through to the SVD tab,
        # as the original if/elif chain did.
        selection = st.radio("Tab", tab_labels)
        dict(tab_renderers).get(selection, _render_svd)()
if __name__ == "__main__":
    # Script entry point: set up INFO-level logging, then build the UI.
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(message)s",
        level=logging.INFO,
    )
    run_app()