|
|
"""Parlement Explorer — Streamlit data analysis app.
|
|
|
|
|
|
Four tabs:
|
|
|
1. Politiek Kompas — 2D scatter of MPs/parties, window slider
|
|
|
2. Partij Trajectories — party centroid lines over time
|
|
|
3. Motie Zoeken — text search + similarity lookup
|
|
|
4. Motie Browser — sortable table + detail panel
|
|
|
|
|
|
Run with: streamlit run explorer.py
|
|
|
|
|
|
Import-safe: heavy computation is behind @st.cache_data and only runs at UI time.
|
|
|
All DuckDB connections are read_only=True so the app can run alongside the pipeline.
|
|
|
"""
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
import json
|
|
|
import logging
|
|
|
import os
|
|
|
import re
|
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
|
|
import duckdb
|
|
|
import numpy as np
|
|
|
import pandas as pd
|
|
|
import plotly.express as px
|
|
|
import plotly.graph_objects as go
|
|
|
import streamlit as st
|
|
|
|
|
|
# Module-level logger; the loaders below report failures through it via logger.exception.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Party colour palette (consistent across tabs).
# Maps a party name to a hex colour string. Several parties are listed under
# two spellings that share one colour (e.g. "CU"/"ChristenUnie" and
# "NSC"/"Nieuw Sociaal Contract"). "Unknown" is the neutral grey that callers
# also use as the fallback for parties missing from this table.
PARTY_COLOURS: Dict[str, str] = {
    "VVD": "#1E73BE",
    "PVV": "#002366",
    "D66": "#00A36C",
    "CDA": "#4CAF50",
    "SP": "#E53935",
    "PvdA": "#D32F2F",
    "GroenLinks": "#388E3C",
    "GroenLinks-PvdA": "#2E7D32",
    "CU": "#0288D1",
    "SGP": "#F4511E",
    "PvdD": "#43A047",
    "FVD": "#6A1B9A",
    "JA21": "#7B1FA2",
    "BBB": "#8D6E63",
    "NSC": "#FF8F00",
    "Nieuw Sociaal Contract": "#FF8F00",  # alias used in mp_metadata
    "DENK": "#00897B",
    "50PLUS": "#7E57C2",
    "Volt": "#572AB7",
    "ChristenUnie": "#0288D1",
    "Unknown": "#9E9E9E",
}
|
|
|
|
|
|
# Ordered list of well-known parties for trajectory default selection.
# Keeps the chart readable without overwhelming users with all parties.
# Used by the trajectories tab as a fallback default when none of its
# preferred default parties are present in the data.
KNOWN_MAJOR_PARTIES: List[str] = [
    "VVD",
    "PVV",
    "D66",
    "GroenLinks-PvdA",
    "GroenLinks",
    "PvdA",
    "CDA",
    "SP",
    "NSC",
    "CU",
    "BBB",
]
|
|
|
|
|
|
|
|
|
# Parties currently seated in the Tweede Kamer (2023 election cycle).
# These are the entity_ids as stored in svd_vectors for window='2025'.
# load_party_axis_scores uses this set to discard MPs whose (normalised)
# party is not one of these.
CURRENT_PARLIAMENT_PARTIES: frozenset[str] = frozenset(
    {
        "PVV",
        "VVD",
        "NSC",
        "BBB",
        "D66",
        "GroenLinks-PvdA",
        "CDA",
        "SP",
        "ChristenUnie",
        "SGP",
        "Volt",
        "DENK",
        "PvdD",
        "JA21",
        "FVD",
    }
)
|
|
|
|
|
|
# Normalize variant party names to canonical display names in CURRENT_PARLIAMENT_PARTIES.
# Keys are spellings found in the data (including single-member groups);
# values are the canonical party they are folded into. Names not listed here
# are used unchanged by callers (via `.get(party, party)`).
_PARTY_NORMALIZE: dict[str, str] = {
    "Nieuw Sociaal Contract": "NSC",
    "CU": "ChristenUnie",
    "GL": "GroenLinks-PvdA",
    "GroenLinks": "GroenLinks-PvdA",
    "PvdA": "GroenLinks-PvdA",
    "Gündoğan": "Volt",  # confirmed Volt, left parliament 2023-12-05
    "Lid Keijzer": "BBB",  # Keijzer left CDA, joined BBB cabinet
    "Groep Markuszower": "PVV",  # Markuszower sits with PVV faction
}
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Cached loaders
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Beschikbare tijdsvensters laden…")
def get_available_windows(db_path: str) -> List[str]:
    """List every distinct window_id present in svd_vectors, in ascending order.

    Args:
        db_path: Path of the DuckDB database; opened read-only.

    Returns:
        The window ids sorted by the database, or an empty list when the
        query fails (the failure is logged). A failure to open the database
        is not caught here and propagates to the caller.
    """
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        query = "SELECT DISTINCT window_id FROM svd_vectors ORDER BY window_id"
        window_ids = [record[0] for record in connection.execute(query).fetchall()]
    except Exception:
        logger.exception("Failed to query available windows")
        window_ids = []
    finally:
        connection.close()
    return window_ids
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner=False)
def get_uniform_dim_windows(
    db_path: str, min_dim: int = 25, min_entities: int = 10
) -> List[str]:
    """Return windows whose dominant MP-vector dimension is large enough for PCA.

    Some windows contain a mix of vector lengths due to multiple pipeline runs
    (e.g. a window may hold both dim=1 and dim=50 rows). For every window the
    most common vector length among ``entity_type = 'mp'`` rows is determined
    (ties resolved in favour of the larger dimension). A window is kept only
    when that dominant dimension is at least ``min_dim`` and at least
    ``min_entities`` MP rows have it, which avoids degenerate PCA inputs.

    Args:
        db_path: Path of the DuckDB database; opened read-only.
        min_dim: Minimum dominant vector length a window must have.
        min_entities: Minimum number of MP rows with the dominant length.

    Returns:
        Qualifying window ids in ascending order, or an empty list when the
        query fails (the failure is logged).
    """
    con = duckdb.connect(database=db_path, read_only=True)
    try:
        rows = con.execute(
            """
            WITH vec_dims AS (
                SELECT window_id, json_array_length(vector) AS dim
                FROM svd_vectors
                WHERE entity_type = 'mp'
            ),
            window_dim_counts AS (
                SELECT window_id, dim, COUNT(*) AS cnt
                FROM vec_dims
                GROUP BY window_id, dim
            ),
            dominant AS (
                SELECT DISTINCT ON (window_id) window_id, dim, cnt
                FROM window_dim_counts
                ORDER BY window_id, cnt DESC, dim DESC
            )
            SELECT window_id
            FROM dominant
            WHERE dim >= ? AND cnt >= ?
            ORDER BY window_id
            """,
            [min_dim, min_entities],
        ).fetchall()
        return [r[0] for r in rows]
    except Exception:
        logger.exception("Failed to query uniform-dim windows")
        return []
    finally:
        con.close()
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="2D posities berekenen (kan even duren)…")
def load_positions(
    db_path: str, window_size: str = "quarterly"
) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict]:
    """Project aligned SVD vectors to 2D with PCA and group the result per window.

    The PCA is always fitted on every window returned by
    ``get_uniform_dim_windows`` (quarterly and annual together). Per the
    original author's note, fitting on the annual windows alone makes PC1
    pick up cross-temporal drift instead of left-right ideology, which shows
    up as a ~90° rotation. ``window_size`` therefore only decides which
    windows are *returned*, never which ones are fitted.

    Args:
        db_path: Path of the DuckDB database.
        window_size: ``"annual"`` keeps only windows whose id has no ``"-Q"``
            in it; any other value returns all computed windows.

    Returns:
        ``(positions_by_window, axis_def)`` where ``positions_by_window`` is
        ``{window_id: {entity_name: (x, y)}}`` and ``axis_def`` is whatever
        ``compute_2d_axes`` returns as its second value. Both are empty dicts
        when no usable window exists.
    """
    from analysis.political_axis import compute_2d_axes

    fit_windows = get_uniform_dim_windows(db_path)
    if not fit_windows:
        return {}, {}

    all_positions, axis_def = compute_2d_axes(
        db_path,
        window_ids=fit_windows,
        method="pca",
        pca_residual=True,
        normalize_vectors=True,
    )

    # Quarterly (or any non-annual) view: hand back everything that was computed.
    if window_size != "annual":
        return all_positions, axis_def

    # Annual view: drop the quarterly windows only after the PCA has been fitted.
    annual_only = {
        wid: coords
        for wid, coords in all_positions.items()
        if wid in fit_windows and "-Q" not in wid
    }
    return annual_only, axis_def
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Partijkaart laden…")
def load_party_map(db_path: str) -> Dict[str, str]:
    """Build the ``{mp_name: party}`` lookup with long party names abbreviated.

    The raw mapping comes from ``analysis.visualize._load_party_map``; the
    only rewrite applied here is "Nieuw Sociaal Contract" -> "NSC".

    Args:
        db_path: Path of the DuckDB database.

    Returns:
        The normalised mapping, or an empty dict when loading fails (the
        failure is logged).
    """
    from analysis.visualize import _load_party_map

    aliases: Dict[str, str] = {"Nieuw Sociaal Contract": "NSC"}

    try:
        normalised: Dict[str, str] = {}
        for mp_name, party_name in _load_party_map(db_path).items():
            normalised[mp_name] = aliases.get(party_name, party_name)
        return normalised
    except Exception:
        logger.exception("Failed to load party map")
        return {}
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Actieve Kamerleden laden…")
def load_active_mps(db_path: str) -> set:
    """Return the set of mp_name values that are currently seated in parliament.

    An MP is considered active if their mp_metadata row has
    ``tot_en_met IS NULL``, meaning they have no recorded end date.

    Args:
        db_path: Path of the DuckDB database; opened read-only.

    Returns:
        Set of active MP names, or an empty set when the database cannot be
        opened or queried (the failure is logged).
    """
    con = None
    try:
        con = duckdb.connect(database=db_path, read_only=True)
        rows = con.execute(
            "SELECT mp_name FROM mp_metadata WHERE tot_en_met IS NULL"
        ).fetchall()
        return {r[0] for r in rows}
    except Exception:
        logger.exception("Failed to load active MPs")
        return set()
    finally:
        # Close in `finally` so a failing query no longer leaks the connection.
        if con is not None:
            con.close()
|
|
|
|
|
|
|
|
|
def _svd_vector_to_floats(raw_vec) -> Optional[List[float]]:
    """Coerce a stored SVD vector (JSON text/bytes, list or iterable) to floats.

    ``None`` components become 0.0. Returns ``None`` when ``raw_vec`` is of a
    type that cannot be turned into a list at all; malformed JSON still raises.
    """
    if isinstance(raw_vec, str):
        values = json.loads(raw_vec)
    elif isinstance(raw_vec, (bytes, bytearray)):
        values = json.loads(raw_vec.decode())
    elif isinstance(raw_vec, list):
        values = raw_vec
    else:
        try:
            values = list(raw_vec)
        except Exception:
            return None
    return [float(v) if v is not None else 0.0 for v in values]


@st.cache_data(show_spinner="Partijposities op SVD-assen laden…")
def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
    """Return per-party SVD vectors, computed as the mean of individual MP vectors.

    Steps:
      1. Build an MP -> party mapping from ``mp_metadata`` rows that overlap
         the period starting 2023-11-22. Rows are read in ascending ``van``
         order and the last row per MP wins, so a party switch resolves to
         the most recent assignment.
      2. Normalise party names through ``_PARTY_NORMALIZE``.
      3. Read every ``entity_type='mp'`` vector of window
         ``'current_parliament'``; rows whose entity_id is not a known MP, or
         whose party is not in ``CURRENT_PARLIAMENT_PARTIES``, are skipped.
      4. Average the remaining vectors per party.

    Per the original author's note this mirrors the compass data source
    (which also averages individual MPs), so axis rankings stay consistent
    between the SVD tab and the compass.

    Args:
        db_path: Path of the DuckDB database; opened read-only.

    Returns:
        ``{party_name: [float, ...]}`` with one mean value per SVD component,
        or an empty dict when anything fails (the failure is logged).
    """
    con = None
    try:
        con = duckdb.connect(database=db_path, read_only=True)

        # One ordered query is enough: iterating in ascending `van` order and
        # letting later rows overwrite earlier ones keeps the latest party.
        meta_rows = con.execute(
            "SELECT mp_name, party FROM mp_metadata "
            "WHERE van >= '2023-11-22' OR tot_en_met IS NULL OR tot_en_met >= '2023-11-22' "
            "ORDER BY van ASC"
        ).fetchall()
        mp_party: Dict[str, str] = {
            mp_name: _PARTY_NORMALIZE.get(party, party)
            for mp_name, party in meta_rows
            if mp_name and party
        }

        # Individual MP vectors from current_parliament
        rows = con.execute(
            "SELECT entity_id, vector FROM svd_vectors "
            "WHERE entity_type='mp' AND window_id='current_parliament'"
        ).fetchall()

        party_vecs: Dict[str, List[List[float]]] = {}
        for entity_id, raw_vec in rows:
            party = mp_party.get(entity_id)
            if party is None or party not in CURRENT_PARLIAMENT_PARTIES:
                continue
            fvec = _svd_vector_to_floats(raw_vec)
            if fvec is None:
                continue
            party_vecs.setdefault(party, []).append(fvec)

        # Average vectors per party
        return {
            party: np.array(vecs).mean(axis=0).tolist()
            for party, vecs in party_vecs.items()
        }
    except Exception:
        logger.exception("Failed to load party axis scores")
        return {}
    finally:
        # `con` stays None when connect() itself failed; closing is best-effort.
        if con is not None:
            try:
                con.close()
            except Exception:
                logger.debug("Ignoring error while closing DuckDB connection", exc_info=True)
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Scree-plot laden…")
def load_scree_data(db_path: str) -> List[float]:
    """Return per-component importances (L2-norm per SVD dim), sorted descending.

    Reads the ``entity_type='mp'`` vectors of window ``'current_parliament'``
    whose entity_id contains a comma (``LIKE '%,%'``), computes the L2-norm of
    every SVD dimension across those vectors, and sorts the norms descending
    so the elbow shape is visible in the scree chart.

    Args:
        db_path: Path of the DuckDB database; opened read-only.

    Returns:
        Importances in descending order; an empty list when there are no
        vectors or when anything fails (the failure is logged).
    """
    con = None
    try:
        con = duckdb.connect(database=db_path, read_only=True)
        rows = con.execute(
            "SELECT entity_id, vector FROM svd_vectors "
            "WHERE entity_type='mp' AND window_id='current_parliament' "
            "AND entity_id LIKE '%,%'"
        ).fetchall()

        vectors: List[List[float]] = []
        for _entity_id, raw_vec in rows:
            if isinstance(raw_vec, str):
                vec = json.loads(raw_vec)
            elif isinstance(raw_vec, (bytes, bytearray)):
                vec = json.loads(raw_vec.decode())
            elif isinstance(raw_vec, list):
                vec = raw_vec
            else:
                try:
                    vec = list(raw_vec)
                except Exception:
                    # Value of a type that cannot be iterated: skip the row.
                    continue
            vectors.append([float(v) if v is not None else 0.0 for v in vec])

        if not vectors:
            return []

        # Use the longest vector so a short first row cannot hide later
        # dimensions; shorter vectors simply do not contribute to them.
        n_dims = max(len(v) for v in vectors)
        importances: List[float] = []
        for dim in range(n_dims):
            col = [v[dim] for v in vectors if dim < len(v)]
            importances.append(sum(x**2 for x in col) ** 0.5)
        return sorted(importances, reverse=True)
    except Exception:
        logger.exception("Failed to load scree data")
        return []
    finally:
        # `con` stays None when connect() itself failed; closing is best-effort.
        if con is not None:
            try:
                con.close()
            except Exception:
                logger.debug("Ignoring error while closing DuckDB connection", exc_info=True)
|
|
|
|
|
|
|
|
|
def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
    """Render a scree plot showing relative SVD component importance.

    Each bar is one component's share (in %) of the sum of *all* importances,
    not only of the displayed ones. The first two bars are highlighted (the
    components used in the compass) and annotated with their value; the rest
    are muted. A dotted cumulative line on the same y-axis helps spot the
    elbow, and a dashed 50 % line is drawn for reference.

    Nothing is rendered when ``importances`` is empty or when ``n_show``
    selects no components.

    Args:
        importances: Importance values sorted descending (from load_scree_data).
        n_show: How many leading components to display (default: first 15).
    """
    if not importances:
        return

    total = sum(importances) or 1.0
    data = [v / total * 100 for v in importances[:n_show]]
    if not data:
        # e.g. n_show == 0: max() below would raise on an empty sequence.
        return
    ranks = list(range(1, len(data) + 1))

    # Cumulative share for the dotted overlay line
    cumsum: List[float] = []
    running = 0.0
    for v in data:
        running += v
        cumsum.append(running)

    # Colour: first 2 bars highlighted (compass axes), rest muted
    n_highlight = 2
    bar_colours = [
        "#1565C0" if i < n_highlight else "#90CAF9" for i in range(len(data))
    ]

    fig = go.Figure()

    # Bars
    fig.add_trace(
        go.Bar(
            x=ranks,
            y=data,
            marker_color=bar_colours,
            hovertemplate="As %{x}<br><b>%{y:.1f}%</b> van totaal<extra></extra>",
            showlegend=False,
        )
    )

    # Cumulative line (dotted, warm amber)
    fig.add_trace(
        go.Scatter(
            x=ranks,
            y=cumsum,
            mode="lines+markers",
            line={"color": "#F57C00", "width": 2, "dash": "dot"},
            marker={"size": 5, "color": "#F57C00"},
            hovertemplate="As %{x}<br>Cumulatief: <b>%{y:.1f}%</b><extra></extra>",
            name="Cumulatief",
            showlegend=True,
        )
    )

    # 50 % reference line
    fig.add_hline(
        y=50,
        line_dash="dash",
        line_color="#BDBDBD",
        line_width=1,
        annotation_text="50%",
        annotation_position="right",
        annotation_font_color="#9E9E9E",
        annotation_font_size=11,
    )

    # Annotations on the top-2 bars showing their % value
    for i in range(min(n_highlight, len(data))):
        fig.add_annotation(
            x=ranks[i],
            y=data[i] + 0.3,
            text=f"{data[i]:.1f}%",
            showarrow=False,
            font={"size": 11, "color": "#1565C0"},
            yanchor="bottom",
        )

    fig.update_layout(
        height=280,
        margin={"l": 10, "r": 50, "t": 30, "b": 40},
        title={
            "text": "Belang per SVD-as",
            "font": {"size": 13, "color": "#555555"},
            "x": 0.02,
            "xanchor": "left",
        },
        legend={
            "orientation": "h",
            "x": 0.5,
            "xanchor": "center",
            "y": 1.08,
            "font": {"size": 11},
        },
        xaxis={
            "title": {"text": "As (rang)", "font": {"size": 11}},
            "tickmode": "linear",
            "tick0": 1,
            "dtick": 1,
            "showline": False,
            "showgrid": False,
        },
        yaxis={
            "title": {"text": "% van totale variantie", "font": {"size": 11}},
            "showline": False,
            "showgrid": True,
            "gridcolor": "#eeeeee",
            "ticksuffix": "%",
            # Leave a little headroom above the highest cumulative point.
            "range": [0, max(cumsum) * 1.08],
        },
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        bargap=0.25,
    )
    st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
|
|
|
def _render_party_axis_chart(
    party_scores: Dict[str, List[float]], comp_sel: int, theme: dict
) -> None:
    """Render a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`.

    Each party is plotted at its score on a single horizontal axis (y=0).
    When ``theme['flip']`` is truthy the scores are negated and the pole
    labels are swapped, so the chart orientation can be mirrored per axis.

    Args:
        party_scores: ``{party: vector}`` as produced by load_party_axis_scores.
        comp_sel: 1-based component number; parties whose vector is too short
            for this component are left out.
        theme: Dict read for the optional keys ``flip``, ``positive_pole``
            and ``negative_pole``.
    """
    if not party_scores:
        st.caption("_Partijdata niet beschikbaar voor deze as._")
        return

    axis_idx = comp_sel - 1  # 0-based index into the party vector
    flip = theme.get("flip", False)

    data: list[dict] = []
    for party, vec in party_scores.items():
        if axis_idx < len(vec):
            score = vec[axis_idx]
            if flip:
                score = -score
            data.append({"party": party, "score": score})

    if not data:
        st.caption("_Geen partijscores voor deze as._")
        return

    scores = [d["score"] for d in data]
    parties = [d["party"] for d in data]
    colours = [PARTY_COLOURS.get(p, "#9E9E9E") for p in parties]
    hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]

    # Axis labels: without flip the negative pole sits on the left.
    pos_pole = theme.get("positive_pole", "")
    neg_pole = theme.get("negative_pole", "")
    left_label = pos_pole if flip else neg_pole
    right_label = neg_pole if flip else pos_pole

    fig = go.Figure()

    # Baseline: extend 15 % beyond the outermost scores. Padding by |value|
    # (rather than multiplying by 1.15) always pushes the ends outward, also
    # when every score has the same sign.
    lo, hi = min(scores), max(scores)
    if lo == hi:
        x_min, x_max = lo - 1, hi + 1
    else:
        x_min = lo - 0.15 * abs(lo)
        x_max = hi + 0.15 * abs(hi)
    fig.add_trace(
        go.Scatter(
            x=[x_min, x_max],
            y=[0, 0],
            mode="lines",
            line={"color": "#cccccc", "width": 1},
            hoverinfo="skip",
            showlegend=False,
        )
    )
    # Party markers
    fig.add_trace(
        go.Scatter(
            x=scores,
            y=[0] * len(scores),
            mode="markers+text",
            text=parties,
            textposition="top center",
            marker={"size": 18, "color": colours},
            hovertext=hover,
            hoverinfo="text",
            showlegend=False,
        )
    )
    fig.update_layout(
        height=160,
        margin={"l": 10, "r": 10, "t": 10, "b": 30},
        xaxis={
            "title": f"← {left_label} | {right_label} →",
            "showticklabels": False,
            "showline": False,
            "showgrid": False,
            "zeroline": False,
        },
        yaxis={"visible": False, "range": [-1, 2]},
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
    )
    st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Moties laden…")
def load_motions_df(db_path: str) -> pd.DataFrame:
    """Read the motions table into a DataFrame and derive a ``year`` column.

    ``date`` is converted with ``pd.to_datetime(..., errors="coerce")`` so
    unparseable values become NaT; ``year`` is taken from that column.

    Args:
        db_path: Path of the DuckDB database; opened read-only.

    Returns:
        The motions DataFrame, or an empty DataFrame when the query or the
        conversion fails (the failure is logged).
    """
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        motions = connection.execute(
            """
            SELECT id, title, description, date, policy_area,
                   voting_results, layman_explanation,
                   winning_margin, controversy_score, url
            FROM motions
            """
        ).fetchdf()
        parsed_dates = pd.to_datetime(motions["date"], errors="coerce")
        motions["date"] = parsed_dates
        motions["year"] = motions["date"].dt.year
        return motions
    except Exception:
        logger.exception("Failed to load motions")
        return pd.DataFrame()
    finally:
        connection.close()
|
|
|
|
|
|
|
|
|
def query_similar(
    db_path: str,
    source_motion_id: int,
    vector_type: str = "fused",
    top_k: int = 10,
) -> pd.DataFrame:
    """Look up the motions most similar to one motion in similarity_cache.

    Args:
        db_path: Path of the DuckDB database; opened read-only.
        source_motion_id: Id of the motion to find neighbours for.
        vector_type: Value matched against ``similarity_cache.vector_type``.
        top_k: Maximum number of rows to return.

    Returns:
        DataFrame with target_motion_id, score, window_id, title, date and
        policy_area, highest score first; an empty DataFrame when the query
        fails (the failure is logged).
    """
    connection = duckdb.connect(database=db_path, read_only=True)
    params = [source_motion_id, vector_type, top_k]
    try:
        cursor = connection.execute(
            """
            SELECT sc.target_motion_id, sc.score, sc.window_id,
                   m.title, m.date, m.policy_area
            FROM similarity_cache sc
            JOIN motions m ON m.id = sc.target_motion_id
            WHERE sc.source_motion_id = ?
              AND sc.vector_type = ?
            ORDER BY sc.score DESC
            LIMIT ?
            """,
            params,
        )
        return cursor.fetchdf()
    except Exception:
        logger.exception(
            "Failed to query similarity cache for motion %s", source_motion_id
        )
        return pd.DataFrame()
    finally:
        connection.close()
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Shared rendering helpers
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def _render_voting_results(voting_results_json) -> None:
    """Render a voting_results blob as one markdown line per vote value.

    The data is expected to be ``{party_or_mp: vote}`` (either a dict or its
    JSON text). Actors are grouped by their lower-cased, stripped vote and
    shown in the fixed order voor / tegen / onthouden / afwezig, followed by
    any other vote values in the order they were encountered.

    Rendering is best-effort: empty input or data that is not a non-empty
    dict renders nothing, and any error is logged instead of raised so a
    malformed blob cannot break the surrounding page.

    Args:
        voting_results_json: JSON string or already-decoded dict; may be
            empty or None.
    """
    if not voting_results_json:
        return
    try:
        vdata = (
            json.loads(voting_results_json)
            if isinstance(voting_results_json, str)
            else voting_results_json
        )
        if not isinstance(vdata, dict) or not vdata:
            return

        # Group {vote: [actor, ...]}
        by_vote: Dict[str, List[str]] = {}
        for actor, vote in vdata.items():
            by_vote.setdefault(str(vote).lower().strip(), []).append(str(actor))

        # Render in fixed order, unknown vote values last
        vote_order = ["voor", "tegen", "onthouden", "afwezig"]
        vote_emoji = {"voor": "✅", "tegen": "❌", "onthouden": "🟡", "afwezig": "⬜"}
        for v in vote_order + [k for k in by_vote if k not in vote_order]:
            actors = by_vote.get(v)
            if not actors:
                continue
            emoji = vote_emoji.get(v, "▪️")
            st.markdown(
                f"**{emoji} {v.capitalize()}** ({len(actors)}): {', '.join(sorted(actors))}"
            )
        # A non-empty dict always yields at least one group, so no
        # "nothing to show" fallback is needed here.
    except Exception:
        logger.warning("Could not render voting results", exc_info=True)
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Tab 1: Politiek Kompas
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def build_compass_tab(db_path: str, window_size: str) -> None:
    """Render the "Politiek Kompas" tab: a 2D scatter of MPs or party centroids.

    The tab lets the user pick a window, choose between individual MPs and
    party centroids, and hide parties with fewer than a minimum number of MPs.
    For the ``current_parliament`` window only MPs returned by
    ``load_active_mps`` are shown. MPs that occur under several spellings
    differing only by a parenthetical (e.g. "Dijk, J.P." and
    "Dijk, J.P. (Jimmy)") are merged into one point at their mean position.

    Args:
        db_path: Path of the DuckDB database.
        window_size: Accepted for signature parity with the other tabs but
            not used: the compass always loads the annual windows.
    """
    st.subheader("Politiek Kompas")
    st.markdown(
        "2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)."
    )

    # Compass always uses annual windows regardless of the sidebar window_size setting.
    positions_by_window, _axis_def = load_positions(db_path, "annual")
    if not positions_by_window:
        st.warning(
            "Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid."
        )
        return

    party_map = load_party_map(db_path)
    active_mps = load_active_mps(db_path)

    # Sort windows: year windows first (ascending), current_parliament last.
    year_windows = sorted(w for w in positions_by_window if w != "current_parliament")
    has_current = "current_parliament" in positions_by_window
    windows = year_windows + (["current_parliament"] if has_current else [])

    # Years flagged as sparse get a warning marker in their label.
    _SPARSE_YEARS = {"2016", "2017", "2018"}

    def _window_label(w: str) -> str:
        if w == "current_parliament":
            return "Huidig parlement"
        if w in _SPARSE_YEARS:
            return f"{w} ⚠️"
        return w

    col1, col2 = st.columns([3, 1])
    with col2:
        window_idx = st.selectbox(
            "Jaar",
            options=windows,
            index=len(windows) - 1,  # default: last entry (current_parliament if present)
            format_func=_window_label,
        )
        level = st.radio(
            "Weergave",
            options=["Kamerleden", "Partijen"],
            index=0,
            horizontal=True,
        )
        min_mps = st.number_input(
            "Min. Kamerleden per partij",
            min_value=1,
            max_value=20,
            value=3,
            step=1,
            help="Partijen met minder dan dit aantal zetels worden niet weergegeven.",
        )

    pos = positions_by_window.get(window_idx, {})
    if not pos:
        st.info(f"Geen data voor venster {window_idx}")
        return

    # For current_parliament, restrict to MPs who are still seated (tot_en_met IS NULL).
    # Historical windows include all MPs active at the time — no restriction needed.
    if window_idx == "current_parliament":
        pos = {mp: xy for mp, xy in pos.items() if mp in active_mps}

    def _strip_paren(name: str) -> str:
        return re.sub(r"\s*\([^)]*\)", "", name).strip()

    # Group name variants under their canonical (parenthetical-free) name,
    # remembering the original spellings so the party lookup can try them too.
    variants_by_base: Dict[str, List[Tuple[str, float, float]]] = {}
    for name, (x, y) in pos.items():
        variants_by_base.setdefault(_strip_paren(name), []).append((name, x, y))

    rows = []
    for base, variants in variants_by_base.items():
        # Prefer the canonical name; fall back to any original spelling that
        # the party map knows, otherwise mark the MP as "Unknown".
        party = party_map.get(base) or next(
            (party_map[n] for n, _, _ in variants if party_map.get(n)),
            "Unknown",
        )
        rows.append(
            {
                "name": base,
                "x": sum(v[1] for v in variants) / len(variants),
                "y": sum(v[2] for v in variants) / len(variants),
                "party": party,
            }
        )

    # Explicit columns keep the frame well-formed even when `rows` is empty
    # (e.g. no active MPs could be loaded), so the filters below cannot fail.
    df_pos = pd.DataFrame(rows, columns=["name", "x", "y", "party"])

    # Drop parties below the minimum MP threshold (unreliable centroids).
    party_counts = df_pos[df_pos["party"] != "Unknown"]["party"].value_counts()
    valid_parties = set(party_counts[party_counts >= min_mps].index)
    df_pos = df_pos[df_pos["party"].isin(valid_parties)]

    if df_pos.empty:
        st.info("Geen partijen met genoeg Kamerleden voor dit venster.")
        return

    if level == "Partijen":
        # Aggregate to party centroids
        df_party = df_pos.groupby("party", as_index=False).agg(
            x=("x", "mean"), y=("y", "mean"), n=("name", "count")
        )
        df_party["name"] = df_party["party"]
        colour_map = {
            p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_party["party"].unique()
        }
        fig = px.scatter(
            df_party,
            x="x",
            y="y",
            color="party",
            text="party",
            hover_name="party",
            hover_data={"party": False, "x": ":.3f", "y": ":.3f", "n": True},
            color_discrete_map=colour_map,
            title=f"Politiek Kompas — {_window_label(window_idx)} (partijen)",
            labels={
                "x": "Links ← → Rechts",
                "y": "Progressief ↑ / Conservatief ↓",
                "n": "Kamerleden",
            },
        )
        fig.update_traces(textposition="top center", marker_size=14)
    else:
        colour_map = {
            p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique()
        }
        fig = px.scatter(
            df_pos,
            x="x",
            y="y",
            color="party",
            hover_name="name",
            hover_data={"party": True, "x": ":.3f", "y": ":.3f"},
            color_discrete_map=colour_map,
            title=f"Politiek Kompas — {_window_label(window_idx)}",
            labels={"x": "Links ← → Rechts", "y": "Progressief ↑ / Conservatief ↓"},
        )

    # Fixed axis ranges keep the view comparable when switching windows.
    fig.update_layout(
        height=600,
        legend_title_text="Partij",
        xaxis={"range": [-1, 1]},
        yaxis={"range": [-0.6, 0.6]},
    )

    with col1:
        st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Tab 2: Partij Trajectories
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def build_trajectories_tab(db_path: str, window_size: str) -> None:
    """Render the "Partij Trajectories" tab: party centroid paths across windows.

    For every window the MP positions are averaged per party (MPs without a
    party in the party map are ignored). The user selects parties in a
    multiselect; each selected party is drawn as a smoothed line through its
    centroids in ascending window-id order.

    Args:
        db_path: Path of the DuckDB database.
        window_size: Passed to ``load_positions`` to choose which windows
            are shown.
    """
    st.subheader("Partij Trajectories")
    st.markdown("Hoe bewegen partijen over de tijdsvensters heen?")

    positions_by_window, _ = load_positions(db_path, window_size)
    if not positions_by_window:
        st.warning("Geen positiedata beschikbaar.")
        return

    party_map = load_party_map(db_path)

    # centroids[party][window_id] = (mean x, mean y) of that party's MPs
    centroids: Dict[str, Dict[str, Tuple[float, float]]] = {}
    for wid in sorted(positions_by_window.keys()):
        members: Dict[str, List[Tuple[float, float]]] = {}
        for mp_name, (x, y) in positions_by_window.get(wid, {}).items():
            party = party_map.get(mp_name, "Unknown")
            if party != "Unknown":
                members.setdefault(party, []).append((x, y))
        for party, coords in members.items():
            mean_x = float(np.mean([cx for cx, _cy in coords]))
            mean_y = float(np.mean([cy for _cx, cy in coords]))
            centroids.setdefault(party, {})[wid] = (mean_x, mean_y)

    # Every party that has at least one centroid is selectable.
    all_parties_sorted = sorted(centroids)

    # Default selection: CDA, D66, VVD when present; otherwise the known
    # major parties; otherwise the first six parties alphabetically.
    default_parties = (
        [p for p in ["CDA", "D66", "VVD"] if p in centroids]
        or [p for p in KNOWN_MAJOR_PARTIES if p in centroids]
        or all_parties_sorted[:6]
    )

    selected_parties = st.multiselect(
        "Selecteer partijen",
        options=all_parties_sorted,
        default=default_parties,
    )

    fig = go.Figure()
    for party in selected_parties:
        path = centroids.get(party)
        if path is None:
            continue
        ordered_windows = sorted(path.keys())
        colour = PARTY_COLOURS.get(party, "#9E9E9E")
        trace = go.Scatter(
            x=[path[w][0] for w in ordered_windows],
            y=[path[w][1] for w in ordered_windows],
            mode="lines+markers",
            name=party,
            text=ordered_windows,  # full window ID for hover
            line={"color": colour, "shape": "spline", "smoothing": 1.3},
            marker={"color": colour, "size": 8},
            hovertemplate=(
                f"<b>{party}</b><br>"
                "venster: %{text}<br>"
                "x: %{x:.3f}<br>y: %{y:.3f}<extra></extra>"
            ),
        )
        fig.add_trace(trace)

    fig.update_layout(
        title="Partij trajectories",
        xaxis_title="Links ← → Rechts",
        yaxis_title="Progressief ↑ / Conservatief ↓",
        height=600,
        legend_title_text="Partij",
    )
    st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Tab 3: Motie Zoeken
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def build_search_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Zoeken" tab: title search with year/controversy filters.

    Loads the motions frame via ``load_motions_df``, filters it in memory, and
    shows the most controversial hits as expanders containing metrics, the
    voting breakdown, a source link and similar motions.

    Args:
        db_path: Database path handed to ``load_motions_df`` and ``query_similar``.
        show_rejected: When False, rows whose stripped title equals
            "Verworpen." are removed before filtering.
    """
    max_results = 50  # number of result expanders rendered per run

    def _num(value: object, default: float = 0.0) -> float:
        """Return *value* as a float, or *default* for None/NaN.

        Keeps the ``:.2f`` formatting below from raising on missing values.
        """
        return default if pd.isna(value) else float(value)

    st.subheader("Motie Zoeken")

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        df = df[df["title"].fillna("").str.strip() != "Verworpen."]

    # Controls
    col1, col2, col3 = st.columns([2, 1, 1])
    with col1:
        query = st.text_input(
            "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen"
        )
    with col2:
        years = sorted(df["year"].dropna().astype(int).unique().tolist())
        if years:
            year_range = st.select_slider(
                "Jaar", options=years, value=(years[0], years[-1])
            )
        else:
            # No usable year values: fall back to a fixed range so the
            # filter below still has bounds.
            year_range = (2019, 2024)
    with col3:
        min_controversy = st.slider(
            "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05
        )

    # Apply filters in-memory. Boolean indexing already returns a new frame,
    # so no explicit copy of ``df`` is needed.
    working = df[(df["year"] >= year_range[0]) & (df["year"] <= year_range[1])]
    if min_controversy > 0:
        working = working[working["controversy_score"] >= min_controversy]
    if query:
        q = query.lower()
        # Plain substring match: regex=False keeps user input literal.
        mask = working["title"].fillna("").str.lower().str.contains(q, regex=False)
        working = working[mask]

    working = working.sort_values(by="controversy_score", ascending=False)
    st.caption(f"{len(working)} resultaten (top {max_results} getoond)")

    # NOTE(review): query_similar is called for every rendered result on each
    # run; if expander bodies execute while collapsed this is up to
    # ``max_results`` lookups per rerun — confirm this is acceptable.
    for _, row in working.head(max_results).iterrows():
        title = row.get("title") or f"Motie #{row['id']}"
        date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
        controversy = _num(row.get("controversy_score"))
        margin = _num(row.get("winning_margin"))
        with st.expander(f"**{title}** — {date_str} — 🔥 {controversy:.2f}"):
            cols = st.columns(3)
            cols[0].metric("Controverse", f"{controversy:.2f}")
            cols[1].metric("Marge", f"{margin:.2f}")
            cols[2].metric("Jaar", int(row["year"]) if pd.notna(row["year"]) else "?")

            # Voting breakdown
            _render_voting_results(row.get("voting_results"))

            # Link to original motion (only absolute http/https URLs)
            url = row.get("url")
            if url and str(url).startswith("http"):
                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

            # Similar motions
            sim = query_similar(db_path, int(row["id"]), top_k=5)
            if sim.empty:
                st.caption("_Nog geen vergelijkbare moties beschikbaar_")
                continue

            st.markdown("**Vergelijkbare moties:**")
            for _, s in sim.iterrows():
                s_date = (
                    pd.to_datetime(s["date"]).strftime("%Y")
                    if pd.notna(s.get("date"))
                    else ""
                )
                # ``or`` also covers a present-but-null title, which
                # ``.get(key, default)`` alone would print as "None".
                s_title = s.get("title") or "Onbekend"
                st.markdown(f"- {s_title} *(score: {s['score']:.3f}, {s_date})*")
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Tab 4: Motie Browser
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def build_browser_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Browser" tab: a filterable table plus a detail panel.

    The table shows the filtered, sorted motions; the detail panel looks up a
    single motion by ID and shows its link, voting breakdown and similar
    motions.

    Args:
        db_path: Database path handed to ``load_motions_df`` and ``query_similar``.
        show_rejected: When False, rows whose stripped title equals
            "Verworpen." are removed before filtering.
    """
    st.subheader("Motie Browser")

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        df = df[df["title"].fillna("").str.strip() != "Verworpen."]

    # Controls
    col1, col2, col3 = st.columns(3)
    with col1:
        years = sorted(df["year"].dropna().astype(int).unique().tolist())
        year_filter = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years])
    with col2:
        min_controversy_b = st.slider(
            "Min. controverse",
            min_value=0.0,
            max_value=1.0,
            value=0.0,
            step=0.05,
            # Explicit key: the search tab has a slider with the same label.
            key="browser_controversy",
        )
    with col3:
        sort_by = st.selectbox("Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"])

    # Filter. Boolean indexing returns new frames, so ``df`` is never mutated
    # and no explicit copy is needed.
    working = df
    if year_filter != "(Alle)":
        working = working[working["year"] == int(year_filter)]
    if min_controversy_b > 0:
        working = working[working["controversy_score"] >= min_controversy_b]

    # Selectbox label -> (column, ascending)
    sort_map = {
        "Datum (nieuw)": ("date", False),
        "Controverse": ("controversy_score", False),
        "Marge": ("winning_margin", True),
    }
    sort_col, sort_asc = sort_map[sort_by]
    working = working.sort_values(by=sort_col, ascending=sort_asc)

    # Display table (only the columns that actually exist in the frame)
    display_cols = ["id", "title", "date", "controversy_score", "winning_margin"]
    available_display = [c for c in display_cols if c in working.columns]
    st.dataframe(
        working[available_display].reset_index(drop=True),
        use_container_width=True,
        height=350,
    )

    st.divider()

    # Detail panel: bounds and default follow the filtered table, with fixed
    # fallbacks when the filters leave nothing.
    st.markdown("**Detail weergave** — vul een motie-ID in:")
    has_rows = not working.empty
    sel_id = st.number_input(
        "Motie ID",
        min_value=int(working["id"].min()) if has_rows else 1,
        max_value=int(working["id"].max()) if has_rows else 99999,
        value=int(working["id"].iloc[0]) if has_rows else 1,
        step=1,
    )
    # Looked up in ``df`` rather than ``working``, so any ID inside the
    # bounds resolves even if the current filters hide it from the table.
    motion_row = df[df["id"] == sel_id]
    if motion_row.empty:
        return

    row = motion_row.iloc[0]
    st.markdown(f"### {row.get('title') or 'Onbekend'}")
    date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
    # Guard against a None/NaN score before applying the ":.2f" format.
    raw_score = row.get("controversy_score", 0)
    controversy = 0.0 if pd.isna(raw_score) else float(raw_score)
    st.caption(f"📅 {date_str} | 🔥 Controverse: {controversy:.2f}")

    # Link to original source (only absolute http/https URLs)
    url = row.get("url")
    if url and str(url).startswith("http"):
        st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

    # Voting breakdown
    st.markdown("**Stemuitslag:**")
    _render_voting_results(row.get("voting_results"))

    # Similar motions
    sim = query_similar(db_path, int(sel_id), top_k=10)
    if sim.empty:
        st.caption("_Nog geen vergelijkbare moties beschikbaar voor deze motie_")
        return

    st.markdown("**Vergelijkbare moties:**")
    # Same availability filter as the main table, so a missing column in the
    # similarity result does not raise KeyError.
    sim_cols = [
        c for c in ("title", "score", "date", "policy_area") if c in sim.columns
    ]
    st.dataframe(sim[sim_cols], use_container_width=True)
|
|
|
|
|
|
|
|
|
def build_svd_components_tab(db_path: str) -> None:
    """Render the SVD-axes tab: theme text, scree plot, party chart, top motions.

    Reads ``thoughts/explorer/top_svd_top_motions.json`` (relative to the
    working directory) and offers a selector over the components found in it.
    For the selected component it shows the theme label/explanation from
    ``SVD_THEMES``, the party axis chart, and the component's motions split
    into a left and a right column by score sign, each with a detail expander.

    Args:
        db_path: DuckDB database path; opened read-only for the motion
            details and passed to the scree/party-score loaders.
    """
    # Political polarisation themes per SVD component (1-indexed, window=2025)
    # Produced by per-axis analysis of all 10 unique top motions (zero cross-axis overlap).
    # Values are strings except "flip", which is a bool: when True the
    # positive-score pole is drawn in the left column.
    SVD_THEMES: dict[int, dict[str, object]] = {
        1: {
            "label": "Links-rechts hoofdas: progressief versus conservatief-nationalistisch",
            "explanation": (
                "De dominante dimensie van het parlement: partijen aan de linkerkant (PvdD, GL-PvdA, "
                "DENK, SP) stemmen progressief — voor sociale voorzieningen, klimaat, internationale "
                "solidariteit — terwijl partijen aan de rechterkant (PVV, NSC, BBB, SGP) inzetten op "
                "nationaal belang, migratiebeheer en conservatieve waarden. Linkse moties omvatten "
                "boycots van Israëlische defensiebedrijven, huurverlaging en het oprichten van "
                "zorgbuurthuizen; rechtse moties gaan over NAVO-verplichtingen, juridische ruimte voor "
                "drones en gaswinningsprojecten. Dit is de klassieke links-rechts tegenstelling die "
                "het meeste verschil in stemgedrag verklaart."
            ),
            "positive_pole": "Nationalistisch-conservatief: PVV, NSC, BBB, SGP, VVD",
            "negative_pole": "Progressief-links: PvdD, GL-PvdA, DENK, SP",
            "flip": False,
        },
        2: {
            "label": "PVV/FVD populistisch isolationisme versus het overige parlement",
            "explanation": (
                "Deze as isoleert PVV en FVD van alle andere partijen. Aan de positieve kant staan "
                "moties die artsen vrijpleiten die hydroxychloroquine voorschreven, Syriërs direct "
                "willen terugsturen, geen geld aan Jordanië willen geven en de richtlijn tijdelijke "
                "bescherming voor Oekraïners willen beëindigen. Aan de negatieve kant staan "
                "mainstream-moties van CU, CDA, VVD en NSC over digitale toegankelijkheid, "
                "jongerenzorg en zorgstandaarden — partijen die in de positieve ruimte van as 1 "
                "zitten maar hier op één lijn staan met links. Dit is geen links-rechts as maar een "
                "populistisch-isolationisme-as: PVV en FVD vormen een eigen cluster dat los staat "
                "van de rest van het politieke spectrum."
            ),
            "positive_pole": "PVV/FVD populistisch isolationisme: anti-EU, anti-Oekraïne, antiwetenschap",
            "negative_pole": "Gehele overige parlement: mainstream links én rechts",
            "flip": False,
        },
        3: {
            "label": "Sociaal-economisch links versus marktliberaal en landelijk rechts",
            "explanation": (
                "Deze as weerspiegelt de klassieke sociaal-economische breuklijn. Aan de linkerkant "
                "staan moties van SP die bezuinigingen op zorg en gemeentefonds willen schrappen, "
                "winstuitkeringen in de zorg willen verbieden en instemmingsrecht bij "
                "ziekenhuisfusies eisen — allemaal gericht op bescherming van publieke voorzieningen. "
                "Aan de rechterkant staan moties van BBB (wolvenzendering), VVD (langetermijn"
                "investeerders zorg, controversieel verklaren) en NSC (belastingplichtigen 2023/2024) "
                "die een marktgerichtere koers voorstaan of agrarische belangen verdedigen. SP scoort "
                "sterk links, VVD en NSC sterk rechts."
            ),
            "positive_pole": "Sociaal-economisch links: publieke zorg, tegengaan marktwerking",
            "negative_pole": "Marktliberaal en agrarisch-rechts: VVD, NSC, BBB",
            "flip": True,
        },
        4: {
            "label": "Christelijk-sociaal centrum versus populistisch-soevereinistisch",
            "explanation": (
                "Deze as scheidt christelijk-sociale en gematigde centrumpartijen (CU, CDA, D66) van "
                "populistisch-soevereinistische partijen (FVD, NSC). Aan de linkerkant staan "
                "CU-moties over vaderbetrokkenheid, long covid vergoeding en internationale "
                "samenwerking; aan de rechterkant FVD-moties over het verbieden van pleegzorg bij "
                "paren van hetzelfde geslacht, een migratiesaldo van max 60.000 en het verlaten van "
                "de WHO. NSC scoort sterk rechts op deze as door amendementen die evaluaties en "
                "grondwetswijzigingen (artikel 23) willen blokkeren. Dit is een cultureel-"
                "institutionele as: vertrouwen in internationale instituties en pluralisme tegenover "
                "soevereinistisch-traditioneel wantrouwen."
            ),
            "positive_pole": "Christelijk-sociaal en institutioneel: CU, CDA, D66",
            "negative_pole": "Populistisch-soevereinistisch: FVD, NSC-rechtsflank",
            "flip": True,
        },
        5: {
            "label": "Christelijk-conservatief en ruraal sociaal versus seculier-progressief",
            "explanation": (
                "Deze as reflecteert de tegenstelling tussen christelijk-conservatieve en ruraal-"
                "sociale partijen enerzijds (NSC, CU, SGP, CDA) en seculier-progressieve partijen "
                "anderzijds (D66, GL-PvdA, SP). Rechtse moties omvatten vrijwilligers in "
                "schuldhulpverlening ondersteunen, maatschappelijke diensttijd koppelen aan "
                "arbeidsmarktafstand en WW-duur alleen verkorten met omscholing. Linkse moties "
                "bepleiten erkenning van meerouderschap, het recht op abortus in het EU-handvest "
                "en een nationaal coördinator buitenlandse beïnvloeding. NSC en CU scoren sterk "
                "rechts; D66 en GL-PvdA sterk links."
            ),
            "positive_pole": "Christelijk-conservatief en ruraal: NSC, CU, SGP, CDA",
            "negative_pole": "Seculier-progressief: D66, GL-PvdA, SP",
            "flip": False,
        },
        6: {
            "label": "Energiepragmatisme en liberale fiscaliteit versus klimaatactivisme en anti-discriminatie",
            "explanation": (
                "Aan de rechterkant staan moties die kernenergie als CO₂-arm alternatief willen "
                "erkennen op COP30, lng-capaciteit prefereren boven vulgraadverplichtingen en "
                "discriminatiemeldpunten willen inventariseren (JA21). Aan de linkerkant staan "
                "moties die fossiele industrie van klimaatconferenties willen weren (GL), de "
                "integratieparadox willen meenemen in beleid en aanvallen van Israël op Libanon "
                "veroordelen (DENK, SP). FVD en JA21 scoren sterk rechts; GL-PvdA, DENK en SP "
                "sterk links. Dit is een combinatie van energie-ideologie en culturele polarisatie "
                "rondom klimaat, integratie en buitenlandspolitiek."
            ),
            "positive_pole": "Energiepragmatisme, kernenergie, liberale fiscaliteit: FVD, JA21, SGP, CU",
            "negative_pole": "Klimaatactivisme, anti-discriminatie en internationale verantwoordelijkheid: GL, DENK, SP",
            "flip": False,
        },
        7: {
            "label": "Pragmatisch coalitiebeleid versus ecologisch-progressief en religieuze bescherming",
            "explanation": (
                "Aan de rechterkant staan pragmatische coalitiemoties: voedselprijzen inzichtelijk "
                "maken (PVV/CU), papieren schoolboeken behouden (CDA), invoeringstoets voor mkb "
                "(NSC) en het controversieel verklaren van bepaalde dossiers (VVD). Aan de "
                "linkerkant staan progressief-ecologische moties: een landelijk stookverbod (PvdD), "
                "verbranding van religieuze geschriften strafbaar stellen (DENK), chroom-6 "
                "schadevergoedingen (SP/D66) en tegenhouden van nieuwe gaswinning (SP). De "
                "partijscores zijn smal maar consistent: PvdD, DENK en SP links; CU, NSC en CDA "
                "rechts."
            ),
            "positive_pole": "Ecologisch-progressief en religieuze bescherming: PvdD, DENK, SP",
            "negative_pole": "Pragmatisch coalitiebeleid: PVV, CU, NSC, CDA, VVD",
            "flip": True,
        },
        8: {
            "label": "Pro-Europees defensie en investering versus nationaal-populistisch wantrouwen",
            "explanation": (
                "Aan de rechterkant staan moties van D66, Volt en CDA die NAVO-militaire mobiliteit "
                "in het Schengengebied regelen, 35% van defensiematerieel Europees willen inkopen "
                "en een Nationaal Groeifonds-ronde willen lanceren — allemaal pro-Europees en "
                "investerings-georiënteerd. Aan de linkerkant staan moties over coronastrategie "
                "en oversterfte (PVV/BBB), ketenverantwoordelijkheid bij toeslagen (DENK) en "
                "vraagresponsovereenkomsten zonder opzegtermijn (PVV). SP scoort sterk links door "
                "wantrouwen jegens institutionele processen; D66 en Volt sterk rechts door hun "
                "pro-Europese en investeringsgerichte koers."
            ),
            "positive_pole": "Pro-Europees, NAVO en investering: D66, Volt, CDA",
            "negative_pole": "Nationaal-populistisch wantrouwen: PVV, SP-controlereflex, DENK",
            "flip": False,
        },
        9: {
            "label": "Gereformeerd-sociaal centrum versus progressief regulerend",
            "explanation": (
                "Aan de linkerkant staan moties van NSC, CU en SGP over naleving van de Financiële-"
                "verhoudingswet, beperking van arbeidsmigratie binnen de EU, een nieuwe "
                "opleidingsplek voor tandartsen en een actieplan tegen misbruik van "
                "hallucinerende geneesmiddelen. Aan de rechterkant staan moties van PvdD, GL "
                "en D66: moratorium op geitenstallen, verbod op gokadvertenties in zoekmachines, "
                "verduidelijking van voorlopige hechtenis en leegstandbelasting voor woningen. "
                "CU en SGP scoren sterk links; PvdD en D66 sterk rechts. Dit is een as van "
                "gereformeerd-sociaal pragmatisme tegenover progressieve regulering."
            ),
            "positive_pole": "Gereformeerd-sociaal centrum: NSC, CU, SGP — naleving, arbeidsmarkt, volksgezondheid",
            "negative_pole": "Progressief regulerend: PvdD, GL, D66 — milieu, wonen, rechtsstaat",
            "flip": True,
        },
        10: {
            "label": "Residuele as: individuele dienstverlening versus collectieve handhaving",
            "explanation": (
                "De tiende as vangt kleine resterende variantie op na de eerste negen. Aan de "
                "linkerkant staan moties die individuele dienstverlening verbeteren: minder "
                "tijdsintensieve inspectiebezoeken (VVD), een persoonlijk dossier voor ouders "
                "binnen één maand (SP), tegemoetkoming arbeidsongeschikten in stand houden (SP) "
                "en een verlaging van de leeftijd voor kindgesprekken (PVV). Aan de rechterkant "
                "staan handhavings- en regelgevingsmoties: aangifteplicht voor scholen bij "
                "veiligheidsincidenten (VVD), rookvrije auto's met kinderen (NSC/CDA) en "
                "beloningsgeld voor tipgevers op de Nationale Opsporingslijst (VVD). De scores "
                "zijn klein (max ±6,5) en de coalitie op elke pool is gemengd — dit is geen "
                "duidelijke ideologische as maar een restfactor."
            ),
            "positive_pole": "Individuele dienstverlening en ontzorging: VVD, SP, PVV-elementen",
            "negative_pole": "Collectieve handhaving en regelgeving: VVD-handhavingsflank, CDA, NSC",
            "flip": True,
        },
    }

    def _as_int(value: object) -> Optional[int]:
        """Return *value* converted to int, or None when it is not convertible."""
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return None

    st.subheader("🔬 SVD Assen — politieke polarisatiethema's")
    st.markdown(
        "Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen "
        "van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren "
        "het spanningsveld dat de as beschrijft."
    )

    # Scree plot: relative importance of each SVD component
    scree_importances = load_scree_data(db_path)
    if scree_importances:
        st.markdown(
            "**Scree-plot** — het relatieve gewicht van elke SVD-as. "
            "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; "
            "latere assen zijn subtieler maar politiek nog steeds betekenisvol."
        )
        _render_scree_plot(scree_importances)

    json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
    if not os.path.exists(json_path):
        st.warning(
            f"Top-SVD data not found at {json_path}. Run the importance job to generate it."
        )
        return

    try:
        with open(json_path, "r", encoding="utf-8") as fh:
            j = json.load(fh)
    except Exception as e:
        # UI boundary: surface any read/parse failure instead of crashing the tab.
        st.error(f"Failed to load SVD importance JSON: {e}")
        return

    window = j.get("window")
    rows = j.get("rows", [])
    if not rows:
        st.info("Geen top-moties in dataset")
        return

    st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**")

    # Build mapping component -> list of motions, keeping the first row per
    # motion_id within each component. A per-component "seen" set avoids
    # rebuilding the id set for every row.
    comp_map: dict[int, list] = {}
    seen_ids: dict[int, set] = {}
    for r in rows:
        comp = int(r.get("component", 0))
        motion_id = r.get("motion_id")
        seen = seen_ids.setdefault(comp, set())
        if motion_id in seen:
            continue
        seen.add(motion_id)
        comp_map.setdefault(comp, []).append(r)

    comp_options = sorted(comp_map.keys())

    # Build display labels for the selectbox: "As <n> — <theme label>"
    def _comp_label(c: int) -> str:
        lbl = SVD_THEMES.get(c, {}).get("label", "")
        return f"As {c} — {lbl}" if lbl else f"As {c}"

    comp_display = [_comp_label(c) for c in comp_options]
    # The selectbox works on positions so the label can differ from the value.
    comp_sel_idx = st.selectbox(
        "Selecteer SVD-as",
        options=list(range(len(comp_options))),
        format_func=lambda i: comp_display[i],
        index=0,
    )
    comp_sel = comp_options[comp_sel_idx]

    # Show theme explanation (components without an entry get none)
    theme = SVD_THEMES.get(comp_sel, {})
    if theme:
        st.info(f"**{theme['label']}** — {theme['explanation']}")

    motions = comp_map.get(comp_sel, [])

    # Party axis chart
    party_scores = load_party_axis_scores(db_path)
    _render_party_axis_chart(party_scores, comp_sel, theme)

    # Batch-fetch motion details (title, date, policy_area, url, body_text, voting_results)
    motion_details: Dict[int, tuple] = {}
    ids_int: List[int] = []
    for m in motions:
        raw_id = m.get("motion_id")
        if raw_id is None:
            continue
        parsed_id = _as_int(raw_id)
        if parsed_id is None:
            logger.warning("Skipping invalid motion id in SVD batch fetch: %r", raw_id)
        else:
            ids_int.append(parsed_id)

    # If no valid ids remain, skip the DB query
    if ids_int:
        con = None
        try:
            # Only "?" markers are interpolated; the ids are bound parameters.
            placeholders = ", ".join("?" for _ in ids_int)
            con = duckdb.connect(database=db_path, read_only=True)
            db_rows = con.execute(
                f"SELECT id, title, date, policy_area, url, body_text, voting_results "
                f"FROM motions WHERE id IN ({placeholders})",
                ids_int,
            ).fetchall()
            motion_details = {r[0]: r for r in db_rows}
        except Exception:
            # Best effort: without details each motion shows a "no metadata" note.
            logger.exception("Failed to batch-fetch motion details")
        finally:
            if con is not None:
                con.close()

    # Split motions by pole sign; a missing/None score counts as 0 (positive pole).
    pos_motions = [m for m in motions if float(m.get("score") or 0.0) >= 0]
    neg_motions = [m for m in motions if float(m.get("score") or 0.0) < 0]

    flip = theme.get("flip", False)
    pos_pole = theme.get("positive_pole", "")
    neg_pole = theme.get("negative_pole", "")

    # Determine which pole goes left and which goes right; "▲" always marks
    # the positive-score pole.
    if flip:
        left_pole, right_pole = pos_pole, neg_pole
        left_motions, right_motions = pos_motions, neg_motions
        left_arrow, right_arrow = "▲", "▼"
    else:
        left_pole, right_pole = neg_pole, pos_pole
        left_motions, right_motions = neg_motions, pos_motions
        left_arrow, right_arrow = "▼", "▲"

    def _render_pole(pole_motions: list, arrow: str) -> None:
        """Render one expander per motion of a pole inside the active column."""
        for m in pole_motions:
            mid = m.get("motion_id")
            raw_title = m.get("title") or f"Motie #{mid}"
            with st.expander(f"{arrow} {raw_title}"):
                # Same tolerant conversion as the batch fetch: an id that
                # was skipped there simply has no details here.
                key = _as_int(mid)
                row = motion_details.get(key) if key is not None else None
                if not row:
                    st.caption("_Geen metadata beschikbaar_")
                    continue
                # row = (id, title, date, policy_area, url, body_text, voting_results)
                date_str = str(row[2])[:10] if row[2] is not None else "?"
                st.caption(f"📅 {date_str} | {row[3] or '—'}")
                if row[4] and str(row[4]).startswith("http"):
                    st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
                if row[5]:
                    # NOTE(review): this nests an expander inside an expander;
                    # confirm the deployed Streamlit version permits that.
                    with st.expander("Toon volledige tekst"):
                        st.write(row[5])
                _render_voting_results(row[6])

    lcol, rcol = st.columns(2)

    with lcol:
        st.markdown(f"**← {left_pole}**")
        _render_pole(left_motions, left_arrow)

    with rcol:
        st.markdown(f"**{right_pole} →**")
        _render_pole(right_motions, right_arrow)
|
|
|
|
|
|
|
|
|
def build_mp_quiz_tab(db_path: str) -> None:
    """Interactive quiz: narrow MPs by asking motion vote questions.

    Flow per render:
      - seed with motions that have individual MP votes (``SEED_MOTIONS`` of them)
      - present one question at a time inside a form; answers are stored in
        ``st.session_state['mp_quiz_votes']`` keyed by ``str(motion_id)``
      - once the seeds are used up, ask ``choose_discriminating_motions`` for
        the next question, based on the MPs returned by ``match_mps_for_votes``
      - stop asking after ``MAX_QUESTIONS`` submitted answers or when no
        further question is available
      - show the current MP ranking and report a unique top match if any

    Args:
        db_path: Database path used for ``MotionDatabase`` and ``load_motions_df``.
    """
    # The zero-width joiner is written as an escape so the judge emoji
    # sequence survives editors/pastes that strip invisible characters.
    st.subheader("🧑\u200d⚖️ Welk tweede kamerlid ben jij?")
    st.markdown(
        "Beantwoord een paar eenvoudige ja/nee/onthoud vragen over moties om te zien welk Kamerlid het meest op jou lijkt."
    )

    SEED_MOTIONS = 8
    MAX_QUESTIONS = 20

    # initialize session state
    if "mp_quiz_votes" not in st.session_state:
        st.session_state["mp_quiz_votes"] = {}
    if "mp_quiz_asked" not in st.session_state:
        st.session_state["mp_quiz_asked"] = []

    # Imported here rather than at module level, as in the original code.
    from database import MotionDatabase as _MotionDatabase

    db_inst = _MotionDatabase(db_path)

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar om de quiz te starten.")
        return

    # seed from motions that actually have individual MP vote records
    seed_ids = db_inst.get_motions_with_individual_votes(k=SEED_MOTIONS)
    if not seed_ids:
        st.warning("Geen individuele stemdata beschikbaar voor de quiz.")
        return

    def _current_votes() -> dict:
        """Return the stored answers with their keys converted back to int ids."""
        return {int(k): v for k, v in st.session_state["mp_quiz_votes"].items()}

    def _next_motion_id():
        """Pick the next motion id to ask about, or None if nothing is left."""
        # prefer seed motions not yet asked
        for mid in seed_ids:
            if str(mid) not in st.session_state["mp_quiz_votes"]:
                return mid

        # otherwise ask a discriminating motion based on the remaining candidate MPs
        try:
            ranked = db_inst.match_mps_for_votes(_current_votes(), limit=200)
        except Exception:
            # Best effort: a failed lookup ends the quiz instead of crashing
            # the page, but it is logged rather than silently dropped.
            logger.exception("MP quiz: ranking candidates failed")
            ranked = []

        candidates = [r["mp_name"] for r in ranked]
        if not candidates:
            return None
        excluded = [int(k) for k in st.session_state["mp_quiz_votes"]]
        try:
            next_ids = db_inst.choose_discriminating_motions(candidates, excluded, k=1)
        except Exception:
            logger.exception("MP quiz: choosing a discriminating motion failed")
            return None
        return next_ids[0] if next_ids else None

    # show progress and controls
    col1, col2 = st.columns([3, 1])
    with col2:
        # Count submitted answers only; auto-skipped motions are also stored
        # in mp_quiz_votes but were never presented to the user.
        st.caption(
            f"Vragen beantwoord: {len(st.session_state['mp_quiz_asked'])}/{MAX_QUESTIONS}"
        )
        if st.button("Reset quiz"):
            st.session_state["mp_quiz_votes"] = {}
            st.session_state["mp_quiz_asked"] = []
            st.rerun()

    # main question loop (single question per render, wrapped in a form to avoid
    # premature reruns when the user changes the radio selection)
    # Enforce the advertised cap: once MAX_QUESTIONS answers are in, no new
    # question is fetched or shown; the ranking section below reports it.
    cap_reached = len(st.session_state["mp_quiz_asked"]) >= MAX_QUESTIONS
    next_mid = None if cap_reached else _next_motion_id()
    if cap_reached:
        pass
    elif next_mid is None:
        st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.")
    else:
        motion_rows = df[df["id"] == next_mid]
        if motion_rows.empty:
            # motion has votes but isn't in the motions DataFrame — skip it
            st.session_state["mp_quiz_votes"][str(next_mid)] = "Geen stem"
            st.rerun()
            return
        motion_row = motion_rows.iloc[0]
        heading = motion_row.get("title") or f"Motie #{next_mid}"
        st.markdown(f"### {heading}")
        # Only show real text: a NaN cell is truthy and would be rendered.
        explanation = motion_row.get("layman_explanation")
        if isinstance(explanation, str) and explanation:
            st.info(explanation)

        with st.form(key=f"mp_quiz_form_{next_mid}"):
            choice = st.radio(
                "Wat zou jij stemmen?",
                options=["Voor", "Tegen", "Onthouden", "Geen stem"],
                index=3,
            )
            submitted = st.form_submit_button("Beantwoord en verder")

        if submitted:
            st.session_state["mp_quiz_votes"][str(next_mid)] = choice
            st.session_state["mp_quiz_asked"].append(next_mid)
            st.rerun()

    # display current ranking
    try:
        ranking = db_inst.match_mps_for_votes(_current_votes(), limit=50)
    except Exception:
        logger.exception("MP quiz: computing the ranking failed")
        ranking = []

    if not ranking:
        st.info("Nog geen antwoorden of geen overlapping met bestaande stemdata.")
        return

    st.markdown("**Top kandidaten**")
    # show as table (module-level pandas import; no local re-import needed)
    rdf = pd.DataFrame(ranking)
    st.dataframe(rdf.head(10), use_container_width=True)

    # check uniqueness: exactly one MP holds the top agreement percentage
    # and shares at least one vote with the user
    top_pct = ranking[0]["agreement_pct"]
    top_matches = [r for r in ranking if r["agreement_pct"] == top_pct]
    if len(top_matches) == 1 and top_matches[0]["overlap"] > 0:
        st.success(
            f"Unieke match gevonden: {top_matches[0]['mp_name']} ({top_matches[0]['party']})"
        )
    elif cap_reached:
        st.warning(
            "Maximaal aantal vragen beantwoord. Je hebt meerdere vergelijkbare kandidaten."
        )
    else:
        st.info("Nog geen unieke match — vraag meer om verder te verfijnen.")
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# App entry
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def run_app() -> None:
    """Configure the page, render the sidebar, and dispatch to the tab builders.

    The sidebar holds the "show rejected motions" toggle and an "about"
    expander with row counts read from the database. The main area renders
    five tabs via ``st.tabs``, or a radio selector when ``st.tabs`` is not
    available.
    """
    st.set_page_config(
        layout="wide",
        page_title="Parlement Explorer",
        page_icon="🏛️",
    )
    st.title("🏛️ Parlement Explorer")

    # Sidebar
    st.sidebar.title("Instellingen")
    db_path = "data/motions.db"
    window_size = "annual"
    show_rejected = st.sidebar.checkbox("Toon verworpen moties", value=False)

    # About section
    with st.sidebar.expander("ℹ️ Over", expanded=False):
        con = None
        try:
            con = duckdb.connect(database=db_path, read_only=True)
            n_motions = con.execute("SELECT COUNT(*) FROM motions").fetchone()[0]
            n_fused = con.execute("SELECT COUNT(*) FROM fused_embeddings").fetchone()[0]
            n_sim = con.execute("SELECT COUNT(*) FROM similarity_cache").fetchone()[0]
        except Exception as e:
            # UI boundary: an unreachable DB or missing table is shown as a
            # warning instead of aborting the whole app.
            st.warning(f"DB niet bereikbaar: {e}")
        else:
            st.markdown(
                f"**Moties:** {n_motions:,} \n"
                f"**Fused embeddings:** {n_fused:,} \n"
                f"**Similarity cache:** {n_sim:,}"
            )
        finally:
            # Close even when one of the COUNT queries raised, so a failed
            # query does not leave the connection open.
            if con is not None:
                con.close()

    # Main tabs: one (label, builder) table drives both rendering modes.
    # NOTE(review): build_mp_quiz_tab is not wired in here — confirm whether
    # that is intentional.
    tabs = [
        ("🧭 Politiek Kompas", lambda: build_compass_tab(db_path, window_size)),
        ("📈 Trajectories", lambda: build_trajectories_tab(db_path, window_size)),
        ("🔍 Motie Zoeken", lambda: build_search_tab(db_path, show_rejected)),
        ("📋 Motie Browser", lambda: build_browser_tab(db_path, show_rejected)),
        ("🔬 SVD Components", lambda: build_svd_components_tab(db_path)),
    ]
    tab_labels = [label for label, _ in tabs]

    # Streamlit tabs compatibility: some Streamlit builds may not expose `st.tabs`.
    if callable(getattr(st, "tabs", None)):
        for container, (_, render) in zip(st.tabs(tab_labels), tabs):
            with container:
                render()
    else:
        # Fallback for environments where `st.tabs` is not available: use a radio
        # selector; anything unrecognised falls through to the last tab.
        selection = st.radio("Tab", tab_labels)
        render = dict(tabs).get(selection, tabs[-1][1])
        render()
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: set up INFO-level logging with timestamps, then
    # start the app. Nothing here runs when the module is merely imported.
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(message)s",
        level=logging.INFO,
    )
    run_app()
|
|
|
|