|
|
"""Parlement Explorer — Streamlit data analysis app.

Four tabs:
  1. Politiek Kompas — 2D scatter of MPs/parties, window slider
  2. Partij Trajectories — party centroid lines over time
  3. Motie Zoeken — text search + similarity lookup
  4. Motie Browser — sortable table + detail panel

Run with: streamlit run explorer.py

Import-safe: heavy computation is behind @st.cache_data and only runs at UI time.
All DuckDB connections are read_only=True so the app can run alongside the pipeline.
"""
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
import json
|
|
|
import logging
|
|
|
import os
|
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
|
|
import duckdb
|
|
|
import numpy as np
|
|
|
import pandas as pd
|
|
|
import plotly.express as px
|
|
|
import plotly.graph_objects as go
|
|
|
import streamlit as st
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Party colour palette, shared by every tab so a party keeps the same colour
# in the compass scatter and in the trajectory lines. Values are hex RGB.
PARTY_COLOURS: Dict[str, str] = {
    "VVD": "#1E73BE",
    "PVV": "#002366",
    "D66": "#00A36C",
    "CDA": "#4CAF50",
    "SP": "#E53935",
    "PvdA": "#D32F2F",
    "GroenLinks": "#388E3C",
    "GroenLinks-PvdA": "#2E7D32",
    "CU": "#0288D1",
    "SGP": "#F4511E",
    "PvdD": "#43A047",
    "FVD": "#6A1B9A",
    "JA21": "#7B1FA2",
    "BBB": "#8D6E63",
    "NSC": "#FF8F00",
    # Long-form alias of NSC as it appears in mp_metadata; same colour on purpose.
    "Nieuw Sociaal Contract": "#FF8F00",
    "DENK": "#00897B",
    "50PLUS": "#7E57C2",
    "Volt": "#572AB7",
    # Neutral grey for MPs without a party mapping.
    "Unknown": "#9E9E9E",
}
|
|
|
|
|
|
# Well-known parties, in display order, used as the default selection of the
# trajectory chart. Limiting the default keeps the chart readable instead of
# plotting every party at once.
KNOWN_MAJOR_PARTIES = [
    "VVD",
    "PVV",
    "D66",
    "GroenLinks-PvdA",
    "GroenLinks",
    "PvdA",
    "CDA",
    "SP",
    "NSC",
    "CU",
    "BBB",
]
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Cached loaders
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Beschikbare tijdsvensters laden…")
def get_available_windows(db_path: str) -> List[str]:
    """Return every distinct ``window_id`` in ``svd_vectors``, sorted ascending.

    Args:
        db_path: Path of the DuckDB database; opened read-only.

    Returns:
        The window ids in SQL ``ORDER BY`` order, or an empty list when the
        query fails (the failure is logged with its traceback).

    Note:
        Only query errors are absorbed. A failure to open the database happens
        before the ``try`` and therefore propagates to the caller.
    """
    sql = "SELECT DISTINCT window_id FROM svd_vectors ORDER BY window_id"
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        window_ids = [record[0] for record in connection.execute(sql).fetchall()]
    except Exception:
        logger.exception("Failed to query available windows")
        window_ids = []
    finally:
        # Always release the read-only handle, on success and on failure.
        connection.close()
    return window_ids
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner=False)
def get_uniform_dim_windows(db_path: str) -> List[str]:
    """Return the windows whose MP vectors have the most common dimension.

    The query samples one ``entity_type = 'mp'`` row per window
    (``DISTINCT ON``) and uses the JSON array length of its vector as that
    window's dimension. The dimension shared by the most windows is selected
    (ties go to the larger dimension) and only windows at that dimension are
    returned, ordered by window id.

    This exists so that ``compute_2d_axes`` never receives vectors of mixed
    shape: stacking them with ``np.vstack`` requires a uniform dimension, and
    early or small windows can have a lower SVD rank.

    Args:
        db_path: Path of the DuckDB database; opened read-only.

    Returns:
        Sorted window ids at the dominant dimension, or an empty list when the
        query fails (the failure is logged with its traceback).
    """
    sql = """
        WITH window_dims AS (
            SELECT DISTINCT ON (window_id)
                window_id,
                json_array_length(vector) AS dim
            FROM svd_vectors
            WHERE entity_type = 'mp'
            ORDER BY window_id
        ),
        dim_counts AS (
            SELECT dim, COUNT(*) AS cnt FROM window_dims GROUP BY dim
        ),
        dominant AS (
            SELECT dim FROM dim_counts ORDER BY cnt DESC, dim DESC LIMIT 1
        )
        SELECT wd.window_id
        FROM window_dims wd
        JOIN dominant d ON wd.dim = d.dim
        ORDER BY wd.window_id
    """
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        matches = connection.execute(sql).fetchall()
        # Each result row holds a single column: the window id.
        return [record[0] for record in matches]
    except Exception:
        logger.exception("Failed to query uniform-dim windows")
        return []
    finally:
        connection.close()
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="2D posities berekenen (kan even duren)…")
def load_positions(
    db_path: str, window_size: str = "quarterly"
) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict]:
    """Compute 2D positions per window via PCA over the stored SVD vectors.

    Args:
        db_path: Path of the DuckDB database, forwarded to the helpers.
        window_size: ``"annual"`` restricts the input to windows whose id ends
            in ``-Q4`` (one representative window per year); any other value
            uses every uniform-dimension window.

    Returns:
        ``(positions_by_window, axis_def)`` where ``positions_by_window`` maps
        ``window_id -> {entity_name: (x, y)}`` and ``axis_def`` is the axis
        description produced by ``compute_2d_axes``. Both are empty dicts when
        no window qualifies.
    """
    # Imported lazily so that importing this module stays cheap.
    from analysis.political_axis import compute_2d_axes

    # Restrict to windows sharing one vector dimension; mixed dimensions would
    # make np.vstack fail inside compute_2d_axes.
    candidate_windows = get_uniform_dim_windows(db_path)
    if window_size == "annual":
        candidate_windows = [
            wid for wid in candidate_windows if wid.endswith("-Q4")
        ]

    if not candidate_windows:
        return {}, {}

    computed = compute_2d_axes(
        db_path,
        window_ids=candidate_windows,
        method="pca",
        pca_residual=True,
        normalize_vectors=True,
    )
    positions, axes = computed
    return positions, axes
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Partijkaart laden…")
def load_party_map(db_path: str) -> Dict[str, str]:
    """Return a ``{mp_name: party}`` mapping with long party names abbreviated.

    The raw mapping comes from ``analysis.visualize._load_party_map``. Party
    names listed in the alias table below are replaced by their abbreviation;
    all other names are passed through unchanged.

    Args:
        db_path: Path of the DuckDB database, forwarded to the raw loader.

    Returns:
        The normalised mapping, or an empty dict when loading fails (the
        failure is logged with its traceback).
    """
    from analysis.visualize import _load_party_map

    # Long-form party names mapped to the abbreviation used elsewhere in the app.
    aliases: Dict[str, str] = {"Nieuw Sociaal Contract": "NSC"}

    try:
        normalised: Dict[str, str] = {}
        for mp_name, party_name in _load_party_map(db_path).items():
            normalised[mp_name] = aliases.get(party_name, party_name)
        return normalised
    except Exception:
        logger.exception("Failed to load party map")
        return {}
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="Moties laden…")
def load_motions_df(db_path: str) -> pd.DataFrame:
    """Load the motions table into a DataFrame through a read-only connection.

    The ``date`` column is parsed to datetimes (unparseable values become
    ``NaT``) and a derived ``year`` column is added.

    Args:
        db_path: Path of the DuckDB database; opened read-only.

    Returns:
        One row per motion, or an empty DataFrame when the query or the
        post-processing fails (the failure is logged with its traceback).
    """
    sql = """
        SELECT id, title, description, date, policy_area,
               voting_results, layman_explanation,
               winning_margin, controversy_score, url
        FROM motions
    """
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        motions = connection.execute(sql).fetchdf()
        parsed_dates = pd.to_datetime(motions["date"], errors="coerce")
        motions["date"] = parsed_dates
        motions["year"] = parsed_dates.dt.year
        return motions
    except Exception:
        logger.exception("Failed to load motions")
        return pd.DataFrame()
    finally:
        connection.close()
|
|
|
|
|
|
|
|
|
def query_similar(
    db_path: str,
    source_motion_id: int,
    vector_type: str = "fused",
    top_k: int = 10,
) -> pd.DataFrame:
    """Fetch the highest-scoring similar motions from ``similarity_cache``.

    Args:
        db_path: Path of the DuckDB database; opened read-only.
        source_motion_id: Motion whose neighbours are requested.
        vector_type: Value matched against ``similarity_cache.vector_type``.
        top_k: Maximum number of rows to return.

    Returns:
        A DataFrame with columns ``target_motion_id``, ``score``,
        ``window_id``, ``title``, ``date`` and ``policy_area``, ordered by
        descending score. An empty DataFrame is returned when the query fails
        (the failure is logged with its traceback).
    """
    sql = """
        SELECT sc.target_motion_id, sc.score, sc.window_id,
               m.title, m.date, m.policy_area
        FROM similarity_cache sc
        JOIN motions m ON m.id = sc.target_motion_id
        WHERE sc.source_motion_id = ?
          AND sc.vector_type = ?
        ORDER BY sc.score DESC
        LIMIT ?
    """
    params = [source_motion_id, vector_type, top_k]
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        similar = connection.execute(sql, params).fetchdf()
        return similar
    except Exception:
        logger.exception(
            "Failed to query similarity cache for motion %s", source_motion_id
        )
        return pd.DataFrame()
    finally:
        connection.close()
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Shared rendering helpers
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def _render_voting_results(voting_results_json) -> None:
    """Render a voting-results blob as one markdown line per vote value.

    The payload is expected to be a mapping ``{party_or_mp: vote}``, either as
    a JSON string or as an already-decoded dict. Actors are grouped by their
    lower-cased, stripped vote; 'voor', 'tegen', 'onthouden' and 'afwezig' are
    rendered first in that order, followed by any other vote values in the
    order they were first seen.

    Nothing is rendered when the payload is missing, empty, not valid JSON, or
    does not decode to a non-empty dict. Invalid JSON is logged as a warning
    instead of being silently discarded.

    Args:
        voting_results_json: JSON string, decoded dict, or a missing value
            (``None``, ``""``, NaN).
    """
    if not voting_results_json:
        return

    if isinstance(voting_results_json, str):
        # Only the parse step is expected to fail on bad data, so only that
        # step is guarded. json.JSONDecodeError is a ValueError subclass.
        try:
            vdata = json.loads(voting_results_json)
        except ValueError:
            logger.warning("Skipping voting results: stored value is not valid JSON")
            return
    else:
        vdata = voting_results_json

    # Covers NaN placeholders, JSON arrays/scalars and empty objects.
    if not isinstance(vdata, dict) or not vdata:
        return

    # Group as {vote: [actor, ...]}.
    by_vote: Dict[str, List[str]] = {}
    for actor, vote in vdata.items():
        vote_str = str(vote).lower().strip()
        by_vote.setdefault(vote_str, []).append(str(actor))

    vote_order = ["voor", "tegen", "onthouden", "afwezig"]
    vote_emoji = {"voor": "✅", "tegen": "❌", "onthouden": "🟡", "afwezig": "⬜"}
    other_votes = [k for k in by_vote if k not in vote_order]

    # A non-empty dict always yields at least one group, so no "nothing to
    # show" fallback is needed after this loop.
    for v in vote_order + other_votes:
        actors = by_vote.get(v)
        if not actors:
            continue
        emoji = vote_emoji.get(v, "▪️")
        st.markdown(
            f"**{emoji} {v.capitalize()}** ({len(actors)}): {', '.join(sorted(actors))}"
        )
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Tab 1: Politiek Kompas
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def build_compass_tab(db_path: str, window_size: str) -> None:
    """Render the "Politiek Kompas" tab: a 2D scatter of MPs for one window.

    Positions and the axis definition come from ``load_positions``; the
    MP-to-party labels come from ``load_party_map`` (MPs without an entry are
    labelled "Unknown"). The user selects a window, may toggle name labels, and
    may hide parties with fewer than a chosen number of MPs in that window.

    Args:
        db_path: Path of the DuckDB database, forwarded to the loaders.
        window_size: Window granularity forwarded to ``load_positions``.
    """
    st.subheader("Politiek Kompas")
    st.markdown(
        "2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)."
    )

    positions_by_window, axis_def = load_positions(db_path, window_size)
    if not positions_by_window:
        st.warning(
            "Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid."
        )
        return

    party_map = load_party_map(db_path)
    windows = sorted(positions_by_window.keys())

    col1, col2 = st.columns([3, 1])
    with col2:
        # select_slider returns the chosen option itself (a window id).
        window_id = st.select_slider(
            "Tijdsvenster", options=windows, value=windows[-1]
        )
        show_names = st.checkbox("Toon namen", value=False)
        min_size = st.slider("Min. MPs per partij", 0, 20, 3)

    pos = positions_by_window.get(window_id, {})
    if not pos:
        st.info(f"Geen data voor venster {window_id}")
        return

    df_pos = pd.DataFrame(
        [
            {"name": name, "x": x, "y": y, "party": party_map.get(name, "Unknown")}
            for name, (x, y) in pos.items()
        ]
    )

    # Keep only parties with at least `min_size` MPs in this window.
    party_counts = df_pos["party"].value_counts()
    valid_parties = party_counts[party_counts >= min_size].index
    df_pos = df_pos[df_pos["party"].isin(valid_parties)]

    colour_map = {p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique()}

    fig = px.scatter(
        df_pos,
        x="x",
        y="y",
        color="party",
        hover_name="name",
        hover_data={"party": True, "x": ":.3f", "y": ":.3f"},
        # Plotly Express creates one trace per party. Mapping the column here
        # gives each trace only its own names; assigning the whole column via
        # update_traces would hand every trace the same full list and
        # mislabel the points.
        text="name" if show_names else None,
        color_discrete_map=colour_map,
        title=f"Politiek Kompas — {window_id}",
        labels={"x": "Links ← → Rechts", "y": "Progressief ↑ / Conservatief ↓"},
    )
    if show_names:
        fig.update_traces(textposition="top center")
    fig.update_layout(height=600, legend_title_text="Partij")

    with col1:
        st.plotly_chart(fig, use_container_width=True)

    # Axis info: only shown when both components report their variance.
    if axis_def:
        evr = axis_def.get("explained_variance_ratio")
        if evr is not None and len(evr) >= 2:
            st.caption(
                f"PCA verklaarde variantie: as 1 = {evr[0] * 100:.1f}%, as 2 = {evr[1] * 100:.1f}%"
            )
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Tab 2: Partij Trajectories
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def _party_centroids(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    party_map: Dict[str, str],
) -> Dict[str, Dict[str, Tuple[float, float]]]:
    """Return ``{party: {window_id: (mean_x, mean_y)}}`` over MPs with a known party."""
    centroids: Dict[str, Dict[str, Tuple[float, float]]] = {}
    for wid, pos in positions_by_window.items():
        per_party: Dict[str, List[Tuple[float, float]]] = {}
        for mp_name, (x, y) in pos.items():
            party = party_map.get(mp_name, "Unknown")
            if party == "Unknown":
                # MPs without a party cannot contribute to a party centroid.
                continue
            per_party.setdefault(party, []).append((x, y))
        for party, coords in per_party.items():
            centroids.setdefault(party, {})[wid] = (
                float(np.mean([c[0] for c in coords])),
                float(np.mean([c[1] for c in coords])),
            )
    return centroids


def build_trajectories_tab(db_path: str, window_size: str) -> None:
    """Render the "Partij Trajectories" tab: party centroid paths across windows.

    For every window the mean (x, y) of each party's MPs is computed, and each
    selected party is drawn as a line through its centroids in window order.
    The default selection is the subset of ``KNOWN_MAJOR_PARTIES`` present in
    the data, falling back to the first six parties alphabetically.

    Args:
        db_path: Path of the DuckDB database, forwarded to the loaders.
        window_size: Window granularity forwarded to ``load_positions``. In
            ``"annual"`` mode the point labels drop the ``-Q4`` suffix so they
            read as plain years.
    """
    st.subheader("Partij Trajectories")
    st.markdown("Hoe bewegen partijen over de tijdsvensters heen?")

    positions_by_window, _ = load_positions(db_path, window_size)
    if not positions_by_window:
        st.warning("Geen positiedata beschikbaar.")
        return

    party_map = load_party_map(db_path)
    windows = sorted(positions_by_window.keys())

    centroids = _party_centroids(positions_by_window, party_map)
    all_parties_sorted = sorted(centroids)

    # Prefer the known major parties over an "appears in most windows"
    # heuristic, which would exclude newer parties that have few windows.
    default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in centroids]
    if not default_parties:
        default_parties = all_parties_sorted[:6]

    selected_parties = st.multiselect(
        "Selecteer partijen",
        options=all_parties_sorted,
        default=default_parties,
    )

    # Note about partial data years.
    if "2023-Q1" in windows and not any(
        w.startswith("2023-Q") and w != "2023-Q1" for w in windows
    ):
        st.caption(
            "ℹ️ 2023 heeft alleen data voor Q1 — pipeline draaide niet door in dat jaar."
        )

    # Only annual mode consists solely of Q4 windows. Stripping the suffix in
    # quarterly mode would label Q4 points as a bare year next to "…-Q3".
    strip_quarter = window_size == "annual"

    fig = go.Figure()
    for party in selected_parties:
        if party not in centroids:
            continue
        wids_sorted = sorted(centroids[party].keys())
        xs = [centroids[party][w][0] for w in wids_sorted]
        ys = [centroids[party][w][1] for w in wids_sorted]
        point_labels = [
            w.replace("-Q4", "") if strip_quarter else w for w in wids_sorted
        ]
        colour = PARTY_COLOURS.get(party, "#9E9E9E")
        fig.add_trace(
            go.Scatter(
                x=xs,
                y=ys,
                mode="lines+markers+text",
                name=party,
                text=point_labels,
                textposition="top center",
                line=dict(color=colour),
                marker=dict(color=colour, size=8),
                hovertemplate=(
                    f"<b>{party}</b><br>"
                    "venster: %{text}<br>"
                    "x: %{x:.3f}<br>y: %{y:.3f}<extra></extra>"
                ),
            )
        )

    fig.update_layout(
        title="Partij trajectories",
        xaxis_title="Links ← → Rechts",
        yaxis_title="Progressief ↑ / Conservatief ↓",
        height=600,
        legend_title_text="Partij",
    )
    st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Tab 3: Motie Zoeken
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def build_search_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Zoeken" tab: filter motions and show the top 50.

    Motions are filtered in memory by year range, minimum controversy score
    and a case-insensitive literal substring match on the title, then sorted
    by descending controversy. Each of the first 50 results gets an expander
    with key metrics, the voting breakdown, a source link and similar motions.

    Args:
        db_path: Path of the DuckDB database, forwarded to the loaders.
        show_rejected: When False, motions whose stripped title equals
            "Verworpen." are hidden.
    """
    st.subheader("Motie Zoeken")

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        df = df[df["title"].fillna("").str.strip() != "Verworpen."]

    # Controls
    col1, col2, col3 = st.columns([2, 1, 1])
    with col1:
        query = st.text_input(
            "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen"
        )
    with col2:
        years = sorted(df["year"].dropna().astype(int).unique().tolist())
        if years:
            year_range = st.select_slider(
                "Jaar", options=years, value=(years[0], years[-1])
            )
        else:
            # No parseable dates at all: fall back to a fixed range.
            year_range = (2019, 2024)
    with col3:
        min_controversy = st.slider(
            "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05
        )

    # Apply filters in-memory
    working = df.copy()
    working = working[
        (working["year"] >= year_range[0]) & (working["year"] <= year_range[1])
    ]
    if min_controversy > 0:
        working = working[working["controversy_score"] >= min_controversy]
    if query:
        q = query.lower()
        mask = working["title"].fillna("").str.lower().str.contains(q, regex=False)
        working = working[mask]

    working = working.sort_values(by="controversy_score", ascending=False)
    st.caption(f"{len(working)} resultaten (top 50 getoond)")

    for _, row in working.head(50).iterrows():
        title = row.get("title") or f"Motie #{row['id']}"
        date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"

        # NULL scores arrive as NaN, which is truthy, so an `or 0` fallback
        # would not catch them and "nan" would be rendered. Treat them as 0.
        raw_controversy = row.get("controversy_score")
        controversy = float(raw_controversy) if pd.notna(raw_controversy) else 0.0
        raw_margin = row.get("winning_margin")
        margin = float(raw_margin) if pd.notna(raw_margin) else 0.0

        with st.expander(f"**{title}** — {date_str} — 🔥 {controversy:.2f}"):
            cols = st.columns(3)
            cols[0].metric("Controverse", f"{controversy:.2f}")
            cols[1].metric("Marge", f"{margin:.2f}")
            cols[2].metric("Jaar", int(row["year"]) if pd.notna(row["year"]) else "?")

            # Voting breakdown
            _render_voting_results(row.get("voting_results"))

            # Link to original motion
            url = row.get("url")
            if url and str(url).startswith("http"):
                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

            # Similar motions
            # NOTE(review): query_similar opens a new DuckDB connection per
            # call and this runs for every listed row; consider caching or
            # batching if the tab feels slow.
            sim = query_similar(db_path, int(row["id"]), top_k=5)
            if not sim.empty:
                st.markdown("**Vergelijkbare moties:**")
                for _, s in sim.iterrows():
                    s_date = (
                        pd.to_datetime(s["date"]).strftime("%Y")
                        if pd.notna(s.get("date"))
                        else ""
                    )
                    st.markdown(
                        f"- {s.get('title', 'Onbekend')} *(score: {s['score']:.3f}, {s_date})*"
                    )
            else:
                st.caption("_Nog geen vergelijkbare moties beschikbaar_")
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# Tab 4: Motie Browser
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def build_browser_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Browser" tab: a sortable table plus a detail panel.

    The table shows motions filtered by year and minimum controversy and
    sorted by the chosen criterion. The detail panel looks up a motion by id
    in the full (unfiltered by year/controversy) set and shows its title,
    date, controversy score, source link, voting breakdown and similar motions.

    Args:
        db_path: Path of the DuckDB database, forwarded to the loaders.
        show_rejected: When False, motions whose stripped title equals
            "Verworpen." are hidden.
    """
    st.subheader("Motie Browser")

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        df = df[df["title"].fillna("").str.strip() != "Verworpen."]

    # Controls
    col1, col2, col3 = st.columns(3)
    with col1:
        years = sorted(df["year"].dropna().astype(int).unique().tolist())
        year_filter = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years])
    with col2:
        min_controversy_b = st.slider(
            "Min. controverse",
            min_value=0.0,
            max_value=1.0,
            value=0.0,
            step=0.05,
            # Explicit key: the search tab has a slider with the same label.
            key="browser_controversy",
        )
    with col3:
        sort_by = st.selectbox("Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"])

    # Filter
    working = df.copy()
    if year_filter != "(Alle)":
        working = working[working["year"] == int(year_filter)]
    if min_controversy_b > 0:
        working = working[working["controversy_score"] >= min_controversy_b]

    # Sort label -> (column, ascending).
    sort_map = {
        "Datum (nieuw)": ("date", False),
        "Controverse": ("controversy_score", False),
        "Marge": ("winning_margin", True),
    }
    sort_col, sort_asc = sort_map[sort_by]
    working = working.sort_values(by=sort_col, ascending=sort_asc)

    # Display table
    display_cols = ["id", "title", "date", "controversy_score", "winning_margin"]
    available_display = [c for c in display_cols if c in working.columns]
    st.dataframe(
        working[available_display].reset_index(drop=True),
        use_container_width=True,
        height=350,
    )

    st.divider()

    # Detail panel: bounds and default follow the filtered table, with
    # placeholder bounds when the filter leaves nothing.
    st.markdown("**Detail weergave** — vul een motie-ID in:")
    has_rows = not working.empty
    sel_id = st.number_input(
        "Motie ID",
        min_value=int(working["id"].min()) if has_rows else 1,
        max_value=int(working["id"].max()) if has_rows else 99999,
        value=int(working["id"].iloc[0]) if has_rows else 1,
        step=1,
    )
    motion_row = df[df["id"] == sel_id]
    if not motion_row.empty:
        row = motion_row.iloc[0]
        st.markdown(f"### {row.get('title') or 'Onbekend'}")
        date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"

        # The column always exists, so a .get default never applies; a NULL
        # score arrives as NaN and would be rendered as "nan". Treat it as 0.
        raw_controversy = row.get("controversy_score")
        controversy = float(raw_controversy) if pd.notna(raw_controversy) else 0.0
        st.caption(
            f"📅 {date_str} | 🔥 Controverse: {controversy:.2f}"
        )

        # Link to original source
        url = row.get("url")
        if url and str(url).startswith("http"):
            st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

        # Voting breakdown
        st.markdown("**Stemuitslag:**")
        _render_voting_results(row.get("voting_results"))

        # Similar motions
        sim = query_similar(db_path, int(sel_id), top_k=10)
        if not sim.empty:
            st.markdown("**Vergelijkbare moties:**")
            st.dataframe(
                sim[["title", "score", "date", "policy_area"]],
                use_container_width=True,
            )
        else:
            st.caption("_Nog geen vergelijkbare moties beschikbaar voor deze motie_")
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
# App entry
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
def run_app() -> None:
    """Configure the page, render the sidebar and dispatch to the four tabs.

    The sidebar holds the database path, the window size and the "show
    rejected motions" toggle, plus an "about" expander with row counts for the
    ``motions``, ``fused_embeddings`` and ``similarity_cache`` tables. If the
    database cannot be opened or queried, the expander shows a warning and the
    rest of the app still renders.
    """
    st.set_page_config(
        layout="wide",
        page_title="Parlement Explorer",
        page_icon="🏛️",
    )
    st.title("🏛️ Parlement Explorer")

    # Sidebar
    st.sidebar.title("Instellingen")
    db_path = st.sidebar.text_input("DuckDB pad", value="data/motions.db")
    window_size = st.sidebar.radio("Venstergrootte", ["quarterly", "annual"], index=0)
    show_rejected = st.sidebar.checkbox("Toon verworpen moties", value=False)

    # About section
    with st.sidebar.expander("ℹ️ Over", expanded=False):
        try:
            con = duckdb.connect(database=db_path, read_only=True)
            try:
                n_motions = con.execute("SELECT COUNT(*) FROM motions").fetchone()[0]
                n_fused = con.execute("SELECT COUNT(*) FROM fused_embeddings").fetchone()[0]
                n_sim = con.execute("SELECT COUNT(*) FROM similarity_cache").fetchone()[0]
            finally:
                # Close even when a count query fails (e.g. a table is
                # missing); otherwise the handle would stay open.
                con.close()
            # Two trailing spaces before "\n" make a markdown hard line break,
            # so each counter is shown on its own line.
            st.markdown(
                f"**Moties:** {n_motions:,}  \n"
                f"**Fused embeddings:** {n_fused:,}  \n"
                f"**Similarity cache:** {n_sim:,}"
            )
        except Exception as e:
            st.warning(f"DB niet bereikbaar: {e}")

    # Main tabs
    tab1, tab2, tab3, tab4 = st.tabs(
        ["🧭 Politiek Kompas", "📈 Trajectories", "🔍 Motie Zoeken", "📋 Motie Browser"]
    )
    with tab1:
        build_compass_tab(db_path, window_size)
    with tab2:
        build_trajectories_tab(db_path, window_size)
    with tab3:
        build_search_tab(db_path, show_rejected)
    with tab4:
        build_browser_tab(db_path, show_rejected)
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: set up INFO-level logging with timestamps, then
    # build the UI.
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(message)s",
        level=logging.INFO,
    )
    run_app()
|
|
|
|