fix(explorer): cleaner trajectories, NSC support, controversy filter, voting display, URL links

main
Sven Geboers 1 month ago
parent cd7bb3b1e0
commit cbbdc6e738
  1. 180
      explorer.py

@ -45,11 +45,30 @@ PARTY_COLOURS: Dict[str, str] = {
"JA21": "#7B1FA2",
"BBB": "#8D6E63",
"NSC": "#FF8F00",
"Nieuw Sociaal Contract": "#FF8F00", # alias used in mp_metadata
"DENK": "#00897B",
"50PLUS": "#7E57C2",
"Volt": "#572AB7",
"Unknown": "#9E9E9E",
}
# Well-known parties, in the order they should be offered as the default
# trajectory selection. Limiting the default to these keeps the chart legible.
# NSC is listed under both its abbreviation and its full name.
KNOWN_MAJOR_PARTIES = [
    "VVD", "PVV", "D66",
    "GroenLinks-PvdA", "GroenLinks", "PvdA",
    "CDA", "SP",
    "NSC", "Nieuw Sociaal Contract",
    "CU", "BBB",
]
# ---------------------------------------------------------------------------
# Cached loaders
@ -165,7 +184,7 @@ def load_motions_df(db_path: str) -> pd.DataFrame:
"""
SELECT id, title, description, date, policy_area,
voting_results, layman_explanation,
winning_margin, controversy_score
winning_margin, controversy_score, url
FROM motions
"""
).fetchdf()
@ -211,6 +230,51 @@ def query_similar(
con.close()
# ---------------------------------------------------------------------------
# Shared rendering helpers
# ---------------------------------------------------------------------------
def _render_voting_results(voting_results_json) -> None:
    """Render a stored voting result as one markdown line per vote category.

    The value is expected to be a mapping ``{party_or_mp: vote}`` (or the JSON
    text of one), where vote is typically one of 'voor', 'tegen', 'onthouden',
    'afwezig'. Actors are grouped by their normalised (lower-cased, stripped)
    vote, and each non-empty group is emitted through ``st.markdown`` as
    ``**<emoji> <Vote>** (<count>): <actors, sorted>``. The four known
    categories come first in a fixed order; any other vote values follow in
    the order they were first encountered.

    Args:
        voting_results_json: A JSON string, or an already-decoded object.

    Returns:
        None. Nothing is rendered when the value is falsy, is a string that
        is not valid JSON, or does not decode to a non-empty dict.
    """
    if not voting_results_json:
        return

    if isinstance(voting_results_json, str):
        try:
            vdata = json.loads(voting_results_json)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass. Malformed text is
            # treated as "no data"; other errors are no longer swallowed.
            return
    else:
        vdata = voting_results_json

    if not isinstance(vdata, dict) or not vdata:
        return

    # Group {vote: [actor, ...]}
    by_vote: Dict[str, List[str]] = {}
    for actor, vote in vdata.items():
        by_vote.setdefault(str(vote).lower().strip(), []).append(str(actor))

    vote_order = ["voor", "tegen", "onthouden", "afwezig"]
    # NOTE(review): three of these prefixes are empty strings — confirm whether
    # glyphs were intended for them.
    vote_emoji = {"voor": "", "tegen": "", "onthouden": "🟡", "afwezig": ""}
    extra_votes = [k for k in by_vote if k not in vote_order]

    for v in vote_order + extra_votes:
        actors = by_vote.get(v)
        if not actors:
            continue
        # Strip so that a missing/empty emoji does not leave "** Voor**":
        # a space right after the opening ** prevents markdown bold.
        label = f"{vote_emoji.get(v, '')} {v.capitalize()}".strip()
        st.markdown(f"**{label}** ({len(actors)}): {', '.join(sorted(actors))}")
# ---------------------------------------------------------------------------
# Tab 1: Politiek Kompas
# ---------------------------------------------------------------------------
@ -324,18 +388,27 @@ def build_trajectories_tab(db_path: str, window_size: str) -> None:
)
all_parties_sorted = sorted(all_parties)
major_parties = [
p
for p in all_parties_sorted
if len(centroids.get(p, {})) >= max(2, len(windows) // 2)
]
# Default: prefer known major parties over the automatic "appeared in most windows"
# heuristic, which would exclude newer parties like NSC that only have 4 windows.
default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties]
if not default_parties:
default_parties = all_parties_sorted[:6]
selected_parties = st.multiselect(
"Selecteer partijen",
options=all_parties_sorted,
default=major_parties[:12] if major_parties else all_parties_sorted[:8],
default=default_parties,
)
# Note about partial data years
if "2023-Q1" in windows and not any(
w.startswith("2023-Q") and w != "2023-Q1" for w in windows
):
st.caption(
" 2023 heeft alleen data voor Q1 — pipeline draaide niet door in dat jaar."
)
fig = go.Figure()
for party in selected_parties:
if party not in centroids:
@ -388,11 +461,11 @@ def build_search_tab(db_path: str, show_rejected: bool) -> None:
if not show_rejected:
df = df[df["title"].fillna("").str.strip() != "Verworpen."]
# Sidebar-style controls in the main area
# Controls
col1, col2, col3 = st.columns([2, 1, 1])
with col1:
query = st.text_input(
"Zoek op titel of uitleg", placeholder="bijv. stikstof, klimaat, wonen"
"Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen"
)
with col2:
years = sorted(df["year"].dropna().astype(int).unique().tolist())
@ -403,23 +476,20 @@ def build_search_tab(db_path: str, show_rejected: bool) -> None:
else:
year_range = (2019, 2024)
with col3:
policy_areas = ["(Alle)"] + sorted(df["policy_area"].dropna().unique().tolist())
policy_filter = st.selectbox("Beleidsterrein", options=policy_areas)
min_controversy = st.slider(
"Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05
)
# Apply filters in-memory
working = df.copy()
working = working[
(working["year"] >= year_range[0]) & (working["year"] <= year_range[1])
]
if policy_filter != "(Alle)":
working = working[working["policy_area"] == policy_filter]
if min_controversy > 0:
working = working[working["controversy_score"] >= min_controversy]
if query:
q = query.lower()
mask = working["title"].fillna("").str.lower().str.contains(
q, regex=False
) | working["layman_explanation"].fillna("").str.lower().str.contains(
q, regex=False
)
mask = working["title"].fillna("").str.lower().str.contains(q, regex=False)
working = working[mask]
working = working.sort_values(by="controversy_score", ascending=False)
@ -428,20 +498,21 @@ def build_search_tab(db_path: str, show_rejected: bool) -> None:
for _, row in working.head(50).iterrows():
title = row.get("title") or f"Motie #{row['id']}"
date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
with st.expander(f"**{title}** — {date_str}{row.get('policy_area') or ''}"):
explanation = row.get("layman_explanation")
if explanation and str(explanation).strip():
st.markdown(explanation)
elif row.get("description") and str(row["description"]).strip():
st.markdown(str(row["description"])[:600] + "")
else:
st.caption("_Geen samenvatting beschikbaar_")
controversy = row.get("controversy_score") or 0
with st.expander(f"**{title}** — {date_str} — 🔥 {controversy:.2f}"):
cols = st.columns(3)
cols[0].metric("Controverse", f"{row.get('controversy_score', 0):.2f}")
cols[0].metric("Controverse", f"{controversy:.2f}")
cols[1].metric("Marge", f"{row.get('winning_margin', 0):.2f}")
cols[2].metric("Jaar", int(row["year"]) if pd.notna(row["year"]) else "?")
# Voting breakdown
_render_voting_results(row.get("voting_results"))
# Link to original motion
url = row.get("url")
if url and str(url).startswith("http"):
st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")
# Similar motions
sim = query_similar(db_path, int(row["id"]), top_k=5)
if not sim.empty:
@ -481,9 +552,13 @@ def build_browser_tab(db_path: str, show_rejected: bool) -> None:
years = sorted(df["year"].dropna().astype(int).unique().tolist())
year_filter = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years])
with col2:
policy_areas = ["(Alle)"] + sorted(df["policy_area"].dropna().unique().tolist())
pa_filter = st.selectbox(
"Beleidsterrein", options=policy_areas, key="browser_pa"
min_controversy_b = st.slider(
"Min. controverse",
min_value=0.0,
max_value=1.0,
value=0.0,
step=0.05,
key="browser_controversy",
)
with col3:
sort_by = st.selectbox("Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"])
@ -492,8 +567,8 @@ def build_browser_tab(db_path: str, show_rejected: bool) -> None:
working = df.copy()
if year_filter != "(Alle)":
working = working[working["year"] == int(year_filter)]
if pa_filter != "(Alle)":
working = working[working["policy_area"] == pa_filter]
if min_controversy_b > 0:
working = working[working["controversy_score"] >= min_controversy_b]
sort_map = {
"Datum (nieuw)": ("date", False),
@ -504,14 +579,7 @@ def build_browser_tab(db_path: str, show_rejected: bool) -> None:
working = working.sort_values(by=sort_col, ascending=sort_asc)
# Display table
display_cols = [
"id",
"title",
"date",
"policy_area",
"controversy_score",
"winning_margin",
]
display_cols = ["id", "title", "date", "controversy_score", "winning_margin"]
available_display = [c for c in display_cols if c in working.columns]
st.dataframe(
working[available_display].reset_index(drop=True),
@ -534,31 +602,19 @@ def build_browser_tab(db_path: str, show_rejected: bool) -> None:
if not motion_row.empty:
row = motion_row.iloc[0]
st.markdown(f"### {row.get('title') or 'Onbekend'}")
date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
st.caption(
f"📅 {row['date'].strftime('%d %b %Y') if pd.notna(row['date']) else '?'} "
f"| 🏷 {row.get('policy_area') or ''} "
f"| 🔥 Controverse: {row.get('controversy_score', 0):.2f}"
f"📅 {date_str} | 🔥 Controverse: {row.get('controversy_score', 0):.2f}"
)
if row.get("layman_explanation") and str(row["layman_explanation"]).strip():
st.markdown(row["layman_explanation"])
elif row.get("description") and str(row["description"]).strip():
st.markdown(str(row["description"]))
# Link to original source
url = row.get("url")
if url and str(url).startswith("http"):
st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")
# Parse voting results
try:
vr = row.get("voting_results")
if vr and str(vr).strip() not in ("", "null", "None"):
vdata = json.loads(vr) if isinstance(vr, str) else vr
if isinstance(vdata, dict):
st.markdown("**Stemuitslag:**")
for category, actors in vdata.items():
if actors:
st.markdown(
f"- **{category}**: {', '.join(str(a) for a in actors)}"
)
except Exception:
pass
# Voting breakdown
st.markdown("**Stemuitslag:**")
_render_voting_results(row.get("voting_results"))
# Similar motions
sim = query_similar(db_path, int(sel_id), top_k=10)

Loading…
Cancel
Save