diff --git a/explorer.py b/explorer.py index 38255dd..432b4b3 100644 --- a/explorer.py +++ b/explorer.py @@ -45,11 +45,30 @@ PARTY_COLOURS: Dict[str, str] = { "JA21": "#7B1FA2", "BBB": "#8D6E63", "NSC": "#FF8F00", + "Nieuw Sociaal Contract": "#FF8F00", # alias used in mp_metadata "DENK": "#00897B", "50PLUS": "#7E57C2", + "Volt": "#572AB7", "Unknown": "#9E9E9E", } +# Ordered list of well-known parties for trajectory default selection. +# Keeps the chart readable without overwhelming users with all parties. +KNOWN_MAJOR_PARTIES = [ + "VVD", + "PVV", + "D66", + "GroenLinks-PvdA", + "GroenLinks", + "PvdA", + "CDA", + "SP", + "NSC", + "Nieuw Sociaal Contract", + "CU", + "BBB", +] + # --------------------------------------------------------------------------- # Cached loaders @@ -165,7 +184,7 @@ def load_motions_df(db_path: str) -> pd.DataFrame: """ SELECT id, title, description, date, policy_area, voting_results, layman_explanation, - winning_margin, controversy_score + winning_margin, controversy_score, url FROM motions """ ).fetchdf() @@ -211,6 +230,51 @@ def query_similar( con.close() +# --------------------------------------------------------------------------- +# Shared rendering helpers +# --------------------------------------------------------------------------- + + +def _render_voting_results(voting_results_json) -> None: + """Render a voting_results JSON blob as a grouped voor/tegen/onthouden table. + + The JSON is stored as {party_or_mp: vote} where vote is one of + 'voor', 'tegen', 'onthouden', 'afwezig'. We group by vote for readability. + """ + if not voting_results_json: + return + try: + vdata = ( + json.loads(voting_results_json) + if isinstance(voting_results_json, str) + else voting_results_json + ) + if not isinstance(vdata, dict) or not vdata: + return + # Group {vote: [actor, ...]} + by_vote: Dict[str, List[str]] = {} + for actor, vote in vdata.items(): + vote_str = str(vote).lower().strip() + by_vote.setdefault(vote_str, []).append(str(actor)) + # Render in fixed order + vote_order = ["voor", "tegen", "onthouden", "afwezig"] + vote_emoji = {"voor": "✅", "tegen": "❌", "onthouden": "🟡", "afwezig": "âŦœ"} + rows_shown = False + for v in vote_order + [k for k in by_vote if k not in vote_order]: + actors = by_vote.get(v) + if not actors: + continue + emoji = vote_emoji.get(v, "â–Ēī¸") + st.markdown( + f"**{emoji} {v.capitalize()}** ({len(actors)}): {', '.join(sorted(actors))}" + ) + rows_shown = True + if not rows_shown: + st.caption("_Geen stemuitslag beschikbaar_") + except Exception: + pass + + # --------------------------------------------------------------------------- # Tab 1: Politiek Kompas # --------------------------------------------------------------------------- @@ -324,18 +388,27 @@ def build_trajectories_tab(db_path: str, window_size: str) -> None: ) all_parties_sorted = sorted(all_parties) - major_parties = [ - p - for p in all_parties_sorted - if len(centroids.get(p, {})) >= max(2, len(windows) // 2) - ] + + # Default: prefer known major parties over the automatic "appeared in most windows" + # heuristic, which would exclude newer parties like NSC that only have 4 windows. + default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties] + if not default_parties: + default_parties = all_parties_sorted[:6] selected_parties = st.multiselect( "Selecteer partijen", options=all_parties_sorted, - default=major_parties[:12] if major_parties else all_parties_sorted[:8], + default=default_parties, ) + # Note about partial data years + if "2023-Q1" in windows and not any( + w.startswith("2023-Q") and w != "2023-Q1" for w in windows + ): + st.caption( + "â„šī¸ 2023 heeft alleen data voor Q1 — pipeline draaide niet door in dat jaar." + ) + fig = go.Figure() for party in selected_parties: if party not in centroids: @@ -388,11 +461,11 @@ def build_search_tab(db_path: str, show_rejected: bool) -> None: if not show_rejected: df = df[df["title"].fillna("").str.strip() != "Verworpen."] - # Sidebar-style controls in the main area + # Controls col1, col2, col3 = st.columns([2, 1, 1]) with col1: query = st.text_input( - "Zoek op titel of uitleg", placeholder="bijv. stikstof, klimaat, wonen" + "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen" ) with col2: years = sorted(df["year"].dropna().astype(int).unique().tolist()) @@ -403,23 +476,20 @@ def build_search_tab(db_path: str, show_rejected: bool) -> None: else: year_range = (2019, 2024) with col3: - policy_areas = ["(Alle)"] + sorted(df["policy_area"].dropna().unique().tolist()) - policy_filter = st.selectbox("Beleidsterrein", options=policy_areas) + min_controversy = st.slider( + "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05 + ) # Apply filters in-memory working = df.copy() working = working[ (working["year"] >= year_range[0]) & (working["year"] <= year_range[1]) ] - if policy_filter != "(Alle)": - working = working[working["policy_area"] == policy_filter] + if min_controversy > 0: + working = working[working["controversy_score"] >= min_controversy] if query: q = query.lower() - mask = working["title"].fillna("").str.lower().str.contains( - q, regex=False - ) | working["layman_explanation"].fillna("").str.lower().str.contains( - q, regex=False - ) + mask = working["title"].fillna("").str.lower().str.contains(q, regex=False) working = working[mask] working = working.sort_values(by="controversy_score", ascending=False) @@ -428,20 +498,21 @@ def build_search_tab(db_path: str, show_rejected: bool) -> None: for _, row in working.head(50).iterrows(): title = row.get("title") or f"Motie #{row['id']}" date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?" - with st.expander(f"**{title}** — {date_str} — {row.get('policy_area') or ''}"): - explanation = row.get("layman_explanation") - if explanation and str(explanation).strip(): - st.markdown(explanation) - elif row.get("description") and str(row["description"]).strip(): - st.markdown(str(row["description"])[:600] + "â€Ļ") - else: - st.caption("_Geen samenvatting beschikbaar_") - + controversy = row.get("controversy_score") or 0 + with st.expander(f"**{title}** — {date_str} — đŸ”Ĩ {controversy:.2f}"): cols = st.columns(3) - cols[0].metric("Controverse", f"{row.get('controversy_score', 0):.2f}") + cols[0].metric("Controverse", f"{controversy:.2f}") cols[1].metric("Marge", f"{row.get('winning_margin', 0):.2f}") cols[2].metric("Jaar", int(row["year"]) if pd.notna(row["year"]) else "?") + # Voting breakdown + _render_voting_results(row.get("voting_results")) + + # Link to original motion + url = row.get("url") + if url and str(url).startswith("http"): + st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})") + # Similar motions sim = query_similar(db_path, int(row["id"]), top_k=5) if not sim.empty: @@ -481,9 +552,13 @@ def build_browser_tab(db_path: str, show_rejected: bool) -> None: years = sorted(df["year"].dropna().astype(int).unique().tolist()) year_filter = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years]) with col2: - policy_areas = ["(Alle)"] + sorted(df["policy_area"].dropna().unique().tolist()) - pa_filter = st.selectbox( - "Beleidsterrein", options=policy_areas, key="browser_pa" + min_controversy_b = st.slider( + "Min. controverse", + min_value=0.0, + max_value=1.0, + value=0.0, + step=0.05, + key="browser_controversy", ) with col3: sort_by = st.selectbox("Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"]) @@ -492,8 +567,8 @@ def build_browser_tab(db_path: str, show_rejected: bool) -> None: working = df.copy() if year_filter != "(Alle)": working = working[working["year"] == int(year_filter)] - if pa_filter != "(Alle)": - working = working[working["policy_area"] == pa_filter] + if min_controversy_b > 0: + working = working[working["controversy_score"] >= min_controversy_b] sort_map = { "Datum (nieuw)": ("date", False), @@ -504,14 +579,7 @@ def build_browser_tab(db_path: str, show_rejected: bool) -> None: working = working.sort_values(by=sort_col, ascending=sort_asc) # Display table - display_cols = [ - "id", - "title", - "date", - "policy_area", - "controversy_score", - "winning_margin", - ] + display_cols = ["id", "title", "date", "controversy_score", "winning_margin"] available_display = [c for c in display_cols if c in working.columns] st.dataframe( working[available_display].reset_index(drop=True), @@ -534,31 +602,19 @@ def build_browser_tab(db_path: str, show_rejected: bool) -> None: if not motion_row.empty: row = motion_row.iloc[0] st.markdown(f"### {row.get('title') or 'Onbekend'}") + date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?" st.caption( - f"📅 {row['date'].strftime('%d %b %Y') if pd.notna(row['date']) else '?'} " - f"| đŸˇī¸ {row.get('policy_area') or ''} " - f"| đŸ”Ĩ Controverse: {row.get('controversy_score', 0):.2f}" + f"📅 {date_str} | đŸ”Ĩ Controverse: {row.get('controversy_score', 0):.2f}" ) - if row.get("layman_explanation") and str(row["layman_explanation"]).strip(): - st.markdown(row["layman_explanation"]) - elif row.get("description") and str(row["description"]).strip(): - st.markdown(str(row["description"])) + # Link to original source + url = row.get("url") + if url and str(url).startswith("http"): + st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})") - # Parse voting results - try: - vr = row.get("voting_results") - if vr and str(vr).strip() not in ("", "null", "None"): - vdata = json.loads(vr) if isinstance(vr, str) else vr - if isinstance(vdata, dict): - st.markdown("**Stemuitslag:**") - for category, actors in vdata.items(): - if actors: - st.markdown( - f"- **{category}**: {', '.join(str(a) for a in actors)}" - ) - except Exception: - pass + # Voting breakdown + st.markdown("**Stemuitslag:**") + _render_voting_results(row.get("voting_results")) # Similar motions sim = query_similar(db_path, int(sel_id), top_k=10)