diff --git a/explorer.py b/explorer.py index 7c01fce..8993186 100644 --- a/explorer.py +++ b/explorer.py @@ -17,6 +17,7 @@ from __future__ import annotations import json import logging import os +import re from typing import Dict, List, Optional, Tuple import duckdb @@ -572,45 +573,94 @@ def build_compass_tab(db_path: str, window_size: str) -> None: col1, col2 = st.columns([3, 1]) with col2: - window_idx = st.select_slider( - "Tijdsvenster", options=windows, value=windows[-1] + window_idx = st.selectbox( + "Tijdsvenster", options=windows, index=len(windows) - 1 + ) + level = st.radio( + "Weergave", + options=["Kamerleden", "Partijen"], + index=0, + horizontal=True, ) - show_names = st.checkbox("Toon namen", value=False) - min_size = st.slider("Min. MPs per partij", 0, 20, 3) pos = positions_by_window.get(window_idx, {}) if not pos: st.info(f"Geen data voor venster {window_idx}") return + # Deduplicate MPs whose names appear both with and without a parenthetical first name, + # e.g. "Dijk, J.P." and "Dijk, J.P. (Jimmy)". Keep the canonical (stripped) name and + # average positions if both variants are present. + def _strip_paren(name: str) -> str: + return re.sub(r"\s*\([^)]*\)", "", name).strip() + + deduped: Dict[str, Tuple[float, float]] = {} + for name, (x, y) in pos.items(): + base = _strip_paren(name) + if base in deduped: + ox, oy = deduped[base] + deduped[base] = ((ox + x) / 2, (oy + y) / 2) + else: + deduped[base] = (x, y) + pos = deduped + rows = [] for name, (x, y) in pos.items(): - party = party_map.get(name, "Unknown") + party = party_map.get(name) or party_map.get(_strip_paren(name), "Unknown") rows.append({"name": name, "x": x, "y": y, "party": party}) df_pos = pd.DataFrame(rows) - # Filter to parties with enough MPs - party_counts = df_pos["party"].value_counts() - valid_parties = party_counts[party_counts >= min_size].index - df_pos = df_pos[df_pos["party"].isin(valid_parties)] - - colour_map = {p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique()} - - fig = px.scatter( - df_pos, - x="x", - y="y", - color="party", - hover_name="name", - hover_data={"party": True, "x": ":.3f", "y": ":.3f"}, - color_discrete_map=colour_map, - title=f"Politiek Kompas — {window_idx}", - labels={"x": "Links ← → Rechts", "y": "Progressief ↑ / Conservatief ↓"}, + if level == "Partijen": + # Aggregate to party centroids + df_party = ( + df_pos[df_pos["party"] != "Unknown"] + .groupby("party", as_index=False) + .agg(x=("x", "mean"), y=("y", "mean"), n=("name", "count")) + ) + df_party["name"] = df_party["party"] + colour_map = { + p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_party["party"].unique() + } + fig = px.scatter( + df_party, + x="x", + y="y", + color="party", + text="party", + hover_name="party", + hover_data={"party": False, "x": ":.3f", "y": ":.3f", "n": True}, + color_discrete_map=colour_map, + title=f"Politiek Kompas — {window_idx} (partijen)", + labels={ + "x": "Links ← → Rechts", + "y": "Progressief ↑ / Conservatief ↓", + "n": "Kamerleden", + }, + ) + fig.update_traces(textposition="top center", marker_size=14) + else: + colour_map = { + p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique() + } + fig = px.scatter( + df_pos, + x="x", + y="y", + color="party", + hover_name="name", + hover_data={"party": True, "x": ":.3f", "y": ":.3f"}, + color_discrete_map=colour_map, + title=f"Politiek Kompas — {window_idx}", + labels={"x": "Links ← → Rechts", "y": "Progressief ↑ / Conservatief ↓"}, + ) + + fig.update_layout( + height=600, + legend_title_text="Partij", + xaxis={"range": [-1, 1]}, + yaxis={"range": [-0.6, 0.6]}, ) - if show_names: - fig.update_traces(text=df_pos["name"], textposition="top center") - fig.update_layout(height=600, legend_title_text="Partij") with col1: st.plotly_chart(fig, use_container_width=True)