feat(explorer): SVD tab redesign — pole-split motions, party axis chart, inline expanders with voting

- Add ChristenUnie colour alias and CURRENT_PARLIAMENT_PARTIES frozenset (15 parties; 16 entries, because "CU" is kept as an alias for ChristenUnie)
- Add load_party_axis_scores() — queries party SVD vectors from window=2025, cached
- Add _render_party_axis_chart() — 1D Plotly scatter of party positions per axis
- Restructure build_svd_components_tab: replace the session-state button/detail-pane with
  an inline st.expander per motion, split into pos/neg pole columns; details for all 10
  motions (including voting_results) now come from one batch DB query, and the votes are
  rendered via _render_voting_results

Smoke-tested: 15 parties loaded, all 10 axis-1 motions returned with voting data.
main
Sven Geboers 1 month ago
parent 9caaa8baca
commit 361cf9fd35
  1. 396
      explorer.py

@ -49,6 +49,7 @@ PARTY_COLOURS: Dict[str, str] = {
"DENK": "#00897B", "DENK": "#00897B",
"50PLUS": "#7E57C2", "50PLUS": "#7E57C2",
"Volt": "#572AB7", "Volt": "#572AB7",
"ChristenUnie": "#0288D1",
"Unknown": "#9E9E9E", "Unknown": "#9E9E9E",
} }
@ -69,23 +70,27 @@ KNOWN_MAJOR_PARTIES = [
] ]
# Current parliament parties (used for party-level SVD lookups) # Parties currently seated in the Tweede Kamer (2023 election cycle).
# Keep both common abbreviations and full names that may appear in the DB # Deze zijn de entity_ids zoals opgeslagen in svd_vectors voor window='2025'.
CURRENT_PARLIAMENT_PARTIES = frozenset( CURRENT_PARLIAMENT_PARTIES: frozenset[str] = frozenset(
[ {
"VVD",
"PVV", "PVV",
"VVD",
"NSC",
"BBB",
"D66", "D66",
"GroenLinks-PvdA", "GroenLinks-PvdA",
"GroenLinks",
"PvdA",
"CDA", "CDA",
"SP", "SP",
"NSC",
"CU",
"ChristenUnie", "ChristenUnie",
"BBB", "CU", # alias for ChristenUnie
] "SGP",
"Volt",
"DENK",
"PvdD",
"JA21",
"FVD",
}
) )
@ -207,16 +212,12 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
AND entity_id is a known current-parliament party. AND entity_id is a known current-parliament party.
Returns: Returns:
{party_name: [float * k]} k = 50 for the canonical 2025 window {party_name: [float * k]} k = 50 for the canonical 2025 window.
Duplicate rows for the same party are de-duplicated (last row wins).
""" """
con = None
try: try:
# Use a deterministic, ordered list for parameter binding
party_list = sorted(CURRENT_PARLIAMENT_PARTIES)
if not party_list:
return {}
con = duckdb.connect(database=db_path, read_only=True) con = duckdb.connect(database=db_path, read_only=True)
party_list = sorted(CURRENT_PARLIAMENT_PARTIES)
placeholders = ", ".join("?" for _ in party_list) placeholders = ", ".join("?" for _ in party_list)
rows = con.execute( rows = con.execute(
f"SELECT entity_id, vector FROM svd_vectors " f"SELECT entity_id, vector FROM svd_vectors "
@ -224,70 +225,29 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
f"AND entity_id IN ({placeholders})", f"AND entity_id IN ({placeholders})",
party_list, party_list,
).fetchall() ).fetchall()
result: Dict[str, List[float]] = {}
out: Dict[str, List[float]] = {} for entity_id, raw_vec in rows:
for row in rows: if isinstance(raw_vec, str):
party = row[0] vec = json.loads(raw_vec)
vec_field = row[1] elif isinstance(raw_vec, (bytes, bytearray)):
try: vec = json.loads(raw_vec.decode())
if vec_field is None: elif isinstance(raw_vec, list):
# skip missing vectors vec = raw_vec
else:
try:
vec = list(raw_vec)
except Exception:
continue continue
# string-encoded JSON vector result[entity_id] = [float(v) if v is not None else 0.0 for v in vec]
if isinstance(vec_field, str): return result
vec = json.loads(vec_field)
# bytes (some DB drivers may return bytes)
elif isinstance(vec_field, (bytes, bytearray)):
try:
vec = json.loads(vec_field.decode("utf-8"))
except Exception:
# fallback: attempt to eval as list-like
vec = list(vec_field)
# already a list/tuple/np.ndarray-like
elif isinstance(vec_field, (list, tuple, np.ndarray)):
vec = list(vec_field)
else:
# unknown type: attempt best-effort conversion
vec = list(vec_field)
# ensure all entries are floats
vec_floats = [float(x) for x in vec]
out[party] = vec_floats
except Exception:
# skip malformed rows but keep processing others
logger.debug("Skipping malformed vector for party %s", party)
continue
return out
except Exception: except Exception:
logger.exception("Failed to load party axis scores") logger.exception("Failed to load party axis scores")
return {} return {}
finally: finally:
if con is not None: try:
con.close() con.close()
except Exception:
pass
@st.cache_data(show_spinner="Moties laden…")
def load_motions_df(db_path: str) -> pd.DataFrame:
"""Load the full motions table as a pandas DataFrame (read-only)."""
con = duckdb.connect(database=db_path, read_only=True)
try:
df = con.execute(
"""
SELECT id, title, description, date, policy_area,
voting_results, layman_explanation,
winning_margin, controversy_score, url
FROM motions
"""
).fetchdf()
df["date"] = pd.to_datetime(df["date"], errors="coerce")
df["year"] = df["date"].dt.year
return df
except Exception:
logger.exception("Failed to load motions")
return pd.DataFrame()
finally:
con.close()
def _render_party_axis_chart( def _render_party_axis_chart(
@ -295,114 +255,91 @@ def _render_party_axis_chart(
) -> None: ) -> None:
"""Render a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`. """Render a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`.
party_scores: mapping party -> list-like vector (50-dim) Each party is plotted at its score on a single horizontal axis (y=0).
comp_sel: 1-based component index
""" """
# Validate component selection
if not isinstance(comp_sel, int) or comp_sel < 1:
st.caption("Ongeldige SVD-as geselecteerd.")
return
if not party_scores: if not party_scores:
st.caption("Partijdata zijn niet beschikbaar.") st.caption("_Partijdata niet beschikbaar voor deze as._")
return return
axis_idx = comp_sel - 1 axis_idx = comp_sel - 1 # 0-based index into the 50-dim vector
data: list[dict] = []
# Determine maximum available vector dimension to validate selection
max_dim = 0
for v in party_scores.values():
try:
if isinstance(v, (list, tuple, np.ndarray)):
max_dim = max(max_dim, len(v))
except Exception:
continue
if axis_idx >= max_dim:
st.caption(
f"Geselecteerde component ({comp_sel}) valt buiten het bereik van de beschikbare vectoren ({max_dim} dimensies)."
)
return
parties: List[str] = []
xs: List[float] = []
for party, vec in party_scores.items(): for party, vec in party_scores.items():
# Ensure vec is indexable/sequence-like if axis_idx < len(vec):
if not isinstance(vec, (list, tuple, np.ndarray)): data.append({"party": party, "score": vec[axis_idx]})
continue
# safe indexing
if axis_idx >= len(vec):
continue
try:
raw = vec[axis_idx]
val = float(raw)
# filter non-finite values
if not np.isfinite(val):
continue
except Exception:
continue
parties.append(party)
xs.append(val)
if not xs: if not data:
st.caption("Geen bruikbare partijposities gevonden voor de gekozen SVD-as.") st.caption("_Geen partijscores voor deze as._")
return return
try: scores = [d["score"] for d in data]
x_min = float(min(xs)) parties = [d["party"] for d in data]
x_max = float(max(xs)) colours = [PARTY_COLOURS.get(p, "#9E9E9E") for p in parties]
except Exception: hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]
st.caption("Onvoldoende gegevens om het asbereik te berekenen.")
return
# Symmetric padding around the midpoint for balanced visualisation
if x_min == x_max:
padding = 0.5 if x_min == 0 else abs(x_min) * 0.1
if padding <= 0:
padding = 0.5
center = x_min
half = padding
else:
center = (x_min + x_max) / 2.0
half = max(abs(x_max - center), abs(center - x_min))
# add slight visual padding
half = half * 1.15
x_min = center - half
x_max = center + half
# Build horizontal scatter: y is constant (0) but offset for label placement
ys = [0 for _ in xs]
fig = go.Figure() fig = go.Figure()
# Baseline
x_min, x_max = min(scores) * 1.15, max(scores) * 1.15
fig.add_trace(
go.Scatter(
x=[x_min, x_max],
y=[0, 0],
mode="lines",
line={"color": "#cccccc", "width": 1},
hoverinfo="skip",
showlegend=False,
)
)
# Party markers
fig.add_trace( fig.add_trace(
go.Scatter( go.Scatter(
x=xs, x=scores,
y=ys, y=[0] * len(scores),
mode="markers+text", mode="markers+text",
text=parties, text=parties,
textposition="top center", textposition="top center",
marker=dict( marker={"size": 12, "color": colours},
size=10, color=[PARTY_COLOURS.get(p, "#9E9E9E") for p in parties] hovertext=hover,
), hoverinfo="text",
hovertemplate="%{text}<br>x: %{x:.3f}<extra></extra>", showlegend=False,
) )
) )
fig.update_layout( fig.update_layout(
title=f"Partijposities op SVD-as {comp_sel}", height=160,
xaxis_title="Negatief ← — → Positief", margin={"l": 10, "r": 10, "t": 10, "b": 30},
yaxis=dict(visible=False), xaxis={
xaxis=dict(range=[x_min, x_max]), "title": "← Negatieve pool | Positieve pool →",
height=300, "zeroline": True,
margin=dict(t=40, b=40, l=40, r=40), "zerolinecolor": "#aaaaaa",
showlegend=False, },
yaxis={"visible": False, "range": [-1, 2]},
plot_bgcolor="white",
) )
st.plotly_chart(fig, use_container_width=True) st.plotly_chart(fig, use_container_width=True)
@st.cache_data(show_spinner="Moties laden…")
def load_motions_df(db_path: str) -> pd.DataFrame:
"""Load the full motions table as a pandas DataFrame (read-only)."""
con = duckdb.connect(database=db_path, read_only=True)
try:
df = con.execute(
"""
SELECT id, title, description, date, policy_area,
voting_results, layman_explanation,
winning_margin, controversy_score, url
FROM motions
"""
).fetchdf()
df["date"] = pd.to_datetime(df["date"], errors="coerce")
df["year"] = df["date"].dt.year
return df
except Exception:
logger.exception("Failed to load motions")
return pd.DataFrame()
finally:
con.close()
def query_similar( def query_similar(
db_path: str, db_path: str,
source_motion_id: int, source_motion_id: int,
@ -1039,62 +976,103 @@ def build_svd_components_tab(db_path: str) -> None:
) )
comp_sel = comp_options[comp_sel_idx] comp_sel = comp_options[comp_sel_idx]
# Show theme explanation + poles # Show theme explanation
theme = SVD_THEMES.get(comp_sel, {}) theme = SVD_THEMES.get(comp_sel, {})
if theme: if theme:
st.info(f"**{theme['label']}** — {theme['explanation']}") st.info(f"**{theme['label']}** — {theme['explanation']}")
pos = theme.get("positive_pole", "")
neg = theme.get("negative_pole", "")
if pos or neg:
pcol, ncol = st.columns(2)
with pcol:
st.success(f"▲ **Positieve pool:** {pos}")
with ncol:
st.error(f"▼ **Negatieve pool:** {neg}")
motions = comp_map.get(comp_sel, []) motions = comp_map.get(comp_sel, [])
col1, col2 = st.columns([1, 2]) # Party axis chart
with col1: party_scores = load_party_axis_scores(db_path)
st.markdown("**Top-moties (titels)**") _render_party_axis_chart(party_scores, comp_sel)
for m in motions:
mid = m.get("motion_id") # Batch-fetch motion details (title, date, policy_area, url, body_text, voting_results)
score = m.get("score", 0.0) motion_ids = [m.get("motion_id") for m in motions if m.get("motion_id") is not None]
title = m.get("title") or f"Motie #{mid}" motion_details: Dict[int, tuple] = {}
sign = "" if score >= 0 else "" if motion_ids:
if st.button(f"{sign} {mid}: {title[:72]}", key=f"btn_{comp_sel}_{mid}"): # Defensively convert motion_ids to integers, skipping invalid values
st.session_state["svd_selected_mid"] = mid ids_int: List[int] = []
for mid in motion_ids:
with col2: try:
sel_mid = st.session_state.get("svd_selected_mid") ids_int.append(int(mid))
if not sel_mid and motions: except Exception:
sel_mid = motions[0].get("motion_id") logger.warning("Skipping invalid motion id in SVD batch fetch: %r", mid)
if sel_mid:
# fetch motion metadata from DB for completeness
try:
con = duckdb.connect(database=db_path, read_only=True)
row = con.execute(
"SELECT id, title, date, policy_area, url, body_text FROM motions WHERE id=?",
[int(sel_mid)],
).fetchone()
con.close()
except Exception:
row = None
if row: # If no valid ids remain, skip the DB query
st.markdown(f"### {row[1] or f'Motie #{row[0]}'}") if ids_int:
con = None
try: try:
date_str = str(row[2])[:10] placeholders = ", ".join("?" for _ in ids_int)
con = duckdb.connect(database=db_path, read_only=True)
db_rows = con.execute(
f"SELECT id, title, date, policy_area, url, body_text, voting_results "
f"FROM motions WHERE id IN ({placeholders})",
ids_int,
).fetchall()
motion_details = {r[0]: r for r in db_rows}
except Exception: except Exception:
date_str = "?" logger.exception("Failed to batch-fetch motion details")
st.caption(f"📅 {date_str} | {row[3]}") finally:
if row[4] and str(row[4]).startswith("http"): if con:
st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})") con.close()
if row[5]:
with st.expander("Toon volledige tekst"): # Split motions by pole sign
st.write(row[5]) pos_motions = [m for m in motions if float(m.get("score", 0.0)) >= 0]
else: neg_motions = [m for m in motions if float(m.get("score", 0.0)) < 0]
st.info(f"Metadata not found in DB for motion {sel_mid}")
pos_pole = (
theme.get("positive_pole", "Positieve pool") if theme else "Positieve pool"
)
neg_pole = (
theme.get("negative_pole", "Negatieve pool") if theme else "Negatieve pool"
)
pcol, ncol = st.columns(2)
with pcol:
st.success(f"▲ **Positieve pool:** {pos_pole}")
for m in pos_motions:
mid = m.get("motion_id")
raw_title = m.get("title") or f"Motie #{mid}"
with st.expander(f"{raw_title[:80]}"):
row = motion_details.get(int(mid)) if mid is not None else None
if row:
try:
date_str = str(row[2])[:10]
except Exception:
date_str = "?"
st.caption(f"📅 {date_str} | {row[3] or ''}")
if row[4] and str(row[4]).startswith("http"):
st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
if row[5]:
with st.expander("Toon volledige tekst"):
st.write(row[5])
_render_voting_results(row[6])
else:
st.caption("_Geen metadata beschikbaar_")
with ncol:
st.error(f"▼ **Negatieve pool:** {neg_pole}")
for m in neg_motions:
mid = m.get("motion_id")
raw_title = m.get("title") or f"Motie #{mid}"
with st.expander(f"{raw_title[:80]}"):
row = motion_details.get(int(mid)) if mid is not None else None
if row:
try:
date_str = str(row[2])[:10]
except Exception:
date_str = "?"
st.caption(f"📅 {date_str} | {row[3] or ''}")
if row[4] and str(row[4]).startswith("http"):
st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
if row[5]:
with st.expander("Toon volledige tekst"):
st.write(row[5])
_render_voting_results(row[6])
else:
st.caption("_Geen metadata beschikbaar_")
def build_mp_quiz_tab(db_path: str) -> None: def build_mp_quiz_tab(db_path: str) -> None:

Loading…
Cancel
Save