feat(explorer): SVD tab redesign — pole-split motions, party axis chart, inline expanders with voting

- Add ChristenUnie colour alias and CURRENT_PARLIAMENT_PARTIES frozenset (15 parties)
- Add load_party_axis_scores() — queries party SVD vectors from window=2025, cached
- Add _render_party_axis_chart() — 1D Plotly scatter of party positions per axis
- Restructure build_svd_components_tab: replace session-state button/detail-pane with
  inline st.expander per motion, split into pos/neg pole columns, batch DB query for
  all 10 motions including voting_results, rendered via _render_voting_results

Smoke-tested: 15 parties loaded, all 10 axis-1 motions returned with voting data.
main
Sven Geboers 1 month ago
parent 9caaa8baca
commit 361cf9fd35
  1. 396
      explorer.py

@ -49,6 +49,7 @@ PARTY_COLOURS: Dict[str, str] = {
"DENK": "#00897B",
"50PLUS": "#7E57C2",
"Volt": "#572AB7",
"ChristenUnie": "#0288D1",
"Unknown": "#9E9E9E",
}
@ -69,23 +70,27 @@ KNOWN_MAJOR_PARTIES = [
]
# Current parliament parties (used for party-level SVD lookups)
# Keep both common abbreviations and full names that may appear in the DB
CURRENT_PARLIAMENT_PARTIES = frozenset(
[
"VVD",
# Parties currently seated in the Tweede Kamer (2023 election cycle).
# These are the entity_ids as stored in svd_vectors for window='2025'.
CURRENT_PARLIAMENT_PARTIES: frozenset[str] = frozenset(
{
"PVV",
"VVD",
"NSC",
"BBB",
"D66",
"GroenLinks-PvdA",
"GroenLinks",
"PvdA",
"CDA",
"SP",
"NSC",
"CU",
"ChristenUnie",
"BBB",
]
"CU", # alias for ChristenUnie
"SGP",
"Volt",
"DENK",
"PvdD",
"JA21",
"FVD",
}
)
@ -207,16 +212,12 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
AND entity_id is a known current-parliament party.
Returns:
{party_name: [float * k]} k = 50 for the canonical 2025 window
{party_name: [float * k]} k = 50 for the canonical 2025 window.
Duplicate rows for the same party are de-duplicated (last row wins).
"""
con = None
try:
# Use a deterministic, ordered list for parameter binding
party_list = sorted(CURRENT_PARLIAMENT_PARTIES)
if not party_list:
return {}
con = duckdb.connect(database=db_path, read_only=True)
party_list = sorted(CURRENT_PARLIAMENT_PARTIES)
placeholders = ", ".join("?" for _ in party_list)
rows = con.execute(
f"SELECT entity_id, vector FROM svd_vectors "
@ -224,70 +225,29 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
f"AND entity_id IN ({placeholders})",
party_list,
).fetchall()
out: Dict[str, List[float]] = {}
for row in rows:
party = row[0]
vec_field = row[1]
try:
if vec_field is None:
# skip missing vectors
result: Dict[str, List[float]] = {}
for entity_id, raw_vec in rows:
if isinstance(raw_vec, str):
vec = json.loads(raw_vec)
elif isinstance(raw_vec, (bytes, bytearray)):
vec = json.loads(raw_vec.decode())
elif isinstance(raw_vec, list):
vec = raw_vec
else:
try:
vec = list(raw_vec)
except Exception:
continue
# string-encoded JSON vector
if isinstance(vec_field, str):
vec = json.loads(vec_field)
# bytes (some DB drivers may return bytes)
elif isinstance(vec_field, (bytes, bytearray)):
try:
vec = json.loads(vec_field.decode("utf-8"))
except Exception:
# fallback: attempt to eval as list-like
vec = list(vec_field)
# already a list/tuple/np.ndarray-like
elif isinstance(vec_field, (list, tuple, np.ndarray)):
vec = list(vec_field)
else:
# unknown type: attempt best-effort conversion
vec = list(vec_field)
# ensure all entries are floats
vec_floats = [float(x) for x in vec]
out[party] = vec_floats
except Exception:
# skip malformed rows but keep processing others
logger.debug("Skipping malformed vector for party %s", party)
continue
return out
result[entity_id] = [float(v) if v is not None else 0.0 for v in vec]
return result
except Exception:
logger.exception("Failed to load party axis scores")
return {}
finally:
if con is not None:
try:
con.close()
@st.cache_data(show_spinner="Moties laden…")
def load_motions_df(db_path: str) -> pd.DataFrame:
    """Load the full motions table as a pandas DataFrame (read-only).

    Args:
        db_path: Path to the DuckDB database file.

    Returns:
        One row per motion with the selected columns, ``date`` parsed to
        datetimes (unparseable values become ``NaT``) and a derived ``year``
        column. An empty DataFrame is returned when the database cannot be
        opened or the query fails; the error is logged in both cases.
    """
    con = None
    try:
        # Connect inside the try so that a missing or locked database is
        # logged and degrades to an empty frame, the same as a failed query.
        con = duckdb.connect(database=db_path, read_only=True)
        df = con.execute(
            """
            SELECT id, title, description, date, policy_area,
            voting_results, layman_explanation,
            winning_margin, controversy_score, url
            FROM motions
            """
        ).fetchdf()
        df["date"] = pd.to_datetime(df["date"], errors="coerce")
        df["year"] = df["date"].dt.year
        return df
    except Exception:
        logger.exception("Failed to load motions")
        return pd.DataFrame()
    finally:
        # con stays None when the connection itself failed.
        if con is not None:
            con.close()
except Exception:
pass
def _render_party_axis_chart(
@ -295,114 +255,91 @@ def _render_party_axis_chart(
) -> None:
"""Render a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`.
party_scores: mapping party -> list-like vector (50-dim)
comp_sel: 1-based component index
Each party is plotted at its score on a single horizontal axis (y=0).
"""
# Validate component selection
if not isinstance(comp_sel, int) or comp_sel < 1:
st.caption("Ongeldige SVD-as geselecteerd.")
return
if not party_scores:
st.caption("Partijdata zijn niet beschikbaar.")
st.caption("_Partijdata niet beschikbaar voor deze as._")
return
axis_idx = comp_sel - 1
# Determine maximum available vector dimension to validate selection
max_dim = 0
for v in party_scores.values():
try:
if isinstance(v, (list, tuple, np.ndarray)):
max_dim = max(max_dim, len(v))
except Exception:
continue
if axis_idx >= max_dim:
st.caption(
f"Geselecteerde component ({comp_sel}) valt buiten het bereik van de beschikbare vectoren ({max_dim} dimensies)."
)
return
parties: List[str] = []
xs: List[float] = []
axis_idx = comp_sel - 1 # 0-based index into the 50-dim vector
data: list[dict] = []
for party, vec in party_scores.items():
# Ensure vec is indexable/sequence-like
if not isinstance(vec, (list, tuple, np.ndarray)):
continue
# safe indexing
if axis_idx >= len(vec):
continue
try:
raw = vec[axis_idx]
val = float(raw)
# filter non-finite values
if not np.isfinite(val):
continue
except Exception:
continue
parties.append(party)
xs.append(val)
if axis_idx < len(vec):
data.append({"party": party, "score": vec[axis_idx]})
if not xs:
st.caption("Geen bruikbare partijposities gevonden voor de gekozen SVD-as.")
if not data:
st.caption("_Geen partijscores voor deze as._")
return
try:
x_min = float(min(xs))
x_max = float(max(xs))
except Exception:
st.caption("Onvoldoende gegevens om het asbereik te berekenen.")
return
# Symmetric padding around the midpoint for balanced visualisation
if x_min == x_max:
padding = 0.5 if x_min == 0 else abs(x_min) * 0.1
if padding <= 0:
padding = 0.5
center = x_min
half = padding
else:
center = (x_min + x_max) / 2.0
half = max(abs(x_max - center), abs(center - x_min))
# add slight visual padding
half = half * 1.15
x_min = center - half
x_max = center + half
# Build horizontal scatter: y is constant (0) but offset for label placement
ys = [0 for _ in xs]
scores = [d["score"] for d in data]
parties = [d["party"] for d in data]
colours = [PARTY_COLOURS.get(p, "#9E9E9E") for p in parties]
hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]
fig = go.Figure()
# Baseline
x_min, x_max = min(scores) * 1.15, max(scores) * 1.15
fig.add_trace(
go.Scatter(
x=[x_min, x_max],
y=[0, 0],
mode="lines",
line={"color": "#cccccc", "width": 1},
hoverinfo="skip",
showlegend=False,
)
)
# Party markers
fig.add_trace(
go.Scatter(
x=xs,
y=ys,
x=scores,
y=[0] * len(scores),
mode="markers+text",
text=parties,
textposition="top center",
marker=dict(
size=10, color=[PARTY_COLOURS.get(p, "#9E9E9E") for p in parties]
),
hovertemplate="%{text}<br>x: %{x:.3f}<extra></extra>",
marker={"size": 12, "color": colours},
hovertext=hover,
hoverinfo="text",
showlegend=False,
)
)
fig.update_layout(
title=f"Partijposities op SVD-as {comp_sel}",
xaxis_title="Negatief ← — → Positief",
yaxis=dict(visible=False),
xaxis=dict(range=[x_min, x_max]),
height=300,
margin=dict(t=40, b=40, l=40, r=40),
showlegend=False,
height=160,
margin={"l": 10, "r": 10, "t": 10, "b": 30},
xaxis={
"title": "← Negatieve pool | Positieve pool →",
"zeroline": True,
"zerolinecolor": "#aaaaaa",
},
yaxis={"visible": False, "range": [-1, 2]},
plot_bgcolor="white",
)
st.plotly_chart(fig, use_container_width=True)
@st.cache_data(show_spinner="Moties laden…")
def load_motions_df(db_path: str) -> pd.DataFrame:
    """Load the full motions table as a pandas DataFrame (read-only).

    Args:
        db_path: Path to the DuckDB database file.

    Returns:
        One row per motion with the selected columns, ``date`` parsed to
        datetimes (unparseable values become ``NaT``) and a derived ``year``
        column. An empty DataFrame is returned when the database cannot be
        opened or the query fails; the error is logged in both cases.
    """
    con = None
    try:
        # Connect inside the try so that a missing or locked database is
        # logged and degrades to an empty frame, the same as a failed query.
        con = duckdb.connect(database=db_path, read_only=True)
        df = con.execute(
            """
            SELECT id, title, description, date, policy_area,
            voting_results, layman_explanation,
            winning_margin, controversy_score, url
            FROM motions
            """
        ).fetchdf()
        df["date"] = pd.to_datetime(df["date"], errors="coerce")
        df["year"] = df["date"].dt.year
        return df
    except Exception:
        logger.exception("Failed to load motions")
        return pd.DataFrame()
    finally:
        # con stays None when the connection itself failed.
        if con is not None:
            con.close()
def query_similar(
db_path: str,
source_motion_id: int,
@ -1039,62 +976,103 @@ def build_svd_components_tab(db_path: str) -> None:
)
comp_sel = comp_options[comp_sel_idx]
# Show theme explanation + poles
# Show theme explanation
theme = SVD_THEMES.get(comp_sel, {})
if theme:
st.info(f"**{theme['label']}** — {theme['explanation']}")
pos = theme.get("positive_pole", "")
neg = theme.get("negative_pole", "")
if pos or neg:
pcol, ncol = st.columns(2)
with pcol:
st.success(f"▲ **Positieve pool:** {pos}")
with ncol:
st.error(f"▼ **Negatieve pool:** {neg}")
motions = comp_map.get(comp_sel, [])
col1, col2 = st.columns([1, 2])
with col1:
st.markdown("**Top-moties (titels)**")
for m in motions:
mid = m.get("motion_id")
score = m.get("score", 0.0)
title = m.get("title") or f"Motie #{mid}"
sign = "" if score >= 0 else ""
if st.button(f"{sign} {mid}: {title[:72]}", key=f"btn_{comp_sel}_{mid}"):
st.session_state["svd_selected_mid"] = mid
with col2:
sel_mid = st.session_state.get("svd_selected_mid")
if not sel_mid and motions:
sel_mid = motions[0].get("motion_id")
if sel_mid:
# fetch motion metadata from DB for completeness
try:
con = duckdb.connect(database=db_path, read_only=True)
row = con.execute(
"SELECT id, title, date, policy_area, url, body_text FROM motions WHERE id=?",
[int(sel_mid)],
).fetchone()
con.close()
except Exception:
row = None
# Party axis chart
party_scores = load_party_axis_scores(db_path)
_render_party_axis_chart(party_scores, comp_sel)
# Batch-fetch motion details (title, date, policy_area, url, body_text, voting_results)
motion_ids = [m.get("motion_id") for m in motions if m.get("motion_id") is not None]
motion_details: Dict[int, tuple] = {}
if motion_ids:
# Defensively convert motion_ids to integers, skipping invalid values
ids_int: List[int] = []
for mid in motion_ids:
try:
ids_int.append(int(mid))
except Exception:
logger.warning("Skipping invalid motion id in SVD batch fetch: %r", mid)
if row:
st.markdown(f"### {row[1] or f'Motie #{row[0]}'}")
# If no valid ids remain, skip the DB query
if ids_int:
con = None
try:
date_str = str(row[2])[:10]
placeholders = ", ".join("?" for _ in ids_int)
con = duckdb.connect(database=db_path, read_only=True)
db_rows = con.execute(
f"SELECT id, title, date, policy_area, url, body_text, voting_results "
f"FROM motions WHERE id IN ({placeholders})",
ids_int,
).fetchall()
motion_details = {r[0]: r for r in db_rows}
except Exception:
date_str = "?"
st.caption(f"📅 {date_str} | {row[3]}")
if row[4] and str(row[4]).startswith("http"):
st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
if row[5]:
with st.expander("Toon volledige tekst"):
st.write(row[5])
else:
st.info(f"Metadata not found in DB for motion {sel_mid}")
logger.exception("Failed to batch-fetch motion details")
finally:
if con:
con.close()
# Split motions by pole sign
pos_motions = [m for m in motions if float(m.get("score", 0.0)) >= 0]
neg_motions = [m for m in motions if float(m.get("score", 0.0)) < 0]
pos_pole = (
theme.get("positive_pole", "Positieve pool") if theme else "Positieve pool"
)
neg_pole = (
theme.get("negative_pole", "Negatieve pool") if theme else "Negatieve pool"
)
pcol, ncol = st.columns(2)
with pcol:
st.success(f"▲ **Positieve pool:** {pos_pole}")
for m in pos_motions:
mid = m.get("motion_id")
raw_title = m.get("title") or f"Motie #{mid}"
with st.expander(f"{raw_title[:80]}"):
row = motion_details.get(int(mid)) if mid is not None else None
if row:
try:
date_str = str(row[2])[:10]
except Exception:
date_str = "?"
st.caption(f"📅 {date_str} | {row[3] or ''}")
if row[4] and str(row[4]).startswith("http"):
st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
if row[5]:
with st.expander("Toon volledige tekst"):
st.write(row[5])
_render_voting_results(row[6])
else:
st.caption("_Geen metadata beschikbaar_")
with ncol:
st.error(f"▼ **Negatieve pool:** {neg_pole}")
for m in neg_motions:
mid = m.get("motion_id")
raw_title = m.get("title") or f"Motie #{mid}"
with st.expander(f"{raw_title[:80]}"):
row = motion_details.get(int(mid)) if mid is not None else None
if row:
try:
date_str = str(row[2])[:10]
except Exception:
date_str = "?"
st.caption(f"📅 {date_str} | {row[3] or ''}")
if row[4] and str(row[4]).startswith("http"):
st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
if row[5]:
with st.expander("Toon volledige tekst"):
st.write(row[5])
_render_voting_results(row[6])
else:
st.caption("_Geen metadata beschikbaar_")
def build_mp_quiz_tab(db_path: str) -> None:

Loading…
Cancel
Save