diff --git a/explorer.py b/explorer.py
index d0cbdcd..1bb973b 100644
--- a/explorer.py
+++ b/explorer.py
@@ -49,6 +49,7 @@ PARTY_COLOURS: Dict[str, str] = {
"DENK": "#00897B",
"50PLUS": "#7E57C2",
"Volt": "#572AB7",
+ "ChristenUnie": "#0288D1",
"Unknown": "#9E9E9E",
}
@@ -69,23 +70,27 @@ KNOWN_MAJOR_PARTIES = [
]
-# Current parliament parties (used for party-level SVD lookups)
-# Keep both common abbreviations and full names that may appear in the DB
-CURRENT_PARLIAMENT_PARTIES = frozenset(
- [
- "VVD",
+# Parties currently seated in the Tweede Kamer (2023 election cycle).
+# These are the entity_ids as stored in svd_vectors for window='2025'.
+CURRENT_PARLIAMENT_PARTIES: frozenset[str] = frozenset(
+ {
"PVV",
+ "VVD",
+ "NSC",
+ "BBB",
"D66",
"GroenLinks-PvdA",
- "GroenLinks",
- "PvdA",
"CDA",
"SP",
- "NSC",
- "CU",
"ChristenUnie",
- "BBB",
- ]
+ "CU", # alias for ChristenUnie
+ "SGP",
+ "Volt",
+ "DENK",
+ "PvdD",
+ "JA21",
+ "FVD",
+ }
)
@@ -207,16 +212,12 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
AND entity_id is a known current-parliament party.
Returns:
- {party_name: [float * k]} — k = 50 for the canonical 2025 window
+ {party_name: [float * k]} — k = 50 for the canonical 2025 window.
+ Duplicate rows for the same party are de-duplicated (last row wins).
"""
- con = None
try:
- # Use a deterministic, ordered list for parameter binding
- party_list = sorted(CURRENT_PARLIAMENT_PARTIES)
- if not party_list:
- return {}
-
con = duckdb.connect(database=db_path, read_only=True)
+ party_list = sorted(CURRENT_PARLIAMENT_PARTIES)
placeholders = ", ".join("?" for _ in party_list)
rows = con.execute(
f"SELECT entity_id, vector FROM svd_vectors "
@@ -224,70 +225,29 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
f"AND entity_id IN ({placeholders})",
party_list,
).fetchall()
-
- out: Dict[str, List[float]] = {}
- for row in rows:
- party = row[0]
- vec_field = row[1]
- try:
- if vec_field is None:
- # skip missing vectors
+ result: Dict[str, List[float]] = {}
+ for entity_id, raw_vec in rows:
+ if isinstance(raw_vec, str):
+ vec = json.loads(raw_vec)
+ elif isinstance(raw_vec, (bytes, bytearray)):
+ vec = json.loads(raw_vec.decode())
+ elif isinstance(raw_vec, list):
+ vec = raw_vec
+ else:
+ try:
+ vec = list(raw_vec)
+ except Exception:
continue
- # string-encoded JSON vector
- if isinstance(vec_field, str):
- vec = json.loads(vec_field)
- # bytes (some DB drivers may return bytes)
- elif isinstance(vec_field, (bytes, bytearray)):
- try:
- vec = json.loads(vec_field.decode("utf-8"))
- except Exception:
- # fallback: attempt to eval as list-like
- vec = list(vec_field)
- # already a list/tuple/np.ndarray-like
- elif isinstance(vec_field, (list, tuple, np.ndarray)):
- vec = list(vec_field)
- else:
- # unknown type: attempt best-effort conversion
- vec = list(vec_field)
-
- # ensure all entries are floats
- vec_floats = [float(x) for x in vec]
- out[party] = vec_floats
- except Exception:
- # skip malformed rows but keep processing others
- logger.debug("Skipping malformed vector for party %s", party)
- continue
-
- return out
+ result[entity_id] = [float(v) if v is not None else 0.0 for v in vec]
+ return result
except Exception:
logger.exception("Failed to load party axis scores")
return {}
finally:
- if con is not None:
+ try:
con.close()
-
-
-@st.cache_data(show_spinner="Moties laden…")
-def load_motions_df(db_path: str) -> pd.DataFrame:
- """Load the full motions table as a pandas DataFrame (read-only)."""
- con = duckdb.connect(database=db_path, read_only=True)
- try:
- df = con.execute(
- """
- SELECT id, title, description, date, policy_area,
- voting_results, layman_explanation,
- winning_margin, controversy_score, url
- FROM motions
- """
- ).fetchdf()
- df["date"] = pd.to_datetime(df["date"], errors="coerce")
- df["year"] = df["date"].dt.year
- return df
- except Exception:
- logger.exception("Failed to load motions")
- return pd.DataFrame()
- finally:
- con.close()
+ except Exception:
+ pass
def _render_party_axis_chart(
@@ -295,114 +255,91 @@ def _render_party_axis_chart(
) -> None:
"""Render a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`.
- party_scores: mapping party -> list-like vector (50-dim)
- comp_sel: 1-based component index
+ Each party is plotted at its score on a single horizontal axis (y=0).
"""
- # Validate component selection
- if not isinstance(comp_sel, int) or comp_sel < 1:
- st.caption("Ongeldige SVD-as geselecteerd.")
- return
-
if not party_scores:
- st.caption("Partijdata zijn niet beschikbaar.")
+ st.caption("_Partijdata niet beschikbaar voor deze as._")
return
- axis_idx = comp_sel - 1
-
- # Determine maximum available vector dimension to validate selection
- max_dim = 0
- for v in party_scores.values():
- try:
- if isinstance(v, (list, tuple, np.ndarray)):
- max_dim = max(max_dim, len(v))
- except Exception:
- continue
-
- if axis_idx >= max_dim:
- st.caption(
- f"Geselecteerde component ({comp_sel}) valt buiten het bereik van de beschikbare vectoren ({max_dim} dimensies)."
- )
- return
-
- parties: List[str] = []
- xs: List[float] = []
-
+ axis_idx = comp_sel - 1 # 0-based index into the 50-dim vector
+ data: list[dict] = []
for party, vec in party_scores.items():
- # Ensure vec is indexable/sequence-like
- if not isinstance(vec, (list, tuple, np.ndarray)):
- continue
- # safe indexing
- if axis_idx >= len(vec):
- continue
- try:
- raw = vec[axis_idx]
- val = float(raw)
- # filter non-finite values
- if not np.isfinite(val):
- continue
- except Exception:
- continue
- parties.append(party)
- xs.append(val)
+ if axis_idx < len(vec):
+ data.append({"party": party, "score": vec[axis_idx]})
- if not xs:
- st.caption("Geen bruikbare partijposities gevonden voor de gekozen SVD-as.")
+ if not data:
+ st.caption("_Geen partijscores voor deze as._")
return
- try:
- x_min = float(min(xs))
- x_max = float(max(xs))
- except Exception:
- st.caption("Onvoldoende gegevens om het asbereik te berekenen.")
- return
-
- # Symmetric padding around the midpoint for balanced visualisation
- if x_min == x_max:
- padding = 0.5 if x_min == 0 else abs(x_min) * 0.1
- if padding <= 0:
- padding = 0.5
- center = x_min
- half = padding
- else:
- center = (x_min + x_max) / 2.0
- half = max(abs(x_max - center), abs(center - x_min))
- # add slight visual padding
- half = half * 1.15
-
- x_min = center - half
- x_max = center + half
-
- # Build horizontal scatter: y is constant (0) but offset for label placement
- ys = [0 for _ in xs]
+ scores = [d["score"] for d in data]
+ parties = [d["party"] for d in data]
+ colours = [PARTY_COLOURS.get(p, "#9E9E9E") for p in parties]
+ hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]
fig = go.Figure()
+ # Baseline
+ x_min, x_max = min(scores) * 1.15, max(scores) * 1.15
+ fig.add_trace(
+ go.Scatter(
+ x=[x_min, x_max],
+ y=[0, 0],
+ mode="lines",
+ line={"color": "#cccccc", "width": 1},
+ hoverinfo="skip",
+ showlegend=False,
+ )
+ )
+ # Party markers
fig.add_trace(
go.Scatter(
- x=xs,
- y=ys,
+ x=scores,
+ y=[0] * len(scores),
mode="markers+text",
text=parties,
textposition="top center",
- marker=dict(
- size=10, color=[PARTY_COLOURS.get(p, "#9E9E9E") for p in parties]
- ),
- hovertemplate="%{text}
x: %{x:.3f}",
+ marker={"size": 12, "color": colours},
+ hovertext=hover,
+ hoverinfo="text",
+ showlegend=False,
)
)
-
fig.update_layout(
- title=f"Partijposities op SVD-as {comp_sel}",
- xaxis_title="Negatief ← — → Positief",
- yaxis=dict(visible=False),
- xaxis=dict(range=[x_min, x_max]),
- height=300,
- margin=dict(t=40, b=40, l=40, r=40),
- showlegend=False,
+ height=160,
+ margin={"l": 10, "r": 10, "t": 10, "b": 30},
+ xaxis={
+ "title": "← Negatieve pool | Positieve pool →",
+ "zeroline": True,
+ "zerolinecolor": "#aaaaaa",
+ },
+ yaxis={"visible": False, "range": [-1, 2]},
+ plot_bgcolor="white",
)
-
st.plotly_chart(fig, use_container_width=True)
+@st.cache_data(show_spinner="Moties laden…")
+def load_motions_df(db_path: str) -> pd.DataFrame:
+ """Load selected columns of the motions table into a DataFrame (read-only) and add a ``year`` column."""
+ con = duckdb.connect(database=db_path, read_only=True)
+ try:
+ df = con.execute(
+ """
+ SELECT id, title, description, date, policy_area,
+ voting_results, layman_explanation,
+ winning_margin, controversy_score, url
+ FROM motions
+ """
+ ).fetchdf()
+ df["date"] = pd.to_datetime(df["date"], errors="coerce")
+ df["year"] = df["date"].dt.year
+ return df
+ except Exception:
+ logger.exception("Failed to load motions")
+ return pd.DataFrame()
+ finally:
+ con.close()
+
+
def query_similar(
db_path: str,
source_motion_id: int,
@@ -1039,62 +976,103 @@ def build_svd_components_tab(db_path: str) -> None:
)
comp_sel = comp_options[comp_sel_idx]
- # Show theme explanation + poles
+ # Show theme explanation
theme = SVD_THEMES.get(comp_sel, {})
if theme:
st.info(f"**{theme['label']}** — {theme['explanation']}")
- pos = theme.get("positive_pole", "")
- neg = theme.get("negative_pole", "")
- if pos or neg:
- pcol, ncol = st.columns(2)
- with pcol:
- st.success(f"▲ **Positieve pool:** {pos}")
- with ncol:
- st.error(f"▼ **Negatieve pool:** {neg}")
motions = comp_map.get(comp_sel, [])
- col1, col2 = st.columns([1, 2])
- with col1:
- st.markdown("**Top-moties (titels)**")
- for m in motions:
- mid = m.get("motion_id")
- score = m.get("score", 0.0)
- title = m.get("title") or f"Motie #{mid}"
- sign = "▲" if score >= 0 else "▼"
- if st.button(f"{sign} {mid}: {title[:72]}", key=f"btn_{comp_sel}_{mid}"):
- st.session_state["svd_selected_mid"] = mid
-
- with col2:
- sel_mid = st.session_state.get("svd_selected_mid")
- if not sel_mid and motions:
- sel_mid = motions[0].get("motion_id")
- if sel_mid:
- # fetch motion metadata from DB for completeness
- try:
- con = duckdb.connect(database=db_path, read_only=True)
- row = con.execute(
- "SELECT id, title, date, policy_area, url, body_text FROM motions WHERE id=?",
- [int(sel_mid)],
- ).fetchone()
- con.close()
- except Exception:
- row = None
+ # Party axis chart
+ party_scores = load_party_axis_scores(db_path)
+ _render_party_axis_chart(party_scores, comp_sel)
+
+ # Batch-fetch motion details (title, date, policy_area, url, body_text, voting_results)
+ motion_ids = [m.get("motion_id") for m in motions if m.get("motion_id") is not None]
+ motion_details: Dict[int, tuple] = {}
+ if motion_ids:
+ # Defensively convert motion_ids to integers, skipping invalid values
+ ids_int: List[int] = []
+ for mid in motion_ids:
+ try:
+ ids_int.append(int(mid))
+ except Exception:
+ logger.warning("Skipping invalid motion id in SVD batch fetch: %r", mid)
- if row:
- st.markdown(f"### {row[1] or f'Motie #{row[0]}'}")
+ # If no valid ids remain, skip the DB query
+ if ids_int:
+ con = None
try:
- date_str = str(row[2])[:10]
+ placeholders = ", ".join("?" for _ in ids_int)
+ con = duckdb.connect(database=db_path, read_only=True)
+ db_rows = con.execute(
+ f"SELECT id, title, date, policy_area, url, body_text, voting_results "
+ f"FROM motions WHERE id IN ({placeholders})",
+ ids_int,
+ ).fetchall()
+ motion_details = {r[0]: r for r in db_rows}
except Exception:
- date_str = "?"
- st.caption(f"📅 {date_str} | {row[3]}")
- if row[4] and str(row[4]).startswith("http"):
- st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
- if row[5]:
- with st.expander("Toon volledige tekst"):
- st.write(row[5])
- else:
- st.info(f"Metadata not found in DB for motion {sel_mid}")
+ logger.exception("Failed to batch-fetch motion details")
+ finally:
+ if con:
+ con.close()
+
+ # Split motions by pole sign
+ pos_motions = [m for m in motions if float(m.get("score", 0.0)) >= 0]
+ neg_motions = [m for m in motions if float(m.get("score", 0.0)) < 0]
+
+ pos_pole = (
+ theme.get("positive_pole", "Positieve pool") if theme else "Positieve pool"
+ )
+ neg_pole = (
+ theme.get("negative_pole", "Negatieve pool") if theme else "Negatieve pool"
+ )
+
+ pcol, ncol = st.columns(2)
+
+ with pcol:
+ st.success(f"▲ **Positieve pool:** {pos_pole}")
+ for m in pos_motions:
+ mid = m.get("motion_id")
+ raw_title = m.get("title") or f"Motie #{mid}"
+ with st.expander(f"▲ {raw_title[:80]}"):
+ row = motion_details.get(int(mid)) if mid is not None else None
+ if row:
+ try:
+ date_str = str(row[2])[:10]
+ except Exception:
+ date_str = "?"
+ st.caption(f"📅 {date_str} | {row[3] or '—'}")
+ if row[4] and str(row[4]).startswith("http"):
+ st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
+ if row[5]:
+ with st.expander("Toon volledige tekst"):
+ st.write(row[5])
+ _render_voting_results(row[6])
+ else:
+ st.caption("_Geen metadata beschikbaar_")
+
+ with ncol:
+ st.error(f"▼ **Negatieve pool:** {neg_pole}")
+ for m in neg_motions:
+ mid = m.get("motion_id")
+ raw_title = m.get("title") or f"Motie #{mid}"
+ with st.expander(f"▼ {raw_title[:80]}"):
+ row = motion_details.get(int(mid)) if mid is not None else None
+ if row:
+ try:
+ date_str = str(row[2])[:10]
+ except Exception:
+ date_str = "?"
+ st.caption(f"📅 {date_str} | {row[3] or '—'}")
+ if row[4] and str(row[4]).startswith("http"):
+ st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
+ if row[5]:
+ with st.expander("Toon volledige tekst"):
+ st.write(row[5])
+ _render_voting_results(row[6])
+ else:
+ st.caption("_Geen metadata beschikbaar_")
def build_mp_quiz_tab(db_path: str) -> None: