feat(explorer): add scree plot and clean up SVD axis chart

- Add load_scree_data() cached loader computing L2-norm of party scores
  per SVD dimension as a proxy for component importance
- Add _render_scree_plot() rendering a bar chart of the first 15 components
- Insert scree plot + Dutch explanation at the top of build_svd_components_tab
- Clean up _render_party_axis_chart: remove tick numbers, axis line, grid,
  and zero-line from the x-axis (pole labels remain as the x-axis title)
main
Sven Geboers 1 month ago
parent a20bd834fc
commit c5cbc89c1f
  1. 112
      explorer.py

@ -250,6 +250,102 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
pass
@st.cache_data(show_spinner="Scree-plot laden…")
def load_scree_data(db_path: str) -> List[float]:
    """Return one importance score per SVD dimension (column-wise L2-norm).

    Reads the ``vector`` column of ``svd_vectors`` for rows with
    ``entity_type='mp'``, ``window_id='current_parliament'`` and an
    ``entity_id`` contained in ``CURRENT_PARLIAMENT_PARTIES``, decodes each
    vector, and computes the Euclidean norm of every dimension across all
    decoded vectors. The result is cached by Streamlit via ``st.cache_data``.

    Args:
        db_path: Path of the DuckDB database file; opened read-only.

    Returns:
        A list whose length equals the length of the first decoded vector,
        holding the L2-norm of each dimension (0-indexed). An empty list is
        returned when no vector could be decoded or when any error occurs
        (the error is logged, not raised).
    """
    # Bind the name before the try block so the cleanup in ``finally`` never
    # touches an unbound variable when ``duckdb.connect`` itself raises.
    con = None
    try:
        con = duckdb.connect(database=db_path, read_only=True)
        party_list = sorted(CURRENT_PARLIAMENT_PARTIES)
        # One "?" per party keeps the IN (...) clause fully parameterised.
        placeholders = ", ".join("?" for _ in party_list)
        # NOTE(review): the filter combines entity_type='mp' with party names
        # as entity_id -- confirm this matches how party vectors are stored.
        rows = con.execute(
            f"SELECT vector FROM svd_vectors "
            f"WHERE entity_type='mp' AND window_id='current_parliament' "
            f"AND entity_id IN ({placeholders})",
            party_list,
        ).fetchall()

        vectors: List[List[float]] = []
        for (raw_vec,) in rows:
            # The column value may be JSON text, JSON bytes, a native list,
            # or some other iterable; anything else is skipped.
            if isinstance(raw_vec, str):
                vec = json.loads(raw_vec)
            elif isinstance(raw_vec, (bytes, bytearray)):
                vec = json.loads(raw_vec.decode())
            elif isinstance(raw_vec, list):
                vec = raw_vec
            else:
                try:
                    vec = list(raw_vec)
                except Exception:
                    continue
            # NULL entries inside a vector count as 0.0.
            vectors.append([float(v) if v is not None else 0.0 for v in vec])

        if not vectors:
            return []

        # Dimensionality is taken from the first vector; shorter vectors
        # simply do not contribute to dimensions they lack.
        n_dims = len(vectors[0])
        return [
            sum(v[dim] ** 2 for v in vectors if dim < len(v)) ** 0.5
            for dim in range(n_dims)
        ]
    except Exception:
        logger.exception("Failed to load scree data")
        return []
    finally:
        if con is not None:
            try:
                con.close()
            except Exception:
                # Best-effort cleanup: a failing close must not mask the
                # function's result or the already-logged error.
                pass
def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
"""Render a bar chart showing relative component importance (scree plot).
Args:
importances: List of L2-norm scores per component (0-indexed).
n_show: How many components to display (default: first 15).
"""
if not importances:
return
data = importances[:n_show]
components = list(range(1, len(data) + 1))
colours = [
PARTY_COLOURS.get("PVV", "#1565C0") if i == 0 else "#90CAF9"
for i in range(len(data))
]
fig = go.Figure(
go.Bar(
x=components,
y=data,
marker_color=colours,
hovertemplate="As %{x}<br>Gewicht: %{y:.2f}<extra></extra>",
)
)
fig.update_layout(
height=220,
margin={"l": 10, "r": 10, "t": 10, "b": 30},
xaxis={
"title": "SVD-as",
"tickmode": "linear",
"tick0": 1,
"dtick": 1,
"showline": False,
"showgrid": False,
},
yaxis={
"title": "Relatief gewicht",
"showline": False,
"showgrid": True,
"gridcolor": "#eeeeee",
},
plot_bgcolor="rgba(0,0,0,0)",
paper_bgcolor="rgba(0,0,0,0)",
)
st.plotly_chart(fig, use_container_width=True)
def _render_party_axis_chart(
party_scores: Dict[str, List[float]], comp_sel: int, theme: dict
) -> None:
@ -322,8 +418,10 @@ def _render_party_axis_chart(
margin={"l": 10, "r": 10, "t": 10, "b": 30},
xaxis={
"title": f"{left_label} | {right_label}",
"zeroline": True,
"zerolinecolor": "#aaaaaa",
"showticklabels": False,
"showline": False,
"showgrid": False,
"zeroline": False,
},
yaxis={"visible": False, "range": [-1, 2]},
plot_bgcolor="rgba(0,0,0,0)",
@ -957,6 +1055,16 @@ def build_svd_components_tab(db_path: str) -> None:
"het spanningsveld dat de as beschrijft."
)
# Scree plot: relative importance of each SVD component
scree_importances = load_scree_data(db_path)
if scree_importances:
st.markdown(
"**Scree-plot** — het relatieve gewicht van elke SVD-as. "
"De eerste assen verklaren het meeste van de stemverschillen in de Kamer; "
"latere assen zijn subtieler maar politiek nog steeds betekenisvol."
)
_render_scree_plot(scree_importances)
json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
if not os.path.exists(json_path):
st.warning(

Loading…
Cancel
Save