From c5cbc89c1fd77b309ad745d5a9b60f1d3ab05d35 Mon Sep 17 00:00:00 2001
From: Sven Geboers
Date: Wed, 25 Mar 2026 22:03:01 +0100
Subject: [PATCH] feat(explorer): add scree plot and clean up SVD axis chart

- Add load_scree_data() cached loader computing L2-norm of party scores
  per SVD dimension as a proxy for component importance
- Add _render_scree_plot() rendering a bar chart of the first 15 components
- Insert scree plot + Dutch explanation at the top of build_svd_components_tab
- Clean up _render_party_axis_chart: remove tick numbers, axis line, grid,
  and zero-line from the x-axis (pole labels remain as chart title)
---
Reviewer notes (this section is ignored by `git am`):

* Layout. This patch was recovered from a whitespace-collapsed copy. Line
  boundaries of added/removed lines follow the leading +/- markers and the
  result matches the hunk sizes declared in the @@ headers. Indentation of
  the context lines is inferred from Python syntax -- verify against
  explorer.py before applying. The author e-mail address is missing from
  the From: header of the copy this was recovered from.

* _render_scree_plot / hovertemplate. The recovered text had a raw line
  break inside the string literal between "As %{x}" and "Gewicht", which
  is not valid Python. It is restored here as "<br>" on the assumption
  that the tag was stripped during extraction -- confirm against the
  original commit.

* load_scree_data / cleanup. `con` is first bound inside the `try`. If
  duckdb.connect() raises, the `finally` block references an unbound name;
  the resulting NameError is swallowed by the inner `except Exception:
  pass`, so behaviour is correct but only by accident. Consider binding
  `con = None` before the `try` and closing only when it is not None.

* load_scree_data / docstring. The docstring states the result has length
  50, but the code returns one value per element of the first decoded
  vector (len(vectors[0])). Vectors shorter than that are skipped for the
  dimensions they lack; elements beyond that length are ignored.

* load_scree_data / query. The query filters entity_type='mp' while
  matching entity_id against CURRENT_PARLIAMENT_PARTIES, whereas the
  docstring and commit message speak of party scores -- confirm the
  filter selects the intended rows.

* _render_scree_plot / colours. The first bar is highlighted with
  PARTY_COLOURS["PVV"] (fallback "#1565C0"); all other bars use "#90CAF9".

 explorer.py | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 110 insertions(+), 2 deletions(-)

diff --git a/explorer.py b/explorer.py
index 4c95fe0..7d2475e 100644
--- a/explorer.py
+++ b/explorer.py
@@ -250,6 +250,102 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
             pass
 
 
+@st.cache_data(show_spinner="Scree-plot laden…")
+def load_scree_data(db_path: str) -> List[float]:
+    """Return a list of component importances (L2-norm of party scores per dimension).
+
+    Uses the same svd_vectors data as load_party_axis_scores but aggregates across
+    all components (0-indexed). Returns a list of length == vector dimensionality (50).
+    """
+    try:
+        con = duckdb.connect(database=db_path, read_only=True)
+        party_list = sorted(CURRENT_PARLIAMENT_PARTIES)
+        placeholders = ", ".join("?" for _ in party_list)
+        rows = con.execute(
+            f"SELECT vector FROM svd_vectors "
+            f"WHERE entity_type='mp' AND window_id='current_parliament' "
+            f"AND entity_id IN ({placeholders})",
+            party_list,
+        ).fetchall()
+        vectors: List[List[float]] = []
+        for (raw_vec,) in rows:
+            if isinstance(raw_vec, str):
+                vec = json.loads(raw_vec)
+            elif isinstance(raw_vec, (bytes, bytearray)):
+                vec = json.loads(raw_vec.decode())
+            elif isinstance(raw_vec, list):
+                vec = raw_vec
+            else:
+                try:
+                    vec = list(raw_vec)
+                except Exception:
+                    continue
+            vectors.append([float(v) if v is not None else 0.0 for v in vec])
+        if not vectors:
+            return []
+        n_dims = len(vectors[0])
+        importances: List[float] = []
+        for dim in range(n_dims):
+            col = [v[dim] for v in vectors if dim < len(v)]
+            l2 = sum(x**2 for x in col) ** 0.5
+            importances.append(l2)
+        return importances
+    except Exception:
+        logger.exception("Failed to load scree data")
+        return []
+    finally:
+        try:
+            con.close()
+        except Exception:
+            pass
+
+
+def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
+    """Render a bar chart showing relative component importance (scree plot).
+
+    Args:
+        importances: List of L2-norm scores per component (0-indexed).
+        n_show: How many components to display (default: first 15).
+    """
+    if not importances:
+        return
+    data = importances[:n_show]
+    components = list(range(1, len(data) + 1))
+    colours = [
+        PARTY_COLOURS.get("PVV", "#1565C0") if i == 0 else "#90CAF9"
+        for i in range(len(data))
+    ]
+    fig = go.Figure(
+        go.Bar(
+            x=components,
+            y=data,
+            marker_color=colours,
+            hovertemplate="As %{x}<br>Gewicht: %{y:.2f}",
+        )
+    )
+    fig.update_layout(
+        height=220,
+        margin={"l": 10, "r": 10, "t": 10, "b": 30},
+        xaxis={
+            "title": "SVD-as",
+            "tickmode": "linear",
+            "tick0": 1,
+            "dtick": 1,
+            "showline": False,
+            "showgrid": False,
+        },
+        yaxis={
+            "title": "Relatief gewicht",
+            "showline": False,
+            "showgrid": True,
+            "gridcolor": "#eeeeee",
+        },
+        plot_bgcolor="rgba(0,0,0,0)",
+        paper_bgcolor="rgba(0,0,0,0)",
+    )
+    st.plotly_chart(fig, use_container_width=True)
+
+
 def _render_party_axis_chart(
     party_scores: Dict[str, List[float]], comp_sel: int, theme: dict
 ) -> None:
@@ -322,8 +418,10 @@ def _render_party_axis_chart(
         margin={"l": 10, "r": 10, "t": 10, "b": 30},
         xaxis={
             "title": f"← {left_label} | {right_label} →",
-            "zeroline": True,
-            "zerolinecolor": "#aaaaaa",
+            "showticklabels": False,
+            "showline": False,
+            "showgrid": False,
+            "zeroline": False,
         },
         yaxis={"visible": False, "range": [-1, 2]},
         plot_bgcolor="rgba(0,0,0,0)",
@@ -957,6 +1055,16 @@ def build_svd_components_tab(db_path: str) -> None:
         "het spanningsveld dat de as beschrijft."
     )
 
+    # Scree plot: relative importance of each SVD component
+    scree_importances = load_scree_data(db_path)
+    if scree_importances:
+        st.markdown(
+            "**Scree-plot** — het relatieve gewicht van elke SVD-as. "
+            "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; "
+            "latere assen zijn subtieler maar politiek nog steeds betekenisvol."
+        )
+        _render_scree_plot(scree_importances)
+
     json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
     if not os.path.exists(json_path):
         st.warning(