diff --git a/explorer.py b/explorer.py
index 7d2475e..7c01fce 100644
--- a/explorer.py
+++ b/explorer.py
@@ -252,23 +252,27 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
@st.cache_data(show_spinner="Scree-plot laden…")
def load_scree_data(db_path: str) -> List[float]:
- """Return a list of component importances (L2-norm of party scores per dimension).
+ """Return component importances (L2-norm per SVD dimension), sorted descending.
- Uses the same svd_vectors data as load_party_axis_scores but aggregates across
- all components (0-indexed). Returns a list of length == vector dimensionality (50).
+ Uses ALL individual MP vectors (entity_type='mp', window_id='current_parliament'),
+ excluding party-aggregated rows. Since the stored vectors are U*s (scaled by
+ singular values), the L2-norm of all MP scores per dimension approximates the
+ singular value for that dimension. Sorting descending gives the proper scree shape.
+
+ Note: Procrustes alignment across sub-windows may scramble the original dimension
+ ordering, so we sort by magnitude rather than relying on dimension index order.
"""
try:
con = duckdb.connect(database=db_path, read_only=True)
- party_list = sorted(CURRENT_PARLIAMENT_PARTIES)
- placeholders = ", ".join("?" for _ in party_list)
rows = con.execute(
- f"SELECT vector FROM svd_vectors "
- f"WHERE entity_type='mp' AND window_id='current_parliament' "
- f"AND entity_id IN ({placeholders})",
- party_list,
+ "SELECT entity_id, vector FROM svd_vectors "
+ "WHERE entity_type='mp' AND window_id='current_parliament'"
).fetchall()
+ # Individual MPs have "Lastname, F." format; party rows are short codes without commas
vectors: List[List[float]] = []
- for (raw_vec,) in rows:
+ for entity_id, raw_vec in rows:
+ if "," not in entity_id:
+ continue # skip party-aggregated rows
if isinstance(raw_vec, str):
vec = json.loads(raw_vec)
elif isinstance(raw_vec, (bytes, bytearray)):
@@ -289,7 +293,7 @@ def load_scree_data(db_path: str) -> List[float]:
col = [v[dim] for v in vectors if dim < len(v)]
l2 = sum(x**2 for x in col) ** 0.5
importances.append(l2)
- return importances
+ return sorted(importances, reverse=True)
except Exception:
logger.exception("Failed to load scree data")
return []
@@ -301,33 +305,47 @@ def load_scree_data(db_path: str) -> List[float]:
def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
- """Render a bar chart showing relative component importance (scree plot).
+ """Render a bar+line combo chart showing relative SVD component importance.
+
+ Bars show the L2-norm (singular value proxy) per rank; a line connects the tops
+ of the bars to make the 'elbow' in the scree curve easy to spot.
Args:
- importances: List of L2-norm scores per component (0-indexed).
+ importances: List of importance values sorted descending (from load_scree_data).
n_show: How many components to display (default: first 15).
"""
if not importances:
return
data = importances[:n_show]
- components = list(range(1, len(data) + 1))
- colours = [
- PARTY_COLOURS.get("PVV", "#1565C0") if i == 0 else "#90CAF9"
- for i in range(len(data))
- ]
- fig = go.Figure(
+ ranks = list(range(1, len(data) + 1))
+ bar_colour = "#90CAF9"
+ line_colour = "#1565C0"
+ fig = go.Figure()
+ fig.add_trace(
go.Bar(
- x=components,
+ x=ranks,
y=data,
- marker_color=colours,
- hovertemplate="As %{x}<br>Gewicht: %{y:.2f}",
+ marker_color=bar_colour,
+ hovertemplate="Rang %{x}<br>Gewicht: %{y:.2f}",
+ showlegend=False,
+ )
+ )
+ fig.add_trace(
+ go.Scatter(
+ x=ranks,
+ y=data,
+ mode="lines+markers",
+ line={"color": line_colour, "width": 2},
+ marker={"size": 6, "color": line_colour},
+ hoverinfo="skip",
+ showlegend=False,
)
)
fig.update_layout(
height=220,
margin={"l": 10, "r": 10, "t": 10, "b": 30},
xaxis={
- "title": "SVD-as",
+ "title": "Rang",
"tickmode": "linear",
"tick0": 1,
"dtick": 1,
@@ -342,6 +360,7 @@ def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
},
plot_bgcolor="rgba(0,0,0,0)",
paper_bgcolor="rgba(0,0,0,0)",
+ bargap=0.2,
)
st.plotly_chart(fig, use_container_width=True)