fix(svd): use averaged individual MP vectors for party axis scores, fix scree to %, re-add component 4

- load_party_axis_scores now loads individual MP vectors and averages
  them per party (the same data source as the political compass), so SVD
  axis rankings are consistent between the two tabs. Previously it used
  the party-aggregate rows, which structurally gave zero signal on dim 3.

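- Re-add component 4 (dim 3) to SVD_THEMES and revert the comp_options
  filter, so every component in comp_map is selectable again rather than
  only those with a defined theme. With individual MP vectors averaged
  per party, dim 3 now shows real party separation (the 'publieke
  voorzieningen vs marktwerking' axis).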

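- Scree plot y-axis now shows each component's share of the total as a
  percentage (value / sum of all importances * 100, labelled '% van
  totale variantie') instead of raw L2-norms. The total is taken over
  all components, not only the n_show that are displayed, so the bars
  need not add up to 100%. Hover also updated to show '% van totaal'.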
main
Sven Geboers 1 month ago
parent 0a2238f2ba
commit a7517bb6ae
  1. 83
      explorer.py

@ -216,27 +216,47 @@ def load_party_map(db_path: str) -> Dict[str, str]:
@st.cache_data(show_spinner="Partijposities op SVD-assen laden…")
def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
"""Return per-party SVD vectors for window='current_parliament'.
"""Return per-party SVD vectors, computed as mean of individual MP vectors.
Queries svd_vectors WHERE entity_type='mp' AND window_id='current_parliament'
AND entity_id is a known current-parliament party.
Loads individual MP rows (entity_id LIKE '%,%') from window='current_parliament',
assigns each MP their party using the dominant party from mp_votes, then
averages SVD vectors per party.
This matches the political compass data source (also averages individual MPs),
so axis rankings are consistent between the SVD tab and the compass.
Returns:
{party_name: [float * k]} k = 50 for the canonical current_parliament window.
Duplicate rows for the same party are de-duplicated (last row wins).
{party_name: [float * k]} k = 50, mean over all MPs in that party.
"""
try:
con = duckdb.connect(database=db_path, read_only=True)
party_list = sorted(CURRENT_PARLIAMENT_PARTIES)
placeholders = ", ".join("?" for _ in party_list)
# Dominant party per individual MP from mp_votes (majority-vote assignment)
party_rows = con.execute(
"SELECT mp_name, party, COUNT(*) as n FROM mp_votes "
"WHERE party IS NOT NULL AND party != '' AND mp_name LIKE '%,%' "
"GROUP BY mp_name, party"
).fetchall()
party_counts: Dict[str, Dict[str, int]] = {}
for mp_name, party, n in party_rows:
party_counts.setdefault(mp_name, {})[party] = n
mp_party: Dict[str, str] = {
mp: max(counts, key=counts.__getitem__)
for mp, counts in party_counts.items()
}
# Individual MP vectors from current_parliament
rows = con.execute(
f"SELECT entity_id, vector FROM svd_vectors "
f"WHERE entity_type='mp' AND window_id='current_parliament' "
f"AND entity_id IN ({placeholders})",
party_list,
"SELECT entity_id, vector FROM svd_vectors "
"WHERE entity_type='mp' AND window_id='current_parliament' "
"AND entity_id LIKE '%,%'"
).fetchall()
result: Dict[str, List[float]] = {}
party_vecs: Dict[str, list] = {}
for entity_id, raw_vec in rows:
party = mp_party.get(entity_id)
if party is None or party not in CURRENT_PARLIAMENT_PARTIES:
continue
if isinstance(raw_vec, str):
vec = json.loads(raw_vec)
elif isinstance(raw_vec, (bytes, bytearray)):
@ -248,7 +268,13 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
vec = list(raw_vec)
except Exception:
continue
result[entity_id] = [float(v) if v is not None else 0.0 for v in vec]
fvec = [float(v) if v is not None else 0.0 for v in vec]
party_vecs.setdefault(party, []).append(fvec)
# Average vectors per party
result: Dict[str, List[float]] = {}
for party, vecs in party_vecs.items():
result[party] = np.array(vecs).mean(axis=0).tolist()
return result
except Exception:
logger.exception("Failed to load party axis scores")
@ -329,7 +355,9 @@ def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
"""
if not importances:
return
data = importances[:n_show]
total = sum(importances) or 1.0
raw = importances[:n_show]
data = [v / total * 100 for v in raw]
ranks = list(range(1, len(data) + 1))
bar_colour = "#90CAF9"
line_colour = "#1565C0"
@ -339,7 +367,7 @@ def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
x=ranks,
y=data,
marker_color=bar_colour,
hovertemplate="Rang %{x}<br>Gewicht: %{y:.2f}<extra></extra>",
hovertemplate="Rang %{x}<br>%{y:.1f}% van totaal<extra></extra>",
showlegend=False,
)
)
@ -366,10 +394,11 @@ def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
"showgrid": False,
},
yaxis={
"title": "Relatief gewicht",
"title": "% van totale variantie",
"showline": False,
"showgrid": True,
"gridcolor": "#eeeeee",
"ticksuffix": "%",
},
plot_bgcolor="rgba(0,0,0,0)",
paper_bgcolor="rgba(0,0,0,0)",
@ -1017,6 +1046,24 @@ def build_svd_components_tab(db_path: str) -> None:
"negative_pole": "Strikte handhaving, deregulering en nationalistisch eigenbelang boven humanitaire verplichtingen",
"flip": True,
},
4: {
"label": "Publieke voorzieningen beschermen versus liberale marktwerking",
"explanation": (
"Deze as weerspiegelt de klassieke sociaal-economische tegenstelling tussen links en "
"liberaal-economisch rechts. Aan de positieve kant staan moties van SP en DENK die "
"pleiten voor betaalbare zorg, lage treintarieven, bescherming van politiepersoneel en "
"regionale brandweerposten — allemaal gericht op het beschermen van publieke voorzieningen "
"voor gewone burgers. Aan de negatieve kant staan moties van VVD, D66, Volt en NSC die "
"pleiten voor het EU-Mercosur vrijhandelsverdrag en een flexibele kennismigrantenregeling "
"ten behoeve van het economisch verdienvermogen. Deze dimensie is politiek betekenisvol "
"omdat hij de fundamentele vraag raakt of de staat actief moet ingrijpen om collectieve "
"voorzieningen betaalbaar en toegankelijk te houden, of dat vrije markt en open handel "
"leidend moeten zijn."
),
"positive_pole": "Vrije handel, open economie en marktgerichte arbeidsmigratie",
"negative_pole": "Staatsbescherming van betaalbare publieke voorzieningen voor iedereen",
"flip": False,
},
5: {
"label": "Christelijk-conservatief sociaal beleid versus seculier progressief",
"explanation": (
@ -1159,9 +1206,7 @@ def build_svd_components_tab(db_path: str) -> None:
if r.get("motion_id") not in existing_ids:
bucket.append(r)
# Only show components that have a defined theme (excludes e.g. dim 3 which
# captures within-party individual variance and is uninformative at party level).
comp_options = sorted(c for c in comp_map.keys() if c in SVD_THEMES)
comp_options = sorted(comp_map.keys())
# Build display labels for selectbox: "As 1 — Regulering vs. status-quo"
def _comp_label(c: int) -> str:

Loading…
Cancel
Save