diff --git a/explorer.py b/explorer.py index ffe2c9e..597ce2b 100644 --- a/explorer.py +++ b/explorer.py @@ -216,27 +216,47 @@ def load_party_map(db_path: str) -> Dict[str, str]: @st.cache_data(show_spinner="Partijposities op SVD-assen laden…") def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]: - """Return per-party SVD vectors for window='current_parliament'. + """Return per-party SVD vectors, computed as mean of individual MP vectors. - Queries svd_vectors WHERE entity_type='mp' AND window_id='current_parliament' - AND entity_id is a known current-parliament party. + Loads individual MP rows (entity_id LIKE '%,%') from window='current_parliament', + assigns each MP their party using the dominant party from mp_votes, then + averages SVD vectors per party. + + This matches the political compass data source (also averages individual MPs), + so axis rankings are consistent between the SVD tab and the compass. Returns: - {party_name: [float * k]} — k = 50 for the canonical current_parliament window. - Duplicate rows for the same party are de-duplicated (last row wins). + {party_name: [float * k]} — k = 50, mean over all MPs in that party. """ try: con = duckdb.connect(database=db_path, read_only=True) - party_list = sorted(CURRENT_PARLIAMENT_PARTIES) - placeholders = ", ".join("?" 
for _ in party_list) + + # Dominant party per individual MP from mp_votes (majority-vote assignment) + party_rows = con.execute( + "SELECT mp_name, party, COUNT(*) as n FROM mp_votes " + "WHERE party IS NOT NULL AND party != '' AND mp_name LIKE '%,%' " + "GROUP BY mp_name, party" + ).fetchall() + party_counts: Dict[str, Dict[str, int]] = {} + for mp_name, party, n in party_rows: + party_counts.setdefault(mp_name, {})[party] = n + mp_party: Dict[str, str] = { + mp: max(counts, key=counts.__getitem__) + for mp, counts in party_counts.items() + } + + # Individual MP vectors from current_parliament rows = con.execute( - f"SELECT entity_id, vector FROM svd_vectors " - f"WHERE entity_type='mp' AND window_id='current_parliament' " - f"AND entity_id IN ({placeholders})", - party_list, + "SELECT entity_id, vector FROM svd_vectors " + "WHERE entity_type='mp' AND window_id='current_parliament' " + "AND entity_id LIKE '%,%'" ).fetchall() - result: Dict[str, List[float]] = {} + + party_vecs: Dict[str, list] = {} for entity_id, raw_vec in rows: + party = mp_party.get(entity_id) + if party is None or party not in CURRENT_PARLIAMENT_PARTIES: + continue if isinstance(raw_vec, str): vec = json.loads(raw_vec) elif isinstance(raw_vec, (bytes, bytearray)): @@ -248,7 +268,13 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]: vec = list(raw_vec) except Exception: continue - result[entity_id] = [float(v) if v is not None else 0.0 for v in vec] + fvec = [float(v) if v is not None else 0.0 for v in vec] + party_vecs.setdefault(party, []).append(fvec) + + # Average vectors per party + result: Dict[str, List[float]] = {} + for party, vecs in party_vecs.items(): + result[party] = np.array(vecs).mean(axis=0).tolist() return result except Exception: logger.exception("Failed to load party axis scores") @@ -329,7 +355,9 @@ def _render_scree_plot(importances: List[float], n_show: int = 15) -> None: """ if not importances: return - data = importances[:n_show] + total = 
sum(importances) or 1.0 + raw = importances[:n_show] + data = [v / total * 100 for v in raw] ranks = list(range(1, len(data) + 1)) bar_colour = "#90CAF9" line_colour = "#1565C0" @@ -339,7 +367,7 @@ def _render_scree_plot(importances: List[float], n_show: int = 15) -> None: x=ranks, y=data, marker_color=bar_colour, - hovertemplate="Rang %{x}<br>Gewicht: %{y:.2f}", + hovertemplate="Rang %{x}<br>%{y:.1f}% van totaal", showlegend=False, ) ) @@ -366,10 +394,11 @@ def _render_scree_plot(importances: List[float], n_show: int = 15) -> None: "showgrid": False, }, yaxis={ - "title": "Relatief gewicht", + "title": "% van totale variantie", "showline": False, "showgrid": True, "gridcolor": "#eeeeee", + "ticksuffix": "%", }, plot_bgcolor="rgba(0,0,0,0)", paper_bgcolor="rgba(0,0,0,0)", @@ -1017,6 +1046,24 @@ def build_svd_components_tab(db_path: str) -> None: "negative_pole": "Strikte handhaving, deregulering en nationalistisch eigenbelang boven humanitaire verplichtingen", "flip": True, }, + 4: { + "label": "Publieke voorzieningen beschermen versus liberale marktwerking", + "explanation": ( + "Deze as weerspiegelt de klassieke sociaal-economische tegenstelling tussen links en " + "liberaal-economisch rechts. Aan de positieve kant staan moties van SP en DENK die " + "pleiten voor betaalbare zorg, lage treintarieven, bescherming van politiepersoneel en " + "regionale brandweerposten — allemaal gericht op het beschermen van publieke voorzieningen " + "voor gewone burgers. Aan de negatieve kant staan moties van VVD, D66, Volt en NSC die " + "pleiten voor het EU-Mercosur vrijhandelsverdrag en een flexibele kennismigrantenregeling " + "ten behoeve van het economisch verdienvermogen. Deze dimensie is politiek betekenisvol " + "omdat hij de fundamentele vraag raakt of de staat actief moet ingrijpen om collectieve " + "voorzieningen betaalbaar en toegankelijk te houden, of dat vrije markt en open handel " + "leidend moeten zijn." 
+ ), + "positive_pole": "Vrije handel, open economie en marktgerichte arbeidsmigratie", + "negative_pole": "Staatsbescherming van betaalbare publieke voorzieningen voor iedereen", + "flip": False, + }, 5: { "label": "Christelijk-conservatief sociaal beleid versus seculier progressief", "explanation": ( @@ -1159,9 +1206,7 @@ def build_svd_components_tab(db_path: str) -> None: if r.get("motion_id") not in existing_ids: bucket.append(r) - # Only show components that have a defined theme (excludes e.g. dim 3 which - # captures within-party individual variance and is uninformative at party level). - comp_options = sorted(c for c in comp_map.keys() if c in SVD_THEMES) + comp_options = sorted(comp_map.keys()) # Build display labels for selectbox: "As 1 — Regulering vs. status-quo" def _comp_label(c: int) -> str: