diff --git a/analysis/axis_classifier.py b/analysis/axis_classifier.py
index 14a281f..b4568c2 100644
--- a/analysis/axis_classifier.py
+++ b/analysis/axis_classifier.py
@@ -71,10 +71,6 @@ _INTERPRETATION_TEMPLATES = {
         "oppositiepartijen (r={r:.2f}). Ideologische tegenstellingen zijn minder dominant dit jaar."
     ),
     "pc": "De {orientation} as weerspiegelt de progressief-conservatieve tegenstelling.",
-    "fallback": (
-        "De {orientation} as weerspiegelt een empirisch stempatroon "
-        "zonder duidelijke ideologische richting."
-    ),
 }
 
 # Maps motion-path keyword labels to _INTERPRETATION_TEMPLATES keys.
@@ -459,7 +455,7 @@ def _assign_label(
         )
         return (
             fallback_label,
-            _INTERPRETATION_TEMPLATES["fallback"].format(orientation=orientation),
+            "",  # No interpretation for unclassified axes
             quality,
         )
 
@@ -617,14 +613,10 @@ def classify_axes(
         _x_fallback, _y_fallback = get_fallback_labels()
         if x_lbl is None:
             x_lbl = _x_fallback
-            x_int = _INTERPRETATION_TEMPLATES["fallback"].format(
-                orientation="horizontale"
-            )
+            x_int = ""  # No interpretation for unclassified axes
         if y_lbl is None:
             y_lbl = _y_fallback
-            y_int = _INTERPRETATION_TEMPLATES["fallback"].format(
-                orientation="verticale"
-            )
+            y_int = ""  # No interpretation for unclassified axes
 
         x_quality[wid] = x_q
         y_quality[wid] = y_q
diff --git a/explorer.py b/explorer.py
index 6e6397e..a4cc97a 100644
--- a/explorer.py
+++ b/explorer.py
@@ -1016,6 +1016,108 @@ def _load_mp_vectors_by_party(db_path: str) -> Dict[str, List[np.ndarray]]:
             pass
 
 
+def _load_mp_vectors_by_party_for_window(
+    db_path: str, window: str
+) -> Dict[str, List[np.ndarray]]:
+    """Load individual MP SVD vectors for one window, grouped by party.
+
+    Window-specific counterpart of _load_mp_vectors_by_party.
+
+    Party membership is read from mp_metadata:
+      * "current_parliament" uses the fixed 2023-11-22 cut-off and keeps
+        only parties listed in CURRENT_PARLIAMENT_PARTIES.
+      * Any other window is matched on its leading year, so "2015" and
+        "2015-Q1" both select MPs whose term overlaps calendar year 2015.
+    If an MP has several metadata rows, the one with the latest "van" wins.
+
+    Args:
+        db_path: Path of the DuckDB database; opened read-only.
+        window: window_id to select from svd_vectors.
+
+    Returns:
+        {party_name: [np.ndarray, ...]} — one 1-D float array per MP.
+        MPs without a known party and rows whose vector cannot be decoded
+        are skipped; None components become 0.0.
+    """
+    con = duckdb.connect(database=db_path, read_only=True)
+    try:
+        is_current = window == "current_parliament"
+
+        if is_current:
+            # Current parliament: fixed cut-off date for MP terms.
+            meta_rows = con.execute(
+                "SELECT mp_name, party FROM mp_metadata "
+                "WHERE van >= '2023-11-22' OR tot_en_met IS NULL OR tot_en_met >= '2023-11-22' "
+                "ORDER BY van ASC"
+            ).fetchall()
+        else:
+            # Historical window: the token before the first "-" is the year.
+            try:
+                year = int(window.split("-")[0])
+            except ValueError:
+                # Keep the best-effort default, but leave a trace: party
+                # labels for this window are then based on 2023 membership.
+                logger.warning(
+                    "Cannot parse a year from window %r; using 2023 instead",
+                    window,
+                )
+                year = 2023
+
+            # MPs whose term overlaps the calendar year.
+            meta_rows = con.execute(
+                "SELECT mp_name, party FROM mp_metadata "
+                "WHERE van <= ? AND (tot_en_met IS NULL OR tot_en_met >= ?) "
+                "ORDER BY van ASC",
+                [f"{year}-12-31", f"{year}-01-01"],
+            ).fetchall()
+
+        # Rows arrive in ascending "van" order, so later terms overwrite earlier ones.
+        mp_party: Dict[str, str] = {}
+        for mp_name, party in meta_rows:
+            if mp_name and party:
+                mp_party[mp_name] = _PARTY_NORMALIZE.get(party, party)
+
+        rows = con.execute(
+            "SELECT entity_id, vector FROM svd_vectors "
+            "WHERE entity_type='mp' AND window_id=?",
+            [window],
+        ).fetchall()
+
+        party_vecs: Dict[str, List[np.ndarray]] = {}
+        for entity_id, raw_vec in rows:
+            party = mp_party.get(entity_id)
+            if party is None:
+                continue
+            # Historical windows keep every party found in the metadata.
+            if is_current and party not in CURRENT_PARLIAMENT_PARTIES:
+                continue
+            try:
+                if isinstance(raw_vec, (bytes, bytearray)):
+                    raw_vec = raw_vec.decode()
+                if isinstance(raw_vec, str):
+                    vec = json.loads(raw_vec)
+                else:
+                    vec = list(raw_vec)
+                fvec = np.array([float(v) if v is not None else 0.0 for v in vec])
+            except Exception:
+                # A single undecodable row must not abort loading the
+                # whole window; skip it and keep the remaining MPs.
+                logger.warning(
+                    "Skipping undecodable SVD vector for MP %r in window %r",
+                    entity_id,
+                    window,
+                )
+                continue
+            party_vecs.setdefault(party, []).append(fvec)
+
+        return party_vecs
+    finally:
+        try:
+            con.close()
+        except Exception:
+            pass  # best-effort close; nothing useful to do on failure
+
+
 @st.cache_data(show_spinner="Partijposities op SVD-assen laden…")
 def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
     """Return per-party SVD vectors, computed as mean of individual MP vectors.
@@ -1037,6 +1139,35 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
         return {}
 
 
+@st.cache_data(show_spinner="Partijposities voor jaar laden…")
+def load_party_axis_scores_for_window(
+    db_path: str, window: str
+) -> Dict[str, List[float]]:
+    """Return the mean MP SVD vector per party for one window.
+
+    Window-specific counterpart of load_party_axis_scores.
+
+    Args:
+        db_path: Path of the DuckDB database.
+        window: window_id to load.
+
+    Returns:
+        {party_name: [float, ...]} — element-wise mean over that party's
+        MP vectors in the window. An empty dict if loading fails; the
+        failure is logged.
+    """
+    try:
+        party_vecs = _load_mp_vectors_by_party_for_window(db_path, window)
+        return {
+            party: np.array(vecs).mean(axis=0).tolist()
+            for party, vecs in party_vecs.items()
+        }
+    except Exception:
+        # Lazy %-style args: formatting is left to the logging framework.
+        logger.exception("Failed to load party axis scores for window %s", window)
+        return {}
+
+
 @st.cache_data(show_spinner="Partij-MP vectoren laden…")
 def load_party_mp_vectors(db_path: str) -> Dict[str, List[np.ndarray]]:
     """Return per-party lists of individual MP SVD vectors.
@@ -1823,86 +1954,6 @@ def build_compass_tab(db_path: str, window_size: str) -> None:
         f"De sterkste component verklaart {evr0:.1%} van de variantie in stemgedrag."
     )
 
-    # --- Voting discipline section ---
-    _MIN_MOTIONS_FOR_DISCIPLINE = 5
-    start_date, end_date = _window_to_dates(window_idx)
-    disc_df = compute_party_discipline(db_path, start_date, end_date)
-
-    st.subheader("Stemgedrag cohesie")
-    if disc_df.empty:
-        st.caption(
-            "Te weinig hoofdelijke stemmingen in dit venster voor een cohesieanalyse."
-        )
-    else:
-        disc_df = disc_df[disc_df["n_motions"] >= _MIN_MOTIONS_FOR_DISCIPLINE].copy()
-        if disc_df.empty:
-            st.caption(
-                "Te weinig hoofdelijke stemmingen in dit venster voor een cohesieanalyse."
-            )
-        else:
-            compass_parties = set(df_pos["party"].unique())
-            disc_df = disc_df[disc_df["party"].isin(compass_parties)].copy()
-            if disc_df.empty:
-                st.caption("Geen overlappende partijen tussen kompas en stemmingsdata.")
-            else:
-                disc_df["discipline_pct"] = (disc_df["discipline"] * 100).round(1)
-                disc_df["party_label"] = disc_df.apply(
-                    lambda r: f"{r['party']} ({int(r['n_motions'])} moties)", axis=1
-                )
-
-                bar_fig = px.bar(
-                    disc_df.sort_values("discipline"),
-                    x="discipline_pct",
-                    y="party_label",
-                    orientation="h",
-                    color="discipline_pct",
-                    color_continuous_scale="RdYlGn",
-                    range_color=[80, 100],
-                    labels={"discipline_pct": "Cohesie (%)", "party_label": "Partij"},
-                    title="Cohesie bij hoofdelijke stemmingen",
-                )
-                bar_fig.update_layout(
-                    height=max(300, len(disc_df) * 35 + 80),
-                    showlegend=False,
-                    coloraxis_showscale=False,
-                    yaxis_title="",
-                )
-                st.plotly_chart(bar_fig, use_container_width=True)
-
-                top3 = disc_df.nlargest(3, "discipline")[
-                    ["party", "discipline_pct", "n_motions"]
-                ]
-                bot3 = disc_df.nsmallest(3, "discipline")[
-                    ["party", "discipline_pct", "n_motions"]
-                ]
-                col_a, col_b = st.columns(2)
-                with col_a:
-                    st.markdown("**Meest eensgezind**")
-                    st.dataframe(
-                        top3.rename(
-                            columns={
-                                "party": "Partij",
-                                "discipline_pct": "Cohesie (%)",
-                                "n_motions": "Moties",
-                            }
-                        ),
-                        hide_index=True,
-                        use_container_width=True,
-                    )
-                with col_b:
-                    st.markdown("**Meest verdeeld**")
-                    st.dataframe(
-                        bot3.rename(
-                            columns={
-                                "party": "Partij",
-                                "discipline_pct": "Cohesie (%)",
-                                "n_motions": "Moties",
-                            }
-                        ),
-                        hide_index=True,
-                        use_container_width=True,
-                    )
-
 
 # ---------------------------------------------------------------------------
 # Tab 2: Partij Trajectories