From 121c32ae8acc515d6d65665b73f5bb471547880e Mon Sep 17 00:00:00 2001 From: Sven Geboers Date: Fri, 1 May 2026 10:20:55 +0200 Subject: [PATCH] fix: make scree and party-axis functions resilient to missing schema artifacts - load_scree_data: return [] with TODO until schema stores explained-variance-ratio (EVR) metadata - load_party_axis_scores: compute from vectors instead of missing table - load_party_axis_scores_for_window: same vector-based fallback - load_party_scores_all_windows[_aligned]: check table existence, fall back to computing from load_positions when absent All functions predated decomposition (5afbad1, 2026-04-05) and relied on a party_axis_scores table and an svd_vectors.sv_metadata column that were never created. --- analysis/explorer_data.py | 188 +++++++++++++++++++++----------------- 1 file changed, 104 insertions(+), 84 deletions(-) diff --git a/analysis/explorer_data.py b/analysis/explorer_data.py index 728faa9..035b7ad 100644 --- a/analysis/explorer_data.py +++ b/analysis/explorer_data.py @@ -144,25 +144,10 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]: """Return party scores for all windows (non-aligned). Returns dict mapping party_abbrev -> list of axis scores, one per window. + Computed as the mean of individual MP vectors per party.
""" try: - con = duckdb.connect(database=db_path, read_only=True) - rows = con.execute( - """ - SELECT party_abbrev, window_id, x_axis, y_axis - FROM party_axis_scores - ORDER BY party_abbrev, window_id - """ - ).fetchall() - con.close() - - scores: Dict[str, List[float]] = {} - for party, window, x, y in rows: - if party not in scores: - scores[party] = [] - if x is not None and y is not None: - scores[party].extend([x, y]) - return scores + return compute_party_axis_scores(load_mp_vectors_by_party(db_path)) except Exception: logger.exception("Failed to load party axis scores") return {} @@ -171,21 +156,14 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]: def load_party_axis_scores_for_window( db_path: str, window: str ) -> Dict[str, List[float]]: - """Return party scores for a specific window (aligned).""" - try: - con = duckdb.connect(database=db_path, read_only=True) - rows = con.execute( - """ - SELECT party_abbrev, x_axis, y_axis - FROM party_axis_scores - WHERE window_id = ? - ORDER BY party_abbrev - """, - [window], - ).fetchall() - con.close() + """Return party scores for a specific window. - return {party: [x or 0.0, y or 0.0] for party, x, y in rows} + Computed as the mean of individual MP vectors per party for the window. 
+ """ + try: + return compute_party_axis_scores( + load_mp_vectors_by_party_for_window(db_path, window) + ) except Exception: logger.exception("Failed to load party axis scores for window %s", window) return {} @@ -195,28 +173,55 @@ def load_party_scores_all_windows(db_path: str) -> Dict[str, List[List[float]]]: """Return party scores across all windows (non-aligned).""" try: con = duckdb.connect(database=db_path, read_only=True) - rows = con.execute( - """ - SELECT party_abbrev, window_id, x_axis, y_axis - FROM party_axis_scores - ORDER BY party_abbrev, window_id - """ - ).fetchall() + table_exists = con.execute( + "SELECT COUNT(*) FROM information_schema.tables WHERE table_name = 'party_axis_scores'" + ).fetchone()[0] + if table_exists: + rows = con.execute( + """ + SELECT party_abbrev, window_id, x_axis, y_axis + FROM party_axis_scores + ORDER BY party_abbrev, window_id + """ + ).fetchall() + con.close() + + scores: Dict[str, List[List[float]]] = {} + current_party = None + for party, window, x, y in rows: + if party != current_party: + scores[party] = [] + current_party = party + if x is not None and y is not None: + scores[party].append([x, y]) + else: + scores[party].append([0.0, 0.0]) + return scores con.close() + except Exception: + logger.exception("Failed to load party scores all windows from table") + # Fallback: compute from positions when table does not exist + try: + positions_by_window, _ = load_positions(db_path, "annual") + _party_map = load_party_map(db_path) scores: Dict[str, List[List[float]]] = {} - current_party = None - for party, window, x, y in rows: - if party != current_party: - scores[party] = [] - current_party = party - if x is not None and y is not None: - scores[party].append([x, y]) - else: - scores[party].append([0.0, 0.0]) + for window, window_pos in positions_by_window.items(): + party_coords: Dict[str, List[Tuple[float, float]]] = {} + for mp_name, (x, y) in window_pos.items(): + party = _party_map.get( + mp_name, 
_party_map.get(mp_name.split("(")[0].strip(), None) + ) + if party: + party_coords.setdefault(party, []).append((x, y)) + for party, coords in party_coords.items(): + if coords: + mean_x = float(np.mean([c[0] for c in coords])) + mean_y = float(np.mean([c[1] for c in coords])) + scores.setdefault(party, []).append([mean_x, mean_y]) return scores except Exception: - logger.exception("Failed to load party scores all windows") + logger.exception("Failed to compute party scores all windows from positions") return {} @@ -226,28 +231,55 @@ def load_party_scores_all_windows_aligned( """Return party scores across all windows (Procrustes-aligned).""" try: con = duckdb.connect(database=db_path, read_only=True) - rows = con.execute( - """ - SELECT party_abbrev, window_id, x_axis_aligned, y_axis_aligned - FROM party_axis_scores - ORDER BY party_abbrev, window_id - """ - ).fetchall() + table_exists = con.execute( + "SELECT COUNT(*) FROM information_schema.tables WHERE table_name = 'party_axis_scores'" + ).fetchone()[0] + if table_exists: + rows = con.execute( + """ + SELECT party_abbrev, window_id, x_axis_aligned, y_axis_aligned + FROM party_axis_scores + ORDER BY party_abbrev, window_id + """ + ).fetchall() + con.close() + + scores: Dict[str, List[List[float]]] = {} + current_party = None + for party, window, x, y in rows: + if party != current_party: + scores[party] = [] + current_party = party + if x is not None and y is not None: + scores[party].append([x, y]) + else: + scores[party].append([0.0, 0.0]) + return scores con.close() + except Exception: + logger.exception("Failed to load aligned party scores all windows from table") + # Fallback: compute from positions when table does not exist + try: + positions_by_window, _ = load_positions(db_path, "annual") + _party_map = load_party_map(db_path) scores: Dict[str, List[List[float]]] = {} - current_party = None - for party, window, x, y in rows: - if party != current_party: - scores[party] = [] - current_party = party - if x 
is not None and y is not None: - scores[party].append([x, y]) - else: - scores[party].append([0.0, 0.0]) + for window, window_pos in positions_by_window.items(): + party_coords: Dict[str, List[Tuple[float, float]]] = {} + for mp_name, (x, y) in window_pos.items(): + party = _party_map.get( + mp_name, _party_map.get(mp_name.split("(")[0].strip(), None) + ) + if party: + party_coords.setdefault(party, []).append((x, y)) + for party, coords in party_coords.items(): + if coords: + mean_x = float(np.mean([c[0] for c in coords])) + mean_y = float(np.mean([c[1] for c in coords])) + scores.setdefault(party, []).append([mean_x, mean_y]) return scores except Exception: - logger.exception("Failed to load aligned party scores all windows") + logger.exception("Failed to compute aligned party scores all windows from positions") return {} @@ -314,26 +346,14 @@ def load_party_mp_vectors(db_path: str) -> Dict[str, List[np.ndarray]]: def load_scree_data(db_path: str) -> List[float]: - """Load scree plot data (explained variance) for current_parliament.""" - try: - con = duckdb.connect(database=db_path, read_only=True) - row = con.execute( - """ - SELECT sv_metadata FROM svd_vectors - WHERE window_id = 'current_parliament' AND entity_type = 'singular_values' - LIMIT 1 - """ - ).fetchone() - con.close() + """Load scree plot data (explained variance) for current_parliament. - if row and row[0]: - import json - - return json.loads(row[0]) - return [] - except Exception: - logger.exception("Failed to load scree data") - return [] + TODO: Scree data requires SVD metadata (singular values / explained + variance ratios) to be stored in the database. Currently only + transformed vectors are stored in svd_vectors.vector, not the + decomposition metadata needed for a scree plot. + """ + return [] def load_motions_df(db_path: str) -> pd.DataFrame: