fix: make scree and party-axis functions resilient to missing schema artifacts

- load_scree_data: return [] (with a TODO) until the schema stores explained-variance-ratio (EVR) metadata
- load_party_axis_scores: compute from vectors instead of missing table
- load_party_axis_scores_for_window: same vector-based fallback
- load_party_scores_all_windows[_aligned]: check table existence,
  fall back to computing from load_positions when absent

All functions predated decomposition (5afbad1, 2026-04-05) and relied on
the party_axis_scores table and the svd_vectors.sv_metadata column, neither
of which was ever created.
main
Sven Geboers 4 weeks ago
parent 09bb99658f
commit 121c32ae8a
  1. 122
      analysis/explorer_data.py

@ -144,25 +144,10 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
"""Return party scores for all windows (non-aligned).
Returns dict mapping party_abbrev -> list of axis scores, one per window.
Computed as the mean of individual MP vectors per party.
"""
try:
con = duckdb.connect(database=db_path, read_only=True)
rows = con.execute(
"""
SELECT party_abbrev, window_id, x_axis, y_axis
FROM party_axis_scores
ORDER BY party_abbrev, window_id
"""
).fetchall()
con.close()
scores: Dict[str, List[float]] = {}
for party, window, x, y in rows:
if party not in scores:
scores[party] = []
if x is not None and y is not None:
scores[party].extend([x, y])
return scores
return compute_party_axis_scores(load_mp_vectors_by_party(db_path))
except Exception:
logger.exception("Failed to load party axis scores")
return {}
@ -171,21 +156,14 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
def load_party_axis_scores_for_window(
db_path: str, window: str
) -> Dict[str, List[float]]:
"""Return party scores for a specific window (aligned)."""
try:
con = duckdb.connect(database=db_path, read_only=True)
rows = con.execute(
"""
SELECT party_abbrev, x_axis, y_axis
FROM party_axis_scores
WHERE window_id = ?
ORDER BY party_abbrev
""",
[window],
).fetchall()
con.close()
"""Return party scores for a specific window.
return {party: [x or 0.0, y or 0.0] for party, x, y in rows}
Computed as the mean of individual MP vectors per party for the window.
"""
try:
return compute_party_axis_scores(
load_mp_vectors_by_party_for_window(db_path, window)
)
except Exception:
logger.exception("Failed to load party axis scores for window %s", window)
return {}
@ -195,6 +173,10 @@ def load_party_scores_all_windows(db_path: str) -> Dict[str, List[List[float]]]:
"""Return party scores across all windows (non-aligned)."""
try:
con = duckdb.connect(database=db_path, read_only=True)
table_exists = con.execute(
"SELECT COUNT(*) FROM information_schema.tables WHERE table_name = 'party_axis_scores'"
).fetchone()[0]
if table_exists:
rows = con.execute(
"""
SELECT party_abbrev, window_id, x_axis, y_axis
@ -215,8 +197,31 @@ def load_party_scores_all_windows(db_path: str) -> Dict[str, List[List[float]]]:
else:
scores[party].append([0.0, 0.0])
return scores
con.close()
except Exception:
logger.exception("Failed to load party scores all windows")
logger.exception("Failed to load party scores all windows from table")
# Fallback: compute from positions when table does not exist
try:
positions_by_window, _ = load_positions(db_path, "annual")
_party_map = load_party_map(db_path)
scores: Dict[str, List[List[float]]] = {}
for window, window_pos in positions_by_window.items():
party_coords: Dict[str, List[Tuple[float, float]]] = {}
for mp_name, (x, y) in window_pos.items():
party = _party_map.get(
mp_name, _party_map.get(mp_name.split("(")[0].strip(), None)
)
if party:
party_coords.setdefault(party, []).append((x, y))
for party, coords in party_coords.items():
if coords:
mean_x = float(np.mean([c[0] for c in coords]))
mean_y = float(np.mean([c[1] for c in coords]))
scores.setdefault(party, []).append([mean_x, mean_y])
return scores
except Exception:
logger.exception("Failed to compute party scores all windows from positions")
return {}
@ -226,6 +231,10 @@ def load_party_scores_all_windows_aligned(
"""Return party scores across all windows (Procrustes-aligned)."""
try:
con = duckdb.connect(database=db_path, read_only=True)
table_exists = con.execute(
"SELECT COUNT(*) FROM information_schema.tables WHERE table_name = 'party_axis_scores'"
).fetchone()[0]
if table_exists:
rows = con.execute(
"""
SELECT party_abbrev, window_id, x_axis_aligned, y_axis_aligned
@ -246,8 +255,31 @@ def load_party_scores_all_windows_aligned(
else:
scores[party].append([0.0, 0.0])
return scores
con.close()
except Exception:
logger.exception("Failed to load aligned party scores all windows from table")
# Fallback: compute from positions when table does not exist
try:
positions_by_window, _ = load_positions(db_path, "annual")
_party_map = load_party_map(db_path)
scores: Dict[str, List[List[float]]] = {}
for window, window_pos in positions_by_window.items():
party_coords: Dict[str, List[Tuple[float, float]]] = {}
for mp_name, (x, y) in window_pos.items():
party = _party_map.get(
mp_name, _party_map.get(mp_name.split("(")[0].strip(), None)
)
if party:
party_coords.setdefault(party, []).append((x, y))
for party, coords in party_coords.items():
if coords:
mean_x = float(np.mean([c[0] for c in coords]))
mean_y = float(np.mean([c[1] for c in coords]))
scores.setdefault(party, []).append([mean_x, mean_y])
return scores
except Exception:
logger.exception("Failed to load aligned party scores all windows")
logger.exception("Failed to compute aligned party scores all windows from positions")
return {}
@ -314,25 +346,13 @@ def load_party_mp_vectors(db_path: str) -> Dict[str, List[np.ndarray]]:
def load_scree_data(db_path: str) -> List[float]:
"""Load scree plot data (explained variance) for current_parliament."""
try:
con = duckdb.connect(database=db_path, read_only=True)
row = con.execute(
"""
SELECT sv_metadata FROM svd_vectors
WHERE window_id = 'current_parliament' AND entity_type = 'singular_values'
LIMIT 1
"""
).fetchone()
con.close()
if row and row[0]:
import json
"""Load scree plot data (explained variance) for current_parliament.
return json.loads(row[0])
return []
except Exception:
logger.exception("Failed to load scree data")
TODO: Scree data requires SVD metadata (singular values / explained
variance ratios) to be stored in the database. Currently only
transformed vectors are stored in svd_vectors.vector, not the
decomposition metadata needed for a scree plot.
"""
return []

Loading…
Cancel
Save