- Extract 6 tab functions from explorer.py (3097 → 543 lines)
- Create analysis/tabs/_rendering.py with shared plotly helpers
- Move data logic to analysis/explorer_data.py
- Add lazy-import wrappers in explorer.py for backward compat
- Add scheduler.py with PipelineScheduler for daily pipeline runs
- Add test_explorer_decomposition.py (5 tests, all pass)
- Add test_scheduler.py (13 tests, all pass)
- Full test suite: 222 passed, 2 skipped

main
parent
203ae178ca
commit
3bdb43f162
@ -1,18 +1,95 @@ |
|||||||
"""Browser tab for the parliamentary explorer. |
"""Browser tab for the parliamentary explorer.""" |
||||||
|
|
||||||
This module will contain the browser tab implementation. |
|
||||||
Currently: Tab logic remains in explorer.py pending Streamlit decoupling. |
|
||||||
""" |
|
||||||
|
|
||||||
from __future__ import annotations |
from __future__ import annotations |
||||||
|
|
||||||
|
import pandas as pd |
||||||
|
|
||||||
|
import analysis.explorer_data as explorer_data |
||||||
|
from analysis.tabs._rendering import _render_voting_results, st |
||||||
|
|
||||||
|
|
||||||
def build_browser_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Browser" tab: filter controls, a motion table and a detail pane.

    Args:
        db_path: Database path passed through to the ``explorer_data`` loaders.
        show_rejected: When False, rows whose stripped title is exactly
            ``"Verworpen."`` are hidden.
    """
    st.subheader("Motie Browser")

    df = explorer_data.load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        df = df[df["title"].fillna("").str.strip() != "Verworpen."]

    # --- filter / sort controls -------------------------------------------
    col1, col2, col3 = st.columns(3)
    with col1:
        years = sorted(df["year"].dropna().astype(int).unique().tolist())
        year_filter = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years])
    with col2:
        min_controversy_b = st.slider(
            "Min. controverse",
            min_value=0.0,
            max_value=1.0,
            value=0.0,
            step=0.05,
            key="browser_controversy",
        )
    with col3:
        sort_by = st.selectbox("Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"])

    working = df.copy()
    if year_filter != "(Alle)":
        working = working[working["year"] == int(year_filter)]
    if min_controversy_b > 0:
        working = working[working["controversy_score"] >= min_controversy_b]

    # Maps the visible sort label to (column, ascending).
    sort_map = {
        "Datum (nieuw)": ("date", False),
        "Controverse": ("controversy_score", False),
        "Marge": ("winning_margin", True),
    }
    sort_col, sort_asc = sort_map[sort_by]
    working = working.sort_values(by=sort_col, ascending=sort_asc)

    display_cols = ["id", "title", "date", "controversy_score", "winning_margin"]
    available_display = [c for c in display_cols if c in working.columns]
    st.dataframe(
        working[available_display].reset_index(drop=True),
        use_container_width=True,
        height=350,
    )

    st.divider()

    # --- detail pane --------------------------------------------------------
    st.markdown("**Detail weergave** — vul een motie-ID in:")
    if working.empty:
        # Placeholder bounds so the input still renders when the filters
        # exclude every motion.
        id_min, id_max, id_default = 1, 99999, 1
    else:
        id_min = int(working["id"].min())
        id_max = int(working["id"].max())
        id_default = int(working["id"].iloc[0])  # first row in the chosen sort order
    sel_id = st.number_input(
        "Motie ID",
        min_value=id_min,
        max_value=id_max,
        value=id_default,
        step=1,
    )

    # The lookup deliberately uses the unfiltered frame, as the original did.
    motion_row = df[df["id"] == sel_id]
    if not motion_row.empty:
        row = motion_row.iloc[0]
        st.markdown(f"### {row.get('title') or 'Onbekend'}")
        date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
        # ``.get(key, 0)`` only covers a missing column; a stored None would
        # break the ``:.2f`` format, so coerce it like the search tab does.
        controversy = row.get("controversy_score") or 0
        st.caption(f"📅 {date_str} | 🔥 Controverse: {controversy:.2f}")

        url = row.get("url")
        if url and str(url).startswith("http"):
            st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

        st.markdown("**Stemuitslag:**")
        _render_voting_results(row.get("voting_results"))

        sim = explorer_data.query_similar(db_path, int(sel_id), top_k=10)
        if not sim.empty:
            st.markdown("**Vergelijkbare moties:**")
            st.dataframe(
                sim[["title", "score", "date", "policy_area"]],
                use_container_width=True,
            )
        else:
            st.caption("_Nog geen vergelijkbare moties beschikbaar voor deze motie_")
||||||
|
|||||||
@ -1,18 +1,374 @@ |
|||||||
"""SVD Components tab for the parliamentary explorer. |
"""SVD Components tab for the parliamentary explorer.""" |
||||||
|
|
||||||
This module will contain the SVD components tab implementation. |
|
||||||
Currently: Tab logic remains in explorer.py pending Streamlit decoupling. |
|
||||||
""" |
|
||||||
|
|
||||||
from __future__ import annotations |
from __future__ import annotations |
||||||
|
|
||||||
|
import datetime as _dt |
||||||
|
import logging |
||||||
|
import os |
||||||
|
from typing import Dict, List, Tuple |
||||||
|
|
||||||
|
import numpy as np |
||||||
|
|
||||||
|
from analysis import config |
||||||
|
import analysis.explorer_data as explorer_data |
||||||
|
from analysis.tabs._rendering import ( |
||||||
|
_render_party_axis_chart_1d, |
||||||
|
_render_scree_plot, |
||||||
|
_render_svd_time_trajectory, |
||||||
|
_render_voting_results, |
||||||
|
st, |
||||||
|
) |
||||||
|
|
||||||
|
try: |
||||||
|
import duckdb |
||||||
|
except Exception: |
||||||
|
duckdb = None # type: ignore |
||||||
|
|
||||||
|
SVD_THEMES = config.SVD_THEMES |
||||||
|
KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES |
||||||
|
|
||||||
|
logger = logging.getLogger(__name__) |
||||||
|
|
||||||
|
|
||||||
def _svd_motion_key(raw_id: object) -> int | None:
    """Coerce a motion id from the JSON payload to ``int``; None when unusable."""
    if raw_id is None:
        return None
    try:
        return int(raw_id)
    except (TypeError, ValueError, OverflowError):
        return None


def _svd_axis_flip(aligned_scores: dict, component: int, default: bool) -> bool:
    """Decide whether axis ``component`` (1-based) must be mirrored.

    The axis is flipped when the mean score of the CANONICAL_RIGHT parties is
    below that of the CANONICAL_LEFT parties. Returns False when either group
    is absent, and ``default`` when the component is outside 1..10 or the
    scores cannot be evaluated (e.g. a score vector is too short).
    """
    idx = component - 1
    if not 0 <= idx < 10:
        return default
    try:
        from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT

        right_scores = []
        left_scores = []
        for party, scores in aligned_scores.items():
            if party in CANONICAL_RIGHT:
                right_scores.append(scores[idx])
            elif party in CANONICAL_LEFT:
                left_scores.append(scores[idx])
        if right_scores and left_scores:
            return bool(np.mean(right_scores) < np.mean(left_scores))
        return False
    except Exception:
        # Best effort: fall back to the configured flip, but leave a trace.
        logger.debug("Could not compute flip for SVD axis %s", component, exc_info=True)
        return default


def _fetch_svd_motion_details(db_path: str, motion_ids: list) -> Dict[int, tuple]:
    """Batch-fetch motion rows keyed by id; returns {} when nothing can be loaded.

    Each value is the raw row tuple
    ``(id, title, date, policy_area, url, body_text, voting_results)``.
    """
    ids_int: List[int] = []
    for mid in motion_ids:
        key = _svd_motion_key(mid)
        if key is None:
            logger.warning("Skipping invalid motion id in SVD batch fetch: %r", mid)
        else:
            ids_int.append(key)

    if not ids_int or duckdb is None:
        return {}

    con = None
    try:
        placeholders = ", ".join("?" for _ in ids_int)
        con = duckdb.connect(database=db_path, read_only=True)
        db_rows = con.execute(
            f"SELECT id, title, date, policy_area, url, body_text, voting_results "
            f"FROM motions WHERE id IN ({placeholders})",
            ids_int,
        ).fetchall()
        return {r[0]: r for r in db_rows}
    except Exception:
        logger.exception("Failed to batch-fetch motion details")
        return {}
    finally:
        if con is not None:
            con.close()


def _render_svd_motion_column(
    heading: str, arrow: str, motions: list, motion_details: Dict[int, tuple]
) -> None:
    """Render one pole of an axis: a heading plus one expander per motion."""
    st.markdown(heading)
    for motion in motions:
        raw_id = motion.get("motion_id")
        label = motion.get("title") or f"Motie #{raw_id}"
        with st.expander(f"{arrow} {label}"):
            key = _svd_motion_key(raw_id)
            details = motion_details.get(key) if key is not None else None
            if not details:
                st.caption("_Geen metadata beschikbaar_")
                continue
            _mid, _title, date, policy_area, url, body_text, voting_results = details
            date_str = str(date)[:10] if date is not None else "?"
            st.caption(f"📅 {date_str} | {policy_area or '—'}")
            if url and str(url).startswith("http"):
                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")
            if body_text:
                # NOTE(review): this nests an expander inside an expander;
                # confirm the deployed Streamlit version permits that.
                with st.expander("Toon volledige tekst"):
                    st.write(body_text)
            _render_voting_results(voting_results)


def build_svd_components_tab(db_path: str) -> None:
    """Render the SVD components tab.

    Shows an optional scree plot, then reads
    ``thoughts/explorer/top_svd_top_motions.json`` and offers a selector over
    the components found in it. For the selected component it draws either a
    1-D party chart for one window or a time trajectory for chosen parties,
    followed by the top motions split over the two poles of the axis.

    Args:
        db_path: Database path passed to the ``explorer_data`` loaders and to
            DuckDB (opened read-only) for the motion details.
    """
    st.subheader("🔬 SVD Assen — politieke polarisatiethema's")
    st.markdown(
        "Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen "
        "van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren "
        "het spanningsveld dat de as beschrijft."
    )

    scree_importances = explorer_data.load_scree_data(db_path)
    if scree_importances:
        st.markdown(
            "**Scree-plot** — het relatieve gewicht van elke SVD-as. "
            "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; "
            "latere assen (7+) zijn fragiel en mogelijk niet boven ruisniveau."
        )
        _render_scree_plot(scree_importances)

    json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
    if not os.path.exists(json_path):
        st.warning(
            f"Top-SVD data not found at {json_path}. Run the importance job to generate it."
        )
        return

    try:
        import json

        with open(json_path, "r", encoding="utf-8") as fh:
            j = json.load(fh)
    except (OSError, ValueError) as e:  # ValueError covers JSON and decoding errors
        st.error(f"Failed to load SVD importance JSON: {e}")
        return

    window = j.get("window")
    rows = j.get("rows", [])
    if not rows:
        st.info("Geen top-moties in dataset")
        return

    st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**")

    # Group rows per component, keeping the first row for each motion id.
    comp_map: dict[int, list] = {}
    seen_ids: dict[int, set] = {}
    for r in rows:
        comp = int(r.get("component", 0))
        bucket = comp_map.setdefault(comp, [])
        seen = seen_ids.setdefault(comp, set())
        motion_id = r.get("motion_id")
        if motion_id not in seen:
            seen.add(motion_id)
            bucket.append(r)

    comp_options = sorted(comp_map.keys())

    def _comp_label(c: int) -> str:
        lbl = SVD_THEMES.get(c, {}).get("label", "")
        return f"As {c} — {lbl}" if lbl else f"As {c}"

    comp_display = [_comp_label(c) for c in comp_options]

    party_scores_default = explorer_data.load_party_axis_scores(db_path)
    party_mp_vectors = explorer_data.load_party_mp_vectors(db_path)

    col1, col2 = st.columns([2, 1])

    view_mode = "Enkel venster"
    selected_parties_for_trajectory: list = []

    with col2:
        comp_sel_idx = st.selectbox(
            "Selecteer SVD-as",
            options=list(range(len(comp_options))),
            format_func=lambda i: comp_display[i],
            index=0,
        )
        comp_sel = comp_options[comp_sel_idx]

        min_mps = st.number_input(
            "Min. Kamerleden per partij",
            min_value=1,
            max_value=20,
            value=1,
            step=1,
            help="Partijen met minder dan dit aantal Kamerleden worden niet weergegeven.",
        )

        view_mode = st.radio(
            "Weergave",
            options=["Enkel venster", "Tijdtraject"],
            index=0,
            help="Enkel venster: toont posities voor één tijdsvenster. Tijdtraject: toont hoe partijen over tijd bewegen op deze as.",
        )

        if view_mode == "Tijdtraject":
            all_parties = (
                sorted(party_scores_default.keys()) if party_scores_default else []
            )
            default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties][:8]
            selected_parties_for_trajectory = st.multiselect(
                "Partijen om te tonen",
                options=all_parties,
                default=default_parties,
                help="Selecteer de partijen die je wilt zien in het tijdtraject.",
            )

    theme = SVD_THEMES.get(comp_sel, {})
    if theme:
        st.info(f"**{theme['label']}** — {theme['explanation']}")

    motions = comp_map.get(comp_sel, [])

    # Year windows in ascending order (excluding the running calendar year),
    # with "current_parliament" last when it is available.
    _current_year = str(_dt.date.today().year)
    available_windows = explorer_data.get_uniform_dim_windows(db_path)
    year_windows = sorted(
        w for w in available_windows if w != "current_parliament" and w != _current_year
    )
    has_current = "current_parliament" in available_windows
    svd_windows = year_windows + (["current_parliament"] if has_current else [])

    def _svd_window_label(w: str) -> str:
        return "Huidig parliament" if w == "current_parliament" else w

    with col1:
        svd_window = st.selectbox(
            "Jaar",
            options=svd_windows,
            index=len(svd_windows) - 1,
            format_func=_svd_window_label,
            key=f"svd_window_{comp_sel}",
        )

    party_mp_counts = (
        {p: len(v) for p, v in party_mp_vectors.items()} if party_mp_vectors else {}
    )

    active_mps = (
        explorer_data.load_active_mps(db_path)
        if svd_window == "current_parliament"
        else None
    )
    aligned_all_scores = explorer_data.get_aligned_party_scores(
        db_path, svd_window, active_mps
    )

    # 1-D coordinate per party on the selected axis. The lower bound keeps a
    # component of 0 from wrapping around to the last dimension via index -1.
    axis_idx = comp_sel - 1
    party_1d_coords: dict = {
        party: (float(all_scores[axis_idx]),)
        for party, all_scores in aligned_all_scores.items()
        if 0 <= axis_idx < len(all_scores)
    }

    theme_with_flip = {
        **theme,
        "flip": _svd_axis_flip(aligned_all_scores, comp_sel, theme.get("flip", False)),
    }

    if min_mps > 1 and party_mp_counts:
        valid_parties = {p for p, count in party_mp_counts.items() if count >= min_mps}
        party_1d_coords = {
            p: coords for p, coords in party_1d_coords.items() if p in valid_parties
        }

    if view_mode == "Tijdtraject" and selected_parties_for_trajectory:
        # The trajectory spans the same window list as the selector above.
        party_scores_by_window = explorer_data._get_aligned_trajectory_scores(
            db_path, svd_windows
        )
        _render_svd_time_trajectory(
            party_scores_by_window,
            comp_sel,
            theme_with_flip,
            selected_parties_for_trajectory,
        )
    else:
        _render_party_axis_chart_1d(party_1d_coords, comp_sel, theme_with_flip)

    motion_ids = [m.get("motion_id") for m in motions if m.get("motion_id") is not None]
    motion_details = _fetch_svd_motion_details(db_path, motion_ids)

    # A missing or null score counts as 0 and therefore lands on the positive side.
    pos_motions = [m for m in motions if float(m.get("score") or 0.0) >= 0]
    neg_motions = [m for m in motions if float(m.get("score") or 0.0) < 0]

    pos_pole = theme_with_flip.get("positive_pole", "")
    neg_pole = theme_with_flip.get("negative_pole", "")

    # When the axis is flipped the positive pole is drawn on the left.
    if theme_with_flip.get("flip", False):
        left_pole, right_pole = pos_pole, neg_pole
        left_motions, right_motions = pos_motions, neg_motions
        left_arrow, right_arrow = "▲", "▼"
    else:
        left_pole, right_pole = neg_pole, pos_pole
        left_motions, right_motions = neg_motions, pos_motions
        left_arrow, right_arrow = "▼", "▲"

    lcol, rcol = st.columns(2)
    with lcol:
        _render_svd_motion_column(
            f"**← {left_pole}**", left_arrow, left_motions, motion_details
        )
    with rcol:
        _render_svd_motion_column(
            f"**{right_pole} →**", right_arrow, right_motions, motion_details
        )
||||||
|
|||||||
@ -1,18 +1,84 @@ |
|||||||
"""Search tab for the parliamentary explorer. |
"""Search tab for the parliamentary explorer.""" |
||||||
|
|
||||||
This module will contain the search tab implementation. |
|
||||||
Currently: Tab logic remains in explorer.py pending Streamlit decoupling. |
|
||||||
""" |
|
||||||
|
|
||||||
from __future__ import annotations |
from __future__ import annotations |
||||||
|
|
||||||
|
import pandas as pd |
||||||
|
|
||||||
|
import analysis.explorer_data as explorer_data |
||||||
|
from analysis.tabs._rendering import _render_voting_results, st |
||||||
|
|
||||||
|
|
||||||
def build_search_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Zoeken" tab: title search with year and controversy filters.

    Results are sorted by controversy score (descending) and the first 50 are
    shown as expanders with metrics, voting results, a link and similar motions.

    Args:
        db_path: Database path passed through to the ``explorer_data`` loaders.
        show_rejected: When False, rows whose stripped title is exactly
            ``"Verworpen."`` are hidden.
    """
    max_results = 50  # also quoted in the results caption below

    st.subheader("Motie Zoeken")

    df = explorer_data.load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        df = df[df["title"].fillna("").str.strip() != "Verworpen."]

    col1, col2, col3 = st.columns([2, 1, 1])
    with col1:
        query = st.text_input(
            "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen"
        )
    with col2:
        years = sorted(df["year"].dropna().astype(int).unique().tolist())
        if years:
            year_range = st.select_slider(
                "Jaar", options=years, value=(years[0], years[-1])
            )
        else:
            # No usable year values: fall back to a fixed range.
            year_range = (2019, 2024)
    with col3:
        min_controversy = st.slider(
            "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05
        )

    working = df.copy()
    working = working[
        (working["year"] >= year_range[0]) & (working["year"] <= year_range[1])
    ]
    if min_controversy > 0:
        working = working[working["controversy_score"] >= min_controversy]
    if query:
        # Plain, case-insensitive substring match (no regex).
        q = query.lower()
        mask = working["title"].fillna("").str.lower().str.contains(q, regex=False)
        working = working[mask]

    working = working.sort_values(by="controversy_score", ascending=False)
    st.caption(f"{len(working)} resultaten (top 50 getoond)")

    for _, row in working.head(max_results).iterrows():
        title = row.get("title") or f"Motie #{row['id']}"
        date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
        controversy = row.get("controversy_score") or 0
        # Same None-guard as controversy: ``.get(key, 0)`` only covers a
        # missing column, and a stored None would break the ``:.2f`` format.
        margin = row.get("winning_margin") or 0
        with st.expander(f"**{title}** — {date_str} — 🔥 {controversy:.2f}"):
            cols = st.columns(3)
            cols[0].metric("Controverse", f"{controversy:.2f}")
            cols[1].metric("Marge", f"{margin:.2f}")
            cols[2].metric("Jaar", int(row["year"]) if pd.notna(row["year"]) else "?")

            _render_voting_results(row.get("voting_results"))

            url = row.get("url")
            if url and str(url).startswith("http"):
                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

            sim = explorer_data.query_similar(db_path, int(row["id"]), top_k=5)
            if not sim.empty:
                st.markdown("**Vergelijkbare moties:**")
                for _, s in sim.iterrows():
                    s_date = (
                        pd.to_datetime(s["date"]).strftime("%Y")
                        if pd.notna(s.get("date"))
                        else ""
                    )
                    st.markdown(
                        f"- {s.get('title', 'Onbekend')} *(score: {s['score']:.3f}, {s_date})*"
                    )
            else:
                st.caption("_Nog geen vergelijkbare moties beschikbaar_")
||||||
|
|||||||
@ -1,20 +1,774 @@ |
|||||||
"""Trajectories tab for the parliamentary explorer. |
"""Trajectories tab for the parliamentary explorer.""" |
||||||
|
|
||||||
This module will contain the trajectories tab implementation. |
from __future__ import annotations |
||||||
Currently: Tab logic remains in explorer.py pending Streamlit decoupling. |
|
||||||
|
import json |
||||||
|
import logging |
||||||
|
import os |
||||||
|
import re |
||||||
|
import traceback |
||||||
|
from datetime import datetime |
||||||
|
from typing import Dict, List, Optional, Tuple |
||||||
|
|
||||||
|
import numpy as np |
||||||
|
|
||||||
|
from analysis import config |
||||||
|
import analysis.explorer_data as explorer_data |
||||||
|
from analysis import trajectory |
||||||
|
from analysis.tabs._rendering import ( |
||||||
|
PARTY_COLOURS, |
||||||
|
_add_y_direction_annotations, |
||||||
|
go, |
||||||
|
st, |
||||||
|
) |
||||||
|
from explorer_helpers import compute_party_centroids, inspect_positions_for_issues |
||||||
|
|
||||||
|
KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES |
||||||
|
|
||||||
|
logger = logging.getLogger(__name__) |
||||||
|
|
||||||
|
_last_trajectories_diagnostics: dict = {} |
||||||
|
_last_diagnostics = _last_trajectories_diagnostics |
||||||
|
|
||||||
|
|
||||||
|
def get_debug_trajectories_enabled() -> bool:
    """Return True when EXPLORER_DEBUG_TRAJECTORIES enables debug mode.

    The variable counts as enabled when its value, ignoring surrounding
    whitespace and letter case, is ``"1"`` or ``"true"``. An unset or empty
    variable, or any other value, disables debug mode.
    """
    raw = os.getenv("EXPLORER_DEBUG_TRAJECTORIES")
    if raw is None:
        return False
    return raw.strip().lower() in ("1", "true")
||||||
|
|
||||||
|
|
||||||
|
def select_trajectory_plot_data(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    party_map: Dict[str, str],
    windows: List[str],
    selected_parties: List[str],
    smooth_alpha: float = 0.35,
    mp_fallback_count: Optional[int] = None,
) -> Tuple[go.Figure, int, Optional[str]]:
    """Build the trajectory figure and return ``(fig, trace_count, banner_text)``.

    Helper used by build_trajectories_tab. Does not call Streamlit.

    One trace is drawn per party centroid series. When no party has a usable
    centroid, the function falls back to individual MP trajectories for the
    MPs present in the most windows and returns a banner text explaining that.

    Args:
        positions_by_window: ``{window_id: {mp_name: (x, y)}}``.
        party_map: Passed to ``compute_party_centroids`` together with the
            positions — presumably MP name → party; verify against that helper.
        windows: Window ids; used as the point labels of the party traces.
        selected_parties: Parties to plot. If none of them is plottable, all
            plottable parties are drawn instead.
        smooth_alpha: EMA factor applied to x and y separately; values
            ``>= 1.0`` disable smoothing.
        mp_fallback_count: Maximum number of MPs in the fallback plot. When
            None it is read from ``EXPLORER_MP_FALLBACK_COUNT`` (default 20).
            Negative values are treated as 0.

    Returns:
        The figure, the number of traces added, and the fallback banner text
        (None on the normal party path).
    """
    if mp_fallback_count is None:
        try:
            mp_fallback_count = int(os.getenv("EXPLORER_MP_FALLBACK_COUNT", "20"))
        except ValueError:
            mp_fallback_count = 20
    # A negative count would otherwise slice "all but the last N" MPs.
    mp_fallback_count = max(mp_fallback_count, 0)

    party_centroids, _meta = compute_party_centroids(
        positions_by_window, party_map, windows
    )

    # The inspector is diagnostic only: a failure is recorded, never raised.
    try:
        inspector_summary = inspect_positions_for_issues(positions_by_window, party_map)
    except Exception:
        tb = traceback.format_exc()
        inspector_summary = {}
        select_trajectory_plot_data._last_diagnostics = {
            "stage": "inspector_exception",
            "exception": tb,
        }
        _last_trajectories_diagnostics.update(
            {"stage": "inspector_exception", "exception": tb}
        )
    logger.debug("select_trajectory_plot_data inspector summary: %s", inspector_summary)

    # A party is plottable when at least one centroid is not NaN in both coordinates.
    plottable_parties = [
        party
        for party, vals in party_centroids.items()
        if any(not (np.isnan(x) and np.isnan(y)) for x, y in vals)
    ]

    logger.debug(
        "[TRAJ DEBUG] plottable_parties: %d parties, sample=%s",
        len(plottable_parties),
        (plottable_parties[:5] if plottable_parties else "empty"),
    )
    logger.debug(
        "[TRAJ DEBUG] party_centroids keys: %s",
        list(party_centroids.keys())[:10],
    )
    if party_centroids:
        sample_party = next(iter(party_centroids))
        logger.debug(
            "[TRAJ DEBUG] Sample party '%s' centroids: %s...",
            sample_party,
            party_centroids[sample_party][:3],
        )

    fig = go.Figure()
    trace_count = 0

    def _ema_smooth(values: List[float], alpha: float) -> List[float]:
        """Exponentially smooth ``values``; None/NaN entries stay NaN and are skipped."""
        if not values or alpha >= 1.0:
            return values
        smoothed: List[float] = []
        prev = None
        for v in values:
            if v is None or (isinstance(v, float) and np.isnan(v)):
                smoothed.append(float(np.nan))
                continue
            v = float(v)
            prev = v if prev is None else alpha * v + (1 - alpha) * prev
            smoothed.append(float(prev))
        return smoothed

    def _as_float(value) -> float:
        """Return ``value`` as float, mapping None and NaN to NaN."""
        if value is None or np.isnan(value):
            return float(np.nan)
        return float(value)

    if not plottable_parties:
        # Fallback: collect per-MP positions across windows, skipping entries
        # that cannot be read as a pair of floats.
        mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
        for wid in windows:
            for mp_name, xy in positions_by_window.get(wid, {}).items():
                try:
                    x, y = float(xy[0]), float(xy[1])
                except Exception:
                    continue
                mp_positions.setdefault(mp_name, {})[wid] = (x, y)

        # MPs present in the most windows first (ties keep insertion order).
        top_mps = sorted(
            mp_positions, key=lambda mp: len(mp_positions[mp]), reverse=True
        )[:mp_fallback_count]

        for mp in top_mps:
            wids_sorted = sorted(mp_positions[mp])
            if not wids_sorted:
                continue
            xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
            ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
            fig.add_trace(
                go.Scatter(
                    x=_ema_smooth(xs_raw, smooth_alpha),
                    y=_ema_smooth(ys_raw, smooth_alpha),
                    mode="lines+markers",
                    name=mp,
                    text=wids_sorted,
                    # Unsmoothed coordinates travel along as customdata.
                    customdata=list(zip(xs_raw, ys_raw)),
                    line=dict(color="#888888", shape="spline", smoothing=1.3),
                    marker=dict(color="#888888", size=6),
                )
            )
            trace_count += 1

        banner_text = "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback."
        logger.debug(
            "[TRAJ DEBUG] Fallback to MP trajectories: trace_count=%d, top_mps=%d",
            trace_count,
            len(top_mps),
        )
        return fig, trace_count, banner_text

    to_plot = [p for p in selected_parties if p in plottable_parties]
    if not to_plot:
        to_plot = plottable_parties

    for party in to_plot:
        vals = party_centroids.get(party, [])
        if not vals:
            continue
        xs_raw = [v[0] for v in vals]
        ys_raw = [v[1] for v in vals]
        colour = PARTY_COLOURS.get(party, "#9E9E9E")
        fig.add_trace(
            go.Scatter(
                x=_ema_smooth(xs_raw, smooth_alpha),
                y=_ema_smooth(ys_raw, smooth_alpha),
                mode="lines+markers",
                name=party,
                text=windows,
                # Unsmoothed centroids travel along as customdata.
                customdata=[
                    (_as_float(x), _as_float(y)) for x, y in zip(xs_raw, ys_raw)
                ],
                line=dict(color=colour, shape="spline", smoothing=1.3),
                marker=dict(color=colour, size=8),
            )
        )
        trace_count += 1

    logger.debug(
        "[TRAJ DEBUG] Final trace_count=%d, plottable_parties=%d, to_plot=%s",
        trace_count,
        len(plottable_parties),
        len(to_plot),
    )
    return fig, trace_count, None
||||||
|
|
||||||
|
|
||||||
def build_trajectories_tab(db_path: str, window_size: str) -> None: |
def build_trajectories_tab(db_path: str, window_size: str) -> None: |
||||||
"""Build the Partij Trajectories tab. |
"""Build the Partij Trajectories tab.""" |
||||||
|
logging.getLogger(__name__).debug( |
||||||
|
"[TRAJ DEBUG] build_trajectories_tab called — db_path=%s, window_size=%s", |
||||||
|
db_path, |
||||||
|
window_size, |
||||||
|
) |
||||||
|
st.subheader("Partij Trajectories") |
||||||
|
st.markdown("Hoe bewegen partijen over de tijdsvensters heen?") |
||||||
|
|
||||||
Currently delegates to explorer.py implementation. |
positions_by_window, axis_def = explorer_data.load_positions(db_path, window_size) |
||||||
Will be extracted when rendering logic is decoupled from Streamlit. |
logging.getLogger(__name__).debug( |
||||||
""" |
"[TRAJ DEBUG] load_positions → %d windows, total MPs=%d", |
||||||
import explorer |
len(positions_by_window), |
||||||
|
sum(len(v) for v in positions_by_window.values()), |
||||||
|
) |
||||||
|
if axis_def is None: |
||||||
|
axis_def = {} |
||||||
|
if not positions_by_window: |
||||||
|
try: |
||||||
|
_last_trajectories_diagnostics.update( |
||||||
|
{ |
||||||
|
"stage": "load_positions_empty", |
||||||
|
"positions_by_window_len": len(positions_by_window), |
||||||
|
} |
||||||
|
) |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
try: |
||||||
|
st.warning("Geen positiedata beschikbaar.") |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
try: |
||||||
|
if get_debug_trajectories_enabled(): |
||||||
|
try: |
||||||
|
st.text_area( |
||||||
|
"Trajectories diagnostics", |
||||||
|
json.dumps(_last_trajectories_diagnostics, default=str), |
||||||
|
height=160, |
||||||
|
) |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
return |
||||||
|
|
||||||
|
party_map = explorer_data.load_party_map(db_path) |
||||||
|
logging.getLogger(__name__).debug( |
||||||
|
"[TRAJ DEBUG] load_party_map → %d entries, sample=%s", |
||||||
|
len(party_map), |
||||||
|
list(party_map.items())[:3], |
||||||
|
) |
||||||
|
|
||||||
|
def normalize_mp_name(name):
    """Normalize an MP name so the same person matches across data sources.

    Surrounding whitespace is stripped and every comma that is not already
    followed by a space gets one, so ``"Jansen,P."`` becomes ``"Jansen, P."``.
    Unlike a whole-string check for ``", "``, this also repairs names with
    mixed spacing such as ``"Vries,de, J."``.

    Args:
        name: Raw name; any falsy value (``None``, ``""``) is accepted.

    Returns:
        The normalized name, or ``""`` when *name* is falsy.
    """
    if not name:
        return ""
    # Negative lookahead: only touch commas that lack the trailing space, so
    # already well-formed ", " separators are left exactly as they are.
    return re.sub(r",(?! )", ", ", name.strip())
||||||
|
|
||||||
|
party_map = {normalize_mp_name(k): v for k, v in party_map.items()} |
||||||
|
|
||||||
|
normalized_positions = {} |
||||||
|
for window, positions in positions_by_window.items(): |
||||||
|
normalized_positions[window] = { |
||||||
|
normalize_mp_name(k): v for k, v in positions.items() |
||||||
|
} |
||||||
|
positions_by_window = normalized_positions |
||||||
|
|
||||||
|
all_mp_names = set() |
||||||
|
for positions in positions_by_window.values(): |
||||||
|
all_mp_names.update(positions.keys()) |
||||||
|
|
||||||
|
matched_names = sum(1 for mp in all_mp_names if mp in party_map) |
||||||
|
if all_mp_names: |
||||||
|
logger.info( |
||||||
|
f"MP name matching: {matched_names}/{len(all_mp_names)} matched ({100 * matched_names / len(all_mp_names):.1f}%)" |
||||||
|
) |
||||||
|
else: |
||||||
|
logger.info("MP name matching: no MPs found in positions data") |
||||||
|
|
||||||
|
if matched_names == 0 and len(all_mp_names) > 0: |
||||||
|
logger.warning("No MP names matched between positions and party_map!") |
||||||
|
logger.warning(f"Sample positions names: {list(all_mp_names)[:5]}") |
||||||
|
logger.warning(f"Sample party_map names: {list(party_map.keys())[:5]}") |
||||||
|
|
||||||
|
windows = sorted(positions_by_window.keys()) |
||||||
|
|
||||||
|
centroids: Dict[str, Dict[str, Tuple[float, float]]] = {} |
||||||
|
all_parties: set = set() |
||||||
|
|
||||||
|
def _strip_paren(name: str) -> str: |
||||||
|
return re.sub(r"\s*\([^)]*\)", "", name).strip() |
||||||
|
|
||||||
|
for wid in windows: |
||||||
|
pos = positions_by_window.get(wid, {}) |
||||||
|
per_party: Dict[str, List[Tuple[float, float]]] = {} |
||||||
|
for mp_name, (x, y) in pos.items(): |
||||||
|
party = party_map.get(mp_name) or party_map.get( |
||||||
|
_strip_paren(mp_name), "Unknown" |
||||||
|
) |
||||||
|
if party == "Unknown": |
||||||
|
continue |
||||||
|
per_party.setdefault(party, []).append((x, y)) |
||||||
|
for party, coords in per_party.items(): |
||||||
|
all_parties.add(party) |
||||||
|
xs = [c[0] for c in coords] |
||||||
|
ys = [c[1] for c in coords] |
||||||
|
centroids.setdefault(party, {})[wid] = ( |
||||||
|
float(np.mean(xs)), |
||||||
|
float(np.mean(ys)), |
||||||
|
) |
||||||
|
|
||||||
|
all_parties = sorted( |
||||||
|
set(party_map.get(mp) for MPs in positions_by_window.values() for mp in MPs) |
||||||
|
- {None, "Unknown"} |
||||||
|
) |
||||||
|
logging.getLogger(__name__).debug( |
||||||
|
"[TRAJ DEBUG] all_parties (raw from party_map) → %d parties: %s", |
||||||
|
len(all_parties), |
||||||
|
all_parties[:10], |
||||||
|
) |
||||||
|
all_parties_sorted = sorted(all_parties) |
||||||
|
|
||||||
|
if not all_parties_sorted: |
||||||
|
st.info( |
||||||
|
"Geen partijen beschikbaar om trajecten te tekenen. Controleer of de party mapping is geladen (mp_metadata) en of de minimum Kamerleden-instelling te hoog staat." |
||||||
|
) |
||||||
|
try: |
||||||
|
st.caption(f"Bekende partijen in party_map: {len(party_map)}") |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
|
||||||
|
default_parties = [p for p in ["CDA", "D66", "VVD"] if p in all_parties] |
||||||
|
if not default_parties: |
||||||
|
default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties] |
||||||
|
if not default_parties: |
||||||
|
default_parties = all_parties_sorted[:6] |
||||||
|
|
||||||
|
selected_parties = st.multiselect( |
||||||
|
"Selecteer partijen", |
||||||
|
options=all_parties_sorted, |
||||||
|
default=default_parties, |
||||||
|
) |
||||||
|
|
||||||
|
def _ema_smooth(values: List[float], alpha: float) -> List[float]: |
||||||
|
if not values or alpha >= 1.0: |
||||||
|
return values |
||||||
|
smoothed = [values[0]] |
||||||
|
for v in values[1:]: |
||||||
|
smoothed.append(alpha * v + (1 - alpha) * smoothed[-1]) |
||||||
|
return smoothed |
||||||
|
|
||||||
|
smooth_alpha = 0.35 |
||||||
|
|
||||||
|
if not centroids: |
||||||
|
st.info( |
||||||
|
"Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback." |
||||||
|
) |
||||||
|
|
||||||
|
mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {} |
||||||
|
for wid in windows: |
||||||
|
pos = positions_by_window.get(wid, {}) |
||||||
|
for mp_name, xy in pos.items(): |
||||||
|
try: |
||||||
|
x, y = float(xy[0]), float(xy[1]) |
||||||
|
except Exception: |
||||||
|
continue |
||||||
|
mp_positions.setdefault(mp_name, {})[wid] = (x, y) |
||||||
|
|
||||||
|
mp_positions = { |
||||||
|
mp: pos |
||||||
|
for mp, pos in mp_positions.items() |
||||||
|
if len(pos) >= 2 |
||||||
|
and not all(np.isnan(x) and np.isnan(y) for x, y in pos.values()) |
||||||
|
} |
||||||
|
|
||||||
|
if not mp_positions: |
||||||
|
st.warning("Geen positiedata beschikbaar voor trajectplotten.") |
||||||
|
_last_trajectories_diagnostics.update( |
||||||
|
{ |
||||||
|
"stage": "no_mp_positions", |
||||||
|
"mp_positions_count": 0, |
||||||
|
} |
||||||
|
) |
||||||
|
try: |
||||||
|
if get_debug_trajectories_enabled(): |
||||||
|
try: |
||||||
|
st.text_area( |
||||||
|
"Trajectories diagnostics", |
||||||
|
json.dumps(_last_trajectories_diagnostics, default=str), |
||||||
|
height=160, |
||||||
|
) |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
return |
||||||
|
|
||||||
|
st.session_state["_trajectory_mp_positions"] = mp_positions |
||||||
|
|
||||||
|
mp_list = sorted(mp_positions.keys()) |
||||||
|
default_mps = mp_list[:6] |
||||||
|
selected_mps = st.multiselect( |
||||||
|
"Selecteer Kamerleden (fallback)", options=mp_list, default=default_mps |
||||||
|
) |
||||||
|
|
||||||
|
fig = go.Figure() |
||||||
|
trace_count = 0 |
||||||
|
for mp in selected_mps: |
||||||
|
wids_sorted = sorted(mp_positions[mp].keys()) |
||||||
|
xs_raw = [mp_positions[mp][w][0] for w in wids_sorted] |
||||||
|
ys_raw = [mp_positions[mp][w][1] for w in wids_sorted] |
||||||
|
xs = _ema_smooth(xs_raw, smooth_alpha) |
||||||
|
ys = _ema_smooth(ys_raw, smooth_alpha) |
||||||
|
custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)] |
||||||
|
fig.add_trace( |
||||||
|
go.Scatter( |
||||||
|
x=xs, |
||||||
|
y=ys, |
||||||
|
mode="lines+markers", |
||||||
|
name=mp, |
||||||
|
text=wids_sorted, |
||||||
|
customdata=custom_raw, |
||||||
|
line=dict(color="#888888", shape="spline", smoothing=1.3), |
||||||
|
marker=dict(color="#888888", size=6), |
||||||
|
hovertemplate=( |
||||||
|
f"<b>{mp}</b><br>" |
||||||
|
"venster: %{text}<br>" |
||||||
|
"x (smoothed): %{x:.3f}<br>" |
||||||
|
"x (raw): %{customdata[0]:.3f}<br>" |
||||||
|
"y (smoothed): %{y:.3f}<br>" |
||||||
|
"y (raw): %{customdata[1]:.3f}<extra></extra>" |
||||||
|
), |
||||||
|
) |
||||||
|
) |
||||||
|
trace_count += 1 |
||||||
|
|
||||||
|
_add_y_direction_annotations(fig) |
||||||
|
if trace_count == 0: |
||||||
|
st.info( |
||||||
|
"Geen trajecten getekend: geen geselecteerde Kamerleden met voldoende data." |
||||||
|
) |
||||||
|
else: |
||||||
|
st.plotly_chart(fig, use_container_width=True) |
||||||
|
return |
||||||
|
|
||||||
|
if os.getenv("EXPLORER_FORCE_SHOW_TRAJECTORIES") in ("1", "true", "True"): |
||||||
|
mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {} |
||||||
|
for wid in windows: |
||||||
|
pos = positions_by_window.get(wid, {}) |
||||||
|
for mp_name, (x, y) in pos.items(): |
||||||
|
mp_positions.setdefault(mp_name, {})[wid] = (float(x), float(y)) |
||||||
|
|
||||||
|
mp_list = sorted(mp_positions.keys()) |
||||||
|
if not mp_list: |
||||||
|
st.info("Geen MP-positiegegevens beschikbaar om te tonen.") |
||||||
|
return |
||||||
|
|
||||||
|
sample_mps = mp_list[:6] |
||||||
|
fig = go.Figure() |
||||||
|
for mp in sample_mps: |
||||||
|
wids_sorted = sorted(mp_positions[mp].keys()) |
||||||
|
xs_raw = [mp_positions[mp][w][0] for w in wids_sorted] |
||||||
|
ys_raw = [mp_positions[mp][w][1] for w in wids_sorted] |
||||||
|
xs = _ema_smooth(xs_raw, 0.35) |
||||||
|
ys = _ema_smooth(ys_raw, 0.35) |
||||||
|
custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)] |
||||||
|
fig.add_trace( |
||||||
|
go.Scatter( |
||||||
|
x=xs, |
||||||
|
y=ys, |
||||||
|
mode="lines+markers", |
||||||
|
name=mp, |
||||||
|
text=wids_sorted, |
||||||
|
customdata=custom_raw, |
||||||
|
line=dict(color="#444444", shape="spline", smoothing=1.3), |
||||||
|
marker=dict(color="#444444", size=6), |
||||||
|
hovertemplate=( |
||||||
|
f"<b>{mp}</b><br>" |
||||||
|
"venster: %{text}<br>" |
||||||
|
"x (smoothed): %{x:.3f}<br>" |
||||||
|
"x (raw): %{customdata[0]:.3f}<br>" |
||||||
|
"y (smoothed): %{y:.3f}<br>" |
||||||
|
"y (raw): %{customdata[1]:.3f}<extra></extra>" |
||||||
|
), |
||||||
|
) |
||||||
|
) |
||||||
|
_add_y_direction_annotations(fig) |
||||||
|
st.plotly_chart(fig, use_container_width=True) |
||||||
|
return |
||||||
|
|
||||||
|
try: |
||||||
|
debug_checkbox = False |
||||||
|
try: |
||||||
|
debug_checkbox = st.checkbox( |
||||||
|
"Enable trajectories diagnostics (show extra info)", |
||||||
|
value=get_debug_trajectories_enabled(), |
||||||
|
) |
||||||
|
except Exception: |
||||||
|
debug_checkbox = get_debug_trajectories_enabled() |
||||||
|
if debug_checkbox: |
||||||
|
try: |
||||||
|
with st.expander( |
||||||
|
"DEBUG: Trajectories data (showing diagnostics)", expanded=False |
||||||
|
): |
||||||
|
st.write("windows (count):", len(windows)) |
||||||
|
st.write("windows sample:", windows[:10]) |
||||||
|
st.write("party_map entries:", len(party_map)) |
||||||
|
st.write("parties with centroids:", len(all_parties_sorted)) |
||||||
|
st.write("default_parties:", default_parties) |
||||||
|
st.write("selected_parties:", selected_parties) |
||||||
|
st.write("min_mps setting:", 3) |
||||||
|
sample = { |
||||||
|
p: len(centroids.get(p, {})) |
||||||
|
for p in list(all_parties_sorted)[:8] |
||||||
|
} |
||||||
|
st.write("sample centroid window counts per party:", sample) |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
|
||||||
|
smoothing_method = st.selectbox( |
||||||
|
"Smoothing methode", |
||||||
|
options=["EMA", "Spline", "None"], |
||||||
|
index=0, |
||||||
|
help="EMA = exponential moving average; Spline = low-degree polynomial spline fit; None = raw centroids", |
||||||
|
) |
||||||
|
|
||||||
|
smooth_alpha = 1.0 |
||||||
|
if smoothing_method == "EMA": |
||||||
|
smooth_alpha = st.slider( |
||||||
|
"Glad maken (EMA-\u03b1)", |
||||||
|
min_value=0.1, |
||||||
|
max_value=1.0, |
||||||
|
value=0.35, |
||||||
|
step=0.05, |
||||||
|
help=( |
||||||
|
"\u03b1=1.0 toont de ruwe data; lagere waarden maken de lijn gladder. " |
||||||
|
"Standaard 0.35 voor een goed evenwicht tussen detail en ruis." |
||||||
|
), |
||||||
|
) |
||||||
|
|
||||||
|
def _spline_smooth(values: List[float]) -> List[float]: |
||||||
|
n = len(values) |
||||||
|
if n <= 2: |
||||||
|
return values |
||||||
|
deg = min(3, n - 1) |
||||||
|
try: |
||||||
|
idx = np.arange(n, dtype=float) |
||||||
|
coeffs = np.polyfit(idx, np.array(values, dtype=float), deg=deg) |
||||||
|
smooth = np.polyval(coeffs, idx) |
||||||
|
return [float(v) for v in smooth] |
||||||
|
except Exception: |
||||||
|
return values |
||||||
|
|
||||||
|
fig = go.Figure() |
||||||
|
trace_count = 0 |
||||||
|
helper_succeeded = False |
||||||
|
try: |
||||||
|
fig2, trace_count2, banner_text = select_trajectory_plot_data( |
||||||
|
positions_by_window, party_map, windows, selected_parties, smooth_alpha |
||||||
|
) |
||||||
|
if fig2 is not None: |
||||||
|
fig = fig2 |
||||||
|
trace_count = trace_count2 |
||||||
|
helper_succeeded = True |
||||||
|
if banner_text: |
||||||
|
try: |
||||||
|
st.caption(banner_text) |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
try: |
||||||
|
_last_trajectories_diagnostics.update({"banner_text": banner_text}) |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
except Exception as e: |
||||||
|
tb = traceback.format_exc() |
||||||
|
try: |
||||||
|
select_trajectory_plot_data._last_diagnostics = {"exception": tb} |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
try: |
||||||
|
_last_trajectories_diagnostics.update( |
||||||
|
{"stage": "select_helper_exception", "exception": tb} |
||||||
|
) |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
logger.exception("select_trajectory_plot_data failed") |
||||||
|
debug_enabled = get_debug_trajectories_enabled() |
||||||
|
if debug_enabled: |
||||||
|
try: |
||||||
|
st.text_area("select_trajectory_plot_data traceback", tb, height=240) |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
logging.getLogger(__name__).debug( |
||||||
|
"[TRAJ DEBUG] helper_succeeded=%s", helper_succeeded |
||||||
|
) |
||||||
|
if not helper_succeeded: |
||||||
|
for party in selected_parties: |
||||||
|
if party not in centroids: |
||||||
|
continue |
||||||
|
wids_sorted = sorted(centroids[party].keys()) |
||||||
|
xs_raw = [centroids[party][w][0] for w in wids_sorted] |
||||||
|
ys_raw = [centroids[party][w][1] for w in wids_sorted] |
||||||
|
xs = _ema_smooth(xs_raw, smooth_alpha) |
||||||
|
ys = _ema_smooth(ys_raw, smooth_alpha) |
||||||
|
custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)] |
||||||
|
colour = PARTY_COLOURS.get(party, "#9E9E9E") |
||||||
|
fig.add_trace( |
||||||
|
go.Scatter( |
||||||
|
x=xs, |
||||||
|
y=ys, |
||||||
|
mode="lines+markers", |
||||||
|
name=party, |
||||||
|
text=wids_sorted, |
||||||
|
customdata=custom_raw, |
||||||
|
line=dict(color=colour, shape="spline", smoothing=1.3), |
||||||
|
marker=dict(color=colour, size=8), |
||||||
|
hovertemplate=( |
||||||
|
f"<b>{party}</b><br>" |
||||||
|
"venster: %{text}<br>" |
||||||
|
"x (smoothed): %{x:.3f}<br>" |
||||||
|
"x (raw): %{customdata[0]:.3f}<br>" |
||||||
|
"y (smoothed): %{y:.3f}<br>" |
||||||
|
"y (raw): %{customdata[1]:.3f}<extra></extra>" |
||||||
|
), |
||||||
|
) |
||||||
|
) |
||||||
|
trace_count += 1 |
||||||
|
|
||||||
|
_THRESHOLD = 0.65 |
||||||
|
x_conf_map = axis_def.get("x_label_confidence", {}) or {} |
||||||
|
y_conf_map = axis_def.get("y_label_confidence", {}) or {} |
||||||
|
|
||||||
|
def _mean_conf(m: dict) -> Optional[float]: |
||||||
|
vals = [v for v in m.values() if v is not None] |
||||||
|
if not vals: |
||||||
|
return None |
||||||
|
return float(sum(vals) / len(vals)) |
||||||
|
|
||||||
|
x_mean = _mean_conf(x_conf_map) |
||||||
|
y_mean = _mean_conf(y_conf_map) |
||||||
|
|
||||||
|
x_title = trajectory.choose_trajectory_title(axis_def, "x", threshold=_THRESHOLD) |
||||||
|
y_title = trajectory.choose_trajectory_title(axis_def, "y", threshold=_THRESHOLD) |
||||||
|
|
||||||
|
fig.update_layout( |
||||||
|
title="Partij trajectories", |
||||||
|
xaxis_title=x_title, |
||||||
|
yaxis_title=y_title, |
||||||
|
height=600, |
||||||
|
legend_title_text="Partij", |
||||||
|
) |
||||||
|
_add_y_direction_annotations(fig) |
||||||
|
try: |
||||||
|
_last_trajectories_diagnostics.update({"trace_count": trace_count}) |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
debug_enabled = get_debug_trajectories_enabled() |
||||||
|
if trace_count == 0: |
||||||
|
_last_trajectories_diagnostics.update( |
||||||
|
{ |
||||||
|
"stage": "zero_traces", |
||||||
|
"positions_count": sum(len(pos) for pos in positions_by_window.values()) |
||||||
|
if positions_by_window |
||||||
|
else 0, |
||||||
|
"party_map_count": len(party_map) if party_map else 0, |
||||||
|
"centroids_count": len(centroids) if centroids else 0, |
||||||
|
"selected_parties_count": len(selected_parties) |
||||||
|
if selected_parties |
||||||
|
else 0, |
||||||
|
"timestamp": datetime.now().isoformat(), |
||||||
|
} |
||||||
|
) |
||||||
|
if positions_by_window and party_map and not centroids: |
||||||
|
sample_mps = [] |
||||||
|
for window, positions in list(positions_by_window.items())[:1]: |
||||||
|
sample_mps = list(positions.keys())[:5] |
||||||
|
break |
||||||
|
matched = sum(1 for mp in sample_mps if mp in party_map) |
||||||
|
_last_trajectories_diagnostics["name_match_check"] = { |
||||||
|
"sample_mps": sample_mps, |
||||||
|
"matched_in_party_map": matched, |
||||||
|
"sample_size": len(sample_mps), |
||||||
|
} |
||||||
|
if trace_count == 0: |
||||||
|
st.info("📊 **Geen trajecten getekend**") |
||||||
|
|
||||||
|
with st.expander("🔍 Diagnostische informatie"): |
||||||
|
st.write("**Data status:**") |
||||||
|
st.write( |
||||||
|
f"- Positie vensters: {len(positions_by_window) if positions_by_window else 0}" |
||||||
|
) |
||||||
|
st.write(f"- Party mappings: {len(party_map) if party_map else 0}") |
||||||
|
st.write( |
||||||
|
f"- Geselecteerde partijen: {len(selected_parties) if selected_parties else 0}" |
||||||
|
) |
||||||
|
|
||||||
|
if "centroid_diagnostics" in locals(): |
||||||
|
st.write("**Centroid berekening:**") |
||||||
|
st.write( |
||||||
|
f"- Partijen met posities: {len(centroid_diagnostics.get('parties_with_positions', []))}" |
||||||
|
) |
||||||
|
st.write( |
||||||
|
f"- Partijen met alleen NaN: {len(centroid_diagnostics.get('parties_all_nan', []))}" |
||||||
|
) |
||||||
|
|
||||||
|
st.write("\n**Mogelijke oorzaken:**") |
||||||
|
st.write("1. Geen SVD vectoren berekend voor de geselecteerde vensters") |
||||||
|
st.write("2. MP namen in posities komen niet overeen met party_map") |
||||||
|
st.write("3. Alle geselecteerde partijen hebben te weinig MPs (< 5)") |
||||||
|
|
||||||
|
if st.button("🔧 Database diagnostiek uitvoeren"): |
||||||
|
with st.spinner("Bezig met diagnostiek..."): |
||||||
|
from scripts.diagnose_trajectories_cli import ( |
||||||
|
run as diagnose_trajectories, |
||||||
|
) |
||||||
|
|
||||||
explorer.build_trajectories_tab(db_path, window_size) |
results = diagnose_trajectories(db_path) |
||||||
|
st.json(results) |
||||||
|
else: |
||||||
|
try: |
||||||
|
st.info( |
||||||
|
f"[DEBUG] trace_count={trace_count}, fig data count={len(fig.data)}, layout title={fig.layout.title.text if fig.layout.title else 'none'}" |
||||||
|
) |
||||||
|
except Exception: |
||||||
|
pass |
||||||
|
try: |
||||||
|
logging.getLogger(__name__).debug( |
||||||
|
"[TRAJ DEBUG] About to render plotly chart — trace_count=%d, banner=%s, fig has %d traces", |
||||||
|
trace_count, |
||||||
|
banner_text, |
||||||
|
len(fig.data), |
||||||
|
) |
||||||
|
st.plotly_chart(fig, use_container_width=True) |
||||||
|
except Exception as e: |
||||||
|
st.error(f"Trajectories rendering failed: {e}") |
||||||
|
if get_debug_trajectories_enabled(): |
||||||
|
try: |
||||||
|
st.json(_last_trajectories_diagnostics) |
||||||
|
except Exception: |
||||||
|
st.text_area( |
||||||
|
"Trajectories diagnostics (JSON failed)", |
||||||
|
json.dumps(_last_trajectories_diagnostics, default=str), |
||||||
|
height=240, |
||||||
|
) |
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,170 @@ |
|||||||
|
"""Automated pipeline scheduling. |
||||||
|
|
||||||
|
Runs the parliamentary embedding pipeline and motion summarization |
||||||
|
on a configurable schedule using the `schedule` library. |
||||||
|
|
||||||
|
Usage: |
||||||
|
uv run python scheduler.py # start scheduler loop |
||||||
|
uv run python scheduler.py --once # run once and exit |
||||||
|
uv run python scheduler.py --pipeline-time 03:00 --summarizer-every 6 |
||||||
|
""" |
||||||
|
|
||||||
|
from __future__ import annotations |
||||||
|
|
||||||
|
import argparse |
||||||
|
import logging |
||||||
|
import signal |
||||||
|
import sys |
||||||
|
import time |
||||||
|
from typing import Callable |
||||||
|
|
||||||
|
import schedule |
||||||
|
|
||||||
|
from config import config |
||||||
|
import argparse |
||||||
|
|
||||||
|
from pipeline.run_pipeline import run as run_pipeline |
||||||
|
from summarizer import summarizer |
||||||
|
|
||||||
|
_logger = logging.getLogger(__name__) |
||||||
|
|
||||||
|
|
||||||
|
class PipelineScheduler:
    """Registers the recurring pipeline/summarizer jobs and drives the run loop.

    ``db_path`` is forwarded to the pipeline as its ``db_path`` option;
    ``_running`` is the flag polled by :meth:`start` and cleared by :meth:`stop`.
    """

    def __init__(self, db_path: str = "data/motions.db"):
        self._running = False
        self.db_path = db_path

    def run_pipeline(self) -> int:
        """Execute one full embedding pipeline run.

        Returns:
            The pipeline's result when it is an ``int``, ``0`` for any other
            result, and ``1`` when the run raised (the exception is logged).
        """
        _logger.info("Starting scheduled pipeline run")
        try:
            pipeline_options = {
                "db_path": self.db_path,
                "window_size": "annual",
                "start_date": None,
                "end_date": None,
                "svd_k": 50,
                "svd_workers": None,
                "text_model": None,
                "text_batch_size": 200,
                "skip_metadata": False,
                "skip_extract": False,
                "skip_svd": False,
                "skip_text": False,
                "skip_fusion": False,
                "dry_run": False,
            }
            outcome = run_pipeline(argparse.Namespace(**pipeline_options))
            _logger.info("Pipeline run completed with code %s", outcome)
            if isinstance(outcome, int):
                return outcome
            return 0
        except Exception:
            _logger.exception("Pipeline run failed")
            return 1

    def run_summarizer(self) -> None:
        """Run the summarizer's ``update_motion_summaries``; failures are logged, not raised."""
        _logger.info("Starting scheduled summarizer run")
        try:
            summarizer.update_motion_summaries()
        except Exception:
            _logger.exception("Summarizer run failed")
        else:
            _logger.info("Summarizer run completed")

    def schedule_daily(self, time_str: str = "02:00") -> None:
        """Register :meth:`run_pipeline` as a daily job at *time_str*."""
        _logger.info("Scheduling daily pipeline run at %s", time_str)
        daily_job = schedule.every().day.at(time_str)
        daily_job.do(self.run_pipeline)

    def schedule_summarizer(self, every_n_hours: int = 6) -> None:
        """Register :meth:`run_summarizer` to repeat every *every_n_hours* hours."""
        _logger.info("Scheduling summarizer every %s hours", every_n_hours)
        hourly_job = schedule.every(every_n_hours).hours
        hourly_job.do(self.run_summarizer)

    def _signal_handler(self, signum, frame) -> None:
        """Signal callback: log the signal number and request loop shutdown."""
        _logger.info("Received signal %s, shutting down", signum)
        self.stop()

    def start(self) -> None:
        """Run pending jobs in a loop, polling once per second.

        Blocks until :meth:`stop` is called or a signal is received.
        """
        self._running = True

        # SIGTERM and SIGINT both route to the same graceful-shutdown handler.
        for shutdown_signal in (signal.SIGTERM, signal.SIGINT):
            signal.signal(shutdown_signal, self._signal_handler)

        _logger.info("Scheduler started")
        while self._running:
            schedule.run_pending()
            time.sleep(1)
        _logger.info("Scheduler stopped")

    def stop(self) -> None:
        """Clear the running flag so :meth:`start` leaves its loop."""
        self._running = False
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the scheduler script.

    Options: ``--db-path``, ``--pipeline-time``, ``--summarizer-every`` (int)
    and the ``--once`` flag. Defaults are shown in ``--help`` output.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Automated pipeline scheduler",
    )
    # Declared as data so flags, defaults and help text are visible at a glance.
    option_specs = (
        ("--db-path", {"default": "data/motions.db", "help": "Path to the DuckDB file"}),
        (
            "--pipeline-time",
            {"default": "02:00", "help": "Daily pipeline run time (HH:MM)"},
        ),
        (
            "--summarizer-every",
            {"type": int, "default": 6, "help": "Run summarizer every N hours"},
        ),
        (
            "--once",
            {
                "action": "store_true",
                "help": "Run pipeline + summarizer once and exit (no scheduling loop)",
            },
        ),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for the scheduler.

    Args:
        argv: Argument list handed to the parser; ``None`` lets argparse read
            the process arguments.

    Returns:
        With ``--once``: the pipeline's return code. Otherwise ``0`` after the
        scheduling loop has ended.
    """
    options = build_parser().parse_args(argv)

    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(name)s %(message)s",
        level=logging.INFO,
    )

    scheduler_instance = PipelineScheduler(db_path=options.db_path)

    if not options.once:
        scheduler_instance.schedule_daily(options.pipeline_time)
        # A non-positive interval leaves the summarizer unscheduled.
        if options.summarizer_every > 0:
            scheduler_instance.schedule_summarizer(options.summarizer_every)
        scheduler_instance.start()
        return 0

    _logger.info("Running in single-shot mode")
    exit_code = scheduler_instance.run_pipeline()
    scheduler_instance.run_summarizer()
    return exit_code
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
||||||
@ -0,0 +1,95 @@ |
|||||||
|
"""Tests for explorer.py decomposition (P3-001). |
||||||
|
|
||||||
|
Acceptance criteria: |
||||||
|
- explorer.py must be under 1500 lines. |
||||||
|
- Tab modules must define their build functions locally (not re-export from explorer). |
||||||
|
- No circular imports between explorer.py and analysis.tabs. |
||||||
|
""" |
||||||
|
|
||||||
|
import ast |
||||||
|
import inspect |
||||||
|
import pathlib |
||||||
|
|
||||||
|
|
||||||
|
class TestExplorerDecomposition:
    """Structural checks that explorer.py has been split into analysis/tabs/.

    All paths are relative, so these tests read the files found under the
    current working directory.
    """

    def test_explorer_line_count_under_1500(self):
        """explorer.py must be under 1500 lines."""
        path = pathlib.Path("explorer.py")
        lines = path.read_text(encoding="utf-8").splitlines()
        assert len(lines) < 1500, (
            f"explorer.py has {len(lines)} lines; target is < 1500. "
            f"Extract tab functions and rendering helpers into analysis/tabs/."
        )

    def test_tab_modules_define_functions_locally(self):
        """Each tab module must define its build_*_tab without delegating to explorer."""
        tabs = [
            ("analysis/tabs/compass.py", "build_compass_tab"),
            ("analysis/tabs/trajectories.py", "build_trajectories_tab"),
            ("analysis/tabs/search.py", "build_search_tab"),
            ("analysis/tabs/browser.py", "build_browser_tab"),
            ("analysis/tabs/components.py", "build_svd_components_tab"),
            ("analysis/tabs/quiz.py", "build_mp_quiz_tab"),
        ]
        for module_path, func_name in tabs:
            source = pathlib.Path(module_path).read_text(encoding="utf-8")
            tree = ast.parse(source)
            func_def = next(
                (
                    node
                    for node in ast.walk(tree)
                    if isinstance(node, ast.FunctionDef) and node.name == func_name
                ),
                None,
            )
            assert func_def is not None, (
                f"{module_path} must define {func_name}"
            )
            # A real implementation has more than a handful of top-level
            # statements; a delegating stub (docstring + import + call) does not.
            body = func_def.body
            assert len(body) > 3, (
                f"{module_path}.{func_name} looks like a stub ({len(body)} statements). "
                f"Extract the real implementation from explorer.py."
            )

    def test_rendering_helpers_extracted(self):
        """Rendering helpers should not live in explorer.py."""
        helpers = [
            "_render_scree_plot",
            "_build_party_axis_figure",
            "_render_party_axis_chart",
            "_render_party_axis_chart_1d",
            "_render_svd_time_trajectory",
            "_render_voting_results",
            "_add_y_direction_annotations",
        ]
        source = pathlib.Path("explorer.py").read_text(encoding="utf-8")
        tree = ast.parse(source)
        defined = {node.name for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)}
        for helper in helpers:
            assert helper not in defined, (
                f"{helper} should be extracted from explorer.py "
                f"into analysis/tabs/_rendering.py"
            )

    def test_no_circular_import_tabs_to_explorer(self):
        """Tab modules must not import from explorer.

        Imports are inspected through the AST rather than by substring search,
        so ``from analysis import explorer_data`` or a comment that merely
        mentions "import explorer" does not trigger a false failure. Imports
        nested inside functions are still covered because the whole tree is
        walked.
        """
        tab_modules = [
            "analysis/tabs/compass.py",
            "analysis/tabs/trajectories.py",
            "analysis/tabs/search.py",
            "analysis/tabs/browser.py",
            "analysis/tabs/components.py",
            "analysis/tabs/quiz.py",
            "analysis/tabs/_rendering.py",
        ]

        def _is_explorer(dotted_name):
            # Matches the top-level module itself or any attribute path under it.
            return dotted_name == "explorer" or dotted_name.startswith("explorer.")

        for module_path in tab_modules:
            path = pathlib.Path(module_path)
            if not path.exists():
                continue
            tree = ast.parse(path.read_text(encoding="utf-8"))
            for node in ast.walk(tree):
                if isinstance(node, ast.ImportFrom):
                    # level == 0 means an absolute import ("from explorer import x").
                    is_from_explorer = node.level == 0 and _is_explorer(node.module or "")
                    assert not is_from_explorer, (
                        f"{module_path} imports from explorer.py — "
                        f"move shared helpers to explorer_data.py or _rendering.py instead"
                    )
                elif isinstance(node, ast.Import):
                    imports_explorer = any(_is_explorer(alias.name) for alias in node.names)
                    assert not imports_explorer, (
                        f"{module_path} imports explorer module — "
                        f"move shared helpers to explorer_data.py or _rendering.py instead"
                    )
||||||
@ -0,0 +1,159 @@ |
|||||||
|
"""Tests for scheduler.py — automated pipeline scheduling. |
||||||
|
|
||||||
|
TDD: write failing test, implement, refactor. |
||||||
|
""" |
||||||
|
|
||||||
|
from __future__ import annotations |
||||||
|
|
||||||
|
import signal |
||||||
|
from unittest.mock import MagicMock, patch |
||||||
|
|
||||||
|
import pytest |
||||||
|
|
||||||
|
|
||||||
|
class TestPipelineSchedulerInit:
    """Constructor behaviour of PipelineScheduler."""

    def test_default_db_path(self):
        """Without arguments the default database path is used and the loop is idle."""
        from scheduler import PipelineScheduler

        instance = PipelineScheduler()
        assert not instance._running
        assert instance.db_path == "data/motions.db"

    def test_custom_db_path(self):
        """An explicit db_path is stored unchanged."""
        from scheduler import PipelineScheduler

        custom_path = "/tmp/test.db"
        instance = PipelineScheduler(db_path=custom_path)
        assert instance.db_path == custom_path
||||||
|
|
||||||
|
|
||||||
|
class TestPipelineSchedulerRunPipeline:
    """Checks ``PipelineScheduler.run_pipeline`` with ``scheduler.run_pipeline`` patched."""

    def test_calls_pipeline_run_with_db_path(self):
        """The patched pipeline entry point is called once with an object carrying ``db_path``."""
        from scheduler import PipelineScheduler

        instance = PipelineScheduler(db_path="/tmp/test.db")

        with patch("scheduler.run_pipeline", return_value=0) as fake_run:
            instance.run_pipeline()

        fake_run.assert_called_once()
        # The first positional argument is expected to expose db_path
        # (the original test describes it as a Namespace).
        positional, _keywords = fake_run.call_args
        assert positional[0].db_path == "/tmp/test.db"

    def test_logs_error_on_pipeline_failure(self):
        """When the pipeline raises, the method returns 1 and logs via ``_logger.exception``."""
        from scheduler import PipelineScheduler

        instance = PipelineScheduler()
        failing_run = patch(
            "scheduler.run_pipeline",
            side_effect=RuntimeError("pipeline failed"),
        )

        with failing_run, patch("scheduler._logger") as fake_logger:
            exit_code = instance.run_pipeline()

        assert exit_code == 1
        fake_logger.exception.assert_called_once()
||||||
|
|
||||||
|
|
||||||
|
class TestPipelineSchedulerRunSummarizer:
    """Checks ``PipelineScheduler.run_summarizer`` with ``scheduler.summarizer`` patched."""

    def test_calls_summarizer_update(self):
        """``update_motion_summaries`` on the patched summarizer is called exactly once."""
        from scheduler import PipelineScheduler

        instance = PipelineScheduler()

        with patch("scheduler.summarizer") as fake_summarizer:
            instance.run_summarizer()

        fake_summarizer.update_motion_summaries.assert_called_once()

    def test_logs_error_on_summarizer_failure(self):
        """A summarizer exception does not propagate and is logged via ``_logger.exception``."""
        from scheduler import PipelineScheduler

        instance = PipelineScheduler()
        failure = RuntimeError("summarizer failed")

        with patch("scheduler.summarizer") as fake_summarizer:
            fake_summarizer.update_motion_summaries.side_effect = failure
            with patch("scheduler._logger") as fake_logger:
                instance.run_summarizer()

        fake_logger.exception.assert_called_once()
||||||
|
|
||||||
|
|
||||||
|
class TestPipelineSchedulerSchedule:
    """Checks that the scheduling methods register work through ``scheduler.schedule``."""

    def test_schedule_daily_adds_job(self):
        """``schedule_daily`` calls ``schedule.every`` exactly once."""
        from scheduler import PipelineScheduler

        instance = PipelineScheduler()

        with patch("scheduler.schedule") as fake_schedule:
            # Pre-wire the every().day.at(...).do link of the mocked chain.
            registered_job = MagicMock()
            daily_chain = fake_schedule.every.return_value.day.at.return_value
            daily_chain.do = registered_job

            instance.schedule_daily("02:00")

        fake_schedule.every.assert_called_once()

    def test_schedule_summarizer_adds_job(self):
        """``schedule_summarizer`` calls ``schedule.every`` exactly once."""
        from scheduler import PipelineScheduler

        instance = PipelineScheduler()

        with patch("scheduler.schedule") as fake_schedule:
            # Pre-wire the every().hour.do link of the mocked chain.
            registered_job = MagicMock()
            hourly_chain = fake_schedule.every.return_value.hour
            hourly_chain.do = registered_job

            instance.schedule_summarizer(every_n_hours=6)

        fake_schedule.every.assert_called_once()
||||||
|
|
||||||
|
|
||||||
|
class TestPipelineSchedulerLoop:
    """Checks ``start``, ``stop`` and the signal handler of ``PipelineScheduler``."""

    def test_start_runs_pending_jobs(self):
        """``start`` calls ``schedule.run_pending`` and leaves ``_running`` falsy once stopped."""
        from scheduler import PipelineScheduler

        instance = PipelineScheduler()
        sleeps_seen = 0

        def _stop_on_third_sleep(*args, **kwargs):
            # Stands in for time.sleep: counts calls and stops the scheduler
            # on the third one so start() can return.
            nonlocal sleeps_seen
            sleeps_seen += 1
            if sleeps_seen >= 3:
                instance.stop()

        # Patches are entered in the same order as the original nested blocks.
        with patch("scheduler.schedule.run_pending") as fake_run_pending, \
                patch("scheduler.time.sleep", side_effect=_stop_on_third_sleep), \
                patch("scheduler.signal.signal"):
            instance.start()

        assert fake_run_pending.called
        assert not instance._running

    def test_stop_sets_running_false(self):
        """``stop`` leaves ``_running`` falsy after it was set to True."""
        from scheduler import PipelineScheduler

        instance = PipelineScheduler()
        instance._running = True

        instance.stop()

        assert not instance._running

    def test_signal_handler_stops_scheduler(self):
        """``_signal_handler`` calls ``stop`` exactly once."""
        from scheduler import PipelineScheduler

        instance = PipelineScheduler()
        instance._running = True

        with patch.object(instance, "stop") as fake_stop:
            instance._signal_handler(signal.SIGINT, None)

        fake_stop.assert_called_once()
||||||
|
|
||||||
|
|
||||||
|
class TestSchedulerCLI:
    """Checks ``scheduler.main`` with ``PipelineScheduler`` replaced by a mock."""

    def test_main_parses_args(self):
        """``--pipeline-time`` is passed to ``schedule_daily`` and ``start`` is called."""
        from scheduler import main

        fake_scheduler = MagicMock()

        with patch("scheduler.PipelineScheduler") as scheduler_cls:
            scheduler_cls.return_value = fake_scheduler
            exit_code = main(["--pipeline-time", "03:00"])

        assert exit_code == 0
        scheduler_cls.assert_called_once_with(db_path="data/motions.db")
        fake_scheduler.schedule_daily.assert_called_once_with("03:00")
        fake_scheduler.start.assert_called_once()

    def test_main_custom_db_path(self):
        """``--db-path`` reaches the constructor and ``--once`` calls ``run_pipeline`` once."""
        from scheduler import main

        fake_scheduler = MagicMock()
        fake_scheduler.run_pipeline.return_value = 0

        with patch("scheduler.PipelineScheduler") as scheduler_cls:
            scheduler_cls.return_value = fake_scheduler
            exit_code = main(["--db-path", "/tmp/test.db", "--once"])

        assert exit_code == 0
        scheduler_cls.assert_called_once_with(db_path="/tmp/test.db")
        fake_scheduler.run_pipeline.assert_called_once()
||||||
Loading…
Reference in new issue