You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
motief/analysis/tabs/trajectories.py

774 lines
28 KiB

"""Trajectories tab for the parliamentary explorer."""
from __future__ import annotations
import json
import logging
import os
import re
import traceback
from datetime import datetime
from typing import Dict, List, Optional, Tuple
import numpy as np
from analysis import config
import analysis.explorer_data as explorer_data
from analysis import trajectory
from analysis.tabs._rendering import (
PARTY_COLOURS,
_add_y_direction_annotations,
go,
st,
)
from explorer_helpers import compute_party_centroids, inspect_positions_for_issues
# Party list taken from the shared config; build_trajectories_tab uses it as
# the second-choice default selection when CDA/D66/VVD are not available.
KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES
logger = logging.getLogger(__name__)
# Module-level scratch dict that the functions below update with the most
# recent diagnostic state ("stage", tracebacks, counts). It is mutated in
# place and never rebound within this module.
_last_trajectories_diagnostics: dict = {}
# Second name bound to the very same dict object (not a copy).
_last_diagnostics = _last_trajectories_diagnostics
def get_debug_trajectories_enabled() -> bool:
    """Return True when EXPLORER_DEBUG_TRAJECTORIES requests debug mode.

    The variable counts as enabled when, after trimming surrounding
    whitespace and ignoring case, it equals ``"1"`` or ``"true"``. An unset
    variable or any other value means disabled.
    """
    raw = os.getenv("EXPLORER_DEBUG_TRAJECTORIES", "")
    # Normalise so that "TRUE" or " 1 " from a shell/.env file are honoured too.
    return raw.strip().lower() in ("1", "true")
def select_trajectory_plot_data(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    party_map: Dict[str, str],
    windows: List[str],
    selected_parties: List[str],
    smooth_alpha: float = 0.35,
    mp_fallback_count: Optional[int] = None,
) -> Tuple[go.Figure, int, Optional[str]]:
    """Return ``(fig, trace_count, banner_text)`` for the trajectories plot.

    Helper used by build_trajectories_tab. Does not call Streamlit.

    Party centroids are obtained from ``compute_party_centroids``. When at
    least one party has a usable centroid, one trace per party is drawn
    (restricted to ``selected_parties`` when any of them is plottable,
    otherwise all plottable parties). When no party is plottable, the
    function falls back to one grey trace per MP for the MPs that appear in
    the most windows, and returns a banner text explaining the fallback.

    Args:
        positions_by_window: ``{window_id: {mp_name: (x, y)}}``.
        party_map: ``{mp_name: party}``; passed through to the centroid and
            inspection helpers.
        windows: Window ids; used as the iteration order for the MP fallback
            and as the point labels of party traces (presumably aligned with
            the centroid lists returned by ``compute_party_centroids`` —
            verify against that helper).
        selected_parties: Parties the caller wants to see.
        smooth_alpha: EMA factor; values ``>= 1.0`` disable smoothing.
        mp_fallback_count: Maximum number of MP traces in fallback mode.
            ``None`` reads ``EXPLORER_MP_FALLBACK_COUNT`` (default 20).

    Returns:
        The figure, the number of traces added, and the fallback banner text
        (``None`` when party traces were drawn).
    """
    log = logging.getLogger(__name__)

    if mp_fallback_count is None:
        try:
            mp_fallback_count = int(os.getenv("EXPLORER_MP_FALLBACK_COUNT", "20"))
        except (TypeError, ValueError):
            mp_fallback_count = 20
    # A negative count would turn the slice below into "all but the last N".
    mp_fallback_count = max(0, mp_fallback_count)

    party_centroids, _meta = compute_party_centroids(
        positions_by_window, party_map, windows
    )

    # The inspection is purely diagnostic: a failure is recorded, never raised.
    try:
        inspector_summary = inspect_positions_for_issues(positions_by_window, party_map)
    except Exception:
        tb = traceback.format_exc()
        inspector_summary = {}
        select_trajectory_plot_data._last_diagnostics = {
            "stage": "inspector_exception",
            "exception": tb,
        }
        _last_trajectories_diagnostics.update(
            {"stage": "inspector_exception", "exception": tb}
        )
    log.debug("select_trajectory_plot_data inspector summary: %s", inspector_summary)

    def _is_missing(value) -> bool:
        """True for None, NaN, or anything that cannot be read as a float."""
        if value is None:
            return True
        try:
            return bool(np.isnan(float(value)))
        except (TypeError, ValueError):
            return True

    # A party is plottable when at least one of its centroids has a usable
    # coordinate. The check is None-safe, matching the None handling used
    # when the traces are built further down.
    plottable_parties = [
        party
        for party, vals in party_centroids.items()
        if any(not (_is_missing(x) and _is_missing(y)) for x, y in vals)
    ]
    log.debug(
        "[TRAJ DEBUG] plottable_parties: %d parties, sample=%s",
        len(plottable_parties),
        (plottable_parties[:5] if plottable_parties else "empty"),
    )
    log.debug(
        "[TRAJ DEBUG] party_centroids keys: %s",
        list(party_centroids)[:10],
    )
    if party_centroids:
        sample_party = next(iter(party_centroids))
        log.debug(
            "[TRAJ DEBUG] Sample party '%s' centroids: %s...",
            sample_party,
            party_centroids[sample_party][:3],
        )

    fig = go.Figure()
    trace_count = 0

    def _ema_smooth(values: List[float], alpha: float) -> List[float]:
        """Exponential moving average; missing samples stay NaN and are skipped."""
        if not values or alpha >= 1.0:
            return values
        smoothed: List[float] = []
        prev: Optional[float] = None
        for v in values:
            if _is_missing(v):
                smoothed.append(float(np.nan))
                continue
            v = float(v)
            prev = v if prev is None else alpha * v + (1 - alpha) * prev
            smoothed.append(float(prev))
        return smoothed

    if not plottable_parties:
        # Fallback: no usable party centroid, so draw individual MPs instead.
        mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
        for wid in windows:
            for mp_name, xy in positions_by_window.get(wid, {}).items():
                try:
                    point = (float(xy[0]), float(xy[1]))
                except Exception:
                    # Malformed coordinate entry: skip this MP for this window.
                    continue
                mp_positions.setdefault(mp_name, {})[wid] = point

        # Most active MPs first (stable sort keeps first-seen order on ties).
        mp_activity = sorted(
            ((mp, len(wdict)) for mp, wdict in mp_positions.items()),
            key=lambda item: item[1],
            reverse=True,
        )
        top_mps = [mp for mp, _ in mp_activity[:mp_fallback_count]]

        for mp in top_mps:
            wids_sorted = sorted(mp_positions[mp])
            xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
            ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
            fig.add_trace(
                go.Scatter(
                    x=_ema_smooth(xs_raw, smooth_alpha),
                    y=_ema_smooth(ys_raw, smooth_alpha),
                    mode="lines+markers",
                    name=mp,
                    text=wids_sorted,
                    # Raw (unsmoothed) coordinates travel along as customdata.
                    customdata=list(zip(xs_raw, ys_raw)),
                    line=dict(color="#888888", shape="spline", smoothing=1.3),
                    marker=dict(color="#888888", size=6),
                )
            )
            trace_count += 1

        banner_text = "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback."
        log.debug(
            "[TRAJ DEBUG] Fallback to MP trajectories: trace_count=%d, top_mps=%d",
            trace_count,
            len(top_mps),
        )
        return fig, trace_count, banner_text

    # Honour the caller's selection; if none of it is plottable, show everything.
    to_plot = [p for p in selected_parties if p in plottable_parties]
    if not to_plot:
        to_plot = plottable_parties

    for party in to_plot:
        vals = party_centroids.get(party, [])
        if not vals:
            continue
        xs_raw = [v[0] for v in vals]
        ys_raw = [v[1] for v in vals]
        custom_raw = [
            (
                float(np.nan) if _is_missing(x) else float(x),
                float(np.nan) if _is_missing(y) else float(y),
            )
            for x, y in zip(xs_raw, ys_raw)
        ]
        colour = PARTY_COLOURS.get(party, "#9E9E9E")
        fig.add_trace(
            go.Scatter(
                x=_ema_smooth(xs_raw, smooth_alpha),
                y=_ema_smooth(ys_raw, smooth_alpha),
                mode="lines+markers",
                name=party,
                text=windows,
                customdata=custom_raw,
                line=dict(color=colour, shape="spline", smoothing=1.3),
                marker=dict(color=colour, size=8),
            )
        )
        trace_count += 1

    log.debug(
        "[TRAJ DEBUG] Final trace_count=%d, plottable_parties=%d, to_plot=%s",
        trace_count,
        len(plottable_parties),
        len(to_plot),
    )
    return fig, trace_count, None
# Shared tail of the hover template used by every trajectory trace drawn here.
_TRAJECTORY_HOVER_ROWS = (
    "venster: %{text}<br>"
    "x (smoothed): %{x:.3f}<br>"
    "x (raw): %{customdata[0]:.3f}<br>"
    "y (smoothed): %{y:.3f}<br>"
    "y (raw): %{customdata[1]:.3f}<extra></extra>"
)


def _ema_skip_missing(values: List[float], alpha: float) -> List[float]:
    """Exponential moving average that leaves None/NaN samples as NaN gaps."""
    if not values or alpha >= 1.0:
        return values
    smoothed: List[float] = []
    state: Optional[float] = None
    for value in values:
        if value is None or np.isnan(value):
            # A gap must not poison the running average of later samples.
            smoothed.append(float(np.nan))
            continue
        value = float(value)
        state = value if state is None else alpha * value + (1 - alpha) * state
        smoothed.append(state)
    return smoothed


def _polyfit_smooth(values: List[float]) -> List[float]:
    """Fit a polynomial of degree <= 3 through the finite samples of *values*.

    Missing samples (None/NaN) stay NaN. With two or fewer finite samples, or
    when the fit fails, the input is returned unchanged.
    """
    try:
        arr = np.array([np.nan if v is None else v for v in values], dtype=float)
        finite = np.isfinite(arr)
        n_finite = int(finite.sum())
        if n_finite <= 2:
            return values
        idx = np.arange(arr.size, dtype=float)
        coeffs = np.polyfit(idx[finite], arr[finite], deg=min(3, n_finite - 1))
        fitted = arr.copy()
        fitted[finite] = np.polyval(coeffs, idx[finite])
        return [float(v) for v in fitted]
    except Exception:
        return values


def _collect_mp_tracks(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    windows: List[str],
) -> Dict[str, Dict[str, Tuple[float, float]]]:
    """Pivot ``{window: {mp: xy}}`` into ``{mp: {window: (x, y)}}``.

    Entries whose coordinates cannot be read as two floats are skipped.
    """
    tracks: Dict[str, Dict[str, Tuple[float, float]]] = {}
    for wid in windows:
        for mp_name, xy in positions_by_window.get(wid, {}).items():
            try:
                point = (float(xy[0]), float(xy[1]))
            except (TypeError, ValueError, IndexError, KeyError):
                continue
            tracks.setdefault(mp_name, {})[wid] = point
    return tracks


def _add_trajectory_trace(fig, name, points_by_window, smooth, colour, marker_size):
    """Append one lines+markers trace for *name*, windows in sorted order.

    *smooth* maps a list of raw coordinates to the plotted coordinates; the
    raw coordinates are attached as customdata for the hover template.
    """
    window_ids = sorted(points_by_window)
    xs_raw = [points_by_window[w][0] for w in window_ids]
    ys_raw = [points_by_window[w][1] for w in window_ids]
    fig.add_trace(
        go.Scatter(
            x=smooth(xs_raw),
            y=smooth(ys_raw),
            mode="lines+markers",
            name=name,
            text=window_ids,
            customdata=[(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)],
            line=dict(color=colour, shape="spline", smoothing=1.3),
            marker=dict(color=colour, size=marker_size),
            hovertemplate=f"<b>{name}</b><br>" + _TRAJECTORY_HOVER_ROWS,
        )
    )


def _show_trajectory_diagnostics_if_debug() -> None:
    """Dump the module diagnostics dict into a text area when debug is on."""
    if not get_debug_trajectories_enabled():
        return
    try:
        st.text_area(
            "Trajectories diagnostics",
            json.dumps(_last_trajectories_diagnostics, default=str),
            height=160,
        )
    except Exception:
        # Best effort: diagnostics must never break the tab itself.
        logger.debug("could not render trajectories diagnostics", exc_info=True)


def build_trajectories_tab(db_path: str, window_size: str) -> None:
    """Build the Partij Trajectories tab.

    Loads MP positions and the MP-to-party map, lets the user pick parties
    and a smoothing method, and renders one trajectory per party across the
    time windows. Two alternative views exist: when no party centroid can be
    computed, individual MP trajectories are offered instead; when
    ``EXPLORER_FORCE_SHOW_TRAJECTORIES`` is set, the first six MPs are drawn
    unconditionally. Diagnostic state is recorded in
    ``_last_trajectories_diagnostics`` along the way.

    Args:
        db_path: Passed to ``explorer_data.load_positions`` /
            ``load_party_map`` and to the optional database diagnostics.
        window_size: Passed to ``explorer_data.load_positions``.
    """
    logger.debug(
        "[TRAJ DEBUG] build_trajectories_tab called — db_path=%s, window_size=%s",
        db_path,
        window_size,
    )
    st.subheader("Partij Trajectories")
    st.markdown("Hoe bewegen partijen over de tijdsvensters heen?")

    positions_by_window, axis_def = explorer_data.load_positions(db_path, window_size)
    # Treat a missing result like an empty one so the logging below cannot fail.
    positions_by_window = positions_by_window or {}
    logger.debug(
        "[TRAJ DEBUG] load_positions → %d windows, total MPs=%d",
        len(positions_by_window),
        sum(len(v) for v in positions_by_window.values()),
    )
    if axis_def is None:
        axis_def = {}

    if not positions_by_window:
        _last_trajectories_diagnostics.update(
            {
                "stage": "load_positions_empty",
                "positions_by_window_len": len(positions_by_window),
            }
        )
        try:
            st.warning("Geen positiedata beschikbaar.")
        except Exception:
            logger.debug("could not render empty-data warning", exc_info=True)
        _show_trajectory_diagnostics_if_debug()
        return

    party_map = explorer_data.load_party_map(db_path)
    logger.debug(
        "[TRAJ DEBUG] load_party_map → %d entries, sample=%s",
        len(party_map),
        list(party_map.items())[:3],
    )

    def normalize_mp_name(name):
        """Normalize MP name for better matching between data sources."""
        if not name:
            return ""
        name = name.strip()
        # "Last,First" -> "Last, First"; names already using ", " are kept.
        if "," in name and ", " not in name:
            name = name.replace(",", ", ")
        return name

    party_map = {normalize_mp_name(k): v for k, v in party_map.items()}
    positions_by_window = {
        window: {normalize_mp_name(k): v for k, v in positions.items()}
        for window, positions in positions_by_window.items()
    }

    all_mp_names = set()
    for positions in positions_by_window.values():
        all_mp_names.update(positions)
    matched_names = sum(1 for mp in all_mp_names if mp in party_map)
    if all_mp_names:
        logger.info(
            "MP name matching: %d/%d matched (%.1f%%)",
            matched_names,
            len(all_mp_names),
            100 * matched_names / len(all_mp_names),
        )
        if matched_names == 0:
            logger.warning("No MP names matched between positions and party_map!")
            logger.warning("Sample positions names: %s", list(all_mp_names)[:5])
            logger.warning("Sample party_map names: %s", list(party_map)[:5])
    else:
        logger.info("MP name matching: no MPs found in positions data")

    windows = sorted(positions_by_window)

    def _strip_paren(name: str) -> str:
        return re.sub(r"\s*\([^)]*\)", "", name).strip()

    # Per-party mean position for every window in which the party has MPs.
    centroids: Dict[str, Dict[str, Tuple[float, float]]] = {}
    for wid in windows:
        per_party: Dict[str, List[Tuple[float, float]]] = {}
        for mp_name, (x, y) in positions_by_window[wid].items():
            party = party_map.get(mp_name) or party_map.get(
                _strip_paren(mp_name), "Unknown"
            )
            if not party or party == "Unknown":
                continue
            per_party.setdefault(party, []).append((x, y))
        for party, coords in per_party.items():
            centroids.setdefault(party, {})[wid] = (
                float(np.mean([c[0] for c in coords])),
                float(np.mean([c[1] for c in coords])),
            )

    # NOTE(review): the selectable parties come from a direct party_map lookup
    # only, so a party matched solely through the parenthesis-stripped name
    # gets a centroid above but is not offered here — confirm this is intended.
    all_parties = sorted(
        {party_map.get(mp) for mps in positions_by_window.values() for mp in mps}
        - {None, "Unknown"}
    )
    logger.debug(
        "[TRAJ DEBUG] all_parties (raw from party_map) → %d parties: %s",
        len(all_parties),
        all_parties[:10],
    )
    if not all_parties:
        st.info(
            "Geen partijen beschikbaar om trajecten te tekenen. Controleer of de party mapping is geladen (mp_metadata) en of de minimum Kamerleden-instelling te hoog staat."
        )
        try:
            st.caption(f"Bekende partijen in party_map: {len(party_map)}")
        except Exception:
            logger.debug("could not render party_map caption", exc_info=True)

    # Preferred defaults first, then known major parties, then the first six.
    default_parties = (
        [p for p in ["CDA", "D66", "VVD"] if p in all_parties]
        or [p for p in KNOWN_MAJOR_PARTIES if p in all_parties]
        or all_parties[:6]
    )
    selected_parties = st.multiselect(
        "Selecteer partijen",
        options=all_parties,
        default=default_parties,
    )

    def _default_ema(values: List[float]) -> List[float]:
        return _ema_skip_missing(values, 0.35)

    if not centroids:
        # No party could be resolved at all: offer per-MP trajectories instead.
        st.info(
            "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback."
        )
        mp_positions = {
            mp: pos
            for mp, pos in _collect_mp_tracks(positions_by_window, windows).items()
            if len(pos) >= 2
            and not all(np.isnan(x) and np.isnan(y) for x, y in pos.values())
        }
        if not mp_positions:
            st.warning("Geen positiedata beschikbaar voor trajectplotten.")
            _last_trajectories_diagnostics.update(
                {
                    "stage": "no_mp_positions",
                    "mp_positions_count": 0,
                }
            )
            _show_trajectory_diagnostics_if_debug()
            return
        st.session_state["_trajectory_mp_positions"] = mp_positions
        mp_list = sorted(mp_positions)
        selected_mps = st.multiselect(
            "Selecteer Kamerleden (fallback)", options=mp_list, default=mp_list[:6]
        )
        fig = go.Figure()
        trace_count = 0
        for mp in selected_mps:
            _add_trajectory_trace(fig, mp, mp_positions[mp], _default_ema, "#888888", 6)
            trace_count += 1
        _add_y_direction_annotations(fig)
        if trace_count == 0:
            st.info(
                "Geen trajecten getekend: geen geselecteerde Kamerleden met voldoende data."
            )
        else:
            st.plotly_chart(fig, use_container_width=True)
        return

    if os.getenv("EXPLORER_FORCE_SHOW_TRAJECTORIES") in ("1", "true", "True"):
        # Escape hatch: draw the first six MPs regardless of party data.
        mp_positions = _collect_mp_tracks(positions_by_window, windows)
        mp_list = sorted(mp_positions)
        if not mp_list:
            st.info("Geen MP-positiegegevens beschikbaar om te tonen.")
            return
        fig = go.Figure()
        for mp in mp_list[:6]:
            _add_trajectory_trace(fig, mp, mp_positions[mp], _default_ema, "#444444", 6)
        _add_y_direction_annotations(fig)
        st.plotly_chart(fig, use_container_width=True)
        return

    try:
        debug_checkbox = st.checkbox(
            "Enable trajectories diagnostics (show extra info)",
            value=get_debug_trajectories_enabled(),
        )
    except Exception:
        debug_checkbox = get_debug_trajectories_enabled()
    if debug_checkbox:
        try:
            with st.expander(
                "DEBUG: Trajectories data (showing diagnostics)", expanded=False
            ):
                st.write("windows (count):", len(windows))
                st.write("windows sample:", windows[:10])
                st.write("party_map entries:", len(party_map))
                st.write("parties with centroids:", len(all_parties))
                st.write("default_parties:", default_parties)
                st.write("selected_parties:", selected_parties)
                st.write("min_mps setting:", 3)
                sample = {p: len(centroids.get(p, {})) for p in all_parties[:8]}
                st.write("sample centroid window counts per party:", sample)
        except Exception:
            logger.debug("could not render debug expander", exc_info=True)

    smoothing_method = st.selectbox(
        "Smoothing methode",
        options=["EMA", "Spline", "None"],
        index=0,
        help="EMA = exponential moving average; Spline = low-degree polynomial spline fit; None = raw centroids",
    )
    # alpha == 1.0 means "no EMA"; only the EMA method exposes the slider.
    smooth_alpha = 1.0
    if smoothing_method == "EMA":
        smooth_alpha = st.slider(
            "Glad maken (EMA-\u03b1)",
            min_value=0.1,
            max_value=1.0,
            value=0.35,
            step=0.05,
            help=(
                "\u03b1=1.0 toont de ruwe data; lagere waarden maken de lijn gladder. "
                "Standaard 0.35 voor een goed evenwicht tussen detail en ruis."
            ),
        )

    fig = go.Figure()
    trace_count = 0
    # Bound up front so the render-time log line works even if the helper fails.
    banner_text: Optional[str] = None
    helper_succeeded = False
    try:
        helper_fig, helper_traces, banner_text = select_trajectory_plot_data(
            positions_by_window, party_map, windows, selected_parties, smooth_alpha
        )
    except Exception:
        tb = traceback.format_exc()
        select_trajectory_plot_data._last_diagnostics = {"exception": tb}
        _last_trajectories_diagnostics.update(
            {"stage": "select_helper_exception", "exception": tb}
        )
        logger.exception("select_trajectory_plot_data failed")
        if get_debug_trajectories_enabled():
            try:
                st.text_area("select_trajectory_plot_data traceback", tb, height=240)
            except Exception:
                logger.debug("could not render helper traceback", exc_info=True)
    else:
        if helper_fig is not None:
            fig = helper_fig
            trace_count = helper_traces
            helper_succeeded = True
        if banner_text:
            try:
                st.caption(banner_text)
            except Exception:
                logger.debug("could not render fallback banner", exc_info=True)
            _last_trajectories_diagnostics.update({"banner_text": banner_text})
    logger.debug("[TRAJ DEBUG] helper_succeeded=%s", helper_succeeded)

    if helper_succeeded and smoothing_method == "Spline":
        # The helper was called with alpha == 1.0, so its traces still hold
        # the raw coordinates; apply the polynomial fit the option promises.
        try:
            for trace in fig.data:
                if trace.x is not None:
                    trace.x = _polyfit_smooth(list(trace.x))
                if trace.y is not None:
                    trace.y = _polyfit_smooth(list(trace.y))
        except Exception:
            logger.debug("could not apply spline smoothing", exc_info=True)

    if not helper_succeeded:
        # Local fallback: draw from the centroids computed above.
        if smoothing_method == "Spline":
            smooth = _polyfit_smooth
        else:
            def smooth(values: List[float]) -> List[float]:
                return _ema_skip_missing(values, smooth_alpha)

        for party in selected_parties:
            if party not in centroids:
                continue
            _add_trajectory_trace(
                fig,
                party,
                centroids[party],
                smooth,
                PARTY_COLOURS.get(party, "#9E9E9E"),
                8,
            )
            trace_count += 1

    _THRESHOLD = 0.65
    x_title = trajectory.choose_trajectory_title(axis_def, "x", threshold=_THRESHOLD)
    y_title = trajectory.choose_trajectory_title(axis_def, "y", threshold=_THRESHOLD)
    fig.update_layout(
        title="Partij trajectories",
        xaxis_title=x_title,
        yaxis_title=y_title,
        height=600,
        legend_title_text="Partij",
    )
    _add_y_direction_annotations(fig)

    _last_trajectories_diagnostics.update({"trace_count": trace_count})
    debug_enabled = get_debug_trajectories_enabled()

    if trace_count == 0:
        _last_trajectories_diagnostics.update(
            {
                "stage": "zero_traces",
                "positions_count": sum(len(pos) for pos in positions_by_window.values()),
                "party_map_count": len(party_map) if party_map else 0,
                "centroids_count": len(centroids),
                "selected_parties_count": len(selected_parties)
                if selected_parties
                else 0,
                "timestamp": datetime.now().isoformat(),
            }
        )
        if party_map:
            # Record how many MPs of the first window resolve in party_map.
            # (This used to be guarded by "not centroids", which can never
            # hold here because that case returns earlier.)
            first_window = next(iter(positions_by_window.values()), {})
            sample_mps = list(first_window)[:5]
            _last_trajectories_diagnostics["name_match_check"] = {
                "sample_mps": sample_mps,
                "matched_in_party_map": sum(1 for mp in sample_mps if mp in party_map),
                "sample_size": len(sample_mps),
            }

        st.info("📊 **Geen trajecten getekend**")
        with st.expander("🔍 Diagnostische informatie"):
            st.write("**Data status:**")
            st.write(
                f"- Positie vensters: {len(positions_by_window) if positions_by_window else 0}"
            )
            st.write(f"- Party mappings: {len(party_map) if party_map else 0}")
            st.write(
                f"- Geselecteerde partijen: {len(selected_parties) if selected_parties else 0}"
            )
            st.write("\n**Mogelijke oorzaken:**")
            st.write("1. Geen SVD vectoren berekend voor de geselecteerde vensters")
            st.write("2. MP namen in posities komen niet overeen met party_map")
            st.write("3. Alle geselecteerde partijen hebben te weinig MPs (< 5)")
            if st.button("🔧 Database diagnostiek uitvoeren"):
                with st.spinner("Bezig met diagnostiek..."):
                    from scripts.diagnose_trajectories_cli import (
                        run as diagnose_trajectories,
                    )

                    results = diagnose_trajectories(db_path)
                    st.json(results)
    else:
        # NOTE(review): the chart is rendered only when there is at least one
        # trace, mirroring the MP fallback above — confirm against the
        # original layout, whose indentation was lost.
        if debug_enabled or debug_checkbox:
            try:
                st.info(
                    f"[DEBUG] trace_count={trace_count}, fig data count={len(fig.data)}, layout title={fig.layout.title.text if fig.layout.title else 'none'}"
                )
            except Exception:
                logger.debug("could not render debug trace info", exc_info=True)
        try:
            logger.debug(
                "[TRAJ DEBUG] About to render plotly chart — trace_count=%d, banner=%s, fig has %d traces",
                trace_count,
                banner_text,
                len(fig.data),
            )
            st.plotly_chart(fig, use_container_width=True)
        except Exception as e:
            st.error(f"Trajectories rendering failed: {e}")

    if debug_enabled:
        try:
            st.json(_last_trajectories_diagnostics)
        except Exception:
            st.text_area(
                "Trajectories diagnostics (JSON failed)",
                json.dumps(_last_trajectories_diagnostics, default=str),
                height=240,
            )