From 1dbf8da3a2131e5617ed6ebd4325e47982c61213 Mon Sep 17 00:00:00 2001 From: Sven Geboers Date: Sat, 4 Apr 2026 18:47:01 +0200 Subject: [PATCH] docs: move active plan to docs/plans/ --- ...6-04-01-fix-trajectory-plot-not-showing.md | 525 ++++++++++++++++++ 1 file changed, 525 insertions(+) create mode 100644 docs/plans/2026-04-01-fix-trajectory-plot-not-showing.md diff --git a/docs/plans/2026-04-01-fix-trajectory-plot-not-showing.md b/docs/plans/2026-04-01-fix-trajectory-plot-not-showing.md new file mode 100644 index 0000000..9a6d997 --- /dev/null +++ b/docs/plans/2026-04-01-fix-trajectory-plot-not-showing.md @@ -0,0 +1,525 @@ +# Fix Trajectory Plot Not Showing - Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Fix the trajectory plot not showing by diagnosing and handling the NaN centroid edge case that's causing `trace_count == 0` + +**Architecture:** Add diagnostics to identify why `plottable_parties` is empty, improve the name matching between positions and party_map, and ensure the plot renders even when party centroids have NaN values by falling back to MP trajectories. + +**Tech Stack:** Python, Streamlit, Plotly, DuckDB, NumPy + +--- + +## Investigation Summary + +The trajectory plot isn't rendering because: +1. `trace_count == 0` at `explorer.py:2099` +2. `plottable_parties` is empty because all party centroids have NaN values +3. NaN centroids occur when MP names in `positions_by_window` don't match names in `party_map` +4. 
The data exists (73k SVD vectors, 1036 party mappings) but the join fails silently + +## Files to Modify + +- `explorer.py` - Main trajectory tab logic (lines 1601-2143) +- `explorer_helpers.py` - `compute_party_centroids()` function (line 246) +- `tests/test_trajectory_plot_renders.py` - New tests for diagnostics and plot rendering (created in Task 5) + +--- + +### Task 1: Add Diagnostic Logging to Identify the Root Cause + +**Files:** +- Modify: `explorer.py:1966-2010` (around the `select_trajectory_plot_data` call) + +- [ ] **Step 1: Add diagnostics to show why trace_count is 0** + +Add diagnostic logging before the `trace_count == 0` check to capture the state: + +```python +# Around line 2095 in explorer.py, before the trace_count check +# Add detailed diagnostics to understand why trace_count is 0 + +# Debug: Log the state of data leading to trace_count +if trace_count == 0: + _last_trajectories_diagnostics.update({ + "stage": "zero_traces", + "positions_count": sum(len(pos) for pos in positions_by_window.values()) if positions_by_window else 0, + "party_map_count": len(party_map) if party_map else 0, + "centroids_count": len(centroids) if centroids else 0, + "selected_parties_count": len(selected_parties) if selected_parties else 0, + "timestamp": datetime.now().isoformat(), + }) + + # Check if there are positions but no centroids (name mismatch) + if positions_by_window and party_map and not centroids: + # Sample some MP names from positions + sample_mps = [] + for window, positions in list(positions_by_window.items())[:1]: + sample_mps = list(positions.keys())[:5] + break + + # Check if these MPs are in party_map + matched = sum(1 for mp in sample_mps if mp in party_map) + _last_trajectories_diagnostics["name_match_check"] = { + "sample_mps": sample_mps, + "matched_in_party_map": matched, + "sample_size": len(sample_mps), + } +``` + +- [ ] **Step 2: Run the app and check the diagnostics** + +Run the Streamlit app and navigate to the trajectory tab: + +```bash +cd 
/home/sgeboers/Projects/stemwijzer +.venv/bin/python -m streamlit run Home.py +``` + +Check if the diagnostics now show why `trace_count` is 0. + +- [ ] **Step 3: Commit the diagnostic changes** + +```bash +git add explorer.py +git commit -m "diagnose(trajectory): add diagnostics to identify why trace_count is 0" +``` + +--- + +### Task 2: Improve Party Centroid Calculation with NaN Handling + +**Files:** +- Modify: `explorer_helpers.py:246-297` (`compute_party_centroids` function) + +- [ ] **Step 1: Add diagnostics to compute_party_centroids** + +Modify the `compute_party_centroids` function to log when parties have NaN centroids: + +```python +# In explorer_helpers.py, modify compute_party_centroids function +# Add at the start of the function (around line 249) + +def compute_party_centroids(positions_by_window, party_map, min_mps=5): + """ + Compute party centroids from MP positions. + + Returns: + dict: {party: [(x, y), ...]} for each window + dict: Diagnostic info about computation + """ + diagnostics = { + "input_windows": len(positions_by_window) if positions_by_window else 0, + "input_party_map_entries": len(party_map) if party_map else 0, + "windows_processed": 0, + "parties_with_positions": set(), + "parties_all_nan": [], + "name_mismatch_samples": [], + } + + if not positions_by_window or not party_map: + return {}, diagnostics + + # ... rest of existing code ... + + # After computing centroids, check for all-NaN parties + for party, coords in party_centroids.items(): + if all(np.isnan(x) and np.isnan(y) for x, y in coords): + diagnostics["parties_all_nan"].append(party) + + return party_centroids, diagnostics +``` + +- [ ] **Step 2: Update the return signature and handle the new return value** + +Change the return from: +```python +return party_centroids +``` +to: +```python +return party_centroids, diagnostics +``` + +Then update all callers to handle the new return value. 
Search for all usages across the repository (tests and other modules may call it too): + +```bash +grep -rn "compute_party_centroids" --include="*.py" . +``` + +Update each call site to unpack the tuple: + +```python +# Change from: +centroids = compute_party_centroids(positions_by_window, party_map) + +# To: +centroids, centroid_diagnostics = compute_party_centroids(positions_by_window, party_map) +``` + +- [ ] **Step 3: Run tests to verify the changes work** + +```bash +cd /home/sgeboers/Projects/stemwijzer +.venv/bin/python -m pytest tests/test_compute_party_centroids.py -v +``` + +Expected: Tests pass; if they fail because they still expect the old single-value return, update them in Step 4 + +- [ ] **Step 4: Update tests for new return signature** + +If tests fail, update them to handle the new return signature: + +```python +# In tests/test_compute_party_centroids.py +# Change calls from: +centroids = compute_party_centroids(...) + +# To: +centroids, diagnostics = compute_party_centroids(...) +``` + +- [ ] **Step 5: Commit the centroid diagnostics** + +```bash +git add explorer_helpers.py tests/test_compute_party_centroids.py +git commit -m "fix(trajectory): add diagnostics to compute_party_centroids for NaN detection" +``` + +--- + +### Task 3: Fix the Name Mismatch Between Positions and Party Map + +**Files:** +- Modify: `explorer.py:1645-1660` (around `load_party_map` and centroid computation) + +- [ ] **Step 1: Add name normalization to improve matching** + +MP names might have slightly different formats between SVD vectors and metadata. 
Add normalization: + +```python +# In explorer.py, after loading party_map (around line 1645) +# Add name normalization to improve matching + +def normalize_mp_name(name): + """Normalize MP name for better matching between data sources.""" + if not name: + return name + # Remove leading/trailing whitespace + name = name.strip() + # Ensure consistent spacing after comma + if ',' in name and ', ' not in name: + name = name.replace(',', ', ') + return name + +# Normalize party_map keys +party_map = {normalize_mp_name(k): v for k, v in party_map.items()} + +# Also normalize MP names in positions_by_window +normalized_positions = {} +for window, positions in positions_by_window.items(): + normalized_positions[window] = { + normalize_mp_name(k): v for k, v in positions.items() + } +positions_by_window = normalized_positions +``` + +- [ ] **Step 2: Add validation to log name matching issues** + +After normalization, check how many MPs are matched: + +```python +# After normalization, log the match rate (max(..., 1) avoids division by zero when there are no positions) +all_mp_names = set() +for positions in positions_by_window.values(): + all_mp_names.update(positions.keys()) + +matched_names = sum(1 for mp in all_mp_names if mp in party_map) +logger.info(f"MP name matching: {matched_names}/{len(all_mp_names)} matched ({100*matched_names/max(len(all_mp_names), 1):.1f}%)") + +if matched_names == 0 and len(all_mp_names) > 0: + logger.warning("No MP names matched between positions and party_map!") + logger.warning(f"Sample positions names: {list(all_mp_names)[:5]}") + logger.warning(f"Sample party_map names: {list(party_map.keys())[:5]}") +``` + +- [ ] **Step 3: Run the app and verify name matching improves** + +```bash +cd /home/sgeboers/Projects/stemwijzer +.venv/bin/python -m streamlit run Home.py +``` + +Check the logs for match rate information. 
+ +- [ ] **Step 4: Commit the name normalization fix** + +```bash +git add explorer.py +git commit -m "fix(trajectory): normalize MP names to improve party_map matching" +``` + +--- + +### Task 4: Ensure Plot Renders Even with Partial Data + +**Files:** +- Modify: `explorer.py:1736-1777` (fallback to MP trajectories) +- Modify: `explorer.py:2099-2143` (trace_count == 0 handling) + +- [ ] **Step 1: Improve the MP trajectory fallback** + +When party centroids fail, ensure the MP trajectory fallback actually works: + +```python +# In explorer.py, around line 1750 where mp_positions is computed +# Make sure this path actually produces a plot + +if not centroids: + # Fallback: plot individual MP trajectories + st.info("Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback.") + + # Collect MP positions across all windows + mp_positions = {} + for window, positions in positions_by_window.items(): + for mp, (x, y) in positions.items(): + if mp not in mp_positions: + mp_positions[mp] = {} + mp_positions[mp][window] = (x, y) + + # Filter to MPs with at least 2 windows (need trajectory, not just point) + mp_positions = {mp: pos for mp, pos in mp_positions.items() + if len(pos) >= 2 and not all(np.isnan(x) and np.isnan(y) for x, y in pos.values())} + + if not mp_positions: + st.warning("Geen positiedata beschikbaar voor trajectplotten.") + _last_trajectories_diagnostics["stage"] = "no_mp_positions" + return + + # Store for later use + st.session_state["_trajectory_mp_positions"] = mp_positions +``` + +- [ ] **Step 2: Fix the trace_count == 0 handling** + +When `trace_count == 0`, provide more helpful information: + +```python +# In explorer.py, around line 2099, replace the existing trace_count == 0 block + +if trace_count == 0: + st.info("📊 **Geen trajecten getekend**") + + # Show diagnostic information + with st.expander("🔍 Diagnostische informatie"): + st.write("**Data status:**") + st.write(f"- Positie vensters: {len(positions_by_window) if 
positions_by_window else 0}") + st.write(f"- Party mappings: {len(party_map) if party_map else 0}") + st.write(f"- Geselecteerde partijen: {len(selected_parties) if selected_parties else 0}") + + if 'centroid_diagnostics' in locals(): + st.write("**Centroid berekening:**") + st.write(f"- Partijen met posities: {len(centroid_diagnostics.get('parties_with_positions', []))}") + st.write(f"- Partijen met alleen NaN: {len(centroid_diagnostics.get('parties_all_nan', []))}") + + st.write("\n**Mogelijke oorzaken:**") + st.write("1. Geen SVD vectoren berekend voor de geselecteerde vensters") + st.write("2. MP namen in posities komen niet overeen met party_map") + st.write("3. Alle geselecteerde partijen hebben te weinig MPs (< 5)") + + # Add a button to run diagnostics + if st.button("🔧 Database diagnostiek uitvoeren"): + with st.spinner("Bezig met diagnostiek..."): + # Import and run diagnostics + from scripts.diagnose_trajectories_cli import diagnose_trajectories + results = diagnose_trajectories(db_path) + st.json(results) +else: + # Render the plot + st.plotly_chart(fig, use_container_width=True, key="trajectory_plot") +``` + +- [ ] **Step 3: Test the improved error handling** + +Run the app and verify: +1. When data is missing, helpful diagnostics appear +2. The expander shows detailed information +3. The database diagnostics button works + +```bash +cd /home/sgeboers/Projects/stemwijzer +.venv/bin/python -m streamlit run Home.py +``` + +- [ ] **Step 4: Commit the improved fallback** + +```bash +git add explorer.py +git commit -m "fix(trajectory): improve fallback handling and diagnostics when trace_count is 0" +``` + +--- + +### Task 5: Add Integration Test for the Fix + +**Files:** +- Create: `tests/test_trajectory_plot_renders.py` + +- [ ] **Step 1: Create a test that verifies the plot renders** + +```python +# tests/test_trajectory_plot_renders.py +""" +Test that trajectory plot renders even with edge cases. 
+""" + +import pytest +import numpy as np +from unittest.mock import MagicMock, patch + +# Import the functions to test +import sys +sys.path.insert(0, '/home/sgeboers/Projects/stemwijzer') + +from explorer_helpers import compute_party_centroids + + +class TestTrajectoryPlotRendering: + """Tests to ensure trajectory plot renders in various scenarios.""" + + def test_compute_party_centroids_returns_diagnostics(self): + """Test that compute_party_centroids returns diagnostics tuple.""" + positions_by_window = { + "2024-Q1": {"MP1": (1.0, 2.0), "MP2": (3.0, 4.0)}, + "2024-Q2": {"MP1": (1.5, 2.5), "MP2": (3.5, 4.5)}, + } + party_map = {"MP1": "PartyA", "MP2": "PartyA"} + + centroids, diagnostics = compute_party_centroids( + positions_by_window, party_map, min_mps=1 + ) + + assert isinstance(centroids, dict) + assert isinstance(diagnostics, dict) + assert "input_windows" in diagnostics + assert diagnostics["input_windows"] == 2 + + def test_compute_party_centroids_detects_all_nan_parties(self): + """Test that diagnostics identify parties with all NaN centroids.""" + positions_by_window = { + "2024-Q1": {"MP1": (np.nan, np.nan)}, + "2024-Q2": {"MP1": (np.nan, np.nan)}, + } + party_map = {"MP1": "PartyA"} + + centroids, diagnostics = compute_party_centroids( + positions_by_window, party_map, min_mps=1 + ) + + assert "PartyA" in diagnostics.get("parties_all_nan", []) + + def test_name_normalization_improves_matching(self): + """Test that normalized names improve party matching.""" + # Positions use a name without a space after the comma + positions_by_window = { + "2024-Q1": {"Agema,M.": (1.0, 2.0)}, + } + # Party map uses the canonical "Surname, I." spacing + party_map = {"Agema, M.": "PVV"} # Without normalization, these keys do not match + + # After normalization, they should match + def normalize_mp_name(name): + if not name: + return name + name = name.strip() + if ',' in name and ', ' not in name: + name = name.replace(',', ', ') + return name + + normalized_party_map = { + 
normalize_mp_name(k): v for k, v in party_map.items() + } + normalized_positions = { + window: {normalize_mp_name(k): v for k, v in positions.items()} + for window, positions in positions_by_window.items() + } + + # Check matching + all_mp_names = set() + for positions in normalized_positions.values(): + all_mp_names.update(positions.keys()) + + matched = sum(1 for mp in all_mp_names if mp in normalized_party_map) + assert matched > 0, "Name normalization should improve matching" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) +``` + +- [ ] **Step 2: Run the new tests** + +```bash +cd /home/sgeboers/Projects/stemwijzer +.venv/bin/python -m pytest tests/test_trajectory_plot_renders.py -v +``` + +Expected: All tests pass + +- [ ] **Step 3: Commit the new tests** + +```bash +git add tests/test_trajectory_plot_renders.py +git commit -m "test(trajectory): add tests for plot rendering with edge cases" +``` + +--- + +### Task 6: Run Full Test Suite + +**Files:** +- All test files + +- [ ] **Step 1: Run all trajectory-related tests** + +```bash +cd /home/sgeboers/Projects/stemwijzer +.venv/bin/python -m pytest tests/test_trajectory*.py tests/test_compute_party_centroids.py -v +``` + +Expected: All tests pass + +- [ ] **Step 2: Verify no regressions in other tests** + +```bash +cd /home/sgeboers/Projects/stemwijzer +.venv/bin/python -m pytest tests/test_explorer*.py -v +``` + +Expected: All tests pass + +- [ ] **Step 3: Final commit** + +```bash +git log --oneline -5 # Review commits +git status # Ensure all changes are committed +``` + +--- + +## Self-Review Checklist + +- [ ] **Spec coverage:** All diagnostic and fallback improvements are covered +- [ ] **Placeholder scan:** No TBD, TODO, or incomplete sections +- [ ] **Type consistency:** Return signatures match between function and callers +- [ ] **Test coverage:** New tests added for edge cases + +## Execution Handoff + +**Plan complete.** Two execution options: + +**1. 
Subagent-Driven (recommended)** - I dispatch a fresh subagent per task, review between tasks, fast iteration + +**2. Inline Execution** - Execute tasks in this session using executing-plans, batch execution with checkpoints for review + +Which approach would you prefer?