From 0b797098470764d234e2490c3d83ef6e00fb3b21 Mon Sep 17 00:00:00 2001
From: Sven Geboers
Date: Wed, 1 Apr 2026 01:43:39 +0200
Subject: [PATCH] fix(trajectory): normalize MP names to improve party_map matching

---
NOTE(review): this patch was re-flowed from a whitespace-collapsed copy, so
the hunk's indentation is assumed (4-space function body) and the `index`
blob ids predate the review changes below. Confirm against explorer.py
before applying.

Review changes relative to the original hunk:
  * guard the match-rate percentage against an empty name set
    (previously raised ZeroDivisionError);
  * normalize every comma, not only names that lack ", " entirely, and
    collapse internal whitespace runs.

 explorer.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/explorer.py b/explorer.py
index d770950..8aa6359 100644
--- a/explorer.py
+++ b/explorer.py
@@ -1648,6 +1648,48 @@ def build_trajectories_tab(db_path: str, window_size: str) -> None:
         f"[TRAJ DEBUG] load_party_map → {len(party_map)} entries, "
         f"sample={list(party_map.items())[:3]}"
     )
+
+    # Normalize MP names so both data sources use the same key format.
+    def normalize_mp_name(name):
+        """Return *name* in canonical form for cross-source matching.
+
+        Runs of whitespace collapse to one space, the ends are trimmed,
+        and every comma gets exactly one space after it and none before.
+        Falsy values (``None``, ``""``) are returned unchanged.
+        """
+        if not name:
+            return name
+        collapsed = " ".join(name.split())
+        # Split on every comma so mixed spacing ("A,B, C") is fixed too;
+        # testing for ", " anywhere in the string would skip such names.
+        return ", ".join(part.strip() for part in collapsed.split(","))
+
+    # Apply the same normalization to both sides of the lookup.
+    party_map = {normalize_mp_name(k): v for k, v in party_map.items()}
+    positions_by_window = {
+        window: {normalize_mp_name(k): v for k, v in positions.items()}
+        for window, positions in positions_by_window.items()
+    }
+
+    # Log the match rate so a mismatch between sources shows up early.
+    all_mp_names = set()
+    for positions in positions_by_window.values():
+        all_mp_names.update(positions)
+
+    matched_names = sum(1 for mp in all_mp_names if mp in party_map)
+    if not all_mp_names:
+        # No names at all: skip the percentage to avoid dividing by zero.
+        logger.info("MP name matching: no MP names in positions_by_window")
+    else:
+        logger.info(
+            f"MP name matching: {matched_names}/{len(all_mp_names)} matched "
+            f"({100 * matched_names / len(all_mp_names):.1f}%)"
+        )
+        if matched_names == 0:
+            logger.warning("No MP names matched between positions and party_map!")
+            logger.warning(f"Sample positions names: {list(all_mp_names)[:5]}")
+            logger.warning(f"Sample party_map names: {list(party_map.keys())[:5]}")
+
     windows = sorted(positions_by_window.keys())

     # Compute party centroids per window