fix(trajectory): normalize MP names to improve party_map matching

main
Sven Geboers 1 month ago
parent 26bdb4c61c
commit 0b79709847
  1. 39
      explorer.py

@ -1648,6 +1648,45 @@ def build_trajectories_tab(db_path: str, window_size: str) -> None:
f"[TRAJ DEBUG] load_party_map → {len(party_map)} entries, "
f"sample={list(party_map.items())[:3]}"
)
# Add name normalization to improve matching
def normalize_mp_name(name):
    """Normalize an MP name so the same person matches across data sources.

    Surrounding whitespace is removed, runs of whitespace collapse to a
    single space, and every comma ends up with no space before it and
    exactly one space after it (``"Jansen ,P."`` -> ``"Jansen, P."``).

    Args:
        name: The raw name string. Falsy values (``None``, ``""``) are
            returned unchanged.

    Returns:
        The normalized name, or ``name`` itself when it is falsy.
    """
    if not name:
        return name
    # Collapse all whitespace runs (this also strips both ends).
    name = " ".join(name.split())
    # Re-space EVERY comma, not only when the string has no ", " at all:
    # a name like "Berg,van den, J." previously slipped through untouched.
    name = ", ".join(part.strip() for part in name.split(","))
    # A trailing comma would otherwise leave a dangling space behind.
    return name.strip()
# Re-key party_map by normalized MP name. If two raw names normalize to the
# same key, the entry seen last wins.
party_map = {
    normalize_mp_name(raw_name): party for raw_name, party in party_map.items()
}
# Apply the same key normalization to each window's positions so both
# sides of the lookup use identical name formatting.
positions_by_window = {
    window: {
        normalize_mp_name(raw_name): position
        for raw_name, position in positions.items()
    }
    for window, positions in positions_by_window.items()
}
# After normalization, log how many MP names from the positions data can be
# found in party_map.
all_mp_names = set()
for positions in positions_by_window.values():
    all_mp_names.update(positions.keys())
matched_names = sum(1 for mp in all_mp_names if mp in party_map)
# Guard the percentage: with no positions at all the set is empty and the
# division previously raised ZeroDivisionError before anything was logged.
match_pct = 100 * matched_names / len(all_mp_names) if all_mp_names else 0.0
logger.info(
    f"MP name matching: {matched_names}/{len(all_mp_names)} matched ({match_pct:.1f}%)"
)
# A total mismatch usually means the two sources format names differently;
# dump a few samples from each side to make the difference visible.
if matched_names == 0 and len(all_mp_names) > 0:
    logger.warning("No MP names matched between positions and party_map!")
    logger.warning(f"Sample positions names: {list(all_mp_names)[:5]}")
    logger.warning(f"Sample party_map names: {list(party_map.keys())[:5]}")
windows = sorted(positions_by_window.keys())
# Compute party centroids per window

Loading…
Cancel
Save