|
|
|
|
@ -1648,6 +1648,45 @@ def build_trajectories_tab(db_path: str, window_size: str) -> None: |
|
|
|
|
f"[TRAJ DEBUG] load_party_map → {len(party_map)} entries, " |
|
|
|
|
f"sample={list(party_map.items())[:3]}" |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
# Add name normalization to improve matching |
|
|
|
|
def normalize_mp_name(name):
    """Normalize an MP name so the same person matches across data sources.

    Leading and trailing whitespace is removed, and every comma is rewritten
    to be followed by exactly one space with no whitespace before it.

    Args:
        name: The raw MP name. Falsy values (``None``, ``""``) are returned
            unchanged.

    Returns:
        The normalized name, or ``name`` itself when it is falsy.
    """
    if not name:
        return name
    # Normalize each comma on its own: checking the whole string for ", "
    # would skip names with mixed styles such as "Smith,John, Jr".
    pieces = [piece.strip() for piece in name.split(",")]
    # The final strip drops the space that a trailing comma would leave behind.
    return ", ".join(pieces).strip()
|
|
|
|
|
|
|
|
|
# Re-key party_map by normalized MP name so lookups use one spelling
party_map = {
    normalize_mp_name(mp_name): party for mp_name, party in party_map.items()
}

# Apply the same normalization to the MP keys inside every window
normalized_positions = {
    window: {
        normalize_mp_name(mp_name): position
        for mp_name, position in positions.items()
    }
    for window, positions in positions_by_window.items()
}
positions_by_window = normalized_positions
|
|
|
|
|
|
|
|
|
# After normalization, log the match rate between positions and party_map
all_mp_names = set()
for positions in positions_by_window.values():
    all_mp_names.update(positions.keys())

matched_names = sum(1 for mp in all_mp_names if mp in party_map)
total_names = len(all_mp_names)
# With no positions at all the ratio is undefined; report 0.0% instead of
# letting the division raise ZeroDivisionError inside the log call.
match_pct = 100 * matched_names / total_names if total_names else 0.0
logger.info(
    f"MP name matching: {matched_names}/{total_names} matched ({match_pct:.1f}%)"
)

# A zero match rate with names present points to a naming mismatch between
# the two sources, so log samples from both sides for comparison.
if matched_names == 0 and total_names > 0:
    logger.warning("No MP names matched between positions and party_map!")
    logger.warning(f"Sample positions names: {list(all_mp_names)[:5]}")
    logger.warning(f"Sample party_map names: {list(party_map.keys())[:5]}")

windows = sorted(positions_by_window.keys())
|
|
|
|
|
|
|
|
|
# Compute party centroids per window |
|
|
|
|
|