diff --git a/explorer_helpers.py b/explorer_helpers.py index b1f47bb..1abb9ea 100644 --- a/explorer_helpers.py +++ b/explorer_helpers.py @@ -253,18 +253,31 @@ def compute_party_centroids( Returns (party_centroids, metadata) - party_centroids: mapping party -> list of (x,y) tuples of length len(windows). Entries without MPs are (np.nan, np.nan). - - metadata: dict with keys 'per_party_counts', 'total_windows', 'parties' + - metadata: dict with keys 'per_party_counts', 'total_windows', 'parties', + 'input_windows', 'input_party_map_entries', 'windows_processed', + 'parties_with_positions', 'parties_all_nan', 'name_mismatch_samples' """ + # Initialize diagnostics metadata + metadata = { + "input_windows": len(positions_by_window) if positions_by_window else 0, + "input_party_map_entries": len(party_map) if party_map else 0, + "windows_processed": 0, + "parties_with_positions": set(), + "parties_all_nan": [], + "name_mismatch_samples": [], + "per_party_counts": {}, + "total_windows": len(windows), + "parties": [], + } + party_centroids: Dict[str, List[Tuple[float, float]]] = {} # collect all parties from party_map values parties = sorted(set(party_map.values())) + metadata["parties"] = parties + # if no parties known, return empty dict but still metadata if not parties: - return {}, { - "per_party_counts": {}, - "total_windows": len(windows), - "parties": [], - } + return {}, metadata # initialize lists for p in parties: @@ -273,25 +286,31 @@ def compute_party_centroids( # for each window, compute party coords using compute_party_coords for that window for w in windows: coords, _ = compute_party_coords(positions_by_window or {}, party_map, w) + metadata["windows_processed"] += 1 for p in parties: if p in coords: # ensure numeric floats party_centroids[p].append((float(coords[p][0]), float(coords[p][1]))) + metadata["parties_with_positions"].add(p) else: party_centroids[p].append((float(np.nan), float(np.nan))) - # metadata - per_party_counts: Dict[str, int] = {} + # Convert set to list for JSON serialization + metadata["parties_with_positions"] = sorted( + list(metadata["parties_with_positions"]) + ) + + # metadata per-party counts for p, vals in party_centroids.items(): count = 0 for x, y in vals: if not (np.isnan(x) or np.isnan(y)): count += 1 - per_party_counts[p] = count + metadata["per_party_counts"][p] = count + + # Check for parties with all NaN centroids + for party, coords in party_centroids.items(): + if all(np.isnan(x) and np.isnan(y) for x, y in coords): + metadata["parties_all_nan"].append(party) - metadata = { - "per_party_counts": per_party_counts, - "total_windows": len(windows), - "parties": parties, - } return party_centroids, metadata