From cd8aeec9979aca78f0259fa34f45ba3b9a1a2f0e Mon Sep 17 00:00:00 2001
From: Sven Geboers
Date: Sun, 29 Mar 2026 23:13:30 +0200
Subject: [PATCH] Add compute_party_bootstrap_cis() to political_axis.py with tests

Pure numpy function that computes bootstrap confidence intervals for
party centroid vectors. Handles N>=2 (bootstrap), N=1 (degenerate CI),
and N=0 (excluded) cases. Uses np.random.default_rng for reproducibility.
Rejects n_boot < 1 and ci outside [0, 100] with ValueError.
---
 analysis/political_axis.py   |  92 +++++++++++++++++++++++
 tests/test_political_axis.py | 140 +++++++++++++++++++++++++++++++++++
 2 files changed, 232 insertions(+)
 create mode 100644 tests/test_political_axis.py

diff --git a/analysis/political_axis.py b/analysis/political_axis.py
index 218b508..4117bf3 100644
--- a/analysis/political_axis.py
+++ b/analysis/political_axis.py
@@ -619,3 +619,95 @@ def compute_svd_spectrum(
     sv2 = s**2
     evr = sv2 / (sv2.sum() + 1e-20) * 100
     return list(evr)  # already sorted descending by SVD
+
+
+def compute_party_bootstrap_cis(
+    party_vectors: Dict[str, List[np.ndarray]],
+    n_boot: int = 1000,
+    ci: float = 95.0,
+    seed: int = 42,
+) -> Dict[str, Dict]:
+    """Compute bootstrap confidence intervals for party centroid vectors.
+
+    For each party, resamples its MP vectors with replacement to build a
+    distribution of centroid estimates, then extracts percentile-based
+    confidence intervals per dimension.
+
+    Args:
+        party_vectors: mapping of party name → list of individual MP vectors
+            (each a numpy array of consistent length, e.g. 50 dimensions).
+        n_boot: number of bootstrap replicates; must be at least 1.
+        ci: confidence level as a percentage in [0, 100] (e.g. 95.0 for
+            a 95% CI).
+        seed: random seed for reproducibility (used with ``np.random.default_rng``).
+
+    Returns:
+        Dict mapping party name → dict with keys ``centroid``, ``ci_lower``,
+        ``ci_upper``, ``std``, and ``n_mps``. Parties with no MPs (empty
+        list) are excluded from the output. A party with exactly one MP
+        gets a degenerate interval: ``ci_lower`` and ``ci_upper`` both
+        equal ``centroid`` and ``std`` is all zeros.
+
+    Raises:
+        ValueError: if ``n_boot`` is less than 1 or ``ci`` lies outside
+            [0, 100].
+    """
+    if n_boot < 1:
+        raise ValueError(f"n_boot must be at least 1, got {n_boot!r}")
+    if not 0.0 <= ci <= 100.0:
+        raise ValueError(f"ci must be a percentage in [0, 100], got {ci!r}")
+
+    alpha = 100.0 - ci
+    lo_pct = alpha / 2.0
+    hi_pct = 100.0 - lo_pct
+
+    result: Dict[str, Dict] = {}
+
+    for party, vectors in party_vectors.items():
+        n_mps = len(vectors)
+
+        if n_mps == 0:
+            continue
+
+        mat = np.vstack(vectors)  # (n_mps, dim)
+        centroid = np.mean(mat, axis=0)
+
+        if n_mps == 1:
+            # A single MP has nothing to resample: collapse the interval.
+            result[party] = {
+                "centroid": centroid,
+                "ci_lower": centroid.copy(),
+                "ci_upper": centroid.copy(),
+                "std": np.zeros_like(centroid),
+                "n_mps": 1,
+            }
+            continue
+
+        # Re-seed per party so a party's interval depends only on its own
+        # vectors and ``seed``, not on which other parties are in the input.
+        rng = np.random.default_rng(seed)
+        boot_centroids = np.empty((n_boot, mat.shape[1]))
+
+        for b in range(n_boot):
+            idx = rng.integers(0, n_mps, size=n_mps)
+            boot_centroids[b] = mat[idx].mean(axis=0)
+
+        ci_lower = np.percentile(boot_centroids, lo_pct, axis=0)
+        ci_upper = np.percentile(boot_centroids, hi_pct, axis=0)
+        std = np.std(boot_centroids, axis=0)
+
+        result[party] = {
+            "centroid": centroid,
+            "ci_lower": ci_lower,
+            "ci_upper": ci_upper,
+            "std": std,
+            "n_mps": n_mps,
+        }
+
+    _logger.info(
+        "Bootstrap CIs computed for %d parties (n_boot=%d, ci=%.1f%%)",
+        len(result),
+        n_boot,
+        ci,
+    )
+    return result
diff --git a/tests/test_political_axis.py b/tests/test_political_axis.py
new file mode 100644
index 0000000..bbe3fd5
--- /dev/null
+++ b/tests/test_political_axis.py
@@ -0,0 +1,140 @@
+"""Tests for compute_party_bootstrap_cis in analysis.political_axis."""
+
+import numpy as np
+import pytest
+
+from analysis.political_axis import compute_party_bootstrap_cis
+
+
+# ── Helpers ──────────────────────────────────────────────────────────────────
+
+
+def _make_party_vectors(n_mps: int, dim: int = 50, seed: int = 0) -> list:
+    """Generate a list of random MP vectors for a single party."""
+    rng = np.random.default_rng(seed)
+    return [rng.standard_normal(dim) for _ in range(n_mps)]
+
+
+# ── Tests ────────────────────────────────────────────────────────────────────
+
+
+class TestBootstrapDeterministic:
+    def test_same_seed_gives_identical_output(self):
+        """Same inputs + same seed -> identical outputs."""
+        vecs = _make_party_vectors(10, dim=5, seed=99)
+        party_vectors = {"PartyA": vecs}
+
+        result1 = compute_party_bootstrap_cis(party_vectors, n_boot=200, seed=42)
+        result2 = compute_party_bootstrap_cis(party_vectors, n_boot=200, seed=42)
+
+        np.testing.assert_array_equal(
+            result1["PartyA"]["centroid"], result2["PartyA"]["centroid"]
+        )
+        np.testing.assert_array_equal(
+            result1["PartyA"]["ci_lower"], result2["PartyA"]["ci_lower"]
+        )
+        np.testing.assert_array_equal(
+            result1["PartyA"]["ci_upper"], result2["PartyA"]["ci_upper"]
+        )
+        np.testing.assert_array_equal(
+            result1["PartyA"]["std"], result2["PartyA"]["std"]
+        )
+        assert result1["PartyA"]["n_mps"] == result2["PartyA"]["n_mps"]
+
+
+class TestBootstrapSingleMP:
+    def test_single_mp_collapses_ci(self):
+        """Party with 1 MP -> ci_lower == ci_upper == centroid, std == 0."""
+        vec = np.array([1.0, 2.0, 3.0])
+        party_vectors = {"Solo": [vec]}
+
+        result = compute_party_bootstrap_cis(party_vectors, n_boot=500)
+        entry = result["Solo"]
+
+        np.testing.assert_array_equal(entry["centroid"], vec)
+        np.testing.assert_array_equal(entry["ci_lower"], vec)
+        np.testing.assert_array_equal(entry["ci_upper"], vec)
+        np.testing.assert_array_equal(entry["std"], np.zeros_like(vec))
+        assert entry["n_mps"] == 1
+
+
+class TestBootstrapCIWidthScalesWithN:
+    def test_larger_party_has_narrower_ci(self):
+        """Party with 3 MPs should have wider CIs than party with 30 MPs
+        when sampled from the same distribution."""
+        rng = np.random.default_rng(123)
+        dim = 10
+        # Same underlying distribution, different sample sizes
+        small_vecs = [rng.standard_normal(dim) for _ in range(3)]
+        large_vecs = [rng.standard_normal(dim) for _ in range(30)]
+
+        party_vectors = {"Small": small_vecs, "Large": large_vecs}
+        result = compute_party_bootstrap_cis(party_vectors, n_boot=2000, seed=42)
+
+        small_width = result["Small"]["ci_upper"] - result["Small"]["ci_lower"]
+        large_width = result["Large"]["ci_upper"] - result["Large"]["ci_lower"]
+
+        # On average, the small party's CI should be wider
+        assert np.mean(small_width) > np.mean(large_width)
+
+
+class TestBootstrapEmptyParty:
+    def test_empty_list_excluded(self):
+        """Party with empty list -> excluded from output."""
+        party_vectors = {
+            "HasMPs": _make_party_vectors(5, dim=4),
+            "Empty": [],
+        }
+
+        result = compute_party_bootstrap_cis(party_vectors, n_boot=100)
+
+        assert "HasMPs" in result
+        assert "Empty" not in result
+
+
+class TestBootstrapCIContainsCentroid:
+    def test_centroid_within_ci_bounds(self):
+        """ci_lower <= centroid <= ci_upper for each dimension."""
+        party_vectors = {"A": _make_party_vectors(15, dim=8, seed=7)}
+        result = compute_party_bootstrap_cis(party_vectors, n_boot=1000, seed=42)
+
+        entry = result["A"]
+        assert np.all(entry["ci_lower"] <= entry["centroid"])
+        assert np.all(entry["centroid"] <= entry["ci_upper"])
+
+
+class TestBootstrapCustomCILevel:
+    def test_wider_ci_at_higher_level(self):
+        """ci=99 produces wider intervals than ci=90."""
+        party_vectors = {"X": _make_party_vectors(20, dim=6, seed=55)}
+
+        result_90 = compute_party_bootstrap_cis(
+            party_vectors, n_boot=2000, ci=90.0, seed=42
+        )
+        result_99 = compute_party_bootstrap_cis(
+            party_vectors, n_boot=2000, ci=99.0, seed=42
+        )
+
+        width_90 = result_90["X"]["ci_upper"] - result_90["X"]["ci_lower"]
+        width_99 = result_99["X"]["ci_upper"] - result_99["X"]["ci_lower"]
+
+        # 99% CI should be wider than 90% CI on every dimension
+        assert np.all(width_99 >= width_90)
+
+
+class TestBootstrapInvalidArguments:
+    def test_n_boot_below_one_rejected(self):
+        """n_boot < 1 -> ValueError."""
+        party_vectors = {"A": _make_party_vectors(4, dim=3)}
+
+        with pytest.raises(ValueError):
+            compute_party_bootstrap_cis(party_vectors, n_boot=0)
+
+    def test_ci_outside_percentage_range_rejected(self):
+        """ci outside [0, 100] -> ValueError."""
+        party_vectors = {"A": _make_party_vectors(4, dim=3)}
+
+        with pytest.raises(ValueError):
+            compute_party_bootstrap_cis(party_vectors, n_boot=10, ci=150.0)
+        with pytest.raises(ValueError):
+            compute_party_bootstrap_cis(party_vectors, n_boot=10, ci=-5.0)