Add compute_party_bootstrap_cis() to political_axis.py with tests

Pure numpy function that computes bootstrap confidence intervals for
party centroid vectors. Handles N>=2 (bootstrap), N=1 (degenerate CI),
and N=0 (excluded) cases. Uses np.random.default_rng for reproducibility.
main
Sven Geboers 1 month ago
parent ef96edf478
commit cd8aeec997
  1. 77
      analysis/political_axis.py
  2. 121
      tests/test_political_axis.py

@ -619,3 +619,80 @@ def compute_svd_spectrum(
sv2 = s**2 sv2 = s**2
evr = sv2 / (sv2.sum() + 1e-20) * 100 evr = sv2 / (sv2.sum() + 1e-20) * 100
return list(evr) # already sorted descending by SVD return list(evr) # already sorted descending by SVD
def compute_party_bootstrap_cis(
    party_vectors: Dict[str, List[np.ndarray]],
    n_boot: int = 1000,
    ci: float = 95.0,
    seed: int = 42,
) -> Dict[str, Dict]:
    """Compute bootstrap confidence intervals for party centroid vectors.

    For each party, resamples its MP vectors with replacement to build a
    distribution of centroid estimates, then extracts percentile-based
    confidence intervals per dimension.

    Args:
        party_vectors: mapping of party name -> list of individual MP vectors
            (each a numpy array of consistent length, e.g. 50 dimensions).
        n_boot: number of bootstrap replicates; must be at least 1.
        ci: confidence level as a percentage (e.g. 95.0 for 95% CI); must
            lie in the closed range [0, 100].
        seed: random seed for reproducibility (used with
            ``np.random.default_rng``). A fresh generator is created from
            this seed for every party, so a party's result does not depend
            on which other parties are present in ``party_vectors``.

    Returns:
        Dict mapping party name -> dict with keys ``centroid``, ``ci_lower``,
        ``ci_upper``, ``std``, and ``n_mps``. Parties with no MPs (empty
        list) are excluded from the output. A party with exactly one MP gets
        a degenerate interval: ``ci_lower`` and ``ci_upper`` both equal the
        centroid and ``std`` is all zeros.

    Raises:
        ValueError: if ``n_boot`` is smaller than 1 or ``ci`` is outside
            [0, 100] (including NaN).
    """
    # Validate up front: otherwise bad arguments only fail (or silently give
    # NaN intervals) once a party with >= 2 MPs reaches the percentile step.
    if n_boot < 1:
        raise ValueError("n_boot must be at least 1, got %r" % (n_boot,))
    if not 0.0 <= ci <= 100.0:  # written this way so NaN is rejected too
        raise ValueError("ci must be between 0 and 100, got %r" % (ci,))

    # Symmetric two-sided interval: split the excluded mass over both tails.
    alpha = 100.0 - ci
    lo_pct = alpha / 2.0
    hi_pct = 100.0 - lo_pct

    result: Dict[str, Dict] = {}
    for party, vectors in party_vectors.items():
        n_mps = len(vectors)
        if n_mps == 0:
            continue

        mat = np.vstack(vectors)  # (n_mps, dim)
        centroid = np.mean(mat, axis=0)

        if n_mps == 1:
            # Resampling a single vector always returns that vector, so the
            # interval collapses onto the centroid; skip the bootstrap.
            result[party] = {
                "centroid": centroid,
                "ci_lower": centroid.copy(),
                "ci_upper": centroid.copy(),
                "std": np.zeros_like(centroid),
                "n_mps": 1,
            }
            continue

        # Re-seeded per party (see ``seed`` in the docstring).
        rng = np.random.default_rng(seed)
        boot_centroids = np.empty((n_boot, mat.shape[1]))
        for b in range(n_boot):
            # Draw n_mps row indices with replacement and average those rows.
            idx = rng.integers(0, n_mps, size=n_mps)
            boot_centroids[b] = mat[idx].mean(axis=0)

        ci_lower = np.percentile(boot_centroids, lo_pct, axis=0)
        ci_upper = np.percentile(boot_centroids, hi_pct, axis=0)
        std = np.std(boot_centroids, axis=0)

        result[party] = {
            "centroid": centroid,
            "ci_lower": ci_lower,
            "ci_upper": ci_upper,
            "std": std,
            "n_mps": n_mps,
        }

    _logger.info(
        "Bootstrap CIs computed for %d parties (n_boot=%d, ci=%.1f%%)",
        len(result),
        n_boot,
        ci,
    )
    return result

@ -0,0 +1,121 @@
"""Tests for compute_party_bootstrap_cis in analysis.political_axis."""
import numpy as np
from analysis.political_axis import compute_party_bootstrap_cis
# ── Helpers ──────────────────────────────────────────────────────────────────
def _make_party_vectors(n_mps: int, dim: int = 50, seed: int = 0) -> list:
    """Build ``n_mps`` standard-normal vectors of length ``dim`` for one party.

    The vectors are drawn one after another from a single generator seeded
    with ``seed``, so the same arguments always give the same list.
    """
    generator = np.random.default_rng(seed)
    mp_vectors = []
    for _ in range(n_mps):
        mp_vectors.append(generator.standard_normal(dim))
    return mp_vectors
# ── Tests ────────────────────────────────────────────────────────────────────
class TestBootstrapDeterministic:
    """Repeated calls with identical arguments must agree exactly."""

    def test_same_seed_gives_identical_output(self):
        """Two runs with the same inputs and seed yield equal results."""
        party_vectors = {"PartyA": _make_party_vectors(10, dim=5, seed=99)}

        first = compute_party_bootstrap_cis(party_vectors, n_boot=200, seed=42)
        second = compute_party_bootstrap_cis(party_vectors, n_boot=200, seed=42)
        entry_a = first["PartyA"]
        entry_b = second["PartyA"]

        # Every array-valued field must match element for element.
        for key in ("centroid", "ci_lower", "ci_upper", "std"):
            np.testing.assert_array_equal(entry_a[key], entry_b[key])
        assert entry_a["n_mps"] == entry_b["n_mps"]
class TestBootstrapSingleMP:
    """A one-member party has no spread to estimate."""

    def test_single_mp_collapses_ci(self):
        """With 1 MP the centroid and both CI bounds equal that MP's vector
        and the std is zero."""
        lone_vector = np.array([1.0, 2.0, 3.0])

        solo = compute_party_bootstrap_cis({"Solo": [lone_vector]}, n_boot=500)["Solo"]

        for key in ("centroid", "ci_lower", "ci_upper"):
            np.testing.assert_array_equal(solo[key], lone_vector)
        np.testing.assert_array_equal(solo["std"], np.zeros_like(lone_vector))
        assert solo["n_mps"] == 1
class TestBootstrapCIWidthScalesWithN:
    """Interval width should shrink as the party grows."""

    def test_larger_party_has_narrower_ci(self):
        """A 3-MP party gets a wider average CI than a 30-MP party drawn
        from the same distribution."""
        rng = np.random.default_rng(123)
        dim = 10

        # Both parties come from one generator, smallest first, so they share
        # the same underlying distribution and differ only in sample size.
        party_vectors = {}
        for name, size in (("Small", 3), ("Large", 30)):
            party_vectors[name] = [rng.standard_normal(dim) for _ in range(size)]

        result = compute_party_bootstrap_cis(party_vectors, n_boot=2000, seed=42)

        mean_width = {
            name: np.mean(result[name]["ci_upper"] - result[name]["ci_lower"])
            for name in ("Small", "Large")
        }
        # Compare the average over dimensions, not each dimension separately.
        assert mean_width["Small"] > mean_width["Large"]
class TestBootstrapEmptyParty:
    """Parties without any MPs must not appear in the result."""

    def test_empty_list_excluded(self):
        """An empty vector list is dropped while populated parties remain."""
        populated = _make_party_vectors(5, dim=4)

        result = compute_party_bootstrap_cis(
            {"HasMPs": populated, "Empty": []},
            n_boot=100,
        )

        assert "HasMPs" in result
        assert "Empty" not in result
class TestBootstrapCIContainsCentroid:
    """The point estimate should sit inside its own interval."""

    def test_centroid_within_ci_bounds(self):
        """Every dimension satisfies ci_lower <= centroid <= ci_upper."""
        vectors = _make_party_vectors(15, dim=8, seed=7)

        entry = compute_party_bootstrap_cis({"A": vectors}, n_boot=1000, seed=42)["A"]
        lower = entry["ci_lower"]
        centre = entry["centroid"]
        upper = entry["ci_upper"]

        assert np.all(lower <= centre)
        assert np.all(centre <= upper)
class TestBootstrapCustomCILevel:
    """The ``ci`` argument controls how wide the interval is."""

    def test_wider_ci_at_higher_level(self):
        """A 99% interval is at least as wide as a 90% interval."""
        party_vectors = {"X": _make_party_vectors(20, dim=6, seed=55)}

        # Same data, replicate count and seed; only the level differs.
        widths = {}
        for level in (90.0, 99.0):
            entry = compute_party_bootstrap_cis(
                party_vectors, n_boot=2000, ci=level, seed=42
            )["X"]
            widths[level] = entry["ci_upper"] - entry["ci_lower"]

        # Must hold on every dimension, not just on average.
        assert np.all(widths[99.0] >= widths[90.0])
Loading…
Cancel
Save