Add compute_party_bootstrap_cis() to political_axis.py with tests

Pure numpy function that computes bootstrap confidence intervals for party centroid vectors. Handles N>=2 (bootstrap), N=1 (degenerate CI), and N=0 (excluded) cases. Uses np.random.default_rng for reproducibility.
1 month ago · cd8aeec997
parent ef96edf478
commit cd8aeec997
2 changed files with 198 additions and 0 deletions
--- a/analysis/political_axis.py
+++ b/analysis/political_axis.py
@@ -619,3 +619,80 @@ def compute_svd_spectrum(
    sv2 = s**2
    evr = sv2 / (sv2.sum() + 1e-20) * 100
    return list(evr)  # already sorted descending by SVD
 def compute_party_bootstrap_cis(
    party_vectors: Dict[str, List[np.ndarray]],
    n_boot: int = 1000,
    ci: float = 95.0,
    seed: int = 42,
 ) -> Dict[str, Dict]:
    """Compute percentile-bootstrap confidence intervals for party centroids.

    For each party, the MP vectors are resampled with replacement
    ``n_boot`` times; the mean of every resample is one bootstrap centroid.
    The confidence bounds are the per-dimension percentiles of those
    bootstrap centroids.

    A fresh generator is created from ``seed`` for every party, so the
    interval computed for one party does not depend on which other parties
    are present in ``party_vectors`` or on their iteration order.

    Args:
        party_vectors: mapping of party name → list of individual MP vectors
            (each a numpy array of consistent length, e.g. 50 dimensions).
        n_boot: number of bootstrap replicates; must be at least 1.
        ci: two-sided confidence level as a percentage in ``(0, 100]``
            (e.g. 95.0 for a 95% CI).
        seed: random seed for reproducibility (used with
            ``np.random.default_rng``).

    Returns:
        Dict mapping party name → dict with keys:

        * ``centroid``: mean of the party's MP vectors.
        * ``ci_lower`` / ``ci_upper``: per-dimension percentile bounds of
          the bootstrap centroids.
        * ``std``: per-dimension standard deviation of the bootstrap
          centroids (``np.std`` with its default ``ddof``).
        * ``n_mps``: number of MP vectors supplied for the party.

        Parties with no MPs (empty list) are excluded from the output.
        A party with exactly one MP is not resampled: both bounds equal the
        centroid and ``std`` is all zeros.

    Raises:
        ValueError: if ``n_boot`` is smaller than 1 or ``ci`` lies outside
            ``(0, 100]`` (NaN included). ``np.vstack`` also raises
            ``ValueError`` when a party's vectors differ in length.
    """
    # Fail early with a clear message instead of letting numpy choke on an
    # empty bootstrap matrix or an out-of-range percentile further down.
    if n_boot < 1:
        raise ValueError(f"n_boot must be at least 1, got {n_boot!r}")
    if not 0.0 < ci <= 100.0:
        raise ValueError(f"ci must lie in the interval (0, 100], got {ci!r}")

    # Two-sided interval: the excluded mass is split evenly between tails.
    lo_pct = (100.0 - ci) / 2.0
    hi_pct = 100.0 - lo_pct

    result: Dict[str, Dict] = {}
    for party, vectors in party_vectors.items():
        n_mps = len(vectors)
        if n_mps == 0:
            continue

        mat = np.vstack(vectors)  # (n_mps, dim)
        centroid = mat.mean(axis=0)

        if n_mps == 1:
            # Resampling a single vector always reproduces it, so the
            # interval is degenerate; skip the bootstrap entirely.
            result[party] = {
                "centroid": centroid,
                "ci_lower": centroid.copy(),
                "ci_upper": centroid.copy(),
                "std": np.zeros_like(centroid),
                "n_mps": 1,
            }
            continue

        # Re-seeded per party on purpose: see the docstring.
        rng = np.random.default_rng(seed)
        boot_centroids = np.empty((n_boot, mat.shape[1]))
        for b in range(n_boot):
            idx = rng.integers(0, n_mps, size=n_mps)
            boot_centroids[b] = mat[idx].mean(axis=0)

        # One call yields both bounds as the rows of a (2, dim) array.
        ci_lower, ci_upper = np.percentile(
            boot_centroids, [lo_pct, hi_pct], axis=0
        )
        result[party] = {
            "centroid": centroid,
            "ci_lower": ci_lower,
            "ci_upper": ci_upper,
            "std": np.std(boot_centroids, axis=0),
            "n_mps": n_mps,
        }

    _logger.info(
        "Bootstrap CIs computed for %d parties (n_boot=%d, ci=%.1f%%)",
        len(result),
        n_boot,
        ci,
    )
    return result
--- a/tests/test_political_axis.py
+++ b/tests/test_political_axis.py
@@ -0,0 +1,121 @@
 """Tests for compute_party_bootstrap_cis in analysis.political_axis."""
 import numpy as np
 from analysis.political_axis import compute_party_bootstrap_cis
 # ── Helpers ──────────────────────────────────────────────────────────────────
 def _make_party_vectors(n_mps: int, dim: int = 50, seed: int = 0) -> list:
    """Generate a list of random MP vectors for a single party."""
    rng = np.random.default_rng(seed)
    return [rng.standard_normal(dim) for _ in range(n_mps)]
 # ── Tests ────────────────────────────────────────────────────────────────────
 class TestBootstrapDeterministic:
    """Repeated calls with identical arguments must agree exactly."""

    def test_same_seed_gives_identical_output(self):
        """Two runs on the same data with seed=42 yield equal results."""
        party_vectors = {"PartyA": _make_party_vectors(10, dim=5, seed=99)}

        first = compute_party_bootstrap_cis(party_vectors, n_boot=200, seed=42)
        second = compute_party_bootstrap_cis(party_vectors, n_boot=200, seed=42)
        entry_a = first["PartyA"]
        entry_b = second["PartyA"]

        # Every array-valued field has to match element for element.
        for field in ("centroid", "ci_lower", "ci_upper", "std"):
            np.testing.assert_array_equal(entry_a[field], entry_b[field])
        assert entry_a["n_mps"] == entry_b["n_mps"]
 class TestBootstrapSingleMP:
    """A one-member party has nothing to resample."""

    def test_single_mp_collapses_ci(self):
        """With one MP the centroid and both bounds equal that MP's vector
        and the reported std is zero."""
        lone_vector = np.array([1.0, 2.0, 3.0])
        result = compute_party_bootstrap_cis({"Solo": [lone_vector]}, n_boot=500)
        solo = result["Solo"]

        for field in ("centroid", "ci_lower", "ci_upper"):
            np.testing.assert_array_equal(solo[field], lone_vector)
        np.testing.assert_array_equal(solo["std"], np.zeros_like(lone_vector))
        assert solo["n_mps"] == 1
 class TestBootstrapCIWidthScalesWithN:
    """Interval width should shrink as the party gets larger."""

    def test_larger_party_has_narrower_ci(self):
        """A 3-MP party gets a wider mean CI than a 30-MP party drawn from
        the same distribution."""
        dim = 10
        rng = np.random.default_rng(123)
        # Draw the small party first, then the large one, from one stream.
        party_vectors = {
            "Small": [rng.standard_normal(dim) for _ in range(3)],
            "Large": [rng.standard_normal(dim) for _ in range(30)],
        }

        result = compute_party_bootstrap_cis(party_vectors, n_boot=2000, seed=42)

        small = result["Small"]
        large = result["Large"]
        small_width = small["ci_upper"] - small["ci_lower"]
        large_width = large["ci_upper"] - large["ci_lower"]
        # Compared on the average across dimensions, not per dimension.
        assert np.mean(small_width) > np.mean(large_width)
 class TestBootstrapEmptyParty:
    """Parties without any MP vectors are dropped from the result."""

    def test_empty_list_excluded(self):
        """An empty vector list produces no entry; populated ones do."""
        populated = _make_party_vectors(5, dim=4)
        result = compute_party_bootstrap_cis(
            {"HasMPs": populated, "Empty": []}, n_boot=100
        )
        assert "HasMPs" in result
        assert "Empty" not in result
 class TestBootstrapCIContainsCentroid:
    """The sample centroid should sit inside its own interval."""

    def test_centroid_within_ci_bounds(self):
        """Every dimension satisfies ci_lower <= centroid <= ci_upper."""
        vectors = _make_party_vectors(15, dim=8, seed=7)
        result = compute_party_bootstrap_cis({"A": vectors}, n_boot=1000, seed=42)

        centroid = result["A"]["centroid"]
        lower = result["A"]["ci_lower"]
        upper = result["A"]["ci_upper"]
        assert np.all(lower <= centroid)
        assert np.all(centroid <= upper)
 class TestBootstrapCustomCILevel:
    """The ``ci`` argument controls how wide the interval is."""

    def test_wider_ci_at_higher_level(self):
        """A 99% interval is at least as wide as a 90% one in every
        dimension, given the same data and seed."""
        party_vectors = {"X": _make_party_vectors(20, dim=6, seed=55)}

        # Compute the 90% run first, then the 99% run, on identical input.
        entry_90 = compute_party_bootstrap_cis(
            party_vectors, n_boot=2000, ci=90.0, seed=42
        )["X"]
        entry_99 = compute_party_bootstrap_cis(
            party_vectors, n_boot=2000, ci=99.0, seed=42
        )["X"]

        width_90 = entry_90["ci_upper"] - entry_90["ci_lower"]
        width_99 = entry_99["ci_upper"] - entry_99["ci_lower"]
        assert np.all(width_99 >= width_90)