From ffd8b191ef15fdea73dfa66d2162c0234baba725 Mon Sep 17 00:00:00 2001 From: Sven Geboers Date: Sun, 29 Mar 2026 20:12:24 +0200 Subject: [PATCH] fix: use annual-only windows for SVD to restore EVR (~20% PC1) Quarterly windows (29 of 41 total) diluted PC1 explained variance ratio from ~20% down to ~14.6%. The fix splits the vector collection loop into: - pca_vecs: annual windows only (re.match r'^\d{4}$') -> M_pca used for SVD - all_vecs: every window -> M used for projections onto derived axes Centering for SVD and global_mean for projection both now use M_pca.mean(axis=0) so axes are consistent. Falls back to all windows if no annual windows exist. --- analysis/political_axis.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/analysis/political_axis.py b/analysis/political_axis.py index 3adfd14..b296be9 100644 --- a/analysis/political_axis.py +++ b/analysis/political_axis.py @@ -14,6 +14,7 @@ Both modes return a dict mapping mp_name → scalar score for the given window. import json import logging +import re from typing import Dict, List, Optional, Tuple import numpy as np @@ -190,17 +191,23 @@ def compute_2d_axes( aligned_window_vecs = _trajectory._procrustes_align_windows(raw_window_vecs) - # Stack all vectors across windows into a single matrix for PCA if needed + # Stack all vectors across windows into a single matrix for PCA if needed. + # pca_vecs: annual windows only (e.g. "2024") — used for SVD axis derivation. + # all_vecs / entity_index: every window — used for projection onto the derived axes. 
+ pca_vecs = [] all_vecs = [] entity_index = [] # parallel list of (window_id, entity) for wid, d in aligned_window_vecs.items(): for ent, v in d.items(): if normalize_vectors: n = np.linalg.norm(v) - all_vecs.append(v / n if n > 1e-10 else v) + vec = v / n if n > 1e-10 else v else: - all_vecs.append(v) + vec = v + all_vecs.append(vec) entity_index.append((wid, ent)) + if re.match(r"^\d{4}$", wid): + pca_vecs.append(vec) if len(all_vecs) == 0: _logger.info("No vectors loaded for windows %s", window_ids) @@ -208,9 +215,19 @@ def compute_2d_axes( M = np.vstack(all_vecs) + # If no annual windows found, fall back to all windows for SVD. + if len(pca_vecs) == 0: + _logger.warning( + "No annual windows found; falling back to all %d windows for SVD axis derivation", + len(aligned_window_vecs), + ) + M_pca = M + else: + M_pca = np.vstack(pca_vecs) + if method == "pca": - # centre globally - Mc = M - M.mean(axis=0) + # centre using annual-only mean so SVD axes are not diluted by quarterly windows + Mc = M_pca - M_pca.mean(axis=0) try: U, s, Vt = np.linalg.svd(Mc, full_matrices=False) except np.linalg.LinAlgError: @@ -358,8 +375,8 @@ def compute_2d_axes( evr1 * 100, ) - # project per-window vectors (centre by global mean) - global_mean = M.mean(axis=0) + # project per-window vectors (centre by annual-window global mean, consistent with SVD axes) + global_mean = M_pca.mean(axis=0) axes["global_mean"] = global_mean positions_by_window: Dict[str, Dict[str, Tuple[float, float]]] = { wid: {} for wid in window_ids