Revert "fix: use annual-only windows for SVD to restore EVR (~20% PC1)"

This reverts commit ffd8b191ef.
main
Sven Geboers 1 month ago
parent ffd8b191ef
commit e0f17e8b83
  1 file changed (31 lines changed):
      analysis/political_axis.py

@@ -14,7 +14,6 @@ Both modes return a dict mapping mp_name → scalar score for the given window.
import json import json
import logging import logging
import re
from typing import Dict, List, Optional, Tuple from typing import Dict, List, Optional, Tuple
import numpy as np import numpy as np
@@ -191,23 +190,17 @@ def compute_2d_axes(
aligned_window_vecs = _trajectory._procrustes_align_windows(raw_window_vecs) aligned_window_vecs = _trajectory._procrustes_align_windows(raw_window_vecs)
# Stack all vectors across windows into a single matrix for PCA if needed. # Stack all vectors across windows into a single matrix for PCA if needed
# pca_vecs / pca_index: annual windows only (e.g. "2024") — used for SVD axis derivation.
# all_vecs / entity_index: every window — used for projection onto the derived axes.
pca_vecs = []
all_vecs = [] all_vecs = []
entity_index = [] # parallel list of (window_id, entity) entity_index = [] # parallel list of (window_id, entity)
for wid, d in aligned_window_vecs.items(): for wid, d in aligned_window_vecs.items():
for ent, v in d.items(): for ent, v in d.items():
if normalize_vectors: if normalize_vectors:
n = np.linalg.norm(v) n = np.linalg.norm(v)
vec = v / n if n > 1e-10 else v all_vecs.append(v / n if n > 1e-10 else v)
else: else:
vec = v all_vecs.append(v)
all_vecs.append(vec)
entity_index.append((wid, ent)) entity_index.append((wid, ent))
if re.match(r"^\d{4}$", wid):
pca_vecs.append(vec)
if len(all_vecs) == 0: if len(all_vecs) == 0:
_logger.info("No vectors loaded for windows %s", window_ids) _logger.info("No vectors loaded for windows %s", window_ids)
@@ -215,19 +208,9 @@ def compute_2d_axes(
M = np.vstack(all_vecs) M = np.vstack(all_vecs)
# If no annual windows found, fall back to all windows for SVD.
if len(pca_vecs) == 0:
_logger.warning(
"No annual windows found; falling back to all %d windows for SVD axis derivation",
len(aligned_window_vecs),
)
M_pca = M
else:
M_pca = np.vstack(pca_vecs)
if method == "pca": if method == "pca":
# centre using annual-only mean so SVD axes are not diluted by quarterly windows # centre globally
Mc = M_pca - M_pca.mean(axis=0) Mc = M - M.mean(axis=0)
try: try:
U, s, Vt = np.linalg.svd(Mc, full_matrices=False) U, s, Vt = np.linalg.svd(Mc, full_matrices=False)
except np.linalg.LinAlgError: except np.linalg.LinAlgError:
@@ -375,8 +358,8 @@ def compute_2d_axes(
evr1 * 100, evr1 * 100,
) )
# project per-window vectors (centre by annual-window global mean, consistent with SVD axes) # project per-window vectors (centre by global mean)
global_mean = M_pca.mean(axis=0) global_mean = M.mean(axis=0)
axes["global_mean"] = global_mean axes["global_mean"] = global_mean
positions_by_window: Dict[str, Dict[str, Tuple[float, float]]] = { positions_by_window: Dict[str, Dict[str, Tuple[float, float]]] = {
wid: {} for wid in window_ids wid: {} for wid in window_ids

Loading…
Cancel
Save