@ -542,6 +542,155 @@ def compute_2d_axes(
raise ValueError ( " Unknown method ' %s ' " % method )
def compute_nd_axes(
    db_path: str,
    window_ids: Optional[List[str]] = None,
    n_components: int = 10,
    normalize_vectors: bool = True,
) -> Tuple[Dict[str, Dict[str, np.ndarray]], Dict]:
    """Compute aligned PCA projections onto N components for MPs per window.

    This extends compute_2d_axes to return projections onto all N principal
    components (not just the first 2), enabling consistent aligned positioning
    for SVD components 1-10 in the explorer.

    Args:
        db_path: path to duckdb
        window_ids: optional ordered list of windows (defaults to all)
        n_components: number of PCA components to compute (default 10)
        normalize_vectors: whether to normalize vectors before PCA (default True)

    Returns:
        scores_by_window, axes_def
        - scores_by_window: {window_id: {entity: np.ndarray of shape (n_components,)}}
        - axes_def: dict with 'components' (list of component vectors),
          'explained_variance_ratio', 'global_mean', 'flip_signs', 'n_components'.
          Both returns are empty dicts when no vectors are available or SVD fails.
    """
    # Imported lazily by module path to avoid a circular import with
    # analysis.trajectory at module load time.
    import importlib

    _trajectory = importlib.import_module("analysis.trajectory")

    if window_ids is None:
        window_ids = _trajectory._load_window_ids(db_path)

    # Load per-window raw vectors (entity -> vector) for each window.
    raw_window_vecs: Dict[str, Dict[str, np.ndarray]] = {}
    for wid in window_ids:
        raw_window_vecs[wid] = _trajectory._load_mp_vectors_for_window(db_path, wid)

    # Pad all vectors to the maximum dimension across windows so they stack.
    # Collect the dims first: a plain `max(...)` over a generator raises
    # ValueError when windows exist but every per-window dict is empty.
    all_dims = [v.shape[0] for d in raw_window_vecs.values() for v in d.values()]
    if all_dims:
        max_dim = max(all_dims)
        padded: Dict[str, Dict[str, np.ndarray]] = {}
        for wid, d in raw_window_vecs.items():
            padded[wid] = {
                e: np.pad(v, (0, max_dim - v.shape[0])) if v.shape[0] < max_dim else v
                for e, v in d.items()
            }
        raw_window_vecs = padded

    aligned_window_vecs = _trajectory._procrustes_align_windows(raw_window_vecs)

    # Stack all aligned vectors across windows (optionally unit-normalized).
    all_vecs = []
    entity_index = []  # parallel list of (window_id, entity)
    for wid, d in aligned_window_vecs.items():
        for ent, v in d.items():
            if normalize_vectors:
                n = np.linalg.norm(v)
                # Guard near-zero vectors against division blow-up.
                all_vecs.append(v / n if n > 1e-10 else v)
            else:
                all_vecs.append(v)
            entity_index.append((wid, ent))
    if len(all_vecs) == 0:
        _logger.info("No vectors loaded for windows %s", window_ids)
        return ({}, {})

    M = np.vstack(all_vecs)
    global_mean = M.mean(axis=0)

    # PCA: centre globally and compute SVD.
    Mc = M - global_mean
    try:
        U, s, Vt = np.linalg.svd(Mc, full_matrices=False)
    except np.linalg.LinAlgError:
        _logger.exception("SVD failed in compute_nd_axes")
        return ({}, {})

    # Explained variance ratio for each retained component.
    sv2 = s**2
    evr = sv2 / (sv2.sum() + 1e-20)
    explained_variance_ratio = evr[:n_components].tolist()

    # Component directions (normalized); may be fewer than n_components
    # when the data has lower rank.
    components = [
        Vt[i] / (np.linalg.norm(Vt[i]) + 1e-12)
        for i in range(min(n_components, Vt.shape[0]))
    ]

    # Entity -> aligned vector mapping (last window wins for duplicates,
    # matching previous behavior).
    ent_to_vec = {ent: vec for (wid, ent), vec in zip(entity_index, M)}

    # Load mp -> party metadata ONCE for both centroid lookups below
    # (previously queried per call, leaking the connection on failure).
    # Best-effort: orientation falls back to no-flip if the query fails.
    try:
        conn = duckdb.connect(db_path)
        try:
            metadata_rows = conn.execute("SELECT mp_name, party FROM mp_metadata").fetchall()
        finally:
            conn.close()
    except Exception:
        _logger.exception("Failed to load mp_metadata for axis orientation")
        metadata_rows = []

    # Per-component flip directions using canonical party centroids.
    right_parties = CANONICAL_RIGHT
    left_parties = CANONICAL_LEFT

    def _centroid_for_party_set(party_set):
        # Mean aligned vector over party-name entities plus MPs whose
        # party belongs to the set; None when nothing matches.
        vecs = [ent_to_vec[p] for p in party_set if p in ent_to_vec]
        vecs.extend(
            ent_to_vec[mp_name]
            for mp_name, party in metadata_rows
            if party in party_set and mp_name in ent_to_vec
        )
        if not vecs:
            return None
        return np.mean(np.vstack(vecs), axis=0)

    left_cent = _centroid_for_party_set(left_parties)
    right_cent = _centroid_for_party_set(right_parties)

    # Compute flip signs per component so that RIGHT projects above LEFT.
    flip_signs = []
    if left_cent is not None and right_cent is not None:
        for i, comp in enumerate(components):
            left_proj = float(np.dot(left_cent - global_mean, comp))
            right_proj = float(np.dot(right_cent - global_mean, comp))
            # Flip if right parties project lower than left (we want RIGHT > LEFT)
            flip_signs.append(-1.0 if right_proj < left_proj else 1.0)
    else:
        flip_signs = [1.0] * len(components)

    # Project all entities onto all components.
    scores_by_window: Dict[str, Dict[str, np.ndarray]] = {wid: {} for wid in window_ids}
    for (wid, ent), vec in zip(entity_index, M):
        v_centered = vec - global_mean
        scores = np.array(
            [
                flip_signs[i] * float(np.dot(v_centered, components[i]))
                for i in range(len(components))
            ]
        )
        scores_by_window[wid][ent] = scores

    axes_def = {
        "components": components,
        "explained_variance_ratio": explained_variance_ratio,
        "global_mean": global_mean,
        "flip_signs": flip_signs,
        "n_components": len(components),
    }
    return scores_by_window, axes_def
def compute_svd_spectrum (
db_path : str ,
window_ids : Optional [ List [ str ] ] = None ,