@ -143,84 +143,73 @@ def compute_axis_stability(
top_n : int = 20 ,
n_components : int = 10 ,
stability_threshold : float = 0.7 ,
regression_alpha : float = 1.0 ,
) - > Dict :
""" Compute axis stability across windows using Procrustes-aligned SVD score s.
""" Compute axis stability across windows using Ridge regression weight s.
Aligns motion score matrices across windows using orthogonal Procrustes
to handle SVD sign ambiguity , then computes cosine similarity of per - component
centroids .
For each SVD axis and each window , fits Ridge regression :
SVD_score ~ fused_embedding
The weight vector ( 2610 dims ) is the semantic signature of the axis .
Stability = cosine similarity of weight vectors across window pairs .
Returns dict with stability_matrix , stable_axes , reordered_axes , unstable_axes .
Falls back to party - based sign consistency for windows with < 50 motions .
Returns dict with stability_matrix , stable_axes , reordered_axes , unstable_axes ,
and weight_vectors for downstream interpretation .
"""
from scipy . linalg import orthogonal_procrustes
from sklearn . linear_model import Ridge
from sklearn . preprocessing import StandardScaler
# Load motion scores per window
window_scores : Dict [ str , Dict [ int , np . ndarray ] ] = { }
# Load data per window
window_data : Dict [ str , Tuple [ np . ndarray , np . ndarray ] ] = { }
for w in windows :
motion_scores = _load_motion_scores ( con , w )
if not motion_scores :
fused = _load_fused_embeddings ( con , w )
if not motion_scores or not fused :
continue
window_scores [ w ] = motion_scores
if len ( window_scores ) < 2 :
return {
" stability_matrix " : np . array ( [ ] ) ,
" avg_stability " : np . zeros ( n_components ) ,
" stable_axes " : [ ] ,
" reordered_axes " : [ ] ,
" unstable_axes " : list ( range ( 1 , n_components + 1 ) ) ,
" windows " : list ( window_scores . keys ( ) ) ,
}
# Build feature matrix and targets
# Use motions that have both SVD scores and fused embeddings
common = [ m for m in motion_scores if m in fused ]
if len ( common ) < 50 :
continue
# Build motion score matrices per window (motions × components)
# Use common motions across all windows for alignment
common_motions = None
for w , scores in window_scores . items ( ) :
motions = set ( scores . keys ( ) )
if common_motions is None :
common_motions = motions
else :
common_motions = common_motions & motions
# Feature matrix: fused embeddings (align dimensions)
dim = min ( len ( fused [ m ] ) for m in common )
X = np . array ( [ fused [ m ] [ : dim ] for m in common ] )
# Target matrix: SVD scores (n_common × n_components)
Y = np . array ( [ motion_scores [ m ] [ : n_components ] for m in common ] )
if not common_motions or len ( common_motions ) < n_components :
# Fallback: use sign consistency based on canonical party scores
window_data [ w ] = ( X , Y )
if len ( window_data ) < 2 :
return _compute_stability_fallback (
con , windows , n_components , stability_threshold
)
common_motions = sorted ( common_motions )
# Build matrices: each row is a motion's first n_components scores
matrices = { }
for w , scores in window_scores . items ( ) :
mat = np . array (
[ scores [ m ] [ : n_components ] for m in common_motions if m in scores ]
)
if mat . shape [ 0 ] > = len ( common_motions ) * 0.5 : # At least 50% coverage
matrices [ w ] = mat
# Fit Ridge regression per axis per window
weight_vectors : Dict [ str , Dict [ int , np . ndarray ] ] = { }
window_list = sorted ( window_data . keys ( ) )
if len ( matrices ) < 2 :
return {
" stability_matrix " : np . array ( [ ] ) ,
" avg_stability " : np . zeros ( n_components ) ,
" stable_axes " : [ ] ,
" reordered_axes " : [ ] ,
" unstable_axes " : list ( range ( 1 , n_components + 1 ) ) ,
" windows " : list ( window_scores . keys ( ) ) ,
}
for w in window_list :
X , Y = window_data [ w ]
# Normalize features
scaler = StandardScaler ( )
X_scaled = scaler . fit_transform ( X )
# Align all matrices to the first window using Procrustes
window_list = sorted ( matrices . keys ( ) )
ref_matrix = matrices [ window_list [ 0 ] ]
aligned_matrices = { window_list [ 0 ] : ref_matrix }
weights = { }
for comp_idx in range ( n_components ) :
y = Y [ : , comp_idx ]
model = Ridge ( alpha = regression_alpha )
model . fit ( X_scaled , y )
weights [ comp_idx + 1 ] = model . coef_
for w in window_list [ 1 : ] :
mat = matrices [ w ]
# Orthogonal Procrustes: find rotation R that best aligns mat to ref
R , _ = orthogonal_procrustes ( mat , ref_matrix )
aligned_matrices [ w ] = mat @ R
weight_vectors [ w ] = weights
# Compute per-component centroids and cosine similarity
# Compute pairwise stability of weight vectors per component
# Use both cosine similarity and Jaccard of top-K dimensions
top_k = 100 # Compare top 100 dimensions by absolute weight
stability_matrix = np . zeros ( ( len ( window_list ) , len ( window_list ) , n_components ) )
for i , w1 in enumerate ( window_list ) :
@ -229,15 +218,35 @@ def compute_axis_stability(
stability_matrix [ i , j ] = 1.0
continue
for comp in range ( n_components ) :
a = aligned_matrices [ w1 ] [ : , comp ]
b = aligned_matrices [ w2 ] [ : , comp ]
for comp in range ( 1 , n_components + 1 ) :
if comp not in weight_vectors [ w1 ] or comp not in weight_vectors [ w2 ] :
stability_matrix [ i , j , comp - 1 ] = 0.0
continue
a = weight_vectors [ w1 ] [ comp ]
b = weight_vectors [ w2 ] [ comp ]
# Align dimensions
dim = min ( len ( a ) , len ( b ) )
a = a [ : dim ]
b = b [ : dim ]
# Method 1: Cosine similarity of full weight vectors
norm_a = np . linalg . norm ( a )
norm_b = np . linalg . norm ( b )
if norm_a == 0 or norm_b == 0 :
stability_matrix [ i , j , comp ] = 0.0
cosine_sim = 0.0
else :
stability_matrix [ i , j , comp ] = np . dot ( a , b ) / ( norm_a * norm_b )
cosine_sim = np . dot ( a , b ) / ( norm_a * norm_b )
# Method 2: Jaccard similarity of top-K dimensions
top_a = set ( np . argsort ( np . abs ( a ) ) [ - top_k : ] )
top_b = set ( np . argsort ( np . abs ( b ) ) [ - top_k : ] )
jaccard = (
len ( top_a & top_b ) / len ( top_a | top_b ) if top_a | top_b else 0.0
)
# Use the maximum of both methods (more robust)
stability_matrix [ i , j , comp - 1 ] = max ( cosine_sim , jaccard )
# Average stability across window pairs for each component
n_windows = len ( window_list )
@ -272,6 +281,7 @@ def compute_axis_stability(
" reordered_axes " : reordered_axes ,
" unstable_axes " : unstable_axes ,
" windows " : window_list ,
" weight_vectors " : weight_vectors ,
}
@ -291,6 +301,7 @@ def _compute_stability_fallback(
party_axes : Dict [ str , Dict [ int , float ] ] = { }
for w in windows :
# Get MP vectors with party mapping
try :
rows = con . execute (
"""
SELECT m . party , s . vector
@ -300,6 +311,9 @@ def _compute_stability_fallback(
""" ,
[ w ] ,
) . fetchall ( )
except Exception :
# mp_metadata may not exist in test DBs
continue
party_vectors = { }
for party , raw_vec in rows :
@ -1066,6 +1080,12 @@ def main(argv: Optional[List[str]] = None) -> int:
default = 0.7 ,
help = " Similarity threshold for axis stability (default: 0.7) " ,
)
p . add_argument (
" --regression-alpha " ,
type = float ,
default = 1.0 ,
help = " Ridge regression regularization strength (default: 1.0) " ,
)
args = p . parse_args ( argv )
if not os . path . exists ( args . db ) :
@ -1096,7 +1116,11 @@ def main(argv: Optional[List[str]] = None) -> int:
# Run analysis
logger . info ( " Computing axis stability... " )
stability_result = compute_axis_stability (
con , windows , args . top_n , stability_threshold = args . stability_threshold
con ,
windows ,
args . top_n ,
stability_threshold = args . stability_threshold ,
regression_alpha = args . regression_alpha ,
)
logger . info ( " Stable axes: %s " , stability_result [ " stable_axes " ] )