diff --git a/scripts/motion_drift.py b/scripts/motion_drift.py index 394756c..51bd763 100644 --- a/scripts/motion_drift.py +++ b/scripts/motion_drift.py @@ -157,7 +157,7 @@ def compute_axis_stability( Returns dict with stability_matrix, stable_axes, reordered_axes, unstable_axes, and weight_vectors for downstream interpretation. """ - from sklearn.linear_model import Ridge + from sklearn.linear_model import Lasso from sklearn.preprocessing import StandardScaler # Load data per window @@ -169,16 +169,12 @@ def compute_axis_stability( if not motion_scores or not fused: continue - # Build feature matrix and targets - # Use motions that have both SVD scores and fused embeddings common = [m for m in motion_scores if m in fused] if len(common) < 50: continue - # Feature matrix: fused embeddings (align dimensions) dim = min(len(fused[m]) for m in common) X = np.array([fused[m][:dim] for m in common]) - # Target matrix: SVD scores (n_common × n_components) Y = np.array([motion_scores[m][:n_components] for m in common]) window_data[w] = (X, Y) @@ -188,20 +184,21 @@ def compute_axis_stability( con, windows, n_components, stability_threshold ) - # Fit Ridge regression per axis per window + # Fit Lasso regression per axis per window + # Lasso (L1) produces sparse weight vectors, concentrating on the + # most important embedding dimensions for each axis weight_vectors: Dict[str, Dict[int, np.ndarray]] = {} window_list = sorted(window_data.keys()) for w in window_list: X, Y = window_data[w] - # Normalize features scaler = StandardScaler() X_scaled = scaler.fit_transform(X) weights = {} for comp_idx in range(n_components): y = Y[:, comp_idx] - model = Ridge(alpha=regression_alpha) + model = Lasso(alpha=regression_alpha, max_iter=5000) model.fit(X_scaled, y) weights[comp_idx + 1] = model.coef_ @@ -754,8 +751,10 @@ def compute_semantic_drift( if not valid_motions: continue - # Compute centroid - vectors = np.array([fused[m] for m in valid_motions]) + # Compute centroid (align dimensions) + vectors = [fused[m] for m in valid_motions] + dim = min(len(v) for v in vectors) + vectors = np.array([v[:dim] for v in vectors]) centroid = np.mean(vectors, axis=0) centroids.append(centroid) window_centroids[w] = { @@ -770,6 +769,10 @@ def compute_semantic_drift( drift_values = [] for i in range(len(centroids) - 1): a, b = centroids[i], centroids[i + 1] + # Align dimensions + dim = min(len(a), len(b)) + a = a[:dim] + b = b[:dim] norm_a = np.linalg.norm(a) norm_b = np.linalg.norm(b) if norm_a == 0 or norm_b == 0: @@ -1292,8 +1295,8 @@ def main(argv: Optional[List[str]] = None) -> int: p.add_argument( "--regression-alpha", type=float, - default=1.0, - help="Ridge regression regularization strength (default: 1.0)", + default=0.1, + help="Lasso regression regularization strength (default: 0.1)", ) args = p.parse_args(argv)