|
|
|
|
@ -134,6 +134,7 @@ def compute_2d_axes( |
|
|
|
|
method: str = "pca", |
|
|
|
|
anchor_kwargs: Optional[Dict] = None, |
|
|
|
|
normalize_vectors: bool = True, |
|
|
|
|
pca_residual: bool = False, |
|
|
|
|
) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict[str, np.ndarray]]: |
|
|
|
|
"""Compute 2D coordinates for MPs per window. |
|
|
|
|
|
|
|
|
|
@ -192,19 +193,59 @@ def compute_2d_axes( |
|
|
|
|
# centre globally |
|
|
|
|
Mc = M - M.mean(axis=0) |
|
|
|
|
try: |
|
|
|
|
_, _, Vt = np.linalg.svd(Mc, full_matrices=False) |
|
|
|
|
U, s, Vt = np.linalg.svd(Mc, full_matrices=False) |
|
|
|
|
except np.linalg.LinAlgError: |
|
|
|
|
_logger.exception("SVD failed in compute_2d_axes (pca)") |
|
|
|
|
return ({}, {}) |
|
|
|
|
# take top-2 components as axes (shape k,) |
|
|
|
|
|
|
|
|
|
# explained variance ratio from singular values |
|
|
|
|
sv2 = s**2 |
|
|
|
|
evr = sv2 / (sv2.sum() + 1e-20) |
|
|
|
|
evr1 = float(evr[0]) if evr.size > 0 else 0.0 |
|
|
|
|
evr2 = float(evr[1]) if evr.size > 1 else 0.0 |
|
|
|
|
|
|
|
|
|
# take top-1 component as primary axis |
|
|
|
|
comp1 = Vt[0] |
|
|
|
|
comp1_hat = comp1 / (np.linalg.norm(comp1) + 1e-12) |
|
|
|
|
|
|
|
|
|
# By default take the second Vt row as the second axis, but optionally |
|
|
|
|
# compute the second axis from residuals (PCA on data with PC1 removed) |
|
|
|
|
comp2 = Vt[1] if Vt.shape[0] > 1 else np.zeros_like(comp1) |
|
|
|
|
|
|
|
|
|
# compute residual-based second component if requested or if PC1 dominates |
|
|
|
|
if pca_residual or evr1 > 0.85: |
|
|
|
|
# Project out PC1 from centred data and compute PCA on residuals |
|
|
|
|
proj1 = (Mc.dot(comp1_hat)).reshape(-1, 1) * comp1_hat.reshape(1, -1) |
|
|
|
|
residual = Mc - proj1 |
|
|
|
|
try: |
|
|
|
|
_, s_res, Vt_res = np.linalg.svd(residual, full_matrices=False) |
|
|
|
|
comp2 = Vt_res[0] |
|
|
|
|
_logger.info( |
|
|
|
|
"Using residual PCA for second axis (pca_residual=%s)", pca_residual |
|
|
|
|
) |
|
|
|
|
except Exception: |
|
|
|
|
_logger.exception( |
|
|
|
|
"Residual PCA failed; falling back to raw second component" |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
comp2_hat = comp2 / (np.linalg.norm(comp2) + 1e-12) |
|
|
|
|
|
|
|
|
|
axes = { |
|
|
|
|
"x_axis": comp1 / (np.linalg.norm(comp1) + 1e-12), |
|
|
|
|
"y_axis": comp2 / (np.linalg.norm(comp2) + 1e-12), |
|
|
|
|
"x_axis": comp1_hat, |
|
|
|
|
"y_axis": comp2_hat, |
|
|
|
|
"method": "pca", |
|
|
|
|
"explained_variance_ratio": [evr1, evr2], |
|
|
|
|
"pca_residual_used": bool(pca_residual or evr1 > 0.85), |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
# warn if PCA is effectively 1-D |
|
|
|
|
if evr1 > 0.85 and not pca_residual: |
|
|
|
|
_logger.warning( |
|
|
|
|
"PCA first component explains %.1f%% of variance — data is near-1D;\n" |
|
|
|
|
"consider using pca_residual=True or the anchor method for a second axis", |
|
|
|
|
evr1 * 100, |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
# project per-window vectors (centre by global mean) |
|
|
|
|
global_mean = M.mean(axis=0) |
|
|
|
|
positions_by_window: Dict[str, Dict[str, Tuple[float, float]]] = { |
|
|
|
|
|