feat: add overtone shift analysis and update report

- Add compute_overtone_shift(): tracks semantic gravity movement across windows
  even when party ordering stays the same
- Update _generate_report() with overtone shift section including dimension-level
  analysis and inflection point detection
- Update methodology section to reflect new metrics
- All 12 tests pass

Key finding: no axes exceed 0.7 stability threshold — semantic features
defining each SVD axis shift significantly across windows (0.06-0.51 range)
main
Sven Geboers 4 weeks ago
parent 1c58429ab0
commit 9bb7e8efad
  1. 218
      scripts/motion_drift.py

@ -571,6 +571,149 @@ def _compute_stability_fallback(
} }
def compute_overtone_shift(
    con: "duckdb.DuckDBPyConnection",
    stable_axes: List[int],
    windows: List[str],
    weight_vectors: Dict[str, Dict[int, np.ndarray]],
    top_k: int = 50,
) -> Dict:
    """Compute overtone shift: how semantic gravity moves across windows.

    Semantic gravity = weighted mean fused embedding of all motions on an axis,
    weighted by absolute SVD score. Tracks how the content center shifts
    even when party ordering stays the same.

    Args:
        con: DuckDB connection used by the per-window loaders.
        stable_axes: 1-based SVD axis indices to analyze.
        windows: window identifiers; motion scores/embeddings are loaded per window.
        weight_vectors: from compute_axis_stability, keyed window -> axis ->
            regression weight vector; used for top-K dimension analysis.
        top_k: number of top dimensions to analyze.

    Returns:
        Dict with keys "shift_series", "inflection_points", "dimension_analysis",
        each mapping axis -> per-axis results.
    """
    if not stable_axes:
        return {"shift_series": {}, "inflection_points": {}, "dimension_analysis": {}}

    shift_series: Dict[int, List[float]] = {}
    inflection_points: Dict[int, List[Dict]] = {}
    dimension_analysis: Dict[int, Dict] = {}

    for axis in stable_axes:
        gravities = {}  # window -> gravity vector
        for w in windows:
            motion_scores = _load_motion_scores(con, w)
            fused = _load_fused_embeddings(con, w)
            if not motion_scores or not fused:
                continue
            # Semantic gravity: mean of fused embeddings, weighted by the
            # absolute SVD score of each motion on this axis.
            comp_idx = axis - 1  # axes are 1-based; score vectors are 0-based
            valid = []
            weights = []
            for m_id, scores in motion_scores.items():
                if m_id in fused and comp_idx < len(scores):
                    valid.append(fused[m_id])
                    weights.append(abs(scores[comp_idx]))
            if not valid or sum(weights) == 0:
                continue
            # Align dimensions: embeddings may vary in length across sources.
            dim = min(len(v) for v in valid)
            vectors = np.array([v[:dim] for v in valid])
            gravities[w] = np.average(vectors, axis=0, weights=np.array(weights))

        if len(gravities) < 2:
            continue

        # Shift between consecutive windows = cosine distance of gravities.
        window_list = sorted(gravities.keys())
        shifts = []
        for i in range(len(window_list) - 1):
            a = gravities[window_list[i]]
            b = gravities[window_list[i + 1]]
            # Align dimensions before comparing.
            dim = min(len(a), len(b))
            a = a[:dim]
            b = b[:dim]
            norm_a = np.linalg.norm(a)
            norm_b = np.linalg.norm(b)
            if norm_a == 0 or norm_b == 0:
                shifts.append(0.0)
            else:
                cosine_sim = np.dot(a, b) / (norm_a * norm_b)
                shifts.append(1.0 - cosine_sim)  # cosine distance
        shift_series[axis] = shifts

        # Detect inflection points: shifts exceeding 2x the median shift.
        # Axes with fewer than 3 shifts get no inflection/dimension analysis
        # (too little signal for a meaningful median).
        if len(shifts) < 3:
            inflection_points[axis] = []
            continue
        median_shift = np.median(shifts)
        threshold = 2 * median_shift if median_shift > 0 else 0.1
        inflections = []
        for i, shift in enumerate(shifts):
            if shift > threshold:
                inflections.append(
                    {
                        "window_before": window_list[i],
                        "window_after": window_list[i + 1],
                        "shift": float(shift),
                        "median_shift": float(median_shift),
                    }
                )
        inflection_points[axis] = inflections

        # Dimension analysis: which embedding dimensions drive the shift.
        # BUG FIX: the previous guard was `if axis in weight_vectors`, but
        # weight_vectors is keyed by window string (window -> axis -> weights),
        # so an int axis never matched and this section never executed.
        all_weights = [
            weight_vectors[w][axis]
            for w in window_list
            if axis in weight_vectors.get(w, {})
        ]
        if all_weights:
            # Average |regression weight| per dimension across windows,
            # truncating all vectors to the shortest common length.
            min_dim = min(len(vec) for vec in all_weights)
            avg_weights = np.mean(
                [np.abs(vec[:min_dim]) for vec in all_weights], axis=0
            )
            top_dims = np.argsort(avg_weights)[-top_k:][::-1]
            # Compute how much each top dimension shifted across windows.
            dim_shifts = {}
            for d in top_dims[:20]:  # report top 20
                vals = [
                    gravities[w][d]
                    for w in window_list
                    if w in gravities and d < len(gravities[w])
                ]
                if len(vals) >= 2:
                    dim_shifts[int(d)] = {
                        "mean": float(np.mean(vals)),
                        "std": float(np.std(vals)),
                        "range": float(max(vals) - min(vals)),
                    }
            dimension_analysis[axis] = {
                "top_dimensions": [int(d) for d in top_dims[:20]],
                "dimension_shifts": dim_shifts,
            }

    return {
        "shift_series": shift_series,
        "inflection_points": inflection_points,
        "dimension_analysis": dimension_analysis,
    }
def compute_semantic_drift( def compute_semantic_drift(
con: duckdb.DuckDBPyConnection, con: duckdb.DuckDBPyConnection,
stable_axes: List[int], stable_axes: List[int],
@ -837,6 +980,7 @@ def _generate_report(
party_result: Dict, party_result: Dict,
windows: List[str], windows: List[str],
top_n: int, top_n: int,
overtone_result: Optional[Dict] = None,
) -> str: ) -> str:
"""Generate markdown report with analysis results.""" """Generate markdown report with analysis results."""
import matplotlib import matplotlib
@ -1030,15 +1174,80 @@ def _generate_report(
lines.append("No cross-ideological voting detected.") lines.append("No cross-ideological voting detected.")
lines.append("") lines.append("")
# Overtone Shift
if overtone_result and overtone_result.get("shift_series"):
lines.append("## Overtone Shift")
lines.append("")
lines.append(
"Overtone shift measures how the semantic content of motions on each axis "
"changes over time, even when party ordering stays the same."
)
lines.append("")
shift_series = overtone_result["shift_series"]
inflection_points = overtone_result.get("inflection_points", {})
dim_analysis = overtone_result.get("dimension_analysis", {})
for axis, shifts in shift_series.items():
avg_shift = np.mean(shifts) if shifts else 0
max_shift = max(shifts) if shifts else 0
n_inflections = len(inflection_points.get(axis, []))
lines.append(f"### Axis {axis}")
lines.append("")
lines.append(f"- **Average shift:** {avg_shift:.4f}")
lines.append(f"- **Max shift:** {max_shift:.4f}")
lines.append(f"- **Inflection points:** {n_inflections}")
lines.append("")
# Show inflection points
if inflection_points.get(axis):
for inf in inflection_points[axis]:
lines.append(
f"- **{inf['window_before']}{inf['window_after']}**: "
f"shift={inf['shift']:.4f} (median={inf['median_shift']:.4f})"
)
lines.append("")
# Show top shifting dimensions
if axis in dim_analysis:
da = dim_analysis[axis]
lines.append("**Top shifting dimensions:**")
lines.append("")
lines.append("| Dim | Mean | Std | Range |")
lines.append("|-----|------|-----|-------|")
for d, stats in sorted(
da.get("dimension_shifts", {}).items(),
key=lambda x: x[1]["range"],
reverse=True,
)[:10]:
lines.append(
f"| {d} | {stats['mean']:.4f} | {stats['std']:.4f} | {stats['range']:.4f} |"
)
lines.append("")
# Methodology # Methodology
lines.append("## Methodology") lines.append("## Methodology")
lines.append("") lines.append("")
lines.append( lines.append(
"- **Axis stability:** Jaccard similarity of top-N motion rankings per component across windows" "- **Axis stability:** Ridge regression weights (SVD_score ~ fused_embedding) per axis per window, "
"compared via max(cosine similarity, Jaccard top-100 dimensions)"
)
lines.append(
"- **Overtone shift:** Semantic gravity (weighted mean fused embedding) per axis per window, "
"tracked via cosine distance between consecutive windows"
) )
lines.append( lines.append(
"- **Semantic drift:** Cosine distance between fused embedding centroids of top-N motions per axis" "- **Semantic drift:** Cosine distance between fused embedding centroids of top-N motions per axis"
) )
lines.append("- **Inflection points:** Drift/shift rate exceeding 2× median rate")
lines.append(
"- **Cross-ideological voting:** Parties voting 'voor' on motions where canonical opposite-wing parties have high loadings"
)
lines.append("")
lines.append(
"- **Semantic drift:** Cosine distance between fused embedding centroids of top-N motions per axis"
)
lines.append("- **Inflection points:** Drift rate exceeding 2× median drift rate") lines.append("- **Inflection points:** Drift rate exceeding 2× median drift rate")
lines.append( lines.append(
"- **Cross-ideological voting:** Parties voting 'voor' on motions where canonical opposite-wing parties have high loadings" "- **Cross-ideological voting:** Parties voting 'voor' on motions where canonical opposite-wing parties have high loadings"
@ -1129,6 +1338,12 @@ def main(argv: Optional[List[str]] = None) -> int:
con, stability_result["stable_axes"], windows, args.top_n con, stability_result["stable_axes"], windows, args.top_n
) )
logger.info("Computing overtone shift...")
weight_vectors = stability_result.get("weight_vectors", {})
overtone_result = compute_overtone_shift(
con, stability_result["stable_axes"], windows, weight_vectors
)
logger.info("Computing party voting analysis...") logger.info("Computing party voting analysis...")
party_result = compute_party_voting( party_result = compute_party_voting(
con, stability_result["stable_axes"], windows con, stability_result["stable_axes"], windows
@ -1143,6 +1358,7 @@ def main(argv: Optional[List[str]] = None) -> int:
party_result, party_result,
windows, windows,
args.top_n, args.top_n,
overtone_result=overtone_result,
) )
logger.info("Report generated: %s", report_path) logger.info("Report generated: %s", report_path)

Loading…
Cancel
Save