From 2c46e21acc615be8cc180b71ecfce959ecfb6530 Mon Sep 17 00:00:00 2001 From: Sven Geboers Date: Sun, 5 Apr 2026 19:44:08 +0200 Subject: [PATCH] feat: add semantic gravity examples script and Axis 1 shift analysis - Add script to find motions closest to semantic gravity per axis/window - Document Axis 1 semantic shift: from administrative law (2016) to migration/asylum policy (2026) - Shows that 'coalition' votes on different topics over time --- ...026-04-05-axis1-semantic-shift-examples.md | 68 +++++ scripts/semantic_gravity_examples.py | 286 ++++++++++++++++++ 2 files changed, 354 insertions(+) create mode 100644 docs/research/2026-04-05-axis1-semantic-shift-examples.md create mode 100644 scripts/semantic_gravity_examples.py diff --git a/docs/research/2026-04-05-axis1-semantic-shift-examples.md b/docs/research/2026-04-05-axis1-semantic-shift-examples.md new file mode 100644 index 0000000..dfdc346 --- /dev/null +++ b/docs/research/2026-04-05-axis1-semantic-shift-examples.md @@ -0,0 +1,68 @@ +# Semantic Content Shift: Axis 1 Over Time + +## What Changed: "Coalition vs Opposition" Axis Content + +| Year | Positive Pole (Coalition) | Negative Pole (Opposition) | Key Theme | +|------|-------------------------|---------------------------|-----------| +| **2016** | Tax law changes, international treaties | — | Administrative law | +| **2018** | Budget modifications, infrastructure, social affairs | — | Government spending | +| **2019** | Working conditions, monitoring issues | — | Administrative oversight | +| **2022** | Local government info, digital accounts | Digital governance, privacy | Digital transformation | +| **2023** | Welfare policy, parental support | Social services | Social policy | +| **2024** | Nuclear weapons, housing, Israel boycott | — | Foreign policy / Justice | +| **2025** | EU sanctions on Israel, asylum policies | — | Migration / Foreign affairs | +| **2026** | **Asylum stops**, Syrian permit revocations, Ukraine returns | IND backlog | 
**Migration dominates** | + +## Key Observations + +### 1. The "Coalition" Side Evolved Significantly + +| Period | Coalition Motions Focused On | +|--------|---------------------------| +| 2016-2019 | Administrative law, tax, budgets, infrastructure | +| 2022-2023 | Digital governance, welfare, social services | +| 2024-2025 | Foreign policy (Israel sanctions), migration | +| **2026** | **Asylum restriction**, Syria, Ukraine returns | + +### 2. Axis 1 Became Migration-Centric by 2026 + +In 2026, the **extreme positive motions** are ALL about asylum/migration: +- "Motie van het lid Vondeling over een totale asielstop" (total asylum stop) +- "Motie van het lid Vondeling over alle tijdelijke asielvergunningen van Syriërs intrekken" (revoke Syrian permits) +- "Motie van het lid Vondeling over een actief terugkeerbeleid voor alle Oekraïners" (active return policy for Ukrainians) + +This suggests the coalition/opposition dynamic in 2026 is increasingly defined by **migration policy** rather than the traditional left-right economic divide. + +### 3. The "Typical" Motion Changed + +Semantic gravity represents the "typical" motion on the axis. Its content shifted: + +| Year | Typical Motion Theme | +|------|---------------------| +| 2016 | Tax law, health law, financial administration | +| 2019 | Bureaucracy reduction, Kamer control, administrative burden | +| 2023 | Student finance, volunteer work, housing | +| 2024 | Fossil fuel phase-out, whistleblower protection, youth care | +| 2026 | Asylum, IND backlog, Ukraine, social grievances | + +## Implications + +1. **Axis label is temporally bounded**: "Rechts kabinetsbeleid versus links oppositiebeleid" works for 2016-2026 as a whole, but in 2026 it's increasingly about migration policy. + +2. **Party voting structure is stable** (0.83 stability), but **what parties vote on** has shifted from economics to migration. + +3. 
# ---------------------------------------------------------------------------
# Patch context carried over verbatim from the collapsed source (not code):
# the end of docs/research/2026-04-05-axis1-semantic-shift-examples.md and the
# diff header that introduces scripts/semantic_gravity_examples.py.
#
#   **Axis 6 (Migration/Culture)** low stability (0.35) may now be overlapping with Axis 1 — migration has become a coalition-defining issue.
#   +
#   +## Example: Concrete Before/After
#   +
#   +**2016 - "Coalition" side:**
#   +> "Wijziging van enkele belastingwetten en enige andere wetten (Fiscale vereenvoudigingswet 2017)"
#   +
#   +**2026 - "Coalition" side:**
#   +> "Motie van het lid Vondeling over een totale asielstop"
#   +
#   +Same axis (coalition votes FOR), but semantically completely different topics.
#   +
#   +---
#   +
#   +*Generated by `scripts/semantic_gravity_examples.py`*
#   diff --git a/scripts/semantic_gravity_examples.py b/scripts/semantic_gravity_examples.py
#   new file mode 100644
#   index 0000000..b9d7a2b
#   --- /dev/null
#   +++ b/scripts/semantic_gravity_examples.py
#   @@ -0,0 +1,286 @@
# ---------------------------------------------------------------------------
"""semantic_gravity_examples.py — Show concrete motion examples for SVD axes across windows.

For each axis and window, finds motions closest to the semantic gravity vector,
providing concrete examples of what the axis "means" in that period.

Usage:
    uv run python scripts/semantic_gravity_examples.py --db data/motions.db --axis 1
    uv run python scripts/semantic_gravity_examples.py --db data/motions.db --axis 2 --n-examples 3 --output axis2.md
"""

from __future__ import annotations

import argparse
import json
import os
import sys
from typing import TYPE_CHECKING, Any, Dict, List, Tuple

import numpy as np

if TYPE_CHECKING:
    # Needed for annotations only. The runtime import lives in main() so the
    # analysis/formatting helpers can be imported without the database driver.
    import duckdb

# A window is skipped when fewer motions than this have both scores and embeddings.
_MIN_COMMON_MOTIONS = 10
# Titles longer than this are cut (and suffixed with "...") in the markdown report.
_MAX_TITLE_CHARS = 100


def _decode_vector(raw_vec: Any) -> np.ndarray:
    """Convert a stored vector into a 1-D float array.

    Accepts JSON text (``str``), JSON bytes (``bytes``/``bytearray``), a list,
    or any other iterable of numbers. ``None`` elements become ``0.0``.
    """
    if isinstance(raw_vec, (bytes, bytearray)):
        raw_vec = raw_vec.decode()
    if isinstance(raw_vec, str):
        raw_vec = json.loads(raw_vec)
    return np.array([0.0 if v is None else float(v) for v in raw_vec], dtype=float)


def _load_fused_embeddings_with_titles(
    con: duckdb.DuckDBPyConnection, window_id: str
) -> List[Tuple[int, np.ndarray, str]]:
    """Load fused embeddings with motion titles for a window.

    Returns a list of ``(motion_id, embedding, title)`` tuples. A missing title
    is returned as ``""``; rows whose stored vector is NULL are skipped.
    """
    rows = con.execute(
        """
        SELECT f.motion_id, f.vector, m.title
        FROM fused_embeddings f
        JOIN motions m ON f.motion_id = m.id
        WHERE f.window_id = ?
        """,
        [window_id],
    ).fetchall()

    # motion_id is cast to int so it compares equal to the int keys produced by
    # _load_motion_scores regardless of how the driver returns the column.
    return [
        (int(motion_id), _decode_vector(raw_vec), title or "")
        for motion_id, raw_vec, title in rows
        if raw_vec is not None
    ]


def _load_motion_scores(
    con: duckdb.DuckDBPyConnection, window_id: str
) -> Dict[int, np.ndarray]:
    """Load SVD scores for a window. Returns {motion_id: score_array}.

    Rows whose stored vector is NULL are skipped.
    """
    rows = con.execute(
        "SELECT entity_id, vector FROM svd_vectors WHERE window_id = ? AND entity_type = 'motion'",
        [window_id],
    ).fetchall()

    return {
        int(entity_id): _decode_vector(raw_vec)
        for entity_id, raw_vec in rows
        if raw_vec is not None
    }


def compute_semantic_gravity_examples(
    con: duckdb.DuckDBPyConnection,
    windows: List[str],
    axis: int,
    n_examples: int = 5,
    n_components: int = 10,
) -> Dict:
    """Find motions closest to semantic gravity for an axis across windows.

    The semantic gravity of a window is the mean of its motion embeddings,
    weighted by each motion's absolute SVD score on the requested axis.

    Args:
        con: Open database connection (only ``execute(...).fetchall()`` is used).
        windows: Window ids to analyse.
        axis: 1-based SVD axis number.
        n_examples: Maximum number of motions returned per category.
        n_components: Currently unused; kept so existing callers that pass it
            keep working.

    Returns:
        ``{window_id: {"gravity", "top_similar", "top_dissimilar", "extreme"}}``.
        ``top_similar`` / ``top_dissimilar`` hold ``(cosine, motion_id, title)``
        tuples (most similar / most dissimilar first); ``extreme`` holds
        ``(score, motion_id, title)`` tuples ordered by absolute score.
        Windows without data, with fewer than 10 motions that have both scores
        and embeddings, or with zero total weight are omitted.

    Raises:
        ValueError: If ``axis`` is smaller than 1 or ``n_examples`` is negative.
    """
    if axis < 1:
        # axis=0 would turn into index -1 and silently analyse the LAST component.
        raise ValueError(f"axis must be >= 1 (1-based), got {axis}")
    if n_examples < 0:
        raise ValueError(f"n_examples must be >= 0, got {n_examples}")

    comp_idx = axis - 1
    results: Dict[str, Dict[str, Any]] = {}

    for w in windows:
        motion_scores = _load_motion_scores(con, w)
        embeddings_by_id = {
            mid: (vec, title)
            for mid, vec, title in _load_fused_embeddings_with_titles(con, w)
        }
        if not motion_scores or not embeddings_by_id:
            continue

        # Motions that have both an SVD score vector and an embedding.
        common = [m for m in motion_scores if m in embeddings_by_id]
        if len(common) < _MIN_COMMON_MOTIONS:
            continue

        # Only motions whose score vector actually contains this axis can be
        # weighted or ranked by it.
        scored = [m for m in common if comp_idx < len(motion_scores[m])]
        weights = np.array(
            [abs(motion_scores[m][comp_idx]) for m in scored], dtype=float
        )
        if weights.size == 0 or weights.sum() == 0:
            continue

        # Align dimensions: truncate every embedding to the shortest one.
        dim = min(len(embeddings_by_id[m][0]) for m in common)
        vectors = np.array([embeddings_by_id[m][0][:dim] for m in scored])
        gravity = np.average(vectors, axis=0, weights=weights)

        # Cosine similarity of every common motion to the gravity vector;
        # zero-norm vectors have no defined cosine and are left out.
        norm_g = np.linalg.norm(gravity)
        similarities = []
        if norm_g > 0:
            for m_id in common:
                vec, title = embeddings_by_id[m_id]
                vec = vec[:dim]
                norm_v = np.linalg.norm(vec)
                if norm_v > 0:
                    sim = np.dot(gravity, vec) / (norm_g * norm_v)
                    similarities.append((sim, m_id, title))

        similarities.sort(reverse=True)
        top_positive = [s for s in similarities if s[0] > 0][:n_examples]
        # Walk from the most negative end; slicing with [:n] (rather than
        # [-n:]) also yields an empty list when n_examples == 0.
        top_negative = [s for s in reversed(similarities) if s[0] < 0][:n_examples]

        # Extreme motions: highest absolute loading on this axis.
        extreme_ids = sorted(
            scored, key=lambda m: abs(motion_scores[m][comp_idx]), reverse=True
        )[:n_examples]
        extreme_motions = [
            (motion_scores[m][comp_idx], m, embeddings_by_id[m][1])
            for m in extreme_ids
        ]

        results[w] = {
            "gravity": gravity,
            "top_similar": top_positive,
            "top_dissimilar": top_negative,
            "extreme": extreme_motions,
        }

    return results


def _get_annual_windows(con: duckdb.DuckDBPyConnection) -> List[str]:
    """Get the sorted ids of windows that have both fused embeddings and motion SVD vectors.

    Window ids containing ``-Q`` are excluded by the query.
    """
    rows = con.execute(
        """
        SELECT DISTINCT f.window_id
        FROM fused_embeddings f
        JOIN svd_vectors s ON f.window_id = s.window_id AND s.entity_type = 'motion'
        WHERE f.window_id NOT LIKE '%-Q%'
        ORDER BY f.window_id
        """
    ).fetchall()
    return [r[0] for r in rows]


def _format_title(title: str) -> str:
    """Truncate a motion title for display, appending "..." when it was cut."""
    suffix = "..." if len(title) > _MAX_TITLE_CHARS else ""
    return title[:_MAX_TITLE_CHARS] + suffix


def format_results(results: Dict, axis: int) -> str:
    """Format results as markdown.

    Args:
        results: Mapping produced by ``compute_semantic_gravity_examples``;
            the ``"extreme"`` and ``"top_similar"`` entries of each window are
            rendered.
        axis: Axis number shown in the report heading.

    Returns:
        The markdown report as a single string, windows in sorted order.
    """
    lines = [
        f"# Semantic Gravity Examples for Axis {axis}",
        "",
        "Shows motions closest to semantic gravity (weighted mean embedding) for each window.",
        "This represents the 'typical' motion on this axis.",
        "",
        "---",
        "",
    ]

    for window in sorted(results.keys()):
        data = results[window]

        lines.append(f"## {window}")
        lines.append("")

        # "extreme" is ranked by absolute score, so either pole may be empty.
        lines.append("### Extreme Positive Motions (high positive loading)")
        lines.extend(
            f"- **[{score:+.3f}]** {_format_title(title)}"
            for score, _m_id, title in data["extreme"]
            if score > 0
        )
        lines.append("")

        lines.append("### Extreme Negative Motions (high negative loading)")
        lines.extend(
            f"- **[{score:+.3f}]** {_format_title(title)}"
            for score, _m_id, title in data["extreme"]
            if score < 0
        )
        lines.append("")

        lines.append("### Most Representative Motions (closest to semantic gravity)")
        lines.extend(
            f"- **[{sim:.3f}]** {_format_title(title)}"
            for sim, _m_id, title in data["top_similar"]
        )
        lines.append("")

    return "\n".join(lines)


def main(argv: List[str] | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit code: 0 on success, 1 on invalid arguments, a missing
        database, or fewer than two windows.
    """
    p = argparse.ArgumentParser(
        description="Find semantic gravity examples for SVD axes"
    )
    p.add_argument("--db", default="data/motions.db", help="Path to motions database")
    p.add_argument("--axis", type=int, default=1, help="SVD axis to analyze (1-10)")
    p.add_argument(
        "--windows", nargs="+", help="Specific windows (default: all annual windows)"
    )
    p.add_argument(
        "--n-examples",
        type=int,
        default=5,
        help="Number of example motions per category",
    )
    p.add_argument("--output", help="Output file (default: print to stdout)")

    args = p.parse_args(argv)

    if args.axis < 1:
        print(f"Error: --axis must be >= 1, got {args.axis}", file=sys.stderr)
        return 1
    if args.n_examples < 0:
        print(
            f"Error: --n-examples must be >= 0, got {args.n_examples}",
            file=sys.stderr,
        )
        return 1

    if not os.path.exists(args.db):
        print(f"Error: Database not found: {args.db}", file=sys.stderr)
        return 1

    # Imported here so the module stays importable without the driver.
    import duckdb

    con = duckdb.connect(database=args.db, read_only=True)
    try:
        # Determine windows
        if args.windows:
            windows = args.windows
        else:
            windows = _get_annual_windows(con)
            print(f"Found {len(windows)} annual windows: {windows}", file=sys.stderr)

        if len(windows) < 2:
            print("Need at least 2 windows for analysis", file=sys.stderr)
            return 1

        # Run analysis
        print(
            f"Computing semantic gravity examples for Axis {args.axis}...",
            file=sys.stderr,
        )
        results = compute_semantic_gravity_examples(
            con, windows, args.axis, args.n_examples
        )

        # Format output
        output = format_results(results, args.axis)

        if args.output:
            # Motion titles contain non-ASCII characters; do not depend on the
            # platform's default encoding.
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(output)
            print(f"Results written to {args.output}", file=sys.stderr)
        else:
            print(output)

        return 0
    finally:
        con.close()


if __name__ == "__main__":
    raise SystemExit(main())