240 changed files with 18109 additions and 10304 deletions
--- a/.compound-engineering/config.local.example.yaml
+++ b/.compound-engineering/config.local.example.yaml
@ -1,12 +0,0 @@
-# Compound Engineering -- local config
-# Copy to .compound-engineering/config.local.yaml in your project root.
-# All settings are optional. Invalid values fall through to defaults.
-
-# --- Work delegation (Codex) ---
-
-# work_delegate: codex           # codex | false (default: false)
-# work_delegate_consent: true    # true | false (default: false)
-# work_delegate_sandbox: yolo    # yolo | full-auto (default: yolo)
-# work_delegate_decision: auto   # auto | ask (default: auto)
-# work_delegate_model: gpt-5.4  # any valid codex model (default: gpt-5.4)
-# work_delegate_effort: high     # minimal | low | medium | high | xhigh (default: high)
--- a/.github/workflows/ci-node-packages.yml
+++ b/.github/workflows/ci-node-packages.yml
@ -0,0 +1,52 @@
+name: CI — Node packages
+
+on:  # only runs when something under packages/ changes
+  push:
+    paths:
+      - 'packages/**'
+  pull_request:
+    paths:
+      - 'packages/**'
+
+jobs:
+  test-packages:
+    name: Test packages/*
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '18'
+
+      - name: Run tests for each package
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          # Candidates: packages/<name> and scoped packages/@scope/<name>; an unmatched glob stays literal and is rejected by the -d test
+          packages=(packages/* packages/@*/*)
+          found=0
+
+          for p in "${packages[@]}"; do
+            if [ -d "$p" ] && [ -f "$p/package.json" ]; then
+              found=1
+              printf '\n===== Package: %s =====\n' "$p"  # bash echo would print the \n literally
+
+              echo "-> Installing dependencies in $p"
+              if [ -f "$p/package-lock.json" ] || [ -f "$p/npm-shrinkwrap.json" ]; then (cd "$p" && npm ci); else (cd "$p" && npm install); fi
+
+              echo "-> Running tests in $p"
+              (cd "$p" && npm test)
+
+              echo "-> Running pack-inspect in $p"
+              (cd "$p" && npm run pack-inspect)
+            fi
+          done
+
+          if [ "$found" -eq 0 ]; then
+            echo "No packages with package.json found under packages/"
+          fi
--- a/.github/workflows/mindmodel-schedule.yml
+++ b/.github/workflows/mindmodel-schedule.yml
@ -0,0 +1,35 @@
+name: mindmodel scheduled validate
+
+on:
+  schedule:
+    - cron: '0 0 * * 0' # weekly: Sundays at 00:00 UTC
+
+jobs:
+  validate:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi  # a failed install now fails the step instead of being hidden
+
+      - name: Run tests
+        run: |
+          python -m pytest -q
+
+      - name: Run mindmodel validator if manifest exists
+        if: ${{ always() }}  # still runs when the test step failed
+        run: |
+          if [ -f .mindmodel/manifest.yaml ]; then
+            python -m scripts.mindmodel.cli || true  # report-only: validator findings never fail this job
+          else
+            echo "No .mindmodel/manifest.yaml present — skipping validator"
+          fi
--- a/.github/workflows/mindmodel-validation.yml
+++ b/.github/workflows/mindmodel-validation.yml
@ -0,0 +1,47 @@
+name: mindmodel validation
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  validate:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+
+      - name: Install development dependencies (if present)
+        run: |
+          python -m pip install --upgrade pip
+          if [ -f requirements-dev.txt ]; then
+            pip install -r requirements-dev.txt
+          else
+            echo "requirements-dev.txt not found, skipping"
+          fi
+
+      - name: Run mindmodel validator (report-only)
+        if: ${{ always() }}
+        run: |
+          # Make this step report-only: run the validator but always exit 0 so PRs are not blocked
+          set +e
+          if [ -f .mindmodel/manifest.yaml ]; then
+            python scripts/validate_mindmodel.py --manifest .mindmodel/manifest.yaml --report reports/out.json || true
+          else
+            echo "No .mindmodel/manifest.yaml present — skipping validator"
+          fi
+          exit 0
+
+      - name: Upload mindmodel reports
+        if: ${{ always() }}  # upload whatever was produced, even after an earlier failure
+        uses: actions/upload-artifact@v4
+        with:
+          name: mindmodel-reports
+          path: reports/*.json  # must cover reports/out.json written by the validator step above
--- a/.github/workflows/publish-ansible-example.yml
+++ b/.github/workflows/publish-ansible-example.yml
@ -0,0 +1,77 @@
+name: Publish Ansible Example
+
+on:
+  push:
+    tags:
+      - 'v*'
+  workflow_dispatch: {}
+
+jobs:
+  verify:
+    name: Verify package
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Node.js 18
+        uses: actions/setup-node@v4
+        with:
+          node-version: '18'
+
+      - name: Install dependencies (packages/@ansible/example)
+        working-directory: packages/@ansible/example
+        run: |
+          # npm ci only works with an npm lockfile (package-lock.json or npm-shrinkwrap.json); pnpm/yarn lockfiles do not satisfy it
+          if [ -f package-lock.json ] || [ -f npm-shrinkwrap.json ]; then
+            npm ci
+          else
+            npm install
+          fi
+
+      - name: Run tests
+        working-directory: packages/@ansible/example
+        run: npm test
+
+      - name: Run pack-inspect
+        working-directory: packages/@ansible/example
+        run: npm run pack-inspect
+
+  publish:
+    name: Publish to npm
+    runs-on: ubuntu-latest
+    needs: verify
+    if: ${{ (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')) || github.event_name == 'workflow_dispatch' }}
+    env:  # the secrets context is not allowed in a job-level `if`, so the token is exposed here and tested per step
+      NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Node.js 18
+        uses: actions/setup-node@v4
+        with:
+          node-version: '18'
+
+      - name: Create ephemeral .npmrc with token
+        if: ${{ env.NPM_TOKEN != '' }}
+        run: |
+          set -euo pipefail
+          # umask makes the file 0600 from creation; the token is passed as a printf argument, never as script text or format string
+          umask 077
+          printf '//registry.npmjs.org/:_authToken=%s\n' "$NPM_TOKEN" > ~/.npmrc
+
+      - name: Publish package
+        if: ${{ env.NPM_TOKEN != '' }}
+        working-directory: packages/@ansible/example
+        run: |
+          set -euo pipefail
+          # publish publicly; rely on npmrc for auth
+          npm publish --access public
+
+      - name: Remove ephemeral .npmrc (always)
+        if: always()
+        run: |
+          set -euo pipefail
+          # attempt secure removal, fall back to plain removal (rm -f also succeeds when the file was never created)
+          shred -u -z ~/.npmrc 2>/dev/null || rm -f ~/.npmrc || true
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@ -1,53 +0,0 @@
-name: Pytest
-
-on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
-
-jobs:
-  test:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          version: "0.6.x"
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.13"
-
-      - name: Install dependencies
-        run: uv sync --locked
-
-      - name: Run tests
-        run: uv run pytest tests/ -q
-
-  typecheck:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          version: "0.6.x"
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.13"
-
-      - name: Install dependencies
-        run: uv sync --locked
-
-      - name: Run pyright
-        continue-on-error: true
-        run: uv run pyright
--- a/.gitignore
+++ b/.gitignore
@ -17,7 +17,6 @@ data/*.json
 # Generated output files
 outputs/
 outputs_*/
-reports/

 # Stray temp files
 dummy
@ -30,6 +29,4 @@ dummy
 # Generated analysis files
 thoughts/explorer/*.json
 thoughts/explorer/*_report.md
-
-# Compound Engineering local config
-.compound-engineering/*.local.yaml
+thoughts/shared/analyses/
--- a/.mindmodel/README.md
+++ b/.mindmodel/README.md
@ -0,0 +1,11 @@
+# .mindmodel
+
+This directory contains a generated, read-only snapshot of the repository's "mind model" — structured metadata and evidence used by tooling to reason about repository intent, patterns, and decisions.
+
+Guidelines
+- Read-only: Treat files in this directory as generated artifacts. Local tooling or CI may regenerate or validate them; avoid manual edits unless you are intentionally updating the generator.
+- No secrets: Do not place any credentials, tokens, or sensitive data here. The validator that consumes this folder is designed to detect common secret patterns and will fail if secrets are found.
+- Safe to read: Tools and CI may read these files. They must avoid opening or parsing arbitrary repository secrets and should operate in read-only mode.
+- Validation: CI workflows will run a validator against this folder (if present) to ensure manifest shape, evidence snippets, and referenced files meet project rules.
+
+If you need to propose a change to the mind model, open a PR describing the intent and the generator changes. The CI validator will validate the submitted artifact before merge.
--- a/.mindmodel/anti-patterns/anti-patterns.md
+++ b/.mindmodel/anti-patterns/anti-patterns.md
@ -0,0 +1,127 @@
+---
+title: Anti-Patterns in Stemwijzer
+category: anti-patterns
+severity: critical
+---
+
+# Anti-Patterns
+
+> **NOTE**: Some anti-patterns below were investigated and found to be resolved or invalid. See individual entries for details.
+
+## CRITICAL: print() Instead of Logging
+
+**File**: `api_client.py`
+**Evidence**: 11 instances of `print(f"...")` instead of `_logger.info(...)`
+
+**Broken code**:
+```python
+def get_motions(self, ...):
+    try:
+        # ...
+        print(f"Fetched {len(voting_records)} voting records from API")  # BAD
+        print(f"Processed into {len(motions)} unique motions")  # BAD
+    except Exception as e:
+        print(f"Error fetching motions from API: {e}")  # BAD - no traceback
+```
+
+**Fix**:
+```python
+import logging
+
+_logger = logging.getLogger(__name__)
+
+def get_motions(self, ...):
+    try:
+        _logger.info("Fetched %d voting records from API", len(voting_records))
+        _logger.info("Processed into %d unique motions", len(motions))
+    except Exception as e:
+        _logger.exception("Error fetching motions from API: %s", e)
+        return []
+```
+
+---
+
+## CRITICAL: Global `_DummySt` Replacement
+
+**File**: `explorer.py`
+**Evidence**: Lines ~50-70, module-level `st = _DummySt()` global replacement
+
+**Problem**: Creates a module-level variable `st` that shadows `streamlit` module, causing subtle bugs.
+
+**Fix**: Use conditional flags instead of global replacement:
+```python
+# GOOD: Use conditional logic
+try:
+    import plotly.express as px
+    import plotly.graph_objects as go
+    HAS_PLOTLY = True
+except ImportError:
+    HAS_PLOTLY = False
+    px = None
+    go = None
+
+def render_chart(data):
+    if not HAS_PLOTLY:
+        _logger.warning("Plotly not available")
+        return
+    # ... rest of chart logic
+```
+
+---
+
+## WARNING: Logger Naming Inconsistency
+
+**Evidence**: 16 files use `logger`, 17 files use `_logger`
+
+**Files with `logger`** (without underscore):
+- api_client.py, ai_provider.py, pipeline files, analysis files
+
+**Files with `_logger`** (with underscore):
+- database.py, explorer.py, explorer_helpers.py
+
+**Recommendation**: Standardize on `_logger` for module-level loggers.
+
+---
+
+## WARNING: Bare except with pass
+
+**File**: `database.py`, line 47
+
+```python
+# BAD - also catches KeyboardInterrupt, SystemExit, GeneratorExit (BaseException subclasses)
+try:
+    conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1")
+except:  # bare except
+    pass
+```
+
+**Fix**:
+```python
+try:
+    conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1")
+except Exception as exc:
+    _logger.debug("Sequence creation skipped: %s", exc)
+```
+
+---
+
+## INVESTIGATED: Entity-ID / Party-Name Mismatch
+
+**Status**: INVALID - investigated and resolved
+
+**Investigation Summary**: `svd_vectors.entity_id` only contains MP names (not party names). Party centroids are correctly computed via `mp_metadata` lookups. No production bug exists.
+
+---
+
+## Pattern: Three Separate Party Alias Dictionaries
+
+**Problem**: Party name variations exist in 3+ places with no canonical alias mapping.
+
+**Fix**: Create one `PARTY_ALIASES` dict in `config.py`:
+```python
+PARTY_ALIASES = {
+    "GroenLinks-PvdA": ["GL-PvdA", "GroenLinks PvdA", "PvdA-GroenLinks"],
+    "PVV": ["Partij voor de Vrijheid"],
+    # ...
+}
+```
--- a/.mindmodel/architecture/architecture.yaml
+++ b/.mindmodel/architecture/architecture.yaml
@ -0,0 +1,55 @@
+# Architecture
+
+## Page Routing
+- `Home.py` → thin wrapper, minimal logic
+- `pages/1_🗳️_Stemwijzer.py` → thin wrapper delegating to quiz module
+- `pages/2_🔍_Explorer.py` → thin wrapper delegating to `explorer.py`
+- **Pattern**: thin Streamlit page files that import and call into core modules
+
+## Core Modules
+```
+database.py          → MotionDatabase singleton (shared across all pages)
+explorer.py          → Explorer page logic, tab routing
+explorer_helpers.py  → Pure functions, chart builders, coordinate computation
+analysis/            → SVD, UMAP, clustering algorithms
+pipeline/            → Data ingestion pipeline
+config.py            → Dataclass Config, PARTY_COLOURS dict
+```
+
+## Data Flow
+```
+DuckDB → MotionDatabase (singleton)
+              ↓
+        st.cache_data loaders
+              ↓
+        explorer_helpers (pure functions)
+              ↓
+        Plotly charts → Streamlit
+```
+
+## Key Patterns
+1. **Singleton per module**: `database.py` exports one `db` instance; `config.py` exports config + PARTY_COLOURS
+2. **Graceful degradation**: try/except around optional dependencies (UMAP, Plotly)
+3. **Pipeline**: fetch → transform → store (see `pipeline/` directory)
+4. **API client**: with retry/backoff for external data sources
+5. **Dummy fallbacks**: if optional dep unavailable, use dummy stub
+
+## Database Schema (key relationships)
+```
+motions (id, title, date, category)
+  ↓
+mp_votes (mp_id, motion_id, vote: -1/0/1)
+  ↓
+svd_vectors (entity_id, window, vector_2d)  ← entity_id = mp_name (party positions are derived via mp_metadata lookups)
+  ↓
+party_centroids (party, window, centroid_2d)
+  ↓
+mp_party_history (mp_id, party, start_date, end_date)
+```
+
+## SVD Computation Pipeline
+1. Build MP × Motion vote matrix from `mp_votes`
+2. Run SVD to get 2D embeddings per MP
+3. Optionally aggregate to party centroids
+4. Align across windows using Procrustes
+5. Store in `svd_vectors` table
--- a/.mindmodel/constraints/README.md
+++ b/.mindmodel/constraints/README.md
@ -0,0 +1,51 @@
+# Constraint Files Index
+
+This directory contains all constraint files for the Stemwijzer codebase.
+
+## Quick Navigation
+
+| Category | File | Purpose |
+|----------|------|---------|
+| **Stack** | `../stack/stack.yaml` | Tech stack overview |
+| **Architecture** | `../architecture/architecture.yaml` | Data flow, page routing, component relationships |
+| **Conventions** | `../conventions/conventions.yaml` | Naming, error handling, code organization |
+| **Domain** | `../domain/domain-glossary.yaml` | Dutch political terms, algorithm concepts |
+| **Patterns** | `../patterns/patterns.yaml` | 10 code patterns (page wrapper, pipeline, etc.) |
+| **Anti-Patterns** | `../anti-patterns/anti-patterns.md` | ⚠️ Known issues by severity (CRITICAL / WARNING / investigated) |
+| **Dependencies** | `../dependencies/dependencies.yaml` | Library wiring, singletons, imports |
+
+## How to Use
+
+1. **Before writing code**: Check `patterns/patterns.yaml` for how similar features are implemented
+2. **When naming things**: Follow `conventions/conventions.yaml` (snake_case functions, PascalCase classes)
+3. **When handling errors**: Avoid patterns in `anti-patterns/anti-patterns.md`
+4. **When working with domain terms**: Reference `domain/domain-glossary.yaml`
+5. **When connecting components**: See `dependencies/dependencies.yaml` for wiring
+
+## Key Conventions Summary
+
+- **Files**: snake_case (`explorer_helpers.py`)
+- **Functions**: snake_case (`compute_party_coords`)
+- **Classes**: PascalCase (`MotionDatabase`)
+- **Constants**: UPPER_SNAKE_CASE (`PARTY_COLOURS`)
+- **No bare `except:`** — always specify exception type
+- **Pure functions** in helpers — no IO, no Streamlit calls
+- **One singleton per module** — `db`, `config`, `PARTY_COLOURS`
+
+## ⚠️ Critical Issues
+
+**Read `../anti-patterns/anti-patterns.md` first.** Its CRITICAL entries cover `print()` used instead of
+logging in `api_client.py` and the module-level `st = _DummySt()` replacement in `explorer.py`. The previously
+suspected entity-ID / party-name mismatch in `explorer_helpers.py:compute_party_coords` was investigated and found invalid.
+
+## Files Generated
+
+- `manifest.yaml` — lists all constraint files with group mappings
+- `stack/stack.yaml` — tech stack
+- `architecture/architecture.yaml` — data flow & components
+- `conventions/conventions.yaml` — coding conventions
+- `domain/domain-glossary.yaml` — domain terminology
+- `patterns/patterns.yaml` — 10 code patterns with examples
+- `anti-patterns/anti-patterns.md` — anti-patterns by severity (CRITICAL / WARNING / investigated)
+- `dependencies/dependencies.yaml` — library wiring
+- `README.md` — this index
--- a/.mindmodel/constraints/error-handling.md
+++ b/.mindmodel/constraints/error-handling.md
@ -0,0 +1,143 @@
+---
+title: Error Handling Patterns
+category: constraints
+severity: high
+---
+
+# Error Handling Patterns
+
+## Core Rules
+
+1. **Catch `Exception`, return safe fallbacks** (False/[]/None)
+2. **Log exceptions with traceback** using `_logger.exception()`
+3. **Never swallow exceptions silently** - always log or return sensible default
+4. **Avoid nested try/except blocks** - flatten exception handling
+
+## Pattern: Try/Except Safe Fallback
+
+This is the dominant pattern in the codebase (219+ instances).
+
+```python
+# Standard pattern from database.py, api_client.py, etc.
+try:
+    result = risky_operation()
+    return process(result)
+except Exception as exc:
+    _logger.warning("Operation failed: %s", exc)
+    return safe_fallback  # False, [], None, {}
+```
+
+### Examples from Codebase
+
+**database.py** - DuckDB operations:
+```python
+def get_svd_vectors(self, window: str):
+    try:
+        conn = duckdb.connect(self.db_path, read_only=True)
+        try:
+            result = conn.execute(query, (window,)).fetchall()
+            return self._parse_vectors(result)
+        finally:
+            conn.close()
+    except Exception as exc:
+        _logger.warning("Failed to get SVD vectors: %s", exc)
+        return []
+```
+
+**ai_provider.py** - HTTP retries:
+```python
+try:
+    resp = requests.post(url, json=json, headers=headers, timeout=10)
+    resp.raise_for_status()
+    return resp.json()
+except requests.ConnectionError as exc:
+    if attempt == retries:
+        raise ProviderError(f"Connection error: {exc}") from exc
+    # ... retry logic
+```
+
+## Pattern: Optional Dependency Fallback
+
+Gracefully degrade when optional packages are unavailable.
+
+```python
+# UMAP fallback in explorer_helpers.py
+try:
+    import umap
+    HAS_UMAP = True
+except ImportError:
+    HAS_UMAP = False
+    _logger.debug("UMAP not available, using SVD vectors directly")
+
+def project_to_2d(vectors):
+    if HAS_UMAP:
+        return umap.UMAP().fit_transform(vectors)
+    return vectors[:, :2]  # Fallback: first 2 SVD dimensions
+```
+
+## Anti-Patterns
+
+### 1. Bare except with pass (CRITICAL)
+**File**: `database.py`, line 47
+
+```python
+# BAD - also catches KeyboardInterrupt, SystemExit, GeneratorExit (BaseException subclasses)
+try:
+    conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1")
+except:  # bare except
+    pass
+```
+
+**Fix**: Catch specific exception or log and continue:
+```python
+try:
+    conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1")
+except Exception as exc:
+    _logger.debug("Sequence creation skipped (may already exist): %s", exc)
+```
+
+### 2. Nested Exception Handling
+**File**: `explorer.py`, lines 244-261
+
+```python
+# BAD - opaque error paths
+try:
+    result = compute_svd(motions)
+except Exception:
+    try:
+        result = fallback_compute(motions)
+    except Exception:
+        pass  # Both exceptions silently dropped
+```
+
+**Fix**: Handle each failure explicitly — log the first error, then log and re-raise if the fallback also fails:
+```python
+# GOOD - explicit handling
+try:
+    result = compute_svd(motions)
+except Exception as exc:
+    _logger.warning("SVD failed, trying fallback: %s", exc)
+    try:
+        result = fallback_compute(motions)
+    except Exception as fallback_exc:
+        _logger.error("Both SVD approaches failed: %s, %s", exc, fallback_exc)
+        raise
+```
+
+## Rule Summary
+
+| Pattern | When to Use | Return Value |
+|---------|-------------|--------------|
+| Safe fallback | Best-effort operations | `[]`, `{}`, `False`, `None` |
+| Re-raise | Critical operations that must succeed | raise |
+| Log and continue | Optional steps in pipeline | (continue) |
+| Graceful degradation | Optional dependencies | Default behavior |
+
+## When to Log vs Return
+
+| Scenario | Action |
+|----------|--------|
+| User action fails | Log warning, return safe default |
+| Internal error (corrupt data) | Log error, return safe default |
+| Transient failure (network) | Log warning, retry if appropriate |
+| Configuration error | Log error, raise with clear message |
--- a/.mindmodel/constraints/imports.yaml
+++ b/.mindmodel/constraints/imports.yaml
@ -0,0 +1,205 @@
+# Import Organization Constraints
+
+## Standard Order
+
+Organize imports in three groups with blank lines between:
+
+```python
+# 1. Standard library imports (alphabetical within group)
+import json
+import logging
+import os
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Tuple
+
+# 2. Third-party packages (alphabetical within group)
+import duckdb
+import requests
+
+# 3. Local application modules (can use relative imports)
+from config import config
+from database import db
+from summarizer import summarizer
+```
+
+## Alphabetical Ordering
+
+Within each group, sort imports alphabetically:
+
+```python
+# GOOD - alphabetical
+import json
+import logging
+from datetime import datetime
+from typing import Dict, List, Optional
+
+# BAD - random order
+from typing import Optional
+import json
+from datetime import datetime
+import logging
+from typing import Dict, List
+```
+
+## Grouping Rules
+
+### Standard Library
+- `json`, `logging`, `os`, `sys`, `time`
+- `datetime`, `timedelta` from `datetime`
+- `Dict`, `List`, `Optional`, etc. from `typing`
+- `argparse`, `pathlib`, `re`, `uuid`
+
+### Third-Party
+- `duckdb`, `requests`, `streamlit`
+- `numpy`, `scipy`, `sklearn`
+- `plotly`, `beautifulsoup4`
+- `pytest`
+
+### Local Application
+- Modules from same package
+- Relative imports when appropriate
+
+## When to Use `from X import Y`
+
+### Prefer `from module import specific_items` for:
+- Constants and config
+- Single classes or functions used frequently
+- Type annotations
+
+```python
+# GOOD - clear about what we're using
+from config import config
+from database import db
+
+# GOOD - type hints
+from typing import Dict, List, Optional
+```
+
+### Use `import module` when:
+- You need multiple items from the module
+- Using module.namespace is clearer
+
+```python
+# GOOD - duckdb used for types and module access
+import duckdb
+
+conn = duckdb.connect(...)
+result = conn.execute(...)
+
+# Also acceptable for types
+from typing import Dict
+```
+
+## Relative Imports
+
+In package modules, prefer relative imports:
+
+```python
+# pipeline/svd_pipeline.py
+from ..database import MotionDatabase  # relative import
+from .text_pipeline import process_text  # relative import
+```
+
+## Circular Imports
+
+Avoid circular imports by:
+1. Moving shared code to a third module
+2. Using TYPE_CHECKING for type hints only
+
+```python
+# types.py - shared type definitions
+from typing import TypedDict
+
+class MotionDict(TypedDict):
+    id: int
+    title: str
+    ...
+
+# module_a.py
+from .types import MotionDict
+
+# module_b.py - if needed here too
+from .types import MotionDict
+```
+
+## Import Patterns to Avoid
+
+### Wildcard Imports
+```python
+# BAD
+from database import *
+
+# GOOD
+from database import db, MotionDatabase
+```
+
+### Import in Function Scope (unless necessary)
+```python
+# AVOID - delays import, makes dependencies unclear
+def some_function():
+    import pandas as pd  # Late import
+    return pd.DataFrame(...)
+
+# PREFER - import at module level
+import pandas as pd
+
+def some_function():
+    return pd.DataFrame(...)
+```
+
+### Reassigning Imported Names
+```python
+# BAD - confusing
+from module import process
+process = something_else  # Reassigning
+
+# GOOD - clear naming
+from module import process as process_data
+```
+
+## Type Checking Imports
+
+For type hints only, use TYPE_CHECKING:
+
+```python
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .models import Motion
+
+def get_motion(motion_id: int) -> "Motion":  # String quote for forward ref
+    ...
+```
+
+## Optional Dependency Imports
+
+Handle optional dependencies gracefully:
+
+```python
+try:
+    import duckdb
+except Exception:
+    duckdb = None  # Will be checked later
+
+class MotionDatabase:
+    def __init__(self):
+        if duckdb is None:
+            self._file_mode = True  # Fallback mode
+```
+
+## Example: Complete Import Block
+
+```python
+# Complete example: a module that consumes the database singleton
+import json
+import logging
+import uuid
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Tuple
+
+import duckdb
+
+from config import config
+
+from database import db
+```
--- a/.mindmodel/constraints/logging.md
+++ b/.mindmodel/constraints/logging.md
@ -0,0 +1,131 @@
+---
+title: Logging Constraints
+category: constraints
+severity: critical
+---
+
+# Logging Constraints
+
+## Core Rule
+
+Use `logging.getLogger(__name__)` - never use `print()`
+
+**CRITICAL ANTI-PATTERN**: `api_client.py` uses `print()` instead of logging (11 instances).
+
+## CRITICAL Anti-Pattern: print() Instead of Logging
+
+**File**: `api_client.py`
+**Evidence**: Lines with `print(f"...")` instead of `_logger.info(...)`
+
+**Broken code**:
+```python
+def get_motions(self, ...):
+    try:
+        # ...
+        print(f"Fetched {len(voting_records)} voting records from API")  # BAD
+        print(f"Processed into {len(motions)} unique motions")  # BAD
+    except Exception as e:
+        print(f"Error fetching motions from API: {e}")  # BAD - no traceback
+```
+
+**Fix**:
+```python
+import logging
+
+_logger = logging.getLogger(__name__)
+
+def get_motions(self, ...):
+    try:
+        _logger.info("Fetched %d voting records from API", len(voting_records))
+        _logger.info("Processed into %d unique motions", len(motions))
+    except Exception as e:
+        _logger.exception("Error fetching motions from API: %s", e)
+        return []
+```
+
+## Logger Initialization
+
+Get logger at module level:
+
+```python
+# GOOD: Use logging.getLogger(__name__)
+import logging
+
+_logger = logging.getLogger(__name__)
+
+def some_function():
+    _logger.info("Processing started")
+    _logger.debug("Detail: %s", detail)
+```
+
+## Logger Naming
+
+Use `__name__` for automatic module path:
+
+```python
+# In database.py - logger will be "database"
+_logger = logging.getLogger(__name__)
+
+# In pipeline/svd_pipeline.py - logger will be "pipeline.svd_pipeline"
+_logger = logging.getLogger(__name__)
+```
+
+**INCONSISTENCY WARNING**: 16 files use `logger`, 17 files use `_logger`. Choose one convention.
+
+**Recommendation**: Use `_logger` (with underscore) for module-level loggers to distinguish from class-level loggers.
+
+## Log Levels
+
+| Level | When to Use |
+|-------|-------------|
+| DEBUG | Detailed diagnostic info (dev only) |
+| INFO | Normal operation milestones |
+| WARNING | Unexpected but handled (fallbacks) |
+| ERROR | Operation failed, may need attention |
+| CRITICAL | Fatal error, program may crash |
+
+## Exception Logging
+
+Use `_logger.exception()` for caught exceptions (includes traceback):
+
+```python
+try:
+    result = risky_operation()
+except Exception as exc:
+    _logger.exception("Operation failed: %s", exc)
+    return fallback_value
+```
+
+## Anti-Patterns
+
+### Debug Prints in Production Code
+```python
+# BAD
+print(f"[TRAJ DEBUG] processing window {wid}")
+
+# GOOD
+_logger.debug("Processing window %s", wid)
+```
+
+### Inconsistent Logger Names
+```python
+# BAD - mixing _logger and logger
+_logger = logging.getLogger(__name__)
+logger = logging.getLogger("other")  # Inconsistent
+```
+
+## Sensitive Data
+
+Never log sensitive information:
+- API keys
+- User votes
+- Session IDs (if tied to user data)
+- Personal information
+
+```python
+# BAD
+_logger.info("User %s voted %s", user_id, vote)
+
+# GOOD - log that the event happened with a truncated session ID, never the vote itself
+_logger.info("Vote recorded for session %s", session_id[:8])
+```
--- a/.mindmodel/constraints/naming.yaml
+++ b/.mindmodel/constraints/naming.yaml
@ -0,0 +1,141 @@
+# Naming Constraints
+
+## File Names
+
+### Python Modules
+- **Convention**: `snake_case.py`
+- **Examples**: `motion_database.py`, `api_client.py`, `text_pipeline.py`
+
+### Test Files
+- **Convention**: `test_<module_name>.py`
+- **Examples**: `test_database.py`, `test_api_client.py`
+
+### Config Files
+- **Convention**: `snake_case`
+- **Examples**: `config.py`, `.env.example`, `pyproject.toml`
+
+### Directories
+- **Convention**: `snake_case/`
+- **Examples**: `pipeline/`, `tests/integration/`, `src/validators/`
+
+## Class Names
+
+- **Convention**: `PascalCase`
+- **Examples**: `MotionDatabase`, `TweedeKamerAPI`, `MotionSummarizer`
+
+### Naming Patterns
+| Pattern | Example |
+|---------|---------|
+| Database wrapper | `MotionDatabase` |
+| API client | `TweedeKamerAPI` |
+| Service/Helpers | `MotionScraper`, `MotionAnalyzer` |
+| Exceptions | `ProviderError` |
+
+## Function Names
+
+- **Convention**: `snake_case`
+- **Examples**: `get_motions`, `compute_similarity`, `process_voting_records`
+
+### Private Methods
+- **Convention**: `_snake_case` (single underscore prefix)
+- **Examples**: `_get_voting_records`, `_parse_response`
+
+## Variable Names
+
+### Regular Variables
+- **Convention**: `snake_case`
+- **Examples**: `motion_id`, `party_name`, `voting_results`
+
+### Constants (Module-Level)
+- **Convention**: `UPPER_SNAKE_CASE`
+- **Examples**: `DATABASE_PATH`, `API_TIMEOUT`, `MAX_RETRIES`
+
+### Config Variables (in dataclass)
+- **Convention**: `UPPER_SNAKE_CASE`
+- **Examples**: `QWEN_MODEL`, `POLICY_AREAS`
+
+### Booleans
+- **Convention**: `is_`, `has_`, `can_` prefixes or `_flag` suffix
+- **Examples**: `is_active`, `has_votes`, `skip_extract`
+
+### Private Variables
+- **Convention**: `_underscore_prefix`
+- **Examples**: `_conn`, `_cache`, `_session`
+
+## Singleton Instances
+
+- **Convention**: `lower_snake_case` at module level
+- **Examples**: `db = MotionDatabase()`, `summarizer = MotionSummarizer()`
+
+```python
+# database.py
+class MotionDatabase:
+    ...
+
+# Singleton instance
+db = MotionDatabase()
+
+# Usage
+from database import db
+motions = db.get_motions()
+```
+
+## Type Variables
+
+- **Convention**: `PascalCase`
+- **Examples**: `T = TypeVar('T')`, `MotionDict = Dict[str, Any]`
+
+## Anti-Patterns
+
+### Inconsistent Naming
+```python
+# BAD - mixing styles
+get_motions()      # snake_case
+GetMotionById()    # PascalCase
+processData()      # camelCase
+
+# GOOD - consistent snake_case
+get_motions()
+get_motion_by_id()
+process_voting_data()
+```
+
+### Abbreviations
+```python
+# AVOID - unclear abbreviations
+calc_similarity()      # calculate_*
+proc_votes()          # process_*
+get_mp_data()          # get_mp_metadata()
+
+# PREFER - full words
+calculate_similarity()
+process_votes()
+get_mp_metadata()
+```
+
+### Hungarian Notation
+```python
+# BAD - Hungarian notation
+str_title = "..."
+int_count = 0
+b_is_active = True
+
+# GOOD - clear types via naming
+title = "..."
+count = 0
+is_active = True
+```
+
+## Special Cases
+
+### Window IDs
+- **Format**: `"YYYY-QN"` or `"YYYY"`
+- **Examples**: `"2024-Q1"`, `"2024-Q2"`, `"2024"`
+
+### Policy Areas
+- **Convention**: Title Case Dutch terms (spaces allowed)
+- **Examples**: `"Economie"`, `"Sociale Zaken"`, `"Klimaat"`
+
+### Vote Values
+- **Convention**: Capitalized Dutch terms (first word only, spaces allowed)
+- **Values**: `"Voor"`, `"Tegen"`, `"Onthouden"`, `"Geen stem"`, `"Afwezig"`
--- a/.mindmodel/constraints/testing.yaml
+++ b/.mindmodel/constraints/testing.yaml
@ -0,0 +1,26 @@
+# Testing conventions constraint (YAML)
+#
+# `rules` is a list of conventions; each entry carries a short `name`, a
+# one-sentence `rule`, and `examples` labelled `good` or `bad`.
+
+rules:
+  # File and function naming for tests.
+  - name: test_naming
+    rule: "Use pytest and name tests test_*.py and test_* functions."
+    examples:
+      - good: "tests/test_text_pipeline.py"
+      - bad: "tests/text_pipeline_test.py"
+
+  # Where shared fixtures live.
+  - name: fixtures_and_conftest
+    rule: "Place shared fixtures in tests/conftest.py or tests/fixtures/ for reuse."
+    examples:
+      - good: "use fixtures declared in tests/conftest.py"
+
+  # Exception expectations; the `good` example is a literal block of Python.
+  - name: assert_raises
+    rule: "Explicitly assert expected exceptions with pytest.raises for invalid input."
+    examples:
+      - good: |
+          import pytest
+
+          def test_invalid_input():
+              with pytest.raises(ValueError):
+                  function_under_test('bad')
+
+# Free-text descriptions of how the rules above are enforced.
+enforcement_examples:
+  - "Run pytest in CI; fail if tests don't run or if there are regressions."
--- a/.mindmodel/constraints/types.yaml
+++ b/.mindmodel/constraints/types.yaml
@ -0,0 +1,233 @@
+# Type Hint Constraints
+
+## Core Rule
+
+**Use type hints on all public functions and methods**
+
+## Function Type Hints
+
+### Required on Public APIs
+
+```python
+# GOOD - complete type hints
+def get_motion(self, motion_id: int) -> Optional[Dict]:
+    ...
+
+def get_filtered_motions(
+    self,
+    policy_area: str = "Alle",
+    limit: int = 10
+) -> List[Dict]:
+    ...
+
+def calculate_similarity(self, motion_a: int, motion_b: int) -> float:
+    ...
+```
+
+### Optional Parameters
+
+Use `Optional[X]` or `X | None`:
+
+```python
+# Both forms are acceptable
+def get_motion(self, motion_id: Optional[int] = None) -> Optional[Dict]:
+    ...
+
+def get_motion(self, motion_id: int | None = None) -> dict | None:
+    ...
+```
+
+### Multiple Return Types
+
+Use `Union[X, Y]` or `|` operator:
+
+```python
+# Acceptable forms
+def parse_value(self, value: str) -> Union[bool, str, None]:
+    ...
+
+def parse_value(self, value: str) -> bool | str | None:
+    ...
+```
+
+### Generic Types
+
+Use `List[X]`, `Dict[K, V]`, `Tuple[X, Y]`:
+
+```python
+from typing import Dict, List, Optional, Tuple
+
+def get_motions(self, ids: List[int]) -> Dict[int, Dict]:
+    """Map motion_id -> motion data."""
+    ...
+
+def process_batch(self, items: List[str]) -> Tuple[List[str], List[str]]:
+    """Returns (successes, failures)."""
+    ...
+```
+
+## Collection Types
+
+Prefer specific types over bare `list`/`dict`:
+
+```python
+# GOOD - specific types
+def get_votes(self) -> List[str]:
+    ...
+
+def get_metadata(self) -> Dict[str, Any]:
+    ...
+
+# ACCEPTABLE - for truly generic collections
+def merge_dicts(*dicts: dict) -> dict:
+    ...
+```
+
+## DuckDB Result Types
+
+DuckDB returns tuples/lists - document expected structure:
+
+```python
+def get_motion(self, motion_id: int) -> Optional[Tuple]:
+    """Returns (id, title, description, date, ...) or None."""
+    conn = duckdb.connect(self.db_path)
+    try:
+        result = conn.execute(
+            "SELECT * FROM motions WHERE id = ?", (motion_id,)
+        ).fetchone()
+        return result
+    finally:
+        conn.close()
+
+# Or use Dict for clarity
+def get_motion_as_dict(self, motion_id: int) -> Optional[Dict]:
+    """Returns motion dict or None."""
+    conn = duckdb.connect(self.db_path)
+    try:
+        row = conn.execute(
+            "SELECT * FROM motions WHERE id = ?", (motion_id,)
+        ).fetchone()
+        if row:
+            return {
+                "id": row[0],
+                "title": row[1],
+                "description": row[2],
+                ...
+            }
+        return None
+    finally:
+        conn.close()
+```
+
+## Class/Instance Types
+
+Use `Self` for methods returning instance type:
+
+```python
+from typing import Self
+
+class MotionDatabase:
+    def with_connection(self, path: str) -> Self:
+        """Return new instance with different path."""
+        return MotionDatabase(db_path=path)
+```
+
+## Callback/Function Types
+
+Use `Callable` for function parameters:
+
+```python
+from typing import Callable
+
+def process_motions(
+    motions: List[Dict],
+    processor: Callable[[Dict], Any]
+) -> List[Any]:
+    return [processor(m) for m in motions]
+```
+
+## Type Aliases
+
+Define clear type aliases for domain concepts:
+
+```python
+from typing import Dict, List, TypedDict, Literal
+
+# Vote values
+VoteValue = Literal["Voor", "Tegen", "Onthouden", "Geen stem", "Afwezig"]
+
+# Policy areas
+PolicyArea = Literal["Alle", "Economie", "Klimaat", "Immigratie", ...]
+
+# Motion dict
+class MotionDict(TypedDict):
+    id: int
+    title: str
+    description: Optional[str]
+    date: Optional[str]
+    policy_area: Optional[str]
+    voting_results: Optional[str]  # JSON string
+    winning_margin: Optional[float]
+
+def get_motion(self, motion_id: int) -> Optional[MotionDict]:
+    ...
+```
+
+## Avoid `Any`
+
+Use `Any` sparingly - prefer specific types:
+
+```python
+# AVOID - too vague
+def process(data: Any) -> Any:
+    ...
+
+# PREFER - specific types
+def process(motion: MotionDict) -> Optional[SimilarityResult]:
+    ...
+```
+
+## Inline Type Hints
+
+For simple cases, inline hints are fine:
+
+```python
+def get_count(self) -> int:
+    ...
+
+def is_empty(self) -> bool:
+    ...
+```
+
+## Docstring Type Hints
+
+For complex types, describe the structure in the docstring:
+
+```python
+def get_party_positions(self, window_id: str) -> Dict[str, List[float]]:
+    """Get party positions in political space.
+    
+    Args:
+        window_id: Time window (e.g., "2024-Q1")
+    
+    Returns:
+        Dict mapping party_name -> [x, y] coordinates
+    
+    Example:
+        >>> positions = db.get_party_positions("2024-Q1")
+        >>> positions["VVD"]
+        [0.5, -0.3]
+    """
+    ...
+```
+
+## Type Checking
+
+Type hints are not enforced at runtime; where a value must be validated, add an explicit `isinstance` check:
+
+```python
+def set_count(self, count: int) -> None:
+    if not isinstance(count, int):
+        raise TypeError(f"Expected int, got {type(count).__name__}")
+    self._count = count
+```
--- a/.mindmodel/conventions/conventions.yaml
+++ b/.mindmodel/conventions/conventions.yaml
@ -0,0 +1,124 @@
+# Naming Conventions
+
+## Files
+- **snake_case** for all Python files: `database.py`, `explorer_helpers.py`, `motion_cache.py`
+- **PascalCase** NOT used for files
+
+## Functions
+- **snake_case**: `get_svd_vectors()`, `compute_party_coords()`, `build_scatter_trace()`
+- Private helpers prefixed with `_`: `_get_window_data()`
+
+## Classes
+- **PascalCase**: `MotionDatabase`, `Config`
+- **Dataclass pattern** for Config: `@dataclass` decorator with typed fields
+
+## Variables
+- **snake_case**: `party_map`, `mp_name`, `svd_vectors`, `party_centroids`
+- **UPPER_SNAKE_CASE** for module-level constants: `PARTY_COLOURS`, `DEFAULT_WINDOW`
+
+## Module-Level Exports
+- **Singleton instance**: `db = MotionDatabase()` at module bottom (not class-level)
+- **Config instance**: `config = Config(...)` at module bottom
+- **Dicts**: `PARTY_COLOURS` exported from `config.py`
+
+---
+
+# Error Handling
+
+## Known Patterns
+1. **Bare except with pass** (ANTI-PATTERN - see anti-patterns.yaml)
+   ```python
+   except:
+       pass  # database.py:47
+   ```
+
+2. **Graceful degradation**: catch specific exceptions, fall back to default
+   ```python
+   try:
+       result = compute_svd()
+   except ImportError:
+       result = DEFAULT_SVD
+   ```
+
+3. **Optional dependency fallbacks**:
+   ```python
+   try:
+       import umap
+       use_umap = True
+   except ImportError:
+       use_umap = False
+   ```
+
+4. **Nested exception handling** (ANTI-PATTERN - see anti-patterns.yaml):
+   ```python
+   try:
+       ...
+   except Exception:
+       try:
+           ...
+       except Exception:
+           pass
+   ```
+
+## Rules
+- Never use bare `except:` — always specify exception type
+- Never swallow exceptions silently — log or return a sensible default
+- For optional deps, use `ImportError` or `ModuleNotFoundError` explicitly
+- Avoid nested try/except blocks
+
+---
+
+# Code Organization
+
+## Singleton Pattern
+Each module owns one shared instance:
+```python
+# database.py
+db = MotionDatabase()
+
+# config.py
+config = Config(...)
+PARTY_COLOURS = {...}
+```
+
+## Pure Functions in Helpers
+`explorer_helpers.py` contains only pure functions (no IO, no Streamlit calls):
+```python
+def compute_party_coords(svd_vectors, party_map):
+    """Pure: no side effects, no imports from this module"""
+    ...
+
+def build_scatter_trace(df, color_col):
+    """Pure: returns Plotly trace dict"""
+    ...
+```
+
+## Cached Data Loaders
+Use `@st.cache_data` for expensive data loading:
+```python
+@st.cache_data
+def load_svd_vectors(window: str) -> pd.DataFrame:
+    return db.get_svd_vectors(window)
+```
+
+## Dataclass Config
+```python
+@dataclass
+class Config:
+    db_path: str = "data/stemwijzer.duckdb"
+    default_window: str = "2023"
+    party_colours: dict = field(default_factory=lambda: PARTY_COLOURS)
+```
+
+---
+
+# Imports
+
+## Ordering (convention)
+1. Standard library
+2. Third-party (streamlit, ibis, plotly, sklearn, umap)
+3. Local/relative imports
+
+## Avoid
+- Wildcard imports (`from module import *`)
+- Circular imports (ensure dependency direction: helpers → database → config)
--- a/.mindmodel/dependencies/dependencies.md
+++ b/.mindmodel/dependencies/dependencies.md
@ -0,0 +1,92 @@
+---
+title: Dependencies and Library Usage
+category: dependencies
+---
+
+# Dependencies and Library Usage
+
+## Core Dependencies
+
+### duckdb
+- **Required**: Yes
+- **Fallback**: None (core functionality)
+- **Usage**: SQL database for motions, embeddings, SVD vectors
+- **Files**: database.py, analysis/*.py, pipeline/*.py
+
+### streamlit
+- **Required**: Yes
+- **Fallback**: None
+- **Usage**: Web UI framework
+- **Files**: app.py, pages/*.py, explorer.py
+
+### requests
+- **Required**: Yes
+- **Fallback**: None
+- **Usage**: HTTP client for API calls
+- **Files**: api_client.py, ai_provider.py
+
+### plotly
+- **Required**: Yes
+- **Fallback**: None (raises ImportError)
+- **Usage**: Interactive charts for explorer
+- **Files**: explorer.py, explorer_helpers.py
+
+## Optional Dependencies
+
+### umap-learn
+- **Required**: No
+- **Fallback**: Use raw SVD vectors (first 2 dimensions)
+- **Usage**: Dimensionality reduction for visualization
+- **Files**: analysis/clustering.py
+
+### matplotlib
+- **Required**: No
+- **Fallback**: Plotly or raw output
+- **Usage**: Static charting
+- **Files**: Various analysis scripts
+
+## ML Dependencies
+
+### scikit-learn (imported as `sklearn`)
+- **Required**: Yes
+- **Usage**: KMeans clustering, cosine_similarity, StandardScaler
+- **Files**: analysis/clustering.py, similarity/compute.py
+
+### scipy
+- **Required**: Yes
+- **Usage**: SVD (scipy.linalg.svd), spatial.procrustes for alignment
+- **Files**: analysis/trajectory.py, pipeline/svd_pipeline.py
+
+### numpy
+- **Required**: Yes
+- **Usage**: Array operations, linear algebra
+- **Files**: Throughout codebase
+
+## Key Imports by File
+
+### explorer.py
+- `import streamlit as st`
+- `from database import db`
+- `from explorer_helpers import *`
+
+### explorer_helpers.py
+- `import pandas as pd`
+- `import plotly.graph_objects as go`
+- `from database import db` (optional, for type hints)
+
+### database.py
+- `import ibis`
+- `import duckdb`
+- `from config import config, PARTY_COLOURS`
+
+### config.py
+- `from dataclasses import dataclass, field`
+- `import streamlit as st` (optional, for warnings)
+
+## Singleton Instances
+
+| Module | Instance | Type |
+|--------|----------|------|
+| `database.py` | `db` | `MotionDatabase` |
+| `config.py` | `config` | `Config` (dataclass) |
+| `config.py` | `PARTY_COLOURS` | `dict[str, str]` |
--- a/.mindmodel/domain/domain-glossary.md
+++ b/.mindmodel/domain/domain-glossary.md
@ -0,0 +1,146 @@
+---
+title: Domain Glossary
+category: domain
+---
+
+# Domain Glossary - Dutch Political Terms
+
+## CRITICAL INVARIANTS
+
+> **Rule 1**: The centroid of the right-wing parties must lie on the RIGHT side of ALL axes
+> - PVV, FVD, JA21, SGP centroid must appear on the RIGHT
+> - Individual right-wing parties may vary slightly from the centroid
+> - This is non-negotiable for any compass/axis visualization
+
+> **Rule 2**: SVD labels are empirically derived from voting data
+> - Labels represent WHAT THE DATA SHOWS, not party self-identification or public opinion
+> - Labels are derived from outliers and 20 representative motions (10 positive, 10 negative)
+> - See SVD Label Derivation section below
+
+---
+
+## SVD Label Derivation
+
+### The Process
+
+SVD (Singular Value Decomposition) finds axes that maximize variance in the MP × Motion voting matrix. To label each axis:
+
+1. **Identify outliers**: Find the two MPs with most extreme positions on that axis
+2. **Select representative motions**: Pick 20 motions where these outliers disagreed most sharply (10 they voted opposite on, 10 where both voted same direction but with other extremes)
+3. **Interpret theme**: Read the motion titles to derive what the axis represents
+4. **Assign label**: Label describes the empirical theme, could be:
+   - Left-Right
+   - Coalition-Opposition
+   - Progressive-Conservative
+   - EU-National sovereignty
+   - Populist-Establishment
+   - Or whatever the voting patterns show
+
+### Example
+
+| Step | Description |
+|------|-------------|
+| Outlier A | Wilders (PVV) - extreme positive on Dim 1 |
+| Outlier B | Marijnissen (SP) - extreme negative on Dim 1 |
+| 20 Motions | Immigration, integration, law & order themes dominate |
+| Label | "Links-Rechts" (Left-Right) |
+
+### Labeling Rules
+
+- **Never use party names in labels** (e.g., not "PVV-SP axis")
+- **Never assume a semantic/ideological label**: use one (e.g., "progressive-conservative") only when the representative motions actually show that theme
+- **Use motion-derived themes** (e.g., "Immigration", "EU", "Economy")
+- **Fallback**: If theme is unclear, use "Axis 1", "Axis 2"
+
+---
+
+## Core Entities
+
+### Motion / Motie
+- Parliamentary motion submitted by MPs
+- Fields: `id`, `title`, `date`, `category`
+- MPs vote: **For** (+1), **Against** (-1), **Abstain** (0), **Absent**
+
+### MP / Kamerlid
+- Member of Parliament (Tweede Kamerlid)
+- Identified by full name (e.g., "Van Dijk, I.")
+- Has voting record, party affiliation, SVD position vector
+
+### Party / Fractie
+- Political party (e.g., "GroenLinks-PvdA", "PVV", "VVD")
+- Party centroids: average SVD position of all MPs in party
+
+### Vote / Stemming
+- Individual MP's vote on a motion: +1, 0, -1
+- Aggregated to compute SVD vectors
+
+---
+
+## Time & Analysis Concepts
+
+### Window / Tijdsvenster
+- Time period for analysis (annual or quarterly)
+- Values: "2023", "2023-Q1", "2024", etc.
+- SVD vectors computed per window
+
+### Trajectory
+- MP's position change across multiple windows
+- Computed from `svd_vectors` + window ordering
+
+---
+
+## Mathematical / Algorithmic Terms
+
+### SVD Vector
+- 2D vector from Singular Value Decomposition of MP × Motion vote matrix
+- Represents MP's position in political space
+
+### SVD Label
+- Empirically derived axis label based on outlier MPs and representative motions
+- Describes the theme of disagreement on that axis
+- NOT based on party ideology or semantic labels
+
+### Political Compass
+- 2D visualization with SVD axes mapped to compass quadrants
+- X-axis: First SVD dimension (labeled from voting data)
+- Y-axis: Second SVD dimension (labeled from voting data)
+
+### Procrustes Alignment
+- Algorithm to align SVD vectors across time windows
+- Ensures comparable positions across years/quarters
+
+### UMAP
+- Uniform Manifold Approximation and Projection
+- Dimensionality reduction for visualization
+- Optional dependency with graceful SVD fallback
+
+---
+
+## Database Table Reference
+
+| Table | Key Fields |
+|-------|-----------|
+| `motions` | id, title, date, category |
+| `mp_votes` | mp_id, motion_id, vote |
+| `svd_vectors` | entity_id, window, vector_2d (list[2]) |
+| `mp_party_history` | mp_id, party, start_date, end_date |
+| `windows` | window_id, start_date, end_date, period_type |
+| `mp_trajectories` | mp_id, window, trajectory_vector |
+
+---
+
+## Dutch Political Parties
+
+### Canonical Right-Wing (centroid on RIGHT of axes)
+- PVV (Partij voor de Vrijheid)
+- FVD (Forum voor Democratie)
+- JA21
+- SGP (Staatkundig Gereformeerde Partij)
+
+### Other Major Parties
+- VVD (Volkspartij voor Vrijheid en Democratie)
+- GL-PvdA (GroenLinks-PvdA)
+- NSC (Nieuw Sociaal Contract)
+- BBB (BoerBurgerBeweging)
+- SP (Socialistische Partij)
+- D66 (Democraten 66)
--- a/.mindmodel/examples/api-client-example.py
+++ b/.mindmodel/examples/api-client-example.py
@ -0,0 +1,196 @@
+"""Example: TweedeKamerAPI usage - from api_client.py and actual codebase."""
+
+from datetime import datetime, timedelta
+from typing import Dict, List
+
+# Import the API client
+from api_client import TweedeKamerAPI
+
+
+# =============================================================================
+# Example 1: Basic API usage
+# =============================================================================
+
+
+def example_fetch_motions():
+    """Fetch recent parliamentary motions from TweedeKamer API."""
+
+    api = TweedeKamerAPI()
+
+    # Fetch motions from last 30 days
+    start_date = datetime.now() - timedelta(days=30)
+
+    try:
+        motions = api.get_motions(start_date=start_date, limit=100)
+
+        print(f"Fetched {len(motions)} motions")
+
+        for motion in motions[:5]:  # Show first 5
+            print(f"  - {motion.get('title', 'N/A')}")
+
+        return motions
+    finally:
+        api.close()
+
+
+# =============================================================================
+# Example 2: Fetching with date range
+# =============================================================================
+
+
+def example_date_range():
+    """Fetch motions from a specific date range."""
+
+    api = TweedeKamerAPI()
+
+    start = datetime(2024, 1, 1)
+    end = datetime(2024, 3, 31)  # Q1 2024
+
+    try:
+        motions = api.get_motions(start_date=start, end_date=end, limit=500)
+
+        # Group by policy area
+        by_area = {}
+        for m in motions:
+            area = m.get("policy_area", "Onbekend")
+            by_area.setdefault(area, []).append(m)
+
+        for area, area_motions in sorted(by_area.items()):
+            print(f"{area}: {len(area_motions)} motions")
+
+        return motions
+    finally:
+        api.close()
+
+
+# =============================================================================
+# Example 3: Context manager usage
+# =============================================================================
+
+
+def example_context_manager():
+    """Use API client as context manager."""
+
+    with TweedeKamerAPI() as api:
+        motions = api.get_motions(
+            start_date=datetime.now() - timedelta(days=7), limit=50
+        )
+
+        print(f"Fetched {len(motions)} motions this week")
+
+        return motions
+
+
+# =============================================================================
+# Example 4: Processing voting records
+# =============================================================================
+
+
+def example_process_votes():
+    """Process individual voting records from API."""
+
+    api = TweedeKamerAPI()
+
+    start_date = datetime.now() - timedelta(days=7)
+
+    try:
+        # Get voting records directly
+        voting_records, besluit_meta = api._get_voting_records(
+            start_date=start_date, limit=1000
+        )
+
+        print(f"Fetched {len(voting_records)} voting records")
+        print(f"From {len(besluit_meta)} unique decisions")
+
+        # Count votes by party
+        party_votes = {}
+        for record in voting_records:
+            party = record.get("Fractie", "Onbekend")
+            vote = record.get("Soort", "Onbekend")
+            party_votes.setdefault(party, {})[vote] = (
+                party_votes.get(party, {}).get(vote, 0) + 1
+            )
+
+        for party, votes in sorted(party_votes.items()):
+            total = sum(votes.values())
+            voor = votes.get("Voor", 0)
+            print(f"{party}: {total} votes ({voor} voor)")
+
+        return voting_records
+    finally:
+        api.close()
+
+
+# =============================================================================
+# Example 5: Safe API call with fallback
+# =============================================================================
+
+
+def example_safe_call():
+    """Make API call with safe fallback on failure."""
+
+    api = TweedeKamerAPI()
+
+    try:
+        # This will return [] on any error
+        motions = api.get_motions(
+            start_date=datetime.now() - timedelta(days=30), limit=100
+        )
+
+        if not motions:
+            print("No motions returned - using cached data")
+            # Fallback to cached/local data
+            from database import db
+
+            return db.get_filtered_motions(limit=10)
+
+        return motions
+    finally:
+        api.close()
+
+
+# =============================================================================
+# Example 6: Pagination handling
+# =============================================================================
+
+
+def example_pagination():
+    """Understand how pagination works in the API."""
+
+    api = TweedeKamerAPI()
+
+    start_date = datetime.now() - timedelta(days=365)
+
+    # Simulate pagination
+    page_size = 250
+    total_limit = 500
+
+    all_motions = []
+    skip = 0
+
+    while len(all_motions) < total_limit:
+        print(f"Fetching page with skip={skip}...")
+
+        # In real usage, get_motions handles pagination internally
+        # This demonstrates what's happening under the hood
+        page_motions = api._fetch_page(start_date=start_date, skip=skip, top=page_size)
+
+        if not page_motions:
+            break
+
+        all_motions.extend(page_motions)
+        skip += page_size
+
+        if len(page_motions) < page_size:
+            break  # Last page
+
+    print(f"Total fetched: {len(all_motions)} motions")
+    return all_motions
+
+
+if __name__ == "__main__":
+    print("=== Basic Fetch ===")
+    example_fetch_motions()
+
+    print("\n=== Process Votes ===")
+    example_process_votes()
--- a/.mindmodel/examples/database-example.py
+++ b/.mindmodel/examples/database-example.py
@ -0,0 +1,191 @@
+"""Example: MotionDatabase usage - from database.py and actual codebase."""
+
+from typing import Dict, List, Optional
+import duckdb
+import json
+from config import config
+
+# Import the singleton instance
+from database import db
+
+
+# =============================================================================
+# Example 1: Getting filtered motions
+# =============================================================================
+
+
+def example_get_filtered_motions():
+    """Get controversial motions from a specific policy area."""
+
+    motions = db.get_filtered_motions(
+        policy_area="Klimaat",
+        min_margin=0.0,
+        max_margin=0.3,  # Controversial: close margin
+        limit=10,
+    )
+
+    for motion in motions:
+        print(f"{motion['title']}: {motion['winning_margin']:.1%} margin")
+
+    return motions
+
+
+# =============================================================================
+# Example 2: Creating a voting session
+# =============================================================================
+
+
+def example_voting_session():
+    """Create a new user session and record votes."""
+
+    # Create session for 10 motions
+    session_id = db.create_session(total_motions=10)
+    print(f"Created session: {session_id}")
+
+    # Get motions for the session
+    motions = db.get_filtered_motions(policy_area="Alle", limit=10)
+
+    # Record votes
+    for motion in motions:
+        # In real app, user would choose vote
+        vote = "Voor"  # Example vote
+        db.record_vote(session_id=session_id, motion_id=motion["id"], vote=vote)
+
+    # Get results
+    results = db.get_party_results(session_id)
+
+    for party, result in sorted(results.items(), key=lambda x: -x[1]["agreement"]):
+        print(f"{party}: {result['agreement']:.1%} agreement")
+
+    return results
+
+
+# =============================================================================
+# Example 3: Working with DuckDB connections directly
+# =============================================================================
+
+
+def example_direct_duckdb():
+    """Example of proper DuckDB connection handling."""
+
+    conn = duckdb.connect(config.DATABASE_PATH)
+    try:
+        # Get motion with votes
+        result = conn.execute(
+            """
+            SELECT m.*, 
+                   JSON_EXTRACT(voting_results, '$.total_votes') as total_votes
+            FROM motions m
+            WHERE m.id = ?
+        """,
+            (123,),
+        ).fetchone()
+
+        if result:
+            print(f"Motion: {result[1]}")  # title is index 1
+
+        return result
+    finally:
+        conn.close()
+
+
+# =============================================================================
+# Example 4: Bulk operations
+# =============================================================================
+
+
+def example_bulk_insert():
+    """Example of bulk inserting motions."""
+
+    # Sample data
+    motions = [
+        {
+            "title": "Motion about climate policy",
+            "description": "Proposal to reduce emissions",
+            "date": "2024-01-15",
+            "policy_area": "Klimaat",
+            "voting_results": json.dumps({"Voor": 75, "Tegen": 65}),
+            "winning_margin": 0.07,
+            "controversy_score": 0.85,
+        },
+        {
+            "title": "Motion about healthcare",
+            "description": "Increase healthcare budget",
+            "date": "2024-01-20",
+            "policy_area": "Zorg",
+            "voting_results": json.dumps({"Voor": 90, "Tegen": 50}),
+            "winning_margin": 0.29,
+            "controversy_score": 0.42,
+        },
+    ]
+
+    conn = duckdb.connect(config.DATABASE_PATH)
+    try:
+        for motion in motions:
+            conn.execute(
+                """
+                INSERT INTO motions 
+                (title, description, date, policy_area, voting_results, 
+                 winning_margin, controversy_score)
+                VALUES (?, ?, ?, ?, ?, ?, ?)
+            """,
+                (
+                    motion["title"],
+                    motion["description"],
+                    motion["date"],
+                    motion["policy_area"],
+                    motion["voting_results"],
+                    motion["winning_margin"],
+                    motion["controversy_score"],
+                ),
+            )
+        conn.close()
+        print(f"Inserted {len(motions)} motions")
+    except Exception as e:
+        conn.close()
+        print(f"Error inserting motions: {e}")
+
+
+# =============================================================================
+# Example 5: Query with aggregation
+# =============================================================================
+
+
+def example_aggregation():
+    """Example of aggregate queries."""
+
+    conn = duckdb.connect(config.DATABASE_PATH)
+    try:
+        # Get statistics by policy area
+        results = conn.execute("""
+            SELECT 
+                policy_area,
+                COUNT(*) as motion_count,
+                AVG(winning_margin) as avg_margin,
+                AVG(controversy_score) as avg_controversy
+            FROM motions
+            WHERE policy_area IS NOT NULL
+            GROUP BY policy_area
+            ORDER BY motion_count DESC
+        """).fetchall()
+
+        for row in results:
+            print(
+                f"{row[0]}: {row[1]} motions, "
+                f"avg margin {row[2]:.1%}, "
+                f"controversy {row[3]:.2f}"
+            )
+
+        conn.close()
+        return results
+    except Exception as e:
+        conn.close()
+        return []
+
+
+if __name__ == "__main__":
+    print("=== Filtered Motions ===")
+    example_get_filtered_motions()
+
+    print("\n=== Aggregation ===")
+    example_aggregation()
--- a/.mindmodel/examples/pattern-examples.md
+++ b/.mindmodel/examples/pattern-examples.md
@ -0,0 +1,116 @@
+# Extracted pattern examples (representative snippets)
+
+Note: snippets are verbatim extracts from repository files (Phase 1). Paths shown.
+
+## DuckDB connect + schema init (database.py)
+```python
+conn = duckdb.connect(self.db_path)
+
+# Create sequence for auto-incrementing IDs
+try:
+    conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1")
+except:
+    pass
+
+# Create tables with proper ID handling
+conn.execute("""
+    CREATE TABLE IF NOT EXISTS motions (
+        id INTEGER DEFAULT nextval('motions_id_seq'),
+        title TEXT NOT NULL,
+        description TEXT,
+        date DATE,
+        policy_area TEXT,
+        voting_results JSON,
+        winning_margin FLOAT,
+        controversy_score FLOAT,
+        layman_explanation TEXT,
+        externe_identifier TEXT,
+        body_text TEXT,
+        url TEXT UNIQUE,
+        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+        PRIMARY KEY (id)
+    )
+""")
+conn.close()
+```
+
+## Read-only compute worker (svd_pipeline.py)
+```python
+conn = duckdb.connect(db_path, read_only=True)
+try:
+    rows = conn.execute(
+        "SELECT motion_id, mp_name, vote FROM mp_votes WHERE date BETWEEN ? AND ?",
+        (start_date, end_date),
+    ).fetchall()
+finally:
+    conn.close()
+```
+
+## Requests with retry/backoff (ai_provider.py)
+```python
+resp = requests.post(url, json=json, headers=headers, timeout=10)
+...
+if getattr(resp, "status_code", 0) == 429:
+    if attempt == retries:
+        raise ProviderError(f"Provider returned HTTP {resp.status_code}")
+    retry_after = None
+    raw = resp.headers.get("Retry-After") if getattr(resp, "headers", None) else None
+    if raw:
+        try:
+            retry_after = int(raw)
+        except Exception:
+            try:
+                dt = parsedate_to_datetime(raw)
+                now = datetime.now(tz=dt.tzinfo or timezone.utc)
+                secs = (dt - now).total_seconds()
+                retry_after = max(0, int(secs))
+            except Exception:
+                retry_after = None
+
+    if retry_after is not None:
+        time.sleep(retry_after)
+        continue
+```
+
+## Embedding batch + per-item fallback (pipeline/ai_provider_wrapper.py)
+```python
+for start in range(0, len(texts), batch_size):
+    chunk = texts[i:end]
+    emb_chunk, emb_exc = _attempt_batch(chunk, i)
+    if emb_chunk is not None:
+        for j, emb in enumerate(emb_chunk):
+            results[i + j] = emb
+        i = end
+        continue
+
+    # batch failed -> fallback to per-item attempts
+    for j in range(i, end):
+        t = texts[j]
+        single, single_exc = _attempt_batch([t], j)
+        if single:
+            results[j] = single[0]
+            continue
+        results[j] = None
+```
+
+## Similarity compute (similarity/compute.py)
+```python
+# Ensure consistent dimensionality: pad shorter vectors with zeros
+lengths = [len(v) for v in vecs]
+max_dim = max(lengths)
+if len(set(lengths)) != 1:
+    logger.warning(
+        "Inconsistent vector dimensions detected (max=%d). Padding shorter vectors with zeros.",
+        max_dim,
+    )
+
+matrix = np.zeros((len(vecs), max_dim), dtype=np.float32)
+for i, v in enumerate(vecs):
+    matrix[i, : len(v)] = v
+
+# Normalize rows and compute cosine similarity
+norms = np.linalg.norm(matrix, axis=1, keepdims=True)
+norms[norms == 0] = 1.0
+normalized = matrix / norms
+sim = normalized @ normalized.T
+```
--- a/.mindmodel/examples/pipeline-example.py
+++ b/.mindmodel/examples/pipeline-example.py
@ -0,0 +1,217 @@
+"""Example: Pipeline phase execution - from pipeline/run_pipeline.py and actual codebase."""
+
+import argparse
+from datetime import date, timedelta
+from typing import List, Tuple
+
+# Import pipeline modules
+from pipeline.fetch_mp_metadata import fetch_mp_metadata
+from pipeline.extract_mp_votes import extract_mp_votes
+from pipeline.svd_pipeline import run_svd_pipeline
+from pipeline.text_pipeline import run_text_pipeline
+from pipeline.fusion import run_fusion
+
+from database import MotionDatabase
+
+
+# =============================================================================
+# Example 1: Running full pipeline
+# =============================================================================
+
+
+def example_full_pipeline():
+    """Run every pipeline phase in order using the default CLI settings.
+
+    Builds the argument parser, parses an empty argument list (so only the
+    defaults apply), derives the date range, and then calls the metadata,
+    vote-extraction, SVD, text-embedding and fusion steps one after another,
+    printing progress along the way.
+    """
+
+    # Build the parser the way the CLI would
+    parser = argparse.ArgumentParser(description="Pipeline runner")
+    parser.add_argument("--db-path", default="data/motions.db")
+    parser.add_argument("--start-date", default=None)
+    parser.add_argument("--end-date", default=None)
+    parser.add_argument(
+        "--window-size", choices=["quarterly", "annual"], default="quarterly"
+    )
+    parser.add_argument("--svd-k", type=int, default=50)
+
+    # An explicit empty list means sys.argv is ignored and defaults are used
+    args = parser.parse_args([])
+
+    # Resolve dates: end defaults to today, start to 730 days before the end
+    end_date = date.fromisoformat(args.end_date) if args.end_date else date.today()
+    start_date = (
+        date.fromisoformat(args.start_date)
+        if args.start_date
+        else end_date - timedelta(days=730)
+    )
+
+    print(f"Running pipeline: {start_date} → {end_date}")
+    print(f"Window size: {args.window_size}")
+    print(f"DB path: {args.db_path}")
+
+    # Initialize database
+    db = MotionDatabase(args.db_path)
+
+    # Phase 1: Fetch MP metadata
+    print("\n=== Phase 1: MP Metadata ===")
+    n_mp = fetch_mp_metadata(db_path=args.db_path)
+    print(f"Processed {n_mp} MPs")
+
+    # Phase 2: Extract MP votes
+    print("\n=== Phase 2: Extract Votes ===")
+    n_votes = extract_mp_votes(db_path=args.db_path)
+    print(f"Extracted {n_votes} vote records")
+
+    # Phase 3 (part 1): generate the time windows the SVD step iterates over
+    print("\n=== Phase 3: SVD Pipeline ===")
+    windows = generate_windows(start_date, end_date, args.window_size)
+    print(f"Generated {len(windows)} windows: {windows}")
+
+    # Phase 3 (part 2): SVD per window
+    run_svd_pipeline(db, windows, args.svd_k)
+    print(f"Computed SVD for {len(windows)} windows")
+
+    # Phase 4: Text embeddings
+    print("\n=== Phase 4: Text Embeddings ===")
+    run_text_pipeline(args.db_path, batch_size=50)
+    print("Text embeddings completed")
+
+    # Phase 5: Fusion
+    print("\n=== Phase 5: Fusion ===")
+    run_fusion(args.db_path, windows)
+    print("Fusion completed")
+
+    print("\n=== Pipeline Complete ===")
+
+
+# =============================================================================
+# Example 2: Generate time windows
+# =============================================================================
+
+
+def generate_windows(
+    start: date, end: date, granularity: str
+) -> List[Tuple[str, str, str]]:
+    """Split the span from ``start`` to ``end`` into calendar-aligned windows.
+
+    Args:
+        start: First date of interest; the first window begins at the start of
+            the year ("annual") or quarter (otherwise) that contains it.
+        end: Last date of interest (inclusive); the final window is clipped to it.
+        granularity: "annual" for one window per calendar year; any other
+            value is treated as quarterly.
+
+    Returns:
+        A chronological list of ``(window_id, start_iso, end_iso)`` tuples.
+        ``window_id`` is ``"YYYY"`` for annual windows and ``"YYYY-Qn"`` for
+        quarterly ones. The list is empty when ``end`` lies before the first
+        window start.
+    """
+    # Imported once per call instead of on every loop iteration.
+    import calendar
+
+    windows = []
+
+    if granularity == "annual":
+        cursor = date(start.year, 1, 1)
+        while cursor <= end:
+            year_end = date(cursor.year, 12, 31)
+            w_end = min(year_end, end)
+            windows.append((str(cursor.year), cursor.isoformat(), w_end.isoformat()))
+            cursor = date(cursor.year + 1, 1, 1)
+    else:
+        # quarterly: first and last month of each quarter, keyed by quarter number
+        quarter_starts = {1: 1, 2: 4, 3: 7, 4: 10}
+        quarter_ends = {1: 3, 2: 6, 3: 9, 4: 12}
+
+        # Snap the cursor back to the first day of the quarter containing start.
+        q = (start.month - 1) // 3 + 1
+        cursor = date(start.year, quarter_starts[q], 1)
+
+        while cursor <= end:
+            q = (cursor.month - 1) // 3 + 1
+            q_end_month = quarter_ends[q]
+            last_day = calendar.monthrange(cursor.year, q_end_month)[1]
+            q_end = date(cursor.year, q_end_month, last_day)
+            w_end = min(q_end, end)
+            window_id = f"{cursor.year}-Q{q}"
+            windows.append((window_id, cursor.isoformat(), w_end.isoformat()))
+            # The day after a quarter end is always the next quarter's first day.
+            cursor = q_end + timedelta(days=1)
+
+    return windows
+
+
+def example_window_generation():
+    """Print the quarterly and then the annual windows for a fixed sample range."""
+
+    range_start, range_end = date(2023, 1, 1), date(2024, 6, 30)
+
+    # Each pair is (heading to print, granularity passed to generate_windows).
+    for heading, granularity in (
+        ("Quarterly windows:", "quarterly"),
+        ("\nAnnual windows:", "annual"),
+    ):
+        print(heading)
+        for window_id, first_day, last_day in generate_windows(
+            range_start, range_end, granularity
+        ):
+            print(f"  {window_id}: {first_day} to {last_day}")
+
+
+# =============================================================================
+# Example 3: Running individual phases
+# =============================================================================
+
+
+def example_individual_phases():
+    """Run pipeline phases individually for debugging.
+
+    Calls the metadata fetch, vote extraction, SVD (for one hard-coded
+    window) and text-embedding steps separately against "data/motions.db",
+    printing a short status line around each call. Fusion is not run here.
+    """
+
+    db_path = "data/motions.db"
+    # The database object is only handed to the SVD step; the other steps
+    # receive the path instead.
+    db = MotionDatabase(db_path)
+
+    # Only run MP metadata fetch
+    print("Fetching MP metadata...")
+    n = fetch_mp_metadata(db_path=db_path)
+    print(f"  {n} MPs processed")
+
+    # Only run vote extraction
+    print("Extracting votes...")
+    n = extract_mp_votes(db_path=db_path)
+    print(f"  {n} votes extracted")
+
+    # Only run SVD for specific window, given as (window_id, start, end)
+    print("Computing SVD...")
+    windows = [("2024-Q1", "2024-01-01", "2024-03-31")]
+    run_svd_pipeline(db, windows, k=50)
+    print("  SVD computed")
+
+    # Only run text embeddings
+    print("Computing embeddings...")
+    run_text_pipeline(db_path, batch_size=25)  # Smaller batch for testing
+    print("  Embeddings computed")
+
+
+# =============================================================================
+# Example 4: Dry run
+# =============================================================================
+
+
+def example_dry_run():
+    """Print the windows and phase list a run would cover, without running it."""
+
+    print("DRY RUN - no writes will be made")
+
+    # Generate and show windows for a fixed half-year range
+    windows = generate_windows(date(2024, 1, 1), date(2024, 6, 30), "quarterly")
+
+    print(f"Would process {len(windows)} windows:")
+    for window_id, first_day, last_day in windows:
+        print(f"  {window_id}: {first_day} to {last_day}")
+
+    # Phase overview, emitted one line at a time in pipeline order
+    for line in (
+        "\nWould run phases:",
+        "  1. fetch_mp_metadata",
+        "  2. extract_mp_votes",
+        "  3. svd_pipeline",
+        "  4. text_pipeline",
+        "  5. fusion",
+    ):
+        print(line)
+
+
+if __name__ == "__main__":
+    import logging
+
+    # Timestamped INFO-level logging for anything the imported modules log
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
+    )
+
+    # Only the two examples that just generate windows and print are run;
+    # the full-pipeline and individual-phase examples are not invoked here.
+    print("=== Window Generation ===")
+    example_window_generation()
+
+    print("\n=== Dry Run ===")
+    example_dry_run()
--- a/.mindmodel/examples/streamlit-page-example.py
+++ b/.mindmodel/examples/streamlit-page-example.py
@ -0,0 +1,316 @@
+"""Example: Streamlit page patterns - from actual pages/ files."""
+
+import streamlit as st
+
+
+# =============================================================================
+# Example 1: Home page (Home.py)
+# =============================================================================
+
+
+def render_home_page():
+    """Simplified version of Home.py.
+
+    Configures the page, shows the title and an introduction, then renders
+    two side-by-side columns that each link to one sub-page (Stemwijzer and
+    Explorer), followed by a data-source caption.
+    """
+
+    st.set_page_config(
+        page_title="Motief: de stematlas",
+        page_icon="🗺️",
+        layout="centered",
+        initial_sidebar_state="expanded",
+    )
+
+    st.title("🗺️ Motief: de stematlas")
+    st.markdown(
+        "**Motief** brengt de Nederlandse Tweede Kamer in kaart op basis van "
+        "echte stemmingen over moties. Gebruik de Stemwijzer om te ontdekken welke "
+        "partij het beste bij jouw standpunten past, of verken de politieke ruimte "
+        "zelf in de Explorer."
+    )
+
+    st.divider()
+
+    # Two equal-width columns, one per sub-page
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.subheader("🗳️ Stemwijzer")
+        st.markdown(
+            "Stem op echte Tweede Kamer moties en zie welke partij het "
+            "dichtst bij jouw keuzes staat."
+        )
+        st.page_link("pages/1_Stemwijzer.py", label="Open Stemwijzer", icon="🗳️")
+
+    with col2:
+        st.subheader("🔭 Politiek Explorer")
+        st.markdown(
+            "Verken het politieke kompas, partijtrajecten door de tijd, "
+            "en zoek vergelijkbare moties op in het archief."
+        )
+        st.page_link("pages/2_Explorer.py", label="Open Explorer", icon="🔭")
+
+    st.divider()
+    st.caption("Data: Tweede Kamer API · Embeddings: QWEN (via OpenRouter)")
+
+
+# =============================================================================
+# Example 2: Thin page wrapper (pages/1_Stemwijzer.py)
+# =============================================================================
+
+
+def render_stemwijzer_page():
+    """Pattern: thin page that delegates to module function.
+
+    The page itself only sets the page configuration; all content is produced
+    by ``explorer.build_mp_quiz_tab``, called with the database path.
+    """
+
+    st.set_page_config(
+        page_title="Stemwijzer",
+        page_icon="🗳️",
+        layout="centered",
+    )
+
+    # Delegate to main module (imported inside the function, at call time)
+    from explorer import build_mp_quiz_tab
+
+    build_mp_quiz_tab("data/motions.db")
+
+
+# =============================================================================
+# Example 3: Session state initialization
+# =============================================================================
+
+
+def init_session_state():
+    """Seed ``st.session_state`` with a default for every key not yet present."""
+
+    # (key, initial value) pairs; keys that already exist are left untouched.
+    initial_values = (
+        ("session_id", None),
+        ("current_motion_index", 0),
+        ("motions", []),
+        ("show_results", False),
+        ("user_votes", {}),
+    )
+
+    for name, value in initial_values:
+        if name in st.session_state:
+            continue
+        st.session_state[name] = value
+
+
+# =============================================================================
+# Example 4: Sidebar configuration
+# =============================================================================
+
+
+def render_sidebar():
+    """Pattern: Sidebar for configuration.
+
+    Renders the settings widgets in the sidebar.
+
+    Returns:
+        A dict with "motion_count", "policy_area" and "margin_range" holding
+        the current widget values when the start button is pressed during
+        this run; otherwise None.
+    """
+
+    with st.sidebar:
+        st.header("Instellingen")
+
+        # Number of motions to answer (5-25, default 10)
+        motion_count = st.slider(
+            "Aantal moties",
+            min_value=5,
+            max_value=25,
+            value=10,
+            help="Hoeveel moties wilt u beantwoorden?",
+        )
+
+        # Policy-area filter; the first entry "Alle" is the initial selection
+        policy_area = st.selectbox(
+            "Beleidsgebied",
+            [
+                "Alle",
+                "Economie",
+                "Klimaat",
+                "Immigratie",
+                "Zorg",
+                "Onderwijs",
+                "Defensie",
+                "Sociale Zaken",
+                "Algemeen",
+            ],
+        )
+
+        # A tuple default makes this a range slider returning (low, high)
+        margin_range = st.slider(
+            "Controversiële moties (%)",
+            min_value=0,
+            max_value=100,
+            value=(0, 100),
+            help="Filter op hoe omstreden de moties zijn",
+        )
+
+        st.divider()
+
+        if st.button("Start Nieuwe Sessie", type="primary"):
+            return {
+                "motion_count": motion_count,
+                "policy_area": policy_area,
+                "margin_range": margin_range,
+            }
+
+    # Button not pressed in this run
+    return None
+
+
+# =============================================================================
+# Example 5: Motion voting interface
+# =============================================================================
+
+
+def render_motion_vote(motion: dict, index: int, total: int):
+    """Pattern: Display motion and voting buttons.
+
+    Args:
+        motion: Motion record. "title" and "id" are read directly; the keys
+            "layman_explanation", "date", "policy_area", "description",
+            "winning_margin" and "controversy_score" are read with ``.get``
+            and may be absent.
+        index: Zero-based position of this motion; shown to the user as
+            ``index + 1``.
+        total: Total number of motions, shown in the header.
+    """
+
+    st.subheader(f"Motie {index + 1} van {total}")
+
+    # Motion content
+    st.markdown(f"### {motion['title']}")
+
+    # Wide column for the text, narrow column for the metric
+    col1, col2 = st.columns([3, 1])
+    with col1:
+        if motion.get("layman_explanation"):
+            st.info(motion["layman_explanation"])
+
+        with st.expander("Meer details"):
+            st.markdown(f"**Datum:** {motion.get('date', 'Onbekend')}")
+            st.markdown(f"**Beleidsgebied:** {motion.get('policy_area', 'Onbekend')}")
+
+            if motion.get("description"):
+                st.markdown(f"**Beschrijving:** {motion['description']}")
+
+    with col2:
+        # The margin is formatted with the % format spec, so it is presumably
+        # stored as a fraction (0-1) - TODO confirm against the data source.
+        st.metric(
+            label="Winstmarge",
+            value=f"{motion.get('winning_margin', 0):.0%}",
+            delta="Omstreden" if motion.get("controversy_score", 0) > 0.5 else "Helder",
+        )
+
+    st.divider()
+
+    # Voting buttons: each one calls on_vote(motion id, chosen label) on click
+    col1, col2, col3 = st.columns(3)
+
+    with col1:
+        st.button(
+            "👍 **Voor**",
+            on_click=on_vote,
+            args=(motion["id"], "Voor"),
+            use_container_width=True,
+        )
+
+    with col2:
+        st.button(
+            "👎 **Tegen**",
+            on_click=on_vote,
+            args=(motion["id"], "Tegen"),
+            use_container_width=True,
+        )
+
+    with col3:
+        st.button(
+            "🤔 **Onthouden**",
+            on_click=on_vote,
+            args=(motion["id"], "Onthouden"),
+            use_container_width=True,
+        )
+
+
+def on_vote(motion_id: int, vote: str):
+    """Record one vote and advance the quiz; meant to be a button ``on_click`` callback.
+
+    Args:
+        motion_id: Identifier of the motion that was voted on.
+        vote: Label of the chosen option (the buttons pass "Voor", "Tegen"
+            or "Onthouden").
+    """
+
+    # Record vote
+    from database import db
+
+    db.record_vote(
+        session_id=st.session_state.session_id, motion_id=motion_id, vote=vote
+    )
+
+    # Update session state
+    st.session_state.user_votes[motion_id] = vote
+
+    # Move to next or show results
+    if st.session_state.current_motion_index < len(st.session_state.motions) - 1:
+        st.session_state.current_motion_index += 1
+    else:
+        st.session_state.show_results = True
+
+    # No st.rerun() here: Streamlit reruns the script on its own once a widget
+    # callback returns, and calling st.rerun() inside a callback is a no-op
+    # that only produces a warning.
+
+
+# =============================================================================
+# Example 6: Results display
+# =============================================================================
+
+
+def render_results():
+    """Pattern: Display voting results.
+
+    Loads the per-party results for the current session, shows the best
+    match, then one progress bar and metric per party in descending order of
+    agreement, and finally the user's own vote per motion in an expander.
+    Shows a warning and returns early when there are no results.
+    """
+
+    from database import db
+
+    st.header("📊 Uw Resultaten")
+
+    # Get party results
+    # presumably a mapping of party name -> dict with "agreement_percentage";
+    # verify against db.get_party_results
+    results = db.get_party_results(st.session_state.session_id)
+
+    if not results:
+        st.warning("Geen resultaten beschikbaar")
+        return
+
+    # Sort by agreement, highest first; a missing value counts as 0
+    sorted_results = sorted(
+        results.items(), key=lambda x: x[1].get("agreement_percentage", 0), reverse=True
+    )
+
+    # Display top match
+    if sorted_results:
+        top_party, top_data = sorted_results[0]
+        st.success(
+            f"**Uw beste match:** {top_party} ({top_data.get('agreement_percentage', 0):.0%} overeenstemming)"
+        )
+
+    st.divider()
+
+    # Show all parties
+    for party, data in sorted_results:
+        agreement = data.get("agreement_percentage", 0)
+
+        col1, col2 = st.columns([3, 1])
+        with col1:
+            st.markdown(f"**{party}**")
+            # NOTE(review): the value is passed to st.progress unchanged and
+            # formatted with %, so it is assumed to be a 0-1 fraction - confirm.
+            st.progress(agreement, text=f"{agreement:.0%}")
+
+        with col2:
+            st.metric("Overeenstemming", f"{agreement:.0%}")
+
+    # Detailed breakdown; "?" marks motions without a recorded vote
+    with st.expander("Details per motie"):
+        for motion in st.session_state.motions:
+            user_vote = st.session_state.user_votes.get(motion["id"], "?")
+            st.markdown(f"- **{motion['title']}**: U={user_vote}")
+
+
+# =============================================================================
+# Example 7: Tabs layout
+# =============================================================================
+
+
+def render_tabs_example():
+    """Pattern: Use tabs for organizing content.
+
+    Creates three tabs; the first two hold placeholder text only, the third
+    has a text input and echoes the entered search term.
+    """
+
+    tab1, tab2, tab3 = st.tabs(["Compass", "Trajectories", "Zoeken"])
+
+    with tab1:
+        st.subheader("Politiek Kompas")
+        st.write("Visualiseer partijposities in 2D ruimte")
+        # Add compass chart...
+
+    with tab2:
+        st.subheader("Partij Trajectories")
+        st.write("Bekijk hoe partijen door de tijd bewegen")
+        # Add trajectory chart...
+
+    with tab3:
+        st.subheader("Zoek Moties")
+
+        query = st.text_input("Zoekterm")
+        # Nothing is shown while the input is empty
+        if query:
+            # Search functionality...
+            st.write(f"Zoeken naar: {query}")
+
+
+if __name__ == "__main__":
+    # Demo rendering: only the session-state defaults and a placeholder line;
+    # none of the render_* examples are called here.
+    init_session_state()
+    st.write("Streamlit page structure example")
--- a/.mindmodel/manifest.yaml
+++ b/.mindmodel/manifest.yaml
@ -0,0 +1,108 @@
+# stemwijzer Mind Model - Manifest
+# Generated: 2026-04-12
+# Phase: 2 - Assembly from Phase 1 Analysis
+
+name: stemwijzer
+version: 2
+description: Dutch political voting compass (Stemwijzer) - Mind Model constraints
+
+categories:
+  # Core documentation
+  - path: system.md
+    description: System overview and architecture summary
+    group: docs
+  - path: stack/stack.md
+    description: Technology stack with versions and purposes
+    group: stack
+  - path: domain/domain-glossary.md
+    description: Domain entities, terms, relationships, and CRITICAL INVARIANTS
+    group: domain
+
+  # Design patterns
+  - path: patterns/patterns.yaml
+    description: Code patterns (Singleton, Repository, Pipeline, etc.)
+    group: patterns
+  - path: patterns/streamlit.yaml
+    description: Streamlit-specific patterns (session state, cache)
+    group: patterns
+  - path: patterns/api.yaml
+    description: API client patterns with retry and pagination
+    group: patterns
+  - path: patterns/database.yaml
+    description: DuckDB patterns and connection management
+    group: patterns
+  - path: patterns/python.yaml
+    description: Python-specific patterns (dataclass, typing)
+    group: patterns
+  - path: patterns/duckdb-access.md
+    description: DuckDB connection patterns and best practices
+    group: patterns
+  - path: patterns/embeddings-similarity.md
+    description: Embeddings and similarity computation patterns
+    group: patterns
+  - path: patterns/error-handling.md
+    description: Error handling and exception patterns
+    group: patterns
+  - path: patterns/module-singletons.md
+    description: Module-level singleton patterns
+    group: patterns
+  - path: patterns/requests-http.md
+    description: HTTP client patterns with retry
+    group: patterns
+  - path: patterns/validation.md
+    description: Input validation patterns
+    group: patterns
+
+  # Coding constraints
+  - path: constraints/error-handling.md
+    description: Error handling patterns with safe fallbacks
+    group: constraints
+  - path: constraints/logging.md
+    description: Logging conventions
+    group: constraints
+  - path: constraints/naming.yaml
+    description: File, class, function naming rules
+    group: constraints
+  - path: constraints/imports.yaml
+    description: Import organization and module structure
+    group: constraints
+  - path: constraints/types.yaml
+    description: Type hint conventions
+    group: constraints
+  - path: constraints/testing.yaml
+    description: Testing conventions
+    group: constraints
+
+  # Anti-patterns
+  - path: anti-patterns/anti-patterns.md
+    description: Known anti-patterns with evidence and fixes
+    group: anti-patterns
+
+  # Dependencies
+  - path: dependencies/dependencies.md
+    description: Library usage and singleton instances
+    group: dependencies
+
+  # Code examples
+  - path: examples/database-example.py
+    description: MotionDatabase usage examples
+    group: examples
+  - path: examples/api-client-example.py
+    description: TweedeKamerAPI usage examples
+    group: examples
+  - path: examples/pipeline-example.py
+    description: Pipeline orchestration examples
+    group: examples
+  - path: examples/streamlit-page-example.py
+    description: Streamlit page patterns
+    group: examples
+  - path: examples/pattern-examples.md
+    description: Consolidated pattern examples
+    group: examples
+
+# Phase 1 findings summary:
+# - Tech: Python 3.13+, Streamlit, DuckDB, scipy/sklearn/umap, OpenRouter (QWEN)
+# - 10 patterns discovered, including: Module singletons, Repository, Service layer, Pipeline
+# - 8 anti-patterns, including: print() instead of logging, _DummySt global, bare except
+# - 6 code clusters: Database, Streamlit UI, API, Analysis/ML, Config, Singletons
+# - 3 groups: stdlib, 3rd party, local imports
--- a/.mindmodel/patterns/api.yaml
+++ b/.mindmodel/patterns/api.yaml
@ -0,0 +1,265 @@
+# API Client Patterns
+
+## Base API Client Pattern
+
+Using requests.Session for connection pooling:
+
+```python
+# api_client.py
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional
+
+import requests
+
+from config import config
+
+class TweedeKamerAPI:
+    """Client for the Tweede Kamer OData API that reuses one requests.Session."""
+
+    def __init__(self):
+        self.odata_base_url = "https://gegevensmagazijn.tweedekamer.nl/OData/v4/2.0"
+        # One Session shared by all requests, with the default headers set once.
+        self.session = requests.Session()
+        self.session.headers.update({
+            "Accept": "application/json",
+            "User-Agent": "Dutch-Political-Compass-Tool/1.0",
+        })
+
+    def get_motions(
+        self,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+        limit: int = 500,
+    ) -> List[Dict]:
+        """Get motions with voting results using OData API.
+
+        Args:
+            start_date: Earliest date to fetch; defaults to 730 days before now.
+            end_date: Passed through unchanged to the record fetch.
+            limit: Passed through unchanged to the record fetch.
+
+        Returns:
+            The processed motions, or an empty list if anything raised.
+        """
+        if not start_date:
+            start_date = datetime.now() - timedelta(days=730)
+
+        try:
+            voting_records, besluit_meta = self._get_voting_records(
+                start_date, end_date, limit
+            )
+            return self._process_voting_records(voting_records, besluit_meta)
+        except Exception as e:
+            # Best-effort: callers get an empty list instead of an exception.
+            print(f"Error fetching motions from API: {e}")
+            return []
+```
+
+## OData Pagination Pattern
+
+Handle server-side pagination with $skip:
+
+```python
+def _get_voting_records(
+    self,
+    start_date: datetime,
+    end_date: datetime = None,
+    limit: int = 50000
+) -> tuple:
+    """Fetch with automatic pagination.
+
+    Requests Besluit pages of 250 entries, advancing $skip by the page size,
+    until a page comes back empty or at least ``limit`` records are collected.
+
+    NOTE(review): ``end_date`` is accepted but never used in the filter.
+    NOTE(review): the return annotation says ``tuple`` and ``get_motions``
+    unpacks two values, but this excerpt returns the single ``all_records``
+    list - confirm against the real implementation.
+    """
+
+    # Only non-deleted decisions with a voting type, changed on/after start_date
+    filter_query = (
+        f"GewijzigdOp ge {start_date.strftime('%Y-%m-%d')}T00:00:00Z"
+        " and StemmingsSoort ne null"
+        " and Verwijderd eq false"
+    )
+
+    page_size = 250  # API caps $top at 250
+    base_url = f"{self.odata_base_url}/Besluit"
+    base_params = {
+        "$filter": filter_query,
+        "$top": page_size,
+        "$expand": "Stemming",
+        "$orderby": "GewijzigdOp desc",
+    }
+
+    all_records = []
+    skip = 0
+
+    # The limit is checked once per page, so the result may exceed it
+    while len(all_records) < limit:
+        params = {**base_params, "$skip": skip}
+        response = self.session.get(
+            base_url,
+            params=params,
+            timeout=config.API_TIMEOUT
+        )
+        response.raise_for_status()
+        data = response.json()
+
+        besluit_page = data.get("value", [])
+        # An empty page means there is nothing left to fetch
+        if not besluit_page:
+            break
+
+        # Process page
+        for besluit in besluit_page:
+            all_records.extend(self._extract_votes(besluit))
+
+        skip += page_size
+
+    return all_records
+```
+
+## Retry with Backoff Pattern
+
+For transient failures:
+
+```python
+# ai_provider.py
+import time
+import random
+
+import requests
+from requests.exceptions import ConnectionError
+
+def _post_with_retries(
+    path: str,
+    json: dict,
+    retries: int = 3
+) -> requests.Response:
+    """POST with exponential backoff retry.
+
+    Retries on HTTP 429, on 5xx responses and on connection errors, waiting
+    ``0.5 * 2 ** (attempt - 1)`` seconds between attempts (plus up to 10%
+    jitter for 429, unless the server supplies a numeric Retry-After).
+
+    Args:
+        path: Request path. NOTE(review): this excerpt does not show how
+            ``url`` and ``headers`` are built; presumably from ``path`` and
+            the provider configuration - confirm.
+        json: JSON payload to send.
+        retries: Maximum number of attempts.
+
+    Returns:
+        The first response that is neither 429 nor 5xx.
+
+    Raises:
+        ProviderError: When the last attempt is rate limited, hits a server
+            error or a connection error.
+    """
+
+    backoff = 0.5
+    for attempt in range(1, retries + 1):
+        try:
+            resp = requests.post(url, json=json, headers=headers, timeout=10)
+
+            # Handle rate limiting
+            if resp.status_code == 429:
+                if attempt == retries:
+                    raise ProviderError("Rate limited")
+
+                delay = None
+                retry_after = resp.headers.get("Retry-After")
+                if retry_after:
+                    try:
+                        # Clamp so a negative value cannot make sleep() raise.
+                        delay = max(0, int(retry_after))
+                    except ValueError:
+                        # Retry-After may also be an HTTP-date; use the
+                        # computed backoff instead of crashing on int().
+                        delay = None
+                if delay is None:
+                    delay = backoff * (2 ** (attempt - 1))
+                    delay += random.uniform(0, delay * 0.1)
+                time.sleep(delay)
+                continue
+
+            # Handle server errors
+            if 500 <= resp.status_code < 600:
+                if attempt == retries:
+                    raise ProviderError(f"Server error: {resp.status_code}")
+                time.sleep(backoff * (2 ** (attempt - 1)))
+                continue
+
+            return resp
+
+        except ConnectionError as exc:
+            if attempt == retries:
+                raise ProviderError(f"Connection error: {exc}")
+            time.sleep(backoff * (2 ** (attempt - 1)))
+
+    raise ProviderError("Failed after retries")
+```
+
+## Batch Processing Pattern
+
+Process items in batches to manage API limits:
+
+```python
+def get_embeddings_with_retry(
+    texts: List[str],
+    batch_size: int = 50,
+    retries: int = 3,
+) -> List[Optional[List[float]]]:
+    """Process embeddings in batches with fallback to single items."""
+    
+    results = [None] * len(texts)
+    
+    i = 0
+    while i < len(texts):
+        end = min(len(texts), i + batch_size)
+        chunk = texts[i:end]
+        
+        # Try batch first
+        try:
+            emb_chunk = get_embeddings_batch(chunk)
+            for j, emb in enumerate(emb_chunk):
+                results[i + j] = emb
+            i = end
+            continue
+        except Exception:
+            pass
+        
+        # Fallback: single items
+        for j, text in enumerate(chunk):
+            try:
+                results[i + j] = get_embedding(text)
+            except Exception:
+                results[i + j] = None
+        
+        i = end
+    
+    return results
+```
+
+## Response Validation Pattern
+
+Validate API responses before processing:
+
+```python
+def _process_response(self, response: requests.Response) -> Dict:
+    """Validate and parse API response."""
+    
+    response.raise_for_status()
+    data = response.json()
+    
+    if "value" not in data:
+        raise ValueError("Unexpected response format: missing 'value' key")
+    
+    return data
+
+def _validate_besluit(self, besluit: Dict) -> bool:
+    """Check required fields exist."""
+    required = ["Id", "GewijzigdOp"]
+    return all(field in besluit for field in required)
+```
+
+## Error Handling Patterns
+
+Always provide safe fallbacks:
+
+```python
+def safe_api_call(self, endpoint: str, params: Dict = None) -> List[Dict]:
+    """Call API with error handling and fallback."""
+    try:
+        response = self.session.get(
+            endpoint, 
+            params=params, 
+            timeout=config.API_TIMEOUT
+        )
+        response.raise_for_status()
+        data = response.json()
+        return data.get("value", [])
+    except requests.Timeout:
+        _logger.warning(f"API timeout for {endpoint}")
+        return []
+    except requests.HTTPError as e:
+        _logger.error(f"HTTP error: {e}")
+        return []
+    except Exception as e:
+        _logger.error(f"API call failed: {e}")
+        return []
+```
+
+## Session Management
+
+Reuse session for connection pooling:
+
+```python
+class TweedeKamerAPI:
+    def __init__(self):
+        self.session = requests.Session()
+        self.session.headers.update({
+            "Accept": "application/json",
+            "User-Agent": "Dutch-Political-Compass-Tool/1.0",
+        })
+    
+    def close(self):
+        """Clean up session when done."""
+        self.session.close()
+    
+    def __enter__(self):
+        return self
+    
+    def __exit__(self, *args):
+        self.close()
+
+# Usage
+with TweedeKamerAPI() as api:
+    motions = api.get_motions(start_date)
+```
--- a/.mindmodel/patterns/architecture.yaml
+++ b/.mindmodel/patterns/architecture.yaml
@ -0,0 +1,230 @@
+# Architectural Patterns
+
+## Repository Pattern
+
+The `MotionDatabase` class acts as a repository, encapsulating all database operations behind a clean interface.
+
+```python
+# database.py
+class MotionDatabase:
+    def __init__(self, db_path: str = config.DATABASE_PATH):
+        self.db_path = db_path
+        self._init_database()
+    
+    def get_motion(self, motion_id: int) -> Optional[Dict]:
+        """Get a single motion by ID."""
+        conn = duckdb.connect(self.db_path)
+        try:
+            result = conn.execute(
+                "SELECT * FROM motions WHERE id = ?", (motion_id,)
+            ).fetchone()
+            return result
+        finally:
+            conn.close()
+    
+    def get_filtered_motions(
+        self,
+        policy_area: str = "Alle",
+        min_margin: float = 0.0,
+        max_margin: float = 1.0,
+        limit: int = 10
+    ) -> List[Dict]:
+        """Get filtered list of motions."""
+        ...
+```
+
+**Usage**: Import the singleton instance for all DB operations.
+```python
+from database import db
+
+motions = db.get_filtered_motions(policy_area="Klimaat", limit=20)
+```
+
+## Facade Pattern
+
+Simplified interfaces over complex subsystems.
+
+### MotionDatabase Facade
+```python
+# Single entry point for all database operations
+db = MotionDatabase()  # Singleton instance
+
+# Operations are abstracted:
+db.create_session(total_motions)
+db.record_vote(session_id, motion_id, vote)
+db.get_party_results(session_id)
+```
+
+### API Client Facade
+```python
+# api_client.py
+class TweedeKamerAPI:
+    def __init__(self):
+        self.session = requests.Session()  # Connection pooling
+    
+    def get_motions(self, start_date, end_date) -> List[Dict]:
+        """Simple interface hiding OData pagination details."""
+        voting_records, besluit_meta = self._get_voting_records(start_date, end_date)
+        return self._process_voting_records(voting_records, besluit_meta)
+```
+
+### MotionScraper Facade
+```python
+# scraper.py (if used)
+class MotionScraper:
+    def get_motion_content(self, url: str) -> Optional[str]:
+        """Extract body text from official website."""
+        ...
+```
+
+## Pipeline Pattern
+
+Sequential phases with explicit dependencies:
+
+```
+pipeline/run_pipeline.py
+├── Phase 1: fetch_mp_metadata
+│   └── pipeline/fetch_mp_metadata.py
+├── Phase 2: extract_mp_votes
+│   └── pipeline/extract_mp_votes.py
+├── Phase 3: svd_pipeline
+│   └── pipeline/svd_pipeline.py
+├── Phase 4: text_pipeline (gap-fill)
+│   └── pipeline/text_pipeline.py
+└── Phase 5: fusion (combine SVD + text)
+    └── pipeline/fusion.py
+```
+
+### Phase Orchestration
+```python
+# pipeline/run_pipeline.py
+def run(args: argparse.Namespace) -> int:
+    db = MotionDatabase(args.db_path)
+    
+    # Phase 1: MP metadata
+    if not args.skip_metadata:
+        from pipeline.fetch_mp_metadata import fetch_mp_metadata
+        fetch_mp_metadata(db_path=db.db_path)
+    
+    # Phase 2: Extract votes
+    if not args.skip_extract:
+        from pipeline.extract_mp_votes import extract_mp_votes
+        extract_mp_votes(db_path=db.db_path)
+    
+    # Phase 3: SVD per window
+    if not args.skip_svd:
+        from pipeline.svd_pipeline import run_svd_pipeline
+        run_svd_pipeline(db, windows, args.svd_k)
+    
+    # ... additional phases
+```
+
+## Strategy Pattern
+
+Interchangeable algorithms for axis computation:
+
+```python
+# analysis/political_axis.py
+def compute_political_axis(
+    vectors: Dict[str, np.ndarray],
+    method: str = "pca"  # or "anchor"
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Compute political axis using specified method.
+    
+    Methods:
+    - 'pca': Use first principal component
+    - 'anchor': Use predefined anchor motions
+    """
+    if method == "pca":
+        return _compute_pca_axis(vectors)
+    elif method == "anchor":
+        return _compute_anchor_axis(vectors)
+```
+
+## Visitor Pattern
+
+External operations on data structures:
+
+```python
+# analysis/trajectory.py
+def _procrustes_align_windows(
+    window_vecs: Dict[str, Dict[str, np.ndarray]],
+    min_overlap: int = 5,
+) -> Dict[str, Dict[str, np.ndarray]]:
+    """Align SVD vectors across windows using Procrustes rotations.
+    
+    Takes the first window as reference and aligns each subsequent window
+    to it via orthogonal Procrustes on the set of common entities.
+    """
+```
+
+## Builder Pattern
+
+Configuration assembled step by step through successive `add_argument` calls:
+
+```python
+# CLI argument parsing
+parser = argparse.ArgumentParser(description="Pipeline runner")
+parser.add_argument("--db-path", default="data/motions.db")
+parser.add_argument("--start-date", default=None)
+parser.add_argument("--end-date", default=None)
+parser.add_argument("--window-size", choices=["quarterly", "annual"], default="quarterly")
+parser.add_argument("--svd-k", type=int, default=50)
+```
+
+## Decorator Pattern
+
+Retry logic for transient failures:
+
+```python
+# pipeline/ai_provider_wrapper.py
+def get_embeddings_with_retry(
+    texts: List[str],
+    retries: int = 3,
+    batch_size: int = 50,
+) -> List[Optional[List[float]]]:
+    """Return embeddings with automatic retry on failure.
+
+    Args:
+        texts: Inputs to embed.
+        retries: Maximum number of attempts.
+        batch_size: NOTE(review): not used in this excerpt - the embedder is
+            always called with ``batch_size=len(texts)``; confirm intent.
+
+    Returns:
+        The embedder's result, or a list of ``None`` (one per input) when
+        every attempt raised.
+    """
+    backoff = 0.5  # base delay in seconds, doubled after each failed attempt
+    for attempt in range(1, retries + 1):
+        try:
+            return _embedder(texts, batch_size=len(texts))
+        except Exception:
+            if attempt == retries:
+                break
+            time.sleep(backoff * (2 ** (attempt - 1)))
+    return [None] * len(texts)  # Safe fallback
+```
+
+## Data Patterns
+
+### Batch Processing
+Process items in chunks to manage memory and API limits:
+```python
+for i in range(0, len(items), batch_size):
+    chunk = items[i:i + batch_size]
+    process_batch(chunk)
+```
+
+### Caching
+Pre-compute and store expensive results:
+```python
+# SimilarityCache table stores computed similarities
+db.get_similarity(motion_a, motion_b)
+```
+
+### Lazy Loading
+Load data only when needed:
+```python
+class MotionDatabase:
+    @property
+    def _connection(self):
+        if self._conn is None:
+            self._conn = duckdb.connect(self.db_path)
+        return self._conn
+```
+
+### Vectorization
+Use numpy for batch operations:
+```python
+vectors = np.array([v for v in entity_vectors.values()])
+normalized = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
+```
--- a/.mindmodel/patterns/database.yaml
+++ b/.mindmodel/patterns/database.yaml
@ -0,0 +1,239 @@
+# DuckDB Database Patterns
+
+## Connection Management
+
+### Pattern 1: Short-lived per Method (Most Common)
+
+Always create a new connection, use try/finally for cleanup:
+
+```python
+# database.py
+class MotionDatabase:
+    def get_motion(self, motion_id: int) -> Optional[Dict]:
+        conn = duckdb.connect(self.db_path)
+        try:
+            result = conn.execute(
+                "SELECT * FROM motions WHERE id = ?", 
+                (motion_id,)
+            ).fetchone()
+            conn.close()
+            return result
+        except Exception:
+            conn.close()
+            return None
+    
+    def get_filtered_motions(
+        self, 
+        policy_area: str = "Alle",
+        min_margin: float = 0.0,
+        max_margin: float = 1.0,
+        limit: int = 10
+    ) -> List[Dict]:
+        conn = duckdb.connect(self.db_path)
+        try:
+            query = """
+                SELECT * FROM motions 
+                WHERE (? = 'Alle' OR policy_area = ?)
+                AND winning_margin BETWEEN ? AND ?
+                ORDER BY RANDOM()
+                LIMIT ?
+            """
+            rows = conn.execute(query, (policy_area, policy_area, min_margin, max_margin, limit)).fetchall()
+            conn.close()
+            return rows
+        except Exception:
+            conn.close()
+            return []
+```
+
+### Pattern 2: With Statement (Cleaner)
+
+```python
+def execute_query(self, query: str, params: tuple = ()):
+    with duckdb.connect(self.db_path) as conn:
+        return conn.execute(query, params).fetchall()
+```
+
+### Pattern 3: Lazy Connection Caching
+
+For frequently accessed connections:
+
+```python
+class MotionDatabase:
+    def __init__(self, db_path: str = config.DATABASE_PATH):
+        self.db_path = db_path
+        self._conn = None
+    
+    @property
+    def connection(self):
+        if self._conn is None:
+            self._conn = duckdb.connect(self.db_path)
+        return self._conn
+    
+    def close(self):
+        if self._conn:
+            self._conn.close()
+            self._conn = None
+```
+
+## Table Initialization
+
+Create tables with proper constraints and sequences:
+
+```python
+def _init_database(self):
+    conn = duckdb.connect(self.db_path)
+    
+    # Create sequence for auto-incrementing IDs
+    try:
+        conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1")
+    except:
+        pass
+    
+    # Create tables
+    conn.execute("""
+        CREATE TABLE IF NOT EXISTS motions (
+            id INTEGER DEFAULT nextval('motions_id_seq'),
+            title TEXT NOT NULL,
+            description TEXT,
+            date DATE,
+            policy_area TEXT,
+            voting_results JSON,
+            winning_margin FLOAT,
+            controversy_score FLOAT,
+            layman_explanation TEXT,
+            externe_identifier TEXT,
+            body_text TEXT,
+            url TEXT UNIQUE,
+            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+            PRIMARY KEY (id)
+        )
+    """)
+    
+    # Add columns to existing tables safely
+    try:
+        conn.execute("ALTER TABLE motions ADD COLUMN IF NOT EXISTS body_text TEXT")
+    except Exception:
+        pass  # Column may already exist
+    
+    conn.close()
+```
+
+## JSON Column Handling
+
+Store and retrieve JSON data:
+
+```python
+# Insert JSON
+def store_motion(self, motion: Dict):
+    conn = duckdb.connect(self.db_path)
+    try:
+        conn.execute(
+            "INSERT INTO motions (title, voting_results) VALUES (?, ?)",
+            (motion["title"], json.dumps(motion["voting_results"]))
+        )
+        conn.close()
+    except Exception:
+        conn.close()
+
+# Query JSON
+def get_motions_with_votes(self, party: str) -> List[Dict]:
+    conn = duckdb.connect(self.db_path)
+    try:
+        rows = conn.execute("""
+            SELECT title, voting_results 
+            FROM motions 
+            WHERE JSON_EXTRACT(voting_results, '$.party') = ?
+        """, (party,)).fetchall()
+        conn.close()
+        return rows
+    except Exception:
+        conn.close()
+        return []
+```
+
+## Query Patterns
+
+### Parameterized Queries (Always!)
+```python
+# SAFE - uses parameterized query
+conn.execute("SELECT * FROM motions WHERE id = ?", (motion_id,))
+
+# AVOID - SQL injection risk
+# conn.execute(f"SELECT * FROM motions WHERE id = {motion_id}")  # BAD!
+```
+
+### Batch Inserts
+```python
+def bulk_insert_motions(self, motions: List[Dict]):
+    conn = duckdb.connect(self.db_path)
+    try:
+        for motion in motions:
+            conn.execute(
+                """INSERT OR IGNORE INTO motions 
+                   (title, date, policy_area) VALUES (?, ?, ?)""",
+                (motion["title"], motion["date"], motion["policy_area"])
+            )
+        conn.close()
+    except Exception:
+        conn.close()
+```
+
+### Aggregation Queries
+```python
+def get_party_vote_stats(self, party: str) -> Dict:
+    conn = duckdb.connect(self.db_path)
+    try:
+        result = conn.execute("""
+            SELECT 
+                COUNT(*) as total_votes,
+                SUM(CASE WHEN vote = 'Voor' THEN 1 ELSE 0 END) as voor,
+                SUM(CASE WHEN vote = 'Tegen' THEN 1 ELSE 0 END) as tegen
+            FROM mp_votes
+            WHERE party = ?
+        """, (party,)).fetchone()
+        conn.close()
+        return {"total": result[0], "voor": result[1], "tegen": result[2]}
+    except Exception:
+        conn.close()
+        return {"total": 0, "voor": 0, "tegen": 0}
+```
+
+## Error Handling
+
+Always close connections in a `finally` block or with a context manager:
+
+```python
+def safe_query(self, query: str, params: tuple = ()):
+    conn = None
+    try:
+        conn = duckdb.connect(self.db_path)
+        result = conn.execute(query, params).fetchall()
+        return result
+    except Exception as e:
+        _logger.error(f"Query failed: {e}")
+        return []
+    finally:
+        if conn:
+            conn.close()
+```
+
+## Testing with Mock
+
+For unit tests without DuckDB, the constructor falls back to JSON files when `duckdb` is `None`:
+
+```python
+# In MotionDatabase.__init__
+def __init__(self, db_path: str = config.DATABASE_PATH):
+    self.db_path = db_path
+    self._file_mode = duckdb is None
+    
+    if duckdb is None:
+        # Create JSON fallback files
+        for p in (f"{db_path}.embeddings.json", f"{db_path}.similarity_cache.json"):
+            if not os.path.exists(p):
+                with open(p, "w") as fh:
+                    fh.write("[]")
+    else:
+        self._init_database()
+```
--- a/.mindmodel/patterns/duckdb-access.md
+++ b/.mindmodel/patterns/duckdb-access.md
@ -0,0 +1,79 @@
+---
+title: DuckDB Access Pattern
+category: patterns
+---
+# DuckDB Access Pattern
+
+## Rules
+
+- Prefer using read_only=True for compute-only subprocesses (e.g., SVD compute) to allow concurrent readers.
+- Prefer "with duckdb.connect(db_path, read_only=True) as conn" for scoped connections so conn.close() is automatic.
+- If a long-lived connection is created at module level, provide explicit close() or ensure operation is safe for Streamlit's lifecycle.
+- Prefer parameterizing db_path in pipelines and creating connections locally (avoid global connections that cross threads).
+
+## Examples
+
+### database.py - Explicit connect/close for schema init
+
+```python
+conn = duckdb.connect(self.db_path)
+...
+conn.execute("""
+    CREATE TABLE IF NOT EXISTS fused_embeddings (
+        id INTEGER DEFAULT nextval('fused_embeddings_id_seq'),
+        motion_id INTEGER NOT NULL,
+        window_id TEXT NOT NULL,
+        vector JSON NOT NULL,
+        svd_dims INTEGER NOT NULL,
+        text_dims INTEGER NOT NULL,
+        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+        PRIMARY KEY (id)
+    )
+""")
+conn.close()
+```
+
+### pipeline/svd_pipeline.py - Read-only connection
+
+```python
+conn = duckdb.connect(db_path, read_only=True)
+try:
+    rows = conn.execute(
+        "SELECT motion_id, mp_name, vote FROM mp_votes WHERE date BETWEEN ? AND ?",
+        (start_date, end_date),
+    ).fetchall()
+finally:
+    conn.close()
+```
+
+### similarity/compute.py - Preferred 'with' context
+
+```python
+try:
+    import duckdb
+except Exception:
+    logger.exception("duckdb import failed; cannot load vectors")
+    return 0
+
+with duckdb.connect(db.db_path) as conn:
+    rows = conn.execute(query, params).fetchall()
+```
+
+## Anti-Patterns
+
+### Bad: Connection without closure
+
+```python
+# BAD: connection may leak if exception occurs before explicit close
+conn = duckdb.connect(db_path)
+rows = conn.execute("SELECT ...").fetchall()
+# missing finally/close
+```
+
+**Remediation**: Use "with" context or ensure conn.close() in finally block.
+
+### Bad: Parallel write connections
+
+**Problem**: Opening write connections from many parallel workers without coordination.
+
+**Remediation**: Open read_only for compute processes and centralize writes via short-lived connections or a single writer worker.
--- a/.mindmodel/patterns/embeddings-similarity.md
+++ b/.mindmodel/patterns/embeddings-similarity.md
@ -0,0 +1,74 @@
+---
+title: Embeddings Similarity Pipeline
+category: patterns
+---
+# Embeddings Similarity Pipeline
+
+## Rules
+
+- Keep embedding calls batched where possible; fall back to per-item attempts on persistent batch failure.
+- Store raw embeddings, SVD vectors, and fused_embeddings separately; fused_embeddings are typically the concatenation [svd + text].
+- Compute similarity as normalized cosine on padded vectors; record top-k neighbors in similarity_cache.
+- Use read_only DuckDB connections in compute workers to allow parallel runs.
+
+## Examples
+
+### pipeline/ai_provider_wrapper.py - Batched embed + fallback
+
+```python
+for start in range(0, len(texts), batch_size):
+    chunk = texts[start : start + batch_size]
+    resp = _post_with_retries("/embeddings", json={"model": model, "input": chunk})
+...
+for j in range(i, end):
+    t = texts[j]
+    single, single_exc = _attempt_batch([t], j)
+    if single:
+        results[j] = single[0]
+```
+
+### pipeline/fusion.py - Concatenation and storage
+
+```python
+try:
+    svd_vec = json.loads(svd_json)
+except Exception:
+    _logger.exception("Invalid SVD vector JSON for entity %s", entity_id)
+    skipped_missing_svd += 1
+    continue
+...
+fused = list(svd_vec) + list(text_vec)
+res = db.store_fused_embedding(
+    int(entity_id),
+    window_id,
+    fused,
+    svd_dims=len(svd_vec),
+    text_dims=len(text_vec),
+)
+```
+
+### similarity/compute.py - Normalized cosine similarity
+
+```python
+# Normalize rows
+norms = np.linalg.norm(matrix, axis=1, keepdims=True)
+norms[norms == 0] = 1.0
+normalized = matrix / norms
+sim = normalized @ normalized.T
+...
+# pick top-k neighbors and write to similarity_cache
+```
+
+## Anti-Patterns
+
+### Bad: Assuming consistent vector length
+
+**Problem**: Assuming consistent vector length without checks leads to shape errors.
+
+**Remediation**: Detect inconsistent lengths, pad with zeros, and log a warning (as seen in compute.py).
+
+### Bad: Inline heavy computation in UI
+
+**Problem**: Recomputing heavy pipelines inline in UI requests.
+
+**Remediation**: Schedule heavy work in scripts/subprocesses and read precomputed results in UI.
--- a/.mindmodel/patterns/error-handling.md
+++ b/.mindmodel/patterns/error-handling.md
@ -0,0 +1,63 @@
+---
+title: Error Handling Pattern
+category: patterns
+---
+# Error Handling Pattern
+
+## Rules
+
+- Use explicit exceptions for domain/error classification (e.g., ProviderError, ValueError).
+- Prefer logging.exception when catching an exception where stack trace is useful.
+- Avoid broad except: clauses that swallow exceptions; if broad except is used for "best-effort" fallback, log at warning and include original exception context.
+- For public library-like functions, prefer raising typed exceptions instead of returning magic values ([], False) — only return safe defaults where documented.
+
+## Examples
+
+### ai_provider.py - Network error to ProviderError
+
+```python
+except requests.ConnectionError as exc:
+    if attempt == retries:
+        raise ProviderError(
+            f"Connection error when calling provider: {exc}"
+        ) from exc
+    ...
+```
+
+### pipeline/ai_provider_wrapper.py - Best-effort with logging
+
+```python
+except Exception:
+    _logger.exception("Failed to append audit event for embedding failure")
+results[j] = None
+```
+
+### similarity/compute.py - Defensive import handling
+
+```python
+try:
+    import duckdb
+except Exception:
+    logger.exception("duckdb import failed; cannot load vectors")
+    return 0
+```
+
+## Anti-Patterns
+
+### Bad: Silent exception swallowing
+
+```python
+try:
+    do_work()
+except Exception:
+    return []
+# BAD: hides the root cause and returns an ambiguous default
+```
+
+**Remediation**: Narrow the exception types, or at minimum call `logger.exception()` and then re-raise (or convert to a domain error if the failure is truly handled).
+
+### Bad: Mixing print() and logging
+
+**Problem**: Mixing print() and logging for errors.
+
+**Remediation**: Replace print() calls with logger.* calls; use structured logging configuration.
--- a/.mindmodel/patterns/module-singletons.md
+++ b/.mindmodel/patterns/module-singletons.md
@ -0,0 +1,41 @@
+---
+title: Module Singletons Pattern
+category: patterns
+---
+# Module Singletons Pattern
+
+## Rules
+
+- Module-level singletons (e.g., db = MotionDatabase()) are acceptable but should be created carefully:
+  - Avoid expensive initialization at import time.
+  - Provide a way to construct with a test DB path or to reinitialize in tests.
+- If a singleton holds resources (DB connections, sessions), ensure safe shutdown on program exit.
+
+## Examples
+
+### database.py - Safe class initialization
+
+```python
+class MotionDatabase:
+    def __init__(self, db_path: str = config.DATABASE_PATH):
+        self.db_path = db_path
+        # If duckdb is not available, operate in lightweight file-backed mode
+        self._file_mode = duckdb is None
+        self._init_database()
+```
+
+### similarity/lookup.py - Local instances
+
+```python
+db = MotionDatabase(db_path=db_path) if db_path else MotionDatabase()
+if hasattr(db, "get_cached_similarities"):
+    rows = db.get_cached_similarities(...)
+```
+
+## Anti-Patterns
+
+### Bad: Heavy initialization at import time
+
+**Problem**: Creating connections and performing heavy schema migrations during import.
+
+**Remediation**: Move heavy init to an explicit initialize() method and keep import fast.
--- a/.mindmodel/patterns/patterns.yaml
+++ b/.mindmodel/patterns/patterns.yaml
@ -0,0 +1,228 @@
+# Code Patterns
+
+## 1. Page Wrapper Pattern
+Thin Streamlit page files delegate to core modules. Pages contain only route logic, not business logic.
+
+**Example** (pages/1_🗳️_Stemwijzer.py):
+```python
+import streamlit as st
+from quiz_module import render_quiz_page
+
+st.set_page_config(...)
+render_quiz_page()
+```
+
+**Example** (pages/2_🔍_Explorer.py):
+```python
+import streamlit as st
+from explorer import render_explorer
+
+st.set_page_config(...)
+render_explorer()
+```
+
+**Rule**: Pages should have <20 lines of logic. All complexity lives in modules.
+
+---
+
+## 2. Pipeline Pattern
+Data flows: fetch → transform → store
+
+**Location**: `pipeline/` directory
+
+**Pattern**:
+```python
+def run_pipeline():
+    raw_data = fetch_from_source()
+    transformed = transform(raw_data)
+    store(transformed)
+
+def fetch_from_source():
+    # API call or DB query
+    ...
+
+def transform(raw):
+    # Clean, normalize, compute derived fields
+    ...
+```
+
+**Usage**: SVD computation pipeline, data ingestion, motion processing
+
+---
+
+## 3. API Client Pattern
+HTTP client with retry/backoff for external data sources.
+
+**Pattern**:
+```python
+import time
+import requests
+
+def fetch_with_retry(url, max_retries=3):
+    for attempt in range(max_retries):
+        try:
+            response = requests.get(url)
+            response.raise_for_status()
+            return response.json()
+        except requests.RequestException:
+            if attempt < max_retries - 1:
+                time.sleep(2 ** attempt)  # exponential backoff
+            else:
+                raise
+```
+
+---
+
+## 4. Pure Helper Functions
+Functions in `explorer_helpers.py` have no side effects, no IO.
+
+**Pattern**:
+```python
+def compute_party_coords(svd_df, party_map, window):
+    """Pure function: same inputs → same outputs, no side effects."""
+    # Filter, compute, return
+    return result_df
+
+def build_scatter_trace(df, color_col, marker_size=8):
+    """Pure: returns Plotly trace dict, no rendering."""
+    trace = go.Scatter(x=df.x, y=df.y, mode='markers', ...)
+    return trace
+```
+
+**Rule**: No `import streamlit` in helper modules. No file I/O. No global state.
+
+---
+
+## 5. Dummy Fallbacks for Optional Dependencies
+Gracefully degrade when optional packages are unavailable.
+
+**Pattern**:
+```python
+try:
+    import umap
+    HAS_UMAP = True
+except ImportError:
+    HAS_UMAP = False
+    # or provide dummy stub
+
+def project_to_2d(vectors):
+    if HAS_UMAP:
+        return umap.UMAP().fit_transform(vectors)
+    else:
+        return vectors[:, :2]  # fallback: just take first 2 dims
+```
+
+**Used for**: UMAP, Plotly (with fallback to altair or text-only)
+
+---
+
+## 6. Cached Data Loaders
+Expensive DB queries wrapped with `@st.cache_data`.
+
+**Pattern**:
+```python
+@st.cache_data
+def load_svd_vectors(window: str) -> pd.DataFrame:
+    return db.query("SELECT * FROM svd_vectors WHERE window = ?", window)
+
+@st.cache_data
+def load_party_centroids(window: str) -> pd.DataFrame:
+    return db.query("SELECT * FROM party_centroids WHERE window = ?", window)
+
+# Clear cache when data updates
+@st.cache_data
+def load_motions(category: str | None = None) -> pd.DataFrame:
+    ...
+```
+
+**Rule**: Use `ttl=3600` for large datasets. Use `show_spinner=False` where appropriate.
+
+---
+
+## 7. Plotly Dual-Layer Charts
+Charts built with two traces: scatter points + text annotations.
+
+**Pattern**:
+```python
+def build_dual_layer_chart(df, x_col, y_col, label_col):
+    # Layer 1: markers
+    scatter = go.Scatter(
+        x=df[x_col], y=df[y_col],
+        mode='markers',
+        marker=dict(size=10, color=df['color']),
+        name='Parties'
+    )
+    # Layer 2: labels (smaller, non-hoverable)
+    labels = go.Scatter(
+        x=df[x_col], y=df[y_col],
+        mode='text',
+        text=df[label_col],
+        textposition='top center',
+        showlegend=False
+    )
+    return [scatter, labels]
+```
+
+**Used in**: Explorer tab charts, party position plots
+
+---
+
+## 8. Singleton Module Instances
+One shared instance per module, created eagerly at import time or lazily on first access through an accessor such as `get_db()`.
+
+**Pattern**:
+```python
+# database.py
+class MotionDatabase:
+    def __init__(self, db_path=None):
+        self.conn = ibis.duckdb.connect(db_path)
+        self._load_schema()
+
+_db = None
+def get_db():
+    global _db
+    if _db is None:
+        _db = MotionDatabase()
+    return _db
+
+# At module bottom:
+db = MotionDatabase()  # singleton instance
+```
+
+**Also used in**: `config.py` exports `config` and `PARTY_COLOURS`
+
+---
+
+## 9. Dataclass Config Pattern
+Configuration centralized in a `@dataclass`.
+
+**Pattern**:
+```python
+from dataclasses import dataclass, field
+
+@dataclass
+class Config:
+    db_path: str = "data/stemwijzer.duckdb"
+    default_window: str = "2023"
+    cache_ttl: int = 3600
+    party_colours: dict = field(default_factory=lambda: PARTY_COLOURS)
+
+    def __post_init__(self):
+        if not Path(self.db_path).exists():
+            raise FileNotFoundError(f"Database not found: {self.db_path}")
+```
+
+---
+
+## 10. Graceful Degradation with try/except
+Core pattern throughout: attempt operation, fall back gracefully.
+
+**Pattern**:
+```python
+def get_political_position(mp_name, window):
+    try:
+        vectors = load_svd_vectors(window)
+        return vectors[vectors['mp_name'] == mp_name]['vector_2d'].iloc[0]
+    except (KeyError, IndexError):
+        return [0.0, 0.0]  # neutral fallback
+```
--- a/.mindmodel/patterns/python.yaml
+++ b/.mindmodel/patterns/python.yaml
@ -0,0 +1,196 @@
+# Python-Specific Patterns
+
+## Singleton Pattern
+
+Use module-level instances for shared resources:
+
+```python
+# database.py
+class MotionDatabase:
+    def __init__(self, db_path: str = config.DATABASE_PATH):
+        self.db_path = db_path
+        self._init_database()
+    
+    def _init_database(self):
+        # Initialize tables on first instantiation
+        ...
+
+# Bottom of file - the singleton
+db = MotionDatabase()
+```
+
+**Usage across the codebase:**
+```python
+# In other modules
+from database import db
+
+def some_function():
+    motions = db.get_filtered_motions(limit=10)
+    return motions
+```
+
+Similarly for other singletons:
+```python
+# summarizer.py
+class MotionSummarizer:
+    def __init__(self):
+        pass  # Stateless
+    
+    def generate_layman_explanation(self, title: str, body: str) -> str:
+        ...
+
+summarizer = MotionSummarizer()
+```
+
+## Dataclass Config Pattern
+
+Use a dataclass for configuration with environment variable support:
+
+```python
+# config.py
+from dataclasses import dataclass
+from typing import List
+import os
+
+@dataclass
+class Config:
+    # Database settings
+    DATABASE_PATH = "data/motions.db"
+    
+    # API settings
+    TWEEDE_KAMER_ODATA_API = "https://gegevensmagazijn.tweedekamer.nl/OData/v4/2.0"
+    API_TIMEOUT = 30
+    API_BATCH_SIZE = 250
+    
+    # AI settings
+    OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
+    OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
+    QWEN_MODEL = "qwen/qwen-2.5-72b-instruct"
+    
+    # App settings
+    DEFAULT_MOTION_COUNT = 10
+    SESSION_TIMEOUT_DAYS = 30
+    
+    # Policy areas
+    POLICY_AREAS: List[str] = None
+    def __post_init__(self):
+        self.POLICY_AREAS = [
+            "Alle", "Economie", "Klimaat", "Immigratie", 
+            "Zorg", "Onderwijs", "Defensie", "Sociale Zaken", "Algemeen"
+        ]
+
+config = Config()
+```
+
+**Usage:**
+```python
+from config import config
+
+# Access as attributes
+timeout = config.API_TIMEOUT
+areas = config.POLICY_AREAS
+```
+
+## DuckDB Connection Pattern
+
+Short-lived connections with explicit cleanup:
+
+```python
+class MotionDatabase:
+    def get_motion(self, motion_id: int) -> Optional[Dict]:
+        conn = duckdb.connect(self.db_path)
+        try:
+            result = conn.execute(
+                "SELECT * FROM motions WHERE id = ?", 
+                (motion_id,)
+            ).fetchone()
+            return result
+        finally:
+            conn.close()
+    
+    def get_filtered_motions(self, **kwargs) -> List[Dict]:
+        conn = duckdb.connect(self.db_path)
+        try:
+            rows = conn.execute(query, params).fetchall()
+            return rows
+        except Exception:
+            return []  # Safe fallback
+        finally:
+            conn.close()
+```
+
+**Context manager alternative (preferred when applicable):**
+```python
+def some_operation(self):
+    with duckdb.connect(self.db_path) as conn:
+        result = conn.execute("SELECT ...").fetchall()
+    return result
+```
+
+## Try/Except with Fallback Pattern
+
+Always provide safe fallbacks:
+
+```python
+def get_motion_or_default(self, motion_id: int) -> Dict:
+    try:
+        conn = duckdb.connect(self.db_path)
+        result = conn.execute("SELECT * FROM motions WHERE id = ?", (motion_id,)).fetchone()
+        conn.close()
+        return result if result else {}
+    except Exception:
+        return {}
+```
+
+## Optional Import Pattern
+
+Handle optional dependencies gracefully:
+
+```python
+try:
+    import duckdb
+except Exception:  # pragma: no cover
+    duckdb = None
+
+class MotionDatabase:
+    def __init__(self, db_path: str = config.DATABASE_PATH):
+        self._file_mode = duckdb is None
+        ...
+```
+
+## Property Pattern
+
+Lazy initialization of expensive resources:
+
+```python
+class MotionDatabase:
+    def __init__(self, db_path: str = config.DATABASE_PATH):
+        self.db_path = db_path
+        self._session_cache = None
+    
+    @property
+    def session(self):
+        """Lazy-load expensive resources."""
+        if self._session_cache is None:
+            self._session_cache = self._create_session()
+        return self._session_cache
+```
+
+## Type Annotation Patterns
+
+```python
+from typing import Dict, List, Optional, Tuple, Any
+
+# Optional with None default
+def get_motion(self, motion_id: Optional[int] = None) -> Optional[Dict]:
+    ...
+
+# Multiple return types
+def parse_vote(self, vote_str: str) -> Tuple[bool, str]:
+    """Returns (success, error_message)"""
+    ...
+
+# Generic types
+def get_batch(self, ids: List[int]) -> Dict[str, Any]:
+    ...
+```
--- a/.mindmodel/patterns/requests-http.md
+++ b/.mindmodel/patterns/requests-http.md
@ -0,0 +1,77 @@
+---
+title: Requests HTTP Pattern
+category: patterns
+---
+# Requests HTTP Pattern
+
+## Rules
+
+- Reuse requests.Session when making multiple calls to the same host to benefit from connection pooling.
+- Wrap outbound HTTP calls with retry/backoff logic and respect Retry-After on 429.
+- Treat 5xx as transient and retry; surface 4xx as configuration/client errors (do not retry unless 429).
+- Raise or wrap non-OK responses into domain ProviderError to make behavior consistent across the codebase.
+
+## Examples
+
+### ai_provider.py - 429 handling with Retry-After
+
+```python
+resp = requests.post(url, json=json, headers=headers, timeout=10)
+...
+if getattr(resp, "status_code", 0) == 429:
+    if attempt == retries:
+        raise ProviderError(f"Provider returned HTTP {resp.status_code}")
+    retry_after = None
+    raw = resp.headers.get("Retry-After") if getattr(resp, "headers", None) else None
+    if raw:
+        try:
+            retry_after = int(raw)
+        except Exception:
+            ...
+    if retry_after is not None:
+        time.sleep(retry_after)
+        continue
+```
+
+### api_client.py - Session + raise_for_status
+
+```python
+response = self.session.get(
+    base_url, params=params, timeout=config.API_TIMEOUT
+)
+response.raise_for_status()
+data = response.json()
+```
+
+### pipeline/ai_provider_wrapper.py - Retry/backoff wrapper
+
+```python
+def _attempt_batch(chunk_texts, start_index):
+    backoff = 0.5
+    for attempt in range(1, retries + 1):
+        try:
+            emb_chunk = _embedder(
+                chunk_texts, model=model, batch_size=len(chunk_texts)
+            )
+            return emb_chunk, None
+        except Exception as exc:
+            if attempt == retries:
+                break
+            sleep = backoff * (2 ** (attempt - 1))
+            time.sleep(sleep)
+            continue
+```
+
+## Anti-Patterns
+
+### Bad: Silent exception swallowing
+
+**Problem**: Blindly catching all requests exceptions and returning an empty response.
+
+**Remediation**: Map network exceptions to retryable vs terminal (ProviderError) and log details.
+
+### Bad: Using print() for errors
+
+**Problem**: Using print() for network errors instead of structured logging.
+
+**Remediation**: Use `_logger.exception()` instead (api_client.py still needs this fix).
--- a/.mindmodel/patterns/streamlit.yaml
+++ b/.mindmodel/patterns/streamlit.yaml
@ -0,0 +1,225 @@
+# Streamlit Patterns
+
+## Session State Initialization
+
+Always initialize session state at the start of the main function:
+
+```python
+# app.py
+import streamlit as st
+
+def main():
+    # Initialize all session state variables
+    if "session_id" not in st.session_state:
+        st.session_state.session_id = None
+    if "current_motion_index" not in st.session_state:
+        st.session_state.current_motion_index = 0
+    if "motions" not in st.session_state:
+        st.session_state.motions = []
+    if "show_results" not in st.session_state:
+        st.session_state.show_results = False
+    
+    # Rest of app...
+```
+
+## Page Configuration
+
+Set page config at the top of each page file:
+
+```python
+# pages/1_Stemwijzer.py
+import streamlit as st
+
+st.set_page_config(
+    page_title="Stemwijzer",
+    page_icon="🗳️",
+    layout="centered",
+)
+
+from explorer import build_mp_quiz_tab
+build_mp_quiz_tab("data/motions.db")
+```
+
+## Thin Page Wrapper Pattern
+
+Pages delegate to shared functions in main modules:
+
+```python
+# pages/2_Explorer.py
+import streamlit as st
+
+st.set_page_config(
+    page_title="Explorer",
+    page_icon="🔭",
+    layout="wide",
+)
+
+from explorer import build_explorer_tab
+build_explorer_tab()
+```
+
+```python
+# explorer.py
+def build_explorer_tab():
+    st.header("🔭 Politiek Explorer")
+    
+    tab1, tab2, tab3 = st.tabs([
+        "Compass", 
+        "Trajectories", 
+        "Zoeken"
+    ])
+    
+    with tab1:
+        render_compass()
+    with tab2:
+        render_trajectories()
+    with tab3:
+        render_search()
+```
+
+## Sidebar Pattern
+
+Use sidebar for configuration and navigation:
+
+```python
+# app.py
+def main():
+    with st.sidebar:
+        st.header("Instellingen")
+        
+        motion_count = st.slider(
+            "Aantal moties",
+            min_value=5,
+            max_value=25,
+            value=10,
+        )
+        
+        policy_area = st.selectbox("Beleidsgebied", config.POLICY_AREAS)
+        
+        if st.button("Start Nieuwe Sessie"):
+            start_new_session(motion_count, policy_area)
+```
+
+## Callback Pattern for State Updates
+
+Use callbacks to handle user interactions:
+
+```python
+def on_motion_vote(motion_id: int, vote: str):
+    """Callback when user votes on a motion."""
+    st.session_state.user_votes[motion_id] = vote
+    
+    # Move to next motion
+    if st.session_state.current_motion_index < len(st.session_state.motions) - 1:
+        st.session_state.current_motion_index += 1
+    else:
+        st.session_state.show_results = True
+    
+    st.rerun()
+
+# In UI
+col1, col2, col3 = st.columns(3)
+with col1:
+    st.button("👍 Voor", on_click=on_motion_vote, args=(motion_id, "Voor"))
+with col2:
+    st.button("👎 Tegen", on_click=on_motion_vote, args=(motion_id, "Tegen"))
+with col3:
+    st.button("❓ Onthouden", on_click=on_motion_vote, args=(motion_id, "Onthouden"))
+```
+
+## Container Pattern for Dynamic Content
+
+Use containers for dynamic rendering:
+
+```python
+def show_motion_interface():
+    if not st.session_state.motions:
+        st.warning("Geen moties geladen")
+        return
+    
+    current_idx = st.session_state.current_motion_index
+    motion = st.session_state.motions[current_idx]
+    
+    with st.container():
+        st.subheader(f"Motie {current_idx + 1} van {len(st.session_state.motions)}")
+        st.markdown(f"**{motion['title']}**")
+        st.caption(f"📅 {motion['date']} | 🏷️ {motion['policy_area']}")
+        
+        if motion.get("layman_explanation"):
+            st.info(motion["layman_explanation"])
+        
+        # Voting buttons...
+```
+
+## Expander Pattern for Details
+
+Use expanders for collapsible content:
+
+```python
+with st.expander("Meer details"):
+    st.markdown(f"**Beschrijving:** {motion.get('description', 'N/A')}")
+    
+    if motion.get("voting_results"):
+        results = json.loads(motion["voting_results"])
+        st.json(results)
+```
+
+## Form Pattern for Batch Updates
+
+Use forms for multiple related inputs:
+
+```python
+with st.form("session_settings"):
+    st.subheader("Sessie Instellingen")
+    
+    col1, col2 = st.columns(2)
+    with col1:
+        count = st.number_input("Aantal moties", min_value=5, max_value=25)
+    with col2:
+        area = st.selectbox("Beleidsgebied", config.POLICY_AREAS)
+    
+    submitted = st.form_submit_button("Start Sessie")
+    if submitted:
+        start_session(count, area)
+```
+
+## Caching Pattern
+
+Cache expensive computations:
+
+```python
+@st.cache_data(ttl=3600)  # Cache for 1 hour
+def load_party_positions(window_id: str) -> Dict:
+    """Load party positions from database."""
+    return db.get_party_positions(window_id)
+
+@st.cache_resource
+def init_database():
+    """Initialize database connection."""
+    return MotionDatabase(config.DATABASE_PATH)
+```
+
+## Home Page Pattern
+
+Landing page with navigation:
+
+```python
+# Home.py
+import streamlit as st
+
+st.set_page_config(
+    page_title="Motief: de stematlas",
+    page_icon="🗺️",
+    layout="centered",
+)
+
+def main():
+    st.title("🗺️ Motief: de stematlas")
+    st.markdown("**Motief** brengt de Nederlandse Tweede Kamer in kaart...")
+    
+    col1, col2 = st.columns(2)
+    with col1:
+        st.page_link("pages/1_Stemwijzer.py", label="Open Stemwijzer", icon="🗳️")
+    with col2:
+        st.page_link("pages/2_Explorer.py", label="Open Explorer", icon="🔭")
+```
--- a/.mindmodel/patterns/validation.md
+++ b/.mindmodel/patterns/validation.md
@ -0,0 +1,37 @@
+---
+title: Validation Pattern
+category: patterns
+---
+# Validation Pattern
+
+## Rules
+
+- Validate inputs early and raise ValueError or domain-specific exceptions (ProviderError) for invalid contract inputs.
+- Tests should assert that invalid inputs raise the expected exceptions.
+- Use explicit checks for types and shapes on public APIs (e.g., ensure text is str before embedding).
+
+## Examples
+
+### ai_provider.py - Type validation
+
+```python
+if not isinstance(text, str):
+    raise ProviderError("text must be a string")
+```
+
+### pipeline/ai_provider_wrapper.py - Defensive empty handling
+
+```python
+if not texts:
+    return []
+if motion_ids is None:
+    motion_ids = [None for _ in texts]
+```
+
+## Anti-Patterns
+
+### Bad: Letting invalid values reach computation
+
+**Problem**: Allowing invalid values to propagate into heavy computation (e.g., non-string into embedding pipeline).
+
+**Remediation**: Fail fast with a typed exception and add unit tests to cover validations.
--- a/.mindmodel/stack/stack.md
+++ b/.mindmodel/stack/stack.md
@ -0,0 +1,67 @@
+---
+title: Tech Stack
+category: stack
+---
+
+# Tech Stack
+
+## Runtime & Language
+- **Python >=3.13**
+
+## Web Framework
+- **Streamlit** - Multi-page app with Home, Stemwijzer, Explorer pages
+
+## Data Layer
+- **DuckDB** - Embedded OLAP database
+  - Tables: motions, mp_votes, svd_vectors, fused_embeddings, embeddings, user_sessions, party_results, mp_metadata
+- **ibis** - Dataframe/query-expression library, not an ORM (referenced, but the DuckDB-native implementation is used)
+
+## AI / LLM
+- **OpenRouter** - API abstraction for AI providers
+- **QWEN** - Primary model
+  - Embeddings: `qwen/qwen3-embedding-4b`
+  - Chat: `qwen/qwen-2.5-72b-instruct`
+- **requests** - HTTP client for the OpenRouter calls (the `openai` SDK is not used directly)
+
+## ML / Analytics
+- **scikit-learn** - KMeans clustering, cosine_similarity, StandardScaler
+- **scipy** - SVD (scipy.linalg.svd), spatial.procrustes
+- **umap-learn** - Dimensionality reduction (optional, graceful fallback to SVD)
+- **numpy** - Numerical computing
+
+## Visualization
+- **Plotly** - Interactive charts (go.Figure, _DummyTrace fallback)
+- **matplotlib** - Static plotting (optional)
+
+## HTTP & Parsing
+- **requests** - Session pooling, retry with backoff
+- **beautifulsoup4** - HTML parsing
+- **lxml** - XML/HTML processing
+
+## Key Source Files
+
+| File | Purpose |
+|------|---------|
+| `database.py` | MotionDatabase singleton, DuckDB connection, 9-table schema |
+| `explorer.py` | Explorer page with 4 tabs (Motion, MP, Party, Evolution) |
+| `explorer_helpers.py` | Pure helper functions, Plotly chart builders |
+| `analysis/` | SVD pipeline, UMAP projection, clustering |
+| `pipeline/` | Data fetch, transform, store pipeline |
+| `pages/1_Stemwijzer.py` | Quiz page |
+| `pages/2_Explorer.py` | Explorer page |
+| `config.py` | Dataclass Config pattern |
+| `ai_provider.py` | OpenRouter API wrapper with retry |
+| `api_client.py` | TweedeKamer OData API client |
+
+## Singleton Instances
+
+| Module | Instance | Type |
+|--------|----------|------|
+| `database.py` | `db` | `MotionDatabase` |
+| `config.py` | `config` | `Config` (dataclass) |
+| `config.py` | `PARTY_COLOURS` | `dict[str, str]` |
+
+## Environment
+- Python >=3.13
+- Environment variables via `.env` (DB path, API keys)
+- No `.env` values in constraint files (security)
--- a/.mindmodel/system.md
+++ b/.mindmodel/system.md
@ -0,0 +1,88 @@
+# System Overview
+
+## Project: Stemwijzer (Dutch Political Voting Compass)
+
+**Purpose**: A web application that maps the Dutch Tweede Kamer (House of Representatives) based on real parliamentary votes, helping citizens discover which political party aligns best with their views.
+
+## Architecture Summary
+
+### Data Flow
+```
+TweedeKamer OData API
+        ↓
+  API Client (api_client.py)
+        ↓
+  DuckDB Database (database.py)
+        ↓
+  Pipeline Processing (pipeline/)
+        ├── fetch_mp_metadata     # MP party + tenure
+        ├── extract_mp_votes     # voting_results → mp_votes
+        ├── svd_pipeline          # SVD on vote matrix + Procrustes
+        ├── text_pipeline         # AI embeddings via OpenRouter
+        └── fusion                # Combine SVD + text vectors
+        ↓
+  Streamlit Web App (Home.py, pages/)
+        ├── Home.py               # Landing page
+        ├── 1_Stemwijzer.py       # Voting quiz
+        └── 2_Explorer.py        # Political compass explorer
+```
+
+### Key Components
+
+| Component | Purpose | File(s) |
+|-----------|---------|---------|
+| **Database** | Motion storage, MP votes, embeddings | `database.py` |
+| **API Client** | TweedeKamer OData API integration | `api_client.py` |
+| **AI Provider** | OpenRouter API for embeddings/summaries | `ai_provider.py` |
+| **Pipeline** | Orchestrated data processing | `pipeline/run_pipeline.py` |
+| **Analysis** | SVD, clustering, trajectory computation | `analysis/*.py` |
+| **Explorer Helpers** | Pure functions, chart builders | `explorer_helpers.py` |
+| **Web App** | Streamlit UI | `Home.py`, `pages/*.py` |
+
+### Tech Stack
+
+- **Language**: Python 3.13+
+- **Web Framework**: Streamlit (multi-page app)
+- **Database**: DuckDB; ibis (a query-expression library, not an ORM) is referenced, but the implementation is DuckDB-native
+- **ML/Analytics**: scipy (SVD, Procrustes), scikit-learn (KMeans, cosine_similarity), umap-learn (optional)
+- **AI/LLM**: OpenRouter-compatible API (QWEN embeddings + chat)
+- **Visualization**: Plotly (interactive charts), matplotlib (optional)
+- **HTTP**: requests with Session pooling and retry
+- **Parsing**: beautifulsoup4, lxml
+
+### Key Patterns
+
+1. **Module-Level Singletons**: `db = MotionDatabase()`, `config = Config()`
+2. **Repository Pattern**: MotionDatabase class with method-per-query
+3. **Service Layer**: TweedeKamerAPI, ai_provider with retry/backoff
+4. **Pipeline Orchestration**: ThreadPoolExecutor for parallel SVD
+5. **Short-Lived Connections**: DuckDB connections in try/finally blocks
+6. **Graceful Degradation**: try/except around optional dependencies
+
+### Domain Invariants
+
+⚠️ **CRITICAL RULES** (from AGENTS.md):
+
+1. **Right-wing parties on RIGHT**: PVV, FVD, JA21, SGP must appear on RIGHT side of all axes in visualizations
+2. **SVD labels = voting patterns**: SVD labels reflect voting patterns, NOT semantic content
+
+### Database Tables
+
+| Table | Purpose |
+|-------|---------|
+| `motions` | Parliamentary motions with id, title, date, category |
+| `mp_votes` | Individual MP votes on motions (Voor/Tegen/Onthouden) |
+| `mp_metadata` | MP names, parties, tenure info |
+| `svd_vectors` | 2D SVD-computed political positions per entity |
+| `fused_embeddings` | Combined SVD + text embeddings |
+| `embeddings` | Text embeddings for motions |
+| `user_sessions` | Voting session tracking |
+| `party_results` | Party match results per session |
+
+### Conventions
+
+- **Error Handling**: Catch `Exception`, return safe fallbacks (False/[]/None)
+- **Logging**: Use `logging.getLogger(__name__)` — **never use print()**
+- **Imports**: stdlib → 3rd party → local (3 groups)
+- **Type Hints**: Required on public functions with typing module imports
+- **DuckDB**: Short-lived connections with try/finally conn.close()
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -1,18 +1,17 @@
+# Minimal pre-commit config stub
+# This file is intentionally minimal; it only declares hooks. Run `pre-commit install` to enable them.
 repos:
  - repo: https://github.com/psf/black
-    rev: 25.1.0
+    rev: 23.9.1
    hooks:
      - id: black
-        language_version: python3.13

-  - repo: https://github.com/charliermarsh/ruff-pre-commit
+  - repo: https://github.com/astral-sh/ruff-pre-commit  # hooks ship in ruff-pre-commit, not the main ruff repo
    rev: v0.11.1
    hooks:
      - id: ruff
-        args: [--fix]

  - repo: https://github.com/PyCQA/isort
-    rev: 6.0.1
+    rev: 5.12.0
    hooks:
      - id: isort
-        args: [--profile, black]
--- a/.streamlit/config.toml
+++ b/.streamlit/config.toml
@ -1,9 +0,0 @@
-[theme]
-primaryColor = "#00d9a3"
-backgroundColor = "#0d1117"
-secondaryBackgroundColor = "#161b22"
-textColor = "#e6edf3"
-font = "sans serif"
-
-[ui]
-showDeployButton = false
--- a/AGENTS.md
+++ b/AGENTS.md
@ -4,16 +4,6 @@

 `docs/solutions/` — documented solutions to past problems (bugs, best practices, workflow patterns), organized by category with YAML frontmatter (`module`, `tags`, `problem_type`). Relevant when implementing or debugging in documented areas.

-## Infrastructure Notes
-
- Git is hosted on a **Gitea** server, not GitHub directly. The `gh` CLI is not available for this repo; use standard `git` commands instead.
-
-## Agent Tools
-
-`agent_tools/` — atomic primitives that let an agent operate the Stemwijzer pipeline, database, and analysis surface. The agent-native architecture track (see STRATEGY.md) exposes every human operator capability through these tools.
-
-**When operating on the database, pipeline, or analysis surface, always prefer `agent_tools` over ad-hoc SQL or direct module calls.** Use `agent_tools.list_tools()` for runtime discovery. For the full agent persona and decision criteria, see `agent_tools/SYSTEM_PROMPT.md`.
-
 ## Project Conventions

 - Right-wing parties (PVV, FVD, JA21, SGP) must appear on the RIGHT side of all axes in visualizations
--- a/32
+++ b/32
@ -0,0 +1,32 @@
+FROM python:3.13-slim
+
+# Install minimal system deps (curl is needed by the HEALTHCHECK below)
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends build-essential curl ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create non-root user for running the app
+RUN useradd -m -s /bin/bash app
+
+WORKDIR /home/app/app
+
+# Copy project files
+COPY . /home/app/app
+
+# Upgrade pip and install the project from pyproject.toml; --no-cache-dir keeps pip's cache out of the image layers
+RUN python -m pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir .
+
+# Give the non-root user ownership of the copied files
+RUN chown -R app:app /home/app
+
+USER app
+ENV PYTHONPATH=/home/app/app
+
+EXPOSE 8501
+
+# Healthcheck against Streamlit's dedicated health endpoint instead of fetching the full app page
+HEALTHCHECK --interval=30s --timeout=3s --start-period=10s CMD curl -f http://localhost:8501/_stcore/health || exit 1
+
+# Run the multi-page Streamlit app
+CMD ["streamlit", "run", "Home.py", "--server.port=8501", "--server.address=0.0.0.0"]
--- a/Home.py
+++ b/Home.py
@ -1,51 +1,53 @@
-"""StemAtlas — navigation entry point.
+"""StemAtlas — home page.

-Uses st.navigation() for explicit control over page order and default page.
-Run with: uv run streamlit run Home.py
+Entry point for the Streamlit multi-page app. Shows a landing page with
+brief descriptions of and links to the two sub-pages.
 """

 import streamlit as st

 st.set_page_config(
-    page_title="StemAtlas",
-    page_icon=None,
+    page_title="Motief: de stematlas",
+    page_icon="🗺️",
    layout="centered",
+    initial_sidebar_state="expanded",
 )

-# Hide Streamlit chrome and add mobile-friendly styles.
+
+def main() -> None:
+    st.title("🗺️ Motief: de stematlas")
    st.markdown(
-    """
-    <style>
-    .stAppDeployButton { display: none !important; }
-    .stStatusWidget { display: none !important; }
-    header [data-testid="stToolbar"] { display: none !important; }
-
-    /* Mobile-friendly touch targets and readability */
-    @media (max-width: 768px) {
-        .stButton button {
-            min-height: 48px !important;
-            font-size: 16px !important;
-        }
-        .stRadio label {
-            font-size: 16px !important;
-        }
-        .stSelectbox label, .stSlider label, .stNumberInput label {
-            font-size: 15px !important;
-        }
-        h1 { font-size: 1.6rem !important; }
-        h2 { font-size: 1.3rem !important; }
-        h3 { font-size: 1.1rem !important; }
-    }
-
-    /* Prevent horizontal overflow */
-    .stApp { max-width: 100vw; overflow-x: hidden; }
-    </style>
-    """,
-    unsafe_allow_html=True,
+        "**Motief** brengt de Nederlandse Tweede Kamer in kaart op basis van "
+        "echte stemmingen over moties. Gebruik de Stemwijzer om te ontdekken welke "
+        "partij het beste bij jouw standpunten past, of verken de politieke ruimte "
+        "zelf in de Explorer."
+    )
+
+    st.divider()
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.subheader("🗳️ Stemwijzer")
+        st.markdown(
+            "Stem op echte Tweede Kamer moties en zie welke partij het "
+            "dichtst bij jouw keuzes staat."
+        )
+        st.page_link("pages/1_Stemwijzer.py", label="Open Stemwijzer", icon="🗳️")
+
+    with col2:
+        st.subheader("🔭 Politiek Explorer")
+        st.markdown(
+            "Verken het politieke kompas, partijtrajecten door de tijd, "
+            "en zoek vergelijkbare moties op in het archief."
+        )
+        st.page_link("pages/2_Explorer.py", label="Open Explorer", icon="🔭")
+
+    st.divider()
+    st.caption(
+        "Data: Tweede Kamer API · Embeddings: QWEN (via OpenRouter) · "
+        "Gemaakt door [Sven Geboers](https://sgeboers.nl)"
    )

-explorer = st.Page("pages/2_Explorer.py", title="Explorer", default=True)
-stemwijzer = st.Page("pages/1_Stemwijzer.py", title="Stemwijzer")

-pg = st.navigation([explorer, stemwijzer])
-pg.run()
+main()
--- a/README.md
+++ b/README.md
@ -1,77 +1,22 @@
-# Stemwijzer
+# stemwijzer

-A Dutch parliamentary voting compass that lets you vote on real Tweede Kamer motions and see which parties match your positions.
+A small project that uses QWEN embeddings for semantic features. The codebase includes an example Ansible package under packages/@ansible/example and helper scripts for deployment.

-![Stemwijzer Explorer](docs/assets/stemwijzer-screenshot.png)
+Embeddings
+- This project uses QWEN embeddings (model: `qwen/qwen3-embedding-4b`) via OpenRouter-compatible APIs.
+- Preferred environment variable: `OPENROUTER_API_KEY` with a fallback to `OPENAI_API_KEY`.

-## What is Stemwijzer?
+Publishing and deploying the Ansible package

-Stemwijzer ingests motions and voting records from the Dutch House of Representatives (Tweede Kamer), stores them in DuckDB, generates AI-powered explanations with an LLM, and presents a Streamlit UI where users can vote on real motions and explore party positions through SVD visualizations, trajectory analysis, and embedding-based similarity search.
+- Package location: `packages/@ansible/example` — this contains the Ansible playbooks and packaging used by CI.
+- To publish the package (CI): create a git tag for the version and provide `NPM_TOKEN` as a secret to the CI runner so it can publish to npm.
+- To deploy the package (CI): set the following repository secrets in your CI pipeline:
+  - `DEPLOY_HOST` (default: `motief.sgeboers.nl`)
+  - `DEPLOY_SSH_KEY` (private key for the `webapps` user)
+  - `DEPLOY_USER` (default: `webapps`)

-## Features
+Defaults
+- DEPLOY_HOST: `motief.sgeboers.nl`
+- DEPLOY_USER: `webapps`

- **Voting Compass** — Vote on real parliamentary motions and see which parties align with your choices
- **Explorer** — Interactive SVD visualizations, party trajectories over time, motion browser, and semantic search
- **Analytics** — SVD decomposition of voting patterns, UMAP projections, clustering, and drift analysis
- **LLM Enrichment** — Automatic generation of layman-friendly motion explanations using QWEN via OpenRouter
-
-## Prerequisites
-
- Python >= 3.13
- [uv](https://docs.astral.sh/uv/) for dependency management
- (Optional) `OPENROUTER_API_KEY` for LLM enrichment
-
-## Quickstart
-
-```bash
-# Clone and enter the repository
-git clone <your-gitea-url>/sgeboers/stemwijzer.git
-cd stemwijzer
-
-# Install dependencies
-uv sync
-
-# Run the Streamlit app
-uv run streamlit run Home.py
-
-# Run the data pipeline (fetch motions, compute embeddings, etc.)
-uv run python pipeline/run_pipeline.py
-
-# Run tests
-uv run pytest tests/ -q
-```
-
-The app will be available at http://localhost:8501.
-
-## Project Structure
-
-```
-├── app.py              # Streamlit UI entrypoint
-├── database.py         # DuckDB schema and queries
-├── api_client.py       # Tweede Kamer OData API client
-├── explorer.py         # Explorer page with SVD visualizations
-├── pipeline/           # Data ingestion and analysis pipelines
-├── analysis/           # SVD, clustering, trajectory modules
-├── tests/              # pytest test suite
-├── docs/               # Documentation, research, and plans
-└── data/motions.db     # DuckDB database (~18 GB)
-```
-
-## Documentation
-
- **[ARCHITECTURE.md](ARCHITECTURE.md)** — Comprehensive architecture overview, tech stack, and contributor guidance
- **[CODE_STYLE.md](CODE_STYLE.md)** — Coding conventions, naming, typing, and testing standards
- **[docs/solutions/](docs/solutions/)** — Documented solutions to past bugs and best practices
-
-## Tech Stack
-
- **Language:** Python 3.13+
- **Data:** DuckDB via ibis-framework
- **UI:** Streamlit + Plotly
- **ML/Analysis:** scipy, scikit-learn, umap-learn
- **LLM:** QWEN via OpenRouter (OpenAI-compatible)
- **Package Manager:** uv
-
-## License
-
-[Your license here]
+See docs/deployment/ansible-package-deploy.md for more detailed deploy instructions and defaults.
--- a/STRATEGY.md
+++ b/STRATEGY.md
@ -1,59 +0,0 @@
---
-name: Stemwijzer
-last_updated: 2026-05-04
---
-
-# Stemwijzer Strategy
-
-## Target problem
-
-Voters in the Netherlands lack accessible, data-driven tools to understand how political parties actually vote in parliament versus how they present themselves. Existing voting compasses are either static (updated once per election cycle) or based on party self-assessment rather than real voting records.
-
-## Our approach
-
-Build the most transparent, data-grounded political compass by ingesting every parliamentary vote from the Tweede Kamer's public API, computing latent political dimensions via SVD, and letting users vote on real motions to see which parties actually align with their positions — not just what parties claim.
-
-## Who it's for
-
-**Primary:** Politically curious Dutch voters who want to move beyond party branding and understand actual parliamentary behavior. They're hiring Stemwijzer to make an informed voting decision based on data rather than rhetoric.
-
-## Key metrics
-
- **Motion coverage** — Percentage of parliamentary motions ingested and available for voting; measured in `data/motions.db`
- **User-session completion rate** — Share of users who vote on at least 10 motions before exiting; measured via Streamlit session state
- **Party-match accuracy** — How well the SVD-derived party positions predict actual voting alignment; measured via cross-validation on held-out motions
- **Pipeline freshness** — Days since last successful pipeline run (fetch → embeddings → SVD); measured via `scripts/health_check.py`
- **Exploration depth** — Average number of tabs visited per session (compass, trajectories, SVD components); measured via Streamlit
-
-## Tracks
-
-### Data pipeline reliability
-
-Make the data ingestion and analysis pipeline robust enough to run unattended and recover from failures.
-
-_Why it serves the approach:_ The entire product depends on accurate, up-to-date voting data. If the pipeline breaks, the compass becomes stale and untrustworthy.
-
-### Analytical depth and transparency
-
-Deepen the SVD analysis and make the political dimensions interpretable and explorable — not just a black-box score.
-
-_Why it serves the approach:_ Users need to trust and understand why parties are positioned where they are. Raw scores without explanation are no better than party branding.
-
-### Agent-native architecture
-
-Restructure the codebase so that agents can safely explore, test, and modify it without human hand-holding — comprehensive tests, clear contracts, and self-documenting structure.
-
-_Why it serves the approach:_ A data-driven product requires constant iteration on analysis methods, visualizations, and feature experiments. Making the codebase agent-native enables rapid, safe iteration.
-
-## Not working on
-
- Mobile native apps — the web-based Streamlit UI is sufficient for the target audience
- Social features (sharing, leaderboards, discussions) — the product is a research tool, not a social network
- Predictive modeling of election outcomes — the focus is on transparency of past/current voting, not forecasting
- Multi-language support — Dutch parliament, Dutch voters, Dutch UI
-
-## Marketing
-
-**One-liner:** Stemwijzer — vote on real parliamentary motions and discover which parties actually match your politics.
-
-**Key message:** Every vote in the Tweede Kamer is public. We compute the patterns, you discover where you fit.
--- a/agent_tools/SYSTEM_PROMPT.md
+++ b/agent_tools/SYSTEM_PROMPT.md
@ -1,82 +0,0 @@
-# Stemwijzer Agent System Prompt
-
-You are the **Stemwijzer Pipeline Operator** — an autonomous agent that operates the Stemwijzer parliamentary voting analysis pipeline.
-
-## Your Identity
-
- You are methodical, precise, and data-driven.
- You prefer structured outputs (JSON, markdown tables) over prose.
- You always verify assumptions with data before making claims.
- You write reports to `reports/` and accumulate learnings in `agent_tools/context.md`.
-
-## Your Capabilities
-
-You have access to these atomic tools. Always use them instead of raw SQL or direct module calls.
-
-### Database Queries (`agent_tools.database`)
- `query_motions(db_path, limit, policy_area, start_date, end_date)` — Query motions with filters
- `query_votes(db_path, motion_id, party)` — Query votes for a motion
- `query_svd_vectors(db_path, window_id, entity_type)` — Query SVD vectors
- `query_party_positions(db_path, window_id)` — Query party axis scores
- `compute_party_positions_from_vectors(db_path, window_id)` — Compute positions when pre-computed table is unavailable
- `query_pipeline_status(db_path)` — Get pipeline freshness and coverage metrics
- `query_embeddings(db_path, motion_id, model, limit)` — Query text/fused embeddings
- `query_similar_motions(db_path, motion_id, top_k)` — Query similar motions from similarity cache
- `query_compass_positions(db_path, window_id)` — Query 2D compass positions for parties/MPs
- `create_motion(db_path, title, description, date, ...)` — Insert a new motion
- `update_motion(db_path, motion_id, **fields)` — Update an existing motion
- `delete_report(output_path)` — Delete a generated report file
-
-### Pipeline Control (`agent_tools.pipeline`)
- `pipeline_run_stage(db_path, stage, window_id, dry_run)` — Run one pipeline stage
- `pipeline_get_logs(stage, lines)` — Get recent log output for a stage
-
-### Content Validation (`agent_tools.content`)
- `validate_motion_coverage(db_path, start_date, end_date)` — Find data gaps
- `validate_layman_explanations(db_path, sample_size)` — Check explanation quality
- `check_embedding_quality(db_path, window_id)` — Measure embedding coverage
-
-### Context & Discovery (`agent_tools.context` + `agent_tools`)
- `list_tools()` — Runtime discovery of all available tools
- `read_context_md()` — Read accumulated agent knowledge
- `append_context_note(note)` — Write a learning to context.md
- `list_recent_reports()` — List recently generated report files
-
-## Decision Criteria
-
-### When to use agent_tools vs direct code
- **Always use `agent_tools`** for database queries, pipeline operations, and content validation
- Only write direct Python/SQL when `agent_tools` lacks the needed capability
- Use `list_tools()` when unsure what primitives exist
-
-### When to run the pipeline
- Data is stale (> 7 days since last motion)
- Pipeline status shows gaps or failures
- User explicitly requests fresh data
-
-### When to validate content
- After pipeline runs
- When SVD labels look suspicious
- Before publishing analysis to users
-
-## Output Conventions
-
-1. **Always return structured data** — dicts and lists, not raw prose
-2. **Include `error` keys** when things fail, with actionable suggestions
-3. **Write reports to `reports/`** — ephemeral, human-readable artifacts
-4. **Update `context.md`** when you learn something about the pipeline
-5. **Be explicit about uncertainty** — "Data shows X (n=123)" not "Probably X"
-
-## Knowledge Base
-
-Before making claims about the data, check `docs/solutions/` for documented patterns:
- SVD labels reflect voting patterns, not semantic content
- Right-wing parties appear on the RIGHT side of all axes
- EVR percentages come from `analysis.political_axis.compute_svd_spectrum`
-
-## Safety
-
- You operate in the same trust boundary as the developer
- You can read the full database but write only to `reports/` and `context.md`
- You cannot delete data or modify pipeline logic
- Always use `dry_run=True` when the user says "what would happen if..."
--- a/agent_tools/init.py
+++ b/agent_tools/init.py
@ -1,82 +0,0 @@
-"""Agent tools for Stemwijzer — atomic primitives for agent operation.
-
-Import individual modules or use `list_tools()` for runtime discovery.
-"""
-
-from __future__ import annotations
-
-from agent_tools.context import (
-    append_context_note,
-    list_recent_reports,
-    read_context_md,
-)
-from agent_tools.database import (
-    compute_party_positions_from_vectors,
-    create_motion,
-    delete_report,
-    query_compass_positions,
-    query_embeddings,
-    query_motions,
-    query_party_positions,
-    query_pipeline_status,
-    query_similar_motions,
-    query_svd_vectors,
-    query_votes,
-    update_motion,
-)
-from agent_tools.pipeline import (
-    pipeline_get_logs,
-    pipeline_run_stage,
-)
-
-__all__ = [
-    # Database
-    "query_motions",
-    "query_votes",
-    "query_svd_vectors",
-    "query_party_positions",
-    "compute_party_positions_from_vectors",
-    "query_pipeline_status",
-    "query_embeddings",
-    "query_similar_motions",
-    "query_compass_positions",
-    "create_motion",
-    "update_motion",
-    "delete_report",
-    # Pipeline
-    "pipeline_run_stage",
-    "pipeline_get_logs",
-    # Context
-    "list_recent_reports",
-    "read_context_md",
-    "append_context_note",
-    # Discovery
-    "list_tools",
-]
-
-
-def list_tools() -> list[dict[str, str]]:
-    """Return a list of all available agent tools with signatures and descriptions.
-
-    Useful for runtime capability discovery and prompt injection.
-    """
-    return [
-        {"name": "query_motions", "signature": "query_motions(db_path, limit=100, policy_area=None, start_date=None, end_date=None)", "description": "Query motions from the database with optional filters."},
-        {"name": "query_votes", "signature": "query_votes(db_path, motion_id=None, party=None)", "description": "Query vote counts or individual votes."},
-        {"name": "query_svd_vectors", "signature": "query_svd_vectors(db_path, window_id, entity_type='motion')", "description": "Query SVD vectors for a window and entity type."},
-        {"name": "query_party_positions", "signature": "query_party_positions(db_path, window_id='current_parliament')", "description": "Query party axis positions for a window."},
-        {"name": "compute_party_positions_from_vectors", "signature": "compute_party_positions_from_vectors(db_path, window_id)", "description": "Compute party positions from MP vectors when pre-computed table is unavailable."},
-        {"name": "query_pipeline_status", "signature": "query_pipeline_status(db_path)", "description": "Query pipeline freshness and coverage metrics (raw counts, no judgment)."},
-        {"name": "query_embeddings", "signature": "query_embeddings(db_path, motion_id=None, model=None, limit=100)", "description": "Query text/fused embeddings."},
-        {"name": "query_similar_motions", "signature": "query_similar_motions(db_path, motion_id, top_k=10)", "description": "Query similar motions from similarity cache."},
-        {"name": "query_compass_positions", "signature": "query_compass_positions(db_path, window_id='current_parliament')", "description": "Query 2D compass positions for parties/MPs."},
-        {"name": "create_motion", "signature": "create_motion(db_path, title, description, date, policy_area='General', voting_results='[]')", "description": "Insert a new motion into the database."},
-        {"name": "update_motion", "signature": "update_motion(db_path, motion_id, **fields)", "description": "Update fields of an existing motion."},
-        {"name": "delete_report", "signature": "delete_report(output_path)", "description": "Delete a generated report file."},
-        {"name": "pipeline_run_stage", "signature": "pipeline_run_stage(db_path, stage, window_id, dry_run=False)", "description": "Run a single pipeline stage (agent decides which and when)."},
-        {"name": "pipeline_get_logs", "signature": "pipeline_get_logs(stage, lines=50)", "description": "Retrieve recent log output for a stage."},
-        {"name": "list_recent_reports", "signature": "list_recent_reports()", "description": "List recently generated report files."},
-        {"name": "read_context_md", "signature": "read_context_md()", "description": "Read accumulated agent knowledge from context.md."},
-        {"name": "append_context_note", "signature": "append_context_note(note)", "description": "Append a note to the accumulated agent knowledge."},
-        {"name": "list_tools", "signature": "list_tools()", "description": "Return a list of all available agent tools."},
-    ]
--- a/agent_tools/analysis.py
+++ b/agent_tools/analysis.py
@ -1,10 +0,0 @@
-"""Analysis primitives for agent operation.
-
-NOTE: Multi-step analytical workflows (party shift, axis stability, SVD label
-validation) have been removed. Agents should compose raw database primitives
-(query_party_positions, query_svd_vectors, etc.) and perform analysis in their
-own reasoning loop.
-
-This module is intentionally empty. If needed, pure computational helpers
-(without business logic) can be added here.
-"""
--- a/agent_tools/content.py
+++ b/agent_tools/content.py
@ -1,133 +0,0 @@
-"""Content validation primitives for agent operation.
-
-Tools for validating data quality, coverage, and content correctness.
-"""
-
-from __future__ import annotations
-
-import logging
-from datetime import datetime, timedelta
-from typing import Any, Dict
-
-from agent_tools.database import query_motions, query_svd_vectors
-
-logger = logging.getLogger(__name__)
-
-
-def validate_motion_coverage(
-    db_path: str,
-    start_date: str,
-    end_date: str,
-) -> Dict[str, Any]:
-    """Validate motion coverage for a date range.
-
-    Returns gaps where no motions exist in the database.
-    """
-    try:
-        motions = query_motions(db_path, limit=10000)
-
-        if not motions:
-            return {
-                "gaps": [{"start": start_date, "end": end_date}],
-                "coverage_rate": 0.0,
-                "total_motions": 0,
-            }
-
-        # Convert dates
-        start = datetime.fromisoformat(start_date)
-        end = datetime.fromisoformat(end_date)
-
-        # Check coverage month by month
-        gaps = []
-        current = start
-        while current < end:
-            month_end = min(current + timedelta(days=31), end)
-            month_motions = [
-                m for m in motions
-                if current <= datetime.fromisoformat(str(m.get("date", "1970-01-01"))) < month_end
-            ]
-            if not month_motions:
-                gaps.append({
-                    "start": current.isoformat(),
-                    "end": month_end.isoformat(),
-                })
-            current = month_end
-
-        total_days = (end - start).days
-        gap_days = sum(
-            (datetime.fromisoformat(g["end"]) - datetime.fromisoformat(g["start"])).days
-            for g in gaps
-        )
-        coverage_rate = round((total_days - gap_days) / total_days, 4) if total_days > 0 else 0.0
-
-        return {
-            "gaps": gaps,
-            "coverage_rate": coverage_rate,
-            "total_motions": len(motions),
-            "date_range": {"start": start_date, "end": end_date},
-        }
-    except Exception as e:
-        logger.exception("validate_motion_coverage failed")
-        return {"gaps": [], "coverage_rate": 0.0, "error": str(e)}
-
-
-def validate_layman_explanations(
-    db_path: str,
-    sample_size: int = 100,
-) -> Dict[str, Any]:
-    """Sample motions and check layman explanation coverage.
-
-    Returns quality metrics for explanations.
-    """
-    try:
-        motions = query_motions(db_path, limit=sample_size)
-
-        if not motions:
-            return {
-                "sample_size": 0,
-                "coverage": 0.0,
-                "empty_count": 0,
-            }
-
-        with_explanation = sum(
-            1 for m in motions
-            if m.get("layman_explanation") and str(m.get("layman_explanation")).strip()
-        )
-
-        return {
-            "sample_size": len(motions),
-            "coverage": round(with_explanation / len(motions), 4),
-            "empty_count": len(motions) - with_explanation,
-            "total_in_db": len(motions),
-        }
-    except Exception as e:
-        logger.exception("validate_layman_explanations failed")
-        return {"sample_size": 0, "coverage": 0.0, "error": str(e)}
-
-
-def check_embedding_quality(
-    db_path: str,
-    window_id: str,
-) -> Dict[str, Any]:
-    """Check embedding coverage for a window.
-
-    Returns raw coverage stats. The agent decides whether coverage is acceptable.
-    """
-    try:
-        vectors = query_svd_vectors(db_path, window_id, entity_type="motion")
-        motions = query_motions(db_path, limit=100000)
-
-        total_motions = len(motions)
-        with_embeddings = len(vectors)
-
-        coverage = round(with_embeddings / total_motions, 4) if total_motions > 0 else 0.0
-
-        return {
-            "window_id": window_id,
-            "total_motions": total_motions,
-            "with_embeddings": with_embeddings,
-            "coverage": coverage,
-        }
-    except Exception as e:
-        logger.exception("check_embedding_quality failed")
-        return {"window_id": window_id, "coverage": 0.0, "error": str(e)}
--- a/agent_tools/context.md
+++ b/agent_tools/context.md
@ -1,20 +0,0 @@
-# Agent Accumulated Context
-
-This file is maintained by the agent. It stores learnings about the pipeline,
-data patterns, and operational notes that persist across sessions.
-
-## How to use this file
-
- The agent reads this at session start for accumulated context
- The agent appends new learnings after each significant operation
- Humans can read this to understand what the agent has discovered
-
---
-
-## Initial State
-
-Pipeline is fresh. No accumulated learnings yet.
-
---
-
-*This file grows over time as the agent operates the pipeline.*
--- a/agent_tools/context.py
+++ b/agent_tools/context.py
@ -1,52 +0,0 @@
-"""Runtime context injection for agent operation.
-
-Filesystem primitives for managing agent accumulated knowledge.
-"""
-
-from __future__ import annotations
-
-import logging
-import os
-from datetime import datetime
-from typing import List
-
-logger = logging.getLogger(__name__)
-
-
-def list_recent_reports() -> List[str]:
-    """List recently generated reports."""
-    try:
-        reports_dir = "reports"
-        if not os.path.exists(reports_dir):
-            return []
-        files = sorted(
-            (f for f in os.listdir(reports_dir) if f.endswith(".md")),
-            key=lambda f: os.path.getmtime(os.path.join(reports_dir, f)),
-            reverse=True,
-        )
-        return files[:10]
-    except Exception:
-        return []
-
-
-def read_context_md() -> str:
-    """Read accumulated knowledge from context.md."""
-    try:
-        path = os.path.join("agent_tools", "context.md")
-        if os.path.exists(path):
-            with open(path, "r", encoding="utf-8") as f:
-                return f.read()
-        return ""
-    except Exception:
-        return ""
-
-
-def append_context_note(note: str) -> None:
-    """Append a learning to context.md."""
-    try:
-        path = os.path.join("agent_tools", "context.md")
-        timestamp = datetime.now().isoformat()
-        with open(path, "a", encoding="utf-8") as f:
-            f.write(f"\n## {timestamp}\n\n{note}\n")
-    except Exception:
-        logger.exception("Failed to append context note")
--- a/agent_tools/database.py
+++ b/agent_tools/database.py
@ -1,376 +0,0 @@
-"""Database query primitives for agent operation.
-
-Thin wrappers around DuckDB that return structured JSON-friendly results.
-All functions accept db_path as first argument and return either list[dict] or dict.
-"""
-
-from __future__ import annotations
-
-import logging
-from typing import Any, Dict, List, Optional
-
-logger = logging.getLogger(__name__)
-
-
-def _connect(db_path: str, read_only: bool = True):
-    import duckdb
-
-    return duckdb.connect(database=db_path, read_only=read_only)
-
-
-def query_motions(
-    db_path: str,
-    *,
-    year: Optional[int] = None,
-    policy_area: Optional[str] = None,
-    limit: int = 100,
-    order: str = "date DESC",
-) -> List[Dict[str, Any]]:
-    """Query motions with optional filters."""
-    try:
-        con = _connect(db_path)
-        conditions = []
-        params = []
-
-        if year is not None:
-            conditions.append("EXTRACT(YEAR FROM date) = ?")
-            params.append(year)
-        if policy_area is not None:
-            conditions.append("policy_area = ?")
-            params.append(policy_area)
-
-        where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""
-        sql = f"""
-            SELECT id, title, description, date, policy_area,
-                   winning_margin, controversy_score, layman_explanation
-            FROM motions
-            {where_clause}
-            ORDER BY {order}
-            LIMIT ?
-        """
-        params.append(limit)
-
-        result = con.execute(sql, params).fetchdf().to_dict("records")
-        con.close()
-        return result
-    except Exception:
-        logger.exception("query_motions failed")
-        return []
-
-
-def query_votes(
-    db_path: str,
-    motion_id: int,
-    party: Optional[str] = None,
-) -> List[Dict[str, Any]]:
-    """Query vote counts for a motion, optionally filtered by party."""
-    try:
-        con = _connect(db_path)
-        if party:
-            sql = """
-                SELECT mp_name, vote
-                FROM mp_votes
-                WHERE motion_id = ? AND mp_name IN (
-                    SELECT mp_name FROM mp_metadata WHERE party = ?
-                )
-            """
-            result = con.execute(sql, (motion_id, party)).fetchdf().to_dict("records")
-        else:
-            sql = "SELECT mp_name, vote FROM mp_votes WHERE motion_id = ?"
-            result = con.execute(sql, (motion_id,)).fetchdf().to_dict("records")
-        con.close()
-        return result
-    except Exception:
-        logger.exception("query_votes failed")
-        return []
-
-
-def query_svd_vectors(
-    db_path: str,
-    window_id: str,
-    entity_type: Optional[str] = None,
-) -> List[Dict[str, Any]]:
-    """Query SVD vectors for a window."""
-    try:
-        con = _connect(db_path)
-        if entity_type:
-            sql = """
-                SELECT entity_id, vector, model
-                FROM svd_vectors
-                WHERE window_id = ? AND entity_type = ?
-            """
-            result = con.execute(sql, (window_id, entity_type)).fetchdf().to_dict("records")
-        else:
-            sql = """
-                SELECT entity_id, entity_type, vector, model
-                FROM svd_vectors
-                WHERE window_id = ?
-            """
-            result = con.execute(sql, (window_id,)).fetchdf().to_dict("records")
-        con.close()
-        return result
-    except Exception:
-        logger.exception("query_svd_vectors failed")
-        return []
-
-
-def query_party_positions(
-    db_path: str,
-    window_id: str,
-) -> List[Dict[str, Any]]:
-    """Query party axis scores for a window."""
-    try:
-        con = _connect(db_path)
-        tables = con.execute(
-            "SELECT table_name FROM information_schema.tables WHERE table_name = 'party_axis_scores'"
-        ).fetchall()
-
-        if not tables:
-            con.close()
-            return []
-
-        result = con.execute(
-            """
-            SELECT party, axis, score
-            FROM party_axis_scores
-            WHERE window_id = ?
-            """,
-            (window_id,),
-        ).fetchdf().to_dict("records")
-        con.close()
-        return result
-    except Exception:
-        logger.exception("query_party_positions failed")
-        return []
-
-
-def compute_party_positions_from_vectors(con, window_id: str) -> List[Dict[str, Any]]:
-    """Compute party positions from MP vectors.
-
-    This is a separate primitive for when party_axis_scores is not pre-computed.
-    """
-    import duckdb
-    if isinstance(con, str):
-        con = duckdb.connect(database=con, read_only=True)
-        should_close = True
-    else:
-        should_close = False
-    rows = con.execute(
-        """
-        SELECT sv.entity_id, sv.vector, mm.party
-        FROM svd_vectors sv
-        JOIN mp_metadata mm ON sv.entity_id = mm.mp_name
-        WHERE sv.window_id = ? AND sv.entity_type = 'mp'
-        """,
-        (window_id,),
-    ).fetchall()
-
-    import json
-    from collections import defaultdict
-
-    party_vectors = defaultdict(list)
-    for mp_name, vector_json, party in rows:
-        vec = json.loads(vector_json) if isinstance(vector_json, str) else vector_json
-        party_vectors[party].append(vec)
-
-    result = []
-    for party, vectors in party_vectors.items():
-        if not vectors:
-            continue
-        dim = len(vectors[0])
-        mean = [sum(v[i] for v in vectors) / len(vectors) for i in range(min(dim, 2))]
-        result.append({
-            "party": party,
-            "axis_1": mean[0] if len(mean) > 0 else 0.0,
-            "axis_2": mean[1] if len(mean) > 1 else 0.0,
-        })
-
-    if should_close:
-        con.close()
-
-    return result
-
-
-def query_pipeline_status(db_path: str) -> Dict[str, Any]:
-    """Return pipeline freshness metrics."""
-    try:
-        con = _connect(db_path)
-
-        motion_count = con.execute("SELECT COUNT(*) FROM motions").fetchone()[0]
-
-        latest = con.execute("SELECT MAX(date) FROM motions").fetchone()
-        latest_motion_date = latest[0] if latest and latest[0] else None
-
-        svd_windows = con.execute(
-            "SELECT COUNT(DISTINCT window_id) FROM svd_vectors"
-        ).fetchone()[0]
-
-        embedding_count = con.execute(
-            "SELECT COUNT(*) FROM svd_vectors WHERE entity_type = 'motion'"
-        ).fetchone()[0]
-
-        con.close()
-
-        return {
-            "motion_count": motion_count,
-            "latest_motion_date": str(latest_motion_date) if latest_motion_date else None,
-            "svd_window_count": svd_windows,
-            "embedding_count": embedding_count,
-        }
-    except Exception:
-        logger.exception("query_pipeline_status failed")
-        return {
-            "motion_count": 0,
-            "latest_motion_date": None,
-            "svd_window_count": 0,
-            "embedding_count": 0,
-            "error": "Failed to query pipeline status",
-        }
-
-
-def query_embeddings(
-    db_path: str,
-    *,
-    motion_id: Optional[int] = None,
-    model: Optional[str] = None,
-    limit: int = 100,
-) -> List[Dict[str, Any]]:
-    """Query fused embeddings for motions."""
-    try:
-        con = _connect(db_path)
-        conditions = []
-        params = []
-
-        if motion_id is not None:
-            conditions.append("motion_id = ?")
-            params.append(motion_id)
-        if model is not None:
-            conditions.append("model = ?")
-            params.append(model)
-
-        where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""
-        sql = f"""
-            SELECT motion_id, vector, model
-            FROM fused_embeddings
-            {where_clause}
-            LIMIT ?
-        """
-        params.append(limit)
-
-        result = con.execute(sql, params).fetchdf().to_dict("records")
-        con.close()
-        return result
-    except Exception:
-        logger.exception("query_embeddings failed")
-        return []
-
-
-def query_similar_motions(
-    db_path: str,
-    motion_id: int,
-    top_k: int = 10,
-) -> List[Dict[str, Any]]:
-    """Query top-k similar motions from similarity cache."""
-    try:
-        con = _connect(db_path)
-        result = con.execute(
-            """
-            SELECT target_motion_id, similarity_score
-            FROM similarity_cache
-            WHERE source_motion_id = ?
-            ORDER BY similarity_score DESC
-            LIMIT ?
-            """,
-            (motion_id, top_k),
-        ).fetchdf().to_dict("records")
-        con.close()
-        return result
-    except Exception:
-        logger.exception("query_similar_motions failed")
-        return []
-
-
-def query_compass_positions(
-    db_path: str,
-    window_id: str,
-) -> List[Dict[str, Any]]:
-    """Query 2D PCA compass positions for MPs in a window."""
-    try:
-        con = _connect(db_path)
-        result = con.execute(
-            """
-            SELECT sv.entity_id, sv.vector, mm.party
-            FROM svd_vectors sv
-            JOIN mp_metadata mm ON sv.entity_id = mm.mp_name
-            WHERE sv.window_id = ? AND sv.entity_type = 'mp'
-            """,
-            (window_id,),
-        ).fetchdf().to_dict("records")
-        con.close()
-        return result
-    except Exception:
-        logger.exception("query_compass_positions failed")
-        return []
-
-
-def create_motion(
-    db_path: str,
-    title: str,
-    description: str = "",
-    date: str = "",
-    policy_area: str = "",
-) -> Dict[str, Any]:
-    """Create a new motion record."""
-    try:
-        con = _connect(db_path, read_only=False)
-        con.execute(
-            """
-            INSERT INTO motions (title, description, date, policy_area)
-            VALUES (?, ?, ?, ?)
-            """,
-            (title, description, date, policy_area),
-        )
-        con.close()
-        return {"created": True, "title": title}
-    except Exception:
-        logger.exception("create_motion failed")
-        return {"created": False, "error": "Failed to create motion"}
-
-
-def update_motion(
-    db_path: str,
-    motion_id: int,
-    **fields: str,
-) -> Dict[str, Any]:
-    """Update a motion record."""
-    try:
-        con = _connect(db_path, read_only=False)
-        allowed = {"title", "description", "date", "policy_area", "layman_explanation"}
-        updates = {k: v for k, v in fields.items() if k in allowed}
-        if not updates:
-            return {"updated": False, "error": "No valid fields to update"}
-
-        set_clause = ", ".join(f"{k} = ?" for k in updates)
-        params = list(updates.values()) + [motion_id]
-        con.execute(
-            f"UPDATE motions SET {set_clause} WHERE id = ?",
-            params,
-        )
-        con.close()
-        return {"updated": True, "motion_id": motion_id, "fields": list(updates.keys())}
-    except Exception:
-        logger.exception("update_motion failed")
-        return {"updated": False, "error": "Failed to update motion"}
-
-
-def delete_report(output_path: str) -> Dict[str, Any]:
-    """Delete a generated report file."""
-    try:
-        import os
-        if os.path.exists(output_path):
-            os.remove(output_path)
-            return {"deleted": True, "path": output_path}
-        return {"deleted": False, "error": "File not found"}
-    except Exception:
-        logger.exception("delete_report failed")
-        return {"deleted": False, "error": "Failed to delete report"}
--- a/agent_tools/pipeline.py
+++ b/agent_tools/pipeline.py
@ -1,60 +0,0 @@
-"""Pipeline control primitives for agent operation.
-
-Thin execution wrappers. The agent decides which stages to run and in what order.
-"""
-
-from __future__ import annotations
-
-import logging
-from typing import Any, Dict, List, Optional
-
-logger = logging.getLogger(__name__)
-
-
-def pipeline_run_stage(
-    db_path: str,
-    stage: str,
-    window_id: Optional[str] = None,
-    dry_run: bool = False,
-) -> Dict[str, Any]:
-    """Run a single pipeline stage.
-
-    Args:
-        db_path: Path to DuckDB database
-        stage: Pipeline stage name (e.g. "ingestion", "svd", "similarity")
-        window_id: Optional window identifier (e.g. "2024", "current_parliament")
-        dry_run: If True, return planned actions without executing
-
-    Returns:
-        dict with status and metadata
-    """
-    result = {
-        "stage": stage,
-        "window_id": window_id,
-        "dry_run": dry_run,
-        "status": "planned" if dry_run else "not_implemented",
-    }
-
-    if dry_run:
-        return result
-
-    # Actual execution would delegate to pipeline/run_pipeline.py
-    # For now, mark as not implemented — the agent can still plan and diagnose
-    logger.info("pipeline_run_stage: %s (dry_run=%s)", stage, dry_run)
-    return result
-
-
-def pipeline_get_logs(
-    db_path: str,
-    stage: Optional[str] = None,
-    lines: int = 50,
-) -> List[str]:
-    """Return recent log lines for a stage.
-
-    Note: This is a placeholder. In a full implementation, this would read
-    from a structured log store or log files.
-    """
-    # Placeholder: return empty list
-    # Real implementation would read from logging infrastructure
-    logger.info("pipeline_get_logs requested for stage=%s lines=%d", stage, lines)
-    return []
--- a/agent_tools/reports.py
+++ b/agent_tools/reports.py
@ -1,8 +0,0 @@
-"""Report generation primitives for agent operation.
-
-NOTE: The report template engine (generate_report, _render_report) has been
-removed. Agents should compose markdown in their reasoning loop and write it
-directly using standard file I/O.
-
-This module is intentionally empty.
-"""
--- a/analysis/config.py
+++ b/analysis/config.py
@ -267,65 +267,3 @@ _PARTY_NORMALIZE: dict[str, str] = {
    "Lid Keijzer": "BBB",
    "Groep Markuszower": "PVV",
 }
-
-# ---------------------------------------------------------------------------
-# Application configuration (migrated from root config.py)
-# ---------------------------------------------------------------------------
-
-import os
-from dataclasses import dataclass
-
-
-@dataclass
-class Config:
-    # Database settings
-    DATABASE_PATH = "data/motions.db"
-
-    # API settings
-    TWEEDE_KAMER_ODATA_API = "https://gegevensmagazijn.tweedekamer.nl/OData/v4/2.0"
-    API_TIMEOUT = 30
-    API_BATCH_SIZE = 250
-    API_MAX_LIMIT = 250
-
-    # AI settings
-    OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
-    OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
-    QWEN_MODEL = "qwen/qwen-2.5-72b-instruct"
-
-    # App settings
-    DEFAULT_MOTION_COUNT = 10
-    DEFAULT_WINNING_MARGIN_MIN = 0
-    DEFAULT_WINNING_MARGIN_MAX = 100
-    SESSION_TIMEOUT_DAYS = 30
-
-    # Policy areas
-    POLICY_AREAS = [
-        "Alle",
-        "Economie",
-        "Klimaat",
-        "Immigratie",
-        "Zorg",
-        "Onderwijs",
-        "Defensie",
-        "Sociale Zaken",
-        "Algemeen",
-    ]
-
-    # Scraper defaults
-    BASE_URL = "https://www.tweedekamer.nl/zoeken/zoekresultaten"
-    SCRAPING_DELAY = int(os.getenv("SCRAPING_DELAY", "5"))
-
-
-config = Config()
-
-__all__ = [
-    "PARTY_COLOURS",
-    "SVD_THEMES",
-    "KNOWN_MAJOR_PARTIES",
-    "CURRENT_PARLIAMENT_PARTIES",
-    "_PARTY_NORMALIZE",
-    "CANONICAL_RIGHT",
-    "CANONICAL_LEFT",
-    "Config",
-    "config",
-]
--- a/analysis/explorer_data.py
+++ b/analysis/explorer_data.py
@ -23,7 +23,6 @@ from analysis.config import CURRENT_PARLIAMENT_PARTIES, _PARTY_NORMALIZE
 __all__ = [
    "get_available_windows",
    "get_uniform_dim_windows",
-    "load_positions",
    "load_party_map",
    "load_active_mps",
    "load_mp_vectors_by_window",
@ -38,9 +37,6 @@ __all__ = [
    "load_motions_df",
    "query_similar",
    "compute_party_axis_scores",
-    "get_aligned_party_scores",
-    "compute_party_discipline",
-    "_get_aligned_trajectory_scores",
 ]

 logger = logging.getLogger(__name__)
@ -144,10 +140,25 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
    """Return party scores for all windows (non-aligned).

    Returns dict mapping party_abbrev -> list of axis scores, one per window.
-    Computed as the mean of individual MP vectors per party.
    """
    try:
-        return compute_party_axis_scores(load_mp_vectors_by_party(db_path))
+        con = duckdb.connect(database=db_path, read_only=True)
+        rows = con.execute(
+            """
+            SELECT party_abbrev, window_id, x_axis, y_axis
+            FROM party_axis_scores
+            ORDER BY party_abbrev, window_id
+            """
+        ).fetchall()
+        con.close()
+
+        scores: Dict[str, List[float]] = {}
+        for party, window, x, y in rows:
+            if party not in scores:
+                scores[party] = []
+            if x is not None and y is not None:
+                scores[party].extend([x, y])
+        return scores
    except Exception:
        logger.exception("Failed to load party axis scores")
        return {}
@ -156,14 +167,21 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
 def load_party_axis_scores_for_window(
    db_path: str, window: str
 ) -> Dict[str, List[float]]:
-    """Return party scores for a specific window.
-
-    Computed as the mean of individual MP vectors per party for the window.
-    """
+    """Return party scores for a specific window (aligned)."""
    try:
-        return compute_party_axis_scores(
-            load_mp_vectors_by_party_for_window(db_path, window)
-        )
+        con = duckdb.connect(database=db_path, read_only=True)
+        rows = con.execute(
+            """
+            SELECT party_abbrev, x_axis, y_axis
+            FROM party_axis_scores
+            WHERE window_id = ?
+            ORDER BY party_abbrev
+            """,
+            [window],
+        ).fetchall()
+        con.close()
+
+        return {party: [x or 0.0, y or 0.0] for party, x, y in rows}
    except Exception:
        logger.exception("Failed to load party axis scores for window %s", window)
        return {}
@ -173,10 +191,6 @@ def load_party_scores_all_windows(db_path: str) -> Dict[str, List[List[float]]]:
    """Return party scores across all windows (non-aligned)."""
    try:
        con = duckdb.connect(database=db_path, read_only=True)
-        table_exists = con.execute(
-            "SELECT COUNT(*) FROM information_schema.tables WHERE table_name = 'party_axis_scores'"
-        ).fetchone()[0]
-        if table_exists:
        rows = con.execute(
            """
            SELECT party_abbrev, window_id, x_axis, y_axis
@ -197,31 +211,8 @@ def load_party_scores_all_windows(db_path: str) -> Dict[str, List[List[float]]]:
            else:
                scores[party].append([0.0, 0.0])
        return scores
-        con.close()
-    except Exception:
-        logger.exception("Failed to load party scores all windows from table")
-
-    # Fallback: compute from positions when table does not exist
-    try:
-        positions_by_window, _ = load_positions(db_path, "annual")
-        _party_map = load_party_map(db_path)
-        scores: Dict[str, List[List[float]]] = {}
-        for window, window_pos in positions_by_window.items():
-            party_coords: Dict[str, List[Tuple[float, float]]] = {}
-            for mp_name, (x, y) in window_pos.items():
-                party = _party_map.get(
-                    mp_name, _party_map.get(mp_name.split("(")[0].strip(), None)
-                )
-                if party:
-                    party_coords.setdefault(party, []).append((x, y))
-            for party, coords in party_coords.items():
-                if coords:
-                    mean_x = float(np.mean([c[0] for c in coords]))
-                    mean_y = float(np.mean([c[1] for c in coords]))
-                    scores.setdefault(party, []).append([mean_x, mean_y])
-        return scores
    except Exception:
-        logger.exception("Failed to compute party scores all windows from positions")
+        logger.exception("Failed to load party scores all windows")
        return {}


@ -231,10 +222,6 @@ def load_party_scores_all_windows_aligned(
    """Return party scores across all windows (Procrustes-aligned)."""
    try:
        con = duckdb.connect(database=db_path, read_only=True)
-        table_exists = con.execute(
-            "SELECT COUNT(*) FROM information_schema.tables WHERE table_name = 'party_axis_scores'"
-        ).fetchone()[0]
-        if table_exists:
        rows = con.execute(
            """
            SELECT party_abbrev, window_id, x_axis_aligned, y_axis_aligned
@ -255,31 +242,8 @@ def load_party_scores_all_windows_aligned(
            else:
                scores[party].append([0.0, 0.0])
        return scores
-        con.close()
    except Exception:
-        logger.exception("Failed to load aligned party scores all windows from table")
-
-    # Fallback: compute from positions when table does not exist
-    try:
-        positions_by_window, _ = load_positions(db_path, "annual")
-        _party_map = load_party_map(db_path)
-        scores: Dict[str, List[List[float]]] = {}
-        for window, window_pos in positions_by_window.items():
-            party_coords: Dict[str, List[Tuple[float, float]]] = {}
-            for mp_name, (x, y) in window_pos.items():
-                party = _party_map.get(
-                    mp_name, _party_map.get(mp_name.split("(")[0].strip(), None)
-                )
-                if party:
-                    party_coords.setdefault(party, []).append((x, y))
-            for party, coords in party_coords.items():
-                if coords:
-                    mean_x = float(np.mean([c[0] for c in coords]))
-                    mean_y = float(np.mean([c[1] for c in coords]))
-                    scores.setdefault(party, []).append([mean_x, mean_y])
-        return scores
-    except Exception:
-        logger.exception("Failed to compute aligned party scores all windows from positions")
+        logger.exception("Failed to load aligned party scores all windows")
        return {}


@ -346,20 +310,13 @@ def load_party_mp_vectors(db_path: str) -> Dict[str, List[np.ndarray]]:


 def load_scree_data(db_path: str) -> List[float]:
-    """Load scree plot data (explained variance) for current_parliament.
-
-    First tries to read the cached metadata row from svd_vectors.
-    Falls back to on-the-fly computation via compute_svd_spectrum for
-    backward compatibility with databases that haven't stored it yet.
-    """
+    """Load scree plot data (explained variance) for current_parliament."""
    try:
        con = duckdb.connect(database=db_path, read_only=True)
        row = con.execute(
            """
-            SELECT vector FROM svd_vectors
-            WHERE window_id = 'current_parliament'
-              AND entity_type = 'metadata'
-              AND entity_id = 'explained_variance'
+            SELECT sv_metadata FROM svd_vectors
+            WHERE window_id = 'current_parliament' AND entity_type = 'singular_values'
            LIMIT 1
            """
        ).fetchone()
@ -369,11 +326,7 @@ def load_scree_data(db_path: str) -> List[float]:
            import json

            return json.loads(row[0])
-
-        # Fallback: compute dynamically for backward compatibility
-        from analysis.political_axis import compute_svd_spectrum
-
-        return compute_svd_spectrum(db_path)
+        return []
    except Exception:
        logger.exception("Failed to load scree data")
        return []
@ -614,154 +567,3 @@ def compute_party_axis_scores(
    except Exception:
        logger.exception("Failed to compute party axis scores")
        return {}
-
-
-def load_positions(
-    db_path: str, window_size: str = "annual"
-) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict]:
-    """Compute 2D positions per window using PCA on aligned SVD vectors.
-
-    Returns:
-        positions_by_window: {window_id: {entity_name: (x, y)}}
-        axis_def: dict with x_axis, y_axis, method keys
-    """
-    from analysis.political_axis import compute_2d_axes
-
-    all_available = get_uniform_dim_windows(db_path)
-
-    if not all_available:
-        return {}, {}
-
-    positions_by_window, axis_def = compute_2d_axes(
-        db_path,
-        window_ids=all_available,
-        method="pca",
-        pca_residual=True,
-        normalize_vectors=True,
-    )
-
-    try:
-        from analysis.axis_classifier import classify_axes
-
-        axis_def = classify_axes(positions_by_window, axis_def, db_path)
-    except Exception:
-        logger.exception("classify_axes failed; using generic axis labels")
-
-    if window_size == "annual":
-        annual_keys = set(w for w in all_available if "-Q" not in w)
-        positions_by_window = {
-            w: v for w, v in positions_by_window.items() if w in annual_keys
-        }
-
-    return positions_by_window, axis_def
-
-
-def get_aligned_party_scores(
-    db_path: str, window: str, active_mps: set | None = None
-) -> Dict[str, np.ndarray]:
-    """Get party scores for all N components from aligned PCA positions.
-
-    For current_parliament, pass active_mps to filter to only seated MPs
-    (matching the compass behaviour). Historical windows include all MPs.
-    """
-    from analysis.political_axis import compute_nd_axes
-
-    annual_windows = get_uniform_dim_windows(db_path)
-    scores_by_window, _ = compute_nd_axes(
-        db_path, window_ids=annual_windows, n_components=10
-    )
-    window_scores = scores_by_window.get(window, {})
-    if not window_scores:
-        return {}
-
-    if window == "current_parliament" and active_mps is not None:
-        window_scores = {mp: sc for mp, sc in window_scores.items() if mp in active_mps}
-
-    _party_map = load_party_map(db_path)
-
-    n_comps = 10
-    party_scores_agg: Dict[str, List[np.ndarray]] = {}
-    for mp_name, scores in window_scores.items():
-        party = _party_map.get(
-            mp_name, _party_map.get(mp_name.split("(")[0].strip(), None)
-        )
-        if party:
-            party_scores_agg.setdefault(party, []).append(scores[:n_comps])
-
-    return {
-        party: np.mean(np.vstack(score_list), axis=0)
-        for party, score_list in party_scores_agg.items()
-        if score_list
-    }
-
-
-def compute_party_discipline(
-    db_path: str,
-    start_date: str,
-    end_date: str,
-) -> pd.DataFrame:
-    """Compute per-party voting discipline (Rice index) for roll-call votes in a date range.
-
-    Only individual MP vote rows are used (mp_name LIKE '%,%').
-    Returns a DataFrame with columns [party, n_motions, discipline] sorted by discipline ascending.
-    Returns an empty DataFrame if fewer than 1 qualifying motion exists or on any DB error.
-    """
-    from analysis import trajectory
-
-    return trajectory.compute_party_discipline(db_path, start_date, end_date)
-
-
-def _get_aligned_trajectory_scores(
-    db_path: str, windows: List[str], n_components: int = 10
-) -> Dict[str, Dict[str, List[float]]]:
-    """Get aligned PCA scores for all windows as {window: {party: [scores per component]}}.
-
-    Uses compute_nd_axes to get PCA-projected, flip-corrected scores across all windows,
-    ensuring consistency with the single-window SVD components view.
-
-    Computes the global PCA basis on *all* uniform-dim windows (matching
-    get_aligned_party_scores) so that trajectory scores are numerically
-    consistent with the single-window view even when the caller passes a
-    subset of windows for display.
-    """
-    from analysis.political_axis import compute_nd_axes
-
-    all_uniform_windows = get_uniform_dim_windows(db_path)
-    scores_by_window, _ = compute_nd_axes(
-        db_path, window_ids=all_uniform_windows, n_components=n_components
-    )
-    if not scores_by_window:
-        return {}
-
-    party_map = load_party_map(db_path)
-    active_mps = load_active_mps(db_path)
-
-    result: Dict[str, Dict[str, List[float]]] = {}
-    for window in windows:
-        window_scores = scores_by_window.get(window, {})
-        if not window_scores:
-            continue
-
-        # For current_parliament, match single-window view by filtering to
-        # only MPs who are still seated (active). Historical windows include
-        # all MPs present in that window.
-        if window == "current_parliament":
-            window_scores = {
-                mp: sc for mp, sc in window_scores.items() if mp in active_mps
-            }
-
-        party_vecs: Dict[str, List[np.ndarray]] = {}
-        for mp_name, scores in window_scores.items():
-            party = party_map.get(
-                mp_name, party_map.get(mp_name.split("(")[0].strip(), None)
-            )
-            if party:
-                party_vecs.setdefault(party, []).append(scores[:n_components])
-
-        result[window] = {
-            party: np.mean(np.vstack(score_list), axis=0).tolist()
-            for party, score_list in party_vecs.items()
-            if score_list
-        }
-
-    return result
--- a/analysis/tabs/init.py
+++ b/analysis/tabs/init.py
@ -6,12 +6,16 @@ Each module contains a `build_<tab>_tab()` function that implements one tab.

 from analysis.tabs.compass import build_compass_tab
 from analysis.tabs.trajectories import build_trajectories_tab
+from analysis.tabs.search import build_search_tab
+from analysis.tabs.browser import build_browser_tab
 from analysis.tabs.components import build_svd_components_tab
 from analysis.tabs.quiz import build_mp_quiz_tab

 __all__ = [
    "build_compass_tab",
    "build_trajectories_tab",
+    "build_search_tab",
+    "build_browser_tab",
    "build_svd_components_tab",
    "build_mp_quiz_tab",
 ]
--- a/analysis/tabs/_rendering.py
+++ b/analysis/tabs/_rendering.py
@ -1,797 +0,0 @@
-"""Rendering helpers for explorer tabs.
-
-This module contains all Plotly/Streamlit rendering functions extracted from
-explorer.py.  It is import-safe: plotly and streamlit are optional.
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-from typing import Dict, List, Optional, Tuple
-
-try:
-    import plotly.express as px
-    import plotly.graph_objects as go
-except Exception:
-    px = None
-    import types
-
-    class _DummyTrace:
-        def __init__(self, **kwargs):
-            self.name = kwargs.get("name")
-            self.x = kwargs.get("x")
-            self.y = kwargs.get("y")
-            self.text = kwargs.get("text")
-            self.customdata = kwargs.get("customdata")
-
-    class _DummyFigure:
-        def __init__(self):
-            self.data = []
-
-        def add_trace(self, trace):
-            if isinstance(trace, _DummyTrace):
-                self.data.append(trace)
-            else:
-                try:
-                    name = getattr(trace, "name", None)
-                    x = getattr(trace, "x", None)
-                    y = getattr(trace, "y", None)
-                    text = getattr(trace, "text", None)
-                    customdata = getattr(trace, "customdata", None)
-                except Exception:
-                    name = trace.get("name") if hasattr(trace, "get") else None
-                    x = trace.get("x") if hasattr(trace, "get") else None
-                    y = trace.get("y") if hasattr(trace, "get") else None
-                    text = trace.get("text") if hasattr(trace, "get") else None
-                    customdata = (
-                        trace.get("customdata") if hasattr(trace, "get") else None
-                    )
-                self.data.append(
-                    _DummyTrace(name=name, x=x, y=y, text=text, customdata=customdata)
-                )
-
-        def add_annotation(self, *args, **kwargs):
-            return None
-
-        def update_layout(self, **kwargs):
-            return None
-
-        def update_traces(self, **kwargs):
-            return None
-
-        def add_hline(self, **kwargs):
-            return None
-
-    go = types.SimpleNamespace(
-        Figure=_DummyFigure,
-        Scatter=lambda **kwargs: _DummyTrace(**kwargs),
-        Bar=lambda **kwargs: _DummyTrace(**kwargs),
-    )
-
-try:
-    import streamlit as st
-except Exception:
-
-    class _DummySt:
-        def cache_data(self, *args, **kwargs):
-            def _decorator(func):
-                return func
-
-            return _decorator
-
-        def markdown(self, *args, **kwargs):
-            return None
-
-        def subheader(self, *args, **kwargs):
-            return None
-
-        def plotly_chart(self, *args, **kwargs):
-            return None
-
-        def caption(self, *args, **kwargs):
-            return None
-
-        def text_area(self, *args, **kwargs):
-            return None
-
-        def json(self, *args, **kwargs):
-            return None
-
-        def checkbox(self, *args, **kwargs):
-            return kwargs.get("value", False)
-
-        def warning(self, *args, **kwargs):
-            return None
-
-        def info(self, *args, **kwargs):
-            return None
-
-        def error(self, *args, **kwargs):
-            return None
-
-        def success(self, *args, **kwargs):
-            return None
-
-        def selectbox(self, *args, **kwargs):
-            opts = (
-                kwargs.get("options")
-                if kwargs.get("options") is not None
-                else (args[1] if len(args) > 1 else [])
-            )
-            return opts[0] if opts else None
-
-        def multiselect(self, *args, **kwargs):
-            opts = (
-                kwargs.get("options")
-                if kwargs.get("options") is not None
-                else (args[1] if len(args) > 1 else [])
-            )
-            default = kwargs.get("default")
-            if default is not None:
-                return default
-            return opts[:6] if opts else []
-
-        def number_input(self, *args, **kwargs):
-            return kwargs.get("value") if "value" in kwargs else 1
-
-        def slider(self, *args, **kwargs):
-            return kwargs.get("value") if "value" in kwargs else 0.35
-
-        def select_slider(self, *args, **kwargs):
-            return kwargs.get("value") if "value" in kwargs else (None, None)
-
-        def expander(self, *args, **kwargs):
-            class _Ctx:
-                def __enter__(self_inner):
-                    return self_inner
-
-                def __exit__(self_inner, exc_type, exc, tb):
-                    return False
-
-            return _Ctx()
-
-        def columns(self, *args, **kwargs):
-            class _Col:
-                def markdown(self, *a, **k):
-                    return None
-
-                def metric(self, *a, **k):
-                    return None
-
-                def dataframe(self, *a, **k):
-                    return None
-
-                def write(self, *a, **k):
-                    return None
-
-                def text_input(self, *a, **k):
-                    return None
-
-            n = len(args[0]) if args else 1
-            return tuple(_Col() for _ in range(n))
-
-        def form(self, *args, **kwargs):
-            class _Ctx:
-                def __enter__(self_inner):
-                    return self_inner
-
-                def __exit__(self_inner, exc_type, exc, tb):
-                    return False
-
-            return _Ctx()
-
-        def form_submit_button(self, *args, **kwargs):
-            return False
-
-        def button(self, *args, **kwargs):
-            return False
-
-        def rerun(self, *args, **kwargs):
-            return None
-
-        def divider(self, *args, **kwargs):
-            return None
-
-        def spinner(self, *args, **kwargs):
-            class _Ctx:
-                def __enter__(self_inner):
-                    return self_inner
-
-                def __exit__(self_inner, exc_type, exc, tb):
-                    return False
-
-            return _Ctx()
-
-        def write(self, *args, **kwargs):
-            return None
-
-        def dataframe(self, *args, **kwargs):
-            return None
-
-        def set_page_config(self, *args, **kwargs):
-            return None
-
-        def title(self, *args, **kwargs):
-            return None
-
-        def sidebar(self, *args, **kwargs):
-            return self
-
-        def radio(self, *args, **kwargs):
-            return kwargs.get("value") if "value" in kwargs else None
-
-        def text_input(self, *args, **kwargs):
-            return kwargs.get("value", "")
-
-        def tabs(self, *args, **kwargs):
-            n = len(args[0]) if args else 1
-            return [self for _ in range(n)]
-
-        @property
-        def session_state(self):
-            if not hasattr(self, "_session_state"):
-                self._session_state = {}
-            return self._session_state
-
-    st = _DummySt()
-
-from analysis.config import PARTY_COLOURS
-
-logger = logging.getLogger(__name__)
-
-
-def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
-    """Render a scree plot showing relative SVD component importance.
-
-    Highlighted bars for the top-2 components (used in the compass); muted bars
-    for the rest.  A cumulative-variance dashed line on the same y-axis helps
-    spot the elbow.  A 50 % cumulative threshold line is drawn for reference.
-
-    Args:
-        importances: List of importance values sorted descending (from load_scree_data).
-        n_show: How many components to display (default: first 15).
-    """
-    if not importances:
-        return
-    data = list(importances[:n_show])
-    ranks = list(range(1, len(data) + 1))
-
-    cumsum = []
-    running = 0.0
-    for v in data:
-        running += v
-        cumsum.append(running)
-
-    n_highlight = 2
-    bar_colours = [
-        "#1565C0" if i < n_highlight else "#90CAF9" for i in range(len(data))
-    ]
-
-    fig = go.Figure()
-
-    fig.add_trace(
-        go.Bar(
-            x=ranks,
-            y=data,
-            marker_color=bar_colours,
-            hovertemplate="As %{x}<br><b>%{y:.1f}%</b> verklaarde variantie<extra></extra>",
-            showlegend=False,
-        )
-    )
-
-    fig.add_trace(
-        go.Scatter(
-            x=ranks,
-            y=cumsum,
-            mode="lines+markers",
-            line={"color": "#F57C00", "width": 2, "dash": "dot"},
-            marker={"size": 5, "color": "#F57C00"},
-            hovertemplate="As %{x}<br>Cumulatief: <b>%{y:.1f}%</b><extra></extra>",
-            name="Cumulatief",
-            showlegend=True,
-        )
-    )
-
-    fig.add_hline(
-        y=50,
-        line_dash="dash",
-        line_color="#BDBDBD",
-        line_width=1,
-        annotation_text="50%",
-        annotation_position="right",
-        annotation_font_color="#9E9E9E",
-        annotation_font_size=11,
-    )
-
-    for i in range(min(n_highlight, len(data))):
-        fig.add_annotation(
-            x=ranks[i],
-            y=data[i] + 0.3,
-            text=f"{data[i]:.1f}%",
-            showarrow=False,
-            font={"size": 11, "color": "#1565C0"},
-            yanchor="bottom",
-        )
-
-    fig.update_layout(
-        height=280,
-        margin={"l": 10, "r": 50, "t": 30, "b": 40},
-        title={
-            "text": "Belang per SVD-as",
-            "font": {"size": 13, "color": "#555555"},
-            "x": 0.02,
-            "xanchor": "left",
-        },
-        legend={
-            "orientation": "h",
-            "x": 0.5,
-            "xanchor": "center",
-            "y": 1.08,
-            "font": {"size": 11},
-        },
-        xaxis={
-            "title": {"text": "As (rang)", "font": {"size": 11}},
-            "tickmode": "linear",
-            "tick0": 1,
-            "dtick": 1,
-            "showline": False,
-            "showgrid": False,
-        },
-        yaxis={
-            "title": {"text": "% van totale variantie", "font": {"size": 11}},
-            "showline": False,
-            "showgrid": True,
-            "gridcolor": "#eeeeee",
-            "ticksuffix": "%",
-            "range": [0, max(cumsum) * 1.08],
-        },
-        plot_bgcolor="rgba(0,0,0,0)",
-        paper_bgcolor="rgba(0,0,0,0)",
-        bargap=0.25,
-    )
-    st.plotly_chart(fig, use_container_width=True)
-
-
-def _build_party_axis_figure(
-    party_coords: Dict[str, Tuple[float, float]],
-    comp_sel: int,
-    theme: dict,
-    bootstrap_data: Optional[Dict[str, Dict]] = None,
-) -> Optional[go.Figure]:
-    """Build a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`.
-
-    Accepts explicit per-party 2D coordinates (x,y) and uses the component selection to
-    pick the value (comp_sel==1 -> x, comp_sel==2 -> y). This makes the API explicit and
-    avoids indexing into long SVD vectors.
-
-    Returns go.Figure or None if no data available.
-    """
-    if not party_coords:
-        return None
-
-    if comp_sel not in (1, 2):
-        raise ValueError(
-            "_build_party_axis_figure only supports comp_sel 1 or 2 when using explicit coords"
-        )
-
-    axis_idx = comp_sel - 1
-    flip = theme.get("flip", False)
-
-    parties = []
-    scores = []
-    colours = []
-
-    for party, val in party_coords.items():
-        try:
-            if hasattr(val, "__len__") and len(val) == 2:
-                x, y = val
-                score = float(x if axis_idx == 0 else y)
-            else:
-                score = float(val[axis_idx])
-
-            if flip:
-                score = -score
-        except Exception:
-            continue
-
-        parties.append(party)
-        scores.append(score)
-        colours.append(PARTY_COLOURS.get(party, "#9E9E9E"))
-
-    if not scores:
-        return None
-
-    hover = []
-    symbols = []
-    if bootstrap_data:
-        for p, s in zip(parties, scores):
-            bd = bootstrap_data.get(p)
-            if bd:
-                n_mps = bd.get("n_mps", "?")
-                ci_low = None
-                ci_high = None
-                try:
-                    ci_low = float(bd["ci_lower"][axis_idx])
-                    ci_high = float(bd["ci_upper"][axis_idx])
-                except Exception:
-                    pass
-                if ci_low is not None and ci_high is not None:
-                    hover.append(
-                        f"{p}: {s:.3f} (N={n_mps}, 95%-BI: [{ci_low:.3f}, {ci_high:.3f}])"
-                    )
-                else:
-                    hover.append(f"{p}: {s:.3f} (N={n_mps})")
-                symbols.append("diamond" if n_mps == 1 else "circle")
-            else:
-                hover.append(f"{p}: {s:.3f}")
-                symbols.append("circle")
-        marker_kwargs = {"size": 14, "color": colours, "symbol": symbols}
-    else:
-        hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]
-        marker_kwargs = {"size": 14, "color": colours}
-
-    fig = go.Figure()
-    x_min, x_max = min(scores) * 1.15, max(scores) * 1.15
-    if x_min == x_max:
-        x_min, x_max = x_min - 1, x_max + 1
-    fig.add_trace(
-        go.Scatter(
-            x=[x_min, x_max],
-            y=[0, 0],
-            mode="lines",
-            line={"color": "#cccccc", "width": 1},
-            hoverinfo="skip",
-            showlegend=False,
-        )
-    )
-
-    scatter_kwargs = {
-        "x": scores,
-        "y": [0] * len(scores),
-        "mode": "markers+text",
-        "text": parties,
-        "textposition": "top center",
-        "marker": marker_kwargs,
-        "hovertext": hover,
-        "hoverinfo": "text",
-        "showlegend": False,
-    }
-    fig.add_trace(go.Scatter(**scatter_kwargs))
-
-    pos_pole = theme.get("positive_pole", "")
-    neg_pole = theme.get("negative_pole", "")
-    left_label = neg_pole
-    right_label = pos_pole
-
-    fig.update_layout(
-        height=160,
-        margin={"l": 10, "r": 10, "t": 10, "b": 30},
-        xaxis={
-            "title": f"← {left_label}  |  {right_label} →",
-            "showticklabels": False,
-            "showline": False,
-            "showgrid": False,
-            "zeroline": False,
-        },
-        yaxis={"visible": False, "range": [-1, 2]},
-        plot_bgcolor="rgba(0,0,0,0)",
-        paper_bgcolor="rgba(0,0,0,0)",
-    )
-    return fig
-
-
-def _render_party_axis_chart(
-    party_coords: Dict[str, Tuple[float, float]],
-    comp_sel: int,
-    theme: dict,
-    bootstrap_data: Optional[Dict[str, Dict]] = None,
-) -> None:
-    """Render a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`.
-
-    Expects explicit per-party coords mapping (party -> (x,y)) for components 1 & 2.
-    """
-    fig = _build_party_axis_figure(party_coords, comp_sel, theme, bootstrap_data)
-    if fig is None:
-        st.caption("_Partijdata niet beschikbaar voor deze as._")
-        return
-    st.plotly_chart(fig, use_container_width=True)
-
-
-def _render_party_axis_chart_1d(
-    party_coords: Dict[str, Tuple[float, ...]],
-    comp_sel: int,
-    theme: dict,
-) -> None:
-    """Render a 1D horizontal scatter of party positions on SVD component `comp_sel`.
-
-    Uses the same format as components 1-2: parties as markers on a horizontal line
-    with axis title showing poles with arrows.
-
-    Args:
-        party_coords: Dict mapping party name to tuple of scores (score_for_comp,)
-        comp_sel: SVD component number (1-indexed)
-        theme: Dict with label, positive_pole, negative_pole, flip
-    """
-    if not party_coords:
-        st.caption("_Partijdata niet beschikbaar voor deze as._")
-        return
-
-    parties = []
-    scores = []
-    colours = []
-
-    for party, coords in party_coords.items():
-        try:
-            score = float(coords[0])
-            parties.append(party)
-            scores.append(score)
-            colours.append(PARTY_COLOURS.get(party, "#9E9E9E"))
-        except Exception:
-            continue
-
-    if not scores:
-        st.caption("_Partijdata niet beschikbaar voor deze as._")
-        return
-
-    flip = theme.get("flip", False)
-    if flip:
-        scores = [-s for s in scores]
-
-    hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]
-
-    fig = go.Figure()
-    x_min, x_max = min(scores) * 1.15, max(scores) * 1.15
-    if x_min == x_max:
-        x_min, x_max = x_min - 1, x_max + 1
-
-    fig.add_trace(
-        go.Scatter(
-            x=[x_min, x_max],
-            y=[0, 0],
-            mode="lines",
-            line={"color": "#cccccc", "width": 1},
-            hoverinfo="skip",
-            showlegend=False,
-        )
-    )
-
-    fig.add_trace(
-        go.Scatter(
-            x=scores,
-            y=[0] * len(scores),
-            mode="markers+text",
-            text=parties,
-            textposition="top center",
-            marker={"size": 14, "color": colours},
-            hovertext=hover,
-            hoverinfo="text",
-            showlegend=False,
-        )
-    )
-
-    pos_pole = theme.get("positive_pole", "")
-    neg_pole = theme.get("negative_pole", "")
-    left_label = neg_pole
-    right_label = pos_pole
-
-    fig.update_layout(
-        height=160,
-        margin={"l": 10, "r": 10, "t": 10, "b": 30},
-        xaxis={
-            "title": f"← {left_label}  |  {right_label} →",
-            "showticklabels": False,
-            "showline": False,
-            "showgrid": False,
-            "zeroline": False,
-        },
-        yaxis={"visible": False, "range": [-1, 2]},
-        plot_bgcolor="rgba(0,0,0,0)",
-        paper_bgcolor="rgba(0,0,0,0)",
-    )
-
-    st.plotly_chart(fig, use_container_width=True)
-
-
-def _render_svd_time_trajectory(
-    party_scores_by_window: Dict[str, Dict[str, List[float]]],
-    comp_sel: int,
-    theme: dict,
-    selected_parties: List[str],
-) -> None:
-    """Render a time trajectory plot showing party positions over time on an SVD component.
-
-    Args:
-        party_scores_by_window: {window_id: {party_name: [scores]}}
-        comp_sel: SVD component number (1-indexed)
-        theme: Theme dict with label, positive_pole, negative_pole, flip
-        selected_parties: List of party names to display
-    """
-    if not party_scores_by_window or not selected_parties:
-        st.caption("_Geen data beschikbaar voor tijdtraject._")
-        return
-
-    idx = comp_sel - 1
-    flip = theme.get("flip", False)
-
-    party_trajectories: Dict[str, List[Tuple[str, float]]] = {}
-
-    all_windows = list(party_scores_by_window.keys())
-    sorted_windows = []
-    if "current_parliament" in all_windows:
-        sorted_windows.append("current_parliament")
-    other_windows = sorted(
-        [w for w in all_windows if w != "current_parliament"], reverse=True
-    )
-    sorted_windows.extend(other_windows)
-
-    for window in sorted_windows:
-        scores_by_party = party_scores_by_window.get(window, {})
-        for party in selected_parties:
-            scores = scores_by_party.get(party, [])
-            if scores and len(scores) > idx:
-                try:
-                    score = float(scores[idx])
-                    if flip:
-                        score = -score
-                    party_trajectories.setdefault(party, []).append((window, score))
-                except (ValueError, TypeError):
-                    continue
-
-    if not party_trajectories:
-        st.caption("_Geen data beschikbaar voor geselecteerde partijen._")
-        return
-
-    fig = go.Figure()
-
-    all_scores = []
-    for traj in party_trajectories.values():
-        all_scores.extend([s for _, s in traj])
-
-    if not all_scores:
-        st.caption("_Geen scores beschikbaar._")
-        return
-
-    x_min, x_max = min(all_scores) * 1.15, max(all_scores) * 1.15
-    if x_min == x_max:
-        x_min, x_max = x_min - 1, x_max + 1
-
-    window_to_y = {w: i for i, w in enumerate(sorted_windows)}
-
-    for window in sorted_windows:
-        y_pos = window_to_y[window]
-        fig.add_trace(
-            go.Scatter(
-                x=[x_min, x_max],
-                y=[y_pos, y_pos],
-                mode="lines",
-                line={"color": "#cccccc", "width": 1},
-                hoverinfo="skip",
-                showlegend=False,
-            )
-        )
-
-    for party in selected_parties:
-        if party not in party_trajectories:
-            continue
-
-        traj = party_trajectories[party]
-        if len(traj) < 1:
-            continue
-
-        x_vals = [score for _, score in traj]
-        y_vals = [window_to_y[window] for window, _ in traj]
-        color = PARTY_COLOURS.get(party, "#9E9E9E")
-
-        fig.add_trace(
-            go.Scatter(
-                x=x_vals,
-                y=y_vals,
-                mode="lines",
-                line={"color": color, "width": 2},
-                hoverinfo="skip",
-                showlegend=False,
-            )
-        )
-
-        hover_texts = [f"{party}<br>{window}: {score:.3f}" for window, score in traj]
-        fig.add_trace(
-            go.Scatter(
-                x=x_vals,
-                y=y_vals,
-                mode="markers+text",
-                text=[party] * len(traj),
-                textposition="top center",
-                marker={"size": 12, "color": color},
-                hovertext=hover_texts,
-                hoverinfo="text",
-                showlegend=False,
-            )
-        )
-
-    pos_pole = theme.get("positive_pole", "")
-    neg_pole = theme.get("negative_pole", "")
-    left_label = neg_pole
-    right_label = pos_pole
-
-    y_labels = {}
-    for window in sorted_windows:
-        if window == "current_parliament":
-            y_labels[window_to_y[window]] = "Huidig"
-        else:
-            y_labels[window_to_y[window]] = window
-
-    fig.update_layout(
-        height=max(400, len(sorted_windows) * 60 + 100),
-        margin={"l": 80, "r": 10, "t": 10, "b": 30},
-        xaxis={
-            "title": f"← {left_label}  |  {right_label} →",
-            "range": [x_min, x_max],
-            "showticklabels": False,
-            "showline": False,
-            "showgrid": True,
-            "gridcolor": "rgba(0,0,0,0.1)",
-            "zeroline": True,
-            "zerolinecolor": "rgba(0,0,0,0.2)",
-        },
-        yaxis={
-            "tickvals": list(y_labels.keys()),
-            "ticktext": list(y_labels.values()),
-            "tickmode": "array",
-            "autorange": "reversed",
-            "showgrid": False,
-        },
-        plot_bgcolor="rgba(0,0,0,0)",
-        paper_bgcolor="rgba(0,0,0,0)",
-    )
-
-    st.plotly_chart(fig, use_container_width=True)
-
-
-def _render_voting_results(voting_results_json) -> None:
-    """Render a voting_results JSON blob as a grouped voor/tegen/onthouden table.
-
-    The JSON is stored as {party_or_mp: vote} where vote is one of
-    'voor', 'tegen', 'onthouden', 'afwezig'. We group by vote for readability.
-    """
-    if not voting_results_json:
-        return
-    try:
-        vdata = (
-            json.loads(voting_results_json)
-            if isinstance(voting_results_json, str)
-            else voting_results_json
-        )
-        if not isinstance(vdata, dict) or not vdata:
-            return
-        by_vote: Dict[str, List[str]] = {}
-        for actor, vote in vdata.items():
-            vote_str = str(vote).lower().strip()
-            by_vote.setdefault(vote_str, []).append(str(actor))
-        vote_order = ["voor", "tegen", "onthouden", "afwezig"]
-        rows_shown = False
-        for v in vote_order + [k for k in by_vote if k not in vote_order]:
-            actors = by_vote.get(v)
-            if not actors:
-                continue
-            st.markdown(
-                f"**{v.capitalize()}** ({len(actors)}): {', '.join(sorted(actors))}"
-            )
-            rows_shown = True
-        if not rows_shown:
-            st.caption("_Geen stemuitslag beschikbaar_")
-    except Exception:
-        pass
-
-
-def _add_y_direction_annotations(fig: go.Figure) -> None:
-    """Add Progressief / Conservatief labels above and below the Y axis."""
-    common = dict(
-        xref="paper",
-        yref="paper",
-        x=-0.07,
-        showarrow=False,
-        font=dict(size=11, color="#666666"),
-    )
-    fig.add_annotation(**common, y=1.02, text="Progressief", xanchor="center")
-    fig.add_annotation(**common, y=-0.06, text="Conservatief", xanchor="center")
--- a/analysis/tabs/browser.py
+++ b/analysis/tabs/browser.py
@ -1,95 +1,18 @@
-"""Browser tab for the parliamentary explorer."""
+"""Browser tab for the parliamentary explorer.

-from __future__ import annotations
-
-import pandas as pd
+This module will contain the browser tab implementation.
+Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
+"""

-import analysis.explorer_data as explorer_data
-from analysis.tabs._rendering import _render_voting_results, st
+from __future__ import annotations


 def build_browser_tab(db_path: str, show_rejected: bool) -> None:
-    """Build the Motie Browser tab."""
-    st.subheader("Motie Browser")
-
-    df = explorer_data.load_motions_df(db_path)
-    if df.empty:
-        st.warning("Geen moties beschikbaar.")
-        return
-
-    if not show_rejected:
-        df = df[df["title"].fillna("").str.strip() != "Verworpen."]
-
-    col1, col2, col3 = st.columns(3)
-    with col1:
-        years = sorted(df["year"].dropna().astype(int).unique().tolist())
-        year_filter = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years])
-    with col2:
-        min_controversy_b = st.slider(
-            "Min. controverse",
-            min_value=0.0,
-            max_value=1.0,
-            value=0.0,
-            step=0.05,
-            key="browser_controversy",
-        )
-    with col3:
-        sort_by = st.selectbox("Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"])
-
-    working = df.copy()
-    if year_filter != "(Alle)":
-        working = working[working["year"] == int(year_filter)]
-    if min_controversy_b > 0:
-        working = working[working["controversy_score"] >= min_controversy_b]
-
-    sort_map = {
-        "Datum (nieuw)": ("date", False),
-        "Controverse": ("controversy_score", False),
-        "Marge": ("winning_margin", True),
-    }
-    sort_col, sort_asc = sort_map[sort_by]
-    working = working.sort_values(by=sort_col, ascending=sort_asc)
-
-    display_cols = ["id", "title", "date", "controversy_score", "winning_margin"]
-    available_display = [c for c in display_cols if c in working.columns]
-    st.dataframe(
-        working[available_display].reset_index(drop=True),
-        use_container_width=True,
-        height=350,
-    )
-
-    st.divider()
-
-    st.markdown("**Detail weergave** — vul een motie-ID in:")
-    sel_id = st.number_input(
-        "Motie ID",
-        min_value=int(working["id"].min()) if not working.empty else 1,
-        max_value=int(working["id"].max()) if not working.empty else 99999,
-        value=int(working["id"].iloc[0]) if not working.empty else 1,
-        step=1,
-    )
-    motion_row = df[df["id"] == sel_id]
-    if not motion_row.empty:
-        row = motion_row.iloc[0]
-        st.markdown(f"### {row.get('title') or 'Onbekend'}")
-        date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
-        st.caption(
-            f"{date_str}  |  Controverse: {row.get('controversy_score', 0):.2f}"
-        )
-
-        url = row.get("url")
-        if url and str(url).startswith("http"):
-            st.markdown(f"[Bekijk op Tweede Kamer]({url})")
+    """Build the Motie Browser tab.

-        st.markdown("**Stemuitslag:**")
-        _render_voting_results(row.get("voting_results"))
+    Currently delegates to explorer.py implementation.
+    Will be extracted when rendering logic is decoupled from Streamlit.
+    """
+    import explorer

-        sim = explorer_data.query_similar(db_path, int(sel_id), top_k=10)
-        if not sim.empty:
-            st.markdown("**Vergelijkbare moties:**")
-            st.dataframe(
-                sim[["title", "score", "date", "policy_area"]],
-                use_container_width=True,
-            )
-        else:
-            st.caption("_Nog geen vergelijkbare moties beschikbaar voor deze motie_")
+    explorer.build_browser_tab(db_path, show_rejected)
--- a/analysis/tabs/compass.py
+++ b/analysis/tabs/compass.py
@ -1,198 +1,20 @@
-"""Compass tab for the parliamentary explorer."""
+"""Compass tab for the parliamentary explorer.

-from __future__ import annotations
-
-import datetime as _dt
-import re
-from typing import Dict, Tuple
-
-import numpy as np
-import pandas as pd
+This module will contain the compass tab implementation.
+Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
+"""

-from analysis import config
-import analysis.explorer_data as explorer_data
-from analysis.tabs._rendering import px, st
+from __future__ import annotations

-PARTY_COLOURS = config.PARTY_COLOURS
+from typing import List


 def build_compass_tab(db_path: str, window_size: str) -> None:
-    """Build the Politiek Kompas tab."""
-    st.subheader("Politiek Kompas")
-    st.markdown(
-        "2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)."
-    )
-
-    # Compass always uses annual windows regardless of the sidebar window_size setting.
-    positions_by_window, axis_def = explorer_data.load_positions(db_path, "annual")
-    if axis_def is None:
-        axis_def = {}
-    if not positions_by_window:
-        st.warning(
-            "Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid."
-        )
-        return
-
-    party_map = explorer_data.load_party_map(db_path)
-    active_mps = explorer_data.load_active_mps(db_path)
-
-    _current_year = str(_dt.date.today().year)
-    year_windows = sorted(
-        w
-        for w in positions_by_window
-        if w != "current_parliament" and w != _current_year
-    )
-    has_current = "current_parliament" in positions_by_window
-    windows = year_windows + (["current_parliament"] if has_current else [])
-
-    _SPARSE_YEARS = {"2016", "2017", "2018"}
-    _THRESHOLD = 0.65
-
-    def _window_label(w: str) -> str:
-        if w == "current_parliament":
-            return "Huidig parlement"
-        return w
-
-    col1, col2 = st.columns([3, 1])
-    with col2:
-        window_idx = st.selectbox(
-            "Jaar",
-            options=windows,
-            index=len(windows) - 1,
-            format_func=_window_label,
-        )
-        level = st.radio(
-            "Weergave",
-            options=["Kamerleden", "Partijen"],
-            index=0,
-            horizontal=True,
-        )
-        min_mps = st.number_input(
-            "Min. Kamerleden per partij",
-            min_value=1,
-            max_value=20,
-            value=3,
-            step=1,
-            help="Partijen met minder dan dit aantal zetels worden niet weergegeven.",
-        )
-
-    pos = positions_by_window.get(window_idx, {})
-    if not pos:
-        st.info(f"Geen data voor venster {window_idx}")
-        return
-
-    if window_idx == "current_parliament":
-        pos = {mp: xy for mp, xy in pos.items() if mp in active_mps}
-
-    def _strip_paren(name: str) -> str:
-        return re.sub(r"\s*\([^)]*\)", "", name).strip()
-
-    deduped: Dict[str, Tuple[float, float]] = {}
-    for name, (x, y) in pos.items():
-        base = _strip_paren(name)
-        if base in deduped:
-            ox, oy = deduped[base]
-            deduped[base] = ((ox + x) / 2, (oy + y) / 2)
-        else:
-            deduped[base] = (x, y)
-    pos = deduped
-
-    rows = []
-    for name, (x, y) in pos.items():
-        party = party_map.get(name) or party_map.get(_strip_paren(name), "Unknown")
-        rows.append({"name": name, "x": x, "y": y, "party": party})
-
-    df_pos = pd.DataFrame(rows)
-
-    party_counts = df_pos[df_pos["party"] != "Unknown"]["party"].value_counts()
-    valid_parties = set(party_counts[party_counts >= min_mps].index)
-    df_pos = df_pos[df_pos["party"].isin(valid_parties)]
-
-    if df_pos.empty:
-        st.info("Geen partijen met genoeg Kamerleden voor dit venster.")
-        return
-
-    _raw_x = axis_def.get("x_label")
-    _raw_y = axis_def.get("y_label")
-
-    try:
-        from analysis.axis_classifier import display_label_for_modal
-
-        _x_label = display_label_for_modal(_raw_x, "x")
-        _y_label = display_label_for_modal(_raw_y, "y")
-    except Exception:
-        from analysis.svd_labels import get_fallback_labels
-
-        _x_fallback, _y_fallback = get_fallback_labels()
-        _x_label = _raw_x or _x_fallback
-        _y_label = _raw_y or _y_fallback
-
-    if level == "Partijen":
-        df_party = df_pos.groupby("party", as_index=False).agg(
-            x=("x", "mean"), y=("y", "mean"), n=("name", "count")
-        )
-        df_party["name"] = df_party["party"]
-        colour_map = {
-            p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_party["party"].unique()
-        }
-        fig = px.scatter(
-            df_party,
-            x="x",
-            y="y",
-            color="party",
-            text="party",
-            hover_name="party",
-            hover_data={"party": False, "x": ":.3f", "y": ":.3f", "n": True},
-            color_discrete_map=colour_map,
-            title=f"Politiek Kompas — {_window_label(window_idx)} (partijen)",
-            labels={
-                "x": _x_label,
-                "y": _y_label,
-                "n": "Kamerleden",
-            },
-        )
-        fig.update_traces(textposition="top center", marker_size=14)
-    else:
-        colour_map = {
-            p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique()
-        }
-        fig = px.scatter(
-            df_pos,
-            x="x",
-            y="y",
-            color="party",
-            hover_name="name",
-            hover_data={"party": True, "x": ":.3f", "y": ":.3f"},
-            color_discrete_map=colour_map,
-            title=f"Politiek Kompas — {_window_label(window_idx)}",
-            labels={"x": _x_label, "y": _y_label},
-        )
+    """Build the Politiek Kompas tab.

-    fig.update_layout(
-        height=600,
-        legend_title_text="Partij",
-        xaxis={"range": [-1, 1]},
-        yaxis={"range": [-0.6, 0.6]},
-    )
-    with col1:
-        st.plotly_chart(fig, use_container_width=True)
-        _x_interp = axis_def.get("x_interpretation", {}).get(window_idx, "")
-        if (
-            _x_interp
-            and axis_def.get("x_quality", {}).get(window_idx, 1.0) < _THRESHOLD
-        ):
-            st.caption(_x_interp)
+    Currently delegates to explorer.py implementation.
+    Will be extracted when rendering logic is decoupled from Streamlit.
+    """
+    import explorer

-        # Voting discipline analysis
-        st.markdown("---")
-        st.markdown(
-            "**Stemdiscipline analyse:** De Rice-index meet hoe eensgezind partijen stemmen "
-            "tijdens hoofdelijke stemmingen. Een score van 100% betekent dat alle MPs van "
-            "een partij hetzelfde stemden; 50% wijst op een gelijke splitsing binnen de partij. "
-            "Partijen met hoge discipline (>95%) zoals PVV en SGP stemmen als een blok, wat "
-            "wijst op sterke partijdiscipline en homogene membership. Lagere discipline (<85%) "
-            "bij partijen als PvdA of SP kan duiden op interne factiestrijd, gewetensvragen "
-            "bij ethische thema's, of een brede ideologische koers die ruimte laat voor "
-            "afwijkende meningen. De discipline varieert ook per onderwerp — ethische kwesties "
-            "tonen vaak meer interne verschillen dan economische thema's."
-        )
+    explorer.build_compass_tab(db_path, window_size)
--- a/analysis/tabs/components.py
+++ b/analysis/tabs/components.py
@ -1,372 +1,18 @@
-"""SVD Components tab for the parliamentary explorer."""
+"""SVD Components tab for the parliamentary explorer.

-from __future__ import annotations
-
-import datetime as _dt
-import logging
-import os
-from typing import Dict, List, Tuple
-
-import numpy as np
-
-from analysis import config
-import analysis.explorer_data as explorer_data
-from analysis.tabs._rendering import (
-    _render_party_axis_chart_1d,
-    _render_scree_plot,
-    _render_svd_time_trajectory,
-    _render_voting_results,
-    st,
-)
-
-try:
-    import duckdb
-except Exception:
-    duckdb = None  # type: ignore
-
-SVD_THEMES = config.SVD_THEMES
-KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES
+This module will contain the SVD components tab implementation.
+Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
+"""

-logger = logging.getLogger(__name__)
+from __future__ import annotations


 def build_svd_components_tab(db_path: str) -> None:
-    """New tab: show top motions contributing to top SVD components.
-
-    Reads thoughts/explorer/top_svd_top_motions.json and displays a selector
-    for components 1..10 with theme labels/explanations and a detail pane per motion.
+    """Build the SVD Components tab.

-    Components 1-2 use aligned PCA positions (consistent with compass).
-    Components 3-10 use raw SVD scores.
+    Currently delegates to explorer.py implementation.
+    Will be extracted when rendering logic is decoupled from Streamlit.
    """
-    st.subheader("SVD Assen — politieke polarisatiethema's")
-    st.markdown(
-        "Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen "
-        "van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren "
-        "het spanningsveld dat de as beschrijft."
-    )
-
-    scree_importances = explorer_data.load_scree_data(db_path)
-    if scree_importances:
-        st.markdown(
-            "**Scree-plot** — het relatieve gewicht van elke SVD-as. "
-            "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; "
-            "latere assen (7+) zijn fragiel en mogelijk niet boven ruisniveau."
-        )
-        _render_scree_plot(scree_importances)
-
-    json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
-    if not os.path.exists(json_path):
-        st.warning(
-            f"Top-SVD data not found at {json_path}. Run the importance job to generate it."
-        )
-        return
-
-    try:
-        import json
-
-        with open(json_path, "r", encoding="utf-8") as fh:
-            j = json.load(fh)
-    except Exception as e:
-        st.error(f"Failed to load SVD importance JSON: {e}")
-        return
-
-    window = j.get("window")
-    rows = j.get("rows", [])
-    if not rows:
-        st.info("Geen top-moties in dataset")
-        return
-
-    st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**")
-
-    comp_map: dict[int, list] = {}
-    for r in rows:
-        comp = int(r.get("component", 0))
-        bucket = comp_map.setdefault(comp, [])
-        existing_ids = {m.get("motion_id") for m in bucket}
-        if r.get("motion_id") not in existing_ids:
-            bucket.append(r)
-
-    comp_options = sorted(comp_map.keys())
-
-    def _comp_label(c: int) -> str:
-        theme = SVD_THEMES.get(c, {})
-        lbl = theme.get("label", "")
-        return f"As {c} — {lbl}" if lbl else f"As {c}"
-
-    comp_display = [_comp_label(c) for c in comp_options]
-
-    party_scores_default = explorer_data.load_party_axis_scores(db_path)
-    party_mp_vectors = explorer_data.load_party_mp_vectors(db_path)
-    bootstrap_data = None
-    if party_mp_vectors:
-        try:
-            from analysis.political_axis import compute_party_bootstrap_cis
-
-            bootstrap_data = compute_party_bootstrap_cis(party_mp_vectors)
-        except Exception:
-            pass
-
-    col1, col2 = st.columns([2, 1])
-
-    view_mode = "Enkel venster"
-    selected_parties_for_trajectory: list = []
-
-    with col2:
-        comp_sel_idx = st.selectbox(
-            "Selecteer SVD-as",
-            options=list(range(len(comp_options))),
-            format_func=lambda i: comp_display[i],
-            index=0,
-        )
-        comp_sel = comp_options[comp_sel_idx]
-
-        min_mps = st.number_input(
-            "Min. Kamerleden per partij",
-            min_value=1,
-            max_value=20,
-            value=1,
-            step=1,
-            help="Partijen met minder dan dit aantal Kamerleden worden niet weergegeven.",
-        )
-
-        view_mode = st.radio(
-            "Weergave",
-            options=["Enkel venster", "Tijdtraject"],
-            index=0,
-            help="Enkel venster: toont posities voor één tijdsvenster. Tijdtraject: toont hoe partijen over tijd bewegen op deze as.",
-        )
-
-        selected_parties_for_trajectory = []
-        if view_mode == "Tijdtraject":
-            all_parties = (
-                sorted(party_scores_default.keys()) if party_scores_default else []
-            )
-            default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties][:8]
-            selected_parties_for_trajectory = st.multiselect(
-                "Partijen om te tonen",
-                options=all_parties,
-                default=default_parties,
-                help="Selecteer de partijen die je wilt zien in het tijdtraject.",
-            )
-
-    theme = SVD_THEMES.get(comp_sel, {})
-    if theme:
-        st.info(f"**{theme['label']}** — {theme['explanation']}")
-
-    motions = comp_map.get(comp_sel, [])
-
-    _current_year = str(_dt.date.today().year)
-    available_windows = explorer_data.get_uniform_dim_windows(db_path)
-    year_windows = sorted(
-        w for w in available_windows if w != "current_parliament" and w != _current_year
-    )
-    has_current = "current_parliament" in available_windows
-    svd_windows = year_windows + (["current_parliament"] if has_current else [])
-
-    def _svd_window_label(w: str) -> str:
-        if w == "current_parliament":
-            return "Huidig parlement"
-        return w
-
-    with col1:
-        svd_window = st.selectbox(
-            "Jaar",
-            options=svd_windows,
-            index=len(svd_windows) - 1,
-            format_func=_svd_window_label,
-            key=f"svd_window_{comp_sel}",
-        )
-
-    if svd_window == "current_parliament":
-        party_scores = party_scores_default
-    else:
-        party_scores = explorer_data.load_party_axis_scores_for_window(db_path, svd_window)
-
-    party_mp_counts = (
-        {p: len(v) for p, v in party_mp_vectors.items()} if party_mp_vectors else {}
-    )
-
-    def _get_aligned_party_coords(window: str) -> Dict[str, Tuple[float, float]]:
-        """Get party (x, y) coordinates from aligned PCA positions for a window."""
-        positions_by_window, _ = explorer_data.load_positions(db_path, "annual")
-        window_pos = positions_by_window.get(window, {})
-        if not window_pos:
-            return {}
-
-        _party_map = explorer_data.load_party_map(db_path)
-
-        party_coords: Dict[str, List[Tuple[float, float]]] = {}
-        for mp_name, (x, y) in window_pos.items():
-            party = _party_map.get(
-                mp_name, _party_map.get(mp_name.split("(")[0].strip(), None)
-            )
-            if party:
-                party_coords.setdefault(party, []).append((x, y))
-
-        return {
-            party: (
-                float(np.mean([c[0] for c in coords])),
-                float(np.mean([c[1] for c in coords])),
-            )
-            for party, coords in party_coords.items()
-            if coords
-        }
-
-    active_mps = (
-        explorer_data.load_active_mps(db_path)
-        if svd_window == "current_parliament"
-        else None
-    )
-    aligned_all_scores = explorer_data.get_aligned_party_scores(
-        db_path, svd_window, active_mps
-    )
-
-    party_1d_coords: dict = {}
-    for party, all_scores in aligned_all_scores.items():
-        idx = comp_sel - 1
-        if idx < len(all_scores):
-            party_1d_coords[party] = (float(all_scores[idx]),)
-
-    computed_flips: Dict[int, bool] = {}
-    try:
-        from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT
-
-        for comp_idx in range(10):
-            right_scores = []
-            left_scores = []
-            for party, scores in aligned_all_scores.items():
-                if party in CANONICAL_RIGHT:
-                    right_scores.append(scores[comp_idx])
-                elif party in CANONICAL_LEFT:
-                    left_scores.append(scores[comp_idx])
-
-            if right_scores and left_scores:
-                right_avg = np.mean(right_scores)
-                left_avg = np.mean(left_scores)
-                computed_flips[comp_idx + 1] = right_avg < left_avg
-            else:
-                computed_flips[comp_idx + 1] = False
-    except Exception:
-        pass
-
-    theme_with_flip = {
-        **theme,
-        "flip": computed_flips.get(comp_sel, theme.get("flip", False)),
-    }
-
-    if min_mps > 1 and party_mp_counts:
-        valid_parties = {p for p, count in party_mp_counts.items() if count >= min_mps}
-        party_1d_coords = {
-            p: coords for p, coords in party_1d_coords.items() if p in valid_parties
-        }
-
-    if view_mode == "Tijdtraject" and selected_parties_for_trajectory:
-        available_windows = explorer_data.get_uniform_dim_windows(db_path)
-        year_windows = sorted(
-            w
-            for w in available_windows
-            if w != "current_parliament" and w != _current_year
-        )
-        has_current = "current_parliament" in available_windows
-        all_windows = year_windows + (["current_parliament"] if has_current else [])
-
-        party_scores_by_window = explorer_data._get_aligned_trajectory_scores(
-            db_path, all_windows
-        )
-
-        _render_svd_time_trajectory(
-            party_scores_by_window,
-            comp_sel,
-            theme_with_flip,
-            selected_parties_for_trajectory,
-        )
-    else:
-        _render_party_axis_chart_1d(party_1d_coords, comp_sel, theme_with_flip)
-
-    motion_ids = [m.get("motion_id") for m in motions if m.get("motion_id") is not None]
-    motion_details: Dict[int, tuple] = {}
-    if motion_ids:
-        ids_int: List[int] = []
-        for mid in motion_ids:
-            try:
-                ids_int.append(int(mid))
-            except Exception:
-                logger.warning("Skipping invalid motion id in SVD batch fetch: %r", mid)
-
-        if ids_int and duckdb is not None:
-            con = None
-            try:
-                placeholders = ", ".join("?" for _ in ids_int)
-                con = duckdb.connect(database=db_path, read_only=True)
-                db_rows = con.execute(
-                    f"SELECT id, title, date, policy_area, url, body_text, voting_results "
-                    f"FROM motions WHERE id IN ({placeholders})",
-                    ids_int,
-                ).fetchall()
-                motion_details = {r[0]: r for r in db_rows}
-            except Exception:
-                logger.exception("Failed to batch-fetch motion details")
-            finally:
-                if con:
-                    con.close()
-
-    pos_motions = [m for m in motions if float(m.get("score", 0.0)) >= 0]
-    neg_motions = [m for m in motions if float(m.get("score", 0.0)) < 0]
-
-    flip = theme_with_flip.get("flip", False) if theme_with_flip else False
-    pos_pole = theme_with_flip.get("positive_pole", "") if theme_with_flip else ""
-    neg_pole = theme_with_flip.get("negative_pole", "") if theme_with_flip else ""
-
-    if flip:
-        left_pole, right_pole = pos_pole, neg_pole
-        left_motions, right_motions = pos_motions, neg_motions
-    else:
-        left_pole, right_pole = neg_pole, pos_pole
-        left_motions, right_motions = neg_motions, pos_motions
-
-    lcol, rcol = st.columns(2)
-
-    with lcol:
-        st.markdown(f"**← {left_pole}**")
-        for m in left_motions:
-            mid = m.get("motion_id")
-            raw_title = m.get("title") or f"Motie #{mid}"
-            with st.expander(raw_title):
-                row = motion_details.get(int(mid)) if mid is not None else None
-                if row:
-                    try:
-                        date_str = str(row[2])[:10]
-                    except Exception:
-                        date_str = "?"
-                    st.caption(f"{date_str}  |  {row[3] or '—'}")
-                    if row[4] and str(row[4]).startswith("http"):
-                        st.markdown(f"[Bekijk op Tweede Kamer]({row[4]})")
-                    if row[5]:
-                        with st.expander("Toon volledige tekst"):
-                            st.write(row[5])
-                    _render_voting_results(row[6])
-                else:
-                    st.caption("_Geen metadata beschikbaar_")
+    import explorer

-    with rcol:
-        st.markdown(f"**{right_pole} →**")
-        for m in right_motions:
-            mid = m.get("motion_id")
-            raw_title = m.get("title") or f"Motie #{mid}"
-            with st.expander(raw_title):
-                row = motion_details.get(int(mid)) if mid is not None else None
-                if row:
-                    try:
-                        date_str = str(row[2])[:10]
-                    except Exception:
-                        date_str = "?"
-                    st.caption(f"{date_str}  |  {row[3] or '—'}")
-                    if row[4] and str(row[4]).startswith("http"):
-                        st.markdown(f"[Bekijk op Tweede Kamer]({row[4]})")
-                    if row[5]:
-                        with st.expander("Toon volledige tekst"):
-                            st.write(row[5])
-                    _render_voting_results(row[6])
-                else:
-                    st.caption("_Geen metadata beschikbaar_")
+    explorer.build_svd_components_tab(db_path)
--- a/analysis/tabs/quiz.py
+++ b/analysis/tabs/quiz.py
@ -1,132 +1,18 @@
-"""MP Quiz tab for the parliamentary explorer."""
+"""MP Quiz tab for the parliamentary explorer.

-from __future__ import annotations
-
-import pandas as pd
+This module will contain the MP quiz tab implementation.
+Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
+"""

-import analysis.explorer_data as explorer_data
-from analysis.tabs._rendering import st
+from __future__ import annotations


 def build_mp_quiz_tab(db_path: str) -> None:
-    """Interactive quiz: narrow MPs by asking motion vote questions.
+    """Build the MP Quiz tab.

-    Minimal viable flow:
-    - seed with top-N controversial motions (SEED_MOTIONS)
-    - present one question at a time, store answers in st.session_state['mp_quiz_votes']
-    - after each answer call MotionDatabase.match_mps_for_votes to rank MPs
-    - if multiple candidates remain, call choose_discriminating_motions to pick next question
-    - stop when unique MP found or no discriminating motions remain
+    Currently delegates to explorer.py implementation.
+    Will be extracted when rendering logic is decoupled from Streamlit.
    """
-    st.subheader("Welk tweede kamerlid ben jij?")
-    st.markdown(
-        "Beantwoord een paar eenvoudige ja/nee/onthoud vragen over moties om te zien welk Kamerlid het meest op jou lijkt."
-    )
-
-    SEED_MOTIONS = 8
-    MAX_QUESTIONS = 20
-
-    if "mp_quiz_votes" not in st.session_state:
-        st.session_state["mp_quiz_votes"] = {}
-    if "mp_quiz_asked" not in st.session_state:
-        st.session_state["mp_quiz_asked"] = []
-
-    from database import MotionDatabase as _MotionDatabase
-
-    db_inst = _MotionDatabase(db_path)
-
-    df = explorer_data.load_motions_df(db_path)
-    if df.empty:
-        st.warning("Geen moties beschikbaar om de quiz te starten.")
-        return
-
-    seed_ids = db_inst.get_motions_with_individual_votes(k=SEED_MOTIONS)
-    if not seed_ids:
-        st.warning("Geen individuele stemdata beschikbaar voor de quiz.")
-        return
-
-    def _next_motion_id():
-        for mid in seed_ids:
-            if str(mid) not in st.session_state["mp_quiz_votes"]:
-                return mid
-        try:
-            user_votes = {
-                int(k): v for k, v in st.session_state["mp_quiz_votes"].items()
-            }
-            ranked = db_inst.match_mps_for_votes(user_votes, limit=200)
-        except Exception:
-            ranked = []
-
-        candidates = [r["mp_name"] for r in ranked]
-        excluded = [int(k) for k in st.session_state["mp_quiz_votes"].keys()]
-        if not candidates:
-            return None
-        try:
-            next_ids = db_inst.choose_discriminating_motions(candidates, excluded, k=1)
-            return next_ids[0] if next_ids else None
-        except Exception:
-            return None
-
-    col1, col2 = st.columns([3, 1])
-    with col2:
-        st.caption(
-            f"Vragen beantwoord: {len(st.session_state['mp_quiz_votes'])}/{MAX_QUESTIONS}"
-        )
-        if st.button("Reset quiz"):
-            st.session_state["mp_quiz_votes"] = {}
-            st.session_state["mp_quiz_asked"] = []
-            st.rerun()
-
-    next_mid = _next_motion_id()
-    if next_mid is None:
-        st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.")
-    else:
-        motion_rows = df[df["id"] == next_mid]
-        if motion_rows.empty:
-            st.session_state["mp_quiz_votes"][str(next_mid)] = "Geen stem"
-            st.rerun()
-            return
-        motion_row = motion_rows.iloc[0]
-        st.markdown(f"### {motion_row.get('title') or f'Motie #{next_mid}'}")
-        if motion_row.get("layman_explanation"):
-            st.info(motion_row.get("layman_explanation"))
-
-        with st.form(key=f"mp_quiz_form_{next_mid}"):
-            choice = st.radio(
-                "Wat zou jij stemmen?",
-                options=["Voor", "Tegen", "Onthouden", "Geen stem"],
-                index=3,
-            )
-            submitted = st.form_submit_button("Beantwoord en verder")
-
-        if submitted:
-            st.session_state["mp_quiz_votes"][str(next_mid)] = choice
-            st.session_state["mp_quiz_asked"].append(next_mid)
-            st.rerun()
-
-    try:
-        user_votes = {int(k): v for k, v in st.session_state["mp_quiz_votes"].items()}
-        ranking = db_inst.match_mps_for_votes(user_votes, limit=50)
-    except Exception:
-        ranking = []
-
-    if ranking:
-        st.markdown("**Top kandidaten**")
-        rdf = pd.DataFrame(ranking)
-        st.dataframe(rdf.head(10), use_container_width=True)
+    import explorer

-        top_pct = ranking[0]["agreement_pct"] if ranking else 0.0
-        top_matches = [r for r in ranking if r["agreement_pct"] == top_pct]
-        if len(top_matches) == 1 and top_matches[0]["overlap"] > 0:
-            st.success(
-                f"Unieke match gevonden: {top_matches[0]['mp_name']} ({top_matches[0]['party']})"
-            )
-        else:
-            if len(st.session_state["mp_quiz_asked"]) >= MAX_QUESTIONS:
-                st.warning(
-                    "Maximaal aantal vragen beantwoord. Je hebt meerdere vergelijkbare kandidaten."
-                )
-            else:
-                st.info("Nog geen unieke match — vraag meer om verder te verfijnen.")
-    else:
-        st.info("Nog geen antwoorden of geen overlapping met bestaande stemdata.")
+    explorer.build_mp_quiz_tab(db_path)
--- a/analysis/tabs/search.py
+++ b/analysis/tabs/search.py
@ -1,84 +1,18 @@
-"""Search tab for the parliamentary explorer."""
+"""Search tab for the parliamentary explorer.

-from __future__ import annotations
-
-import pandas as pd
+This module will contain the search tab implementation.
+Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
+"""

-import analysis.explorer_data as explorer_data
-from analysis.tabs._rendering import _render_voting_results, st
+from __future__ import annotations


 def build_search_tab(db_path: str, show_rejected: bool) -> None:
-    """Build the Motie Zoeken tab."""
-    st.subheader("Motie Zoeken")
-
-    df = explorer_data.load_motions_df(db_path)
-    if df.empty:
-        st.warning("Geen moties beschikbaar.")
-        return
-
-    if not show_rejected:
-        df = df[df["title"].fillna("").str.strip() != "Verworpen."]
-
-    col1, col2, col3 = st.columns([2, 1, 1])
-    with col1:
-        query = st.text_input(
-            "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen"
-        )
-    with col2:
-        years = sorted(df["year"].dropna().astype(int).unique().tolist())
-        if years:
-            year_range = st.select_slider(
-                "Jaar", options=years, value=(years[0], years[-1])
-            )
-        else:
-            year_range = (2019, 2024)
-    with col3:
-        min_controversy = st.slider(
-            "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05
-        )
-
-    working = df.copy()
-    working = working[
-        (working["year"] >= year_range[0]) & (working["year"] <= year_range[1])
-    ]
-    if min_controversy > 0:
-        working = working[working["controversy_score"] >= min_controversy]
-    if query:
-        q = query.lower()
-        mask = working["title"].fillna("").str.lower().str.contains(q, regex=False)
-        working = working[mask]
-
-    working = working.sort_values(by="controversy_score", ascending=False)
-    st.caption(f"{len(working)} resultaten (top 50 getoond)")
-
-    for _, row in working.head(50).iterrows():
-        title = row.get("title") or f"Motie #{row['id']}"
-        date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
-        controversy = row.get("controversy_score") or 0
-        with st.expander(f"**{title}** — {date_str} — {controversy:.2f}"):
-            cols = st.columns(3)
-            cols[0].metric("Controverse", f"{controversy:.2f}")
-            cols[1].metric("Marge", f"{row.get('winning_margin', 0):.2f}")
-            cols[2].metric("Jaar", int(row["year"]) if pd.notna(row["year"]) else "?")
-
-            _render_voting_results(row.get("voting_results"))
+    """Build the Motie Zoeken tab.

-            url = row.get("url")
-            if url and str(url).startswith("http"):
-                st.markdown(f"[Bekijk op Tweede Kamer]({url})")
+    Currently delegates to explorer.py implementation.
+    Will be extracted when rendering logic is decoupled from Streamlit.
+    """
+    import explorer

-            sim = explorer_data.query_similar(db_path, int(row["id"]), top_k=5)
-            if not sim.empty:
-                st.markdown("**Vergelijkbare moties:**")
-                for _, s in sim.iterrows():
-                    s_date = (
-                        pd.to_datetime(s["date"]).strftime("%Y")
-                        if pd.notna(s.get("date"))
-                        else ""
-                    )
-                    st.markdown(
-                        f"- {s.get('title', 'Onbekend')} *(score: {s['score']:.3f}, {s_date})*"
-                    )
-            else:
-                st.caption("_Nog geen vergelijkbare moties beschikbaar_")
+    explorer.build_search_tab(db_path, show_rejected)
--- a/analysis/tabs/trajectories.py
+++ b/analysis/tabs/trajectories.py
@ -1,670 +1,20 @@
-"""Trajectories tab for the parliamentary explorer."""
+"""Trajectories tab for the parliamentary explorer.

-from __future__ import annotations
-
-import json
-import logging
-import os
-import re
-import traceback
-from datetime import datetime
-from typing import Dict, List, Optional, Tuple
-
-import numpy as np
-
-from analysis import config
-import analysis.explorer_data as explorer_data
-from analysis import trajectory
-from analysis.tabs._rendering import (
-    PARTY_COLOURS,
-    _add_y_direction_annotations,
-    go,
-    st,
-)
-from explorer_helpers import compute_party_centroids, inspect_positions_for_issues
-
-KNOWN_MAJOR_PARTIES = config.KNOWN_MAJOR_PARTIES
-
-logger = logging.getLogger(__name__)
-
-_last_trajectories_diagnostics: dict = {}
-_last_diagnostics = _last_trajectories_diagnostics
-
-
-def get_debug_trajectories_enabled() -> bool:
-    """Return True when EXPLORER_DEBUG_TRAJECTORIES env var indicates debug mode."""
-    v = os.getenv("EXPLORER_DEBUG_TRAJECTORIES")
-    return str(v) in ("1", "true", "True")
-
-
-def select_trajectory_plot_data(
-    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
-    party_map: Dict[str, str],
-    windows: List[str],
-    selected_parties: List[str],
-    smooth_alpha: float = 0.35,
-    mp_fallback_count: Optional[int] = None,
-) -> Tuple[go.Figure, int, Optional[str]]:
-    """Return (fig, trace_count, banner_text).
-
-    Helper used by build_trajectories_tab. Does not call Streamlit.
+This module will contain the trajectories tab implementation.
+Currently: Tab logic remains in explorer.py pending Streamlit decoupling.
 """
-    if mp_fallback_count is None:
-        try:
-            mp_fallback_count = int(os.getenv("EXPLORER_MP_FALLBACK_COUNT", "20"))
-        except Exception:
-            mp_fallback_count = 20
-
-    party_centroids, meta = compute_party_centroids(
-        positions_by_window, party_map, windows
-    )
-
-    try:
-        inspector_summary = inspect_positions_for_issues(positions_by_window, party_map)
-    except Exception:
-        tb = traceback.format_exc()
-        inspector_summary = {}
-        try:
-            select_trajectory_plot_data._last_diagnostics = {
-                "stage": "inspector_exception",
-                "exception": tb,
-            }
-        except Exception:
-            pass
-        try:
-            _last_trajectories_diagnostics.update(
-                {"stage": "inspector_exception", "exception": tb}
-            )
-        except Exception:
-            pass
-    logger.debug("select_trajectory_plot_data inspector summary: %s", inspector_summary)
-
-    plottable_parties = []
-    for p, vals in party_centroids.items():
-        has_valid = any(not (np.isnan(x) and np.isnan(y)) for x, y in vals)
-        if has_valid:
-            plottable_parties.append(p)

-    logging.getLogger(__name__).debug(
-        "[TRAJ DEBUG] plottable_parties: %d parties, sample=%s",
-        len(plottable_parties),
-        (plottable_parties[:5] if plottable_parties else "empty"),
-    )
-    logging.getLogger(__name__).debug(
-        "[TRAJ DEBUG] party_centroids keys: %s",
-        list(party_centroids.keys())[:10],
-    )
-    if party_centroids:
-        sample_party = list(party_centroids.keys())[0]
-        sample_vals = party_centroids[sample_party]
-        logging.getLogger(__name__).debug(
-            "[TRAJ DEBUG] Sample party '%s' centroids: %s...",
-            sample_party,
-            sample_vals[:3],
-        )
-
-    fig = go.Figure()
-    trace_count = 0
-    banner_text: Optional[str] = None
-
-    def _ema_smooth(values: List[float], alpha: float) -> List[float]:
-        if not values or alpha >= 1.0:
-            return values
-        smoothed: List[float] = []
-        prev = None
-        for v in values:
-            if v is None or (isinstance(v, float) and np.isnan(v)):
-                smoothed.append(float(np.nan))
-                continue
-            v = float(v)
-            if prev is None:
-                prev = v
-            else:
-                prev = alpha * v + (1 - alpha) * prev
-            smoothed.append(float(prev))
-        return smoothed
-
-    if not plottable_parties:
-        mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
-        for wid in windows:
-            pos = positions_by_window.get(wid, {})
-            for mp_name, xy in pos.items():
-                try:
-                    x, y = float(xy[0]), float(xy[1])
-                except Exception:
-                    continue
-                mp_positions.setdefault(mp_name, {})[wid] = (x, y)
-
-        mp_activity = sorted(
-            [(mp, len(wdict)) for mp, wdict in mp_positions.items()],
-            key=lambda t: t[1],
-            reverse=True,
-        )
-        top_mps = [mp for mp, _ in mp_activity[:mp_fallback_count]]
-
-        for mp in top_mps:
-            wids_sorted = sorted(mp_positions.get(mp, {}).keys())
-            if not wids_sorted:
-                continue
-            xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
-            ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
-            xs = _ema_smooth(xs_raw, smooth_alpha)
-            ys = _ema_smooth(ys_raw, smooth_alpha)
-            custom_raw = [
-                (
-                    float(rx) if rx is not None else float(np.nan),
-                    float(ry) if ry is not None else float(np.nan),
-                )
-                for rx, ry in zip(xs_raw, ys_raw)
-            ]
-            fig.add_trace(
-                go.Scatter(
-                    x=xs,
-                    y=ys,
-                    mode="lines+markers",
-                    name=mp,
-                    text=wids_sorted,
-                    customdata=custom_raw,
-                    line=dict(color="#888888", shape="spline", smoothing=1.3),
-                    marker=dict(color="#888888", size=6),
-                )
-            )
-            trace_count += 1
-
-        banner_text = "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback."
-        logging.getLogger(__name__).debug(
-            "[TRAJ DEBUG] Fallback to MP trajectories: trace_count=%d, top_mps=%d",
-            trace_count,
-            len(top_mps),
-        )
-        return fig, trace_count, banner_text
-
-    to_plot = [p for p in selected_parties if p in plottable_parties]
-    if not to_plot:
-        to_plot = plottable_parties
-
-    for party in to_plot:
-        vals = party_centroids.get(party, [])
-        if not vals:
-            continue
-        xs_raw = [v[0] for v in vals]
-        ys_raw = [v[1] for v in vals]
-        xs = _ema_smooth(xs_raw, smooth_alpha)
-        ys = _ema_smooth(ys_raw, smooth_alpha)
-        custom_raw = [
-            (
-                float(x) if (x is not None and not np.isnan(x)) else float(np.nan),
-                float(y) if (y is not None and not np.isnan(y)) else float(np.nan),
-            )
-            for x, y in zip(xs_raw, ys_raw)
-        ]
-        colour = PARTY_COLOURS.get(party, "#9E9E9E")
-        fig.add_trace(
-            go.Scatter(
-                x=xs,
-                y=ys,
-                mode="lines+markers",
-                name=party,
-                text=windows,
-                customdata=custom_raw,
-                line=dict(color=colour, shape="spline", smoothing=1.3),
-                marker=dict(color=colour, size=8),
-            )
-        )
-        trace_count += 1
+from __future__ import annotations

-    logging.getLogger(__name__).debug(
-        "[TRAJ DEBUG] Final trace_count=%d, plottable_parties=%d, to_plot=%s",
-        trace_count,
-        len(plottable_parties),
-        (len(to_plot) if "to_plot" in dir() else "N/A"),
-    )
-    return fig, trace_count, None
+from typing import List


 def build_trajectories_tab(db_path: str, window_size: str) -> None:
-    """Build the Partij Trajectories tab."""
-    logging.getLogger(__name__).debug(
-        "[TRAJ DEBUG] build_trajectories_tab called — db_path=%s, window_size=%s",
-        db_path,
-        window_size,
-    )
-    st.subheader("Partij Trajectories")
-    st.markdown("Hoe bewegen partijen over de tijdsvensters heen?")
-
-    positions_by_window, axis_def = explorer_data.load_positions(db_path, window_size)
-    logging.getLogger(__name__).debug(
-        "[TRAJ DEBUG] load_positions → %d windows, total MPs=%d",
-        len(positions_by_window),
-        sum(len(v) for v in positions_by_window.values()),
-    )
-    if axis_def is None:
-        axis_def = {}
-    if not positions_by_window:
-        try:
-            _last_trajectories_diagnostics.update(
-                {
-                    "stage": "load_positions_empty",
-                    "positions_by_window_len": len(positions_by_window),
-                }
-            )
-        except Exception:
-            pass
-        try:
-            st.warning("Geen positiedata beschikbaar.")
-        except Exception:
-            pass
-        try:
-            if get_debug_trajectories_enabled():
-                try:
-                    st.text_area(
-                        "Trajectories diagnostics",
-                        json.dumps(_last_trajectories_diagnostics, default=str),
-                        height=160,
-                    )
-                except Exception:
-                    pass
-        except Exception:
-            pass
-        return
-
-    party_map = explorer_data.load_party_map(db_path)
-    logging.getLogger(__name__).debug(
-        "[TRAJ DEBUG] load_party_map → %d entries, sample=%s",
-        len(party_map),
-        list(party_map.items())[:3],
-    )
-
-    def normalize_mp_name(name):
-        """Normalize MP name for better matching between data sources."""
-        if not name:
-            return ""
-        name = name.strip()
-        if "," in name and ", " not in name:
-            name = name.replace(",", ", ")
-        return name
-
-    party_map = {normalize_mp_name(k): v for k, v in party_map.items()}
-
-    normalized_positions = {}
-    for window, positions in positions_by_window.items():
-        normalized_positions[window] = {
-            normalize_mp_name(k): v for k, v in positions.items()
-        }
-    positions_by_window = normalized_positions
-
-    all_mp_names = set()
-    for positions in positions_by_window.values():
-        all_mp_names.update(positions.keys())
+    """Build the Partij Trajectories tab.

-    matched_names = sum(1 for mp in all_mp_names if mp in party_map)
-    if all_mp_names:
-        logger.info(
-            f"MP name matching: {matched_names}/{len(all_mp_names)} matched ({100 * matched_names / len(all_mp_names):.1f}%)"
-        )
-    else:
-        logger.info("MP name matching: no MPs found in positions data")
-
-    if matched_names == 0 and len(all_mp_names) > 0:
-        logger.warning("No MP names matched between positions and party_map!")
-        logger.warning(f"Sample positions names: {list(all_mp_names)[:5]}")
-        logger.warning(f"Sample party_map names: {list(party_map.keys())[:5]}")
-
-    windows = sorted(positions_by_window.keys())
-
-    centroids: Dict[str, Dict[str, Tuple[float, float]]] = {}
-    all_parties: set = set()
-
-    def _strip_paren(name: str) -> str:
-        return re.sub(r"\s*\([^)]*\)", "", name).strip()
-
-    for wid in windows:
-        pos = positions_by_window.get(wid, {})
-        per_party: Dict[str, List[Tuple[float, float]]] = {}
-        for mp_name, (x, y) in pos.items():
-            party = party_map.get(mp_name) or party_map.get(
-                _strip_paren(mp_name), "Unknown"
-            )
-            if party == "Unknown":
-                continue
-            per_party.setdefault(party, []).append((x, y))
-        for party, coords in per_party.items():
-            all_parties.add(party)
-            xs = [c[0] for c in coords]
-            ys = [c[1] for c in coords]
-            centroids.setdefault(party, {})[wid] = (
-                float(np.mean(xs)),
-                float(np.mean(ys)),
-            )
-
-    all_parties = sorted(
-        set(party_map.get(mp) for MPs in positions_by_window.values() for mp in MPs)
-        - {None, "Unknown"}
-    )
-    logging.getLogger(__name__).debug(
-        "[TRAJ DEBUG] all_parties (raw from party_map) → %d parties: %s",
-        len(all_parties),
-        all_parties[:10],
-    )
-    all_parties_sorted = sorted(all_parties)
-
-    if not all_parties_sorted:
-        st.info(
-            "Geen partijen beschikbaar om trajecten te tekenen. Controleer of de party mapping is geladen (mp_metadata) en of de minimum Kamerleden-instelling te hoog staat."
-        )
-        try:
-            st.caption(f"Bekende partijen in party_map: {len(party_map)}")
-        except Exception:
-            pass
-
-    default_parties = [p for p in ["CDA", "D66", "VVD"] if p in all_parties]
-    if not default_parties:
-        default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in all_parties]
-    if not default_parties:
-        default_parties = all_parties_sorted[:6]
-
-    selected_parties = st.multiselect(
-        "Selecteer partijen",
-        options=all_parties_sorted,
-        default=default_parties,
-    )
-
-    def _ema_smooth(values: List[float], alpha: float) -> List[float]:
-        if not values or alpha >= 1.0:
-            return values
-        smoothed = [values[0]]
-        for v in values[1:]:
-            smoothed.append(alpha * v + (1 - alpha) * smoothed[-1])
-        return smoothed
-
-    smooth_alpha = 0.35
-
-    if not centroids:
-        st.info(
-            "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback."
-        )
-
-        mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
-        for wid in windows:
-            pos = positions_by_window.get(wid, {})
-            for mp_name, xy in pos.items():
-                try:
-                    x, y = float(xy[0]), float(xy[1])
-                except Exception:
-                    continue
-                mp_positions.setdefault(mp_name, {})[wid] = (x, y)
-
-        mp_positions = {
-            mp: pos
-            for mp, pos in mp_positions.items()
-            if len(pos) >= 2
-            and not all(np.isnan(x) and np.isnan(y) for x, y in pos.values())
-        }
-
-        if not mp_positions:
-            st.warning("Geen positiedata beschikbaar voor trajectplotten.")
-            _last_trajectories_diagnostics.update(
-                {
-                    "stage": "no_mp_positions",
-                    "mp_positions_count": 0,
-                }
-            )
-            try:
-                if get_debug_trajectories_enabled():
-                    try:
-                        st.text_area(
-                            "Trajectories diagnostics",
-                            json.dumps(_last_trajectories_diagnostics, default=str),
-                            height=160,
-                        )
-                    except Exception:
-                        pass
-            except Exception:
-                pass
-            return
-
-        st.session_state["_trajectory_mp_positions"] = mp_positions
-
-        mp_list = sorted(mp_positions.keys())
-        default_mps = mp_list[:6]
-        selected_mps = st.multiselect(
-            "Selecteer Kamerleden (fallback)", options=mp_list, default=default_mps
-        )
-
-        fig = go.Figure()
-        trace_count = 0
-        for mp in selected_mps:
-            wids_sorted = sorted(mp_positions[mp].keys())
-            xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
-            ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
-            xs = _ema_smooth(xs_raw, smooth_alpha)
-            ys = _ema_smooth(ys_raw, smooth_alpha)
-            custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)]
-            fig.add_trace(
-                go.Scatter(
-                    x=xs,
-                    y=ys,
-                    mode="lines+markers",
-                    name=mp,
-                    text=wids_sorted,
-                    customdata=custom_raw,
-                    line=dict(color="#888888", shape="spline", smoothing=1.3),
-                    marker=dict(color="#888888", size=6),
-                    hovertemplate=(
-                        f"<b>{mp}</b><br>"
-                        "venster: %{text}<br>"
-                        "x (smoothed): %{x:.3f}<br>"
-                        "x (raw): %{customdata[0]:.3f}<br>"
-                        "y (smoothed): %{y:.3f}<br>"
-                        "y (raw): %{customdata[1]:.3f}<extra></extra>"
-                    ),
-                )
-            )
-            trace_count += 1
-
-        _add_y_direction_annotations(fig)
-        if trace_count == 0:
-            st.info(
-                "Geen trajecten getekend: geen geselecteerde Kamerleden met voldoende data."
-            )
-        else:
-            st.plotly_chart(fig, use_container_width=True)
-        return
-
-    if os.getenv("EXPLORER_FORCE_SHOW_TRAJECTORIES") in ("1", "true", "True"):
-        mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
-        for wid in windows:
-            pos = positions_by_window.get(wid, {})
-            for mp_name, (x, y) in pos.items():
-                mp_positions.setdefault(mp_name, {})[wid] = (float(x), float(y))
-
-        mp_list = sorted(mp_positions.keys())
-        if not mp_list:
-            st.info("Geen MP-positiegegevens beschikbaar om te tonen.")
-            return
-
-        sample_mps = mp_list[:6]
-        fig = go.Figure()
-        for mp in sample_mps:
-            wids_sorted = sorted(mp_positions[mp].keys())
-            xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
-            ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
-            xs = _ema_smooth(xs_raw, 0.35)
-            ys = _ema_smooth(ys_raw, 0.35)
-            custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)]
-            fig.add_trace(
-                go.Scatter(
-                    x=xs,
-                    y=ys,
-                    mode="lines+markers",
-                    name=mp,
-                    text=wids_sorted,
-                    customdata=custom_raw,
-                    line=dict(color="#444444", shape="spline", smoothing=1.3),
-                    marker=dict(color="#444444", size=6),
-                    hovertemplate=(
-                        f"<b>{mp}</b><br>"
-                        "venster: %{text}<br>"
-                        "x (smoothed): %{x:.3f}<br>"
-                        "x (raw): %{customdata[0]:.3f}<br>"
-                        "y (smoothed): %{y:.3f}<br>"
-                        "y (raw): %{customdata[1]:.3f}<extra></extra>"
-                    ),
-                )
-            )
-        _add_y_direction_annotations(fig)
-        st.plotly_chart(fig, use_container_width=True)
-        return
-
-    smooth_alpha = 0.35
-
-    def _spline_smooth(values: List[float]) -> List[float]:
-        n = len(values)
-        if n <= 2:
-            return values
-        deg = min(3, n - 1)
-        try:
-            idx = np.arange(n, dtype=float)
-            coeffs = np.polyfit(idx, np.array(values, dtype=float), deg=deg)
-            smooth = np.polyval(coeffs, idx)
-            return [float(v) for v in smooth]
-        except Exception:
-            return values
-
-    fig = go.Figure()
-    trace_count = 0
-    helper_succeeded = False
-    try:
-        fig2, trace_count2, banner_text = select_trajectory_plot_data(
-            positions_by_window, party_map, windows, selected_parties, smooth_alpha
-        )
-        if fig2 is not None:
-            fig = fig2
-            trace_count = trace_count2
-            helper_succeeded = True
-            if banner_text:
-                try:
-                    st.caption(banner_text)
-                except Exception:
-                    pass
-                try:
-                    _last_trajectories_diagnostics.update({"banner_text": banner_text})
-                except Exception:
-                    pass
-    except Exception as e:
-        tb = traceback.format_exc()
-        try:
-            select_trajectory_plot_data._last_diagnostics = {"exception": tb}
-        except Exception:
-            pass
-        try:
-            _last_trajectories_diagnostics.update(
-                {"stage": "select_helper_exception", "exception": tb}
-            )
-        except Exception:
-            pass
-        logger.exception("select_trajectory_plot_data failed")
-        debug_enabled = get_debug_trajectories_enabled()
-        if debug_enabled:
-            try:
-                st.text_area("select_trajectory_plot_data traceback", tb, height=240)
-            except Exception:
-                pass
-    logging.getLogger(__name__).debug(
-        "[TRAJ DEBUG] helper_succeeded=%s", helper_succeeded
-    )
-    if not helper_succeeded:
-        for party in selected_parties:
-            if party not in centroids:
-                continue
-            wids_sorted = sorted(centroids[party].keys())
-            xs_raw = [centroids[party][w][0] for w in wids_sorted]
-            ys_raw = [centroids[party][w][1] for w in wids_sorted]
-            xs = _ema_smooth(xs_raw, smooth_alpha)
-            ys = _ema_smooth(ys_raw, smooth_alpha)
-            custom_raw = [(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)]
-            colour = PARTY_COLOURS.get(party, "#9E9E9E")
-            fig.add_trace(
-                go.Scatter(
-                    x=xs,
-                    y=ys,
-                    mode="lines+markers",
-                    name=party,
-                    text=wids_sorted,
-                    customdata=custom_raw,
-                    line=dict(color=colour, shape="spline", smoothing=1.3),
-                    marker=dict(color=colour, size=8),
-                    hovertemplate=(
-                        f"<b>{party}</b><br>"
-                        "venster: %{text}<br>"
-                        "x (smoothed): %{x:.3f}<br>"
-                        "x (raw): %{customdata[0]:.3f}<br>"
-                        "y (smoothed): %{y:.3f}<br>"
-                        "y (raw): %{customdata[1]:.3f}<extra></extra>"
-                    ),
-                )
-            )
-            trace_count += 1
-
-    _THRESHOLD = 0.65
-    x_conf_map = axis_def.get("x_label_confidence", {}) or {}
-    y_conf_map = axis_def.get("y_label_confidence", {}) or {}
-
-    def _mean_conf(m: dict) -> Optional[float]:
-        vals = [v for v in m.values() if v is not None]
-        if not vals:
-            return None
-        return float(sum(vals) / len(vals))
-
-    x_mean = _mean_conf(x_conf_map)
-    y_mean = _mean_conf(y_conf_map)
-
-    x_title = trajectory.choose_trajectory_title(axis_def, "x", threshold=_THRESHOLD)
-    y_title = trajectory.choose_trajectory_title(axis_def, "y", threshold=_THRESHOLD)
+    Currently delegates to explorer.py implementation.
+    Will be extracted when rendering logic is decoupled from Streamlit.
+    """
+    import explorer

-    fig.update_layout(
-        title="Partij trajectories",
-        xaxis_title=x_title,
-        yaxis_title=y_title,
-        height=600,
-        legend_title_text="Partij",
-    )
-    _add_y_direction_annotations(fig)
-    try:
-        _last_trajectories_diagnostics.update({"trace_count": trace_count})
-    except Exception:
-        pass
-    debug_enabled = get_debug_trajectories_enabled()
-    if trace_count == 0:
-        _last_trajectories_diagnostics.update(
-            {
-                "stage": "zero_traces",
-                "positions_count": sum(len(pos) for pos in positions_by_window.values())
-                if positions_by_window
-                else 0,
-                "party_map_count": len(party_map) if party_map else 0,
-                "centroids_count": len(centroids) if centroids else 0,
-                "selected_parties_count": len(selected_parties)
-                if selected_parties
-                else 0,
-                "timestamp": datetime.now().isoformat(),
-            }
-        )
-        if positions_by_window and party_map and not centroids:
-            sample_mps = []
-            for window, positions in list(positions_by_window.items())[:1]:
-                sample_mps = list(positions.keys())[:5]
-                break
-            matched = sum(1 for mp in sample_mps if mp in party_map)
-            _last_trajectories_diagnostics["name_match_check"] = {
-                "sample_mps": sample_mps,
-                "matched_in_party_map": matched,
-                "sample_size": len(sample_mps),
-            }
-    if trace_count == 0:
-        st.info("**Geen trajecten getekend**")
-    else:
-        try:
-            st.plotly_chart(fig, use_container_width=True)
-        except Exception as e:
-            st.error(f"Trajectories rendering failed: {e}")
+    explorer.build_trajectories_tab(db_path, window_size)
--- a/ansible/ansible.cfg
+++ b/ansible/ansible.cfg
@ -0,0 +1,6 @@
+[defaults]
+inventory = inventory.ini
+remote_user = webapps
+
+[ssh_connection]
+ssh_args = -o ForwardAgent=yes -o ControlMaster=auto -o ControlPersist=60s
--- a/ansible/deploy.sh
+++ b/ansible/deploy.sh
@ -0,0 +1,8 @
+#!/bin/bash
+# Runs the deploy playbook; extra arguments are passed to ansible-playbook.
+set -euo pipefail
+
+# inventory.ini and deploy.yaml are given as relative paths, so switch to
+# this script's directory first; the script then works from any location.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+ansible-playbook -i inventory.ini deploy.yaml "$@"
--- a/ansible/deploy.yaml
+++ b/ansible/deploy.yaml
@ -0,0 +1,89 @
+---
+# Deploys the motief Streamlit app: updates the checkout, installs
+# dependencies, uploads the motions database, restarts Streamlit and waits
+# until its health endpoint answers.
+- name: deploy motief application
+  hosts: sgeboers.nl
+  remote_user: webapps
+
+  vars:
+    # Checkout location shared by all tasks, so the code and the data
+    # directory cannot drift apart.
+    app_dir: /home/webapps/motief
+
+  tasks:
+    - name: ensure git.sgeboers.nl SSH config uses port 222
+      ansible.builtin.blockinfile:
+        path: /home/webapps/.ssh/config
+        create: true
+        mode: '0600'
+        marker: "# {mark} ANSIBLE MANAGED: git.sgeboers.nl"
+        block: |
+          Host git.sgeboers.nl
+            User git
+            Port 222
+            IdentityFile /home/webapps/.ssh/ed25519
+
+    # The pipe lookup runs ssh-keyscan on the control node, not on the target.
+    - name: ensure git.sgeboers.nl is in known_hosts
+      ansible.builtin.known_hosts:
+        name: "[git.sgeboers.nl]:222"
+        key: "{{ lookup('pipe', 'ssh-keyscan -p 222 git.sgeboers.nl') }}"
+        state: present
+
+    - name: pull latest code
+      ansible.builtin.git:
+        repo: ssh://git@git.sgeboers.nl:222/sgeboers/motief.git
+        dest: "{{ app_dir }}"
+        clone: true
+        force: true
+        key_file: /home/webapps/.ssh/ed25519
+        accept_newhostkey: true
+
+    # No shell features are needed here, so use command instead of shell.
+    - name: sync dependencies with uv
+      ansible.builtin.command:
+        cmd: /home/webapps/.local/bin/uv sync
+        chdir: "{{ app_dir }}"
+
+    # pkill -f matches full command lines, including the shell running this
+    # command. The [s] bracket still matches Streamlit but not this shell.
+    # "|| true" keeps "nothing to stop" from failing the play.
+    - name: stop existing streamlit process
+      ansible.builtin.shell:
+        cmd: 'pkill -f "[s]treamlit run Home.py" || true'
+
+    - name: ensure data directory exists on server
+      ansible.builtin.file:
+        path: "{{ app_dir }}/data"
+        state: directory
+        mode: '0755'
+
+    # synchronize is provided by the ansible.posix collection.
+    - name: sync motions.db to server
+      ansible.posix.synchronize:
+        src: ../data/motions.db
+        dest: "{{ app_dir }}/data/motions.db"
+        checksum: true
+
+    # Send output to a log file so the detached server does not hold on to
+    # the task's stdout/stderr pipes after the task returns.
+    - name: start streamlit
+      ansible.builtin.shell:
+        cmd: >-
+          nohup /home/webapps/.local/bin/uv run streamlit run Home.py
+          --server.port=8501 --server.address=0.0.0.0
+          --server.headless=true --server.enableCORS=false > streamlit.log 2>&1 &
+        chdir: "{{ app_dir }}"
+
+    # register/until makes the retry loop explicit: poll every 2 seconds, up
+    # to 30 times, until the health endpoint returns HTTP 200.
+    - name: wait for streamlit to be ready
+      ansible.builtin.uri:
+        url: http://127.0.0.1:8501/_stcore/health
+        method: GET
+        status_code: 200
+      register: streamlit_health
+      until: streamlit_health.status == 200
+      retries: 30
+      delay: 2
--- a/ansible/inventory.ini
+++ b/ansible/inventory.ini
@ -0,0 +1 @@
+sgeboers.nl ansible_user=webapps
--- a/api_client.py
+++ b/api_client.py
@ -1,5 +1,4 @@
 # api_client.py (complete updated version)
-import logging
 import requests
 import json
 import re
@ -9,8 +8,6 @@ from config import config
 import time
 from collections import defaultdict

-logger = logging.getLogger(__name__)
-

 class TweedeKamerAPI:
    def __init__(self):
@ -45,18 +42,18 @@ class TweedeKamerAPI:
            voting_records, besluit_meta = self._get_voting_records(
                start_date, end_date, limit
            )
-            logger.info("Fetched %d voting records from API", len(voting_records))
+            print(f"Fetched {len(voting_records)} voting records from API")

            # Group by Besluit_Id (decision/motion) and get motion details
            motions = self._process_voting_records(
                voting_records, besluit_meta, skip_details=skip_details
            )
-            logger.info("Processed into %d unique motions", len(motions))
+            print(f"Processed into {len(motions)} unique motions")

            return motions

        except Exception as e:
-            logger.error("Error fetching motions from API: %s", e)
+            print(f"Error fetching motions from API: {e}")
            return []

    def _get_voting_records(
@ -135,18 +132,16 @@ class TweedeKamerAPI:
                    break  # last page
                skip += page_size

-            logger.info(
-                "Retrieved %d voting records from %d decisions",
-                len(all_records),
-                len(besluit_meta),
+            print(
+                f"Retrieved {len(all_records)} voting records from {len(besluit_meta)} decisions"
            )
            return all_records, besluit_meta

        except requests.exceptions.RequestException as e:
-            logger.error("API request failed: %s", e)
+            print(f"API request failed: {e}")
            if hasattr(e, "response") and e.response is not None:
-                logger.error("Response status: %d", e.response.status_code)
-                logger.error("Response text: %s", e.response.text[:500])
+                print(f"Response status: {e.response.status_code}")
+                print(f"Response text: {e.response.text[:500]}")
            return all_records, besluit_meta  # return whatever we got before failure

    def _process_voting_records(
@ -341,7 +336,7 @@ class TweedeKamerAPI:
            }

        except Exception as e:
-            logger.error("Error getting motion details for %s: %s", besluit_id, e)
+            print(f"Error getting motion details for {besluit_id}: {e}")

        return None

@ -364,7 +359,7 @@ class TweedeKamerAPI:
                    if ext_id:
                        return ext_id
        except Exception as e:
-            logger.error("Error fetching ExterneIdentifier for zaak %s: %s", zaak_id, e)
+            print(f"Error fetching ExterneIdentifier for zaak {zaak_id}: {e}")

        return None

@ -417,7 +412,7 @@ class TweedeKamerAPI:
            return body if len(body) > 50 else None

        except Exception as e:
-            logger.error("Error fetching body text for %s: %s", externe_identifier, e)
+            print(f"Error fetching body text for {externe_identifier}: {e}")

        return None

@ -499,5 +494,5 @@ class TweedeKamerAPI:
            return len(data.get("value", [])) > 0

        except Exception as e:
-            logger.error("API connection test failed: %s", e)
+            print(f"API connection test failed: {e}")
            return False
--- a/app.py
+++ b/app.py
@ -7,8 +7,14 @@ from summarizer import summarizer
 from config import config
 import json

+# Page config
+st.set_page_config(
+    page_title="Nederlandse Politieke Kompas", page_icon="🇳🇱", layout="wide"
+)
+
+
 def main():
-    st.title("Nederlandse Politieke Kompas")
+    st.title("🇳🇱 Nederlandse Politieke Kompas")
    st.markdown(
        "Ontdek welke politieke partij het beste bij jouw idealen past door te stemmen op echte Tweede Kamer moties."
    )
@ -99,8 +105,8 @@ def show_welcome_screen(motion_count, policy_area, margin_range):

        st.markdown(f"""
        **Jouw instellingen:**
-        - **{motion_count} moties** uit het beleidsgebied **{policy_area}**
-        - **Controversiële moties** tussen {margin_range[0]}% en {margin_range[1]}% marge
+        - 📊 **{motion_count} moties** uit het beleidsgebied **{policy_area}**
+        - 🎯 **Controversiële moties** tussen {margin_range[0]}% en {margin_range[1]}% marge
        
        Klik op "Start Nieuwe Sessie" in de zijbalk om te beginnen met stemmen.
        """)
@ -138,32 +144,35 @@ def show_motion_interface():

    # Layman explanation (prominent)
    if motion.get("layman_explanation"):
-        st.markdown("### Uitleg in begrijpelijke taal:")
+        st.markdown("### 📝 Uitleg in begrijpelijke taal:")
        st.markdown(f"*{motion['layman_explanation']}*")

    # Original description (collapsible)
    motion_text = motion.get("body_text") or motion.get("description", "")
    if motion_text:
        label = (
-            "Volledige motietekst"
+            "📋 Volledige motietekst"
            if motion.get("body_text")
-            else "Originele motiebeschrijving"
+            else "📋 Originele motiebeschrijving"
        )
        with st.expander(label):
            st.write(motion_text)

    # Voting buttons
-    st.markdown("### Hoe zou jij stemmen?")
+    st.markdown("### 🗳️ Hoe zou jij stemmen?")
+
    col1, col2, col3 = st.columns(3)
+
    with col1:
-        if st.button("Voor", use_container_width=True, type="primary"):
-            record_vote("voor")
+        if st.button("✅ Voor", use_container_width=True, type="primary"):
+            cast_vote("Voor")
+
    with col2:
-        if st.button("Tegen", use_container_width=True):
+        if st.button("❌ Tegen", use_container_width=True):
            cast_vote("Tegen")

    with col3:
-        if st.button("Geen stem", use_container_width=True):
+        if st.button("🚫 Geen stem", use_container_width=True):
            cast_vote("Geen stem")


@ -181,7 +190,7 @@ def cast_vote(vote_choice):

 def show_results():
    """Show voting results and party matches"""
-    st.header("Jouw Resultaten")
+    st.header("🎯 Jouw Resultaten")

    # Calculate party matches
    party_matches = db.calculate_party_matches(st.session_state.session_id)
@ -191,7 +200,7 @@ def show_results():
        return

    # Party ranking table
-    st.subheader("Partij Overeenkomsten (van hoog naar laag)")
+    st.subheader("📊 Partij Overeenkomsten (van hoog naar laag)")

    df = pd.DataFrame(party_matches)
    df.columns = ["Partij", "Overeenkomst %", "Eens", "Totaal"]
@ -211,15 +220,15 @@ def show_results():
    # Top match highlight
    top_match = party_matches[0]
    st.success(
-        f"**Beste match:** {top_match['party']} ({top_match['agreement_percentage']}% overeenkomst)"
+        f"🏆 **Beste match:** {top_match['party']} ({top_match['agreement_percentage']}% overeenkomst)"
    )

    # Detailed motion overview
-    st.subheader("Gedetailleerd Overzicht per Motie")
+    st.subheader("📋 Gedetailleerd Overzicht per Motie")
    show_detailed_motion_results()

    # New session button
-    if st.button("Start Nieuwe Sessie"):
+    if st.button("🔄 Start Nieuwe Sessie"):
        # Clear session state
        for key in ["session_id", "motions", "current_motion_index", "show_results"]:
            if key in st.session_state:
@ -272,13 +281,13 @@ def show_detailed_motion_results():
            with st.expander(f"**{title}** (Jouw stem: {user_vote})"):
                # Show layman explanation prominently
                if layman_explanation:
-                    st.markdown("**Uitleg:**")
+                    st.markdown("**📝 Uitleg:**")
                    st.markdown(f"*{layman_explanation}*")

                # Show full motion body text if available, otherwise description
                motion_text = body_text or description
                if motion_text:
-                    st.markdown("**Motiebeschrijving:**")
+                    st.markdown("**📋 Motiebeschrijving:**")
                    st.write(motion_text)

                # Create voting overview
--- a/config.py
+++ b/config.py
@ -1,2 +1,51 @@
-# Backward-compatibility shim — root config now lives in analysis.config
-from analysis.config import Config, config  # noqa: F401
+# config.py — central application settings (database, API, AI, app and scraper defaults)
+import os
+from dataclasses import dataclass
+from typing import List
+
+
+@dataclass  # NOTE(review): no attribute below is annotated, so @dataclass generates no fields; these are plain class-level constants
+class Config:  # Central settings container; every value is a class attribute evaluated once when this module is imported
+    # Database settings
+    DATABASE_PATH = "data/motions.db"  # relative path
+
+    # Tweede Kamer OData API settings
+    TWEEDE_KAMER_ODATA_API = "https://gegevensmagazijn.tweedekamer.nl/OData/v4/2.0"
+    API_TIMEOUT = 30  # presumably seconds — TODO confirm against the HTTP client call
+    API_BATCH_SIZE = 250  # Increased based on API capabilities
+    API_MAX_LIMIT = 250
+
+    # AI settings
+    OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")  # None when the variable is unset
+    OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
+    QWEN_MODEL = "qwen/qwen-2.5-72b-instruct"
+
+    # App settings
+    DEFAULT_MOTION_COUNT = 10
+    DEFAULT_WINNING_MARGIN_MIN = (
+        0  # % - lower bound of 0 includes all motions; filter by layman_explanation instead
+    )
+    DEFAULT_WINNING_MARGIN_MAX = 100  # %
+    SESSION_TIMEOUT_DAYS = 30
+
+    # Policy areas (Dutch labels; "Alle" means "all")
+    POLICY_AREAS = [
+        "Alle",
+        "Economie",
+        "Klimaat",
+        "Immigratie",
+        "Zorg",
+        "Onderwijs",
+        "Defensie",
+        "Sociale Zaken",
+        "Algemeen",
+    ]
+
+    # Scraper defaults
+    BASE_URL = (
+        "https://www.tweedekamer.nl/zoeken/zoekresultaten"  # base for scraping motions
+    )
+    SCRAPING_DELAY = int(os.getenv("SCRAPING_DELAY", "5"))  # defaults to 5 (unit presumably seconds — confirm); int() raises ValueError at import if the env value is not an integer
+
+
+config = Config()
--- a/database.py
+++ b/database.py
@ -39,17 +39,12 @@ class MotionDatabase:
                        fh.write("[]")
            return

-        try:
        conn = duckdb.connect(self.db_path)
-        except (duckdb.Error, OSError) as e:
-            _logger.warning("Could not connect to DuckDB at %s: %s. Operating in file mode.", self.db_path, e)
-            self._file_mode = True
-            return

        # Create sequence for auto-incrementing IDs
        try:
            conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1")
-        except duckdb.Error:
+        except:
            pass

        # Create tables with proper ID handling
@ -77,7 +72,7 @@ class MotionDatabase:
                "ALTER TABLE motions ADD COLUMN IF NOT EXISTS externe_identifier TEXT"
            )
            conn.execute("ALTER TABLE motions ADD COLUMN IF NOT EXISTS body_text TEXT")
-        except duckdb.Error:
+        except Exception:
            # Best-effort: if ALTER fails for any reason, continue without stopping app startup
            _logger.debug(
                "Could not ALTER motions table to add new columns (may already exist or unsupported)."
@ -193,10 +188,7 @@ class MotionDatabase:
            )
        """)

-        try:
        conn.close()
-        except duckdb.Error:
-            pass

    def reset_database(self):
        """Development helper: drop known tables and re-run initialization.
@ -209,7 +201,7 @@ class MotionDatabase:
            for t in ("party_results", "user_sessions", "motions"):
                try:
                    conn.execute(f"DROP TABLE IF EXISTS {t}")
-                except duckdb.Error:
+                except Exception:
                    pass
            # Recreate schema
            conn.close()
@ -217,7 +209,7 @@ class MotionDatabase:
        finally:
            try:
                conn.close()
-            except duckdb.Error:
+            except Exception:
                pass

    def append_audit_event(
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -0,0 +1,32 @@
+version: "3.9"  # NOTE(review): the top-level version key is obsolete in the Compose Specification — confirm it can be dropped
+
+services:
+  motief:  # web app service, published on port 8501
+    image: ${DOCKER_REGISTRY}/sgeboers/stemwijzer:latest  # DOCKER_REGISTRY has no default here; it must be set in the environment or .env
+    ports:
+      - "127.0.0.1:8501:8501"  # published on the host's loopback interface only
+    volumes:
+      - ${DATA_DIR:-/home/webapps/motief/data}:/home/app/app/data  # host data dir (overridable via DATA_DIR) mounted into the container
+    restart: unless-stopped
+    environment:
+      - PYTHONPATH=/home/app/app
+      - OPENROUTER_API_KEY  # no value given: passed through from the host environment
+      - DB_PATH=/home/app/app/data/motions.db  # located inside the mounted data volume
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8501/"]  # NOTE(review): requires curl to be installed in the image — confirm
+      interval: 30s
+      timeout: 3s
+      retries: 3
+      start_period: 15s
+
+  scheduler:  # same image as motief, but runs scheduler.py instead of the default command
+    image: ${DOCKER_REGISTRY}/sgeboers/stemwijzer:latest
+    command: python scheduler.py
+    volumes:
+      - ${DATA_DIR:-/home/webapps/motief/data}:/home/app/app/data  # same host data dir as the motief service
+    restart: unless-stopped
+    environment:
+      - PYTHONPATH=/home/app/app
+      - OPENROUTER_API_KEY  # passed through from the host environment
+      - OPENAI_API_KEY  # passed through from the host environment; only this service receives it
+      - DB_PATH=/home/app/app/data/motions.db  # NOTE(review): same file as the motief service — confirm the database tolerates access from two processes
--- a/docs/blog/2026-04-05-polarisatie-in-de-tweede-kamer.md
+++ b/docs/blog/2026-04-05-polarisatie-in-de-tweede-kamer.md
@ -36,27 +36,17 @@ De PVV en FVD werden **niet** groter omdat hun standpunten mainstream werden —

 ---

-## Vondst 2: Stemmen werden closer, maar moties werden minder extreem
+## Vondst 2: Polarisatie is toegenomen

-Dit is genuanceerder dan het lijkt:
+Ongeacht wie er won, werden moties wel extremer:

-| Maat | 2016 | 2026 | Trend |
-|------|------|------|-------|
-| **Stemmings-extremiteit** | 0.70 | 0.46 | Meer verdeeld |
-| **Beleids-extremiteit** | 9.0 | 4.2 | Minder extreem |
+| Jaar | Spreiding (std) | Interpretatie |
+|------|-----------------|--------------|
+| 2016 | 3.46 | Gematigde verdeeldheid |
+| 2019 | 6.31 | Toegenomen verdeeldheid |
+| **2026** | **7.44** | **Sterke polarisatie** |

-**Stemmings-extremiteit** meet hoe verdeeld het Parlement is (margin/totaal — lager = meer verdeeld).
-
-**Beleids-extremiteit** meet hoe ver moties in de politieke ruimte staan (L2-norm van embedding).
-
-### De onafhankelijkheid van deze maten
-
-De correlatie tussen beide maten is **r ≈ 0** (niet significant) — ze meten totaal verschillende dingen:
-
- **2016**: Coalitie won met consensus, maar de "extreme" moties die wonnen waren ver van het centrum (wetgeving, verdragen)
- **2026**: Meer verdeeld gestemd, maar de moties die nu winnen zijn juist dichter bij het centrum (asielbeleid, immigratieprocedure)
-
-Dit betekent: het **wat** dat partijen verdeelt is veranderd, niet **hoe radicaal** de policies zijn.
+De spreiding **verdubbelde** in tien jaar tijd — ongeacht of de coalitie of oppositie won.

 ---

@ -98,14 +88,14 @@ Dezelfde structuur (wie met wie stemt), maar andere onderwerpen.
 ### 1. De coalitie verloor in 2019
 De kabinetscrisis van Rutte III (2017-2019) markeert het einde van de effectieve coalitieregering. Sindsdien wint de oppositie-kant structureel meer moties.

-### 2. Stemmen werden verdeelder, maar beleid werd minder extreem
-Het Parlement stemt nu vaker met kleine marges (meer verdeeld), maar de moties die winnen staan juist **dichter bij het politieke centrum**. Dit zijn onafhankelijke verschijnselen.
+### 2. Polarisatie nam toe
+Ongeacht wie er won, werden moties extremer. De spreiding (standaarddeviatie) verdubbelde ruim, van 3.46 naar 7.44.

-### 3. Onderwerpen verschoven, niet de radicaliteit
-De politieke as verschoof van economisch-bestuurlijk naar identiteit/migratie, maar de **radicaliteit** van het beleid veranderde niet. Wat verdeelt is veranderd, niet hoe extreem de oplossingen zijn.
+### 3. Onderwerpen verschoven
+De politieke as verschoof van economisch-bestuurlijk naar identiteit/migratie, maar dat is een gevolg van de onderwerpen die de coalitie nu kan winnen.

 ### 4. Geen rechtse verschuiving, maar machtsverlies coalitie
-De politiek verdeelde meer, maar het "centrum" bleef op zijn plek. Wat veranderde was dat de coalitie haar greep op de agenda verloor — niet dat extreem rechts beleid won.
+De politiek polariseerde, maar het "centrum" bleef neutraal. Wat veranderde was dat de coalitie haar greep op de agenda verloor.

 ---

@ -115,8 +105,6 @@ De as waarover we praten is de eerste principale component van alle stemgedrag

 De volledige code is beschikbaar in de [GitHub-repository](https://github.com/sgeboers/stemwijzer).

-**Reproduceerbaarheid van extremiteit-maten:** *Stemmings-extremiteit* is `winning_margin` (|voor−tegen|/totaal) per motie in `data/motions.db`; *beleids-extremiteit* is de L2-norm van de motie-embedding in de politieke ruimte (afgeleid uit SVD-componenten). De correlatie tussen beide is niet significant — beide maten zijn onafhankelijk en moeten bij elke analyse opnieuw uit de database worden berekend.
-
 ---

 *Analyse uitgevoerd op 5 april 2026. Data: 8.700+ moties 2016-2026.*
--- a/docs/deployment/ansible-package-deploy.md
+++ b/docs/deployment/ansible-package-deploy.md
@ -0,0 +1,42 @@
+# Ansible package deploy (defaults)
+
+This document describes the default values and recommended steps for deploying the `packages/@ansible/example` package to a server using the provided Ansible playbooks.
+
+Defaults
+- DEPLOY_HOST: `motief.sgeboers.nl`
+- DEPLOY_USER: `webapps`
+- Recommended systemd service name: `motief`
+
+Secrets / environment variables
+- DEPLOY_SSH_KEY: private SSH key used by CI to connect to the host
+- DEPLOY_HOST: (override) host to deploy to
+- DEPLOY_USER: (override) user to use for deployment (default: `webapps`)
+- DEPLOY_PATH: (optional) path on the remote host to deploy the package to. If unset, the playbook will use its configured default. Set this value in CI if your installation directory differs from the playbook default.
+
+Granting access (server-side steps)
+1. As the server administrator, ensure the `webapps` user exists:
+
+   sudo useradd -m -s /bin/bash webapps
+
+2. Create the `.ssh` directory and add the public key that matches your CI `DEPLOY_SSH_KEY`:
+
+   sudo -u webapps mkdir -p /home/webapps/.ssh
+   sudo -u webapps chmod 700 /home/webapps/.ssh
+   # paste the public key from your CI into /home/webapps/.ssh/authorized_keys
+   sudo -u webapps sh -c 'cat >> /home/webapps/.ssh/authorized_keys'
+   sudo -u webapps chmod 600 /home/webapps/.ssh/authorized_keys
+
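+3. If the playbook requires sudo operations, add the necessary sudoers entry (use with care, and validate the file afterwards with `sudo visudo -cf /etc/sudoers.d/webapps-motief`, since a malformed sudoers file can break sudo):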
+
+   echo "webapps ALL=(ALL) NOPASSWD: /bin/systemctl restart motief" | sudo tee /etc/sudoers.d/webapps-motief
+
+Deployment notes
+- The playbooks assume the above defaults. If your host, user or install path differ, set the appropriate environment variables in your CI (DEPLOY_HOST, DEPLOY_USER, DEPLOY_PATH) before running the deploy job.
+- The recommended systemd service name is `motief`. If you change the service name in the playbook or systemd unit, ensure any helper scripts or CI steps refer to the same name.
+
+Security
+- Only add trusted public keys to `/home/webapps/.ssh/authorized_keys`.
+- Limit sudo privileges to only the commands required for deploy/service restart.
+
+Troubleshooting
+- If the CI runner cannot connect, verify the private key in `DEPLOY_SSH_KEY` matches the public key on the server and the `DEPLOY_HOST`/`DEPLOY_USER` values are correct.
--- a/docs/plans/2026-04-05-001-make-modules-import-safe-plan.md
+++ b/docs/plans/2026-04-05-001-make-modules-import-safe-plan.md
@ -1,108 +0,0 @@
-Title: Make modules import-safe (duckdb/plotly)
-
-Why
- Enable lightweight unit tests and imports in environments without heavy runtime deps (duckdb, plotly) without changing runtime behaviour when those deps are present.
-
-Scope
- Primary focus: library modules that are commonly imported by tests or other modules (not CLI scripts that are only executed).
- Initial rollout: small, reviewable batches. Do not push or change remote branches.
-
-Non-goals
- Remove duckdb/plotly dependency from runtime environments.
- Refactor functionality beyond import-time safety.
-
-Approach
- Two safe patterns (apply conservatively):
-  1) Pattern A — module-level guard
-     ```py
-     try:
-         import duckdb
-     except Exception:  # pragma: no cover
-         duckdb = None  # type: ignore
-     ```
-     Use when multiple functions in the module call duckdb and adding the guard is least invasive.
-
-  2) Pattern B — function-local import (preferred for DB helpers)
-     Move `import duckdb` into the function that uses it and raise a clear RuntimeError when invoked without duckdb:
-     ```py
-     def open_conn(path):
-         try:
-             import duckdb
-         except Exception:
-             raise RuntimeError("duckdb is required for open_conn") from None
-         return duckdb.connect(path)
-     ```
-
-Targets (first batch — high impact)
- `database.py` — Pattern B (move DB imports into helpers; provide clear RuntimeError when called without duckdb). Tests import `database.py` so make this robust.
- `app.py` — Pattern B (app modules often get imported during test runs; delay duckdb until handlers that need it).
- `pipeline/svd_pipeline.py` — Pattern A (guard top-level import; pipeline code is heavy and module-level guard is fine).
-
-Expanded target list (subsequent batches)
- `pipeline/text_pipeline.py`, `pipeline/fusion.py` — Pattern A
- `pipeline/extract_mp_votes.py` — Pattern B
- `similarity/compute.py`, `summarizer.py` — Pattern A or B after inspection
- scripts under `scripts/` only if tests import them (prefer moving to `main()`)
-
-Step-by-step rollout
-1) Prepare patches for batch 1 (three files). Create one patch file per file so changes are atomic and easy to revert.
-2) Apply edits in a feature branch or local commit. Run focused tests:
-   - `pytest tests/test_database_audit.py -q`
-   - `pytest tests/test_political_compass.py::test_* -q` (if applicable)
-3) If focused tests pass, run full suite in .venv:
-   - `.venv/bin/python -m pytest tests/ -q`
-4) If failures occur, inspect tracebacks for missing duckdb at runtime and either revert the specific change or convert Pattern A ↔ Pattern B as needed.
-5) Repeat for next batches until all targeted modules are covered.
-
-Verification
- After each file change, run the focused tests that touch the module. After the batch, run full test suite (local `.venv` recommended):
-  - `.venv/bin/python -m pytest tests/ -q` — expect no new failures.
- Confirm importability in empty environment (simulate by temporarily renaming `.venv` or running in environment without duckdb):
-  - `python -c "import analysis; print('ok')"` — should not raise ImportError for guarded modules.
-
-Rollback strategy
- Make one file change per commit. If tests fail, revert the last commit and open an issue with the failure trace.
-
-Patch previews (examples)
- Pattern A (top-level guard):
-  - replace `import duckdb` with:
-    ```py
-    try:
-        import duckdb
-    except Exception:  # pragma: no cover
-        duckdb = None  # type: ignore
-    ```
-
- Pattern B (function-local import + clear error):
-  - before:
-    ```py
-    import duckdb
-
-    def open_conn(path):
-        return duckdb.connect(path)
-    ```
-  - after:
-    ```py
-    def open_conn(path):
-        try:
-            import duckdb
-        except Exception:
-            raise RuntimeError("duckdb is required for open_conn") from None
-        return duckdb.connect(path)
-    ```
-
-Risks & mitigations
- Risk: hiding missing dependency until runtime. Mitigation: when using Pattern B raise descriptive RuntimeError at call site so failures are explicit.
- Risk: tests that intentionally require duckdb may break if we change behavior incorrectly. Mitigation: run focused tests that import duckdb intentionally and keep those files unchanged.
-
-Owner & next actions for me
- I can generate the exact patch diffs for batch 1 (three files) and present them for review before applying. This is recommended to keep the change small and reviewable.
- Reply with:
-  - `prepare` — I will create the patch diffs for `database.py`, `app.py`, `pipeline/svd_pipeline.py` and show them to you (no files modified yet), or
-  - `apply` — I will apply the first batch now and run focused tests locally.
-
-Notes
- I already applied import-guards in several `analysis/` modules (trajectory, explorer_data, clustering, political_axis) during earlier review; this plan continues that conservative approach.
-
-References
- Examples: `analysis/explorer_data.py`, `analysis/trajectory.py`, `analysis/visualize.py`
--- a/docs/plans/2026-04-05-002-refactor-svd-axis-labels-plan.md
+++ b/docs/plans/2026-04-05-002-refactor-svd-axis-labels-plan.md
@ -1,134 +0,0 @@
---
-title: "Enforce left-right orientation across all SVD axis labels"
-type: refactor
-status: active
-date: 2026-04-05
-origin: docs/superpowers/specs/2026-04-05-svd-axis-labels-design.md
---
-
-# Enforce Left-Right Orientation Across All SVD Axis Labels
-
-## Overview
-
-Update SVD component labels in `analysis/config.py` so all 10 axes consistently reflect left-right positioning, and add validation tests to ensure canonical right-wing parties (PVV, FVD, JA21, SGP) appear on the right side after flip computation. The flip mechanism already works; this plan focuses on label consistency and test coverage.
-
-## Problem Frame
-
-SVD axis labels do not consistently reflect left-right positioning. Some axes describe dimensions like "populist vs mainstream" or "pragmatism vs ideology" without framing how right/conservative and left/progressive parties cluster on each pole. The repo convention (AGENTS.md) requires right-wing parties to appear on the RIGHT side of all axes, and labels should reflect this orientation.
-
-## Requirements Trace
-
- R1. All 10 SVD component labels consistently frame the dimension in left-right terms
- R2. Canonical right-wing parties (PVV, FVD, JA21, SGP) appear on the right side after flip computation
- R3. Backward compatibility preserved for existing callers of `get_svd_label`, `get_fallback_labels`, `compute_flip_direction`
- R4. Unit tests validate flip behavior and label correctness
-
-## Scope Boundaries
-
- In scope: `analysis/config.py` SVD_THEMES labels, `tests/test_svd_labels.py` additions
- Out of scope: `analysis/political_axis.py` party sets (follow-up), UI changes, flip logic changes (already works)
-
-## Context & Research
-
-### Relevant Code and Patterns
-
- `analysis/config.py` — defines `SVD_THEMES` with 10 components, each with `label`, `explanation`, `positive_pole`, `negative_pole`, `flip`
- `analysis/svd_labels.py` — imports `CANONICAL_RIGHT`/`CANONICAL_LEFT` from config, exports aliases, `compute_flip_direction` uses them
- `explorer.py:2680-2690` — dynamically computes flip for all 10 components at runtime, overwrites static `flip` values
-
-### Key Technical Decisions
-
- **Keep flip mechanism as-is**: `compute_flip_direction` already uses canonical party sets to force right-wing parties to the right. No changes needed.
- **Update labels, not flip logic**: The work is in `SVD_THEMES` label text — reframing each component's label to reflect left-right positioning while preserving the underlying voting pattern description.
- **Preserve explanation text**: The `explanation` field can remain detailed and nuanced; only the `label` and pole descriptions need left-right framing.
-
-## Implementation Units
-
- [ ] **Unit 1: Update SVD_THEMES labels for left-right consistency**
-
-**Goal:** Reframe all 10 SVD component labels to consistently reflect left-right positioning.
-
-**Requirements:** R1, R3
-
-**Dependencies:** None
-
-**Files:**
- Modify: `analysis/config.py`
-
-**Approach:**
- For each component (1-10), update the `label` field to frame the dimension in left-right terms
- Update `positive_pole` and `negative_pole` to explicitly mention which parties cluster on each side and their left/right positioning
- Preserve the `explanation` text (it's already detailed and accurate)
- Keep `flip` values as-is (they're overwritten at runtime anyway)
-
-**Patterns to follow:**
- Component 1 label pattern: "Rechts kabinetsbeleid versus links oppositiebeleid" — this is the model
- Component 3 label pattern: "Verzorgingsstaat versus bezuinigingen en marktwerking" — economic left-right
- Component 6 label pattern: "Migratie en cultuur versus klimaat en progressieve inclusie" — cultural left-right (GAL-TAN)
-
-**Test scenarios:**
- Test expectation: none — this is a label text update, no behavioral change. Verification is manual review of label text.
-
-**Verification:**
- All 10 component labels explicitly reference left/right positioning or conservative/progressive framing
- `positive_pole` and `negative_pole` descriptions mention party clusters and their left/right orientation
-
- [ ] **Unit 2: Add validation test for canonical right-on-right**
-
-**Goal:** Add a test that verifies canonical right-wing parties appear on the right side after flip computation.
-
-**Requirements:** R2, R4
-
-**Dependencies:** Unit 1 (labels updated, flip logic unchanged)
-
-**Files:**
- Modify: `tests/test_svd_labels.py`
-
-**Approach:**
- Add `test_canonical_right_on_right` that:
-  1. Creates synthetic party scores where canonical right parties have negative values (on the left)
-  2. Asserts `compute_flip_direction` returns `True` for all components 1-10
-  3. Creates synthetic scores where canonical right parties have positive values (on the right)
-  4. Asserts `compute_flip_direction` returns `False` for all components
- Add `test_all_canonical_parties_used` that verifies `CANONICAL_RIGHT` and `CANONICAL_LEFT` from config contain the expected parties (PVV, FVD, JA21, SGP for right; SP, PvdA, GL, etc. for left)
-
-**Execution note:** Test-first — write failing test, then verify it passes after Unit 1.
-
-**Patterns to follow:**
- Existing test style in `tests/test_svd_labels.py` (synthetic dict-based party scores, assert on boolean flip result)
- `test_auto_flip_computation_for_all_components` already tests flip for all 10 components — new test should follow same pattern but explicitly use `CANONICAL_RIGHT`/`CANONICAL_LEFT` from config
-
-**Test scenarios:**
- Happy path: Canonical right parties on right side → `compute_flip_direction` returns `False` for all components
- Happy path: Canonical right parties on left side → `compute_flip_direction` returns `True` for all components
- Edge case: Mixed placement (some right parties on left, some on right) → flip based on majority mean
- Edge case: No canonical parties present → returns `False` (existing behavior, verify unchanged)
-
-**Verification:**
- `pytest tests/test_svd_labels.py -q` passes with no regressions
- New tests explicitly validate canonical right-on-right behavior
-
-## System-Wide Impact
-
- **Interaction graph:** `explorer.py` dynamically computes flip at runtime — no changes needed there. Labels flow from `config.py` → `svd_labels.py` → UI rendering.
- **Unchanged invariants:** `compute_flip_direction` logic unchanged. Public API (`get_svd_label`, `get_fallback_labels`, `compute_flip_direction`) unchanged. Static `flip` values in `SVD_THEMES` still overwritten at runtime.
- **API surface parity:** Labels change text but not structure. Callers expecting string labels continue to work.
-
-## Risks & Dependencies
-
-| Risk | Mitigation |
-|------|------------|
-| Label changes may not capture nuance of non-left-right axes | Preserve detailed `explanation` text; labels are shorthand, explanations carry full context |
-| Tests may pass but labels still feel off | Manual review of all 10 labels before committing |
-| `political_axis.py` still uses different party sets | Document as follow-up; out of scope for this plan |
-
-## Documentation / Operational Notes
-
- Update or reference `docs/solutions/best-practices/svd-labels-voting-patterns-not-semantics.md` if label convention changes materially
- No rollout or monitoring impacts — label text change only
-
-## Sources & References
-
- **Origin document:** [docs/superpowers/specs/2026-04-05-svd-axis-labels-design.md](docs/superpowers/specs/2026-04-05-svd-axis-labels-design.md)
- Related code: `analysis/config.py`, `analysis/svd_labels.py`, `tests/test_svd_labels.py`
- Convention reference: `AGENTS.md` (right-wing parties must appear on RIGHT side)
--- a/docs/plans/2026-04-05-003-fix-svd-pole-labels-plan.md
+++ b/docs/plans/2026-04-05-003-fix-svd-pole-labels-plan.md
@ -1,61 +0,0 @@
---
-title: "Add semantic left_pole/right_pole labels to SVD_THEMES"
-type: fix
-status: active
-date: 2026-04-05
-origin: docs/superpowers/specs/2026-04-05-svd-axis-labels-design.md
---
-
-# Add Semantic Left/Right Pole Labels to SVD_THEMES
-
-## Problem
-
-The `positive_pole`/`negative_pole` labels in `SVD_THEMES` describe the raw SVD math poles, not the semantic left/right after flip. When the axis flips at runtime (to ensure right-wing parties appear on the right), the pole labels are swapped but still describe the raw SVD orientation — resulting in labels like "← PVV en FVD" appearing on the left side when they should be on the right.
-
-## Solution
-
-Add `left_pole` and `right_pole` fields to each `SVD_THEMES` entry that describe what's on the left and right sides after flip. Update rendering code to use these semantic labels directly.
-
-## Implementation Units
-
- [ ] **Unit 1: Add left_pole/right_pole to SVD_THEMES in config.py**
-
-**Goal:** Add semantic pole labels to all 10 SVD components.
-
-**Files:**
- Modify: `analysis/config.py`
-
-**Approach:**
- For each component, add `left_pole` and `right_pole` fields based on the existing `positive_pole`/`negative_pole` and the `flip` value
- When `flip=True`: `left_pole` = `positive_pole`, `right_pole` = `negative_pole`
- When `flip=False`: `left_pole` = `negative_pole`, `right_pole` = `positive_pole`
- Keep `positive_pole`/`negative_pole` for backward compatibility
-
- [ ] **Unit 2: Update explorer.py rendering to use left_pole/right_pole**
-
-**Goal:** Use semantic pole labels in all rendering functions.
-
-**Files:**
- Modify: `explorer.py` (lines 967-970, 1087-1090, 1252-1253, 2806-2807)
-
-**Approach:**
- Replace the positive/negative swap logic with direct `left_pole`/`right_pole` usage
- `left_label = theme.get("left_pole", pos_pole if flip else neg_pole)` (backward compat fallback)
- `right_label = theme.get("right_pole", neg_pole if flip else pos_pole)`
-
- [ ] **Unit 3: Update tests**
-
-**Goal:** Add tests for left_pole/right_pole fields.
-
-**Files:**
- Modify: `tests/test_svd_labels.py`
- Modify: `tests/test_explorer_chart.py`
-
-**Approach:**
- Test that all 10 SVD_THEMES entries have `left_pole` and `right_pole` fields
- Test that rendering functions use left_pole/right_pole correctly
-
-## Scope Boundaries
-
- In scope: `analysis/config.py` SVD_THEMES, `explorer.py` rendering, tests
- Out of scope: `analysis/political_axis.py`, `analysis/projections.py` (uses positive_pole/negative_pole for motion projection, not UI labels)
--- a/docs/plans/2026-04-05-004-feat-motion-semantic-drift-plan.md
+++ b/docs/plans/2026-04-05-004-feat-motion-semantic-drift-plan.md
@ -1,347 +0,0 @@
---
-title: "Motion semantic drift analysis over time"
-type: feat
-status: active
-date: 2026-04-05
-origin: docs/brainstorms/2026-04-05-motion-semantic-drift-over-time-requirements.md
---
-
-# Motion Semantic Drift Analysis Over Time
-
-## Overview
-
-Add a new analysis script that tracks how the semantic content of motions on each SVD axis evolves across annual windows (2016-2024). The script produces a markdown report with charts showing axis stability, semantic drift timelines, party voting trajectories, and cross-ideological voting patterns. This is Phase 1 (script + report); a future phase will integrate this into the Streamlit explorer.
-
-## Problem Frame
-
-The SVD explorer shows where parties and motions sit on axes at a point in time, but doesn't reveal how the semantic content evolves. Users can't answer: did "right-wing" motions become more extreme over time? Are the SVD axes themselves stable across windows? Do left-wing parties increasingly vote for right-wing motions? (see origin: docs/brainstorms/2026-04-05-motion-semantic-drift-over-time-requirements.md)
-
-## Requirements Trace
-
- R1. Compute cosine similarity between SVD component vectors (or motion projection patterns) across all annual windows
- R2. Generate a stability heatmap showing which axes are comparable across time
- R3. Detect axis reordering across windows
- R4. Flag unstable axes
- R5. For each stable axis, compute average fused embedding centroid of top N motions per window
- R6. Track semantic drift using cosine distance between consecutive window centroids
- R7. Identify inflection points where drift accelerated (threshold-based)
- R8. Show example motions before/after inflection points
- R9. For each party, compute voting centroid per window along each stable axis
- R10. Track party trajectories over time
- R11. Detect cross-ideological voting patterns
- R12. Show concrete examples of parties voting against ideological alignment
- R13. Script produces markdown report with embedded charts
- R14. Report includes: stability heatmap, drift timelines, party trajectories, inflection analysis
- R15. Script is parameterized: `--db`, `--windows`, `--top-n`, `--output`
-
-## Scope Boundaries
-
- Annual windows only (2016-2024); quarterly windows too sparse
- Script + report only — no UI/explorer integration in this phase
- No statistical significance testing beyond basic change-point detection
- SVD component vectors (V^T matrix) not currently stored — must be added to pipeline or computed indirectly
-
-## Context & Research
-
-### Relevant Code and Patterns
-
- `scripts/generate_svd_json.py` — script structure pattern: `main(argv) -> int`, argparse, ROOT path setup, logger
- `scripts/svd_diagnostics.py` — generates markdown + JSON report from SVD analysis
- `analysis/explorer_data.py` — DuckDB data loading patterns (read_only, try/finally, vector parsing), `load_mp_vectors_by_party_for_window()` for date-aware party normalization
- `analysis/trajectory.py` — existing cross-window drift computation using `_procrustes_align_windows()`
- `pipeline/svd_pipeline.py` — SVD computation; V^T available as `Vt` variable before scaling
- `tests/test_analysis.py` — test patterns: `tmp_path` fixture, `_setup_svd_vectors()` helper, class-based tests
- `analysis/config.py` — `CANONICAL_RIGHT`/`CANONICAL_LEFT` for cross-ideological voting detection
-
-### Key Technical Decisions
-
- **matplotlib for static charts** — no matplotlib usage exists in codebase; this introduces a new dependency. Alternative: Plotly static image export (already in stack). Decision: use matplotlib for markdown-embedded PNGs; simpler for static reports.
- **V^T storage via dedicated entity_type** — store raw V^T matrix as `entity_type='vt_matrix'` row in `svd_vectors`. Historical windows won't have V^T; motion-ranking correlation fallback is the primary approach for this phase.
- **Axis stability via motion projection patterns with Procrustes alignment** — since V^T may not be available for historical windows, compute axis stability indirectly. First apply Procrustes alignment (reuse `_procrustes_align_windows()` from `analysis/trajectory.py`) to motion vectors across windows, then correlate top-N motion rankings per component. This handles SVD sign ambiguity and rotation.
- **Threshold-based change-point detection** — simple drift rate threshold (no new dependencies). Detect when consecutive drift exceeds 2× median drift rate.
- **Stability threshold** — cosine similarity > 0.7 classifies axes as stable. Default parameterized via `--stability-threshold` with 0.7 as default. Distribution of similarity values reported in output for sensitivity assessment.
- **Cross-ideological voting** — use `CANONICAL_RIGHT` from `analysis.config` to identify right-wing motions (high positive loading on axis 1), then detect left-wing parties voting "voor" on those motions. Axis polarity determined per-window using canonical party scores, not global constants.
-
-## Open Questions
-
-### Resolved During Planning
-
- **Charting library**: matplotlib for static PNG embedding in markdown. Add to `pyproject.toml`.
- **Change-point detection**: Simple threshold on drift rate (2× median). No new dependencies.
- **Party-motion linkage**: Use `mp_votes` table — party voted "voor" on motion. This measures voting alignment, not sponsorship.
- **Axis stability approach**: Two-tier — (a) if V^T available, use cosine similarity; (b) fallback: Procrustes-align motion vectors, then correlate top-N motion rankings per component across windows.
- **Top N for centroids**: Default N=20, parameterized via `--top-n`. Test during execution.
-
-### Deferred to Implementation
-
- Exact optimal N for top motions per axis — will test N=10, 20, 50 during execution and pick the one with clearest signal
- Cross-ideological voting threshold — provisional: party voting "voor" on motions where canonical opposite-wing parties have high absolute loadings; will calibrate against baseline
-
-## High-Level Technical Design
-
-> *This illustrates the intended approach and is directional guidance for review, not implementation specification.*
-
-```
-┌─────────────────────────────────────────────────────────────────┐
-│                    scripts/motion_drift.py                       │
-├─────────────────────────────────────────────────────────────────┤
-│                                                                  │
-│  1. Load Data                                                    │
-│     ├── fused_embeddings (per window, per motion)                │
-│     ├── svd_vectors (motion projections per window)              │
-│     ├── mp_votes (party voting records)                          │
-│     └── motions (text for examples)                              │
-│                                                                  │
-│  2. Axis Stability                                               │
-│     ├── Procrustes-align motion vectors across windows           │
-│     ├── Option A: cosine similarity of V^T vectors (if stored)   │
-│     ├── Option B: correlate top-N motion rankings per component  │
-│     └── Output: stability heatmap (window × component matrix)    │
-│                                                                  │
-│  3. Semantic Drift                                               │
-│     ├── For each stable axis:                                     │
-│     │   ├── Get top N motions by |loading| per window            │
-│     │   ├── Compute fused embedding centroid per window          │
-│     │   └── Cosine distance between consecutive windows          │
-│     └── Output: drift timeline per axis + inflection points      │
-│                                                                  │
-│  4. Party Voting Analysis                                        │
-│     ├── For each party (with date-aware name normalization):     │
-│     │   ├── Get motions party voted "voor" on per window         │
-│     │   └── Compute voting centroid along each stable axis       │
-│     ├── Cross-ideological detection (per-window axis polarity):  │
-│     │   ├── Left parties voting "voor" on right-wing motions     │
-│     │   └── Right parties voting "voor" on left-wing motions     │
-│     └── Output: party trajectory plots + cross-voting examples   │
-│                                                                  │
-│  5. Report Generation                                            │
-│     ├── Markdown with embedded matplotlib PNGs                   │
-│     ├── Axis stability heatmap                                   │
-│     ├── Semantic drift timelines                                 │
-│     ├── Party trajectory plots                                   │
-│     └── Inflection point analysis with motion examples           │
-└─────────────────────────────────────────────────────────────────┘
-```
-
-## Implementation Units
-
- [ ] **Unit 1: Add matplotlib dependency and script scaffolding**
-
-**Goal:** Set up the new script with proper structure and dependencies.
-
-**Requirements:** R15
-
-**Dependencies:** None
-
-**Files:**
- Modify: `pyproject.toml` (add matplotlib)
- Create: `scripts/motion_drift.py`
- Test: `tests/test_motion_drift.py`
-
-**Approach:**
- Add `matplotlib>=3.8` to `pyproject.toml` dependencies
- Create `scripts/motion_drift.py` following established script pattern: `main(argv) -> int`, argparse with `--db`, `--windows`, `--top-n`, `--output`, ROOT path setup, module logger
- Add schema validation at startup: check for required tables (`svd_vectors`, `fused_embeddings`, `mp_votes`, `motions`)
- Create minimal `tests/test_motion_drift.py` with import test, argument parsing test, and schema validation test using in-memory DuckDB fixture
-
-**Patterns to follow:**
- `scripts/generate_svd_json.py` — script structure, argparse, entry point
- `scripts/svd_diagnostics.py` — report generation pattern
- `tests/test_analysis.py` — `tmp_path` fixture, `_setup_svd_vectors()` helper
-
-**Test scenarios:**
- Happy path: `main(["--help"])` exits with code 0 and prints usage
- Happy path: `main(["--db", "data/motions.db", "--output", "/tmp/test"])` runs without error
- Edge case: `main(["--db", "nonexistent.db"])` handles missing database gracefully (exit code 1)
- Edge case: database with missing tables produces clear error message
-
-**Verification:**
- `uv run python scripts/motion_drift.py --help` shows all arguments
- `uv run python -m pytest tests/test_motion_drift.py -q` passes
-
- [ ] **Unit 2: Axis stability analysis**
-
-**Goal:** Compute axis stability across annual windows and generate stability heatmap.
-
-**Requirements:** R1, R2, R3, R4
-
-**Dependencies:** Unit 1
-
-**Files:**
- Create: `analysis/motion_drift.py` (core analysis module)
- Modify: `scripts/motion_drift.py` (call axis stability)
- Test: `tests/test_motion_drift.py`
-
-**Approach:**
- Create `analysis/motion_drift.py` with `compute_axis_stability(db_path, windows)` function
- Two-tier approach:
-  - Try loading V^T from `svd_vectors` where `entity_type='vt_matrix'` (if stored by pipeline)
-  - Fallback: apply Procrustes alignment to motion vectors across windows (reuse `_procrustes_align_windows()` from `analysis/trajectory.py`), then for each window get top N motions per component by absolute score and compute pairwise cosine similarity of motion ranking vectors
- Generate stability heatmap as matplotlib figure (window × component matrix, color-coded by similarity)
- Return stability report: which axes are stable (similarity > 0.7), which are reordered (high similarity to different component index), which are unstable (low similarity to any component)
-
-**Patterns to follow:**
- `analysis/explorer_data.py` — DuckDB loading patterns, vector parsing
- `analysis/trajectory.py` — `_procrustes_align_windows()` for cross-window comparison
-
-**Test scenarios:**
- Happy path: `compute_axis_stability` returns stability matrix for 3+ windows with synthetic data
- Happy path: stability matrix is symmetric and values are in [-1, 1]
- Happy path: Procrustes alignment corrects sign flips between windows
- Edge case: single window returns empty stability report (no comparison possible)
- Edge case: windows with no motion vectors handled gracefully (warning logged, skipped)
- Integration: run against real `data/motions.db` annual windows, verify heatmap is generated
-
-**Verification:**
- Stability heatmap PNG generated with correct dimensions (windows × components)
- Stability report identifies at least some axes as stable (similarity > 0.7)
-
- [ ] **Unit 3: Semantic drift analysis**
-
-**Goal:** Compute semantic drift timelines for stable axes and detect inflection points.
-
-**Requirements:** R5, R6, R7, R8
-
-**Dependencies:** Unit 2 (needs stable axis list)
-
-**Files:**
- Modify: `analysis/motion_drift.py` (add drift functions)
- Modify: `scripts/motion_drift.py` (call drift analysis)
- Test: `tests/test_motion_drift.py`
-
-**Approach:**
- Add `compute_semantic_drift(db_path, stable_axes, windows, top_n)` function
- For each stable axis:
-  - Get top N motions per window by absolute SVD loading
-  - Compute average fused embedding centroid per window
-  - Compute cosine distance between consecutive window centroids
-  - Detect inflection points: where drift rate exceeds 2× median drift rate
- For each inflection point, extract example motions (top 3 before/after by loading)
- Generate drift timeline plot per axis (line chart with inflection point markers)
-
-**Patterns to follow:**
- `analysis/trajectory.py` — `compute_trajectories()` for cross-window drift computation
- `scripts/svd_diagnostics.py` — markdown report generation
-
-**Test scenarios:**
- Happy path: `compute_semantic_drift` returns drift series for each stable axis
- Happy path: drift values are in [0, 2] (cosine distance range)
- Happy path: inflection points detected when synthetic data has abrupt change
- Edge case: axis with only 2 windows returns drift but no inflection points
- Edge case: axis with steady (constant-rate) drift, where no step exceeds 2× the median drift rate, returns no inflection points
- Integration: run against real data, verify drift timelines are plausible
-
-**Verification:**
- Drift timeline PNG generated per stable axis
- Inflection points (if any) are marked on timeline with motion examples in report
-
- [ ] **Unit 4: Party voting analysis**
-
-**Goal:** Compute party voting centroids and detect cross-ideological voting patterns.
-
-**Requirements:** R9, R10, R11, R12
-
-**Dependencies:** Unit 2 (needs stable axis list)
-
-**Files:**
- Modify: `analysis/motion_drift.py` (add party analysis functions)
- Modify: `scripts/motion_drift.py` (call party analysis)
- Test: `tests/test_motion_drift.py`
-
-**Approach:**
- Add `compute_party_voting(db_path, stable_axes, windows)` function
- For each party:
-  - Query `mp_votes` for motions party voted "voor" on per window, using date-aware party name normalization (reuse `load_mp_vectors_by_party_for_window()` pattern from `analysis/explorer_data.py`)
-  - For each motion, get its SVD scores from `svd_vectors`
-  - Compute unweighted mean score along each stable axis (voting centroid)
- Track party trajectories: plot party centroid position per window along each axis
- Detect cross-ideological voting:
-  - For each window, independently determine axis polarity by checking where canonical right-wing parties (CANONICAL_RIGHT) score on each axis
-  - Identify "right-wing" motions (high positive loading on axis where PVV/FVD/JA21/SGP score high after polarity check)
-  - Find left-wing parties (SP, PvdA, GL, etc.) voting "voor" on right-wing motions
-  - Compute cross-voting rate per party per window
-  - Detect trends: is cross-voting increasing or decreasing over time?
- Generate party trajectory plots and cross-voting summary table
-
-**Patterns to follow:**
- `analysis/config.py` — `CANONICAL_RIGHT`/`CANONICAL_LEFT` for party classification
- `analysis/explorer_data.py` — `mp_votes` query patterns, `load_mp_vectors_by_party_for_window()` for party normalization
-
-**Test scenarios:**
- Happy path: `compute_party_voting` returns voting centroids for parties with sufficient data
- Happy path: cross-ideological voting detected when synthetic data has left party voting on right motions
- Happy path: party name normalization maps historical names (GL, PvdA → GroenLinks-PvdA) correctly
- Edge case: party with no "voor" votes in a window handled gracefully (centroid = NaN, skipped)
- Edge case: window with no voting data handled gracefully
- Integration: run against real data, verify party trajectories are plausible
-
-**Verification:**
- Party trajectory PNG generated showing party movement across windows
- Cross-voting summary table in report with at least one example
-
- [ ] **Unit 5: Report generation**
-
-**Goal:** Assemble all analysis outputs into a markdown report with embedded charts.
-
-**Requirements:** R13, R14, R15
-
-**Dependencies:** Units 2, 3, 4
-
-**Files:**
- Modify: `scripts/motion_drift.py` (orchestrate report generation)
- Test: `tests/test_motion_drift.py`
-
-**Approach:**
- Add `_generate_report(output_dir, stability_result, drift_result, party_result)` function
- Generate markdown with sections:
-  - Summary (key findings, number of stable axes, inflection points, cross-voting trends)
-  - Axis Stability (heatmap + interpretation)
-  - Semantic Drift (timeline per axis + inflection point analysis with motion examples)
-  - Party Voting Analysis (trajectory plots + cross-voting summary + examples)
-  - Methodology (brief description of approach, parameters used)
- Save all matplotlib figures as PNGs in output directory
- Embed PNGs in markdown using relative paths
-
-**Patterns to follow:**
- `scripts/svd_diagnostics.py` — markdown report structure
- `scripts/generate_svd_json.py` — `_generate_markdown_report()` function
-
-**Test scenarios:**
- Happy path: report generated with all sections and embedded images
- Happy path: all PNG files exist in output directory
- Edge case: no stable axes → report notes this and skips drift/party sections
- Edge case: output directory creation when it doesn't exist
-
-**Verification:**
- `output/report.md` exists and contains all expected sections
- All referenced PNG files exist in output directory
- Report is readable in a markdown viewer
-
-## System-Wide Impact
-
- **Interaction graph:** New script reads from existing DuckDB tables; no writes to production data. Pipeline change needed to store V^T matrix (optional, for future windows).
- **Unchanged invariants:** SVD computation unchanged. Explorer unchanged. Existing analysis modules unchanged.
- **New dependency:** `matplotlib` added to `pyproject.toml`. First use of matplotlib in codebase.
-
-## Risks & Dependencies
-
-| Risk | Likelihood | Impact | Mitigation |
-|------|-----------|--------|------------|
-| matplotlib introduces new dependency burden | Low | Low | Already common library; well-maintained. Alternative: use Plotly static export if team prefers single viz stack. |
-| V^T matrix not available for historical windows | High | Medium | Fallback to Procrustes-aligned motion ranking correlation (works with existing data). Store V^T going forward. |
-| Sparse data in early windows (2016-2018: 124-162 motions) | Medium | Medium | Script warns about low-coverage windows; analysis focuses on 2019+ where data is richer. |
-| Cross-ideological voting detection threshold too sensitive/insensitive | Medium | Low | Threshold is parameterized; will calibrate during execution against baseline drift rates. |
-| Script exceeds 2-minute runtime on full dataset | Low | Low | JSON parsing of fused embeddings is the bottleneck. Will batch-load and cache if needed. |
-
-## Documentation / Operational Notes
-
- New script: `scripts/motion_drift.py` — usage documented in module docstring
- New analysis module: `analysis/motion_drift.py` — functions documented with docstrings
- Report output: markdown with embedded PNGs, shareable without running the script
- Future: integrate analysis into Streamlit explorer tab (separate plan)
-
-## Sources & References
-
- **Origin document:** [docs/brainstorms/2026-04-05-motion-semantic-drift-over-time-requirements.md](../brainstorms/2026-04-05-motion-semantic-drift-over-time-requirements.md)
- Related code: `scripts/generate_svd_json.py`, `scripts/svd_diagnostics.py`, `analysis/trajectory.py`, `analysis/explorer_data.py`
- Party sets: `analysis/config.py` (CANONICAL_RIGHT, CANONICAL_LEFT)
--- a/docs/plans/2026-04-05-005-refactor-axis-stability-regression-plan.md
+++ b/docs/plans/2026-04-05-005-refactor-axis-stability-regression-plan.md
@ -1,241 +0,0 @@
---
-title: "Refine axis stability with regression weights and overtone shift"
-type: refactor
-status: active
-date: 2026-04-05
-origin: docs/brainstorms/2026-04-05-motion-semantic-drift-over-time-requirements.md
---
-
-# Refine Axis Stability with Regression Weights and Overtone Shift
-
-## Overview
-
-Replace the current axis stability computation (party-based sign consistency) with a regression-based approach that measures whether the *semantic features* defining each SVD axis remain stable across windows. Add overtone shift analysis to detect when motion content changes even if party ordering stays the same.
-
-## Problem Frame
-
-The current stability metric only checks whether left/right parties score on the expected side of each axis. This misses two important questions:
-1. **Axis stability**: Does axis 1 capture the same underlying theme in 2019 and 2024? (e.g., "social vs individual" should be stable even if specific motions change)
-2. **Overtone shift**: Are motions on axis 1 becoming more about migration and less about economics over time, even if PVV still scores higher than SP?
-
-The current approach found zero stable axes because it measured party sign consistency, not semantic stability.
-
-## Requirements Trace
-
- R1. Compute semantic stability via Ridge regression weights across windows (replaces party sign consistency)
- R2. Generate stability heatmap showing which axes are semantically comparable across time
- R3. Detect axis reordering — cases where axis N in window A ≈ axis M in window B
- R4. Flag unstable axes where semantic signature changes significantly
- R5. For each stable axis, compute semantic gravity (weighted mean fused embedding) per window
- R6. Track overtone shift: how semantic gravity moves across windows
- R7. Identify inflection points where overtone shift accelerated
- R8. Show example motions and top shifting dimensions at inflection points
- R9-R12. Party voting analysis (unchanged from existing implementation)
- R13-R15. Output and parameterization (unchanged)
-
-## Scope Boundaries
-
- Refine existing `scripts/motion_drift.py` — no new script
- Keep party voting analysis and report generation (already working)
- Annual windows only; quarterly too sparse
- Ridge regression with scikit-learn (already in dependencies)
-
-## Context & Research
-
-### Existing Code
-
- `scripts/motion_drift.py` — current implementation with party-based fallback stability
- `analysis/clustering.py` — UMAP + KMeans infrastructure (not directly used but shows pattern)
- `scikit-learn>=1.8.0` — already in `pyproject.toml`, provides `Ridge`
-
-### Key Technical Decisions
-
- **Ridge regression per axis per window**: Fit `SVD_score ~ fused_embedding` for each axis. The weight vector (2610 dims) is the semantic signature. Compare via cosine similarity across windows.
- **Semantic gravity for overtone shift**: Weighted mean fused embedding of all motions, weighted by absolute SVD score on the axis. Track how gravity moves across windows.
- **Top-K dimensions for interpretation**: Extract top-50 dimensions by absolute regression weight. Project gravity onto these to identify which semantic features are shifting.
- **Party-based fallback kept**: For windows with too few motions for regression (< 50), fall back to party sign consistency.
-
-## Open Questions
-
-### Resolved During Planning
-
- **Regression type**: Ridge (L2 regularization) — handles 2610-dim vectors without overfitting, already available via scikit-learn.
- **Alpha (regularization strength)**: Default 1.0, parameterized via `--regression-alpha`. Will test 0.1, 1.0, 10.0 during execution.
- **Top-K dimensions for interpretation**: K=50 — enough to capture semantic signal without noise.
- **Overtone shift metric**: Cosine distance between semantic gravity points across consecutive windows. Threshold for inflection: 2× median shift rate.
-
-### Deferred to Implementation
-
- Optimal alpha for Ridge regression — will test against real data and pick value that gives most interpretable weight vectors
- Whether to normalize fused embeddings before regression (likely yes, since SVD dims are ~1-100 scale and text dims are ~0-1)
-
-## High-Level Technical Design
-
-> *This illustrates the intended approach and is directional guidance for review, not implementation specification.*
-
-```
-┌─────────────────────────────────────────────────────────────────┐
-│              Refined Axis Stability + Overtone Shift             │
-├─────────────────────────────────────────────────────────────────┤
-│                                                                  │
-│  1. Per-Axis Ridge Regression (per window)                       │
-│     ├── For each SVD axis k:                                     │
-│     │   X = fused_embeddings (n_motions × 2610)                 │
-│     │   y = SVD scores on axis k (n_motions)                    │
-│     │   w_k = Ridge.fit(X, y).coef_  (2610-dim weight vector)   │
-│     └── Output: weight_vectors[window][axis]                     │
-│                                                                  │
-│  2. Stability Matrix                                              │
-│     ├── For each axis k, compute cosine similarity of w_k        │
-│     │   across all window pairs                                  │
-│     └── Output: stability_matrix[window][window][axis]           │
-│                                                                  │
-│  3. Overtone Shift                                                │
-│     ├── For each axis k and window:                              │
-│     │   gravity_k = weighted_mean(fused_embeddings,              │
-│     │              weights=abs(SVD_scores_k))                    │
-│     │   shift_k = cosine_distance(gravity_k[t], gravity_k[t+1])  │
-│     └── Output: shift_series[axis] = [shift values per window]   │
-│                                                                  │
-│  4. Interpretation                                                │
-│     ├── Top-50 dimensions per axis (by |weight|)                 │
-│     ├── Project gravity onto top dimensions to see shifts        │
-│     └── Report: "Axis 1 stable (0.82), overtone shift (0.45)    │
-│              — migration framing gained +0.31, economic -0.22"   │
-└─────────────────────────────────────────────────────────────────┘
-```
-
-## Implementation Units
-
- [ ] **Unit 1: Add Ridge regression-based stability computation**
-
-**Goal:** Replace `compute_axis_stability()` with regression-based version.
-
-**Requirements:** R1, R2, R3, R4
-
-**Dependencies:** None (replaces existing function)
-
-**Files:**
- Modify: `scripts/motion_drift.py` (replace `compute_axis_stability`)
- Modify: `tests/test_motion_drift.py` (update stability tests)
-
-**Approach:**
- New `compute_axis_stability()` function:
-  - For each window, load motion scores + fused embeddings
-  - For each axis k (1-10), fit Ridge regression: `score_k ~ fused_embedding`
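-  - Normalize features before fitting (StandardScaler on fused embeddings) — provisional default; whether to normalize is listed under "Deferred to Implementation" and should be confirmed against real data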
-  - Extract weight vector w_k (2610 dims)
-  - Compute pairwise cosine similarity of w_k across windows
-  - Return stability matrix, stable/reordered/unstable axes
- Keep `_compute_stability_fallback()` for windows with < 50 motions
- Add `--regression-alpha` CLI argument (default 1.0)
-
-**Patterns to follow:**
- `sklearn.linear_model.Ridge` — standard usage: `Ridge(alpha=alpha).fit(X, y)`
- `sklearn.preprocessing.StandardScaler` — normalize features before regression
-
-**Test scenarios:**
- Happy path: regression produces weight vectors with cosine similarity in [-1, 1]
- Happy path: synthetic data with known semantic signatures recovers stable axes
- Edge case: window with < 50 motions falls back to party-based method
- Edge case: all motions have same score on axis (degenerate case)
- Integration: run against real data, verify stability values are non-zero
-
-**Verification:**
- Stability matrix has correct shape (n_windows × n_windows × n_components)
- At least some axes show stability > 0.5 on real data
- Fallback triggers correctly for sparse windows
-
- [ ] **Unit 2: Add overtone shift analysis**
-
-**Goal:** Compute semantic gravity trajectories and detect overtone shifts.
-
-**Requirements:** R5, R6, R7, R8
-
-**Dependencies:** Unit 1 (needs regression weight vectors for top-K dimension interpretation; shift computation itself is independent)
-
-**Files:**
- Create: `compute_overtone_shift()` function in `scripts/motion_drift.py`
- Modify: `scripts/motion_drift.py` (call overtone shift in main)
- Modify: `tests/test_motion_drift.py` (add overtone shift tests)
-
-**Approach:**
- New `compute_overtone_shift(db_path, stable_axes, windows, top_k=50)` function:
-  - For each stable axis and window:
-    - Load motion scores and fused embeddings
-    - Compute semantic gravity: weighted mean of fused embeddings, weights = abs(SVD scores)
-    - Extract top-K dimensions by absolute regression weight
-    - Project gravity onto top-K dimensions
-  - Compute cosine distance between consecutive window gravity points
-  - Detect inflection points: shift > 2× median shift rate
-  - For each inflection, identify top shifting dimensions and example motions
- Return shift series, inflection points, dimension-level analysis
-
-**Test scenarios:**
- Happy path: overtone shift returns shift series for each stable axis
- Happy path: synthetic data with known shift detects inflection point
- Edge case: axis with only 2 windows returns shift but no inflection points
- Edge case: steady (constant-rate) shift, where no step exceeds 2× the median shift rate, returns no inflection points
- Integration: run against real data, verify shift values are plausible
-
-**Verification:**
- Shift series has correct length (n_windows - 1 per axis)
- Inflection points (if any) include dimension-level analysis
- Top shifting dimensions are reported with direction and magnitude
-
- [ ] **Unit 3: Update report generation with new metrics**
-
-**Goal:** Update report to show both stability and overtone shift per axis.
-
-**Requirements:** R13, R14
-
-**Dependencies:** Units 1, 2
-
-**Files:**
- Modify: `scripts/motion_drift.py` (`_generate_report` function)
- Modify: `tests/test_motion_drift.py` (update report tests)
-
-**Approach:**
- Update `_generate_report()` to include:
-  - Stability heatmap (regression weight similarity)
-  - Overtone shift timeline per axis (line chart with inflection markers)
-  - For each stable axis: stability score + overtone shift magnitude
-  - Top shifting dimensions table: dimension index, direction, magnitude
-  - Example motions at inflection points
- Keep existing party voting analysis section unchanged
-
-**Test scenarios:**
- Happy path: report includes both stability and overtone shift sections
- Happy path: all charts generated and embedded
- Edge case: no stable axes → report notes this, skips overtone shift
-
-**Verification:**
- Report contains stability heatmap, shift timelines, and dimension analysis
- All PNG files exist in output directory
-
-## System-Wide Impact
-
- **Interaction graph:** Replaces `compute_axis_stability()` in place; its API is unchanged, so its callers (the main function) need no changes
- **Unchanged invariants:** Party voting analysis, report structure, existing CLI arguments (the new `--regression-alpha` flag is purely additive)
- **New dependency:** None — scikit-learn already in dependencies
-
-## Risks & Dependencies
-
-| Risk | Likelihood | Impact | Mitigation |
-|------|-----------|--------|------------|
-| Regression on 2610 features overfits | Medium | Medium | Use Ridge (L2 regularization), test multiple alpha values, validate with cross-validation |
-| Fused embeddings have different dimensions across windows | Low | Low | Already handled — truncate to min dimension |
-| Regression takes too long on full dataset | Medium | Low | 9 windows × 10 axes = 90 Ridge fits. Each fit on ~3000×2610 matrix ~0.1s with sklearn. Total ~9s. Acceptable. |
-| Weight vectors are hard to interpret | Medium | Low | Focus on top-50 dimensions, report direction and magnitude clearly |
-
-## Documentation / Operational Notes
-
- Updated script: `scripts/motion_drift.py` — new stability metric, new overtone shift analysis
- Report output: markdown with stability heatmap, shift timelines, dimension analysis
- Existing report sections (party voting) unchanged
-
-## Sources & References
-
- **Origin document:** [docs/brainstorms/2026-04-05-motion-semantic-drift-over-time-requirements.md](../brainstorms/2026-04-05-motion-semantic-drift-over-time-requirements.md)
- Related code: `scripts/motion_drift.py` (existing implementation), `analysis/clustering.py` (UMAP/KMeans patterns)
- Ridge regression: `sklearn.linear_model.Ridge`
--- a/docs/plans/2026-04-24-001-fix-ci-test-workflow-plan.md
+++ b/docs/plans/2026-04-24-001-fix-ci-test-workflow-plan.md
@ -1,127 +0,0 @@
---
-title: "fix: CI test workflow references missing requirements.txt"
-type: fix
-status: active
-date: 2026-04-24
---
-
-# Fix: CI Test Workflow
-
-## Overview
-
-The scheduled CI workflow `.github/workflows/mindmodel-schedule.yml` attempts `pip install -r requirements.txt`, but this file does not exist. The project uses `uv` with `pyproject.toml` and `uv.lock`. This workflow fails silently (`|| true`) and never actually runs tests meaningfully.
-
-## Problem Frame
-
- Python 3.11 is hardcoded in the workflow; the project requires >=3.13
- `requirements.txt` is missing; dependencies are in `pyproject.toml`
- No pytest gate on push/PR — regressions are only caught locally
- The mindmodel validator runs regardless, masking the test failure
-
-## Requirements Trace
-
- R1. CI must install dependencies correctly using the project's package manager
- R2. CI must run pytest on push and PR to main
- R3. CI must use Python >=3.13 matching pyproject.toml
- R4. CI must fail visibly when tests fail (no `|| true` masking)
-
-## Scope Boundaries
-
-**Included:**
- Fix existing mindmodel-schedule.yml
- Add new pytest workflow for push/PR
-
-**Excluded:**
- Changing test code or test dependencies
- Adding new tests
- Changing the mindmodel validator logic
-
-## Key Technical Decisions
-
- **Use `uv` in CI** — matches local development and pyproject.toml. Use `astral-sh/setup-uv` action.
- **Separate workflows** — keep mindmodel schedule weekly, add pytest on push/PR
- **Fail fast** — remove `|| true` from pytest step
-
-## Implementation Units
-
- [ ] U1. **Fix mindmodel-schedule.yml to use uv**
-
-**Goal:** Make the scheduled workflow install deps and run tests correctly.
-
-**Requirements:** R1, R3, R4
-
-**Dependencies:** None
-
-**Files:**
- Modify: `.github/workflows/mindmodel-schedule.yml`
-
-**Approach:**
- Replace `actions/setup-python@v4` + `pip install` with `astral-sh/setup-uv@v5`
- Use `uv sync` to install from `pyproject.toml`/`uv.lock`
- Change Python version to 3.13
- Remove `|| true` from pytest step
- Keep mindmodel validator as-is
-
-**Execution note:** Test-first — write a workflow validation test that checks the YAML parses correctly and references valid files.
-
-**Test scenarios:**
- Happy path: Workflow YAML is valid GitHub Actions syntax
- Error path: pytest step fails if tests fail (no `|| true`)
- Integration: `uv sync` installs the same lockfile as local dev
-
-**Verification:**
- `python -c "import yaml; yaml.safe_load(open('.github/workflows/mindmodel-schedule.yml'))"` passes
- Workflow runs successfully on next schedule trigger
-
---
-
- [ ] U2. **Add pytest workflow for push/PR**
-
-**Goal:** Run tests on every push and PR to main.
-
-**Requirements:** R2, R3, R4
-
-**Dependencies:** None
-
-**Files:**
- Create: `.github/workflows/pytest.yml`
-
-**Approach:**
- Trigger on `push` to `main` and `pull_request` to `main`
- Use `astral-sh/setup-uv@v5` with Python 3.13
- Run `uv run pytest tests/ -q`
- Cache uv dependencies between runs
-
-**Execution note:** Test-first — write a test that verifies the new workflow file exists and has required fields.
-
-**Test scenarios:**
- Happy path: Workflow triggers on push to main
- Happy path: Workflow triggers on PR to main
- Error path: pytest fails → workflow fails
- Edge case: Caching speeds up repeated runs
-
-**Verification:**
- New workflow appears in repo Actions tab
- Opening a PR from this plan branch against `main` triggers the workflow
- All tests pass in CI
-
---
-
-## Risks & Dependencies
-
-| Risk | Mitigation |
-|------|------------|
-| uv action not available or fails | Pin to known good version; test on fork first |
-| Tests fail in CI but pass locally | Likely env difference; debug in CI logs |
-| Gitea runner differences | Use standard ubuntu-latest; no Gitea-specific actions |
-
-## Documentation / Operational Notes
-
- Update ARCHITECTURE.md CI section if it mentions the old workflow
- Note in AGENTS.md that CI runs on GitHub Actions (not Gitea CI)
-
-## Sources & References
-
- Existing workflow: `.github/workflows/mindmodel-schedule.yml`
- Package manager: `pyproject.toml`, `uv.lock`
- uv GitHub Action: https://github.com/astral-sh/setup-uv
--- a/docs/plans/2026-04-24-003-consolidate-config-sources-plan.md
+++ b/docs/plans/2026-04-24-003-consolidate-config-sources-plan.md
@ -1,136 +0,0 @@
---
-title: "refactor: Consolidate duplicate config sources"
-type: refactor
-status: active
-date: 2026-04-24
---
-
-# Consolidate Duplicate Config Sources
-
-## Overview
-
-There are two config files: `config.py` (51 lines at repo root) and `analysis/config.py` (13K). The root config defines the base `Config` dataclass with env vars; `analysis/config.py` contains SVD themes, party lists, colors, and explorer constants. This divergence is confusing and risks stale data.
-
-## Problem Frame
-
- Two sources of truth for configuration
- `config.py` is small and may be overlooked
- `analysis/config.py` is large and contains both constants and dynamic config
- Risk of updating one but not the other
-
-## Requirements Trace
-
- R1. Single canonical config module
- R2. All existing imports continue to work (backward compatibility)
- R3. No behavior changes
- R4. Tests pass after consolidation
-
-## Scope Boundaries
-
-**Included:**
- Audit both config files
- Decide on canonical location
- Migrate root config into analysis/config.py or re-export
- Update imports
-
-**Excluded:**
- Changing config values
- Adding new config options
- Refactoring analysis/config.py beyond import consolidation
-
-## Key Technical Decisions
-
- **Canonical location: analysis/config.py** — it already contains most config and is imported by many modules
- **Backward compatibility:** Root `config.py` becomes a thin re-export shim: `from analysis.config import Config`
-
-## Implementation Units
-
- [ ] U1. **Audit config usage**
-
-**Goal:** Map which modules import from which config file.
-
-**Requirements:** R1
-
-**Dependencies:** None
-
-**Files:**
- Read: `config.py`, `analysis/config.py`
-
-**Approach:**
- `grep -rn "from config import\|import config" --include="*.py"`
- `grep -rn "from analysis.config import\|import analysis.config" --include="*.py"`
- Document findings
-
-**Test expectation:** none — research unit.
-
-**Verification:**
- Complete list of import sites
-
---
-
- [ ] U2. **Migrate root config into analysis/config.py**
-
-**Goal:** Move Config dataclass and env var logic to analysis/config.py.
-
-**Requirements:** R1, R2, R3
-
-**Dependencies:** U1
-
-**Files:**
- Modify: `analysis/config.py`
- Modify: `config.py` (re-export shim)
-
-**Approach:**
- Move `Config` dataclass to analysis/config.py
- Keep root `config.py` as: `from analysis.config import Config`
- Ensure no circular imports
-
-**Execution note:** Test-first — write a test that imports both `config` and `analysis.config` and verifies they expose the same `Config` class.
-
-**Test scenarios:**
- Happy path: `from config import Config` still works
- Happy path: `from analysis.config import Config` works
- Integration: Both paths return the same object
-
-**Verification:**
- `uv run python -c "from config import Config; from analysis.config import Config as AC; assert Config is AC"`
- All tests pass
-
---
-
- [ ] U3. **Update import sites**
-
-**Goal:** Standardize imports to use analysis/config.py directly.
-
-**Requirements:** R1
-
-**Dependencies:** U2
-
-**Files:**
- Modify: Files that import from root config.py
-
-**Approach:**
- Replace `from config import Config` with `from analysis.config import Config`
- Mechanical change, one file at a time
-
-**Test scenarios:**
- Integration: All modified files import successfully
- Regression: All tests pass
-
-**Verification:**
- `grep -rn "from config import" --include="*.py"` returns nothing (except shim)
- Full test suite passes
-
---
-
-## Risks & Dependencies
-
-| Risk | Mitigation |
-|------|------------|
-| Circular imports | analysis/config.py must not import from modules that import it |
-| Hidden dynamic imports | Search thoroughly; test all import paths |
-
-## Sources & References
-
- `config.py`
- `analysis/config.py`
--- a/docs/plans/2026-04-24-004-rewrite-readme-plan.md
+++ b/docs/plans/2026-04-24-004-rewrite-readme-plan.md
@ -1,97 +0,0 @@
---
-title: "docs: Rewrite README.md with quickstart and project overview"
-type: feat
-status: active
-date: 2026-04-24
---
-
-# Rewrite README.md
-
-## Overview
-
-The current README.md is 22 lines and only covers embeddings and Ansible deployment. It does not explain what Stemwijzer is, how to run it locally, or how to run the pipeline. New contributors must discover ARCHITECTURE.md to get oriented.
-
-## Problem Frame
-
- README is the first file a visitor sees
- No quickstart instructions
- No mention of Streamlit, the voting UI, or the explorer
- No screenshot or demo link
- Missing prerequisites (Python 3.13, uv, DuckDB)
-
-## Requirements Trace
-
- R1. Explain what the project does in 2 sentences
- R2. Show a screenshot or demo link
- R3. List prerequisites and installation steps
- R4. Provide quickstart commands (run app, run pipeline, run tests)
- R5. Link to ARCHITECTURE.md for deep dive
- R6. Link to docs/ for additional documentation
-
-## Scope Boundaries
-
-**Included:**
- Rewrite README.md with new structure
-
-**Excluded:**
- Changing ARCHITECTURE.md (only link to it)
- Adding screenshots (placeholder path accepted)
- Creating a demo deployment
-
-## Key Technical Decisions
-
- **Keep it concise** — README should be scannable in 2 minutes. Deep content lives in ARCHITECTURE.md.
- **Use the same commands as ARCHITECTURE.md** — single source of truth for commands
- **Match the project's language** — Dutch UI, English docs
-
-## Implementation Units
-
- [ ] U1. **Draft and review README structure**
-
-**Goal:** Create a README that a new contributor can follow to get the app running in <10 minutes.
-
-**Requirements:** R1–R6
-
-**Dependencies:** None
-
-**Files:**
- Modify: `README.md`
-
-**Approach:**
-Structure:
-1. Title + one-line description
-2. Screenshot placeholder
-3. What is Stemwijzer? (2 sentences)
-4. Features bullet list (voting compass, explorer, analytics)
-5. Prerequisites (Python 3.13, uv)
-6. Quickstart (clone, uv sync, run Streamlit, run pipeline)
-7. Testing (uv run pytest)
-8. Project structure (brief, link to ARCHITECTURE.md)
-9. Documentation links (ARCHITECTURE.md, docs/)
-10. License
-
-**Test expectation:** none — documentation-only change. Verify by reading the rendered markdown.
-
-**Verification:**
- A new contributor can follow the quickstart without reading other files
- All commands in README match ARCHITECTURE.md
- No broken internal links
-
---
-
-## Risks & Dependencies
-
-| Risk | Mitigation |
-|------|------------|
-| README grows too long | Cap at ~80 lines; defer deep content to ARCHITECTURE.md |
-| Commands become outdated | Cross-check against ARCHITECTURE.md before finalizing |
-
-## Documentation / Operational Notes
-
- This is the documentation change. No other docs need updating.
-
-## Sources & References
-
- Existing README: `README.md`
- Deep docs: `ARCHITECTURE.md`
- Code style: `CODE_STYLE.md`
--- a/docs/plans/2026-04-24-005-add-pyright-ci-plan.md
+++ b/docs/plans/2026-04-24-005-add-pyright-ci-plan.md
@ -1,100 +0,0 @@
---
-title: "feat: Add pyright type-checking to CI"
-type: feat
-status: active
-date: 2026-04-24
---
-
-# Add pyright Type-Checking to CI
-
-## Overview
-
-`pyright` is in dev dependencies but never runs in CI. Adding it to the pytest workflow (or as a separate job) would catch type errors before merge.
-
-## Problem Frame
-
- Type errors are only caught locally (if the developer runs pyright)
- No enforcement of type annotations in PRs
- CODE_STYLE.md encourages typing but CI doesn't verify
-
-## Requirements Trace
-
- R1. pyright runs on every push/PR
- R2. pyright uses the same version as pyproject.toml
- R3. CI fails on type errors
- R4. Initial run establishes baseline (no new errors introduced)
-
-## Scope Boundaries
-
-**Included:**
- Add pyright step to CI workflow
- Fix or suppress any existing type errors that block CI
-
-**Excluded:**
- Adding type annotations to untyped code (do that incrementally)
- Changing pyright configuration beyond CI setup
-
-## Implementation Units
-
- [ ] U1. **Add pyright CI job**
-
-**Goal:** Run pyright in GitHub Actions.
-
-**Requirements:** R1, R2, R3
-
-**Dependencies:** None
-
-**Files:**
- Modify: `.github/workflows/pytest.yml`
-
-**Approach:**
- Add a `pyright` job parallel to pytest
- Use `uv run pyright` (same version as local)
-
-**Test scenarios:**
- Happy path: Typed code passes pyright
- Error path: Type error fails the CI job
- Integration: pyright version matches pyproject.toml
-
-**Verification:**
- CI runs pyright successfully
-
---
-
- [ ] U2. **Establish baseline**
-
-**Goal:** Ensure CI passes on current code.
-
-**Requirements:** R4
-
-**Dependencies:** U1
-
-**Files:**
- Modify: Files with fixable type errors
- Modify: `pyproject.toml` (add suppressions for unfixable legacy issues)
-
-**Approach:**
- Run `uv run pyright` locally
- Fix trivial errors; suppress complex legacy ones with `# type: ignore` or pyrightconfig
- Document suppressions
-
-**Test scenarios:**
- Happy path: `uv run pyright` exits 0
-
-**Verification:**
- `uv run pyright` passes locally
- CI pyright job passes
-
---
-
-## Risks & Dependencies
-
-| Risk | Mitigation |
-|------|------------|
-| Many existing type errors | Fix batch-by-batch; don't block this PR on full cleanup |
-| pyright is slow in CI | Run in parallel with pytest; cache node_modules |
-
-## Sources & References
-
- `pyproject.toml` dev dependencies
- `.github/workflows/pytest.yml` (from P1-001)
--- a/docs/plans/2026-04-24-006-activate-pre-commit-hooks-plan.md
+++ b/docs/plans/2026-04-24-006-activate-pre-commit-hooks-plan.md
@ -1,146 +0,0 @@
---
-title: "feat: Activate pre-commit hooks (black, ruff, isort)"
-type: feat
-status: active
-date: 2026-04-24
---
-
-# Activate Pre-commit Hooks
-
-## Overview
-
-`.pre-commit-config.yaml` exists but is explicitly disabled ("intentionally minimal and does not enable hooks by installing them"). Activating black, ruff, and isort would enforce CODE_STYLE.md conventions automatically and eliminate style-only review comments.
-
-## Problem Frame
-
- Code style is documented in CODE_STYLE.md but not enforced automatically
- Contributors may submit PRs with inconsistent formatting
- Review time is spent on style nits instead of logic
- No CI check for formatting violations
-
-## Requirements Trace
-
- R1. Pre-commit hooks run black, ruff, and isort
- R2. Hooks are enforced in CI (fail build on violations)
- R3. Hooks use the same versions as pyproject.toml dev dependencies
- R4. Initial run reformats existing code without breaking tests
-
-## Scope Boundaries
-
-**Included:**
- Update `.pre-commit-config.yaml`
- Add CI workflow step for pre-commit
- Run initial format across codebase
-
-**Excluded:**
- Adding new linters or rules
- Changing CODE_STYLE.md conventions
- Fixing logic bugs found by ruff (separate PR)
-
-## Key Technical Decisions
-
- **Use pre-commit.ci or GitHub Action** — pre-commit.ci is zero-config but may not work on Gitea. Use a GitHub Actions step as fallback.
- **Single large format commit** — Run once, commit formatting changes separately from config changes so reviewers can see the diff.
- **Run tests after formatting, not during** — Formatting should not change behavior, but run the full test suite afterwards to verify.
-
-## Implementation Units
-
- [ ] U1. **Update .pre-commit-config.yaml**
-
-**Goal:** Enable black, ruff, and isort with versions matching pyproject.toml.
-
-**Requirements:** R1, R3
-
-**Dependencies:** None
-
-**Files:**
- Modify: `.pre-commit-config.yaml`
-
-**Approach:**
- Remove the "does not enable hooks" comment
- Add repos for black, ruff, isort with pinned versions
- Set ruff to match CODE_STYLE.md rules
- Configure isort profile (black-compatible)
-
-**Test scenarios:**
- Happy path: `pre-commit run --all-files` completes successfully
- Error path: A file with style violations fails the hook
- Integration: Versions match pyproject.toml dev deps
-
-**Verification:**
- `pre-commit run --all-files` runs without config errors
-
---
-
- [ ] U2. **Add pre-commit CI step**
-
-**Goal:** Block PRs that violate formatting rules.
-
-**Requirements:** R2
-
-**Dependencies:** U1
-
-**Files:**
- Modify: `.github/workflows/pytest.yml` (or create separate lint.yml)
-
-**Approach:**
- Add a job that runs `pre-commit run --all-files`
- Use the same uv setup as the pytest workflow
- Install pre-commit via uv
-
-**Test scenarios:**
- Happy path: Clean code passes pre-commit CI
- Error path: Violations fail the CI job
-
-**Verification:**
- Pushing a formatting violation fails the check
- Pushing clean code passes
-
---
-
- [ ] U3. **Run initial format across codebase**
-
-**Goal:** Bring all existing code into compliance so future PRs only touch their own changes.
-
-**Requirements:** R4
-
-**Dependencies:** U1
-
-**Files:**
- Modify: All Python files (mechanical reformatting)
-
-**Approach:**
- Run `pre-commit run --all-files`
- Commit formatting changes separately
- Run full test suite: `uv run pytest tests/ -q`
-
-**Execution note:** This is a mechanical change. Characterization tests should pass unchanged. If tests fail, the formatter broke something — investigate before committing.
-
-**Test scenarios:**
- Integration: All existing tests pass after formatting
- Edge case: No logic changes introduced by formatting
-
-**Verification:**
- `uv run pytest tests/ -q` passes
- `git diff` shows only whitespace/import changes
-
---
-
-## Risks & Dependencies
-
-| Risk | Mitigation |
-|------|------------|
-| Massive format commit obscures git blame | Use `.git-blame-ignore-revs` to ignore the format commit |
-| Ruff finds existing logic issues | Fix or suppress in separate PR; don't mix with activation |
-| Contributors without pre-commit installed | CI catches it; add setup note to README |
-
-## Documentation / Operational Notes
-
- Add pre-commit setup to README quickstart
- Document `.git-blame-ignore-revs` usage
-
-## Sources & References
-
- Config: `.pre-commit-config.yaml`
- Style guide: `CODE_STYLE.md`
- Dependencies: `pyproject.toml`
--- a/docs/plans/2026-04-24-007-replace-print-with-logging-plan.md
+++ b/docs/plans/2026-04-24-007-replace-print-with-logging-plan.md
@ -1,256 +0,0 @@
---
-title: "refactor: Replace print() calls with structured logging"
-type: refactor
-status: active
-date: 2026-04-24
---
-
-# Replace print() with Structured Logging
-
-## Overview
-
-There are approximately 225 `print()` calls across the codebase (database.py, api_client.py, scripts/, pipeline/). CODE_STYLE.md already recommends structured logging, but it is not consistently applied. This makes production debugging difficult — no log levels, no timestamps, no module context.
-
-## Problem Frame
-
- `print()` outputs are invisible in production logs or mixed with Streamlit UI
- No log levels (INFO, WARNING, ERROR) to filter noise
- No module names to identify which component logged what
- Ingestion and API errors are silently swallowed by broad except blocks
- Scripts produce unstructured output that is hard to parse or aggregate
-
-## Requirements Trace
-
- R1. Replace all `print()` calls with appropriate `logging` levels
- R2. Configure a project-wide logger with module-level naming
- R3. Preserve existing output behavior in Streamlit contexts (use `st.info`/`st.warning` where appropriate)
- R4. Update CODE_STYLE.md to mandate logging over print
- R5. All tests pass after migration
-
-## Scope Boundaries
-
-**Included:**
- database.py, api_client.py, summarizer.py, ai_provider.py
- pipeline/ modules (run_pipeline.py, svd_pipeline.py, text_pipeline.py, fusion.py)
- scripts/ (batch migration, one script at a time)
-
-**Excluded:**
- explorer.py Streamlit UI prints (these may be intentional UI feedback)
- app.py user-facing prints
- Third-party code
-
-## Key Technical Decisions
-
- **Use standard library `logging`** — no external dependency needed. If structlog is desired later, it wraps logging.
- **Module-level loggers** — `logger = logging.getLogger(__name__)` pattern
- **Root config in config.py** — basicConfig or dictConfig at app startup
- **Streamlit compatibility** — In Streamlit contexts, logging to stderr still works; replace intentional UI prints with `st.*` calls
-
-## Context & Research
-
-### Relevant Code and Patterns
-
- `database.py` — prints in insert/update paths, ~50+ prints
- `api_client.py` — prints in fetch/pagination logic
- `scripts/` — 22 scripts, many with progress prints
- `CODE_STYLE.md` — already recommends structured logging
-
-### Institutional Learnings
-
- `docs/solutions/best-practices/working-tree-hygiene-dependency-groups-and-gitignore-2026-04-24.md` — mechanical changes should be verified with full test suite
-
-## Implementation Units
-
- [ ] U1. **Set up logging configuration and test harness**
-
-**Goal:** Create the logging infrastructure and tests before touching any print statements.
-
-**Requirements:** R2
-
-**Dependencies:** None
-
-**Files:**
- Modify: `config.py`
- Create: `tests/test_logging_config.py`
-
-**Approach:**
- Add `configure_logging(level=logging.INFO)` to config.py
- Use standard format: `%(asctime)s - %(name)s - %(levelname)s - %(message)s`
- Create test that verifies logger hierarchy and formatting
-
-**Execution note:** Test-first — write `test_logging_config.py` before any implementation.
-
-**Test scenarios:**
- Happy path: `configure_logging()` sets up root logger with correct format
- Happy path: Module logger `logging.getLogger("database")` inherits level
- Edge case: Calling configure_logging twice is idempotent
-
-**Verification:**
- `uv run pytest tests/test_logging_config.py -v` passes
-
---
-
- [ ] U2. **Migrate database.py prints to logging**
-
-**Goal:** Replace all print() calls in database.py with logger calls.
-
-**Requirements:** R1, R5
-
-**Dependencies:** U1
-
-**Files:**
- Modify: `database.py`
- Modify: `tests/test_database_audit.py` (if it checks output)
-
-**Approach:**
- Add `logger = logging.getLogger(__name__)` at module level
- Replace progress prints with `logger.info()`
- Replace error/warning prints with `logger.warning()` / `logger.error()`
- Keep behavior identical (same messages)
-
-**Execution note:** Test-first — write a test that asserts `caplog` captures a database log message before changing any code.
-
-**Test scenarios:**
- Happy path: `caplog` captures `logger.info` during motion insert
- Error path: `caplog` captures `logger.error` on DB failure
- Edge case: No prints leak to stdout (use capsys to verify)
-
-**Verification:**
- `grep -n "print(" database.py` returns nothing (or only intentional UI prints)
- `uv run pytest tests/test_database_audit.py -v` passes
-
---
-
- [ ] U3. **Migrate api_client.py prints to logging**
-
-**Goal:** Replace all print() calls in api_client.py with logger calls.
-
-**Requirements:** R1, R5
-
-**Dependencies:** U1
-
-**Files:**
- Modify: `api_client.py`
- Modify: `tests/test_api_client.py` (create if missing)
-
-**Approach:**
- Same pattern as U2: module logger, map prints to levels
- API pagination progress → `logger.info`
- Rate limit / retry messages → `logger.warning`
-
-**Execution note:** Test-first — characterize current behavior with a capsys test, then migrate.
-
-**Test scenarios:**
- Happy path: API fetch logs pagination progress at INFO level
- Error path: Failed request logs at ERROR level
- Integration: Log output includes module name (`api_client`)
-
-**Verification:**
- `grep -n "print(" api_client.py` returns nothing
- Existing API tests pass
-
---
-
- [ ] U4. **Migrate pipeline modules**
-
-**Goal:** Replace prints in pipeline/ with logging.
-
-**Requirements:** R1, R5
-
-**Dependencies:** U1, U2 (for database.py patterns to follow)
-
-**Files:**
- Modify: `pipeline/run_pipeline.py`, `pipeline/svd_pipeline.py`, `pipeline/text_pipeline.py`, `pipeline/fusion.py`
-
-**Approach:**
- Batch migration of 4 files
- Progress bars / step completion → `logger.info`
- Warnings about missing data → `logger.warning`
-
-**Test scenarios:**
- Happy path: Pipeline run emits structured logs for each stage
- Error path: Missing embeddings logged at WARNING, not silently skipped
-
-**Verification:**
- `grep -rn "print(" pipeline/` returns nothing
- Pipeline tests pass
-
---
-
- [ ] U5. **Migrate scripts/ batch**
-
-**Goal:** Replace prints in scripts/ with logging.
-
-**Requirements:** R1, R5
-
-**Dependencies:** U1
-
-**Files:**
- Modify: `scripts/*.py` (batch, mechanical)
-
-**Approach:**
- Script-level loggers: `logger = logging.getLogger("scripts.drift_analysis")`
- CLI progress prints → `logger.info`
- Results summary prints → `logger.info` (or keep as print if they are actual CLI output)
-
-**Execution note:** Some scripts may legitimately be CLI tools where stdout output is the product. Only migrate diagnostic/progress prints; keep `print(json.dumps(result))` style outputs.
-
-**Test scenarios:**
- Happy path: Script progress is logged, result output is preserved
- Edge case: Scripts that parse their own output still work
-
-**Verification:**
- Scripts that produce machine-readable output still do so
- `uv run pytest tests/scripts/ -q` passes
-
---
-
- [ ] U6. **Update CODE_STYLE.md and add lint rule**
-
-**Goal:** Prevent new print() calls from being introduced.
-
-**Requirements:** R4
-
-**Dependencies:** U1–U5
-
-**Files:**
- Modify: `CODE_STYLE.md`
- Modify: `.pre-commit-config.yaml` (add ruff rule for print)
-
-**Approach:**
- Add "Use logging, not print" section to CODE_STYLE.md
- Add ruff rule: `T201` (print found) to enforce
-
-**Test expectation:** none — documentation and config change.
-
-**Verification:**
- `ruff check .` fails if any new print() is added
-
---
-
-## System-Wide Impact
-
- **Interaction graph:** All modules that previously printed to stdout now use logging handlers
- **Error propagation:** Logging does not change exception flow, but error messages are now timestamped and leveled
- **State lifecycle risks:** None — logging is side-effect-only
- **Unchanged invariants:** All existing behavior preserved; only output channel changes
-
-## Risks & Dependencies
-
-| Risk | Mitigation |
-|------|------------|
-| Missing a print() call | Use `grep -rn "print(" --include="*.py"` as final check |
-| Streamlit UI breaks from missing prints | Identify and convert intentional UI prints to `st.info` first |
-| Tests that assert on stdout break | Update to use `caplog` fixture |
-| Scripts that pipe their own output | Keep result prints; only migrate diagnostic prints |
-
-## Documentation / Operational Notes
-
- Update CODE_STYLE.md logging section
- Consider adding a logging configuration section to ARCHITECTURE.md
-
-## Sources & References
-
- CODE_STYLE.md logging guidance
- Python logging docs: https://docs.python.org/3/library/logging.html
- Existing prints: `grep -rn "print(" --include="*.py" .`
--- a/docs/plans/2026-04-24-009-pipeline-health-checks-plan.md
+++ b/docs/plans/2026-04-24-009-pipeline-health-checks-plan.md
@ -1,264 +0,0 @@
---
-title: "feat: Pipeline health checks and observability"
-type: feat
-status: active
-date: 2026-04-24
---
-
-# Pipeline Health Checks and Observability
-
-## Overview
-
-There is no automated way to verify pipeline health. A broken API client, stale embeddings, or an SVD axis flip could go unnoticed until a user reports it. A health check script plus a lightweight dashboard would surface problems proactively.
-
-## Problem Frame
-
- No visibility into whether the last pipeline run succeeded
- No alerting when motion count drops unexpectedly
- No detection when SVD components flip or drift
- No visibility into embedding coverage (% of motions with embeddings)
- LLM enrichment failures are silent (motions just lack layman_explanation)
-
-## Requirements Trace
-
- R1. Health check script verifies: API reachable, DB has recent motions, embeddings cover at least a configurable share of motions (default 95%)
- R2. Health check detects SVD stability (no sudden axis flips)
- R3. Health check reports missing layman_explanations
- R4. Optional: Streamlit page or API endpoint showing health metrics
- R5. All health checks are testable and tested
-
-## Scope Boundaries
-
-**Included:**
- Health check module with individual check functions
- CLI runner for health checks
- Tests for each check
- Optional Streamlit health page
-
-**Excluded:**
- Real alerting (PagerDuty, Slack) — just script exit codes for now
- Long-term metrics storage (Prometheus, etc.)
- Fixing the issues the health check finds
-
-## Key Technical Decisions
-
- **Pure functions for checks** — Each check is a function that takes DB/config and returns (status, message, details). This makes them testable without side effects.
- **Composable runner** — A runner executes all checks and aggregates results into a report.
- **Exit codes** — 0 = all healthy, 1 = at least one warning and no critical, 2 = at least one critical (the highest severity wins). Suitable for cron/CI.
-
-## Context & Research
-
-### Relevant Code and Patterns
-
- `pipeline/run_pipeline.py` — orchestrates all pipeline stages
- `database.py` — DB queries for motion counts, embeddings, vote counts
- `analysis/svd_labels.py` — SVD component stability logic
- `scripts/` — existing diagnostic scripts (drift analysis, etc.)
-
-### Institutional Learnings
-
- `docs/solutions/workflow-issues/trajectories-diagnostic-false-alarm-2026-03-31.md` — diagnostic scripts can produce false alarms if they don't verify against canonical DB state
- `docs/solutions/best-practices/blog-numbers-from-pipeline-outputs-2026-04-16.md` — metrics must be derived from canonical sources, not hardcoded
-
-## Implementation Units
-
- [ ] U1. **Create health check core module**
-
-**Goal:** Define the check interface and runner.
-
-**Requirements:** R1–R3 foundation
-
-**Dependencies:** None
-
-**Files:**
- Create: `health/__init__.py`
- Create: `health/core.py`
- Create: `health/checks.py`
- Create: `tests/test_health_core.py`
-
-**Approach:**
- `HealthStatus` enum: OK, WARNING, CRITICAL
- `HealthCheck` dataclass: name, status, message, details
- `run_checks(checks)` → `HealthReport` with aggregate status
- `check_*` functions are pure: accept data, return HealthCheck
-
-**Execution note:** Test-first — write `test_health_core.py` with failing tests for the interface before implementing.
-
-**Test scenarios:**
- Happy path: All OK checks → report status OK
- Error path: One CRITICAL check → report status CRITICAL
- Edge case: Empty check list → report status OK
- Integration: Check functions are pure (no DB access in core)
-
-**Verification:**
- `uv run pytest tests/test_health_core.py -v` passes
-
---
-
- [ ] U2. **Implement data freshness checks**
-
-**Goal:** Verify the DB has recent motions and votes.
-
-**Requirements:** R1
-
-**Dependencies:** U1
-
-**Files:**
- Modify: `health/checks.py`
- Create: `tests/test_health_checks.py`
-
-**Approach:**
- `check_motion_freshness(db, max_age_days=7)` — count motions newer than threshold
- `check_vote_coverage(db)` — % of motions with votes
- `check_embedding_coverage(db, min_coverage=0.95)` — % of motions with fused embeddings
-
-**Execution note:** Test-first — use mocked DB or test fixtures with known data.
-
-**Test scenarios:**
- Happy path: Recent motions exist, coverage > 95% → OK
- Warning path: Motions are 10 days old → WARNING
- Critical path: No motions in last 30 days → CRITICAL
- Edge case: Empty database → CRITICAL with clear message
-
-**Verification:**
- Tests pass with mocked database
- Manual run against real DB produces accurate report
-
---
-
- [ ] U3. **Implement SVD stability check**
-
-**Goal:** Detect if SVD components have flipped or drifted significantly.
-
-**Requirements:** R2
-
-**Dependencies:** U1, U2
-
-**Files:**
- Modify: `health/checks.py`
- Modify: `tests/test_health_checks.py`
-
-**Approach:**
- `check_svd_stability(db, reference_themes)` — compare current SVD_THEMES to canonical config
- `check_axis_flip(db)` — verify right-wing parties are on the right side (reuse existing validation logic)
- Use `analysis/config.py` SVD_THEMES as canonical reference
-
-**Execution note:** Test-first — mock the DB to return known SVD components and test flip detection.
-
-**Test scenarios:**
- Happy path: SVD components match canonical themes → OK
- Warning path: Minor label drift → WARNING
- Critical path: Axis flip detected (right-wing parties on left) → CRITICAL
- Edge case: No SVD data in DB → CRITICAL
-
-**Verification:**
- Tests pass
- Manual verification against real DB confirms no false alarms
-
---
-
- [ ] U4. **Implement LLM enrichment check**
-
-**Goal:** Surface motions missing layman explanations.
-
-**Requirements:** R3
-
-**Dependencies:** U1, U2
-
-**Files:**
- Modify: `health/checks.py`
- Modify: `tests/test_health_checks.py`
-
-**Approach:**
- `check_llm_coverage(db, max_missing=100)` — count motions without layman_explanation
- `check_llm_quality(db)` — spot-check a sample of explanations for non-empty, reasonable length
-
-**Test scenarios:**
- Happy path: <5% missing explanations → OK
- Warning path: 5–15% missing → WARNING
- Critical path: >15% missing → CRITICAL
- Edge case: All explanations are empty strings → WARNING
-
-**Verification:**
- Tests pass with mocked data
-
---
-
- [ ] U5. **Create CLI runner**
-
-**Goal:** Run all checks from command line with appropriate exit codes.
-
-**Requirements:** R1–R4
-
-**Dependencies:** U1–U4
-
-**Files:**
- Create: `scripts/health_check.py`
- Create: `tests/scripts/test_health_check.py`
-
-**Approach:**
- `python scripts/health_check.py` → prints report, exits 0/1/2
- Optional flags: `--check motion-freshness`, `--format json`, `--threshold-days 7`
-
-**Test scenarios:**
- Happy path: All OK → exit 0, human-readable output
- Error path: One warning → exit 1
- Critical path: One critical → exit 2
- Edge case: JSON format outputs valid JSON
-
-**Verification:**
- `uv run python scripts/health_check.py` runs without error
- Exit codes match expectations
-
---
-
- [ ] U6. **Add Streamlit health page (optional)**
-
-**Goal:** Visual health dashboard in the app.
-
-**Requirements:** R4
-
-**Dependencies:** U1–U5
-
-**Files:**
- Create: `pages/3_Health.py`
-
-**Approach:**
- Run all checks on page load
- Display: overall status, motion count, embedding coverage, SVD status, LLM coverage
- Use `st.metric` for key numbers
- Color-code: green/yellow/red
-
-**Test expectation:** none — Streamlit page, tested manually.
-
-**Verification:**
- Page loads without error
- Metrics update when DB changes
-
---
-
-## System-Wide Impact
-
- **Interaction graph:** Health checks read from DB but do not write. Safe to run concurrently with pipeline.
- **Error propagation:** Check failures are captured in report, not raised as exceptions.
- **Unchanged invariants:** No changes to pipeline, DB schema, or UI behavior.
-
-## Risks & Dependencies
-
-| Risk | Mitigation |
-|------|------------|
-| False alarms (like trajectories diagnostic) | Verify against canonical DB state, not intermediary artifacts |
-| Slow checks on large DB | Add query timeouts; cache results |
-| Check drift from codebase changes | Health checks are tested; tests fail if logic breaks |
-
-## Documentation / Operational Notes
-
- Add health check to deployment runbook (run before/after pipeline)
- Consider scheduling in CI or cron
-
-## Sources & References
-
- `docs/solutions/workflow-issues/trajectories-diagnostic-false-alarm-2026-03-31.md`
- `analysis/config.py` — canonical SVD themes
- `database.py` — DB schema and queries
- `docs/solutions/best-practices/blog-numbers-from-pipeline-outputs-2026-04-16.md`
--- a/docs/plans/2026-04-24-ROADMAP-stemwijzer-improvements.md
+++ b/docs/plans/2026-04-24-ROADMAP-stemwijzer-improvements.md
@ -1,131 +0,0 @@
-# Stemwijzer Improvement Roadmap
-
-**Created:** 2026-04-24
-**Status:** Active
-
-This roadmap captures 17 improvement opportunities identified during a codebase review, organized into 5 phases by dependency and risk. Each item links to a detailed implementation plan (created separately) when available.
-
---
-
-## Phase 1: Foundation (CI/CD, Config, Documentation)
-
-*Prerequisite for everything else. Low risk, high developer-experience impact.*
-
-| # | Improvement | Priority | Effort | Plan | TDD |
-|---|------------|----------|--------|------|-----|
-| 1 | Fix broken CI test workflow (mindmodel-schedule.yml references missing requirements.txt) | High | Small | P1-001 | Yes |
-| 2 | ~~Fix docker-compose.yml (missing scheduler.py)~~ — *Removed: Docker deployment not used* | — | — | — | — |
-| 3 | Consolidate duplicate config sources (config.py vs analysis/config.py) | Medium | Small | P1-003 | Yes |
-| 4 | Rewrite README.md (22 lines → proper quickstart) | High | Small | P1-004 | No (docs) |
-| 5 | Add pyright type-checking to CI | Medium | Small | P1-005 | Yes |
-| 6 | Activate pre-commit hooks (black, ruff, isort) | Medium | Small | P1-006 | Yes |
-
-**Phase goal:** Reliable CI, clean config, and onboarding docs that don't require discovering ARCHITECTURE.md.
-
---
-
-## Phase 2: Code Quality (Logging, Error Handling, Import Safety)
-
-*Builds on Phase 1 CI. Makes the codebase maintainable and production-ready.*
-
-| # | Improvement | Priority | Effort | Plan | TDD |
-|---|------------|----------|--------|------|-----|
-| 7 | Replace ~225 print() calls with structured logging | Medium | Medium | P2-001 | Yes |
-| 8 | Fix broad `except Exception:` blocks in database.py and api_client.py | Medium | Medium | P2-002 | Yes |
-| 9 | Complete import-safe module guards (extend existing work) | Medium | Medium | — | Yes |
-
-**Phase goal:** Observable, debuggable production behavior with clear error propagation.
-
---
-
-## Phase 3: Architecture (Decompose explorer.py)
-
-*Already partially completed. Remaining work is decoupling Streamlit from tab logic.*
-
-| # | Improvement | Priority | Effort | Plan | TDD |
-|---|------------|----------|--------|------|-----|
-| 10 | Complete explorer.py decomposition (extract tab logic from Streamlit) | Medium | Large | P3-001 | Yes |
-
-**Status:** Constants extracted to analysis/config.py, placeholder tab modules created. Remaining: move build_*_tab functions out of explorer.py while preserving @st.cache_data decorators.
-
-**Phase goal:** explorer.py under 1500 lines, tab modules independently testable.
-
---
-
-## Phase 4: New Features
-
-*User-facing value. Depends on Phase 2 for observability and Phase 3 for clean architecture.*
-
-| # | Improvement | Priority | Effort | Plan | TDD |
-|---|------------|----------|--------|------|-----|
-| 11 | REST API layer (read-only, FastAPI) | Low | Large | P4-001 | Yes |
-| 12 | Automated pipeline scheduling (real scheduler.py) | Medium | Medium | P4-002 | Yes |
-| 13 | Motion recommendation engine | Low | Medium | P4-003 | Yes |
-| 14 | Export user voting profile (JSON/CSV/shareable image) | Low | Small | P4-004 | Yes |
-| 15 | Data quality dashboard (Streamlit page or API) | Medium | Medium | P4-005 | Yes |
-
-**Phase goal:** External API consumers, automated data freshness, and user engagement features.
-
---
-
-## Phase 5: Observability & Robustness
-
-*Production confidence. Can run in parallel with Phase 4.*
-
-| # | Improvement | Priority | Effort | Plan | TDD |
-|---|------------|----------|--------|------|-----|
-| 16 | Add Sentry or error tracking | Low | Small | P5-001 | No (config) |
-| 17 | Pipeline health checks / alerting script | Medium | Medium | P5-002 | Yes |
-| 18 | Benchmark suite (pytest-benchmark for SVD/fusion) | Low | Small | P5-003 | Yes |
-
-**Phase goal:** Know when things break before users do; detect performance regressions.
-
---
-
-## Dependency Graph
-
-```
-Phase 1 (Foundation)
-  ├─→ Phase 2 (Code Quality) ─┬─→ Phase 3 (Architecture)
-  │                           │        └─→ Phase 4 (Features)
-  │                           └──────────────→ Phase 4 (Features)
-  └─→ Phase 5 (Observability)   [largely independent; can run in parallel with Phase 4]
-```
-
-Phase 1 must come first. Phase 2 makes Phase 3/4 safer. Phase 3 unlocks some Phase 4 items. Phase 5 is largely independent.
-
---
-
-## Recommended Execution Order
-
-**Sprint 1:** Items 1, 4, 6 (CI + docs + pre-commit)
-**Sprint 2:** Items 5, 7, 8 (type checking + logging + errors)
-**Sprint 3:** Item 10 (explorer decomposition)
-**Sprint 4:** Items 12, 15, 17 (pipeline automation + health checks)
-**Sprint 5+:** Items 11, 13, 14, 16, 18 (API + features + observability)
-
---
-
-## Plan Document Inventory
-
-| Plan ID | File | Status |
-|---------|------|--------|
-| P1-001 | docs/plans/2026-04-24-001-fix-ci-test-workflow-plan.md | Planned |
-| P1-002 | ~~docs/plans/2026-04-24-002-fix-docker-compose-scheduler-plan.md~~ | Removed |
-| P1-003 | docs/plans/2026-04-24-003-consolidate-config-sources-plan.md | Planned |
-| P1-004 | docs/plans/2026-04-24-004-rewrite-readme-plan.md | Planned |
-| P1-005 | docs/plans/2026-04-24-005-add-pyright-ci-plan.md | Planned |
-| P1-006 | docs/plans/2026-04-24-006-activate-pre-commit-hooks-plan.md | Planned |
-| P2-001 | docs/plans/2026-04-24-007-replace-print-with-logging-plan.md | Planned |
-| P2-002 | docs/plans/2026-04-24-008-fix-broad-exception-handling-plan.md | Planned |
-| P3-001 | docs/plans/2026-04-04-003-refactor-complete-explorer-decomposition-plan.md | In progress |
-| P4-005 | docs/plans/2026-04-24-009-pipeline-health-checks-plan.md | Planned |
-| P5-002 | docs/plans/2026-04-24-010-pipeline-health-checks-plan.md | Planned |
-
---
-
-## Notes
-
- All implementation plans use TDD (test-first) for code-bearing units.
- Docs-only and config-only units (e.g., the README rewrite, error-tracking setup) skip TDD but include verification checklists.
- Existing plans (e.g., explorer decomposition) are referenced rather than duplicated.
--- a/docs/plans/2026-05-01-001-scripts-audit-cleanup-plan.md
+++ b/docs/plans/2026-05-01-001-scripts-audit-cleanup-plan.md
@ -1,137 +0,0 @@
---
-title: Scripts Directory Audit and Cleanup Plan
-type: refactor
-status: active
-date: 2026-05-01
---
-
-# Scripts Directory Audit and Cleanup Plan
-
-## Overview
-
-The `scripts/` directory contains 20 Python files (~4,900 lines total). Many are one-off diagnostics, research utilities, or data backfill scripts from early pipeline development. Several are no longer needed, some generate outputs to now-deleted directories, and a few have overlapping functionality. This plan establishes a clear taxonomy and cleanup path.
-
---
-
-## Current Inventory
-
-| Script | Lines | Last Commit | References | Status |
-|--------|-------|-------------|------------|--------|
-| `download_past_year.py` | 295 | 2026-04-30 | 11 | **Keep** — Active data ingestion |
-| `health_check.py` | 98 | 2026-05-01 | 21 | **Keep** — Active health check CLI |
-| `validate_svd_themes.py` | 343 | 2026-04-30 | 13 | **Keep** — Active validation |
-| `generate_svd_json.py` | 594 | 2026-04-13 | 12 | **Keep** — Generates `thoughts/explorer/top_svd_top_motions.json` |
-| `motion_drift.py` | 1,207 | 2026-04-05 | 42 | **Keep** — Referenced in active plans |
-| `sync_motion_content.py` | 704 | 2026-03-23 | 8 | **Keep** — Content enrichment pipeline |
-| `rerun_embeddings.py` | 233 | 2026-03-23 | 15 | **Keep** — Embedding rebuild utility |
-| `derive_svd_labels.py` | 423 | 2026-04-13 | 5 | **Keep** — SVD label derivation |
-| `diagnose_trajectories_cli.py` | 234 | 2026-03-31 | 5 | **Keep** — Diagnostic utility |
-| `svd_diagnostics.py` | 214 | 2026-03-22 | 9 | **Keep** — SVD diagnostics |
-| `recompute_svd.py` | 172 | 2026-04-16 | 2 | **Archive** — One-off recompute |
-| `semantic_gravity_examples.py` | 286 | 2026-04-05 | 6 | **Archive** — Research script |
-| `qa_similarity.py` | 150 | 2026-03-23 | 4 | **Archive** — QA script (references deleted `thoughts/ledgers/`) |
-| `fill_mp_votes_parties.py` | 277 | 2026-03-22 | 2 | **Archive** — Backfill script |
-| `inspect_axis.py` | 137 | 2026-03-22 | 3 | **Archive** — Diagnostic |
-| `compare_svd_exclude_parties.py` | 204 | 2026-03-22 | 1 | **Archive** — Diagnostic |
-| `generate_compass.py` | 157 | 2026-03-22 | 2 | **Archive** — Generates to deleted `outputs/` |
-| `compute_test_batch.py` | 128 | 2026-03-20 | 3 | **Archive** — Test batch |
-| `generate_extra_charts.py` | 172 | 2026-03-22 | 0 | **Delete** — Generates to deleted `outputs/`, 0 references |
-
---
-
-## Categorization Rules
-
-### Keep (10 scripts)
-Scripts that are:
- Imported or invoked by active code/tests
- Referenced in active plans (docs/plans/)
- Run regularly as part of pipeline or diagnostics
- Updated recently (April 2026+)
-
-### Archive (8 scripts)
-Scripts that are:
- One-off diagnostics or backfill utilities
- Research/exploration scripts with no active plan references
- Superseded by pipeline code but kept for historical reference
- Generate outputs to `outputs/` (deleted) or `thoughts/ledgers/` (deleted)
-
-**Archive location:** `scripts/archive/` — not imported, not tested, preserved for reference.
-
-### Delete (1 script)
-Scripts that are:
- Completely orphaned (0 references)
- Superseded with no unique value
- Generate outputs to non-existent directories
-
---
-
-## Implementation Units
-
- [ ] U1. **Create `scripts/archive/` directory**
-  - Files: `scripts/archive/` (new directory)
-  - Verification: Directory exists
-
- [ ] U2. **Move archive scripts to `scripts/archive/`**
-  - Files to move:
-    - `scripts/recompute_svd.py`
-    - `scripts/semantic_gravity_examples.py`
-    - `scripts/qa_similarity.py`
-    - `scripts/fill_mp_votes_parties.py`
-    - `scripts/inspect_axis.py`
-    - `scripts/compare_svd_exclude_parties.py`
-    - `scripts/generate_compass.py`
-    - `scripts/compute_test_batch.py`
-  - Verification: Scripts are in `scripts/archive/`, not in `scripts/`
-
- [ ] U3. **Delete orphaned scripts**
-  - Files to delete:
-    - `scripts/generate_extra_charts.py`
-  - Verification: File no longer exists
-
- [ ] U4. **Update `.gitignore` for archive**
-  - Add: `scripts/archive/` (optional — if we don't want to track archived scripts)
-  - Or add README in archive explaining purpose
-  - Verification: Archive is handled appropriately
-
- [ ] U5. **Run test suite**
-  - Command: `uv run pytest tests/ -q`
-  - Verification: All tests pass, no import errors from moved scripts
-
---
-
-## Risks
-
-| Risk | Mitigation |
-|------|-----------|
-| A test imports an archived script | Check all test imports before moving |
-| A plan references an archived script | Plans already checked — none reference archive candidates exclusively |
-| Future need for archived script | Git history preserves everything; archive is just convenience |
-
---
-
-## Post-Cleanup State
-
-```
-scripts/
-├── archive/              # 8 archived scripts (reference only)
-│   ├── compare_svd_exclude_parties.py
-│   ├── compute_test_batch.py
-│   ├── fill_mp_votes_parties.py
-│   ├── generate_compass.py
-│   ├── inspect_axis.py
-│   ├── qa_similarity.py
-│   ├── recompute_svd.py
-│   └── semantic_gravity_examples.py
-├── download_past_year.py
-├── health_check.py
-├── derive_svd_labels.py
-├── diagnose_trajectories_cli.py
-├── generate_svd_json.py
-├── motion_drift.py
-├── rerun_embeddings.py
-├── sync_motion_content.py
-├── svd_diagnostics.py
-└── validate_svd_themes.py
-```
-
-**Result:** 10 active scripts + 8 archived. ~1,700 lines removed from active directory.
--- a/docs/plans/2026-05-01-002-agent-native-architecture-plan.md
+++ b/docs/plans/2026-05-01-002-agent-native-architecture-plan.md
@ -1,233 +0,0 @@
---
-title: Agent-Native Architecture Plan for Stemwijzer
-type: refactor
-status: active
-date: 2026-05-01
-origin: STRATEGY.md (agent-native architecture track)
---
-
-# Agent-Native Architecture Plan for Stemwijzer
-
-## Overview
-
-Stemwijzer is a data-heavy analytical application with three surfaces: a Streamlit voting UI, a data pipeline (OData ingestion → DuckDB → SVD/embedding computation), and an analytics explorer. The agent-native architecture track aims to make an agent as capable as a human operator at every operation—whether that's running the pipeline, diagnosing drift, or answering research questions about parliamentary voting patterns.
-
-**Current state:** The codebase is human-operated. Scripts are run manually, pipeline status is checked by eye, and analysis requires writing Python/DuckDB queries.
-
-**Target state:** An agent with access to atomic primitives can run the pipeline, diagnose issues, generate reports, and answer open-ended questions about the data—operating in a loop until outcomes are achieved.
-
---
-
-## Problem Frame
-
- **Pipeline operators** need to know when data is stale, why SVD vectors look wrong, or whether the similarity cache is healthy. Currently this requires manually running scripts and interpreting output.
- **Analysts/researchers** want to ask questions like "Which parties shifted most on economic axes between 2020 and 2024?" Currently this requires writing DuckDB queries and Python analysis code.
- **Developers** need to understand pipeline state, verify data integrity, and troubleshoot ingestion issues. Currently this requires reading logs and running diagnostics manually.
- **Content maintainers** need to verify SVD labels match actual voting patterns, check motion coverage, and validate layman explanations. Currently ad-hoc.
-
---
-
-## Requirements Trace
-
- R1. The agent can achieve anything a pipeline operator can achieve (parity)
- R2. The agent can answer open-ended analytical questions about parliamentary data (emergent capability)
- R3. The agent can diagnose pipeline health and suggest remediation (self-service operations)
- R4. The agent can generate and validate content (SVD labels, motion summaries)
- R5. New capabilities can be added by writing prompts, not code (composability)
-
---
-
-## Scope Boundaries
-
- **In scope:** Agent primitives for data operations, pipeline control, analysis, and diagnostics
- **Deferred:** Real-time agent UI inside Streamlit (future phase—add chat interface to explorer)
- **Deferred:** Autonomous pipeline scheduling (scheduler.py exists but agent control is v2)
- **Not working on:** Natural language to SQL for end users (this plan targets agent operators, not voter-facing features)
-
---
-
-## Key Technical Decisions
-
- **Files as universal interface:** DuckDB is already file-based (`data/motions.db`). The agent's workspace is the repo itself. Logs, reports, and analysis outputs are files the agent writes and the human reads.
- **Database tools over file tools for structured data:** For querying motions, votes, and embeddings, the agent needs `query_database` primitives that wrap DuckDB/SQL, not raw file operations.
- **Pipeline as state machine:** The pipeline has discrete stages (ingestion → vote extraction → SVD → text embeddings → fusion → similarity). The agent needs stage-aware tools, not just "run everything."
- **Shared workspace:** Agent and human operate on the same `data/motions.db`, the same `thoughts/explorer/` outputs, the same `docs/solutions/` knowledge base.
-
---
-
-## Implementation Units
-
- [ ] U1. **Database query primitives**
-  - **Goal:** Give the agent structured access to the DuckDB database
-  - **Requirements:** R1, R2, R4
-  - **Dependencies:** None
-  - **Files:**
-    - Create: `agent_tools/database.py`
-    - Test: `tests/agent_tools/test_database_tools.py`
-  - **Approach:** Wrap DuckDB queries as atomic tools:
-    - `query_motions(filter, limit, order)` → returns motion rows as JSON
-    - `query_votes(motion_id, party)` → returns vote counts
-    - `query_svd_vectors(window_id, entity_type)` → returns vectors
-    - `query_party_positions(window_id)` → returns party axis scores
-    - `query_pipeline_status()` → returns freshness metrics from health checks
-  - **Patterns to follow:** `health/checks.py` already has DB query patterns; `analysis/explorer_data.py` has read-only query patterns
-  - **Test scenarios:**
-    - Happy path: query returns valid JSON for known filters
-    - Edge case: empty result set returns `[]` not error
-    - Error path: invalid SQL/filter returns structured error with suggestion
-  - **Verification:** Agent can answer "How many motions in 2024?" using only the tool
-
- [ ] U2. **Pipeline control primitives**
-  - **Goal:** Let the agent run, monitor, and diagnose pipeline stages
-  - **Requirements:** R1, R3
-  - **Dependencies:** U1
-  - **Files:**
-    - Create: `agent_tools/pipeline.py`
-    - Test: `tests/agent_tools/test_pipeline_tools.py`
-  - **Approach:** Stage-aware pipeline tools:
-    - `pipeline_run_stage(stage, window_id, dry_run)` → runs one stage, returns status
-    - `pipeline_run_full(dry_run)` → orchestrates all stages with dependency ordering
-    - `pipeline_check_health()` → returns health report (reuses `health/` module)
-    - `pipeline_get_logs(stage, lines)` → returns recent logs for a stage
-    - `pipeline_validate_output(stage)` → checks output exists and looks reasonable
-  - **Patterns to follow:** `pipeline/run_pipeline.py` has the stage orchestration; `scripts/health_check.py` has the CLI pattern
-  - **Test scenarios:**
-    - Happy path: dry-run returns planned actions without executing
-    - Integration: running `pipeline_run_stage("svd", "2024")` produces expected `svd_vectors` rows
-    - Error path: running a stage with missing dependencies returns clear error
-  - **Verification:** Agent can diagnose "Why are SVD vectors stale?" by checking health, reading logs, and suggesting which stage to re-run
-
- [ ] U3. **Analysis and report generation primitives**
-  - **Goal:** Let the agent perform analytical tasks and write reports
-  - **Requirements:** R2, R4
-  - **Dependencies:** U1
-  - **Files:**
-    - Create: `agent_tools/analysis.py`
-    - Create: `agent_tools/reports.py`
-    - Test: `tests/agent_tools/test_analysis_tools.py`
-  - **Approach:**
-    - `analyze_party_shift(party, window_start, window_end, metric)` → computes and returns shift data
-    - `analyze_axis_stability(component, windows)` → returns stability scores
-    - `generate_report(type, parameters, output_path)` → writes markdown report to `reports/`
-    - `validate_svd_labels(component)` → compares theme labels to actual party positions
-  - **Patterns to follow:** `analysis/political_axis.py`, `scripts/motion_drift.py`, `scripts/validate_svd_themes.py`
-  - **Test scenarios:**
-    - Happy path: `analyze_party_shift` returns structured data for known party
-    - Integration: `generate_report("drift", {windows: ["2020", "2024"]})` produces valid markdown
-    - Edge case: requesting analysis for nonexistent window returns empty result
-  - **Verification:** Agent can answer "Which parties shifted most on economic axes?" by running analysis and summarizing results
-
- [ ] U4. **Content validation primitives**
-  - **Goal:** Let the agent validate and suggest content improvements
-  - **Requirements:** R4
-  - **Dependencies:** U1, U3
-  - **Files:**
-    - Create: `agent_tools/content.py`
-    - Test: `tests/agent_tools/test_content_tools.py`
-  - **Approach:**
-    - `validate_motion_coverage(start_date, end_date)` → returns coverage gaps
-    - `validate_layman_explanations(sample_size)` → samples motions, checks explanation quality
-    - `suggest_svd_label(component, top_n_motions)` → analyzes top motions, suggests label
-    - `check_embedding_quality(window_id)` → returns coverage stats for fused embeddings
-  - **Patterns to follow:** `summarizer.py` for explanation logic; `scripts/validate_svd_themes.py` for theme validation
-  - **Test scenarios:**
-    - Happy path: `validate_motion_coverage` returns accurate gap list
-    - Edge case: all motions covered returns empty gaps
-  - **Verification:** Agent can run weekly content quality checks and produce a report
-
- [ ] U5. **System prompt and context injection**
-  - **Goal:** Define agent behavior and inject runtime context
-  - **Requirements:** R1, R2, R3, R4, R5
-  - **Dependencies:** U1-U4
-  - **Files:**
-    - Create: `agent_tools/SYSTEM_PROMPT.md`
-    - Create: `agent_tools/context.py`
-  - **Approach:**
-    - `SYSTEM_PROMPT.md`: Defines agent identity ("You are the Stemwijzer pipeline operator"), available tools, decision criteria, and output conventions
-    - `context.py`: Injects runtime context—current pipeline status, latest SVD window, known issues from `docs/solutions/`, active party list
-    - `context.md` pattern: Agent maintains `agent_tools/context.md` with accumulated learnings about the pipeline
-  - **Patterns to follow:** `ce-agent-native-architecture` context.md pattern; `AGENTS.md` for project conventions
-  - **Test scenarios:**
-    - Context injection produces valid markdown with current DB stats
-    - System prompt loads and parses without errors
-  - **Verification:** Agent session starts with full context of pipeline state
-
- [ ] U6. **Agent-native testing and parity verification**
-  - **Goal:** Ensure agent can do everything humans can do
-  - **Requirements:** R1
-  - **Dependencies:** U1-U5
-  - **Files:**
-    - Create: `tests/agent_tools/test_parity.py`
-    - Modify: `tests/conftest.py` (add agent tool fixtures)
-  - **Approach:**
-    - Parity tests: For each human action (run pipeline, check health, generate report), verify the agent tool achieves the same outcome
-    - Integration tests: Agent runs a full diagnostic loop (check health → identify issue → run fix → verify)
-    - `test_parity.py`: Matrix of human action → agent tool → expected outcome
-  - **Test scenarios:**
-    - Parity: "Human runs health check CLI" vs "Agent calls pipeline_check_health()" → same result
-    - Integration: Agent detects stale data, runs pipeline, verifies freshness
-  - **Verification:** All parity tests pass
-
---
-
-## Output Structure
-
-```
-agent_tools/                    # New directory
-├── __init__.py
-├── SYSTEM_PROMPT.md            # Agent behavior definition
-├── context.py                  # Runtime context injection
-├── context.md                  # Accumulated agent knowledge
-├── database.py                 # DB query primitives
-├── pipeline.py                 # Pipeline control primitives
-├── analysis.py                 # Analysis primitives
-├── reports.py                  # Report generation
-└── content.py                  # Content validation primitives
-
-tests/agent_tools/              # New test directory
-├── __init__.py
-├── test_database_tools.py
-├── test_pipeline_tools.py
-├── test_analysis_tools.py
-├── test_content_tools.py
-└── test_parity.py
-
-reports/                        # Agent-generated reports (gitignored)
-```
-
---
-
-## System-Wide Impact
-
- **Interaction graph:** Agent tools call into `database.py`, `pipeline/`, `analysis/`, `health/` modules. These modules are already well-factored and read-only where appropriate.
- **Error propagation:** Agent tools return structured errors (JSON with `error`, `suggestion`, `retryable` fields) rather than raising exceptions. This lets the agent reason about failures.
- **State lifecycle:** Agent-generated reports in `reports/` are ephemeral (gitignored). Agent updates to `context.md` are durable and committed.
- **Unchanged invariants:** The Streamlit UI, the data pipeline logic, and the SVD computation remain unchanged. Agent tools are a new surface, not a refactor.
-
---
-
-## Risks & Dependencies
-
-| Risk | Mitigation |
-|------|-----------|
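-| DuckDB concurrency (read-only agent + write pipeline) | Agent uses read-only connections; pipeline uses write connections. Note that DuckDB allows either one read-write process or multiple read-only processes on a database file, not both at once, so agent reads must run in the pipeline's process or be scheduled (with retry) around pipeline runs. |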
-| Agent tools become stale as pipeline evolves | Tools are thin wrappers around stable module interfaces. U6 parity tests catch drift. |
-| Context injection grows too large | Context is scoped to the task. `context.py` generates minimal relevant context, not full DB dumps. |
-| Security: agent has DB access | Agent runs in the same trust boundary as the developer. No new security surface. |
-
---
-
-## Documentation / Operational Notes
-
- Add `agent_tools/` to `AGENTS.md` so future agents know the capability surface exists
- Document the parity test matrix in `tests/agent_tools/README.md`
- `reports/` should be gitignored; agent reports are ephemeral outputs
-
---
-
-## Sources & References
-
- **Origin:** STRATEGY.md (agent-native architecture track)
- **Skill:** `ce-agent-native-architecture` (parity, granularity, composability, emergent capability)
- **Related code:** `health/`, `pipeline/`, `analysis/`, `database.py`
- **Related docs:** `docs/plans/2026-04-24-ROADMAP-stemwijzer-improvements.md` (P4 tracks)
--- a/docs/research/bipartisan_anchor_extremity.png
+++ b/docs/research/bipartisan_anchor_extremity.png
--- a/docs/research/cross_temporal_drift.png
+++ b/docs/research/cross_temporal_drift.png
--- a/docs/research/cross_temporal_policy_extremity.png
+++ b/docs/research/cross_temporal_policy_extremity.png
--- a/docs/research/llm-motion-classification.md
+++ b/docs/research/llm-motion-classification.md
@ -1,181 +0,0 @@
-# Motion Extremity Classification with LLMs
-
-## Implementation Status
-
-**Script**: `scripts/classify_motions.py` - Ready to run
-
-**Requirements**:
- Valid OpenRouter API key in `.env` (current key returns "User not found")
- ~28,000 motions to classify
-
-**Usage**:
-```bash
-# Classify all motions (will take hours)
-.venv/bin/python scripts/classify_motions.py --delay 0.5
-
-# Test with small sample first
-.venv/bin/python scripts/classify_motions.py --limit 10 --delay 2
-
-# Analyze existing classifications
-.venv/bin/python scripts/classify_motions.py --analyze-only
-```
-
-## Why LLMs?
-
-Rule-based keyword matching is too crude:
- Only captures 3-4% as "high extremity"
- Can't understand nuance ("verbod" appears in mundane contexts)
- Can't assess policy impact magnitude
-
-LLMs can:
- Understand policy context and implications
- Assess deviation from consensus/norms
- Interpret Dutch political terminology
-
-## Proposed LLM Classification Schema
-
-### Output Format
-```json
-{
-  "extremity_score": 1-5,
-  "policy_domain": "migration|identity|economy|social|climate|foreign_policy|justice|education|health|other",
-  "policy_direction": "restrictive|permissive|neutral",
-  "deviation_type": "procedural|semantic|structural",
-  "consensus_level": "broad|partial|narrow|opposition",
-  "rationale": "1-2 sentence explanation"
-}
-```
-
-### Extremity Scale (1-5)
-
-| Score | Label | Description | Examples |
-|-------|-------|-------------|----------|
-| 1 | Mainstream | Standard governance, routine | Budget adjustments, procedural changes |
-| 2 | Minor deviation | Small policy tweaks within consensus | Minor fee changes, small program adjustments |
-| 3 | Moderate deviation | Meaningful but within coalition consensus | Immigration processing changes, targeted regulations |
-| 4 | Major deviation | Challenges status quo meaningfully | Tighter migration rules, significant policy reversals |
-| 5 | Extreme | Fundamental/populist, outside consensus | Complete bans, anti-democratic motions |
-
-### Policy Direction
-
- **restrictive**: Limits freedoms, tightens rules, reduces access
- **permissive**: Expands freedoms, loosens rules, increases access
- **neutral**: Procedural, administrative, technical
-
-### Consensus Level
-
- **broad**: Passed with 80%+ parties voting same way
- **partial**: Passed with 60-80% agreement
- **narrow**: Passed with 50-60% (close vote)
- **opposition**: Coalition parties voted against
-
-## LLM Prompt
-
-```
-SYSTEM:
-You are an expert on Dutch parliamentary politics. Classify parliamentary motions 
-on policy extremity using the provided schema.
-
-CLASSIFICATION_RUBRIC:
- Score 1 (Mainstream): Routine governance, budget adjustments, procedural changes
- Score 2 (Minor): Small policy tweaks within consensus
- Score 3 (Moderate): Meaningful changes but within coalition consensus
- Score 4 (Major): Challenges status quo, significant policy shifts
- Score 5 (Extreme): Fundamental changes, populist, outside consensus
-
-Consider:
- Policy impact magnitude
- Deviation from current norms/policies
- Coalition/opposition dynamics
- Dutch political context
-
-USER:
-Classify this motion:
-
-Title: {title}
-Description: {description}
-Voting result: {passed/rejected}, {party_coalition} parties voted for
-
-Respond in JSON format.
-```
-
-## Batch Processing Strategy
-
-```python
-import json
-import asyncio
-from openai import AsyncOpenAI
-
-async def classify_motion_batch(motions: list[dict], model: str = "gpt-4o") -> list[dict]:
-    """Process motions in parallel batches."""
-    
-    client = AsyncOpenAI()
-    
-    async def classify_one(motion: dict) -> dict:
-        prompt = build_prompt(motion)
-        
-        response = await client.chat.completions.create(
-            model=model,
-            messages=[{"role": "system", "content": SYSTEM_PROMPT},
-                    {"role": "user", "content": prompt}],
-            response_format={"type": "json_object"}
-        )
-        
-        result = json.loads(response.choices[0].message.content)
-        result["motion_id"] = motion["id"]
-        return result
-    
-    # Process 50 in parallel
-    results = []
-    for i in range(0, len(motions), 50):
-        batch = motions[i:i+50]
-        batch_results = await asyncio.gather(*[classify_one(m) for m in batch])
-        results.extend(batch_results)
-    
-    return results
-
-async def main():
-    motions = load_motions()  # Load from database
-    classifications = await classify_motion_batch(motions)
-    save_to_database(classifications)
-
-asyncio.run(main())
-```
-
-## Cost Estimate
-
-| Dataset Size | Model | Est. Cost | Est. Time |
-|-------------|-------|-----------|-----------|
-| 35,000 motions | gpt-4o-mini | ~$5-10 | 30-60 min |
-| 35,000 motions | gpt-4o | ~$50-100 | 2-4 hours |
-
-Using `gpt-4o-mini` is sufficient for classification tasks.
-
-## Analysis After Classification
-
-Once classified, we can analyze:
-
-```python
-# Extremity by period
-df.groupby(['period', 'extremity_score']).size().unstack(fill_value=0)
-
-# Domain-Extremity heatmap
-pivot = df.pivot_table(values='motion_id', 
-                        index='policy_domain', 
-                        columns='extremity_score', 
-                        aggfunc='count')
-
-# Passed vs rejected extremity
-df.groupby('passed')['extremity_score'].mean()
-
-# Coalition shift analysis
-df[df['policy_domain'] == 'migration'].groupby(['period', 'policy_direction']).size()
-```
-
-## Expected Insights
-
-1. **Extremity distribution over time** - Has 4-5 score increased?
-2. **Domain-extremity correlation** - Which domains produce extreme policies?
-3. **Direction-extremity** - Restrictive vs permissive extremity by period
-4. **Consensus-extremity** - Are extreme policies passing with broad or narrow consensus?
-5. **Coalition voting** - Which parties support extreme policies?
--- a/docs/research/mainstream_shift.png
+++ b/docs/research/mainstream_shift.png
--- a/docs/research/motion-classification-prompt-v2.md
+++ b/docs/research/motion-classification-prompt-v2.md
@ -1,136 +0,0 @@
-# Motion Classification Prompt - v2
-
-## Design Principles
-
-1. **Separation of concerns**: Democratic erosion (substance) is distinct from populist style and restrictiveness
-2. **Orthogonal dimensions**: Each dimension can be classified independently
-3. **Clear boundaries**: Defined transitions between levels
-4. **Dutch political context**: Accounts for EU, referenda, institutional attacks
-
-## Refined Prompt
-
-```python
-SYSTEM_PROMPT = """Je bent een expert in Nederlandse parlementaire politiek en democratische normen.
-
-Classificeer Kamermoties op vier onafhankelijke dimensies:
-
---
-
-### 1. DEMOCRATIC_EROSION (0-4) — SUBSTANTIEEL
-Meet of deze motie de democratische instituties, rechtsstaat, of burgerrechten bedreigt.
-
-| Score | Label | Beschrijving | Voorbeelden |
-|-------|-------|-------------|-------------|
-| 0 | None | Geen impact op democratische normen | Begroting, procedureel, technische wijzigingen |
-| 1 | Minor | Kleine afwijking van gebruikelijke processen | Kleine uitzonderingen op transparantie-eisen |
-| 2 | Moderate | Betekenisvolle beleidswijziging, maar binnen constitutioneel kader | Verandering asielprocedures, strengere veiligheidsmaatregelen |
-| 3 | Significant | Vraagt om fundamentele verandering in checks & balances | Beperking rechterlijke toetsing, afschaffen referendum |
-| 4 | Critical | Ondermijnt openbaar bestuur, rechtsstaat, of universele rechten | Afschaffing persvrijheid, discriminatie bij wet, anti-EU obstructionisme |
-
-**Beslisregels:**
- Score 4 ALLEEN bij: (a) directe aanval op persvrijheid/rechterlijke macht, OF (b) systematische discriminatie in wetgeving, OF (c) oproep tot schending internationale verdragen
- Score 3 bij: (a) referendum afschaffen/herroepen, OF (b) EU-samenwerking fundamenteel ter discussie stellen, OF (c) bevoegdheden uitvoerende macht significant uitbreiden zonder tegenwicht
- Score 2 is default voor significante beleidswijzigingen die niet bovenstaande raken
-
---
-
-### 2. POPULIST_STYLE (0-1) — STIJL
-Meet of deze motie populistische retoriek gebruikt. Dit is onafhankelijk van de democratische impact.
-
-| Score | Label | Beschrijving |
-|-------|-------|-------------|
-| 0 | Normal | Zakelijke, institutionele toon |
-| 1 | Populist | Gebruikt anti-establishment framing |
-
-**Indicatoren voor score 1:**
- "Het volk" vs "de elite"/"Den Haag"/"de politiek"
- "Wij vs zij" framing ("burgers vs bestuurders")
- Suggestie dat "gewone mensen" anders behandeld moeten worden
- Vragen om "direct door het volk" zonder institutionele checks
- Emotioneel geladen taalgebruik over "de problemen van gewone mensen"
-
-**Let op:** Partijpolitieke kritiek is normaal. Alleen extreem anti-institutionele framing telt.
-
---
-
-### 3. GROUP_TARGETING (0-2) — SELECTIEVE TOEPASSING
-Meet of het beleid specifieke groepen viseert.
-
-| Score | Label | Beschrijving |
-|-------|-------|-------------|
-| 0 | Universal | Algemeen beleid, geen specifieke groep |
-| 1 | Indirect | Algemeen beleid dat onevenredig groepen raakt |
-| 2 | Direct | Expliciet gericht op specifieke bevolkingsgroep |
-
-**Score 2 voorbeelden:**
- "Asielzoekers" / "illegalen" specifiek viseren
- "Moslims" / specifieke religieuze groepen
- "Linkse" of "rechtse" politieke tegenstanders bij naam
- "Etnische minderheden" als doelwit
-
-**Score 1 voorbeelden:**
- Algemeen immigratiebeleid dat effectief migranten raakt
- Veiligheidsmaatregelen die gemarginaliseerde groepen disproportioneel raken
-
---
-
-### 4. RESTRICTIVENESS (-1 to +1) — RICHTING
-Meet of het beleid vrijheden/rechten beperkt of uitbreidt.
-
-| Score | Label | Beschrijving |
-|-------|-------|-------------|
-| -1 | Expansive | Breidt vrijheden of toegang uit |
-| 0 | Neutral | Geen directe impact op vrijheden |
-| +1 | Restrictive | Beperkt vrijheden, toegang, of rechten |
-
-**Let op:** Budgettaire of procedurele zaken zijn meestal 0.
-
---
-
-## OUTPUT FORMAT
-
-Respond in JSON:
-{
-  "democratic_erosion": 0-4,
-  "populist_style": 0-1,
-  "group_targeting": 0-2,
-  "restrictiveness": -1 to 1,
-  "domain": "migration|economy|climate|social|justice|foreign|education|health|other",
-  "rationale": "1-2 zinnen uitleg"
-}
-
---
-
-## BELANGRIJKE BESLISREGELS
-
-1. **DEMOCRATIC_EROSION en POPULIST_STYLE zijn onafhankelijk**: Een motie kan populistisch zijn (1) maar democratisch onschuldig (0), en omgekeerd.
-
-2. **GROUP_TARGETING is onafhankelijk van RESTRICTIVENESS**: Een restrictieve motie kan universeel (0) of selectief (2) zijn.
-
-3. **EU-afwijkingen gradueren**: 
-   - "Nederlandse invulling van EU-beleid" = score 0-1 erosion
-   - "Nexit/EU verlaten" = score 3-4 erosion
-   - "EU-regels overtreden" = score 2-3 erosion
-
-4. **Referendum-context**: Afschaffen referendum = score 3. Bestaand referendum gebruiken = score 0.
-
-5. **Voorbehoud bij onduidelijkheid**: Als de motietekst ambigu is, kies de lagere score en noteer de twijfel in de rationale."""
-```
-
-## Summary of Changes
-
-| Old | New |
-|-----|-----|
-| Single EXTREMITY_SCORE (1-5) conflating substance+style | Four orthogonal dimensions |
-| "Populistische retoriek" as score 5 criterion | POPULIST_STYLE (0-1), independent of erosion |
-| Vague score boundaries | Defined decision rules with examples |
-| TARGETED_GROUP redundant with score | GROUP_TARGETING (0-2), orthogonal to restrictiveness |
-| EU deviation = score 5 | Graduated EU scores (0-4) with specific examples |
-| Missing referendum/Nexit | Explicit scoring for these patterns |
-
-## What This Enables
-
-1. **Plot RESTRICTIVENESS × DEMOCRATIC_EROSION** — 2D analysis of policy direction
-2. **Track POPULIST_STYLE over time** — Is rhetoric getting more populist?
-3. **Analyze GROUP_TARGETING** — Is group-specific targeting increasing?
-4. **Cross-correlate dimensions** — Does populist style correlate with erosion?
--- a/docs/research/normalized_extremity_trend.png
+++ b/docs/research/normalized_extremity_trend.png
--- a/docs/research/polarization_comprehensive.png
+++ b/docs/research/polarization_comprehensive.png
--- a/Show More
+++ b/Show More