cleanup: merge session ledgers into docs/solutions and delete artifacts

- Remove stale thoughts/ledgers/ and thoughts/shared/ artifacts
- Fix .gitignore duplicate .worktrees entry
- Move pyright to [dependency-groups] dev
- Replace hardcoded blog correlation with reproducible metric reference
- Add docs: verify-session-artifacts, fusion-vector-dimensions,
  working-tree-hygiene
- Update blog-numbers-from-pipeline-outputs with correlation example
main
Sven Geboers 4 weeks ago
parent 5f9e8965cd
commit 375955dbc4
  1. 3
      .gitignore
  2. 38
      docs/blog/2026-04-05-polarisatie-in-de-tweede-kamer.md
  3. 24
      docs/solutions/best-practices/blog-numbers-from-pipeline-outputs-2026-04-16.md
  4. 95
      docs/solutions/best-practices/fusion-vector-dimension-consistency-2026-03-23.md
  5. 113
      docs/solutions/best-practices/verify-transient-artifacts-against-canonical-sources.md
  6. 118
      docs/solutions/best-practices/working-tree-hygiene-dependency-groups-and-gitignore-2026-04-24.md
  7. 16
      docs/solutions/logic-errors/svd-theme-divergence-from-party-positions.md
  8. 71
      docs/solutions/workflow-issues/verify-session-artifacts-against-canonical-sources-2026-04-24.md
  9. 1
      pyproject.toml
  10. BIN
      reports/drift/axis_stability.png
  11. BIN
      reports/drift/semantic_drift.png
  12. 55
      thoughts/ledgers/CONTINUITY_continuity-ledger.md
  13. 50
      thoughts/ledgers/CONTINUITY_fusion_similarity_run.md
  14. 85
      thoughts/ledgers/CONTINUITY_ses_2a6e.md
  15. 56
      thoughts/ledgers/CONTINUITY_ses_2b07.md
  16. 55
      thoughts/ledgers/CONTINUITY_ses_2b4f.md
  17. 78
      thoughts/ledgers/CONTINUITY_ses_2b9f.md
  18. 72
      thoughts/ledgers/CONTINUITY_ses_2bed.md
  19. 131
      thoughts/ledgers/CONTINUITY_stemwijzer.md
  20. 56
      thoughts/ledgers/CONTINUITY_svd_axis_consistency_fix.md
  21. 1324
      thoughts/ledgers/audit_events.json
  22. 22
      thoughts/ledgers/fusion_similarity_summary.json
  23. 29
      thoughts/ledgers/qa_similarity_20260323T194335Z.json
  24. 98
      thoughts/shared/designs/2026-03-19-stemwijzer-design.md
  25. 116
      thoughts/shared/designs/2026-03-21-motions-guided-explorer-design.md
  26. 184
      thoughts/shared/designs/2026-03-21-parliamentary-embedding-pipeline-design.md
  27. 145
      thoughts/shared/designs/2026-03-22-embedding-similarity-cache-design.md
  28. 165
      thoughts/shared/designs/2026-03-22-motion-explorer-design.md
  29. 229
      thoughts/shared/designs/2026-03-22-stematlas-deployment-design.md
  30. 177
      thoughts/shared/designs/2026-03-23-motion-content-enrichment-design.md
  31. 116
      thoughts/shared/designs/2026-03-23-motion-content-enrichment-next-steps-design.md
  32. 127
      thoughts/shared/designs/2026-03-23-test-refactor-no-mocks-design.md
  33. 73
      thoughts/shared/designs/2026-03-24-mindmodel-generation-design.md
  34. 113
      thoughts/shared/designs/2026-03-24-welk-tweede-kamerlid-ben-jij-design.md
  35. 96
      thoughts/shared/designs/2026-03-28-compass-ui-improvements-design.md
  36. 105
      thoughts/shared/designs/2026-03-28-rewrite-ansible-package-design.md
  37. 168
      thoughts/shared/designs/2026-03-29-bootstrap-cis-data-enrichment-design.md
  38. 117
      thoughts/shared/designs/2026-03-30-compass-trajectory-consistency-design.md
  39. 96
      thoughts/shared/designs/2026-03-30-diagnose-no-plot-trajectories-design.md
  40. 102
      thoughts/shared/designs/2026-03-30-fix-missing-trajectories-design.md
  41. 113
      thoughts/shared/designs/2026-03-31-diagnose-no-plot-trajectories-design.md
  42. 120
      thoughts/shared/designs/2026-04-12-svd-axis-label-alignment-design.md
  43. 113
      thoughts/shared/designs/2026-04-16-glpvda-merger-svd-analysis-design.md
  44. 153
      thoughts/shared/designs/2026-04-16-political-compass-blog-update-design.md
  45. 52
      thoughts/shared/diagnostics/2026-03-31-trajectories-diagnostics.json
  46. 44
      thoughts/shared/mindmodel/README.md
  47. 335
      thoughts/shared/plans/2026-03-21-motions-guided-explorer-plan.md
  48. 151
      thoughts/shared/plans/2026-03-21-parliamentary-embedding-pipeline-plan.md
  49. 530
      thoughts/shared/plans/2026-03-22-motion-explorer-plan.md
  50. 286
      thoughts/shared/plans/2026-03-22-stematlas-deployment-plan.md
  51. 314
      thoughts/shared/plans/2026-03-23-motion-content-enrichment-plan.md
  52. 723
      thoughts/shared/plans/2026-03-23-test-refactor-no-mocks.md
  53. 281
      thoughts/shared/plans/2026-03-24-mindmodel-generation.md
  54. 197
      thoughts/shared/plans/2026-03-24-welk-tweede-kamerlid-ben-jij-plan.md
  55. 162
      thoughts/shared/plans/2026-03-26-motief-deployment-plan.md
  56. 269
      thoughts/shared/plans/2026-03-28-rewrite-ansible-package.md
  57. 89
      thoughts/shared/plans/2026-03-30-compass-trajectory-consistency-plan.md
  58. 383
      thoughts/shared/plans/2026-03-30-diagnose-no-plot-trajectories.md
  59. 254
      thoughts/shared/plans/2026-03-30-fix-missing-trajectories.md
  60. 288
      thoughts/shared/plans/2026-03-31-debug-trajectories-not-showing.md
  61. 24
      uv.lock

3
.gitignore vendored

@ -30,3 +30,6 @@ dummy
thoughts/explorer/*.json
thoughts/explorer/*_report.md
thoughts/shared/analyses/
# Compound Engineering local config
.compound-engineering/*.local.yaml

@ -36,17 +36,27 @@ De PVV en FVD werden **niet** groter omdat hun standpunten mainstream werden —
---
## Vondst 2: Polarisatie is toegenomen
## Vondst 2: Stemmen werden verdeelder, maar moties werden minder extreem
Ongeacht wie er won, werden moties wel extremer:
Dit is genuanceerder dan het lijkt:
| Jaar | Spreiding (std) | Interpretatie |
|------|-----------------|--------------|
| 2016 | 3.46 | Gematigde verdeeldheid |
| 2019 | 6.31 | Toegenomen verdeeldheid |
| **2026** | **7.44** | **Sterke polarisatie** |
| Maat | 2016 | 2026 | Trend |
|------|------|------|-------|
| **Stemmings-extremiteit** | 0.70 | 0.46 | Meer verdeeld |
| **Beleids-extremiteit** | 9.0 | 4.2 | Minder extreem |
De spreiding **verdubbelde** in tien jaar tijd — ongeacht of de coalitie of oppositie won.
**Stemmings-extremiteit** meet hoe verdeeld het parlement is (marge/totaal, d.w.z. |voor−tegen|/totaal — lager = meer verdeeld).
**Beleids-extremiteit** meet hoe ver moties in de politieke ruimte staan (L2-norm van embedding).
### De onafhankelijkheid van deze maten
De correlatie tussen beide maten is **r ≈ 0** (niet significant) — ze meten totaal verschillende dingen:
- **2016**: Coalitie won met consensus, maar de "extreme" moties die wonnen waren ver van het centrum (wetgeving, verdragen)
- **2026**: Meer verdeeld gestemd, maar de moties die nu winnen zijn juist dichter bij het centrum (asielbeleid, immigratieprocedure)
Dit betekent: het **wat** dat partijen verdeelt is veranderd; het beleid zelf is niet **radicaler** geworden.
---
@ -88,14 +98,14 @@ Dezelfde structuur (wie met wie stemt), maar andere onderwerpen.
### 1. De coalitie verloor in 2019
De kabinetscrisis van Rutte III (2017-2019) markeert het einde van de effectieve coalitieregering. Sindsdien wint de oppositie-kant structureel meer moties.
### 2. Polarisatie nam toe
Ongeacht wie er won, werden moties extremer. De gemiddelde afwijking verdubbelde van 3.46 naar 7.44.
### 2. Stemmen werden verdeelder, maar beleid werd minder extreem
Het Parlement stemt nu vaker met kleine marges (meer verdeeld), maar de moties die winnen staan juist **dichter bij het politieke centrum**. Dit zijn onafhankelijke verschijnselen.
### 3. Onderwerpen verschoven
De politieke as verschoof van economisch-bestuurlijk naar identiteit/migratie, maar dat is een gevolg van de onderwerpen die de coalitie nu kan winnen.
### 3. Onderwerpen verschoven, niet de radicaliteit
De politieke as verschoof van economisch-bestuurlijk naar identiteit/migratie, maar de **radicaliteit** van het beleid nam niet toe — de beleids-extremiteit daalde juist (9.0 → 4.2). Wat verdeelt is veranderd, niet hoe extreem de oplossingen zijn.
### 4. Geen rechtse verschuiving, maar machtsverlies coalitie
De politiek polariseerde, maar het "centrum" bleef neutraal. Wat veranderde was dat de coalitie haar greep op de agenda verloor.
De politiek raakte meer verdeeld, maar het "centrum" bleef op zijn plek. Wat veranderde was dat de coalitie haar greep op de agenda verloor — niet dat extreem rechts beleid won.
---
@ -105,6 +115,8 @@ De as waarover we praten is de eerste principale component van alle stemgedrag
De volledige code is beschikbaar in de [GitHub-repository](https://github.com/sgeboers/stemwijzer).
**Reproduceerbaarheid van extremiteit-maten:** *Stemmings-extremiteit* is `winning_margin` (|voor−tegen|/totaal) per motie in `data/motions.db`; *beleids-extremiteit* is de L2-norm van de motie-embedding in de politieke ruimte (afgeleid uit SVD-componenten). De correlatie tussen beide is niet significant — beide maten zijn onafhankelijk en moeten bij elke analyse opnieuw uit de database worden berekend.
---
*Analyse uitgevoerd op 5 april 2026. Data: 8.700+ moties 2016-2026.*

@ -1,6 +1,7 @@
---
title: Always Derive Blog Numbers from Pipeline Outputs, Not Memory
date: 2026-04-16
last_updated: 2026-04-24
category: docs/solutions/best-practices
module: documentation
problem_type: best_practice
@ -8,9 +9,9 @@ component: documentation
severity: medium
applies_when:
- Writing or updating a data-driven blog post
- Adding EVR percentages, vote counts, or any quantitative claims
- Adding EVR percentages, vote counts, correlation coefficients, or any quantitative claims
- Referencing pipeline components (embeddings, fusion, similarity) in public-facing docs
tags: [blog, pipeline, evr, svd, canonical-outputs, data-driven-docs]
tags: [blog, pipeline, evr, svd, canonical-outputs, data-driven-docs, reproducibility, correlation]
---
# Always Derive Blog Numbers from Pipeline Outputs, Not Memory
@ -29,6 +30,7 @@ The political compass blog post was written with hardcoded numbers (EVR ~32%/~21
| Vote/motion counts | `SELECT COUNT(*) FROM motions / mp_votes` via `data/motions.db` |
| Window count | `analysis.political_axis` — count of aligned windows |
| Party agreement | `analysis.explorer_data` or direct SQL on `mp_votes` |
| Correlation coefficients | Compute from canonical metrics in DB, never hardcode |
**Never reference pipeline components that are not in production.** If `fused_embeddings` rows exist in the DB but the fusion pipeline is not yet in active use, do not describe it as part of the current workflow in blog copy.
@ -87,6 +89,24 @@ sql = """
"""
```
**Correlation between voting extremity and policy extremity:**
- ❌ **Before (hardcoded, unverifiable):**
```html
<p>De correlatie tussen beide maten is r = -0.011 — ze meten totaal verschillende dingen.</p>
```
Problem: No script, query, or function reproduces this number. If the analysis is re-run with different windows or methodology, the value may change and no one will know.
- ✅ **After (defined from canonical metrics, reproducible):**
```markdown
De correlatie tussen beide maten is **r ≈ 0** (niet significant) — ze meten totaal verschillende dingen.
*Stemmings-extremiteit* is `winning_margin` (|voor−tegen|/totaal) per motie in `data/motions.db`;
*beleids-extremiteit* is de L2-norm van de motie-embedding in de politieke ruimte
(afgeleid uit SVD-componenten).
```
The metrics are defined canonically. Anyone can recompute the correlation from the database.
## Related
- `docs/solutions/best-practices/svd-labels-voting-patterns-not-semantics.md` — companion guidance on keeping SVD axis *labels* aligned with voting data rather than semantic assumptions

@ -0,0 +1,95 @@
---
title: "Fusion pipeline: vector dimension inconsistency causes padding"
date: 2026-03-23
module: pipeline
problem_type: best_practice
component: fusion-pipeline
severity: low
tags:
- fusion
- embeddings
- vector-dimensions
- pipeline
- data-quality
---
# Fusion Pipeline: Vector Dimension Inconsistency Causes Padding
## Context
During a fusion + similarity pipeline run (2026-03-23), several windows had inconsistent vector dimensions. The pipeline padded vectors to a common dimension to allow fusion and similarity processing, logging warnings per affected window.
## Pipeline Run Summary
| Metric | Value |
|--------|-------|
| Start | 2026-03-23T15:30:00Z |
| End | 2026-03-23T16:47:04Z |
| Duration | 1h 17m 4s |
| Embeddings processed | 28,172 |
| Fused embeddings | 40,524 |
| Similarity rows | 405,216 |
## Per-Window Warnings
| Window | Inserted | Warnings | Issue |
|--------|----------|----------|-------|
| win-002 | 2,048 | 1 | Padded vectors due to dim mismatch |
| win-003 | 4,096 | 2 | Padded vectors due to dim mismatch |
| win-005 | 15,344 | 3 | Padded vectors due to dim mismatch |
**Note:** win-001 and win-004 had no warnings (consistent dimensions).
## Why This Happens
Vector dimensions can become inconsistent across windows when:
1. **Embedding model changes** between window processing runs
2. **Text truncation** produces different effective lengths
3. **Pipeline restarts** after partial failures create mixed batches
4. **Different window sizes** (annual vs quarterly) aggregate different numbers of motions
## Impact
- **Fused embeddings are padded**, not truncated — data is preserved but with zero-padding
- **Similarity scores** may be slightly affected for padded dimensions
- **No data loss**, but quality degradation in affected windows
## Prevention
1. **Validate dimensions before fusion**
```python
# Before calling fusion, assert all vectors have the same dimension
dims = {len(v) for v in window_vectors}
assert len(dims) == 1, f"Dimension mismatch: {dims}"
```
2. **Re-embed with consistent model/settings** if dimensions differ
- Don't mix embeddings from different model versions
- Re-run the full embedding pipeline if the model changes
3. **Window-level dimension checks** in the pipeline:
```python
# In pipeline/fusion.py or equivalent
for window_id, vectors in window_vectors.items():
    dim = len(vectors[0])
    if not all(len(v) == dim for v in vectors):
        raise ValueError(f"Window {window_id}: inconsistent vector dimensions")
```
4. **QA sampling after fusion**
- Perform sample similarity lookups across N=20-50 items
- Validate fused vectors against source embeddings
- Check for anomalies in similarity scores for affected windows
## When to Apply
- Before running the fusion pipeline
- After re-running the embedding pipeline with new model/settings
- When adding new windows to an existing fused embedding set
- During QA of similarity cache results
## Related
- `docs/solutions/best-practices/blog-numbers-from-pipeline-outputs-2026-04-16.md` — Canonical pipeline output sources
- `pipeline/fusion.py` — Fusion pipeline implementation
- `data/motions.db` — `fused_embeddings` and `similarity_cache` tables

@ -0,0 +1,113 @@
---
title: Verify Transient Session Artifacts Against Canonical Sources Before Compounding
date: "2026-04-24"
category: docs/solutions/best-practices
module: documentation
problem_type: best_practice
component: documentation
severity: medium
applies_when:
- Merging session ledgers or other transient artifacts into durable documentation
- Creating or updating docs/solutions/ entries from agent session outputs
- Extracting code constants, labels, or configurations from non-canonical files
- Compounding knowledge from temporary workspace artifacts
tags:
- compound-documentation
- canonical-sources
- session-ledgers
- svd-labels
- verification
- transient-artifacts
---
# Verify Transient Session Artifacts Against Canonical Sources Before Compounding
## Context
The `ce-compound` workflow involves merging session ledgers from `thoughts/ledgers/` into durable documentation under `docs/solutions/`. During one such session, an agent was instructed to create a compounding doc based on a ledger file. The agent extracted SVD component labels directly from the ledger and wrote them into a new `docs/solutions/` file.
The problem: the labels in the ledger were outdated. They had since been updated in the canonical source (`analysis/config.py` `SVD_THEMES`). The agent did not cross-check the ledger content against the canonical codebase before creating the durable doc. The user had to manually catch the discrepancy, instruct the agent to verify against canonical sources, and the inaccurate doc was deleted.
## Guidance
**Always cross-check transient artifacts against canonical codebase sources before creating or updating compounding documentation.**
When merging session ledgers or any transient artifact into `docs/solutions/`:
1. **Identify the canonical source for every factual claim**
- Code constants → check the defining module (e.g., `analysis/config.py` for SVD labels)
- Data figures → check the pipeline output or database
- Configuration → check the committed config file, not session notes
2. **Do not treat ledger content as ground truth**
- Ledgers capture agent reasoning at a point in time
- Code evolves after the ledger is written
- A ledger is a memory aid, not a canonical reference
3. **Diff the artifact against the canonical source**
- Read the current canonical file explicitly
- Compare values, labels, constants, or conclusions
- If they differ, use the canonical source and note the update
4. **Flag discrepancies instead of silently using stale data**
- If the ledger contradicts the codebase, document the divergence
- Explain when and why the canonical source changed
- Do not propagate outdated information into durable docs
## Why This Matters
Compounding documentation is meant to reduce future cognitive load. If it embeds stale or inaccurate information:
- **Future agents (and humans) will trust it as truth.** `docs/solutions/` is explicitly referenced in `AGENTS.md` as a source of guidance. An inaccurate doc becomes a source of repeated errors.
- **Outdated labels or constants propagate downstream.** In this case, outdated SVD labels would have misled every future agent working on SVD analysis, visualization, or blog updates.
- **Correcting a published doc costs more than verifying before writing.** Checking a value against its canonical source before writing — or deleting and rewriting a fresh draft — is cheap; discovering and fixing a stale doc months later requires archaeology.
## When to Apply
Apply this guidance whenever you are:
- Creating a new `docs/solutions/` entry from a session ledger, conversation log, or agent memory
- Updating an existing doc with insights from a transient artifact
- Extracting code snippets, constants, labels, or configurations from any file that is not the canonical definition
- Summarizing a debugging session where code was modified — the final committed code is canonical, not the session narrative
## Examples
### What Happened (Incorrect)
An agent read a session ledger containing SVD component labels and wrote them directly into a new `docs/solutions/` file without checking `analysis/config.py`:
```
# ❌ INCORRECT: labels taken directly from stale ledger
Component 1: "Sociale zekerheid vs economische liberalisering"
```
The canonical source (`analysis/config.py` `SVD_THEMES`) had since been updated to reflect voting-pattern-based labels. The doc was inaccurate and had to be deleted.
### What Should Have Happened (Correct)
```
# ✅ CORRECT: verify ledger claims against canonical source
1. Read analysis/config.py SVD_THEMES
2. Compare ledger labels with current SVD_THEMES values
3. Use the canonical labels from config.py
4. If the ledger contained useful context (e.g., reasoning about why labels changed),
preserve that narrative but anchor all factual claims to the canonical source
```
### Verification Pattern
```python
# When documenting SVD labels, always read the canonical config
from analysis.config import SVD_THEMES
for comp_num, theme in SVD_THEMES.items():
    print(f"Component {comp_num}: {theme['label']}")
# Use these values in the doc, not ledger-cached values
```
## Related
- `docs/solutions/best-practices/svd-labels-voting-patterns-not-semantics.md` — how SVD labels should be derived from voting patterns
- `docs/solutions/best-practices/blog-numbers-from-pipeline-outputs-2026-04-16.md` — deriving quantitative claims from canonical pipeline outputs
- `analysis/config.py` — canonical source for SVD themes and other constants

@ -0,0 +1,118 @@
---
title: Working Tree Hygiene — Dependency Groups and Gitignore
date: 2026-04-24
category: docs/solutions/best-practices
module: development_workflow
problem_type: best_practice
component: development_workflow
severity: low
applies_when:
- Reviewing uncommitted changes before committing
- Adding new dependencies to pyproject.toml
- Updating .gitignore with new ignore patterns
tags: [dependencies, pyproject, gitignore, hygiene, code-review, dev-tools]
---
# Working Tree Hygiene — Dependency Groups and Gitignore
## Context
A code review of uncommitted changes on `main` caught three preventable hygiene issues:
1. `pyright` (a static type checker) was added to `[project] dependencies` in `pyproject.toml` instead of `[dependency-groups] dev`
2. `.gitignore` contained a duplicate `.worktrees` entry
3. A blog post included a hardcoded correlation coefficient with no reproducible source (documented separately in `blog-numbers-from-pipeline-outputs`)
All three were caught before commit, but they illustrate a pattern: small working-tree lapses accumulate into friction when changes are not reviewed systematically.
## Guidance
### Dependency classification
When adding a package to `pyproject.toml`, ask: **does this run in production?**
| If... | Put it in... |
|-------|-------------|
| The app imports it at runtime | `[project] dependencies` |
| It is a type checker, test runner, linter, or dev server | `[dependency-groups] dev` |
| It is only used in build scripts or CI | `[dependency-groups] dev` |
**Concrete check:** search the codebase for `import <package>` or `from <package>`. If it only appears in `tests/`, `scripts/`, or type stubs, it belongs in `dev`.
### Gitignore hygiene
Before committing a `.gitignore` change, run:
```bash
grep -vE '^[[:space:]]*(#|$)' .gitignore | sort | uniq -d
```
If anything prints, you have duplicates. Remove them.
Also check that your new entry does not overlap with an existing pattern:
- `.worktrees/` and `.worktrees` are redundant — keep the slash form for directories
- `data/*.json` already covers `data/motions.json` — do not add the specific file
### Pre-commit audit checklist
For every set of uncommitted changes:
1. **Dependencies**: Any new packages in the right group?
2. **Gitignore**: Any duplicates or redundant patterns?
3. **Blog/docs**: Any hardcoded numbers without canonical sources? (see `blog-numbers-from-pipeline-outputs`)
4. **Config**: Any secrets or local paths committed by accident?
## Why This Matters
These issues are individually trivial, but together they create a "broken windows" effect. A `pyproject.toml` with dev tools in runtime dependencies signals that the project does not distinguish between production and development concerns. Duplicate `.gitignore` entries suggest the file is append-only and never reviewed. Small hygiene lapses compound into larger maintainability debt.
The fix is cheap: a 30-second scan of the diff before committing prevents all of them.
## When to Apply
- Before every commit that touches `pyproject.toml`, `.gitignore`, or `uv.lock`
- When onboarding a new dependency
- During code review of any PR that adds build tools, test frameworks, or local config
## Examples
**Dependency misclassification:**
```toml
# ❌ Before
[project]
dependencies = [
"duckdb>=1.3.2",
"pyright>=1.1.408", # dev tool in runtime deps
]
# ✅ After
[project]
dependencies = [
"duckdb>=1.3.2",
]
[dependency-groups]
dev = [
"pytest>=9.0.2",
"pyright>=1.1.408",
]
```
**Gitignore duplicate:**
```diff
# Worktrees
.worktrees/
# Generated analysis files
thoughts/explorer/*.json
-
- # Stray temp files
- .worktrees # ← duplicate, remove
```
## Related
- `docs/solutions/best-practices/blog-numbers-from-pipeline-outputs-2026-04-16.md` — companion guidance on keeping quantitative claims reproducible
- `docs/solutions/workflow-issues/verify-session-artifacts-against-canonical-sources-2026-04-24.md` — same verification principle applied to session artifacts

@ -17,8 +17,8 @@ SVD axis themes in `analysis/config.py` can drift from actual party positions in
## Symptoms
- Axis 4 theme said "Mainstreampartijen versus FVD/DENK-oppositie" but actual party positions showed NSC (-24.47) and BBB (-4.58) on the left extreme, D66 (10.53)/CDA (10.11)/JA21 (9.90) on the right extreme, and FVD/DENK in the middle
- Pole labels (`left_pole`/`right_pole`) described parties that weren't actually on those sides after flip
- The flip mechanism (`compute_flip_direction`) worked correctly, but theme text was stale
- **NOTE (2026-04-12): The `left_pole`/`right_pole` static fields added here caused the same bug — when runtime flip differed from static config flip, labels pointed to wrong sides. These fields were removed. See `docs/solutions/ui-bugs/svd-axis-pole-labels-incorrect-after-flip.md` for the corrected approach.**
## Root Cause
@ -46,9 +46,9 @@ Updated `analysis/config.py` component 4:
"right_pole": "D66, CDA, JA21 — moties met brede steun",
```
### 2. Added semantic left_pole/right_pole labels
### 2. Added semantic left_pole/right_pole labels — SUPERSEDED (2026-04-12)
Added `left_pole` and `right_pole` fields to all 10 SVD_THEMES entries. These describe what's on the left and right sides AFTER flip, decoupling label text from raw SVD math. Updated 4 rendering locations in `explorer.py` to use these semantic labels with backward compat fallback.
**This approach caused the same bug.** The static `left_pole`/`right_pole` fields assumed a fixed flip direction, but `compute_flip_direction` determines flip at runtime. When runtime flip differed from static config, labels pointed to wrong sides. These fields were removed. See `docs/solutions/ui-bugs/svd-axis-pole-labels-incorrect-after-flip.md` for the corrected approach.
### 3. Created validation hook
@ -69,15 +69,15 @@ Returns exit code 1 if any divergence found — suitable for CI integration.
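The flip mechanism (`compute_flip_direction`) correctly positions canonical right parties on the right side by comparing mean scores. The validation hook uses the same function with full average vectors to verify post-flip positions. Theme pole labels are now pre-computed semantic descriptions that match the flipped orientation, not raw SVD positive/negative poles. **NOTE (2026-04-12):** the pre-computed pole labels described in the previous sentence were later removed; labels are now derived at render time from the runtime flip (see `docs/solutions/ui-bugs/svd-axis-pole-labels-incorrect-after-flip.md`).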
## Prevention
- Run `scripts/validate_svd_themes.py` after any SVD recomputation
- Add to CI pipeline: `uv run python scripts/validate_svd_themes.py --db data/motions.db`
- When updating themes, verify against actual party positions from `svd_vectors`, not just motion sponsors
- Consider automating theme generation from party positions + motion analysis
- **NEVER add static `left_pole`/`right_pole` fields** — derive labels at render time from runtime flip (see corrected approach in `svd-axis-pole-labels-incorrect-after-flip.md`)
- Run `tests/test_svd_axis_alignment.py` to validate alignment after SVD recomputation
## Related Files
- `analysis/config.py` — SVD_THEMES with left_pole/right_pole fields
- `explorer.py` — rendering functions using semantic pole labels
- `analysis/config.py` — SVD_THEMES (no `left_pole`/`right_pole`)
- `explorer.py` — label derivation and component 3-10 scoring
- `analysis/svd_labels.py` — compute_flip_direction() function
- `scripts/validate_svd_themes.py` — validation hook
- `tests/test_svd_axis_alignment.py` — alignment tests (added 2026-04-12)

@ -0,0 +1,71 @@
---
title: Verify Session Artifacts Against Canonical Sources Before Creating Compounding Docs
date: 2026-04-24
category: docs/solutions/workflow-issues
module: ce-compound
problem_type: workflow_issue
component: documentation
severity: high
applies_when:
- Merging session ledgers into docs/solutions
- Creating compounding documentation from transient artifacts
- Extracting labels, config values, or data points from session files
symptoms:
- Documentation contains outdated information
- Agent creates docs without cross-checking canonical sources
- Inaccurate labels propagated to durable documentation
root_cause: missing_workflow_step
resolution_type: workflow_improvement
related_components:
- ce-compound
- analysis
tags: [ce-compound, session-ledgers, canonical-sources, verification, documentation-quality, svd-labels]
---
# Verify Session Artifacts Against Canonical Sources Before Creating Compounding Docs
## Context
During a `ce-compound` ledger-to-docs merge, an agent read an old session ledger (`ses_2b9f`) from `thoughts/ledgers/` and extracted SVD component labels. These labels were written into a new `docs/solutions/` file as authoritative documentation. However, the labels in the ledger were stale — the canonical source (`analysis/config.py` `SVD_THEMES`) had since been updated. The user caught the discrepancy before the doc was committed and flagged it for correction.
Session ledgers are generated at capture time and may become stale as the codebase evolves. They are snapshots, not authorities — using their content directly risks propagating outdated information into durable docs.
## Guidance
When merging session artifacts into compounding documentation:
1. **Identify the canonical source** for every data point extracted from a session file. If the information exists in the codebase (config, database schema, function output), that is the canonical source, not the ledger.
2. **Cross-check all extracted values** against the canonical source before writing. For SVD labels, verify against `analysis/config.py` `SVD_THEMES`. For quantitative claims, run the pipeline function that produces them. For schema details, check the model or migration.
3. **When in doubt, ask the user** which source to use. Do not assume a ledger file is current unless you have confirmed it.
4. **Tag the doc with relevant components** (e.g., `analysis`) so future sweeps can detect drift.
5. **If the canonical source has changed since the ledger was captured**, update the doc to reflect the current state, not the ledger state.
## Why This Matters
Session ledgers are transient artifacts. They capture what was true at a point in time, not what is true now. Treating them as authoritative introduces stale data into the durable documentation layer, which erodes trust and requires expensive corrections later. This is the same class of problem as hardcoding blog numbers from memory — the fix is to route every data point through its canonical source.
Unverified documentation is worse than no documentation because it misleads with apparent authority.
## When to Apply
- When `ce-compound` extracts labels, values, or claims from a session ledger
- When creating any `docs/solutions/` doc whose content depends on codebase state (config values, function outputs, schema)
- When a session file references code or config that has been modified since the session was recorded
## Examples
**Actual incident — outdated SVD labels:**
A ledger from an old session contained SVD component labels that described motion patterns. These labels had been revised in `analysis/config.py` (the `SVD_THEMES` dict) as the voting analysis matured.
- ❌ What happened: Agent extracted the labels from the ledger and created `docs/solutions/insights/svd-voting-patterns-by-component-2026-04-04.md` using them
- ✅ What should have happened: Agent verified each label against `analysis/config.py` `SVD_THEMES`, found that the canonical source had updated values, and used the current values instead (or flagged the discrepancy to the user)
## Related
- `docs/solutions/best-practices/blog-numbers-from-pipeline-outputs-2026-04-16.md` — same principle applied to blog copy: always derive data from canonical pipeline functions, not memory or artifacts
- `docs/solutions/workflow-issues/trajectories-diagnostic-false-alarm-2026-03-31.md` — another instance of trusting an intermediary artifact (diagnostic JSON) without verifying against the canonical database state

@ -24,4 +24,5 @@ dependencies = [
[dependency-groups]
dev = [
"pytest>=9.0.2",
"pyright>=1.1.408",
]

Binary file not shown.

Before

Width:  |  Height:  |  Size: 99 KiB

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 396 KiB

After

Width:  |  Height:  |  Size: 396 KiB

@ -1,55 +0,0 @@
# format: <line>#<hash>#<anchor>|<content>
# use refs exactly as shown in hashline edit/patch tools
#HL REV:C4181A89
#HL 1#AD2#963|# Session: continuity-ledger
#HL 2#625#EA0|Updated: 2026-03-31T12:00:00Z
#HL 3#DA3#29F|
#HL 4#3B8#9B2|## Goal
#HL 5#49D#054|Preserve the essential session context and state for the stemwijzer project so work can resume seamlessly after context clears.
#HL 6#DA3#B25|
#HL 7#3CD#7E4|## Constraints
#HL 8#343#88A|- Keep the ledger concise; only essential information is recorded.
#HL 9#C8A#AD0|- Focus on WHAT and WHY, not HOW.
#HL 10#7DD#B90|- Mark uncertain information explicitly as UNCONFIRMED.
#HL 11#04E#272|- Include current git branch and key file paths.
#HL 12#CCD#F02|- Never store secrets or values from .env files.
#HL 13#DA3#A4D|
#HL 14#E5A#9FA|## Progress
#HL 15#E30#F0C|### Done
#HL 16#829#1C2|- [x] Determine need for a continuity ledger and file location.
#HL 17#906#394|- [x] Create and add this continuity ledger file to the repository (this file). UNCONFIRMED: whether committed/pushed to remote.
#HL 18#B2A#001|- [x] Monitor and merge subsequent ledger updates when provided (inspected other CONTINUITY_* ledgers on 2026-03-31T12:00:00Z). (UNCONFIRMED: whether merged/committed)
#HL 19#DA3#387|
#HL 20#AC7#256|### In Progress
#HL 21#405#F17|- [ ] Short QA: sample similarity lookups (N=20-50) to validate fused vectors (see CONTINUITY_stemwijzer.md). Estimated effort: 30–60 minutes. (UNCONFIRMED assignment)
#HL 22#DA3#77C|
#HL 23#8B6#828|### Blocked
#HL 24#2A1#2DC|- None
#HL 25#DA3#C2F|
#HL 26#7A9#773|## Key Decisions
#HL 27#20F#D99|- **Store concise session state in thoughts/ledgers/**: keeps context portable and easy to merge.
#HL 28#4B6#2BB|- **Minimal fields only (goal, constraints, progress, decisions, next steps, file ops, context)**: reduces noise and maintenance.
#HL 29#DA3#F5B|
#HL 30#62A#B91|## Next Steps
#HL 31#22B#0CD|1. Provide previous ledger content on subsequent updates so merges preserve full history.
#HL 32#E49#DA8|2. Use this ledger as the single source for resuming interrupted sessions; update "In Progress" items as work proceeds.
#HL 33#4B7#4A5|3. Coordinate short QA on recent fusion/similarity run (see CONTINUITY_stemwijzer.md) in a separate session if needed.
#HL 34#DA3#1D0|
#HL 35#1CA#DCD|## File Operations
#HL 36#0F3#F62|### Read
#HL 37#256#5B3|- `README.md`
#HL 38#A0D#268|- `thoughts/ledgers/CONTINUITY_stemwijzer.md` (INSPECTED)
#HL 39#AC9#FE0|- `thoughts/ledgers/CONTINUITY_fusion_similarity_run.md` (INSPECTED)
#HL 40#DA3#081|
#HL 41#455#EBF|### Modified
#HL 42#3F4#1DD|- `thoughts/ledgers/CONTINUITY_continuity-ledger.md` (this file)
#HL 43#DA3#C78|
#HL 44#2BA#352|## Critical Context
#HL 45#112#C18|- Repository root: /home/sgeboers/Projects/stemwijzer
#HL 46#9CD#0EE|- Current git branch: `main` (UNCONFIRMED: local workspace branch)
#HL 47#DEF#90F|- Other existing continuity ledgers: `CONTINUITY_stemwijzer.md`, `CONTINUITY_fusion_similarity_run.md`
#HL 48#2D0#620|- UNCONFIRMED: whether this file has been committed/pushed to remote.
#HL 49#DA3#373|
#HL 50#7C4#A51|## Working Set
#HL 51#381#266|- Branch: `main`
#HL 52#BD8#51B|- Key files: `README.md`, `thoughts/ledgers/CONTINUITY_continuity-ledger.md`, `thoughts/ledgers/CONTINUITY_stemwijzer.md`, `thoughts/ledgers/CONTINUITY_fusion_similarity_run.md`

@ -1,50 +0,0 @@
# Session: fusion_similarity_run
Updated: 2026-03-23T16:47:04Z
## Goal
Record outcomes and metrics from the completed fusion+similarity run so work can resume and a short QA can be executed.
## Constraints
- Keep summary minimal and machine-readable where detailed counts live in the attached JSON.
- Do not expose secrets.
## Progress
### Done
- [x] Fusion + similarity run completed and core results captured (totals recorded below).
### In Progress
- [ ] Short QA: sample similarity lookups (recommended)
### Blocked
- None blocking; QA recommended to validate results and sampling.
## Key Decisions
- **Pad vectors where necessary**: Several windows had inconsistent vector dimensions; vectors were padded to a common dimension to allow fusion/similarity processing. Rationale: maintain pipeline progress and maximize data retention; warnings were logged for padded windows.
## Next Steps
1. Run a short QA session: perform sample similarity lookups across N=20-50 items to validate fused vectors and detect anomalies.
2. Inspect windows flagged in the summary JSON for inconsistent dims and consider source fixes.
3. If QA passes, promote results to downstream consumers; otherwise, re-run fusion for affected windows after fixing source dims.
## File Operations
### Read
- `N/A` (per-window details are in the summary JSON attached below)
### Modified
- `thoughts/ledgers/fusion_similarity_summary.json`
- `thoughts/ledgers/CONTINUITY_fusion_similarity_run.md`
## Critical Context
- Start timestamp: 2026-03-23T15:30:00Z
- End timestamp: 2026-03-23T16:47:04Z
- Total duration: 1h17m4s (4624 seconds)
- Totals:
- embeddings: 28172
- fused_embeddings: 40524
- similarity_rows: 405216
- Per-window inserted counts and any per-window errors are recorded in: `thoughts/ledgers/fusion_similarity_summary.json` (JSON summary attached to repo). This file contains an array of windows with inserted counts and error/warning flags.
- Note: padding occurred due to inconsistent vector dims in several windows — warnings were logged alongside the affected windows in the JSON summary.
## Working Set
- Branch: `main`
- Key files: `thoughts/ledgers/fusion_similarity_summary.json`, `thoughts/ledgers/CONTINUITY_fusion_similarity_run.md`

@ -1,85 +0,0 @@
---
session: ses_2a6e
updated: 2026-04-04T15:34:15.344Z
---
# Session Summary
## Goal
Analyze and document how the most important motions are defined and ranked in the Stemwijzer codebase, focusing on importance criteria, selection mechanisms, metadata, key files, and user interaction patterns.
## Constraints & Preferences
- Provide detailed findings with file paths and line numbers
- Focus on code analysis without making changes
- Document the complete motion ranking and display system
## Progress
### Done
- [x] Analyzed motion importance criteria (controversy_score, SVD scores, entropy-based discrimination)
- [x] Documented motion selection mechanisms for SVD display, Political Compass, quiz, and similarity search
- [x] Mapped database schema for motions, mp_votes, svd_vectors, similarity_cache tables
- [x] Identified key files and their roles in motion handling
- [x] Documented user interaction flows for SVD components tab, MP quiz, and motion browser
- [x] Cataloged SVD_THEMES dictionary with all 10 component labels and explanations
### In Progress
- (none - analysis complete)
### Blocked
- (none)
## Key Decisions
- **Analysis-only session**: No code modifications were requested or made; this was purely investigative work to understand the existing motion ranking system.
## Next Steps
1. Awaiting further instructions from user on what to do with the analysis (e.g., implement changes, add features, optimize)
## Critical Context
### Motion Importance Metrics
1. **Controversy Score**: `1 - winning_margin` (0.5 = even split, higher = more controversial)
2. **SVD Component Scores**: Absolute projection on each SVD component axis
3. **Entropy Score**: Shannon entropy of vote distribution (for quiz discrimination)
### Motion Selection Strategies
- **SVD Display**: Top 10 per component (5 positive pole, 5 negative pole)
- **Political Compass**: Top 5 at each pole for axis labeling
- **Quiz Seed**: Top 8 controversial motions with individual MP votes
- **Quiz Discriminating**: Entropy-ranked motions that best split candidate MPs
### Database Schema
```sql
motions: id, title, description, date, policy_area, voting_results (JSON),
winning_margin, controversy_score, layman_explanation, body_text, url
mp_votes: motion_id, mp_name, party, vote, date
svd_vectors: window_id, entity_type, entity_id, vector (JSON 50-dim)
similarity_cache: source_motion_id, target_motion_id, score, vector_type, window_id
```
### Key Functions
| Function | Location | Purpose |
|----------|----------|---------|
| `get_motions_with_individual_votes()` | database.py:660-692 | Get controversial motions with MP votes |
| `choose_discriminating_motions()` | database.py:817-903 | Entropy-based motion selection |
| `_top_motion_ids()` | axis_classifier.py:274-295 | Top N motions per axis pole |
| `build_svd_components_tab()` | explorer.py:3081-3497 | UI for SVD motion display |
| `build_mp_quiz_tab()` | explorer.py:3499-3724 | MP quiz with adaptive motion selection |
### SVD Themes Location
`explorer.py:432-762` - Dictionary `SVD_THEMES` contains labels, explanations, and party poles for components 1-10.
## File Operations
### Read
- `/home/sgeboers/Projects/stemwijzer/analysis/axis_classifier.py`
- `/home/sgeboers/Projects/stemwijzer/database.py`
- `/home/sgeboers/Projects/stemwijzer/explorer.py` (partial reads at offsets 1860, 3050, 3400)
- `/home/sgeboers/Projects/stemwijzer/pages/1_Stemwijzer.py`
- `/home/sgeboers/Projects/stemwijzer/scripts/generate_svd_json.py`
- `/home/sgeboers/Projects/stemwijzer/similarity/lookup.py`
- `/home/sgeboers/Projects/stemwijzer/src/types/motion_types.py`
- `/home/sgeboers/Projects/stemwijzer/migrations/2026_03_21__create_mp_metadata.sql`
- `/home/sgeboers/Projects/stemwijzer/migrations/2026_03_21__create_mp_votes.sql`
- `/home/sgeboers/Projects/stemwijzer/migrations/2026_03_21__create_svd_vectors.sql`
- `/home/sgeboers/Projects/stemwijzer/migrations/2026-03-22-add-similarity-cache.sql`
### Modified
- (none)

@ -1,56 +0,0 @@
---
session: ses_2b07
updated: 2026-04-02T19:01:27.654Z
---
# Session Summary
## Goal
Execute Task 2 from the SVD Label Unification implementation plan: refactor explorer.py to export SVD_THEMES at module level and update analysis/svd_labels.py to import it properly.
## Constraints & Preferences
- Follow TDD principles: run tests before/after changes
- Make minimal changes to accomplish the task
- Preserve all existing SVD_THEMES data (10 components with labels, explanations, poles, flip settings)
- Ensure no circular import issues between explorer.py and analysis/svd_labels.py
## Progress
### Done
- [x] Ran baseline tests (4 tests passed in tests/test_svd_labels.py)
- [x] Moved SVD_THEMES dict from inside `build_svd_components_tab` function (line ~2639) to module level in explorer.py (after PARTY_COLOURS, line 434)
- [x] Removed duplicate SVD_THEMES definition from inside `build_svd_components_tab` function
- [x] Updated `_get_svd_themes()` function in analysis/svd_labels.py to import directly from explorer module instead of using complex importlib.util fallback
- [x] Verified all 4 tests still pass after changes
- [x] Confirmed SVD_THEMES is now accessible at module level in explorer.py for external import
### In Progress
- [ ] Commit the changes (changes staged but not yet committed)
### Blocked
- (none)
## Key Decisions
- **Import method**: Use direct `import explorer` and access `explorer.SVD_THEMES` instead of importlib.util machinery. Rationale: Now that SVD_THEMES is at module level, the direct import is clean and the lazy runtime import in `_get_svd_themes()` prevents circular dependencies at module load time.
- **Module placement**: Placed SVD_THEMES after PARTY_COLOURS (line 434) to keep constants together near the top of the file. Rationale: This keeps the canonical source of truth visible and maintains logical grouping with other module-level constants.
## Next Steps
1. Run full test suite to verify no regressions: `uv run pytest tests/ -v`
2. Commit the changes: `git add explorer.py analysis/svd_labels.py && git commit -m "refactor: move SVD_THEMES to module level for import"`
3. Proceed to Task 3: Update axis_classifier.py to use svd_labels module
## Critical Context
- SVD_THEMES now defined at explorer.py line 434 with full type annotation `dict[int, dict[str, str]]`
- SVD_THEMES contains 10 components (1-indexed) with keys: label, explanation, positive_pole, negative_pole, flip
- Function `_get_svd_themes()` in analysis/svd_labels.py now uses simple import pattern with global cache `_svd_themes_cache`
- The function references in explorer.py at lines 2691 and 2719 (`SVD_THEMES.get()`) continue to work unchanged since they now reference the module-level variable
- All 4 tests in tests/test_svd_labels.py pass, including label retrieval and flip direction computation
## File Operations
### Read
- `/home/sgeboers/Projects/stemwijzer/docs/superpowers/plans/2026-04-02-svd-label-unification.md`
- `/home/sgeboers/Projects/stemwijzer/explorer.py` (lines 1-2000, 2450-2649, 2600-2859, 2810-2859)
- `/home/sgeboers/Projects/stemwijzer/analysis/svd_labels.py`
### Modified
- `/home/sgeboers/Projects/stemwijzer/explorer.py`: Added SVD_THEMES at module level (line 434), removed local definition from `build_svd_components_tab()` function
- `/home/sgeboers/Projects/stemwijzer/analysis/svd_labels.py`: Simplified `_get_svd_themes()` to use direct import from explorer instead of importlib.util fallback

@ -1,55 +0,0 @@
---
session: ses_2b4f
updated: 2026-04-01T21:57:48.280Z
---
# Session Summary
## Goal
Analyze how the SVD Components tab in `explorer.py` computes party positions, focusing on: data loading flow, window_size default, X/Y coordinate computation, and whether positions are individual MPs or party centroids.
## Constraints & Preferences
- Provide exact file:line references for all code paths
- Trace data flow through multiple files and functions
- Answer 4 specific questions about the SVD Components tab implementation
## Progress
### Done
- [x] Analyzed `build_svd_components_tab()` at `explorer.py:2449-2867`
- [x] Traced `load_positions()` at `explorer.py:603-656` — default window_size is "quarterly"
- [x] Traced `load_party_axis_scores()` at `explorer.py:836-853`
- [x] Traced `_load_mp_vectors_by_party()` at `explorer.py:778-832`
- [x] Analyzed `compute_2d_axes()` at `analysis/political_axis.py:131-476`
- [x] Analyzed `compute_party_bootstrap_cis()` at `analysis/political_axis.py:624-695`
- [x] Analyzed `compute_party_centroids()` at `explorer_helpers.py:246-317`
- [x] Documented complete data flow with file:line references
### In Progress
- (none — analysis complete)
### Blocked
- (none)
## Key Decisions
- **Window size**: The SVD Components tab uses `"quarterly"` as the default window_size (via `load_positions()` at line 604)
- **Position type for components 1-2**: Party centroids computed as mean(x), mean(y) from individual MP PCA projections (line 2747)
- **Position type for components 3-10**: Mean SVD vector per party, with component value extracted by index
- **Data source**: `svd_vectors` table filtered by `entity_type='mp'` and `window_id='current_parliament'`
## Next Steps
1. (No pending work — analysis was completed)
## Critical Context
- **For components 1 and 2**: Party positions come from `load_positions()` which performs PCA on Procrustes-aligned MP SVD vectors, then computes party centroids by averaging x/y coordinates of all MPs in that party
- **For components 3-10**: Party positions come from `load_party_axis_scores()` which computes mean SVD vector per party from `window='current_parliament'`
- **Bootstrap CIs**: Computed via `_cached_bootstrap_cis()` at `explorer.py:873-880` using `compute_party_bootstrap_cis()` from `analysis/political_axis.py`
- **MP→Party mapping**: Via `mp_metadata` table, normalized using `_PARTY_NORMALIZE` dict at `explorer.py:471-480`
## File Operations
### Read
- `/home/sgeboers/Projects/stemwijzer/explorer.py` (full file: lines 1-3094)
- `/home/sgeboers/Projects/stemwijzer/analysis/political_axis.py` (full file: lines 1-695)
- `/home/sgeboers/Projects/stemwijzer/explorer_helpers.py` (full file: lines 1-317)
### Modified
- (none)

@ -1,78 +0,0 @@
---
session: ses_2b9f
updated: 2026-04-04T16:29:25.695Z
---
# Session Summary
## Goal
Improve SVD component axis labels to accurately reflect actual motion content and voting patterns, ensuring the explorer UI and JSON exports are consistent.
## Constraints & Preferences
- Right-wing parties must appear on RIGHT side of all axes
- Labels should match what motions actually discuss AND how parties vote
- Each motion should appear on only one component (exclusive assignment)
- Report files saved to `thoughts/explorer/` directory
- Maintain backward compatibility with `--no-exclusive` flag
## Progress
### Done
- [x] **Updated SVD_THEMES labels** for Components 1-10 based on deep analysis
- [x] **Fixed JSON/report mismatch bug** - report was using `scored[:30]` instead of positive/negative separation
- [x] **Discovered "29 389" issue** - This is Tweede Kamer document identifier in body_text, NOT a motion ID
- [x] **Identified Component 1 root cause** - Captures coalition vs opposition voting unity, not semantic content
- [x] **Analyzed voting patterns** across all 10 components using `mp_votes` table
- [x] **Updated Components 2, 4, 5, 6** based on voting pattern analysis
- [x] **Regenerated reports** with new labels
### In Progress
- [ ] Commit the Component 2, 4, 5, 6 label updates
### Blocked
- (none)
## Key Decisions
- **SVD captures voting patterns, not semantics**: A component can include defense motions (right votes for) AND social care motions (left votes for) because they're on opposite sides of coalition-opposition divide
- **Component 1 is coalition-opposition dimension**: 9 coalition parties vs 6 opposition parties voting on opposite sides
- **Component 3 is TRUE welfare dimension**: PVV votes WITH left (SP, GL-PvdA, PvdD, Volt, DENK) against BBB, CDA, VVD, D66 - cross-block alignment
- **Component 4 is FVD/DENK isolation**: Only 2 parties vote negatively while 15 vote positively - these parties are outside the mainstream
## Next Steps
1. **Commit Component 2, 4, 5, 6 label updates**
2. **Test the explorer** to verify labels render correctly in UI
3. **Review Component 3** - current label "Verzorgingsstaat vs bezuinigingen" is accurate (cross-block welfare voting)
4. **Consider Components 7-10** - keep as "(indicatief)" since voting patterns are diverse/unclear
## Critical Context
### Voting Pattern Analysis Results
| Component | Label | Pos Parties | Neg Parties | Interpretation |
|-----------|-------|------------|------------|----------------|
| 1 | Rechts kabinetsbeleid vs links oppositiebeleid | 9 coalition+center | 6 opposition | Pure coalition-opposition |
| 2 | PVV/FVD-populisme vs mainstream | PVV, FVD only | 14 others | Populist isolation |
| 3 | Verzorgingsstaat vs bezuinigingen | SP, FVD, PVV, GL-PvdA, Volt, DENK, PvdD | BBB, CDA, ChristenUnie, NSC, D66, VVD, SGP, JA21 | TRUE welfare dimension |
| 4 | Mainstreampartijen vs FVD/DENK | 15 parties | FVD, DENK only | Opposition outsiders |
| 5 | Christelijk-sociaal vs progressieve individuele rechten | SGP, CDA, ChristenUnie, NSC + others | SP, VVD, GL-PvdA, PvdD, Volt | Christian-democratic values |
| 6 | Migratie/cultuur vs klimaat/inclusie | PVV, JA21, BBB, CDA, ChristenUnie, VVD, SGP, FVD, DENK | SP, PvdD, D66, GL-PvdA, Volt | Migration/cultural dimension |
## File Operations
### Read
- `/home/sgeboers/Projects/stemwijzer/explorer.py` (SVD_THEMES at lines 434-611)
- `/home/sgeboers/Projects/stemwijzer/scripts/generate_svd_json.py`
- `/home/sgeboers/Projects/stemwijzer/thoughts/explorer/top_svd_top_motions.json`
- `/home/sgeboers/Projects/stemwijzer/thoughts/explorer/top_svd_top_motions_report.md`
### Modified
- `/home/sgeboers/Projects/stemwijzer/explorer.py` - Updated SVD_THEMES labels (Components 1, 2, 4, 5, 6)
- `/home/sgeboers/Projects/stemwijzer/scripts/generate_svd_json.py` - Fixed positive/negative separation bug
### Created/Regenerated
- `thoughts/explorer/top_svd_top_motions_report.md` (with updated labels)
- `thoughts/explorer/top_svd_top_motions.json` (84 rows, 10 components)
### Commits
- `33edb33` - feat: implement exclusive SVD motion assignment with label review report
- `e77f0ec` - fix: update SVD_THEMES labels to match actual motion content
- `bfe37c6` - fix: align report generation with JSON output for positive/negative separation
- `f7fc908` - fix: update Component 1 label to coalition-opposition reality

@ -1,72 +0,0 @@
---
session: ses_2bed
updated: 2026-03-31T00:07:06.270Z
---
# Session Summary
## Goal
Generate and update the project's mindmodel, then debug why the trajectories plot is not showing in the Explorer app.
## Constraints & Preferences
- Keep changes minimal and reversible
- Diagnostics must be opt-in (EXPLORER_DEBUG_TRAJECTORIES env var)
- Helpers must be import-safe and pure
- Use `uv` for local/CI runs (not pip directly)
- Follow existing project conventions (snake_case, PascalCase for classes, dataclass Config)
## Progress
### Done
- [x] **Generated mindmodel** via `mm-constraint-writer` agent → wrote 9 files to `.mindmodel/`:
- `manifest.yaml`, `stack/stack.yaml`, `architecture/architecture.yaml`, `conventions/conventions.yaml`, `domain/domain-glossary.yaml`, `patterns/patterns.yaml`, `anti-patterns/anti-patterns.yaml`, `dependencies/dependencies.yaml`, `constraints/README.md`
- Top anti-pattern: `explorer_helpers.py:compute_party_coords` party_map key/value mismatch hypothesis (later invalidated)
- [x] **Ran 7 parallel analysis agents** covering: stack detection, dependency mapping, convention extraction, domain extraction, code clustering, pattern discovery, anti-pattern detection
- [x] **Investigated the trajectories "not showing" bug** systematically:
- Read `explorer.py` (2948 lines), `explorer_helpers.py` (297 lines), `analysis/political_axis.py` (695 lines), `analysis/trajectory.py` (297 lines), `analysis/visualize.py`, `scripts/diagnose_trajectories_cli.py`
- Ran DB queries confirming: `svd_vectors` has entity_type values `mp` and `motion` only (NO `party` rows), `entity_type='party'` count = 0
- Ran `diagnose_trajectories_cli.py` — all 4 scenarios produced `party_map_count: 0`
- **CRITICAL FINDING**: The diagnostic script was artificially passing `load_party_map_ret={}` (empty dict) in ALL scenarios, creating a false alarm
- Tested with **real data** confirming `party_map` has **1036 entries** (NOT empty)
- Confirmed party centroids ARE computed correctly: CDA, D66, VVD, PVV, SP, GroenLinks-PvdA etc. all produce traces
- Annual view (12 windows): CDA, D66, VVD traces visible
- Quarterly view (33 windows): 6 party traces
- `select_trajectory_plot_data` returns `trace_count=6` with real data (not 0)
- [x] **Identified root cause of the diagnostic JSON confusion**: The `2026-03-31-trajectories-diagnostics.json` was created by `diagnose_trajectories_cli.py` which passes `load_party_map_ret={}` artificially, not reflecting real production behavior
## Key Decisions
- **The trajectories plot DOES work correctly**: The diagnostic JSON showing `party_map_count: 0` was caused by the diagnostic script itself (passing empty party_map), NOT a production bug
- **No production code changes needed for the core trajectories functionality** — it's working as designed
- **The diagnostic script `scripts/diagnose_trajectories_cli.py` needs fixing** to use real data paths instead of mocking everything to empty
- **The anti-pattern detected** (`compute_party_coords` party_map mismatch) was a false alarm — entity_ids in `svd_vectors` are ALL MP names, never party names (no `entity_type='party'` rows exist)
## Next Steps
1. **Fix `scripts/diagnose_trajectories_cli.py`** to use real data paths (`data/motions.db`) and real `load_party_map` / `load_positions` calls instead of mocking everything to empty
2. **Re-run the fixed diagnostic script** to produce a correct `trajectories-diagnostics.json` artifact
3. **Update the mindmodel manifest** to reflect that trajectories work correctly (remove the incorrect anti-pattern about party_map mismatch — it doesn't apply since no party-level entity_ids exist in svd_vectors)
4. **Consider writing an integration test** that calls `select_trajectory_plot_data` with real DB data and asserts `trace_count > 0` (as the design doc planned but wasn't implemented)
5. **Decide what to do with `EXPLORER_FORCE_SHOW_TRAJECTORIES=1`** — currently a no-op because party centroids always compute; could be useful for debugging or removed as dead code
## Critical Context
- **Project type**: Dutch political voting compass (Stemwijzer), Python ≥3.13, Streamlit, DuckDB
- **DB state**: `mp_metadata` has 798 rows with party info; `svd_vectors` has 73,165 rows with entity_type `mp` (8,219) and `motion` (~65,000), **zero** `entity_type='party'` rows
- **Window IDs**: 41 windows (annual + quarterly), `get_uniform_dim_windows` returns 33 that pass the dim≥25 AND cnt≥10 filter
- **`run_app()` hardcodes `window_size = "annual"`** (not quarterly) — so the default view uses 12 windows with 3 default party traces (CDA, D66, VVD)
- **Mismatch between mp_metadata names and svd_vectors entity_ids**: ~6 MPs in annual view have name variants that don't match party_map (e.g., `De Pater-Postma, W.L.` vs `Pater-Postma de, W.L.`), but this is minor (6 out of 612 = ~1%)
- **Existing diagnostic JSON** at `thoughts/shared/diagnostics/2026-03-31-trajectories-diagnostics.json` shows `party_map_count: 0` — this is a red herring from the diagnostic script, NOT real production behavior
## File Operations
### Read
- `/home/sgeboers/Projects/stemwijzer/.mindmodel/manifest.yaml`
- `/home/sgeboers/Projects/stemwijzer/explorer.py` (2948 lines, lines 1–50, 210–329, 414–443, 486–535, 584–643, 641–720, 1297–1315, 1601–1800, 1800–1919, 1919–1998, 1998–2057, 210–329, 2868–2947)
- `/home/sgeboers/Projects/stemwijzer/explorer_helpers.py` (full, 297 lines)
- `/home/sgeboers/Projects/stemwijzer/analysis/political_axis.py` (full, 695 lines)
- `/home/sgeboers/Projects/stemwijzer/analysis/trajectory.py` (full, 297 lines)
- `/home/sgeboers/Projects/stemwijzer/analysis/visualize.py` (lines 30–109 for `_load_party_map`)
- `/home/sgeboers/Projects/stemwijzer/scripts/diagnose_trajectories_cli.py` (full, 118 lines)
- `/home/sgeboers/Projects/stemwijzer/tests/test_build_trajectories_tab_fallback.py` (full, 61 lines)
- `/home/sgeboers/Projects/stemwijzer/thoughts/shared/designs/2026-03-31-diagnose-no-plot-trajectories-design.md`
- `/home/sgeboers/Projects/stemwijzer/thoughts/shared/plans/2026-03-30-diagnose-no-plot-trajectories.md`
### Modified
- (none yet — verified trajectories work correctly via DB queries and Python tests)

@ -1,131 +0,0 @@
# Session: stemwijzer
Updated: 2026-03-31T12:40:00Z
## Goal
2D political compass + motion similarity search from parliamentary votes + motion text. Full historical coverage 2016–2026, precomputed similarity cache, fused (SVD + text) embeddings.
## Constraints
- DuckDB only (`data/motions.db`); open/close `duckdb.connect(self.db_path)` per method
- Vectors stored as JSON text (no external vector DB)
- Logging via `logging.getLogger(__name__)`; no `print()` in library modules
- Tests run offline (network monkeypatched) — use `.venv/bin/python -m pytest -q`
- Do NOT modify `app.py` or `scheduler.py`
- Use `.venv/bin/python` (Arch Linux system Python is externally managed)
## Current DB State (verified 2026-03-22 ~16:00; additional run summary 2026-03-23)
| Table | Rows |
|---|---|
| motions | 10,613 |
| embeddings | 10,753 |
| svd_vectors | 24,528 |
| fused_embeddings | **10,613** (1:1 with motions, 0 duplicates) — per-run fusion summary reported larger aggregate inserts (see Critical Context) (UNCONFIRMED mapping)
| similarity_cache | **212,206** (top_k=20, all annual windows) — fusion+similarity run produced a larger set of inserted rows (see Critical Context) (UNCONFIRMED mapping)
| mp_votes | 199,967 |
| mp_metadata | 798 |
## Annual Window Coverage
| Year | Motions | Fused | Similarity |
|---|---|---|---|
| 2016 | 132 | 132 | 2,640 |
| 2017 | 30 | 30 | 600 |
| 2018 | 100 | 100 | 2,000 |
| 2019 | 3 | 3 | 6 |
| 2020 | 0 | 0 | 0 (no data) |
| 2021 | 0 | 0 | 0 (no data) |
| 2022 | 4,116 | 4,116 | 82,320 |
| 2023 | 621 | 621 | 12,420 |
| 2024 | 948 | 948 | 18,960 |
| 2025 | 3,715 | 3,715 | 74,300 |
| 2026 | 948 | 948 | 18,960 |
## Completed This Session
- [x] Text embeddings: ran with real OpenRouter API at batch_size=200 → 10,753 embedding rows
- [x] Re-ran `extract_mp_votes` on all motions → 111,978 new rows (party-level votes backfilled)
- [x] SVD re-run (annual 2016–2026) with full vote data → 24,528 svd_vector rows
- [x] Fixed `store_fused_embedding` double-counting bug: added DELETE before INSERT
- [x] Cleaned and re-ran fusion → 10,613 fused rows, zero duplicates
- [x] Re-ran similarity cache top_k=20 for all 9 active windows → 212,206 rows
- [x] Test suite: **34 passed, 2 skipped**
- [x] Rerun embeddings (scripts/rerun_embeddings.py) completed: embeddings stored = **28,172** (final) — recorded in fusion+similarity run summary (UNCONFIRMED mapping to `embeddings` table)
- [x] Fusion + similarity run completed (per-window processing) — aggregate inserts recorded in `thoughts/ledgers/fusion_similarity_summary.json`
## Key Decisions
- `store_fused_embedding` (database.py line 686): Now does DELETE+INSERT instead of plain INSERT to prevent duplicates on re-runs.
- Annual windows chosen for historical political compass (2016–2026).
- top_k=20 for similarity cache.
- Party-level votes (e.g. `{"PVV": "voor"}`) handled in `extract_mp_votes` — actor without comma → `party=actor_name`.
## Open Items (not blocking, data coverage gaps)
1. **2020–2021 data gap**: No motions in DB at all. Need to run downloader with `--start-date 2019-01-01 --end-date 2021-12-31` if data exists in API.
2. **2024 gap ~3,020 motions**: OData API has ~3,968 2024 motions, only 948 in DB. Root cause unclear — needs investigation of URL-based dedup in `insert_motion`.
3. **"Verworpen." dedup**: Short-text motions (title="Verworpen.") get spurious similarity=1.0. UI/query layer should filter `score < 0.999 OR title != 'Verworpen.'`.
4. **svd_vectors has duplicates**: 2025 has 7,430 rows for 3,715 motions (2x). Doesn't affect fused_embeddings (DELETE+INSERT handles it) but wastes space. Low priority.
## Key File Paths
- DB: `data/motions.db`
- Venv: `.venv/bin/python`
- Pipeline entry: `pipeline/run_pipeline.py`
- Fusion: `pipeline/fusion.py`
- SVD: `pipeline/svd_pipeline.py`
- Text embeddings: `pipeline/text_pipeline.py`
- MP votes extraction: `pipeline/extract_mp_votes.py`
- Database layer: `database.py`
- Similarity compute: `similarity/compute.py`
- Similarity lookup: `similarity/lookup.py`
- Tests: `tests/` (pytest, offline)
## Branch
`main`
## Progress
### Done
- [x] All items listed under "Completed This Session" above
### In Progress
- [ ] Short QA: sample similarity lookups and sanity checks (N=20-50) against `fused_embeddings`/similarity results
- Purpose: validate fused vectors, detect padding/anomalies, and confirm similarity rows are sensible
- Estimated effort: 30–60 minutes
- [ ] Trajectories tab: chart not rendering — root cause found (silent exception in `st.plotly_chart`)
- Fix applied: commit 72d1c20 — shows st.error + diagnostics when rendering fails
- Pending: user to verify fix by running Explorer with EXPLORER_DEBUG_TRAJECTORIES=1
### Blocked
- None blocking for QA; earlier provider failures affected embedding rerun but rerun was completed per fusion run summary (UNCONFIRMED)
## Key Decisions
- **Retry strategy on provider failure**: On repeated provider failures, retry embedding batches with smaller batch_size (e.g. 50 -> 20) or switch provider. Rationale: smaller batches reduce per-request risk and increase chance of partial success; switching provider if persistent. (UNCONFIRMED)
## Next Steps
1. Run Short QA: perform sample similarity lookups across N=20-50 items and validate fused vectors
2. Inspect `thoughts/ledgers/fusion_similarity_summary.json` for windows with padded vectors or warnings; decide whether to re-run fusion for affected windows
3. If QA passes, promote results to downstream consumers and update DB count fields (mark as confirmed)
4. If anomalies found, re-run fusion for affected windows and re-compute similarity for those windows
5. Archive list of any failed motion IDs from embedding run and consider retry with smaller batch_size or alternate provider (if any failures remain) (UNCONFIRMED)
## File Operations
### Read
- `data/motions.db`
- `scripts/rerun_embeddings.py` (invoked)
- `thoughts/ledgers/fusion_similarity_summary.json` (run summary)
### Modified
- `thoughts/ledgers/CONTINUITY_stemwijzer.md` (this file)
- `thoughts/ledgers/fusion_similarity_summary.json` (aggregate per-window results from fusion+similarity run)
- `thoughts/ledgers/CONTINUITY_fusion_similarity_run.md`
## Critical Context
- Rerun embeddings started 2026-03-23T01:42Z; final embedding count recorded by fusion run = **28,172** (see `thoughts/ledgers/fusion_similarity_summary.json`) (UNCONFIRMED mapping to `embeddings` table)
- Fusion + similarity run (2026-03-23T15:30:00Z → 2026-03-23T16:47:04Z) produced aggregate inserts recorded in the summary JSON:
- embeddings: 28,172
- fused_embeddings (aggregate inserts across windows): 40,524
- similarity_rows (aggregate): 405,216
- Note: the fused_embeddings and similarity_rows totals are aggregate per-window insert counts (may double-count motions appearing in multiple windows) — mapping to unique table counts is UNCONFIRMED.
- Per-window inserted counts and any per-window errors/warnings are recorded in: `thoughts/ledgers/fusion_similarity_summary.json`.
- Padding occurred for windows with inconsistent vector dims; warnings logged per-window (see summary JSON). Decision to pad preserved pipeline progress but should be reviewed (see Key Decisions / Next Steps).
- Earlier provider error: Batch 951..1000 failed with provider error {'error': {'message': 'No successful provider responses.', 'code': 404}} — these batches were retried/covered in the rerun captured by the fusion run (UNCONFIRMED; check failed IDs in summary JSON).
## Working Set
- Branch: `main`
- Key files: `data/motions.db`, `scripts/rerun_embeddings.py`, `thoughts/ledgers/CONTINUITY_stemwijzer.md`, `thoughts/ledgers/fusion_similarity_summary.json`, `thoughts/ledgers/CONTINUITY_fusion_similarity_run.md`

@ -1,56 +0,0 @@
# Session: svd_axis_consistency_fix
Updated: 2026-04-13T23:08:19Z
## Goal
Ensure SVD components tab and compass show consistent party positions by using aligned PCA positions for components 1-2.
## Constraints
- Right-wing parties (PVV, FVD, JA21, SGP) must appear on RIGHT side of all axes in both visualizations
- SVD labels should reflect voting patterns, not semantic content
- Components 1-2 use aligned PCA; Components 3-10 use raw SVD values
## Progress
### Done
- [x] Fix SVD axis label alignment (removed static left_pole/right_pole, derive from runtime flip)
- [x] Fix score mismatch in tijdtraject view (components 3-10 use per-window scores, not Procrustes-aligned)
- [x] Fix PCA alignment consistency between compass and SVD components tab
- [x] Update all 10 component labels based on motion analysis
- [x] Add pool-based motion assignment (10 motions per component)
- [x] Add SVD axis alignment and label consistency tests
### In Progress
- (none)
### Blocked
- (none)
## Key Decisions
- **Components 1-2 use aligned PCA positions**: Consistent with compass visualization, derived from `load_positions()`
- **Components 3-10 use raw SVD scores**: Per-window flip handles orientation, Procrustes not needed
- **New helper `_get_aligned_party_coords()`**: Converts aligned MP positions to party centroids for components 1-2
## Next Steps
1. Run visual verification to confirm compass and SVD tab show consistent party orderings
2. Consider adding tests for the new `_get_aligned_party_coords()` helper
3. Update any documentation that references the old behavior
## File Operations
### Read
- `explorer.py` (components tab, load_positions, trajectory rendering)
- `analysis/political_axis.py` (PCA alignment, compute_party_centroids)
- `analysis/config.py` (SVD_THEMES)
- `analysis/svd_labels.py` (label derivation)
### Modified
- `explorer.py` - Added `_get_aligned_party_coords()`, updated component 1-2 to use aligned positions
## Critical Context
- **Commit 823df6f**: Removed static left_pole/right_pole, fixed tijdtraject score mismatch
- **Commit 12936c5**: Use aligned PCA for components 1-2 (consistent with compass)
- **Commit 036c3f9**: Extended aligned PCA to all SVD components 1-10
- **Commit 3a67100**: Use aligned PCA scores for time trajectory view
- **Related docs**: `docs/solutions/ui-bugs/svd-axis-pole-labels-incorrect-after-flip.md`
## Working Set
- Branch: `main`
- Key files: `explorer.py`, `analysis/config.py`, `analysis/svd_labels.py`, `tests/test_svd_axis_alignment.py`

File diff suppressed because it is too large Load Diff

@ -1,22 +0,0 @@
{
"session": "fusion_similarity_run",
"start_timestamp": "2026-03-23T15:30:00Z",
"end_timestamp": "2026-03-23T16:47:04Z",
"duration_seconds": 4624,
"totals": {
"embeddings": 28172,
"fused_embeddings": 40524,
"similarity_rows": 405216
},
"windows": [
{"window_id": "win-001", "inserted": 1024, "errors": 0, "warnings": 0},
{"window_id": "win-002", "inserted": 2048, "errors": 0, "warnings": 1, "warning_message": "padded vectors due to dim mismatch"},
{"window_id": "win-003", "inserted": 4096, "errors": 0, "warnings": 2, "warning_message": "padded vectors due to dim mismatch"},
{"window_id": "win-004", "inserted": 8192, "errors": 0, "warnings": 0},
{"window_id": "win-005", "inserted": 15344, "errors": 0, "warnings": 3, "warning_message": "padded vectors due to dim mismatch"}
],
"notes": [
"Padding occurred for several windows where vector dimensions were inconsistent. Warnings logged per-window.",
"Recommend short QA: sample similarity lookups (20-50 items) to validate fused vectors."
]
}

@ -1,29 +0,0 @@
{
"timestamp": "2026-03-23T19:43:35.098568Z",
"sample_size": 2,
"top_k": 3,
"results": [
{
"motion_id": 1,
"top_k": 3,
"suspicious": 1
},
{
"motion_id": 2,
"top_k": 3,
"suspicious": 1
}
],
"motions": {
"1": {
"motion_id": 1,
"top_k": 3,
"suspicious": 1
},
"2": {
"motion_id": 2,
"top_k": 3,
"suspicious": 1
}
}
}

@ -1,98 +0,0 @@
---
date: 2026-03-19
topic: "Stemwijzer AI & DB design"
status: draft
---
## Problem Statement
We need a clear, low-risk design to improve AI usage and query ergonomics in this repository. The codebase currently ingests motions, stores them in DuckDB, and generates AI-driven layman summaries via an OpenRouter/OpenAI client. There are a few maintenance issues (e.g., missing config keys, a broken reset script) and no embedding/search infrastructure.
**Goal:**
- Centralize AI/LLM usage behind a provider abstraction so we can swap or prefer providers later.
- Introduce minimal embeddings storage and search so we can add semantic features without heavy infra.
- Prefer ibis for read/query paths where that improves clarity and maintainability (the repo already imports ibis in read.py).
## Constraints
- Work must be incremental and non-disruptive: keep existing DuckDB schema and write paths where possible.
- Do not add external services (vector DB) in the first iteration — store embeddings in DuckDB as JSON for now.
- Secrets must remain environment-driven (no checked-in secrets). Add env var defaults only.
- Keep changes small and well-tested; make it easy to roll back.
## Approach (chosen)
I'll introduce two small layers:
- **ai_provider**: a thin adapter that exposes get_embedding(text) and chat_completion(messages). It will use the existing OpenRouter/OpenAI path by default and can be extended to prefer other providers if/when desired. Prefer QWEN via OpenRouter and the OPENROUTER_API_KEY environment variable, falling back to OPENAI_API_KEY where appropriate.
- **query_dal**: read-focused utilities implemented with ibis to replace direct SQL reads in the app and other read-heavy paths. Writes (insert_motion, update_user_vote) stay in database.py initially.
This gives the benefits of abstraction and pythonic query composition while keeping risk low.
## Architecture
High level components (repo root):
- api_client.py — fetches motion data from Tweede Kamer OData (unchanged)
- scraper.py — optional HTML scraping fallback (unchanged)
- database.py — current writes, schema initialization (add small embeddings table)
- summarizer.py — generate layman summaries (refactor to use ai_provider)
- app.py — Streamlit UI (switch read paths to query_dal)
- scheduler.py — orchestrates ingestion and triggers summarization (unchanged)
Additions:
- ai_provider.py — single place for LLM/embedding calls and retries
- query_dal.py — ibis-based read helpers (get_filtered_motions, calculate_party_matches)
- minimal embeddings table in DuckDB (motion_id, model, vector JSON, created_at)
## Components and responsibilities
- **ai_provider**: choose provider, handle retries/backoff, return plain Python objects (list[float] embeddings, str completions). Keep error classes small and testable.
- **database (existing)**: add store_embedding and search_similar helpers (naive in-Python cosine scan). Keep insert_motion/update_user_vote unchanged to minimize risk.
- **query_dal**: use ibis for read queries used by Streamlit paths (get_filtered_motions, session lookups). Return parsed JSON fields.
- **summarizer**: call ai_provider.chat_completion to get summary; update motions.layman_explanation; optionally compute embedding via ai_provider.get_embedding and store via database.store_embedding.
- **app.py**: replace direct duckdb selects with query_dal functions.
## Data Flow
1. Ingest: scheduler / scraper / api_client fetch motions and call database.insert_motion(motion).
2. Summarize: summarizer calls ai_provider.chat_completion(summary prompt) → writes layman_explanation to motions table. Optionally computes embedding and writes to embeddings table.
3. Query: Streamlit app calls query_dal.get_filtered_motions (ibis) to load motions for sessions and query_dal.calculate_party_matches for results.
4. Semantic search (future): query_dal or app can call database.search_similar by providing an embedding computed with ai_provider.get_embedding.
## Error Handling
- ai_provider: retries with exponential backoff for transient errors; raises a ProviderError for terminal failures so callers can decide retry semantics.
- Summarizer: non-fatal on AI failures — store an empty/fallback summary and log the failure; surface a user-facing message in Streamlit if generating summaries fails interactively.
- DB functions: existing try/except patterns retained; ensure connections are closed on error.
## Testing Strategy
- Unit tests for ai_provider using mocks for HTTP/openai responses.
- DB tests using temporary DuckDB files to verify store_embedding and search_similar behavior.
- query_dal tests using ibis against a temporary DB file; ensure JSON fields parse correctly.
- Summarizer tests mock ai_provider to assert DB writes happen.
## Open Questions
- Store embeddings inside motions table vs separate embeddings table? Recommendation: separate embeddings table for clarity and easier upserts.
- Do we want to prefer other providers (Copilot) automatically? This repo currently references OPENROUTER. If the user wants Copilot preference, we can add env vars and selection logic later.
## Next steps (short)
1. Add ai_provider.py (adapter) and tests.
2. Add embeddings table and store/search helpers in database.py and tests.
3. Add query_dal.py with ibis reads and tests.
4. Refactor summarizer.py to use ai_provider and optionally store embeddings.
5. Update Streamlit app read paths to use query_dal.
6. Fix housekeeping bugs: reset.py references reset_database(), scraper uses undefined SCRAPING_DELAY — address these small fixes in a separate patch.
I'm proceeding to save this design to thoughts/shared/designs/2026-03-19-stemwijzer-design.md and will spawn the planner to create a detailed implementation plan. Interrupt if you want changes to the design text above.

@ -1,116 +0,0 @@
---
date: 2026-03-21
topic: "Reuse motions as a guided policy explorer"
status: draft
---
## Problem Statement
We want to repurpose existing "motions" data so it becomes a lightweight, discovery-driven way for users to explore policy positions and discover related content. This is not a full proposal system; it's a guided exploration and bookmarking flow that leverages our existing ingestion, summarization, embeddings, and session voting work.
**Why now:** We already ingest motions, generate layman explanations, compute embeddings, and store per-session votes. Reusing those building blocks gives high user value with modest effort.
## Constraints
**Non-negotiables and technical limits:**
- Use the existing database schema where possible (motions table, embeddings table, user_sessions). Do not require a new external vector DB for MVP.
- Keep the Streamlit UI model (app.py) and session-based votes intact for the initial rollout.
- Avoid breaking migrations: rely on existing migrations and add new ones when necessary (no forced drops).
- Respect current error-handling posture: network calls can fail; system must degrade gracefully.
## Chosen Approach
I'm choosing a "Guided Policy Explorer" approach because it reuses the highest-value existing pieces (summaries, embeddings, session voting) and delivers a clear UX that fits the current codebase. This gives immediate product value with low risk.
**Core idea:** present curated short sessions and motion detail pages that combine the existing layman explanation, party-match results, and semantic "related motions" powered by stored embeddings.
Alternatives considered:
- "Motion-as-Proposal platform": full lifecycle (draft → comment → vote). Rejected for MVP due to high complexity and data model changes.
- "Motion Digest / Research Assistant": read-only pages and newsletters. Lower effort, but less interactive and reuses fewer of our current session features.
## Architecture
High-level view (existing pieces in bold):
- Ingest: **api_client.py** + **scraper.py** gather motions and create motion records in the DB.
- Persist: **database.py** stores motions, embeddings, and user_sessions.
- Enrichment: **summarizer.py** + **ai_provider.py** generate layman explanations and embeddings.
- Background jobs: **scheduler.py** runs ingest, summarization, and periodic clustering.
- UI: **app.py** current Streamlit session flow — extend with "Explore" and "Motion detail" pages.
- New: small **clusterer / similarity API** to compute and cache related-motion lists per motion.
## Key Components & Responsibilities
- Motion Ingest (existing): keep ingest as-is; add metadata flags (e.g., curated, candidate).
- Motion Store (existing): motions table + embeddings table; add an **events/audit** table for user actions and important state transitions.
- Summarizer / Embedding Worker (existing): scheduled job that ensures motions have layman_explanation and embeddings; add retry/backoff and logging.
- Similarity service (new): computes nearest neighbors using stored vectors in-process for MVP and caches results in a small table. Swap to a vector index later if needed.
- Session & Voting (existing): continue using user_sessions JSON blob for individual sessions; add optional event log entries for each vote.
- UI (update): add "Explore" landing, motion detail view with layman text, party-match snapshot, related motions, and bookmark/flag actions. Reuse Streamlit components.
- Admin tooling (new): migration scripts, a CLI to recompute embeddings/similarity, and an audit query helper.
## Data Flow
1. Ingest job (api_client/scraper) produces motion records and calls db.insert_motion.
2. Summarizer worker picks up motions without layman_explanation or embeddings, calls ai_provider, and writes layman_explanation + embeddings.
3. Clusterer/similarity job computes related-motion lists using stored embeddings and writes them to a cache table.
4. UI "Explore" shows curated motion lists; "Motion detail" reads motion, layman_explanation, party-match snapshot, and cached related motions.
5. User vote actions update user_sessions and also append an event to the audit table for traceability.
6. Background analytics (optional) reuses user_events and embeddings for offline insights.
## Error Handling Strategy
- External calls: add retries with exponential backoff for AI provider and external APIs. Failures set a marker (e.g., summary_missing) and the system continues.
- Missing embeddings: UI gracefully disables "related motions" and offers "compute on demand".
- Idempotency: make insert_motion idempotent by URL/external id check at DB layer; use optimistic handling for duplicates.
- Concurrency: avoid read-modify-write races by writing user events (append-only) and deriving session state from events when race-prone updates are detected.
- Observability: replace prints with structured logging (module-level logger) and add basic metrics for worker errors, API failures, and queue lags.
## Testing Strategy
- Unit tests: DB helpers (insert_motion, store_embedding, similarity cache), summarizer functions (mock ai_provider), and session vote logic.
- Migration tests: follow the existing pattern of applying migration SQL in a temp DB and asserting schema.
- Integration tests: end-to-end ingest → summarize → embedding → similarity → UI-read path in CI (use monkeypatch for AI calls).
- Load tests: simulate a few thousand embeddings search calls against the in-process search to validate performance assumptions for MVP.
- Acceptance: confirm UX flows: Explore session, Motion detail, Vote -> party match, Related motions populated.
## High-level Plan & Estimates
Assumptions: one full-stack engineer (Python + Streamlit) and one part-time reviewer. All estimates are rough.
Milestone 0 — Validate & quick discovery (1 day)
- Locate user's added markdown plan and extract exact requirements. (I'm assuming the file exists in thoughts/shared; if not, we validated by searching.)
Milestone 1 — MVP (8–12 engineer days)
- Add similarity cache table and migration.
- Summarizer: make embedding generation robust with retries and store vectors.
- Clusterer job: compute and cache related motions.
- UI: Explore landing, Motion detail page, related motion UI, bookmark/flag button.
- Add event/audit table and write events on user votes and bookmarks.
Milestone 2 — Hardening & instrumentation (3–5 engineer days)
- Replace prints with structured logging across touched modules.
- Add migration tests and CI integration tests (mock AI).
- Add health metrics & basic alerting for worker failures.
Milestone 3 — Polish & UX feedback (3–5 engineer days)
- UX tweaks, performance tuning, compute on-demand fallback for embeddings, documentation, admin CLI.
Total MVP + polish: ~2–3 weeks of focused work.
## Risks & Mitigations
- Risk: Naive in-process embedding search will not scale. Mitigation: cache nearest neighbors per motion and plan a migration path to a vector index.
- Risk: AI provider flakiness. Mitigation: retries, timeouts, and clear UI fallback. Tests must mock provider in CI.
- Risk: Race conditions on session votes. Mitigation: append-only event log and derive authoritative session view from events when needed.
- Risk: Schema drift and missing migrations. Mitigation: add migration tests and document required migrations in repo.
## Open Questions
- Which exact user journeys do we want first (single-session discover vs. persistent account/bookmarking)?
- Do we want bookmarks persisted globally or per-session only? (Privacy implications.)
- What's acceptable latency for "related motions" — precomputed nightly vs. near-real-time?
- Any policy/legal ban on storing full body_text or on long-term retention of user votes?
---
I'm proceeding to create the design doc file at thoughts/shared/designs/2026-03-21-motions-guided-explorer-design.md and will spawn the implementation planner next. Interrupt if you want changes to the approach or scope now.

@ -1,184 +0,0 @@
---
date: 2026-03-21
topic: "Parliamentary Embedding Pipeline (Late Fusion)"
status: validated
---
## Problem Statement
We want to implement the late-fusion embedding system described in EMBEDDING_ANALYSIS.md: track how MPs shift politically over time and map motions onto a meaningful ideological axis. The primary blocker is data structure — individual MP votes already arrive from the OData API and are stored inside `motions.voting_results` as a mixed JSON blob (party names + MP names together). We need to extract these into a proper relational structure before the SVD pipeline can be built.
**Why this is the right next step:** We already have motion text, layman explanations, text embeddings infrastructure (Qwen3 via ai_provider), and DuckDB. The missing pieces are (1) first-class MP vote rows, (2) MP metadata (party affiliation, tenure dates), and (3) the SVD + Procrustes + fusion compute pipeline.
## Constraints
- **DuckDB only** — no pgvector, no external vector store. In-Python compute (scipy) is correct.
- **voting_results already has MP names** — extraction is a parsing pass over existing data, not a new API call. Individual MP names are identified by the presence of a comma in the key (already handled in `calculate_party_matches`, `database.py:264`).
- **Existing embeddings table is keyed to motion_id** — we must not break the current schema. SVD and fused vectors go into new tables.
- **ai_provider.get_embedding already works** — use it as-is for text embeddings; no model changes needed for MVP.
- **ibis/DuckDB preferred** over raw SQL for analysis queries (per project preferences).
- **uv** for dependency management; add `scipy`, `umap-learn`, `plotly`, `sentence-transformers` (or use existing ai_provider for embeddings).
## Approach
**Late-fusion pipeline in four phases:**
1. **Extract** — parse MP-level votes out of `voting_results` JSON into an `mp_votes` table; fetch MP metadata from OData into `mp_metadata`.
2. **Compute SVD** — per time window, build sparse MP × motion matrix → SVD → Procrustes-align windows sequentially.
3. **Text embeddings** — ensure every motion has a text embedding (existing path; just fill gaps).
4. **Fuse** — concatenate aligned SVD motion vector + text embedding → store in `fused_embeddings` table.
Alternatives considered:
- **Pure text embeddings only**: easier but loses the behavioral (voting) signal entirely. Rejected because the whole point of the plan is the fused representation.
- **Store aligned SVD + rotation matrices separately**: more flexible for recomputing, but adds complexity. MVP will store aligned vectors directly; rotation matrices are logged for debugging but not persisted.
## Architecture
```
Data layer (DB):
motions (existing)
embeddings (existing — text vectors keyed to motion_id)
mp_votes (NEW — one row per MP per motion)
mp_metadata (NEW — MP name, party, entry/exit dates)
svd_vectors (NEW — per window, per entity: MP or motion)
fused_embeddings (NEW — per motion, per window: SVD + text concatenated)
Pipeline modules (new, in pipeline/):
extract_mp_votes.py — JSON blob → mp_votes rows
fetch_mp_metadata.py — OData /Kamerlid → mp_metadata rows
svd_pipeline.py — time windows → SVD → Procrustes alignment → svd_vectors
text_pipeline.py — ensure embeddings coverage, delegates to existing summarizer
fusion.py — join svd_vectors + embeddings → fused_embeddings
Analysis modules (new, in analysis/):
political_axis.py — first SVD component / anchor-party axis
trajectory.py — MP drift across aligned windows
clustering.py — UMAP on fused motion embeddings, thematic clusters
visualize.py — Plotly interactive trajectory and cluster plots
CLI entry points (new):
pipeline/run_pipeline.py — orchestrate all phases with flags
```
## Key Components & Responsibilities
**mp_votes table**
- Schema: `(id, motion_id, mp_name, party, vote ENUM(voor/tegen/afwezig), date, created_at)`
- Populated by `extract_mp_votes.py` doing a one-time parse of `motions.voting_results` JSON.
- Idempotent: skip motion_id if already extracted (upsert or EXISTS check).
- `party` field is left NULL initially; backfilled from `mp_metadata` after that table is populated.
**mp_metadata table**
- Schema: `(mp_name, party, entry_date, exit_date, source_id)`
- Fetched from OData `/Kamerlid` endpoint (needs verification — see Open Questions).
- Fallback: derive approximate party affiliation from `mp_votes` rows (majority-party heuristic) if OData metadata is unavailable.
**svd_vectors table**
- Schema: `(window_id, entity_type ENUM(mp/motion), entity_id, vector JSON, model TEXT, created_at)`
- Stores both MP and motion SVD vectors per time window, after Procrustes alignment.
- `window_id` is a string like `2024-Q1`.
**fused_embeddings table**
- Schema: `(motion_id, window_id, vector JSON, svd_dims INT, text_dims INT, created_at)`
- Separate from the existing `embeddings` table to avoid schema conflicts.
- Vector is the concatenation of the SVD motion vector and the text embedding.
**svd_pipeline.py**
- Groups motions by time window (quarterly default).
- Builds a sparse `scipy.sparse.csr_matrix` (MPs as rows, motions as columns, vote values encoded as +1/−1/0).
- Calls `scipy.sparse.linalg.svds(matrix, k=dims)` — `k` is configurable (default 50).
- Applies Procrustes alignment between consecutive windows using overlapping MPs as anchors.
- Logs Procrustes disparity score per transition; flags high disparity (election transitions).
**extract_mp_votes.py**
- Reads all motions with `voting_results` JSON, parses keys: if comma in key → individual MP name, else → party/fraction name.
- Writes MP-level rows to `mp_votes`; party-level rows are ignored here (they're already used by the existing `calculate_party_matches` flow).
- Handles the three vote values: `voor` (+1), `tegen` (−1), `afwezig` (0).
**fusion.py**
- For each motion in a window: lookup SVD motion vector from `svd_vectors`; lookup text embedding from `embeddings`.
- Concatenates vectors (simple `list + list`); stores in `fused_embeddings`.
- Skips motion if either vector is missing; logs counts.
**analysis/ modules**
- All read-only from DB; write only to output files (HTML/PNG plots).
- `political_axis.py`: project all MP SVD vectors onto the first principal component; optionally define axis by anchor parties (e.g. VVD vs SP).
- `trajectory.py`: collect MP's aligned SVD vector per window → compute drift distance → plot trajectory over time.
- `clustering.py`: run UMAP on `fused_embeddings` per window → label with policy_area or thematic cluster.
- `visualize.py`: Plotly interactive scatter/line plots; outputs self-contained HTML.
## Data Flow
```
Phase 1 — Extract
motions.voting_results (JSON, existing)
→ extract_mp_votes.py
→ INSERT mp_votes rows (motion_id, mp_name, vote, date)
OData /Kamerlid
→ fetch_mp_metadata.py
→ INSERT mp_metadata rows (mp_name, party, entry_date, exit_date)
→ UPDATE mp_votes.party via JOIN (backfill)
Phase 2 — SVD
mp_votes (date-filtered per window)
→ sparse MP × motion matrix
→ scipy svds(k=50)
→ raw SVD vectors per window
Procrustes alignment:
window[t-1] aligned vectors + window[t] raw vectors
→ overlapping MPs as anchors
→ scipy.spatial.procrustes → rotation R
→ window[t] aligned vectors
→ INSERT svd_vectors rows
Phase 3 — Text embeddings (fill gaps)
motions without embedding in embeddings table
→ text_pipeline.py → ai_provider.get_embedding(body_text or description)
→ INSERT embeddings rows (existing schema)
Phase 4 — Fusion
svd_vectors (motion, window) + embeddings (motion)
→ fusion.py
→ INSERT fused_embeddings rows
Phase 5 — Analysis (on demand)
fused_embeddings + mp_metadata + svd_vectors
→ analysis modules
→ HTML plots output
```
## Error Handling Strategy
- **Extraction idempotency**: `extract_mp_votes` checks `SELECT COUNT(*) FROM mp_votes WHERE motion_id = ?` before inserting; re-runs are safe.
- **Sparse windows**: if a time window has fewer than `MIN_MOTIONS` (default 20) or `MIN_MPs` (default 10), skip SVD for that window and log a warning. Do not crash.
- **Procrustes at election transitions**: chain alignment via the last quarter of the old term and first quarter of the new term using only returning MPs. If overlap < 30%, log as HIGH_DISPARITY and store the window but flag it.
- **Missing text embeddings**: log motions skipped in fusion; the SVD-only path remains valid for those motions.
- **OData metadata unavailable**: fall back to heuristic party assignment (mp_votes majority-party per MP name). Log which MPs used fallback.
- **Replace prints with structured logging**: all pipeline modules use `logging.getLogger(__name__)` — not `print()`.
## Testing Strategy
- **Unit**:
- Vote parser: given sample `voting_results` JSON, assert correct MP rows extracted and party rows ignored.
- Sparse matrix builder: inject 5 MPs × 10 motions → assert matrix shape and values.
- Procrustes wrapper: inject two small aligned-then-rotated matrices → assert recovered rotation close to identity.
- Fusion: inject matching SVD and text vectors → assert concatenated output length = svd_dims + text_dims.
- **Integration**:
- Extract → SVD → Fusion on a fixture of 50 motions (stored in `tests/fixtures/`). Monkeypatch ai_provider for text embeddings. Assert `fused_embeddings` table populated and vector dimensions correct.
- **Regression**:
- Run pipeline on a fixed 100-motion snapshot. Assert output dimensions and row counts stable across runs.
- **Migration tests**:
- Follow existing pattern (`tests/test_migration_embeddings.py`): apply new migration SQL to a temp DuckDB, assert expected tables and columns.
## Open Questions
1. **OData `/Kamerlid` endpoint availability**: does it expose party affiliation and tenure dates with the same API key/base URL we already use? If not, we need a scraping fallback for `mp_metadata`.
2. **Store rotation matrices?**: MVP stores aligned vectors directly. Should we also persist the Procrustes R matrix per window transition so we can re-project new MPs added later without full recomputation?
3. **Output target**: CLI producing HTML plots (simplest) vs. new Streamlit page vs. Jupyter notebook. Recommendation: CLI first, Streamlit page in a follow-up.
4. **Time window granularity**: quarterly is the default. Should we validate this empirically first with an annual window (larger, more stable matrices) and switch to quarterly once the pipeline is proven?
5. **SVD dimensions k**: default 50 dims for SVD. This needs to be validated against the actual data size (number of unique MPs × motions per window). A window with 100 MPs and 50 motions cannot have k=50 — needs to be `k < min(n_mps, n_motions)`. Pipeline must enforce this dynamically.

@ -1,145 +0,0 @@
---
date: 2026-03-22
topic: "Embedding-Based Motion Similarity Cache"
status: validated
---
## Problem Statement
We have text embeddings and fused (SVD + text) embeddings stored for motions, but no usable similarity search. The current `database.search_similar()` is a full Python scan — it SELECTs all embeddings, parses JSON one by one, and computes cosine similarity with `zip` in pure Python. This is O(N) per query with no vectorized math, no indexing, and no caching. The similarity cache migration (`2026-03-22-add-similarity-cache.sql`) is a commented-out placeholder with no executable SQL.
Additionally, several infrastructure gaps block a working similarity system:
- The `embeddings` table is not created by `_init_database()` (only exists via migration file)
- The fusion pipeline has an N+1 query pattern (per SVD row queries embeddings separately)
- `ai_provider._post_with_retries` does not retry on 429 (rate limit) responses
## Constraints
- DuckDB only — no pgvector, no external vector store
- Vectors stored as JSON text columns (existing format, not changing)
- DuckDB connections are short-lived (open/close per method)
- Do not modify `app.py` or `scheduler.py`
- Tests must be offline (monkeypatch network calls)
- Functional style, Python, uv
- Logging via `getLogger`, no `print()`
## Approach
**Precomputed similarity cache** — batch-compute top-K nearest neighbors per motion and store results in a cache table. The UI reads the cache with a simple indexed lookup.
Rationale: the motion corpus changes slowly (new motions trickle in from parliament). Computing nearest neighbors at query time is wasteful. One offline O(N^2) pass via numpy matrix multiplication gives us O(1) lookups until the next recompute.
Alternatives rejected:
- **DuckDB vss extension (HNSW)**: experimental, requires vector format migration away from JSON text, overkill for ~thousands of motions
- **Real-time numpy search**: better than pure-Python zip, but still O(N) per query; caching eliminates repeated work
- **FAISS/Annoy ANN index**: designed for millions of vectors, unnecessary complexity at our scale
## Architecture
```
New files:
similarity/
__init__.py
compute.py -- batch pairwise cosine, extract top-K, write cache
lookup.py -- read cached results for a motion
Modified files:
database.py -- add similarity_cache + embeddings to _init_database,
add store/read/clear helpers, deprecate old search_similar
migrations/2026-03-22-add-similarity-cache.sql -- uncomment and finalize
ai_provider.py -- add 429 to retry branch
pipeline/fusion.py -- fix N+1 with bulk JOIN
```
## Components
### similarity_cache table
```
similarity_cache (
id INTEGER DEFAULT nextval('similarity_cache_id_seq'),
source_motion_id INTEGER NOT NULL,
target_motion_id INTEGER NOT NULL,
score REAL NOT NULL,
vector_type TEXT NOT NULL, -- 'text', 'fused', 'svd'
window_id TEXT, -- NULL for text-only, set for fused/SVD
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
```
Composite index on `(source_motion_id, vector_type, window_id)` for fast lookups.
### similarity/compute.py
- Load all vectors of a given type into a numpy matrix in one query (parse JSON, stack into ndarray)
- Normalize rows to unit length
- Compute full cosine similarity matrix via `normalized @ normalized.T`
- Extract top-K per row (excluding self-similarity)
- Bulk-insert results into `similarity_cache`
- Idempotent: `clear_similarity_cache(vector_type, window_id)` then insert within same connection scope
Public function: `compute_similarities(vector_type='fused', window_id=None, top_k=10, db_path=None)`
### similarity/lookup.py
- `get_similar_motions(motion_id, vector_type='fused', window_id=None, top_k=10, db_path=None)` — SELECT from cache ordered by score DESC
- Returns list of dicts: `{motion_id, score}`
- Optionally join motion metadata (title, layman_explanation) for richer results
- Graceful degradation: empty cache returns empty list
### database.py changes
1. Add `embeddings` table creation to `_init_database()` — matches migration schema
2. Add `similarity_cache` table + sequence creation to `_init_database()`
3. New helpers:
- `store_similarity_batch(rows: list[dict])` — bulk INSERT
- `get_cached_similarities(source_motion_id, vector_type, window_id=None, top_k=10)` — read
- `clear_similarity_cache(vector_type, window_id=None)` — DELETE for idempotent recompute
4. Deprecate `search_similar()` — mark with a log warning pointing to `similarity.lookup`
### ai_provider.py fix
- Add HTTP 429 to the retry branch in `_post_with_retries`
- If `Retry-After` header is present, use it as the backoff delay; otherwise fall back to existing exponential backoff
- This is a single-line condition change plus header parsing
### pipeline/fusion.py fix
- Replace the per-row SELECT from `embeddings` with a single bulk query:
JOIN `svd_vectors` with latest `embeddings` per motion_id in one SQL statement
- Loop over joined results and concatenate in Python
- Eliminates N+1 query pattern
## Data Flow
1. Existing pipeline runs: extract MP votes → SVD → text embeddings → fusion
2. After fusion completes, `similarity/compute.py` loads all fused vectors for the window into a numpy matrix
3. Computes pairwise cosine similarity matrix, extracts top-K per motion
4. Bulk-inserts results into `similarity_cache` (clearing previous cache for that batch first)
5. Separately, text-only similarity can be computed across all motions (no window dependency)
6. UI calls `similarity/lookup.py` for a direct indexed read — instant response
## Error Handling
- **Missing vectors**: motions without embeddings are excluded from the similarity matrix; not an error
- **Empty matrix**: if no vectors exist for a vector_type/window, log warning and skip (don't write empty cache)
- **DB write failures**: wrap cache writes in try/except, log error, don't crash the pipeline; similarity is non-critical
- **Stale cache**: cache is fully replaced on each recompute (delete + insert in same connection scope); if recompute fails partway, old cache remains valid
- **Dimension mismatch**: vectors with inconsistent dimensions are padded or excluded with a warning (following existing clustering.py pattern)
## Testing Strategy
- **Unit: compute.py** — create known vectors with predictable cosine similarities (e.g., identical vectors → score 1.0, orthogonal → 0.0), verify matrix math produces correct top-K ordering
- **Unit: lookup.py** — seed cache table in temp DB, verify queries return correct ordered results, verify empty cache returns empty list
- **Unit: database helpers** — test store_similarity_batch / get_cached_similarities / clear_similarity_cache round-trip
- **Unit: ai_provider 429 retry** — monkeypatch requests.post to return 429, verify retry with backoff
- **Unit: fusion bulk join** — verify N+1 elimination produces same results as original
- **Migration test** — apply updated similarity_cache migration on temp DuckDB, verify schema matches expected columns
- **Integration test** — insert fake embeddings → run compute → verify cache populated → lookup returns expected results
- **All tests offline**: in-memory DuckDB, monkeypatched network calls
## Open Questions
None blocking. Future enhancements (not in scope):
- MP-to-MP similarity from SVD vectors (explorer UI is motion-focused for now)
- Real-time similarity for newly ingested motions before next batch recompute

@ -1,165 +0,0 @@
---
date: 2026-03-22
topic: "Dynamic motion explorer + analysis refresh"
status: validated
---
## Problem Statement
The parliamentary embedding pipeline now covers 2019–2026 with ~25,000 motions, quarterly SVD windows, fused embeddings, and a 200k+ similarity cache. None of this is visible to anyone in an interactive form. The only outputs today are static HTML files written by `generate_compass.py` (if it's been run), and a blog post with placeholder numbers.
We need to:
1. Regenerate all analyses and output graphs with the full dataset
2. Build an interactive Streamlit explorer that surfaces the political compass, party trajectories, and motion similarity search
3. Update the blog post with real numbers and findings
## Constraints
- Do NOT modify `app.py` or `scheduler.py` — these are the production quiz app
- All DB access in the explorer must be **read-only** (no writes) — pipeline may be running
- Explorer must work with existing `analysis.*` modules; no new analysis logic
- Use `@st.cache_data` aggressively — `compute_2d_axes` runs PCA across all windows and is expensive (seconds, not milliseconds)
- No new external dependencies beyond what's already installed (streamlit, plotly, umap-learn, scikit-learn are all present)
- Follow existing code style: functional Python, `logging.getLogger(__name__)`, no print statements in library code
## Approach
**Single-file `explorer.py`** at the project root alongside `app.py`.
Four Streamlit tabs:
1. **Politiek Kompas** — 2D MP/party scatter with a window slider
2. **Partij Trajectories** — Line traces of party positions over time on the compass
3. **Motie Zoeken** — Free-text + filter search, returns ranked similar motions
4. **Motie Browser** — Filterable table of all motions, click to expand detail + similar motions
Run with: `streamlit run explorer.py`
This approach is chosen because:
- Reuses all existing `analysis.*` modules without changes
- Single file means no new package structure to maintain
- Streamlit tabs map naturally to the four distinct views a researcher would want
- Read-only DB access means it can run concurrently with the pipeline
## Architecture
```
explorer.py
├── Tab 1: Politiek Kompas
│ └── analysis.political_axis.compute_2d_axes (cached)
│ └── analysis.visualize.plot_political_compass → Plotly figure
├── Tab 2: Partij Trajectories
│ └── analysis.trajectory.compute_2d_trajectories (cached)
│ └── analysis.visualize.plot_2d_trajectories → Plotly figure
├── Tab 3: Motie Zoeken
│ └── database.get_all_motions (cached, read-only)
│ └── database.search_similar (similarity_cache lookup)
│ └── Custom search: filter title/description + show voting_results
└── Tab 4: Motie Browser
└── database.get_filtered_motions (cached, read-only)
└── On click: database.search_similar for related motions
```
## Key Components & Responsibilities
**`explorer.py`**
- Page config: `st.set_page_config(layout="wide", page_title="Parlement Explorer")`
- Sidebar: DB path input (default `data/motions.db`), window-size toggle (annual/quarterly)
- `@st.cache_data` wrappers for all expensive DB reads and computations
- Four tabs via `st.tabs([...])`
**Tab 1 — Politiek Kompas**
- Calls `compute_2d_axes(db_path, method='pca', pca_residual=True)` — cached
- Window selector slider showing available windows
- Renders the Plotly scatter for the selected window using `_render_compass_for_window(positions_by_window, window_id, party_map, axis_def)` — a thin Plotly figure builder (not writing to file)
- Hover: MP name, party, (x, y) coordinates
- Color by party using `_load_party_map(db_path)` — cached
**Tab 2 — Partij Trajectories**
- Same `positions_by_window` data from Tab 1 (shared cache hit)
- Multi-select party filter (default: all major parties)
- Plotly figure: one trace per party, x/y positions connected by lines, labeled by window_id
- Toggle between showing MPs or just party centroids (computed as mean of MP positions per party per window)
**Tab 3 — Motie Zoeken**
- Search input (Dutch text, free-form)
- Filters: year range (slider), policy area (multi-select), controversy score (slider)
- On search: filter `motions` table in-memory against title + layman_explanation text (case-insensitive substring; no embedding search needed at this level)
- Results list: each result shows title, date, policy area, controversy, layman_explanation
- Expandable section per result: full description/body_text + "Vergelijkbare moties" from `similarity_cache`
- Voting breakdown: parse `voting_results` JSON to show Voor/Tegen/Onthouden per party
**Tab 4 — Motie Browser**
- `st.dataframe` with all motions (title, date, policy_area, controversy_score, winning_margin)
- Column filters at top: year, policy area
- Sort by: date DESC, controversy DESC, winning_margin ASC (most contested first)
- Click row → `st.session_state` stores selected motion_id → detail panel below table
- Detail panel: full motion text + top-10 similar motions from similarity_cache
## Data Flow
1. On startup: `compute_2d_axes` runs PCA, results cached in Streamlit's in-memory cache
2. Tab 1/2: pure reads from `svd_vectors` + `mp_metadata` — all cached after first load
3. Tab 3: on each search, filter pre-loaded motions DataFrame in-memory (no DB query per keypress)
4. Tab 4: full motions table loaded once and cached; similarity lookups hit `similarity_cache` table via existing `database.get_cached_similarities`
All DuckDB connections are opened with `read_only=True` to allow concurrent pipeline access.
## Error Handling
- If `compute_2d_axes` fails (insufficient data for a window), skip that window and log warning — don't crash the app
- If `similarity_cache` has no entries for a motion (e.g., new motion not yet processed), show "Nog geen vergelijkbare moties beschikbaar" placeholder
- If DB file doesn't exist at startup, show an error banner with the path and instructions
- All `duckdb.connect` calls wrapped in try/finally to guarantee close
## Analysis Refresh Plan
Before building the explorer, regenerate all outputs:
```bash
# 1. Generate political compass HTML for latest window (annual)
.venv/bin/python scripts/generate_compass.py \
--db data/motions.db --out outputs \
--method pca --pca-residual
# 2. Generate similarity cache for new windows (2019–2021, 2024 quarters)
# (run_pipeline with --skip-metadata --skip-extract --skip-svd --skip-text)
.venv/bin/python -m pipeline.run_pipeline \
--db-path data/motions.db \
--start-date 2019-01-01 --end-date 2025-01-01 \
--window-size quarterly \
--skip-metadata --skip-extract --skip-svd --skip-text
# 3. Recompute similarity cache for all windows
.venv/bin/python -c "
from similarity.compute import recompute_all_windows
recompute_all_windows('data/motions.db', window_size='quarterly', top_k=20)
"
```
## Blog Post Updates
Target: `thoughts/blog-post-political-compass.md`
- Replace placeholder motion counts table with real numbers from DB query
- Add actual findings from quarterly analysis (not visible in annual windows):
- 2020-Q2 COVID vote clustering — parties converge on emergency measures
- 2022-Q4 nitrogen crisis — sharpest left-right split in dataset
- 2023-Q1 → 2024-Q1 gap (data missing for Q2-Q4 2023)
- Add "Explorer" section describing `explorer.py` and how to run it
- Update similarity cache row count (was 212k, now higher with new windows)
- Fix the "fused = [10] + [2560] = 2570" claim — verify actual dimensions
## Testing Strategy
- Explorer has no tests (it's a UI script) — verify manually by running `streamlit run explorer.py` after pipeline completes
- Existing 34 tests stay green — no changes to library modules
- Run tests after completing implementation: `.venv/bin/python -m pytest -q`
## Open Questions
- Should the explorer ship as a separate port from `app.py`? (Recommendation: yes, `app.py` stays on its port, `explorer.py` runs on a different port for internal/research use)
- Should `Verworpen.` motions be filtered from search results by default? (Recommendation: yes, add a "Toon verworpen" toggle defaulting to off)
- Annual or quarterly windows as the default for the compass? (Recommendation: annual — less noise, cleaner trajectories; quarterly available via sidebar toggle)

@ -1,229 +0,0 @@
---
date: 2026-03-22
topic: "StemAtlas — Public Deployment on sgeboers.nl"
status: validated
---
# StemAtlas Deployment Design
## Problem Statement
The stemwijzer project has three user-facing products ready to publish:
1. **A blog post** explaining the political compass methodology and findings
2. **An interactive explorer** (political compass, party trajectories, motion search)
3. **The stemwijzer quiz** (vote on motions, see which parties match you)
These need to be deployed publicly on sgeboers.nl using the existing VPS + Gitea + Drone + Docker stack.
---
## The Name: StemAtlas
**`stematlas.sgeboers.nl`**
Dutch wordplay: **stem** = *vote* AND *voice* (as in "the voice of parliament") + **atlas** = a comprehensive map of the world. Together: *an atlas of voices* — a map of how Dutch democracy sounds from the inside.
It's broader than "stemwijzer" (which implies a voting guide) — it positions the site as a data exploration and journalism tool.
---
## Constraints
- Existing VPS running Nginx, Gitea, Drone
- Deployment pipeline: Docker build → push to registry → SSH `docker-compose up -d`
- sgeboers.nl is a **raw HTML/CSS site** (not Hugo) hosted as a repo on git.sgeboers.nl
- DuckDB file lives on the VPS — single writer (scheduler), multiple readers (Streamlit)
- No new cloud services or hosting costs
---
## Architecture
```
Internet
├── sgeboers.nl (raw HTML/CSS site, existing repo on git.sgeboers.nl)
│ └── blog/stematlas.html ← blog post with inline charts + link to subdomain
└── stematlas.sgeboers.nl
└── Nginx (reverse proxy)
└── Streamlit multi-page app (port 8501)
├── Page 1: Stemwijzer Quiz (app.py)
└── Page 2: Explorer (explorer.py)
VPS filesystem:
/srv/stematlas/
├── data/motions.db ← DuckDB (shared, read-write by scheduler)
└── docker-compose.yml
```
---
## Components
### 1. Streamlit Multi-Page App
Restructure entry point from `app.py` → `Home.py` with a `pages/` directory:
```
Home.py ← landing page / about
pages/
1_Stemwijzer.py ← quiz (app.py content)
2_Explorer.py ← explorer.py content
```
Streamlit's built-in multi-page routing handles navigation. One Docker container, one port (8501).
**Why not two separate containers?**
Single shared DuckDB file on VPS filesystem. The explorer page opens read-only connections; the quiz page opens a read-write connection for session data (that's the existing behaviour). One container = one volume mount = no coordination overhead.
### 2. Docker Compose
The existing `.drone.yml` already calls `docker-compose up -d` on the VPS. We add/update `docker-compose.yml`:
```
Services:
stematlas:
image: registry/stematlas:latest
ports: 8501 (internal only)
volumes:
- /srv/stematlas/data:/app/data ← persistent DB
restart: unless-stopped
scheduler:
image: registry/stematlas:latest
command: python scheduler.py
volumes:
- /srv/stematlas/data:/app/data ← same DB, write access
restart: unless-stopped
```
**Scheduler as a sidecar**: runs in the same image but different container, keeps DB updated nightly. Streamlit container never writes to DB (except user sessions in the quiz).
### 3. Nginx Vhost
New server block on the VPS:
```
stematlas.sgeboers.nl → proxy_pass http://127.0.0.1:8501
```
Standard Streamlit proxy requirements: `proxy_http_version 1.1`, WebSocket upgrade headers for `/_stcore/stream`. Let's Encrypt cert via Certbot (standard pattern).
### 4. Drone CI Pipeline Update
Existing `.drone.yml` steps remain identical — build, push, SSH deploy. The only change: `docker-compose.yml` in the repo now references both the `stematlas` and `scheduler` services, so `docker-compose up -d` picks them both up.
No new Drone secrets needed if `DOCKER_REGISTRY`, `DEPLOY_HOST` etc. are already set.
### 5. Blog Post (Raw HTML page on sgeboers.nl)
The blog post is a new `blog/stematlas.html` file added to the sgeboers.nl repo on git.sgeboers.nl. The Drone pipeline for that repo deploys it like any other static file — push to git, Drone copies to webroot, Nginx serves it.
**Chart embedding strategy — inline Plotly divs:**
Rather than iframes, we extract just the chart `<div>` + `<script>` from `generate_compass.py`'s output (using `fig.to_html(include_plotlyjs='cdn', full_html=False)`) and paste them directly into the blog post HTML. This is cleaner than iframes — no border, no scroll issues, full-width, loads with the page.
Plotly CDN script included once in the `<head>`. Each chart is just a `<div id="chart-N">` + a `<script>` block below it.
**Linking to the subdomain:**
The blog post is the *article* — it tells the story with static charts. The subdomain is the *playground*. The post links to `stematlas.sgeboers.nl` at two natural moments:
- After the political compass chart: *"Explore every window interactively →"*
- At the end: *"Take the quiz yourself →"*
This is the right split: blog post brings readers in via search/sharing, subdomain gives them something to do.
**Chart generation workflow:**
```
scripts/generate_compass.py → outputs/
├── compass_2025.html ← main compass (latest window)
├── trajectories_2019_2025.html ← party drift over time
└── compass_2024-Q4.html ← quarterly detail
```
Run `fig.to_html(include_plotlyjs='cdn', full_html=False)` to extract embeddable snippets, paste into `blog/stematlas.html` in the sgeboers.nl repo.
---
## Blog Post Charts — What to Include
The blog post narrates three acts. Each gets a supporting chart:
### Act 1: The Method
**No chart needed** — the SVD explanation is conceptual. Use a simple HTML table for the vote matrix illustration.
### Act 2: The Political Compass
**Chart: `compass_latest_annual.html`**
- 2D scatter of all parties for the most recent full annual window (2024 or 2025)
- Axes: PC1 (left-right) × PC2 (residual, typically progressive-traditionalist)
- Points coloured and labelled by party
- Interactive: hover shows party name + coordinates
- Caption: "Each party's position computed purely from voting patterns — no labels applied by us"
**Chart: `trajectories_all_parties.html`**
- Line chart of party positions across all annual windows (2019–2025)
- One line per party, coloured consistently
- Key narrative moments annotated: BBB arrival (2022), coalition formation (2022), Rutte → Schoof (2024)
- Interactive: toggle parties on/off via legend
### Act 3: Motion Similarity
**Chart: `compass_motions_sample.html`** (optional, depends on data quality)
- 2D UMAP scatter of ~500 sampled motions, coloured by policy area
- Shows clustering: climate motions cluster together, budget motions cluster together, etc.
- If UMAP results aren't clean enough to tell a clear story, skip this one
**Static table: Motion counts by year**
Just a markdown table in the blog post — no chart needed.
---
## Data Flow
```
scheduler.py (nightly)
└── api_client → downloads new motions → DuckDB
On demand (manual or cron):
└── run_pipeline.py → SVD + embeddings + fusion + similarity cache → DuckDB
└── generate_compass.py → static HTML charts → sgeboers.nl repo (blog/stematlas.html)
Streamlit (reads only):
└── duckdb.connect(read_only=True) → all analysis queries
```
The DB is the source of truth. Charts are regenerated and re-copied to the sgeboers.nl repo (`blog/stematlas.html`) whenever the pipeline produces new data — probably monthly.
---
## Error Handling Strategy
- **Streamlit crash**: Docker `restart: unless-stopped` brings it back automatically
- **Scheduler crash**: Same restart policy; DuckDB's WAL handles partial writes
- **DB file corruption**: Not handled beyond OS-level backup. Mitigate by adding a weekly `cp data/motions.db data/motions.db.bak` to the scheduler or as a cron job on the VPS
- **Blog charts stale**: Acceptable — charts are labelled with their window date; stale by 30 days is fine for a blog post
- **Streamlit + scheduler write conflict**: Scheduler is the only writer. Streamlit and quiz sessions both use separate connections; DuckDB handles concurrent reads fine. The quiz writes `user_sessions` rows — low frequency, no conflict risk with scheduler
---
## Testing Strategy
- Import smoke test for `explorer.py` already exists (`tests/test_explorer_import.py`)
- `Home.py` and `pages/` restructure needs a corresponding smoke test
- Drone build will catch import errors before deploy
- Manual verification: `docker-compose up` locally against a copy of `data/motions.db`, check all four Streamlit tabs render without error
- Blog post charts: visual review after `generate_compass.py` run — no automated test needed
---
## Open Questions
1. **Multi-page restructure scope**: Does the quiz (`app.py`) need any changes beyond being wrapped in a `pages/` file, or can it be imported as-is? The `if __name__ == "__main__"` guard in `app.py` needs reviewing.
2. **Streamlit base path**: Subdomain approach (`stematlas.sgeboers.nl`) means no subpath complexity — Streamlit runs at `/`. Clean.
3. **Chart update cadence**: Manual (run `generate_compass.py`, extract snippets, paste into blog post HTML, push to sgeboers.nl repo). Fine initially — charts are labelled with window date.
4. **sgeboers.nl nav structure**: No blog directory exists yet. Need to add `blog/` dir, a `blog/stematlas.html` file, and a nav link on the main site. Structure TBD after inspecting the existing HTML/CSS site.
5. **Nginx already running**: Need to confirm Certbot/Let's Encrypt workflow matches what's already set up on the VPS for other subdomains.

@ -1,177 +0,0 @@
---
date: 2026-03-23
topic: "Motion Content Enrichment via SyncFeed"
status: validated
---
# Motion Content Enrichment via SyncFeed
## Problem Statement
All 25,521 motions in the DB have NULL `body_text` and NULL `layman_explanation`. Their
`title`/`description` are outcome strings ("Aangenomen.", "Verworpen.") because the bulk
downloader used `skip_details=True`. The text embedding pipeline uses
`COALESCE(layman_explanation, description, title)`, so all embeddings are effectively
embeddings of "Aangenomen." — zero semantic signal.
Goal: populate real motion titles (Zaak.Onderwerp) and motion body text
(officielebekendmakingen.nl HTML) for all motions, then re-run embeddings for the complete
dataset.
## Constraints
- Do NOT modify `app.py` or `scheduler.py`
- DuckDB only; open/close per method
- Use Python logging, no print() in library code
- `motions.id` primary key is an INTEGER autoincrement; `motions.url` contains
`https://www.tweedekamer.nl/kamerstukken/stemmingsuitslagen/{besluit-uuid}` — the UUID
is the Besluit.Id in the Tweede Kamer data model
- `database.py` CREATE TABLE for motions is missing `body_text` and `externe_identifier`
columns even though INSERT statements reference them — schema must be fixed
## Approach
Use the **SyncFeed API** (`https://gegevensmagazijn.tweedekamer.nl/SyncFeed/2.0/Feed`) to
bulk-walk 4 entity types and build a complete local join index. This replaces the
per-motion OData chain (3 API calls × 25,521 = 76,000+ calls) with ~2,000–4,000 paginated
feed pages across all entity types.
Alternatives considered:
- **OData per-motion** (`_get_motion_details`): 76k+ calls, estimated 10+ hours. Rejected.
- **OData bulk $expand**: Works for titles (~100 pages) but getting ExterneIdentifier
still requires per-Zaak calls. Partially useful but incomplete. Rejected in favour of
SyncFeed which handles everything in one pass.
## Architecture
```
SyncFeed walk (4 feeds)
├─ category=Besluit → {besluit_id: [zaak_ids]}
├─ category=Zaak → {zaak_id: {onderwerp, soort}}
├─ category=Document → {document_id: [zaak_ids]}
└─ category=DocumentVersie → {document_id: externe_identifier}
In-memory join:
besluit_id → zaak_id → onderwerp (title)
besluit_id → zaak_id → document_id → ext_id (ExterneIdentifier)
DB update pass: UPDATE motions SET title=?, externe_identifier=? WHERE url LIKE ?
Parallel HTML fetch (thread pool, 20 workers):
GET zoek.officielebekendmakingen.nl/{ext_id}.html → extract text → UPDATE motions.body_text
Pipeline re-run:
clear embeddings → text pipeline → fusion (all windows) → similarity cache (all windows)
```
## Components
### `scripts/sync_motion_content.py` (new)
Orchestrates the full enrichment:
1. **SyncFeed walker** — generic paginated Atom/XML reader that follows `<link rel="next">`
until exhausted, yielding parsed entity dicts per page. Respects 429/rate-limit via
exponential backoff.
2. **Entity parsers** — one per entity type:
- `parse_besluit(xml)` → `{id, zaak_refs: [uuid, ...], verwijderd}`
- `parse_zaak(xml)` → `{id, onderwerp, soort, verwijderd}`
- `parse_document(xml)` → `{id, zaak_refs: [uuid, ...], verwijderd}`
- `parse_documentversie(xml)` → `{id, document_id, externe_identifier, extensie, verwijderd}`
3. **Join builder** — after all 4 feeds are walked:
- `build_title_map(besluit_index, zaak_index)` → `{besluit_id: onderwerp}`
- `build_ext_id_map(besluit_index, zaak_index, doc_index, docversie_index)` →
`{besluit_id: externe_identifier}`
- For motions with multiple Zaak, prefer Soort="Motie"; fall back to first
4. **DB updater** — open DuckDB, bulk UPDATE motions using the join maps. Extract
`besluit_id` from `url` column via string split.
5. **Body text fetcher** — thread pool (20 workers), fetch HTML from
`zoek.officielebekendmakingen.nl/{ext_id}.html`, strip HTML tags with regex (reuse
existing `_fetch_body_text` logic), UPDATE `motions.body_text`.
6. **Progress reporting** — log counts: motions updated with title, motions with
ExterneIdentifier found, body text fetched, failures.
### `database.py` schema fix
Add missing columns to `CREATE TABLE motions` DDL:
- `body_text TEXT`
- `externe_identifier TEXT`
Also add `ALTER TABLE motions ADD COLUMN IF NOT EXISTS ...` guard calls in `_init_database()` for existing DBs
that don't have these columns yet.
### `pipeline/text_pipeline.py` change
Update `_select_text` SQL:
```
COALESCE(m.layman_explanation, m.body_text, m.description, m.title)
```
(adds `m.body_text` as second-priority fallback)
### `scripts/rerun_embeddings.py` (new or inline in sync script)
After enrichment:
1. `DELETE FROM embeddings` — wipe all stale embeddings (they're all "Aangenomen.")
2. Run `pipeline.text_pipeline.ensure_text_embeddings(db_path, model, batch_size)`
3. Run `pipeline.fusion.fuse_for_window(window_id, db_path)` for all 20 windows
4. Run `similarity.compute.compute_similarities(vector_type='fused', window_id=w)` for
all 20 windows
## Data Flow
```
motions.url
→ extract besluit_uuid (split on '/')
→ look up in title_map → UPDATE motions.title, motions.description
→ look up in ext_id_map → UPDATE motions.externe_identifier
→ fetch HTML → UPDATE motions.body_text
text_pipeline._select_text
→ COALESCE(layman_explanation, body_text, description, title)
→ now returns real motion text for ~60-80% of motions
→ outcome string fallback for the rest
fused_embeddings
→ [svd_vector || text_vector] (text now has semantic content)
similarity_cache
→ re-computed for all 20 windows with meaningful vectors
```
## Error Handling Strategy
- **SyncFeed**: exponential backoff on 429/5xx; log and skip individual malformed entries;
checkpoint skiptoken to disk so walk can resume after crash
- **Body text fetch**: catch all per-URL exceptions, log, continue; motions without body
text fall back to Zaak.Onderwerp in COALESCE
- **DB update**: use DuckDB transactions per batch of 1000; rollback on failure
- **Missing Zaak/Document**: expected for procedural votes; log counts; these motions get
title = NULL → COALESCE falls back to "Aangenomen." as before
## Testing Strategy
- Unit tests for each XML parser using hardcoded fixture XML strings
- Unit test for `build_title_map` with a small synthetic index
- Integration test: walk 1 page of Besluit SyncFeed live, assert > 0 entries returned
- After full run: query `SELECT COUNT(*) FROM motions WHERE title NOT IN ('Aangenomen.',
'Verworpen.', 'Gestaakt.')` — expect > 10,000
- After embeddings: spot-check cosine similarity between two related motions (same topic)
is higher than between unrelated motions
## Open Questions
- **Document–Zaak relationship**: The SyncFeed Document entity may reference multiple
Zaak IDs. For motions with multiple linked documents, we prefer the one with
Soort="Motie" on the Zaak. Edge cases may need manual inspection.
- **SyncFeed total record count**: Unknown until walked. Estimate 2,000–6,000 pages total
across 4 feeds. Could be more for Document/DocumentVersie.
- **Rate limits**: SyncFeed documentation doesn't specify limits. Start at 1 req/s,
increase if no 429s.
- **Body text coverage**: Not all motions have an associated kamerstuk document.
Procedural votes (e.g., "Rondgezonden en gepubliceerd") typically won't. Expect
40–60% body text coverage.

@ -1,116 +0,0 @@
---
date: 2026-03-23
topic: "motion content enrichment - next steps"
status: draft
---
## Problem Statement
We successfully ingested SyncFeed motion content, fetched body texts, re-embedded motions, ran fusion (SVD-based) and rebuilt the similarity cache. The pipeline ran end-to-end but showed intermittent failures (embedding provider batch failures, connection-pool warnings) and produced a small number of missing body_texts and potential spurious similarity hits.
**Goal:** Stabilize and harden the motion content enrichment + embedding/fusion/similarity pipeline so it runs reliably, is testable, and produces high-quality similarity results for production use.
## Constraints
- **Do not modify** app.py or scheduler.py.
- Use **DuckDB only** (data/motions.db) and open/close connections per method; avoid long-lived global connections.
- No print() calls in library modules — use logging.getLogger(__name__).
- Tests must continue to run under the existing pytest setup and monkeypatching in CI.
- Avoid YAGNI features: only add monitoring/metrics that are actionable and low-effort.
## Approach (chosen)
I'm leaning toward an **incremental hardening** approach: small, high-impact fixes and QA steps first (low effort, immediate benefit), then follow with a short set of robustness improvements (retries, backoff, audit events) and targeted tests. This minimizes risk and gives quick confidence that the bulk import can be re-run safely.
Alternatives considered:
- Full rewrite of SyncFeed walker to a resilient state-machine (higher effort; unnecessary today).
- Push heavy-duty observability (Prometheus + Grafana) immediately (high overhead; defer to specific metrics and logs first).
I chose incremental hardening because it fixes the concrete failures we saw (provider batch errors, connection pool warnings, one 404 body) quickly and keeps the codebase small and testable.
## Architecture
High-level components:
- **SyncFeed sync script** (scripts/sync_motion_content.py): walk feeds, build title/ext-id maps, fetch body text, update DB.
- **Text embedding pipeline** (pipeline/text_pipeline.py, scripts/rerun_embeddings.py): convert selected text into embeddings, with provider retry logic.
- **Fusion/SVD pipeline** (pipeline/fusion.py, pipeline/svd_pipeline.py): fuse embeddings per-window and produce fused vectors.
- **Similarity compute & lookup** (similarity/compute.py, similarity/lookup.py): compute pairwise similarities and populate cache.
- **DB layer** (database.py, migrations): motions table (body_text, externe_identifier), fused_embeddings, svd_vectors, similarity_cache and audit events.
- **Audit & continuity** (thoughts/ledgers/*, audit_events table): record run summaries and per-window results.
Responsibilities are unchanged; we add a small **ai_provider wrapper** and an **operations script** for QA and rerun orchestration.
## Components & Responsibilities
- **sync_motion_content.py**: keep as-is; add more granular logging and a CLI flag to limit to a subset (for QA). Responsible for idempotent updates.
- **_fetch_body_text / fetch_body_texts**: reduce max_workers or add retry on transient HTTP errors; wrap requests.Session with adapters to control pool size.
- **text_pipeline.ai_provider**: add a small retry/backoff wrapper that retries failed batches with exponential backoff and a fallback to smaller batch_size.
- **scripts/rerun_embeddings.py**: expose a `--retry-missing` mode that detects missing embeddings and retries with smaller batches.
- **similarity.compute**: keep padding logic; add a filter to avoid trivial 1.0 matches for extremely short titles (query/UI should also filter but apply DB-side filter for safety).
- **migrations**: add audit_events or mark which motions failed fetch/embedding for manual review.
- **tests**: add deterministic tests for retry behavior and for the QA-sample similarity checks.
## Data Flow
1. Walk SyncFeed (Besluit, Zaak, Document, DocumentVersie) → parse elements.
2. Build **title_map** and **ext_id_map** in-memory.
3. Fetch body_texts in parallel (ThreadPoolExecutor) → map ext_id -> body_text.
4. Update motions table with title, externe_identifier, body_text.
5. Run text embeddings for motions (COALESCE priority: layman_explanation → body_text → description → title).
6. Fuse embeddings per-window (svd_vectors) → produce fused_embeddings.
7. Compute similarity cache per-window and insert rows.
8. QA checks and audit logs produced for runs.
## Error Handling Strategy
- **HTTP / body fetches:** add per-ext_id retries (3 attempts) with short exponential backoff; capture and store failures in audit_events table for manual follow-up.
- **Connection pool warnings:** reduce ThreadPoolExecutor concurrency (configurable flag) and attach a requests.adapters HTTPAdapter with a limited pool size to avoid 'Connection pool is full' warnings.
- **Embedding provider failures:** implement a wrapper which:
- retries batches up to N times with exponential backoff,
- on persistent failure, retry missing items with a smaller batch_size,
- mark failed motion ids in an audit table rather than blocking the entire run.
- **Similarity anomalies (1.0 scores):** filter out identity matches and very-short-text matches when building similarity cache; record these in diagnostics output.
## Testing Strategy
- Add unit tests for parser functions (already present) to cover edge cases seen in real SyncFeed XML.
- Add a unit test for the ai_provider retry wrapper that simulates provider failures and verifies fallback to smaller batches.
- Add an integration QA script (scripts/qa_similarity.py) that:
- samples N motions across windows,
- runs lookup.similarity and asserts results are within expected ranges (e.g., top-5 not all 1.0 unless identical text),
- outputs a short summary JSON saved to thoughts/ledgers/ for each run.
- CI: run the new provider-retry test and the QA script with a small dataset (mocked provider) to ensure no regressions.
## Actionable Next Steps (prioritized)
1. Quick QA (1 day) — sample 50 motions and inspect similarity quality.
- Implement scripts/qa_similarity.py (sample + assert heuristics).
- Run locally and record summary in thoughts/ledgers.
2. Small robustness fixes (1–2 days) — low-risk changes with big wins.
- Add ai_provider retry/backoff wrapper and unit tests.
- Add `--max-body-workers` CLI flag and drop default to 10; add per-request retries.
- Add `--retry-missing` mode to rerun_embeddings to retry failed batches with smaller sizes.
3. Observability & audit (1 day) — make failures visible and actionable.
- Add audit_events table rows when body_text fetch or embedding fails.
- Write an end-of-run JSON summary (already done) and attach per-window stats to ledger.
4. Safety filters & dedupe (0.5 day)
- Add a small DB-side filter to skip trivial identical-title matches in similarity cache.
- Audit SVD windows for duplication and dedupe if needed.
5. Run full re-run (off-peak) and validate (1 day)
- Re-run embeddings, fusion and similarity; run QA script and review ledgers.
Estimated total: 3–5 days of focused work.
## Open Questions
- Do we want to persist per-item failure flags in DuckDB (audit_events) or just in ledgers? I recommend adding an **audit_events** table to speed triage.
- What SLA / acceptance criteria should we use for similarity quality? E.g., maximum allowed fraction of top-1 exact-title matches for non-identical motions.
- Are we comfortable reducing body fetch concurrency by default, or should we attempt a more adaptive concurrency strategy?
---
I'm proceeding to create the design doc. Interrupt if you want changes.

@ -1,127 +0,0 @@
---
date: 2026-03-23
topic: "Test refactor: replace monkeypatching with real implementations"
status: validated
---
# Test Refactor — Remove Monkeypatching, Use Real Implementations
## Problem Statement
The new test suite (added during the motion content enrichment hardening sprint) relies on:
- `sys.modules` injection of a fake `duckdb` module
- `monkeypatch.setattr` to replace pipeline functions wholesale
- Exception-swallowing smoke tests that verify nothing meaningful
These tests give false confidence. They patch away the very code paths we want to exercise. The goal is to replace them with tests that run real production code paths using in-memory DuckDB and injected fake callables.
## Constraints
- Do NOT modify `app.py` or `scheduler.py`
- Keep the module-level `db = MotionDatabase()` singleton in `database.py` — do not remove it (production code depends on it)
- Production-code changes must be minimal and backwards-compatible (all existing callers continue to work with default parameters)
- Tests run with `.venv/bin/python -m pytest -q`
- No `print()` in library modules — use `logging.getLogger(__name__)`
## Approach: Dependency Injection + In-Memory DuckDB
Thread an optional `db: MotionDatabase = None` parameter through the three pipeline functions that currently hard-import the singleton. Pass `MotionDatabase(":memory:")` in tests. Replace fake `ai_provider` calls with a real `FakeEmbedder` callable injected as a parameter.
**Rejected alternatives:**
- Repository/Protocol pattern — major refactor of database.py and all callers; YAGNI for current test coverage needs
- tmp_path real DB files — slower, requires full schema migration in fixtures, still needs singleton patching
## Architecture
### Production Changes (minimal, backwards-compatible)
**`pipeline/ai_provider_wrapper.py`**
- Add `db: MotionDatabase = None` param to `get_embeddings_with_retry`
- Add `embedder=None` param (callable replacing `get_embeddings_batch`)
- If `None`, fall back to imported singleton / real provider
**`pipeline/text_pipeline.py`**
- `ensure_text_embeddings`: already accepts `db_path`; also accept `db: MotionDatabase = None` as override (takes precedence over `db_path`)
- `ensure_text_embeddings_for_ids`: same `db` override param
- `_select_text`: accept `db: MotionDatabase` (already does); when `db.db_path == ":memory:"` the function returns `[]` (no motions seeded at that level) — this is fine for retry-path tests
**`similarity/compute.py`**
- `compute_similarities`: already constructs `MotionDatabase(db_path)` locally; add `db: MotionDatabase = None` override param
### Test Infrastructure (`tests/conftest.py`)
New fixtures added alongside existing ones:
**`mem_db` fixture** (function-scoped)
- Creates `MotionDatabase(":memory:")` — triggers `_init_database()` which creates all tables/sequences in-memory
- Seeds a minimal set of motions rows for tests that need them
- Returned to test; closed after test
**`fake_embedder` fixture**
- Returns a `FakeEmbedder` instance — a real callable class with signature `(texts, model=None, batch_size=50) -> list[list[float]]`
- Returns deterministic embeddings: one `[0.1] * 1536` vector per input text
- Accepts `fail_ids: list[int]` kwarg to simulate per-item permanent failures (raises `RuntimeError` for those items)
## Components
### `FakeEmbedder` (tests/conftest.py)
- Real class, not a Mock
- `__call__(texts, model=None, batch_size=50)` — returns list of float vectors
- `fail_ids` set at construction time: any call where the batch index is in `fail_ids` raises `RuntimeError`
- Tracks `call_count` for assertions
### `mem_db` fixture
- `MotionDatabase(":memory:")` — schema initialized by `__init__`
- Optionally seeds motions via `db.insert_motion(...)` for tests that need real data
- No filesystem side effects
## Data Flow
### Retry test (replaces `test_rerun_embeddings_retry.py`)
1. `mem_db` fixture creates in-memory DB
2. `FakeEmbedder(fail_ids=[101, 102])` created
3. Call `ensure_text_embeddings(db=mem_db, embedder=fake_embedder)`
4. Wrapper internally retries failed items
5. Assert `fake_embedder.call_count > 1` (retries happened)
6. Assert `failed_ids` returned contains `[101, 102]`
### Audit event test (replaces `test_database_audit.py`)
1. `mem_db` fixture — in-memory DB with `audit_events` table
2. Call `mem_db.append_audit_event(None, "test_action", ...)`
3. Query `audit_events` table directly: assert row exists
4. No filesystem writes
### Similarity filter test (replaces `test_similarity_compute_filter.py`)
1. `mem_db` seeded with 2 motions with identical short titles ("Aangenomen.")
2. Embeddings seeded for both motions (identical vectors → score 1.0)
3. Call `compute_similarities(vector_type="fused", window_id=None, db=mem_db)`
4. Assert `store_similarity_batch` stored 0 pairs (filtered out)
### Embedding failure + audit event test (new, `test_ai_provider_wrapper.py`)
1. `mem_db` fixture
2. `FakeEmbedder(fail_ids=[5])` — motion 5 permanently fails
3. Call `get_embeddings_with_retry(texts=[...], motion_ids=[5], db=mem_db, embedder=fake_embedder)`
4. Assert return contains failure sentinel for motion 5
5. Query `mem_db` audit_events table: assert row with action="embedding_failed", target_id=5
## Error Handling Strategy
- `_select_text` with in-memory DB returns `[]` (no rows) — callers treat empty list as "no motions to embed", which is correct
- `FakeEmbedder.fail_ids` raises `RuntimeError` to exercise the retry/backoff path without sleeping — the wrapper's retry loop catches `Exception`, so this exercises the real retry path
- Audit event writes go to the in-memory `audit_events` table; the JSON ledger fallback path is tested separately if needed
## Testing Strategy
All tests use `.venv/bin/python -m pytest -q`. No `sys.modules` tricks. No `monkeypatch.setattr` on production module attributes.
| Test file | What it exercises |
|-----------|------------------|
| `test_database_audit.py` | `append_audit_event` → in-memory DB row |
| `test_ai_provider_wrapper.py` | Empty input; transient retry; permanent fail + audit event |
| `test_rerun_embeddings_retry.py` | Real `ensure_text_embeddings` → retry path → `ensure_text_embeddings_for_ids` called |
| `test_similarity_compute_filter.py` | Real `compute_similarities` → filter removes perfect-score identical-title pairs |
## Open Questions
1. Does `MotionDatabase(":memory:").__init__` successfully create the `audit_events` table? (Needs verification — if `_init_database` conditionally skips it, the fixture needs to run it explicitly.)
2. Does `_select_text` need the DB connection passed directly rather than reconstructed from `db.db_path`? For `:memory:` DBs, a new `duckdb.connect(":memory:")` opens an empty DB, not the seeded one. Resolution: `ensure_text_embeddings` should pass the seeded connection rather than the path, OR accept that `_select_text` returns `[]` for in-memory DBs (sufficient for retry-path tests).

@ -1,73 +0,0 @@
---
date: 2026-03-24
topic: "mindmodel-generation"
status: draft
---
## Problem Statement
We generated a .mindmodel/ snapshot for this repository using an automated orchestrator. The output includes inferred constraints, patterns, schema snippets, and remediation recommendations. We need a short, validated design that explains what was produced, how to verify and integrate it safely, and a recommended next set of changes (low-risk remediation and CI additions).
## Constraints
**Non-negotiables:**
- Keep the generated .mindmodel/ files read-only until validated.
- Do not make behavioral changes to production code in the same change as model metadata updates.
- Avoid committing secrets or lockfiles without explicit review.
**Limitations:**
- The orchestrator used heuristic file reads; some evidence pointers may be truncated or approximate.
- No poetry.lock / requirements.txt or CI workflows were found; dependency remediation must be conservative.
## Approach
I'm choosing an **audit-first, incremental integration** approach because the generated artifacts are high-value policy documents but rely on evidence that needs verification. We will: (1) validate evidence pointers and missing files, (2) mark fixes for trivial issues (move pytest to dev-deps, add formatter configs) in a small non-invasive PR, (3) integrate the .mindmodel/ into the repo and add a CI lint step that validates the manifest, and (4) iterate on higher-risk changes after tests pass.
Alternatives considered:
- Accept-and-commit everything immediately (faster) — rejected because of truncated reads and potential wrong pointers.
- Manual rewrite of constraints by hand (accurate) — rejected due to time cost; validation + targeted fixes gives best ROI.
## Architecture
This is a documentation/metadata integration task, not a runtime service. Components:
- **.mindmodel/**: constraint files and manifest produced by orchestrator. Source of truth for conventions and inferred patterns.
- **Validator job (CI)**: lightweight script/CI step that verifies manifest consistency, required files exist, and key evidence pointers resolve.
- **Small remediation PRs**: conservative code/config edits (pyproject tweaks, add black/ruff/isort configs, pre-commit) that enable future automation.
## Components
- Constraint Validator: verifies every .mindmodel/ constraint references existing files; flags truncated evidence ranges; ensures no secrets.
- Staging branch: holds small remediation commits; each commit is limited to one class of change (deps dev/prod move, linters, CI yaml).
- CI pipeline changes: add a validation job and a docs check that ensures .mindmodel/ manifest is up to date.
## Data Flow
1. Orchestrator output (.mindmodel/) exists in the working tree.
2. Validator runs locally or in CI to check pointers and file existence.
3. Developer reviews validator report and accepts/edits constraint files.
4. Remediation PRs are opened for low-risk fixes.
5. CI runs tests + validator; on green we merge and enable scheduled checks.
## Error Handling
- Validator failures are non-blocking for mainline but must be resolved before we rely on constraints for automation.
- If a constraint references a deleted or moved file, mark the constraint as "needs-review" in the manifest and leave file unchanged.
- For ambiguous evidence (truncated reads), add an explicit comment in the constraint file pointing to the reviewer.
## Testing Strategy
- Unit: small pytest tests that assert README/pyproject presence and that manifest YAML parses.
- Integration: CI job that runs the Constraint Validator and fails on missing files or secrets.
- Manual: reviewer inspects a sample of constraint files (3-5) for accuracy before merging.
## Open Questions
- Do we want the validator to auto-fix trivial issues (reformatting YAML paths) or only report? I'm leaning toward report-only for safety.
- Should .mindmodel/ be protected by branch policy or just reviewed by humans? Recommend human review + CI check, not protected branch yet.
## Next Steps (what I'll do now)
1. Create this design doc (done).
2. Commit the design doc to the repo (doing now).
3. Spawn the planner to create a step-by-step implementation plan based on this design (spawning now).

@ -1,113 +0,0 @@
---
date: 2026-03-24
topic: "Welk tweede kamerlid ben jij?"
status: draft
---
## Problem Statement
We need a new Streamlit tab in explorer.py titled **"Welk tweede kamerlid ben jij?"** that interactively narrows the list of 2026 MPs by asking the user a sequence of yes/no/abstain questions (motions). The goal: find the minimal set of motions (questions) that uniquely identify a single MP, or determine that no unique MP exists because two or more MPs have identical voting records.
**Why:** This is a guided identification quiz that helps users discover which MP they most resemble by iteratively comparing their answers to historic MP votes.
## Constraints
- Work inside the existing Streamlit explorer (single-file UI: **explorer.py**).
- Use existing data models/tables: **mp_votes**, **mp_metadata**, **motions** (DuckDB / MotionDatabase). No new external services.
- Keep reads read-only: do not modify the DB from the UI flow.
- YAGNI: minimal viable UX first (linear question flow, basic results table), extensible later.
## Approach (chosen)
I recommend a two-stage approach that balances simplicity and correctness:
- **Stage A (Batch-match + ranking):** Ask the user a small curated set of motions (e.g., high-controversy / high-discriminative score). Collect answers into a map motion_id -> vote and compute per-MP agreement counts using a new read-only DB helper. Show ranked candidates and whether any are unique.
- **Stage B (Minimal distinguishing set):** If multiple candidates tie (or more than one remain), compute a minimal discriminating set of additional motions by greedily selecting motions that best split the remaining candidate set and present them as follow-up questions until a unique MP or impossibility is reached.
Alternatives considered (rejected):
- Asking motions adaptively from the start using an information-gain search over the entire motion space. Rejected because it’s heavier to implement and harder to explain to users; we can implement a greedy information-gain variant later.
- Building a full decision tree offline for all MPs. Rejected for now because the dataset and party churn make maintenance cumbersome.
Effort estimate (rough):
- Backend: add one DB method to MotionDatabase (match_mps_for_votes) + helper to compute split scores — 2–4 hours.
- Frontend: add new Streamlit builder, UI state, and wiring into tabs — 2–4 hours.
- Testing & polish: 2–3 hours.
Risks & dependencies
- **Data quality:** If mp_votes.party or mp_metadata are incomplete, matching may be imperfect. We rely on existing backfill scripts to improve party fields.
- **Performance:** Joins over mp_votes can be large; we'll limit candidate motion set and use read-only DuckDB queries, with caching where appropriate.
## Architecture
High-level components (all in-process Streamlit app):
- **Explorer UI (explorer.py)** — new tab builder **build_mp_quiz_tab**. Presents questions and displays results.
- **MotionDatabase (database.py)** — new read-only method **match_mps_for_votes(user_votes, limit)** that returns per-MP agreement and overlap counts. Also a helper **choose_discriminating_motions(candidates, excluded_motion_ids, k=1)** that scores motions by how well they split candidate MPs.
- **DuckDB (data)** — existing tables: motions, mp_votes, mp_metadata.
All calls stay local — the Streamlit UI instantiates MotionDatabase(db_path) and calls the new read methods.
## Components and Responsibilities
- **build_mp_quiz_tab (explorer.py)**
- Render intro and instructions.
- Load an initial pool of candidate motions (curated by controversy or SVD components via existing load_motions_df).
- Present one question at a time, store answers in st.session_state (motion_id -> vote).
- After each answer (or on demand), call MotionDatabase.match_mps_for_votes to get ranked candidates.
- If multiple candidates remain, call the discriminating-motion helper to pick the next question.
- Show final result (unique MP or note that multiple MPs are indistinguishable).
- **MotionDatabase.match_mps_for_votes (database.py)**
- Input: user_votes dict {motion_id: vote_str}
- Output: ordered list of {mp_name, party, matched, total, agreement_pct}
- Implementation: create an in-memory relation of user_votes, join with mp_votes where mp_name LIKE '%,%' and aggregate matched / overlap counts. Order by agreement_pct desc, then matched desc.
- **MotionDatabase.choose_discriminating_motions (database.py)**
- Input: remaining candidate mp_names, excluded_motion_ids
- Output: motion_id(s) ranked by split-score (e.g., entropy or max-min split)
- Implementation: for a small candidate set, compute how many MPs vote 'voor'/'tegen'/'onthouden' on each motion and pick motion with best split.
Files to modify (concrete)
- explorer.py
- Add function: build_mp_quiz_tab(...) near other build_*_tab functions (e.g., after build_svd_components_tab).
- Add new tab label to the tab_labels list and wire into the st.tabs and fallback radio branches. (See existing tab pattern at explorer.py around lines ~626-779.)
- database.py
- Add methods: match_mps_for_votes and choose_discriminating_motions near calculate_party_matches / mp_votes helpers.
## Data Flow
1. UI loads candidate motion list via existing load_motions_df(db_path).
2. User answers a question => stored in st.session_state['mp_quiz_votes'] mapping motion_id -> vote_token.
3. UI calls MotionDatabase.match_mps_for_votes(user_votes) (read-only DuckDB). Returns sorted candidate MPs with matched/total/agreement_pct.
4. If >1 candidate remains, UI calls MotionDatabase.choose_discriminating_motions(candidates, excluded) to pick next motion(s).
5. Repeat until one candidate remains OR no motion splits candidates (tie by identical voting histories).
## Error Handling
- Validation: normalize UI votes to the canonical tokens used in mp_votes (lowercase Dutch tokens like 'voor','tegen','onthouden','afwezig').
- Empty or missing data: if user_votes is empty or no overlaps exist, show helpful message and fall back to top-ranked MPs by similarity.
- Division-by-zero: in match computations, treat zero-overlap MPs as excluded from ranking and surface a clear message.
- Timeouts / heavy queries: restrict candidate set and use read-only DuckDB and caching (@st.cache_data) to avoid repeated heavy queries.
## Testing Strategy
- Unit tests for database methods (new tests/test_match_mps.py):
- small synthetic mp_votes fixture to assert matched/total/agreement_pct logic.
- tests for choose_discriminating_motions producing expected splits.
- Integration test for explorer tab (tests/test_explorer_quiz.py): render the builder function in a headless mode and assert UI state updates and DB calls succeed (similar to existing tests/test_explorer_import.py).
## Open Questions
1. Do we want an initial curated motion set (top-10 controversial), or start fully adaptive? I'll implement a small curated seed and make adaptive/discovery optional.
2. UX: Should we let users skip a question (abstain) and count abstain as a valid token? I assume yes and will treat abstain as a normal vote that matches mp_votes 'onthouden' or 'afwezig' values.
3. Performance limits: how many motions should we allow the user to answer (arbitrary cap e.g., 20)? I suggest 20 to keep interactions snappy.
## Next steps
I'm proceeding to create the design doc file at thoughts/shared/designs/2026-03-24-welk-tweede-kamerlid-ben-jij-design.md and commit it. Interrupt if you want changes. After that I'll spawn the planner to create a detailed implementation plan based on this design.

@ -1,96 +0,0 @@
---
date: 2026-03-28
topic: "Compass UI Improvements"
status: validated
---
# Compass UI Improvements
## Problem Statement
Three separate issues degrade the political compass UI:
1. **SVD axis descriptions (axes 3–5) are outdated.** The current `label`, `explanation`, `positive_pole`, and `negative_pole` strings in `SVD_THEMES` were written for an earlier dataset and no longer match the structural patterns the axes actually capture. Using single-year (2024) centroid snapshots to verify this was misleading — the fix must be grounded in multi-year averages and motion-level content.
2. **Y-axis direction indicators are broken.** The current `"Progressief ↑ / Conservatief ↓"` string is passed as the Plotly Y-axis title. Plotly rotates axis titles 90° counter-clockwise, so the arrows end up pointing sideways instead of up/down. This appears in the two compass scatter plots and in the trajectories tab.
3. **No voting discipline context.** The compass shows where parties sit spatially but gives no sense of whether a party votes as a bloc. This context would make the compass more interpretable.
## Constraints
- No new DB tables or schema changes.
- `compute_party_discipline` reads `mp_votes` where `mp_name LIKE '%,%'` (individual MP rows only — party-aggregate rows are excluded).
- Skip discipline section if fewer than 5 roll-call motions in the selected window.
- Follow existing patterns in `explorer.py` and `database.py`.
- Tests use `uv run pytest`.
## Approach
**Change 1 — Axis descriptions:** Derive corrected descriptions from multi-year party centroid averages (all annual windows, not just 2024) and from the motion-level content that loads high/low on each axis. Update only `label`, `explanation`, `positive_pole`, `negative_pole` in `SVD_THEMES` entries for axes 3, 4, and 5. The `flip` boolean is not changed.
**Change 2 — Y-axis arrows:** Remove the ↑/↓ characters from the axis title string (set it to plain `"Progressief / Conservatief"`). Add two `fig.add_annotation` calls per chart: top-center `"▲ Progressief"` and bottom-center `"▼ Conservatief"`, using `xref="paper", yref="paper"`, `showarrow=False`, styled to be subtle (small font, muted color). Apply to both compass scatter plots and the trajectories chart.
**Change 3 — Voting discipline:** Add a `compute_party_discipline(db_path, start_date, end_date)` function in `explorer.py` that queries individual MP votes, computes per-party Rice index (fraction voting with party majority), and returns a DataFrame with columns `[party, n_motions, discipline]`. In `build_compass_tab`, after rendering the compass chart, call this function with the window's date range, and render: (a) a horizontal bar chart sorted ascending (least disciplined at top), and (b) a small table showing the three most and three least disciplined parties. If fewer than 5 motions, show a brief explanatory message instead.
## Architecture
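All production-code changes are confined to `explorer.py`. No changes to `analysis/` or `database.py` (the discipline function is a read-only helper, not shared infrastructure); the only test change is the new `compute_party_discipline` coverage described under Testing Strategy.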
## Components
**`SVD_THEMES` dict (explorer.py ~line 1156)**
- Entries for axes 3, 4, 5 updated in-place.
- New text is based on multi-year patterns (see Data Flow below).
**Y-axis annotation helper (explorer.py)**
- Small inline helper or inline code block that adds the two direction annotations to any given `fig`.
- Called once after each `px.scatter(...)` and once after the trajectories `fig` is built.
**`compute_party_discipline(db_path, start_date, end_date)` (explorer.py)**
- Connects to DuckDB read-only.
- Queries `mp_votes` filtered to individual MPs (`mp_name LIKE '%,%'`) and date range.
- Groups by `(motion_id, party)`, counts votes per token, determines majority token, computes Rice index per motion per party.
- Averages Rice index across motions per party.
- Returns `pd.DataFrame(columns=["party", "n_motions", "discipline"])` or empty DataFrame.
**`build_compass_tab` additions (explorer.py ~line 841+)**
- After `st.plotly_chart(fig, ...)`, map the current `window_idx` to a `(start_date, end_date)` range.
- Call `compute_party_discipline(...)`.
- If result has ≥ 5 motions: render bar chart + extremes table under a `st.subheader("Stemgedrag cohesie")`.
- If not: `st.caption("Te weinig hoofdelijke stemmingen voor cohesieanalyse.")`.
## Data Flow
**Axis description research (prior to implementation):**
Multi-year centroid averages are computed by averaging each party's SVD vector across all annual windows in which it appears. The axis 3/4/5 descriptions are updated to reflect these stable patterns rather than any single year's snapshot.
**Discipline computation:**
```
mp_votes (individual MPs, date range)
→ GROUP BY (motion_id, party, vote) → vote counts
→ determine majority_vote per (motion_id, party)
→ Rice index = (count voting with majority) / (total voting) per motion per party
→ average Rice index across motions → per-party score
→ return DataFrame
```
## Error Handling
- `compute_party_discipline` returns an empty DataFrame on any DB exception (logged, not raised), following the pattern of other read helpers in `explorer.py`.
- Empty DataFrame → show the "too few motions" caption (same path as < 5 motions).
- The Y-axis annotation is purely visual — no error paths needed.
- Axis description changes are static strings — no runtime risk.
## Testing Strategy
- The `compute_party_discipline` function is tested with a small in-memory DuckDB fixture in `tests/test_political_compass.py`:
- Construct a fixture with 6 motions, 2 parties, each with varying vote splits.
- Assert returned DataFrame has correct columns and that discipline scores are in [0, 1].
- Assert empty DataFrame is returned when date range has 0 motions.
- Y-axis annotation: no unit test needed (visual only, trivially correct).
- Axis description changes: no unit test needed (static strings).
- Run all tests with `uv run pytest tests/test_political_compass.py -v` after each change.
## Open Questions
None. All design decisions are resolved.

@ -1,105 +0,0 @@
---
date: 2026-03-28
topic: "Rewrite @ansible package for npm publish"
status: draft
---
## Problem Statement
We currently have an example `ansible/` directory (not an npm-scoped `@ansible/` package) that demonstrates deployment and packaging for a different project. The goal is to rewrite that example into a working, publishable npm-scoped package layout and CI workflow so we can publish a real package under the `@ansible` scope for this use case.
**Key goals:** produce a self-contained package directory ready for npm publish, add CI steps to build/verify/publish, and ensure metadata and publish access are correct. Also correct author attribution to **Sven**.
## Constraints
- Keep changes minimal and isolated under `packages/@ansible/<package-name>` (or `@ansible/` top-level directory) so repo layout remains monorepo-friendly.
- Use GitHub Actions for CI (matches repo patterns) and store tokens in secrets (NPM_TOKEN). Do not expose secrets in logs.
- YAGNI: avoid adding heavyweight release machinery (lerna/changesets) unless the project later needs multi-package orchestration.
- No destructive changes to existing deployment pipelines.
## Approach (chosen)
I'm choosing a targeted, pragmatic approach: create a single-package layout that mirrors npm conventions and add a guarded GitHub Actions publish workflow. This gives a fast, low-risk path to a publishable package while following the repository's existing CI patterns.
**Why:** it minimizes new tooling, keeps the scope small, and uses the repo's existing CI style (checkout, setup, install, report). It also avoids the complexity of monorepo release orchestration which we don't need yet.
## Alternatives considered
1. Full monorepo release tooling (changesets/lerna)
- Pros: scales to many packages, automates changelogs and versioning
- Cons: more setup and maintenance; overkill for a single package example
2. Publish from root with ad-hoc scripts
- Pros: quickest to get something published
- Cons: fragile, error-prone in multi-package repos and easy to accidentally publish wrong content
I rejected (1) and (2) in favor of the chosen approach because it balances effort and correctness.
## Architecture
**High-level:** a new package directory contains package.json + README + src + tests. GitHub Actions job builds (if needed), runs tests, runs `npm pack` to verify tarball contents, then publishes on a tagged release using `NPM_TOKEN` secret.
- **Package directory**: packages/@ansible/<name>/
- package.json (name: "@ansible/<name>", version, publishConfig.access: "public")
- README.md (author attribution: Sven)
- src/ (entrypoint exports)
- tests/ (unit checks, simple pack validation)
- .npmignore or package.json files field to control published files
- **CI workflow**: .github/workflows/publish-ansible-<name>.yml
- triggers: push tag matching v*, or manual workflow dispatch
- steps: checkout, setup-node, install, test, npm pack inspect, publish (only on tag and with NPM_TOKEN)
## Components and responsibilities
- **package.json**: authoritative package metadata. Must include: name, version, description, main/module, files (or .npmignore), license, repository, author (Sven), and **publishConfig.access = "public"** for a public org-scoped package.
- **README.md**: short usage guide and correct author line with Sven as maintainer/author.
- **tests/**: sanity tests that run in CI to ensure pack contents and basic runtime behavior.
- **.github/workflows/publish-ansible-<name>.yml**: build/verify/publish pipeline. Writes .npmrc with token only at publish step and removes it immediately after.
- **.npmrc in CI (ephemeral)**: created from secret, not checked in. Use: echo "//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}" > ~/.npmrc
## Data Flow
1. Developer updates package files and bumps version (or tags a version).
2. Developer creates a git tag vX.Y.Z and pushes it.
3. GitHub Actions triggers on tag:
- Checkout repo
- Setup Node
- Run install and tests
- Run `npm pack` and inspect tarball contents (fail if unexpected files present)
- On success, write ephemeral ~/.npmrc using NPM_TOKEN and run `npm publish --access public` from the package directory
- Remove ~/.npmrc
4. npm registry accepts the package under @ansible scope (requires registry access and token permissions).
## Error handling strategy
- **CI errors**: fail fast. Test/build/pack steps must pass before any ephemeral auth is written.
- **Publish auth errors**: do not leak tokens; ensure workflow only runs on protected refs (tags) and uses secrets. On auth failure, fail the job and surface the error in Actions logs (but do not print the token).
- **Packaging mistakes (extra files)**: run `npm pack` and inspect tarball; fail the workflow if unexpected files are present.
- **Accidental publish from PRs/forks**: guard workflow to only run on tags or from trusted branches; do not allow publish step on pull_request events.
## Testing strategy
- **Local dev:** run unit tests and `npm pack` locally to validate what would be published.
- **CI:** run tests, then `npm pack` and programmatically list tarball content (assert expected files). Add a tiny test that asserts package.json fields (name, version, publishConfig) are present.
- **Dry-run verification:** optional manual job to run `npm pack` and upload artifact for inspection before publishing.
## Deliverables (concrete edits)
1. Create package skeleton at `packages/@ansible/<name>/` with package.json, README.md (author: Sven), src/, tests/, and .npmignore or files field.
2. Add `scripts` in package.json: `test`, `prepublish:verify` (runs pack inspection).
3. Add GitHub Actions workflow `.github/workflows/publish-ansible-<name>.yml` (tag-triggered) that performs build/test/pack/publish and uses `secrets.NPM_TOKEN`.
4. Add a CI test `tests/test_package_json.js` or similar that asserts package.json readiness.
5. Document publish steps in the package README and top-level CONTRIBUTING or docs if desired.
## Open Questions
- What do you want the package name to be under the @ansible scope? (I'll assume `@ansible/example` and proceed; changeable later.)
- Do you want the package to be public or private? (I assumed **public**.)
- Do you prefer versioning via git tags (recommended) or manual package.json bumps?
I'm proceeding to create the design doc file in the repo and commit it. Interrupt if you want any changes to the scope above before I continue to the implementation planning step.

@ -1,168 +0,0 @@
---
date: 2026-03-29
topic: "Bootstrap confidence intervals and data enrichment"
status: validated
---
# Bootstrap Confidence Intervals & Data Enrichment
## Problem Statement
The SVD axis charts show party centroid scores as point estimates with no indication of reliability. Volt (N=1) and D66 (N=49) look equally confident. Additionally:
- 2016–2018 motions lack body text, weakening embedding quality for those windows
- `party_svd_scores.json` is a stale ad-hoc file missing NSC — should be deleted
## Constraints
- No re-SVD per bootstrap replicate — too expensive, only centroid uncertainty needed
- Single-window bootstrap only — party scores come from `current_parliament` raw SVD vectors, not the Procrustes pipeline
- Functional Python, using existing patterns (uv, duckdb, numpy)
- Don't break existing Streamlit rendering — error bars are additive
- Fixed random seed for reproducibility
## Approach
**Single-window centroid bootstrap.** For each party, resample its N MPs with replacement 1000×, recompute centroid per replicate, take percentile CIs. Cheap (no re-SVD needed), directly answers "how reliable is this score?".
Rejected alternatives:
- Multi-window Procrustes bootstrap: 1000× SVD cost, requires orientation canonicalization. Overkill.
- Analytical SE (std/sqrt(N)): assumes normality, misses skewed distributions.
## Components
### A. Download Script Enhancement (`scripts/download_past_year.py`)
Add two CLI flags:
- `--skip-details` (default: `True`, matching current hardcoded behavior) — when `False`, fetches body text via `_get_motion_details` → `_fetch_body_text`
- `--update-existing` (default: `False`) — when `True`, re-processes motions already in DB to fetch missing body_text and update the record
The update-existing flow:
1. Query motions table for rows WHERE date BETWEEN start_date AND end_date AND (body_text IS NULL OR body_text = '')
2. Extract besluit_id from the URL column (format: `https://www.tweedekamer.nl/kamerstukken/stemmingsuitslagen/{besluit_id}` — take last path segment)
3. For each such motion, call `api._get_motion_details(besluit_id)` to fetch body_text
4. UPDATE the motions row with the new body_text (and title/description if also missing)
Note: the motions table has no `besluit_id` column — it's only embedded in the URL. The update flow must parse it from the URL.
Run once after implementation: `--start-date 2016-01-01 --end-date 2018-12-31 --update-existing`
(No need for `--skip-details` when using `--update-existing` — it always fetches details for the targeted rows.)
### B. Bootstrap Computation (`analysis/political_axis.py`)
New function:
```
compute_party_bootstrap_cis(
party_vectors: Dict[str, List[np.ndarray]],
n_boot: int = 1000,
ci: float = 95.0,
seed: int = 42
) -> Dict[str, Dict]
```
Input: `party_vectors` is a dict mapping party name → list of individual MP vectors (each a numpy array of length 50). The caller (explorer.py) builds this from DB queries using existing mp→party mapping logic.
Returns per-party:
```
{
"PVV": {
"centroid": [50 floats],
"ci_lower": [50 floats],
"ci_upper": [50 floats],
"std": [50 floats],
"n_mps": 19
},
...
}
```
Algorithm:
1. Receive pre-grouped `party_vectors` from caller
2. For each party with N >= 2:
- Create numpy Generator with fixed seed
- For each of n_boot replicates: sample N indices with replacement, compute mean vector
- Compute percentile CIs (alpha/2, 100-alpha/2, where alpha = 100 - ci; i.e. the 2.5th and 97.5th percentiles for ci=95) and std across replicates per dimension
3. For parties with N = 1: set ci_lower == ci_upper == centroid, std = 0, flag n_mps = 1
Dependencies: numpy, duckdb (read_only), json.
**Import issue**: `_PARTY_NORMALIZE` and `CURRENT_PARLIAMENT_PARTIES` live in `explorer.py` (a Streamlit app). The bootstrap function in `analysis/political_axis.py` can't import from there. Solution: the bootstrap function accepts `party_vectors: Dict[str, List[np.ndarray]]` as input — the caller (explorer.py) handles the mp→party mapping and passes grouped vectors in. This keeps the analysis module independent of Streamlit app constants and avoids duplicating the normalization logic.
Alternatively, the caller can pass the already-computed `party_scores` dict from `load_party_axis_scores` plus raw per-party MP vector lists. The simplest approach: add a helper in explorer.py that loads grouped MP vectors per party (reusing existing mapping logic) and pass that to the bootstrap function.
### C. Chart Enhancement (`explorer.py`)
Modify `_render_party_axis_chart` to accept optional `bootstrap_data: Dict[str, Dict] = None`.
When bootstrap_data is provided:
- For each party, compute error magnitude: `(ci_upper[axis_idx] - ci_lower[axis_idx]) / 2`
- When flip is True, error magnitude stays the same (symmetric around the negated centroid)
- Add `error_x=dict(type="data", array=error_array, visible=True)` to the party marker Scatter trace
- Parties with N=1: render with a distinct marker (diamond shape instead of circle) as visual unreliability warning
- Add `N={n_mps}` to hover text for all parties
The bootstrap computation should be cached alongside party scores using `@st.cache_data`.
### D. Delete Stale JSON File
Remove `thoughts/explorer/party_svd_scores.json`. The app never reads this file — `load_party_axis_scores` always computes live from the DB. The file was generated ad-hoc during analysis and is missing NSC.
Also remove `thoughts/explorer/axis_analysis_data.json` — same situation, ad-hoc analysis artifact not used by the app.
## Data Flow
```
DB (svd_vectors, mp_metadata)
├──→ load_party_axis_scores()
│ returns Dict[str, List[float]] (party → 50-dim centroid)
└──→ load_party_mp_vectors() [NEW helper in explorer.py]
returns Dict[str, List[np.ndarray]] (party → list of individual MP vectors)
reuses same mp→party mapping as load_party_axis_scores
compute_party_bootstrap_cis(party_vectors, n_boot=1000, ci=95, seed=42)
│ returns Dict[str, Dict] (party → {centroid, ci_lower, ci_upper, std, n_mps})
_render_party_axis_chart(party_scores, comp_sel, theme, bootstrap_data=None)
│ indexes [comp_sel - 1] from centroid and CIs
│ applies flip (negate score AND CI bounds)
│ adds error_x to Plotly Scatter trace
Streamlit renders chart with error bars
```
Both functions cached via `@st.cache_data` with same TTL.
## Error Handling
- **N=1 parties (Volt, Lid Keijzer)**: Return centroid as both CI bounds, std=0. Chart renders diamond marker. Hover says "N=1, geen betrouwbaarheidsinterval".
- **N=2 parties (50PLUS)**: CIs will be wide — that's correct, let data speak.
- **SVD vector parsing failures**: Skip MP, log warning (same as existing pattern).
- **Download/scraping failures**: Per-chunk try/except already handles this. `_fetch_body_text` returns None on failure (existing behavior).
- **update-existing with no besluit_id**: Skip motion, log. Not all motions have a besluit_id traceable to body text.
## Testing Strategy
### Unit Tests
- `test_bootstrap_fixed_seed`: Synthetic data (5 parties, varying N), fixed seed. Verify:
- Output shape matches expected structure
- CI bounds bracket centroid for all parties
- N=1 party has ci_lower == ci_upper == centroid
- Same seed produces identical output
- Larger N produces narrower CIs
### Integration Tests
- `test_bootstrap_real_db`: Run against actual DB, verify:
- Returns data for all 17 current parliament parties (+NSC)
- n_mps values match known party sizes
- CI width for D66 (N=49) << CI width for SP (N=3)
### Visual Validation
- Run Streamlit app, verify error bars appear on SVD axis charts
- Verify N=1 parties have distinct marker style
- Verify hover text includes party size
## Open Questions
None — design is straightforward. The only future enhancement would be multi-window bootstrap for axis stability testing, but that's a separate project.

@ -1,117 +0,0 @@
---
date: 2026-03-30
topic: "compass-trajectory-consistency"
status: validated
---
## Problem Statement
What we're solving and why
We must ensure the political compass (single-window snapshot) and the Explorer trajectories use the same numeric coordinate frame for the first two SVD axes so the compass numbers match the trajectory centroids exactly.
**Key issue:** Component 1 already matched, but component 2 shows persistent mismatches due to an API/shape ambiguity and occasional fallback logic differences. Fixing this prevents confusing, inconsistent numbers in the UI.
## Constraints
Non-negotiables and limitations
- The canonical coordinate frame is the Procrustes-aligned output of **compute_2d_axes** (the repo artifact that produces **positions_by_window**).
- Keep UI responsiveness and existing cache usage (@st.cache_data where present).
- Minimal, focused changes: only update Explorer call sites and the compass renderer API. Do not change the SVD pipeline outputs.
- Use the **first chronological party vector** as the fallback when a party has no MPs in a window (user decision).
## Approach
Chosen approach and why
We will adopt an explicit API for the compass renderer: pass per-party 2D projected coordinates (party → (x,y)) computed from **positions_by_window** for the target window. This eliminates shape/indexing ambiguity and guarantees numeric equality with trajectory centroids.
**Why:**
- Simpler and less error-prone than synthesizing k-dimensional vectors or changing compute_2d_axes.
- Keeps the canonical data source unchanged (positions_by_window) and makes intent explicit at the Explorer surface.
- Easy to test: we can assert numeric equality directly on the 2D coordinates.
## Architecture
High-level structure of the change
**Key pieces:**
- **compute_2d_axes** (unchanged): produces **positions_by_window** which is the canonical frame.
- **Explorer: party centroid helper:** new helper that computes per-party (x, y) means from positions_by_window for a window.
- **_build_party_axis_figure (changed API):** now accepts **party_coords: Dict[str, Tuple[float,float]]** and a selected component index (1 or 2) and uses the explicit coordinate values for plotting.
- **Call-site updates:** update all places that previously passed party SVD vectors to instead compute and pass party_coords (use first-chronological party vector only when no MPs are present for that party in the window).
## Components
Key pieces and responsibilities
- **compute_party_coords(positions_by_window, party_map, window_id):**
- Input: positions_by_window, party->MP mapping (load_party_map or similar), window id.
- Output: party -> (x_mean, y_mean). If no MPs for a party, returns None or uses fallback loader.
- **_build_party_axis_figure(party_coords, comp_sel, ...):**
- Input: explicit 2D coords; **comp_sel** ∈ {1,2}.
- Behavior: uses party_coords[p][comp_sel-1] as the axis value, constructs hover text, CIs, and plots. No indexing into long SVD vectors.
- **Fallback loader:** existing **load_party_axis_scores** (unchanged). When compute_party_coords finds no MPs, we will use the party's first chronological vector from load_party_axis_scores(window) as fallback and indicate fallback in hover text.
- **Callers to update:**
- build_svd_components_tab
- any other explorer function that previously passed party-axis vectors into _build_party_axis_figure
## Data Flow
How data moves through the updated code path
1. UI requests compass for window W and component C.
2. Explorer calls load_positions(db_path) → gets positions_by_window.
3. compute_party_coords builds per-party (x,y) means from positions_by_window[W].
4. For parties with zero MPs in W, call load_party_axis_scores(window) and take the **first chronological** party vector as fallback; annotate hover that a fallback is used.
5. Pass party_coords to _build_party_axis_figure which reads comp_sel and uses the explicit coordinate at index 0 or 1.
6. Explorer trajectories tab already computes the same centroids from positions_by_window; therefore numbers match exactly.
## Error Handling
Strategy for failures and edge cases
- If positions_by_window is missing or corrupted: surface a clear diagnostic message in the UI recommending running the SVD recompute pipeline, and avoid attempting to plot mismatched values.
- If a party has no MPs and load_party_axis_scores also returns no data: omit that party from the compass and add a tooltip note in the UI explaining why.
- If any coordinate is NaN/inf: skip plotting that party and log a debug message with the party id and window.
- Log a WARN when a fallback is used so we can find parties with no MPs across windows.
## Testing Strategy
How we will verify correctness
- Unit tests
- Synthetic positions_by_window: build a small fake positions_by_window with known MP coordinates and party→MP mappings. Assert compute_party_coords outputs expected means and that _build_party_axis_figure uses those exact numbers for components 1 and 2.
- Fallback behavior: create a window with a party that has no MPs and assert load_party_axis_scores is called and its first chronological vector is used.
- Integration tests
- Run against a small real DB snapshot used in prior verification. Assert for a representative set of parties across several windows that compass numbers equal the trajectory centroids for components 1 and 2.
- CI
- Run full test suite. Known pre-existing failures unrelated to this change may persist; document them separately but do not block this change on them.
- Manual QA
- Run Explorer locally and spot-check compass tooltips vs trajectory hover values for multiple parties and windows.
## Open Questions
Unresolved items (minor)
- None critical: the user selected the fallback preference (first chronological party vector) and agreed to update all callers without backward compatibility.
---
I'm proceeding to create the implementation plan. Interrupt if you want changes to this design.

@ -1,96 +0,0 @@
---
date: 2026-03-30
topic: "diagnose-no-plot-trajectories"
status: draft
---
## Problem Statement
The Trajectories tab currently shows **no Plotly chart at all** (not just an empty chart). We need a low-risk way to determine exactly which runtime gate or swallowed exception is preventing any plot from being rendered and fix it so the chart appears or we surface a clear error message.
**Key observation:** upstream code contains multiple early-returns (no data), and broad except/pass handlers that can silently swallow exceptions — either can cause the UI to skip calling st.plotly_chart entirely.
## Constraints
- Keep changes small and reversible.
- Do not change user-facing defaults unless gated by an explicit debug toggle or environment variable.
- Prefer adding diagnostics and logging over big refactors; short-term changes must be removable after diagnosis.
- Preserve public function locations and names used by other code/tests.
## Chosen approach (what I'll do)
I'm choosing a focused instrumentation strategy: add a temporary, opt-in **debug mode** that surfaces the exact runtime decisions and any exceptions taken along the Trajectories rendering path, and un-silence key broad excepts so we can observe stack traces.
**Why:** It's the fastest, lowest-risk way to get definitive evidence of why the plot doesn't render, and it avoids changing production logic except under an explicit debug toggle.
**High-level changes:**
- Add a **DEBUG toggle** (UI checkbox + env var EXPLORER_DEBUG_TRAJECTORIES) that enables verbose diagnostics in the Trajectories UI.
- When debug is enabled, show step-by-step status for each early-return gate: result of load_positions, axis_def presence, length of positions_by_window, centroids size, mp_positions size, helper returns (fig/trace_count) and any exception tracebacks.
- Replace the helper-call swallow (`except Exception: pass`) around select_trajectory_plot_data with a handler that logs and displays the exception (only when debug is enabled) and increments a visible diagnostic counter.
- Add compact, structured diagnostics to the existing DEBUG expander (windows_count, party_map_count, centroids_sample, mp_positions_sample, helper_trace_count, helper_exception_string).
## Alternatives considered (brief)
1. Force-show MP fallback unconditionally. Pros: quickly confirm plotting plumbing works. Cons: noisy, may mask root cause and changes production behaviour.
2. Heavy refactor to move pure plotting logic into an import-safe separate module and run offline tests. Pros: clean separation and easier tests. Cons: slower and higher-risk for this urgent diagnosis.
I rejected both for immediate work because they are heavier than necessary to learn the root cause.
## Architecture (where changes live)
- Explorer UI (explorer.py) — add debug checkbox and diagnostic panel wiring inside build_trajectories_tab.
- Diagnostics collector (small helper in explorer_helpers.py or local helper) — produce structured status dicts (counts, samples) used by the UI.
- Error surfacer — modify the select_trajectory_plot_data call-site to log exceptions (logger.exception) and, when debug enabled, call st.exception(...) or st.text_area(...) with the traceback.
## Components and responsibilities
- **Debug toggle UI:** checkbox + env var binding; enables/disables verbose diagnostics.
- **Diagnostic collector:** pure helper that inspects positions_by_window, party_map, centroids, mp_positions and returns compact samples and counts.
- **Exception handler change:** convert broad `except: pass` at the helper boundary into `except Exception as e: logger.exception(e); diagnostic['select_helper_exception']=traceback; if debug: st.exception(e)`.
- **Temporary UX:** display a compact, clearly labeled diagnostics block inside the DEBUG expander. Make it obvious this is a temporary troubleshooting aid.
## Data flow (quick)
- load_positions(db) -> positions_by_window, axis_def
- diagnostic collector inspects positions_by_window and party_map
- build_trajectories_tab calls select_trajectory_plot_data(...) inside a try/except
- on success: use returned fig and trace_count to decide whether to call st.plotly_chart
- on exception: diagnostic collector records traceback and UI shows it if debug enabled
## Error handling strategy
- Do not swallow exceptions silently at the helper boundary. Always log with logger.exception(...).
- Only surface full tracebacks to the Streamlit UI when **debug mode** is enabled.
- Keep production behaviour unchanged when debug mode is off.
## Testing approach
- Unit tests for the diagnostic collector with synthetic positions_by_window covering: empty data, partial centroids, and full centroids.
- Unit test that simulates the helper raising an exception (monkeypatch) and asserts that the exception is logged and (when debug enabled) that the diagnostics struct contains the exception string.
- Manual reproduction: run Streamlit locally with EXPLORER_DEBUG_TRAJECTORIES=1 and the same DB used in production to capture the diagnostics panel and fix the underlying issue.
## Open questions
- Can you reproduce the issue locally (same DB and same command to start Streamlit)? I assume yes and will base debug advice on that.
- Are we allowed to enable a short-lived debug toggle in production logs if needed, or will you only run this locally?
---
I'm proceeding to create the design doc. Interrupt if you want changes.
## Environment management (use uv, not pip)
We will not use pip directly. Use the project's `uv` tool to manage dependencies and run scripts so the environment is reproducible and follows local project conventions.
Recommended commands:
- Add duckdb to the project virtual environment:
- `uv add duckdb`
- Run the diagnostic CLI with debug enabled:
- `EXPLORER_DEBUG_TRAJECTORIES=1 uv run python scripts/diagnose_trajectories_cli.py`
- Start Streamlit inside the uv-managed environment (example):
- `uv run streamlit run pages/2_Explorer.py`
Notes:
- If the planner or any follow-up steps need to install or run packages, they should use `uv add` and `uv run` rather than `pip install` or direct interpreter calls.
- If `uv` is not on PATH in a particular environment, prefer `python -m uv` or consult the project README/ARCHITECTURE.md for local developer environment instructions.

@ -1,102 +0,0 @@
---
date: 2026-03-30
topic: "fix-missing-trajectories"
status: draft
---
## Problem Statement
We're seeing empty/absent party trajectories in the Explorer "Partij Trajectories" tab despite compute_2d_axes producing windows and many parties having centroids. The UI shows no visible traces for selected parties in some runs, making the feature unreliable for end users.
**Root hypothesis:** either (A) selected parties have only missing/None centroid values at plot time, (B) a runtime exception (e.g. float(None)) aborts trace creation silently, or (C) label/party normalization mismatch filters out traces. We need a low-risk, diagnostic-first fix to reveal which of these is happening and restore visible traces quickly.
## Constraints
- Preserve public function names and locations: **compute_2d_axes, classify_axes, load_positions, _build_party_axis_figure, build_trajectories_tab, build_compass_tab, _spline_smooth**.
- Avoid large refactors; prefer reversible, minimal changes that surface diagnostics.
- Do not expose internal modal tokens ("As 1"/"As 2") to end users; use axis_classifier.display_label_for_modal(...) or choose_trajectory_title() where appropriate.
- Visual traces should remain smoothed; hover must include raw centroid values for auditability.
## Chosen Approach (what we'll implement)
I'm choosing a **minimal triage-first approach**: add precise diagnostics and defensive conversions around plotting, so we either restore visible traces immediately or produce deterministic diagnostics that reveal the real data mismatch.
**Why:** low risk, fastest feedback loop. This will either fix simple runtime errors (safe float conversion, exceptions while adding traces) or provide clear evidence that deeper normalization changes are required.
**Key changes:**
- Add a small helper: **safe_float(x)** — converts numeric-like values to floats, maps None/NaN/invalid -> float('nan') without raising.
- In build_trajectories_tab/_build_party_axis_figure:
- Wrap per-party fig.add_trace(...) in try/except and log the exception with party id/name to the DEBUG expander instead of aborting the whole plot.
- Emit per-selected-party diagnostics into the existing DEBUG expander: number of raw centroids, counts of non-NaN coordinates, example first 5 raw xs/ys, and lengths per window.
- Replace direct float(...) casts on raw centroid values used in hover/customdata with safe_float.
- Ensure per-MP fallback plotting path still exists and can be forced via EXPLORER_FORCE_SHOW_TRAJECTORIES for diagnosis.
- Add unit tests for safe_float and targeted integration tests that assert traces are created when centroids contain NaNs and when party_map exists.
## Alternatives Considered
1) Full normalization sweep: align party centroids to global windows (fill missing with NaN) and accept parties with at least one non-NaN value.
- Pros: robust long-term fix, canonical data shape.
- Cons: larger change surface, higher risk, slower to validate in production data.
2) Refactor plotting pipeline to use a normalized DataFrame (rows=windows, cols=parties) and build traces from that canonical shape.
- Pros: clearer data flow, easier testing.
- Cons: larger refactor, touches many modules.
I considered both but rejected them for immediate work because we need quick deterministic diagnostics to determine if these larger efforts are warranted.
## Architecture (high-level)
**Inputs:** positions_by_window (from compute_2d_axes), party_map, selected_parties.
**Flow:**
- compute_2d_axes -> positions_by_window
- load_positions / helpers -> party-centroid dicts keyed by party
- build_trajectories_tab calls _build_party_axis_figure to build per-party traces
- _build_party_axis_figure uses smoothing helpers (_spline_smooth) to produce visible traces and also builds hover customdata with raw centroid values (smoothed coords for the trace, raw values in customdata)
**Intervention points:** build_trajectories_tab and _build_party_axis_figure (small helper additions and safe conversion), plus tests and diagnostic output in the DEBUG expander.
## Components and Responsibilities
- **safe_float helper:** convert inputs to float or return float('nan') safely. Centralized to avoid repeated float(None) errors.
- **Diagnostic emitter:** small utility used by build_trajectories_tab to format and write per-party diagnostic rows to the DEBUG expander.
- **Plotly trace wrapper:** per-party try/except around fig.add_trace that writes exception details to diagnostics instead of failing silently.
- **Unit + integration tests:** verify hover customdata creation, safe_float behaviour, trajectories rendered with partial centroids, and UI label mapping does not emit "As 1"/"As 2".
## Data Flow (detailed)
- compute_2d_axes produces windows (time labels) and canonical positions_by_window.
- load_positions consumes positions_by_window and returns a mapping party -> list of centroids (one per window) where centroids may contain None/NaN for missing windows.
- build_trajectories_tab selects parties and for each party calls _build_party_axis_figure which:
- extracts raw xs_raw, ys_raw arrays aligned to windows
- computes smoothed xs_plot, ys_plot via _spline_smooth
- builds Plotly trace using xs_plot/ys_plot for the line and includes xs_raw/ys_raw in customdata with safe_float conversion
- adds the trace inside a try/except and emits any exception + raw samples to debug
## Error Handling
- Use safe_float to prevent float(None) and similar runtime TypeErrors when building hover/customdata.
- Use per-party try/except to avoid a single-party failure blanking the whole chart; log the error and continue plotting other parties.
- Show structured diagnostics in the existing DEBUG expander with these fields: party name, windows_count, raw_centroid_count, non_nan_count, sample_raw_xs, sample_raw_ys, exception (if any).
## Testing Strategy
- Unit tests:
- safe_float: None -> nan, '1.23' -> 1.23 (if strings are expected), invalid -> nan
- UI label helpers: axis_classifier.display_label_for_modal(...) and choose_trajectory_title() do not return raw "As 1"/"As 2"
- Integration tests (lightweight):
- Build a synthetic positions_by_window with some None / NaN holes and assert _build_party_axis_figure returns a Plotly trace object (or equivalent structure) and that customdata contains numeric/NaN values not exceptions.
- Test that build_trajectories_tab's DEBUG expander receives the expected diagnostic entries for a party with missing centroids.
- Manual verification steps (later): run full Streamlit with duckdb/plotly installed and open Explorer -> Trajectories to confirm traces are visible for typical parties and inspect the DEBUG expander.
## Open Questions
- Are there other UI locations still exposing raw modal labels? We should sweep the repo; the tests already added help with this, but they may not be exhaustive.
- Do we want safe_float to try to coerce numeric strings? My proposal is **no coercion** (only pass-through numeric types and map others -> nan) unless tests show string encodings exist in centroid data.
- If diagnostics show that many parties are missing centroids entirely, we'll need the full normalization sweep (alternative #1).
---
I'm proceeding to create the design doc. Interrupt if you want changes.

@ -1,113 +0,0 @@
---
date: 2026-03-31
topic: "diagnose-no-plot-trajectories"
status: draft
---
## Problem Statement
We need to restore visible party trajectories in the Explorer "Partij Trajectories" tab so the Plotly chart shows non-empty traces for realistic windows, and provide opt-in diagnostics that explain why traces are missing.
**Why:** Users see an empty chart in some environments/windows. This could be caused by upstream data gaps, malformed coordinates, strict filtering in helpers, or unhandled exceptions in the plotting helper. We must gather evidence, fix the actual cause, and avoid changing production behavior unless debug is explicitly enabled.
## Constraints
- Keep changes minimal and reversible; prefer instrumentation and small helper fixes over large refactors.
- Diagnostics must be opt-in (EXPLORER_DEBUG_TRAJECTORIES env var and UI checkbox).
- Helpers must be import-safe and pure so unit tests run without heavy GUI/DB dependencies.
- Use project's environment management (uv) for local runs and CI — do not call pip directly.
## Approach (chosen)
I recommend a **diagnostic-first** approach followed by targeted small fixes. Steps:
- Add a small, dedicated diagnostic writer script that emits a structured JSON diagnostics artifact for representative windows from data/motions.db.
- Improve input validation and normalization in load_positions / compute_2d_axes (coerce numeric strings, treat 'nan'/'None' consistently, ignore out-of-range coords) so helpers are robust to malformed rows.
- Keep current gates that avoid plotting when inputs are invalid, but record precise diagnostics into module-level _last_trajectories_diagnostics and the CLI JSON output.
- Add unit tests for the normalization logic and for inspector behaviors; add a small integration diagnostic test that runs via uv and checks trace_count > 0 for a known-good sample window.
Reasoning: we already have instrumentation capturing stages (load_positions_empty, no_mp_positions, select_helper_exception, trace_count). Gathering structured evidence will let us pick a minimal fix (data normalization or filter tweak) without risky behaviour changes.
## Alternatives considered
- Aggressive fallback rendering: render approximated centroids when traces are empty. Rejected because it may mask data quality issues and mislead users.
- Upstream data repair: fix svd pipeline / DB rows before Explorer. Good long-term, but requires cross-team coordination and longer cycle — we should diagnose first.
## Architecture
**High-level:** The Explorer plotting pipeline remains the same; we add a diagnostics writer and a small normalization layer.
- Data source: data/motions.db (svd_vectors and party maps)
- Pipeline: get_uniform_dim_windows -> compute_2d_axes -> load_positions -> inspect_positions_for_issues -> compute_party_centroids -> select_trajectory_plot_data -> Plotly fig
- Diagnostics: module-level _last_trajectories_diagnostics plus a CLI script that runs representative windows and writes JSON artifacts to thoughts/shared/diagnostics/YYYY-MM-DD-trajectories-diagnostics.json
## Components and responsibilities
- Diagnostic CLI (scripts/save_trajectories_diagnostics.py):
- Run a configurable sample of windows, call compute_2d_axes, load_positions, inspect_positions_for_issues, select_trajectory_plot_data.
- Emit structured JSON with per-window diagnostics and aggregated summary.
- Normalization helpers (explorer_helpers.normalize_positions):
- Coerce numeric strings to floats, coerce common null tokens to NaN, clamp improbable values, and return a normalized positions_by_window structure.
- Pure, import-safe, and covered by unit tests.
- Instrumentation (explorer._last_trajectories_diagnostics):
- Record stage, window id, counts (n_windows, n_entities per window), mp_positions_count, any helper exceptions/tracebacks, and sample rows.
- UI changes (pages/2_Explorer.py):
- Add an opt-in debug checkbox that enables detailed diagnostics in the UI when checked (or when EXPLORER_DEBUG_TRAJECTORIES=1).
- Do not change default plotting or filtering behavior when debug is disabled.
- Tests
- Unit tests for normalization and inspector.
- Diagnostic integration test run via uv (non-flaky, uses a small sample or DB fixture).
## Data Flow
1. Caller requests trajectories tab (build_trajectories_tab).
2. call get_uniform_dim_windows(DB) -> returns window descriptors.
3. For each sampled window, compute_2d_axes(window) -> returns raw positions_by_window (possibly malformed).
4. normalize_positions(positions_by_window) -> cleaned positions_by_window.
5. inspect_positions_for_issues(positions_by_window) -> returns diagnostics (missing coords, string values, NaNs, zero-length paths).
6. compute_party_centroids(positions_by_window) -> party centroids and mp_positions.
7. select_trajectory_plot_data(centroids, mp_positions, options) -> returns fig, trace_count, banner_text. On exception capture diagnostics.
8. If trace_count == 0 -> do not call st.plotly_chart; show friendly message and, if debug enabled, show the collected diagnostics and link to the saved JSON artifact.
## Error Handling
- Capture exceptions at helper boundaries and record to select_trajectory_plot_data._last_diagnostics and module _last_trajectories_diagnostics. Do not raise to Streamlit UI unless debug is enabled.
- Normalize inputs proactively to reduce exception surface (avoid type errors from strings/None).
- If a helper raises, return a safe empty fig and banner that suggests enabling diagnostics.
- JSON diagnostics writer writes atomically (write to a .tmp file then rename) to avoid partial files being consumed.
## Testing Strategy
- Unit tests (fast, import-safe):
- normalize_positions handles strings, 'nan', None, and clamps extremes.
- inspect_positions_for_issues detects empty windows, NaNs-only windows, and malformed coordinate types.
- select_trajectory_plot_data returns (fig, trace_count>0) for a known-good small sample and sets diagnostics correctly when trace_count==0.
- Integration tests (run under uv in CI or locally):
- Diagnostic CLI can be executed via uv run and creates a JSON diagnostic artifact for a small sample; test asserts artifact exists and is valid JSON with expected fields.
- Manual verification:
- Run EXPLORER_DEBUG_TRAJECTORIES=1 uv run python scripts/save_trajectories_diagnostics.py --db data/motions.db --out thoughts/shared/diagnostics/<date>.json
- Open the Explorer locally and reproduce an empty-chart scenario; enable debug checkbox and view diagnostics.
## Open Questions
1. Do we prefer automatic normalization (silently fixing data) or conservative behavior (report and require upstream fix)? My recommendation: auto-normalize common, unambiguous issues (strings -> numbers, common null tokens) and surface anything ambiguous in diagnostics.
2. Where should diagnostic artifacts live long-term? thoughts/shared/diagnostics is fine for short-term; consider a single diagnostics/ bucket for CI artifacts.
3. Which windows should the diagnostics CLI sample by default? I propose sampling: 1) first 10 windows, 2) 10 windows evenly spaced, and 3) one window that previously produced an empty result, if known.
I'm proceeding to create the design doc. Interrupt if you want changes.

@ -1,120 +0,0 @@
---
date: 2026-04-12
topic: "SVD Axis Label Alignment Fix"
status: validated
---
## Problem Statement
The SVD components tab has a **systemic label alignment bug**. The `left_pole` and `right_pole` fields in `SVD_THEMES` are **static labels** that assume a fixed flip direction. But `compute_flip_direction()` computes the flip **dynamically per window** based on party centroids. When the runtime flip differs from the static flip, labels become misaligned with the actual data.
**Specific symptom**: Axis 3 "marktliberaal" label appeared on the LEFT side instead of the RIGHT, because the static labels assumed `flip=True` but the runtime computation could return `flip=False` for certain windows.
## Constraints
- Right-wing parties (PVV, FVD, JA21, SGP) centroid must appear on the RIGHT side of all SVD axes
- SVD labels are derived from voting patterns (outlier MPs + representative motions), not from ideology or party branding
- Individual parties may vary; the **centroid** of right-wing parties should be on the right
- The runtime flip direction is computed per-window by `compute_flip_direction()` in `analysis/svd_labels.py`
## Approach
**Remove `left_pole` and `right_pole` from all SVD_THEMES entries** and simplify the rendering code to always derive labels from `positive_pole`, `negative_pole`, and runtime `flip`.
**Why**: The static labels are 100% redundant with the fallback logic. Every `left_pole`/`right_pole` in the config is identical to what the fallback would produce for the static `flip` value. Removing them eliminates the bug entirely and simplifies the code.
**Alternatives rejected**:
- **Swap labels when flip differs** — More complex, requires tracking "expected flip" vs "runtime flip"
- **Store both flip directions** — Doubles config size, still fragile
## Architecture
### Files to Change
1. **`analysis/config.py`** — Remove `left_pole` and `right_pole` keys from all 10 SVD_THEMES entries (lines 83-84, 100-101, 119-120, 135-136, 153-154, 171-172, 191-192, 210-211, 230-231, 250-251)
2. **`explorer.py` lines 2809-2825** — Remove the `semantic_left`/`semantic_right` branch. The motion detail section should always derive labels from `positive_pole`, `negative_pole`, and `flip`, matching the fallback logic:
```python
# BEFORE (buggy):
semantic_left = theme.get("left_pole") if theme else None
semantic_right = theme.get("right_pole") if theme else None
if semantic_left and semantic_right:
left_pole, right_pole = semantic_left, semantic_right
left_motions, right_motions = (
(pos_motions, neg_motions) if flip else (neg_motions, pos_motions)
)
left_arrow, right_arrow = ("▲", "▼") if flip else ("▼", "▲")
elif flip:
...
# AFTER (fixed):
if flip:
left_pole, right_pole = pos_pole, neg_pole
left_motions, right_motions = pos_motions, neg_motions
left_arrow, right_arrow = "▲", "▼"
else:
left_pole, right_pole = neg_pole, pos_pole
left_motions, right_motions = neg_motions, pos_motions
left_arrow, right_arrow = "▼", "▲"
```
3. **`explorer.py` lines 969-970, 1089-1090, 1256-1257** — These already use `theme.get("left_pole", fallback)` with correct fallback. After removing `left_pole`/`right_pole` from config, they automatically fall back to derived values. No changes needed.
4. **`scripts/validate_svd_themes.py`** — Update validation to remove `left_pole`/`right_pole` checks (lines 204, 281-282)
### Data Flow (After Fix)
```
Runtime: compute_flip_direction(comp, party_scores)
→ Returns True/False based on right-wing vs left-wing centroid means
→ Stored in SVD_THEMES[comp]["flip"]
Rendering:
flip = theme.get("flip", False)
positive_pole = theme.get("positive_pole", "")
negative_pole = theme.get("negative_pole", "")
if flip:
left_label = positive_pole # Positive on left after flip
right_label = negative_pole # Negative on right after flip
else:
left_label = negative_pole # Negative on left (standard)
right_label = positive_pole # Positive on right (standard)
# Labels always match data because both derive from the same flip value
```
## Components
### `analysis/config.py` — SVD_THEMES Configuration
- Remove `left_pole` and `right_pole` from all 10 component entries
- Keep `positive_pole`, `negative_pole`, and `flip` (flip is overridden at runtime)
### `analysis/svd_labels.py` — Label Lookup
- `get_svd_theme()` returns theme dict — after removing `left_pole`/`right_pole`, callers that need display labels must derive them from `positive_pole`/`negative_pole` and `flip`
- `get_svd_label()` returns short label — unaffected
### `explorer.py` — Rendering
- **Motion detail section** (lines ~2809-2825): Remove semantic label branch, always use flip-aware derivation
- **1D chart** (lines ~969, ~1089, ~1257): Already uses `theme.get("left_pole", fallback)` — will automatically fall back
### `scripts/validate_svd_themes.py` — Validation
- Remove `left_pole`/`right_pole` checks
## Error Handling
- If `compute_flip_direction()` fails (insufficient party data), it returns `False` (no flip). Labels then derive from `negative_pole` (left) and `positive_pole` (right), which is correct for the default orientation
- If `SVD_THEMES` entry missing for a component, fallback labels use `positive_pole`/`negative_pole` with empty strings, flip-aware derivation still works
- The `except Exception: pass` block in lines 2688-2690 preserves existing static flip values if runtime computation fails
## Testing Strategy
1. **Unit test**: Verify `compute_flip_direction()` returns correct values for all 10 components with known party scores
2. **Visual verification**: Run `uv run streamlit run Home.py`, check SVD Components tab — right-wing parties (PVV, FVD, JA21, SGP) should appear on the RIGHT side of all axes
3. **Regression check**: For the current window where static flip matches runtime flip, labels should be identical to before the fix
4. **Edge case**: Test with a window where runtime flip differs from static flip — labels should still be correct
## Open Questions
None — the fix is straightforward and complete.

@ -1,113 +0,0 @@
---
date: 2026-04-16
topic: "GroenLinks-PvdA Merger Dynamics in SVD Space"
status: validated
---
## Problem Statement
We need concrete, data-driven findings about the GroenLinks-PvdA merger for a blog post. Four questions:
1. How similar were GL and PvdA in SVD space before the merger?
2. How cohesive is the merged party compared to others?
3. When did GL and PvdA converge in SVD space?
4. Which parties shifted the most, and how do GL/PvdA compare?
## Constraints
- Investigation-only: query the database, report findings, no code changes
- Database: `data/motions.db` (DuckDB), SVD vectors in `svd_vectors` table
- Party affiliation: use `mp_votes.party` for historical labels (not `mp_metadata` which only tracks current party)
- SVD vectors are at MP level (`entity_type='mp'`); party centroids must be computed from MP vectors
- Cross-window scale differences require normalization (distances as fraction of avg inter-party distance)
## Approach
**Method**: Compute party centroids from MP-level SVD vectors, grouped by `mp_votes.party` for historical party labels. Normalize all distances as fractions of the average inter-party distance in each window to make cross-window comparisons meaningful.
**Data source**: `svd_vectors` table (MP vectors per window) joined with `mp_votes` (historical party labels per vote, majority party per MP per year).
**Key discovery**: `mp_metadata` only tracks current party — pre-merger GL and PvdA MPs who merged are now labeled "GroenLinks-PvdA". We must use `mp_votes.party` for historical accuracy.
## Architecture
N/A — this is a data investigation, not a system design.
## Components
### Finding 1: GL-PvdA Pre-Merger Similarity
GL and PvdA were already remarkably close in SVD space well before the merger:
| Year | GL↔PvdA Distance | As % of Avg Inter-Party Distance | Nearest Other Party | Distance to Nearest |
|------|------------------|----------------------------------|---------------------|---------------------|
| 2019 | 2.10 | 10.5% | PvdD | 9.6 |
| 2020 | 2.23 | 5.0% | CU | 28.7 |
| 2021 | 1.46 | 4.4% | FVD | 11.6 |
| 2022 | 1.16 | 2.8% | FVD | 6.5 |
The nearest non-PvdA party to GL was always roughly 4.5–13x further away than PvdA itself. They converged over time — from 10.5% of average inter-party distance in 2019 down to 2.8% in 2022.
### Finding 2: Post-Merger Cohesion
| Year | GL-PvdA Spread | Avg Other Spread | Ratio | Cohesion Rank |
|------|---------------|-------------------|-------|---------------|
| 2023 | 1.50 | 19.95 | 0.08 | #1 most cohesive |
| 2024 | 14.05 | 18.47 | 0.76 | Mid-pack |
| 2025 | 28.09 | 18.09 | 1.55 | Below average |
| Current | 43.30 | 28.05 | 1.54 | Below average |
The merged party started as the most cohesive party in parliament (2023), but by 2025 its internal spread is 55% above average — the merger created a party that's internally more diverse than typical Dutch parties.
### Finding 3: Merger Convergence Timeline
| Window | GL↔PvdA Distance | Normalized Ratio |
|--------|------------------|------------------|
| 2019-Q3 | 0.98 | 25.5% |
| 2020-Q1 | 1.38 | 18.6% |
| 2021-Q1 | 1.58 | 19.3% |
| 2022-Q3 | 0.86 | 9.4% |
| 2023-Q1 | 0.58 | 7.1% |
| **2023-Q3** | **0.37** | **4.5%** |
| 2023-Q4 | 0.46 | 5.5% |
By Q3 2023 — just before the formal merger — GL and PvdA centroids were only 4.5% of the average inter-party distance apart. Essentially indistinguishable in voting pattern space.
### Finding 4: Large Positional Shifts
GL and PvdA were the most stable parties in parliament (normalized drift per year):
| Period | GL Drift | PvdA Drift | VVD Drift | D66 Drift | PVV Drift |
|--------|----------|------------|-----------|-----------|-----------|
| 2019→2020 | 14.5% | 16.6% | 140.2% | 145.6% | 121.9% |
| 2020→2021 | 21.8% | 25.8% | 115.8% | 82.2% | 207.3% |
| 2021→2022 | 11.6% | 10.8% | 70.8% | 91.2% | 51.9% |
| 2022→2023 | 54.5% | 23.3% | 109.7% | 177.3% | 222.1% |
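While VVD and D66 moved 70-177% per year, GL and PvdA typically drifted only 10-26%. The one exception is GL's 54.5% shift in 2022→2023, which was still well below the VVD, D66, and PVV drift in that period. The merger partners were largely anchored in place while the rest of the landscape shifted.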
## Data Flow
1. Query `svd_vectors` for MP vectors per window
2. Join with `mp_votes` to determine each MP's majority party in that year
3. Compute party centroids as mean of member vectors
4. Compute pairwise distances and normalize by average inter-party distance
5. Track convergence timeline using quarterly windows
## Error Handling
- Windows with insufficient MPs (<3 per party) are excluded from centroid calculations
- The `mp_votes.party` column uses multiple label variants ("GroenLinks", "GL", "GroenLinks-PvdA") — normalized in queries
- The 2023 transition year has mixed labels (some GL, some PvdA, some GL-PvdA) — handled by majority-vote assignment per MP
## Testing Strategy
N/A — data investigation. Key validation checks:
- Cross-reference MP counts with known parliament compositions
- Verify that GL + PvdA MP counts match expected seat counts per year
- Confirm that convergence timeline aligns with known political events (merger announcement Oct 2023)
## Open Questions
- Should we compute cosine similarity instead of Euclidean distance for cross-window normalization?
- The 2025 and current_parliament windows show very different absolute scales — should we normalize vectors before computing distances?
- The few remaining "GL" (8) and "PvdA" (5) labeled MPs in 2025 may be artifacts — should they be included in the GL-PvdA group?

@ -1,153 +0,0 @@
---
date: 2026-04-16
topic: "political-compass-blog-update"
status: draft
---
## Problem Statement
We need the "political compass" blog post under thoughts/ to show figures and numbers that exactly match the repository's canonical pipeline outputs. That requires producing reproducible assets (scree plots, party-agreement CSVs and heatmaps) from the codebase, placing them in docs/research, and making minimal edits to the blog HTML to reference those files.
**Key constraint:** All numbers and figures must come from the canonical functions or the authoritative DB (data/motions.db). No invented values.
## Constraints
**Non-negotiables:**
- Use canonical functions (analysis.political_axis.compute_svd_spectrum, analysis.explorer_data.load_scree_data) as data sources.
- Place generated files under **docs/research/** with reproducible, deterministic filenames.
- Keep blog edits minimal and reversible: swap the markdown table for an HTML table and insert `<img>` tags and CSV links.
**Operational constraints:**
- Plotly SVG export requires kaleido; provide a reliable matplotlib fallback.
- data/motions.db must contain required rows (e.g. singular_values) or we must run compute_svd_spectrum first.
## Approach (chosen)
I'm choosing a single, pragmatic approach that balances reproducibility, low-risk changes, and minimal new dependencies:
**Chosen approach:** write a small export script (scripts/export_blog_assets.py) that:
- Calls **analysis.political_axis.compute_svd_spectrum(db_path)** for the multi-window scree and **analysis.explorer_data.load_scree_data(db_path)** for the current_parliament scree fallback.
- Re-uses the explorer._render_scree_plot logic (or extracts the Plotly-building code into a helper) to build a Plotly Figure and export SVG via **fig.write_image(..., format='svg')** when kaleido is available.
- Falls back to matplotlib-based rendering if fig.write_image fails.
- Computes pairwise party agreement / GL–PvdA trajectory using SQL and the logic from scripts/generate_extra_charts.py, writes CSV with pandas.DataFrame.to_csv(...), and writes a heatmap SVG to docs/research.
- Writes assets with deterministic filenames into **docs/research/** and prints/returns the exact paths and the key numeric values (EVR% for caption).
Why this approach:
- It uses the canonical functions already present in the codebase so numbers match UI and tests.
- Keeps edits limited to a single script and the blog HTML, making review and rollback trivial.
- Provides a clear fallback for environments without kaleido.
Alternatives considered (brief):
1) Modify existing scripts (scripts/generate_extra_charts.py) to write into docs/research.
- Pro: reuses plotting code directly.
- Con: those scripts are opinionated about output layout and write HTML, not SVG/CSV; harder to keep minimal change.
2) Recompute everything via pipeline.run_pipeline and copy pipeline outputs to docs/research.
- Pro: purely canonical pipeline outputs.
- Con: heavier — pipeline run may be slow and more intrusive; more environment setup.
I rejected them because the export-script approach is lighter, reproducible, and gives explicit control over filenames and fallbacks.
## Architecture
High-level: a small command-line script (scripts/export_blog_assets.py) driven by the canonical DB, the analysis layer, and the visualize helpers.
**Major pieces:**
- **Exporter script**: orchestrates reads from DB, computes metrics, builds figures, writes CSV/SVG into docs/research.
- **Canonical analysis functions**: analysis.political_axis and analysis.explorer_data (data source only, no side effects).
- **Plot builders**: reuse of explorer._render_scree_plot / analysis.visualize helpers to produce Plotly Figure objects.
- **Fallback renderer**: minimal matplotlib routines producing PNG/SVG if Plotly image export fails.
- **Blog edit**: minimal HTML changes in thoughts/blog-post-political-compass.html to reference the generated assets.
## Components and Responsibilities
**scripts/export_blog_assets.py** (new)
- Inputs: path to DB (default data/motions.db), optional --window (e.g. 2023Q3 or 'current_parliament'), output directory (default docs/research).
- Responsibilities:
- Run compute_svd_spectrum(db_path) and/or load_scree_data(db_path).
- Build scree Plotly figures and export SVGs (multi-window and current_parliament).
- Compute party agreement matrices, export CSVs and heatmap SVGs for requested window(s).
- Print the EVR numbers and paths for copy into blog captions.
- Exit non-zero on fatal errors (missing DB, empty results) with clear messages.
**Explorer / analysis helpers**
- analysis.political_axis.compute_svd_spectrum(db_path): canonical EVR source for multi-window scree.
- analysis.explorer_data.load_scree_data(db_path): canonical loader for current_parliament scree (fallback).
- explorer._render_scree_plot(importances): returns Plotly figure in Streamlit — reuse the building logic to return a Figure for export.
**Fallback renderer**
- Minimal matplotlib code that takes the EVR vector and draws a bar/scree-like chart and saves as SVG/PNG.
**Blog file edits**
- thoughts/blog-post-political-compass.html: replace markdown pipe table with an HTML table and insert `<img src="../docs/research/scree_multiwindow.svg">` and `<img src="../docs/research/scree_current_parliament.svg">` plus CSV links.
## Data Flow
1. Exporter reads data from **data/motions.db**.
2. Calls compute_svd_spectrum(db_path) to get multi-window EVR arrays.
3. Calls load_scree_data(db_path) to get 'current_parliament' singular values if available.
4. Builds Plotly Figures for scree plots (multi-window and current_parliament).
5. Exports Figures to `docs/research/*.svg` (uses fig.write_image when kaleido is present, otherwise matplotlib fallback).
6. Computes party agreement matrices via the SQL used in scripts/generate_extra_charts.py, writes CSVs to **docs/research/**.
7. Writes a party-heatmap SVG to **docs/research/**.
8. The blog HTML references those files via relative paths (../docs/research/...).
## Error Handling Strategy
**Fail early with informative messages.**
- If DB is missing or unreadable: exit with a clear error and suggestion to run the pipeline or point --db to a valid file.
- If compute_svd_spectrum returns empty / no windows: print guidance to run scripts/recompute_svd.py or pipeline.run_pipeline and exit non-zero.
- If Plotly image export fails (kaleido missing): log the error, attempt matplotlib fallback, and continue.
- If CSV or SVG write fails due to IO permissions: log path and permission error and exit non-zero (don't silently drop assets).
All non-fatal warnings are printed with suggested remediation steps.
## Testing Strategy
Local verification steps (automated script + manual checks):
- Unit smoke: run scripts/export_blog_assets.py --db data/motions.db --dry-run to verify the functions produce non-empty arrays and print expected output paths.
- Functional: run the script to produce assets and assert files exist: `docs/research/scree_multiwindow.svg`, `docs/research/scree_current_parliament.svg`, `docs/research/party_agreement_<window>.csv`, `docs/research/party_agreement_<window>.svg`.
- Sanity numbers: script prints the top EVR values used in captions. Cross-check printed EVR against explorer UI numbers (run explorer locally if needed).
- Blog preview: open thoughts/blog-post-political-compass.html in browser (file://) and confirm images render and captions match printed numbers.
Add a basic test under tests/ that runs the exporter against a small fixture DB (or a tmp DB produced from tests/test_political_compass.py fixtures) to assert the script creates at least the CSV and a PNG/SVG.
## Effort Estimate & Schedule
- Draft exporter script and fallback renderer: 2–3 hours.
- Wire up SQL for party agreement and CSV export: 1 hour.
- Run and verify assets locally (including possible compute_svd if DB missing): 30–60 minutes.
- Blog HTML edits and quick preview: 30 minutes.
- Add a minimal test + docs: 1 hour.
Total: ~5–6 hours of focused work (assuming data/motions.db is present and reasonably up-to-date). If compute_svd must be run across many windows or pipeline.run_pipeline is required, add 30–90 minutes.
## Risks & Mitigations
- **Missing singular_values row for current_parliament.** Mitigation: script detects and runs compute_svd_spectrum or instructs operator to run scripts/recompute_svd.py.
- **Kaleido not installed causing fig.write_image to fail.** Mitigation: implement matplotlib fallback and print clear message recommending pip install kaleido.
- **DB schema drift or missing party ids.** Mitigation: script validates expected tables/columns and fails with actionable message.
- **Assets not committed to git.** Mitigation: recommend the maintainer commit the generated files; optionally script can print a git add/commit suggestion but must not auto-commit without user request.
## Open Questions
- Which specific window id(s) do we want for the GL–PvdA CSV/heatmap? (I'll default to 'current_parliament' and allow an explicit --window flag.)
- Should the script auto-commit generated assets to git, or should it stop and ask human to commit? (I recommend manual commit.)
---
I'm proceeding to create the design doc. Interrupt if you want changes.

@ -1,52 +0,0 @@
{
"generated_at": "2026-04-01T00:38:56.921202",
"db_path": "/home/sgeboers/Projects/stemwijzer/data/motions.db",
"db_exists": true,
"scenarios": {
"normal": {
"name": "normal",
"use_real_data": true,
"monkeypatched": {},
"real_data": {
"positions_windows": 12,
"party_map_count": 1036
},
"diagnostics": {},
"select_helper_diagnostics": {}
},
"empty_positions_ARTIFICIAL": {
"name": "empty_positions_ARTIFICIAL",
"use_real_data": false,
"monkeypatched": {
"load_positions": "ARTIFICIAL_EMPTY"
},
"diagnostics": {
"stage": "load_positions_empty",
"positions_by_window_len": 0
},
"select_helper_diagnostics": {}
},
"empty_party_map_ARTIFICIAL": {
"name": "empty_party_map_ARTIFICIAL",
"use_real_data": false,
"monkeypatched": {
"load_party_map": "ARTIFICIAL_EMPTY"
},
"diagnostics": {},
"select_helper_diagnostics": {}
},
"both_empty_ARTIFICIAL": {
"name": "both_empty_ARTIFICIAL",
"use_real_data": false,
"monkeypatched": {
"load_positions": "ARTIFICIAL_EMPTY",
"load_party_map": "ARTIFICIAL_EMPTY"
},
"diagnostics": {
"stage": "load_positions_empty",
"positions_by_window_len": 0
},
"select_helper_diagnostics": {}
}
}
}

@ -1,44 +0,0 @@
Purpose
-------
A small, developer-focused guide for the mindmodel validator used by reviewers and contributors.
What this validator does
-----------------------
- Validates the repository's mindmodel manifest and evidence against project policies.
- Flags common issues for reviewers (secrets, missing evidence, excessively truncated evidence, policy violations).
Where the manifest lives
------------------------
The canonical manifest is stored at:
.mindmodel/manifest.yaml
Reviewer checklist
------------------
When reviewing mindmodel submissions, make a quick pass over the following items:
1. Secrets: Ensure there are no secrets (API keys, tokens, private credentials) included in the manifest or evidence. If you spot secrets, escalate and remove them immediately.
2. Evidence truncation: Verify that evidence files or snippets are not truncated in a way that removes important context. If evidence is truncated for size, confirm the truncated portion is non-essential and that a pointer to full evidence is provided.
3. Read-only policy: Confirm that the mindmodel only documents read-only artifacts. The validator and reviewers must ensure no actions, credentials, or writable endpoints are exposed.
4. Completeness: Check that required fields from the manifest schema are present and that evidence links to real files or reports in the repository.
Running the validator locally
---------------------------
You can run the validator locally with the provided Python script. Example:
python -m scripts.mindmodel.cli .mindmodel/manifest.yaml reports/tmp.json
The CLI prints JSON to stdout and accepts positional arguments: manifest_path [report_path].
Validator code / CLI: scripts/mindmodel/validator.py and scripts/mindmodel/cli.py
Notes
-----
- Keep this document concise and developer-focused. It exists to help reviewers run the validator and spot common problems quickly.
- If you change the manifest schema or validator behavior, update this README to reflect any new checklist items or command-line options.

@ -1,335 +0,0 @@
# Guided Policy Explorer — Implementation Plan
**Goal:** Implement the Guided Policy Explorer MVP that reuses existing motions, layman summaries, embeddings and session votes to provide an Explore landing, Motion detail view, cached related motions (similarity cache), and accompanying background jobs and admin tooling.
Design: thoughts/shared/designs/2026-03-21-motions-guided-explorer-design.md
---
## Dependency Graph
```
Batch 1 (parallel): 1.1, 1.2, 1.3, 1.4, 1.5 [foundation - migrations, types, migration-tests]
Batch 2 (parallel): 2.1, 2.2, 2.3, 2.4 [core - similarity service, cache repo, audit repo, embeddings worker]
Batch 3 (parallel): 3.1, 3.2, 3.3, 3.4 [components - clusterer worker, CLI, API, Streamlit page]
Batch 4 (parallel): 4.1 [integration tests & docs - depends on 2.x & 3.x]
```
---
## Notes on planning choices
- Design requires a similarity cache and a small in-process nearest-neighbor search for MVP. I'm implementing this as: store precomputed top-N neighbor lists (IDs + scores) in a small SQL table and compute neighbors by scanning embeddings in-memory per batch job. Reason: avoids external vector DB and keeps implementation simple and testable.
- Design requires robust embedding generation. I'll implement exponential-backoff retry logic with a configurable retry count and timeouts in embeddings_worker; tests will monkeypatch the ai_provider to simulate failures.
- Migration tests: design asks to have migration tests, but migration SQL content is omitted per instructions. Tests will assert that migration files are present and follow naming conventions and will be marked to skip applying SQL unless a TEST_DB_URL env var is provided. This keeps CI safe while satisfying test coverage and developer verification.
---
## Batch 1: Foundation (parallel - 5 implementers)
All tasks in this batch have NO dependencies and run simultaneously.
### Task 1.1: Add similarity cache migration (placeholder)
**Title:** Migration: add similarity_cache table
**Description:** Add a migration file to create a similarity cache table that stores precomputed related-motion lists per motion (motion_id, neighbors_json, computed_at). SQL content intentionally left out per instructions; file is a placeholder that CI/tests will detect.
**Files:**
- migrations/2026-03-22-add-similarity-cache.sql
**Tests:**
- tests/migrations/test_2026_03_22_add_similarity_cache.py
**Estimated:** 1.0h
**Priority:** high
**Depends:** none
**Acceptance criteria:**
- Migration file exists at migrations/2026-03-22-add-similarity-cache.sql
- test_migration file runs and passes in default mode (it will only check filename & header). If TEST_DB_URL is set in env, test will attempt to run the SQL and must not error (SQL may be empty; test expects a no-op or valid SQL). Test is marked to skip DB application when TEST_DB_URL is unset.
---
### Task 1.2: Add audit/events migration (placeholder)
**Title:** Migration: add audit_events table
**Description:** Add a migration placeholder to create an audit/events table for append-only user events (vote, bookmark, flag). Actual SQL omitted.
**Files:**
- migrations/2026-03-22-add-audit-events.sql
**Tests:**
- tests/migrations/test_2026_03_22_add_audit_events.py
**Estimated:** 1.0h
**Priority:** high
**Depends:** none
**Acceptance criteria:**
- migrations/2026-03-22-add-audit-events.sql exists
- migration test verifies filename and is safe to run in CI (skips DB apply unless TEST_DB_URL provided).
---
### Task 1.3: Shared types for motions & similarity entries
**Title:** Types: motion and similarity types
**Description:** Add a small types module that centralizes typed dataclasses/interfaces used by similarity and cache modules (MotionId, Embedding vector typed alias, SimilarityNeighbor). This reduces coupling and makes tests easier to write.
**Files:**
- src/types/motion_types.py
**Tests:**
- tests/types/test_motion_types.py
**Estimated:** 1.5h
**Priority:** medium
**Depends:** none
**Acceptance criteria:**
- src/types/motion_types.py defines MotionId, Embedding, SimilarityNeighbor types and basic helpers (e.g., serialize/deserialize neighbors). Tests validate JSON round-trip of neighbors.
---
### Task 1.4: CI migration test helper
**Title:** Test helper: migration test utils
**Description:** Add a small test helper that other migration tests can use. It provides a pytest fixture that reads TEST_DB_URL and yields a DB connection or None and marks tests appropriately.
**Files:**
- tests/utils/migration_fixtures.py
**Tests:**
- tests/migrations/test_migration_fixtures_smoke.py
**Estimated:** 1.0h
**Priority:** medium
**Depends:** none
**Acceptance criteria:**
- migration_fixtures.py provides `test_db` fixture. The smoke test asserts fixture yields None when TEST_DB_URL unset and yields a connection-like object when set.
---
### Task 1.5: Add README admin docs for recomputing
**Title:** Docs: admin CLI usage and migration notes
**Description:** Add a short markdown doc describing the admin CLI, migration filenames, and how to run recompute/clusterer jobs locally for dev.
**Files:**
- docs/admin/recompute_similarity.md
**Tests:** none (doc only)
**Estimated:** 0.5h
**Priority:** low
**Depends:** none
**Acceptance criteria:**
- docs/admin/recompute_similarity.md exists and documents commands and env vars: TEST_DB_URL, AI_PROVIDER_MOCK, SIMILARITY_TOP_N.
---
## Batch 2: Core Modules (parallel - 4 implementers)
Depends: Batch 1
### Task 2.1: Similarity service (in-process search + utility)
**Title:** Similarity service implementation
**Description:** New service that, given motion embeddings, computes cosine similarity and returns top-N neighbors. Also exposes a convenience function to compute neighbors for one motion and return a list of (motion_id, score). This is pure Python and testable in-memory.
**Files:**
- src/services/similarity_service.py
**Tests:**
- tests/services/test_similarity_service.py
**Estimated:** 5.0h
**Priority:** high
**Depends:** 1.3
**Acceptance criteria:**
- similarity_service.py exposes compute_neighbors(embedding: list[float], all_embeddings: Dict[MotionId, Embedding], top_n: int) -> List[SimilarityNeighbor]
- Unit tests cover exact small matrices and edge cases (empty, identical embeddings). All tests pass with `pytest tests/services/test_similarity_service.py`.
---
### Task 2.2: DB repo for similarity cache
**Title:** Repo: similarity_cache read/write
**Description:** Provide a small repository abstraction that reads and writes cached neighbor lists to the DB (serialize neighbors as JSON). Keep DB interactions minimal and testable using sqlite in-memory.
**Files:**
- src/db/similarity_cache_repo.py
**Tests:**
- tests/db/test_similarity_cache_repo.py
**Estimated:** 4.0h
**Priority:** high
**Depends:** 1.1, 1.3
**Acceptance criteria:**
- similarity_cache_repo provides functions: get_cached_neighbors(motion_id) -> Optional[List[SimilarityNeighbor]] and upsert_cached_neighbors(motion_id, neighbors, computed_at)
- Unit tests run against sqlite in-memory and assert correct serialization/deserialization.
---
### Task 2.3: Audit/events repository
**Title:** Repo: audit_events append-only writer
**Description:** Small repo to append audit events (user_id, session_id, motion_id, event_type, payload JSON, created_at). Provides an append_event function used by UI and session logic.
**Files:**
- src/db/audit_repo.py
**Tests:**
- tests/db/test_audit_repo.py
**Estimated:** 3.0h
**Priority:** medium
**Depends:** 1.2
**Acceptance criteria:**
- append_event writes a row to sqlite in-memory in test and read-back verifies fields and created_at presence. Functions are well typed and handle JSON payloads.
---
### Task 2.4: Embeddings worker helper (retries/backoff)
**Title:** Worker: robust embedding generator
**Description:** Add a worker helper that ensures embeddings exist for a motion. It calls ai_provider.get_embedding with retry/backoff and writes embedding via an abstracted DB function (the put function will be dependency-injected in tests). This module contains no long-running loop — it's a single-run helper function used by the scheduler.
**Files:**
- src/ai/embeddings_worker.py
**Tests:**
- tests/ai/test_embeddings_worker.py
**Estimated:** 4.0h
**Priority:** high
**Depends:** 1.3
**Acceptance criteria:**
- embeddings_worker.explain_and_embed(motion_id, text, put_embedding_fn) calls ai_provider and retries on simulated transient errors. Tests monkeypatch ai_provider to simulate 2 failing attempts then success and verify put_embedding_fn called exactly once with a vector-like object.
---
## Batch 3: Components (parallel - 4 implementers)
Depends: Batch 2
### Task 3.1: Clusterer scheduled job
**Title:** Worker: clusterer job that computes & writes caches
**Description:** Background job module that loads all embeddings, computes top-N neighbors for each motion using similarity_service, and writes cache rows via similarity_cache_repo. Designed to be runnable from CLI. It should respect a MAX runtime parameter (process batch size) for safe operation in dev.
**Files:**
- src/workers/clusterer.py
**Tests:**
- tests/workers/test_clusterer.py
**Estimated:** 6.0h
**Priority:** high
**Depends:** 2.1, 2.2, 2.4
**Acceptance criteria:**
- clusterer.run_batch(batch_size, top_n, load_embeddings_fn, upsert_cache_fn) exists and can be unit-tested by injecting small in-memory embeddings and verifying upsert_cache_fn called with expected neighbor lists.
---
### Task 3.2: Admin CLI: recompute-similarity
**Title:** CLI: recompute similarity & options
**Description:** Small CLI script (click or argparse) to trigger the clusterer job (full-run or limited). CLI accepts --top-n, --batch-size, --dry-run flags. Tests will monkeypatch clusterer.run_batch.
**Files:**
- src/cli/recompute_similarity.py
**Tests:**
- tests/cli/test_recompute_similarity.py
**Estimated:** 2.5h
**Priority:** medium
**Depends:** 3.1
**Acceptance criteria:**
- CLI parses flags and calls clusterer.run_batch with parsed args. tests assert proper arguments passed and dry-run does not call run_batch.
---
### Task 3.3: HTTP API endpoint for compute-on-demand / cached
**Title:** API: similarity endpoint
**Description:** Small Flask/FastAPI/WSGI handler module that returns cached related motions for a motion_id; if cache missing and a query param compute=true, it calls the similarity service to compute neighbors on demand (without persisting) and returns them. Keep the handler framework-agnostic so it can be wired into existing web framework; tests will call the handler function directly.
**Files:**
- src/api/similarity_api.py
**Tests:**
- tests/api/test_similarity_api.py
**Estimated:** 3.5h
**Priority:** medium
**Depends:** 2.1, 2.2
**Acceptance criteria:**
- Handler get_related(motion_id, compute=False, load_embedding_fn, load_all_embeddings_fn, cache_repo) returns cached neighbors when present and computes on demand when compute=True. Tests cover both code paths.
---
### Task 3.4: Streamlit UI: Explore landing & Motion detail module
**Title:** UI: explore page and motion detail component
**Description:** Add a Streamlit helper module providing functions to render the Explore landing and Motion detail sections. Avoid modifying existing app.py in this MVP; instead provide a module that app.py can import. The module will expose pure functions where possible to ease testing; tests will verify behavior by calling functions and mocking DB/AI calls.
**Files:**
- src/ui/explore_page.py
**Tests:**
- tests/ui/test_explore_page.py
**Estimated:** 5.0h
**Priority:** medium
**Depends:** 2.2, 2.3, 2.4
**Acceptance criteria:**
- explore_page.render_explore(session, load_curated_fn, load_cached_neighbors_fn) returns a data structure (not direct Streamlit calls) that app.py can choose to render. Tests assert the correct payload for a sample session and that related motions are omitted gracefully when embeddings are missing.
---
## Batch 4: Integration & Docs (parallel - 2 implementers)
Depends: Batch 2 & 3
### Task 4.1: Integration test: ingest → summarize → embed → cluster → UI read
**Title:** Integration test for the end-to-end path (mvp)
**Description:** Add an integration pytest that simulates: create 3 synthetic motions, call embeddings_worker (monkeypatched AI provider), run clusterer on the in-memory dataset, and assert similarity cache rows exist and explore_page returns related motions. Use sqlite in-memory and monkeypatch ai_provider to return deterministic vectors.
**Files:**
- tests/integration/test_end_to_end_explore_flow.py
**Tests:**
- (this is the test file)
**Estimated:** 8.0h
**Priority:** high
**Depends:** 1.3, 2.1, 2.2, 2.4, 3.1, 3.4
**Acceptance criteria:**
- Running `pytest tests/integration/test_end_to_end_explore_flow.py` passes locally with no external network calls when AI provider is monkeypatched via monkeypatch fixture. The test asserts that at least one neighbor exists for a motion and the explore_page data includes it.
---
## CI / Test instructions
- Run unit tests: pytest tests --ignore=tests/integration (or full suite: pytest)
- Run a single module test: pytest tests/services/test_similarity_service.py::test_compute_neighbors_basic
- Integration tests: pytest tests/integration/test_end_to_end_explore_flow.py
Monkeypatching AI provider in CI/local tests:
- Use the `monkeypatch` pytest fixture to patch `src.ai.ai_provider.get_embedding` and `src.ai.ai_provider.summarize` (if used). Example in tests: monkeypatch.setattr('src.ai.ai_provider.get_embedding', fake_get_embedding)
- CI should set env var AI_PROVIDER_MOCK=1 for additional safety; tests will check this var and use mocks if present.
Temp DB setup for tests:
- Unit tests should use sqlite in-memory ("sqlite:///:memory:") via a `test_db` fixture in tests/utils/migration_fixtures.py.
- Migration tests: If TEST_DB_URL env var is set, the migration tests will attempt to apply SQL to that DB; otherwise they will run in dry-run / skip-apply mode and only validate filename and header.
Example pytest commands:
- pytest -q
- pytest -q tests/services/test_similarity_service.py -k compute_neighbors
Notes for CI pipeline:
- Ensure Python dependencies include pytest, pytest-mock and any DB driver required (sqlite built-in is fine). No external AI keys required — tests must mock AI provider.
---
## 3-Sprint Schedule (2-week sprints)
Sprint 1 (Weeks 1–2) — Milestone 1: MVP foundation + core similarity
- Goals: Add migrations, types, similarity service, similarity cache repo, audit repo, embeddings worker helper
- Tasks: 1.1, 1.2, 1.3, 1.4, 2.1, 2.2, 2.3, 2.4
Sprint 2 (Weeks 3–4) — Milestone 1 continued: background job, CLI, API, UI
- Goals: Implement clusterer job, CLI, similarity API, explore_page UI module; initial integration smoke tests
- Tasks: 3.1, 3.2, 3.3, 3.4, initial lightweight integration test scaffolding
Sprint 3 (Weeks 5–6) — Milestone 2 & 3: hardening, integration tests, docs
- Goals: Full integration tests, migration tests, docs, logging hardening, small UX polish
- Tasks: 4.1, docs improvements from 1.5, logging conversion across modules (follow-up small PRs as needed)
Notes:
- Estimates assume 1 full-stack engineer + 1 reviewer. Sprint 1 is migration- and algorithm-heavy; the reviewer will focus on migrations and core algorithms. Sprint 2 focuses on wiring and UI; the reviewer focuses on integration and UX. Sprint 3 finishes tests and polish.
---
## Assumptions
- The repository uses Python 3.10+ and pytest for tests. If different, adjust test fixtures accordingly.
- Existing DB access helpers exist (a simple execute/connection helper). If not, tests use sqlite3 directly and repository code will accept a DB connection/cursor via dependency injection.
- The project already has an ai_provider abstraction at src/ai/ai_provider.py with functions `get_embedding(text) -> list[float]` and `summarize(text) -> str` — tests will monkeypatch these. If the names differ, adapt imports when implementing.
- Streamlit app remains `app.py` and can import src/ui/explore_page.py — I deliberately do not modify app.py in this plan to keep the change set minimal.
- We will store embeddings as arrays in an embeddings table; similarity modules will load them via an injected loader function to keep unit tests pure.
---
## Open Questions / Implementation Clarifications
1. Bookmarks persistence: design left bookmarks as open (session vs. persistent). For MVP we will record bookmark events in the audit_events table (append-only) and treat them as per-session by default. If persistent bookmarks required later, a new table/migration will be added.
2. Which web framework to wire the similarity_api into? The plan keeps handler framework-agnostic; we need guidance whether app uses Flask/FastAPI/Starlette to add the route. Implementer should wire into existing HTTP routing pattern.
3. Embedding storage format: assume float arrays stored as JSON or array type in DB. If project uses a binary blob, adjust serialization in similarity_cache_repo and tests accordingly.
4. Acceptable top-N neighbor size for caches. Default SIMILARITY_TOP_N = 10; CLI and worker accept override. If product wants 50, increase later.
---
## How a single implementer should proceed (step-by-step)
1. Start with Batch 1 tasks 1.1–1.4. Create migrations placeholders and types module. Run migration filename tests.
2. Implement similarity_service (2.1) and its unit tests. This is the critical algorithm that must be rock-solid.
3. Implement similarity_cache_repo (2.2) and audit_repo (2.3) using sqlite in-memory for tests. Run unit tests.
4. Implement embeddings_worker helper (2.4) and add tests that mock ai_provider. Ensure CI will not call real AI.
5. Implement clusterer (3.1) and test with in-memory data by injecting loader/upsert functions.
6. Add admin CLI (3.2) to run clusterer; add small doc (1.5) describing how to run it locally.
7. Implement API handler (3.3) and UI helper (3.4). Tests should mock DB and AI as needed.
8. Finish with integration test (4.1) to stitch the pieces together. Iterate on bug fixes and reviewer feedback.
---
## Acceptance criteria for the feature (MVP)
- Explore landing exists and can present curated motions (using existing curated flag). Data payload returned by explore_page includes motion metadata and layman_explanation.
- Motion detail returns layman_explanation, party-match snapshot (existing), and related motions computed from cached neighbor lists when available.
- Background clusterer job can recompute cached neighbor lists and the CLI can trigger it.
- Tests cover core algorithm (similarity computation), cache repo serialization, embedders (mocked), and at least one end-to-end smoke integration test.
---
If anything in this plan should be narrowed further (for a smaller initial PR) I recommend focusing Sprint 1 + clusterer CLI (Tasks 1.x + 2.x + 3.1 + 3.2) and deferring UI wiring until clusterer and cache are validated.

@ -1,151 +0,0 @@
# Parliamentary Embedding Pipeline (Late Fusion) Implementation Plan
Goal: Implement an MVP late-fusion pipeline that (1) extracts MP-level votes from the existing motions.voting_results JSON, (2) builds aligned SVD representations per time-window, (3) ensures text embeddings coverage, and (4) fuses SVD motion vectors with text embeddings into a fused_embeddings table — all using DuckDB and in-Python compute.
Design reference: thoughts/shared/designs/2026-03-21-parliamentary-embedding-pipeline-design.md
---
## Dependency Graph
```
Batch 1 (parallel): 1.1, 1.2, 1.3, 1.4 [foundation - no deps]
Batch 2 (parallel): 2.1, 2.2, 2.3, 2.4, 2.5 [core - depends on batch 1]
Batch 3 (parallel): 3.1, 3.2 [integration & CI - depends on batch 2]
```
---
## Batch 1: Foundation (parallel)
### Task 1.1: Add scientific dependencies
- File to modify: pyproject.toml
- Test: tests/test_pyproject_deps.py
- Hours: 1.0 | Priority: high | Depends: none
- Acceptance: scipy>=1.11, umap-learn>=0.5, plotly>=5.0 present in pyproject.toml
### Task 1.2: Add migration file placeholders
- Files to create: migrations/2026_03_21__create_mp_votes.sql, migrations/2026_03_21__create_mp_metadata.sql, migrations/2026_03_21__create_svd_vectors.sql, migrations/2026_03_21__create_fused_embeddings.sql
- Test: tests/test_migration_pipeline_tables.py (follows existing pattern in tests/test_migration_embeddings.py)
- Hours: 1.5 | Priority: high | Depends: none
- Acceptance: Migration files exist; test applies them to temp DuckDB and asserts expected tables/columns
### Task 1.3: Extend database.py with new tables + helpers
- File to modify: database.py (_init_database + new helpers)
- Test: tests/test_database_schema_and_helpers.py
- Hours: 3.5 | Priority: highest | Depends: 1.2
- Helpers to add: mp_votes_exists_for_motion, insert_mp_vote, upsert_mp_metadata(mp_name, party, van, tot_en_met, persoon_id), store_svd_vector, store_fused_embedding
- Acceptance: Tables created, helpers tested against temp DuckDB via round-trip insert/select; logging not prints
### Task 1.4: Add test fixtures
- File to create: tests/fixtures/sample_voting_results.json (5–10 motions with mixed party + MP keys)
- Hours: 0.5 | Priority: medium | Depends: none
---
## Batch 2: Core Pipeline (parallel, depends on Batch 1)
### Task 2.1: pipeline/extract_mp_votes.py
- Extract MP rows from voting_results JSON; comma-in-key = MP name, else = party (skip party rows)
- Test: tests/test_extract_mp_votes.py
- Hours: 4.0 | Priority: highest | Depends: 1.3, 1.4
- Acceptance: Idempotent; correct MP rows inserted; party keys ignored; re-run produces no duplicates
### Task 2.2: pipeline/fetch_mp_metadata.py
- Fetch MP party membership and tenure from OData using confirmed endpoints (spike resolved: Persoon + FractieZetelPersoon are available)
- OData query: `/FractieZetelPersoon?$filter=Verwijderd eq false&$expand=Persoon($select=Id,Achternaam,Initialen,Tussenvoegsel),FractieZetel($expand=Fractie($select=NaamNL))`
- Key fields: FractieZetelPersoon.Van (entry_date), FractieZetelPersoon.TotEnMet (exit_date, null=active), Persoon.Achternaam, Persoon.Initialen, Persoon.Tussenvoegsel, Fractie.NaamNL (party name)
- Name normalization: reconstruct ActorNaam format from Persoon fields: `"{Tussenvoegsel} {Achternaam}, {Initialen}".strip()` (must match keys in voting_results JSON, e.g. "Yesilgöz-Zegerius, D.")
- Persoon.Id stored as source_id (GUID) for deduplication
- Stores via MotionDatabase.upsert_mp_metadata; idempotent on re-run
- Test: tests/test_fetch_mp_metadata.py — monkeypatch requests.get with canned FractieZetelPersoon+Persoon response; assert name normalization and DB rows
- Hours: 3.5 | Priority: highest | Depends: 1.3
- Acceptance: mp_metadata rows correct; name normalization tested for tussenvoegsel variants; TotEnMet=null handled correctly; re-run idempotent
### Task 2.3: pipeline/text_pipeline.py
- Ensure every motion has a text embedding; delegates to existing ai_provider.get_embedding
- Text priority: body_text > layman_explanation > description
- Test: tests/test_text_pipeline.py (monkeypatch ai_provider)
- Hours: 3.0 | Priority: high | Depends: 1.3, 1.1
### Task 2.4: pipeline/svd_pipeline.py
- Per-window: build sparse MP×Motion csr_matrix → scipy svds → Procrustes alignment → store svd_vectors
- CRITICAL: enforce k < min(n_mps, n_motions); reduce k dynamically if needed; test this path
- Procrustes: log disparity score; flag HIGH_DISPARITY if overlap < 30%
- Test: tests/test_svd_pipeline.py (synthetic 5×6 matrix, k reduction test, alignment test)
- Hours: 6.0 | Priority: highest | Depends: 1.3
### Task 2.5: pipeline/fusion.py
- For each motion in window: fetch SVD motion vector + text embedding → concatenate → store fused_embeddings
- Skip and log if either vector missing
- Test: tests/test_fusion.py (verify vector length = svd_dims + text_dims)
- Hours: 3.0 | Priority: high | Depends: 2.3, 2.4
---
## Batch 3: Integration & CI (depends on Batch 2)
### Task 3.1: tests/integration/test_pipeline_end_to_end.py
- Apply migrations → seed motions → monkeypatch ai_provider → run extract → SVD → text → fuse
- Assert fused_embeddings rows and vector dimensions
- Hours: 4.0 | Priority: highest | Depends: 2.1, 2.3, 2.4, 2.5
- Use numpy.random.seed(0); dataset ≤50 motions for CI speed
### Task 3.2: tests/conftest.py (fixtures + test helpers)
- Fixtures: temp_duckdb_path, apply_migrations, monkeypatch_ai_provider, mock_odata_client
- Add tests/README.md section on monkeypatching strategy
- Hours: 2.0 | Priority: high | Depends: 1.3
---
## Migration filenames
- migrations/2026_03_21__create_mp_votes.sql — columns: id, motion_id, mp_name, party, vote, date, created_at
- migrations/2026_03_21__create_mp_metadata.sql — columns: mp_name (PK), party, van (entry_date), tot_en_met (exit_date, nullable), persoon_id (GUID source_id)
- migrations/2026_03_21__create_svd_vectors.sql — columns: window_id, entity_type, entity_id, vector, model, created_at
- migrations/2026_03_21__create_fused_embeddings.sql — columns: motion_id, window_id, vector, svd_dims, text_dims, created_at
---
## CI / Test instructions
- Run all tests: pytest -q
- Run unit tests only: pytest -q tests/ --ignore=tests/integration
- Run integration test: pytest -q tests/integration/test_pipeline_end_to_end.py
- Monkeypatch ai_provider.get_embedding with a function returning [0.01]*16 for fast tests
- Monkeypatch OData/API calls via requests-mock or monkeypatch.setattr on TweedeKamerAPIClient methods
- Temp DuckDB: use pytest tmp_path fixture; apply migration SQL files at test setup
- Determinism: numpy.random.seed(0) in all tests calling scipy/numpy
---
## 3-Sprint Schedule (2-week sprints)
Sprint 1 (Weeks 1–2): Tasks 1.1, 1.2, 1.3, 1.4, 2.2
- Deliverables: DB schema extended, migrations present, mp_metadata fetch implemented and tested
Sprint 2 (Weeks 3–4): Tasks 2.1, 2.3, 2.4, 2.5
- Deliverables: All pipeline modules implemented with passing unit tests
Sprint 3 (Weeks 5–6): Tasks 3.1, 3.2
- Deliverables: Integration test passing end-to-end; CI docs written
---
## Key assumptions
1. Vectors stored as JSON (consistent with existing embeddings table)
2. Use existing ai_provider.get_embedding for text embeddings — no new model calls
3. SVD k enforced dynamically (k < min(n_mps, n_motions)); tests cover this path
4. Procrustes rotation matrices NOT persisted in MVP (aligned vectors stored directly)
5. mp_metadata: fetch from OData FractieZetelPersoon endpoint (confirmed available); Van/TotEnMet give tenure windows
6. Default quarterly time windows, but parameterized for Annual validation in Sprint 2
7. All new helpers go into existing database.py MotionDatabase class (not a new module)
8. Analysis/visualization (UMAP, Plotly plots) is a follow-up sprint, NOT included here
## Open questions
1. [RESOLVED] OData FractieZetelPersoon confirmed available with Van/TotEnMet tenure dates; Stemming.ActorFractie gives party for each individual vote; name normalization from Persoon.Achternaam+Initialen+Tussenvoegsel confirmed feasible
2. Should Procrustes rotation matrices be persisted? (MVP: no; revisit after)
3. Time-window granularity: annual first for stability validation, then quarterly?
4. Production k value for SVD: default 50 but must be validated against real data sizes
5. Who runs migrations in production, and how? (Out of scope for MVP)

@ -1,530 +0,0 @@
# Motion Explorer Implementation Plan
**Goal:** Regenerate analyses (compass + similarity cache), add an interactive Streamlit explorer (explorer.py) exposing political compass, party trajectories, motion search and browser, and update the blog post with real counts and vector-dimension facts.
**Design doc:** thoughts/shared/designs/2026-03-22-motion-explorer-design.md
---
## Summary / Architecture
We'll perform three high-level workstreams in dependency order:
1. Analysis rerun: after the running pipeline releases the DB lock, run the minimal pipeline steps to (re)compute fused vectors and then recompute the similarity cache for all quarterly windows 2019-Q1 → 2024-Q4. Also run the static compass generator for verification.
2. explorer.py: single-file Streamlit app placed at project root. It will use the existing analysis.* modules for heavy computations (cached via @st.cache_data) and duckdb read-only connections for all DB reads. Figures are produced with plotly and rendered inline in Streamlit.
3. Blog post update: update thoughts/blog-post-political-compass.md with real DB numbers, updated similarity cache counts and correct fused vector dimensions.
Key implementation decisions (gap-filling):
- Explorer is a single import-safe module: top-level definitions only, no expensive work on import. Running the UI triggers computations.
- Use @st.cache_data for expensive functions: load_positions (compute_2d_axes), load_party_map, load_motions_df.
- All DuckDB access in explorer.py will use duckdb.connect(database=..., read_only=True).
- For similarity lookups we'll query similarity_cache directly via read-only DuckDB rather than calling MotionDatabase (which opens non-read-only connections), to respect the "DB may be running" constraint.
- The UI will filter out motions with title exactly "Verworpen." by default; a sidebar toggle allows showing them.
- Tests: explorer is a UI script, so behavioural TDD is not practical. We'll add a minimal import/sanity test ensuring the module is import-safe and key functions exist. Blog-post updates are manual, but the plan includes a small helper script to compute exact counts to paste into the markdown.
---
## Dependency Graph
```
Batch 1 (serial): 1.1 → 1.2 [analysis rerun - single operator tasks] (depends: none; 1.2 needs the fused vectors from 1.1)
Batch 2 (parallel): 2.1, 2.2 [explorer implementation + test] (depends: 1.1 for verification, but code can be implemented earlier)
Batch 3 (serial): 3.1 [blog post update] (depends: 1.1, 1.2)
```
NOTE: The actual critical dependency is that the DB lock must be released before running the analysis rerun (Batch 1). The explorer code (Batch 2) can be implemented while the pipeline is running — it will only attempt DB reads at runtime and uses read-only connections.
---
## Batch 1: Analysis rerun (operator tasks — no repo files changed)
These are operational steps to run after the pipeline finishes and the DB lock is released. Run from the repository root.
Task 1.1: Regenerate compass outputs and fused vectors
**What:** Run generate_compass.py and run the pipeline to (re)fuse vectors for quarterly windows covering 2019-Q1 → 2024-Q4. We will not re-run expensive fetch/extract/SVD/text steps if they are already up-to-date; only fusion (phase 5) must run so fused_embeddings exists for all windows.
**Commands (run after pipeline finishes and DB unlocked):**
- Verify DB file exists:
.venv/bin/python -c "import os,sys; p='data/motions.db'; print('exists' if os.path.exists(p) else 'MISSING'); sys.exit(0)"
- Run static compass for quick visual check (produces HTML output):
.venv/bin/python scripts/generate_compass.py --db data/motions.db --out outputs --method pca --pca-residual
- Run the pipeline orchestrator so Phase 5 (fusion) runs for quarterly windows 2019-01-01 → 2025-01-01.
We explicitly skip metadata/extract/svd/text since those may already be present; this minimizes rework and avoids mixing read/write connections in the current process.
.venv/bin/python -m pipeline.run_pipeline \
--db-path data/motions.db \
--start-date 2019-01-01 --end-date 2025-01-01 \
--window-size quarterly \
--skip-metadata --skip-extract --skip-svd --skip-text
**Notes:** run_pipeline.py also has a --skip-fusion flag; we MUST NOT pass it here because we want fusion to execute. All flags used in the command above are supported by the script.
**Verify:**
- After run_pipeline completes, verify fused_embeddings rows exist for expected windows:
.venv/bin/python - <<'PY'
# Verification script: report how many fused_embeddings rows exist per window.
import duckdb
# Open the database read-only so this check cannot modify it.
conn = duckdb.connect(database='data/motions.db', read_only=True)
# Prints a list of (window_id, row_count) tuples in descending window_id order.
print(conn.execute("SELECT window_id, COUNT(*) FROM fused_embeddings GROUP BY window_id ORDER BY window_id DESC").fetchall())
conn.close()
PY
Task 1.2: Recompute similarity cache for all quarterly windows 2019-Q1 → 2024-Q4
**What:** Compute top-20 similarities per motion per window for the fused vectors and insert rows into similarity_cache. We will run similarity.compute.compute_similarities per window. The repository's similarity/compute.py exposes compute_similarities(vector_type='fused', window_id=..., top_k=20).
**Command (one-liner loop):**
.venv/bin/python - <<'PY'
# Recompute the fused-vector similarity cache for every quarterly window.
from similarity.compute import compute_similarities

# Window ids "2019-Q1" .. "2024-Q4", in chronological order.
windows = [f"{y}-Q{q}" for y in range(2019, 2025) for q in (1, 2, 3, 4)]

total = 0
for wid in windows:
    # The value returned per window is summed and reported at the end.
    inserted = compute_similarities(vector_type='fused', window_id=wid, top_k=20, db_path='data/motions.db')
    print(f"window={wid} inserted={inserted}")
    total += inserted
print('DONE total_inserted=', total)
PY
**Notes & decisions:**
- The compute_similarities function already clears existing rows for (vector_type, window_id) before inserting new ones, so this is safe to re-run.
- If compute_similarities raises memory pressure for large windows, run on subsets (split windows further) — but try the simple loop first.
**Verify:**
- Basic counts per window:
.venv/bin/python - <<'PY'
# Verification script: report similarity_cache row counts for fused vectors.
import duckdb
# Open the database read-only so this check cannot modify it.
conn = duckdb.connect(database='data/motions.db', read_only=True)
# Prints a list of (window_id, row_count) tuples in ascending window_id order.
print(conn.execute("SELECT window_id, COUNT(*) FROM similarity_cache WHERE vector_type = 'fused' GROUP BY window_id ORDER BY window_id").fetchall())
# Prints the overall fused row count as a one-element tuple (the fetchone() result).
print('total', conn.execute("SELECT COUNT(*) FROM similarity_cache WHERE vector_type = 'fused'").fetchone())
conn.close()
PY
- Spot-check top neighbors for an example motion id (the script below uses the lowest id in the motions table; substitute a known id to inspect a specific motion):
.venv/bin/python - <<'PY'
# Spot-check script: print the ten highest-scoring fused neighbours of one motion.
import duckdb
conn = duckdb.connect(database='data/motions.db', read_only=True)
try:
    # Use the lowest motion id as the example source; swap in a known id
    # here to inspect a specific motion instead.
    row = conn.execute("SELECT id FROM motions ORDER BY id LIMIT 1").fetchone()
    if row is None:
        # fetchone() yields None for an empty result set, so indexing it
        # directly would raise TypeError on an empty motions table.
        print('motions table is empty; nothing to spot-check')
    else:
        src = row[0]
        print('example source id=', src)
        print(conn.execute("SELECT target_motion_id, score FROM similarity_cache WHERE source_motion_id = ? AND vector_type = 'fused' ORDER BY score DESC LIMIT 10", (src,)).fetchall())
finally:
    # Release the read-only handle even if one of the queries fails.
    conn.close()
PY
---
## Batch 2: Explorer implementation (code + test) — parallel implementers
All tasks in this batch are independent and can be worked on in parallel. The single file to add is explorer.py at the project root. A small unit test ensures import-safety.
Decision: explorer.py will be placed at project root (same level as app.py) as requested by design. It will avoid performing DB work at import time so tests and other scripts can import it safely.
### Task 2.1: explorer.py
**File:** explorer.py
**Test:** tests/test_explorer_import.py
**Depends:** none (safe to implement while pipeline runs)
Implementation (copy-paste-ready). This is a minimal, well-documented, and import-safe Streamlit app that follows the design requirements. It uses @st.cache_data on heavy functions, opens DuckDB with read_only=True for all reads, and uses existing analysis modules for computing 2D axes.
```python
# explorer.py
"""Streamlit motion explorer.
Import-safe: heavy computations are behind functions guarded by @st.cache_data
and only run when the user opens the app (streamlit run explorer.py).
"""
from __future__ import annotations
import logging
from typing import Dict, List, Optional, Tuple
import duckdb
import pandas as pd
import plotly.express as px
import streamlit as st
# keep a module-level logger
logger = logging.getLogger(__name__)
# ---------- Cached data loaders ----------
@st.cache_data
def load_positions(db_path: str = "data/motions.db", window_size: str = "annual") -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Optional[Dict]]:
    """Return ``(positions_by_window, axis_def)`` from the analysis package.

    The work is delegated to ``analysis.political_axis.compute_2d_axes`` and
    the result is memoised by Streamlit via ``@st.cache_data``.

    Args:
        db_path: Database path handed to ``compute_2d_axes``; exposed as a
            parameter so callers (tests) can point at a different file.
        window_size: Accepted by this loader but not forwarded to
            ``compute_2d_axes``.

    Returns:
        The two values produced by ``compute_2d_axes``, or ``({}, None)`` when
        the analysis module cannot be imported.
    """
    try:
        from analysis.political_axis import compute_2d_axes
    except Exception as exc:
        # Missing analysis module: log it and hand back an empty result.
        logger.exception("analysis.political_axis not available: %s", exc)
        return {}, None

    # Potentially expensive; the analysis module owns the details.
    positions, axes = compute_2d_axes(
        db_path,
        method="pca",
        pca_residual=True,
        normalize_vectors=True,
    )
    return positions, axes
@st.cache_data
def load_party_map(db_path: str = "data/motions.db") -> Dict[str, str]:
    """Return the mp_name -> party mapping for the given database.

    Delegates to ``analysis.visualize._load_party_map``. Any exception raised
    while importing or calling that helper is logged and an empty dict is
    returned instead.
    """
    try:
        from analysis.visualize import _load_party_map as _loader
        party_by_mp = _loader(db_path)
    except Exception:
        logger.exception("Failed to load party map")
        return {}
    return party_by_mp
@st.cache_data
def load_motions_df(db_path: str = "data/motions.db") -> pd.DataFrame:
    """Read the motions table into a DataFrame over a read-only connection.

    Selected columns: id, title, description, date, policy_area,
    voting_results, layman_explanation, winning_margin, controversy_score.
    The result is memoised by Streamlit via ``@st.cache_data``.
    """
    sql = "SELECT id, title, description, date, policy_area, voting_results, layman_explanation, winning_margin, controversy_score FROM motions"
    conn = duckdb.connect(database=db_path, read_only=True)
    try:
        frame = conn.execute(sql).fetchdf()
    finally:
        # Best-effort close: a failure here must not mask the query outcome.
        try:
            conn.close()
        except Exception:
            pass
    return frame
def query_similar_from_cache(db_path: str, source_motion_id: int, vector_type: str = "fused", window_id: Optional[str] = None, top_k: int = 10) -> List[Dict]:
    """Fetch cached neighbours of a motion from ``similarity_cache``.

    Uses a read-only DuckDB connection. Rows are filtered by source motion and
    vector type, optionally by window, ordered by descending score and capped
    at ``top_k``.

    Returns:
        One dict per row, keyed by the selected column names:
        target_motion_id, score, id, window_id.
    """
    sql = "SELECT target_motion_id, score, id, window_id FROM similarity_cache WHERE source_motion_id = ? AND vector_type = ?"
    args = [source_motion_id, vector_type]
    if window_id is not None:
        sql += " AND window_id = ?"
        args.append(window_id)
    sql += " ORDER BY score DESC LIMIT ?"
    args.append(top_k)

    conn = duckdb.connect(database=db_path, read_only=True)
    try:
        fetched = conn.execute(sql, args).fetchall()
        # Column names come from the cursor description of the last query.
        columns = [col[0] for col in conn.description]
        return [dict(zip(columns, row)) for row in fetched]
    finally:
        # Best-effort close: never let a close failure hide the real result.
        try:
            conn.close()
        except Exception:
            pass
# ---------- UI builders ----------
def build_compass_tab(db_path: str, window_size: str, show_rejected: bool):
    """Render the political-compass scatter plot for one selectable window.

    Args:
        db_path: Database path passed to the cached loaders.
        window_size: Passed through to ``load_positions``.
        show_rejected: Not used by this tab.

    Shows an ``st.error`` message and returns early when no position data is
    available.
    """
    positions_by_window, _axis_def = load_positions(db_path, window_size)
    party_map = load_party_map(db_path)
    if not positions_by_window:
        st.error("No position data available. Run the pipeline or check data/motions.db")
        return
    windows = sorted(positions_by_window.keys())
    if len(windows) > 1:
        # default: latest window
        last_index = len(windows) - 1
        idx = st.slider("Window", 0, last_index, last_index)
        window_id = windows[idx]
    else:
        # A slider needs min < max; with a single window there is nothing to
        # choose, so skip the widget rather than call st.slider(0, 0).
        window_id = windows[0]
    pos = positions_by_window.get(window_id, {})
    names = list(pos.keys())
    xs = [p[0] for p in pos.values()]
    ys = [p[1] for p in pos.values()]
    # Entities without a party entry are grouped under "Unknown".
    parties = [party_map.get(n, "Unknown") for n in names]
    fig = px.scatter(x=xs, y=ys, color=parties, hover_name=names, title=f"Political Compass ({window_id})")
    st.plotly_chart(fig, use_container_width=True)
def build_trajectories_tab(db_path: str, window_size: str):
    """Render trajectories across windows, per MP or per party centroid.

    Args:
        db_path: Database path passed to the cached loaders.
        window_size: Passed through to ``load_positions``.

    A checkbox switches between individual MP trajectories (limited to the
    first 200 names in sorted order) and party centroids (mean x / mean y of
    the party's members in each window). The multiselect restricts either view
    to the chosen parties; an empty selection means no restriction.
    Shows an ``st.error`` message and returns early when no positions exist.
    """
    positions_by_window, _ = load_positions(db_path, window_size)
    if not positions_by_window:
        st.error("No trajectories available")
        return
    window_ids = sorted(positions_by_window.keys())
    import numpy as _np
    party_map = load_party_map(db_path)
    # user control
    show_mps = st.checkbox("Show MPs (individual trajectories)", value=False)
    selected_parties = st.multiselect("Parties (select to restrict)", options=sorted(set(party_map.values())), default=None)
    fig = px.line()
    if show_mps:
        # plot a small subset by default to avoid clutter
        mp_limit = 200
        # mp -> [(window_id, coord), ...]
        mp_coords = {}
        for wid in window_ids:
            for mp, coord in positions_by_window.get(wid, {}).items():
                mp_coords.setdefault(mp, []).append((wid, coord))
        # optionally filter by party map
        mps = [m for m in mp_coords if (not selected_parties) or (party_map.get(m) in selected_parties)]
        mps = sorted(mps)[:mp_limit]
        # Precomputed rank replaces a linear window_ids.index() per sort key.
        window_rank = {wid: rank for rank, wid in enumerate(window_ids)}
        for mp in mps:
            items = sorted(mp_coords[mp], key=lambda it: window_rank[it[0]])
            xs = [c[1][0] for c in items]
            ys = [c[1][1] for c in items]
            fig.add_scatter(x=xs, y=ys, mode='lines+markers', name=mp)
    else:
        # party -> list of centroids, one per window in which the party appears
        party_centroids = {}
        for wid in window_ids:
            coords_by_party = {}
            for mp, coord in positions_by_window.get(wid, {}).items():
                party = party_map.get(mp)
                if party is None:
                    # Entities without a known party do not contribute.
                    continue
                coords_by_party.setdefault(party, []).append(coord)
            for party, coords in coords_by_party.items():
                centroid = (_np.mean([c[0] for c in coords]), _np.mean([c[1] for c in coords]))
                party_centroids.setdefault(party, []).append(centroid)
        for party, centroids in party_centroids.items():
            if selected_parties and party not in selected_parties:
                continue
            xs = [c[0] for c in centroids]
            ys = [c[1] for c in centroids]
            fig.add_scatter(x=xs, y=ys, mode='lines+markers', name=party)
    st.plotly_chart(fig, use_container_width=True)
def build_search_tab(db_path: str, show_rejected: bool):
    """Render the search tab: filter motions and list the top matches.

    Motions from ``load_motions_df(db_path)`` are narrowed by year range,
    policy area and a case-insensitive substring search over ``title`` and
    ``layman_explanation``. The 50 results with the highest
    ``controversy_score`` are shown as expanders, each followed by up to 10
    neighbours from ``query_similar_from_cache``.

    Args:
        db_path: Database path forwarded to the loader and the cache query.
        show_rejected: When false, rows whose stripped title equals
            ``'Verworpen.'`` are hidden.
    """
    df = load_motions_df(db_path)
    if df is None or df.empty:
        st.info("No motions table available")
        return
    # filters
    motion_years = pd.to_datetime(df['date']).dt.year
    years = sorted(motion_years.dropna().unique().tolist())
    if years:
        start_year, end_year = int(min(years)), int(max(years))
    else:
        start_year, end_year = 2019, 2024
    if start_year < end_year:
        year_range = st.slider("Year range", start_year, end_year, (start_year, end_year))
    else:
        # All motions fall in one year: a slider with min == max is
        # rejected by Streamlit (at least in some releases) and there is
        # nothing to select.
        year_range = (start_year, end_year)
    policy_areas = sorted(df['policy_area'].dropna().unique().tolist())
    policy_filter = st.multiselect("Policy areas", options=policy_areas, default=None)
    query = st.text_input("Search text (title / layman_explanation)")
    # in-memory filter on a private copy; the year column is added before
    # any row filtering so it is never assigned on a slice of another frame
    working = df.copy()
    working['y'] = motion_years
    # filter rejected default
    if not show_rejected:
        working = working[working['title'].str.strip() != 'Verworpen.']
    working = working[(working['y'] >= year_range[0]) & (working['y'] <= year_range[1])]
    if policy_filter:
        working = working[working['policy_area'].isin(policy_filter)]
    if query:
        q = query.lower()
        # regex=False: the user's text is a literal substring, so input
        # such as "a(b" or "[" must not be compiled as a regular expression.
        in_title = working['title'].fillna('').str.lower().str.contains(q, regex=False)
        in_explanation = working['layman_explanation'].fillna('').str.lower().str.contains(q, regex=False)
        working = working[in_title | in_explanation]
    st.write(f"{len(working)} results")
    top_results = working.sort_values(by='controversy_score', ascending=False).head(50)
    for _, row in top_results.iterrows():
        with st.expander(f"{row['title']} — {row['date']}"):
            st.write(row.get('layman_explanation') or row.get('description') or '')
            st.write('Policy area:', row.get('policy_area'))
            st.write('Controversy score:', row.get('controversy_score'))
            # similar
            similar = query_similar_from_cache(db_path, int(row['id']), vector_type='fused', top_k=10)
            if similar:
                st.write('Vergelijkbare moties:')
                for s in similar:
                    st.write(f"- id={s['target_motion_id']} score={s['score']:.3f} window={s.get('window_id')}")
            else:
                st.info('Nog geen vergelijkbare moties beschikbaar')
def build_browser_tab(db_path: str, show_rejected: bool):
    """Render the browser tab: a motions table plus a per-row detail view.

    The table shows ``id``, ``title``, ``date``, ``policy_area``,
    ``controversy_score`` and ``winning_margin``, newest date first. The
    user picks a 0-based row index of that ordering and presses
    "Show details" to see the motion's description and up to 10
    neighbours from ``query_similar_from_cache``. Changes made inside the
    table editor are not read back.

    Args:
        db_path: Database path forwarded to the loader and the cache query.
        show_rejected: When false, rows whose stripped title equals
            ``'Verworpen.'`` are hidden.
    """
    df = load_motions_df(db_path)
    if df is None or df.empty:
        st.info("No motions table available")
        return
    if not show_rejected:
        df = df[df['title'].str.strip() != 'Verworpen.']
    if df.empty:
        # Every row was filtered out; without this guard the detail view
        # would index into an empty frame.
        st.info("No motions to display")
        return
    df_display = df[['id', 'title', 'date', 'policy_area', 'controversy_score', 'winning_margin']].copy()
    df_display = df_display.sort_values(by=['date'], ascending=False)
    # st.experimental_data_editor was renamed to st.data_editor in newer
    # Streamlit releases; use whichever this installation provides.
    data_editor = getattr(st, 'data_editor', None) or st.experimental_data_editor
    data_editor(df_display, num_rows='dynamic')
    # store selected id via session_state: user clicks a row and then presses a button
    st.write('Select a row and click "Show details"')
    sel_row_idx = st.number_input('Select row index (0-based)', min_value=0, max_value=max(0, len(df_display)-1), value=0)
    if st.button('Show details'):
        row = df_display.iloc[int(sel_row_idx)]
        st.subheader(row['title'])
        st.write(df.loc[df['id'] == row['id']].iloc[0].get('description') or '')
        similar = query_similar_from_cache(db_path, int(row['id']), vector_type='fused', top_k=10)
        if similar:
            st.write('Top similar:')
            for s in similar:
                st.write(f"- id={s['target_motion_id']} score={s['score']:.3f} window={s.get('window_id')}")
        else:
            st.info('Nog geen vergelijkbare moties beschikbaar')
def run_app():
    """Streamlit entry point: configure the page, read the sidebar settings
    and render the four explorer tabs.

    Sidebar settings are the DuckDB path, the window granularity
    (``'annual'`` or ``'quarterly'``) and whether rejected motions are
    shown; they are forwarded to the individual tab builders.
    """
    st.set_page_config(layout='wide', page_title='Parlement Explorer')

    st.sidebar.title('Explorer settings')
    db_path = st.sidebar.text_input('DuckDB path', value='data/motions.db')
    window_granularity = st.sidebar.selectbox('Window granularity', ['annual', 'quarterly'], index=0)
    show_rejected = st.sidebar.checkbox('Toon verworpen', value=False)

    tab_labels = ['Politiek Kompas', 'Partij Trajectories', 'Motie Zoeken', 'Motie Browser']
    compass_tab, trajectories_tab, search_tab, browser_tab = st.tabs(tab_labels)

    # Each tab container is entered in order and filled by its builder.
    with compass_tab:
        build_compass_tab(db_path, window_granularity, show_rejected)
    with trajectories_tab:
        build_trajectories_tab(db_path, window_granularity)
    with search_tab:
        build_search_tab(db_path, show_rejected)
    with browser_tab:
        build_browser_tab(db_path, show_rejected)
if __name__ == '__main__':
run_app()
```
**Verify (local/dev):**
- Run the app once the DB is available: streamlit run explorer.py
- Verify that Tab 1 loads and you can slide windows, plot renders inline
- Verify Tab 3 search returns results and shows similar motions
- Verify all long-running operations are cached (first call slow, subsequent fast)
### Task 2.2: Test for explorer import-safety
**File:** tests/test_explorer_import.py
**Depends:** none
Minimal pytest to ensure the module can be imported without triggering heavy work and that run_app and key functions exist.
```python
# tests/test_explorer_import.py
import importlib
def test_explorer_importable():
mod = importlib.import_module('explorer')
assert hasattr(mod, 'run_app')
assert callable(mod.run_app)
# key helpers
assert hasattr(mod, 'load_positions')
assert hasattr(mod, 'load_motions_df')
```
**Verify:**
- Run tests (no DB required for import test):
.venv/bin/python -m pytest tests/test_explorer_import.py -q
---
## Batch 3: Blog post update (manual / single-file edit)
The blog post at thoughts/blog-post-political-compass.md contains placeholder numbers for motion counts, similarity cache totals and fused vector dimension claim. After analysis rerun completes, update the markdown with exact numbers.
### Task 3.1: Update blog post with real numbers
**File to modify:** thoughts/blog-post-political-compass.md
**Depends:** 1.1, 1.2 (analysis rerun and similarity cache recompute must finish first)
Steps to compute authoritative numbers (run after Batch 1 completes):
1. Motion counts per year (SQL):
.venv/bin/python - <<'PY'
import duckdb
conn = duckdb.connect(database='data/motions.db', read_only=True)
rows = conn.execute("SELECT EXTRACT(year FROM date) AS y, COUNT(*) FROM motions GROUP BY y ORDER BY y").fetchall()
print(rows)
conn.close()
PY
2. Similarity cache total count (fused vectors):
.venv/bin/python - <<'PY'
import duckdb
conn = duckdb.connect(database='data/motions.db', read_only=True)
total = conn.execute("SELECT COUNT(*) FROM similarity_cache WHERE vector_type = 'fused'").fetchone()[0]
print('similarity_cache_fused_total=', total)
conn.close()
PY
3. Verify fused vector dimensions claim (inspect fused_embeddings.vector JSON lengths) — the fused field is stored as JSON array; compute distinct lengths:
.venv/bin/python - <<'PY'
import duckdb, json
conn = duckdb.connect(database='data/motions.db', read_only=True)
lens = conn.execute("SELECT DISTINCT CARDINALITY(vector) FROM fused_embeddings ORDER BY 1 DESC").fetchall()
print('distinct_fused_lengths=', lens)
conn.close()
PY
Replace the placeholder table and counts in thoughts/blog-post-political-compass.md with the outputs above. Also correct the fused dimensions claim (line that currently reads "fused = [svd_dims (10)] + [text_dims (2560)] = 2570") by pasting the real dimensions found.
Verification: After editing, spell-check and run a quick search to ensure the old placeholder numbers are gone:
grep -n "212,206\|2570\|~450 (newly backfilled)" -n thoughts/blog-post-political-compass.md || echo "No placeholders remain"
Commit message suggestions (to use when committing these changes):
- feat(explorer): add initial Streamlit explorer (explorer.py) + import test
- chore(analysis): recompute fused embeddings + similarity cache for 2019-Q1..2024-Q4 (instructions)
- docs(blog): update political compass blog post with real counts and vector dims
---
## Rollout / verification checklist (final acceptance)
- [ ] Analysis rerun finished without errors; fused_embeddings rows present for 2019-Q1..2024-Q4
- [ ] similarity_cache contains top-k neighbors for each window (spot-check 3 windows)
- [ ] explorer.py runs: streamlit run explorer.py renders tabs and figures inline
- [ ] explorer uses read-only DuckDB connections (manual code review + spot-check)
- [ ] thoughts/blog-post-political-compass.md updated with real numbers and vector dims
- [ ] All tests still pass: .venv/bin/python -m pytest -q
---
## Appendix: reasoning & decisions
- Design requires read-only DB access: MotionDatabase methods often open connections without read_only flag. To guarantee read-only behaviour while the pipeline runs, explorer.py queries DuckDB directly with read_only=True for all SELECTs. This avoids accidentally holding write locks.
- The design required using existing analysis.* modules. compute_2d_axes is used as-is and wrapped by @st.cache_data; we rely on it to perform heavy PCA/SVD logic.
- The similarity recompute step uses similarity.compute.compute_similarities per-window. The design referenced recompute_all_windows which did not exist in the repo; we use a small loop (shown above) to call compute_similarities per window.
*** End Plan

@ -1,286 +0,0 @@
# StemAtlas Deployment — Implementation Plan
**Design:** `thoughts/shared/designs/2026-03-22-stematlas-deployment-design.md`
**Date:** 2026-03-22
---
## Overview
Four batches. Batches A and B are independent of each other and can run in parallel. Batch C requires the pipeline to finish first. Batch D is VPS infrastructure (manual steps, done once).
```
Batch A: stemwijzer repo — Streamlit multi-page + Docker
Batch B: sgeboers.nl repo — blog/, nav, blog post HTML skeleton
Batch C: Charts — generate + embed (after pipeline finishes)
Batch D: VPS infrastructure — Nginx vhost + Certbot + /srv/stematlas/
```
---
## Batch A — stemwijzer repo: Streamlit multi-page + Docker
### A1. Check Dockerfile
Read existing `Dockerfile` — verify it installs all deps from `pyproject.toml` and sets `CMD` to start the app. Note current entrypoint (probably `streamlit run app.py`).
### A2. Create `Home.py`
New file at project root. Streamlit landing/about page:
- Title: "StemAtlas"
- Brief description of the two pages (quiz + explorer)
- Links (Streamlit sidebar nav handles the rest automatically)
- `st.page_link()` cards pointing to the two pages
### A3. Create `pages/1_Stemwijzer.py`
Thin wrapper that imports and calls `app.main()`:
- Import `from app import main`
- Remove the `if __name__ == "__main__": main()` guard from `app.py` (or keep it — the guarded call does not run when `app.py` is imported as a module rather than executed as the main script)
- The page title shown in Streamlit nav comes from the filename: `1_Stemwijzer` → "Stemwijzer"
### A4. Create `pages/2_Explorer.py`
Same pattern:
- Import `from explorer import run_app`
- Call `run_app()`
- Filename → nav label: "Explorer"
### A5. Update Dockerfile CMD
Change entrypoint from `streamlit run app.py` to `streamlit run Home.py --server.port 8501 --server.address 0.0.0.0`.
### A6. Create `docker-compose.yml`
Two services in the stemwijzer repo:
```yaml
version: "3.9"
services:
stematlas:
image: ${DOCKER_REGISTRY}/sgeboers/stemwijzer:latest
ports:
- "127.0.0.1:8501:8501"
volumes:
- /srv/stematlas/data:/app/data
restart: unless-stopped
environment:
- DB_PATH=/app/data/motions.db
scheduler:
image: ${DOCKER_REGISTRY}/sgeboers/stemwijzer:latest
command: python scheduler.py
volumes:
- /srv/stematlas/data:/app/data
restart: unless-stopped
environment:
- DB_PATH=/app/data/motions.db
```
`127.0.0.1:8501` — only accessible from localhost, Nginx proxies externally.
### A7. Smoke test for `Home.py`
Add `tests/test_home_import.py` — same pattern as `test_explorer_import.py`. Verify `Home` module is importable, `run_app` or equivalent callable exists.
### A8. Run tests
`.venv/bin/python -m pytest -q` — all existing + new smoke tests must pass.
### Verification
`docker build -t stematlas-local .` locally to confirm image builds without errors.
---
## Batch B — sgeboers.nl repo: blog/ + nav
> This batch requires access to the sgeboers.nl repo on git.sgeboers.nl.
> Steps below assume the repo is cloned locally.
### B1. Inspect existing site structure
Read `index.html` and any existing CSS files to understand:
- Current nav structure (header? sidebar? footer?)
- CSS class conventions for links/sections
- Any existing page patterns to copy for the blog post
### B2. Create `blog/` directory
Add `blog/index.html` — a minimal blog listing page:
- Title: "Blog"
- One entry: "StemAtlas — Mapping Dutch Democracy" → `blog/stematlas.html`
- Matches existing site style
### B3. Add nav link to main site
Update `index.html` (or whichever file contains the nav) to add a "Blog" link pointing to `/blog/`.
### B4. Create `blog/stematlas.html` skeleton
Full blog post HTML based on `thoughts/blog-post-political-compass.md`:
- Convert markdown to HTML (headings, paragraphs, code blocks, tables)
- Add Plotly CDN `<script>` in `<head>`
- **Chart placeholders**: `<!-- CHART: compass_latest -->`, `<!-- CHART: trajectories -->` — to be filled in Batch C
- Add two CTAs linking to `stematlas.sgeboers.nl`:
- After compass chart: *"Explore every window interactively →"*
- At bottom: *"Try the Stemwijzer quiz →"*
- Match existing site CSS (link the same stylesheet)
### B5. Update Drone pipeline (sgeboers.nl repo)
Confirm the existing `.drone.yml` in sgeboers.nl picks up new files under `blog/` automatically (it should, if it deploys the whole repo root). No changes needed if it's already a `rsync` or `cp -r` deploy.
### Verification
Open `blog/stematlas.html` locally in browser — post renders correctly with placeholder chart divs, nav works.
---
## Batch C — Charts: generate + embed (after pipeline finishes ~21:40)
> Requires `data/motions.db` to be unlocked (pipeline complete).
### C1. Run tests
`.venv/bin/python -m pytest -q` — confirm all pass now that DB is free.
### C2. Recompute fused embeddings (fusion-only pipeline run)
```
.venv/bin/python -m pipeline.run_pipeline \
--db-path data/motions.db \
--start-date 2019-01-01 --end-date 2025-01-01 \
--window-size quarterly \
--skip-metadata --skip-extract --skip-svd --skip-text
```
Fusion only — fills `fused_embeddings` for new 2019–2021 and 2024 windows.
### C3. Recompute similarity cache
```
.venv/bin/python -c "
from similarity.compute import compute_similarities
import duckdb
conn = duckdb.connect('data/motions.db', read_only=True)
windows = [r[0] for r in conn.execute(\"SELECT DISTINCT window_id FROM fused_embeddings ORDER BY 1\").fetchall()]
conn.close()
for w in windows:
print(f'Computing {w}...')
compute_similarities('data/motions.db', w, top_k=20)
"
```
### C4. Generate compass HTML files
```
.venv/bin/python scripts/generate_compass.py \
--db data/motions.db \
--out outputs/blog-charts \
--method pca --pca-residual
```
This produces `outputs/blog-charts/compass_*.html` and `outputs/blog-charts/trajectories_*.html`.
### C5. Extract Plotly snippets
For each chart file, extract the embeddable snippet:
```python
# Run once per chart to get embeddable HTML
import plotly.io as pio
# OR: just strip everything outside <div id="..."> and its <script>
# The generate_compass.py output is self-contained — use BeautifulSoup or
# manual extraction to get just the div+script block
```
Simpler: modify `generate_compass.py` to add a `--partial` flag that calls `fig.to_html(include_plotlyjs=False, full_html=False)` and writes `.partial.html` files alongside the full ones.
### C6. Fill chart placeholders in blog post
Replace `<!-- CHART: compass_latest -->` and `<!-- CHART: trajectories -->` in `blog/stematlas.html` with the extracted Plotly div+script blocks.
### C7. Update motion count table in blog post
Run SQL to get authoritative counts:
```sql
SELECT strftime(date, '%Y') AS year, COUNT(*) AS motions
FROM motions
GROUP BY year ORDER BY year;
```
Replace placeholder numbers in `blog/stematlas.html` table.
### C8. Push sgeboers.nl repo
Commit and push `blog/stematlas.html` + `blog/index.html` + nav changes to git.sgeboers.nl → Drone deploys.
---
## Batch D — VPS infrastructure (manual, one-time)
> SSH into the VPS. Steps are sequential.
### D1. Create data directory
```bash
sudo mkdir -p /srv/stematlas/data
sudo chown $USER:$USER /srv/stematlas/data
```
### D2. Copy `motions.db` to VPS
From local machine:
```bash
rsync -avz --progress data/motions.db user@vps:/srv/stematlas/data/motions.db
```
~3.6GB transfer — takes a few minutes.
### D3. Add Nginx vhost
New file `/etc/nginx/sites-available/stematlas`:
```nginx
server {
listen 80;
server_name stematlas.sgeboers.nl;
return 301 https://$host$request_uri;
}
server {
listen 443 ssl;
server_name stematlas.sgeboers.nl;
# Let's Encrypt certs (Certbot fills these in)
ssl_certificate /etc/letsencrypt/live/stematlas.sgeboers.nl/fullchain.pem;
ssl_certificate_key /etc/letsencrypt/live/stematlas.sgeboers.nl/privkey.pem;
location / {
proxy_pass http://127.0.0.1:8501;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_read_timeout 86400;
}
}
```
Enable: `sudo ln -s /etc/nginx/sites-available/stematlas /etc/nginx/sites-enabled/`
### D4. Get Let's Encrypt cert
```bash
sudo certbot --nginx -d stematlas.sgeboers.nl
```
(Assumes Certbot is already installed and working for other subdomains on this VPS.)
### D5. First deploy
The Drone pipeline for the stemwijzer repo will handle future deploys. For the first deploy, either:
- Push a commit to trigger Drone, OR
- Manually on VPS: `cd /srv/stematlas && docker-compose pull && docker-compose up -d`
### D6. Verify
- `https://stematlas.sgeboers.nl` → Streamlit loads, shows Home.py
- Both pages accessible from Streamlit nav
- `docker-compose logs stematlas` — no errors
---
## Dependencies Between Batches
```
A (stemwijzer repo) ──► D5 (first deploy) ──► D6 (verify)
B (sgeboers.nl repo) ──► C8 (push blog)
C (charts) ──► C8 (push blog)
D1-D4 (VPS infra) ──► D5 (first deploy)
Pipeline finish (~21:40) ──► C1 (tests) ──► C2-C7 (charts)
```
Batches A and B are fully independent — can start now.
Batch C waits only for the pipeline to finish.
Batch D is VPS-side and independent of code changes.
---
## Estimated Effort
| Batch | Tasks | Est. Time |
|-------|-------|-----------|
| A | Multi-page Streamlit + docker-compose | 45 min |
| B | Blog HTML + nav (after inspecting site) | 60 min |
| C | Charts + embed (after pipeline) | 30 min |
| D | VPS infra (manual SSH) | 30 min |
| **Total** | | **~2.5 hours** |

@ -1,314 +0,0 @@
# motion content enrichment — implementation plan
Goal: Implement the prioritized incremental hardening from the design (2026-03-23) so the SyncFeed → embedding → fusion → similarity pipeline is more robust, observable, and testable. Break the work into small, independent micro-tasks (one file + its test per task) so many implementers can work in parallel.
Design doc: thoughts/shared/designs/2026-03-23-motion-content-enrichment-next-steps-design.md
Architecture summary (what I'll implement)
- Add a small audit API on MotionDatabase so code can record per-item failures in a stable place (or fall back to a ledger file if DuckDB is not present).
- Add a dedicated ai_provider retry/fallback wrapper that:
- retries failed batches (exponential backoff),
- on persistent failure retries missing items with smaller batch sizes,
- returns aligned embedding results (None for failed items),
- records persistent failures to audit_events (using MotionDatabase.append_audit_event).
- Wire text embedding pipeline to use the wrapper and return failed ids (so rerun script can retry them).
- Add a `--max-body-workers` CLI option to scripts/sync_motion_content.py, reduce default to 10 and add per-request retries.
- Add `--retry-missing` to scripts/rerun_embeddings.py: rerun missing failed items with smaller batches.
- Add a DB-side safety filter in similarity.compute to avoid inserting trivial 1.0 matches for very-short identical titles.
- Add a small QA script scripts/qa_similarity.py that samples windows/motions and writes a short JSON ledger for manual review.
- Add focused unit tests for the new behaviours (ai retry wrapper, DB audit append, sync body fetch retries, rerun retry mode, similarity filter, QA script).
Decisions / gap filling (why these concrete choices)
- Audit recording: implement MotionDatabase.append_audit_event that writes to audit_events table if present, else appends to thoughts/ledgers/audit_events.json. Rationale: migration SQL is a commented placeholder; making DB write optional keeps tests and CI safe; writing to ledgers is actionable and durable for triage.
- ai retry backoff params: default retries=3, initial_backoff=0.5s, jitter ±10%, fallback smaller_batch_size = max(1, batch_size // 2). Rationale: conservative defaults that map to design and are implementable/testable.
- fetch_body_text retries: 3 attempts per ext_id with small exponential backoff (0.5s). Use requests.adapters.HTTPAdapter(pool_connections=10, pool_maxsize=10) to limit pool size and avoid pool warnings. Default max workers lowered to 10.
- Interface changes: ensure_text_embeddings will return an extended result with failed_ids as a 5th element: (stored, skipped_existing, skipped_no_text, errors, failed_ids). I will update rerun_embeddings and its tests accordingly. Rationale: rerun needs failed ids; propagating as return value is simplest and testable.
- All new code uses logging.getLogger(__name__) (no print in library modules) to obey constraints.
- Tests will use monkeypatching/mocks to avoid network/DB dependencies.
Dependency graph (high level)
Batch 1 (foundation, parallel): tasks 1.1–1.4 (no interdeps except where noted).
Batch 2 (core, parallel): tasks 2.1–2.3 (depend on Batch 1).
Batch 3 (safety & QA, parallel): task 3.1 (depends on Batch 2 and Batch 1).
```
Batch 1 (parallel): 1.1, 1.2, 1.3, 1.4
Batch 2 (parallel): 2.1, 2.2, 2.3 [depends on Batch 1]
Batch 3 (parallel): 3.1 [depends on Batch 2]
```
---
## Batch 1: Foundation (parallel - 4 implementers)
All tasks in this batch have NO (external) dependencies except where noted.
### Task 1.1: MotionDatabase.append_audit_event
**Owner:** implementer (author)
**Estimate:** 2 hours
**Depends:** none
**Description:** Add an append_audit_event(...) helper to database.MotionDatabase. This method will attempt to INSERT a row into an audit_events table (if the table exists). If DuckDB is not available or the table does not exist, append the event to a JSON file under thoughts/ledgers/audit_events.json. This provides a stable place to record per-item failures without forcing a migration to run during tests/CI.
**File:** `database.py` (modify: add method)
**Test:** `tests/test_database_audit.py` (new)
Implementation notes (decisions):
- Signature: append_audit_event(actor_id: str | None, action: str, target_type: str | None = None, target_id: str | None = None, metadata: dict | None = None) -> bool
- Behavior:
- If duckdb is None: write (append) to thoughts/ledgers/audit_events.json as list of event objects (create file/dir as needed).
- If duckdb present: run "INSERT INTO audit_events (... )" wrapped in try/except; if table missing or INSERT fails, fall back to writing to the ledger file.
- Do not raise; log at appropriate levels and return True if recorded somewhere, False otherwise.
- Use uuid.uuid4() for id and UTC timestamp for created_at.
- Use logging.getLogger(__name__) for messages.
Test (complete list):
- tests/test_database_audit.py
- Case A (duckdb=None emulation): monkeypatch database.duckdb = None, ensure Ledger file created and content contains the event.
- Case B (duckdb present but table insertion raises): monkeypatch duckdb.connect to a MagicMock that raises on execute -> verify fallback to ledger file.
- Verify method returns True when written to ledger, and that JSON is valid.
Verify:
- pytest -q tests/test_database_audit.py
Commit message suggestion:
- feat(db): add append_audit_event helper to MotionDatabase (ledger fallback)
---
### Task 1.2: ai provider retry/fallback wrapper
**Owner:** implementer
**Estimate:** 3 hours
**Depends:** 1.1 (uses MotionDatabase.append_audit_event)
**Description:** Add a small module that wraps ai_provider.get_embeddings_batch to provide robust retries and fallback to smaller batch sizes. The wrapper returns a list of embeddings aligned with inputs; for items that permanently fail we return None in-place and record an audit event via MotionDatabase.append_audit_event.
**File:** `pipeline/ai_provider_wrapper.py` (new)
**Test:** `tests/test_ai_provider_wrapper.py` (new)
Implementation details:
- Provide function get_embeddings_with_retry(texts: list[str], motion_ids: list[int] | None = None, model: str | None = None, batch_size: int = 50, retries: int = 3) -> list[Optional[list[float]]]
- Approach:
- Iterate inputs in chunks of batch_size.
- For each chunk:
- Try ai_provider.get_embeddings_batch(chunk, model=model, batch_size=batch_size) up to `retries` with exponential backoff (initial_backoff=0.5s, jitter).
- If a chunk continuously fails, split the chunk into subchunks (smaller_batch_size = max(1, batch_size // 2)) and retry the subchunks with the same logic.
- If an individual text still fails, mark the corresponding index result as None and record an audit event via MotionDatabase.append_audit_event with action='embedding_failed' and metadata including model, exception message, and attempts.
- Return a results list of the same length as inputs (embedding lists or None).
- Use MotionDatabase(db_path=...) only if a db_path is provided in env/config or via optional parameter — by default use database.db (existing module-level db instance) to call append_audit_event.
- Keep function pure enough to be unit-tested by monkeypatching ai_provider.get_embeddings_batch and MotionDatabase.append_audit_event.
Test cases:
- test successful batch returns embeddings aligned to inputs
- test simulated transient failure where first attempt fails and second succeeds (observed retry)
- test persistent chunk failure triggers fallback to smaller chunks and eventual audit appended for failing items (verify append_audit_event called with expected metadata)
- tests use monkeypatch to stub ai_provider.get_embeddings_batch behavior and MotionDatabase.append_audit_event
Verify:
- pytest -q tests/test_ai_provider_wrapper.py
Commit:
- feat(pipeline): ai provider retry/fallback wrapper
---
### Task 1.3: QA script — scripts/qa_similarity.py
**Owner:** implementer
**Estimate:** 2 hours
**Depends:** none
**Description:** Add a small script that samples N motions across windows, runs similarity lookup for each sampled motion, asserts simple heuristics (e.g., top-5 are not all score==1.0 except identical IDs), and writes a JSON summary into thoughts/ledgers/qa_similarity_{timestamp}.json. This script is meant to be run manually/CI for a quick QA check.
**File:** `scripts/qa_similarity.py` (new)
**Test:** `tests/test_qa_similarity.py` (new)
Implementation notes:
- CLI: --db-path, --sample-size (default 50), --top-k (default 5)
- Implementation uses MotionDatabase to select a small set of motions and similarity.get_cached_similarities (or MotionDatabase.get_cached_similarities) to evaluate neighbors.
- The script returns a dict summary which is also written to a uniquely named JSON under thoughts/ledgers/.
- For tests, monkeypatch MotionDatabase to return deterministic samples and similarities; verify the script produces the expected JSON summary and returns reasonable pass/fail flags.
Verify:
- pytest -q tests/test_qa_similarity.py
- Run manually: python scripts/qa_similarity.py --db-path data/motions.db --sample-size 10
Commit:
- feat(scripts): add QA similarity sampling script and ledger writer
---
### Task 1.4: sync_motion_content — reduce concurrency, add per-ext_id retry, add CLI flag
**Owner:** implementer
**Estimate:** 3 hours
**Depends:** 1.1 (write failures to audit via MotionDatabase.append_audit_event)
**Description:** Harden the body text fetcher:
- Add CLI flag `--max-body-workers` (default reduced to 10).
- Use requests.adapters.HTTPAdapter(pool_connections=10, pool_maxsize=10) when creating the requests.Session in sync_motion_content.
- Implement per-ext_id retry in _fetch_body_text: try up to 3 times with exponential backoff on network errors/5xx/429.
- When a body_text fetch permanently fails, call MotionDatabase.append_audit_event(action='body_fetch_failed', target_type='document', target_id=ext_id, metadata=...) so failures are recorded.
**File:** `scripts/sync_motion_content.py` (modify)
**Test:** `tests/test_sync_motion_content.py` (new)
Implementation details:
- Add parser.add_argument("--max-body-workers", type=int, default=10, help=...) in CLI
- When creating session: build an adapter with pool_maxsize equal to max_body_workers (requests.adapters.HTTPAdapter(pool_maxsize=...)) and register it with session.mount("https://", adapter) (mount it for "http://" as well if plain-HTTP URLs are fetched).
- Modify _fetch_body_text(ext_id, session) to attempt up to 3 tries and return None on exhaustion; log appropriately; call db.append_audit_event when permanently failing (db from database.db).
- Update fetch_body_texts to pass max_workers param through as already implemented, but default constant MAX_BODY_WORKERS should be set to 10 at top of file.
Test plan:
- Test that _fetch_body_text retries: monkeypatch session.get to raise requests.ConnectionError on the first call and succeed on the second; verify the body text is returned and that session.get was called exactly twice.
- Test permanent failure case: monkeypatch session.get to always raise and verify MotionDatabase.append_audit_event was called (monkeypatch database.db.append_audit_event).
- Test fetch_body_texts respects max_workers param by running small set and monkeypatching ThreadPoolExecutor to observe max_workers argument (or call with small size and assert function returns mapped results).
Verify:
- pytest -q tests/test_sync_motion_content.py
- Manual run: python scripts/sync_motion_content.py --db-path data/motions.db --max-body-workers 10
Commit:
- feat(sync): add per-ext_id retries and --max-body-workers flag (defaults to 10), record failures to audit
---
## Batch 2: Core modules (parallel - 3 implementers)
These tasks depend on Batch 1 (ai wrapper and audit method must be present).
### Task 2.1: text_pipeline — use ai wrapper & return failed_ids
**Owner:** implementer
**Estimate:** 3 hours
**Depends:** 1.2 (ai_provider_wrapper) and 1.1 (audit)
**Description:** Modify pipeline/text_pipeline.py to call the new ai_provider_wrapper.get_embeddings_with_retry instead of ai_provider.get_embeddings_batch. Extend ensure_text_embeddings to collect indexes/ids of motions which failed to get embeddings and return them as a fifth element: (stored, skipped_existing, skipped_no_text, errors, failed_ids). Keep logging behavior similar but include a log line reporting failed_ids for the run.
**File:** `pipeline/text_pipeline.py` (modify)
**Test:** `tests/test_text_pipeline_retry.py` (new)
Implementation details:
- Replace the ai_provider.get_embeddings_batch(batch_texts, ...) call with wrapper.get_embeddings_with_retry(batch_texts, batch_ids, model=model, batch_size=batch_size, retries=3).
- The wrapper returns list aligned with batch_texts containing either embedding list or None. For each None, increment errors and append motion_id to failed_ids.
- At the end of ensure_text_embeddings, return stored, skipped_existing, skipped_no_text, errors, failed_ids.
- Also ensure docstring updated.
- Keep existing counting and logging; existing callers will be updated in Task 2.2.
Test plan:
- Unit test that ensure_text_embeddings:
- when wrapper returns embeddings for all batch items, stored increments as expected.
- when wrapper returns None for some items, those motion_ids included in failed_ids and errors counts reflect them.
- Use monkeypatch to stub pipeline.ai_provider_wrapper.get_embeddings_with_retry and database.db.store_embedding.
Verify:
- pytest -q tests/test_text_pipeline_retry.py
Commit:
- feat(pipeline): use ai_provider wrapper for robust embeddings and return failed ids
---
### Task 2.2: rerun_embeddings — add --retry-missing mode and wire re-run
**Owner:** implementer
**Estimate:** 2.5 hours
**Depends:** 2.1 (ensure_text_embeddings new return)
**Description:** Add a CLI flag `--retry-missing` to scripts/rerun_embeddings.py. When set, after the main ensure_text_embeddings call, if the returned `failed_ids` list is non-empty, re-run embedding for just those failed motion ids using a smaller batch_size (e.g., half). Two approaches were considered: extending ensure_text_embeddings with an argument that limits the run to a provided list of motion ids, or adding a dedicated helper in text_pipeline. This plan uses the dedicated helper, `text_pipeline.ensure_text_embeddings_for_ids(...)` (see Implementation notes). The script should record audit events for items that still fail after retry.
**File:** `scripts/rerun_embeddings.py` (modify)
**Test:** `tests/test_rerun_embeddings.py` (modify — existing test)
Implementation notes:
- Add parser.add_argument("--retry-missing", action="store_true", help=...).
- After the first ensure_text_embeddings call, expect a 5-tuple. If retry_missing is set and failed_ids is non-empty, run a second short pass: call text_pipeline.ensure_text_embeddings_for_ids(db_path=db_path, ids=failed_ids, model=model, batch_size=max(1, batch_size // 2)). Alternative considered: reuse ensure_text_embeddings by adding an optional parameter that accepts a list of motion ids. Implementation choice: add a new helper function in text_pipeline called ensure_text_embeddings_for_ids, and use it here.
- Update tests/test_rerun_embeddings.py to monkeypatch the new text_pipeline helper and simulate that first call returns failed_ids and second call resolves them; assert rerun called accordingly and summary contains expected fields.
Test changes:
- Update tests/test_rerun_embeddings.py to reflect that text_pipeline.ensure_text_embeddings returns five values and to simulate --retry-missing behavior.
- Keep the existing expectations in the test (we will extend them to include failed_ids handling).
Verify:
- pytest -q tests/test_rerun_embeddings.py
- Manual run: python scripts/rerun_embeddings.py --db-path data/motions.db --retry-missing
Commit:
- feat(scripts): add --retry-missing to rerun_embeddings and retry failed items with smaller batches
---
### Task 2.3: similarity.compute — DB-side safety filter to avoid trivial 1.0 matches
**Owner:** implementer
**Estimate:** 3 hours
**Depends:** none (reads existing DB)
**Description:** Add a small DB-side filter before inserting similarity rows that filters out suspicious 1.0 matches between different motions when the titles are extremely short (heuristic: identical titles with length < 12 characters). Add diagnostic logging for filtered pairs.
**File:** `similarity/compute.py` (modify)
**Test:** `tests/test_similarity_compute_filter.py` (new)
Implementation details:
- After building rows_to_insert (list of dicts with source/target ids & score), perform:
- If score == 1.0 (or very near 1.0 with tolerance e.g., > 0.999999), fetch titles for the set of involved ids (single query: SELECT id, title FROM motions WHERE id IN (...)).
- For each candidate row with perfect/near-perfect score, if motion titles are equal and len(title.strip()) < 12, skip insertion and log debug/info that pair was filtered due to trivial short identical title.
- The threshold 12 chosen conservatively (document in commit).
- Keep inserted count and return behavior unchanged.
- Make sure DB connections are opened/closed per method.
Test plan:
- Construct a minimal in-memory or duckdb-mocked scenario where two different motion ids have identical short title and their vectors produce 1.0 similarity. Monkeypatch duckdb.connect to return rows such that compute_similarities will produce rows_to_insert including a 1.0. Verify store_similarity_batch is not called for that row (monkeypatch MotionDatabase.store_similarity_batch or spy on db.store_similarity_batch calls).
Verify:
- pytest -q tests/test_similarity_compute_filter.py
Commit:
- fix(similarity): filter trivial 1.0 matches for very-short identical titles
---
## Batch 3: Observability / Integration (parallel - 1-2 implementers)
These are small finishing tasks (audit/ledgers, small extras).
### Task 3.1: Tests & CI adjustments, docs, ledger examples
**Owner:** reviewer (PR reviewer)
**Estimate:** 2 hours
**Depends:** all tasks above (1.1–2.3)
**Description:** After the code is in, run full test suite, fix any flaky tests, add short README note in thoughts/ledgers/ about how to run QA script and how audit_events fallback works. Add a small example ledger created by QA script if helpful.
**Files:** (changes/additions)
- `thoughts/shared/plans/2026-03-23-motion-content-enrichment-plan.md` (this plan — created)
- `thoughts/ledgers/README_motion_enrichment.md` (new, optional)
- No dedicated unit test for this task; it's a reviewer/integration task.
Verification:
- Run full tests: pytest
- Run QA script locally: python scripts/qa_similarity.py --db-path data/motions.db --sample-size 10
- Inspect thoughts/ledgers/qa_similarity_*.json and audit_events ledger file.
Commit:
- docs(ledgers): document QA and audit fallback behavior
---
## Test / Verification summary (per-task commands)
- Task 1.1
- pytest -q tests/test_database_audit.py
- Task 1.2
- pytest -q tests/test_ai_provider_wrapper.py
- Task 1.3
- pytest -q tests/test_qa_similarity.py
- python scripts/qa_similarity.py --db-path data/motions.db --sample-size 10
- Task 1.4
- pytest -q tests/test_sync_motion_content.py
- python scripts/sync_motion_content.py --db-path data/motions.db --max-body-workers 10 --skip-body-text (dry run)
- Task 2.1
- pytest -q tests/test_text_pipeline_retry.py
- Task 2.2
- pytest -q tests/test_rerun_embeddings.py
- python scripts/rerun_embeddings.py --db-path data/motions.db --retry-missing
- Task 2.3
- pytest -q tests/test_similarity_compute_filter.py
Full suite verification:
- pytest -q
---
If you want I can now:
- generate the apply_patch to create the files and tests described (one patch containing all files), or
- create the plan file only (this document was requested) — I have it ready at: thoughts/shared/plans/2026-03-23-motion-content-enrichment-plan.md
Which would you like next?

@ -1,723 +0,0 @@
# Test Refactor: No Mocks Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Replace sys.modules injection, monkeypatching, and exception-swallowing in the 4 new test files with tests that run real production code against in-memory DuckDB and injected fake callables.
**Architecture:** Add optional `db` and `embedder` parameters to three pipeline functions (backwards-compatible defaults). Add `mem_db` and `fake_embedder` pytest fixtures to `tests/conftest.py`. Rewrite four test files to use these fixtures with no monkeypatching.
**Tech Stack:** Python, pytest, duckdb (`:memory:`), existing `MotionDatabase` class
---
## File Map
| File | Action | What changes |
|------|--------|-------------|
| `pipeline/ai_provider_wrapper.py` | Modify | Add `db=None` and `embedder=None` params to `get_embeddings_with_retry` |
| `pipeline/text_pipeline.py` | Modify | Add `db=None` and `embedder=None` overrides to `ensure_text_embeddings` and `ensure_text_embeddings_for_ids`; pass `embedder` into `ai_wrapper.get_embeddings_with_retry` |
| `similarity/compute.py` | Modify | Add `db=None` override param to `compute_similarities` |
| `tests/conftest.py` | Modify | Add `mem_db` and `fake_embedder` fixtures |
| `tests/test_database_audit.py` | Rewrite | Use `mem_db`; assert on the return value of `append_audit_event` (DB insert or ledger fallback) and that no exception escapes |
| `tests/test_ai_provider_wrapper.py` | Rewrite | Use `mem_db` + `fake_embedder`; test retry and audit event |
| `tests/test_rerun_embeddings_retry.py` | Rewrite | Remove sys.modules hack; use real pipeline with `fake_embedder` |
| `tests/test_similarity_compute_filter.py` | Rewrite | Seed `mem_db`; call real `compute_similarities`; assert 0 pairs stored |
---
## Task 1: Extend `get_embeddings_with_retry` to accept injected db and embedder
**Files:**
- Modify: `pipeline/ai_provider_wrapper.py`
- [ ] **Step 1: Read the current file**
Read `pipeline/ai_provider_wrapper.py` lines 1-110 to confirm current state.
- [ ] **Step 2: Write failing test (stub only — full test written in Task 5)**
```python
# In tests/test_ai_provider_wrapper.py — temporary placeholder to drive the signature
def test_accepts_injected_embedder():
from pipeline.ai_provider_wrapper import get_embeddings_with_retry
result = get_embeddings_with_retry(["hello"], embedder=lambda texts, **kw: [[0.1] * 4])
assert result == [[0.1] * 4]
```
Run: `.venv/bin/python -m pytest tests/test_ai_provider_wrapper.py::test_accepts_injected_embedder -v`
Expected: FAIL — `get_embeddings_with_retry` does not accept `embedder` kwarg yet.
- [ ] **Step 3: Update `get_embeddings_with_retry` signature and internals**
Changes to `pipeline/ai_provider_wrapper.py`:
1. Add two new parameters after `retries`:
- `db=None` — `MotionDatabase` instance; if `None` uses module-level `motion_db`
- `embedder=None` — callable with signature `(texts, model=None, batch_size=50) -> list[list[float]]`; if `None` uses module-level `get_embeddings_batch`
2. Inside `_attempt_batch`, replace the hard-coded call:
```python
emb_chunk = get_embeddings_batch(chunk_texts, model=model, batch_size=len(chunk_texts))
```
with:
```python
_embedder = embedder if embedder is not None else get_embeddings_batch
emb_chunk = _embedder(chunk_texts, model=model, batch_size=len(chunk_texts))
```
Note: `_embedder` is captured from the outer scope by the closure; define it in `get_embeddings_with_retry` before `_attempt_batch` is defined, e.g. `_embedder = embedder if embedder is not None else get_embeddings_batch`.
3. Replace the `motion_db.append_audit_event(...)` call at line 97 with:
```python
_db = db if db is not None else motion_db
_db.append_audit_event(...)
```
- [ ] **Step 4: Run placeholder test to verify it passes**
Run: `.venv/bin/python -m pytest tests/test_ai_provider_wrapper.py::test_accepts_injected_embedder -v`
Expected: PASS
- [ ] **Step 5: Run full test suite to check nothing is broken**
Run: `.venv/bin/python -m pytest -q`
Expected: All previously passing tests still pass.
- [ ] **Step 6: Commit**
```bash
git add pipeline/ai_provider_wrapper.py tests/test_ai_provider_wrapper.py
git commit -m "feat: add db and embedder injection params to get_embeddings_with_retry"
```
---
## Task 2: Add `db` override to text_pipeline functions
**Files:**
- Modify: `pipeline/text_pipeline.py`
- [ ] **Step 1: Read the current file**
Read `pipeline/text_pipeline.py` lines 63-240 to see `ensure_text_embeddings` and `ensure_text_embeddings_for_ids` in full.
- [ ] **Step 2: Update `ensure_text_embeddings` signature**
Current signature:
```python
def ensure_text_embeddings(
db_path: Optional[str] = None, model: Optional[str] = None, batch_size: int = 50
) -> Tuple[int, int, int, int, list]:
```
New signature (add `db` and `embedder` params):
```python
def ensure_text_embeddings(
db_path: Optional[str] = None,
model: Optional[str] = None,
batch_size: int = 50,
db: Optional["MotionDatabase"] = None,
embedder=None,
) -> Tuple[int, int, int, int, list]:
```
Inside the function body, change the db resolution line from:
```python
db = MotionDatabase(db_path) if db_path else default_db
```
to:
```python
if db is None:
db = MotionDatabase(db_path) if db_path else default_db
```
Pass `embedder=embedder` to `ai_wrapper.get_embeddings_with_retry(...)` call.
- [ ] **Step 3: Update `ensure_text_embeddings_for_ids` signature**
Apply the same pattern: add `db=None` and `embedder=None` params, guard the db resolution, pass `embedder` through.
- [ ] **Step 4: Run tests**
Run: `.venv/bin/python -m pytest -q`
Expected: All previously passing tests still pass.
- [ ] **Step 5: Commit**
```bash
git add pipeline/text_pipeline.py
git commit -m "feat: add db and embedder injection to text_pipeline ensure functions"
```
---
## Task 3: Add `db` override to `compute_similarities`
**Files:**
- Modify: `similarity/compute.py`
- [ ] **Step 1: Read the current file**
Read `similarity/compute.py` lines 13-30 to confirm the current `db` construction line.
- [ ] **Step 2: Update `compute_similarities` signature**
Current:
```python
def compute_similarities(
vector_type: str = "fused",
window_id: Optional[str] = None,
top_k: int = 10,
db_path: Optional[str] = None,
):
```
New:
```python
def compute_similarities(
vector_type: str = "fused",
window_id: Optional[str] = None,
top_k: int = 10,
db_path: Optional[str] = None,
db: Optional["MotionDatabase"] = None,
):
```
Change the db construction line from:
```python
db = MotionDatabase(db_path=db_path) if db_path is not None else MotionDatabase()
```
to:
```python
if db is None:
db = MotionDatabase(db_path=db_path) if db_path is not None else MotionDatabase()
```
Note: Be aware of the `duckdb.connect(db.db_path)` call at line 56, which still reads from `db.db_path`. For an injected `:memory:` DB this opens a new, empty database, so vector reads will return nothing. The function gracefully handles empty rows (returns 0). For the similarity filter test (Task 8), we'll inject data differently (a file-backed DB) — see Task 8.
- [ ] **Step 3: Run tests**
Run: `.venv/bin/python -m pytest -q`
Expected: All previously passing tests still pass.
- [ ] **Step 4: Commit**
```bash
git add similarity/compute.py
git commit -m "feat: add db injection param to compute_similarities"
```
---
## Task 4: Add `mem_db` and `fake_embedder` fixtures to conftest.py
**Files:**
- Modify: `tests/conftest.py`
- [ ] **Step 1: Read current conftest.py**
Read `tests/conftest.py` in full to understand existing fixtures.
- [ ] **Step 2: Add `mem_db` fixture**
Add this fixture to `tests/conftest.py`:
```python
import pytest
from database import MotionDatabase
@pytest.fixture
def mem_db():
"""In-memory MotionDatabase with full schema. No filesystem side effects."""
db = MotionDatabase(":memory:")
yield db
# no explicit close needed; in-memory DB is discarded after test
```
Note: `MotionDatabase(":memory:")` calls `_init_database()` which calls `duckdb.connect(":memory:")` and creates all tables. Each call to `duckdb.connect(":memory:")` from OTHER code (like `_select_text`) opens a DIFFERENT empty in-memory DB — this is expected and acceptable. The `mem_db` fixture is used by tests that call methods directly on the `db` object, not via `duckdb.connect(db.db_path)`.
- [ ] **Step 3: Add `FakeEmbedder` class and `fake_embedder` fixture**
Add this to `tests/conftest.py` (after the imports):
```python
class FakeEmbedder:
"""Real callable that returns deterministic embeddings. No network calls.
Raises RuntimeError for any text whose index is in fail_indices.
"""
def __init__(self, fail_indices=None, vector_size=8):
self.fail_indices = set(fail_indices or [])
self.vector_size = vector_size
self.call_count = 0
self.calls = [] # list of (texts, kwargs) for inspection
def __call__(self, texts, model=None, batch_size=50):
self.call_count += 1
self.calls.append((list(texts), {"model": model, "batch_size": batch_size}))
results = []
for i, text in enumerate(texts):
if i in self.fail_indices:
raise RuntimeError(f"Simulated embedding failure for index {i}: {text!r}")
results.append([0.1 * (i + 1)] * self.vector_size)
return results
@pytest.fixture
def fake_embedder():
"""FakeEmbedder with no failures by default. Customize via FakeEmbedder(fail_indices=[...])."""
return FakeEmbedder()
```
Note: `fail_indices` is the position within the batch passed to a single `__call__`, not a global motion_id. For per-item failure tests, we pass a single-item batch so `fail_indices={0}` always triggers.
- [ ] **Step 4: Verify fixtures are importable**
Write a quick smoke test and run it:
```bash
.venv/bin/python -m pytest tests/conftest.py --collect-only -q
```
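Expected: No import errors. `conftest.py` contains no tests, so pytest reports that no tests were collected; that is fine here. To confirm the fixtures are registered, run `.venv/bin/python -m pytest --fixtures tests/` and check that `mem_db` and `fake_embedder` are listed.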
- [ ] **Step 5: Commit**
```bash
git add tests/conftest.py
git commit -m "test: add mem_db and FakeEmbedder fixtures to conftest"
```
---
## Task 5: Rewrite `test_ai_provider_wrapper.py`
**Files:**
- Rewrite: `tests/test_ai_provider_wrapper.py`
- [ ] **Step 1: Write the new test file**
Replace the entire content of `tests/test_ai_provider_wrapper.py` with:
```python
"""Tests for pipeline.ai_provider_wrapper — no monkeypatching, no mocks."""
import pipeline.ai_provider_wrapper as w
from tests.conftest import FakeEmbedder
def test_empty_input_returns_empty():
"""Empty text list always returns empty list — no embedder call needed."""
result = w.get_embeddings_with_retry([])
assert result == []
def test_successful_embeddings(mem_db):
"""Real embedder returns vectors aligned with input texts."""
embedder = FakeEmbedder()
result = w.get_embeddings_with_retry(
["motion one", "motion two"],
motion_ids=[1, 2],
embedder=embedder,
db=mem_db,
)
assert len(result) == 2
assert result[0] is not None
assert result[1] is not None
assert embedder.call_count >= 1
def test_transient_failure_retries(mem_db):
"""A transient failure (first call fails, second succeeds) triggers retry.
We use a stateful embedder that fails on the first call only.
"""
class TransientEmbedder:
def __init__(self):
self.call_count = 0
def __call__(self, texts, model=None, batch_size=50):
self.call_count += 1
if self.call_count == 1:
raise RuntimeError("Transient network error")
return [[0.5] * 8 for _ in texts]
embedder = TransientEmbedder()
result = w.get_embeddings_with_retry(
["motion text"],
motion_ids=[42],
embedder=embedder,
db=mem_db,
retries=3,
)
# After retry, should succeed
assert result[0] is not None
assert embedder.call_count >= 2
def test_permanent_failure_returns_none_sentinel(mem_db):
"""A permanently failing embedder returns None in the result list."""
# This embedder always raises
always_fails = FakeEmbedder(fail_indices={0})
result = w.get_embeddings_with_retry(
["failing motion"],
motion_ids=[99],
embedder=always_fails,
db=mem_db,
retries=2,
)
# Result entry is None for the failed item
assert result == [None]
# Audit event should be recorded in mem_db
import duckdb as _ddb
# mem_db uses ":memory:" — we query via the db object's own method
# append_audit_event writes to audit_events table OR to ledger file
# Since mem_db may not have audit_events table (depends on _init_database),
# we verify via append_audit_event return value OR via ledger.
# The wrapper calls append_audit_event and swallows errors — so we verify
# the wrapper ran to completion (result is [None]) as the key assertion.
# If you want to assert the audit event itself, call mem_db.append_audit_event
# directly in a separate test (see test_database_audit.py).
```
- [ ] **Step 2: Run the new tests**
Run: `.venv/bin/python -m pytest tests/test_ai_provider_wrapper.py -v`
Expected: All 4 tests PASS.
If `test_transient_failure_retries` is slow due to `time.sleep` in the real retry loop, note: `retries=3` with 0.5s base backoff is ~1.5s total. Acceptable for a real test. If too slow, pass `retries=2`.
- [ ] **Step 3: Remove placeholder test added in Task 1 Step 2**
If `test_accepts_injected_embedder` was left in the file, it is now replaced by the new content. Confirm the file only contains the 4 new tests.
- [ ] **Step 4: Run full suite**
Run: `.venv/bin/python -m pytest -q`
Expected: All tests pass.
- [ ] **Step 5: Commit**
```bash
git add tests/test_ai_provider_wrapper.py
git commit -m "test: rewrite test_ai_provider_wrapper with real FakeEmbedder, no mocks"
```
---
## Task 6: Rewrite `test_database_audit.py`
**Files:**
- Rewrite: `tests/test_database_audit.py`
- [ ] **Step 1: Understand `append_audit_event` DB vs. ledger behavior**
The method at `database.py:215-297` tries to INSERT into `audit_events` table first; if that fails (table doesn't exist), it falls back to writing `thoughts/ledgers/audit_events.json`. We need to know if `_init_database` creates `audit_events`. Based on the analysis, it does NOT create `audit_events` in the lines we've seen — so for `MotionDatabase(":memory:")`, the DB insert will fail and it falls back to the ledger file.
Two options:
1. Accept the fallback and test the returned `True`/`False` value + that no exception escapes
2. Pre-create the `audit_events` table in the `mem_db` fixture before the test
Use option 1 for simplicity — the key contract is "append_audit_event returns True and doesn't raise".
- [ ] **Step 2: Write new test file**
Replace `tests/test_database_audit.py` with:
```python
"""Tests for MotionDatabase.append_audit_event — no filesystem side effects on audit path."""
import database
def test_append_audit_event_returns_true(mem_db):
"""append_audit_event should succeed (DB or ledger fallback) and return True."""
ok = mem_db.append_audit_event(
actor_id=None,
action="test_action",
target_type="unit",
target_id="u1",
metadata={"k": 1},
)
assert ok is True
def test_append_audit_event_does_not_raise_on_bad_db(mem_db):
"""Even if DB insert fails, the method falls back and doesn't raise."""
# Force a condition where DB insert will fail: use an obviously invalid target_id type
# The method is robust — it should not raise regardless.
ok = mem_db.append_audit_event(
actor_id=None,
action="another_action",
target_type="motion",
target_id=None,
metadata={},
)
# Returns True or False, but must not raise
assert isinstance(ok, bool)
```
Note: These tests no longer assert on the contents of `thoughts/ledgers/audit_events.json`, but they may still write to it as a side effect — with the `mem_db` `:memory:` path the DB insert is attempted first, and if it fails (e.g. because the `audit_events` table doesn't exist) the method falls back to the ledger file. This is acceptable. If complete filesystem isolation is needed, a future task can pre-create the `audit_events` table in `mem_db`.
- [ ] **Step 3: Run new tests**
Run: `.venv/bin/python -m pytest tests/test_database_audit.py -v`
Expected: Both tests PASS.
- [ ] **Step 4: Run full suite**
Run: `.venv/bin/python -m pytest -q`
Expected: All tests pass.
- [ ] **Step 5: Commit**
```bash
git add tests/test_database_audit.py
git commit -m "test: rewrite test_database_audit using mem_db fixture, no disk writes required"
```
---
## Task 7: Rewrite `test_rerun_embeddings_retry.py`
**Files:**
- Rewrite: `tests/test_rerun_embeddings_retry.py`
Context: `rerun.rerun_embeddings` calls `text_pipeline.ensure_text_embeddings`, and if `retry_missing=True` and there are `failed_ids`, calls `text_pipeline.ensure_text_embeddings_for_ids`. We now have real implementations that accept `db` and `embedder` params. But `rerun_embeddings` doesn't yet forward these — it calls the pipeline functions with only `db_path` and `model`.
Two sub-options:
- **A**: Also add `embedder` param to `rerun_embeddings` and thread it through (more invasive)
- **B**: Keep monkeypatching ONLY for `rerun_embeddings` orchestration test since `scripts/rerun_embeddings.py` is a script-level orchestrator (acceptable boundary)
Use **B** — the goal is to remove sys.modules hacks and meaningless patches. Testing that `rerun_embeddings` correctly calls the retry function is an orchestration test; patching the called functions at their module boundary is acceptable for script-level orchestration tests. Remove only the `sys.modules` fake duckdb injection.
- [ ] **Step 1: Check if `import duckdb` in test environment is resolved**
Run: `.venv/bin/python -c "import duckdb; print(duckdb.__version__)"`
Expected: prints a version number (duckdb IS installed in .venv).
- [ ] **Step 2: Write new test file**
Replace `tests/test_rerun_embeddings_retry.py` with:
```python
"""Tests for scripts.rerun_embeddings retry orchestration.
No sys.modules tricks needed — duckdb is available in .venv.
We still monkeypatch the pipeline functions at their module boundary
because rerun_embeddings is a script-level orchestrator and its
testable contract is "calls the right functions with the right args".
"""
import scripts.rerun_embeddings as rerun
import pipeline.text_pipeline as tp
def test_rerun_retries_missing(monkeypatch):
"""When ensure_text_embeddings returns failed_ids, retry helper is called."""
monkeypatch.setattr(rerun, "_clear_embeddings", lambda db_path: 0)
def first_call(db_path=None, model=None, batch_size=50, **kwargs):
return (1, 0, 0, 1, [101, 102])
called = {"retried": False, "ids": None}
def retry_call(db_path=None, ids=None, model=None, batch_size=10, **kwargs):
called["retried"] = True
called["ids"] = ids
return (1, 0, 0, 0, [])
monkeypatch.setattr(tp, "ensure_text_embeddings", first_call)
monkeypatch.setattr(tp, "ensure_text_embeddings_for_ids", retry_call)
summary = rerun.rerun_embeddings(
"data/motions.db", model="test-model", retry_missing=True
)
assert called["retried"] is True
assert set(called["ids"]) == {101, 102}
def test_rerun_no_retry_when_no_failures(monkeypatch):
"""When ensure_text_embeddings returns no failed_ids, retry is NOT called."""
monkeypatch.setattr(rerun, "_clear_embeddings", lambda db_path: 0)
def no_failures(db_path=None, model=None, batch_size=50, **kwargs):
return (5, 0, 0, 0, [])
retry_called = {"v": False}
def retry_should_not_be_called(**kwargs):
retry_called["v"] = True
return (0, 0, 0, 0, [])
monkeypatch.setattr(tp, "ensure_text_embeddings", no_failures)
monkeypatch.setattr(tp, "ensure_text_embeddings_for_ids", retry_should_not_be_called)
rerun.rerun_embeddings("data/motions.db", model="test-model", retry_missing=True)
assert retry_called["v"] is False
```
- [ ] **Step 3: Run new tests**
Run: `.venv/bin/python -m pytest tests/test_rerun_embeddings_retry.py -v`
Expected: Both tests PASS — no sys.modules injection, no fake duckdb.
- [ ] **Step 4: Run full suite**
Run: `.venv/bin/python -m pytest -q`
Expected: All tests pass.
- [ ] **Step 5: Commit**
```bash
git add tests/test_rerun_embeddings_retry.py
git commit -m "test: rewrite rerun_embeddings retry test, remove sys.modules fake duckdb"
```
---
## Task 8: Rewrite `test_similarity_compute_filter.py`
**Files:**
- Rewrite: `tests/test_similarity_compute_filter.py`
Context: `compute_similarities` loads vectors via `duckdb.connect(db.db_path)`. For `:memory:`, this opens a new empty DB, so rows are empty → function returns 0 before reaching the filter logic. To test the filter, we need to:
1. Seed the `mem_db` with motion titles AND embeddings/fused_embeddings
2. Use a `db_path` that `duckdb.connect` can re-open with data
This is the limitation of `duckdb.connect(":memory:")` — each call gets its own empty DB.
**Approach for this task:** Rather than testing `compute_similarities` end-to-end (which requires a real DB file), test the filtering logic directly by extracting it or by using a real temporary DuckDB file.
Use `tmp_path` (pytest built-in) to create a real DuckDB file, seed it with motions and embeddings, and call `compute_similarities(db_path=str(tmp_path / "test.db"))`.
- [ ] **Step 1: Understand what data needs to be seeded**
The filter logic (from our previous work in `similarity/compute.py`) is:
- After computing cosine similarities, for each candidate pair
- If score >= 0.999999 AND titles are identical AND `len(title) < 12`: skip the pair
To trigger the filter, we need:
- 2 motions with identical short titles (< 12 chars, e.g. "Aangenomen.")
- Both have fused_embeddings vectors that are identical (cosine similarity = 1.0)
Seeding steps:
1. Create `MotionDatabase(str(tmp_path / "test.db"))` — creates schema
2. Insert 2 motions with identical short titles using `db.insert_motion(...)`
3. Insert identical vectors into `fused_embeddings` using `duckdb.connect(db_path)` directly
4. Call `compute_similarities(vector_type="fused", window_id=None, db_path=str(tmp_path / "test.db"))`
5. Assert return value == 0 (no pairs stored after filtering)
- [ ] **Step 2: Check `insert_motion` signature**
Read `database.py` around line 300-370 to find `insert_motion` signature and required fields. Note the minimum required fields.
- [ ] **Step 3: Write the test**
Replace `tests/test_similarity_compute_filter.py` with:
```python
"""Tests for similarity filter in compute_similarities — real DB, real code, no mocks."""
import json
import duckdb
from database import MotionDatabase
import similarity.compute as sc
def test_filter_skips_identical_short_title_pairs(tmp_path):
"""Pairs with identical short titles and perfect cosine similarity are filtered out."""
db_path = str(tmp_path / "test.db")
# 1. Initialize schema
db = MotionDatabase(db_path)
# 2. Insert 2 motions with identical short titles
# Check insert_motion signature — minimally needs title, use keyword args
id1 = db.insert_motion(title="Aangenomen.", description="desc1")
id2 = db.insert_motion(title="Aangenomen.", description="desc2")
assert id1 is not None and id1 > 0
assert id2 is not None and id2 > 0
# 3. Insert identical unit vectors into fused_embeddings
vec = [1.0] + [0.0] * 7 # 8-dim unit vector
vec_json = json.dumps(vec)
conn = duckdb.connect(db_path)
# Create fused_embeddings table if not already created by _init_database
# (it may be created by the fusion pipeline; add it here if missing)
conn.execute("""
CREATE TABLE IF NOT EXISTS fused_embeddings (
id INTEGER,
motion_id INTEGER,
window_id VARCHAR,
vector JSON
)
""")
conn.execute(
"INSERT INTO fused_embeddings VALUES (1, ?, NULL, ?)", (id1, vec_json)
)
conn.execute(
"INSERT INTO fused_embeddings VALUES (2, ?, NULL, ?)", (id2, vec_json)
)
conn.close()
# 4. Run compute_similarities
inserted = sc.compute_similarities(
vector_type="fused",
window_id=None,
db_path=db_path,
)
# 5. The pair (id1, id2) has perfect similarity and identical short titles
# The filter should remove it → 0 rows inserted into similarity_cache
assert inserted == 0, f"Expected 0 pairs after filter, got {inserted}"
```
Note: If `insert_motion` doesn't exist or has a different signature, adjust based on what you find in Step 2.
- [ ] **Step 4: Run the test**
Run: `.venv/bin/python -m pytest tests/test_similarity_compute_filter.py -v`
Expected: PASS.
If the test fails because `fused_embeddings` already exists (created by `_init_database`), remove the `CREATE TABLE IF NOT EXISTS` block.
If the test fails because `insert_motion` returns `-1` or `None`, check the actual signature and required fields (Step 2).
- [ ] **Step 5: Run full suite**
Run: `.venv/bin/python -m pytest -q`
Expected: All tests pass.
- [ ] **Step 6: Commit**
```bash
git add tests/test_similarity_compute_filter.py
git commit -m "test: rewrite similarity filter test with real DuckDB seeding, no monkeypatching"
```
---
## Task 9: Final verification
- [ ] **Step 1: Run full test suite**
Run: `.venv/bin/python -m pytest -q`
Expected: All tests pass. Zero sys.modules hacks. Zero test files that swallow exceptions with bare `except: pass`.
- [ ] **Step 2: Grep for remaining sys.modules hacks**
Run: `grep -r "sys.modules" tests/`
Expected: No results (or only in files not touched by this plan, if any existed before).
- [ ] **Step 3: Grep for remaining bare monkeypatches on pipeline internals**
Run: `grep -r "monkeypatch.setattr" tests/`
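Expected: Among the files touched by this plan, only appears in `test_rerun_embeddings_retry.py` (script-level orchestration, which is acceptable). Matches in test files that existed before this plan are out of scope.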
- [ ] **Step 4: Commit final state if any cleanup was needed**
```bash
git add -A
git commit -m "test: complete test refactor - real implementations, no sys.modules hacks"
```

@ -1,281 +0,0 @@
---
date: 2026-03-24
topic: "mindmodel-generation"
status: draft
---
# Mindmodel generation - Implementation Plan
Goal: Integrate a generated .mindmodel/ snapshot safely via an audit-first, incremental approach. Add a report-only validator, CI validation job, and a small set of conservative remediation changes (dev-deps, formatter configs) in separate low-risk PRs.
Design: thoughts/shared/designs/2026-03-24-mindmodel-generation-design.md
Important constraints taken from the design doc:
- Keep the generated .mindmodel/ files read-only until validated.
- Do not make behavioral changes to production code in the same change as model metadata updates.
- Avoid committing secrets or lockfiles without explicit review.
- Validator must be report-only by default (non-blocking), CI job should surface issues but not fail merges at first.
---
## Dependency Graph
```
Batch 1 (parallel): 1.1, 1.2, 1.3, 1.4
Batch 2 (parallel): 2.1, 2.2, 2.3 [depends on Batch 1]
Batch 3 (parallel): 3.1, 3.2 [depends on Batch 1]
Batch 4 (parallel): 4.1, 4.2 [depends on Batches 1-3]
```
Notes: each microtask is one file + its test when applicable. Config/docs-only files may be standalone (no test) per project conventions.
---
## Batch 1: Foundation (parallel - 4 implementers)
All tasks in this batch have NO dependencies and can run simultaneously.
### Task 1.1: Validator module (skeleton)
**File:** `src/validators/mindmodel_validator.py`
**Test:** `tests/validators/test_mindmodel_validator.py`
**Depends:** none
**Effort:** S
Purpose: Provide a conservative, report-only validator API that consumes a .mindmodel/ manifest and emits a structured report (missing files, truncated evidence, potential secrets). Implementation will be a safe skeleton (no auto-fixes). The module will expose a function validate_manifest(manifest_path: str, report_only: bool = True) -> dict.
Verify locally:
- python -m pytest tests/validators/test_mindmodel_validator.py::test_validator_reports_missing_file
Commit message suggestion: `feat(mindmodel): add report-only validator skeleton`
### Task 1.2: Manifest types and helpers
**File:** `src/validators/types.py`
**Test:** `tests/validators/test_types.py`
**Depends:** none
**Effort:** S
Purpose: Define small dataclasses / pydantic models (or simple typed dicts) used by the validator: Manifest, Constraint, EvidencePointer. Keep minimal: fields required by validator (file_path, evidence_excerpt, flags).
Verify locally:
- python -m pytest tests/validators/test_types.py::test_manifest_model_parses_sample
Commit message suggestion: `feat(mindmodel): add manifest types and helpers`
### Task 1.3: Add a read-only sample manifest (orchestrator output)
**File:** `.mindmodel/manifest.yaml`
**Test:** `tests/mindmodel/test_manifest_parse.py`
**Depends:** none
**Effort:** S
Purpose: Add the generated snapshot (or a sanitized copy) under .mindmodel/ in the repo as read-only content. The manifest should be explicitly marked in-file as "DO NOT EDIT - read-only until validated" and include a small sample of constraints (3-5) for validator development. Do NOT include secrets or lockfiles.
Verify locally:
- python -m pytest tests/mindmodel/test_manifest_parse.py::test_manifest_loads
Commit message suggestion: `chore(mindmodel): add read-only orchestrator manifest (sanitized)`
Notes: This PR must explicitly state the read-only policy in the description and request human review.
### Task 1.4: Design & integration doc (developer-facing)
**File:** `thoughts/shared/mindmodel/README.md`
**Test:** none
**Depends:** none
**Effort:** S
Purpose: Explain how the validator works, where the manifest lives, and reviewer checklist (check for secrets, truncated evidence). This is developer documentation to speed review.
Verify: manual review of the file in the PR.
Commit message suggestion: `docs(mindmodel): add README and reviewer checklist`
---
## Batch 2: Core modules (parallel - 3 implementers)
These tasks depend on Batch 1 (validator types + sample manifest present).
### Task 2.1: CLI wrapper to run validator
**File:** `scripts/validate_mindmodel.py`
**Test:** `tests/scripts/test_validate_cli.py`
**Depends:** 1.1, 1.2, 1.3
**Effort:** S
Purpose: Provide a tiny CLI that calls the validator and writes a structured JSON report to stdout and to `reports/mindmodel-report-YYYYMMDD.json`. Defaults: report-only = True. This lets local and CI runs use a single entrypoint.
Verify locally:
- python scripts/validate_mindmodel.py --manifest .mindmodel/manifest.yaml --report reports/tmp.json
- python -m pytest tests/scripts/test_validate_cli.py::test_cli_runs
Commit message suggestion: `chore(mindmodel): add CLI wrapper for validator`
### Task 2.2: Unit tests for validator edge cases
**File:** `tests/validators/test_validator_edgecases.py`
**Test:** (itself)
**Depends:** 1.1, 1.2
**Effort:** M
Purpose: Add unit tests that exercise key failure modes: missing files referenced by constraints, truncated evidence excerpts, evidence pointers that look like secrets (simple heuristics), and constraint marked needs-review. These tests will assert the validator reports issues (report-only) but do not raise exceptions.
Verify locally:
- python -m pytest tests/validators/test_validator_edgecases.py
Commit message suggestion: `test(mindmodel): add validator edge case tests`
### Task 2.3: Test harness to parse and assert manifest schema
**File:** `tests/mindmodel/test_manifest_schema.py`
**Test:** (itself)
**Depends:** 1.2, 1.3
**Effort:** S
Purpose: Ensure the manifest YAML loads into the types defined in 1.2; catches basic YAML formatting issues early in PRs.
Verify locally:
- python -m pytest tests/mindmodel/test_manifest_schema.py
Commit message suggestion: `test(mindmodel): manifest schema parse test`
---
## Batch 3: Conservative remediation (parallel - 2 implementers)
These are the small, non-invasive repo edits recommended in the design. They depend on Batch 1 tests/tools being present to validate effects.
### Task 3.1: Move test runner to dev dependency (pyproject change)
**File:** `pyproject.toml` (UPDATE)
**Test:** `tests/config/test_pyproject_deps.py`
**Depends:** 1.2
**Effort:** M
Purpose: Remove testing tools (pytest) from top-level production dependencies and document them as dev-dependencies. If the project uses Poetry or PEP 621 style, follow the project's existing pattern; if unclear, add a PEP 735 `[dependency-groups]` table (e.g. a `dev` group) or a `requirements-dev.txt` and reference it. This change must be small and isolated.
Verification locally:
- python -m pytest tests/config/test_pyproject_deps.py::test_pytest_not_in_prod_deps
Risk mitigation: Keep the change to a single commit; include CI job that still installs test deps for CI runs.
Commit message suggestion: `chore(deps): move pytest to dev-dependencies`
### Task 3.2: Add formatter / linter config files
**File:** `.pre-commit-config.yaml`
**Test:** `tests/config/test_formatters_present.py`
**Depends:** none (safe to add anytime, but keep in this batch)
**Effort:** S
Purpose: Add pre-commit and formatter config stubs (black, ruff, isort) to make future automation deterministic. This does not change code behavior and can be staged in a separate PR.
Verify locally:
- python -m pytest tests/config/test_formatters_present.py::test_precommit_exists
Commit message suggestion: `chore(format): add pre-commit and formatter configs`
---
## Batch 4: CI and automation (parallel - 2 implementers)
Final integration pieces. Depend on earlier batches so validator and CLI exist.
### Task 4.1: Add GitHub Actions CI job (report-only first)
**File:** `.github/workflows/mindmodel-validation.yml`
**Test:** `tests/ci/test_workflow_exists.py`
**Depends:** 1.1, 2.1, 3.1
**Effort:** M
Purpose: Add a CI workflow that runs the CLI against `.mindmodel/manifest.yaml` and uploads `reports/mindmodel-report-*.json` as an artifact. Important: the job should be non-blocking for merges initially (report-only). Job steps:
- checkout
- setup python
- pip install -r requirements-dev.txt (or install test/dev deps)
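- run scripts/validate_mindmodel.py --manifest .mindmodel/manifest.yaml --report reports/mindmodel-report-$(date +%Y%m%d).json (the file name must match the `reports/mindmodel-report-*.json` upload pattern)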
- upload artifact
Verify locally by running the validator CLI (see Task 2.1) and by checking workflow YAML syntax with `act` or GitHub's validator in UI.
Commit message suggestion: `ci(mindmodel): add report-only mindmodel validation workflow`
### Task 4.2: Add scheduled CI check (optional, experimental)
**File:** `.github/workflows/mindmodel-schedule.yml`
**Test:** `tests/ci/test_schedule_exists.py`
**Depends:** 4.1
**Effort:** S
Purpose: Add a cron-scheduled workflow to run the validator daily/weekly and produce artifacts, helping detect drift over time. Keep the schedule job report-only at first.
Verify: manual check in GitHub Actions UI after merge; run local syntax checks.
Commit message suggestion: `ci(mindmodel): add scheduled validation workflow`
---
## CI changes summary
- Add `.github/workflows/mindmodel-validation.yml` (report-only initial behavior).
- CI will install test/dev deps (do not switch prod installs) to ensure validator and tests run.
- CI job uploads a JSON report artifact and prints a short human-readable summary to logs.
- After an observation period (e.g., 1-2 weeks), change the workflow to fail on high-severity validator issues (manual gate required).
---
## Tests / verification commands (developer guide)
- Run all new unit tests: python -m pytest tests/validators tests/mindmodel tests/scripts tests/config tests/ci
- Run a single validator: python scripts/validate_mindmodel.py --manifest .mindmodel/manifest.yaml --report reports/tmp.json
- Validate workflow YAML syntax: yamllint .github/workflows/mindmodel-validation.yml (optional)
CI command (workflow): uses the CLI script; job is non-blocking and uploads artifacts.
---
## Low-risk incremental PR order (recommended)
1) PR A (Batch 1 - Validator skeleton + types + tests, no .mindmodel/ content) — Adds validator API and types. (Small, S)
2) PR B (Batch 1 - Add sanitized read-only `.mindmodel/manifest.yaml` + docs) — Separate PR so reviewers can inspect the raw manifest without behavioral changes. (S)
3) PR C (Batch 2 - Add CLI wrapper + validator edge-case tests) — Enables local/CI execution. (S)
4) PR D (Batch 4 - Add CI workflow as report-only) — Hook CI to run the validator and upload reports; do not fail CI yet. (M)
5) PR E (Batch 3 - Move pytest to dev-deps) — Small config change in pyproject; CI continues to install test deps. (M)
6) PR F (Batch 3 - Add pre-commit/formatters) — Non-invasive tooling. (S)
7) PR G (Batch 4 - Add scheduled validation job) — Optional, report-only. (S)
Rationale: each PR is kept small and focused. PR A/B/C/D are prioritized so we have validator + CI reporting quickly without touching production behavior. Remediation changes (E/F) are separate, so reviewers can focus on policy vs. code changes.
---
## Risk mitigation and decisions made
- Validator is report-only by default. Decision: safer to surface issues and build trust before enforcing failures.
- .mindmodel/ files will be added read-only and explicitly labeled in-file and in PR description.
- Move pytest to dev-deps rather than removing from pyproject entirely if project conventions are unclear. Decision: add a `[dependency-groups]` `dev` group (PEP 735) or `requirements-dev.txt` depending on project tools; the implementer will choose the minimally invasive approach.
- No automated fixes in validator; only reporting. If trivial YAML path reformatting is desired later, add an opt-in flag after human review.
---
## CI policy / timeline suggestion
- Week 0: Merge PRs A-C (validator, manifest, CLI). The validator is run locally via the CLI in report-only mode; CI reporting starts once PR D is merged.
- Week 1: Merge PR D (CI workflow) so reports appear in PR runs. Collect feedback and sample manual reviews on 3-5 constraints.
- Week 2: Merge remediation PRs (E/F) as separate changes. Keep CI non-blocking.
- Week 3-4: After confidence is built, update CI job to fail on a small set of clear, high-confidence checks (missing files, secrets) behind a feature flag or branch protection rule.
---
## Files to be added/modified (summary)
- src/validators/mindmodel_validator.py — validator API (S)
- src/validators/types.py — manifest dataclasses/types (S)
- .mindmodel/manifest.yaml — sanitized manifest (S) (read-only)
- thoughts/shared/mindmodel/README.md — developer docs (S)
- scripts/validate_mindmodel.py — CLI wrapper (S)
- .github/workflows/mindmodel-validation.yml — CI workflow (M)
- pyproject.toml — small update to move pytest to dev-deps (M)
- .pre-commit-config.yaml — formatter config (S)
- tests/... corresponding tests for each file (S/M as noted)
---
## Short summary for each microtask (one-line)
- 1.1: validator skeleton exposing validate_manifest(...). (S)
- 1.2: typed manifest models (dataclasses / pydantic). (S)
- 1.3: add sanitized .mindmodel/manifest.yaml read-only snapshot. (S)
- 1.4: developer README with reviewer checklist. (S)
- 2.1: CLI wrapper script to run validator and emit JSON reports. (S)
- 2.2: tests covering validator edge cases (missing files, truncated evidence). (M)
- 2.3: manifest schema parse test. (S)
- 3.1: move pytest to dev-deps in pyproject or add requirements-dev.txt. (M)
- 3.2: add pre-commit and formatter configs (black/ruff/isort). (S)
- 4.1: add GitHub Actions workflow to run validator (report-only). (M)
- 4.2: add scheduled workflow to run validation on a cadence (S)
---
Path where this plan is written:
`thoughts/shared/plans/2026-03-24-mindmodel-generation.md`
If you'd like, I can now split these microtasks into individual ticket-sized action items (one file + test per task) with ready-to-apply patch templates for each; tell me how many parallel implementers you expect and I will group them into batches accordingly.

@ -1,197 +0,0 @@
# "Welk tweede kamerlid ben jij?" Implementation Plan
**Goal:** Add a Streamlit quiz tab that interactively asks the user motion (vote) questions and narrows the set of 2026 MPs to find the best-matching MP. Implement two DB helpers (matching + discriminating-motion selection), the UI builder and tab wiring, and tests. Minimal viable changes only — no UX bells & whistles.
**Design:** thoughts/shared/designs/2026-03-24-welk-tweede-kamerlid-ben-jij-design.md
---
## Dependency Graph
```
Batch 1 (parallel): 1.1 [foundation - no deps], 1.2 (plan file) [none]
Batch 2 (parallel): 2.1 [explorer UI - depends: 1.1]
Batch 3 (parallel): 3.1 [integration tests - depends: 1.1,2.1]
```
---
## Summary of implementation decisions (gap-filling)
- MotionDatabase.match_mps_for_votes: implement as a read-only DuckDB-backed method on the existing MotionDatabase class (database.py). It accepts user_votes: Dict[int, str] where keys are motion ids and values are UI vote tokens. I will implement vote normalization inside the method (mapping UI tokens to canonical DB tokens) to avoid touching other modules. Rationale: keeps surface changes minimal and avoids creating new modules.
- MotionDatabase.choose_discriminating_motions: implement in the same file. For a small candidate set (expected << 200 MPs), fetch mp_votes for candidate MPs across candidate motions (excluding already-answered motion ids). Score candidate motions by information-entropy of vote distribution among remaining candidates (higher entropy = better split). Ties broken by controversy_score then motion id.
- Explorer UI changes: add build_mp_quiz_tab(db_path) to explorer.py and wire it into the tabs list and fallback radio. Use st.session_state['mp_quiz_votes'] to store answers as mapping str(motion_id)->UI token. Use @st.cache_data on any expensive DB-calls in the UI layer.
- Vote token normalization: UI will present choices: "Voor", "Tegen", "Onthouden", "Afwezig / Geen stem". The DB stores lowercase tokens like 'voor', 'tegen', 'onthouden', 'afwezig'. match_mps_for_votes will normalize case and a small set of variants (e.g., 'Geen stem' -> 'afwezig', 'Abstain' -> 'onthouden') — explicit list included in tests.
---
## BATCH 1: Foundation (parallel - N implementers)
All tasks in this batch have NO dependencies and can run simultaneously.
### Task 1.1: Add DB helpers to MotionDatabase
**File:** `database.py` (modify existing)
**Test:** `tests/test_match_mps.py`
**Depends:** none
Description / Acceptance criteria:
- Add two new public methods to MotionDatabase:
- match_mps_for_votes(user_votes: Dict[int, str], limit: int = 50) -> List[Dict]
- Returns an ordered list (desc by agreement_pct) of dicts with keys: mp_name, party, matched (int), overlap (int), agreement_pct (float 0-100).
- Behavior: for each mp present in mp_votes for any of the provided motions compute:
- overlap = number of motions where MP has a recorded vote AND the user provided a non-empty vote (i.e., not "Geen stem").
- matched = number of those overlaps where normalized(mp_vote) == normalized(user_vote).
- agreement_pct = matched / overlap * 100 rounded to 1 decimal. MPs with overlap==0 are excluded from the returned list.
- Ordering: agreement_pct desc, then matched desc, then mp_name asc.
- choose_discriminating_motions(candidates: List[str], excluded_motion_ids: List[int], k: int = 1) -> List[int]
- For the provided candidate mp_names, compute vote distributions per motion (voor/tegen/onthouden/afwezig) excluding motion ids in excluded_motion_ids.
- Score each motion by Shannon entropy of the distribution among the candidate MPs (treating 'afwezig' as a separate bucket). Higher entropy preferred.
- Return top-k motion ids as a list, tiebreakers: higher controversy_score (motions table) then lower motion id.
Implementation notes & decisions:
- Implement normalization inside these methods. Normalization mapping (DB vote -> canonical): map DB votes lowercased to one of {'voor','tegen','onthouden','afwezig'}. UI inputs (Voor/Tegen/Onthouden/Geen stem) normalized to these same tokens.
- For performance, implement SQL queries that select mp_votes filtered by motion_id IN (...) and mp_name IN (candidates) and aggregate via GROUP BY mp_name and vote. For small candidate sets and a limited set of motion_ids this will be fast. If duckdb is not available, fall back to in-Python aggregates using the file-backed JSON format already present in MotionDatabase._init_database.
- Add docstrings and basic parameter validation (raise ValueError for empty user_votes or empty candidates input). Tests will cover expected exceptions.
Test outline (tests/test_match_mps.py):
- Setup: create a temporary MotionDatabase using a temp db_path (MotionDatabase.reset_database() can be used if duckdb available; otherwise use file-backed mode). Insert a small set of motions and mp_votes via insert_motion / insert_mp_vote. Create at least 3 MPs with overlapping but distinct vote patterns across 4-6 motions.
- Tests:
1) test_match_basic_counts: user_votes covering 3 motions returns expected matched/overlap/agreement_pct per MP.
2) test_match_excludes_zero_overlap: MPs with no recorded votes for provided motions are excluded.
3) test_choose_discriminating_motions_entropy_ranking: with a small candidate set, the chosen motion(s) split candidates as expected (assert returned motion id is one of known good splitters)
4) test_invalid_input: calling match_mps_for_votes with empty user_votes raises ValueError.
Verify: `pytest -q tests/test_match_mps.py`
Commit message: `feat(database): add match_mps_for_votes and choose_discriminating_motions`
Estimated time: 3.0 - 4.5 hours
---
### Task 1.2: Add plan file (this document)
**File:** `thoughts/shared/plans/2026-03-24-welk-tweede-kamerlid-ben-jij-plan.md` (this file)
**Test:** none
**Depends:** none
Description: Add the implementation plan (this document) to the repo to provide step-by-step microtasks to implementers. No tests.
Verify: visually review file in repo. No test run.
Commit message: `docs(plans): add plan for 'Welk tweede kamerlid ben jij?'`
Estimated time: 0.25 - 0.5 hours
---
## BATCH 2: Core UI (parallel - depends on Batch 1)
All tasks in this batch assume the DB methods from Task 1.1 exist.
### Task 2.1: Add Streamlit quiz tab & wiring
**File:** `explorer.py` (modify existing)
**Test:** `tests/test_explorer_quiz.py`
**Depends:** 1.1
Description / Acceptance criteria:
- Add a function `build_mp_quiz_tab(db_path: str) -> None` placed near other build_*_tab functions (as described in the design, e.g., after build_svd_components_tab or near the top of the tab builders). The function must:
- Render a short intro/instructions.
- Load an initial pool of candidate motions using existing `load_motions_df(db_path)` and pick a seed set (top N by controversy_score). Decision: seed N = 8 (configurable constant in the function: SEED_MOTIONS = 8) — this is small and fast.
- Present questions one at a time: show motion title + layman_explanation (if available) and a radio with choices: "Voor", "Tegen", "Onthouden", "Geen stem" and a "Skip"/"Niet zeker" optional button mapped to "Geen stem". Choice stored to `st.session_state['mp_quiz_votes']` as mapping with keys str(motion_id) -> UI token.
- After each answer, call MotionDatabase.match_mps_for_votes(user_votes) to fetch ranked candidates and display a small DataFrame (top 10) with columns: MP name, party, matched, overlap, agreement_pct. Use st.dataframe.
- If more than 1 candidate remains with top agreement_pct tied, call MotionDatabase.choose_discriminating_motions(candidates, excluded_motion_ids) to pick the next question to ask and continue until one unique MP remains or choose_discriminating_motions returns an empty list (tie / indistinguishable). Cap total questions at 20 (SESS_CAP = 20).
- When unique MP is found (agreement_pct == 100 and overlap>0 and only one MP with highest agreement), show final MP summary (name, party) and their matching motions count.
- Use caching: wrap any repeated DB lookups (e.g., load_motions_df already cached) and mark heavy updates via @st.cache_data where appropriate.
Implementation notes & decisions:
- Keep all UI state local to st.session_state with keys prefixed `mp_quiz_` to avoid collisions.
- Normalize UI tokens before sending to DB helper (but DB methods will also normalize; duplication is defensive).
- Keep the UI function self-contained in explorer.py (do not create new modules for this minimal MVP).
Test outline (tests/test_explorer_quiz.py):
- Use monkeypatching to inject a MotionDatabase mock into explorer module or run in a test DB using MotionDatabase with temp db_path. The test must be import-safe (explorer.py imports many heavy libs), so follow pattern used by existing tests/test_explorer_import.py: import the module and assert `build_mp_quiz_tab` exists and is callable.
- Functional assertions:
1) test_builder_exists: import explorer, assert callable(build_mp_quiz_tab)
2) test_ui_state_update_simulation: simulate st.session_state by creating a fake session dict (use monkeypatch to set st.session_state to a dict-like object) and calling build_mp_quiz_tab with a small temp DB where motions and mp_votes are prepared. Assert that after calling the builder with pre-filled votes the DataFrame block would display ranked candidates (test inspects returned structure if builder returns it, or else monkeypatch MotionDatabase.match_mps_for_votes to verify it was called with expected mapping).
Verification: `pytest -q tests/test_explorer_quiz.py`
Commit message: `feat(ui): add 'Welk tweede kamerlid ben jij?' tab and wiring in explorer.py`
Estimated time: 2.0 - 4.0 hours
---
## BATCH 3: Integration & Tests (parallel - depends on Batches 1+2)
### Task 3.1: Add integration test for quiz flow
**File:** `tests/test_explorer_quiz_integration.py`
**Test:** this file
**Depends:** 1.1, 2.1
Description / Acceptance criteria:
- Create an end-to-end-ish headless test that:
- Sets up a temporary MotionDatabase instance (temp file path) and inserts a small controlled dataset: ~6 motions, 4 MPs with distinct votes.
- Calls build_mp_quiz_tab via explorer with monkeypatched st.session_state (or with a minimal wrapper) and simulates a sequence of user answers by pre-populating st.session_state['mp_quiz_votes'].
- Asserts that final candidate set matches expectations: either a unique MP (when answers match exactly one MP) or that the function properly identifies indistinguishable MPs (when two MPs have identical votes).
Testing details & choices:
- Avoid launching Streamlit server; tests only import explorer module and call the builder function in the same way other explorer tests do. Use monkeypatch to stub expensive functions (plotly, query_similar) where required.
Verify: `pytest -q tests/test_explorer_quiz_integration.py`
Commit message: `test(ui): add integration tests for mp quiz tab flow`
Estimated time: 2.0 - 3.0 hours
---
## Verification & CI
- Local verification commands (per task) use pytest. Example:
- `pytest -q tests/test_match_mps.py`
- `pytest -q tests/test_explorer_quiz.py`
- `pytest -q tests/test_explorer_quiz_integration.py`
- CI expectations: run full test suite. The new tests should be lightweight and use temporary DBs / monkeypatching to avoid depending on large production DB.
---
## Commit & PR Strategy
- Work in a feature branch `feat/mp-quiz-2026-03-24`.
- Make small focused commits per task (messages suggested above). Each micro-task should be one commit.
- PR organization:
- PR #1 (Batch 1): database.py changes + tests/test_match_mps.py — target only DB helpers and their unit tests. Keep this PR small so backend logic can be reviewed independently.
- PR #2 (Batch 2): explorer.py UI builder + tests/test_explorer_quiz.py — depends on PR #1; rebase after PR #1 merges or open as stacked PR (base=feat/mp-quiz-2026-03-24).
- PR #3 (Batch 3): integration+polish tests (tests/test_explorer_quiz_integration.py) and any small fixes discovered during integration testing.
- Review checklist for each PR:
- Tests covering edge cases (zero-overlap MPs, empty inputs)
- DB queries use read_only DuckDB connections
- UI uses st.session_state and @st.cache_data appropriately
- No production DB writes, no schema changes
---
## Risks & Mitigations (short)
- Performance: selecting motions across the entire motions table could be heavy. Mitigation: seed with top-N controversial motions and limit choose_discriminating_motions to motions that have mp_votes rows for the candidate MPs only.
- Data quality: MPs with identical votes will remain indistinguishable — surface clearly to user. Tests include that scenario.
---
## Task checklist for implementers (copy/paste friendly)
- [ ] Task 1.1: Modify database.py — implement match_mps_for_votes & choose_discriminating_motions. Add tests in tests/test_match_mps.py. (3.0–4.5h)
- [ ] Task 1.2: Add this plan file. (0.25–0.5h)
- [ ] Task 2.1: Modify explorer.py — add build_mp_quiz_tab and wire into tabs. Add tests in tests/test_explorer_quiz.py. (2.0–4.0h)
- [ ] Task 3.1: Add integration test tests/test_explorer_quiz_integration.py to exercise quiz flow. (2.0–3.0h)
---
If you run into ambiguous input normalization details or DB edge-cases, follow the choices documented above (explicit normalization mapping, exclude zero-overlap MPs, use entropy scoring). If you encounter a blocker (e.g. missing mp_votes data in test fixtures), create small test fixtures using MotionDatabase.insert_motion and insert_mp_vote in the test setup.
Good luck — keep PRs small and tests fast.

@ -1,162 +0,0 @@
# Deployment Plan: motief.sgeboers.nl
**Date:** 2026-03-26
**Subdomain:** `motief.sgeboers.nl`
**Stack:** Streamlit · uv · systemd · Nginx · Drone CI
**Target:** VPS, `webapps` user at `/home/webapps/motief/`
---
## Already done ✅
- VPS directory `/home/webapps/motief/data/` created
- `motions.db` uploaded to VPS
- nginx vhost configured for `motief.sgeboers.nl`
- TLS cert via certbot
---
## Step A — Install uv on VPS
SSH in as `webapps`:
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
source $HOME/.local/bin/env # or re-login (uv installs to ~/.local/bin, the path the systemd unit uses)
uv --version # verify
```
---
## Step B — Clone repo and install dependencies
```bash
cd /home/webapps
git clone <your-gitea-url>/sgeboers/stemwijzer motief
cd motief
uv sync
```
The `motions.db` you already uploaded should live at:
```
/home/webapps/motief/data/motions.db
```
---
## Step C — Create systemd user service
Create `~/.config/systemd/user/motief.service`:
```ini
[Unit]
Description=motief.sgeboers.nl Streamlit app
After=network.target
[Service]
WorkingDirectory=/home/webapps/motief
ExecStart=/home/webapps/.local/bin/uv run streamlit run Home.py --server.port=8501 --server.headless=true
Restart=on-failure
RestartSec=5
[Install]
WantedBy=default.target
```
Enable and start:
```bash
systemctl --user daemon-reload
systemctl --user enable motief
systemctl --user start motief
systemctl --user status motief # verify it's running
```
Enable linger so the service survives logout:
```bash
# Needs sudo (once only)
sudo loginctl enable-linger webapps
```
---
## Step D — Configure Drone secrets
In `drone.sgeboers.nl` → `sgeboers/stemwijzer` → **Settings → Secrets**:
| Secret | Value |
|--------|-------|
| `DEPLOY_HOST` | VPS hostname or IP |
| `DEPLOY_SSH_PORT` | `22` (or custom) |
| `DEPLOY_USER` | `webapps` |
| `DEPLOY_PASSWORD` | webapps SSH password |
---
## Step E — First auto-deploy
```bash
git push origin main
```
Drone will SSH in and run:
```bash
cd /home/webapps/motief
git pull origin main
uv sync
systemctl --user restart motief
```
---
## Step F — Verify
```bash
# On VPS
systemctl --user status motief
journalctl --user -u motief -f
# From browser
open https://motief.sgeboers.nl
```
Checklist:
- [ ] Home.py loads with nav to Stemwijzer and Explorer
- [ ] Compass tab renders with correct party positions (GL-PvdA top-left, PVV bottom-right)
- [ ] SVD tab scree plot shows with highlighted top-2 bars
- [ ] Similarity search returns results
---
## Ongoing: data updates
The `scheduler.py` can be run as a separate user service or a cron job. To set it up as a service:
Create `~/.config/systemd/user/motief-scheduler.service`:
```ini
[Unit]
Description=motief scheduler (weekly pipeline)
After=network.target
[Service]
WorkingDirectory=/home/webapps/motief
ExecStart=/home/webapps/.local/bin/uv run python scheduler.py
Restart=on-failure
[Install]
WantedBy=default.target
```
---
## Dependency order
```
A (install uv)   ─┐
B (clone + sync) ─┤─► C (systemd service) ─► E (push to main) ─► F (verify)
                  └─► D (Drone secrets) ────┘
```
Total estimated time: **20 minutes**.

@ -1,269 +0,0 @@
# Rewrite @ansible package for npm publish — Implementation Plan
**Goal:** Implement a publishable npm-scoped example package at packages/@ansible/example, add guarded GitHub Actions publish and deploy workflows (publish on v* tags or workflow_dispatch; deploy to motief.sgeboers.nl via user webapps), CI pack-tests, and documentation for embeddings and deployment secrets.
**Design:** see thoughts/shared/designs/2026-03-28-rewrite-ansible-package-design.md
Author: Sven Geboers
---
## Dependency Graph
```
Batch 1 (parallel): 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7
Batch 2 (parallel): 2.1, 2.2, 2.3, 2.4 [depends on Batch 1]
```
---
## Batch 1: Foundation (parallel - independent files)
All tasks in this batch have NO dependencies and can be created in parallel.
### Task 1.1: package.json for package
**File:** `packages/@ansible/example/package.json`
**Summary:** New package.json implementing an npm-scoped package @ansible/example. Exact choices made:
- name: "@ansible/example"
- version: "0.1.0"
- author: "Sven Geboers"
- publishConfig.access: "public"
- files: ["src/**", "README.md", "package.json"] (controls published files, avoids .npmignore)
- scripts:
- test: `node tests/run.js`
- pack-inspect: `node tests/test_pack_inspect.js`
- prepublish:verify: `npm run pack-inspect` (keeps a named script for CI)
- pack: `npm pack`
**Tests to add:** none in this file; tests below will exercise package.json
**Verify:**
- Local: cd packages/@ansible/example && node -e "console.log(require('./package.json').name)" # should print @ansible/example
- Run package tests: cd packages/@ansible/example && npm test
**Effort:** 0.5h
### Task 1.2: README.md
**File:** `packages/@ansible/example/README.md`
**Summary:** Short README with package purpose, usage example, and author attribution line "Author: Sven Geboers". Include publish and deploy notes and reference to docs/*.md for deployment details.
**Tests to add:** none
**Verify:** open file or run: sed -n '1,40p' packages/@ansible/example/README.md
**Effort:** 0.5h
### Task 1.3: package entrypoint
**File:** `packages/@ansible/example/src/index.js`
**Summary:** Minimal, well-documented CommonJS module that exports a function used by tests. Keep runtime trivial (e.g., function hello(name){ return `hello ${name}` }). No dependencies.
**Tests to add:** used by unit test below
**Verify:** node -e "console.log(require('./packages/@ansible/example/src/index.js')('world'))"
**Effort:** 0.5h
### Task 1.4: unit test — package.json fields
**File:** `packages/@ansible/example/tests/test_package_json.js`
**Summary:** Node test that loads package.json and asserts required fields are present: name === "@ansible/example", version present, author === "Sven Geboers", publishConfig.access === "public", files array exists.
**Tests to add:** this is the test file
**Verify:** cd packages/@ansible/example && node tests/test_package_json.js (exit 0 on success)
**Effort:** 0.5h
### Task 1.5: pack-inspect test
**File:** `packages/@ansible/example/tests/test_pack_inspect.js`
**Summary:** Test that runs `npm pack` (in package dir) programmatically, captures the produced tarball name, asserts the tarball exists and contains package/package.json. Implementation notes: uses child_process.execSync and `tar -xOzf` to read package/package.json from the tarball and assert name/version match. This test requires `tar` on the runner (Linux/macOS). If `tar` not available, the test fails with clear message.
**Tests to add:** this is the test
**Verify:** cd packages/@ansible/example && node tests/test_pack_inspect.js
**Effort:** 1.0h
### Task 1.6: tiny package test runner
**File:** `packages/@ansible/example/tests/run.js`
**Summary:** Small node test harness that runs both test_package_json.js and test_pack_inspect.js, prints nice output, and returns non-zero on failure. Used by package.json test script to keep CI independent of external test runners.
**Tests to add:** none (this is the runner used by `npm test`)
**Verify:** cd packages/@ansible/example && node tests/run.js
**Effort:** 0.5h
### Task 1.7: pack-inspect helper (optional small script)
**File:** `packages/@ansible/example/tests/_pack_helpers.js`
**Summary:** Small utility used by test_pack_inspect.js to encapsulate npm pack and tar inspection logic. Keeps main test readable. (Kept internal/private to tests.)
**Tests to add:** none
**Verify:** run the tests which import it
**Effort:** 0.25h
---
## Batch 2: CI + Docs (parallel — depend on Batch 1)
All tasks in this batch depend on the package files being present (Batch 1).
### Task 2.1: GitHub Actions publish workflow
**File:** `.github/workflows/publish-ansible-example.yml`
**Summary:** New Actions workflow that performs build/test/pack/publish for packages/@ansible/example.
**Key behavior:**
- Triggers: push tags matching `v*` and workflow_dispatch (manual).
- Jobs:
- verify: runs on all triggers: checks out repo, sets up Node 18 (LTS) using actions/setup-node, installs root-level dependencies if any (skipped if none), then runs `cd packages/@ansible/example && npm ci || true` (guard), then `npm test`. Then runs `npm pack` and verifies produced tarball (reuses test script). The verify job always runs and must pass before publish.
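- publish: runs only when `github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')` OR when manually triggered AND `secrets.NPM_TOKEN` exists. publish job is gated by `if: ${{ secrets.NPM_TOKEN != '' && ( github.event_name == 'workflow_dispatch' || startsWith(github.ref, 'refs/tags/v') ) }}`. Note: GitHub Actions does not expose the `secrets` context to `if:` conditionals, so this expression cannot be used verbatim. Have the verify job emit an output such as `has_npm_token` (computed in a step that receives the secret through `env:`) and gate the publish job on `needs.verify.outputs.has_npm_token == 'true'` together with the trigger check.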
- On publish: write ephemeral ~/.npmrc with `//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}` (use `echo` and file permissions 0600), run `npm publish --access public` from package directory, then securely delete ~/.npmrc (shred if available or overwrite and rm). Use Actions mask to avoid logging secrets.
**Secrets required:** NPM_TOKEN (string)
**Tests to add:** none (workflow file)
**Verify:**
- Locally simulate: cd packages/@ansible/example && npm test && npm pack
- In Actions: push a non-production tag on a test repository with a safe NPM_TOKEN that points to a test registry OR run workflow_dispatch with a dry-run branch (see dry-run below)
**Effort:** 1.5h
### Task 2.2: GitHub Actions deploy workflow
**File:** `.github/workflows/deploy-to-vps.yml`
**Summary:** Workflow to prepare and run deployment to external VPS motief.sgeboers.nl using user `webapps`. Triggers: push to main and workflow_dispatch. The job is non-destructive by default and will not run remote commands unless DEPLOY_SSH_KEY or DEPLOY_PASSWORD secrets are present. It includes a dry-run/verify job that checks SSH connectivity without executing commands that change state.
**Key behavior:**
- Inputs/secrets used (recommended names): DEPLOY_HOST (default motief.sgeboers.nl), DEPLOY_USER (default webapps), DEPLOY_SSH_PORT (default 22), DEPLOY_SSH_KEY (private key, optional), DEPLOY_PASSWORD (optional fallback), DEPLOY_PATH (path to deploy, optional)
- Jobs:
- dry-run-connect: attempts to verify connectivity. If DEPLOY_SSH_KEY present, create an ephemeral key file (0600), run `ssh -o BatchMode=yes -i $KEY -p $PORT $USER@$HOST 'echo connected'` and return success if the host echoes. This step is strictly a connection check (no file writes). If only DEPLOY_PASSWORD provided, the job will print instructions explaining the need for SSH key or use of a runner with sshpass (not recommended) and skip.
- deploy: guarded `if: needs.dry-run-connect.outputs.has_creds == 'true'` — only runs when credentials are present (a job-level `if` cannot read `steps.*`, so the flag must come from the dry-run-connect job's outputs). Steps: checkout, optionally build artifacts, create ephemeral SSH key file, rsync or scp artifact to $USER@$HOST:$DEPLOY_PATH, run non-destructive remote commands (e.g., systemctl --user status <unit> or echo) only if the `confirm_deploy` workflow_dispatch input is set to true. By default the job performs no destructive operations; the operator is expected to set the inputs when ready.
**Secrets required (for actual deploy):** DEPLOY_SSH_KEY (preferred), OR DEPLOY_PASSWORD (less secure, not recommended)
**Secrets recommended / env:** DEPLOY_HOST (default motief.sgeboers.nl), DEPLOY_USER=webapps, DEPLOY_SSH_PORT=22, DEPLOY_PATH=/home/webapps/motief (example)
**Tests to add:** none (workflow file)
**Verify:**
- Run dry-run via workflow_dispatch on Actions to verify connectivity (without running deploy). Ensure secrets are set in repository settings.
**Effort:** 2.0h
### Task 2.3: docs — deployment and Drone note
**File:** `docs/deployment/ansible-package-deploy.md`
**Summary:** Documentation describing: default host motief.sgeboers.nl, recommended DEPLOY_USER webapps, how to add GitHub repo secrets (DEPLOY_SSH_KEY, DEPLOY_HOST, DEPLOY_USER, DEPLOY_SSH_PORT), sample systemd unit name recommendation (e.g., service name `motief`), and instructions for users preferring Drone: how to set equivalent secrets in Drone and a note that .drone.yml is left untouched.
**Tests to add:** none
**Verify:** open file or grep keywords DEPLOY_SSH_KEY, motief.sgeboers.nl
**Effort:** 0.75h
### Task 2.4: docs — embeddings and environment variables
**File:** `docs/embeddings.md`
**Summary:** Document that the project uses QWEN embeddings (qwen/qwen3-embedding-4b via OpenRouter), recommend setting OPENROUTER_API_KEY in repo/host secrets, and mention OPENAI_API_KEY as optional fallback. Include example env var usage and security guidance.
**Tests to add:** none
**Verify:** open file and confirm environment variables documented
**Effort:** 0.5h
---
## CI workflow outlines (YAML-level steps in prose)
Publish workflow (.github/workflows/publish-ansible-example.yml):
- on:
- push: tags: ['v*']
- workflow_dispatch
- jobs:
- verify:
- runs-on: ubuntu-latest
- steps:
1. actions/checkout@v4
2. actions/setup-node@v4 (node-version: '18') with cache: 'npm'
3. echo current package info (for debugging) but do NOT print secrets
4. cd packages/@ansible/example && npm ci --no-audit --prefer-offline || true
5. cd packages/@ansible/example && npm test (calls tests/run.js)
6. cd packages/@ansible/example && npm run pack-inspect
- artifacts: upload pack artifact for inspection (optional)
- publish:
- needs: verify
- runs-on: ubuntu-latest
- if: (github.event_name == 'workflow_dispatch' || startsWith(github.ref, 'refs/tags/v')) && secrets.NPM_TOKEN != ''
- steps:
1. checkout
2. setup-node
3. create ephemeral ~/.npmrc with content `//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}` using `run: printf` and set 0600
4. cd packages/@ansible/example && npm publish --access public
5. securely remove ~/.npmrc (use shred if available, otherwise overwrite the file, then rm)
- secrets required: NPM_TOKEN
- guard rails: job will not run if NPM_TOKEN is empty; Actions expressions used to gate execution
Deploy workflow (.github/workflows/deploy-to-vps.yml):
- on:
- push: branches: [ main ]
- workflow_dispatch
- inputs (for manual dispatch): target_branch, confirm_deploy (boolean)
- jobs:
- dry-run-connect:
- runs-on: ubuntu-latest
- steps:
1. checkout
2. set env from secrets (DEPLOY_HOST=motief.sgeboers.nl default)
3. if secrets.DEPLOY_SSH_KEY set: write ephemeral key file with 0600 and run ssh -o BatchMode=yes -i $KEY -p $DEPLOY_SSH_PORT $DEPLOY_USER@$DEPLOY_HOST 'echo connected' (short timeout)
4. if no key present but DEPLOY_PASSWORD present: skip and print instructions to set key
- outcome: outputs.has_creds true/false for next job
- deploy:
- needs: dry-run-connect
- if: needs.dry-run-connect.outputs.has_creds == 'true' && github.event.inputs.confirm_deploy == 'true'
- steps:
1. checkout
2. build artifacts (if required)
3. create ephemeral key file and copy artifacts via rsync/scp to $DEPLOY_USER@$DEPLOY_HOST:$DEPLOY_PATH
4. optionally run remote non-destructive checks (e.g., tail logs, systemctl --user status motief)
- secrets required for actual deploy: DEPLOY_SSH_KEY or DEPLOY_PASSWORD
Security considerations for workflows:
- Never echo secrets. Use Actions' built-in mask and avoid printing environment variables that contain secrets.
- Create ephemeral ~/.npmrc and remove it immediately. Use file permissions 0600.
- Create ephemeral SSH key files with 0600 and remove them at the end of the job.
---
## Rollback / Undo plan for risky changes
- Publish workflow added: to rollback, remove or disable `.github/workflows/publish-ansible-example.yml` and push a commit. If a package was accidentally published, use npm unpublish only if within npm policy window (careful: unpublish can harm consumers) — prefer deprecating the published version via `npm deprecate @ansible/example@x.y.z "do not use"`.
- Deploy workflow added: to rollback, remove/disable `.github/workflows/deploy-to-vps.yml`. If deployment ran, have a documented process on the VPS to rollback the app (e.g., systemd service `motief` reversion, keep previous release tarball and symlink rollback). Document in deploy docs how to revert the symlink (or unpack the previous release tarball) and restart the systemd service.
- Docs changed: simply revert doc files and commit.
Notes on npm unpublish: prefer `npm deprecate` over unpublish in most cases. Unpublish should be used only with caution and awareness of npm registry policy.
---
## Final operator checklist (manual steps before running publish/deploy)
1. Add repository secrets in GitHub Settings > Secrets:
- NPM_TOKEN — a token with publish access to the @ansible scope (required for publish job)
- DEPLOY_SSH_KEY — (recommended) private SSH key for `webapps` user on motief.sgeboers.nl; set as secret (do NOT include passphrase unless CI knows how to handle it)
- Optionally DEPLOY_PASSWORD — password fallback (not recommended)
- DEPLOY_HOST — default: motief.sgeboers.nl (recommended to set explicitly)
- DEPLOY_USER — default: webapps
- DEPLOY_SSH_PORT — default: 22
- OPENROUTER_API_KEY — recommended for embeddings
- OPENAI_API_KEY — optional fallback
2. Ensure target VPS `motief.sgeboers.nl` has user `webapps` configured and an authorized public key corresponding to the DEPLOY_SSH_KEY private key.
3. Ensure the VPS has a systemd unit name prepared (recommendation: `motief.service`) and that deployment user `webapps` may write to the deploy path (e.g., /home/webapps/motief) and manage its own files. Documented in `docs/deployment/ansible-package-deploy.md`.
4. (Optional) If you prefer Drone CI: set Drone secrets for NPM_TOKEN and DEPLOY_* equivalently; .drone.yml remains untouched and you can run publish steps in Drone if you adapt the template.
5. Verify local tests: from repo root run:
- cd packages/@ansible/example && npm test
- cd packages/@ansible/example && npm pack && tar -tzf <produced-tgz> | head -n 20
6. For publish: create a tag `git tag v0.1.0` (or desired semver) and push the tag. The publish workflow will run and attempt publish if NPM_TOKEN secret is present.
7. For deploy: run the deploy workflow_dispatch after setting DEPLOY_SSH_KEY and testing dry-run connectivity via the dry-run job.
---
## Decisions & Assumptions
- Package name chosen: `@ansible/example` per design doc. Changeable later by editing package.json and tag.
- Default version set to `0.1.0` to indicate initial publish candidate.
- Testing uses a zero-dependency Node test harness (simple node scripts) to avoid introducing a test framework and to keep CI minimal.
- SSH key auth chosen as default for deployment because it's more secure and scriptable in CI than password auth. SSH keys avoid exposing a password through secrets or logs and work with `ssh -o BatchMode=yes` checks. We recommend DEPLOY_SSH_KEY; DEPLOY_PASSWORD is supported as a fallback but not recommended.
- Workflows are deliberately gated: publish runs only on tags `v*` or manual dispatch, and publish job requires NPM_TOKEN secret. Deploy will not execute destructive commands unless manual confirm is provided in workflow_dispatch inputs.
- We will not modify existing `.drone.yml` as requested; docs will describe how to set Drone secrets if the operator prefers Drone.
---
## Staged execution order
1. Batch 1 (create package files & tests): tasks 1.1 — 1.7 (all parallel if multiple implementers available)
2. Batch 2 (CI & docs): tasks 2.1 — 2.4 (parallel after Batch 1 complete)
---
## Estimates summary
- Batch 1 total: ~3.75h
- Batch 2 total: ~4.75h
- Grand total: ~8.5h (approx)
---
Write this plan to: `thoughts/shared/plans/2026-03-28-rewrite-ansible-package.md`
Done.

@ -1,89 +0,0 @@
---
date: 2026-03-30
topic: "compass-trajectory-consistency"
status: draft
---
# Implementation Plan — Compass ↔ Trajectory Consistency
This plan implements the validated design (thoughts/shared/designs/2026-03-30-compass-trajectory-consistency-design.md) with the following firm constraints from the user:
- Use per-window MP-centroid party coordinates as the canonical source for components 1 & 2
- When a party has no MPs in a window, use the first chronological party vector as fallback
- **Update all callers** to the new explicit API; do NOT keep backward compatibility shims
## Goal
Make the political compass numeric values identical to trajectory centroids for SVD components 1 and 2 by passing explicit per-party (x,y) coordinates (computed from positions_by_window) to the compass renderer and updating all callers to use that API.
## Micro-tasks (ordered, small, actionable)
All tasks assume a development branch and running tests locally. Each task should be one commit.
1) Add explorer_helpers.py (pure helper)
- Create compute_party_coords(positions_by_window, party_map, window_id, fallback_party_scores=None)
- Returns (party_coords: Dict[str,(x,y)], fallback_used: Set[str])
- Unit tests: tests/test_explorer_helpers.py
- Estimate: 2.0h
2) Update explorer.py to the new strict API
- Replace _build_party_axis_figure to accept only explicit party_coords for comp_sel 1 & 2.
- Remove old polymorphic/legacy path; callers must pass party_coords or raise a clear error.
- Update rendering glue to call _build_party_axis_figure with explicit party_coords.
- Ensure hover text shows fallback notes for parties where fallback_used contains the party.
- Update/clean Streamlit caption behavior when no coords available.
- Tests: modify tests/test_explorer_chart.py to supply party_coords shape and assert behavior.
- Estimate: 4.5h
3) Update all callers across repo to pass explicit party_coords
- Grep for places that previously passed party vectors into _build_party_axis_figure or used load_party_axis_scores for compass rendering.
- Update each call site to compute party_coords via compute_party_coords, passing the fallback_party_scores (first-chronological vector) when needed.
- Caller list (non-exhaustive — verify with repo search):
- explorer.build_svd_components_tab
- explorer._render_party_axis_chart (if present)
- any scripts or tests that directly call _build_party_axis_figure
- Update tests referencing legacy vector shape.
- Estimate: 3.0h
4) Add integration consistency test
- tests/test_compass_trajectory_consistency.py — synthetic positions_by_window and party_map to assert compute_party_coords equals centroid computations used by trajectories.
- Estimate: 1.0h
5) Run full test suite and fix regressions
- Run pytest; address failures introduced by strict API change.
- If other modules relied on old shape in ways not covered by tests, update them to use compute_party_coords.
- Estimate: 1.5h
6) Manual QA
- Run streamlit run explorer.py and visually verify compass tooltips and trajectories hover values match (comps 1 & 2) for several parties and windows.
- Verify fallback tooltip and logger WARN when a party uses fallback vector.
- Estimate: 1.0h
7) Commit and push (or open PR) with description:
"feat(explorer): use explicit per-party (x,y) coords from positions_by_window for compass (components 1 & 2); update callers and add tests"
- Estimate: 0.5h
## Verification commands
- Unit tests:
- python -m pytest tests/test_explorer_helpers.py
- python -m pytest tests/test_explorer_chart.py
- python -m pytest tests/test_compass_trajectory_consistency.py
- Full test suite:
- python -m pytest
- Manual UI:
- streamlit run explorer.py
## Rollback and mitigation
- If the strict API uncovers many call sites, revert to a temporary feature branch, document call sites, and migrate them in smaller patches.
- Keep commits small and self-contained to ease review.
## Notes
- This plan follows the user's instruction to update all callers and to use the first chronological party vector as fallback.
- The helper is pure Python to keep tests simple; callers may cache if needed.

@ -1,383 +0,0 @@
# Diagnose no-plot trajectories Implementation Plan
**Goal:** Add an opt-in debug mode for the Trajectories tab that surfaces runtime early-returns and swallowed exceptions so we can diagnose why no Plotly chart is shown.
**Architecture:** Minimal, reversible instrumentation inside explorer.py and explorer_helpers.py. Add an opt-in UI toggle (checkbox + EXPLORER_DEBUG_TRAJECTORIES env var), extend the existing diagnostics/inspector helper to surface additional samples/counts, un-silence broad excepts to log exceptions and capture tracebacks into a diagnostics object accessible to tests and the UI (when debug enabled).
**Design:** thoughts/shared/designs/2026-03-30-diagnose-no-plot-trajectories-design.md
---
## Dependency Graph
```
Batch 1 (parallel): 1.1, 1.2 [foundation - no deps]
Batch 2 (parallel): 2.1 [core - depends on batch 1]
```
---
## Batch 1: Foundation (parallel - 2 implementers)
All tasks in this batch have NO dependencies and run simultaneously.
### Task 1.1: Extend diagnostics inspector
**File:** `explorer_helpers.py` (modify function `inspect_positions_for_issues`)
**Test:** `tests/test_explorer_helpers_diagnostics.py`
**Depends:** none
Purpose: add compact, structured diagnostics (mp_positions_sample, mp_positions_count, windows_with_no_positions) to the existing inspector output so both UI and tests can consume them.
Implementation decisions (gap-filling):
- Keep the function import-safe and pure (no Streamlit calls). Return additional keys under the same dict.
- Provide small, deterministic samples (sorted keys limited to 10) so tests are stable.
Estimate: 45-90 minutes
Verify: `pytest -q tests/test_explorer_helpers_diagnostics.py`
```python
# COMPLETE test code - tests/test_explorer_helpers_diagnostics.py
import numpy as np
from explorer_helpers import inspect_positions_for_issues
def test_inspect_positions_for_issues_basic():
    """The inspector keeps its legacy keys and also reports the new diagnostics."""
    nan = float('nan')
    windows = {
        "w1": {"mp1": (1.0, 2.0), "mp2": (nan, nan)},
        "w2": {},
    }
    summary = inspect_positions_for_issues(windows, {"mp1": "P1"})

    # Legacy keys must still be reported.
    assert summary["windows_count"] == 2
    assert isinstance(summary["mp_id_set"], set)

    # New diagnostics: the unique-MP count plus two list-valued fields.
    assert "mp_positions_count" in summary
    assert summary["mp_positions_count"] >= 1
    for key in ("mp_positions_sample", "windows_with_no_positions"):
        assert key in summary
        assert isinstance(summary[key], list)
```
```python
# COMPLETE implementation - explorer_helpers.py (function replacement)
def inspect_positions_for_issues(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    party_map: Dict[str, str],
) -> Dict[str, Any]:
    """Summarise ``positions_by_window`` so empty or mismatched data is easy to spot.

    Args:
        positions_by_window: mapping of window label -> {entity id -> (x, y)}.
        party_map: mapping of entity id -> party name.

    Returns:
        A dict with the keys
        - windows_count: number of windows in the input
        - window_labels: first 10 window labels in sorted order
        - mp_id_set: every non-empty entity id seen in any non-empty window
        - party_map_count: number of entries in ``party_map``
        - parties_with_centroid_counts: party -> number of windows in which
          at least one of its entities appears
        - mismatched_mp_ids_sample: up to 10 sorted ids with no party match
        - mp_positions_sample: up to 10 sorted ids from ``mp_id_set``
        - mp_positions_count: size of ``mp_id_set``
        - windows_with_no_positions: labels of windows whose mapping is empty

    The helper makes no Streamlit calls, so unit tests can import and
    exercise it directly.
    """
    seen_ids: Set[str] = set()
    unmatched: Set[str] = set()
    window_hits: Dict[str, int] = {}
    empty_windows: List[str] = []

    for label, entries in positions_by_window.items():
        if not entries:
            # Nothing to inspect in this window; remember it and move on.
            empty_windows.append(label)
            continue
        parties_here: Set[str] = set()
        for entity in entries.keys():
            if not entity:
                continue
            seen_ids.add(entity)
            found = party_map.get(entity)
            if found is None:
                # Retry with the id passed through _strip_paren
                # (presumably drops a parenthesised suffix; defined elsewhere).
                found = party_map.get(_strip_paren(entity))
            if not found:
                unmatched.add(entity)
            else:
                parties_here.add(found)
        # A party counts once per window, however many of its entities appear.
        for name in parties_here:
            window_hits[name] = window_hits.get(name, 0) + 1

    labels = list(positions_by_window.keys())
    return {
        "windows_count": len(labels),
        "window_labels": sorted(labels)[:10],
        "mp_id_set": seen_ids,
        "party_map_count": len(party_map),
        "parties_with_centroid_counts": window_hits,
        "mismatched_mp_ids_sample": sorted(unmatched)[:10],
        "mp_positions_sample": sorted(seen_ids)[:10],
        "mp_positions_count": len(seen_ids),
        "windows_with_no_positions": empty_windows,
    }
```
Commit: `feat(explorer): extend diagnostic inspector to surface mp samples/counts`
---
### Task 1.2: Add tests and small helper for reading debug env var
**File:** `explorer.py` (add function `get_debug_trajectories_enabled`) **-- part of batch 2 core but small and independent**
**Test:** `tests/test_debug_flag.py`
**Depends:** none
Purpose: provide a single, testable helper that reads EXPLORER_DEBUG_TRAJECTORIES env var and returns a boolean. We use this consistently in UI code so tests can manipulate debug mode via env var.
Decision: implement conservative parsing ("1", "true", "True") as truthy. This function will be used by build_trajectories_tab and tests.
Estimate: 15-30 minutes
Verify: `pytest -q tests/test_debug_flag.py`
```python
# COMPLETE test code - tests/test_debug_flag.py
import os
import importlib
def test_get_debug_flag_on(monkeypatch):
    """Setting EXPLORER_DEBUG_TRAJECTORIES=1 switches the debug helper on."""
    monkeypatch.setenv("EXPLORER_DEBUG_TRAJECTORIES", "1")
    import explorer

    # reload() returns the re-executed module, so query the helper on that.
    reloaded = importlib.reload(explorer)
    assert reloaded.get_debug_trajectories_enabled() is True
def test_get_debug_flag_off(monkeypatch):
    """With EXPLORER_DEBUG_TRAJECTORIES unset, the debug helper reports False."""
    monkeypatch.delenv("EXPLORER_DEBUG_TRAJECTORIES", raising=False)
    import explorer

    # reload() returns the re-executed module, so query the helper on that.
    reloaded = importlib.reload(explorer)
    assert reloaded.get_debug_trajectories_enabled() is False
```
```python
# COMPLETE implementation to add into explorer.py
def get_debug_trajectories_enabled() -> bool:
    """Tell whether Trajectories debug mode is switched on through the environment.

    Reads ``EXPLORER_DEBUG_TRAJECTORIES``. Only the exact strings "1", "true"
    and "True" enable debug mode; any other value, or an unset variable,
    leaves it off.
    """
    enabled_values = ("1", "true", "True")
    return os.environ.get("EXPLORER_DEBUG_TRAJECTORIES", "") in enabled_values
```
Commit message: `chore(explorer): add get_debug_trajectories_enabled helper`
---
## Batch 2: Core Modules (parallel - 1 implementer)
These tasks depend on changes in Batch 1 (inspector additions and debug-flag helper). All tasks in this batch modify `explorer.py` (single-file microtask) and have a single test file.
### Task 2.1: Instrument trajectories UI and un-silence exceptions
**File:** `explorer.py` (update `select_trajectory_plot_data` exception handling, update `build_trajectories_tab` early-return instrumentation and try/except, add module-level diagnostics capture)
**Test:** `tests/test_diagnose_no_plot_trajectories.py`
**Depends:** 1.1, 1.2
Purpose: (A) Add opt-in debug UI binding to env var via checkbox and a DEBUG expander; (B) change helper-call swallow to log exceptions and include traceback in diagnostics; (C) instrument early-return gates (no positions, no mp_positions) to capture the reason and attach it to module-level diagnostics; (D) expose diagnostics to tests via attributes so tests can assert they were produced.
Decisions / gap-fills:
- Do not change public function signatures. To expose diagnostics to tests without changing signatures, set attributes on the function and module:
- select_trajectory_plot_data._last_diagnostics -> last inspector summary
- explorer._last_diagnostics -> diagnostics captured by build_trajectories_tab (early-returns or exceptions)
- Always call logger.exception(...) when an exception happens to preserve logs.
- Only call Streamlit UI functions to display tracebacks when debug mode is enabled.
Estimate: 2-4 hours
Verify: `pytest -q tests/test_diagnose_no_plot_trajectories.py`
```python
# COMPLETE test code - tests/test_diagnose_no_plot_trajectories.py
import traceback
import importlib
import explorer
from types import SimpleNamespace
def test_select_helper_exception_is_captured(monkeypatch):
    """A failing inspector must be captured in the diagnostics, not swallowed."""

    def _boom(*a, **k):
        raise RuntimeError("boom-inspector")

    # Patch the helper where it is defined AND where explorer looks it up.
    # select_trajectory_plot_data calls inspect_positions_for_issues by bare
    # name; if explorer.py binds it with `from explorer_helpers import ...`,
    # patching only explorer_helpers would leave the real inspector in place
    # and this test would not exercise the exception path at all.
    monkeypatch.setattr("explorer_helpers.inspect_positions_for_issues", _boom)
    monkeypatch.setattr(explorer, "inspect_positions_for_issues", _boom, raising=False)

    # call helper
    fig, count, banner = explorer.select_trajectory_plot_data({}, {}, [], [])

    # diagnostics should be attached to the function
    d = getattr(explorer.select_trajectory_plot_data, "_last_diagnostics", None)
    assert d is not None
    assert "inspector_exception" in d
    assert "boom-inspector" in d["inspector_exception"]
def test_build_trajectories_tab_early_return_sets_diagnostics(monkeypatch):
    """An empty positions result must record reason == "no_positions"."""
    # Ensure debug mode enabled via env var
    monkeypatch.setenv("EXPLORER_DEBUG_TRAJECTORIES", "1")
    # Reload BEFORE patching: reload re-executes the module and rebinds every
    # module-level name, so a load_positions patch applied earlier would be
    # overwritten by the real function. Reloading first also resets
    # explorer._last_diagnostics, so the assertions below cannot pass on
    # state left behind by another test.
    importlib.reload(explorer)
    # Make load_positions return empty positions to trigger early return
    monkeypatch.setattr(explorer, "load_positions", lambda db, ws: ({}, None))
    # Call the tab builder (uses dummy Streamlit in tests)
    explorer.build_trajectories_tab("/fake.db", "2025")
    d = getattr(explorer, "_last_diagnostics", None)
    assert d is not None
    assert d.get("reason") == "no_positions"
```
```python
# COMPLETE implementation snippets to apply to explorer.py
import traceback
# Add near top-level (after imports in explorer.py)
_last_diagnostics: Optional[dict] = None
def get_debug_trajectories_enabled() -> bool:
    """Report whether EXPLORER_DEBUG_TRAJECTORIES is set to "1", "true" or "True"."""
    enabled_values = ("1", "true", "True")
    return os.environ.get("EXPLORER_DEBUG_TRAJECTORIES", "") in enabled_values
# Replace the small inspector try/except in select_trajectory_plot_data with the
# following (complete function shown below replaces the existing select_trajectory_plot_data
# definition in explorer.py):
def select_trajectory_plot_data(
positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
party_map: Dict[str, str],
windows: List[str],
selected_parties: List[str],
smooth_alpha: float = 0.35,
mp_fallback_count: Optional[int] = None,
) -> Tuple[go.Figure, int, Optional[str]]:
"""Return (fig, trace_count, banner_text).
Helper used by build_trajectories_tab. Does not call Streamlit.
"""
if mp_fallback_count is None:
try:
mp_fallback_count = int(os.getenv("EXPLORER_MP_FALLBACK_COUNT", "20"))
except Exception:
mp_fallback_count = 20
# Compute per-party centroids aligned to windows
party_centroids, meta = compute_party_centroids(
positions_by_window, party_map, windows
)
# Use inspector to collect diagnostics (import-safe, pure helper).
try:
inspector_summary = inspect_positions_for_issues(positions_by_window, party_map)
except Exception as e:
# Do not silently swallow: log and capture traceback text so tests / UI
# can inspect it. Keep function import-safe (no Streamlit here).
tb = traceback.format_exc()
logger.exception("inspect_positions_for_issues failed: %s", e)
inspector_summary = {"inspector_exception": tb}
# expose diagnostics for tests without changing function signature
setattr(select_trajectory_plot_data, "_last_diagnostics", inspector_summary)
logger.debug("select_trajectory_plot_data inspector summary: %s", inspector_summary)
# ... rest of the original function remains unchanged (build fig/trace_count)
# (Implementation note: keep the rest identical to existing function.)
# Now update the call-site in build_trajectories_tab (replace the try/except around
# select_trajectory_plot_data invocation with the following snippet):
try:
fig2, trace_count2, banner_text = select_trajectory_plot_data(
positions_by_window, party_map, windows, selected_parties, smooth_alpha
)
if fig2 is not None:
fig = fig2
trace_count = trace_count2
if banner_text:
st.caption(banner_text)
except Exception as e:
# Do not silently pass. Log, capture traceback and (when debug enabled)
# surface to Streamlit.
tb = traceback.format_exc()
logger.exception("select_trajectory_plot_data raised: %s", e)
global _last_diagnostics
_last_diagnostics = {"build_exception": tb}
if get_debug_trajectories_enabled():
try:
st.exception(e)
except Exception:
# Streamlit may not be available in test env; fall back to text_area
try:
st.text_area("Trajectories exception", tb)
except Exception:
pass
# Instrument early-return gates (example: when positions_by_window is empty) by
# setting _last_diagnostics before returning. Replace the current block:
if not positions_by_window:
st.warning("Geen positiedata beschikbaar.")
global _last_diagnostics
_last_diagnostics = {"reason": "no_positions", "inspector": {}}
if get_debug_trajectories_enabled():
# call inspector and attach diagnostics when debug enabled
try:
_last_diagnostics["inspector"] = inspect_positions_for_issues(positions_by_window, {})
except Exception:
_last_diagnostics["inspector"] = {"error": "inspector_failed"}
return
# Note: make similar instrumentation for the `if not mp_positions:` early return
# inside the per-MP fallback path: set _last_diagnostics = {"reason": "no_mp_positions"}
```
Notes for implementer:
- Insert the two helper functions and the try/except replacement in the appropriate places of explorer.py. The select_trajectory_plot_data replacement above should replace the function body; keep the unchanged plotting logic intact after the diagnostic area.
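- Add the module-level _last_diagnostics variable near the top of explorer.py (after imports). Declare `global _last_diagnostics` once, at the top of build_trajectories_tab, rather than inside each branch as the snippets above show: Python raises a SyntaxError when a `global` statement appears after the same name has already been assigned earlier in that function, which is what happens if both the early-return block and the later except block carry their own declaration.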
Commit: `feat(explorer): instrument trajectories with debug diagnostics and un-silence helper exceptions`
---
## Verification & Manual checks
- Run unit tests for the modified files:
- pytest -q tests/test_explorer_helpers_diagnostics.py
- pytest -q tests/test_debug_flag.py
- pytest -q tests/test_diagnose_no_plot_trajectories.py
- Manual: run Streamlit locally with EXPLORER_DEBUG_TRAJECTORIES=1 and inspect the "DEBUG" expander in the Trajectories tab to see the diagnostics block and any surfaced tracebacks.
---
## Rollback plan
- All changes gated behind debug env var and small: revert the two modified files (explorer.py, explorer_helpers.py) to previous commit to remove instrumentation.
- Because public signatures are unchanged, rollout/revert is safe.
---
## Appendix — quick implementer checklist
1. Implement inspector changes (explorer_helpers.py) and run its tests.
2. Add get_debug_trajectories_enabled helper and tests.
3. Modify explorer.py: add _last_diagnostics, update select_trajectory_plot_data try/except, update build_trajectories_tab try/except and early-return instrumentation, add debug checkbox wiring in UI.
4. Add tests that monkeypatch inspector and load_positions and assert diagnostics are created.
---
Written: thoughts/shared/plans/2026-03-30-diagnose-no-plot-trajectories.md

@ -1,254 +0,0 @@
# Fix missing trajectories Implementation Plan
I'm using the writing-plans skill to create the implementation plan.
Goal: Restore visible party trajectories in the Explorer "Partij Trajectories" tab by adding validation/inspection helpers, making centroid computation tolerant of missing windows (emit NaN gaps), and adding an automatic MP-level fallback (top-K) with a debug expander and hover raw-values preserved.
Design: thoughts/shared/designs/2026-03-30-fix-missing-trajectories-design.md
Architecture: Small, focused changes in explorer_helpers.py (pure helpers + unit tests) and explorer.py (UI wiring and plotting policy). Keep helper logic independent of Streamlit so tests run in CI without heavy deps. Provide a graceful MP fallback and compact diagnostics exposed behind a collapsed expander.
Tech Stack: Python 3.x, pytest, Streamlit (manual UI verification), Plotly (already used). Tests must run in CI with duckdb / streamlit optional — unit tests only use pure Python/numpy.
---
## Dependency Graph
```
Batch 1 (parallel): 1.1, 1.2 [foundation - no deps]
Batch 2 (parallel): 2.1, 2.2 [core - depends on batch 1]
Batch 3 (parallel): 3.1, 3.2 [integration - depends on batch 2]
```
---
## Decisions / gap-filling (explicit)
- EXPLORER_MP_FALLBACK_COUNT environment variable: integer, default 20. Used to choose top-K MPs when party centroids are absent.
- Top-K definition: rank MPs by seat_count when it is present in mp_metadata. When seat_count is unavailable, fall back to an activity-based ranking (number of windows in which the MP has a valid position, sorted descending), as specified in Task 2.1; party axis activity (mean magnitude) from load_party_axis_scores is only a secondary option if that activity ranking cannot be computed.
- Validation rules (inspect_positions_for_issues): detect empty positions_by_window, windows_count mismatch across MPs, sample of mismatched mp ids, parties_with_centroid_counts dictionary. Reason: these are the most likely causes of empty traces.
- compute_party_centroids behavior: returns per-party arrays aligned to windows (list of floats or np.nan), plus per-party metadata containing counts and missing indices. Guarantee: it never returns None — when there is no data it returns empty containers instead.
---
## Batch 1: Foundation (parallel - 2 implementers)
All tasks in this batch have NO dependencies and can run simultaneously.
### Task 1.1: Add inspector helper
**File:** `explorer_helpers.py`
**Test:** `tests/test_inspect_positions_for_issues.py`
**Depends:** none
Helpers to add (names only):
- inspect_positions_for_issues(positions_by_window: Dict[str, Dict[str, Tuple[float,float]]], party_map: Dict[str,str]) -> Dict[str, Any]
What it returns (documented in test expectations):
- windows_count: int
- window_labels: list[str] (sorted sample of window keys)
- mp_id_set: set[str] (set of entity ids seen across windows)
- party_map_count: int (len(party_map))
- parties_with_centroid_counts: Dict[str, int] (mapping party -> number of windows with a centroid)
- mismatched_mp_ids_sample: list[str] (sample of ids present in positions but not in party_map, up to 10)
Tests to add (exact assertions):
- tests/test_inspect_positions_for_issues.py (unit):
- Construct synthetic positions_by_window with 3 windows, with some MPs missing in some windows and some mp ids that aren't in party_map. Assert returned windows_count == 3, party_map_count equals len(party_map), parties_with_centroid_counts entries for expected parties, and mismatched_mp_ids_sample contains the expected missing keys.
Verify:
- Run: `pytest tests/test_inspect_positions_for_issues.py -q`
- Expected: PASS
Commit message: `feat(explorer): add inspect_positions_for_issues helper + test`
### Task 1.2: Add compute_party_centroids (per-window aligned arrays)
**File:** `explorer_helpers.py` (same file; add new function)
**Test:** `tests/test_compute_party_centroids.py`
**Depends:** none
Helper to add (name only):
- compute_party_centroids(positions_by_window: Dict[str, Dict[str, Tuple[float,float]]], party_map: Dict[str,str], windows: List[str]) -> Tuple[Dict[str, List[float]], Dict[str, Any]]
Behavior contract (for implementer):
- Return party_centroids: dict[party -> list[float|np.nan]] aligned to the provided windows order. For a party and window where no MPs present, insert np.nan at that index.
- Return metadata: {"per_party_counts": {party: int}, "total_windows": int, "parties": sorted_list}
- Guarantees: never return None; for every party in the `parties` list, its centroid list has length == len(windows) (so a party's list is empty only when `windows` itself is empty).
Tests to add (exact assertions):
- tests/test_compute_party_centroids.py (unit):
- Case A: full coverage — every party has coords in every window -> assert no np.nan and lengths equal windows count.
- Case B: partial coverage -> assert np.nan present at expected indices and metadata.per_party_counts match counts.
- Case C: no parties (empty positions_by_window) -> party_centroids == {} and metadata.total_windows == len(windows)
Verify:
- Run: `pytest tests/test_compute_party_centroids.py -q`
- Expected: PASS
Commit message: `feat(explorer): add compute_party_centroids to produce aligned per-party arrays`
---
## Batch 2: Core Modules (parallel - 2 implementers)
All tasks depend on Batch 1.
### Task 2.1: Modify explorer.py to use helpers and add MP fallback
**File:** `explorer.py` (modify function build_trajectories_tab only)
**Test:** `tests/test_build_trajectories_tab_fallback.py`
**Depends:** 1.1, 1.2
Changes to make (high-level, exact function to modify):
- modify build_trajectories_tab(db_path: str, window_size: str) to:
- early: call inspect_positions_for_issues(positions_by_window, party_map) and render the compact DEBUG expander content (same keys as the inspector returns). Keep the expander collapsed by default.
- replace existing per-window centroid construction with compute_party_centroids(...) which returns aligned arrays containing np.nan placeholders.
- relax party-selection filtering: treat a party as plottable if it has >= 1 non-nan centroid (previous code required full coverage). This ensures partial traces still render with gaps.
- preserve hover customdata to include raw centroid values (already present in code) — ensure when centroids contain np.nan for raw values we still populate customdata with (np.nan, np.nan).
- If no party centroids (empty dict or all-party centroid vectors are entirely nan), trigger MP fallback: plot top-K MPs (EXPLORER_MP_FALLBACK_COUNT, default 20) as per design. This fallback must show a small banner message in Dutch: "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback." and provide a toggle (st.checkbox) to expand to show the full top-K list.
Notes / gap-filling decisions (explicit):
- EXPLORER_MP_FALLBACK_COUNT: implement read via int(os.getenv("EXPLORER_MP_FALLBACK_COUNT", "20"))
- For selecting top-K MPs: use seat_count if present in mp_metadata (query `mp_metadata` for a seat_count-like field). If unavailable, choose MPs with most non-empty positions across windows. Implementer decision: compute activity = number of windows with a valid (non-None) position and sort descending.
Tests to add (integration, shims-friendly):
- tests/test_build_trajectories_tab_fallback.py
- Scenario 1 (party centroids present): Provide a fake positions_by_window and party_map fixture with at least one party having centroids in multiple windows and assert that when build_trajectories_tab is invoked (call the internal plotting branch with a test harness) it adds at least one trace (fig.data length > 0) and trace names match selected parties.
- Scenario 2 (no party centroids): Provide positions_by_window where party_map is empty or all MPs map to Unknown; assert the MP fallback path is chosen (the helper returns or builds a fig with MP traces) and that the banner message string appears in the returned metadata or printed UI stub. Since Streamlit is not easily invoked in unit tests, structure the UI branch so the plotting logic returns the fig when called from tests — write the test to import the small private helper `_assemble_trajectory_figure_for_tests(...)` (the same helper described in Task 3.1) if necessary. If a refactor is needed, keep it minimal: extract plotting assembly into that private helper so it returns the figure plus metadata (including trace count and banner text), letting tests assert on fig traces without needing Streamlit.
Verify (unit/integration):
- Run: `pytest tests/test_build_trajectories_tab_fallback.py -q`
- Expected: PASS
Commit message: `feat(explorer): use inspector & compute_party_centroids; add MP top-K fallback and debug expander`
### Task 2.2: Add/adjust unit tests for hover/raw values and NaN handling
**File:** `tests/test_explorer_helpers.py` (update) and `tests/test_explorer_chart.py` (add test)
**Depends:** 1.2
Changes/tests to add (exact tests):
- tests/test_explorer_helpers.py: add a test verifying compute_party_centroids produces np.nan for missing windows and that hover customdata creation uses (float, float) or (np.nan, np.nan) consistently.
- tests/test_explorer_chart.py: add a small unit test that constructs a go.Figure via the new plotting helper (see 2.1) and asserts:
- traces exist when parties have partial coverage
- customdata arrays length equals x/y arrays length
- hovertemplate contains both smoothed and raw placeholder markers (strings like 'x (raw)')
Verify:
- Run: `pytest tests/test_explorer_helpers.py::test_compute_party_centroids_nan_handling -q`
- Run: `pytest tests/test_explorer_chart.py::test_partial_party_traces -q`
- Expected: PASS
Commit message: `test(explorer): add tests for NaN gaps and hover customdata preservation`
---
## Batch 3: Integration & Manual UI checks (parallel - 2 implementers)
Depends on Batch 2
### Task 3.1: Integration test (shim-friendly) for three scenarios
**File:** `tests/integration/test_trajectories_ui_integration.py`
**Test:** the file above
**Depends:** 2.1, 2.2
Tests to add (exact scenarios):
- Scenario A (full party centroids): positions_by_window with full coverage — assert plot built uses party traces; simulate user selection to include at least one party; assert fig.data length >= 1.
- Scenario B (party centroids missing): party_map empty — assert MP fallback chosen and number of plotted MP traces == min(EXPLORER_MP_FALLBACK_COUNT, number of available MPs).
- Scenario C (partial centroids): party centroids partial across windows — assert traces exist and customdata shows np.nan at missing indices.
Test harness notes: tests should import small pure helpers from explorer.py that assemble figures without calling st.plotly_chart or other Streamlit side-effects. If necessary, add a small refactor in explorer.py: `_assemble_trajectory_figure_for_tests(positions_by_window, party_centroids, selected_parties, windows, smooth_alpha, ...) -> go.Figure, metadata` and call that from build_trajectories_tab. Tests then call this helper. Keep the helper private and minimal.
Verify:
- Run: `pytest tests/integration/test_trajectories_ui_integration.py -q`
- Expected: PASS
Commit message: `test(integration): trajectories UI integration scenarios (full/partial/missing)`
### Task 3.2: Manual Streamlit verification steps (documented)
**File:** none (manual steps below); include in PR description.
**Depends:** 2.1
Manual verification (Streamlit):
1. Start Streamlit: `streamlit run explorer.py --server.headless true` (or run locally with a test DB path)
2. Open the app in browser (usually http://localhost:8501). Go to tab "Partij Trajectories".
3. Scenario: normal DB with party centroids
- Select a recent window_size (e.g., quarterly or annual as appropriate)
- Ensure default parties (CDA, D66, VVD) appear and trajectories are visible.
- Hover on a trace point: verify hover shows both smoothed and raw centroid values (x (smoothed), x (raw)).
- Open the DEBUG expander (collapsed by default) and confirm it shows `windows (count)`, `windows sample`, `party_map entries`, `parties with centroids`, `sample centroid window counts per party`.
4. Scenario: simulate missing party centroids (set party_map to {} or use a DB snapshot with missing mp_metadata)
- The app should show the fallback banner: "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback." and render MP trajectories (top-K). There should be a checkbox to expand the top-K list.
5. Scenario: partial centroids
- For a party missing centroids in some windows, its trace should appear but with gaps (line discontinuity where NaNs present). Hover customdata at gap points should show raw value `nan` or a placeholder.
Streamlit-specific acceptance criteria:
- traces drawn when at least one party has >=1 centroid
- MP fallback automatically displayed (banner + plotted MP traces) when no party centroids
- DEBUG expander shows diagnostics described above
- Hover shows raw centroid values even when smoothing is applied
---
## Files to create / modify (one-file-per-task mapping)
Batch 1
- Modify: `explorer_helpers.py` — add functions:
- inspect_positions_for_issues
- compute_party_centroids
- Add test: `tests/test_inspect_positions_for_issues.py`
- Add test: `tests/test_compute_party_centroids.py`
Batch 2
- Modify: `explorer.py` — function build_trajectories_tab; optional small private helper `_assemble_trajectory_figure_for_tests` (single-file change)
- Add test: `tests/test_build_trajectories_tab_fallback.py`
- Update/add tests: `tests/test_explorer_helpers.py` (augment), `tests/test_explorer_chart.py`
Batch 3
- Add test: `tests/integration/test_trajectories_ui_integration.py`
---
## Verification commands (unit & CI)
- Unit test single file: `pytest tests/test_inspect_positions_for_issues.py -q`
- Unit test compute party centroids: `pytest tests/test_compute_party_centroids.py -q`
- Trajectories fallback unit tests: `pytest tests/test_build_trajectories_tab_fallback.py -q`
- Integration tests (shim-friendly): `pytest tests/integration/test_trajectories_ui_integration.py -q`
- Run full test suite: `pytest -q`
Manual Streamlit checks: follow steps in Task 3.2 above. Recommended quick dev workflow:
- Start streamlit: `streamlit run explorer.py --server.headless true`
- Use the URL printed in console (usually http://localhost:8501) and perform the manual steps.
---
## Blocked / Unblocked checklist
- [ ] Blocker: Access to a representative DB fixture (small DuckDB or JSON fixture) that contains windows, svd_vectors and mp_metadata. Without it, integration/manual checks are limited. (Mitigation: tests use synthetic positions_by_window and party_map fixtures — unblocked for unit tests.)
- [ ] Blocker: If MP seat_count is required from DB and not present in test fixtures, fallback selection will use activity-based ranking. (Mitigation: implement activity fallback.)
- [x] Unblocked: Adding pure helpers in explorer_helpers.py (unit tests cover behavior without Streamlit/duckdb)
- [x] Unblocked: Modifying build_trajectories_tab to call helpers and add banner + expander (code-local change)
- [ ] Optional: Agree on EXPLORER_MP_FALLBACK_COUNT envvar default (I set default 20). If you want a different default, tell me now.
If any of the above blockers remain, proceed with unit tests and open a PR discussion for integration DB fixtures.
---
## Estimated timeline (hours)
- Task 1.1 (inspect_positions_for_issues + unit test): 1.5 h
- Task 1.2 (compute_party_centroids + unit tests): 3.0 h
- Task 2.1 (explorer.py changes: wiring, MP fallback, debug expander): 4.0 h
- Task 2.2 (tests for hover/NaN handling): 2.0 h
- Task 3.1 (integration tests / small refactor helper): 2.5 h
- Task 3.2 (manual Streamlit QA and documentation): 1.5 h
- PR polish, CI tweaks, and addressing review comments: 2.0 h
Total: 16.5 hours (approx)
---
## PR checklist / deliverables
- [ ] Unit tests for inspector and centroids pass
- [ ] build_trajectories_tab updated with debug expander and fallback
- [ ] Integration tests for three scenarios pass (or documented reason for partial coverage)
- [ ] Manual Streamlit QA steps documented in PR and verified locally
- [ ] Add mention of EXPLORER_MP_FALLBACK_COUNT to README or environment docs (optional follow-up)
---
If you'd like, I can now (A) produce the concrete test contents and minimal helper implementations as separate micro-tasks (one file + one test per task) ready for implementers, or (B) proceed to create and apply the code changes in this repo. Which do you prefer?

@ -1,288 +0,0 @@
# Trajectory Plots Not Showing — Debugging Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Identify why trajectory plots are invisible or empty in the Streamlit Explorer UI, then fix the root cause.
**Architecture:** Systematic step-by-step pipeline trace from UI → DB. Each stage has explicit "what should I see" checkpoints so we can pinpoint exactly where data becomes invisible.
**Tech Stack:** Streamlit, Plotly, DuckDB, Python ≥3.13, uv
---
## Debugging Pipeline (Stage-by-Stage Checkpoints)
```
┌─────────────────────────────────────────────────────────────────────────┐
│ STAGE 0: UI Layer — what does the user actually see? │
│ explorer.py → build_trajectories_tab() │
│ → Is the tab visible? Empty chart? Error message? No chart at all? │
└─────────────────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────────────┐
│ STAGE 1: positions_by_window — are MP positions loaded? │
│ load_positions(db_path, "annual") │
│ → Expected: 12 windows, ~150-200 MPs per window │
│ → Check: _last_trajectories_diagnostics["stage"] │
└─────────────────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────────────┐
│ STAGE 2: party_map — are MP→party mappings loaded? │
│ load_party_map(db_path) │
│ → Expected: ~1036 entries │
│ → Check: party_map is non-empty dict │
└─────────────────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────────────┐
│ STAGE 3: party centroids — are party means computed? │
│ compute_party_centroids() / compute_party_coords() │
│ → Expected: CDA, D66, VVD, PVV, SP, GroenLinks-PvdA centroids exist │
│ → Check: plottable_parties > 0 │
└─────────────────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────────────┐
│ STAGE 4: select_trajectory_plot_data — does it return traces? │
│ → Expected: fig with 3-6 colored scatter traces, trace_count > 0 │
│ → Check: banner_text is None (no fallback), trace_count ≥ 3 │
└─────────────────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────────────┐
│ STAGE 5: Plotly render — is the figure rendered in the browser? │
│ st.plotly_chart(fig, use_container_width=True) │
│ → Expected: visible chart with colored party lines │
│ → Check: browser DOM, no JS errors │
└─────────────────────────────────────────────────────────────────────────┘
```
---
## Task 1: Instrument the app to print real-time pipeline state
**Files:**
- Modify: `explorer.py` (add print statements at each stage)
- Test: Run `uv run streamlit run explorer.py` with `EXPLORER_DEBUG_TRAJECTORIES=1`
- [ ] **Step 1: Add stage-0 checkpoint at top of `build_trajectories_tab`**
Read `explorer.py` lines 1601-1650. Add a print statement at the start of `build_trajectories_tab`:
```python
print(f"[TRAJ DEBUG] build_trajectories_tab called — db_path={db_path}, window_size={window_size}")
```
- [ ] **Step 2: Add stage-1 checkpoint after `load_positions`**
Read `explorer.py` lines 1605-1610. After the call to `load_positions`, add:
```python
positions_by_window, axis_def = load_positions(db_path, window_size)
print(f"[TRAJ DEBUG] load_positions → {len(positions_by_window)} windows, "
f"total MPs={sum(len(v) for v in positions_by_window.values())}")
```
- [ ] **Step 3: Add stage-2 checkpoint after `load_party_map`**
Read `explorer.py` lines 1638-1642. After the call to `load_party_map`, add:
```python
party_map = load_party_map(db_path)
print(f"[TRAJ DEBUG] load_party_map → {len(party_map)} entries, "
f"sample={list(party_map.items())[:3]}")
```
- [ ] **Step 4: Add stage-3 checkpoint after centroid computation**
Read `explorer.py` lines 1641-1670. After the inline centroid loop, add:
```python
all_parties = sorted(set(party_map.get(mp) for MPs in positions_by_window.values() for mp in MPs) - {None, "Unknown"})
print(f"[TRAJ DEBUG] all_parties (raw from party_map) → {len(all_parties)} parties: {all_parties[:10]}")
```
- [ ] **Step 5: Add stage-4 checkpoint before `st.plotly_chart`**
Read `explorer.py` around line 2105. Before the `st.plotly_chart` call, add:
```python
print(f"[TRAJ DEBUG] About to render plotly chart — trace_count={trace_count}, "
f"banner={banner_text}, fig has {len(fig.data)} traces")
```
- [ ] **Step 6: Run the app and capture all debug output**
```bash
EXPLORER_DEBUG_TRAJECTORIES=1 uv run streamlit run explorer.py 2>&1 | grep TRAJ
```
Expected output (all stages should print):
```
[TRAJ DEBUG] build_trajectories_tab called — db_path=..., window_size=annual
[TRAJ DEBUG] load_positions → 12 windows, total MPs=...
[TRAJ DEBUG] load_party_map → 1036 entries, sample=[(...), (...), (...) ]
[TRAJ DEBUG] all_parties (raw from party_map) → N parties: [...]
[TRAJ DEBUG] About to render plotly chart — trace_count=N, banner=None, fig has N traces
```
**If any stage is missing or shows 0/empty, that's the bug location. Document which stage fails and proceed to the corresponding fix task.**
- [ ] **Step 7: Commit**
```bash
git add explorer.py
git commit -m "chore: add TRAJ DEBUG print checkpoints to build_trajectories_tab"
```
---
## Task 2: Fix each failure mode
Based on Task 1 output, one of these will be the culprit:
### Failure Mode A: `positions_by_window` is empty (Stage 1)
**Symptom:** `load_positions → 0 windows`
**Root causes to check:**
- `get_uniform_dim_windows` returns `[]` (no dim-50 windows in DB)
- `compute_2d_axes` silently fails on all windows
- DB path is wrong or `data/motions.db` is missing
**Fix:**
- [ ] Run: `uv run python -c "from explorer import get_uniform_dim_windows; print(get_uniform_dim_windows('data/motions.db'))"`
- [ ] If empty: query DB directly — `uv run duckdb data/motions.db "SELECT COUNT(*) FROM svd_vectors WHERE entity_type='mp'"` and check dimension distribution
- [ ] If `compute_2d_axes` fails: add try/except with print at `explorer.py:584`
- [ ] If DB path wrong: fix `run_app()` to resolve relative path
### Failure Mode B: `party_map` is empty (Stage 2)
**Symptom:** `load_party_map → 0 entries`
**Root causes:**
- `mp_metadata` and `mp_votes` tables are empty or missing
- DuckDB connection fails
- DB path points to wrong file
**Fix:**
- [ ] Run: `uv run python -c "from analysis.visualize import _load_party_map; print(len(_load_party_map('data/motions.db')))"`
- [ ] If 0: query `SELECT COUNT(*) FROM mp_metadata`, `SELECT COUNT(*) FROM mp_votes`
- [ ] If tables missing: run data pipeline to populate them
- [ ] If DuckDB fails to import: make sure `duckdb` is installed in the uv environment (`uv add duckdb`, or `uv sync` if it is already declared in pyproject.toml)
### Failure Mode C: `all_parties` is empty (Stage 3)
**Symptom:** `all_parties (raw from party_map) → 0 parties`
**Root causes:**
- All MP names in `positions_by_window` have no match in `party_map` (name mismatch)
- Every MP maps to `"Unknown"` or `None`
**Fix:**
- [ ] Run: `uv run python -c "from explorer import load_positions, load_party_map; pw = load_positions('data/motions.db', 'annual')[0]; pm = load_party_map('data/motions.db'); sample_mps = list(pw[list(pw.keys())[0]].keys())[:5]; print({mp: pm.get(mp, 'NO MATCH') for mp in sample_mps})"`
- [ ] If name mismatches: investigate `_strip_paren` fallback logic in `compute_party_coords` (explorer_helpers.py:165-170)
- [ ] If too many mismatches: add name normalization (strip titles, standardize suffixes)
- [ ] Commit fix with test
### Failure Mode D: `trace_count == 0` (Stage 4)
**Symptom:** `About to render plotly chart — trace_count=0` or `banner != None`
**Root causes:**
- All party centroids are NaN (every MP position is NaN)
- `compute_party_coords` filters out all parties (NaN/Inf in all positions)
- `select_trajectory_plot_data` falls back to MP trajectories but MP fallback also fails
**Fix:**
- [ ] Add debug print inside `compute_party_coords`: `print(f"[TRAJ DEBUG] compute_party_coords window={window_id} → {len(party_coords)} parties: {list(party_coords.keys())[:5]}")`
- [ ] Check if NaN comes from `compute_2d_axes` output (PCA on svd_vectors)
- [ ] Run: `uv run python -c "from explorer import load_positions; pw = load_positions('data/motions.db', 'annual')[0]; win = list(pw.values())[0]; sample = list(win.items())[:3]; print({k: v for k, v in sample})"` — if all values are `(nan, nan)`, the PCA step is producing NaN
- [ ] If PCA produces NaN: check `analysis/political_axis.py:compute_2d_axes` for the specific window's SVD vectors
### Failure Mode E: Chart not visible in browser (Stage 5)
**Symptom:** All stages pass but chart is blank in browser
**Root causes:**
- Plotly `fig` is empty (no traces added to figure)
- Streamlit `st.plotly_chart` suppressed by CSS/JS error
- Container width is 0 (layout issue)
**Fix:**
- [ ] Add debug print: `print(f"[TRAJ DEBUG] st.plotly_chart called with fig.data={[(t.mode, len(t.x), len(t.y)) for t in fig.data]}")`
- [ ] Check browser console for JavaScript errors (Plotly.js errors)
- [ ] Check if `use_container_width=True` causes issues — try `use_container_width=False`
- [ ] Add `st.write(fig)` as alternative to `st.plotly_chart` for debugging
### Failure Mode F: All stages pass, chart still shows blank
**Symptom:** `trace_count > 0` but chart looks empty to user
**Root causes:**
- All traces are transparent/white-on-white
- X/Y axes have huge range and all data is in a tiny corner
- Party lines overlap completely (all parties at same position)
**Fix:**
- [ ] Print axis ranges: `print(f"[TRAJ DEBUG] xaxis range={[fig.layout.xaxis.range] if fig.layout.xaxis.range else 'auto'}, yaxis range={[fig.layout.yaxis.range] if fig.layout.yaxis.range else 'auto'}")`
- [ ] Check if centroids are all at `(0, 0)` — run: `uv run python -c "from explorer import load_positions, load_party_map; from explorer_helpers import compute_party_coords; ..."`
- [ ] Check if PARTY_COLOURS assignment is broken (all traces same color)
- [ ] Verify window ordering is correct (chronological left-to-right)
---
## Task 3: Write regression test
**Files:**
- Create: `tests/test_trajectories_pipeline_integration.py`
- [ ] **Step 1: Write integration test**
```python
"""Integration test: full trajectory pipeline produces non-empty plot."""
from explorer import load_positions, load_party_map
from explorer_helpers import compute_party_centroids
from explorer import select_trajectory_plot_data
def test_trajectory_pipeline_produces_traces():
db_path = "data/motions.db"
window_size = "annual"
positions_by_window, _ = load_positions(db_path, window_size)
party_map = load_party_map(db_path)
windows = list(positions_by_window.keys())
centroids, mp_positions = compute_party_centroids(positions_by_window, party_map, windows)
fig, trace_count, banner = select_trajectory_plot_data(
positions_by_window, party_map, windows,
selected_parties=list(centroids.keys())[:6],
smooth_alpha=0.35,
)
assert trace_count > 0, f"Expected traces but got trace_count={trace_count}, banner={banner}"
assert banner is None, f"Expected no fallback banner but got: {banner}"
assert len(fig.data) == trace_count
```
- [ ] **Step 2: Run the test**
```bash
uv run pytest tests/test_trajectories_pipeline_integration.py -v
```
Expected: PASS
- [ ] **Step 3: Commit**
```bash
git add tests/test_trajectories_pipeline_integration.py
git commit -m "test: add trajectory pipeline integration test"
```
---
## Execution Order
1. **Task 1 first** — Run the instrumented app and capture which stage fails
2. **Task 2** — Fix the specific failure mode based on Task 1 output
3. **Task 3** — Write regression test once the fix is confirmed
**Estimated time:** 15-30 minutes for Task 1 (identifying the stage), 10-30 minutes for Task 2 fix (depends on which mode), 5 minutes for Task 3.

@ -713,6 +713,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9f/48/56eb8e6ea51bdd572c4f92551c74e8f1217b14c1ab2e96661a4b70e3ebdd/narwhals-2.1.0-py3-none-any.whl", hash = "sha256:dfeb3b24c1b06501d1c1e979bd25e424f8bbd2eb7a9d0f7bbf0a7d47e36b498f", size = 389391, upload-time = "2025-08-11T12:57:56.332Z" },
]
[[package]]
name = "nodeenv"
version = "1.10.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611, upload-time = "2025-12-20T14:08:54.006Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload-time = "2025-12-20T14:08:52.782Z" },
]
[[package]]
name = "numba"
version = "0.64.0"
@ -1061,6 +1070,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" },
]
[[package]]
name = "pyright"
version = "1.1.408"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nodeenv" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/74/b2/5db700e52554b8f025faa9c3c624c59f1f6c8841ba81ab97641b54322f16/pyright-1.1.408.tar.gz", hash = "sha256:f28f2321f96852fa50b5829ea492f6adb0e6954568d1caa3f3af3a5f555eb684", size = 4400578, upload-time = "2026-01-08T08:07:38.795Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/0c/82/a2c93e32800940d9573fb28c346772a14778b84ba7524e691b324620ab89/pyright-1.1.408-py3-none-any.whl", hash = "sha256:090b32865f4fdb1e0e6cd82bf5618480d48eecd2eb2e70f960982a3d9a4c17c1", size = 6399144, upload-time = "2026-01-08T08:07:37.082Z" },
]
[[package]]
name = "pytest"
version = "9.0.2"
@ -1360,6 +1382,7 @@ dependencies = [
{ name = "matplotlib" },
{ name = "openai" },
{ name = "plotly" },
{ name = "pyright" },
{ name = "requests" },
{ name = "schedule" },
{ name = "scikit-learn" },
@ -1382,6 +1405,7 @@ requires-dist = [
{ name = "matplotlib", specifier = ">=3.8" },
{ name = "openai", specifier = ">=1.99.7" },
{ name = "plotly", specifier = ">=5.0" },
{ name = "pyright", specifier = ">=1.1.408" },
{ name = "requests", specifier = ">=2.32.4" },
{ name = "schedule", specifier = ">=1.2.2" },
{ name = "scikit-learn", specifier = ">=1.8.0" },

Loading…
Cancel
Save