diff --git a/.github/workflows/mindmodel-schedule.yml b/.github/workflows/mindmodel-schedule.yml index e9ea5f3..7f4fcbc 100644 --- a/.github/workflows/mindmodel-schedule.yml +++ b/.github/workflows/mindmodel-schedule.yml @@ -11,25 +11,27 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + version: "0.6.x" + - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: '3.11' + python-version: "3.13" - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt || true + run: uv sync --locked - name: Run tests - run: | - python -m pytest -q + run: uv run pytest tests/ -q - name: Run mindmodel validator if manifest exists if: ${{ always() }} run: | if [ -f .mindmodel/manifest.yaml ]; then - python -m scripts.mindmodel.cli || true + uv run python -m scripts.mindmodel.cli || true else echo "No .mindmodel/manifest.yaml present — skipping validator" fi diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml new file mode 100644 index 0000000..d489801 --- /dev/null +++ b/.github/workflows/pytest.yml @@ -0,0 +1,53 @@ +name: Pytest + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + version: "0.6.x" + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Install dependencies + run: uv sync --locked + + - name: Run tests + run: uv run pytest tests/ -q + + typecheck: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + version: "0.6.x" + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Install dependencies + run: uv sync --locked + + - name: 
Run pyright + continue-on-error: true + run: uv run pyright diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1d7a6e1..257a659 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,17 +1,18 @@ -# Minimal pre-commit config stub -# This file is intentionally minimal and does not enable hooks by installing them. repos: - repo: https://github.com/psf/black - rev: 23.9.1 + rev: 25.1.0 hooks: - id: black + language_version: python3.13 - - repo: https://github.com/charliermarsh/ruff + - repo: https://github.com/charliermarsh/ruff-pre-commit rev: v0.11.1 hooks: - id: ruff + args: [--fix] - repo: https://github.com/PyCQA/isort - rev: 5.12.0 + rev: 6.0.1 hooks: - id: isort + args: [--profile, black] diff --git a/AGENTS.md b/AGENTS.md index 87b309d..fe0153d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -4,6 +4,10 @@ `docs/solutions/` — documented solutions to past problems (bugs, best practices, workflow patterns), organized by category with YAML frontmatter (`module`, `tags`, `problem_type`). Relevant when implementing or debugging in documented areas. +## Infrastructure Notes + +- Git is hosted on a **Gitea** server, not GitHub directly. The `gh` CLI is not available for this repo; use standard `git` commands instead. + ## Project Conventions - Right-wing parties (PVV, FVD, JA21, SGP) must appear on the RIGHT side of all axes in visualizations diff --git a/README.md b/README.md index 7aa988c..f5ece33 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,81 @@ -# stemwijzer +# Stemwijzer -A small project that uses QWEN embeddings for semantic features. The codebase includes an example Ansible package under packages/@ansible/example and helper scripts for deployment. +A Dutch parliamentary voting compass that lets you vote on real Tweede Kamer motions and see which parties match your positions. -Embeddings -- This project uses QWEN embeddings (model: `qwen/qwen3-embedding-4b`) via OpenRouter-compatible APIs. 
-- Preferred environment variable: `OPENROUTER_API_KEY` with a fallback to `OPENAI_API_KEY`. +![Stemwijzer Explorer](docs/assets/stemwijzer-screenshot.png) -Publishing and deploying the Ansible package +## What is Stemwijzer? -- Package location: `packages/@ansible/example` — this contains the Ansible playbooks and packaging used by CI. -- To publish the package (CI): create a git tag for the version and provide `NPM_TOKEN` as a secret to the CI runner so it can publish to npm. -- To deploy the package (CI): set the following repository secrets in your CI pipeline: - - `DEPLOY_HOST` (default: `motief.sgeboers.nl`) - - `DEPLOY_SSH_KEY` (private key for the `webapps` user) - - `DEPLOY_USER` (default: `webapps`) +Stemwijzer ingests motions and voting records from the Dutch House of Representatives (Tweede Kamer), stores them in DuckDB, generates AI-powered explanations with an LLM, and presents a Streamlit UI where users can vote on real motions and explore party positions through SVD visualizations, trajectory analysis, and embedding-based similarity search. -Defaults -- DEPLOY_HOST: `motief.sgeboers.nl` -- DEPLOY_USER: `webapps` +## Features -See docs/deployment/ansible-package-deploy.md for more detailed deploy instructions and defaults. 
+- **Voting Compass** — Vote on real parliamentary motions and see which parties align with your choices +- **Explorer** — Interactive SVD visualizations, party trajectories over time, motion browser, and semantic search +- **Analytics** — SVD decomposition of voting patterns, UMAP projections, clustering, and drift analysis +- **LLM Enrichment** — Automatic generation of layman-friendly motion explanations using QWEN via OpenRouter + +## Prerequisites + +- Python >= 3.13 +- [uv](https://docs.astral.sh/uv/) for dependency management +- (Optional) `OPENROUTER_API_KEY` for LLM enrichment + +## Quickstart + +```bash +# Clone and enter the repository (replace <gitea-host> with your Gitea server's hostname) +git clone https://<gitea-host>/sgeboers/stemwijzer.git +cd stemwijzer + +# Install dependencies +uv sync + +# Run the Streamlit app +uv run streamlit run Home.py + +# Run the data pipeline (fetch motions, compute embeddings, etc.) +uv run python pipeline/run_pipeline.py + +# Run tests +uv run pytest tests/ -q +``` + +The app will be available at http://localhost:8501. 
+ +## Project Structure + +``` +├── app.py # Streamlit UI entrypoint +├── database.py # DuckDB schema and queries +├── api_client.py # Tweede Kamer OData API client +├── explorer.py # Explorer page with SVD visualizations +├── pipeline/ # Data ingestion and analysis pipelines +├── analysis/ # SVD, clustering, trajectory modules +├── tests/ # pytest test suite +├── docs/ # Documentation, research, and plans +└── data/motions.db # DuckDB database (~18 GB) +``` + +## Documentation + +- **[ARCHITECTURE.md](ARCHITECTURE.md)** — Comprehensive architecture overview, tech stack, and contributor guidance +- **[CODE_STYLE.md](CODE_STYLE.md)** — Coding conventions, naming, typing, and testing standards +- **[docs/solutions/](docs/solutions/)** — Documented solutions to past bugs and best practices + +## Tech Stack + +- **Language:** Python 3.13+ +- **Data:** DuckDB via ibis-framework +- **UI:** Streamlit + Plotly +- **ML/Analysis:** scipy, scikit-learn, umap-learn +- **LLM:** QWEN via OpenRouter (OpenAI-compatible) +- **Package Manager:** uv + +## Deployment + +See [docs/deployment/ansible-package-deploy.md](docs/deployment/ansible-package-deploy.md) for server deployment instructions using the Ansible package. 
+ +## License + +[Your license here] diff --git a/analysis/config.py b/analysis/config.py index 1e94d5e..55e7255 100644 --- a/analysis/config.py +++ b/analysis/config.py @@ -267,3 +267,65 @@ _PARTY_NORMALIZE: dict[str, str] = { "Lid Keijzer": "BBB", "Groep Markuszower": "PVV", } + +# --------------------------------------------------------------------------- +# Application configuration (migrated from root config.py) +# --------------------------------------------------------------------------- + +import os +from dataclasses import dataclass + + +@dataclass +class Config: + # Database settings + DATABASE_PATH = "data/motions.db" + + # API settings + TWEEDE_KAMER_ODATA_API = "https://gegevensmagazijn.tweedekamer.nl/OData/v4/2.0" + API_TIMEOUT = 30 + API_BATCH_SIZE = 250 + API_MAX_LIMIT = 250 + + # AI settings + OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") + OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1" + QWEN_MODEL = "qwen/qwen-2.5-72b-instruct" + + # App settings + DEFAULT_MOTION_COUNT = 10 + DEFAULT_WINNING_MARGIN_MIN = 0 + DEFAULT_WINNING_MARGIN_MAX = 100 + SESSION_TIMEOUT_DAYS = 30 + + # Policy areas + POLICY_AREAS = [ + "Alle", + "Economie", + "Klimaat", + "Immigratie", + "Zorg", + "Onderwijs", + "Defensie", + "Sociale Zaken", + "Algemeen", + ] + + # Scraper defaults + BASE_URL = "https://www.tweedekamer.nl/zoeken/zoekresultaten" + SCRAPING_DELAY = int(os.getenv("SCRAPING_DELAY", "5")) + + +config = Config() + +__all__ = [ + "PARTY_COLOURS", + "SVD_THEMES", + "KNOWN_MAJOR_PARTIES", + "CURRENT_PARLIAMENT_PARTIES", + "_PARTY_NORMALIZE", + "CANONICAL_RIGHT", + "CANONICAL_LEFT", + "Config", + "config", +] diff --git a/config.py b/config.py index 1c443fe..d0fc20c 100644 --- a/config.py +++ b/config.py @@ -1,51 +1,2 @@ -# config.py (complete updated version) -import os -from dataclasses import dataclass -from typing import List - - -@dataclass -class Config: - # Database settings - DATABASE_PATH = "data/motions.db" - - # API settings (updated) - 
TWEEDE_KAMER_ODATA_API = "https://gegevensmagazijn.tweedekamer.nl/OData/v4/2.0" - API_TIMEOUT = 30 - API_BATCH_SIZE = 250 # Increased based on API capabilities - API_MAX_LIMIT = 250 - - # AI settings - OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") - OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1" - QWEN_MODEL = "qwen/qwen-2.5-72b-instruct" - - # App settings - DEFAULT_MOTION_COUNT = 10 - DEFAULT_WINNING_MARGIN_MIN = ( - 0 # % - include all, filter by layman_explanation instead - ) - DEFAULT_WINNING_MARGIN_MAX = 100 # % - SESSION_TIMEOUT_DAYS = 30 - - # Policy areas - POLICY_AREAS = [ - "Alle", - "Economie", - "Klimaat", - "Immigratie", - "Zorg", - "Onderwijs", - "Defensie", - "Sociale Zaken", - "Algemeen", - ] - - # Scraper defaults (previously missing) - BASE_URL = ( - "https://www.tweedekamer.nl/zoeken/zoekresultaten" # base for scraping motions - ) - SCRAPING_DELAY = int(os.getenv("SCRAPING_DELAY", "5")) - - -config = Config() +# Backward-compatibility shim — root config now lives in analysis.config +from analysis.config import Config, config # noqa: F401 diff --git a/docker-compose.yml b/docker-compose.yml index 6478d0b..9070d7b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,15 +18,3 @@ services: timeout: 3s retries: 3 start_period: 15s - - scheduler: - image: ${DOCKER_REGISTRY}/sgeboers/stemwijzer:latest - command: python scheduler.py - volumes: - - ${DATA_DIR:-/home/webapps/motief/data}:/home/app/app/data - restart: unless-stopped - environment: - - PYTHONPATH=/home/app/app - - OPENROUTER_API_KEY - - OPENAI_API_KEY - - DB_PATH=/home/app/app/data/motions.db