- Project-local skill .opencode/skills/score-extremity/ for subagent dispatch - Orchestrator extremity_rescore_2d.py with load_skill/sample/format/validate/store - 16 TDD tests covering all orchestrator functions - 117 motions scored by deepseek v4 flash subagents (12 parallel batches) - Pearson r=0.45 between stylistic and material dimensions — separable - Key finding: 36.8% of motions use restrained language for consequential policies - 2d_extremity_correlation_report.md documents distribution, divergence patterns, and implications for the Overton acceptance-without-conversion narrative
main
parent
10fc002ef9
commit
bf37f84a8b
@ -0,0 +1,362 @@ |
|||||||
|
#!/usr/bin/env python3 |
||||||
|
"""Two-dimensional extremity rescoring orchestrator. |
||||||
|
|
||||||
|
Scores Dutch parliamentary motions on two independent dimensions: |
||||||
|
1. stijl_extremiteit (stylistic extremity, 1-5) |
||||||
|
2. materiele_impact (material impact, 1-5) |
||||||
|
|
||||||
|
Usage: |
||||||
|
uv run python analysis/right_wing/extremity_rescore_2d.py --db data/motions.db |
||||||
|
uv run python analysis/right_wing/extremity_rescore_2d.py --db data/motions.db --dry-run |
||||||
|
""" |
||||||
|
|
||||||
|
from __future__ import annotations |
||||||
|
|
||||||
|
import argparse |
||||||
|
import json |
||||||
|
import logging |
||||||
|
import re |
||||||
|
from pathlib import Path |
||||||
|
from typing import Any |
||||||
|
|
||||||
|
import duckdb |
||||||
|
|
||||||
|
# Emit timestamped, level-tagged records at INFO and above for this module.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


# ── prompt / schema loading ──────────────────────────────────────────────────

# Default skill definition: three directory levels above this file, then
# .opencode/skills/score-extremity/SKILL.md.
SKILL_MD_PATH = Path(__file__).parent.parent.parent.joinpath(
    ".opencode", "skills", "score-extremity", "SKILL.md"
)
||||||
|
|
||||||
|
|
||||||
|
def load_skill(skill_path: str | None = None) -> dict[str, Any]: |
||||||
|
"""Read SKILL.md and extract prompt template and output schemas. |
||||||
|
|
||||||
|
Returns: |
||||||
|
dict with keys "prompt_template", "single_schema", "batch_schema". |
||||||
|
""" |
||||||
|
path = Path(skill_path) if skill_path else SKILL_MD_PATH |
||||||
|
if not path.exists(): |
||||||
|
raise FileNotFoundError(f"Skill file not found: {path}") |
||||||
|
|
||||||
|
content = path.read_text(encoding="utf-8") |
||||||
|
|
||||||
|
# Extract prompt template from ```text ... ``` block |
||||||
|
prompt_match = re.search(r"```text\n(.*?)```", content, re.DOTALL) |
||||||
|
prompt_template = prompt_match.group(1).strip() if prompt_match else "" |
||||||
|
|
||||||
|
# Extract JSON schema blocks (first = single, second = batch) |
||||||
|
json_blocks = re.findall(r"```json\n(.*?)```", content, re.DOTALL) |
||||||
|
|
||||||
|
single_schema: dict[str, Any] = {} |
||||||
|
batch_schema: dict[str, Any] = {} |
||||||
|
if len(json_blocks) >= 1: |
||||||
|
try: |
||||||
|
single_schema = json.loads(json_blocks[0].strip()) |
||||||
|
except json.JSONDecodeError: |
||||||
|
logger.warning("Failed to parse single schema JSON block") |
||||||
|
if len(json_blocks) >= 2: |
||||||
|
try: |
||||||
|
batch_schema = json.loads(json_blocks[1].strip()) |
||||||
|
except json.JSONDecodeError: |
||||||
|
logger.warning("Failed to parse batch schema JSON block") |
||||||
|
|
||||||
|
return { |
||||||
|
"prompt_template": prompt_template, |
||||||
|
"single_schema": single_schema, |
||||||
|
"batch_schema": batch_schema, |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
# ── sampling ───────────────────────────────────────────────────────────────── |
||||||
|
|
||||||
|
def sample_motions(
    db_path: str,
    n_per_bucket: int = 25,
    seed: int = 42,
) -> list[dict[str, Any]]:
    """Stratified sample from right_wing_motions JOIN motions JOIN extremity_scores.

    Keeps rows that are classified, have a non-NULL text_score and no scoring
    error, shuffles them in DuckDB, and takes the first n_per_bucket rows of
    each integer text_score bucket.

    Args:
        db_path: Path to the DuckDB database.
        n_per_bucket: Maximum number of motions taken from each bucket.
        seed: Integer seed; it is scaled by 1e-6 before being handed to
            DuckDB's setseed().

    Returns:
        List of dicts with keys: motion_id, title, text, layman, text_score,
        grouped by ascending bucket. Empty when a required table is missing
        or no row qualifies.
    """
    con = duckdb.connect(db_path)
    try:
        # Bail out early (instead of failing inside the query) if the schema is incomplete.
        tables = {row[0] for row in con.execute("SHOW TABLES").fetchall()}
        missing = {"right_wing_motions", "motions", "extremity_scores"} - tables
        if missing:
            logger.warning("Missing tables: %s, returning empty sample", missing)
            return []

        # setseed() only accepts values in [-1, 1]. Seeds up to one million in
        # magnitude keep their original scaling; larger ones are folded back
        # into range (sign preserved) instead of making the query fail.
        seed_fraction = seed / 1000000.0
        if not -1.0 <= seed_fraction <= 1.0:
            folded = (abs(seed) % 1000000) / 1000000.0
            seed_fraction = folded if seed > 0 else -folded
        con.execute(f"SELECT setseed({seed_fraction})")

        # NOTE(review): reproducibility presumably also requires DuckDB to
        # produce the joined rows in a stable order before RANDOM() is
        # assigned — confirm.
        rows = con.execute(
            """
            SELECT m.id, m.title, m.body_text, m.layman_explanation, e.text_score
            FROM right_wing_motions r
            JOIN motions m ON r.motion_id = m.id
            JOIN extremity_scores e ON r.motion_id = e.motion_id
            WHERE r.classified = TRUE
              AND e.text_score IS NOT NULL
              AND e.error IS NULL
            ORDER BY RANDOM()
            """
        ).fetchall()

        if not rows:
            return []

        # Group the shuffled rows by score; NULL text columns become "".
        buckets: dict[int, list[dict[str, Any]]] = {}
        for mid, title, body_text, layman, text_score in rows:
            score_bucket = int(text_score)
            buckets.setdefault(score_bucket, []).append({
                "motion_id": mid,
                "title": title or "",
                "text": body_text or "",
                "layman": layman or "",
                "text_score": score_bucket,
            })

        # Rows are already in random order, so a prefix of each bucket is a random draw.
        result: list[dict[str, Any]] = []
        for bucket_id in sorted(buckets):
            result.extend(buckets[bucket_id][:n_per_bucket])

        logger.info(
            "Sampled %d motions from %d buckets (n_per_bucket=%d)",
            len(result), len(buckets), n_per_bucket,
        )
        return result

    finally:
        con.close()
||||||
|
|
||||||
|
|
||||||
|
# ── batch formatting ───────────────────────────────────────────────────────── |
||||||
|
|
||||||
|
def format_batches(
    motions: list[dict[str, Any]],
    prompt_template: str,
    batch_size: int = 10,
) -> list[list[str]]:
    """Fill the prompt template for each motion and split the prompts into batches.

    Args:
        motions: List of dicts with keys title, text, layman. A missing key or
            a None value is rendered as an empty string.
        prompt_template: str.format template with {title}, {text}, {layman}
            placeholders (literal braces must be doubled, as in any
            str.format template).
        batch_size: Number of motions per batch; must be at least 1.

    Returns:
        List of batches; each batch is a list of filled prompt strings, one
        per motion, in input order. The last batch may be shorter.

    Raises:
        ValueError: If batch_size is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would make range() empty and silently drop every motion.
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    def fill(motion: dict[str, Any]) -> str:
        values = {}
        for key in ("title", "text", "layman"):
            value = motion.get(key)
            # Avoid rendering the literal text "None" into the prompt.
            values[key] = "" if value is None else value
        return prompt_template.format(**values)

    prompts = [fill(motion) for motion in motions]
    return [
        prompts[start : start + batch_size]
        for start in range(0, len(prompts), batch_size)
    ]
||||||
|
|
||||||
|
|
||||||
|
# ── validation ─────────────────────────────────────────────────────────────── |
||||||
|
|
||||||
|
EXPECTED_FIELDS = [ |
||||||
|
"stijl_extremiteit", |
||||||
|
"stijl_toelichting", |
||||||
|
"materiele_impact", |
||||||
|
"materiele_toelichting", |
||||||
|
] |
||||||
|
|
||||||
|
|
||||||
|
def validate_single_result(result: dict[str, Any]) -> tuple[bool, str | None]: |
||||||
|
"""Validate a single motion 2d scoring result. |
||||||
|
|
||||||
|
Returns: |
||||||
|
(True, None) if valid, (False, error_message) otherwise. |
||||||
|
""" |
||||||
|
# Check all required fields exist |
||||||
|
for field in EXPECTED_FIELDS: |
||||||
|
if field not in result: |
||||||
|
return False, f"missing field: {field}" |
||||||
|
|
||||||
|
# Validate stijl_extremiteit (int, 1-5) |
||||||
|
se = result["stijl_extremiteit"] |
||||||
|
if not isinstance(se, int) or se < 1 or se > 5: |
||||||
|
return False, f"stijl_extremiteit out of range 1-5: {se}" |
||||||
|
|
||||||
|
# Validate materiele_impact (int, 1-5) |
||||||
|
mi = result["materiele_impact"] |
||||||
|
if not isinstance(mi, int) or mi < 1 or mi > 5: |
||||||
|
return False, f"materiele_impact out of range 1-5: {mi}" |
||||||
|
|
||||||
|
return True, None |
||||||
|
|
||||||
|
|
||||||
|
# ── storage ────────────────────────────────────────────────────────────────── |
||||||
|
|
||||||
|
def store_scores(db_path: str, results: list[dict[str, Any]]) -> int:
    """Write validated 2d scores to the extremity_scores_2d table.

    The table is created on first use. Rows are keyed on motion_id, and an
    existing row for the same motion is replaced.

    Args:
        db_path: Path to DuckDB database.
        results: List of dicts with keys: motion_id, stijl_extremiteit,
            stijl_toelichting, materiele_impact, materiele_toelichting.
            The two rationale keys may be absent (stored as NULL).

    Returns:
        Number of rows inserted.
    """
    create_sql = """
        CREATE TABLE IF NOT EXISTS extremity_scores_2d (
            motion_id INTEGER PRIMARY KEY,
            stylistic_score INTEGER NOT NULL,
            material_score INTEGER NOT NULL,
            stylistic_rationale TEXT,
            material_rationale TEXT
        )
        """
    upsert_sql = """
        INSERT OR REPLACE INTO extremity_scores_2d
        (motion_id, stylistic_score, material_score, stylistic_rationale, material_rationale)
        VALUES (?, ?, ?, ?, ?)
        """

    connection = duckdb.connect(db_path)
    try:
        connection.execute(create_sql)

        stored = 0
        for record in results:
            # Dutch result keys map onto the English column names.
            row = (
                record["motion_id"],
                record["stijl_extremiteit"],
                record["materiele_impact"],
                record.get("stijl_toelichting"),
                record.get("materiele_toelichting"),
            )
            connection.execute(upsert_sql, row)
            stored += 1

        connection.commit()
        logger.info("Stored %d scores in extremity_scores_2d", stored)
        return stored
    finally:
        connection.close()
||||||
|
|
||||||
|
|
||||||
|
# ── orchestrator ───────────────────────────────────────────────────────────── |
||||||
|
|
||||||
|
def rescore_2d(
    db_path: str,
    n_per_bucket: int = 25,
    batch_size: int = 10,
    dry_run: bool = False,
) -> dict[str, Any]:
    """Plan a two-dimensional rescoring run and report what would be scored.

    Loads the skill prompt, draws a stratified sample of motions and formats
    them into prompt batches. No subagent is started from this function: in
    dry-run mode it stops after logging the plan, otherwise it logs that the
    batches are ready for dispatch from an agent context.

    Args:
        db_path: Path to DuckDB database.
        n_per_bucket: Number of motions to sample per text_score bucket.
        batch_size: Motions per subagent batch.
        dry_run: If True, only log the plan.

    Returns:
        Dict with motions_count, batch_count and dry_run; a non-dry run also
        reports subagents_spawned (always 0 here).
    """
    prompt_template = load_skill()["prompt_template"]
    motions = sample_motions(db_path, n_per_bucket=n_per_bucket)

    if not motions:
        logger.warning("No motions to rescore.")
        return {"motions_count": 0, "batch_count": 0, "dry_run": dry_run}

    batches = format_batches(motions, prompt_template, batch_size=batch_size)
    motion_total, batch_total = len(motions), len(batches)

    logger.info(
        "Plan: %d motions in %d batches (batch_size=%d)",
        motion_total, batch_total, batch_size,
    )

    summary: dict[str, Any] = {
        "motions_count": motion_total,
        "batch_count": batch_total,
    }

    if dry_run:
        logger.info("DRY RUN — no subagents will be spawned.")
        summary["dry_run"] = True
        return summary

    # ── subagent dispatch (placeholder) ──────────────────────────────────
    # Dispatch is not performed by this script. The intended flow, per batch:
    #   1. join the batch's filled prompts with "\n\n---\n\n";
    #   2. send the combined prompt to a subagent through the `task` tool,
    #      instructing it to return JSON matching the batch schema;
    #   3. keep the entries of the returned "motions" list that pass
    #      validate_single_result and persist them with store_scores.
    logger.info(
        "Subagent dispatch placeholder: %d batches ready for scoring. "
        "Run via an agent context (e.g. opencode task) to execute.",
        batch_total,
    )

    summary.update(dry_run=False, subagents_spawned=0)
    return summary
||||||
|
|
||||||
|
|
||||||
|
# ── CLI ────────────────────────────────────────────────────────────────────── |
||||||
|
|
||||||
|
def main() -> int:
    """Parse the command line, run the orchestrator and print its summary as JSON.

    Returns:
        0, used as the process exit status.
    """
    cli = argparse.ArgumentParser(
        description="Two-dimensional extremity rescoring orchestrator"
    )
    cli.add_argument("--db", default="data/motions.db", help="Path to DuckDB database")
    # The two integer options share the same shape, so declare them from a table.
    for flag, default, help_text in (
        ("--n-per-bucket", 25, "Motions per text_score bucket"),
        ("--batch-size", 10, "Motions per subagent batch"),
    ):
        cli.add_argument(flag, type=int, default=default, help=help_text)
    cli.add_argument("--dry-run", action="store_true", help="Print plan without spawning subagents")
    options = cli.parse_args()

    summary = rescore_2d(
        db_path=options.db,
        n_per_bucket=options.n_per_bucket,
        batch_size=options.batch_size,
        dry_run=options.dry_run,
    )
    print(json.dumps(summary, indent=2))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
||||||
@ -0,0 +1,112 @@ |
|||||||
|
# Two-Dimensional Extremity Correlation Report |
||||||
|
|
||||||
|
**Date:** 2026-05-24 |
||||||
|
**Motions scored:** 117 (stratified sample: ~25 per original extremity bucket) |
||||||
|
**Scoring model:** Deepseek v4 flash (subagents via project skill) |
||||||
|
|
||||||
|
## Purpose |
||||||
|
|
||||||
|
The original extremity score is a single 1–5 rating of policy radicalism. This conflates two potentially independent dimensions: |
||||||
|
- **Stylistic extremity (stijl-extremiteit):** How inflammatory, hostile, or polarizing the language is |
||||||
|
- **Material impact (materiële impact):** How much the proposed policy would substantively affect people's rights, institutions, or freedoms |
||||||
|
|
||||||
|
This validation samples motions across the full extremity range and scores both dimensions independently to test whether they correlate strongly enough for a single score, or whether they should be tracked separately. |
||||||
|
|
||||||
|
--- |
||||||
|
|
||||||
|
## Results |
||||||
|
|
||||||
|
### Overall correlation |
||||||
|
|
||||||
|
| Metric | Value | |
||||||
|
|--------|-------| |
||||||
|
| N | 117 | |
||||||
|
| Pearson r | **0.453** (moderate) | |
||||||
|
| Mean stylistic | 2.01 | |
||||||
|
| Mean material | 2.86 | |
||||||
|
| Mean absolute difference | 1.11 | |
||||||
|
| S ≤ 2 AND M ≥ 3 (masking) | 43 (36.8%) | |
||||||
|
|
||||||
|
**r = 0.453 is moderate — the dimensions are partly correlated but clearly separable.** Stylistic extremity explains only ~20% of the variance in material impact (R² = 0.205). A motion can be inflammatory without being consequential, and vice versa. |
||||||
|
|
||||||
|
### Joint distribution |
||||||
|
|
||||||
|
| | M=1 | M=2 | M=3 | M=4 | M=5 | |
||||||
|
|---|---|---|---|---|---| |
||||||
|
| **S=1** | 11 | 17 | 10 | 5 | 1 | |
||||||
|
| **S=2** | 4 | 9 | 15 | 8 | 4 | |
||||||
|
| **S=3** | 2 | 4 | 9 | 4 | 5 | |
||||||
|
| **S=4** | 0 | 1 | 0 | 3 | 2 | |
||||||
|
| **S=5** | 0 | 0 | 0 | 1 | 2 | |
||||||
|
|
||||||
|
### By original extremity bucket |
||||||
|
|
||||||
|
| Bucket | N | Mean style | Mean material | Gap | |
||||||
|
|--------|---|-----------|--------------|-----| |
||||||
|
| 1–2 (mild) | 50 | 1.56 | 2.24 | +0.68 | |
||||||
|
| 2–3 (moderate) | 25 | 2.00 | 2.88 | +0.88 | |
||||||
|
| 3–4 (high) | 25 | 2.56 | 3.56 | +1.00 | |
||||||
|
| 4–5 (extreme) | 17 | 2.53 | 3.65 | +1.12 | |
||||||
|
|
||||||
|
Material impact consistently rates higher than stylistic extremity across all buckets. The gap widens at higher original extremity levels — suggesting the original LLM scoring was more sensitive to language style, while subagents systematically identify greater material consequences in the same motions. |
||||||
|
|
||||||
|
--- |
||||||
|
|
||||||
|
## Key findings |
||||||
|
|
||||||
|
### 1. "Low style, high impact" is the dominant divergence pattern |
||||||
|
|
||||||
|
**36.8% of motions (43 of 117)** use restrained language (S ≤ 2) for policies with substantial material impact (M ≥ 3). These are the motions most poorly captured by a single-dimensional score: |
||||||
|
|
||||||
|
- **Motion 16227** (S=1, M=5): "Verzoekt de regering kennis te geven van het voornemen tot uittreding uit de Europese Unie conform artikel 50 VWEU." Neutral, procedural language invoking an EU treaty article — but the policy is fundamental dissolution of the entire Dutch-EU legal framework. |
||||||
|
|
||||||
|
- **Motion 7713** (S=1, M=4): "Verzoekt de regering per direct te stoppen met arbeidsmigratie." Restrained, single-sentence motion with no inflammatory language — but it would suspend free movement of persons, a fundamental EU treaty right. |
||||||
|
|
||||||
|
- **Motion 16704** (S=1, M=3): Formal Raad van State advice and technical amendment text. No political rhetoric — but a concrete law change with measurable employment and investment effects. |
||||||
|
|
||||||
|
- **Motion 687** (S=1, M=3): Technical-juridical language about the scope of "emissiegegevens" in the EU environmental information directive — but would significantly restrict public transparency about agricultural emissions. |
||||||
|
|
||||||
|
### 2. Material impact averages significantly higher |
||||||
|
|
||||||
|
Across all buckets, material impact scores are 0.68–1.12 points higher than stylistic scores. This suggests: |
||||||
|
- Parliamentarians write motions using formal, restrained language even when proposing consequential policies |
||||||
|
- The original LLM scoring (which showed mean extremity = 2.19 overall) likely understates how radical these policies are in material terms |
||||||
|
- Dutch parliamentary language norms mask policy radicalism |
||||||
|
|
||||||
|
### 3. "High style" motions are rare and concentrated |
||||||
|
|
||||||
|
Only 3 motions scored S=5 (the most inflammatory end), and all had M=4 or M=5. Explicitly discriminatory or hostile language — when it occurs — is paired with substantively extreme policies. Such motions are the exception, however: the vast majority of consequential right-wing motions use parliamentary language. Examples of the high-style cases: |
||||||
|
|
||||||
|
- **Motion 11956** (S=4, M=5): Explicitly hostile language ("à la Turkije," "vreemdelingen die we hier niet willen hebben") paired with fundamental rights violation (forced deportation without country-of-origin consent) |
||||||
|
- **Motion 18064** (S=5, M=4): Explicit ethnic targeting ("niet-westerse allochtonen" as COVID rulebreakers) — discriminatory state action |
||||||
|
|
||||||
|
### 4. The original LLM audit gap is partially explained |
||||||
|
|
||||||
|
The manual audit found 75% agreement with the original LLM scores and noted "systematic overrating of anti-institutional language." The two-dimensional data clarifies this: the original LLM was more sensitive to *stylistic* extremity (inflammatory language) than to *material* policy impact. The 25% disagreement likely occurred on "low style, high impact" motions where the single-dimensional score was anchored to language rather than substance. |
||||||
|
|
||||||
|
--- |
||||||
|
|
||||||
|
## Implications for Overton analysis |
||||||
|
|
||||||
|
### For the current findings |
||||||
|
|
||||||
|
The "no content extremity increase" (d = −0.09) finding in the Overton report relied on single-dimensional LLM scores. The two-dimensional data suggests this may be an **artifact of the language-focused scoring**: if right-wing motions became more consequential while maintaining or softening their language, the single score would miss the shift entirely. |
||||||
|
|
||||||
|
The "acceptance without conversion" interpretation — centrists vote more often with the right wing despite spatial divergence — is **strengthened** by these findings. It is consistent with right-wing motions becoming *substantively* consequential (high material impact) while maintaining procedural language norms, making them harder for centrists to vote against without appearing obstructionist. |
||||||
|
|
||||||
|
### Recommendations |
||||||
|
|
||||||
|
1. **Re-score all 2,986 motions with two-dimensional scoring.** The moderate r = 0.453 confirms the dimensions are separable. A single score obscures the most important category: motions with low stylistic extremity but high material impact. |
||||||
|
|
||||||
|
2. **Re-run the extremity-stratified centrist support analysis with material impact buckets.** The critical question: did centrist support for *high material impact* motions increase after 2024? If low-language, high-impact motions are the ones gaining centrist tolerance, that is stronger Overton evidence than the current analysis captures. |
||||||
|
|
||||||
|
3. **For mechanism analysis (U4):** Score mechanisms specifically for *material impact* rather than general extremity. The question is not "how extreme is this motion?" but "what specific rights, institutions, or groups does this motion affect, and how much?" |
||||||
|
|
||||||
|
--- |
||||||
|
|
||||||
|
## Data |
||||||
|
|
||||||
|
- **Full results:** `data/motions.db` → `extremity_scores_2d` (117 rows) |
||||||
|
- **Raw JSON:** `/tmp/extremity_2d_results.json` |
||||||
|
- **Scoring skill:** `.opencode/skills/score-extremity/SKILL.md` |
||||||
|
- **Orchestrator:** `analysis/right_wing/extremity_rescore_2d.py` |
||||||
@ -0,0 +1,360 @@ |
|||||||
|
"""Tests for two-dimensional extremity rescoring orchestrator.""" |
||||||
|
|
||||||
|
import json

import pytest

# importorskip performs the import itself. A plain ``import duckdb`` ahead of
# it would raise ImportError during collection, before the skip could apply.
duckdb = pytest.importorskip("duckdb")
||||||
|
|
||||||
|
|
||||||
|
# ── fixtures ──────────────────────────────────────────────────────────────── |
||||||
|
|
||||||
|
@pytest.fixture
def synthetic_motions():
    """Provide 103 numbered placeholder motions (ids 1-103) for batch-formatting tests."""
    return [
        {
            "motion_id": number,
            "title": f"Motion {number}",
            "text": f"Body text for motion {number}",
            "layman": f"Layman explanation {number}",
        }
        for number in range(1, 104)
    ]
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def prompt_template():
    """Three-line template exposing the {title}, {text} and {layman} placeholders."""
    template_lines = ("Titel: {title}", "Tekst: {text}", "Uitleg: {layman}")
    # Every line, including the last, is newline-terminated.
    return "\n".join(template_lines) + "\n"
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def valid_single_result():
    """Well-formed single-motion result: both scores in range, both rationales present."""
    return dict(
        stijl_extremiteit=3,
        stijl_toelichting="Neutraal taalgebruik",
        materiele_impact=4,
        materiele_toelichting="Beperkt rechten voor specifieke groep",
    )
||||||
|
|
||||||
|
|
||||||
|
# ── load_skill tests ──────────────────────────────────────────────────────── |
||||||
|
|
||||||
|
class TestLoadSkill:
    """load_skill against the repository's SKILL.md and against a missing path."""

    def test_returns_prompt_and_schema(self):
        from analysis.right_wing.extremity_rescore_2d import load_skill

        skill = load_skill()

        assert isinstance(skill, dict)
        for key in ("prompt_template", "batch_schema", "single_schema"):
            assert key in skill

        template = skill["prompt_template"]
        assert isinstance(template, str)
        assert len(template) > 0
        # Both scoring dimensions must be described in the prompt.
        for heading in ("STIJL-EXTREMITEIT", "MATERIELE IMPACT"):
            assert heading in template

        batch_schema = skill["batch_schema"]
        assert isinstance(batch_schema, dict)
        assert "motions" in batch_schema
        assert isinstance(skill["single_schema"], dict)

    def test_missing_file_raises(self):
        from analysis.right_wing.extremity_rescore_2d import load_skill

        absent_path = "/nonexistent/path/skill.md"
        with pytest.raises(FileNotFoundError, match="not found"):
            load_skill(skill_path=absent_path)
||||||
|
|
||||||
|
|
||||||
|
# ── format_batches tests ──────────────────────────────────────────────────── |
||||||
|
|
||||||
|
class TestFormatBatches:
    """Batch splitting and placeholder substitution in format_batches."""

    def test_splits_into_batches(self, synthetic_motions, prompt_template):
        from analysis.right_wing.extremity_rescore_2d import format_batches

        first_hundred = synthetic_motions[:100]
        batches = format_batches(first_hundred, prompt_template, batch_size=10)

        assert isinstance(batches, list)
        assert len(batches) == 10
        for batch in batches:
            assert isinstance(batch, list)
            assert len(batch) == 10
            assert all("Motion" in prompt_str for prompt_str in batch)

    def test_uneven_batches(self, synthetic_motions, prompt_template):
        from analysis.right_wing.extremity_rescore_2d import format_batches

        # 103 motions -> ten full batches plus a remainder of three.
        *full_batches, remainder = format_batches(
            synthetic_motions, prompt_template, batch_size=10
        )

        assert len(full_batches) + 1 == 11
        assert all(len(batch) == 10 for batch in full_batches)
        assert len(remainder) == 3

    def test_substitutes_placeholders(self, prompt_template):
        from analysis.right_wing.extremity_rescore_2d import format_batches

        motion = {
            "motion_id": 1,
            "title": "Test Title",
            "text": "Test Text",
            "layman": "Test Layman",
        }
        prompt_str = format_batches([motion], prompt_template, batch_size=1)[0][0]

        for expected in ("Test Title", "Test Text", "Test Layman"):
            assert expected in prompt_str
||||||
|
|
||||||
|
|
||||||
|
# ── validate_single_result tests ──────────────────────────────────────────── |
||||||
|
|
||||||
|
class TestValidateSingleResult:
    """Accept/reject behaviour of validate_single_result."""

    def test_valid_result(self, valid_single_result):
        from analysis.right_wing.extremity_rescore_2d import validate_single_result

        is_valid, message = validate_single_result(valid_single_result)

        assert is_valid is True
        assert message is None

    def test_missing_field(self, valid_single_result):
        from analysis.right_wing.extremity_rescore_2d import validate_single_result

        without_impact = {
            key: value
            for key, value in valid_single_result.items()
            if key != "materiele_impact"
        }
        is_valid, message = validate_single_result(without_impact)

        assert is_valid is False
        assert "materiele_impact" in message

    def test_out_of_range_high(self, valid_single_result):
        from analysis.right_wing.extremity_rescore_2d import validate_single_result

        too_high = {**valid_single_result, "stijl_extremiteit": 6}
        is_valid, message = validate_single_result(too_high)

        assert is_valid is False
        assert "stijl_extremiteit" in message

    def test_out_of_range_low(self, valid_single_result):
        from analysis.right_wing.extremity_rescore_2d import validate_single_result

        too_low = {**valid_single_result, "materiele_impact": 0}
        is_valid, message = validate_single_result(too_low)

        assert is_valid is False
        assert "materiele_impact" in message

    def test_non_integer_score(self, valid_single_result):
        from analysis.right_wing.extremity_rescore_2d import validate_single_result

        # A numeric string must not be accepted as a score.
        stringly_typed = {**valid_single_result, "stijl_extremiteit": "3"}
        is_valid, message = validate_single_result(stringly_typed)

        assert is_valid is False
        assert "stijl_extremiteit" in message
||||||
|
|
||||||
|
|
||||||
|
# ── store_scores tests ────────────────────────────────────────────────────── |
||||||
|
|
||||||
|
class TestStoreScores:
    """Persistence behaviour of store_scores against a temporary DuckDB file."""

    def test_stores_and_returns_count(self, tmp_duckdb_path):
        import duckdb
        from analysis.right_wing.extremity_rescore_2d import store_scores

        first = {
            "motion_id": 1, "stijl_extremiteit": 3, "stijl_toelichting": "a",
            "materiele_impact": 4, "materiele_toelichting": "b",
        }
        second = {
            "motion_id": 2, "stijl_extremiteit": 2, "stijl_toelichting": "c",
            "materiele_impact": 1, "materiele_toelichting": "d",
        }
        assert store_scores(tmp_duckdb_path, [first, second]) == 2

        # Read back through a fresh connection to confirm the rows were persisted.
        con = duckdb.connect(tmp_duckdb_path)
        try:
            stored = con.execute(
                "SELECT motion_id, stylistic_score, material_score "
                "FROM extremity_scores_2d ORDER BY motion_id"
            ).fetchall()
        finally:
            con.close()

        assert len(stored) == 2
        assert stored[0] == (1, 3, 4)
        assert stored[1] == (2, 2, 1)

    def test_replace_existing(self, tmp_duckdb_path):
        import duckdb
        from analysis.right_wing.extremity_rescore_2d import store_scores

        original = {
            "motion_id": 1, "stijl_extremiteit": 1, "stijl_toelichting": "x",
            "materiele_impact": 1, "materiele_toelichting": "y",
        }
        store_scores(tmp_duckdb_path, [original])

        # Same motion_id with new scores: the earlier row must be overwritten.
        revised = {
            "motion_id": 1, "stijl_extremiteit": 5, "stijl_toelichting": "z",
            "materiele_impact": 5, "materiele_toelichting": "w",
        }
        assert store_scores(tmp_duckdb_path, [revised]) == 1

        con = duckdb.connect(tmp_duckdb_path)
        try:
            stored = con.execute(
                "SELECT stylistic_score, material_score FROM extremity_scores_2d WHERE motion_id = 1"
            ).fetchall()
        finally:
            con.close()

        assert stored[0] == (5, 5)
||||||
|
|
||||||
|
|
||||||
|
# ── sample_motions tests ──────────────────────────────────────────────────── |
||||||
|
|
||||||
|
class TestSampleMotions:
    """Tests for ``sample_motions`` against a synthetic DuckDB database.

    The autouse fixture below seeds 60 motions (ids 1-60): 15 motions for
    each of the text_score values 1, 2, 4 and 5.
    """

    @pytest.fixture(autouse=True)
    def setup_db(self, tmp_duckdb_path):
        """Set up right_wing_motions and extremity_scores tables with synthetic data."""
        con = duckdb.connect(tmp_duckdb_path)
        try:
            con.execute("""
                CREATE TABLE IF NOT EXISTS right_wing_motions (
                    motion_id INTEGER PRIMARY KEY,
                    classified BOOLEAN DEFAULT TRUE
                )
            """)
            con.execute("""
                CREATE TABLE IF NOT EXISTS motions (
                    id INTEGER PRIMARY KEY,
                    title VARCHAR,
                    body_text VARCHAR,
                    layman_explanation VARCHAR
                )
            """)
            con.execute("""
                CREATE TABLE IF NOT EXISTS extremity_scores (
                    motion_id INTEGER PRIMARY KEY,
                    text_score INTEGER,
                    text_explanation VARCHAR,
                    layman_score INTEGER,
                    layman_explanation VARCHAR,
                    error VARCHAR
                )
            """)
            # Insert motions across 4 text_score buckets: 1, 2, 4, 5
            for bucket, score in enumerate([1, 2, 4, 5], start=1):
                for i in range(15):
                    # Consecutive ids: bucket 1 -> 1..15, bucket 2 -> 16..30, ...
                    mid = (bucket - 1) * 15 + i + 1
                    con.execute(
                        "INSERT INTO motions VALUES (?, ?, ?, ?)",
                        (mid, f"Title {mid}", f"Text {mid}", f"Layman {mid}"),
                    )
                    con.execute(
                        "INSERT INTO right_wing_motions VALUES (?, TRUE)",
                        (mid,),
                    )
                    # text_score and layman_score get the same value.
                    con.execute(
                        "INSERT OR REPLACE INTO extremity_scores VALUES (?, ?, '', ?, '', NULL)",
                        (mid, score, score),
                    )
            con.commit()
        finally:
            con.close()

    def test_returns_stratified_sample(self, tmp_duckdb_path):
        """The sample is a list of dicts with n_per_bucket rows per seeded bucket."""
        from analysis.right_wing.extremity_rescore_2d import sample_motions

        result = sample_motions(tmp_duckdb_path, n_per_bucket=5, seed=42)
        assert isinstance(result, list)
        assert len(result) == 20  # 4 buckets * 5 each
        for row in result:
            assert "motion_id" in row
            assert "title" in row
            assert "text" in row
            assert "layman" in row
            assert "text_score" in row

    def test_respects_seed(self, tmp_duckdb_path):
        """Two calls with the same seed select the same set of motion ids."""
        from analysis.right_wing.extremity_rescore_2d import sample_motions

        result_a = sample_motions(tmp_duckdb_path, n_per_bucket=3, seed=99)
        result_b = sample_motions(tmp_duckdb_path, n_per_bucket=3, seed=99)
        # Compare as sorted id lists so row order does not matter.
        ids_a = sorted(r["motion_id"] for r in result_a)
        ids_b = sorted(r["motion_id"] for r in result_b)
        assert ids_a == ids_b

    def test_n_per_bucket_limits(self, tmp_duckdb_path):
        """n_per_bucket caps the number of rows taken from each bucket."""
        from analysis.right_wing.extremity_rescore_2d import sample_motions

        result = sample_motions(tmp_duckdb_path, n_per_bucket=2, seed=1)
        assert len(result) == 8  # 4 buckets * 2
||||||
|
|
||||||
|
|
||||||
|
# ── rescore_2d dry_run tests ──────────────────────────────────────────────── |
||||||
|
|
||||||
|
class TestRescore2dDryRun:
    """Dry-run behaviour of ``rescore_2d`` on a 20-motion synthetic database."""

    @pytest.fixture(autouse=True)
    def setup_db(self, tmp_duckdb_path):
        """Create the three input tables and seed motions 1 through 20."""
        ddl_statements = (
            """
                CREATE TABLE IF NOT EXISTS right_wing_motions (
                    motion_id INTEGER PRIMARY KEY,
                    classified BOOLEAN DEFAULT TRUE
                )
            """,
            """
                CREATE TABLE IF NOT EXISTS motions (
                    id INTEGER PRIMARY KEY,
                    title VARCHAR,
                    body_text VARCHAR,
                    layman_explanation VARCHAR
                )
            """,
            """
                CREATE TABLE IF NOT EXISTS extremity_scores (
                    motion_id INTEGER PRIMARY KEY,
                    text_score INTEGER,
                    text_explanation VARCHAR,
                    layman_score INTEGER,
                    layman_explanation VARCHAR,
                    error VARCHAR
                )
            """,
        )
        con = duckdb.connect(tmp_duckdb_path)
        try:
            for ddl in ddl_statements:
                con.execute(ddl)

            for motion_id in range(1, 21):
                # Scores cycle through 1..5 as the id increases; the same
                # value is stored as both text_score and layman_score.
                score = (motion_id % 5) + 1
                con.execute(
                    "INSERT INTO motions VALUES (?, ?, ?, ?)",
                    (
                        motion_id,
                        f"Title {motion_id}",
                        f"Text {motion_id}",
                        f"Layman {motion_id}",
                    ),
                )
                con.execute(
                    "INSERT INTO right_wing_motions VALUES (?, TRUE)",
                    (motion_id,),
                )
                con.execute(
                    "INSERT OR REPLACE INTO extremity_scores VALUES (?, ?, '', ?, '', NULL)",
                    (motion_id, score, score),
                )
            con.commit()
        finally:
            con.close()

    def test_dry_run_no_subagents(self, tmp_duckdb_path, caplog):
        """A dry run returns a summary dict and logs that it is a dry run."""
        import logging

        from analysis.right_wing.extremity_rescore_2d import rescore_2d

        # Capture INFO records so the dry-run log line is visible to caplog.
        caplog.set_level(logging.INFO)

        summary = rescore_2d(tmp_duckdb_path, n_per_bucket=3, dry_run=True)
        assert isinstance(summary, dict)
        assert summary.get("dry_run") is True
        for expected_key in ("motions_count", "batch_count"):
            assert expected_key in summary

        log_text = caplog.text.lower()
        assert "dry run" in log_text
||||||
Loading…
Reference in new issue