#!/usr/bin/env python3 """Score ALL motions with 2D extremity (stijl + materieel) using subagents. Usage: # Sanity check: score 200 random motions, print summary uv run python analysis/right_wing/extremity_score_all.py --sample 200 # Full run: output all batches as JSON for subagent dispatch uv run python analysis/right_wing/extremity_score_all.py --all --output /tmp/all_batches.json """ from __future__ import annotations import argparse import json import logging import sys from pathlib import Path import duckdb from analysis.right_wing.extremity_rescore_2d import ( load_skill, format_batches, validate_single_result, store_scores, ) logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") logger = logging.getLogger(__name__) DB_PATH = str(Path(__file__).parent.parent.parent / "data" / "motions.db") def sample_all_motions(db_path: str, n: int | None = None, seed: int = 42) -> list[dict]: """Sample motions from the full motions table (not just right_wing). Skips motions already in extremity_scores_2d. Args: db_path: Path to DuckDB database. n: Number of motions to sample (None = all). seed: Random seed. Returns: List of dicts with keys: motion_id, title, text, layman. """ con = duckdb.connect(db_path) try: con.execute(f"SELECT setseed({seed / 1_000_000.0})") already = con.execute( "SELECT motion_id FROM extremity_scores_2d" ).fetchall() already_ids = {r[0] for r in already} rows = con.execute(""" SELECT id, title, body_text, layman_explanation FROM motions WHERE body_text IS NOT NULL AND length(trim(body_text)) > 0 ORDER BY RANDOM() """).fetchall() motions = [] for row in rows: mid = row[0] if mid in already_ids: continue motions.append({ "motion_id": mid, "title": (row[1] or "").strip(), "text": (row[2] or "").strip(), "layman": (row[3] or "").strip(), }) if n and len(motions) >= n: break total = len(rows) new = len(motions) logger.info( "Found %d motions total, %d already scored, %d new (%d skipped)", total, len(already_ids), new, total - len(already_ids) - new, ) return motions finally: con.close() def prepare_batches( db_path: str, n: int | None = None, batch_size: int = 20, ) -> tuple[list[dict], list[list[str]]]: """Sample motions and format into prompt batches. Returns (motions, batches). """ skill = load_skill() prompt = skill["prompt_template"] motions = sample_all_motions(db_path, n=n) batches = format_batches(motions, prompt, batch_size=batch_size) logger.info( "%d motions → %d batches (batch_size=%d)", len(motions), len(batches), batch_size, ) return motions, batches def main() -> int: parser = argparse.ArgumentParser( description="Score ALL motions with 2D extremity scoring" ) parser.add_argument("--sample", type=int, metavar="N", help="Number of motions to sample for sanity check") parser.add_argument("--all", action="store_true", help="Prepare all unscored motions for dispatch") parser.add_argument("--batch-size", type=int, default=20, help="Motions per subagent batch (default: 20)") parser.add_argument("--output", type=str, help="Write batch JSON to this file") parser.add_argument("--preview", type=int, default=3, help="Number of batch previews to print (default: 3)") args = parser.parse_args() if not args.sample and not args.all: parser.error("Must specify --sample N or --all") n = args.sample if args.sample else None motions, batches = prepare_batches(DB_PATH, n=n, batch_size=args.batch_size) if not batches: logger.info("No batches to dispatch.") return 0 # Print preview print(f"\n{'='*60}") print(f"Motions: {len(motions)} Batches: {len(batches)} Batch size: {args.batch_size}") print(f"{'='*60}") preview_n = min(args.preview, len(batches)) for i in range(preview_n): print(f"\n--- Batch {i+1}/{len(batches)} ---") for j, prompt_text in enumerate(batches[i]): first_line = prompt_text.split("\n")[0] if prompt_text else "(empty)" print(f" {j+1}. {first_line[:120]}...") if len(batches) > preview_n: print(f"\n... and {len(batches) - preview_n} more batches") # Build output structure output = { "total_motions": len(motions), "total_batches": len(batches), "batch_size": args.batch_size, "batches": [ { "batch_id": i, "motion_ids": [m["motion_id"] for m in motions[i * args.batch_size:(i + 1) * args.batch_size]], "motion_count": len(batches[i]), "prompts": batches[i], } for i in range(len(batches)) ], } if args.output: Path(args.output).write_text(json.dumps(output, ensure_ascii=False, indent=2)) logger.info("Wrote %d batches to %s", len(batches), args.output) else: # Save to default location outpath = Path("/tmp/extremity_all_batches.json") outpath.write_text(json.dumps(output, ensure_ascii=False, indent=2)) logger.info("Wrote %d batches to %s", len(batches), outpath) return 0 if __name__ == "__main__": raise SystemExit(main())