import json
import logging
from collections.abc import Mapping
from typing import Dict, Optional

import duckdb

from database import MotionDatabase

_logger = logging.getLogger(__name__)


def _fetch_motions(db_file, limit: Optional[int]) -> list:
    """Return ``(id, voting_results, date)`` tuples read from the motions table.

    When *limit* is not None it is passed as a bound parameter to ``LIMIT``;
    otherwise every row is returned. The connection opened here is always
    closed before returning, even if the query raises.
    """
    conn = duckdb.connect(db_file)
    try:
        if limit is None:
            cursor = conn.execute("SELECT id, voting_results, date FROM motions")
        else:
            cursor = conn.execute(
                "SELECT id, voting_results, date FROM motions LIMIT ?", (limit,)
            )
        return cursor.fetchall()
    finally:
        conn.close()


def _parse_voting_results(raw):
    """Decode *raw* with ``json.loads`` if it is textual, else return it as-is.

    ``json.loads`` accepts ``str``, ``bytes`` and ``bytearray``, so all three
    are decoded; any other value is assumed to be already-decoded data.
    """
    if isinstance(raw, (str, bytes, bytearray)):
        return json.loads(raw)
    return raw


def extract_mp_votes(
    db_path: Optional[str] = None, limit: Optional[int] = None
) -> Dict[str, int]:
    """Extract individual MP votes from motions.voting_results and store them
    in the mp_votes table.

    Args:
        db_path: Path handed to ``MotionDatabase``; when falsy, the
            ``MotionDatabase`` default is used.
        limit: Optional maximum number of motions to scan. ``None`` scans all.

    Returns:
        A dict with summary counts:
        - motions_scanned: number of motions inspected
        - mp_rows_inserted: number of mp_votes rows inserted
        - motions_skipped: number of motions skipped because mp_votes
          already existed

    Errors while processing a single motion are logged (with traceback) and
    do not abort the run; such motions are counted in ``motions_scanned`` only.

    NOTE(review): if an insert fails midway through a motion, the rows already
    inserted for it stay in place. Presumably ``mp_votes_exists_for_motion``
    then reports the motion as done and later runs skip it — confirm against
    ``MotionDatabase`` and consider cleaning up partial motions there.
    """
    db = MotionDatabase(db_path=db_path) if db_path else MotionDatabase()
    rows = _fetch_motions(db.db_path, limit)

    motions_scanned = 0
    motions_skipped = 0
    mp_rows_inserted = 0

    for motion_id, raw_results, date in rows:
        motions_scanned += 1
        # Best-effort per motion: one bad record must not stop the whole run.
        try:
            if db.mp_votes_exists_for_motion(motion_id):
                _logger.debug(
                    "Skipping motion %s because mp_votes already exist", motion_id
                )
                motions_skipped += 1
                continue

            voting_results = _parse_voting_results(raw_results)
            if not voting_results:
                # NULL / empty payload: nothing to extract for this motion.
                continue
            if not isinstance(voting_results, Mapping):
                # Report the unexpected shape explicitly instead of relying on
                # an AttributeError from ``.items()``.
                _logger.warning(
                    "Skipping motion %s: voting_results is %s, expected a mapping",
                    motion_id,
                    type(voting_results).__name__,
                )
                continue

            for actor, vote in voting_results.items():
                # Individual MP names contain a comma (e.g. "Last, F.");
                # entries without one are not stored as MP votes.
                if not isinstance(actor, str) or "," not in actor:
                    continue
                inserted_id = db.insert_mp_vote(
                    motion_id=motion_id,
                    mp_name=actor,
                    vote=vote,
                    date=date,
                    party=None,
                )
                if inserted_id and inserted_id > 0:
                    mp_rows_inserted += 1
        except Exception as e:
            # ``exception`` keeps the traceback, which ``error`` dropped.
            _logger.exception("Error processing motion %s: %s", motion_id, e)

    return {
        "motions_scanned": motions_scanned,
        "mp_rows_inserted": mp_rows_inserted,
        "motions_skipped": motions_skipped,
    }