import json
import logging

import duckdb

from pipeline.extract_mp_votes import extract_mp_votes
from database import MotionDatabase


def test_extract_mp_votes(tmp_path):
    """Check that ``extract_mp_votes`` fills ``mp_votes`` and is re-runnable.

    The test seeds the ``motions`` table of a throwaway DuckDB file with the
    motions from the JSON fixture, runs the extraction, and verifies that:

    * one ``mp_votes`` row is reported (and stored) per actor per motion,
    * no motion is skipped on the first run,
    * actors whose name has no comma carry their own name in ``party``,
    * a second run inserts nothing and skips at least one motion.

    Args:
        tmp_path: pytest's per-test temporary directory; the database file
            is created inside it.
    """
    db_file = tmp_path / "test.db"

    # Constructing MotionDatabase is what prepares the database file; the
    # INSERT below relies on the ``motions`` table already existing.
    # The binding is otherwise unused but keeps the object alive for the
    # duration of the test.
    mdb = MotionDatabase(db_path=str(db_file))  # noqa: F841

    # NOTE: the fixture path is relative, so the test must be run from the
    # directory that contains ``tests/``.
    fixture_path = "tests/fixtures/sample_voting_results.json"
    # JSON is UTF-8 by specification; be explicit so that non-ASCII names are
    # decoded identically regardless of the platform's locale encoding.
    with open(fixture_path, "r", encoding="utf-8") as fh:
        fixtures = json.load(fh)

    # Loop-invariant statement, built once. Only the id, date and
    # voting_results columns come from the fixture; the rest are placeholders.
    insert_sql = (
        " INSERT INTO motions (id, title, description, date, policy_area,"
        " voting_results, winning_margin, url)"
        " VALUES (?, ?, ?, ?, ?, ?, ?, ?) "
    )

    # Insert motions into the motions table.
    conn = duckdb.connect(str(db_file))
    try:
        for item in fixtures:
            motion_id = item.get("motion_id")
            conn.execute(
                insert_sql,
                (
                    motion_id,
                    f"Test Motion {motion_id}",
                    "",
                    item.get("date"),
                    "Test",
                    json.dumps(item.get("voting_results")),
                    0.5,
                    f"http://example/{motion_id}",
                ),
            )
    finally:
        conn.close()

    # Run extraction.
    res = extract_mp_votes(db_path=str(db_file))

    # Expected rows: ALL actors (both individual MPs and party-level),
    # across all motions, i.e. one row per key of each voting_results mapping.
    expected_total = sum(len(item.get("voting_results", {})) for item in fixtures)
    assert res["mp_rows_inserted"] == expected_total
    assert res["motions_skipped"] == 0

    # The stored row count must agree with the reported count.
    conn = duckdb.connect(str(db_file))
    try:
        rows = conn.execute("SELECT mp_name, party FROM mp_votes").fetchall()
    finally:
        conn.close()
    assert len(rows) == expected_total

    # Party-level actors (no comma in the name) must have party == mp_name.
    # Individual MPs (comma in the name) are expected to have no party yet,
    # but that expectation is not asserted here.
    for mp_name, party in rows:
        if "," not in mp_name:
            assert party == mp_name, (
                f"Party actor '{mp_name}' should have party=mp_name, got {party!r}"
            )

    # Running again should be idempotent: no new mp rows, motions_skipped > 0.
    res2 = extract_mp_votes(db_path=str(db_file))
    assert res2["mp_rows_inserted"] == 0
    assert res2["motions_skipped"] > 0