"""Tests for ``pipeline.extract_mp_votes``.

Seeds a DuckDB database with fixture motions, runs the extraction step,
and verifies that MP vote rows are created correctly and that a second
run is idempotent.
"""

import json
import logging

import duckdb

from database import MotionDatabase
from pipeline.extract_mp_votes import extract_mp_votes

# Sample motions with per-MP voting results keyed by "Surname, Forename".
FIXTURE_PATH = "tests/fixtures/sample_voting_results.json"


def _load_fixtures():
    """Return the parsed list of sample voting-result fixtures."""
    with open(FIXTURE_PATH, "r") as fh:
        return json.load(fh)


def _seed_motions(db_file, fixtures):
    """Insert one row into the ``motions`` table per fixture item.

    Title/description/url are synthetic; only ``id``, ``date`` and the
    JSON-encoded ``voting_results`` matter to the extraction under test.
    """
    conn = duckdb.connect(str(db_file))
    try:
        for item in fixtures:
            motion_id = item.get("motion_id")
            conn.execute(
                """
                INSERT INTO motions
                    (id, title, description, date, policy_area,
                     voting_results, winning_margin, url)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    motion_id,
                    f"Test Motion {motion_id}",
                    "",
                    item.get("date"),
                    "Test",
                    json.dumps(item.get("voting_results")),
                    0.5,
                    f"http://example/{motion_id}",
                ),
            )
    finally:
        conn.close()


def _expected_mp_count(fixtures):
    """Count voting-result keys containing a comma.

    Keys of the form "Surname, Forename" are individual MPs; other keys
    (e.g. aggregate tallies) must be ignored by the extraction.
    """
    return sum(
        1
        for item in fixtures
        for key in item.get("voting_results", {})
        if "," in key
    )


def test_extract_mp_votes(tmp_path):
    """End-to-end: seed motions, extract MP votes, verify rows and idempotence."""
    db_file = tmp_path / "test.db"

    # Constructing MotionDatabase initializes the schema as a side effect;
    # the instance itself is not needed afterwards.
    MotionDatabase(db_path=str(db_file))

    fixtures = _load_fixtures()
    _seed_motions(db_file, fixtures)

    # First run: every comma-keyed voting result becomes one mp_votes row.
    res = extract_mp_votes(db_path=str(db_file))
    expected = _expected_mp_count(fixtures)
    assert res["mp_rows_inserted"] == expected
    assert res["motions_skipped"] == 0

    # The mp_votes table must contain exactly the MP-name rows
    # (every stored name looks like "Surname, Forename").
    conn = duckdb.connect(str(db_file))
    try:
        rows = conn.execute("SELECT mp_name FROM mp_votes").fetchall()
    finally:
        conn.close()
    assert len(rows) == expected
    for (mp_name,) in rows:
        assert "," in mp_name

    # Second run is idempotent: nothing new inserted, already-processed
    # motions are reported as skipped.
    res2 = extract_mp_votes(db_path=str(db_file))
    assert res2["mp_rows_inserted"] == 0
    assert res2["motions_skipped"] > 0