You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
motief/tests/test_extract_mp_votes.py

77 lines
2.5 KiB

import json
import duckdb
import logging
from pipeline.extract_mp_votes import extract_mp_votes
from database import MotionDatabase
def test_extract_mp_votes(tmp_path):
    """Check that ``extract_mp_votes`` fills ``mp_votes`` and can be re-run.

    The test seeds the ``motions`` table of a fresh database file from the
    JSON fixture, runs the extraction, and verifies that:

    * one ``mp_votes`` row is reported and stored for every entry in each
      motion's ``voting_results``;
    * no motion is skipped on the first run;
    * actors whose name contains no comma have ``party`` equal to their name;
    * a second run inserts nothing and reports at least one skipped motion.

    Args:
        tmp_path: pytest temporary directory in which the database file
            is created.
    """
    db_file = tmp_path / "test.db"

    # Instantiated only for its initialization side effect (the INSERT below
    # needs the ``motions`` table to exist); the instance itself is not used.
    MotionDatabase(db_path=str(db_file))

    # Load fixture. The path is relative, so it is resolved against the
    # current working directory. The encoding is pinned to UTF-8 so decoding
    # does not depend on the platform's locale.
    fixture_path = "tests/fixtures/sample_voting_results.json"
    with open(fixture_path, "r", encoding="utf-8") as fh:
        fixtures = json.load(fh)

    # Insert motions into motions table
    conn = duckdb.connect(str(db_file))
    try:
        for item in fixtures:
            motion_id = item.get("motion_id")
            date = item.get("date")
            voting_results = item.get("voting_results")
            conn.execute(
                """
                INSERT INTO motions (id, title, description, date, policy_area, voting_results, winning_margin, url)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    motion_id,
                    f"Test Motion {motion_id}",
                    "",
                    date,
                    "Test",
                    # voting_results is stored as a JSON-encoded string
                    json.dumps(voting_results),
                    0.5,
                    f"http://example/{motion_id}",
                ),
            )
    finally:
        # Close the connection before the extraction opens the file itself
        conn.close()

    # Run extraction
    res = extract_mp_votes(db_path=str(db_file))

    # Expected rows: ALL actors (both individual MPs and party-level),
    # across all motions
    expected_total = sum(len(item.get("voting_results", {})) for item in fixtures)
    assert res["mp_rows_inserted"] == expected_total
    assert res["motions_skipped"] == 0

    # Verify the stored row count matches what the extraction reported
    conn = duckdb.connect(str(db_file))
    try:
        rows = conn.execute("SELECT mp_name, party FROM mp_votes").fetchall()
    finally:
        conn.close()
    assert len(rows) == expected_total

    # Party-level actors (no comma in the name) should have party = mp_name.
    # Individual MPs (comma in the name) are expected to have party = None
    # until metadata is fetched; that expectation is not asserted here.
    for mp_name, party in rows:
        if "," not in mp_name:
            # Party-level actor: party column should equal the actor name
            assert party == mp_name, (
                f"Party actor '{mp_name}' should have party=mp_name, got {party!r}"
            )

    # Running again should be idempotent: no new mp rows, motions_skipped > 0
    res2 = extract_mp_votes(db_path=str(db_file))
    assert res2["mp_rows_inserted"] == 0
    assert res2["motions_skipped"] > 0