You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
motief/tests/test_extract_mp_votes.py

74 lines
2.2 KiB

import json
import duckdb
import logging
from pipeline.extract_mp_votes import extract_mp_votes
from database import MotionDatabase
def test_extract_mp_votes(tmp_path):
    """Check that ``extract_mp_votes`` inserts MP rows and is idempotent.

    The test seeds a fresh database file with one ``motions`` row per entry
    of the JSON fixture, runs the extraction twice, and asserts that:

    * the first run reports one inserted row for every ``voting_results``
      key that contains a comma, and reports no skipped motions;
    * the ``mp_votes`` table holds exactly that many rows, each with a
      comma in ``mp_name``;
    * the second run inserts nothing and reports at least one skipped
      motion.

    Args:
        tmp_path: pytest's per-test temporary directory; the database file
            is created inside it.
    """
    db_file = tmp_path / "test.db"
    db_path = str(db_file)

    # Constructed before the INSERT below, which needs the ``motions`` table.
    # NOTE(review): presumably the constructor creates the schema - confirm.
    mdb = MotionDatabase(db_path=db_path)

    # The path is relative, so it is resolved against the current working
    # directory of the test run.
    fixture_path = "tests/fixtures/sample_voting_results.json"
    # Decode explicitly as UTF-8 so the parsed names do not depend on the
    # platform's locale encoding.
    with open(fixture_path, "r", encoding="utf-8") as fh:
        fixtures = json.load(fh)

    insert_sql = """
        INSERT INTO motions (id, title, description, date, policy_area, voting_results, winning_margin, url)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """

    # Seed the motions table with one row per fixture entry.
    conn = duckdb.connect(db_path)
    try:
        for item in fixtures:
            motion_id = item.get("motion_id")
            conn.execute(
                insert_sql,
                (
                    motion_id,
                    f"Test Motion {motion_id}",
                    "",
                    item.get("date"),
                    "Test",
                    # The voting results are stored as a JSON-encoded string.
                    json.dumps(item.get("voting_results")),
                    0.5,
                    f"http://example/{motion_id}",
                ),
            )
    finally:
        conn.close()

    # First run: every motion should be processed.
    res = extract_mp_votes(db_path=db_path)

    # Expected MP rows: one per voting_results key that contains a comma.
    expected_mp_count = sum(
        1
        for item in fixtures
        for name in item.get("voting_results", {})
        if "," in name
    )

    assert res["mp_rows_inserted"] == expected_mp_count
    assert res["motions_skipped"] == 0

    # The mp_votes table must contain only comma-bearing names, and exactly
    # as many rows as were counted from the fixture.
    conn = duckdb.connect(db_path)
    try:
        rows = conn.execute("SELECT mp_name FROM mp_votes").fetchall()
    finally:
        conn.close()

    assert len(rows) == expected_mp_count
    for (mp_name,) in rows:
        assert "," in mp_name

    # Second run must be idempotent: no new MP rows, motions reported skipped.
    res2 = extract_mp_votes(db_path=db_path)
    assert res2["mp_rows_inserted"] == 0
    assert res2["motions_skipped"] > 0