# motief/tests/test_extract_mp_votes.py

import json
import duckdb
import logging

from pipeline.extract_mp_votes import extract_mp_votes
from database import MotionDatabase


def test_extract_mp_votes(tmp_path):
    """Check that ``extract_mp_votes`` fills ``mp_votes`` from stored motions.

    The test seeds the ``motions`` table of a fresh database with the sample
    voting-results fixture, runs the extraction, and verifies that:

    * the reported and stored number of ``mp_votes`` rows equals the number
      of fixture ``voting_results`` keys that contain a comma, and no motion
      is reported as skipped;
    * every stored ``mp_name`` contains a comma;
    * a second run reports no new rows, reports at least one skipped motion,
      and leaves the stored row count unchanged.

    Args:
        tmp_path: pytest's per-test temporary directory; the database file
            is created inside it.
    """
    db_path = str(tmp_path / "test.db")

    # Constructed for its side effect: nothing in this test creates the
    # ``motions`` table, so the INSERT below presumably relies on the
    # constructor having set up the schema.
    mdb = MotionDatabase(db_path=db_path)  # noqa: F841

    # NOTE: the fixture path is relative to the current working directory,
    # so the test has to be started from the directory containing ``tests/``.
    fixture_path = "tests/fixtures/sample_voting_results.json"
    # Decode explicitly as UTF-8 rather than with the platform default so
    # names in the fixture are read the same way on every machine.
    with open(fixture_path, "r", encoding="utf-8") as fh:
        fixtures = json.load(fh)

    # Seed the motions table with one row per fixture entry.
    conn = duckdb.connect(db_path)
    try:
        for item in fixtures:
            motion_id = item.get("motion_id")
            conn.execute(
                """
                INSERT INTO motions (id, title, description, date, policy_area, voting_results, winning_margin, url)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    motion_id,
                    f"Test Motion {motion_id}",
                    "",
                    item.get("date"),
                    "Test",
                    # voting_results is stored as a JSON string.
                    json.dumps(item.get("voting_results")),
                    0.5,
                    f"http://example/{motion_id}",
                ),
            )
    finally:
        conn.close()

    # First extraction run.
    res = extract_mp_votes(db_path=db_path)

    # Expected MP rows: voting_results keys that contain a comma.
    # ``or {}`` also covers an explicit JSON null, which ``.get(key, {})``
    # would pass through as None.
    expected_mp_count = sum(
        1
        for item in fixtures
        for voter in (item.get("voting_results") or {})
        if "," in voter
    )

    assert res["mp_rows_inserted"] == expected_mp_count
    assert res["motions_skipped"] == 0

    # The mp_votes table must hold exactly the expected rows, all of them
    # with a comma in mp_name.
    conn = duckdb.connect(db_path)
    try:
        rows = conn.execute("SELECT mp_name FROM mp_votes").fetchall()
    finally:
        conn.close()

    assert len(rows) == expected_mp_count
    for (mp_name,) in rows:
        assert "," in mp_name

    # Running again should be idempotent: no new mp rows, motions_skipped > 0.
    res2 = extract_mp_votes(db_path=db_path)
    assert res2["mp_rows_inserted"] == 0
    assert res2["motions_skipped"] > 0

    # Do not rely on the reported counters alone: the table itself must not
    # have grown after the second run.
    conn = duckdb.connect(db_path)
    try:
        (rows_after_rerun,) = conn.execute(
            "SELECT COUNT(*) FROM mp_votes"
        ).fetchone()
    finally:
        conn.close()

    assert rows_after_rerun == expected_mp_count