NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Indentation (including the database.py context lines) is reconstructed and
must be confirmed against the target tree before applying. The blob ids on
the "index" lines of the two new test files predate the test-code fixes
carried in this version of the patch.

diff --git a/database.py b/database.py
index f068feb..dbc1efa 100644
--- a/database.py
+++ b/database.py
@@ -691,7 +691,9 @@ class MotionDatabase:
             return "tegen"
         if s_low in ("onthouden", "abstain", "abstained"):
             return "onthouden"
-        if s_low in ("geen stem", "afwezig", "absent", "no vote"):
+        if s_low in ("geen stem", "no vote"):
+            return None  # user chose not to answer — skip entirely
+        if s_low in ("afwezig", "absent"):
             return "afwezig"
         # already canonical?
         if s_low in ("voor", "tegen", "onthouden", "afwezig"):
diff --git a/tests/test_explorer_quiz.py b/tests/test_explorer_quiz.py
new file mode 100644
index 0000000..7e89d9a
--- /dev/null
+++ b/tests/test_explorer_quiz.py
@@ -0,0 +1,164 @@
+"""Tests for build_mp_quiz_tab and related quiz DB integration.
+
+Task 2.1 + Task 3.1: smoke test that the builder is exported and callable,
+plus an end-to-end simulation of the quiz matching logic via real temp DuckDB.
+"""
+
+import importlib
+from pathlib import Path
+
+import duckdb
+
+from database import MotionDatabase
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _create_motion(db: MotionDatabase, url: str, title: str, controversy: float = 0.5):
+    """Insert a minimal motion and return the id stored for its url."""
+    md = {
+        "title": title,
+        "description": title,
+        "date": "2023-01-01",
+        "policy_area": "test",
+        "voting_results": {},
+        "winning_margin": controversy,
+        "layman_explanation": f"Uitleg over {title}",
+        "url": url,
+    }
+    ok = db.insert_motion(md)
+    assert ok, f"insert_motion failed for {url}"
+    conn = duckdb.connect(db.db_path)
+    try:
+        row = conn.execute("SELECT id FROM motions WHERE url = ?", (url,)).fetchone()
+    finally:
+        # Close the connection even when the lookup raises.
+        conn.close()
+    assert row is not None
+    return int(row[0])
+
+
+# ---------------------------------------------------------------------------
+# Task 2.1 — smoke test: builder exported and callable
+# ---------------------------------------------------------------------------
+
+
+def test_builder_exists():
+    """build_mp_quiz_tab must be importable from explorer and callable."""
+    mod = importlib.import_module("explorer")
+    assert hasattr(mod, "build_mp_quiz_tab"), "build_mp_quiz_tab not found in explorer"
+    assert callable(mod.build_mp_quiz_tab)
+
+
+# ---------------------------------------------------------------------------
+# Task 3.1 — end-to-end quiz matching simulation
+# ---------------------------------------------------------------------------
+
+
+def test_quiz_unique_match(tmp_path: Path):
+    """Full quiz flow: 6 motions, 4 MPs — pre-filled votes produce a unique match."""
+    db_path = str(tmp_path / "quiz_e2e.db")
+    db = MotionDatabase(db_path)
+
+    # 6 motions
+    mids = [_create_motion(db, f"http://qe{i}", f"Motion {i}") for i in range(6)]
+
+    # 4 MPs with distinct voting patterns
+    mpA = "Alpha, A."
+    mpB = "Beta, B."
+    mpC = "Gamma, G."
+    mpD = "Delta, D."
+
+    patterns = {
+        mpA: ["voor", "voor", "voor", "voor", "voor", "voor"],  # all voor
+        mpB: ["tegen", "tegen", "voor", "voor", "voor", "voor"],  # 2 tegen then 4 voor
+        mpC: ["voor", "tegen", "voor", "tegen", "voor", "tegen"],  # alternating
+        mpD: ["tegen", "tegen", "tegen", "tegen", "tegen", "voor"],  # mostly tegen
+    }
+
+    for idx, mid in enumerate(mids):
+        for mp, votes in patterns.items():
+            db.insert_mp_vote(mid, mp, votes[idx], "2023-01-01")
+
+    # User votes matching mpA exactly (all Voor)
+    user_votes = {mid: "Voor" for mid in mids}
+    results = db.match_mps_for_votes(user_votes, limit=50)
+
+    assert results, "Expected ranked results"
+    top = results[0]
+    assert top["mp_name"] == mpA
+    assert top["agreement_pct"] == 100.0
+
+    # Unique match: only mpA should be at 100%
+    top_matches = [r for r in results if r["agreement_pct"] == top["agreement_pct"]]
+    assert len(top_matches) == 1, f"Expected unique top match, got {top_matches}"
+
+
+def test_quiz_indistinguishable_mps(tmp_path: Path):
+    """When two MPs vote identically, both appear at the top with same agreement_pct."""
+    db_path = str(tmp_path / "quiz_indist.db")
+    db = MotionDatabase(db_path)
+
+    mids = [
+        _create_motion(db, f"http://ind{i}", f"Indist Motion {i}") for i in range(3)
+    ]
+
+    mpA = "Alice, A."
+    mpB = "Bob, B."
+
+    # Both vote identically
+    for mid in mids:
+        db.insert_mp_vote(mid, mpA, "voor", "2023-01-01")
+        db.insert_mp_vote(mid, mpB, "voor", "2023-01-01")
+
+    user_votes = {mid: "Voor" for mid in mids}
+    results = db.match_mps_for_votes(user_votes, limit=50)
+
+    assert len(results) == 2, "Both identical MPs should appear"
+    assert results[0]["agreement_pct"] == results[1]["agreement_pct"] == 100.0
+
+
+def test_quiz_discriminating_reduces_candidates(tmp_path: Path):
+    """After one discriminating question, candidate set should shrink."""
+    db_path = str(tmp_path / "quiz_disc.db")
+    db = MotionDatabase(db_path)
+
+    # 4 motions: motion0 splits mpA from mpB/mpC
+    mids = [_create_motion(db, f"http://disc{i}", f"Disc Motion {i}") for i in range(4)]
+
+    mpA = "Alpha, A."
+    mpB = "Beta, B."
+    mpC = "Gamma, G."
+
+    # motion0 splits; motions 1-3 they all agree
+    db.insert_mp_vote(mids[0], mpA, "voor", "2023-01-01")
+    db.insert_mp_vote(mids[0], mpB, "tegen", "2023-01-01")
+    db.insert_mp_vote(mids[0], mpC, "tegen", "2023-01-01")
+
+    for i in range(1, 4):
+        for mp in (mpA, mpB, mpC):
+            db.insert_mp_vote(mids[i], mp, "voor", "2023-01-01")
+
+    # All three agree on motions 1-3; choose_discriminating_motions should pick motion0
+    all_mps = [mpA, mpB, mpC]
+    chosen = db.choose_discriminating_motions(all_mps, excluded_motion_ids=[], k=1)
+    assert chosen, "No discriminating motion returned"
+    assert chosen[0] == mids[0], (
+        f"Expected motion {mids[0]} as discriminating, got {chosen}"
+    )
+
+    # After user answers Voor on motion0, only mpA should rank at 100%
+    user_votes = {mids[0]: "Voor"}
+    results = db.match_mps_for_votes(user_votes, limit=50)
+    assert results, "Expected ranked results"
+    top = results[0]
+    assert top["mp_name"] == mpA
+    assert top["agreement_pct"] == 100.0
+
+    # mpB and mpC should be at 0%
+    for r in results:
+        if r["mp_name"] in (mpB, mpC):
+            assert r["agreement_pct"] == 0.0
diff --git a/tests/test_match_mps.py b/tests/test_match_mps.py
new file mode 100644
index 0000000..56a4aea
--- /dev/null
+++ b/tests/test_match_mps.py
@@ -0,0 +1,209 @@
+import json
+import os
+from pathlib import Path
+
+import duckdb
+import pytest
+
+from database import MotionDatabase
+
+
+def _create_motion_and_get_id(
+    db: MotionDatabase, url: str, title: str, layman: str = "x"
+):
+    """Insert a minimal motion and return the id stored for its url."""
+    md = {
+        "title": title,
+        "description": title,
+        "date": "2023-01-01",
+        "policy_area": "test",
+        "voting_results": {},
+        "winning_margin": 0.5,
+        "layman_explanation": layman,
+        "url": url,
+    }
+    ok = db.insert_motion(md)
+    assert ok, "insert_motion failed"
+    conn = duckdb.connect(db.db_path)
+    try:
+        row = conn.execute("SELECT id FROM motions WHERE url = ?", (url,)).fetchone()
+    finally:
+        # Close the connection even when the lookup raises.
+        conn.close()
+    assert row is not None, "couldn't find inserted motion"
+    return int(row[0])
+
+
+def test_match_mps_basic(tmp_path: Path):
+    db_path = str(tmp_path / "test_motions.db")
+    db = MotionDatabase(db_path)
+
+    # create 4 motions
+    mids = []
+    for i in range(1, 5):
+        mids.append(_create_motion_and_get_id(db, f"http://m{i}", f"Motion {i}"))
+
+    # MPs
+    mpA = "Alpha, A."
+    mpB = "Beta, B."
+    mpC = "Gamma, G."
+
+    # Voting patterns (motions 1..4)
+    # A: v v t v (3/4)
+    # B: t t t t (0/4)
+    # C: v v v v (4/4)
+    votes = {
+        mpA: ["voor", "voor", "tegen", "voor"],
+        mpB: ["tegen", "tegen", "tegen", "tegen"],
+        mpC: ["voor", "voor", "voor", "voor"],
+    }
+
+    for idx, mid in enumerate(mids):
+        for mp_name, vlist in votes.items():
+            db.insert_mp_vote(
+                motion_id=mid,
+                mp_name=mp_name,
+                vote=vlist[idx],
+                date="2023-01-01",
+                party=None,
+            )
+
+    # User votes matching Gamma exactly
+    user_votes = {mids[0]: "Voor", mids[1]: "Voor", mids[2]: "Voor", mids[3]: "Voor"}
+
+    results = db.match_mps_for_votes(user_votes, limit=10)
+    assert results, "No results returned"
+
+    # Top candidate should be Gamma
+    top = results[0]
+    assert top["mp_name"] == mpC
+    assert top["matched"] == 4
+    assert top["overlap"] == 4
+    assert top["agreement_pct"] == 100.0
+
+    # Check Alpha is second with 3 matched
+    names = [r["mp_name"] for r in results]
+    assert mpA in names
+    a = next(r for r in results if r["mp_name"] == mpA)
+    assert a["matched"] == 3
+    assert a["overlap"] == 4
+    assert a["agreement_pct"] == 75.0
+
+
+def test_choose_discriminating_motions(tmp_path: Path):
+    db_path = str(tmp_path / "test_motions2.db")
+    db = MotionDatabase(db_path)
+
+    # create 3 motions
+    mids = []
+    for i in range(1, 4):
+        mids.append(_create_motion_and_get_id(db, f"http://d{i}", f"DMotion {i}"))
+
+    mpA = "Alice, A."
+    mpB = "Bob, B."
+    mpC = "Carol, C."
+
+    # Votes: motion1 splits A vs B/C
+    # motion1: A=voor, B=tegen, C=tegen
+    # motion2: all voor
+    # motion3: all tegen
+    db.insert_mp_vote(mids[0], mpA, "voor", "2023-01-01")
+    db.insert_mp_vote(mids[0], mpB, "tegen", "2023-01-01")
+    db.insert_mp_vote(mids[0], mpC, "tegen", "2023-01-01")
+
+    for mp in (mpA, mpB, mpC):
+        db.insert_mp_vote(mids[1], mp, "voor", "2023-01-01")
+        db.insert_mp_vote(mids[2], mp, "tegen", "2023-01-01")
+
+    candidates = [mpA, mpB, mpC]
+    chosen = db.choose_discriminating_motions(candidates, excluded_motion_ids=[], k=1)
+    assert chosen, "No discriminating motion returned"
+    # best splitter should be motion1 (mids[0])
+    assert chosen[0] == mids[0]
+
+
+def test_match_excludes_zero_overlap(tmp_path: Path):
+    """MPs who voted on none of the user's motions must not appear in results."""
+    db_path = str(tmp_path / "zo.db")
+    db = MotionDatabase(db_path)
+
+    mid1 = _create_motion_and_get_id(db, "http://zo1", "ZO Motion 1")
+    mid2 = _create_motion_and_get_id(db, "http://zo2", "ZO Motion 2")
+
+    mp_overlap = "Overlap, O."
+    mp_noshow = "Noshow, N."
+
+    db.insert_mp_vote(mid1, mp_overlap, "voor", "2023-01-01")
+    # mp_noshow only voted on mid2, not mid1
+    db.insert_mp_vote(mid2, mp_noshow, "voor", "2023-01-01")
+
+    results = db.match_mps_for_votes({mid1: "Voor"}, limit=10)
+    names = [r["mp_name"] for r in results]
+
+    assert mp_overlap in names, "mp_overlap should appear"
+    assert mp_noshow not in names, "mp_noshow had no overlap and must be excluded"
+
+
+def test_invalid_input_empty_user_votes(tmp_path: Path):
+    """Passing an empty dict must raise ValueError."""
+    db_path = str(tmp_path / "inv.db")
+    db = MotionDatabase(db_path)
+
+    with pytest.raises(ValueError, match="non-empty"):
+        db.match_mps_for_votes({})
+
+
+def test_invalid_input_empty_candidates(tmp_path: Path):
+    """Passing empty candidates to choose_discriminating_motions must raise ValueError."""
+    db_path = str(tmp_path / "inv2.db")
+    db = MotionDatabase(db_path)
+
+    with pytest.raises(ValueError):
+        db.choose_discriminating_motions([], excluded_motion_ids=[])
+
+
+def test_geen_stem_not_counted_in_overlap(tmp_path: Path):
+    """'Geen stem' user votes should be skipped (not counted in overlap or matched)."""
+    db_path = str(tmp_path / "gs.db")
+    db = MotionDatabase(db_path)
+
+    mid1 = _create_motion_and_get_id(db, "http://gs1", "GS Motion 1")
+    mid2 = _create_motion_and_get_id(db, "http://gs2", "GS Motion 2")
+
+    mpA = "Alpha, A."
+    db.insert_mp_vote(mid1, mpA, "voor", "2023-01-01")
+    db.insert_mp_vote(mid2, mpA, "voor", "2023-01-01")
+
+    # user says Geen stem on mid1 (skip), Voor on mid2
+    results = db.match_mps_for_votes({mid1: "Geen stem", mid2: "Voor"}, limit=10)
+    assert results, "Expected at least one result"
+    r = results[0]
+    # overlap should only be 1 (mid2 counted, mid1 skipped)
+    assert r["overlap"] == 1
+    assert r["matched"] == 1
+    assert r["agreement_pct"] == 100.0
+
+
+def test_choose_excluded_motions_respected(tmp_path: Path):
+    """Excluded motion ids must not be returned by choose_discriminating_motions."""
+    db_path = str(tmp_path / "excl.db")
+    db = MotionDatabase(db_path)
+
+    mid1 = _create_motion_and_get_id(db, "http://ex1", "EX Motion 1")
+    mid2 = _create_motion_and_get_id(db, "http://ex2", "EX Motion 2")
+
+    mpA = "Alice, A."
+    mpB = "Bob, B."
+
+    # mid1 splits them; mid2 they agree
+    db.insert_mp_vote(mid1, mpA, "voor", "2023-01-01")
+    db.insert_mp_vote(mid1, mpB, "tegen", "2023-01-01")
+    db.insert_mp_vote(mid2, mpA, "voor", "2023-01-01")
+    db.insert_mp_vote(mid2, mpB, "voor", "2023-01-01")
+
+    # Exclude mid1 — only mid2 is available, should return mid2
+    chosen = db.choose_discriminating_motions(
+        [mpA, mpB], excluded_motion_ids=[mid1], k=1
+    )
+    assert mid1 not in chosen, "Excluded motion must not be returned"
+    assert mid2 in chosen, "mid2 should be chosen as only available motion"