- Fix match_mps_for_votes: 'Geen stem'/'no vote' now normalize to None (skipped), not 'afwezig' — so unanswered questions don't inflate overlap count - Add 5 additional tests to test_match_mps.py: zero-overlap exclusion, empty input validation, Geen stem overlap skip, excluded motions respected - Add tests/test_explorer_quiz.py: builder import smoke test plus 3 real-DB end-to-end scenarios (unique match, indistinguishable MPs, discriminating question reduces candidate set) - Full suite: 73 passed, 2 skippedmain
parent
eb73275f32
commit
238d9e9ec2
@ -0,0 +1,158 @@ |
|||||||
|
"""Tests for build_mp_quiz_tab and related quiz DB integration. |
||||||
|
|
||||||
|
Task 2.1 + Task 3.1: smoke test that the builder is exported and callable, |
||||||
|
plus an end-to-end simulation of the quiz matching logic via real temp DuckDB. |
||||||
|
""" |
||||||
|
|
||||||
|
import importlib |
||||||
|
from pathlib import Path |
||||||
|
|
||||||
|
import duckdb |
||||||
|
|
||||||
|
from database import MotionDatabase |
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------- |
||||||
|
# Helpers |
||||||
|
# --------------------------------------------------------------------------- |
||||||
|
|
||||||
|
|
||||||
|
def _create_motion(db: MotionDatabase, url: str, title: str, controversy: float = 0.5): |
||||||
|
md = { |
||||||
|
"title": title, |
||||||
|
"description": title, |
||||||
|
"date": "2023-01-01", |
||||||
|
"policy_area": "test", |
||||||
|
"voting_results": {}, |
||||||
|
"winning_margin": controversy, |
||||||
|
"layman_explanation": f"Uitleg over {title}", |
||||||
|
"url": url, |
||||||
|
} |
||||||
|
ok = db.insert_motion(md) |
||||||
|
assert ok, f"insert_motion failed for {url}" |
||||||
|
conn = duckdb.connect(db.db_path) |
||||||
|
row = conn.execute("SELECT id FROM motions WHERE url = ?", (url,)).fetchone() |
||||||
|
conn.close() |
||||||
|
assert row is not None |
||||||
|
return int(row[0]) |
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------- |
||||||
|
# Task 2.1 — smoke test: builder exported and callable |
||||||
|
# --------------------------------------------------------------------------- |
||||||
|
|
||||||
|
|
||||||
|
def test_builder_exists(): |
||||||
|
"""build_mp_quiz_tab must be importable from explorer and callable.""" |
||||||
|
mod = importlib.import_module("explorer") |
||||||
|
assert hasattr(mod, "build_mp_quiz_tab"), "build_mp_quiz_tab not found in explorer" |
||||||
|
assert callable(mod.build_mp_quiz_tab) |
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------- |
||||||
|
# Task 3.1 — end-to-end quiz matching simulation |
||||||
|
# --------------------------------------------------------------------------- |
||||||
|
|
||||||
|
|
||||||
|
def test_quiz_unique_match(tmp_path: Path): |
||||||
|
"""Full quiz flow: 6 motions, 4 MPs — pre-filled votes produce a unique match.""" |
||||||
|
db_path = str(tmp_path / "quiz_e2e.db") |
||||||
|
db = MotionDatabase(db_path) |
||||||
|
|
||||||
|
# 6 motions |
||||||
|
mids = [_create_motion(db, f"http://qe{i}", f"Motion {i}") for i in range(6)] |
||||||
|
|
||||||
|
# 4 MPs with distinct voting patterns |
||||||
|
mpA = "Alpha, A." |
||||||
|
mpB = "Beta, B." |
||||||
|
mpC = "Gamma, G." |
||||||
|
mpD = "Delta, D." |
||||||
|
|
||||||
|
patterns = { |
||||||
|
mpA: ["voor", "voor", "voor", "voor", "voor", "voor"], # all voor |
||||||
|
mpB: ["tegen", "tegen", "voor", "voor", "voor", "voor"], # 2 tegen then 4 voor |
||||||
|
mpC: ["voor", "tegen", "voor", "tegen", "voor", "tegen"], # alternating |
||||||
|
mpD: ["tegen", "tegen", "tegen", "tegen", "tegen", "voor"], # mostly tegen |
||||||
|
} |
||||||
|
|
||||||
|
for idx, mid in enumerate(mids): |
||||||
|
for mp, votes in patterns.items(): |
||||||
|
db.insert_mp_vote(mid, mp, votes[idx], "2023-01-01") |
||||||
|
|
||||||
|
# User votes matching mpA exactly (all Voor) |
||||||
|
user_votes = {mid: "Voor" for mid in mids} |
||||||
|
results = db.match_mps_for_votes(user_votes, limit=50) |
||||||
|
|
||||||
|
assert results, "Expected ranked results" |
||||||
|
top = results[0] |
||||||
|
assert top["mp_name"] == mpA |
||||||
|
assert top["agreement_pct"] == 100.0 |
||||||
|
|
||||||
|
# Unique match: only mpA should be at 100% |
||||||
|
top_matches = [r for r in results if r["agreement_pct"] == top["agreement_pct"]] |
||||||
|
assert len(top_matches) == 1, f"Expected unique top match, got {top_matches}" |
||||||
|
|
||||||
|
|
||||||
|
def test_quiz_indistinguishable_mps(tmp_path: Path): |
||||||
|
"""When two MPs vote identically, both appear at the top with same agreement_pct.""" |
||||||
|
db_path = str(tmp_path / "quiz_indist.db") |
||||||
|
db = MotionDatabase(db_path) |
||||||
|
|
||||||
|
mids = [ |
||||||
|
_create_motion(db, f"http://ind{i}", f"Indist Motion {i}") for i in range(3) |
||||||
|
] |
||||||
|
|
||||||
|
mpA = "Alice, A." |
||||||
|
mpB = "Bob, B." |
||||||
|
|
||||||
|
# Both vote identically |
||||||
|
for mid in mids: |
||||||
|
db.insert_mp_vote(mid, mpA, "voor", "2023-01-01") |
||||||
|
db.insert_mp_vote(mid, mpB, "voor", "2023-01-01") |
||||||
|
|
||||||
|
user_votes = {mid: "Voor" for mid in mids} |
||||||
|
results = db.match_mps_for_votes(user_votes, limit=50) |
||||||
|
|
||||||
|
assert len(results) == 2, "Both identical MPs should appear" |
||||||
|
assert results[0]["agreement_pct"] == results[1]["agreement_pct"] == 100.0 |
||||||
|
|
||||||
|
|
||||||
|
def test_quiz_discriminating_reduces_candidates(tmp_path: Path): |
||||||
|
"""After one discriminating question, candidate set should shrink.""" |
||||||
|
db_path = str(tmp_path / "quiz_disc.db") |
||||||
|
db = MotionDatabase(db_path) |
||||||
|
|
||||||
|
# 4 motions: motion0 splits mpA from mpB/mpC |
||||||
|
mids = [_create_motion(db, f"http://disc{i}", f"Disc Motion {i}") for i in range(4)] |
||||||
|
|
||||||
|
mpA = "Alpha, A." |
||||||
|
mpB = "Beta, B." |
||||||
|
mpC = "Gamma, G." |
||||||
|
|
||||||
|
# motion0 splits; motions 1-3 they all agree |
||||||
|
db.insert_mp_vote(mids[0], mpA, "voor", "2023-01-01") |
||||||
|
db.insert_mp_vote(mids[0], mpB, "tegen", "2023-01-01") |
||||||
|
db.insert_mp_vote(mids[0], mpC, "tegen", "2023-01-01") |
||||||
|
|
||||||
|
for i in range(1, 4): |
||||||
|
for mp in (mpA, mpB, mpC): |
||||||
|
db.insert_mp_vote(mids[i], mp, "voor", "2023-01-01") |
||||||
|
|
||||||
|
# All three agree on motions 1-3; choose_discriminating_motions should pick motion0 |
||||||
|
all_mps = [mpA, mpB, mpC] |
||||||
|
chosen = db.choose_discriminating_motions(all_mps, excluded_motion_ids=[], k=1) |
||||||
|
assert chosen[0] == mids[0], ( |
||||||
|
f"Expected motion {mids[0]} as discriminating, got {chosen}" |
||||||
|
) |
||||||
|
|
||||||
|
# After user answers Voor on motion0, only mpA should rank at 100% |
||||||
|
user_votes = {mids[0]: "Voor"} |
||||||
|
results = db.match_mps_for_votes(user_votes, limit=50) |
||||||
|
top = results[0] |
||||||
|
assert top["mp_name"] == mpA |
||||||
|
assert top["agreement_pct"] == 100.0 |
||||||
|
|
||||||
|
# mpB and mpC should be at 0% |
||||||
|
for r in results: |
||||||
|
if r["mp_name"] in (mpB, mpC): |
||||||
|
assert r["agreement_pct"] == 0.0 |
||||||
@ -0,0 +1,207 @@ |
|||||||
|
import duckdb |
||||||
|
import json |
||||||
|
import os |
||||||
|
from pathlib import Path |
||||||
|
|
||||||
|
from database import MotionDatabase |
||||||
|
|
||||||
|
|
||||||
|
def _create_motion_and_get_id( |
||||||
|
db: MotionDatabase, url: str, title: str, layman: str = "x" |
||||||
|
): |
||||||
|
md = { |
||||||
|
"title": title, |
||||||
|
"description": title, |
||||||
|
"date": "2023-01-01", |
||||||
|
"policy_area": "test", |
||||||
|
"voting_results": {}, |
||||||
|
"winning_margin": 0.5, |
||||||
|
"layman_explanation": layman, |
||||||
|
"url": url, |
||||||
|
} |
||||||
|
ok = db.insert_motion(md) |
||||||
|
assert ok, "insert_motion failed" |
||||||
|
conn = duckdb.connect(db.db_path) |
||||||
|
row = conn.execute("SELECT id FROM motions WHERE url = ?", (url,)).fetchone() |
||||||
|
conn.close() |
||||||
|
assert row is not None, "couldn't find inserted motion" |
||||||
|
return int(row[0]) |
||||||
|
|
||||||
|
|
||||||
|
def test_match_mps_basic(tmp_path: Path): |
||||||
|
db_path = str(tmp_path / "test_motions.db") |
||||||
|
db = MotionDatabase(db_path) |
||||||
|
|
||||||
|
# create 4 motions |
||||||
|
mids = [] |
||||||
|
for i in range(1, 5): |
||||||
|
mids.append(_create_motion_and_get_id(db, f"http://m{i}", f"Motion {i}")) |
||||||
|
|
||||||
|
# MPs |
||||||
|
mpA = "Alpha, A." |
||||||
|
mpB = "Beta, B." |
||||||
|
mpC = "Gamma, G." |
||||||
|
|
||||||
|
# Voting patterns (motions 1..4) |
||||||
|
# A: v v t v (3/4) |
||||||
|
# B: t t t t (0/4) |
||||||
|
# C: v v v v (4/4) |
||||||
|
votes = { |
||||||
|
mpA: ["voor", "voor", "tegen", "voor"], |
||||||
|
mpB: ["tegen", "tegen", "tegen", "tegen"], |
||||||
|
mpC: ["voor", "voor", "voor", "voor"], |
||||||
|
} |
||||||
|
|
||||||
|
for idx, mid in enumerate(mids): |
||||||
|
for mp_name, vlist in votes.items(): |
||||||
|
db.insert_mp_vote( |
||||||
|
motion_id=mid, |
||||||
|
mp_name=mp_name, |
||||||
|
vote=vlist[idx], |
||||||
|
date="2023-01-01", |
||||||
|
party=None, |
||||||
|
) |
||||||
|
|
||||||
|
# User votes matching Gamma exactly |
||||||
|
user_votes = {mids[0]: "Voor", mids[1]: "Voor", mids[2]: "Voor", mids[3]: "Voor"} |
||||||
|
|
||||||
|
results = db.match_mps_for_votes(user_votes, limit=10) |
||||||
|
assert results, "No results returned" |
||||||
|
|
||||||
|
# Top candidate should be Gamma |
||||||
|
top = results[0] |
||||||
|
assert top["mp_name"] == mpC |
||||||
|
assert top["matched"] == 4 |
||||||
|
assert top["overlap"] == 4 |
||||||
|
assert top["agreement_pct"] == 100.0 |
||||||
|
|
||||||
|
# Check Alpha is second with 3 matched |
||||||
|
names = [r["mp_name"] for r in results] |
||||||
|
assert mpA in names |
||||||
|
a = next(r for r in results if r["mp_name"] == mpA) |
||||||
|
assert a["matched"] == 3 |
||||||
|
assert a["overlap"] == 4 |
||||||
|
assert a["agreement_pct"] == 75.0 |
||||||
|
|
||||||
|
|
||||||
|
def test_choose_discriminating_motions(tmp_path: Path): |
||||||
|
db_path = str(tmp_path / "test_motions2.db") |
||||||
|
db = MotionDatabase(db_path) |
||||||
|
|
||||||
|
# create 3 motions |
||||||
|
mids = [] |
||||||
|
for i in range(1, 4): |
||||||
|
mids.append(_create_motion_and_get_id(db, f"http://d{i}", f"DMotion {i}")) |
||||||
|
|
||||||
|
mpA = "Alice, A." |
||||||
|
mpB = "Bob, B." |
||||||
|
mpC = "Carol, C." |
||||||
|
|
||||||
|
# Votes: motion1 splits A vs B/C |
||||||
|
# motion1: A=voor, B=tegen, C=tegen |
||||||
|
# motion2: all voor |
||||||
|
# motion3: all tegen |
||||||
|
db.insert_mp_vote(mids[0], mpA, "voor", "2023-01-01") |
||||||
|
db.insert_mp_vote(mids[0], mpB, "tegen", "2023-01-01") |
||||||
|
db.insert_mp_vote(mids[0], mpC, "tegen", "2023-01-01") |
||||||
|
|
||||||
|
for mp in (mpA, mpB, mpC): |
||||||
|
db.insert_mp_vote(mids[1], mp, "voor", "2023-01-01") |
||||||
|
db.insert_mp_vote(mids[2], mp, "tegen", "2023-01-01") |
||||||
|
|
||||||
|
candidates = [mpA, mpB, mpC] |
||||||
|
chosen = db.choose_discriminating_motions(candidates, excluded_motion_ids=[], k=1) |
||||||
|
assert chosen, "No discriminating motion returned" |
||||||
|
# best splitter should be motion1 (mids[0]) |
||||||
|
assert chosen[0] == mids[0] |
||||||
|
|
||||||
|
|
||||||
|
def test_match_excludes_zero_overlap(tmp_path: Path): |
||||||
|
"""MPs who voted on none of the user's motions must not appear in results.""" |
||||||
|
db_path = str(tmp_path / "zo.db") |
||||||
|
db = MotionDatabase(db_path) |
||||||
|
|
||||||
|
mid1 = _create_motion_and_get_id(db, "http://zo1", "ZO Motion 1") |
||||||
|
mid2 = _create_motion_and_get_id(db, "http://zo2", "ZO Motion 2") |
||||||
|
|
||||||
|
mp_overlap = "Overlap, O." |
||||||
|
mp_noshow = "Noshow, N." |
||||||
|
|
||||||
|
db.insert_mp_vote(mid1, mp_overlap, "voor", "2023-01-01") |
||||||
|
# mp_noshow only voted on mid2, not mid1 |
||||||
|
db.insert_mp_vote(mid2, mp_noshow, "voor", "2023-01-01") |
||||||
|
|
||||||
|
results = db.match_mps_for_votes({mid1: "Voor"}, limit=10) |
||||||
|
names = [r["mp_name"] for r in results] |
||||||
|
|
||||||
|
assert mp_overlap in names, "mp_overlap should appear" |
||||||
|
assert mp_noshow not in names, "mp_noshow had no overlap and must be excluded" |
||||||
|
|
||||||
|
|
||||||
|
def test_invalid_input_empty_user_votes(tmp_path: Path): |
||||||
|
"""Passing an empty dict must raise ValueError.""" |
||||||
|
db_path = str(tmp_path / "inv.db") |
||||||
|
db = MotionDatabase(db_path) |
||||||
|
|
||||||
|
import pytest |
||||||
|
|
||||||
|
with pytest.raises(ValueError, match="non-empty"): |
||||||
|
db.match_mps_for_votes({}) |
||||||
|
|
||||||
|
|
||||||
|
def test_invalid_input_empty_candidates(tmp_path: Path): |
||||||
|
"""Passing empty candidates to choose_discriminating_motions must raise ValueError.""" |
||||||
|
db_path = str(tmp_path / "inv2.db") |
||||||
|
db = MotionDatabase(db_path) |
||||||
|
|
||||||
|
import pytest |
||||||
|
|
||||||
|
with pytest.raises(ValueError): |
||||||
|
db.choose_discriminating_motions([], excluded_motion_ids=[]) |
||||||
|
|
||||||
|
|
||||||
|
def test_geen_stem_not_counted_in_overlap(tmp_path: Path): |
||||||
|
"""'Geen stem' user votes should be skipped (not counted in overlap or matched).""" |
||||||
|
db_path = str(tmp_path / "gs.db") |
||||||
|
db = MotionDatabase(db_path) |
||||||
|
|
||||||
|
mid1 = _create_motion_and_get_id(db, "http://gs1", "GS Motion 1") |
||||||
|
mid2 = _create_motion_and_get_id(db, "http://gs2", "GS Motion 2") |
||||||
|
|
||||||
|
mpA = "Alpha, A." |
||||||
|
db.insert_mp_vote(mid1, mpA, "voor", "2023-01-01") |
||||||
|
db.insert_mp_vote(mid2, mpA, "voor", "2023-01-01") |
||||||
|
|
||||||
|
# user says Geen stem on mid1 (skip), Voor on mid2 |
||||||
|
results = db.match_mps_for_votes({mid1: "Geen stem", mid2: "Voor"}, limit=10) |
||||||
|
assert results, "Expected at least one result" |
||||||
|
r = results[0] |
||||||
|
# overlap should only be 1 (mid2 counted, mid1 skipped) |
||||||
|
assert r["overlap"] == 1 |
||||||
|
assert r["matched"] == 1 |
||||||
|
assert r["agreement_pct"] == 100.0 |
||||||
|
|
||||||
|
|
||||||
|
def test_choose_excluded_motions_respected(tmp_path: Path): |
||||||
|
"""Excluded motion ids must not be returned by choose_discriminating_motions.""" |
||||||
|
db_path = str(tmp_path / "excl.db") |
||||||
|
db = MotionDatabase(db_path) |
||||||
|
|
||||||
|
mid1 = _create_motion_and_get_id(db, "http://ex1", "EX Motion 1") |
||||||
|
mid2 = _create_motion_and_get_id(db, "http://ex2", "EX Motion 2") |
||||||
|
|
||||||
|
mpA = "Alice, A." |
||||||
|
mpB = "Bob, B." |
||||||
|
|
||||||
|
# mid1 splits them; mid2 they agree |
||||||
|
db.insert_mp_vote(mid1, mpA, "voor", "2023-01-01") |
||||||
|
db.insert_mp_vote(mid1, mpB, "tegen", "2023-01-01") |
||||||
|
db.insert_mp_vote(mid2, mpA, "voor", "2023-01-01") |
||||||
|
db.insert_mp_vote(mid2, mpB, "voor", "2023-01-01") |
||||||
|
|
||||||
|
# Exclude mid1 — only mid2 is available, should return mid2 |
||||||
|
chosen = db.choose_discriminating_motions( |
||||||
|
[mpA, mpB], excluded_motion_ids=[mid1], k=1 |
||||||
|
) |
||||||
|
assert mid1 not in chosen, "Excluded motion must not be returned" |
||||||
|
assert mid2 in chosen, "mid2 should be chosen as only available motion" |
||||||
Loading…
Reference in new issue