- Fix match_mps_for_votes: 'Geen stem'/'no vote' now normalize to None (skipped), not 'afwezig', so unanswered questions don't inflate the overlap count
- Add 5 additional tests to test_match_mps.py: zero-overlap exclusion, empty input validation, Geen stem overlap skip, excluded motions respected
- Add tests/test_explorer_quiz.py: builder import smoke test plus 3 real-DB end-to-end scenarios (unique match, indistinguishable MPs, discriminating question reduces candidate set)
- Full suite: 73 passed, 2 skipped
main
parent
eb73275f32
commit
238d9e9ec2
@ -0,0 +1,158 @@ |
||||
"""Tests for build_mp_quiz_tab and related quiz DB integration. |
||||
|
||||
Task 2.1 + Task 3.1: smoke test that the builder is exported and callable, |
||||
plus an end-to-end simulation of the quiz matching logic via real temp DuckDB. |
||||
""" |
||||
|
||||
import importlib |
||||
from pathlib import Path |
||||
|
||||
import duckdb |
||||
|
||||
from database import MotionDatabase |
||||
|
||||
|
||||
# --------------------------------------------------------------------------- |
||||
# Helpers |
||||
# --------------------------------------------------------------------------- |
||||
|
||||
|
||||
def _create_motion(db: MotionDatabase, url: str, title: str, controversy: float = 0.5):
    """Insert a minimal test motion and return its integer database id.

    Args:
        db: Database wrapper under test; its ``insert_motion`` method and
            ``db_path`` attribute are used.
        url: URL stored on the motion; also used to look the row up again.
        title: Used for both the title and the description of the motion.
        controversy: Value stored in the motion's ``winning_margin`` field.

    Returns:
        The ``id`` column of the inserted row, converted to ``int``.

    Raises:
        AssertionError: If the insert reports failure or the row is not found.
    """
    md = {
        "title": title,
        "description": title,
        "date": "2023-01-01",
        "policy_area": "test",
        "voting_results": {},
        "winning_margin": controversy,
        "layman_explanation": f"Uitleg over {title}",
        "url": url,
    }
    ok = db.insert_motion(md)
    assert ok, f"insert_motion failed for {url}"

    conn = duckdb.connect(db.db_path)
    try:
        row = conn.execute(
            "SELECT id FROM motions WHERE url = ?", (url,)
        ).fetchone()
    finally:
        # Close even when the query raises so the temp database file is not
        # left open for the rest of the test session.
        conn.close()

    assert row is not None
    return int(row[0])
||||
|
||||
|
||||
# --------------------------------------------------------------------------- |
||||
# Task 2.1 — smoke test: builder exported and callable |
||||
# --------------------------------------------------------------------------- |
||||
|
||||
|
||||
def test_builder_exists():
    """Smoke test: ``explorer`` exposes a callable named ``build_mp_quiz_tab``."""
    explorer_module = importlib.import_module("explorer")

    # A private sentinel distinguishes "attribute missing" from any real value.
    missing = object()
    builder = getattr(explorer_module, "build_mp_quiz_tab", missing)

    assert builder is not missing, "build_mp_quiz_tab not found in explorer"
    assert callable(builder)
||||
|
||||
|
||||
# --------------------------------------------------------------------------- |
||||
# Task 3.1 — end-to-end quiz matching simulation |
||||
# --------------------------------------------------------------------------- |
||||
|
||||
|
||||
def test_quiz_unique_match(tmp_path: Path):
    """End-to-end quiz run: six motions, four MPs, answers single out one MP."""
    database = MotionDatabase(str(tmp_path / "quiz_e2e.db"))

    motion_ids = []
    for n in range(6):
        motion_ids.append(_create_motion(database, f"http://qe{n}", f"Motion {n}"))

    alpha, beta, gamma, delta = "Alpha, A.", "Beta, B.", "Gamma, G.", "Delta, D."

    # One ballot per motion for every MP; each MP has a distinct pattern.
    ballots_by_mp = {
        alpha: ["voor"] * 6,
        beta: ["tegen"] * 2 + ["voor"] * 4,
        gamma: ["voor", "tegen"] * 3,
        delta: ["tegen"] * 5 + ["voor"],
    }

    for position, motion_id in enumerate(motion_ids):
        for mp_name, ballots in ballots_by_mp.items():
            database.insert_mp_vote(
                motion_id, mp_name, ballots[position], "2023-01-01"
            )

    # The user answers "Voor" everywhere, which is exactly Alpha's pattern.
    answers = dict.fromkeys(motion_ids, "Voor")
    ranking = database.match_mps_for_votes(answers, limit=50)

    assert ranking, "Expected ranked results"
    best = ranking[0]
    assert best["mp_name"] == alpha
    assert best["agreement_pct"] == 100.0

    # Nobody else may share the top agreement percentage.
    tied_at_top = [
        entry for entry in ranking if entry["agreement_pct"] == best["agreement_pct"]
    ]
    assert len(tied_at_top) == 1, f"Expected unique top match, got {tied_at_top}"
||||
|
||||
|
||||
def test_quiz_indistinguishable_mps(tmp_path: Path):
    """Two MPs with identical ballots are both returned with equal agreement."""
    database = MotionDatabase(str(tmp_path / "quiz_indist.db"))

    motion_ids = []
    for n in range(3):
        motion_ids.append(
            _create_motion(database, f"http://ind{n}", f"Indist Motion {n}")
        )

    twins = ("Alice, A.", "Bob, B.")

    # Every MP casts the same ballot on every motion.
    for motion_id in motion_ids:
        for mp_name in twins:
            database.insert_mp_vote(motion_id, mp_name, "voor", "2023-01-01")

    answers = dict.fromkeys(motion_ids, "Voor")
    ranking = database.match_mps_for_votes(answers, limit=50)

    assert len(ranking) == 2, "Both identical MPs should appear"
    first, second = ranking
    assert first["agreement_pct"] == second["agreement_pct"] == 100.0
||||
|
||||
|
||||
def test_quiz_discriminating_reduces_candidates(tmp_path: Path):
    """A single discriminating question separates one MP from the others.

    Four motions are created. Only the first splits the three MPs (Alpha votes
    "voor", Beta and Gamma vote "tegen"); on the remaining three everyone votes
    "voor". The test checks that ``choose_discriminating_motions`` picks the
    splitting motion and that answering "Voor" on it ranks Alpha first at 100%.
    """
    db_path = str(tmp_path / "quiz_disc.db")
    db = MotionDatabase(db_path)

    # 4 motions: motion0 splits mpA from mpB/mpC
    mids = [_create_motion(db, f"http://disc{i}", f"Disc Motion {i}") for i in range(4)]

    mpA = "Alpha, A."
    mpB = "Beta, B."
    mpC = "Gamma, G."

    # motion0 splits; on motions 1-3 they all agree
    db.insert_mp_vote(mids[0], mpA, "voor", "2023-01-01")
    db.insert_mp_vote(mids[0], mpB, "tegen", "2023-01-01")
    db.insert_mp_vote(mids[0], mpC, "tegen", "2023-01-01")

    for i in range(1, 4):
        for mp in (mpA, mpB, mpC):
            db.insert_mp_vote(mids[i], mp, "voor", "2023-01-01")

    # All three agree on motions 1-3; choose_discriminating_motions should pick motion0
    all_mps = [mpA, mpB, mpC]
    chosen = db.choose_discriminating_motions(all_mps, excluded_motion_ids=[], k=1)
    # Guard before indexing so an empty result fails with a readable message
    # instead of an IndexError.
    assert chosen, "No discriminating motion returned"
    assert chosen[0] == mids[0], (
        f"Expected motion {mids[0]} as discriminating, got {chosen}"
    )

    # After the user answers Voor on motion0, only mpA should rank at 100%
    user_votes = {mids[0]: "Voor"}
    results = db.match_mps_for_votes(user_votes, limit=50)
    assert results, "Expected ranked results"
    top = results[0]
    assert top["mp_name"] == mpA
    assert top["agreement_pct"] == 100.0

    # mpB and mpC should be at 0% when they are listed; this check is
    # deliberately conditional and does not require them to be in the results.
    for r in results:
        if r["mp_name"] in (mpB, mpC):
            assert r["agreement_pct"] == 0.0
||||
@ -0,0 +1,207 @@ |
||||
import duckdb |
||||
import json |
||||
import os |
||||
from pathlib import Path |
||||
|
||||
from database import MotionDatabase |
||||
|
||||
|
||||
def _create_motion_and_get_id(
    db: MotionDatabase, url: str, title: str, layman: str = "x"
):
    """Insert a minimal test motion and return its integer database id.

    Args:
        db: Database wrapper under test; its ``insert_motion`` method and
            ``db_path`` attribute are used.
        url: URL stored on the motion; also used to look the row up again.
        title: Used for both the title and the description of the motion.
        layman: Value stored in the motion's ``layman_explanation`` field.

    Returns:
        The ``id`` column of the inserted row, converted to ``int``.

    Raises:
        AssertionError: If the insert reports failure or the row is not found.
    """
    md = {
        "title": title,
        "description": title,
        "date": "2023-01-01",
        "policy_area": "test",
        "voting_results": {},
        "winning_margin": 0.5,
        "layman_explanation": layman,
        "url": url,
    }
    ok = db.insert_motion(md)
    assert ok, "insert_motion failed"

    conn = duckdb.connect(db.db_path)
    try:
        row = conn.execute(
            "SELECT id FROM motions WHERE url = ?", (url,)
        ).fetchone()
    finally:
        # Close even when the query raises so the temp database file is not
        # left open for the rest of the test session.
        conn.close()

    assert row is not None, "couldn't find inserted motion"
    return int(row[0])
||||
|
||||
|
||||
def test_match_mps_basic(tmp_path: Path):
    """Ranking reports matched/overlap/agreement for a full and a partial match."""
    database = MotionDatabase(str(tmp_path / "test_motions.db"))

    # Four motions, numbered 1..4.
    motion_ids = [
        _create_motion_and_get_id(database, f"http://m{n}", f"Motion {n}")
        for n in range(1, 5)
    ]

    alpha, beta, gamma = "Alpha, A.", "Beta, B.", "Gamma, G."

    # Ballots per MP, in motion order; the user will answer "Voor" four times,
    # so Alpha agrees on 3 of 4, Beta on 0 of 4 and Gamma on 4 of 4.
    ballots_by_mp = {
        alpha: ["voor", "voor", "tegen", "voor"],
        beta: ["tegen"] * 4,
        gamma: ["voor"] * 4,
    }

    for position, motion_id in enumerate(motion_ids):
        for mp_name, ballots in ballots_by_mp.items():
            database.insert_mp_vote(
                motion_id=motion_id,
                mp_name=mp_name,
                vote=ballots[position],
                date="2023-01-01",
                party=None,
            )

    # The user's answers mirror Gamma's ballots exactly.
    answers = dict.fromkeys(motion_ids, "Voor")

    ranking = database.match_mps_for_votes(answers, limit=10)
    assert ranking, "No results returned"

    # Gamma must lead with a perfect score.
    best = ranking[0]
    assert best["mp_name"] == gamma
    assert best["matched"] == 4
    assert best["overlap"] == 4
    assert best["agreement_pct"] == 100.0

    # Alpha must be listed with three of four answers matching.
    returned_names = [entry["mp_name"] for entry in ranking]
    assert alpha in returned_names
    alpha_entry = next(entry for entry in ranking if entry["mp_name"] == alpha)
    assert alpha_entry["matched"] == 3
    assert alpha_entry["overlap"] == 4
    assert alpha_entry["agreement_pct"] == 75.0
||||
|
||||
|
||||
def test_choose_discriminating_motions(tmp_path: Path):
    """The motion on which the candidates disagree is picked as the splitter."""
    database = MotionDatabase(str(tmp_path / "test_motions2.db"))

    # Three motions, numbered 1..3.
    motion_ids = [
        _create_motion_and_get_id(database, f"http://d{n}", f"DMotion {n}")
        for n in range(1, 4)
    ]
    splitting_id, all_for_id, all_against_id = motion_ids

    alice, bob, carol = "Alice, A.", "Bob, B.", "Carol, C."
    everyone = (alice, bob, carol)

    # First motion: Alice votes for, the other two vote against.
    for mp_name, ballot in zip(everyone, ("voor", "tegen", "tegen")):
        database.insert_mp_vote(splitting_id, mp_name, ballot, "2023-01-01")

    # Second motion is unanimously for, third unanimously against.
    for mp_name in everyone:
        database.insert_mp_vote(all_for_id, mp_name, "voor", "2023-01-01")
        database.insert_mp_vote(all_against_id, mp_name, "tegen", "2023-01-01")

    picked = database.choose_discriminating_motions(
        list(everyone), excluded_motion_ids=[], k=1
    )
    assert picked, "No discriminating motion returned"
    # The only motion with disagreement has to come first.
    assert picked[0] == splitting_id
||||
|
||||
|
||||
def test_match_excludes_zero_overlap(tmp_path: Path):
    """An MP with no ballot on any answered motion is left out of the ranking."""
    database = MotionDatabase(str(tmp_path / "zo.db"))

    answered_id = _create_motion_and_get_id(database, "http://zo1", "ZO Motion 1")
    unanswered_id = _create_motion_and_get_id(database, "http://zo2", "ZO Motion 2")

    voter, absentee = "Overlap, O.", "Noshow, N."

    database.insert_mp_vote(answered_id, voter, "voor", "2023-01-01")
    # The absentee's only ballot is on the motion the user never answers.
    database.insert_mp_vote(unanswered_id, absentee, "voor", "2023-01-01")

    ranking = database.match_mps_for_votes({answered_id: "Voor"}, limit=10)
    returned_names = {entry["mp_name"] for entry in ranking}

    assert voter in returned_names, "mp_overlap should appear"
    assert (
        absentee not in returned_names
    ), "mp_noshow had no overlap and must be excluded"
||||
|
||||
|
||||
def test_invalid_input_empty_user_votes(tmp_path: Path):
    """An empty vote mapping is rejected with a ValueError matching "non-empty"."""
    import pytest

    database = MotionDatabase(str(tmp_path / "inv.db"))
    no_answers = {}

    with pytest.raises(ValueError, match="non-empty"):
        database.match_mps_for_votes(no_answers)
||||
|
||||
|
||||
def test_invalid_input_empty_candidates(tmp_path: Path):
    """An empty candidate list makes choose_discriminating_motions raise ValueError."""
    import pytest

    database = MotionDatabase(str(tmp_path / "inv2.db"))
    no_candidates = []

    with pytest.raises(ValueError):
        database.choose_discriminating_motions(no_candidates, excluded_motion_ids=[])
||||
|
||||
|
||||
def test_geen_stem_not_counted_in_overlap(tmp_path: Path):
    """A 'Geen stem' answer counts toward neither overlap nor matched."""
    database = MotionDatabase(str(tmp_path / "gs.db"))

    skipped_id = _create_motion_and_get_id(database, "http://gs1", "GS Motion 1")
    answered_id = _create_motion_and_get_id(database, "http://gs2", "GS Motion 2")

    alpha = "Alpha, A."
    for motion_id in (skipped_id, answered_id):
        database.insert_mp_vote(motion_id, alpha, "voor", "2023-01-01")

    # The user skips the first motion and answers "Voor" on the second.
    answers = {skipped_id: "Geen stem", answered_id: "Voor"}
    ranking = database.match_mps_for_votes(answers, limit=10)
    assert ranking, "Expected at least one result"

    # Only the answered motion may be counted.
    leader = ranking[0]
    assert leader["overlap"] == 1
    assert leader["matched"] == 1
    assert leader["agreement_pct"] == 100.0
||||
|
||||
|
||||
def test_choose_excluded_motions_respected(tmp_path: Path):
    """choose_discriminating_motions never returns an excluded motion id."""
    database = MotionDatabase(str(tmp_path / "excl.db"))

    splitting_id = _create_motion_and_get_id(database, "http://ex1", "EX Motion 1")
    agreeing_id = _create_motion_and_get_id(database, "http://ex2", "EX Motion 2")

    alice, bob = "Alice, A.", "Bob, B."

    # The MPs disagree on the first motion and agree on the second.
    ballots = (
        (splitting_id, alice, "voor"),
        (splitting_id, bob, "tegen"),
        (agreeing_id, alice, "voor"),
        (agreeing_id, bob, "voor"),
    )
    for motion_id, mp_name, ballot in ballots:
        database.insert_mp_vote(motion_id, mp_name, ballot, "2023-01-01")

    # With the splitting motion excluded, only the second one is left to pick.
    picked = database.choose_discriminating_motions(
        [alice, bob], excluded_motion_ids=[splitting_id], k=1
    )
    assert splitting_id not in picked, "Excluded motion must not be returned"
    assert agreeing_id in picked, "mid2 should be chosen as only available motion"
||||
Loading…
Reference in new issue