test: add full quiz tab test suite and fix Geen stem normalization

- Fix match_mps_for_votes: 'Geen stem'/'no vote' now normalize to None (skipped),
  not 'afwezig' — so unanswered questions don't inflate overlap count
- Add 5 additional tests to test_match_mps.py: zero-overlap exclusion,
  empty input validation, Geen stem overlap skip, excluded motions respected
- Add tests/test_explorer_quiz.py: builder import smoke test plus 3 real-DB
  end-to-end scenarios (unique match, indistinguishable MPs, discriminating
  question reduces candidate set)
- Full suite: 73 passed, 2 skipped
main
Sven Geboers 1 month ago
parent eb73275f32
commit 238d9e9ec2
  1. 4
      database.py
  2. 158
      tests/test_explorer_quiz.py
  3. 207
      tests/test_match_mps.py

@ -691,7 +691,9 @@ class MotionDatabase:
return "tegen" return "tegen"
if s_low in ("onthouden", "abstain", "abstained"): if s_low in ("onthouden", "abstain", "abstained"):
return "onthouden" return "onthouden"
if s_low in ("geen stem", "afwezig", "absent", "no vote"): if s_low in ("geen stem", "no vote"):
return None # user chose not to answer — skip entirely
if s_low in ("afwezig", "absent"):
return "afwezig" return "afwezig"
# already canonical? # already canonical?
if s_low in ("voor", "tegen", "onthouden", "afwezig"): if s_low in ("voor", "tegen", "onthouden", "afwezig"):

@ -0,0 +1,158 @@
"""Tests for build_mp_quiz_tab and related quiz DB integration.
Task 2.1 + Task 3.1: smoke test that the builder is exported and callable,
plus an end-to-end simulation of the quiz matching logic via real temp DuckDB.
"""
import importlib
from pathlib import Path
import duckdb
from database import MotionDatabase
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _create_motion(db: MotionDatabase, url: str, title: str, controversy: float = 0.5):
md = {
"title": title,
"description": title,
"date": "2023-01-01",
"policy_area": "test",
"voting_results": {},
"winning_margin": controversy,
"layman_explanation": f"Uitleg over {title}",
"url": url,
}
ok = db.insert_motion(md)
assert ok, f"insert_motion failed for {url}"
conn = duckdb.connect(db.db_path)
row = conn.execute("SELECT id FROM motions WHERE url = ?", (url,)).fetchone()
conn.close()
assert row is not None
return int(row[0])
# ---------------------------------------------------------------------------
# Task 2.1 — smoke test: builder exported and callable
# ---------------------------------------------------------------------------
def test_builder_exists():
"""build_mp_quiz_tab must be importable from explorer and callable."""
mod = importlib.import_module("explorer")
assert hasattr(mod, "build_mp_quiz_tab"), "build_mp_quiz_tab not found in explorer"
assert callable(mod.build_mp_quiz_tab)
# ---------------------------------------------------------------------------
# Task 3.1 — end-to-end quiz matching simulation
# ---------------------------------------------------------------------------
def test_quiz_unique_match(tmp_path: Path):
"""Full quiz flow: 6 motions, 4 MPs — pre-filled votes produce a unique match."""
db_path = str(tmp_path / "quiz_e2e.db")
db = MotionDatabase(db_path)
# 6 motions
mids = [_create_motion(db, f"http://qe{i}", f"Motion {i}") for i in range(6)]
# 4 MPs with distinct voting patterns
mpA = "Alpha, A."
mpB = "Beta, B."
mpC = "Gamma, G."
mpD = "Delta, D."
patterns = {
mpA: ["voor", "voor", "voor", "voor", "voor", "voor"], # all voor
mpB: ["tegen", "tegen", "voor", "voor", "voor", "voor"], # 2 tegen then 4 voor
mpC: ["voor", "tegen", "voor", "tegen", "voor", "tegen"], # alternating
mpD: ["tegen", "tegen", "tegen", "tegen", "tegen", "voor"], # mostly tegen
}
for idx, mid in enumerate(mids):
for mp, votes in patterns.items():
db.insert_mp_vote(mid, mp, votes[idx], "2023-01-01")
# User votes matching mpA exactly (all Voor)
user_votes = {mid: "Voor" for mid in mids}
results = db.match_mps_for_votes(user_votes, limit=50)
assert results, "Expected ranked results"
top = results[0]
assert top["mp_name"] == mpA
assert top["agreement_pct"] == 100.0
# Unique match: only mpA should be at 100%
top_matches = [r for r in results if r["agreement_pct"] == top["agreement_pct"]]
assert len(top_matches) == 1, f"Expected unique top match, got {top_matches}"
def test_quiz_indistinguishable_mps(tmp_path: Path):
"""When two MPs vote identically, both appear at the top with same agreement_pct."""
db_path = str(tmp_path / "quiz_indist.db")
db = MotionDatabase(db_path)
mids = [
_create_motion(db, f"http://ind{i}", f"Indist Motion {i}") for i in range(3)
]
mpA = "Alice, A."
mpB = "Bob, B."
# Both vote identically
for mid in mids:
db.insert_mp_vote(mid, mpA, "voor", "2023-01-01")
db.insert_mp_vote(mid, mpB, "voor", "2023-01-01")
user_votes = {mid: "Voor" for mid in mids}
results = db.match_mps_for_votes(user_votes, limit=50)
assert len(results) == 2, "Both identical MPs should appear"
assert results[0]["agreement_pct"] == results[1]["agreement_pct"] == 100.0
def test_quiz_discriminating_reduces_candidates(tmp_path: Path):
"""After one discriminating question, candidate set should shrink."""
db_path = str(tmp_path / "quiz_disc.db")
db = MotionDatabase(db_path)
# 4 motions: motion0 splits mpA from mpB/mpC
mids = [_create_motion(db, f"http://disc{i}", f"Disc Motion {i}") for i in range(4)]
mpA = "Alpha, A."
mpB = "Beta, B."
mpC = "Gamma, G."
# motion0 splits; motions 1-3 they all agree
db.insert_mp_vote(mids[0], mpA, "voor", "2023-01-01")
db.insert_mp_vote(mids[0], mpB, "tegen", "2023-01-01")
db.insert_mp_vote(mids[0], mpC, "tegen", "2023-01-01")
for i in range(1, 4):
for mp in (mpA, mpB, mpC):
db.insert_mp_vote(mids[i], mp, "voor", "2023-01-01")
# All three agree on motions 1-3; choose_discriminating_motions should pick motion0
all_mps = [mpA, mpB, mpC]
chosen = db.choose_discriminating_motions(all_mps, excluded_motion_ids=[], k=1)
assert chosen[0] == mids[0], (
f"Expected motion {mids[0]} as discriminating, got {chosen}"
)
# After user answers Voor on motion0, only mpA should rank at 100%
user_votes = {mids[0]: "Voor"}
results = db.match_mps_for_votes(user_votes, limit=50)
top = results[0]
assert top["mp_name"] == mpA
assert top["agreement_pct"] == 100.0
# mpB and mpC should be at 0%
for r in results:
if r["mp_name"] in (mpB, mpC):
assert r["agreement_pct"] == 0.0

@ -0,0 +1,207 @@
import duckdb
import json
import os
from pathlib import Path
from database import MotionDatabase
def _create_motion_and_get_id(
db: MotionDatabase, url: str, title: str, layman: str = "x"
):
md = {
"title": title,
"description": title,
"date": "2023-01-01",
"policy_area": "test",
"voting_results": {},
"winning_margin": 0.5,
"layman_explanation": layman,
"url": url,
}
ok = db.insert_motion(md)
assert ok, "insert_motion failed"
conn = duckdb.connect(db.db_path)
row = conn.execute("SELECT id FROM motions WHERE url = ?", (url,)).fetchone()
conn.close()
assert row is not None, "couldn't find inserted motion"
return int(row[0])
def test_match_mps_basic(tmp_path: Path):
db_path = str(tmp_path / "test_motions.db")
db = MotionDatabase(db_path)
# create 4 motions
mids = []
for i in range(1, 5):
mids.append(_create_motion_and_get_id(db, f"http://m{i}", f"Motion {i}"))
# MPs
mpA = "Alpha, A."
mpB = "Beta, B."
mpC = "Gamma, G."
# Voting patterns (motions 1..4)
# A: v v t v (3/4)
# B: t t t t (0/4)
# C: v v v v (4/4)
votes = {
mpA: ["voor", "voor", "tegen", "voor"],
mpB: ["tegen", "tegen", "tegen", "tegen"],
mpC: ["voor", "voor", "voor", "voor"],
}
for idx, mid in enumerate(mids):
for mp_name, vlist in votes.items():
db.insert_mp_vote(
motion_id=mid,
mp_name=mp_name,
vote=vlist[idx],
date="2023-01-01",
party=None,
)
# User votes matching Gamma exactly
user_votes = {mids[0]: "Voor", mids[1]: "Voor", mids[2]: "Voor", mids[3]: "Voor"}
results = db.match_mps_for_votes(user_votes, limit=10)
assert results, "No results returned"
# Top candidate should be Gamma
top = results[0]
assert top["mp_name"] == mpC
assert top["matched"] == 4
assert top["overlap"] == 4
assert top["agreement_pct"] == 100.0
# Check Alpha is second with 3 matched
names = [r["mp_name"] for r in results]
assert mpA in names
a = next(r for r in results if r["mp_name"] == mpA)
assert a["matched"] == 3
assert a["overlap"] == 4
assert a["agreement_pct"] == 75.0
def test_choose_discriminating_motions(tmp_path: Path):
db_path = str(tmp_path / "test_motions2.db")
db = MotionDatabase(db_path)
# create 3 motions
mids = []
for i in range(1, 4):
mids.append(_create_motion_and_get_id(db, f"http://d{i}", f"DMotion {i}"))
mpA = "Alice, A."
mpB = "Bob, B."
mpC = "Carol, C."
# Votes: motion1 splits A vs B/C
# motion1: A=voor, B=tegen, C=tegen
# motion2: all voor
# motion3: all tegen
db.insert_mp_vote(mids[0], mpA, "voor", "2023-01-01")
db.insert_mp_vote(mids[0], mpB, "tegen", "2023-01-01")
db.insert_mp_vote(mids[0], mpC, "tegen", "2023-01-01")
for mp in (mpA, mpB, mpC):
db.insert_mp_vote(mids[1], mp, "voor", "2023-01-01")
db.insert_mp_vote(mids[2], mp, "tegen", "2023-01-01")
candidates = [mpA, mpB, mpC]
chosen = db.choose_discriminating_motions(candidates, excluded_motion_ids=[], k=1)
assert chosen, "No discriminating motion returned"
# best splitter should be motion1 (mids[0])
assert chosen[0] == mids[0]
def test_match_excludes_zero_overlap(tmp_path: Path):
"""MPs who voted on none of the user's motions must not appear in results."""
db_path = str(tmp_path / "zo.db")
db = MotionDatabase(db_path)
mid1 = _create_motion_and_get_id(db, "http://zo1", "ZO Motion 1")
mid2 = _create_motion_and_get_id(db, "http://zo2", "ZO Motion 2")
mp_overlap = "Overlap, O."
mp_noshow = "Noshow, N."
db.insert_mp_vote(mid1, mp_overlap, "voor", "2023-01-01")
# mp_noshow only voted on mid2, not mid1
db.insert_mp_vote(mid2, mp_noshow, "voor", "2023-01-01")
results = db.match_mps_for_votes({mid1: "Voor"}, limit=10)
names = [r["mp_name"] for r in results]
assert mp_overlap in names, "mp_overlap should appear"
assert mp_noshow not in names, "mp_noshow had no overlap and must be excluded"
def test_invalid_input_empty_user_votes(tmp_path: Path):
"""Passing an empty dict must raise ValueError."""
db_path = str(tmp_path / "inv.db")
db = MotionDatabase(db_path)
import pytest
with pytest.raises(ValueError, match="non-empty"):
db.match_mps_for_votes({})
def test_invalid_input_empty_candidates(tmp_path: Path):
"""Passing empty candidates to choose_discriminating_motions must raise ValueError."""
db_path = str(tmp_path / "inv2.db")
db = MotionDatabase(db_path)
import pytest
with pytest.raises(ValueError):
db.choose_discriminating_motions([], excluded_motion_ids=[])
def test_geen_stem_not_counted_in_overlap(tmp_path: Path):
"""'Geen stem' user votes should be skipped (not counted in overlap or matched)."""
db_path = str(tmp_path / "gs.db")
db = MotionDatabase(db_path)
mid1 = _create_motion_and_get_id(db, "http://gs1", "GS Motion 1")
mid2 = _create_motion_and_get_id(db, "http://gs2", "GS Motion 2")
mpA = "Alpha, A."
db.insert_mp_vote(mid1, mpA, "voor", "2023-01-01")
db.insert_mp_vote(mid2, mpA, "voor", "2023-01-01")
# user says Geen stem on mid1 (skip), Voor on mid2
results = db.match_mps_for_votes({mid1: "Geen stem", mid2: "Voor"}, limit=10)
assert results, "Expected at least one result"
r = results[0]
# overlap should only be 1 (mid2 counted, mid1 skipped)
assert r["overlap"] == 1
assert r["matched"] == 1
assert r["agreement_pct"] == 100.0
def test_choose_excluded_motions_respected(tmp_path: Path):
"""Excluded motion ids must not be returned by choose_discriminating_motions."""
db_path = str(tmp_path / "excl.db")
db = MotionDatabase(db_path)
mid1 = _create_motion_and_get_id(db, "http://ex1", "EX Motion 1")
mid2 = _create_motion_and_get_id(db, "http://ex2", "EX Motion 2")
mpA = "Alice, A."
mpB = "Bob, B."
# mid1 splits them; mid2 they agree
db.insert_mp_vote(mid1, mpA, "voor", "2023-01-01")
db.insert_mp_vote(mid1, mpB, "tegen", "2023-01-01")
db.insert_mp_vote(mid2, mpA, "voor", "2023-01-01")
db.insert_mp_vote(mid2, mpB, "voor", "2023-01-01")
# Exclude mid1 — only mid2 is available, should return mid2
chosen = db.choose_discriminating_motions(
[mpA, mpB], excluded_motion_ids=[mid1], k=1
)
assert mid1 not in chosen, "Excluded motion must not be returned"
assert mid2 in chosen, "mid2 should be chosen as only available motion"
Loading…
Cancel
Save