fix: quiz seed from motions with actual individual MP votes

Root causes:
- Seed selection sorted all 28k motions by controversy_score, but only 282 of
  them have individual MP vote records. The most controversial motions have
  only party-level votes, so match_mps_for_votes always returned an empty list.
- global_db singleton was used for match/discriminate instead of the db_path
  passed to the tab builder.

Fixes:
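- Add MotionDatabase.get_motions_with_individual_votes(k), which returns up to
  k IDs of motions that have at least one mp_votes row whose mp_name contains a
  comma (individual MPs in 'Lastname, F.' format), ordered by controversy_score
  descending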
- Replace broken seed logic in build_mp_quiz_tab with this new method
- Replace global_db usages with a local MotionDatabase(db_path) instance
- Guard against motion IDs present in votes but absent from motions DataFrame
main
Sven Geboers 1 month ago
parent 238d9e9ec2
commit 504400faf2
  1. 34
      database.py
  2. 46
      explorer.py

@ -657,6 +657,40 @@ class MotionDatabase:
return sorted(results, key=lambda x: x["agreement_percentage"], reverse=True) return sorted(results, key=lambda x: x["agreement_percentage"], reverse=True)
def get_motions_with_individual_votes(self, k: int = 20) -> List[int]:
    """Return up to k motion IDs that have individual MP vote records.

    Selects motions where at least one ``mp_votes.mp_name`` contains a comma
    (i.e. individual MPs in 'Lastname, F.' format), ordered by
    ``controversy_score`` descending, with ties broken by ascending motion ID.

    Args:
        k: maximum number of motion IDs to return.

    Returns:
        List of motion IDs (ints), sorted by controversy_score DESC. An empty
        list is returned when duckdb is unavailable or when any error occurs
        while querying (the error is logged, not raised).
    """
    if duckdb is None:
        return []
    try:
        conn = duckdb.connect(self.db_path, read_only=True)
        try:
            rows = conn.execute(
                """
                SELECT DISTINCT mv.motion_id, m.controversy_score
                FROM mp_votes mv
                JOIN motions m ON mv.motion_id = m.id
                WHERE mv.mp_name LIKE '%,%'
                ORDER BY m.controversy_score DESC, mv.motion_id ASC
                LIMIT ?
                """,
                (int(k),),
            ).fetchall()
        finally:
            # Always release the connection, including when the query fails;
            # previously a failing execute()/fetchall() skipped close().
            conn.close()
        # controversy_score is selected only so it can be used in ORDER BY
        # together with DISTINCT; callers receive just the motion IDs.
        return [int(row[0]) for row in rows]
    except Exception:
        # Best-effort lookup: log the failure and report "no motions".
        _logger.exception("Error in get_motions_with_individual_votes")
        return []
def match_mps_for_votes( def match_mps_for_votes(
self, user_votes: Dict[int, str], limit: int = 50 self, user_votes: Dict[int, str], limit: int = 50
) -> List[Dict]: ) -> List[Dict]:

@ -731,21 +731,20 @@ def build_mp_quiz_tab(db_path: str) -> None:
if "mp_quiz_asked" not in st.session_state: if "mp_quiz_asked" not in st.session_state:
st.session_state["mp_quiz_asked"] = [] st.session_state["mp_quiz_asked"] = []
from database import MotionDatabase as _MotionDatabase
db_inst = _MotionDatabase(db_path)
df = load_motions_df(db_path) df = load_motions_df(db_path)
if df.empty: if df.empty:
st.warning("Geen moties beschikbaar om de quiz te starten.") st.warning("Geen moties beschikbaar om de quiz te starten.")
return return
# seed motions by controversy_score, prefer those with layman_explanation # seed from motions that actually have individual MP vote records
candidates_df = df[df["layman_explanation"].notna()] seed_ids = db_inst.get_motions_with_individual_votes(k=SEED_MOTIONS)
if candidates_df.empty: if not seed_ids:
candidates_df = df st.warning("Geen individuele stemdata beschikbaar voor de quiz.")
seed = ( return
candidates_df.sort_values(by="controversy_score", ascending=False)
.head(SEED_MOTIONS)
.copy()
)
seed_ids = [int(x) for x in seed["id"].tolist()]
# Determine next motion to ask # Determine next motion to ask
def _next_motion_id(): def _next_motion_id():
@ -755,13 +754,12 @@ def build_mp_quiz_tab(db_path: str) -> None:
return mid return mid
# otherwise ask discriminating motion based on remaining candidate MPs # otherwise ask discriminating motion based on remaining candidate MPs
# compute current candidate set # compute current candidate set
from database import db as global_db
try: try:
user_votes = { user_votes = {
int(k): v for k, v in st.session_state["mp_quiz_votes"].items() int(k): v for k, v in st.session_state["mp_quiz_votes"].items()
} }
ranked = global_db.match_mps_for_votes(user_votes, limit=200) ranked = db_inst.match_mps_for_votes(user_votes, limit=200)
except Exception: except Exception:
ranked = [] ranked = []
@ -770,9 +768,7 @@ def build_mp_quiz_tab(db_path: str) -> None:
if not candidates: if not candidates:
return None return None
try: try:
next_ids = global_db.choose_discriminating_motions( next_ids = db_inst.choose_discriminating_motions(candidates, excluded, k=1)
candidates, excluded, k=1
)
return next_ids[0] if next_ids else None return next_ids[0] if next_ids else None
except Exception: except Exception:
return None return None
@ -786,14 +782,20 @@ def build_mp_quiz_tab(db_path: str) -> None:
if st.button("Reset quiz"): if st.button("Reset quiz"):
st.session_state["mp_quiz_votes"] = {} st.session_state["mp_quiz_votes"] = {}
st.session_state["mp_quiz_asked"] = [] st.session_state["mp_quiz_asked"] = []
st.experimental_rerun() st.rerun()
# main question loop (single question per render) # main question loop (single question per render)
next_mid = _next_motion_id() next_mid = _next_motion_id()
if next_mid is None: if next_mid is None:
st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.") st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.")
else: else:
motion_row = df[df["id"] == next_mid].iloc[0] motion_rows = df[df["id"] == next_mid]
if motion_rows.empty:
# motion has votes but isn't in the motions DataFrame — skip it
st.session_state["mp_quiz_votes"][str(next_mid)] = "Geen stem"
st.rerun()
return
motion_row = motion_rows.iloc[0]
st.markdown(f"### {motion_row.get('title') or f'Motie #{next_mid}'}") st.markdown(f"### {motion_row.get('title') or f'Motie #{next_mid}'}")
if motion_row.get("layman_explanation"): if motion_row.get("layman_explanation"):
st.info(motion_row.get("layman_explanation")) st.info(motion_row.get("layman_explanation"))
@ -808,14 +810,12 @@ def build_mp_quiz_tab(db_path: str) -> None:
if st.button("Beantwoord en verder", key=f"mp_quiz_submit_{next_mid}"): if st.button("Beantwoord en verder", key=f"mp_quiz_submit_{next_mid}"):
st.session_state["mp_quiz_votes"][str(next_mid)] = choice st.session_state["mp_quiz_votes"][str(next_mid)] = choice
st.session_state["mp_quiz_asked"].append(next_mid) st.session_state["mp_quiz_asked"].append(next_mid)
st.experimental_rerun() st.rerun()
# display current ranking # display current ranking
from database import db as global_db
try: try:
user_votes = {int(k): v for k, v in st.session_state["mp_quiz_votes"].items()} user_votes = {int(k): v for k, v in st.session_state["mp_quiz_votes"].items()}
ranking = global_db.match_mps_for_votes(user_votes, limit=50) ranking = db_inst.match_mps_for_votes(user_votes, limit=50)
except Exception: except Exception:
ranking = [] ranking = []
@ -901,9 +901,9 @@ def run_app() -> None:
build_search_tab(db_path, show_rejected) build_search_tab(db_path, show_rejected)
with tab4: with tab4:
build_browser_tab(db_path, show_rejected) build_browser_tab(db_path, show_rejected)
with tab6:
build_mp_quiz_tab(db_path)
with tab5: with tab5:
build_mp_quiz_tab(db_path)
with tab6:
build_svd_components_tab(db_path) build_svd_components_tab(db_path)
else: else:
# Fallback for environments where `st.tabs` is not available: use a radio selector # Fallback for environments where `st.tabs` is not available: use a radio selector

Loading…
Cancel
Save