fix: quiz seed from motions with actual individual MP votes

Root causes:
- Seed selection sorted by controversy_score across all 28k motions, but only 282 of them have individual MP vote records. The most controversial motions only have party-level votes, so match_mps_for_votes always returned an empty result.
- The global_db singleton was used for match/discriminate instead of the db_path passed to the tab builder.

Fixes:
- Add MotionDatabase.get_motions_with_individual_votes(k), which queries motions that have votes with a comma-formatted mp_name, ordered by controversy_score.
- Replace the broken seed logic in build_mp_quiz_tab with this new method.
- Replace global_db usages with a local MotionDatabase(db_path) instance.
- Guard against motion IDs that are present in votes but absent from the motions DataFrame.
1 month ago · 504400faf2
parent 238d9e9ec2
commit 504400faf2
2 changed files with 57 additions and 23 deletions
--- a/database.py
+++ b/database.py
@ -657,6 +657,40 @@ class MotionDatabase:
        return sorted(results, key=lambda x: x["agreement_percentage"], reverse=True)
    def get_motions_with_individual_votes(self, k: int = 20) -> List[int]:
        """Return up to k motion IDs that have individual MP vote records.

        Selects motions where at least one ``mp_name`` contains a comma (i.e.
        individual MPs in 'Lastname, F.' format), ordered by controversy_score
        descending so the most discriminating motions come first. Ties are
        broken by ascending motion ID so the result is deterministic.

        This is a best-effort lookup: it never raises. If the ``duckdb``
        module is unavailable, or anything goes wrong while opening the
        database or running the query, the error is logged and an empty list
        is returned.

        Args:
            k: maximum number of motion IDs to return.

        Returns:
            List of motion IDs (ints), sorted by controversy_score DESC.
        """
        if duckdb is None:
            return []
        conn = None
        try:
            conn = duckdb.connect(self.db_path, read_only=True)
            rows = conn.execute(
                """
                SELECT DISTINCT mv.motion_id, m.controversy_score
                FROM mp_votes mv
                JOIN motions m ON mv.motion_id = m.id
                WHERE mv.mp_name LIKE '%,%'
                ORDER BY m.controversy_score DESC, mv.motion_id ASC
                LIMIT ?
                """,
                (int(k),),
            ).fetchall()
            return [int(r[0]) for r in rows]
        except Exception:
            _logger.exception("Error in get_motions_with_individual_votes")
            return []
        finally:
            # Always release the connection, including when the query raised;
            # previously a failing execute()/fetchall() leaked the handle.
            if conn is not None:
                try:
                    conn.close()
                except Exception:
                    _logger.exception(
                        "Error closing connection in "
                        "get_motions_with_individual_votes"
                    )
    def match_mps_for_votes(
        self, user_votes: Dict[int, str], limit: int = 50
    ) -> List[Dict]:
--- a/explorer.py
+++ b/explorer.py
@ -731,21 +731,20 @@ def build_mp_quiz_tab(db_path: str) -> None:
    if "mp_quiz_asked" not in st.session_state:
        st.session_state["mp_quiz_asked"] = []
    from database import MotionDatabase as _MotionDatabase
    db_inst = _MotionDatabase(db_path)
    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar om de quiz te starten.")
        return
-    # seed motions by controversy_score, prefer those with layman_explanation
+    # seed from motions that actually have individual MP vote records
-    candidates_df = df[df["layman_explanation"].notna()]
+    seed_ids = db_inst.get_motions_with_individual_votes(k=SEED_MOTIONS)
-    if candidates_df.empty:
+    if not seed_ids:
-        candidates_df = df
+        st.warning("Geen individuele stemdata beschikbaar voor de quiz.")
-    seed = (
+        return
        candidates_df.sort_values(by="controversy_score", ascending=False)
        .head(SEED_MOTIONS)
        .copy()
    )
    seed_ids = [int(x) for x in seed["id"].tolist()]
    # Determine next motion to ask
    def _next_motion_id():
@ -755,13 +754,12 @@ def build_mp_quiz_tab(db_path: str) -> None:
                return mid
        # otherwise ask discriminating motion based on remaining candidate MPs
        # compute current candidate set
        from database import db as global_db
        try:
            user_votes = {
                int(k): v for k, v in st.session_state["mp_quiz_votes"].items()
            }
-            ranked = global_db.match_mps_for_votes(user_votes, limit=200)
+            ranked = db_inst.match_mps_for_votes(user_votes, limit=200)
        except Exception:
            ranked = []
@ -770,9 +768,7 @@ def build_mp_quiz_tab(db_path: str) -> None:
        if not candidates:
            return None
        try:
-            next_ids = global_db.choose_discriminating_motions(
+            next_ids = db_inst.choose_discriminating_motions(candidates, excluded, k=1)
                candidates, excluded, k=1
            )
            return next_ids[0] if next_ids else None
        except Exception:
            return None
@ -786,14 +782,20 @@ def build_mp_quiz_tab(db_path: str) -> None:
        if st.button("Reset quiz"):
            st.session_state["mp_quiz_votes"] = {}
            st.session_state["mp_quiz_asked"] = []
-            st.experimental_rerun()
+            st.rerun()
    # main question loop (single question per render)
    next_mid = _next_motion_id()
    if next_mid is None:
        st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.")
    else:
-        motion_row = df[df["id"] == next_mid].iloc[0]
+        motion_rows = df[df["id"] == next_mid]
        if motion_rows.empty:
            # motion has votes but isn't in the motions DataFrame — skip it
            st.session_state["mp_quiz_votes"][str(next_mid)] = "Geen stem"
            st.rerun()
            return
        motion_row = motion_rows.iloc[0]
        st.markdown(f"### {motion_row.get('title') or f'Motie #{next_mid}'}")
        if motion_row.get("layman_explanation"):
            st.info(motion_row.get("layman_explanation"))
@ -808,14 +810,12 @@ def build_mp_quiz_tab(db_path: str) -> None:
        if st.button("Beantwoord en verder", key=f"mp_quiz_submit_{next_mid}"):
            st.session_state["mp_quiz_votes"][str(next_mid)] = choice
            st.session_state["mp_quiz_asked"].append(next_mid)
-            st.experimental_rerun()
+            st.rerun()
    # display current ranking
    from database import db as global_db
    try:
        user_votes = {int(k): v for k, v in st.session_state["mp_quiz_votes"].items()}
-        ranking = global_db.match_mps_for_votes(user_votes, limit=50)
+        ranking = db_inst.match_mps_for_votes(user_votes, limit=50)
    except Exception:
        ranking = []
@ -901,9 +901,9 @@ def run_app() -> None:
            build_search_tab(db_path, show_rejected)
        with tab4:
            build_browser_tab(db_path, show_rejected)
        with tab6:
            build_mp_quiz_tab(db_path)
        with tab5:
            build_mp_quiz_tab(db_path)
        with tab6:
            build_svd_components_tab(db_path)
    else:
        # Fallback for environments where `st.tabs` is not available: use a radio selector