# motief/explorer.py

"""Parlement Explorer — Streamlit data analysis app.

Four tabs:
  1. Politiek Kompas  — 2D scatter of MPs/parties, window slider
  2. Partij Trajectories — party centroid lines over time
  3. Motie Zoeken      — text search + similarity lookup
  4. Motie Browser     — sortable table + detail panel

Run with: streamlit run explorer.py

Import-safe: heavy computation is behind @st.cache_data and only runs at UI time.
All DuckDB connections are read_only=True so the app can run alongside the pipeline.
"""

from __future__ import annotations

import json
import logging
import os
import re
from typing import Dict, List, Optional, Tuple

import duckdb
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

logger = logging.getLogger(__name__)

# Party colour palette (consistent across tabs).
# Maps a party name to the hex colour used for its markers and lines. Both
# abbreviations and some long-form names are present so that either spelling
# resolves to a colour. Callers in this module look names up with
# `.get(party, "#9E9E9E")`, so parties missing here are drawn in grey.
PARTY_COLOURS: Dict[str, str] = {
    "VVD": "#1E73BE",
    "PVV": "#002366",
    "D66": "#00A36C",
    "CDA": "#4CAF50",
    "SP": "#E53935",
    "PvdA": "#D32F2F",
    "GroenLinks": "#388E3C",
    "GroenLinks-PvdA": "#2E7D32",
    "CU": "#0288D1",
    "SGP": "#F4511E",
    "PvdD": "#43A047",
    "FVD": "#6A1B9A",
    "JA21": "#7B1FA2",
    "BBB": "#8D6E63",
    "NSC": "#FF8F00",
    "Nieuw Sociaal Contract": "#FF8F00",  # alias used in mp_metadata
    "DENK": "#00897B",
    "50PLUS": "#7E57C2",
    "Volt": "#572AB7",
    "ChristenUnie": "#0288D1",  # same colour as the "CU" abbreviation
    "Unknown": "#9E9E9E",  # neutral grey, identical to the lookup fallback
}

# Ordered list of well-known parties for trajectory default selection.
# Keeps the chart readable without overwhelming users with all parties.
# The trajectories tab falls back to this list (filtered to the parties that
# actually have data) when none of its preferred default parties is available.
KNOWN_MAJOR_PARTIES: List[str] = [
    "VVD",
    "PVV",
    "D66",
    "GroenLinks-PvdA",
    "GroenLinks",
    "PvdA",
    "CDA",
    "SP",
    "NSC",
    "CU",
    "BBB",
]


# Parties currently seated in the Tweede Kamer (2023 election cycle).
# These are the entity_ids as stored in svd_vectors for window='2025'.
# NOTE(review): other code in this module queries window_id='current_parliament';
# confirm which window id this comment should refer to.
# Used as an allow-list: per-party axis scores are only computed for parties
# whose normalised name appears in this set.
CURRENT_PARLIAMENT_PARTIES: frozenset[str] = frozenset(
    {
        "PVV",
        "VVD",
        "NSC",
        "BBB",
        "D66",
        "GroenLinks-PvdA",
        "CDA",
        "SP",
        "ChristenUnie",
        "SGP",
        "Volt",
        "DENK",
        "PvdD",
        "JA21",
        "FVD",
    }
)

# Normalize variant party names to canonical display names in CURRENT_PARLIAMENT_PARTIES.
# Keys are spellings (or single-member group labels) that can appear as the
# `party` value in mp_metadata; values are the canonical party name they are
# folded into. Names without an entry are used unchanged.
_PARTY_NORMALIZE: dict[str, str] = {
    "Nieuw Sociaal Contract": "NSC",
    "CU": "ChristenUnie",
    "GL": "GroenLinks-PvdA",
    "GroenLinks": "GroenLinks-PvdA",
    "PvdA": "GroenLinks-PvdA",
    "Gündoğan": "Volt",  # confirmed Volt, left parliament 2023-12-05
    "Lid Keijzer": "BBB",  # Keijzer left CDA, joined BBB cabinet
    "Groep Markuszower": "PVV",  # Markuszower sits with PVV faction
}


# ---------------------------------------------------------------------------
# Cached loaders
# ---------------------------------------------------------------------------


@st.cache_data(show_spinner="Beschikbare tijdsvensters laden…")
def get_available_windows(db_path: str) -> List[str]:
    """List every distinct ``window_id`` found in ``svd_vectors``, sorted ascending.

    The database is opened read-only. If the query fails, the error is logged
    and an empty list is returned. The connection is closed in either case.
    """
    sql = "SELECT DISTINCT window_id FROM svd_vectors ORDER BY window_id"
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        window_ids = [record[0] for record in connection.execute(sql).fetchall()]
    except Exception:
        logger.exception("Failed to query available windows")
        window_ids = []
    finally:
        connection.close()
    return window_ids


@st.cache_data(show_spinner=False)
def get_uniform_dim_windows(db_path: str) -> List[str]:
    """Return the windows whose MP vectors are consistent enough to feed into PCA.

    A window can hold MP vectors of several lengths (for instance after multiple
    pipeline runs). For every window the query determines the *dominant*
    dimension: the vector length shared by the most MP rows, with ties resolved
    in favour of the larger length. A window is kept only when

    * its dominant dimension is at least 25, and
    * at least 10 MP rows have that dominant dimension (fewer would give a
      degenerate PCA input).

    The result is ordered by ``window_id``. On a query error the exception is
    logged and an empty list is returned; the read-only connection is always
    closed.
    """
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        cursor = connection.execute(
            """
            WITH vec_dims AS (
                SELECT window_id, json_array_length(vector) AS dim
                FROM svd_vectors
                WHERE entity_type = 'mp'
            ),
            window_dim_counts AS (
                SELECT window_id, dim, COUNT(*) AS cnt
                FROM vec_dims
                GROUP BY window_id, dim
            ),
            dominant AS (
                SELECT DISTINCT ON (window_id) window_id, dim, cnt
                FROM window_dim_counts
                ORDER BY window_id, cnt DESC, dim DESC
            )
            SELECT window_id
            FROM dominant
            WHERE dim >= 25 AND cnt >= 10
            ORDER BY window_id
            """
        )
        window_ids = [record[0] for record in cursor.fetchall()]
    except Exception:
        logger.exception("Failed to query uniform-dim windows")
        window_ids = []
    finally:
        connection.close()
    return window_ids


@st.cache_data(show_spinner="2D posities berekenen (kan even duren)…")
def load_positions(
    db_path: str, window_size: str = "quarterly"
) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict]:
    """Project aligned SVD vectors to 2D with PCA and return positions per window.

    The PCA is always fitted on every window reported by
    ``get_uniform_dim_windows`` (quarterly and annual alike), so the principal
    components reflect the full temporal spread of the data. Fitting on the
    annual windows alone lets PC1 pick up cross-temporal drift instead of
    left-right ideology, which rotates the picture by roughly 90°.

    ``window_size`` only affects which windows are *returned*: with
    ``"annual"`` every window whose id contains ``"-Q"`` is dropped after the
    projection; any other value returns all windows.

    Returns:
        positions_by_window: {window_id: {entity_name: (x, y)}}
        axis_def: dict with x_axis, y_axis, method keys
        Both are empty dicts when no usable window exists.
    """
    from analysis.political_axis import compute_2d_axes

    pca_windows = get_uniform_dim_windows(db_path)
    if not pca_windows:
        return {}, {}

    positions_by_window, axis_def = compute_2d_axes(
        db_path,
        window_ids=pca_windows,
        method="pca",
        pca_residual=True,
        normalize_vectors=True,
    )

    # Anything other than "annual" gets the unfiltered projection.
    if window_size != "annual":
        return positions_by_window, axis_def

    # Annual windows are the ones without a quarter suffix in their id.
    non_quarterly = {wid for wid in pca_windows if "-Q" not in wid}
    annual_positions = {
        wid: coords
        for wid, coords in positions_by_window.items()
        if wid in non_quarterly
    }
    return annual_positions, axis_def


@st.cache_data(show_spinner="Partijkaart laden…")
def load_party_map(db_path: str) -> Dict[str, str]:
    """Build the {mp_name: party} lookup, shortening long-form party names.

    The raw mapping comes from ``analysis.visualize._load_party_map``. The only
    rewrite applied here is "Nieuw Sociaal Contract" -> "NSC"; every other party
    name is passed through untouched. If loading fails the error is logged and
    an empty dict is returned.
    """
    from analysis.visualize import _load_party_map

    long_to_short: Dict[str, str] = {"Nieuw Sociaal Contract": "NSC"}

    try:
        normalised: Dict[str, str] = {}
        for mp_name, party_name in _load_party_map(db_path).items():
            normalised[mp_name] = long_to_short.get(party_name, party_name)
    except Exception:
        logger.exception("Failed to load party map")
        return {}
    return normalised


@st.cache_data(show_spinner="Actieve Kamerleden laden…")
def load_active_mps(db_path: str) -> set:
    """Return the set of mp_name values that are currently seated in parliament.

    An MP is considered active if their mp_metadata row has tot_en_met IS NULL,
    meaning they have no recorded end date for their current seat.

    Args:
        db_path: Path to the DuckDB database; opened read-only.

    Returns:
        Set of ``mp_name`` values. An empty set is returned (and the error
        logged) when the database cannot be opened or queried.
    """
    con = None
    try:
        con = duckdb.connect(database=db_path, read_only=True)
        rows = con.execute(
            "SELECT mp_name FROM mp_metadata WHERE tot_en_met IS NULL"
        ).fetchall()
        return {r[0] for r in rows}
    except Exception:
        logger.exception("Failed to load active MPs")
        return set()
    finally:
        # Close on every path: previously a failing query left the
        # connection open because close() sat on the success path only.
        if con is not None:
            con.close()


@st.cache_data(show_spinner="Partijposities op SVD-assen laden…")
def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
    """Return per-party SVD vectors, computed as the mean of individual MP vectors.

    Steps:
      1. Build an MP -> party mapping from ``mp_metadata``, restricted to rows
         that overlap the period starting 2023-11-22. When an MP has several
         rows (party switches), the row that sorts last by ``van`` wins.
      2. Normalise party names through ``_PARTY_NORMALIZE``.
      3. Load every ``entity_type='mp'`` vector of window ``current_parliament``
         and keep those whose ``entity_id`` is a mapped MP belonging to a party
         in ``CURRENT_PARLIAMENT_PARTIES``.
      4. Average the kept vectors element-wise per party.

    Args:
        db_path: Path to the DuckDB database; opened read-only.

    Returns:
        {party_name: mean vector as a list of floats}. An empty dict is
        returned (and the error logged) when anything fails, including vectors
        of unequal length within one party.
    """
    con = None
    try:
        con = duckdb.connect(database=db_path, read_only=True)

        # One ordered query is enough: iterating in ascending `van` order and
        # overwriting per MP leaves the most recent party assignment in place.
        meta_ordered = con.execute(
            "SELECT mp_name, party FROM mp_metadata "
            "WHERE van >= '2023-11-22' OR tot_en_met IS NULL OR tot_en_met >= '2023-11-22' "
            "ORDER BY van ASC"
        ).fetchall()
        mp_party: Dict[str, str] = {}
        for mp_name, party in meta_ordered:
            if mp_name and party:
                # Fold variant spellings into the canonical party name.
                mp_party[mp_name] = _PARTY_NORMALIZE.get(party, party)

        # Individual MP vectors from current_parliament
        rows = con.execute(
            "SELECT entity_id, vector FROM svd_vectors "
            "WHERE entity_type='mp' AND window_id='current_parliament'"
        ).fetchall()

        party_vecs: Dict[str, list] = {}
        for entity_id, raw_vec in rows:
            party = mp_party.get(entity_id)
            if party is None or party not in CURRENT_PARLIAMENT_PARTIES:
                continue
            # The vector column may arrive as JSON text, bytes, or a sequence.
            if isinstance(raw_vec, str):
                vec = json.loads(raw_vec)
            elif isinstance(raw_vec, (bytes, bytearray)):
                vec = json.loads(raw_vec.decode())
            elif isinstance(raw_vec, list):
                vec = raw_vec
            else:
                try:
                    vec = list(raw_vec)
                except Exception:
                    continue
            # Missing components are treated as 0.0.
            fvec = [float(v) if v is not None else 0.0 for v in vec]
            party_vecs.setdefault(party, []).append(fvec)

        # Average vectors per party
        return {
            party: np.array(vecs).mean(axis=0).tolist()
            for party, vecs in party_vecs.items()
        }
    except Exception:
        logger.exception("Failed to load party axis scores")
        return {}
    finally:
        # `con` stays None when connect() itself failed.
        if con is not None:
            con.close()


@st.cache_data(show_spinner="Scree-plot laden…")
def load_scree_data(db_path: str) -> List[float]:
    """Return per-component importances (L2-norm per SVD dim), sorted descending.

    Uses individual MP vectors from window ``current_parliament``, restricted to
    rows whose ``entity_id`` contains a comma (``entity_id LIKE '%,%'``).
    For each vector position the L2-norm across all MPs is computed; the norms
    are then sorted descending so the elbow shape is visible in the scree chart.
    Because of that sort, the position in the returned list is a rank and not
    the original SVD component index.

    Args:
        db_path: Path to the DuckDB database; opened read-only.

    Returns:
        List of non-negative floats, largest first. Empty when no vectors are
        found or when loading fails (the error is logged).
    """
    con = None
    try:
        con = duckdb.connect(database=db_path, read_only=True)
        rows = con.execute(
            "SELECT entity_id, vector FROM svd_vectors "
            "WHERE entity_type='mp' AND window_id='current_parliament' "
            "AND entity_id LIKE '%,%'"
        ).fetchall()
        vectors: List[List[float]] = []
        for _entity_id, raw_vec in rows:
            # The vector column may arrive as JSON text, bytes, or a sequence.
            if isinstance(raw_vec, str):
                vec = json.loads(raw_vec)
            elif isinstance(raw_vec, (bytes, bytearray)):
                vec = json.loads(raw_vec.decode())
            elif isinstance(raw_vec, list):
                vec = raw_vec
            else:
                try:
                    vec = list(raw_vec)
                except Exception:
                    continue
            # Missing components are treated as 0.0.
            vectors.append([float(v) if v is not None else 0.0 for v in vec])
        if not vectors:
            return []
        # The first vector defines how many dimensions are reported; shorter
        # vectors simply do not contribute to the dimensions they lack.
        n_dims = len(vectors[0])
        importances: List[float] = []
        for dim in range(n_dims):
            col = [v[dim] for v in vectors if dim < len(v)]
            importances.append(sum(x**2 for x in col) ** 0.5)
        return sorted(importances, reverse=True)
    except Exception:
        logger.exception("Failed to load scree data")
        return []
    finally:
        # `con` stays None when connect() itself failed.
        if con is not None:
            con.close()


def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
    """Draw a scree chart of the leading SVD components' relative importance.

    Every importance is expressed as a percentage of the sum over *all*
    supplied components, not just the ones displayed. The first two bars (the
    components used in the compass) are drawn in a stronger blue than the rest.
    A dotted amber line shows the running total of the displayed percentages,
    and a dashed grey line marks the 50 % level. Nothing is rendered when
    ``importances`` is empty.

    Args:
        importances: Importance values sorted descending (from load_scree_data).
        n_show: How many leading components to display (default: first 15).
    """
    if not importances:
        return

    grand_total = sum(importances) or 1.0  # guard against division by zero
    shares = [value / grand_total * 100 for value in importances[:n_show]]
    ranks = list(range(1, len(shares) + 1))

    # Running total of the displayed shares, for the overlay line.
    cumulative: List[float] = []
    for share in shares:
        previous = cumulative[-1] if cumulative else 0.0
        cumulative.append(previous + share)

    # Strong blue for the compass axes, pale blue for everything else.
    n_highlight = 2
    n_strong = min(n_highlight, len(shares))
    bar_colours = ["#1565C0"] * n_strong + ["#90CAF9"] * (len(shares) - n_strong)

    bars = go.Bar(
        x=ranks,
        y=shares,
        marker_color=bar_colours,
        hovertemplate="As %{x}<br><b>%{y:.1f}%</b> van totaal<extra></extra>",
        showlegend=False,
    )
    running_total_line = go.Scatter(
        x=ranks,
        y=cumulative,
        mode="lines+markers",
        line={"color": "#F57C00", "width": 2, "dash": "dot"},
        marker={"size": 5, "color": "#F57C00"},
        hovertemplate="As %{x}<br>Cumulatief: <b>%{y:.1f}%</b><extra></extra>",
        name="Cumulatief",
        showlegend=True,
    )

    fig = go.Figure()
    fig.add_trace(bars)
    fig.add_trace(running_total_line)

    # Reference line at the 50 % level.
    fig.add_hline(
        y=50,
        line_dash="dash",
        line_color="#BDBDBD",
        line_width=1,
        annotation_text="50%",
        annotation_position="right",
        annotation_font_color="#9E9E9E",
        annotation_font_size=11,
    )

    # Print the percentage just above each highlighted bar.
    for rank, share in zip(ranks[:n_strong], shares[:n_strong]):
        fig.add_annotation(
            x=rank,
            y=share + 0.3,
            text=f"{share:.1f}%",
            showarrow=False,
            font={"size": 11, "color": "#1565C0"},
            yanchor="bottom",
        )

    title_cfg = {
        "text": "Belang per SVD-as",
        "font": {"size": 13, "color": "#555555"},
        "x": 0.02,
        "xanchor": "left",
    }
    legend_cfg = {
        "orientation": "h",
        "x": 0.5,
        "xanchor": "center",
        "y": 1.08,
        "font": {"size": 11},
    }
    xaxis_cfg = {
        "title": {"text": "As (rang)", "font": {"size": 11}},
        "tickmode": "linear",
        "tick0": 1,
        "dtick": 1,
        "showline": False,
        "showgrid": False,
    }
    yaxis_cfg = {
        "title": {"text": "% van totale variantie", "font": {"size": 11}},
        "showline": False,
        "showgrid": True,
        "gridcolor": "#eeeeee",
        "ticksuffix": "%",
        # Leave a little headroom above the highest cumulative value.
        "range": [0, max(cumulative) * 1.08],
    }
    fig.update_layout(
        height=280,
        margin={"l": 10, "r": 50, "t": 30, "b": 40},
        title=title_cfg,
        legend=legend_cfg,
        xaxis=xaxis_cfg,
        yaxis=yaxis_cfg,
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        bargap=0.25,
    )
    st.plotly_chart(fig, use_container_width=True)


def _render_party_axis_chart(
    party_scores: Dict[str, List[float]], comp_sel: int, theme: dict
) -> None:
    """Render a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`.

    Each party is plotted at its score on a single horizontal axis (y=0).
    When theme['flip'] is True the scores are negated so that the progressive/left
    side always appears on the left of the chart.

    Args:
        party_scores: {party_name: vector}; the score used is element
            ``comp_sel - 1`` of each vector. Parties whose vector is too short
            are left out.
        comp_sel: 1-based component number. Values below 1 select nothing
            (rather than wrapping around to the end of the vector).
        theme: Dict read for the optional keys ``flip``, ``positive_pole`` and
            ``negative_pole``; the pole texts become the axis caption.
    """
    if not party_scores:
        st.caption("_Partijdata niet beschikbaar voor deze as._")
        return

    axis_idx = comp_sel - 1  # 0-based index into the party vector
    flip = theme.get("flip", False)
    parties: List[str] = []
    scores: List[float] = []
    for party, vec in party_scores.items():
        # Lower bound prevents a negative index from silently reading the
        # vector from its end.
        if 0 <= axis_idx < len(vec):
            score = vec[axis_idx]
            parties.append(party)
            scores.append(-score if flip else score)

    if not scores:
        st.caption("_Geen partijscores voor deze as._")
        return

    colours = [PARTY_COLOURS.get(p, "#9E9E9E") for p in parties]
    hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]

    # Determine axis labels: left = progressive pole, right = conservative pole
    pos_pole = theme.get("positive_pole", "")
    neg_pole = theme.get("negative_pole", "")
    left_label = pos_pole if flip else neg_pole
    right_label = neg_pole if flip else pos_pole

    # Baseline: pad by 15 % of the score span on both sides. Multiplying the
    # extremes by 1.15 only widens the range when it straddles zero; with
    # same-sign scores it would pull one end inside the outermost marker.
    lowest, highest = min(scores), max(scores)
    pad = 0.15 * (highest - lowest) or 1.0  # all scores equal -> fixed padding
    x_min, x_max = lowest - pad, highest + pad

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=[x_min, x_max],
            y=[0, 0],
            mode="lines",
            line={"color": "#cccccc", "width": 1},
            hoverinfo="skip",
            showlegend=False,
        )
    )
    # Party markers
    fig.add_trace(
        go.Scatter(
            x=scores,
            y=[0] * len(scores),
            mode="markers+text",
            text=parties,
            textposition="top center",
            marker={"size": 18, "color": colours},
            hovertext=hover,
            hoverinfo="text",
            showlegend=False,
        )
    )
    fig.update_layout(
        height=160,
        margin={"l": 10, "r": 10, "t": 10, "b": 30},
        xaxis={
            "title": f"← {left_label}  |  {right_label} →",
            "showticklabels": False,
            "showline": False,
            "showgrid": False,
            "zeroline": False,
        },
        # Extra room above y=0 so the party labels are not clipped.
        yaxis={"visible": False, "range": [-1, 2]},
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
    )
    st.plotly_chart(fig, use_container_width=True)


@st.cache_data(show_spinner="Moties laden…")
def load_motions_df(db_path: str) -> pd.DataFrame:
    """Read the motions table into a DataFrame using a read-only connection.

    Besides the selected columns, the frame gets two derived values: ``date``
    is converted to datetimes (unparseable values become NaT) and ``year`` is
    extracted from it. On any failure the error is logged and an empty
    DataFrame is returned. The connection is closed in every case.
    """
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        motions = connection.execute(
            """
            SELECT id, title, description, date, policy_area,
                   voting_results, layman_explanation,
                   winning_margin, controversy_score, url
            FROM motions
            """
        ).fetchdf()
        motions["date"] = pd.to_datetime(motions["date"], errors="coerce")
        motions["year"] = motions["date"].dt.year
    except Exception:
        logger.exception("Failed to load motions")
        return pd.DataFrame()
    finally:
        connection.close()
    return motions


def query_similar(
    db_path: str,
    source_motion_id: int,
    vector_type: str = "fused",
    top_k: int = 10,
) -> pd.DataFrame:
    """Look up the ``top_k`` highest-scoring neighbours of a motion.

    Reads ``similarity_cache`` rows for the given source motion and vector
    type, joins each target to ``motions`` for its title, date and policy
    area, and returns them ordered by score (highest first). The database is
    opened read-only. On failure the error is logged and an empty DataFrame is
    returned.
    """
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        bind_values = [source_motion_id, vector_type, top_k]
        cursor = connection.execute(
            """
            SELECT sc.target_motion_id, sc.score, sc.window_id,
                   m.title, m.date, m.policy_area
            FROM similarity_cache sc
            JOIN motions m ON m.id = sc.target_motion_id
            WHERE sc.source_motion_id = ?
              AND sc.vector_type = ?
            ORDER BY sc.score DESC
            LIMIT ?
            """,
            bind_values,
        )
        matches = cursor.fetchdf()
    except Exception:
        logger.exception(
            "Failed to query similarity cache for motion %s", source_motion_id
        )
        matches = pd.DataFrame()
    finally:
        connection.close()
    return matches


# ---------------------------------------------------------------------------
# Shared rendering helpers
# ---------------------------------------------------------------------------


def _render_voting_results(voting_results_json) -> None:
    """Render a voting_results JSON blob as a grouped voor/tegen/onthouden list.

    The JSON is stored as {party_or_mp: vote} where vote is one of
    'voor', 'tegen', 'onthouden', 'afwezig'. We group by vote for readability:
    one markdown line per vote value, showing the number of actors and their
    names in sorted order. The four known votes come first in a fixed order;
    any other vote value follows.

    Args:
        voting_results_json: Either a JSON string or an already-decoded dict.
            Falsy values, invalid JSON, non-dict payloads and empty dicts
            render nothing.

    This helper is best-effort and never raises for bad data: malformed JSON
    is logged as a warning and unexpected rendering errors are logged with a
    traceback instead of being silently discarded.
    """
    if not voting_results_json:
        return

    if isinstance(voting_results_json, str):
        try:
            vdata = json.loads(voting_results_json)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            logger.warning("Ignoring voting_results that is not valid JSON")
            return
    else:
        vdata = voting_results_json

    if not isinstance(vdata, dict) or not vdata:
        return

    try:
        # Group {vote: [actor, ...]}
        by_vote: Dict[str, List[str]] = {}
        for actor, vote in vdata.items():
            vote_str = str(vote).lower().strip()
            by_vote.setdefault(vote_str, []).append(str(actor))

        # Render in fixed order, then any unexpected vote values.
        vote_order = ["voor", "tegen", "onthouden", "afwezig"]
        vote_emoji = {"voor": "✅", "tegen": "❌", "onthouden": "🟡", "afwezig": "⬜"}
        extra_votes = [k for k in by_vote if k not in vote_order]
        # `vdata` is non-empty here, so at least one line is always rendered;
        # a "no results" fallback after this loop would be unreachable.
        for v in vote_order + extra_votes:
            actors = by_vote.get(v)
            if not actors:
                continue
            emoji = vote_emoji.get(v, "▪️")
            st.markdown(
                f"**{emoji} {v.capitalize()}** ({len(actors)}): {', '.join(sorted(actors))}"
            )
    except Exception:
        # Keep the page alive, but leave a trace for debugging.
        logger.exception("Failed to render voting results")


# ---------------------------------------------------------------------------
# Tab 1: Politiek Kompas
# ---------------------------------------------------------------------------


def build_compass_tab(db_path: str, window_size: str) -> None:
    """Render the "Politiek Kompas" tab: a 2D scatter of MPs or party centroids.

    The user picks a window, the display level (individual MPs or parties) and
    a minimum number of MPs per party. Positions come from ``load_positions``;
    for the ``current_parliament`` window only MPs reported by
    ``load_active_mps`` are shown. Name variants that differ only by a
    parenthetical (e.g. a first name) are merged into one point at the mean of
    their positions.

    Args:
        db_path: Path to the DuckDB database.
        window_size: Accepted for signature parity with the other tabs but not
            used: the compass always works on annual windows.
    """
    st.subheader("Politiek Kompas")
    st.markdown(
        "2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)."
    )

    # Compass always uses annual windows regardless of the sidebar window_size setting.
    positions_by_window, axis_def = load_positions(db_path, "annual")
    if not positions_by_window:
        st.warning(
            "Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid."
        )
        return

    party_map = load_party_map(db_path)
    active_mps = load_active_mps(db_path)

    # Sort windows: year windows first (ascending), current_parliament last.
    year_windows = sorted(w for w in positions_by_window if w != "current_parliament")
    has_current = "current_parliament" in positions_by_window
    windows = year_windows + (["current_parliament"] if has_current else [])

    # Motion counts per year — sparse years get a warning label.
    _SPARSE_YEARS = {"2016", "2017", "2018"}

    def _window_label(w: str) -> str:
        if w == "current_parliament":
            return "Huidig parlement"
        if w in _SPARSE_YEARS:
            return f"{w} ⚠️"
        return w

    col1, col2 = st.columns([3, 1])
    with col2:
        window_idx = st.selectbox(
            "Jaar",
            options=windows,
            index=len(windows) - 1,  # default: last entry (current_parliament if present)
            format_func=_window_label,
        )
        level = st.radio(
            "Weergave",
            options=["Kamerleden", "Partijen"],
            index=0,
            horizontal=True,
        )
        min_mps = st.number_input(
            "Min. Kamerleden per partij",
            min_value=1,
            max_value=20,
            value=3,
            step=1,
            help="Partijen met minder dan dit aantal zetels worden niet weergegeven.",
        )

    pos = positions_by_window.get(window_idx, {})
    if not pos:
        st.info(f"Geen data voor venster {window_idx}")
        return

    # For current_parliament, restrict to MPs who are still seated (tot_en_met IS NULL).
    # Historical windows include all MPs active at the time — no restriction needed.
    if window_idx == "current_parliament":
        pos = {mp: xy for mp, xy in pos.items() if mp in active_mps}

    # Deduplicate MPs whose names appear both with and without a parenthetical first name,
    # e.g. "Dijk, J.P." and "Dijk, J.P. (Jimmy)". Keep the canonical (stripped) name and
    # average positions if several variants are present.
    def _strip_paren(name: str) -> str:
        return re.sub(r"\s*\([^)]*\)", "", name).strip()

    # Collect all variants first so that three or more of them are averaged
    # with equal weight (pairwise averaging would favour the later ones).
    variants: Dict[str, List[Tuple[float, float]]] = {}
    for name, (x, y) in pos.items():
        variants.setdefault(_strip_paren(name), []).append((x, y))
    pos = {
        base: (
            sum(p[0] for p in points) / len(points),
            sum(p[1] for p in points) / len(points),
        )
        for base, points in variants.items()
    }

    rows = []
    for name, (x, y) in pos.items():
        # Names are already stripped of parentheticals at this point.
        party = party_map.get(name) or "Unknown"
        rows.append({"name": name, "x": x, "y": y, "party": party})

    # Explicit columns keep the frame well-formed when `rows` is empty (e.g. no
    # active MP matched); otherwise the "party" lookup below raises KeyError.
    df_pos = pd.DataFrame(rows, columns=["name", "x", "y", "party"])

    # Drop parties below the minimum MP threshold (unreliable centroids).
    # MPs without a known party are dropped as well.
    party_counts = df_pos[df_pos["party"] != "Unknown"]["party"].value_counts()
    valid_parties = set(party_counts[party_counts >= min_mps].index)
    df_pos = df_pos[df_pos["party"].isin(valid_parties)]

    if df_pos.empty:
        st.info("Geen partijen met genoeg Kamerleden voor dit venster.")
        return

    if level == "Partijen":
        # Aggregate to party centroids
        df_party = df_pos.groupby("party", as_index=False).agg(
            x=("x", "mean"), y=("y", "mean"), n=("name", "count")
        )
        df_party["name"] = df_party["party"]
        colour_map = {
            p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_party["party"].unique()
        }
        fig = px.scatter(
            df_party,
            x="x",
            y="y",
            color="party",
            text="party",
            hover_name="party",
            hover_data={"party": False, "x": ":.3f", "y": ":.3f", "n": True},
            color_discrete_map=colour_map,
            title=f"Politiek Kompas — {_window_label(window_idx)} (partijen)",
            labels={
                "x": "Links ← → Rechts",
                "y": "Progressief ↑ / Conservatief ↓",
                "n": "Kamerleden",
            },
        )
        fig.update_traces(textposition="top center", marker_size=14)
    else:
        colour_map = {
            p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique()
        }
        fig = px.scatter(
            df_pos,
            x="x",
            y="y",
            color="party",
            hover_name="name",
            hover_data={"party": True, "x": ":.3f", "y": ":.3f"},
            color_discrete_map=colour_map,
            title=f"Politiek Kompas — {_window_label(window_idx)}",
            labels={"x": "Links ← → Rechts", "y": "Progressief ↑ / Conservatief ↓"},
        )

    # Fixed axis ranges keep the view stable when switching between windows.
    fig.update_layout(
        height=600,
        legend_title_text="Partij",
        xaxis={"range": [-1, 1]},
        yaxis={"range": [-0.6, 0.6]},
    )

    with col1:
        st.plotly_chart(fig, use_container_width=True)


# ---------------------------------------------------------------------------
# Tab 2: Partij Trajectories
# ---------------------------------------------------------------------------


def build_trajectories_tab(db_path: str, window_size: str) -> None:
    """Render the "Partij Trajectories" tab: party centroids traced across windows.

    For each window the 2D positions of all MPs with a known party are averaged
    per party. The user selects parties in a multiselect; each selected party
    is drawn as a smoothed line through its centroids in window-id order.

    Args:
        db_path: Path to the DuckDB database.
        window_size: Passed on to ``load_positions`` to choose which windows
            are included.
    """
    st.subheader("Partij Trajectories")
    st.markdown("Hoe bewegen partijen over de tijdsvensters heen?")

    positions_by_window, _ = load_positions(db_path, window_size)
    if not positions_by_window:
        st.warning("Geen positiedata beschikbaar.")
        return

    party_map = load_party_map(db_path)

    # centroids[party][window_id] = (mean x, mean y) of that party's MPs.
    centroids: Dict[str, Dict[str, Tuple[float, float]]] = {}
    for window_id in sorted(positions_by_window.keys()):
        members: Dict[str, List[Tuple[float, float]]] = {}
        for mp_name, (x, y) in positions_by_window.get(window_id, {}).items():
            party = party_map.get(mp_name, "Unknown")
            if party != "Unknown":  # MPs without a party do not count
                members.setdefault(party, []).append((x, y))
        for party, coords in members.items():
            mean_x = float(np.mean([point[0] for point in coords]))
            mean_y = float(np.mean([point[1] for point in coords]))
            centroids.setdefault(party, {})[window_id] = (mean_x, mean_y)

    # Every party that has at least one centroid is selectable.
    all_parties_sorted = sorted(centroids)

    # Default: show CDA, D66, VVD — the three parties that span the political centre.
    # Fall back to the known major parties, then to the first six alphabetically.
    default_parties = (
        [p for p in ["CDA", "D66", "VVD"] if p in centroids]
        or [p for p in KNOWN_MAJOR_PARTIES if p in centroids]
        or all_parties_sorted[:6]
    )

    selected_parties = st.multiselect(
        "Selecteer partijen",
        options=all_parties_sorted,
        default=default_parties,
    )

    fig = go.Figure()
    for party in selected_parties:
        track = centroids.get(party)
        if track is None:
            continue
        ordered_ids = sorted(track.keys())
        colour = PARTY_COLOURS.get(party, "#9E9E9E")
        trace = go.Scatter(
            x=[track[wid][0] for wid in ordered_ids],
            y=[track[wid][1] for wid in ordered_ids],
            mode="lines+markers",
            name=party,
            text=ordered_ids,  # full window ID for hover
            line={"color": colour, "shape": "spline", "smoothing": 1.3},
            marker={"color": colour, "size": 8},
            hovertemplate=(
                f"<b>{party}</b><br>"
                "venster: %{text}<br>"
                "x: %{x:.3f}<br>y: %{y:.3f}<extra></extra>"
            ),
        )
        fig.add_trace(trace)

    fig.update_layout(
        title="Partij trajectories",
        xaxis_title="Links ← → Rechts",
        yaxis_title="Progressief ↑ / Conservatief ↓",
        height=600,
        legend_title_text="Partij",
    )
    st.plotly_chart(fig, use_container_width=True)


# ---------------------------------------------------------------------------
# Tab 3: Motie Zoeken
# ---------------------------------------------------------------------------


def build_search_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Zoeken" tab: filter motions and list the top matches.

    Loads the motions frame via ``load_motions_df``, applies the year,
    controversy and title-substring filters in memory, sorts by controversy
    (descending) and renders the first 50 rows as expanders. Each expander
    shows three metrics, the voting breakdown, an optional source link and
    the similar motions returned by ``query_similar``.

    Args:
        db_path: Database path forwarded to ``load_motions_df`` and
            ``query_similar``.
        show_rejected: When False, rows whose stripped title equals
            "Verworpen." are hidden.
    """
    st.subheader("Motie Zoeken")

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        df = df[df["title"].fillna("").str.strip() != "Verworpen."]

    # Controls
    col1, col2, col3 = st.columns([2, 1, 1])
    with col1:
        query = st.text_input(
            "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen"
        )
    with col2:
        years = sorted(df["year"].dropna().astype(int).unique().tolist())
        if years:
            year_range = st.select_slider(
                "Jaar", options=years, value=(years[0], years[-1])
            )
        else:
            # No usable year values: fall back to a fixed range so the
            # filter below still has bounds to compare against.
            year_range = (2019, 2024)
    with col3:
        min_controversy = st.slider(
            "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05
        )

    # Apply filters in-memory
    working = df.copy()
    working = working[
        (working["year"] >= year_range[0]) & (working["year"] <= year_range[1])
    ]
    if min_controversy > 0:
        working = working[working["controversy_score"] >= min_controversy]
    if query:
        # Plain case-insensitive substring match (regex=False: the user's
        # input is never interpreted as a pattern).
        q = query.lower()
        mask = working["title"].fillna("").str.lower().str.contains(q, regex=False)
        working = working[mask]

    working = working.sort_values(by="controversy_score", ascending=False)
    st.caption(f"{len(working)} resultaten (top 50 getoond)")

    for _, row in working.head(50).iterrows():
        title = row.get("title") or f"Motie #{row['id']}"
        date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
        # `or 0` turns a missing (None) value into 0 so the :.2f formatting
        # below cannot raise; the margin gets the same guard as controversy.
        controversy = row.get("controversy_score") or 0
        margin = row.get("winning_margin", 0) or 0
        with st.expander(f"**{title}** — {date_str} — 🔥 {controversy:.2f}"):
            cols = st.columns(3)
            cols[0].metric("Controverse", f"{controversy:.2f}")
            cols[1].metric("Marge", f"{margin:.2f}")
            cols[2].metric("Jaar", int(row["year"]) if pd.notna(row["year"]) else "?")

            # Voting breakdown
            _render_voting_results(row.get("voting_results"))

            # Link to original motion (only rendered for http(s) URLs)
            url = row.get("url")
            if url and str(url).startswith("http"):
                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

            # Similar motions
            sim = query_similar(db_path, int(row["id"]), top_k=5)
            if not sim.empty:
                st.markdown("**Vergelijkbare moties:**")
                for _, s in sim.iterrows():
                    s_date = (
                        pd.to_datetime(s["date"]).strftime("%Y")
                        if pd.notna(s.get("date"))
                        else ""
                    )
                    st.markdown(
                        f"- {s.get('title', 'Onbekend')} *(score: {s['score']:.3f}, {s_date})*"
                    )
            else:
                st.caption("_Nog geen vergelijkbare moties beschikbaar_")


# ---------------------------------------------------------------------------
# Tab 4: Motie Browser
# ---------------------------------------------------------------------------


def build_browser_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Browser" tab: a sortable table plus a detail panel.

    Loads the motions frame via ``load_motions_df``, applies the year and
    controversy filters, sorts by the chosen key and shows the result as a
    table. Below the table a numeric input selects a motion id; the matching
    motion's title, date, controversy, source link, voting breakdown and
    similar motions (from ``query_similar``) are rendered.

    Args:
        db_path: Database path forwarded to ``load_motions_df`` and
            ``query_similar``.
        show_rejected: When False, rows whose stripped title equals
            "Verworpen." are hidden.
    """
    st.subheader("Motie Browser")

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        df = df[df["title"].fillna("").str.strip() != "Verworpen."]

    # Controls
    col1, col2, col3 = st.columns(3)
    with col1:
        years = sorted(df["year"].dropna().astype(int).unique().tolist())
        year_filter = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years])
    with col2:
        min_controversy_b = st.slider(
            "Min. controverse",
            min_value=0.0,
            max_value=1.0,
            value=0.0,
            step=0.05,
            key="browser_controversy",
        )
    with col3:
        sort_by = st.selectbox("Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"])

    # Filter
    working = df.copy()
    if year_filter != "(Alle)":
        working = working[working["year"] == int(year_filter)]
    if min_controversy_b > 0:
        working = working[working["controversy_score"] >= min_controversy_b]

    # UI label -> (column, ascending). Date and controversy sort descending,
    # margin sorts ascending.
    sort_map = {
        "Datum (nieuw)": ("date", False),
        "Controverse": ("controversy_score", False),
        "Marge": ("winning_margin", True),
    }
    sort_col, sort_asc = sort_map[sort_by]
    working = working.sort_values(by=sort_col, ascending=sort_asc)

    # Display table (only the columns that are actually present)
    display_cols = ["id", "title", "date", "controversy_score", "winning_margin"]
    available_display = [c for c in display_cols if c in working.columns]
    st.dataframe(
        working[available_display].reset_index(drop=True),
        use_container_width=True,
        height=350,
    )

    st.divider()

    # Detail panel
    st.markdown("**Detail weergave** — vul een motie-ID in:")
    has_rows = not working.empty
    sel_id = st.number_input(
        "Motie ID",
        min_value=int(working["id"].min()) if has_rows else 1,
        max_value=int(working["id"].max()) if has_rows else 99999,
        value=int(working["id"].iloc[0]) if has_rows else 1,
        step=1,
    )
    # Look the id up in the unfiltered-by-controls frame, so an id inside the
    # allowed range resolves even when the row is not in the filtered table.
    motion_row = df[df["id"] == sel_id]
    if not motion_row.empty:
        row = motion_row.iloc[0]
        st.markdown(f"### {row.get('title') or 'Onbekend'}")
        date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
        # `or 0` turns a missing (None) score into 0 so the :.2f formatting
        # cannot raise.
        controversy = row.get("controversy_score", 0) or 0
        st.caption(f"📅 {date_str}  | 🔥 Controverse: {controversy:.2f}")

        # Link to original source (only rendered for http(s) URLs)
        url = row.get("url")
        if url and str(url).startswith("http"):
            st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

        # Voting breakdown
        st.markdown("**Stemuitslag:**")
        _render_voting_results(row.get("voting_results"))

        # Similar motions
        sim = query_similar(db_path, int(sel_id), top_k=10)
        if not sim.empty:
            st.markdown("**Vergelijkbare moties:**")
            # Same defensive column selection as the main table above: a
            # missing column is omitted instead of raising KeyError.
            sim_cols = [
                c for c in ("title", "score", "date", "policy_area") if c in sim.columns
            ]
            st.dataframe(
                sim[sim_cols],
                use_container_width=True,
            )
        else:
            st.caption("_Nog geen vergelijkbare moties beschikbaar voor deze motie_")


def _coerce_motion_id(raw_id: object) -> Optional[int]:
    """Return ``raw_id`` as an int, or None when it cannot be converted."""
    try:
        return int(raw_id)  # type: ignore[call-overload]
    except (TypeError, ValueError, OverflowError):
        return None


def _render_svd_motion_column(
    header: str,
    arrow: str,
    motions: list,
    motion_details: Dict[int, tuple],
) -> None:
    """Render one pole column of the SVD tab: a header plus one expander per motion.

    Args:
        header: Markdown text shown above the motion list.
        arrow: Marker prefixed to every expander label.
        motions: Motion dicts from the top-motions JSON; ``motion_id`` and
            ``title`` are read from each.
        motion_details: Database rows keyed by motion id, laid out as
            (id, title, date, policy_area, url, body_text, voting_results).
    """
    st.markdown(header)
    for m in motions:
        mid = m.get("motion_id")
        raw_title = m.get("title") or f"Motie #{mid}"
        with st.expander(f"{arrow} {raw_title}"):
            # Ids that cannot be converted were skipped by the batch fetch,
            # so they simply have no metadata here instead of raising.
            key = _coerce_motion_id(mid) if mid is not None else None
            row = motion_details.get(key) if key is not None else None
            if row:
                try:
                    date_str = str(row[2])[:10]
                except Exception:
                    date_str = "?"
                st.caption(f"📅 {date_str}  |  {row[3] or '—'}")
                if row[4] and str(row[4]).startswith("http"):
                    st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
                if row[5]:
                    # NOTE(review): this expander sits inside the per-motion
                    # expander; confirm the deployed Streamlit version permits
                    # nested expanders.
                    with st.expander("Toon volledige tekst"):
                        st.write(row[5])
                _render_voting_results(row[6])
            else:
                st.caption("_Geen metadata beschikbaar_")


def build_svd_components_tab(db_path: str) -> None:
    """Render the SVD tab: top motions per SVD component with theme texts.

    Shows the scree plot (when ``load_scree_data`` returns data), then reads
    thoughts/explorer/top_svd_top_motions.json and offers a selector over the
    components found in that file. For the selected component it shows the
    theme label/explanation from ``SVD_THEMES``, the party axis chart, and
    two columns of motions split by the sign of their score, each motion with
    a detail pane fetched from the ``motions`` table.

    Args:
        db_path: Path to the DuckDB database; opened read-only for the
            motion-detail lookup and forwarded to the data loaders.
    """
    # Political polarisation themes per SVD component (1-indexed, window=2025)
    # Produced by per-axis analysis of all 10 unique top motions (zero cross-axis overlap).
    SVD_THEMES: dict[int, dict[str, str]] = {
        1: {
            "label": "Links-rechts hoofdas: progressief versus conservatief-nationalistisch",
            "explanation": (
                "De dominante dimensie van het parlement: partijen aan de linkerkant (PvdD, GL-PvdA, "
                "DENK, SP) stemmen progressief — voor sociale voorzieningen, klimaat, internationale "
                "solidariteit — terwijl partijen aan de rechterkant (PVV, NSC, BBB, SGP) inzetten op "
                "nationaal belang, migratiebeheer en conservatieve waarden. Linkse moties omvatten "
                "boycots van Israëlische defensiebedrijven, huurverlaging en het oprichten van "
                "zorgbuurthuizen; rechtse moties gaan over NAVO-verplichtingen, juridische ruimte voor "
                "drones en gaswinningsprojecten. Dit is de klassieke links-rechts tegenstelling die "
                "het meeste verschil in stemgedrag verklaart."
            ),
            "positive_pole": "Nationalistisch-conservatief: PVV, NSC, BBB, SGP, VVD",
            "negative_pole": "Progressief-links: PvdD, GL-PvdA, DENK, SP",
            "flip": False,
        },
        2: {
            "label": "PVV/FVD populistisch isolationisme versus het overige parlement",
            "explanation": (
                "Deze as isoleert PVV en FVD van alle andere partijen. Aan de positieve kant staan "
                "moties die artsen vrijpleiten die hydroxychloroquine voorschreven, Syriërs direct "
                "willen terugsturen, geen geld aan Jordanië willen geven en de richtlijn tijdelijke "
                "bescherming voor Oekraïners willen beëindigen. Aan de negatieve kant staan "
                "mainstream-moties van CU, CDA, VVD en NSC over digitale toegankelijkheid, "
                "jongerenzorg en zorgstandaarden — partijen die in de positieve ruimte van as 1 "
                "zitten maar hier op één lijn staan met links. Dit is geen links-rechts as maar een "
                "populistisch-isolationisme-as: PVV en FVD vormen een eigen cluster dat los staat "
                "van de rest van het politieke spectrum."
            ),
            "positive_pole": "PVV/FVD populistisch isolationisme: anti-EU, anti-Oekraïne, antiwetenschap",
            "negative_pole": "Gehele overige parlement: mainstream links én rechts",
            "flip": False,
        },
        3: {
            "label": "Sociaal-economisch links versus marktliberaal en landelijk rechts",
            "explanation": (
                "Deze as weerspiegelt de klassieke sociaal-economische breuklijn. Aan de linkerkant "
                "staan moties van SP die bezuinigingen op zorg en gemeentefonds willen schrappen, "
                "winstuitkeringen in de zorg willen verbieden en instemmingsrecht bij "
                "ziekenhuisfusies eisen — allemaal gericht op bescherming van publieke voorzieningen. "
                "Aan de rechterkant staan moties van BBB (wolvenzendering), VVD (langetermijn"
                "investeerders zorg, controversieel verklaren) en NSC (belastingplichtigen 2023/2024) "
                "die een marktgerichtere koers voorstaan of agrarische belangen verdedigen. SP scoort "
                "sterk links, VVD en NSC sterk rechts."
            ),
            "positive_pole": "Sociaal-economisch links: publieke zorg, tegengaan marktwerking",
            "negative_pole": "Marktliberaal en agrarisch-rechts: VVD, NSC, BBB",
            "flip": True,
        },
        4: {
            "label": "Christelijk-sociaal centrum versus populistisch-soevereinistisch",
            "explanation": (
                "Deze as scheidt christelijk-sociale en gematigde centrumpartijen (CU, CDA, D66) van "
                "populistisch-soevereinistische partijen (FVD, NSC). Aan de linkerkant staan "
                "CU-moties over vaderbetrokkenheid, long covid vergoeding en internationale "
                "samenwerking; aan de rechterkant FVD-moties over het verbieden van pleegzorg bij "
                "paren van hetzelfde geslacht, een migratiesaldo van max 60.000 en het verlaten van "
                "de WHO. NSC scoort sterk rechts op deze as door amendementen die evaluaties en "
                "grondwetswijzigingen (artikel 23) willen blokkeren. Dit is een cultureel-"
                "institutionele as: vertrouwen in internationale instituties en pluralisme tegenover "
                "soevereinistisch-traditioneel wantrouwen."
            ),
            "positive_pole": "Christelijk-sociaal en institutioneel: CU, CDA, D66",
            "negative_pole": "Populistisch-soevereinistisch: FVD, NSC-rechtsflank",
            "flip": True,
        },
        5: {
            "label": "Christelijk-conservatief en ruraal sociaal versus seculier-progressief",
            "explanation": (
                "Deze as reflecteert de tegenstelling tussen christelijk-conservatieve en ruraal-"
                "sociale partijen enerzijds (NSC, CU, SGP, CDA) en seculier-progressieve partijen "
                "anderzijds (D66, GL-PvdA, SP). Rechtse moties omvatten vrijwilligers in "
                "schuldhulpverlening ondersteunen, maatschappelijke diensttijd koppelen aan "
                "arbeidsmarktafstand en WW-duur alleen verkorten met omscholing. Linkse moties "
                "bepleiten erkenning van meerouderschap, het recht op abortus in het EU-handvest "
                "en een nationaal coördinator buitenlandse beïnvloeding. NSC en CU scoren sterk "
                "rechts; D66 en GL-PvdA sterk links."
            ),
            "positive_pole": "Christelijk-conservatief en ruraal: NSC, CU, SGP, CDA",
            "negative_pole": "Seculier-progressief: D66, GL-PvdA, SP",
            "flip": False,
        },
        6: {
            "label": "Energiepragmatisme en liberale fiscaliteit versus klimaatactivisme en anti-discriminatie",
            "explanation": (
                "Aan de rechterkant staan moties die kernenergie als CO₂-arm alternatief willen "
                "erkennen op COP30, lng-capaciteit prefereren boven vulgraadverplichtingen en "
                "discriminatiemeldpunten willen inventariseren (JA21). Aan de linkerkant staan "
                "moties die fossiele industrie van klimaatconferenties willen weren (GL), de "
                "integratieparadox willen meenemen in beleid en aanvallen van Israël op Libanon "
                "veroordelen (DENK, SP). FVD en JA21 scoren sterk rechts; GL-PvdA, DENK en SP "
                "sterk links. Dit is een combinatie van energie-ideologie en culturele polarisatie "
                "rondom klimaat, integratie en buitenlandspolitiek."
            ),
            "positive_pole": "Energiepragmatisme, kernenergie, liberale fiscaliteit: FVD, JA21, SGP, CU",
            "negative_pole": "Klimaatactivisme, anti-discriminatie en internationale verantwoordelijkheid: GL, DENK, SP",
            "flip": False,
        },
        7: {
            "label": "Pragmatisch coalitiebeleid versus ecologisch-progressief en religieuze bescherming",
            "explanation": (
                "Aan de rechterkant staan pragmatische coalitiemoties: voedselprijzen inzichtelijk "
                "maken (PVV/CU), papieren schoolboeken behouden (CDA), invoeringstoets voor mkb "
                "(NSC) en het controversieel verklaren van bepaalde dossiers (VVD). Aan de "
                "linkerkant staan progressief-ecologische moties: een landelijk stookverbod (PvdD), "
                "verbranding van religieuze geschriften strafbaar stellen (DENK), chroom-6 "
                "schadevergoedingen (SP/D66) en tegenhouden van nieuwe gaswinning (SP). De "
                "partijscores zijn smal maar consistent: PvdD, DENK en SP links; CU, NSC en CDA "
                "rechts."
            ),
            "positive_pole": "Ecologisch-progressief en religieuze bescherming: PvdD, DENK, SP",
            "negative_pole": "Pragmatisch coalitiebeleid: PVV, CU, NSC, CDA, VVD",
            "flip": True,
        },
        8: {
            "label": "Pro-Europees defensie en investering versus nationaal-populistisch wantrouwen",
            "explanation": (
                "Aan de rechterkant staan moties van D66, Volt en CDA die NAVO-militaire mobiliteit "
                "in het Schengengebied regelen, 35% van defensiematerieel Europees willen inkopen "
                "en een Nationaal Groeifonds-ronde willen lanceren — allemaal pro-Europees en "
                "investerings-georiënteerd. Aan de linkerkant staan moties over coronastrategie "
                "en oversterfte (PVV/BBB), ketenverantwoordelijkheid bij toeslagen (DENK) en "
                "vraagresponsovereenkomsten zonder opzegtermijn (PVV). SP scoort sterk links door "
                "wantrouwen jegens institutionele processen; D66 en Volt sterk rechts door hun "
                "pro-Europese en investeringsgerichte koers."
            ),
            "positive_pole": "Pro-Europees, NAVO en investering: D66, Volt, CDA",
            "negative_pole": "Nationaal-populistisch wantrouwen: PVV, SP-controlereflex, DENK",
            "flip": False,
        },
        9: {
            "label": "Gereformeerd-sociaal centrum versus progressief regulerend",
            "explanation": (
                "Aan de linkerkant staan moties van NSC, CU en SGP over naleving van de Financiële-"
                "verhoudingswet, beperking van arbeidsmigratie binnen de EU, een nieuwe "
                "opleidingsplek voor tandartsen en een actieplan tegen misbruik van "
                "hallucinerende geneesmiddelen. Aan de rechterkant staan moties van PvdD, GL "
                "en D66: moratorium op geitenstallen, verbod op gokadvertenties in zoekmachines, "
                "verduidelijking van voorlopige hechtenis en leegstandbelasting voor woningen. "
                "CU en SGP scoren sterk links; PvdD en D66 sterk rechts. Dit is een as van "
                "gereformeerd-sociaal pragmatisme tegenover progressieve regulering."
            ),
            "positive_pole": "Gereformeerd-sociaal centrum: NSC, CU, SGP — naleving, arbeidsmarkt, volksgezondheid",
            "negative_pole": "Progressief regulerend: PvdD, GL, D66 — milieu, wonen, rechtsstaat",
            "flip": True,
        },
        10: {
            "label": "Residuele as: individuele dienstverlening versus collectieve handhaving",
            "explanation": (
                "De tiende as vangt kleine resterende variantie op na de eerste negen. Aan de "
                "linkerkant staan moties die individuele dienstverlening verbeteren: minder "
                "tijdsintensieve inspectiebezoeken (VVD), een persoonlijk dossier voor ouders "
                "binnen één maand (SP), tegemoetkoming arbeidsongeschikten in stand houden (SP) "
                "en een verlaging van de leeftijd voor kindgesprekken (PVV). Aan de rechterkant "
                "staan handhavings- en regelgevingsmoties: aangifteplicht voor scholen bij "
                "veiligheidsincidenten (VVD), rookvrije auto's met kinderen (NSC/CDA) en "
                "beloningsgeld voor tipgevers op de Nationale Opsporingslijst (VVD). De scores "
                "zijn klein (max ±6,5) en de coalitie op elke pool is gemengd — dit is geen "
                "duidelijke ideologische as maar een restfactor."
            ),
            "positive_pole": "Individuele dienstverlening en ontzorging: VVD, SP, PVV-elementen",
            "negative_pole": "Collectieve handhaving en regelgeving: VVD-handhavingsflank, CDA, NSC",
            "flip": True,
        },
    }

    st.subheader("🔬 SVD Assen — politieke polarisatiethema's")
    st.markdown(
        "Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen "
        "van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren "
        "het spanningsveld dat de as beschrijft."
    )

    # Scree plot: relative importance of each SVD component
    scree_importances = load_scree_data(db_path)
    if scree_importances:
        st.markdown(
            "**Scree-plot** — het relatieve gewicht van elke SVD-as. "
            "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; "
            "latere assen zijn subtieler maar politiek nog steeds betekenisvol."
        )
        _render_scree_plot(scree_importances)

    json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
    if not os.path.exists(json_path):
        st.warning(
            f"Top-SVD data not found at {json_path}. Run the importance job to generate it."
        )
        return

    try:
        with open(json_path, "r", encoding="utf-8") as fh:
            j = json.load(fh)
    except Exception as e:
        st.error(f"Failed to load SVD importance JSON: {e}")
        return

    window = j.get("window")
    rows = j.get("rows", [])
    if not rows:
        st.info("Geen top-moties in dataset")
        return

    st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**")

    # Build mapping component -> list of motions, keeping only the first row
    # per (component, motion_id). A per-component set of seen ids avoids
    # rescanning the bucket for every row.
    comp_map: dict[int, list] = {}
    seen_per_comp: dict[int, set] = {}
    for r in rows:
        comp = int(r.get("component", 0))
        bucket = comp_map.setdefault(comp, [])
        seen = seen_per_comp.setdefault(comp, set())
        motion_id = r.get("motion_id")
        if motion_id not in seen:
            seen.add(motion_id)
            bucket.append(r)

    comp_options = sorted(comp_map.keys())

    # Build display labels for the selectbox: "As <n> — <theme label>", or
    # just "As <n>" when the component has no entry in SVD_THEMES.
    def _comp_label(c: int) -> str:
        lbl = SVD_THEMES.get(c, {}).get("label", "")
        return f"As {c} — {lbl}" if lbl else f"As {c}"

    comp_display = [_comp_label(c) for c in comp_options]
    comp_sel_idx = st.selectbox(
        "Selecteer SVD-as",
        options=list(range(len(comp_options))),
        format_func=lambda i: comp_display[i],
        index=0,
    )
    comp_sel = comp_options[comp_sel_idx]

    # Show theme explanation
    theme = SVD_THEMES.get(comp_sel, {})
    if theme:
        st.info(f"**{theme['label']}** — {theme['explanation']}")

    motions = comp_map.get(comp_sel, [])

    # Party axis chart
    party_scores = load_party_axis_scores(db_path)
    _render_party_axis_chart(party_scores, comp_sel, theme)

    # Batch-fetch motion details (title, date, policy_area, url, body_text, voting_results)
    # Defensively convert motion ids to integers, skipping invalid values.
    ids_int: List[int] = []
    for m in motions:
        raw_id = m.get("motion_id")
        if raw_id is None:
            continue
        coerced = _coerce_motion_id(raw_id)
        if coerced is None:
            logger.warning("Skipping invalid motion id in SVD batch fetch: %r", raw_id)
        else:
            ids_int.append(coerced)

    motion_details: Dict[int, tuple] = {}
    # If no valid ids remain, skip the DB query
    if ids_int:
        con = None
        try:
            # One "?" per id: the values are bound as parameters, only the
            # placeholder list is interpolated into the SQL text.
            placeholders = ", ".join("?" for _ in ids_int)
            con = duckdb.connect(database=db_path, read_only=True)
            db_rows = con.execute(
                f"SELECT id, title, date, policy_area, url, body_text, voting_results "
                f"FROM motions WHERE id IN ({placeholders})",
                ids_int,
            ).fetchall()
            motion_details = {r[0]: r for r in db_rows}
        except Exception:
            # Best effort: without details every motion shows the
            # "no metadata" caption instead of failing the whole tab.
            logger.exception("Failed to batch-fetch motion details")
        finally:
            if con is not None:
                con.close()

    # Split motions by pole sign; a missing/None score counts as 0.0 (positive side)
    pos_motions = [m for m in motions if float(m.get("score") or 0.0) >= 0]
    neg_motions = [m for m in motions if float(m.get("score") or 0.0) < 0]

    flip = theme.get("flip", False)
    pos_pole = theme.get("positive_pole", "")
    neg_pole = theme.get("negative_pole", "")

    # Determine which pole goes in the left column and which in the right:
    # with flip set the positive pole is shown left, otherwise the negative.
    if flip:
        left_pole, right_pole = pos_pole, neg_pole
        left_motions, right_motions = pos_motions, neg_motions
        left_arrow, right_arrow = "▲", "▼"
    else:
        left_pole, right_pole = neg_pole, pos_pole
        left_motions, right_motions = neg_motions, pos_motions
        left_arrow, right_arrow = "▼", "▲"

    lcol, rcol = st.columns(2)

    with lcol:
        _render_svd_motion_column(
            f"**← {left_pole}**", left_arrow, left_motions, motion_details
        )

    with rcol:
        _render_svd_motion_column(
            f"**{right_pole} →**", right_arrow, right_motions, motion_details
        )


def build_mp_quiz_tab(db_path: str) -> None:
    """Interactive quiz: narrow MPs by asking motion vote questions.

    Minimal viable flow:
    - seed with SEED_MOTIONS motions that have individual MP vote records
    - present one question at a time, store answers in st.session_state['mp_quiz_votes']
    - after each answer call MotionDatabase.match_mps_for_votes to rank MPs
    - once the seed motions are answered, call choose_discriminating_motions
      to pick the next question for the remaining candidates
    - report a unique match when exactly one MP has the top agreement score

    Failures of the database helpers are logged and treated as "no result",
    so the tab keeps rendering.

    Args:
        db_path: Path to the database; passed to ``MotionDatabase`` and
            ``load_motions_df``.
    """
    st.subheader("🧑‍⚖️ Welk tweede kamerlid ben jij?")
    st.markdown(
        "Beantwoord een paar eenvoudige ja/nee/onthoud vragen over moties om te zien welk Kamerlid het meest op jou lijkt."
    )

    SEED_MOTIONS = 8
    MAX_QUESTIONS = 20

    # initialize session state
    if "mp_quiz_votes" not in st.session_state:
        st.session_state["mp_quiz_votes"] = {}
    if "mp_quiz_asked" not in st.session_state:
        st.session_state["mp_quiz_asked"] = []

    # Imported here rather than at module top, so the database module is only
    # loaded when this tab is actually built.
    from database import MotionDatabase as _MotionDatabase

    db_inst = _MotionDatabase(db_path)

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar om de quiz te starten.")
        return

    # seed from motions that actually have individual MP vote records
    seed_ids = db_inst.get_motions_with_individual_votes(k=SEED_MOTIONS)
    if not seed_ids:
        st.warning("Geen individuele stemdata beschikbaar voor de quiz.")
        return

    def _next_motion_id():
        """Return the id of the next motion to ask, or None when there is none."""
        # prefer seed motions not yet answered (votes are keyed by str(id))
        for mid in seed_ids:
            if str(mid) not in st.session_state["mp_quiz_votes"]:
                return mid

        # otherwise ask a discriminating motion based on the current candidate MPs
        try:
            user_votes = {
                int(k): v for k, v in st.session_state["mp_quiz_votes"].items()
            }
            ranked = db_inst.match_mps_for_votes(user_votes, limit=200)
        except Exception:
            logger.exception("MP quiz: ranking MPs for the next question failed")
            ranked = []

        candidates = [r["mp_name"] for r in ranked]
        excluded = [int(k) for k in st.session_state["mp_quiz_votes"].keys()]
        if not candidates:
            return None
        try:
            next_ids = db_inst.choose_discriminating_motions(candidates, excluded, k=1)
            return next_ids[0] if next_ids else None
        except Exception:
            logger.exception("MP quiz: choosing a discriminating motion failed")
            return None

    # show progress and controls
    col1, col2 = st.columns([3, 1])
    with col2:
        st.caption(
            f"Vragen beantwoord: {len(st.session_state['mp_quiz_votes'])}/{MAX_QUESTIONS}"
        )
        if st.button("Reset quiz"):
            st.session_state["mp_quiz_votes"] = {}
            st.session_state["mp_quiz_asked"] = []
            st.rerun()

    # main question loop (single question per render, wrapped in a form to avoid
    # premature reruns when the user changes the radio selection)
    next_mid = _next_motion_id()
    if next_mid is None:
        st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.")
    else:
        motion_rows = df[df["id"] == next_mid]
        if motion_rows.empty:
            # motion has votes but isn't in the motions DataFrame — record a
            # "Geen stem" answer so it is not selected again, then rerun
            st.session_state["mp_quiz_votes"][str(next_mid)] = "Geen stem"
            st.rerun()
            return
        motion_row = motion_rows.iloc[0]
        st.markdown(f"### {motion_row.get('title') or f'Motie #{next_mid}'}")
        if motion_row.get("layman_explanation"):
            st.info(motion_row.get("layman_explanation"))

        with st.form(key=f"mp_quiz_form_{next_mid}"):
            choice = st.radio(
                "Wat zou jij stemmen?",
                options=["Voor", "Tegen", "Onthouden", "Geen stem"],
                index=3,
            )
            submitted = st.form_submit_button("Beantwoord en verder")

        if submitted:
            st.session_state["mp_quiz_votes"][str(next_mid)] = choice
            st.session_state["mp_quiz_asked"].append(next_mid)
            st.rerun()

    # display current ranking
    try:
        user_votes = {int(k): v for k, v in st.session_state["mp_quiz_votes"].items()}
        ranking = db_inst.match_mps_for_votes(user_votes, limit=50)
    except Exception:
        logger.exception("MP quiz: computing the current ranking failed")
        ranking = []

    if ranking:
        st.markdown("**Top kandidaten**")
        # show as table
        rdf = pd.DataFrame(ranking)
        st.dataframe(rdf.head(10), use_container_width=True)

        # check uniqueness: exactly one MP holds the top agreement percentage
        # and shares at least one vote with the user
        top_pct = ranking[0]["agreement_pct"]
        top_matches = [r for r in ranking if r["agreement_pct"] == top_pct]
        if len(top_matches) == 1 and top_matches[0]["overlap"] > 0:
            st.success(
                f"Unieke match gevonden: {top_matches[0]['mp_name']} ({top_matches[0]['party']})"
            )
        else:
            if len(st.session_state["mp_quiz_asked"]) >= MAX_QUESTIONS:
                st.warning(
                    "Maximaal aantal vragen beantwoord. Je hebt meerdere vergelijkbare kandidaten."
                )
            else:
                st.info("Nog geen unieke match — vraag meer om verder te verfijnen.")
    else:
        st.info("Nog geen antwoorden of geen overlapping met bestaande stemdata.")


# ---------------------------------------------------------------------------
# App entry
# ---------------------------------------------------------------------------


def run_app() -> None:
    """Render the Parlement Explorer page: sidebar, "Over" panel and main tabs.

    Sets the Streamlit page configuration, draws the sidebar (including an
    expander with row counts read from the DuckDB file), and then dispatches
    to one builder function per tab. When ``st.tabs`` is not available the
    same builders are reachable through a radio selector instead.
    """
    st.set_page_config(
        layout="wide",
        page_title="Parlement Explorer",
        page_icon="🏛️",
    )
    st.title("🏛️ Parlement Explorer")

    # Sidebar
    st.sidebar.title("Instellingen")
    db_path = "data/motions.db"
    window_size = "annual"
    show_rejected = st.sidebar.checkbox("Toon verworpen moties", value=False)

    # About section: row counts of the main tables. Any failure (missing file,
    # missing table, ...) is reported as a warning instead of aborting the app.
    with st.sidebar.expander("ℹ️ Over", expanded=False):
        try:
            con = duckdb.connect(database=db_path, read_only=True)
            try:
                n_motions = con.execute("SELECT COUNT(*) FROM motions").fetchone()[0]
                n_fused = con.execute(
                    "SELECT COUNT(*) FROM fused_embeddings"
                ).fetchone()[0]
                n_sim = con.execute(
                    "SELECT COUNT(*) FROM similarity_cache"
                ).fetchone()[0]
            finally:
                # Always release the handle, also when one of the queries fails;
                # otherwise every rerun with a broken table leaks a connection.
                con.close()
            st.markdown(
                f"**Moties:** {n_motions:,}  \n"
                f"**Fused embeddings:** {n_fused:,}  \n"
                f"**Similarity cache:** {n_sim:,}"
            )
        except Exception as e:
            st.warning(f"DB niet bereikbaar: {e}")

    # Main tabs: a single label -> renderer table shared by both code paths so
    # the tab layout and the radio fallback cannot drift apart.
    tab_renderers = [
        ("🧭 Politiek Kompas", lambda: build_compass_tab(db_path, window_size)),
        ("📈 Trajectories", lambda: build_trajectories_tab(db_path, window_size)),
        ("🔍 Motie Zoeken", lambda: build_search_tab(db_path, show_rejected)),
        ("📋 Motie Browser", lambda: build_browser_tab(db_path, show_rejected)),
        ("🔬 SVD Components", lambda: build_svd_components_tab(db_path)),
    ]
    tab_labels = [label for label, _ in tab_renderers]

    # Streamlit tabs compatibility: not every Streamlit build exposes `st.tabs`.
    if callable(getattr(st, "tabs", None)):
        for container, (_, render) in zip(st.tabs(tab_labels), tab_renderers):
            with container:
                render()
    else:
        # Fallback for environments where `st.tabs` is not available: use a radio selector
        selection = st.radio("Tab", tab_labels)
        # An unrecognised selection falls through to the last tab (SVD Components).
        render = dict(tab_renderers).get(selection, tab_renderers[-1][1])
        render()


if __name__ == "__main__":
    # Script entry point: set up INFO-level logging with a timestamped format,
    # then hand control to the Streamlit app.
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(message)s",
        level=logging.INFO,
    )
    run_app()