motief/explorer.py

"""Parlement Explorer — Streamlit data analysis app.

Four tabs:
  1. Politiek Kompas  — 2D scatter of MPs/parties, window slider
  2. Partij Trajectories — party centroid lines over time
  3. Motie Zoeken      — text search + similarity lookup
  4. Motie Browser     — sortable table + detail panel

Run with: streamlit run explorer.py

Import-safe: heavy computation is behind @st.cache_data and only runs at UI time.
All DuckDB connections are read_only=True so the app can run alongside the pipeline.
"""

from __future__ import annotations

import json
import logging
import os
import re
import traceback
from typing import Dict, List, Optional, Tuple

try:
    import duckdb

    _DUCKDB_AVAILABLE = True
except Exception:
    duckdb = None
    _DUCKDB_AVAILABLE = False
import numpy as np
import pandas as pd

try:
    import plotly.express as px
    import plotly.graph_objects as go
except Exception:
    # Plotly may be unavailable in lightweight test environments. Provide a tiny
    # local fallback that exposes a Figure-like object with `.data` and
    # `add_trace()` so unit tests can run without installing plotly.
    px = None
    import types

    class _DummyTrace:
        """Minimal stand-in for a plotly trace such as ``go.Scatter``.

        Only the attributes listed in ``_FIELDS`` are kept; every other keyword
        argument (``mode``, ``line``, ``marker``, ...) is accepted and ignored.
        """

        # Attributes preserved on the trace and copied by _DummyFigure.add_trace.
        _FIELDS = ("name", "x", "y", "text", "customdata")

        def __init__(self, **kwargs):
            for field in self._FIELDS:
                setattr(self, field, kwargs.get(field))

    class _DummyFigure:
        """Minimal stand-in for ``plotly.graph_objects.Figure``.

        Traces are only recorded in ``self.data``; nothing is rendered.
        """

        def __init__(self):
            self.data = []

        def add_trace(self, trace):
            """Record *trace* in ``self.data`` as a ``_DummyTrace``.

            Accepts a ``_DummyTrace`` (stored as-is), a mapping of trace fields,
            or any object exposing the fields as attributes. Missing fields
            become ``None``.
            """
            from collections.abc import Mapping

            if isinstance(trace, _DummyTrace):
                self.data.append(trace)
                return

            if isinstance(trace, Mapping):
                # A mapping must be read by key: getattr(..., None) on a dict
                # silently yields None for every field.
                read = trace.get
            else:

                def read(field):
                    return getattr(trace, field, None)

            self.data.append(
                _DummyTrace(**{field: read(field) for field in _DummyTrace._FIELDS})
            )

        def add_annotation(self, *args, **kwargs):
            # noop for tests that don't import full plotly
            return None

    go = types.SimpleNamespace(
        Figure=_DummyFigure, Scatter=lambda **kwargs: _DummyTrace(**kwargs)
    )
try:
    import streamlit as st
except Exception:
    # Minimal dummy replacement for Streamlit used during tests / import-time.
    # We only need a tiny subset so unit tests can import explorer without
    # installing streamlit. All functions here are no-ops or simple fallbacks.
    class _DummySt:
        """No-op replacement for the ``streamlit`` module.

        Implements only the calls this file makes so it can be imported (and
        its pure helpers tested) without Streamlit installed. Widgets return a
        simple default instead of rendering anything.
        """

        @staticmethod
        def _options(args, kwargs):
            """Return the widget options from ``options=`` or the 2nd positional arg."""
            if kwargs.get("options") is not None:
                return kwargs["options"]
            return args[1] if len(args) > 1 else []

        def cache_data(self, *args, **kwargs):
            # Used as ``@st.cache_data(...)``: return a decorator that leaves
            # the function untouched (no caching in the fallback).
            def _decorator(func):
                return func

            return _decorator

        def markdown(self, *args, **kwargs):
            return None

        def subheader(self, *args, **kwargs):
            return None

        def plotly_chart(self, *args, **kwargs):
            return None

        def caption(self, *args, **kwargs):
            return None

        def text_area(self, *args, **kwargs):
            return None

        def json(self, *args, **kwargs):
            return None

        def checkbox(self, *args, **kwargs):
            # default to False unless value provided
            return kwargs.get("value", False)

        def warning(self, *args, **kwargs):
            return None

        def info(self, *args, **kwargs):
            return None

        def selectbox(self, *args, **kwargs):
            # return first option if options provided
            opts = self._options(args, kwargs)
            return opts[0] if opts else None

        def multiselect(self, *args, **kwargs):
            # An explicit default wins; otherwise select the first six options.
            default = kwargs.get("default")
            if default is not None:
                return default
            opts = self._options(args, kwargs)
            return opts[:6] if opts else []

        def number_input(self, *args, **kwargs):
            return kwargs.get("value", 1)

        def slider(self, *args, **kwargs):
            return kwargs.get("value", 0.35)

        def expander(self, *args, **kwargs):
            class _Ctx:
                def __enter__(self_inner):
                    return self_inner

                def __exit__(self_inner, exc_type, exc, tb):
                    # Never suppress exceptions raised inside the block.
                    return False

            return _Ctx()

        def columns(self, *args, **kwargs):
            """Return a tuple of no-op column objects.

            The spec may be an integer column count or a sequence of relative
            widths; with no spec a single column is returned.
            """

            # Return a tuple of simple objects with the methods used in the UI
            class _Col:
                def markdown(self, *a, **k):
                    return None

                def metric(self, *a, **k):
                    return None

                def dataframe(self, *a, **k):
                    return None

            spec = args[0] if args else kwargs.get("spec", 1)
            # len() on an int raises TypeError, so handle the count form first.
            n = spec if isinstance(spec, int) else len(spec)
            return tuple(_Col() for _ in range(n))

    st = _DummySt()
# Temporary diagnostics for Trajectories plotting — set by instrumentation when
# EXPLORER_DEBUG_TRAJECTORIES is enabled. This is intended to be small, opt-in and
# reversible once root cause is found.
_last_trajectories_diagnostics: dict = {}
# Backwards/alternate name used by instrumentation: keep a second module-level
# reference so callers/tests can look for either name.
_last_diagnostics = _last_trajectories_diagnostics


def get_debug_trajectories_enabled() -> bool:
    """Report whether trajectory debugging is switched on via the environment.

    Reads EXPLORER_DEBUG_TRAJECTORIES and returns True only for the exact
    values '1', 'true' or 'True'. An unset variable or any other value
    (including 'TRUE') yields False.
    """
    raw_flag = os.getenv("EXPLORER_DEBUG_TRAJECTORIES")
    if raw_flag is None:
        return False
    return raw_flag in {"1", "true", "True"}


from explorer_helpers import (
    inspect_positions_for_issues,
    compute_party_centroids,
)


def select_trajectory_plot_data(
    positions_by_window: Dict[str, Dict[str, Tuple[float, float]]],
    party_map: Dict[str, str],
    windows: List[str],
    selected_parties: List[str],
    smooth_alpha: float = 0.35,
    mp_fallback_count: Optional[int] = None,
) -> Tuple[go.Figure, int, Optional[str]]:
    """Build the trajectories figure and return (fig, trace_count, banner_text).

    Helper used by build_trajectories_tab. Does not call Streamlit.

    Args:
        positions_by_window: {window_id: {entity_name: (x, y)}}.
        party_map: {mp_name: party}, forwarded to the centroid/inspector helpers.
        windows: Window ids to consider; passed to ``compute_party_centroids``
            and used as the hover text of party traces (assumes the centroid
            lists are aligned to this order — TODO confirm against
            explorer_helpers).
        selected_parties: Parties the user asked for. Parties without any
            usable centroid are dropped; if nothing remains, every plottable
            party is drawn.
        smooth_alpha: EMA factor for the plotted line; values >= 1.0 disable
            smoothing. The raw coordinates are kept in ``customdata``.
        mp_fallback_count: Maximum number of MP traces in the fallback view.
            ``None`` reads EXPLORER_MP_FALLBACK_COUNT (default 20). Negative
            values are treated as 0.

    Returns:
        ``(fig, trace_count, banner_text)``. ``banner_text`` is ``None`` when
        party centroids were plotted and a user-facing notice when the
        function fell back to individual MP trajectories.
    """
    if mp_fallback_count is None:
        try:
            mp_fallback_count = int(os.getenv("EXPLORER_MP_FALLBACK_COUNT", "20"))
        except ValueError:
            mp_fallback_count = 20
    # A negative count would make the slice below mean "all but the last N".
    mp_fallback_count = max(0, mp_fallback_count)

    # Compute per-party centroids aligned to windows
    party_centroids, _meta = compute_party_centroids(
        positions_by_window, party_map, windows
    )

    # Run the inspector purely for diagnostics; a failure must never prevent
    # plotting, so the traceback is stored where the UI and tests can read it.
    try:
        inspector_summary = inspect_positions_for_issues(positions_by_window, party_map)
    except Exception:
        inspector_summary = {}
        diagnostics = {
            "stage": "inspector_exception",
            "exception": traceback.format_exc(),
        }
        select_trajectory_plot_data._last_diagnostics = dict(diagnostics)
        _last_trajectories_diagnostics.update(diagnostics)
    logger.debug("select_trajectory_plot_data inspector summary: %s", inspector_summary)

    def _is_missing(value) -> bool:
        """True for None and for NaN of any numeric type (incl. numpy scalars)."""
        if value is None:
            return True
        try:
            return bool(np.isnan(value))
        except (TypeError, ValueError):
            return False

    def _ema_smooth(values: List[float], alpha: float) -> List[float]:
        """Exponentially smooth *values*; missing entries stay NaN and are skipped."""
        if not values or alpha >= 1.0:
            return values
        smoothed: List[float] = []
        prev = None
        for v in values:
            if _is_missing(v):
                smoothed.append(float(np.nan))
                continue
            v = float(v)
            prev = v if prev is None else alpha * v + (1 - alpha) * prev
            smoothed.append(float(prev))
        return smoothed

    def _raw_or_nan(value) -> float:
        return float(np.nan) if _is_missing(value) else float(value)

    # A party is plottable when at least one window has a complete centroid;
    # a point with only one coordinate cannot be drawn.
    plottable_parties = [
        party
        for party, vals in party_centroids.items()
        if any(not _is_missing(x) and not _is_missing(y) for x, y in vals)
    ]

    fig = go.Figure()
    trace_count = 0

    # If no plottable parties, fallback to MP trajectories
    if not plottable_parties:
        mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
        for wid in windows:
            for mp_name, xy in positions_by_window.get(wid, {}).items():
                try:
                    point = (float(xy[0]), float(xy[1]))
                except (TypeError, ValueError, IndexError, KeyError):
                    # Skip entries that are not a numeric (x, y) pair.
                    continue
                mp_positions.setdefault(mp_name, {})[wid] = point

        # Rank MPs by activity (number of windows with positions); the sort is
        # stable, so ties keep first-seen order.
        ranked_mps = sorted(
            mp_positions, key=lambda mp: len(mp_positions[mp]), reverse=True
        )

        for mp in ranked_mps[:mp_fallback_count]:
            wids_sorted = sorted(mp_positions[mp])
            xs_raw = [mp_positions[mp][w][0] for w in wids_sorted]
            ys_raw = [mp_positions[mp][w][1] for w in wids_sorted]
            fig.add_trace(
                go.Scatter(
                    x=_ema_smooth(xs_raw, smooth_alpha),
                    y=_ema_smooth(ys_raw, smooth_alpha),
                    mode="lines+markers",
                    name=mp,
                    text=wids_sorted,
                    customdata=list(zip(xs_raw, ys_raw)),
                    line=dict(color="#888888", shape="spline", smoothing=1.3),
                    marker=dict(color="#888888", size=6),
                )
            )
            trace_count += 1

        banner_text = "Partijcentroiden niet beschikbaar — tonen individuele MP-trajecten als fallback."
        return fig, trace_count, banner_text

    # Otherwise plot the selected parties that are plottable; if the selection
    # leaves nothing, default to all plottable parties.
    plottable_set = set(plottable_parties)
    to_plot = [p for p in selected_parties if p in plottable_set] or plottable_parties

    for party in to_plot:
        vals = party_centroids[party]
        xs_raw = [v[0] for v in vals]
        ys_raw = [v[1] for v in vals]
        # customdata carries the unsmoothed coordinates, with gaps as NaN.
        custom_raw = [
            (_raw_or_nan(x), _raw_or_nan(y)) for x, y in zip(xs_raw, ys_raw)
        ]
        colour = PARTY_COLOURS.get(party, "#9E9E9E")
        fig.add_trace(
            go.Scatter(
                x=_ema_smooth(xs_raw, smooth_alpha),
                y=_ema_smooth(ys_raw, smooth_alpha),
                mode="lines+markers",
                name=party,
                text=windows,
                customdata=custom_raw,
                line=dict(color=colour, shape="spline", smoothing=1.3),
                marker=dict(color=colour, size=8),
            )
        )
        trace_count += 1

    return fig, trace_count, None


logger = logging.getLogger(__name__)

# Party colour palette (consistent across tabs): party name -> hex colour.
# Some parties appear under two spellings with the same colour
# ("CU"/"ChristenUnie", "NSC"/"Nieuw Sociaal Contract") so either form resolves.
PARTY_COLOURS: Dict[str, str] = {
    "VVD": "#1E73BE",
    "PVV": "#002366",
    "D66": "#00A36C",
    "CDA": "#4CAF50",
    "SP": "#E53935",
    "PvdA": "#D32F2F",
    "GroenLinks": "#388E3C",
    "GroenLinks-PvdA": "#2E7D32",
    "CU": "#0288D1",
    "SGP": "#F4511E",
    "PvdD": "#43A047",
    "FVD": "#6A1B9A",
    "JA21": "#7B1FA2",
    "BBB": "#8D6E63",
    "NSC": "#FF8F00",
    "Nieuw Sociaal Contract": "#FF8F00",  # alias used in mp_metadata
    "DENK": "#00897B",
    "50PLUS": "#7E57C2",
    "Volt": "#572AB7",
    "ChristenUnie": "#0288D1",
    "Unknown": "#9E9E9E",  # neutral grey for unmapped entities
}

# Ordered list of well-known parties for trajectory default selection.
# Keeps the chart readable without overwhelming users with all parties.
# NOTE(review): this list uses "CU", while _PARTY_NORMALIZE maps "CU" to
# "ChristenUnie" — confirm which spelling the trajectory data actually carries.
KNOWN_MAJOR_PARTIES = [
    "VVD",
    "PVV",
    "D66",
    "GroenLinks-PvdA",
    "GroenLinks",
    "PvdA",
    "CDA",
    "SP",
    "NSC",
    "CU",
    "BBB",
]


# Parties currently seated in the Tweede Kamer (2023 election cycle).
# These are the entity_ids as stored in svd_vectors for window='2025'.
# NOTE(review): _load_mp_vectors_by_party queries window_id='current_parliament',
# not '2025' — confirm which window this comment should name.
CURRENT_PARLIAMENT_PARTIES: frozenset[str] = frozenset(
    {
        "PVV",
        "VVD",
        "NSC",
        "BBB",
        "D66",
        "GroenLinks-PvdA",
        "CDA",
        "SP",
        "ChristenUnie",
        "SGP",
        "Volt",
        "DENK",
        "PvdD",
        "JA21",
        "FVD",
    }
)

# Normalize variant party names to canonical display names in CURRENT_PARLIAMENT_PARTIES.
# Keys are names as they may appear in the source data (aliases, former parties,
# one-member groups); every value is a member of CURRENT_PARLIAMENT_PARTIES.
# Names not listed here are used unchanged by callers (lookup via .get(name, name)).
_PARTY_NORMALIZE: dict[str, str] = {
    "Nieuw Sociaal Contract": "NSC",
    "CU": "ChristenUnie",
    "GL": "GroenLinks-PvdA",
    "GroenLinks": "GroenLinks-PvdA",
    "PvdA": "GroenLinks-PvdA",
    "Gündoğan": "Volt",  # confirmed Volt, left parliament 2023-12-05
    "Lid Keijzer": "BBB",  # Keijzer left CDA, joined BBB cabinet
    "Groep Markuszower": "PVV",  # Markuszower sits with PVV faction
}


# ---------------------------------------------------------------------------
# Cached loaders
# ---------------------------------------------------------------------------


@st.cache_data(show_spinner="Beschikbare tijdsvensters laden…")
def get_available_windows(db_path: str) -> List[str]:
    """List every distinct window_id present in svd_vectors, in ascending order.

    The database is opened read-only. A failing query is logged and results in
    an empty list; a failure to open the database propagates to the caller.
    """
    connection = duckdb.connect(database=db_path, read_only=True)
    query = "SELECT DISTINCT window_id FROM svd_vectors ORDER BY window_id"
    try:
        window_ids = [row[0] for row in connection.execute(query).fetchall()]
    except Exception:
        logger.exception("Failed to query available windows")
        window_ids = []
    finally:
        connection.close()
    return window_ids


@st.cache_data(show_spinner=False)
def get_uniform_dim_windows(db_path: str) -> List[str]:
    """List the windows whose dominant MP-vector dimension is large enough to use.

    A window can hold MP vectors of several lengths (for example after multiple
    pipeline runs). For each window the most frequent vector length is taken as
    its dominant dimension (ties go to the larger length). A window is returned
    only when that dominant dimension is at least 25 and at least 10 MP vectors
    have it, which keeps degenerate inputs out of the PCA step.

    The result is ordered by window_id. A failing query is logged and yields an
    empty list; a failure to open the database propagates to the caller.
    """
    connection = duckdb.connect(database=db_path, read_only=True)
    query = """
            WITH vec_dims AS (
                SELECT window_id, json_array_length(vector) AS dim
                FROM svd_vectors
                WHERE entity_type = 'mp'
            ),
            window_dim_counts AS (
                SELECT window_id, dim, COUNT(*) AS cnt
                FROM vec_dims
                GROUP BY window_id, dim
            ),
            dominant AS (
                SELECT DISTINCT ON (window_id) window_id, dim, cnt
                FROM window_dim_counts
                ORDER BY window_id, cnt DESC, dim DESC
            )
            SELECT window_id
            FROM dominant
            WHERE dim >= 25 AND cnt >= 10
            ORDER BY window_id
            """
    try:
        window_ids = [row[0] for row in connection.execute(query).fetchall()]
    except Exception:
        logger.exception("Failed to query uniform-dim windows")
        window_ids = []
    finally:
        connection.close()
    return window_ids


def _should_swap_axes(axis_def: dict) -> bool:
    """Return True if the Y axis is 'Links–Rechts' and the X axis is not.

    When true, caller should swap x/y positions and metadata so left-right
    is conventionally on the horizontal axis.
    """
    lr = "Links\u2013Rechts"
    return axis_def.get("y_label") == lr and axis_def.get("x_label") != lr


def _swap_axes(
    positions_by_window: dict,
    axis_def: dict,
) -> tuple:
    """Swap x and y in all positions and axis metadata.

    Pure function — returns (new_positions_by_window, new_axis_def).
    """
    new_positions: dict = {}
    for wid, pos_dict in positions_by_window.items():
        new_positions[wid] = {ent: (y, x) for ent, (x, y) in pos_dict.items()}

    new_ax = dict(axis_def)
    # Non-paired keys pass through unchanged
    # Swap paired scalar keys
    new_ax["x_label"] = axis_def.get("y_label")
    new_ax["y_label"] = axis_def.get("x_label")

    # Swap paired dict keys
    for x_key, y_key in [
        ("x_quality", "y_quality"),
        ("x_interpretation", "y_interpretation"),
        ("x_top_motions", "y_top_motions"),
        ("x_label_confidence", "y_label_confidence"),
        ("x_axis", "y_axis"),
    ]:
        new_ax[x_key] = axis_def.get(y_key)
        new_ax[y_key] = axis_def.get(x_key)

    return new_positions, new_ax


def _render_axis_motions(label: str, conf_pct: str, top: dict) -> None:
    """Render an axis heading followed by its top motions per direction.

    ``top`` maps "+" and "-" to sequences of (title, value) pairs; at most the
    first three pairs per direction are shown, and an empty or missing
    direction is skipped.
    """
    st.markdown(f"**{label}**{conf_pct}")
    for sign, icon in (("+", "➕"), ("-", "➖")):
        titles = top.get(sign, [])
        if not titles:
            continue
        summary = " · ".join(f"{t} ({d})" for t, d in titles[:3])
        st.markdown(f"&nbsp;&nbsp;{icon} {summary}")


@st.cache_data(show_spinner="2D posities berekenen (kan even duren)…")
def load_positions(
    db_path: str, window_size: str = "quarterly"
) -> Tuple[Dict[str, Dict[str, Tuple[float, float]]], Dict]:
    """Compute 2D positions per window using PCA on aligned SVD vectors.

    Args:
        db_path: Path to the DuckDB database.
        window_size: ``"annual"`` keeps only windows whose id contains no
            ``"-Q"``; any other value (default ``"quarterly"``) keeps every
            computed window.

    Returns:
        positions_by_window: {window_id: {entity_name: (x, y)}}
        axis_def: dict with x_axis, y_axis, method keys

        Both are empty when no usable window is available.
    """
    from analysis.political_axis import compute_2d_axes

    # Always compute PCA on ALL uniform-dim windows (quarterly + annual) so that
    # the principal components are determined by the full temporal spread of data.
    # Using only annual windows (11) causes PC1 to capture cross-temporal drift
    # instead of left-right ideology, resulting in a ~90° rotation.
    all_available = get_uniform_dim_windows(db_path)

    if not all_available:
        return {}, {}

    positions_by_window, axis_def = compute_2d_axes(
        db_path,
        window_ids=all_available,
        method="pca",
        pca_residual=True,
        normalize_vectors=True,
    )

    # Axis classification is optional: on any failure keep the unclassified
    # axis_def from compute_2d_axes.
    try:
        from analysis.axis_classifier import classify_axes

        axis_def = classify_axes(positions_by_window, axis_def, db_path)
    except Exception:
        logger.exception("classify_axes failed; using generic axis labels")

    # Axis orientation is guaranteed by compute_2d_axes via canonical party anchors
    # (Procrustes alignment + sign-fixing). We do NOT forcibly override axis labels
    # here so the classifier output (if available) can be surfaced conditionally in
    # the UI based on per-window confidence. Label selection is performed at render
    # time in the tabs so we can show fallback labels while still surfacing the
    # classifier interpretation and confidence when informative.

    # Filter displayed windows by window_size AFTER PCA computation.
    if window_size == "annual":
        annual_keys = {w for w in all_available if "-Q" not in w}
        positions_by_window = {
            w: v for w, v in positions_by_window.items() if w in annual_keys
        }

    return positions_by_window, axis_def


@st.cache_data(show_spinner="Partijkaart laden…")
def load_party_map(db_path: str) -> Dict[str, str]:
    """Load the {mp_name: party} mapping, with known aliases collapsed.

    The raw mapping comes from ``analysis.visualize._load_party_map``. The only
    alias applied here is "Nieuw Sociaal Contract" -> "NSC"; every other party
    name is passed through unchanged. Any error while loading is logged and
    produces an empty dict.
    """
    from analysis.visualize import _load_party_map

    aliases: Dict[str, str] = {
        "Nieuw Sociaal Contract": "NSC",
    }

    try:
        normalised: Dict[str, str] = {}
        for mp, party in _load_party_map(db_path).items():
            normalised[mp] = aliases.get(party, party)
        return normalised
    except Exception:
        logger.exception("Failed to load party map")
        return {}


@st.cache_data(show_spinner="Actieve Kamerleden laden…")
def load_active_mps(db_path: str) -> set:
    """Return the set of mp_name values that are currently seated in parliament.

    An MP is considered active if their mp_metadata row has tot_en_met IS NULL,
    meaning they have no recorded end date for their current seat.

    Any failure (opening the database or running the query) is logged and
    results in an empty set. The connection is always closed, including when
    the query raises.
    """
    con = None
    try:
        con = duckdb.connect(database=db_path, read_only=True)
        rows = con.execute(
            "SELECT mp_name FROM mp_metadata WHERE tot_en_met IS NULL"
        ).fetchall()
        return {r[0] for r in rows}
    except Exception:
        logger.exception("Failed to load active MPs")
        return set()
    finally:
        if con is not None:
            try:
                con.close()
            except Exception:
                # Closing is best-effort; the result (or the logged error)
                # above is what matters to the caller.
                logger.debug("Failed to close DuckDB connection", exc_info=True)


def compute_party_discipline(
    db_path: str,
    start_date: str,
    end_date: str,
) -> pd.DataFrame:
    """Compute per-party voting discipline for roll-call votes in a date range.

    Only individual MP vote rows are used (mp_name LIKE '%,%'), and only
    'voor' and 'tegen' votes are counted (matched case-insensitively); absent
    and abstaining MPs are excluded.

    Per motion and party, the score is the fraction of the party's counted MPs
    that voted with the party majority (a tie counts 'tegen' as the majority,
    because ties are broken alphabetically). The per-party discipline is the
    average of that score over all motions in the date range.

    Args:
        db_path: Path to the DuckDB database (opened read-only).
        start_date: Inclusive lower bound, cast to DATE by the query.
        end_date: Inclusive upper bound, cast to DATE by the query.

    Returns:
        DataFrame with columns [party, n_motions, discipline], sorted by
        discipline ascending. An empty DataFrame with those columns is
        returned on any database error; when no vote qualifies the query
        simply yields no rows.
    """
    conn = None
    try:
        conn = duckdb.connect(db_path, read_only=True)
        result = conn.execute(
            """
            WITH individual_votes AS (
                SELECT
                    motion_id,
                    party,
                    LOWER(vote) AS vote
                FROM mp_votes
                WHERE mp_name LIKE '%,%'
                  AND date >= CAST(? AS DATE)
                  AND date <= CAST(? AS DATE)
                  -- Filter on the lower-cased value so the filter agrees with
                  -- the LOWER(vote) projected above.
                  AND LOWER(vote) IN ('voor', 'tegen')
            ),
            vote_counts AS (
                SELECT
                    motion_id,
                    party,
                    vote,
                    COUNT(*) AS cnt
                FROM individual_votes
                GROUP BY motion_id, party, vote
            ),
            majority_vote AS (
                SELECT
                    motion_id,
                    party,
                    FIRST(vote ORDER BY cnt DESC, vote ASC) AS maj_vote,
                    SUM(cnt) AS total_mp_votes
                FROM vote_counts
                GROUP BY motion_id, party
            ),
            rice_per_motion AS (
                SELECT
                    mv.motion_id,
                    mv.party,
                    SUM(CASE WHEN vc.vote = mv.maj_vote THEN vc.cnt ELSE 0 END)
                        * 1.0 / mv.total_mp_votes AS rice
                FROM majority_vote mv
                JOIN vote_counts vc
                  ON mv.motion_id = vc.motion_id AND mv.party = vc.party
                GROUP BY mv.motion_id, mv.party, mv.total_mp_votes
            )
            SELECT
                party,
                COUNT(DISTINCT motion_id) AS n_motions,
                AVG(rice) AS discipline
            FROM rice_per_motion
            GROUP BY party
            ORDER BY discipline ASC
            """,
            [start_date, end_date],
        ).fetchdf()
        return result
    except Exception as exc:
        logger.warning("compute_party_discipline failed: %s", exc)
        return pd.DataFrame(columns=["party", "n_motions", "discipline"])
    finally:
        if conn is not None:
            try:
                conn.close()
            except Exception:
                # Best-effort close; never mask the result or the logged error.
                pass


def _load_mp_vectors_by_party(db_path: str) -> Dict[str, List[np.ndarray]]:
    """Group the SVD vectors of individual MPs by (normalised) party.

    Steps:
      1. Read mp_name/party rows from mp_metadata that overlap the period
         starting 2023-11-22, ordered by ``van`` ascending so that a later row
         for the same MP overrides an earlier one.
      2. Normalise party names through ``_PARTY_NORMALIZE``.
      3. Read MP vectors from the ``current_parliament`` window and keep those
         whose party is in ``CURRENT_PARLIAMENT_PARTIES``.

    Stored vectors may be JSON text, JSON bytes, a list, or another iterable;
    ``None`` components become 0.0. A value that cannot be iterated is skipped.

    Returns:
        {party_name: [np.ndarray, ...]} with one array per MP (length is
        whatever the stored vector has — presumably 50; verify against the
        pipeline).
    """
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        membership_rows = connection.execute(
            "SELECT mp_name, party FROM mp_metadata "
            "WHERE van >= '2023-11-22' OR tot_en_met IS NULL OR tot_en_met >= '2023-11-22' "
            "ORDER BY van ASC"
        ).fetchall()
        # Later rows overwrite earlier ones, so the latest assignment wins.
        party_of_mp: Dict[str, str] = {
            name: _PARTY_NORMALIZE.get(raw_party, raw_party)
            for name, raw_party in membership_rows
            if name and raw_party
        }

        vector_rows = connection.execute(
            "SELECT entity_id, vector FROM svd_vectors "
            "WHERE entity_type='mp' AND window_id='current_parliament'"
        ).fetchall()

        grouped: Dict[str, List[np.ndarray]] = {}
        for mp_id, stored in vector_rows:
            mp_party_name = party_of_mp.get(mp_id)
            if mp_party_name is None:
                continue
            if mp_party_name not in CURRENT_PARLIAMENT_PARTIES:
                continue

            if isinstance(stored, str):
                components = json.loads(stored)
            elif isinstance(stored, (bytes, bytearray)):
                components = json.loads(stored.decode())
            elif isinstance(stored, list):
                components = stored
            else:
                try:
                    components = list(stored)
                except Exception:
                    continue

            as_floats = np.array([0.0 if c is None else float(c) for c in components])
            if mp_party_name not in grouped:
                grouped[mp_party_name] = []
            grouped[mp_party_name].append(as_floats)

        return grouped
    finally:
        try:
            connection.close()
        except Exception:
            pass


@st.cache_data(show_spinner="Partijposities op SVD-assen laden…")
def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
    """Average the individual MP SVD vectors of each party into one vector.

    The per-party MP vectors come from ``_load_mp_vectors_by_party``; each
    party's vectors are averaged component-wise.

    Returns:
        {party_name: [float, ...]} — the mean vector per party as a plain
        list. Any error is logged and produces an empty dict.
    """
    try:
        mean_by_party: Dict[str, List[float]] = {}
        for party, vecs in _load_mp_vectors_by_party(db_path).items():
            mean_by_party[party] = np.array(vecs).mean(axis=0).tolist()
        return mean_by_party
    except Exception:
        logger.exception("Failed to load party axis scores")
        return {}


@st.cache_data(show_spinner="Partij-MP vectoren laden…")
def load_party_mp_vectors(db_path: str) -> Dict[str, List[np.ndarray]]:
    """Cached access to the per-party lists of individual MP SVD vectors.

    Delegates to ``_load_mp_vectors_by_party``, i.e. the same MP→party mapping
    that ``load_party_axis_scores`` averages over; intended as input for
    bootstrap CI computation.

    Returns:
        {party_name: [np.ndarray, ...]} with one array per MP. Any error is
        logged and produces an empty dict.
    """
    try:
        vectors_by_party = _load_mp_vectors_by_party(db_path)
    except Exception:
        logger.exception("Failed to load party MP vectors")
        return {}
    return vectors_by_party


@st.cache_data(show_spinner="Bootstrap CI berekenen…")
def _cached_bootstrap_cis(
    party_mp_vectors: Dict[str, List[np.ndarray]],
) -> Dict[str, Dict]:
    """Cache the result of ``compute_party_bootstrap_cis`` for the given vectors.

    The analysis function is imported lazily so importing this module stays
    cheap; its return value is passed through unchanged.
    """
    from analysis.political_axis import compute_party_bootstrap_cis

    intervals = compute_party_bootstrap_cis(party_mp_vectors)
    return intervals


@st.cache_data(show_spinner="Scree-plot laden…")
def load_scree_data(db_path: str) -> List[float]:
    """Fetch the values plotted in the scree plot.

    Returns whatever ``analysis.political_axis.compute_svd_spectrum`` yields
    for *db_path* — expected to be explained-variance percentages per SVD
    component in descending order (TODO confirm against that function). An
    import or computation error is logged and produces an empty list.
    """
    try:
        from analysis.political_axis import compute_svd_spectrum

        spectrum = compute_svd_spectrum(db_path)
    except Exception:
        logger.exception("Failed to load scree data")
        return []
    return spectrum


def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
    """Render a scree plot showing relative SVD component importance.

    Highlighted bars for the top-2 components (used in the compass); muted bars
    for the rest.  A cumulative-variance dashed line on the same y-axis helps
    spot the elbow.  A 50 % cumulative threshold line is drawn for reference.

    Args:
        importances: List of importance values sorted descending (from load_scree_data).
        n_show: How many components to display (default: first 15).
    """
    if not importances:
        return
    # importances are already EVR percentages summing to ~100 over all components.
    # Slice to n_show for display; cumulative line shows how much variance is covered.
    data = list(importances[:n_show])
    ranks = list(range(1, len(data) + 1))

    # Cumulative variance for the dashed overlay line
    cumsum = []
    running = 0.0
    for v in data:
        running += v
        cumsum.append(running)

    # Colour: first 2 bars highlighted (compass axes), rest muted
    n_highlight = 2
    bar_colours = [
        "#1565C0" if i < n_highlight else "#90CAF9" for i in range(len(data))
    ]

    fig = go.Figure()

    # Bars
    fig.add_trace(
        go.Bar(
            x=ranks,
            y=data,
            marker_color=bar_colours,
            hovertemplate="As %{x}<br><b>%{y:.1f}%</b> verklaarde variantie<extra></extra>",
            showlegend=False,
        )
    )

    # Cumulative variance line (dashed, warm amber)
    fig.add_trace(
        go.Scatter(
            x=ranks,
            y=cumsum,
            mode="lines+markers",
            line={"color": "#F57C00", "width": 2, "dash": "dot"},
            marker={"size": 5, "color": "#F57C00"},
            hovertemplate="As %{x}<br>Cumulatief: <b>%{y:.1f}%</b><extra></extra>",
            name="Cumulatief",
            showlegend=True,
        )
    )

    # 50 % reference line
    fig.add_hline(
        y=50,
        line_dash="dash",
        line_color="#BDBDBD",
        line_width=1,
        annotation_text="50%",
        annotation_position="right",
        annotation_font_color="#9E9E9E",
        annotation_font_size=11,
    )

    # Annotations on the top-2 bars showing their % value
    for i in range(min(n_highlight, len(data))):
        fig.add_annotation(
            x=ranks[i],
            y=data[i] + 0.3,
            text=f"{data[i]:.1f}%",
            showarrow=False,
            font={"size": 11, "color": "#1565C0"},
            yanchor="bottom",
        )

    fig.update_layout(
        height=280,
        margin={"l": 10, "r": 50, "t": 30, "b": 40},
        title={
            "text": "Belang per SVD-as",
            "font": {"size": 13, "color": "#555555"},
            "x": 0.02,
            "xanchor": "left",
        },
        legend={
            "orientation": "h",
            "x": 0.5,
            "xanchor": "center",
            "y": 1.08,
            "font": {"size": 11},
        },
        xaxis={
            "title": {"text": "As (rang)", "font": {"size": 11}},
            "tickmode": "linear",
            "tick0": 1,
            "dtick": 1,
            "showline": False,
            "showgrid": False,
        },
        yaxis={
            "title": {"text": "% van totale variantie", "font": {"size": 11}},
            "showline": False,
            "showgrid": True,
            "gridcolor": "#eeeeee",
            "ticksuffix": "%",
            "range": [0, max(cumsum) * 1.08],
        },
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        bargap=0.25,
    )
    st.plotly_chart(fig, use_container_width=True)


def _build_party_axis_figure(
    party_coords: Dict[str, Tuple[float, float]],
    comp_sel: int,
    theme: dict,
    bootstrap_data: Optional[Dict[str, Dict]] = None,
) -> Optional[go.Figure]:
    """Build a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`.

    Accepts explicit per-party 2D coordinates (x,y) and uses the component selection to
    pick the value (comp_sel==1 -> x, comp_sel==2 -> y). This makes the API explicit and
    avoids indexing into long SVD vectors.

    Returns go.Figure or None if no data available.
    """
    if not party_coords:
        return None

    if comp_sel not in (1, 2):
        raise ValueError(
            "_build_party_axis_figure only supports comp_sel 1 or 2 when using explicit coords"
        )

    axis_idx = comp_sel - 1
    flip = theme.get("flip", False)

    parties = []
    scores = []
    colours = []

    # Support two shapes for party_coords:
    #  - explicit 2D coords: (x, y)
    #  - full SVD vectors (len>2) where we should pick the axis_idx element
    for party, val in party_coords.items():
        try:
            # explicit (x, y)
            if hasattr(val, "__len__") and len(val) == 2:
                x, y = val
                score = float(x if axis_idx == 0 else y)
            else:
                # treat as sequence/array-like of full SVD vector
                score = float(val[axis_idx])

            if flip:
                score = -score
        except Exception:
            # skip malformed entries silently
            continue

        parties.append(party)
        scores.append(score)
        colours.append(PARTY_COLOURS.get(party, "#9E9E9E"))

    if not scores:
        return None

    # Build hover text: include N when bootstrap data available
    hover = []
    symbols = []
    if bootstrap_data:
        for p, s in zip(parties, scores):
            bd = bootstrap_data.get(p)
            if bd:
                n_mps = bd.get("n_mps", "?")
                ci_low = None
                ci_high = None
                try:
                    ci_low = float(bd["ci_lower"][axis_idx])
                    ci_high = float(bd["ci_upper"][axis_idx])
                except Exception:
                    pass
                if ci_low is not None and ci_high is not None:
                    hover.append(
                        f"{p}: {s:.3f} (N={n_mps}, 95%-BI: [{ci_low:.3f}, {ci_high:.3f}])"
                    )
                else:
                    hover.append(f"{p}: {s:.3f} (N={n_mps})")
                symbols.append("diamond" if n_mps == 1 else "circle")
            else:
                hover.append(f"{p}: {s:.3f}")
                symbols.append("circle")
        marker_kwargs = {"size": 14, "color": colours, "symbol": symbols}
    else:
        hover = [f"{p}: {s:.3f}" for p, s in zip(parties, scores)]
        marker_kwargs = {"size": 14, "color": colours}

    fig = go.Figure()
    x_min, x_max = min(scores) * 1.15, max(scores) * 1.15
    if x_min == x_max:
        x_min, x_max = x_min - 1, x_max + 1
    fig.add_trace(
        go.Scatter(
            x=[x_min, x_max],
            y=[0, 0],
            mode="lines",
            line={"color": "#cccccc", "width": 1},
            hoverinfo="skip",
            showlegend=False,
        )
    )

    scatter_kwargs = {
        "x": scores,
        "y": [0] * len(scores),
        "mode": "markers+text",
        "text": parties,
        "textposition": "top center",
        "marker": marker_kwargs,
        "hovertext": hover,
        "hoverinfo": "text",
        "showlegend": False,
    }
    fig.add_trace(go.Scatter(**scatter_kwargs))

    pos_pole = theme.get("positive_pole", "")
    neg_pole = theme.get("negative_pole", "")
    left_label = pos_pole if flip else neg_pole
    right_label = neg_pole if flip else pos_pole

    fig.update_layout(
        height=160,
        margin={"l": 10, "r": 10, "t": 10, "b": 30},
        xaxis={
            "title": f"← {left_label}  |  {right_label} →",
            "showticklabels": False,
            "showline": False,
            "showgrid": False,
            "zeroline": False,
        },
        yaxis={"visible": False, "range": [-1, 2]},
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
    )
    return fig


def _render_party_axis_chart(
    party_coords: Dict[str, Tuple[float, float]],
    comp_sel: int,
    theme: dict,
    bootstrap_data: Optional[Dict[str, Dict]] = None,
) -> None:
    """Draw the 1D party-position chart for SVD axis `comp_sel` in Streamlit.

    All arguments are forwarded to ``_build_party_axis_figure``. When that
    builder returns no figure, a short "not available" caption is shown in
    place of the chart.
    """
    figure = _build_party_axis_figure(party_coords, comp_sel, theme, bootstrap_data)
    if figure is not None:
        st.plotly_chart(figure, use_container_width=True)
        return
    st.caption("_Partijdata niet beschikbaar voor deze as._")


@st.cache_data(show_spinner="Moties laden…")
def load_motions_df(db_path: str) -> pd.DataFrame:
    """Read the motions table into a DataFrame over a read-only connection.

    The ``date`` column is parsed to datetimes (unparseable values become
    NaT) and a derived ``year`` column is added.

    Args:
        db_path: Path of the DuckDB database file.

    Returns:
        The motions DataFrame, or an empty DataFrame when the query or the
        post-processing raises (the error is logged). A failure to open the
        connection itself is not caught here.
    """
    con = duckdb.connect(database=db_path, read_only=True)
    try:
        cursor = con.execute(
            """
            SELECT id, title, description, date, policy_area,
                   voting_results, layman_explanation,
                   winning_margin, controversy_score, url
            FROM motions
            """
        )
        motions = cursor.fetchdf()
        parsed_dates = pd.to_datetime(motions["date"], errors="coerce")
        motions["date"] = parsed_dates
        motions["year"] = motions["date"].dt.year
    except Exception:
        logger.exception("Failed to load motions")
        motions = pd.DataFrame()
    finally:
        con.close()
    return motions


def query_similar(
    db_path: str,
    source_motion_id: int,
    vector_type: str = "fused",
    top_k: int = 10,
) -> pd.DataFrame:
    """Look up the highest-scoring similar motions in ``similarity_cache``.

    Args:
        db_path: Path of the DuckDB database file (opened read-only).
        source_motion_id: Motion whose neighbours are requested.
        vector_type: Value matched against ``similarity_cache.vector_type``.
        top_k: Maximum number of rows to return.

    Returns:
        A DataFrame with target id, score, window id and the target motion's
        title, date and policy area, ordered by score descending. An empty
        DataFrame is returned when the query raises (the error is logged); a
        failure to open the connection itself is not caught here.
    """
    params = [source_motion_id, vector_type, top_k]
    con = duckdb.connect(database=db_path, read_only=True)
    try:
        cursor = con.execute(
            """
            SELECT sc.target_motion_id, sc.score, sc.window_id,
                   m.title, m.date, m.policy_area
            FROM similarity_cache sc
            JOIN motions m ON m.id = sc.target_motion_id
            WHERE sc.source_motion_id = ?
              AND sc.vector_type = ?
            ORDER BY sc.score DESC
            LIMIT ?
            """,
            params,
        )
        similar = cursor.fetchdf()
    except Exception:
        logger.exception(
            "Failed to query similarity cache for motion %s", source_motion_id
        )
        similar = pd.DataFrame()
    finally:
        con.close()
    return similar


# ---------------------------------------------------------------------------
# Shared rendering helpers
# ---------------------------------------------------------------------------


def _render_voting_results(voting_results_json) -> None:
    """Render a voting_results JSON blob as a grouped voor/tegen/onthouden table.

    The JSON is stored as {party_or_mp: vote} where vote is one of
    'voor', 'tegen', 'onthouden', 'afwezig'. We group by vote for readability.
    """
    if not voting_results_json:
        return
    try:
        vdata = (
            json.loads(voting_results_json)
            if isinstance(voting_results_json, str)
            else voting_results_json
        )
        if not isinstance(vdata, dict) or not vdata:
            return
        # Group {vote: [actor, ...]}
        by_vote: Dict[str, List[str]] = {}
        for actor, vote in vdata.items():
            vote_str = str(vote).lower().strip()
            by_vote.setdefault(vote_str, []).append(str(actor))
        # Render in fixed order
        vote_order = ["voor", "tegen", "onthouden", "afwezig"]
        vote_emoji = {"voor": "✅", "tegen": "❌", "onthouden": "🟡", "afwezig": "⬜"}
        rows_shown = False
        for v in vote_order + [k for k in by_vote if k not in vote_order]:
            actors = by_vote.get(v)
            if not actors:
                continue
            emoji = vote_emoji.get(v, "▪️")
            st.markdown(
                f"**{emoji} {v.capitalize()}** ({len(actors)}): {', '.join(sorted(actors))}"
            )
            rows_shown = True
        if not rows_shown:
            st.caption("_Geen stemuitslag beschikbaar_")
    except Exception:
        pass


# ---------------------------------------------------------------------------
# Tab 1: Politiek Kompas
# ---------------------------------------------------------------------------


def _add_y_direction_annotations(fig: go.Figure) -> None:
    """Add ▲ Progressief / ▼ Conservatief labels above and below the Y axis."""
    common = dict(
        xref="paper",
        yref="paper",
        x=-0.07,
        showarrow=False,
        font=dict(size=11, color="#666666"),
    )
    fig.add_annotation(**common, y=1.02, text="▲ Progressief", xanchor="center")
    fig.add_annotation(**common, y=-0.06, text="▼ Conservatief", xanchor="center")


def _window_to_dates(window_id: str) -> tuple[str, str]:
    """Return (start_date, end_date) ISO strings for a given window_id.

    Annual windows like '2024' → ('2024-01-01', '2024-12-31').
    'current_parliament' → ('2023-11-22', '2099-12-31') (2023 formation date, open end).
    Unknown formats → ('2000-01-01', '2099-12-31') (effectively all time).
    """
    if window_id == "current_parliament":
        return ("2023-11-22", "2099-12-31")
    if re.fullmatch(r"\d{4}", window_id):
        return (f"{window_id}-01-01", f"{window_id}-12-31")
    m = re.fullmatch(r"(\d{4})-Q([1-4])", window_id)
    if m:
        year, q = int(m.group(1)), int(m.group(2))
        starts = {1: "01-01", 2: "04-01", 3: "07-01", 4: "10-01"}
        ends = {1: "03-31", 2: "06-30", 3: "09-30", 4: "12-31"}
        return (f"{year}-{starts[q]}", f"{year}-{ends[q]}")
    return ("2000-01-01", "2099-12-31")


def build_compass_tab(db_path: str, window_size: str) -> None:
    """Render the "Politiek Kompas" tab.

    Shows a 2D scatter of MP positions (or party centroids) for a selectable
    window, an optional expander with the motions that define each axis, and
    a voting-cohesion section for the parties visible on the compass.

    Args:
        db_path: Database path, forwarded to the loader helpers.
        window_size: Accepted for signature parity with the other tabs; the
            compass always loads annual windows, so it is not used here.
    """
    st.subheader("Politiek Kompas")
    st.markdown(
        "2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)."
    )

    # Compass always uses annual windows regardless of the sidebar window_size setting.
    positions_by_window, axis_def = load_positions(db_path, "annual")
    # load_positions may return None for axis_def when resources are missing
    # (e.g. classifier fallback or failed enrichment). Guard so UI rendering
    # code doesn't crash on axis_def.get calls.
    if axis_def is None:
        axis_def = {}
    if not positions_by_window:
        st.warning(
            "Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid."
        )
        return

    party_map = load_party_map(db_path)
    active_mps = load_active_mps(db_path)

    # Sort windows: year windows first (ascending), current_parliament last.
    year_windows = sorted(w for w in positions_by_window if w != "current_parliament")
    has_current = "current_parliament" in positions_by_window
    windows = year_windows + (["current_parliament"] if has_current else [])

    # Sparse years get a warning label in the window selector.
    _SPARSE_YEARS = {"2016", "2017", "2018"}
    # Axis interpretations are only captioned when quality falls below this.
    _THRESHOLD = 0.65

    def _window_label(w: str) -> str:
        if w == "current_parliament":
            return "Huidig parlement"
        if w in _SPARSE_YEARS:
            return f"{w} ⚠️"
        return w

    col1, col2 = st.columns([3, 1])
    with col2:
        window_idx = st.selectbox(
            "Jaar",
            options=windows,
            index=len(windows) - 1,  # default: last entry (current_parliament if present)
            format_func=_window_label,
        )
        level = st.radio(
            "Weergave",
            options=["Kamerleden", "Partijen"],
            index=0,
            horizontal=True,
        )
        min_mps = st.number_input(
            "Min. Kamerleden per partij",
            min_value=1,
            max_value=20,
            value=3,
            step=1,
            help="Partijen met minder dan dit aantal zetels worden niet weergegeven.",
        )

    pos = positions_by_window.get(window_idx, {})
    if not pos:
        st.info(f"Geen data voor venster {window_idx}")
        return

    # For current_parliament, restrict to MPs who are still seated.
    # Historical windows include all MPs active at the time — no restriction needed.
    if window_idx == "current_parliament":
        pos = {mp: xy for mp, xy in pos.items() if mp in active_mps}

    # Deduplicate MPs whose names appear both with and without a parenthetical first name,
    # e.g. "Dijk, J.P." and "Dijk, J.P. (Jimmy)". Keep the canonical (stripped) name and
    # average positions over all variants present.
    def _strip_paren(name: str) -> str:
        return re.sub(r"\s*\([^)]*\)", "", name).strip()

    # Collect every variant first so the position is the true mean (a pairwise
    # running average would over-weight later variants when there are 3+).
    variant_coords: Dict[str, List[Tuple[float, float]]] = {}
    party_by_base: Dict[str, str] = {}
    for raw_name, (x, y) in pos.items():
        base = _strip_paren(raw_name)
        variant_coords.setdefault(base, []).append((x, y))
        if base not in party_by_base:
            # Prefer the canonical name; fall back to the raw spelling so MPs
            # whose party-map key still carries the parenthetical are resolved.
            party = party_map.get(base) or party_map.get(raw_name)
            if party:
                party_by_base[base] = party

    rows = []
    for base, coords in variant_coords.items():
        rows.append(
            {
                "name": base,
                "x": sum(c[0] for c in coords) / len(coords),
                "y": sum(c[1] for c in coords) / len(coords),
                "party": party_by_base.get(base, "Unknown"),
            }
        )

    # Explicit columns: with no rows (e.g. the active-MP filter removed
    # everyone) a bare DataFrame would lack "party" and the filter below
    # would raise KeyError instead of showing the info message.
    df_pos = pd.DataFrame(rows, columns=["name", "x", "y", "party"])

    # Drop parties below the minimum MP threshold (unreliable centroids).
    party_counts = df_pos[df_pos["party"] != "Unknown"]["party"].value_counts()
    valid_parties = set(party_counts[party_counts >= min_mps].index)
    df_pos = df_pos[df_pos["party"].isin(valid_parties)]

    if df_pos.empty:
        st.info("Geen partijen met genoeg Kamerleden voor dit venster.")
        return

    # Axis titles are always shown on the compass; the confidence-based
    # interpretations live in the captions/expander below. Because Plotly
    # rotates the vertical axis title, explicit directional annotations are
    # added further down to make the polarity unambiguous.
    _raw_x = axis_def.get("x_label")
    _raw_y = axis_def.get("y_label")

    # Map internal/modal labels (e.g. "As 1") to user-facing labels. Imported
    # at function time to avoid module import cycles and keep explorer
    # lightweight. If the helper is unavailable, fall back to conventional
    # semantic defaults so the UI remains readable.
    try:
        from analysis.axis_classifier import display_label_for_modal

        _x_label = display_label_for_modal(_raw_x, "x")
        _y_label = display_label_for_modal(_raw_y, "y")
    except Exception:
        _x_label = _raw_x or "Links\u2013Rechts"
        _y_label = _raw_y or "Progressief\u2013Conservatief"

    if level == "Partijen":
        # Aggregate to party centroids
        df_party = df_pos.groupby("party", as_index=False).agg(
            x=("x", "mean"), y=("y", "mean"), n=("name", "count")
        )
        df_party["name"] = df_party["party"]
        colour_map = {
            p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_party["party"].unique()
        }
        fig = px.scatter(
            df_party,
            x="x",
            y="y",
            color="party",
            text="party",
            hover_name="party",
            hover_data={"party": False, "x": ":.3f", "y": ":.3f", "n": True},
            color_discrete_map=colour_map,
            title=f"Politiek Kompas — {_window_label(window_idx)} (partijen)",
            labels={
                "x": _x_label,
                "y": _y_label,
                "n": "Kamerleden",
            },
        )
        fig.update_traces(textposition="top center", marker_size=14)
    else:
        colour_map = {
            p: PARTY_COLOURS.get(p, "#9E9E9E") for p in df_pos["party"].unique()
        }
        fig = px.scatter(
            df_pos,
            x="x",
            y="y",
            color="party",
            hover_name="name",
            hover_data={"party": True, "x": ":.3f", "y": ":.3f"},
            color_discrete_map=colour_map,
            title=f"Politiek Kompas — {_window_label(window_idx)}",
            labels={"x": _x_label, "y": _y_label},
        )

    fig.update_layout(
        height=600,
        legend_title_text="Partij",
        xaxis={"range": [-1, 1]},
        yaxis={"range": [-0.6, 0.6]},
    )
    _add_y_direction_annotations(fig)

    with col1:
        st.plotly_chart(fig, use_container_width=True)
        # Only caption an axis interpretation when its quality is below the
        # threshold (missing quality counts as 1.0, i.e. no caption).
        _x_interp = axis_def.get("x_interpretation", {}).get(window_idx, "")
        _y_interp = axis_def.get("y_interpretation", {}).get(window_idx, "")
        if (
            _x_interp
            and axis_def.get("x_quality", {}).get(window_idx, 1.0) < _THRESHOLD
        ):
            st.caption(_x_interp)
        if (
            _y_interp
            and axis_def.get("y_quality", {}).get(window_idx, 1.0) < _THRESHOLD
        ):
            st.caption(_y_interp)

        # Motion expander — show which motions define each axis for this window
        x_top = axis_def.get("x_top_motions", {}).get(window_idx, {})
        y_top = axis_def.get("y_top_motions", {}).get(window_idx, {})
        x_conf = axis_def.get("x_label_confidence", {}).get(window_idx)
        y_conf = axis_def.get("y_label_confidence", {}).get(window_idx)
        evr = axis_def.get("explained_variance_ratio", [None, None])
        evr0 = evr[0] if evr else None

        _has_motion_data = bool(
            x_top.get("+") or x_top.get("-") or y_top.get("+") or y_top.get("-")
        )
        if _has_motion_data:
            with st.expander("🔍 Wat bepaalt deze assen?"):
                x_conf_pct = (
                    f" (vertrouwen: {x_conf:.0%})" if x_conf is not None else ""
                )
                y_conf_pct = (
                    f" (vertrouwen: {y_conf:.0%})" if y_conf is not None else ""
                )

                _render_axis_motions(f"Horizontale as: {_x_label}", x_conf_pct, x_top)
                _render_axis_motions(f"Verticale as: {_y_label}", y_conf_pct, y_top)

                if evr0 is not None:
                    st.caption(
                        f"De sterkste component verklaart {evr0:.1%} van de variantie in stemgedrag."
                    )

    # --- Voting discipline section ---
    _MIN_MOTIONS_FOR_DISCIPLINE = 5
    start_date, end_date = _window_to_dates(window_idx)
    disc_df = compute_party_discipline(db_path, start_date, end_date)

    st.subheader("Stemgedrag cohesie")
    if not disc_df.empty:
        disc_df = disc_df[disc_df["n_motions"] >= _MIN_MOTIONS_FOR_DISCIPLINE].copy()
    if disc_df.empty:
        st.caption(
            "Te weinig hoofdelijke stemmingen in dit venster voor een cohesieanalyse."
        )
        return

    # Only report on parties that are actually shown on the compass.
    compass_parties = set(df_pos["party"].unique())
    disc_df = disc_df[disc_df["party"].isin(compass_parties)].copy()
    if disc_df.empty:
        st.caption("Geen overlappende partijen tussen kompas en stemmingsdata.")
        return

    disc_df["discipline_pct"] = (disc_df["discipline"] * 100).round(1)
    disc_df["party_label"] = disc_df.apply(
        lambda r: f"{r['party']} ({int(r['n_motions'])} moties)", axis=1
    )

    bar_fig = px.bar(
        disc_df.sort_values("discipline"),
        x="discipline_pct",
        y="party_label",
        orientation="h",
        color="discipline_pct",
        color_continuous_scale="RdYlGn",
        range_color=[80, 100],
        labels={"discipline_pct": "Cohesie (%)", "party_label": "Partij"},
        title="Cohesie bij hoofdelijke stemmingen",
    )
    bar_fig.update_layout(
        height=max(300, len(disc_df) * 35 + 80),
        showlegend=False,
        coloraxis_showscale=False,
        yaxis_title="",
    )
    st.plotly_chart(bar_fig, use_container_width=True)

    summary_columns = ["party", "discipline_pct", "n_motions"]
    display_names = {
        "party": "Partij",
        "discipline_pct": "Cohesie (%)",
        "n_motions": "Moties",
    }
    top3 = disc_df.nlargest(3, "discipline")[summary_columns]
    bot3 = disc_df.nsmallest(3, "discipline")[summary_columns]

    col_a, col_b = st.columns(2)
    with col_a:
        st.markdown("**Meest eensgezind**")
        st.dataframe(
            top3.rename(columns=display_names),
            hide_index=True,
            use_container_width=True,
        )
    with col_b:
        st.markdown("**Meest verdeeld**")
        st.dataframe(
            bot3.rename(columns=display_names),
            hide_index=True,
            use_container_width=True,
        )


# ---------------------------------------------------------------------------
# Tab 2: Partij Trajectories
# ---------------------------------------------------------------------------


def build_trajectories_tab(db_path: str, window_size: str) -> None:
    """Render the "Partij Trajectories" tab: party centroid paths over windows.

    Per-window MP positions are loaded, mapped to parties via the party map and
    averaged into one centroid per party per window. One line is drawn per
    selected party. Two alternative per-MP plots exist:

    * when no MP can be mapped to a party, a per-MP fallback plot is shown;
    * when the environment variable ``EXPLORER_FORCE_SHOW_TRAJECTORIES`` is
      ``1``/``true``/``True``, the first six MPs are plotted directly.

    Diagnostics are recorded in the module-level
    ``_last_trajectories_diagnostics`` dict and shown in the UI when
    ``get_debug_trajectories_enabled()`` is true.

    Args:
        db_path: Database path, passed to ``load_positions`` and
            ``load_party_map``.
        window_size: Window size selector, passed to ``load_positions``.
    """
    st.subheader("Partij Trajectories")
    st.markdown("Hoe bewegen partijen over de tijdsvensters heen?")

    positions_by_window, axis_def = load_positions(db_path, window_size)
    if axis_def is None:
        axis_def = {}
    if not positions_by_window:
        # Record why the tab aborted early, then bail out.
        _traj_record_diagnostics(
            stage="load_positions_empty",
            positions_by_window_len=len(positions_by_window or ()),
        )
        _traj_st_call("warning", "Geen positiedata beschikbaar.")
        _traj_show_diagnostics(height=160)
        return

    party_map = load_party_map(db_path) or {}
    windows = sorted(positions_by_window.keys())
    centroids = _traj_party_centroids(positions_by_window, windows, party_map)
    all_parties_sorted = sorted(centroids)

    # No party could be resolved: tell the user, but do not return so the
    # per-MP fallback below can still draw something.
    if not all_parties_sorted:
        st.info(
            "Geen partijen beschikbaar om trajecten te tekenen. Controleer of de party mapping is geladen (mp_metadata) en of de minimum Kamerleden-instelling te hoog staat."
        )
        _traj_st_call("caption", f"Bekende partijen in party_map: {len(party_map)}")

    # Default selection: CDA, D66, VVD; else known major parties; else first six.
    default_parties = [p for p in ("CDA", "D66", "VVD") if p in centroids]
    if not default_parties:
        default_parties = [p for p in KNOWN_MAJOR_PARTIES if p in centroids]
    if not default_parties:
        default_parties = all_parties_sorted[:6]

    selected_parties = st.multiselect(
        "Selecteer partijen",
        options=all_parties_sorted,
        default=default_parties,
    )

    def _fixed_ema(values):
        # The per-MP plots have no smoothing controls; they use a fixed alpha.
        return _traj_ema_smooth(values, 0.35)

    # Fallback: no party-level centroids, so plot individual MPs instead.
    if not centroids:
        mp_positions = _traj_collect_mp_positions(positions_by_window, windows)
        if not mp_positions:
            _traj_record_diagnostics(
                stage="no_mp_positions", mp_positions_count=len(mp_positions)
            )
            _traj_st_call("info", "Geen positiedata beschikbaar voor trajectplotten.")
            _traj_show_diagnostics(height=160)
            return

        mp_list = sorted(mp_positions)
        selected_mps = st.multiselect(
            "Selecteer Kamerleden (fallback)", options=mp_list, default=mp_list[:6]
        )
        fig = go.Figure()
        for mp in selected_mps:
            _traj_add_trace(fig, mp, mp_positions[mp], _fixed_ema, "#888888", 6)
        _add_y_direction_annotations(fig)
        if selected_mps:
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.info(
                "Geen trajecten getekend: geen geselecteerde Kamerleden met voldoende data."
            )
        return

    # Developer override: bypass party filtering and plot the first six MPs.
    # Only triggers when explicitly enabled through the environment.
    if os.getenv("EXPLORER_FORCE_SHOW_TRAJECTORIES") in ("1", "true", "True"):
        mp_positions = _traj_collect_mp_positions(positions_by_window, windows)
        mp_list = sorted(mp_positions)
        if not mp_list:
            st.info("Geen MP-positiegegevens beschikbaar om te tonen.")
            return
        fig = go.Figure()
        for mp in mp_list[:6]:
            _traj_add_trace(fig, mp, mp_positions[mp], _fixed_ema, "#444444", 6)
        _add_y_direction_annotations(fig)
        st.plotly_chart(fig, use_container_width=True)
        return

    # Opt-in debug expander with counts and small samples of the plot inputs.
    # Entirely best-effort: a failure here must never break the tab.
    try:
        try:
            debug_checkbox = st.checkbox(
                "Enable trajectories diagnostics (show extra info)",
                value=get_debug_trajectories_enabled(),
            )
        except Exception:
            debug_checkbox = get_debug_trajectories_enabled()
        if debug_checkbox:
            with st.expander(
                "DEBUG: Trajectories data (showing diagnostics)", expanded=False
            ):
                st.write("windows (count):", len(windows))
                st.write("windows sample:", windows[:10])
                st.write("party_map entries:", len(party_map))
                st.write("parties with centroids:", len(all_parties_sorted))
                st.write("default_parties:", default_parties)
                st.write("selected_parties:", selected_parties)
                # min_mps is not a parameter of this function. Look it up at
                # module scope (None when absent) so a missing name does not
                # abort the remaining diagnostics.
                st.write("min_mps setting:", globals().get("min_mps"))
                sample = {p: len(centroids.get(p, {})) for p in all_parties_sorted[:8]}
                st.write("sample centroid window counts per party:", sample)
    except Exception:
        logger.debug("trajectories debug expander failed", exc_info=True)

    # Smoothing controls
    smoothing_method = st.selectbox(
        "Smoothing methode",
        options=["EMA", "Spline", "None"],
        index=0,
        help="EMA = exponential moving average; Spline = low-degree polynomial spline fit; None = raw centroids",
    )

    # The EMA alpha slider is only shown when EMA is selected; alpha=1.0 means
    # "no EMA smoothing" for the other methods.
    smooth_alpha = 1.0
    if smoothing_method == "EMA":
        smooth_alpha = st.slider(
            "Glad maken (EMA-\u03b1)",
            min_value=0.1,
            max_value=1.0,
            value=0.35,
            step=0.05,
            help=(
                "\u03b1=1.0 toont de ruwe data; lagere waarden maken de lijn gladder. "
                "Standaard 0.35 voor een goed evenwicht tussen detail en ruis."
            ),
        )

    def _selected_ema(values):
        return _traj_ema_smooth(values, smooth_alpha)

    # "Spline" uses the polynomial fit; "EMA" and "None" go through the EMA
    # helper (which is the identity at alpha=1.0).
    smooth = _traj_polyfit_smooth if smoothing_method == "Spline" else _selected_ema

    fig = go.Figure()
    trace_count = 0
    # Delegate plot selection to the testable helper, which returns
    # (fig, trace_count, banner_text). A non-None figure replaces ours.
    # NOTE(review): the party traces below are added on top of whatever the
    # helper returned — confirm the helper does not already add party traces,
    # otherwise legend entries are duplicated.
    try:
        fig2, trace_count2, banner_text = select_trajectory_plot_data(
            positions_by_window, party_map, windows, selected_parties, smooth_alpha
        )
        if fig2 is not None:
            fig = fig2
            trace_count = trace_count2
            if banner_text:
                _traj_st_call("caption", banner_text)
                _traj_record_diagnostics(banner_text=banner_text)
    except Exception:
        tb = traceback.format_exc()
        try:
            select_trajectory_plot_data._last_diagnostics = {"exception": tb}
        except Exception:
            logger.debug("could not attach diagnostics to helper", exc_info=True)
        _traj_record_diagnostics(stage="select_helper_exception", exception=tb)
        logger.exception("select_trajectory_plot_data failed")
        if get_debug_trajectories_enabled():
            _traj_st_call(
                "text_area", "select_trajectory_plot_data traceback", tb, height=240
            )

    for party in selected_parties:
        if party not in centroids:
            continue
        colour = PARTY_COLOURS.get(party, "#9E9E9E")
        _traj_add_trace(fig, party, centroids[party], smooth, colour, 8)
        trace_count += 1

    # The chart spans multiple windows, so axis titles are chosen from the
    # classifier confidence aggregated over windows.
    title_threshold = 0.65
    x_title = choose_trajectory_title(axis_def, "x", threshold=title_threshold)
    y_title = choose_trajectory_title(axis_def, "y", threshold=title_threshold)
    try:
        fig.update_layout(
            title="Partij trajectories",
            xaxis_title=x_title,
            yaxis_title=y_title,
            height=600,
            legend_title_text="Partij",
        )
    except Exception:
        # Layout is cosmetic; a figure object without layout support should
        # not prevent the traces from being shown.
        logger.warning("could not apply trajectories layout", exc_info=True)
    _add_y_direction_annotations(fig)

    _traj_record_diagnostics(trace_count=trace_count)
    debug_enabled = get_debug_trajectories_enabled()
    if trace_count == 0:
        # Explain the empty plot instead of rendering a blank chart.
        _traj_st_call(
            "info",
            "Geen trajecten getekend: geen geselecteerde partijen met voldoende data. Controleer de partijselectie en de 'Min. Kamerleden per partij' instelling.",
        )
        if debug_enabled:
            try:
                st.text_area(
                    "Trajectories diagnostics",
                    json.dumps(_last_trajectories_diagnostics, default=str),
                    height=240,
                )
            except Exception:
                _traj_st_call("json", _last_trajectories_diagnostics)
        return

    try:
        st.plotly_chart(fig, use_container_width=True)
    except Exception as exc:
        st.error(f"Trajectories rendering failed: {exc}")
        if debug_enabled:
            try:
                st.json(_last_trajectories_diagnostics)
            except Exception:
                _traj_st_call(
                    "text_area",
                    "Trajectories diagnostics (JSON failed)",
                    json.dumps(_last_trajectories_diagnostics, default=str),
                    height=240,
                )


def _traj_st_call(method: str, *args, **kwargs) -> None:
    """Call ``st.<method>(*args, **kwargs)`` best-effort; failures are logged, not raised."""
    try:
        getattr(st, method)(*args, **kwargs)
    except Exception:
        logger.debug("st.%s failed in trajectories tab", method, exc_info=True)


def _traj_record_diagnostics(**fields) -> None:
    """Merge ``fields`` into ``_last_trajectories_diagnostics`` (best-effort)."""
    try:
        _last_trajectories_diagnostics.update(fields)
    except Exception:
        logger.debug("could not record trajectories diagnostics", exc_info=True)


def _traj_show_diagnostics(height: int) -> None:
    """Show the diagnostics dict in a text area when debug mode is enabled (best-effort)."""
    try:
        if get_debug_trajectories_enabled():
            st.text_area(
                "Trajectories diagnostics",
                json.dumps(_last_trajectories_diagnostics, default=str),
                height=height,
            )
    except Exception:
        logger.debug("could not render trajectories diagnostics", exc_info=True)


def _traj_as_point(xy) -> Optional[Tuple[float, float]]:
    """Coerce a coordinate pair to ``(float, float)``; return None when malformed."""
    try:
        return float(xy[0]), float(xy[1])
    except (TypeError, ValueError, IndexError, KeyError):
        return None


def _traj_collect_mp_positions(
    positions_by_window: dict, windows: list
) -> Dict[str, Dict[str, Tuple[float, float]]]:
    """Pivot ``window -> mp -> xy`` into ``mp -> window -> (x, y)``, skipping malformed points."""
    mp_positions: Dict[str, Dict[str, Tuple[float, float]]] = {}
    for wid in windows:
        for mp_name, xy in positions_by_window.get(wid, {}).items():
            point = _traj_as_point(xy)
            if point is not None:
                mp_positions.setdefault(mp_name, {})[wid] = point
    return mp_positions


def _traj_party_centroids(
    positions_by_window: dict, windows: list, party_map: dict
) -> Dict[str, Dict[str, Tuple[float, float]]]:
    """Return ``party -> window -> mean (x, y)`` of the MPs mapped to that party.

    An MP is looked up in ``party_map`` by exact name first and then with any
    parenthetical part removed (e.g. "Dijk, J.P. (Jimmy)" -> "Dijk, J.P.").
    MPs without a party, or mapped to "Unknown", are ignored, as are
    malformed coordinates.
    """
    centroids: Dict[str, Dict[str, Tuple[float, float]]] = {}
    for wid in windows:
        per_party: Dict[str, List[Tuple[float, float]]] = {}
        for mp_name, xy in positions_by_window.get(wid, {}).items():
            party = party_map.get(mp_name)
            if not party:
                stripped = re.sub(r"\s*\([^)]*\)", "", mp_name).strip()
                party = party_map.get(stripped, "Unknown")
            if not party or party == "Unknown":
                continue
            point = _traj_as_point(xy)
            if point is not None:
                per_party.setdefault(party, []).append(point)
        for party, coords in per_party.items():
            centroids.setdefault(party, {})[wid] = (
                float(np.mean([c[0] for c in coords])),
                float(np.mean([c[1] for c in coords])),
            )
    return centroids


def _traj_ema_smooth(values: List[float], alpha: float) -> List[float]:
    """Apply an exponential moving average; ``alpha >= 1.0`` or empty input returns ``values`` as-is."""
    if not values or alpha >= 1.0:
        return values
    smoothed = [values[0]]
    for v in values[1:]:
        smoothed.append(alpha * v + (1 - alpha) * smoothed[-1])
    return smoothed


def _traj_polyfit_smooth(values: List[float]) -> List[float]:
    """Fit a polynomial of degree ``min(3, n - 1)`` over index -> value and evaluate it at the indices.

    Inputs with two or fewer points, or for which the fit fails, are returned
    unchanged.
    """
    n = len(values)
    if n <= 2:
        return values
    try:
        idx = np.arange(n, dtype=float)
        coeffs = np.polyfit(idx, np.array(values, dtype=float), deg=min(3, n - 1))
        return [float(v) for v in np.polyval(coeffs, idx)]
    except Exception:
        logger.debug("polynomial smoothing failed; using raw values", exc_info=True)
        return values


def _traj_add_trace(
    fig,
    name: str,
    series: Dict[str, Tuple[float, float]],
    smooth,
    colour: str,
    marker_size: int,
) -> None:
    """Add one lines+markers trace for ``series`` (``window -> (x, y)``) to ``fig``.

    ``smooth`` is a callable mapping a list of floats to a list of floats; it
    is applied to x and y separately. The raw values are kept in
    ``customdata`` so the hover can show both raw and smoothed coordinates.
    """
    wids_sorted = sorted(series)
    xs_raw = [series[w][0] for w in wids_sorted]
    ys_raw = [series[w][1] for w in wids_sorted]
    fig.add_trace(
        go.Scatter(
            x=smooth(xs_raw),
            y=smooth(ys_raw),
            mode="lines+markers",
            name=name,
            text=wids_sorted,  # full window ID for hover
            customdata=[(float(rx), float(ry)) for rx, ry in zip(xs_raw, ys_raw)],
            line=dict(color=colour, shape="spline", smoothing=1.3),
            marker=dict(color=colour, size=marker_size),
            hovertemplate=(
                f"<b>{name}</b><br>"
                "venster: %{text}<br>"
                "x (smoothed): %{x:.3f}<br>"
                "x (raw): %{customdata[0]:.3f}<br>"
                "y (smoothed): %{y:.3f}<br>"
                "y (raw): %{customdata[1]:.3f}<extra></extra>"
            ),
        )
    )


def choose_trajectory_title(axis_def: dict, axis: str, threshold: float = 0.65) -> str:
    """Choose a short trajectory axis title based on aggregated confidence.

    The per-window confidences under ``"<axis>_label_confidence"`` are
    averaged, ignoring ``None`` entries. When that mean is at least
    ``threshold`` and ``"<axis>_label"`` is non-empty, the label is returned.

    Otherwise the compact fallback ("As 1" for ``axis == "x"``, "As 2" for
    anything else) is passed through
    ``analysis.axis_classifier.display_label_for_modal``; if that helper
    cannot be imported or raises, the compact fallback itself is returned.

    Args:
        axis_def: Axis definition mapping; ``None`` is treated as empty.
        axis: ``"x"`` or ``"y"``.
        threshold: Minimum mean confidence required to use the axis label.

    Returns:
        The axis title to display.
    """
    axis_def = axis_def or {}
    conf_map = axis_def.get(f"{axis}_label_confidence", {}) or {}
    vals = [v for v in conf_map.values() if v is not None]
    label = axis_def.get(f"{axis}_label")
    if vals and label and float(sum(vals) / len(vals)) >= threshold:
        return label

    fallback_modal = "As 1" if axis == "x" else "As 2"
    # Prefer the user-facing semantic fallback via the classifier helper.
    try:
        from analysis.axis_classifier import display_label_for_modal

        return display_label_for_modal(fallback_modal, axis)
    except Exception:
        return fallback_modal


# ---------------------------------------------------------------------------
# Tab 3: Motie Zoeken
# ---------------------------------------------------------------------------


def build_search_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Zoeken" tab: filter motions and show the top matches.

    Motions are filtered in memory by title substring (case-insensitive),
    year range and minimum controversy score, sorted by controversy
    (descending), and the first 50 are shown as expanders with metrics, the
    voting breakdown, a source link and similar motions.

    Args:
        db_path: Database path, passed to ``load_motions_df`` and
            ``query_similar``.
        show_rejected: When false, rows whose stripped title equals
            "Verworpen." are hidden.
    """
    st.subheader("Motie Zoeken")

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        df = df[df["title"].fillna("").str.strip() != "Verworpen."]

    def _as_float(value, default: float = 0.0) -> float:
        # Null/NaN/non-numeric cells would otherwise crash the ":.2f" format
        # (None) or render as "nan"; fall back to the default instead.
        try:
            number = float(value)
        except (TypeError, ValueError):
            return default
        return default if pd.isna(number) else number

    # Controls
    col1, col2, col3 = st.columns([2, 1, 1])
    with col1:
        query = st.text_input(
            "Zoek op titel", placeholder="bijv. stikstof, klimaat, wonen"
        )
    with col2:
        years = sorted(df["year"].dropna().astype(int).unique().tolist())
        if years:
            year_range = st.select_slider(
                "Jaar", options=years, value=(years[0], years[-1])
            )
        else:
            year_range = (2019, 2024)
    with col3:
        min_controversy = st.slider(
            "Min. controverse", min_value=0.0, max_value=1.0, value=0.0, step=0.05
        )

    # Apply filters in-memory
    working = df[(df["year"] >= year_range[0]) & (df["year"] <= year_range[1])]
    if min_controversy > 0:
        working = working[working["controversy_score"] >= min_controversy]
    if query:
        needle = query.lower()
        titles = working["title"].fillna("").str.lower()
        working = working[titles.str.contains(needle, regex=False)]

    working = working.sort_values(by="controversy_score", ascending=False)
    st.caption(f"{len(working)} resultaten (top 50 getoond)")

    for _, row in working.head(50).iterrows():
        raw_title = row.get("title")
        # A NaN title is truthy, so test for a real non-empty string instead
        # of relying on ``or``.
        if isinstance(raw_title, str) and raw_title:
            title = raw_title
        else:
            title = f"Motie #{row['id']}"
        date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
        controversy = _as_float(row.get("controversy_score"))
        margin = _as_float(row.get("winning_margin"))
        with st.expander(f"**{title}** — {date_str} — 🔥 {controversy:.2f}"):
            cols = st.columns(3)
            cols[0].metric("Controverse", f"{controversy:.2f}")
            cols[1].metric("Marge", f"{margin:.2f}")
            cols[2].metric("Jaar", int(row["year"]) if pd.notna(row["year"]) else "?")

            # Voting breakdown
            _render_voting_results(row.get("voting_results"))

            # Link to original motion
            url = row.get("url")
            if url and str(url).startswith("http"):
                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

            # Similar motions
            sim = query_similar(db_path, int(row["id"]), top_k=5)
            if sim.empty:
                st.caption("_Nog geen vergelijkbare moties beschikbaar_")
                continue
            st.markdown("**Vergelijkbare moties:**")
            for _, s in sim.iterrows():
                s_date = (
                    pd.to_datetime(s["date"]).strftime("%Y")
                    if pd.notna(s.get("date"))
                    else ""
                )
                st.markdown(
                    f"- {s.get('title', 'Onbekend')} *(score: {s['score']:.3f}, {s_date})*"
                )


# ---------------------------------------------------------------------------
# Tab 4: Motie Browser
# ---------------------------------------------------------------------------


def build_browser_tab(db_path: str, show_rejected: bool) -> None:
    """Render the "Motie Browser" tab: a sortable motion table plus a detail panel.

    The table can be filtered by year and minimum controversy score and sorted
    by date (newest first), controversy (highest first) or margin (smallest
    first). The detail panel looks up a motion by ID and shows its title,
    date, source link, voting breakdown and similar motions.

    Args:
        db_path: Database path, passed to ``load_motions_df`` and
            ``query_similar``.
        show_rejected: When false, rows whose stripped title equals
            "Verworpen." are hidden.
    """
    st.subheader("Motie Browser")

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar.")
        return

    if not show_rejected:
        df = df[df["title"].fillna("").str.strip() != "Verworpen."]

    # Controls
    col1, col2, col3 = st.columns(3)
    with col1:
        years = sorted(df["year"].dropna().astype(int).unique().tolist())
        year_filter = st.selectbox("Jaar", ["(Alle)"] + [str(y) for y in years])
    with col2:
        min_controversy_b = st.slider(
            "Min. controverse",
            min_value=0.0,
            max_value=1.0,
            value=0.0,
            step=0.05,
            key="browser_controversy",
        )
    with col3:
        sort_by = st.selectbox("Sorteren op", ["Datum (nieuw)", "Controverse", "Marge"])

    # Filter
    working = df
    if year_filter != "(Alle)":
        working = working[working["year"] == int(year_filter)]
    if min_controversy_b > 0:
        working = working[working["controversy_score"] >= min_controversy_b]

    # Sort option -> (column, ascending)
    sort_map = {
        "Datum (nieuw)": ("date", False),
        "Controverse": ("controversy_score", False),
        "Marge": ("winning_margin", True),
    }
    sort_col, sort_asc = sort_map[sort_by]
    working = working.sort_values(by=sort_col, ascending=sort_asc)

    # Display table (only the columns that are actually present)
    display_cols = ["id", "title", "date", "controversy_score", "winning_margin"]
    available_display = [c for c in display_cols if c in working.columns]
    st.dataframe(
        working[available_display].reset_index(drop=True),
        use_container_width=True,
        height=350,
    )

    st.divider()

    # Detail panel: the ID input is bounded by the filtered table, with fixed
    # defaults when the filters leave nothing.
    st.markdown("**Detail weergave** — vul een motie-ID in:")
    has_rows = not working.empty
    sel_id = st.number_input(
        "Motie ID",
        min_value=int(working["id"].min()) if has_rows else 1,
        max_value=int(working["id"].max()) if has_rows else 99999,
        value=int(working["id"].iloc[0]) if has_rows else 1,
        step=1,
    )
    motion_row = df[df["id"] == sel_id]
    if motion_row.empty:
        return

    row = motion_row.iloc[0]
    raw_title = row.get("title")
    # A NaN title is truthy, so test for a real non-empty string.
    title = raw_title if isinstance(raw_title, str) and raw_title else "Onbekend"
    st.markdown(f"### {title}")

    # A null/NaN score would crash the ":.2f" format (None) or show "nan".
    try:
        controversy = float(row.get("controversy_score", 0))
    except (TypeError, ValueError):
        controversy = 0.0
    if pd.isna(controversy):
        controversy = 0.0

    date_str = row["date"].strftime("%d %b %Y") if pd.notna(row["date"]) else "?"
    st.caption(f"📅 {date_str}  | 🔥 Controverse: {controversy:.2f}")

    # Link to original source
    url = row.get("url")
    if url and str(url).startswith("http"):
        st.markdown(f"[🔗 Bekijk op Tweede Kamer]({url})")

    # Voting breakdown
    st.markdown("**Stemuitslag:**")
    _render_voting_results(row.get("voting_results"))

    # Similar motions
    sim = query_similar(db_path, int(sel_id), top_k=10)
    if sim.empty:
        st.caption("_Nog geen vergelijkbare moties beschikbaar voor deze motie_")
        return
    st.markdown("**Vergelijkbare moties:**")
    # Same availability filter as the main table, so a missing column (e.g.
    # policy_area) does not raise KeyError.
    sim_cols = [
        c for c in ("title", "score", "date", "policy_area") if c in sim.columns
    ]
    st.dataframe(sim[sim_cols], use_container_width=True)


def _coerce_motion_id(raw: object) -> Optional[int]:
    """Return *raw* as an integer motion id, or None when it is missing or not int-like."""
    if raw is None:
        return None
    try:
        return int(raw)
    except (TypeError, ValueError, OverflowError):
        return None


def _motion_score(motion: dict) -> float:
    """Return the motion's SVD score as a float; missing or unparseable scores count as 0.0."""
    try:
        return float(motion.get("score", 0.0))
    except (TypeError, ValueError):
        return 0.0


def _render_svd_pole_column(
    header: str,
    arrow: str,
    motions: list,
    motion_details: Dict[int, tuple],
) -> None:
    """Render one pole of an SVD axis: a header followed by one expander per motion.

    Args:
        header: Markdown header line for the column (already formatted).
        arrow: Marker prepended to every expander title.
        motions: Motion dicts from the top-SVD JSON (keys used: ``motion_id``, ``title``).
        motion_details: Rows fetched from the ``motions`` table, keyed by motion id, in the
            column order ``(id, title, date, policy_area, url, body_text, voting_results)``.
    """
    st.markdown(header)
    for motion in motions:
        raw_id = motion.get("motion_id")
        title = motion.get("title") or f"Motie #{raw_id}"
        with st.expander(f"{arrow} {title}"):
            # Ids that cannot be parsed were skipped by the batch fetch, so they
            # simply have no metadata here instead of crashing the whole tab.
            mid = _coerce_motion_id(raw_id)
            row = motion_details.get(mid) if mid is not None else None
            if not row:
                st.caption("_Geen metadata beschikbaar_")
                continue

            date_str = str(row[2])[:10] if row[2] is not None else "?"
            st.caption(f"📅 {date_str}  |  {row[3] or '—'}")
            if row[4] and str(row[4]).startswith("http"):
                st.markdown(f"[🔗 Bekijk op Tweede Kamer]({row[4]})")
            if row[5]:
                # NOTE(review): this nests an expander inside an expander, which some
                # Streamlit releases reject — confirm against the pinned version.
                with st.expander("Toon volledige tekst"):
                    st.write(row[5])
            _render_voting_results(row[6])


def build_svd_components_tab(db_path: str) -> None:
    """Render the SVD components tab: themes, party axis chart and top motions per axis.

    Reads ``thoughts/explorer/top_svd_top_motions.json`` (path relative to the current
    working directory) and shows a selector over the components found in that file. For
    the selected component it renders the theme explanation, the party axis chart and
    two columns with the top motions of the negative and positive pole.

    Args:
        db_path: Path of the DuckDB database; opened read-only for the motion details
            and passed on to the module's loader helpers.

    Returns:
        None. Returns early (after showing a message) when the JSON file is missing,
        unreadable, or contains no usable rows.
    """
    # Political polarisation themes per SVD component (1-indexed, window=2025)
    # Produced by per-axis analysis of all 10 unique top motions (zero cross-axis overlap).
    # Values are strings except "flip", which is a bool.
    SVD_THEMES: dict[int, dict[str, object]] = {
        1: {
            "label": "Links-rechts hoofdas",
            "explanation": (
                "De dominante dimensie van het parlement: de klassieke links-rechts tegenstelling "
                "die het meeste verschil in stemgedrag verklaart. Aan de rechterkant (PVV, SGP, VVD, "
                "ChristenUnie) staan moties over defensie-uitbreiding, NAVO-verplichtingen, "
                "juridische ruimte voor drones en gaswinning. Aan de linkerkant (PvdD, SP, DENK, "
                "GroenLinks-PvdA) staan moties over huurverlaging, het veroordelen van "
                "antipersoneelslandmijnen, het opzeggen van het militaire verdrag met Israël en het "
                "oprichten van zorgbuurthuizen. De scheidslijn loopt dwars door thema's als "
                "veiligheid, economie, internationaal recht en sociale bescherming."
            ),
            "positive_pole": "Nationalistisch-conservatief: PVV, SGP, VVD, ChristenUnie",
            "negative_pole": "Progressief-links: PvdD, SP, DENK, GroenLinks-PvdA",
            "flip": False,
        },
        2: {
            "label": "Populistisch nationalisme versus institutioneel progressivisme",
            "explanation": (
                "Deze as scheidt het populistisch-nationalistische bloc (PVV, FVD, Groep Markuszower, "
                "BBB) van het volledige overige parlement. Alleen PVV (+18), FVD (+4) en Groep "
                "Markuszower (+2) scoren positief; alle andere partijen scoren negatief, inclusief "
                "VVD (−15), CDA (−14), SGP (−25) en ChristenUnie (−59). Positieve moties: artsen "
                "vrijpleiten voor hydroxychloroquine/ivermectine, Syriërs terugsturen, geen geld "
                "aan Jordanië, tijdelijke bescherming Oekraïne beëindigen. Negatieve moties: "
                "digitale toegankelijkheid Caribisch Nederland, ethiekprogramma Defensie, zorg voor "
                "slachtoffers bombardement Hawija, zorgkwaliteitsstandaarden. Dit is geen links-rechts "
                "verdeling maar een nativistisch-populistisch vs. institutioneel onderscheid."
            ),
            "positive_pole": "Populistisch-nationalistisch: PVV, FVD, Groep Markuszower, BBB",
            "negative_pole": "Institutioneel: alle overige partijen — van VVD en SGP tot GroenLinks-PvdA en Volt",
            "flip": False,
        },
        3: {
            "label": "Verzorgingsstaat versus bezuinigingen en marktwerking",
            "explanation": (
                "Deze as weerspiegelt de spanning tussen staatsingrijpen en marktliberalisme, "
                "aangescherpt door de kabinetscrisis van 2025. Aan de positieve kant staan moties "
                "die bezuinigingen op zorg en het gemeentefonds willen terugdraaien, winstuitkeringen "
                "in de zorg verbieden en publieke controle over ziekenhuisfusies eisen. SP, PvdD, "
                "GroenLinks-PvdA en PVV stemmen hier gelijk — ondanks hun tegengestelde PC1-posities. "
                "Aan de negatieve kant staan moties "
                "over marktwerking in de zorg, fiscale bedrijfsopvolgingsfaciliteiten (VVD), "
                "doorgaan met besturen ondanks de kabinetscrisis (VVD/Yeşilgöz) en defensie-"
                "uitgaven van 3,5% bbp."
            ),
            "positive_pole": "Pro-verzorgingsstaat: SP, PvdD, GroenLinks-PvdA, PVV (anti-bezuinigingen)",
            "negative_pole": "Marktliberaal en fiscaal conservatief: VVD, D66, CDA, SGP",
            "flip": True,
        },
        4: {
            "label": "Pragmatisch centrisme versus ideologische radicaliteit",
            "explanation": (
                "De gevestigde centrumpartijen (D66, CDA, VVD, 50PLUS) staan tegenover zowel "
                "rechts-radicale als identiteitspolitieke posities. Aan de positieve kant staan "
                "moties over openbare toiletten, vaderbetrokkenheid bij opvoeding, internationale "
                "samenwerking met Australië en Canada, en long covid-expertise. Dit zijn pragmatische, "
                "institutionele beleidsposities. Aan de negatieve kant staan moties over een "
                "migratiesaldo-cap van 60.000, het verlaten van de WHO, kinderen in pleeggezinnen "
                "van hetzelfde geslacht (FVD) en de bescherming van religieuze schoolidentiteit "
                "via artikel 23. De negatieve pool combineert populistisch-rechts met "
                "identiteitsgerichte posities van zowel rechts als links."
            ),
            "positive_pole": "Constructief centrum: D66, CDA, VVD, 50PLUS — pragmatisch en internationaal",
            "negative_pole": "Radicaal-ideologisch: FVD, Groep Markuszower (rechts), ChristenUnie, DENK (religieus/identiteit)",
            "flip": True,
        },
        5: {
            "label": "Christelijk-sociaal communitarisme",
            "explanation": (
                "Deze as scheidt partijen die gemeenschapszorg, burgerplicht en informele "
                "ondersteuningsstructuren benadrukken van partijen die individuele vrijheden en "
                "progressieve maatschappelijke hervorming voorstaan. Aan de positieve kant staan "
                "moties over schuldhulpverlening via vrijwilligersorganisaties, de maatschappelijke "
                "diensttijd voor jongeren met een afstand tot de arbeidsmarkt, en de gastouderopvang. "
                "ChristenUnie, SGP en CDA voeren hier de toon; ook D66 scoort positief door steun "
                "aan sociaal beleid. Aan de negatieve kant staan moties over wettelijke erkenning "
                "van meerouderschap, abortusrecht in het EU-Handvest, armoedebeleid en "
                "buitenlandse beïnvloeding. PvdD, GroenLinks-PvdA en VVD scoren hier negatief."
            ),
            "positive_pole": "Gemeenschapsgericht: ChristenUnie, SGP, CDA, D66 — vrijwilligers, diensttijd, zorgsystemen",
            "negative_pole": "Individualistisch-progressief: PvdD, GroenLinks-PvdA, VVD, PVV",
            "flip": False,
        },
        6: {
            "label": "Klimaat, energie en culturele integratie",
            "explanation": (
                "Aan de positieve kant staan moties die LNG-capaciteit prefereren als alternatief "
                "voor strenge vulgraadverplichtingen, kernenergie als volwaardig CO₂-arm onderdeel "
                "van de energiemix willen erkennen op COP30, en discriminatie- en inclusiemeldpunten "
                "willen inventariseren. SGP, JA21, FVD en PVV scoren sterk positief. Aan de "
                "negatieve kant staan moties die fossiele-industrie-vertegenwoordigers willen weren "
                "van klimaatconferenties, structureel overleg met moslimgemeenschappen willen bij "
                "integratiebeleid, en aanvallen van Israël op Libanon veroordelen. "
                "PvdD, GroenLinks-PvdA, Volt en D66 scoren negatief. "
                "Deze as combineert energieideologie met culturele polarisatie rondom klimaat, "
                "integratie en buitenlandspolitiek."
            ),
            "positive_pole": "Pro-fossiel, nationaal energiebeleid: SGP, JA21, FVD, PVV",
            "negative_pole": "Klimaatgericht en inclusief: PvdD, GroenLinks-PvdA, Volt, D66",
            "flip": False,
        },
        7: {
            "label": "Bestuurlijk pragmatisme en implementatie (indicatief)",
            "explanation": (
                "Een residuele as die overwegend beleidsdossiers uit 2024 (vorige parlementaire "
                "periode) omvat. De scores zijn smal (max ~11 punten) en de partijcombinaties "
                "ideologisch divers — dit label is indicatief. Aan de positieve kant staan "
                "pragmatische bestuursmoties: een compleet kostenoverzicht van producten van eigen "
                "bodem, papieren schoolboeken voor basisvaardigheden, een invoeringstoets voor het "
                "minimumloon en de A2-snelwegplanning. ChristenUnie, Volt, DENK en SP scoren "
                "positief. Aan de negatieve kant staan meer ideologisch geladen moties: een "
                "landelijk stookverbod (PvdD), het strafbaar stellen van verbranding van religieuze "
                "geschriften (DENK), chroom-6 schadevergoedingen en tegenhouden van nieuwe "
                "gaswinning. GroenLinks-PvdA, VVD, FVD en JA21 scoren negatief."
            ),
            "positive_pole": "Praktisch-bestuurlijk: ChristenUnie, Volt, SGP, DENK, SP",
            "negative_pole": "Ideologisch-principieel: GroenLinks-PvdA, VVD, FVD, JA21",
            "flip": True,
        },
        8: {
            "label": "Europese defensie-integratie (indicatief)",
            "explanation": (
                "Aan de positieve kant staan moties die pleiten voor militaire mobiliteit als "
                "topprioriteit in EU/NAVO-verband en toewerken naar een militair Schengengebied, "
                "35% van defensiematerieel Europees inkopen en een Europees defensie-R&D-instituut "
                "oprichten. Ook het Nationaal Groeifonds en gewasbeschermingsonderzoek vallen "
                "positief. Volt en D66 scoren sterk positief. Aan de negatieve kant staan moties "
                "over ketenverantwoordelijkheid bij toeslagen (DENK), het coronaoversterfte-onderzoek "
                "(PVV/BBB), energiecontracten en huisvestingsregulering. SP (−39), DENK (−35) en "
                "PvdD (−26) scoren sterk negatief — dit betekent dat zij actief tégen deze "
                "EU-defensiemoties stemmen, niet simpelweg het thema negeren. Volt (N=1) domineert "
                "de positieve pool maar is als centroïde van één Kamerlid statistisch onbetrouwbaar."
            ),
            "positive_pole": "Pro-EU defensie en innovatie: Volt, D66",
            "negative_pole": "Nationaal/pacifistisch of binnenlandsgericht: SP, DENK, PvdD, 50PLUS",
            "flip": False,
        },
        9: {
            "label": "Decentraal bestuur en gemeenschapswaarden (indicatief)",
            "explanation": (
                "Aan de positieve kant staan moties over naleving van de Financiële-verhoudingswet "
                "voor gemeenten, beperking van arbeidsmigratie binnen de EU, een nieuwe "
                "tandartsopleiding in Rotterdam, een actieplan tegen misbruik van hallucinerende "
                "geneesmiddelen en een oplossing voor milieuproblemen op Bonaire. SGP en "
                "ChristenUnie scoren sterk positief; ook DENK en SP. Aan de negatieve kant staan "
                "moties over een moratorium op geitenstallen, een verbod op gokadvertenties, "
                "verduidelijking van gronden voor voorlopige hechtenis, een leegstandbelasting voor "
                "woningen en end-to-end-encryptie. D66, JA21 en PVV scoren negatief. Deze as "
                "scheidt een nadruk op decentrale dienstverlening en gemeenschapsregulering van "
                "progressieve systeem- en rechtshervorming."
            ),
            "positive_pole": "Lokaal en gemeenschapsgericht: SGP, ChristenUnie, DENK, SP",
            "negative_pole": "Progressieve systemen en rechten: D66, JA21, PVV",
            "flip": True,
        },
        10: {
            "label": "Institutioneel toezicht en handhaving (indicatief)",
            "explanation": (
                "De tiende as vangt resterende variantie op en scheidt partijen die sceptisch zijn "
                "over staatstoezicht van partijen die strikte regulering en handhaving steunen. "
                "Aan de positieve kant staan moties over minder tijdsintensieve schoolinspecties, "
                "het recht van toeslagenouders op hun persoonlijk dossier, behoud van de "
                "tegemoetkoming voor arbeidsongeschikten en een verlaging van de leeftijdsdrempel "
                "voor kindgesprekken. DENK, SP en PvdD scoren positief. Aan de negatieve kant "
                "staan moties over een aangifteplicht voor scholen bij veiligheidsincidenten, een "
                "rookverbod in auto's met kinderen, braakliggende landbouwgrond en verhoogd "
                "beloningsgeld voor tipgevers. GroenLinks-PvdA scoort opvallend sterk negatief, "
                "waarmee het zich onderscheidt van SP en DENK op handhavingsthema's."
            ),
            "positive_pole": "Kritisch op overheidstoezicht: DENK, SP, PvdD, Volt — minder inspectielast",
            "negative_pole": "Pro-handhaving en regulering: GroenLinks-PvdA, CDA, SGP — veiligheid en naleving",
            "flip": True,
        },
    }

    st.subheader("🔬 SVD Assen — politieke polarisatiethema's")
    st.markdown(
        "Elke SVD-as representeert een latente politieke dimensie afgeleid uit stempatronen "
        "van alle Kamerleden. De top-10 moties per as zijn uniek (geen overlap) en illustreren "
        "het spanningsveld dat de as beschrijft."
    )

    # Scree plot: relative importance of each SVD component
    scree_importances = load_scree_data(db_path)
    if scree_importances:
        st.markdown(
            "**Scree-plot** — het relatieve gewicht van elke SVD-as. "
            "De eerste assen verklaren het meeste van de stemverschillen in de Kamer; "
            "latere assen (7+) zijn fragiel en mogelijk niet boven ruisniveau."
        )
        _render_scree_plot(scree_importances)

    json_path = os.path.join("thoughts", "explorer", "top_svd_top_motions.json")
    if not os.path.exists(json_path):
        st.warning(
            f"Top-SVD data not found at {json_path}. Run the importance job to generate it."
        )
        return

    try:
        with open(json_path, "r", encoding="utf-8") as fh:
            j = json.load(fh)
    except Exception as e:
        st.error(f"Failed to load SVD importance JSON: {e}")
        return

    window = j.get("window")
    rows = j.get("rows", [])
    if not rows:
        st.info("Geen top-moties in dataset")
        return

    st.caption(f"Top SVD-bijdragers berekend voor venster: **{window}**")

    # Build mapping component -> list of motions, keeping only the first row per
    # motion_id within a component. A per-component "seen" set avoids rebuilding
    # the id set for every row.
    comp_map: dict[int, list] = {}
    seen_ids: dict[int, set] = {}
    for r in rows:
        try:
            comp = int(r.get("component", 0))
        except (TypeError, ValueError):
            logger.warning(
                "Skipping SVD row with invalid component: %r", r.get("component")
            )
            continue
        mid = r.get("motion_id")
        seen = seen_ids.setdefault(comp, set())
        if mid in seen:
            continue
        seen.add(mid)
        comp_map.setdefault(comp, []).append(r)

    if not comp_map:
        # Every row was unusable; nothing to select from.
        st.info("Geen top-moties in dataset")
        return

    comp_options = sorted(comp_map.keys())

    # Build display labels for selectbox, e.g. "As 1 — Links-rechts hoofdas"
    def _comp_label(c: int) -> str:
        lbl = SVD_THEMES.get(c, {}).get("label", "")
        return f"As {c} — {lbl}" if lbl else f"As {c}"

    comp_display = [_comp_label(c) for c in comp_options]
    comp_sel_idx = st.selectbox(
        "Selecteer SVD-as",
        options=list(range(len(comp_options))),
        format_func=lambda i: comp_display[i],
        index=0,
    )
    comp_sel = comp_options[comp_sel_idx]

    # Show theme explanation
    theme = SVD_THEMES.get(comp_sel, {})
    if theme:
        st.info(f"**{theme['label']}** — {theme['explanation']}")

    motions = comp_map.get(comp_sel, [])

    # Party axis chart
    # Default party scores (single-window mean vectors) as a fallback
    party_scores_default = load_party_axis_scores(db_path)
    party_mp_vectors = load_party_mp_vectors(db_path)
    bootstrap_data = (
        _cached_bootstrap_cis(party_mp_vectors) if party_mp_vectors else None
    )

    # For components 1 and 2, prefer MP-centroid values from the Procrustes-aligned
    # positions_by_window so the compass matches the trajectories (MP-mean centroids).
    party_scores = party_scores_default
    if comp_sel in (1, 2):
        try:
            positions_by_window, _ = load_positions(db_path)
            # choose the current parliament window if present, else the last key in sort order
            pos_window = (
                "current_parliament"
                if "current_parliament" in positions_by_window
                else sorted(positions_by_window.keys())[-1]
            )
            pos = positions_by_window.get(pos_window, {})

            # build party -> list of MP x/y coords
            party_map = load_party_map(db_path)
            per_party_coords: dict = {}
            for ent, (x, y) in pos.items():
                party = party_map.get(ent)
                if party is None:
                    continue
                per_party_coords.setdefault(party, []).append((x, y))

            # prefer the MP centroid [x, y]; fall back to the default vector per party
            centroids: dict = {}
            for party in set(per_party_coords) | set(party_scores_default):
                coords = per_party_coords.get(party)
                if coords:
                    centroids[party] = [
                        float(np.mean([c[0] for c in coords])),
                        float(np.mean([c[1] for c in coords])),
                    ]
                else:
                    centroids[party] = party_scores_default.get(party, [])
            party_scores = centroids
        except Exception:
            # On any error, fall back to the old behaviour
            logger.exception(
                "Failed to derive party centroids from positions_by_window; falling back to load_party_axis_scores"
            )
            party_scores = party_scores_default

    # Convert party_scores (possibly [x,y] lists or legacy vectors) into explicit (x,y) coords.
    # `len()` is used instead of truthiness so array-like vectors are not rejected
    # by an ambiguous-truth-value error.
    party_coords: dict = {}
    for p, v in party_scores.items():
        try:
            if v is not None and len(v) >= 2:
                party_coords[p] = (float(v[0]), float(v[1]))
        except (TypeError, ValueError, IndexError, KeyError):
            continue

    _render_party_axis_chart(
        party_coords, comp_sel, theme, bootstrap_data=bootstrap_data
    )

    # Batch-fetch motion details (title, date, policy_area, url, body_text, voting_results)
    motion_details: Dict[int, tuple] = {}
    ids_int: List[int] = []
    for m in motions:
        raw_id = m.get("motion_id")
        if raw_id is None:
            continue
        parsed_id = _coerce_motion_id(raw_id)
        if parsed_id is None:
            logger.warning("Skipping invalid motion id in SVD batch fetch: %r", raw_id)
        else:
            ids_int.append(parsed_id)

    # If no valid ids remain, skip the DB query
    if ids_int:
        con = None
        try:
            # Only "?" placeholders are interpolated; the ids are bound as parameters.
            placeholders = ", ".join("?" for _ in ids_int)
            con = duckdb.connect(database=db_path, read_only=True)
            db_rows = con.execute(
                f"SELECT id, title, date, policy_area, url, body_text, voting_results "
                f"FROM motions WHERE id IN ({placeholders})",
                ids_int,
            ).fetchall()
            motion_details = {r[0]: r for r in db_rows}
        except Exception:
            logger.exception("Failed to batch-fetch motion details")
        finally:
            if con is not None:
                con.close()

    # Split motions by pole sign
    pos_motions = [m for m in motions if _motion_score(m) >= 0]
    neg_motions = [m for m in motions if _motion_score(m) < 0]

    flip = theme.get("flip", False) if theme else False
    pos_pole = theme.get("positive_pole", "") if theme else ""
    neg_pole = theme.get("negative_pole", "") if theme else ""

    # Determine which pole goes left (progressive) and which goes right
    if flip:
        left_pole, right_pole = pos_pole, neg_pole
        left_motions, right_motions = pos_motions, neg_motions
        left_arrow, right_arrow = "▲", "▼"
    else:
        left_pole, right_pole = neg_pole, pos_pole
        left_motions, right_motions = neg_motions, pos_motions
        left_arrow, right_arrow = "▼", "▲"

    lcol, rcol = st.columns(2)

    with lcol:
        _render_svd_pole_column(
            f"**← {left_pole}**", left_arrow, left_motions, motion_details
        )

    with rcol:
        _render_svd_pole_column(
            f"**{right_pole} →**", right_arrow, right_motions, motion_details
        )


def build_mp_quiz_tab(db_path: str) -> None:
    """Interactive quiz: narrow MPs by asking motion vote questions.

    Flow:
    - seed with motions that have individual MP vote records (SEED_MOTIONS of them)
    - present one question at a time, store answers in st.session_state['mp_quiz_votes']
      and the answered motion ids in st.session_state['mp_quiz_asked']
    - once the seed questions are used up, rank MPs with
      MotionDatabase.match_mps_for_votes and pick the next question with
      choose_discriminating_motions
    - stop asking when no discriminating motion remains or MAX_QUESTIONS questions
      have been answered
    - always show the current ranking and report whether the top match is unique

    Args:
        db_path: Path of the motions database, passed to MotionDatabase and
            load_motions_df.
    """
    st.subheader("🧑‍⚖️ Welk tweede kamerlid ben jij?")
    st.markdown(
        "Beantwoord een paar eenvoudige ja/nee/onthoud vragen over moties om te zien welk Kamerlid het meest op jou lijkt."
    )

    SEED_MOTIONS = 8
    MAX_QUESTIONS = 20

    # initialize session state
    if "mp_quiz_votes" not in st.session_state:
        st.session_state["mp_quiz_votes"] = {}
    if "mp_quiz_asked" not in st.session_state:
        st.session_state["mp_quiz_asked"] = []

    from database import MotionDatabase as _MotionDatabase

    db_inst = _MotionDatabase(db_path)

    df = load_motions_df(db_path)
    if df.empty:
        st.warning("Geen moties beschikbaar om de quiz te starten.")
        return

    # seed from motions that actually have individual MP vote records
    seed_ids = db_inst.get_motions_with_individual_votes(k=SEED_MOTIONS)
    if not seed_ids:
        st.warning("Geen individuele stemdata beschikbaar voor de quiz.")
        return

    def _rank_mps(limit: int) -> list:
        """Rank MPs against the stored answers; log and return [] on any failure."""
        try:
            user_votes = {
                int(k): v for k, v in st.session_state["mp_quiz_votes"].items()
            }
            return db_inst.match_mps_for_votes(user_votes, limit=limit)
        except Exception:
            logger.exception("Failed to rank MPs for the quiz answers")
            return []

    def _next_motion_id():
        """Return the id of the next motion to ask, or None when nothing is left."""
        answered = st.session_state["mp_quiz_votes"]
        # prefer seed motions not yet asked
        for mid in seed_ids:
            if str(mid) not in answered:
                return mid

        # otherwise ask a motion that discriminates between the remaining candidates
        candidates = [r["mp_name"] for r in _rank_mps(limit=200)]
        if not candidates:
            return None
        try:
            excluded = [int(k) for k in answered]
            next_ids = db_inst.choose_discriminating_motions(candidates, excluded, k=1)
        except Exception:
            logger.exception("Failed to choose a discriminating motion for the quiz")
            return None
        return next_ids[0] if next_ids else None

    # Count only questions the user actually answered; auto-skipped motions are
    # recorded in 'mp_quiz_votes' but not in 'mp_quiz_asked'. The same counter
    # drives the progress caption and the MAX_QUESTIONS cap.
    answered_count = len(st.session_state["mp_quiz_asked"])

    # show progress and controls
    _, controls_col = st.columns([3, 1])
    with controls_col:
        st.caption(f"Vragen beantwoord: {answered_count}/{MAX_QUESTIONS}")
        if st.button("Reset quiz"):
            st.session_state["mp_quiz_votes"] = {}
            st.session_state["mp_quiz_asked"] = []
            st.rerun()

    # main question loop (single question per render, wrapped in a form to avoid
    # premature reruns when the user changes the radio selection). No further
    # question is offered once the cap is reached; the ranking section below
    # reports that state.
    if answered_count < MAX_QUESTIONS:
        next_mid = _next_motion_id()
        if next_mid is None:
            st.info("Geen nieuwe vragen beschikbaar om kandidaten te scheiden.")
        else:
            motion_rows = df[df["id"] == next_mid]
            if motion_rows.empty:
                # motion has votes but isn't in the motions DataFrame — skip it
                st.session_state["mp_quiz_votes"][str(next_mid)] = "Geen stem"
                st.rerun()
                return
            motion_row = motion_rows.iloc[0]

            # Missing values may arrive as NaN, which is truthy; require real text.
            title = motion_row.get("title")
            if not isinstance(title, str) or not title:
                title = f"Motie #{next_mid}"
            st.markdown(f"### {title}")
            explanation = motion_row.get("layman_explanation")
            if isinstance(explanation, str) and explanation:
                st.info(explanation)

            with st.form(key=f"mp_quiz_form_{next_mid}"):
                choice = st.radio(
                    "Wat zou jij stemmen?",
                    options=["Voor", "Tegen", "Onthouden", "Geen stem"],
                    index=3,
                )
                submitted = st.form_submit_button("Beantwoord en verder")

            if submitted:
                st.session_state["mp_quiz_votes"][str(next_mid)] = choice
                st.session_state["mp_quiz_asked"].append(next_mid)
                st.rerun()

    # display current ranking
    ranking = _rank_mps(limit=50)
    if not ranking:
        st.info("Nog geen antwoorden of geen overlapping met bestaande stemdata.")
        return

    st.markdown("**Top kandidaten**")
    st.dataframe(pd.DataFrame(ranking).head(10), use_container_width=True)

    # check uniqueness: exactly one MP holds the top agreement percentage
    top_pct = ranking[0]["agreement_pct"]
    top_matches = [r for r in ranking if r["agreement_pct"] == top_pct]
    if len(top_matches) == 1 and top_matches[0]["overlap"] > 0:
        st.success(
            f"Unieke match gevonden: {top_matches[0]['mp_name']} ({top_matches[0]['party']})"
        )
    elif len(st.session_state["mp_quiz_asked"]) >= MAX_QUESTIONS:
        st.warning(
            "Maximaal aantal vragen beantwoord. Je hebt meerdere vergelijkbare kandidaten."
        )
    else:
        st.info("Nog geen unieke match — vraag meer om verder te verfijnen.")


# ---------------------------------------------------------------------------
# App entry
# ---------------------------------------------------------------------------


def run_app() -> None:
    """Configure the Streamlit page, render the sidebar and dispatch the main tabs.

    The sidebar holds the "show rejected motions" checkbox and an "Over" expander
    with row counts from the database. The main area shows one tab per entry in
    the tab table below; when ``st.tabs`` is unavailable a radio selector is used
    instead and only the selected tab is rendered.
    """
    st.set_page_config(
        layout="wide",
        page_title="Parlement Explorer",
        page_icon="🏛️",
    )
    st.title("🏛️ Parlement Explorer")

    # Sidebar
    st.sidebar.title("Instellingen")
    db_path = "data/motions.db"
    window_size = "annual"
    show_rejected = st.sidebar.checkbox("Toon verworpen moties", value=False)

    # About section
    with st.sidebar.expander("ℹ️ Over", expanded=False):
        try:
            if _DUCKDB_AVAILABLE:
                con = duckdb.connect(database=db_path, read_only=True)
                try:
                    n_motions = con.execute("SELECT COUNT(*) FROM motions").fetchone()[0]
                    n_fused = con.execute(
                        "SELECT COUNT(*) FROM fused_embeddings"
                    ).fetchone()[0]
                    n_sim = con.execute(
                        "SELECT COUNT(*) FROM similarity_cache"
                    ).fetchone()[0]
                finally:
                    # Close even when one of the queries fails (e.g. a missing
                    # table), so the connection is not leaked.
                    con.close()
                st.markdown(
                    f"**Moties:** {n_motions:,}  \n"
                    f"**Fused embeddings:** {n_fused:,}  \n"
                    f"**Similarity cache:** {n_sim:,}"
                )
            else:
                st.warning(
                    "DuckDB niet beschikbaar in deze Python-omgeving; DB diagnostics zijn niet beschikbaar."
                )
        except Exception as e:
            st.warning(f"DB niet bereikbaar: {e}")

    # Main tabs: one (label, renderer) pair per tab, shared by both code paths below.
    # NOTE(review): build_mp_quiz_tab is defined in this module but is not listed
    # here — confirm whether leaving it out is intentional.
    tabs = [
        ("🧭 Politiek Kompas", lambda: build_compass_tab(db_path, window_size)),
        ("📈 Trajectories", lambda: build_trajectories_tab(db_path, window_size)),
        ("🔍 Motie Zoeken", lambda: build_search_tab(db_path, show_rejected)),
        ("📋 Motie Browser", lambda: build_browser_tab(db_path, show_rejected)),
        ("🔬 SVD Components", lambda: build_svd_components_tab(db_path)),
    ]
    tab_labels = [label for label, _ in tabs]

    # Streamlit tabs compatibility: some older/newer Streamlit builds expose different APIs.
    if callable(getattr(st, "tabs", None)):
        for container, (_, render) in zip(st.tabs(tab_labels), tabs):
            with container:
                render()
    else:
        # Fallback for environments where `st.tabs` is not available: use a radio selector.
        # Any unexpected selection falls through to the last tab.
        selection = st.radio("Tab", tab_labels)
        render = dict(tabs).get(selection, tabs[-1][1])
        render()


if __name__ == "__main__":
    # Script entry point (the module docstring says to start it with
    # `streamlit run explorer.py`): set up INFO-level logging, then build the UI.
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(message)s",
        level=logging.INFO,
    )
    run_app()