motief/analysis/right_wing/extremity_2d_temporal.py

#!/usr/bin/env python3
"""U2: 2D Extremity Temporal Decomposition.

Tests whether the "flat single-dimension trend" masks diverging trajectories
when stylistic and material extremity scores are analyzed separately over time.

Usage:
    uv run python analysis/right_wing/extremity_2d_temporal.py

Output:
    reports/overton_window/extremity_2d_temporal.md
    reports/overton_window/extremity_2d_temporal_figure.png
"""

from __future__ import annotations

import logging
import sys
from pathlib import Path
from typing import Any

import duckdb
import matplotlib

matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import pearsonr, wilcoxon

# Directory three levels above this file; the data and reports paths below
# are all built relative to it.
ROOT = Path(__file__).parent.parent.parent.resolve()
# Put ROOT at the front of the import path (presumably so project-local
# packages resolve when this file is run as a script -- confirm).
sys.path.insert(0, str(ROOT))

# DuckDB database file, kept as a plain string.
DB_PATH = str(ROOT / "data" / "motions.db")
# Output directory for the figure and report. NOTE: it is created at import
# time, not when the analysis is run.
REPORTS_DIR = ROOT / "reports" / "overton_window"
REPORTS_DIR.mkdir(parents=True, exist_ok=True)

# Configure root logging at import time (INFO level, timestamped lines).
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)

# Inclusive range of years that get a bucket in the yearly dictionaries.
YEAR_MIN, YEAR_MAX = 2016, 2026
# Years < BREAK_YEAR count as "pre", years >= BREAK_YEAR as "post".
BREAK_YEAR = 2024
# Years with fewer scored motions than this are flagged as low confidence.
CONFIDENCE_N_MIN = 50


def fetch_2d_yearly_data(con: duckdb.DuckDBPyConnection) -> dict[int, dict[str, list[float]]]:
    """Collect per-year score lists for classified right-wing motions.

    Runs one query joining ``extremity_scores_2d`` with ``right_wing_motions``
    (and, via LEFT JOIN, ``extremity_scores``) and distributes every row into
    the bucket of its year.

    Args:
        con: Open DuckDB connection to query.

    Returns:
        A dict with one entry for every year in ``YEAR_MIN..YEAR_MAX``
        (inclusive). Each entry maps a bucket name to a list of floats:

        * ``stijl`` / ``materieel`` / ``text`` -- all non-NULL scores;
        * ``mig_*`` / ``non_mig_*`` -- the same scores split on whether the
          motion's category equals ``"asiel/vreemdelingen"``;
        * ``ge3_*`` / ``ge4_*`` -- stylistic and material scores of motions
          whose material score is >= 3 / >= 4 (only rows where both scores
          are present).

        Each score column is filtered for NULL independently, so the
        ``stijl`` and ``materieel`` lists of a year are not guaranteed to be
        position-aligned.
    """
    query = """
        SELECT
            r.year,
            e2d.stijl_extremiteit,
            e2d.materiele_impact,
            e.text_score,
            r.category
        FROM extremity_scores_2d e2d
        JOIN right_wing_motions r ON e2d.motion_id = r.motion_id
        LEFT JOIN extremity_scores e ON e2d.motion_id = e.motion_id
        WHERE r.classified = TRUE
          AND r.year IS NOT NULL
        ORDER BY r.year
    """
    records = con.execute(query).fetchall()

    dimensions = ("stijl", "materieel", "text")
    bucket_names = (
        *dimensions,
        *(f"mig_{dim}" for dim in dimensions),
        *(f"non_mig_{dim}" for dim in dimensions),
        "ge3_stijl",
        "ge3_materieel",
        "ge4_stijl",
        "ge4_materieel",
    )
    yearly: dict[int, dict[str, list[float]]] = {
        yr: {name: [] for name in bucket_names}
        for yr in range(YEAR_MIN, YEAR_MAX + 1)
    }

    for raw_year, style_score, material_score, original_score, category in records:
        yr = int(raw_year)
        if not (YEAR_MIN <= yr <= YEAR_MAX):
            continue

        buckets = yearly[yr]
        domain = "mig_" if category == "asiel/vreemdelingen" else "non_mig_"

        # Every available score goes into the overall bucket and into the
        # bucket of the motion's domain.
        for dim, score in zip(dimensions, (style_score, material_score, original_score)):
            if score is None:
                continue
            value = float(score)
            buckets[dim].append(value)
            buckets[domain + dim].append(value)

        # The gravity-filtered buckets need both 2D scores.
        if style_score is None or material_score is None:
            continue
        style_value = float(style_score)
        material_value = float(material_score)
        for threshold, tag in ((3, "ge3"), (4, "ge4")):
            if material_value >= threshold:
                buckets[f"{tag}_stijl"].append(style_value)
                buckets[f"{tag}_materieel"].append(material_value)

    return yearly


def fetch_all_motion_yearly(con: duckdb.DuckDBPyConnection) -> dict[int, dict[str, list[float]]]:
    """Collect per-year stylistic and material scores for ALL scored motions.

    Joins ``extremity_scores_all`` with ``motions`` and groups the scores by
    the calendar year of the motion date, restricted in SQL to
    ``YEAR_MIN..YEAR_MAX``.

    Args:
        con: Open DuckDB connection to query.

    Returns:
        A dict with one entry for every year in ``YEAR_MIN..YEAR_MAX``
        (inclusive), each holding a ``"stijl"`` and a ``"materieel"`` list of
        floats. NULL scores are skipped per dimension, so the two lists of a
        year may differ in length.
    """
    logger.info("Fetching all-motion extremity data by year...")
    rows = con.execute("""
        SELECT
            EXTRACT(YEAR FROM m.date) AS year,
            esa.stijl_extremiteit,
            esa.materiele_impact
        FROM extremity_scores_all esa
        JOIN motions m ON esa.motion_id = m.id
        WHERE m.date IS NOT NULL
          AND EXTRACT(YEAR FROM m.date) BETWEEN ? AND ?
        ORDER BY year
    """, (YEAR_MIN, YEAR_MAX)).fetchall()

    yearly: dict[int, dict[str, list[float]]] = {
        year: {"stijl": [], "materieel": []}
        for year in range(YEAR_MIN, YEAR_MAX + 1)
    }

    for year, stijl, materieel in rows:
        bucket = yearly[int(year)]
        # A NULL score arrives as None and float(None) raises TypeError, so
        # skip missing values instead of aborting the whole run (this mirrors
        # the None checks in fetch_2d_yearly_data).
        if stijl is not None:
            bucket["stijl"].append(float(stijl))
        if materieel is not None:
            bucket["materieel"].append(float(materieel))

    total = sum(len(v["stijl"]) for v in yearly.values())
    logger.info("Fetched %d all-motion scored motions across %d years", total, len(yearly))
    return yearly


def _score_stats(vals: np.ndarray, rng: np.random.Generator) -> dict[str, float]:
    """Return mean, std, sem and a bootstrap 95% CI for one bucket of scores."""
    nan = float("nan")
    n = len(vals)
    if n == 0:
        return {"mean": nan, "std": nan, "sem": nan, "ci_lo": nan, "ci_hi": nan}

    stats: dict[str, float] = {"mean": float(np.mean(vals))}
    if n > 1:
        sd = np.std(vals, ddof=1)  # sample standard deviation
        stats["std"] = float(sd)
        stats["sem"] = float(sd / np.sqrt(n))
    else:
        # A single observation has no spread estimate; report 0.0.
        stats["std"] = 0.0
        stats["sem"] = 0.0

    if n >= 20:
        # Percentile bootstrap of the mean (1000 resamples with replacement).
        boot_means = [
            float(np.mean(rng.choice(vals, size=n, replace=True)))
            for _ in range(1000)
        ]
        stats["ci_lo"] = float(np.percentile(boot_means, 2.5))
        stats["ci_hi"] = float(np.percentile(boot_means, 97.5))
    else:
        # Too few observations for a bootstrap interval.
        stats["ci_lo"] = nan
        stats["ci_hi"] = nan
    return stats


def _pearson_or_nan(
    stijl_vals: list[float],
    mat_vals: list[float],
    label: str,
    min_n: int = 10,
) -> tuple[float, float]:
    """Return Pearson (r, p) for two score lists, or (nan, nan) if not computable."""
    nan = float("nan")
    stijl_arr = np.array(stijl_vals)
    mat_arr = np.array(mat_vals)
    if len(stijl_arr) < min_n or len(mat_arr) < min_n:
        return nan, nan
    if len(stijl_arr) != len(mat_arr):
        # The two lists are filled independently, so a motion with only one
        # of the two scores leaves them unpaired. pearsonr would raise
        # ValueError here; report NaN and say why instead.
        logging.getLogger(__name__).warning(
            "Skipping %s stijl-materieel correlation: %d stylistic vs %d material "
            "scores cannot be paired",
            label, len(stijl_arr), len(mat_arr),
        )
        return nan, nan
    r, p = pearsonr(stijl_arr, mat_arr)
    return float(r), float(p)


def compute_yearly_summary(
    yearly: dict[int, dict[str, list[float]]],
) -> dict[int, dict[str, Any]]:
    """Summarise the yearly score buckets into per-year statistics.

    Args:
        yearly: Mapping ``year -> {bucket name -> list of scores}``. Bucket
            names are ``stijl``, ``materieel``, ``text`` and the same names
            prefixed with ``mig_``, ``non_mig_``, ``ge3_`` or ``ge4_`` (the
            ``ge*`` groups have no ``text`` bucket). Missing buckets are
            treated as empty.

    Returns:
        Mapping ``year -> summary dict`` containing:

        * ``year``;
        * for every bucket ``<prefix><dim>``: ``<prefix>n_<dim>``,
          ``<prefix>mean_<dim>``, ``<prefix>std_<dim>``, ``<prefix>sem_<dim>``,
          ``<prefix>ci_lo_<dim>`` and ``<prefix>ci_hi_<dim>`` (statistics are
          NaN for empty buckets; the CI is NaN below 20 observations);
        * ``r_stijl_mat`` / ``p_stijl_mat`` and the ``r_mig_*`` /
          ``r_non_mig_*`` variants: Pearson correlation between stylistic and
          material scores (NaN with fewer than 10 observations or when the
          two lists differ in length);
        * ``gap``, ``gap_mig``, ``gap_non_mig``, ``gap_ge3``, ``gap_ge4``:
          mean material minus mean stylistic score for the matching group.
    """
    summary: dict[int, dict[str, Any]] = {}
    # One seeded generator shared by all buckets keeps the bootstrap CIs
    # reproducible between runs.
    rng = np.random.default_rng(42)

    dims_full = ("stijl", "materieel", "text")
    dims_2d = ("stijl", "materieel")
    groups = (
        ("", dims_full),
        ("mig_", dims_full),
        ("non_mig_", dims_full),
        ("ge3_", dims_2d),
        ("ge4_", dims_2d),
    )

    for year, d in yearly.items():
        s: dict[str, Any] = {"year": year}

        for prefix, dims in groups:
            for short in dims:
                vals = np.array(d.get(prefix + short, []))
                s[f"{prefix}n_{short}"] = len(vals)
                for stat_name, value in _score_stats(vals, rng).items():
                    s[f"{prefix}{stat_name}_{short}"] = value

        # Per-year stijl-materieel correlation: overall, migration, non-migration.
        for domain in ("", "mig_", "non_mig_"):
            r, p = _pearson_or_nan(
                d.get(f"{domain}stijl", []),
                d.get(f"{domain}materieel", []),
                label=f"{year} {domain or 'all_'}".rstrip("_"),
            )
            s[f"r_{domain}stijl_mat"] = r
            s[f"p_{domain}stijl_mat"] = p

        # Gap (material - stylistic) per group. The means are stored under
        # "<prefix>mean_<dim>", so the lookup must use that same layout;
        # a NaN mean propagates to a NaN gap through the subtraction.
        for gap_key, prefix in (
            ("gap", ""),
            ("gap_mig", "mig_"),
            ("gap_non_mig", "non_mig_"),
            ("gap_ge3", "ge3_"),
            ("gap_ge4", "ge4_"),
        ):
            s[gap_key] = s[f"{prefix}mean_materieel"] - s[f"{prefix}mean_stijl"]

        summary[year] = s

    return summary


def compute_all_motion_summary(
    yearly: dict[int, dict[str, list[float]]],
) -> dict[int, dict[str, Any]]:
    """Reduce the all-motion score lists to per-year descriptive statistics.

    Args:
        yearly: Mapping ``year -> {"stijl": [...], "materieel": [...]}``.
            A missing list is treated as empty.

    Returns:
        Mapping ``year -> dict`` with ``year`` plus, for each of the two
        dimensions, ``n_<dim>``, ``mean_<dim>``, ``std_<dim>`` (sample
        standard deviation) and ``sem_<dim>``. An empty list yields NaN for
        all three statistics; a single value yields 0.0 for std and sem.
    """
    nan = float("nan")
    result: dict[int, dict[str, Any]] = {}

    for yr, buckets in yearly.items():
        row: dict[str, Any] = {"year": yr}

        for dim in ("stijl", "materieel"):
            scores = np.array(buckets.get(dim, []))
            count = len(scores)

            if count == 0:
                mean = spread = stderr = nan
            elif count == 1:
                mean, spread, stderr = float(np.mean(scores)), 0.0, 0.0
            else:
                sample_sd = np.std(scores, ddof=1)
                mean = float(np.mean(scores))
                spread = float(sample_sd)
                stderr = float(sample_sd / np.sqrt(count))

            row[f"n_{dim}"] = count
            row[f"mean_{dim}"] = mean
            row[f"std_{dim}"] = spread
            row[f"sem_{dim}"] = stderr

        result[yr] = row

    return result


def compute_divergence_test(
    yearly: dict[int, dict[str, list[float]]],
) -> dict[str, Any]:
    """Test whether yearly material and stylistic means differ systematically.

    For every year that has at least one stylistic and one material score,
    the two yearly means form a pair; the pairs are compared with a Wilcoxon
    signed-rank test.

    Args:
        yearly: Mapping ``year -> buckets``; only the ``"stijl"`` and
            ``"materieel"`` lists are read.

    Returns:
        A dict with ``n_years`` (number of usable pairs), ``test``
        (``"wilcoxon_signed_rank"``, ``"insufficient_years"`` when fewer than
        three pairs exist, or ``"wilcoxon_error"`` when the test raised),
        ``statistic``, ``p_value`` (both NaN unless the test ran) and a
        human-readable ``conclusion``.
    """
    nan = float("nan")

    # Keep only years where both dimensions have data, in chronological order.
    usable_years = [
        yr for yr in sorted(yearly)
        if len(yearly[yr]["stijl"]) > 0 and len(yearly[yr]["materieel"]) > 0
    ]
    style_means = [np.mean(yearly[yr]["stijl"]) for yr in usable_years]
    material_means = [np.mean(yearly[yr]["materieel"]) for yr in usable_years]

    outcome: dict[str, Any] = {"n_years": len(style_means)}

    if len(style_means) < 3:
        outcome["test"] = "insufficient_years"
        outcome["statistic"] = nan
        outcome["p_value"] = nan
        outcome["conclusion"] = "Not enough yearly data points for a paired test"
        return outcome

    try:
        stat, p = wilcoxon(material_means, style_means)
        outcome["test"] = "wilcoxon_signed_rank"
        outcome["statistic"] = float(stat)
        outcome["p_value"] = float(p)
        significant = p < 0.05
        outcome["conclusion"] = (
            "Significant divergence: material and stylistic yearly means differ "
            f"(W={stat:.1f}, p={p:.4f})"
            if significant
            else f"No significant divergence detected (W={stat:.1f}, p={p:.4f})"
        )
    except Exception as e:
        # Any failure of the test is reported in the result rather than raised.
        outcome["test"] = "wilcoxon_error"
        outcome["statistic"] = nan
        outcome["p_value"] = nan
        outcome["conclusion"] = f"Test failed: {e}"

    return outcome


def compute_temporal_correlations(summary: dict[int, dict[str, Any]]) -> dict[str, Any]:
    """Compare the per-year stijl-material correlations before and after the break.

    Years are split at ``BREAK_YEAR`` (years below it are "pre", the rest
    "post"). The yearly ``r_stijl_mat`` values of both periods are averaged
    and, when each period has at least two non-NaN values, compared with a
    two-sided Mann-Whitney U test.

    Args:
        summary: Mapping ``year -> summary dict``; only ``r_stijl_mat`` is
            read, and a missing value counts as NaN.

    Returns:
        A dict with ``pre_years``, ``post_years``, ``pre_mean_r``,
        ``post_mean_r``, ``pre_correlations``, ``post_correlations`` (keyed
        by the year as a string), ``mannwhitney_u``, ``mannwhitney_p`` and a
        textual ``correlation_change``. The test fields are NaN when the test
        could not be run.
    """
    nan = float("nan")

    def yearly_r(yr: int) -> float:
        return summary[yr].get("r_stijl_mat", nan)

    ordered_years = sorted(summary)
    before = [yr for yr in ordered_years if yr < BREAK_YEAR]
    after = [yr for yr in ordered_years if yr >= BREAK_YEAR]

    before_valid = [r for r in map(yearly_r, before) if not np.isnan(r)]
    after_valid = [r for r in map(yearly_r, after) if not np.isnan(r)]

    outcome: dict[str, Any] = {
        "pre_years": before,
        "post_years": after,
        "pre_mean_r": float(np.mean(before_valid)) if before_valid else nan,
        "post_mean_r": float(np.mean(after_valid)) if after_valid else nan,
        "pre_correlations": {str(yr): yearly_r(yr) for yr in before},
        "post_correlations": {str(yr): yearly_r(yr) for yr in after},
    }

    # Guard clause: the rank test needs at least two values per period.
    if len(before_valid) < 2 or len(after_valid) < 2:
        outcome["mannwhitney_u"] = nan
        outcome["mannwhitney_p"] = nan
        outcome["correlation_change"] = "Insufficient valid data for pre/post comparison"
        return outcome

    from scipy.stats import mannwhitneyu

    try:
        u, p = mannwhitneyu(before_valid, after_valid, alternative="two-sided")
        outcome["mannwhitney_u"] = float(u)
        outcome["mannwhitney_p"] = float(p)
        outcome["correlation_change"] = (
            "Significant change in stijl-material correlation pre vs post-2024 "
            f"(U={u:.1f}, p={p:.4f})"
            if p < 0.05
            else f"No significant change in stijl-material correlation (U={u:.1f}, p={p:.4f})"
        )
    except Exception:
        # A failing test is reported as missing rather than propagated.
        outcome["mannwhitney_u"] = nan
        outcome["mannwhitney_p"] = nan
        outcome["correlation_change"] = "Insufficient valid data for comparison"

    return outcome


def create_figure(
    summary: dict[int, dict[str, Any]],
    all_summary: dict[int, dict[str, Any]],
) -> str:
    """Generate the 2D extremity temporal figure with 4 panels.

    The four stacked panels share one x-axis (the years of ``summary``):

    1. yearly mean stylistic / material / original text score with CI bands,
       plus the material means of the M>=3 and M>=4 subsets;
    2. the gap (material minus stylistic) overall, for migration and for
       non-migration motions;
    3. the per-year Pearson correlation between the two dimensions as bars;
    4. right-wing yearly means against the all-motion yearly means.

    Args:
        summary: Per-year summary of the right-wing motions; values are read
            by key and a missing key is treated as NaN.
        all_summary: Per-year summary of all motions; a year or key that is
            absent is treated as NaN.

    Returns:
        The path (as a string) of the PNG written to
        ``REPORTS_DIR / "extremity_2d_temporal_figure.png"``.
    """
    years = sorted(summary.keys())
    years_arr = np.array(years)

    # Lookup helpers: fall back to NaN so missing data leaves a hole in the
    # series instead of raising.
    def _val(yr, key):
        return summary[yr].get(key, float("nan"))

    def _all_val(yr, key):
        return all_summary[yr].get(key, float("nan")) if yr in all_summary else float("nan")

    stijl_means = np.array([_val(y, "mean_stijl") for y in years])
    mat_means = np.array([_val(y, "mean_materieel") for y in years])
    text_means = np.array([_val(y, "mean_text") for y in years])

    stijl_ci_lo = np.array([_val(y, "ci_lo_stijl") for y in years])
    stijl_ci_hi = np.array([_val(y, "ci_hi_stijl") for y in years])
    mat_ci_lo = np.array([_val(y, "ci_lo_materieel") for y in years])
    mat_ci_hi = np.array([_val(y, "ci_hi_materieel") for y in years])

    # NOTE(review): compute_yearly_summary stores the domain means as
    # "mig_mean_stijl" / "non_mig_mean_stijl" etc., not "mean_mig_stijl", so
    # these four arrays come out all-NaN. They are not used further down.
    mig_stijl = np.array([_val(y, "mean_mig_stijl") for y in years])
    mig_mat = np.array([_val(y, "mean_mig_materieel") for y in years])
    non_mig_stijl = np.array([_val(y, "mean_non_mig_stijl") for y in years])
    non_mig_mat = np.array([_val(y, "mean_non_mig_materieel") for y in years])

    gaps = np.array([_val(y, "gap") for y in years])
    gaps_mig = np.array([_val(y, "gap_mig") for y in years])
    gaps_non_mig = np.array([_val(y, "gap_non_mig") for y in years])

    rs = np.array([_val(y, "r_stijl_mat") for y in years])
    ns = np.array([_val(y, "n_stijl") for y in years])

    # Gravity data
    ge3_stijl = np.array([_val(y, "ge3_mean_stijl") for y in years])
    ge3_mat = np.array([_val(y, "ge3_mean_materieel") for y in years])
    ge4_stijl = np.array([_val(y, "ge4_mean_stijl") for y in years])
    ge4_mat = np.array([_val(y, "ge4_mean_materieel") for y in years])

    # All-motion data
    all_stijl = np.array([_all_val(y, "mean_stijl") for y in years])
    all_mat = np.array([_all_val(y, "mean_materieel") for y in years])

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(14, 18), sharex=True)

    colour_stijl = "#E53935"
    colour_mat = "#1E88E5"
    colour_text = "#9E9E9E"
    colour_ge3 = "#F9A825"
    colour_ge4 = "#E65100"

    # Panel 1: Yearly means with CIs + gravity-weighted trends
    # The masks drop years without a mean so each line only connects years
    # that actually have data.
    mask_stijl = ~np.isnan(stijl_means)
    mask_mat = ~np.isnan(mat_means)
    mask_text = ~np.isnan(text_means)

    # CI bands; the CI values themselves may still be NaN for years that
    # have a mean but no confidence interval.
    ax1.fill_between(
        years_arr[mask_stijl],
        stijl_ci_lo[mask_stijl],
        stijl_ci_hi[mask_stijl],
        alpha=0.12,
        color=colour_stijl,
    )
    ax1.fill_between(
        years_arr[mask_mat],
        mat_ci_lo[mask_mat],
        mat_ci_hi[mask_mat],
        alpha=0.12,
        color=colour_mat,
    )

    ax1.plot(years_arr[mask_stijl], stijl_means[mask_stijl],
             marker="o", color=colour_stijl, linewidth=2, label="Stylistic extremity (all RW)")
    ax1.plot(years_arr[mask_mat], mat_means[mask_mat],
             marker="s", color=colour_mat, linewidth=2, label="Material impact (all RW)")
    ax1.plot(years_arr[mask_text], text_means[mask_text],
             marker="^", color=colour_text, linewidth=1.5, linestyle="--", alpha=0.7,
             label="Original single-score")

    # Gravity-weighted lines on Panel 1
    # Only the material-impact series of the two subsets is drawn; the two
    # *_stijl masks below are computed but not used.
    mask_ge3_stijl = ~np.isnan(ge3_stijl)
    mask_ge3_mat = ~np.isnan(ge3_mat)
    mask_ge4_stijl = ~np.isnan(ge4_stijl)
    mask_ge4_mat = ~np.isnan(ge4_mat)

    ax1.plot(years_arr[mask_ge3_mat], ge3_mat[mask_ge3_mat],
             marker="s", color=colour_ge3, linewidth=1.5, linestyle="--", alpha=0.8,
             label="Material impact (M≥3)")
    ax1.plot(years_arr[mask_ge4_mat], ge4_mat[mask_ge4_mat],
             marker="s", color=colour_ge4, linewidth=1.5, linestyle=":", alpha=0.8,
             label="Material impact (M≥4)")

    # Vertical marker half a year before BREAK_YEAR, i.e. between the last
    # "pre" and the first "post" year. The label height is taken from the
    # y-limits as they are at this point, before set_ylim below.
    ax1.axvline(x=BREAK_YEAR - 0.5, color="black", linestyle=":", alpha=0.5, linewidth=1)
    ax1.annotate("2024", xy=(BREAK_YEAR - 0.3, ax1.get_ylim()[1] * 0.95),
                 fontsize=9, color="black", alpha=0.7)

    # Print the sample size near the bottom of the panel for years with
    # fewer than CONFIDENCE_N_MIN scored motions.
    for i, (xi, n) in enumerate(zip(years_arr, ns)):
        if not np.isnan(n) and n < CONFIDENCE_N_MIN:
            y_pos = 1.05
            ax1.annotate(f"n={int(n)}", xy=(xi, y_pos), fontsize=6,
                         color="grey", alpha=0.5, ha="center", va="bottom")

    ax1.set_ylabel("Mean score (1-5 scale)")
    ax1.set_title("2D Extremity Temporal Decomposition: Stylistic vs Material Impact Over Time", fontweight="bold")
    ax1.legend(loc="upper left", fontsize=8)
    ax1.set_ylim(0.5, 5.5)
    ax1.grid(True, alpha=0.3)

    # Panel 2: Gap trajectory (material - stylistic)
    mask_gap = ~np.isnan(gaps)
    ax2.plot(years_arr[mask_gap], gaps[mask_gap],
             marker="D", color="#FF8F00", linewidth=2, label="All domains")
    mask_gap_mig = ~np.isnan(gaps_mig)
    ax2.plot(years_arr[mask_gap_mig], gaps_mig[mask_gap_mig],
             marker="^", color=colour_stijl, linewidth=1.5, linestyle=":", label="Migration")
    mask_gap_nm = ~np.isnan(gaps_non_mig)
    ax2.plot(years_arr[mask_gap_nm], gaps_non_mig[mask_gap_nm],
             marker="v", color=colour_mat, linewidth=1.5, linestyle="-.", label="Non-migration")

    # Zero line: above it the material mean exceeds the stylistic mean.
    ax2.axhline(y=0, color="black", linestyle="--", alpha=0.3, linewidth=1)
    ax2.axvline(x=BREAK_YEAR - 0.5, color="black", linestyle=":", alpha=0.5, linewidth=1)
    ax2.annotate("2024", xy=(BREAK_YEAR - 0.3, ax2.get_ylim()[1] * 0.95),
                 fontsize=9, color="black", alpha=0.7)

    ax2.set_ylabel("Gap (material - stylistic)")
    ax2.set_title("Divergence Gap: Material Impact Minus Stylistic Extremity Over Time", fontweight="bold")
    ax2.legend(loc="upper left", fontsize=8)
    ax2.grid(True, alpha=0.3)

    # Panel 3: Stijl-materieel correlation over time
    mask_rs = ~np.isnan(rs)
    ax3.bar(years_arr[mask_rs], rs[mask_rs], color="#6A1B9A", alpha=0.85, edgecolor="white")
    ax3.axhline(y=0, color="black", linestyle="--", alpha=0.3, linewidth=1)
    ax3.axvline(x=BREAK_YEAR - 0.5, color="black", linestyle=":", alpha=0.5, linewidth=1)
    ax3.annotate("2024", xy=(BREAK_YEAR - 0.3, ax3.get_ylim()[1] * 0.95),
                 fontsize=9, color="black", alpha=0.7)

    # Label every bar with its correlation and the number of stylistic scores.
    for xi, r_val, n_val in zip(years_arr[mask_rs], rs[mask_rs], ns[mask_rs]):
        if not np.isnan(r_val):
            ax3.annotate(f"r={r_val:.2f}\nn={int(n_val)}", xy=(xi, r_val),
                         fontsize=7, ha="center", va="bottom", color="#4A148C")

    ax3.set_ylabel("Pearson r (stijl, materieel)")
    ax3.set_title("Per-Year Correlation: Stylistic vs Material Impact", fontweight="bold")
    ax3.grid(True, alpha=0.3, axis="y")

    # Panel 4: All-motion vs right-wing comparison
    mask_all_stijl = ~np.isnan(all_stijl)
    mask_all_mat = ~np.isnan(all_mat)

    # Solid lines: right-wing motions; dashed, lighter lines in the same
    # colours: all motions.
    ax4.plot(years_arr[mask_stijl], stijl_means[mask_stijl],
             marker="o", color=colour_stijl, linewidth=2, label="RW Stylistic")
    ax4.plot(years_arr[mask_mat], mat_means[mask_mat],
             marker="s", color=colour_mat, linewidth=2, label="RW Material")
    ax4.plot(years_arr[mask_all_stijl], all_stijl[mask_all_stijl],
             marker="o", color=colour_stijl, linewidth=1.5, linestyle="--", alpha=0.6,
             label="All-motion Stylistic")
    ax4.plot(years_arr[mask_all_mat], all_mat[mask_all_mat],
             marker="s", color=colour_mat, linewidth=1.5, linestyle="--", alpha=0.6,
             label="All-motion Material")

    ax4.axvline(x=BREAK_YEAR - 0.5, color="black", linestyle=":", alpha=0.5, linewidth=1)
    ax4.annotate("2024", xy=(BREAK_YEAR - 0.3, ax4.get_ylim()[1] * 0.95),
                 fontsize=9, color="black", alpha=0.7)

    ax4.set_xlabel("Year")
    ax4.set_ylabel("Mean score (1-5 scale)")
    ax4.set_title("All-Motion vs Right-Wing: Stylistic and Material Extremity", fontweight="bold")
    ax4.legend(loc="upper left", fontsize=8)
    ax4.grid(True, alpha=0.3)

    # One tick per year on every panel; only the bottom panel shows labels.
    ax1.set_xticks(years_arr)
    ax2.set_xticks(years_arr)
    ax3.set_xticks(years_arr)
    ax4.set_xticks(years_arr)
    ax4.set_xticklabels([str(y) for y in years], rotation=45)
    ax1.tick_params(labelbottom=False)
    ax2.tick_params(labelbottom=False)
    ax3.tick_params(labelbottom=False)

    plt.tight_layout()
    path = str(REPORTS_DIR / "extremity_2d_temporal_figure.png")
    fig.savefig(path, dpi=150, bbox_inches="tight")
    # Close the figure explicitly so it does not stay registered with pyplot.
    plt.close(fig)
    logger.info("Saved figure to %s", path)
    return path


def generate_report(
    summary: dict[int, dict[str, Any]],
    divergence: dict[str, Any],
    temporal_corr: dict[str, Any],
    yearly: dict[int, dict[str, list[float]]],
    fig_path: str,
    all_summary: dict[int, dict[str, Any]],
) -> str:
    """Write the markdown report."""
    years = sorted(summary.keys())

    def fmt(val, precision=3):
        if val is None or (isinstance(val, float) and np.isnan(val)):
            return "N/A"
        return f"{val:.{precision}f}"

    def flag_n(year, key_prefix):
        n_key = f"{key_prefix}n_stijl"
        n = summary[year].get(n_key, 0)
        return " *" if n < CONFIDENCE_N_MIN else ""

    # Yearly means table
    table_header = (
        "| Year | N | Stylistic | Material | Text (orig) | Gap (M-S) | "
        "N Mig | Styl Mig | Mat Mig | N Non-Mig | Styl NM | Mat NM | r(stijl,mat) |"
    )
    table_sep = (
        "|------|---|-----------|----------|-------------|-----------|"
        "-------|----------|---------|-----------|----------|---------|---------------|"
    )
    table_rows = []
    for y in years:
        s = summary[y]
        row = (
            f"| {y}{flag_n(y, '')} "
            f"| {int(s.get('n_stijl', 0))} "
            f"| {fmt(s.get('mean_stijl'))} "
            f"| {fmt(s.get('mean_materieel'))} "
            f"| {fmt(s.get('mean_text'))} "
            f"| {fmt(s.get('gap'))} "
            f"| {int(s.get('mig_n_stijl', 0))} "
            f"| {fmt(s.get('mig_mean_stijl'))} "
            f"| {fmt(s.get('mig_mean_materieel'))} "
            f"| {int(s.get('non_mig_n_stijl', 0))} "
            f"| {fmt(s.get('non_mig_mean_stijl'))} "
            f"| {fmt(s.get('non_mig_mean_materieel'))} "
            f"| {fmt(s.get('r_stijl_mat'))} |"
        )
        table_rows.append(row)

    # Pre/post means
    pre_years = [y for y in years if y < BREAK_YEAR]
    post_years = [y for y in years if y >= BREAK_YEAR]

    def pre_post_means(key):
        pre = [summary[y].get(key, float("nan")) for y in pre_years]
        post = [summary[y].get(key, float("nan")) for y in post_years]
        pre_valid = [v for v in pre if not np.isnan(v)]
        post_valid = [v for v in post if not np.isnan(v)]
        return (np.mean(pre_valid) if pre_valid else float("nan"),
                np.mean(post_valid) if post_valid else float("nan"))

    pre_stijl, post_stijl = pre_post_means("mean_stijl")
    pre_mat, post_mat = pre_post_means("mean_materieel")
    pre_text, post_text = pre_post_means("mean_text")
    pre_gap, post_gap = pre_post_means("gap")

    # Divergence test text
    div_text = f"**Test:** {divergence.get('test', 'N/A')}\n\n"
    div_text += f"**Statistic:** {divergence.get('statistic', 'N/A')}\n\n"
    div_text += f"**p-value:** {divergence.get('p_value', 'N/A')}\n\n"
    div_text += f"**N yearly pairs:** {divergence.get('n_years', 'N/A')}\n\n"
    div_text += f"**Conclusion:** {divergence.get('conclusion', 'N/A')}"

    # Correlation change text
    corr_text = f"**Pre-2024 mean r(stijl,mat):** {fmt(temporal_corr.get('pre_mean_r', float('nan')))}\n\n"
    corr_text += f"**Post-2024 mean r(stijl,mat):** {fmt(temporal_corr.get('post_mean_r', float('nan')))}\n\n"
    corr_text += f"**Change test (Mann-Whitney):** U={fmt(temporal_corr.get('mannwhitney_u', float('nan')))}"
    corr_text += f", p={fmt(temporal_corr.get('mannwhitney_p', float('nan')))}\n\n"
    corr_text += f"**Interpretation:** {temporal_corr.get('correlation_change', 'N/A')}"

    # Overall correlation (all data pooled)
    all_stijl = []
    all_mat = []
    for y in years:
        all_stijl.extend(yearly[y]["stijl"])
        all_mat.extend(yearly[y]["materieel"])
    overall_r, overall_p = pearsonr(all_stijl, all_mat) if len(all_stijl) >= 3 else (float("nan"), float("nan"))

    # Migration domain correlations
    all_mig_stijl, all_mig_mat = [], []
    all_nm_stijl, all_nm_mat = [], []
    for y in years:
        all_mig_stijl.extend(yearly[y]["mig_stijl"])
        all_mig_mat.extend(yearly[y]["mig_materieel"])
        all_nm_stijl.extend(yearly[y]["non_mig_stijl"])
        all_nm_mat.extend(yearly[y]["non_mig_materieel"])
    mig_r, mig_p = pearsonr(all_mig_stijl, all_mig_mat) if len(all_mig_stijl) >= 3 else (float("nan"), float("nan"))
    nm_r, nm_p = pearsonr(all_nm_stijl, all_nm_mat) if len(all_nm_stijl) >= 3 else (float("nan"), float("nan"))

    # Gravity-weighted means (pre/post)
    def pre_post_all_gap(key):
        pre = [all_summary[y].get(key, float("nan")) for y in pre_years if y in all_summary]
        post = [all_summary[y].get(key, float("nan")) for y in post_years if y in all_summary]
        pre_valid = [v for v in pre if not np.isnan(v)]
        post_valid = [v for v in post if not np.isnan(v)]
        return (np.mean(pre_valid) if pre_valid else float("nan"),
                np.mean(post_valid) if post_valid else float("nan"))

    pre_all_stijl, post_all_stijl = pre_post_all_gap("mean_stijl")
    pre_all_mat, post_all_mat = pre_post_all_gap("mean_materieel")

    # Gravity-weighted means for right-wing
    def pre_post_ge(key):
        pre = [summary[y].get(key, float("nan")) for y in pre_years]
        post = [summary[y].get(key, float("nan")) for y in post_years]
        pre_valid = [v for v in pre if not np.isnan(v)]
        post_valid = [v for v in post if not np.isnan(v)]
        return (np.mean(pre_valid) if pre_valid else float("nan"),
                np.mean(post_valid) if post_valid else float("nan"))

    pre_ge3_stijl, post_ge3_stijl = pre_post_ge("ge3_mean_stijl")
    pre_ge3_mat, post_ge3_mat = pre_post_ge("ge3_mean_materieel")
    pre_ge4_stijl, post_ge4_stijl = pre_post_ge("ge4_mean_stijl")
    pre_ge4_mat, post_ge4_mat = pre_post_ge("ge4_mean_materieel")

    lines = [
        "# 2D Extremity Temporal Decomposition",
        "",
        "**Goal:** Test whether the \"flat single-dimension trend\" masks diverging trajectories",
        "when stylistic and material extremity scores are analyzed separately over time.",
        "",
        "**Analysis period:** 2016-2026",
        "**Data source (right-wing):** `extremity_scores_2d` (2,869 motions scored) joined with `right_wing_motions`",
        "**Data source (all motions):** `extremity_scores_all` (29,570 motions scored) joined with `motions`",
        "**Domains:** Migration = `asiel/vreemdelingen`; Non-migration = all other categories",
        "",
        "> *Years with <50 scored motions are flagged for low confidence.",
        "",
        "---",
        "",
        "## 1. Key Findings",
        "",
        f"**Overall correlation r(stijl, materieel):** {fmt(overall_r)} (p={fmt(overall_p, 6)})",
        f"**Migration domain r(stijl, materieel):** {fmt(mig_r)} (p={fmt(mig_p, 6)}, n={len(all_mig_stijl)})",
        f"**Non-migration domain r(stijl, materieel):** {fmt(nm_r)} (p={fmt(nm_p, 6)}, n={len(all_nm_stijl)})",
        "",
        "---",
        "",
        "## 2. Pre/Post 2024 Comparison",
        "",
        f"| Dimension | Pre-2024 Mean | Post-2024 Mean | Δ |",
        f"|-----------|--------------|---------------|-----|",
        f"| Stylistic extremity | {fmt(pre_stijl)} | {fmt(post_stijl)} | {fmt(post_stijl - pre_stijl if not np.isnan(pre_stijl) and not np.isnan(post_stijl) else float('nan'))} |",
        f"| Material impact | {fmt(pre_mat)} | {fmt(post_mat)} | {fmt(post_mat - pre_mat if not np.isnan(pre_mat) and not np.isnan(post_mat) else float('nan'))} |",
        f"| Text score (original) | {fmt(pre_text)} | {fmt(post_text)} | {fmt(post_text - pre_text if not np.isnan(pre_text) and not np.isnan(post_text) else float('nan'))} |",
        f"| Gap (M-S) | {fmt(pre_gap)} | {fmt(post_gap)} | {fmt(post_gap - pre_gap if not np.isnan(pre_gap) and not np.isnan(post_gap) else float('nan'))} |",
        "",
        "---",
        "",
        "## 3. Yearly Data Table",
        "",
        table_header,
        table_sep,
        *table_rows,
        "",
        "> * Years with <50 scored motions; confidence intervals are wider or N/A.",
        "",
        "---",
        "",
        "## 4. Divergence Test (Wilcoxon Signed-Rank)",
        "",
        div_text,
        "",
        "The Wilcoxon signed-rank test compares yearly mean stylistic vs yearly mean material scores.",
        "A significant result (p < 0.05) indicates the two dimensions systematically differ,",
        "meaning the flat single-dimension trend masks a genuine divergence between stylistic",
        "and material extremity.",
        "",
        "---",
        "",
        "## 5. Per-Year Correlation Analysis",
        "",
        "| Year | r(stijl,mat) | p | N | Domain |",
        "|------|--------------|---|---|--------|",
    ]

    for y in years:
        s = summary[y]
        r_val = s.get("r_stijl_mat", float("nan"))
        p_val = s.get("p_stijl_mat", float("nan"))
        n_val = s.get("n_stijl", 0)
        r_mig_val = s.get("r_mig_stijl_mat", float("nan"))
        p_mig_val = s.get("p_mig_stijl_mat", float("nan"))
        n_mig_val = s.get("mig_n_stijl", 0)
        r_nm_val = s.get("r_non_mig_stijl_mat", float("nan"))
        p_nm_val = s.get("p_non_mig_stijl_mat", float("nan"))
        n_nm_val = s.get("non_mig_n_stijl", 0)

        lines.append(
            f"| {y} | {fmt(r_val)} | {fmt(p_val, 6)} | {int(n_val)} | All |"
        )
        if not np.isnan(r_mig_val):
            lines.append(
                f"| | {fmt(r_mig_val)} | {fmt(p_mig_val, 6)} | {int(n_mig_val)} | Migration |"
            )
        if not np.isnan(r_nm_val):
            lines.append(
                f"| | {fmt(r_nm_val)} | {fmt(p_nm_val, 6)} | {int(n_nm_val)} | Non-migration |"
            )

    lines += [
        "",
        "---",
        "",
        "## 6. Correlation Change Pre vs Post 2024",
        "",
        corr_text,
        "",
        "A significant change in the per-year stijl-material correlation would suggest",
        "that the relationship between the two dimensions itself shifted across the break period —",
        "e.g., if right-wing parties post-2024 began moderating style while maintaining material",
        "impact, the correlation would decrease.",
        "",
        "---",
        "",
        "## 7. Gap Trajectory Interpretation",
        "",
        f"- **Pre-2024 mean gap:** {fmt(pre_gap)}",
        f"- **Post-2024 mean gap:** {fmt(post_gap)}",
        f"- **Gap change:** {fmt(post_gap - pre_gap if not np.isnan(pre_gap) and not np.isnan(post_gap) else float('nan'))}",
        "",
        "A widening gap (increasing material > stylistic) would indicate that right-wing motions",
        "became less stylistically extreme but maintained or increased their material impact —",
        "consistent with the 'strategic moderation of rhetoric' hypothesis.",
        "",
        "A narrowing gap would suggest that stylistic and material dimensions are converging,",
        "meaning the distinctions between the two become less meaningful over time.",
        "",
        "A stable gap suggests the two dimensions move in parallel, and the flat single-dimension",
        "trend is an accurate summary (no masked divergence).",
        "",
        "---",
        "",
        "## 8. Domain Stratification",
        "",
        "| Domain | Pre Mean Stijl | Pre Mean Mat | Post Mean Stijl | Post Mean Mat | Pre Gap | Post Gap | Pre r | Post r |",
        "|--------|---------------|-------------|----------------|---------------|---------|----------|-------|--------|",
    ]

    for domain_name, prefix in [("Migration", "mig_"), ("Non-migration", "non_mig_")]:
        def _nanmean(vals):
            valid = [v for v in vals if not np.isnan(v)]
            return float(np.mean(valid)) if valid else float("nan")
        pre_s = _nanmean([summary[y].get(f"{prefix}mean_stijl", float("nan")) for y in pre_years])
        pre_m = _nanmean([summary[y].get(f"{prefix}mean_materieel", float("nan")) for y in pre_years])
        post_s = _nanmean([summary[y].get(f"{prefix}mean_stijl", float("nan")) for y in post_years])
        post_m = _nanmean([summary[y].get(f"{prefix}mean_materieel", float("nan")) for y in post_years])
        pre_g = pre_m - pre_s if not np.isnan(pre_s) and not np.isnan(pre_m) else float("nan")
        post_g = post_m - post_s if not np.isnan(post_s) and not np.isnan(post_m) else float("nan")

        pre_r_list = [summary[y].get(f"r_{prefix}stijl_mat", float("nan")) for y in pre_years]
        post_r_list = [summary[y].get(f"r_{prefix}stijl_mat", float("nan")) for y in post_years]
        pre_r_mean = np.nanmean(pre_r_list) if any(not np.isnan(v) for v in pre_r_list) else float("nan")
        post_r_mean = np.nanmean(post_r_list) if any(not np.isnan(v) for v in post_r_list) else float("nan")

        lines.append(
            f"| {domain_name} | {fmt(pre_s)} | {fmt(pre_m)} | {fmt(post_s)} | {fmt(post_m)} | "
            f"{fmt(pre_g)} | {fmt(post_g)} | {fmt(pre_r_mean)} | {fmt(post_r_mean)} |"
        )

    lines += [
        "",
        "---",
        "",
        "## 9. Gravity-Weighted Trends (Right-Wing)",
        "",
        "Yearly means for right-wing motions filtered by material impact thresholds.",
        "M≥3 = motions with substantive material impact (score ≥ 3).",
        "M≥4 = motions with fundamental material impact (score ≥ 4).",
        "",
        "| Year | N (all RW) | M≥3 N | M≥4 N | Stijl (all) | Stijl M≥3 | Stijl M≥4 | Mat (all) | Mat M≥3 | Mat M≥4 |",
        "|------|-----------|-------|-------|-------------|-----------|-----------|-----------|---------|---------|",
    ]

    for y in years:
        s = summary[y]
        lines.append(
            f"| {y} "
            f"| {int(s.get('n_stijl', 0))} "
            f"| {int(s.get('ge3_n_stijl', 0))} "
            f"| {int(s.get('ge4_n_stijl', 0))} "
            f"| {fmt(s.get('mean_stijl'))} "
            f"| {fmt(s.get('ge3_mean_stijl'))} "
            f"| {fmt(s.get('ge4_mean_stijl'))} "
            f"| {fmt(s.get('mean_materieel'))} "
            f"| {fmt(s.get('ge3_mean_materieel'))} "
            f"| {fmt(s.get('ge4_mean_materieel'))} |"
        )

    lines += [
        "",
        "| Bucket | Pre-2024 Mean Stijl | Pre-2024 Mean Mat | Post-2024 Mean Stijl | Post-2024 Mean Mat |",
        "|--------|-------------------|-------------------|---------------------|-------------------|",
        f"| All RW | {fmt(pre_stijl)} | {fmt(pre_mat)} | {fmt(post_stijl)} | {fmt(post_mat)} |",
        f"| M≥3 | {fmt(pre_ge3_stijl)} | {fmt(pre_ge3_mat)} | {fmt(post_ge3_stijl)} | {fmt(post_ge3_mat)} |",
        f"| M≥4 | {fmt(pre_ge4_stijl)} | {fmt(pre_ge4_mat)} | {fmt(post_ge4_stijl)} | {fmt(post_ge4_mat)} |",
        "",
        "---",
        "",
        "## 10. Figure",
        "",
        f"![2D Extremity Temporal Figure]({Path(fig_path).name})",
        "",
        "**Figure panels:**",
        "- **Top panel:** Yearly mean stylistic (red) and material (blue) extremity scores with",
        "  95% bootstrap confidence intervals. Grey dashed line = original single-dimension",
        "  `text_score` for comparison. Gold/orange lines show material impact for M≥3 and M≥4 subsets.",
        "- **Second panel:** Gap trajectory (material minus stylistic) for all domains, migration,",
        "  and non-migration. Positive gap = material impact exceeds stylistic extremity.",
        "  A widening gap indicates increasing divergence between dimensions.",
        "- **Third panel:** Per-year Pearson correlation between stylistic and material scores.",
        "  Declining correlation over time suggests the two dimensions are decoupling.",
        "- **Fourth panel:** All-motion (dashed) vs right-wing (solid) comparison for both stylistic",
        "  and material dimensions. Shows how right-wing trends compare to the full motion landscape.",
        "",
        "---",
        "",
        "## 11. Limitations",
        "",
        "- **Yearly resolution:** Year-level aggregation necessarily smooths within-year trends.",
        "  The quarterly framework from U1 provides finer resolution for other metrics.",
        "- **Low-N years:** Some years (especially 2016-2018 and 2026) have fewer than 50 scored",
        "  motions, reducing confidence in those yearly means.",
        "- **2D scores are LLM-generated:** The `stijl_extremiteit` and `materiele_impact` scores",
        "  come from LLM-based assessment and may contain systematic biases.",
        "- **Correlation vs causation:** Per-year correlations describe association, not causation.",
        "  A declining correlation could reflect scoring drift rather than genuine decoupling.",
        "- **Domain imbalance:** Migration-domain motions are a minority of all right-wing motions,",
        "  so domain-stratified analyses have lower statistical power.",
        "",
        "---",
        "",
        "## 12. All-Motion Comparison",
        "",
        "Yearly means for ALL motions (from `extremity_scores_all`) compared to right-wing-only means.",
        "This provides context for whether right-wing trends reflect party-specific dynamics or broader",
        "parliamentary trends.",
        "",
        "| Year | N (all) | All Stijl | All Mat | N (RW) | RW Stijl | RW Mat | Diff Stijl | Diff Mat |",
        "|------|---------|-----------|---------|--------|----------|--------|------------|----------|",
    ]

    for y in years:
        s = summary[y]
        a = all_summary.get(y, {})
        all_n = int(a.get("n_stijl", 0))
        all_s = fmt(a.get("mean_stijl"))
        all_m = fmt(a.get("mean_materieel"))
        rw_n = int(s.get("n_stijl", 0))
        rw_s = fmt(s.get("mean_stijl"))
        rw_m = fmt(s.get("mean_materieel"))
        diff_s = fmt(s.get("mean_stijl", float("nan")) - a.get("mean_stijl", float("nan")) if not np.isnan(s.get("mean_stijl", float("nan"))) and not np.isnan(a.get("mean_stijl", float("nan"))) else float("nan"))
        diff_m = fmt(s.get("mean_materieel", float("nan")) - a.get("mean_materieel", float("nan")) if not np.isnan(s.get("mean_materieel", float("nan"))) and not np.isnan(a.get("mean_materieel", float("nan"))) else float("nan"))
        lines.append(
            f"| {y} | {all_n} | {all_s} | {all_m} | {rw_n} | {rw_s} | {rw_m} | {diff_s} | {diff_m} |"
        )

    # Pre/post for all-motion
    lines += [
        "",
        "| Period | All Stijl | All Mat | RW Stijl | RW Mat | Stijl Δ | Mat Δ |",
        "|--------|-----------|---------|----------|--------|---------|-------|",
        f"| Pre-2024 | {fmt(pre_all_stijl)} | {fmt(pre_all_mat)} | {fmt(pre_stijl)} | {fmt(pre_mat)} | {fmt(pre_stijl - pre_all_stijl if not np.isnan(pre_stijl) and not np.isnan(pre_all_stijl) else float('nan'))} | {fmt(pre_mat - pre_all_mat if not np.isnan(pre_mat) and not np.isnan(pre_all_mat) else float('nan'))} |",
        f"| Post-2024 | {fmt(post_all_stijl)} | {fmt(post_all_mat)} | {fmt(post_stijl)} | {fmt(post_mat)} | {fmt(post_stijl - post_all_stijl if not np.isnan(post_stijl) and not np.isnan(post_all_stijl) else float('nan'))} | {fmt(post_mat - post_all_mat if not np.isnan(post_mat) and not np.isnan(post_all_mat) else float('nan'))} |",
        "",
    ]

    lines += [
        "---",
        "",
        "## 13. Conclusion",
        "",
        f"The overall stijl-materieel correlation is r={fmt(overall_r)} (p={fmt(overall_p, 6)}),",
        "consistent with the aggregate finding of r≈0.47.",
        "",
        f"The divergence test ({divergence.get('test', 'N/A')}) "
        f"{'found' if divergence.get('p_value', 1) is not None and not np.isnan(divergence.get('p_value', float('nan'))) and divergence.get('p_value', 1) < 0.05 else 'did not find'} "
        f"significant systematic divergence between stylistic and material yearly means "
        f"(p={fmt(divergence.get('p_value', float('nan')))}).",
        "",
        f"The pre/post correlation change analysis {temporal_corr.get('correlation_change', 'could not be performed').lower()}.",
        "",
        f"The gap (material minus stylistic) {'widened' if not np.isnan(post_gap) and not np.isnan(pre_gap) and post_gap > pre_gap else 'narrowed'} "
        f"from {fmt(pre_gap)} pre-2024 to {fmt(post_gap)} post-2024.",
    ]

    report_path = REPORTS_DIR / "extremity_2d_temporal.md"
    with open(report_path, "w") as f:
        f.write("\n".join(lines))
    logger.info("Report written to %s", report_path)
    return str(report_path)


def main() -> int:
    """Run the 2D extremity temporal analysis end to end.

    Opens the DuckDB database at ``DB_PATH`` read-only, fetches the
    right-wing and all-motion yearly data, computes the yearly summaries,
    the divergence test and the temporal correlation changes, then renders
    the figure and the markdown report. The report path, figure path and the
    two headline results are printed to stdout.

    Returns:
        The process exit code; 0 when the pipeline runs to completion.
    """
    logger.info("Connecting to database: %s", DB_PATH)
    con = duckdb.connect(DB_PATH, read_only=True)
    try:
        logger.info("Fetching 2D extremity data by year...")
        yearly = fetch_2d_yearly_data(con)

        total_motions = sum(len(scores["stijl"]) for scores in yearly.values())
        logger.info("Fetched %d scored motions across %d years", total_motions, len(yearly))

        logger.info("Fetching all-motion extremity data...")
        all_yearly = fetch_all_motion_yearly(con)
    finally:
        # Release the database handle even when a fetch raises; everything
        # after this point works on the in-memory results only.
        con.close()

    logger.info("Computing yearly summary statistics...")
    summary = compute_yearly_summary(yearly)

    logger.info("Computing all-motion yearly summary...")
    all_summary = compute_all_motion_summary(all_yearly)

    logger.info("Running divergence test (Wilcoxon)...")
    divergence = compute_divergence_test(yearly)

    logger.info("Computing temporal correlation changes...")
    temporal_corr = compute_temporal_correlations(summary)

    logger.info("Generating figure...")
    fig_path = create_figure(summary, all_summary)

    logger.info("Generating report...")
    report_path = generate_report(summary, divergence, temporal_corr, yearly, fig_path, all_summary)

    print(f"\nReport: {report_path}")
    print(f"Figure: {fig_path}")
    print(f"\nDivergence test: {divergence.get('conclusion', 'N/A')}")
    print(f"Temporal correlation: {temporal_corr.get('correlation_change', 'N/A')}")

    return 0


if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    sys.exit(main())