# motief/analysis/right_wing/extremity_2d_temporal.py

#!/usr/bin/env python3
"""U2: 2D Extremity Temporal Decomposition.

Tests whether the "flat single-dimension trend" masks diverging trajectories
when stylistic and material extremity scores are analyzed separately over time.

Usage:
    uv run python analysis/right_wing/extremity_2d_temporal.py

Output:
    reports/overton_window/extremity_2d_temporal.md
    reports/overton_window/extremity_2d_temporal_figure.png
"""

from __future__ import annotations

import logging
import sys
from pathlib import Path
from typing import Any

import duckdb
import matplotlib

matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import pearsonr, wilcoxon

# Repository root: the directory two levels above this file's own directory.
ROOT = Path(__file__).parent.parent.parent.resolve()
# Put the repository root at the front of the module search path.
sys.path.insert(0, str(ROOT))

# DuckDB database file opened (read-only) by main().
DB_PATH = str(ROOT / "data" / "motions.db")
# Output directory for the markdown report and the figure; created at import time.
REPORTS_DIR = ROOT / "reports" / "overton_window"
REPORTS_DIR.mkdir(parents=True, exist_ok=True)

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)

# Inclusive range of years kept by fetch_2d_yearly_data().
YEAR_MIN, YEAR_MAX = 2016, 2026
# Years < BREAK_YEAR count as "pre", years >= BREAK_YEAR as "post" in the comparisons.
BREAK_YEAR = 2024
# Years with fewer scored motions than this are flagged as low confidence.
CONFIDENCE_N_MIN = 50


def fetch_2d_yearly_data(con: duckdb.DuckDBPyConnection) -> dict[int, dict[str, list[float]]]:
    """Collect per-year 2D extremity scores for classified right-wing motions.

    Joins ``extremity_scores_2d`` with ``right_wing_motions`` (and, via a LEFT
    JOIN, ``extremity_scores``) and buckets every non-null score by year.

    Args:
        con: Open DuckDB connection used to run the query.

    Returns:
        A dict with one entry for every year in ``YEAR_MIN..YEAR_MAX``
        (inclusive). Each entry maps the bucket names ``stijl``, ``materieel``
        and ``text`` -- plus the same names prefixed with ``mig_`` (category
        ``asiel/vreemdelingen``) and ``non_mig_`` (every other category) -- to
        a list of float scores. Rows outside the year range are dropped.

    NOTE(review): each score column is appended independently, so the
    ``stijl`` and ``materieel`` lists are only position-aligned when both
    values are non-null on every row -- confirm against the table contents.
    """
    measures = ("stijl", "materieel", "text")
    scopes = ("", "mig_", "non_mig_")

    # Pre-create every bucket so years without data still appear in the result.
    yearly: dict[int, dict[str, list[float]]] = {
        yr: {f"{scope}{measure}": [] for scope in scopes for measure in measures}
        for yr in range(YEAR_MIN, YEAR_MAX + 1)
    }

    records = con.execute("""
        SELECT
            r.year,
            e2d.stijl_extremiteit,
            e2d.materiele_impact,
            e.text_score,
            r.category
        FROM extremity_scores_2d e2d
        JOIN right_wing_motions r ON e2d.motion_id = r.motion_id
        LEFT JOIN extremity_scores e ON e2d.motion_id = e.motion_id
        WHERE r.classified = TRUE
          AND r.year IS NOT NULL
        ORDER BY r.year
    """).fetchall()

    for raw_year, stijl, materieel, text_score, category in records:
        yr = int(raw_year)
        if not (YEAR_MIN <= yr <= YEAR_MAX):
            continue

        buckets = yearly[yr]
        scope = "mig_" if category == "asiel/vreemdelingen" else "non_mig_"

        for measure, score in zip(measures, (stijl, materieel, text_score)):
            if score is None:
                continue
            value = float(score)
            # Every score lands in the overall bucket and in its domain bucket.
            buckets[measure].append(value)
            buckets[scope + measure].append(value)

    return yearly


def _describe_sample(
    values: list[float],
    rng: np.random.Generator,
    n_boot: int = 1000,
    min_boot_n: int = 20,
) -> dict[str, Any]:
    """Return n, mean, std, SEM and a bootstrap 95% CI for one list of scores."""
    nan = float("nan")
    vals = np.asarray(values, dtype=float)
    n = len(vals)
    stats: dict[str, Any] = {
        "n": n, "mean": nan, "std": nan, "sem": nan, "ci_lo": nan, "ci_hi": nan,
    }
    if n == 0:
        return stats

    stats["mean"] = float(np.mean(vals))
    # A single observation has no sample spread; report 0.0 rather than NaN.
    std = float(np.std(vals, ddof=1)) if n > 1 else 0.0
    stats["std"] = std
    stats["sem"] = float(std / np.sqrt(n)) if n > 1 else 0.0

    if n >= min_boot_n:
        # One resample per iteration keeps the random stream (and thus the
        # reported intervals) reproducible for a given seed.
        boot_means = [
            float(np.mean(rng.choice(vals, size=n, replace=True)))
            for _ in range(n_boot)
        ]
        stats["ci_lo"] = float(np.percentile(boot_means, 2.5))
        stats["ci_hi"] = float(np.percentile(boot_means, 97.5))
    return stats


def _paired_pearson(
    xs: list[float],
    ys: list[float],
    min_n: int = 10,
) -> tuple[float, float]:
    """Return Pearson (r, p), or (nan, nan) when the samples are too short or unequal."""
    x = np.asarray(xs, dtype=float)
    y = np.asarray(ys, dtype=float)
    # pearsonr raises on arrays of different length, so treat that as "no result".
    if len(x) < min_n or len(y) < min_n or len(x) != len(y):
        return float("nan"), float("nan")
    r, p = pearsonr(x, y)
    return float(r), float(p)


def compute_yearly_summary(
    yearly: dict[int, dict[str, list[float]]],
) -> dict[int, dict[str, Any]]:
    """Compute per-year descriptive statistics, correlations and gaps.

    Args:
        yearly: Mapping of year to score buckets. The buckets read are
            ``stijl``, ``materieel`` and ``text``, each also with the
            ``mig_`` and ``non_mig_`` prefix; a missing bucket is treated
            as empty.

    Returns:
        Mapping of year to a flat dict containing:

        * ``year``
        * ``{prefix}n_{m}``, ``{prefix}mean_{m}``, ``{prefix}std_{m}``,
          ``{prefix}sem_{m}``, ``{prefix}ci_lo_{m}``, ``{prefix}ci_hi_{m}``
          for every prefix in ``""``, ``"mig_"``, ``"non_mig_"`` and every
          measure ``m`` in ``stijl``, ``materieel``, ``text``. The CI is a
          1000-draw bootstrap percentile interval, NaN below 20 values.
        * ``r_{prefix}stijl_mat`` / ``p_{prefix}stijl_mat``: Pearson
          correlation of stijl vs materieel, NaN with fewer than 10 values
          or when the two lists differ in length.
        * ``gap``, ``gap_mig``, ``gap_non_mig``: mean materieel minus mean
          stijl for the matching prefix (NaN if either mean is NaN).
    """
    measures = ("stijl", "materieel", "text")
    scopes = ("", "mig_", "non_mig_")
    gap_keys = {"": "gap", "mig_": "gap_mig", "non_mig_": "gap_non_mig"}

    summary: dict[int, dict[str, Any]] = {}
    rng = np.random.default_rng(42)

    for year, d in yearly.items():
        s: dict[str, Any] = {"year": year}

        for prefix in scopes:
            for measure in measures:
                stats = _describe_sample(d.get(f"{prefix}{measure}", []), rng)
                for stat_name, value in stats.items():
                    s[f"{prefix}{stat_name}_{measure}"] = value

        for prefix in scopes:
            r, p = _paired_pearson(
                d.get(f"{prefix}stijl", []),
                d.get(f"{prefix}materieel", []),
            )
            s[f"r_{prefix}stijl_mat"] = r
            s[f"p_{prefix}stijl_mat"] = p

        # Gap = material - stylistic. The means are stored as
        # "{prefix}mean_{measure}"; NaN propagates through the subtraction.
        for prefix in scopes:
            s[gap_keys[prefix]] = s[f"{prefix}mean_materieel"] - s[f"{prefix}mean_stijl"]

        summary[year] = s

    return summary


def compute_divergence_test(
    yearly: dict[int, dict[str, list[float]]],
) -> dict[str, Any]:
    """Run a paired Wilcoxon signed-rank test on yearly mean scores.

    For every year that has at least one ``stijl`` and one ``materieel``
    value, the two yearly means form a pair; the test is run on
    (material means, stylistic means).

    Returns:
        Dict with ``n_years``, ``test``, ``statistic``, ``p_value`` and
        ``conclusion``. ``test`` is ``"insufficient_years"`` for fewer than
        three pairs, ``"wilcoxon_error"`` when the test raises, and
        ``"wilcoxon_signed_rank"`` otherwise.
    """
    nan = float("nan")

    paired_means = [
        (np.mean(yearly[yr]["stijl"]), np.mean(yearly[yr]["materieel"]))
        for yr in sorted(yearly.keys())
        if len(yearly[yr]["stijl"]) > 0 and len(yearly[yr]["materieel"]) > 0
    ]
    stijl_by_year = [pair[0] for pair in paired_means]
    mat_by_year = [pair[1] for pair in paired_means]

    outcome: dict[str, Any] = {"n_years": len(stijl_by_year)}

    if len(stijl_by_year) < 3:
        outcome.update(
            test="insufficient_years",
            statistic=nan,
            p_value=nan,
            conclusion="Not enough yearly data points for a paired test",
        )
        return outcome

    try:
        w_stat, p_val = wilcoxon(mat_by_year, stijl_by_year)
    except Exception as err:
        outcome.update(
            test="wilcoxon_error",
            statistic=nan,
            p_value=nan,
            conclusion=f"Test failed: {err}",
        )
        return outcome

    outcome["test"] = "wilcoxon_signed_rank"
    outcome["statistic"] = float(w_stat)
    outcome["p_value"] = float(p_val)
    outcome["conclusion"] = (
        "Significant divergence: material and stylistic yearly means differ "
        f"(W={w_stat:.1f}, p={p_val:.4f})"
        if p_val < 0.05
        else f"No significant divergence detected (W={w_stat:.1f}, p={p_val:.4f})"
    )
    return outcome


def compute_temporal_correlations(summary: dict[int, dict[str, Any]]) -> dict[str, Any]:
    """Compare the per-year stijl-materieel correlation before and after the break.

    Years below ``BREAK_YEAR`` form the "pre" group, the rest the "post"
    group. Each year's ``r_stijl_mat`` is read from *summary* (NaN when
    absent); NaN values are ignored for the group means and for the test.

    Returns:
        Dict with ``pre_years``, ``post_years``, ``pre_mean_r``,
        ``post_mean_r``, ``pre_correlations``, ``post_correlations``,
        ``mannwhitney_u``, ``mannwhitney_p`` and ``correlation_change``.
        The Mann-Whitney fields are NaN unless both groups have at least
        two valid correlations and the test runs without raising.
    """
    nan = float("nan")

    def _year_r(yr: int) -> float:
        return summary[yr].get("r_stijl_mat", nan)

    ordered_years = sorted(summary.keys())
    pre_years = [yr for yr in ordered_years if yr < BREAK_YEAR]
    post_years = [yr for yr in ordered_years if yr >= BREAK_YEAR]

    pre_valid = [r for r in map(_year_r, pre_years) if not np.isnan(r)]
    post_valid = [r for r in map(_year_r, post_years) if not np.isnan(r)]

    result: dict[str, Any] = {
        "pre_years": pre_years,
        "post_years": post_years,
        "pre_mean_r": float(np.mean(pre_valid)) if pre_valid else nan,
        "post_mean_r": float(np.mean(post_valid)) if post_valid else nan,
        "pre_correlations": {str(yr): _year_r(yr) for yr in pre_years},
        "post_correlations": {str(yr): _year_r(yr) for yr in post_years},
    }

    # Defaults cover both "too few values" and "test raised".
    u_stat, p_value = nan, nan

    if len(pre_valid) >= 2 and len(post_valid) >= 2:
        from scipy.stats import mannwhitneyu
        try:
            u, p = mannwhitneyu(pre_valid, post_valid, alternative="two-sided")
        except Exception:
            verdict = "Insufficient valid data for comparison"
        else:
            u_stat, p_value = float(u), float(p)
            if p < 0.05:
                verdict = (
                    f"Significant change in stijl-material correlation pre vs post-2024 "
                    f"(U={u:.1f}, p={p:.4f})"
                )
            else:
                verdict = (
                    f"No significant change in stijl-material correlation (U={u:.1f}, p={p:.4f})"
                )
    else:
        verdict = "Insufficient valid data for pre/post comparison"

    result["mannwhitney_u"] = u_stat
    result["mannwhitney_p"] = p_value
    result["correlation_change"] = verdict
    return result


def create_figure(summary: dict[int, dict[str, Any]]) -> str:
    """Render the three-panel 2D extremity figure and save it as a PNG.

    Panels (top to bottom, sharing the year axis):

    1. Yearly mean stylistic, material and original text scores, with the
       bootstrap CI bands for the stylistic and material series.
    2. Gap (material minus stylistic) for all domains, migration and
       non-migration.
    3. Per-year Pearson r between stylistic and material scores.

    Args:
        summary: Per-year statistics as produced by compute_yearly_summary();
            missing keys are plotted as gaps (NaN).

    Returns:
        Path of the written file,
        ``REPORTS_DIR / "extremity_2d_temporal_figure.png"``, as a string.
    """
    years = sorted(summary.keys())
    years_arr = np.array(years)

    def _series(key: str) -> np.ndarray:
        """Per-year values of *key* as a float array (NaN where absent)."""
        return np.array([summary[y].get(key, float("nan")) for y in years], dtype=float)

    def _mark_break(ax) -> None:
        """Draw the dotted break line and label it near the top of *ax*."""
        ax.axvline(x=BREAK_YEAR - 0.5, color="black", linestyle=":", alpha=0.5, linewidth=1)
        # Blended transform: x in data units, y as a fraction of the axes
        # height, so the label position does not depend on when (or whether)
        # the y-limits are changed afterwards.
        ax.annotate(str(BREAK_YEAR), xy=(BREAK_YEAR - 0.3, 0.95),
                    xycoords=ax.get_xaxis_transform(),
                    fontsize=9, color="black", alpha=0.7, va="top")

    stijl_means = _series("mean_stijl")
    mat_means = _series("mean_materieel")
    text_means = _series("mean_text")

    stijl_ci_lo = _series("ci_lo_stijl")
    stijl_ci_hi = _series("ci_hi_stijl")
    mat_ci_lo = _series("ci_lo_materieel")
    mat_ci_hi = _series("ci_hi_materieel")

    gaps = _series("gap")
    gaps_mig = _series("gap_mig")
    gaps_non_mig = _series("gap_non_mig")

    rs = _series("r_stijl_mat")
    ns = _series("n_stijl")

    colour_stijl = "#E53935"
    colour_mat = "#1E88E5"
    colour_text = "#9E9E9E"

    fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(14, 14), sharex=True)
    try:
        # Panel 1: Yearly means with CIs
        mask_stijl = ~np.isnan(stijl_means)
        mask_mat = ~np.isnan(mat_means)
        mask_text = ~np.isnan(text_means)

        ax1.fill_between(
            years_arr[mask_stijl],
            stijl_ci_lo[mask_stijl],
            stijl_ci_hi[mask_stijl],
            alpha=0.12,
            color=colour_stijl,
        )
        ax1.fill_between(
            years_arr[mask_mat],
            mat_ci_lo[mask_mat],
            mat_ci_hi[mask_mat],
            alpha=0.12,
            color=colour_mat,
        )

        ax1.plot(years_arr[mask_stijl], stijl_means[mask_stijl],
                 marker="o", color=colour_stijl, linewidth=2, label="Stylistic extremity")
        ax1.plot(years_arr[mask_mat], mat_means[mask_mat],
                 marker="s", color=colour_mat, linewidth=2, label="Material impact")
        ax1.plot(years_arr[mask_text], text_means[mask_text],
                 marker="^", color=colour_text, linewidth=1.5, linestyle="--", alpha=0.7,
                 label="Original single-score")

        ax1.set_ylim(0.5, 5.5)
        _mark_break(ax1)

        # Flag low-N years with a small count label just above the axis floor.
        for xi, n in zip(years_arr, ns):
            if not np.isnan(n) and n < CONFIDENCE_N_MIN:
                ax1.annotate(f"n={int(n)}", xy=(xi, 1.05), fontsize=6,
                             color="grey", alpha=0.5, ha="center", va="bottom")

        ax1.set_ylabel("Mean score (1-5 scale)")
        ax1.set_title("2D Extremity Temporal Decomposition: Stylistic vs Material Impact Over Time",
                      fontweight="bold")
        ax1.legend(loc="upper left", fontsize=8)
        ax1.grid(True, alpha=0.3)

        # Panel 2: Gap trajectory (material - stylistic)
        mask_gap = ~np.isnan(gaps)
        ax2.plot(years_arr[mask_gap], gaps[mask_gap],
                 marker="D", color="#FF8F00", linewidth=2, label="All domains")
        mask_gap_mig = ~np.isnan(gaps_mig)
        ax2.plot(years_arr[mask_gap_mig], gaps_mig[mask_gap_mig],
                 marker="^", color=colour_stijl, linewidth=1.5, linestyle=":", label="Migration")
        mask_gap_nm = ~np.isnan(gaps_non_mig)
        ax2.plot(years_arr[mask_gap_nm], gaps_non_mig[mask_gap_nm],
                 marker="v", color=colour_mat, linewidth=1.5, linestyle="-.", label="Non-migration")

        ax2.axhline(y=0, color="black", linestyle="--", alpha=0.3, linewidth=1)
        _mark_break(ax2)

        ax2.set_ylabel("Gap (material - stylistic)")
        ax2.set_title("Divergence Gap: Material Impact Minus Stylistic Extremity Over Time",
                      fontweight="bold")
        ax2.legend(loc="upper left", fontsize=8)
        ax2.grid(True, alpha=0.3)

        # Panel 3: Stijl-materieel correlation over time
        mask_rs = ~np.isnan(rs)
        ax3.bar(years_arr[mask_rs], rs[mask_rs], color="#6A1B9A", alpha=0.85, edgecolor="white")
        ax3.axhline(y=0, color="black", linestyle="--", alpha=0.3, linewidth=1)
        _mark_break(ax3)

        for xi, r_val, n_val in zip(years_arr[mask_rs], rs[mask_rs], ns[mask_rs]):
            # Put the label beyond the bar end: above positive bars, below negative ones.
            ax3.annotate(f"r={r_val:.2f}\nn={int(n_val)}", xy=(xi, r_val),
                         fontsize=7, ha="center",
                         va="bottom" if r_val >= 0 else "top", color="#4A148C")

        ax3.set_xlabel("Year")
        ax3.set_ylabel("Pearson r (stijl, materieel)")
        ax3.set_title("Per-Year Correlation: Stylistic vs Material Impact", fontweight="bold")
        ax3.grid(True, alpha=0.3, axis="y")

        for ax in (ax1, ax2, ax3):
            ax.set_xticks(years_arr)
        ax3.set_xticklabels([str(y) for y in years], rotation=45)
        ax1.tick_params(labelbottom=False)
        ax2.tick_params(labelbottom=False)

        fig.tight_layout()
        path = str(REPORTS_DIR / "extremity_2d_temporal_figure.png")
        fig.savefig(path, dpi=150, bbox_inches="tight")
    finally:
        # Free the figure even if drawing or saving fails.
        plt.close(fig)

    logger.info("Saved figure to %s", path)
    return path


def generate_report(
    summary: dict[int, dict[str, Any]],
    divergence: dict[str, Any],
    temporal_corr: dict[str, Any],
    yearly: dict[int, dict[str, list[float]]],
    fig_path: str,
) -> str:
    """Write the markdown report and return its path.

    Args:
        summary: Per-year statistics (as produced by compute_yearly_summary()).
        divergence: Result dict of compute_divergence_test().
        temporal_corr: Result dict of compute_temporal_correlations().
        yearly: Raw per-year score buckets; used for the pooled correlations.
        fig_path: Path of the figure; only its file name is linked in the report.

    Returns:
        Path of the written file, ``REPORTS_DIR / "extremity_2d_temporal.md"``,
        as a string. The file is written as UTF-8 because the report text
        contains non-ASCII characters.
    """
    nan = float("nan")
    years = sorted(summary.keys())
    pre_years = [y for y in years if y < BREAK_YEAR]
    post_years = [y for y in years if y >= BREAK_YEAR]

    def fmt(val, precision=3):
        """Format a number with fixed precision; None and NaN become 'N/A'."""
        if val is None or (isinstance(val, float) and np.isnan(val)):
            return "N/A"
        return f"{val:.{precision}f}"

    def fmt_delta(before, after):
        """Format ``after - before``; 'N/A' when either side is NaN."""
        if np.isnan(before) or np.isnan(after):
            return "N/A"
        return fmt(after - before)

    def flag_n(year, key_prefix):
        """Return the low-confidence marker for years below CONFIDENCE_N_MIN."""
        n = summary[year].get(f"{key_prefix}n_stijl", 0)
        return " *" if n < CONFIDENCE_N_MIN else ""

    def pre_post_means(key):
        """Mean of *key* over pre and post years, ignoring NaN; NaN if none valid."""
        def _mean_valid(year_list):
            vals = [summary[y].get(key, nan) for y in year_list]
            vals = [v for v in vals if not np.isnan(v)]
            return float(np.mean(vals)) if vals else nan

        return _mean_valid(pre_years), _mean_valid(post_years)

    def pooled(bucket):
        """Concatenate one score bucket over all years."""
        return [v for y in years for v in yearly[y][bucket]]

    def pooled_r(xs, ys):
        """Pearson (r, p) of two pooled lists; NaN if too short or unequal in length."""
        # pearsonr raises on length mismatch, which would abort the whole report.
        if len(xs) < 3 or len(xs) != len(ys):
            return nan, nan
        r, p = pearsonr(xs, ys)
        return float(r), float(p)

    # Yearly means table
    table_header = (
        "| Year | N | Stylistic | Material | Text (orig) | Gap (M-S) | "
        "N Mig | Styl Mig | Mat Mig | N Non-Mig | Styl NM | Mat NM | r(stijl,mat) |"
    )
    table_sep = (
        "|------|---|-----------|----------|-------------|-----------|"
        "-------|----------|---------|-----------|----------|---------|---------------|"
    )
    table_rows = []
    for y in years:
        s = summary[y]
        table_rows.append(
            f"| {y}{flag_n(y, '')} "
            f"| {int(s.get('n_stijl', 0))} "
            f"| {fmt(s.get('mean_stijl'))} "
            f"| {fmt(s.get('mean_materieel'))} "
            f"| {fmt(s.get('mean_text'))} "
            f"| {fmt(s.get('gap'))} "
            f"| {int(s.get('mig_n_stijl', 0))} "
            f"| {fmt(s.get('mig_mean_stijl'))} "
            f"| {fmt(s.get('mig_mean_materieel'))} "
            f"| {int(s.get('non_mig_n_stijl', 0))} "
            f"| {fmt(s.get('non_mig_mean_stijl'))} "
            f"| {fmt(s.get('non_mig_mean_materieel'))} "
            f"| {fmt(s.get('r_stijl_mat'))} |"
        )

    # Pre/post means
    pre_stijl, post_stijl = pre_post_means("mean_stijl")
    pre_mat, post_mat = pre_post_means("mean_materieel")
    pre_text, post_text = pre_post_means("mean_text")
    pre_gap, post_gap = pre_post_means("gap")

    # Divergence test text
    div_text = f"**Test:** {divergence.get('test', 'N/A')}\n\n"
    div_text += f"**Statistic:** {divergence.get('statistic', 'N/A')}\n\n"
    div_text += f"**p-value:** {divergence.get('p_value', 'N/A')}\n\n"
    div_text += f"**N yearly pairs:** {divergence.get('n_years', 'N/A')}\n\n"
    div_text += f"**Conclusion:** {divergence.get('conclusion', 'N/A')}"

    # Correlation change text
    corr_text = f"**Pre-2024 mean r(stijl,mat):** {fmt(temporal_corr.get('pre_mean_r', float('nan')))}\n\n"
    corr_text += f"**Post-2024 mean r(stijl,mat):** {fmt(temporal_corr.get('post_mean_r', float('nan')))}\n\n"
    corr_text += f"**Change test (Mann-Whitney):** U={fmt(temporal_corr.get('mannwhitney_u', float('nan')))}"
    corr_text += f", p={fmt(temporal_corr.get('mannwhitney_p', float('nan')))}\n\n"
    corr_text += f"**Interpretation:** {temporal_corr.get('correlation_change', 'N/A')}"

    # Pooled correlations (all years), overall and per domain
    overall_r, overall_p = pooled_r(pooled("stijl"), pooled("materieel"))
    all_mig_stijl = pooled("mig_stijl")
    all_nm_stijl = pooled("non_mig_stijl")
    mig_r, mig_p = pooled_r(all_mig_stijl, pooled("mig_materieel"))
    nm_r, nm_p = pooled_r(all_nm_stijl, pooled("non_mig_materieel"))

    # Conclusion wording
    div_p = divergence.get("p_value")
    div_found = div_p is not None and not np.isnan(div_p) and div_p < 0.05

    if np.isnan(pre_gap) or np.isnan(post_gap):
        # Without both sides there is no direction to report.
        gap_lines = [
            "The gap (material minus stylistic) could not be compared across the break ",
            f"(pre-2024: {fmt(pre_gap)}, post-2024: {fmt(post_gap)}).",
        ]
    else:
        if post_gap > pre_gap:
            gap_verb = "widened"
        elif post_gap < pre_gap:
            gap_verb = "narrowed"
        else:
            gap_verb = "was unchanged"
        gap_lines = [
            f"The gap (material minus stylistic) {gap_verb} ",
            f"from {fmt(pre_gap)} pre-2024 to {fmt(post_gap)} post-2024.",
        ]

    lines = [
        "# 2D Extremity Temporal Decomposition",
        "",
        "**Goal:** Test whether the \"flat single-dimension trend\" masks diverging trajectories",
        "when stylistic and material extremity scores are analyzed separately over time.",
        "",
        "**Analysis period:** 2016-2026",
        # NOTE(review): the motion count below is a hard-coded literal, not derived from the data.
        "**Data source:** `extremity_scores_2d` (2,869 motions scored) joined with `right_wing_motions`",
        "**Domains:** Migration = `asiel/vreemdelingen`; Non-migration = all other categories",
        "",
        "> *Years with <50 scored motions are flagged for low confidence.",
        "",
        "---",
        "",
        "## 1. Key Findings",
        "",
        f"**Overall correlation r(stijl, materieel):** {fmt(overall_r)} (p={fmt(overall_p, 6)})",
        f"**Migration domain r(stijl, materieel):** {fmt(mig_r)} (p={fmt(mig_p, 6)}, n={len(all_mig_stijl)})",
        f"**Non-migration domain r(stijl, materieel):** {fmt(nm_r)} (p={fmt(nm_p, 6)}, n={len(all_nm_stijl)})",
        "",
        "---",
        "",
        "## 2. Pre/Post 2024 Comparison",
        "",
        "| Dimension | Pre-2024 Mean | Post-2024 Mean | Δ |",
        "|-----------|--------------|---------------|-----|",
        f"| Stylistic extremity | {fmt(pre_stijl)} | {fmt(post_stijl)} | {fmt_delta(pre_stijl, post_stijl)} |",
        f"| Material impact | {fmt(pre_mat)} | {fmt(post_mat)} | {fmt_delta(pre_mat, post_mat)} |",
        f"| Text score (original) | {fmt(pre_text)} | {fmt(post_text)} | {fmt_delta(pre_text, post_text)} |",
        f"| Gap (M-S) | {fmt(pre_gap)} | {fmt(post_gap)} | {fmt_delta(pre_gap, post_gap)} |",
        "",
        "---",
        "",
        "## 3. Yearly Data Table",
        "",
        table_header,
        table_sep,
        *table_rows,
        "",
        "> * Years with <50 scored motions; confidence intervals are wider or N/A.",
        "",
        "---",
        "",
        "## 4. Divergence Test (Wilcoxon Signed-Rank)",
        "",
        div_text,
        "",
        "The Wilcoxon signed-rank test compares yearly mean stylistic vs yearly mean material scores.",
        "A significant result (p < 0.05) indicates the two dimensions systematically differ,",
        "meaning the flat single-dimension trend masks a genuine divergence between stylistic",
        "and material extremity.",
        "",
        "---",
        "",
        "## 5. Per-Year Correlation Analysis",
        "",
        "| Year | r(stijl,mat) | p | N | Domain |",
        "|------|--------------|---|---|--------|",
    ]

    for y in years:
        s = summary[y]
        r_mig_val = s.get("r_mig_stijl_mat", nan)
        r_nm_val = s.get("r_non_mig_stijl_mat", nan)

        lines.append(
            f"| {y} | {fmt(s.get('r_stijl_mat', nan))} | {fmt(s.get('p_stijl_mat', nan), 6)} "
            f"| {int(s.get('n_stijl', 0))} | All |"
        )
        # Domain rows are only listed when a correlation could be computed.
        if not np.isnan(r_mig_val):
            lines.append(
                f"| | {fmt(r_mig_val)} | {fmt(s.get('p_mig_stijl_mat', nan), 6)} "
                f"| {int(s.get('mig_n_stijl', 0))} | Migration |"
            )
        if not np.isnan(r_nm_val):
            lines.append(
                f"| | {fmt(r_nm_val)} | {fmt(s.get('p_non_mig_stijl_mat', nan), 6)} "
                f"| {int(s.get('non_mig_n_stijl', 0))} | Non-migration |"
            )

    lines += [
        "",
        "---",
        "",
        "## 6. Correlation Change Pre vs Post 2024",
        "",
        corr_text,
        "",
        "A significant change in the per-year stijl-material correlation would suggest",
        "that the relationship between the two dimensions itself shifted across the break period —",
        "e.g., if right-wing parties post-2024 began moderating style while maintaining material",
        "impact, the correlation would decrease.",
        "",
        "---",
        "",
        "## 7. Gap Trajectory Interpretation",
        "",
        f"- **Pre-2024 mean gap:** {fmt(pre_gap)}",
        f"- **Post-2024 mean gap:** {fmt(post_gap)}",
        f"- **Gap change:** {fmt_delta(pre_gap, post_gap)}",
        "",
        "A widening gap (increasing material > stylistic) would indicate that right-wing motions",
        "became less stylistically extreme but maintained or increased their material impact —",
        "consistent with the 'strategic moderation of rhetoric' hypothesis.",
        "",
        "A narrowing gap would suggest that stylistic and material dimensions are converging,",
        "meaning the distinctions between the two become less meaningful over time.",
        "",
        "A stable gap suggests the two dimensions move in parallel, and the flat single-dimension",
        "trend is an accurate summary (no masked divergence).",
        "",
        "---",
        "",
        "## 8. Domain Stratification",
        "",
        "| Domain | Pre Mean Stijl | Pre Mean Mat | Post Mean Stijl | Post Mean Mat | Pre Gap | Post Gap | Pre r | Post r |",
        "|--------|---------------|-------------|----------------|---------------|---------|----------|-------|--------|",
    ]

    for domain_name, prefix in [("Migration", "mig_"), ("Non-migration", "non_mig_")]:
        # Filtered means avoid numpy's "mean of empty slice" warning on all-NaN groups.
        pre_s, post_s = pre_post_means(f"{prefix}mean_stijl")
        pre_m, post_m = pre_post_means(f"{prefix}mean_materieel")
        pre_r_mean, post_r_mean = pre_post_means(f"r_{prefix}stijl_mat")
        # NaN propagates through the subtraction, so missing sides give N/A.
        pre_g = pre_m - pre_s
        post_g = post_m - post_s

        lines.append(
            f"| {domain_name} | {fmt(pre_s)} | {fmt(pre_m)} | {fmt(post_s)} | {fmt(post_m)} | "
            f"{fmt(pre_g)} | {fmt(post_g)} | {fmt(pre_r_mean)} | {fmt(post_r_mean)} |"
        )

    lines += [
        "",
        "---",
        "",
        "## 9. Figure",
        "",
        f"![2D Extremity Temporal Figure]({Path(fig_path).name})",
        "",
        "**Figure panels:**",
        "- **Top panel:** Yearly mean stylistic (red) and material (blue) extremity scores with",
        "  95% bootstrap confidence intervals. Grey dashed line = original single-dimension",
        "  `text_score` for comparison.",
        "- **Middle panel:** Gap trajectory (material minus stylistic) for all domains, migration,",
        "  and non-migration. Positive gap = material impact exceeds stylistic extremity.",
        "  A widening gap indicates increasing divergence between dimensions.",
        "- **Bottom panel:** Per-year Pearson correlation between stylistic and material scores.",
        "  Declining correlation over time suggests the two dimensions are decoupling.",
        "",
        "---",
        "",
        "## 10. Limitations",
        "",
        "- **Yearly resolution:** Year-level aggregation necessarily smooths within-year trends.",
        "  The quarterly framework from U1 provides finer resolution for other metrics.",
        "- **Low-N years:** Some years (especially 2016-2018 and 2026) have fewer than 50 scored",
        "  motions, reducing confidence in those yearly means.",
        "- **2D scores are LLM-generated:** The `stijl_extremiteit` and `materiele_impact` scores",
        "  come from LLM-based assessment and may contain systematic biases.",
        "- **Correlation vs causation:** Per-year correlations describe association, not causation.",
        "  A declining correlation could reflect scoring drift rather than genuine decoupling.",
        "- **Domain imbalance:** Migration-domain motions are a minority of all right-wing motions,",
        "  so domain-stratified analyses have lower statistical power.",
        "",
        "---",
        "",
        "## 11. Conclusion",
        "",
        f"The overall stijl-materieel correlation is r={fmt(overall_r)} (p={fmt(overall_p, 6)}),",
        # NOTE(review): the reference value below is a literal and is not checked against overall_r.
        "consistent with the aggregate finding of r≈0.47.",
        "",
        f"The divergence test ({divergence.get('test', 'N/A')}) "
        f"{'found' if div_found else 'did not find'} "
        f"significant systematic divergence between stylistic and material yearly means "
        f"(p={fmt(divergence.get('p_value', float('nan')))}).",
        "",
        # The interpretation is quoted verbatim so the test statistic keeps its "U=" casing.
        f"Pre/post correlation change analysis: {temporal_corr.get('correlation_change', 'could not be performed')}.",
        "",
        *gap_lines,
    ]

    report_path = REPORTS_DIR / "extremity_2d_temporal.md"
    # Explicit UTF-8: the report contains characters outside ASCII, which some
    # platform default encodings cannot represent.
    report_path.write_text("\n".join(lines), encoding="utf-8")
    logger.info("Report written to %s", report_path)
    return str(report_path)


def main() -> int:
    """Run the full analysis: fetch data, compute statistics, write figure and report.

    Opens the DuckDB database at ``DB_PATH`` read-only, runs every analysis
    step in order and prints the output paths plus the two headline
    conclusions.

    Returns:
        0, used as the process exit code by the script entry point.
    """
    logger.info("Connecting to database: %s", DB_PATH)
    con = duckdb.connect(DB_PATH, read_only=True)
    try:
        logger.info("Fetching 2D extremity data by year...")
        yearly = fetch_2d_yearly_data(con)
    finally:
        # Everything after the fetch works on in-memory data, so the
        # connection is released here even when the query fails.
        con.close()

    total_motions = sum(len(buckets["stijl"]) for buckets in yearly.values())
    logger.info("Fetched %d scored motions across %d years", total_motions, len(yearly))

    logger.info("Computing yearly summary statistics...")
    summary = compute_yearly_summary(yearly)

    logger.info("Running divergence test (Wilcoxon)...")
    divergence = compute_divergence_test(yearly)

    logger.info("Computing temporal correlation changes...")
    temporal_corr = compute_temporal_correlations(summary)

    logger.info("Generating figure...")
    fig_path = create_figure(summary)

    logger.info("Generating report...")
    report_path = generate_report(summary, divergence, temporal_corr, yearly, fig_path)

    print(f"\nReport: {report_path}")
    print(f"Figure: {fig_path}")
    print(f"\nDivergence test: {divergence.get('conclusion', 'N/A')}")
    print(f"Temporal correlation: {temporal_corr.get('correlation_change', 'N/A')}")

    return 0


# Script entry point: exit the process with main()'s return value as status code.
if __name__ == "__main__":
    raise SystemExit(main())