You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1005 lines
44 KiB
1005 lines
44 KiB
#!/usr/bin/env python3
|
|
"""U2: 2D Extremity Temporal Decomposition.
|
|
|
|
Tests whether the "flat single-dimension trend" masks diverging trajectories
|
|
when stylistic and material extremity scores are analyzed separately over time.
|
|
|
|
Usage:
|
|
uv run python analysis/right_wing/extremity_2d_temporal.py
|
|
|
|
Output:
|
|
reports/overton_window/extremity_2d_temporal.md
|
|
reports/overton_window/extremity_2d_temporal_figure.png
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import duckdb
|
|
import matplotlib
|
|
|
|
matplotlib.use("Agg")
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
from scipy.stats import pearsonr, wilcoxon
|
|
|
|
ROOT = Path(__file__).parent.parent.parent.resolve()
|
|
sys.path.insert(0, str(ROOT))
|
|
|
|
DB_PATH = str(ROOT / "data" / "motions.db")
|
|
REPORTS_DIR = ROOT / "reports" / "overton_window"
|
|
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
|
logger = logging.getLogger(__name__)
|
|
|
|
YEAR_MIN, YEAR_MAX = 2016, 2026
|
|
BREAK_YEAR = 2024
|
|
CONFIDENCE_N_MIN = 50
|
|
|
|
|
|
def fetch_2d_yearly_data(con: duckdb.DuckDBPyConnection) -> dict[int, dict[str, list[float]]]:
    """Collect per-year 2D extremity scores for classified right-wing motions.

    Reads ``extremity_scores_2d`` joined to ``right_wing_motions`` (rows with
    ``classified = TRUE`` and a non-NULL year), left-joined to
    ``extremity_scores`` for the original ``text_score``.

    Returns:
        A dict with one entry for every year in ``YEAR_MIN..YEAR_MAX``
        (years without rows keep empty lists). Each entry maps a bucket name
        to a list of floats:

        * ``stijl`` / ``materieel`` / ``text`` -- all motions of that year;
        * ``mig_*`` / ``non_mig_*`` -- the same three scores split on whether
          the category equals ``"asiel/vreemdelingen"``;
        * ``ge3_*`` / ``ge4_*`` -- stijl and materieel of motions whose
          materieel score is >= 3 (resp. >= 4); only filled when both scores
          are present.

        NULL scores are skipped per column, so the ``stijl`` and
        ``materieel`` lists of one year are not guaranteed to be equally long.
    """
    bucket_names = (
        "stijl", "materieel", "text",
        "mig_stijl", "mig_materieel", "mig_text",
        "non_mig_stijl", "non_mig_materieel", "non_mig_text",
        "ge3_stijl", "ge3_materieel",
        "ge4_stijl", "ge4_materieel",
    )

    query = """
        SELECT
            r.year,
            e2d.stijl_extremiteit,
            e2d.materiele_impact,
            e.text_score,
            r.category
        FROM extremity_scores_2d e2d
        JOIN right_wing_motions r ON e2d.motion_id = r.motion_id
        LEFT JOIN extremity_scores e ON e2d.motion_id = e.motion_id
        WHERE r.classified = TRUE
          AND r.year IS NOT NULL
        ORDER BY r.year
    """
    records = con.execute(query).fetchall()

    yearly: dict[int, dict[str, list[float]]] = {
        yr: {name: [] for name in bucket_names}
        for yr in range(YEAR_MIN, YEAR_MAX + 1)
    }

    for raw_year, stijl, materieel, text_score, category in records:
        yr = int(raw_year)
        if not YEAR_MIN <= yr <= YEAR_MAX:
            continue

        buckets = yearly[yr]
        domain = "mig_" if category == "asiel/vreemdelingen" else "non_mig_"

        # Every non-NULL score goes into the overall bucket and into the
        # bucket of the motion's domain.
        for name, score in (("stijl", stijl), ("materieel", materieel), ("text", text_score)):
            if score is None:
                continue
            value = float(score)
            buckets[name].append(value)
            buckets[domain + name].append(value)

        # Gravity buckets need both dimensions of the same motion.
        if stijl is None or materieel is None:
            continue
        stijl_value, materieel_value = float(stijl), float(materieel)
        for threshold in (3, 4):
            if materieel_value >= threshold:
                buckets[f"ge{threshold}_stijl"].append(stijl_value)
                buckets[f"ge{threshold}_materieel"].append(materieel_value)

    return yearly
|
|
|
|
|
|
def fetch_all_motion_yearly(con: duckdb.DuckDBPyConnection) -> dict[int, dict[str, list[float]]]:
    """Collect per-year stylistic and material scores for ALL scored motions.

    Reads ``extremity_scores_all`` joined to ``motions`` and keeps rows whose
    motion date falls in ``YEAR_MIN..YEAR_MAX`` (the year is extracted from
    ``m.date`` in SQL).

    Returns:
        A dict with one entry for every year in ``YEAR_MIN..YEAR_MAX``; each
        entry has a ``"stijl"`` and a ``"materieel"`` list of floats. Years
        without rows keep empty lists.
    """
    logger.info("Fetching all-motion extremity data by year...")
    rows = con.execute(
        """
        SELECT
            EXTRACT(YEAR FROM m.date) AS year,
            esa.stijl_extremiteit,
            esa.materiele_impact
        FROM extremity_scores_all esa
        JOIN motions m ON esa.motion_id = m.id
        WHERE m.date IS NOT NULL
          AND EXTRACT(YEAR FROM m.date) BETWEEN ? AND ?
        ORDER BY year
        """,
        (YEAR_MIN, YEAR_MAX),
    ).fetchall()

    yearly: dict[int, dict[str, list[float]]] = {
        year: {"stijl": [], "materieel": []}
        for year in range(YEAR_MIN, YEAR_MAX + 1)
    }

    for year, stijl, materieel in rows:
        buckets = yearly[int(year)]
        # A NULL score arrives as None and float(None) raises TypeError.
        # Skip it per column, the same way fetch_2d_yearly_data does, instead
        # of aborting the whole run on one partially scored motion.
        if stijl is not None:
            buckets["stijl"].append(float(stijl))
        if materieel is not None:
            buckets["materieel"].append(float(materieel))

    total = sum(len(v["stijl"]) for v in yearly.values())
    logger.info("Fetched %d all-motion scored motions across %d years", total, len(yearly))
    return yearly
|
|
|
|
|
|
def _bootstrap_mean_ci(vals, rng, n_boot=1000):
    """Return the 2.5/97.5 percentile bootstrap interval of the mean of *vals*."""
    n = len(vals)
    boot_means = [
        float(np.mean(rng.choice(vals, size=n, replace=True)))
        for _ in range(n_boot)
    ]
    return float(np.percentile(boot_means, 2.5)), float(np.percentile(boot_means, 97.5))


def _paired_pearson(xs, ys, min_n=10):
    """Return Pearson ``(r, p)`` for two score lists, or ``(nan, nan)`` when not computable."""
    x_arr = np.array(xs)
    y_arr = np.array(ys)
    # The two lists are filled independently upstream (NULL scores are skipped
    # per column). If their lengths differ the values are not pairable, and
    # pearsonr would raise ValueError; report "no correlation available".
    if len(x_arr) < min_n or len(y_arr) < min_n or len(x_arr) != len(y_arr):
        return float("nan"), float("nan")
    r, p = pearsonr(x_arr, y_arr)
    return float(r), float(p)


def _mean_gap(stats, material_key, stylistic_key):
    """Return ``stats[material_key] - stats[stylistic_key]``, or NaN if either is missing/NaN."""
    material = stats.get(material_key)
    stylistic = stats.get(stylistic_key)
    if material is None or stylistic is None:
        return float("nan")
    if np.isnan(material) or np.isnan(stylistic):
        return float("nan")
    return material - stylistic


def compute_yearly_summary(
    yearly: dict[int, dict[str, list[float]]],
) -> dict[int, dict[str, Any]]:
    """Compute means, counts, SEM, bootstrap CIs, correlations and gaps per year.

    Args:
        yearly: Mapping ``year -> {bucket name -> list of scores}``. Missing
            buckets are treated as empty.

    Returns:
        Mapping ``year -> stats dict``. For every bucket the stats are stored
        under ``{prefix}{stat}_{short}``, where ``prefix`` is one of ``""``,
        ``"mig_"``, ``"non_mig_"``, ``"ge3_"``, ``"ge4_"``; ``stat`` is one of
        ``n``, ``mean``, ``std``, ``sem``, ``ci_lo``, ``ci_hi``; and ``short``
        is ``stijl``, ``materieel`` or ``text`` (e.g. ``mig_mean_stijl``).
        Additional keys:

        * ``r_stijl_mat`` / ``p_stijl_mat`` plus the ``r_mig_`` / ``r_non_mig_``
          variants: per-year Pearson correlation. NaN when either list has
          fewer than 10 values or the two lists differ in length.
        * ``gap``, ``gap_mig``, ``gap_non_mig``, ``gap_ge3``, ``gap_ge4``:
          mean material minus mean stylistic score of the respective bucket.

        Empty buckets yield NaN statistics; a single value yields std/sem of
        0.0; bootstrap CIs (1000 resamples, fixed seed 42) are only computed
        for buckets with at least 20 values and are NaN otherwise.
    """
    nan = float("nan")
    groups = [
        ("", ["stijl", "materieel", "text"]),
        ("mig_", ["mig_stijl", "mig_materieel", "mig_text"]),
        ("non_mig_", ["non_mig_stijl", "non_mig_materieel", "non_mig_text"]),
        ("ge3_", ["ge3_stijl", "ge3_materieel"]),
        ("ge4_", ["ge4_stijl", "ge4_materieel"]),
    ]

    summary: dict[int, dict[str, Any]] = {}
    # One generator for the whole run, consumed in a fixed order, keeps the
    # bootstrap intervals reproducible.
    rng = np.random.default_rng(42)

    for year, d in yearly.items():
        s: dict[str, Any] = {"year": year}

        for prefix, keys in groups:
            for key in keys:
                short = key[len(prefix):]
                vals = np.array(d.get(key, []))
                n = len(vals)

                mean = std = sem = ci_lo = ci_hi = nan
                if n > 0:
                    mean = float(np.mean(vals))
                    if n > 1:
                        sample_sd = np.std(vals, ddof=1)
                        std = float(sample_sd)
                        sem = float(sample_sd / np.sqrt(n))
                    else:
                        std = sem = 0.0
                    if n >= 20:
                        ci_lo, ci_hi = _bootstrap_mean_ci(vals, rng)

                s[f"{prefix}n_{short}"] = n
                s[f"{prefix}mean_{short}"] = mean
                s[f"{prefix}std_{short}"] = std
                s[f"{prefix}sem_{short}"] = sem
                s[f"{prefix}ci_lo_{short}"] = ci_lo
                s[f"{prefix}ci_hi_{short}"] = ci_hi

        # Per-year stijl-materieel correlation: overall, migration, non-migration.
        s["r_stijl_mat"], s["p_stijl_mat"] = _paired_pearson(
            d.get("stijl", []), d.get("materieel", [])
        )
        s["r_mig_stijl_mat"], s["p_mig_stijl_mat"] = _paired_pearson(
            d.get("mig_stijl", []), d.get("mig_materieel", [])
        )
        s["r_non_mig_stijl_mat"], s["p_non_mig_stijl_mat"] = _paired_pearson(
            d.get("non_mig_stijl", []), d.get("non_mig_materieel", [])
        )

        # Gap (material - stylistic). The keys must follow the
        # "{prefix}mean_{short}" layout written above; looking up
        # "mean_mig_*" / "mean_non_mig_*" never matches and would leave the
        # domain gaps permanently NaN.
        s["gap"] = _mean_gap(s, "mean_materieel", "mean_stijl")
        s["gap_mig"] = _mean_gap(s, "mig_mean_materieel", "mig_mean_stijl")
        s["gap_non_mig"] = _mean_gap(s, "non_mig_mean_materieel", "non_mig_mean_stijl")

        # Gravity gaps
        s["gap_ge3"] = _mean_gap(s, "ge3_mean_materieel", "ge3_mean_stijl")
        s["gap_ge4"] = _mean_gap(s, "ge4_mean_materieel", "ge4_mean_stijl")

        summary[year] = s

    return summary
|
|
|
|
|
|
def compute_all_motion_summary(
    yearly: dict[int, dict[str, list[float]]],
) -> dict[int, dict[str, Any]]:
    """Summarise the all-motion scores per year, without any stratification.

    For each year and each of the ``stijl`` / ``materieel`` dimensions the
    result holds ``n_<dim>``, ``mean_<dim>``, ``std_<dim>`` (sample standard
    deviation, ddof=1) and ``sem_<dim>``. A dimension without values gets NaN
    statistics; a single value gets a std and sem of 0.0.
    """
    nan = float("nan")
    per_year: dict[int, dict[str, Any]] = {}

    for yr, buckets in yearly.items():
        row: dict[str, Any] = {"year": yr}

        for dim in ("stijl", "materieel"):
            scores = np.array(buckets.get(dim, []))
            count = len(scores)

            if count == 0:
                mean = spread = stderr = nan
            elif count == 1:
                mean = float(np.mean(scores))
                spread = stderr = 0.0
            else:
                mean = float(np.mean(scores))
                sample_sd = np.std(scores, ddof=1)
                spread = float(sample_sd)
                stderr = float(sample_sd / np.sqrt(count))

            row[f"n_{dim}"] = count
            row[f"mean_{dim}"] = mean
            row[f"std_{dim}"] = spread
            row[f"sem_{dim}"] = stderr

        per_year[yr] = row

    return per_year
|
|
|
|
|
|
def compute_divergence_test(
    yearly: dict[int, dict[str, list[float]]],
) -> dict[str, Any]:
    """Run a paired Wilcoxon signed-rank test on yearly mean scores.

    Each year that has at least one stylistic and one material score
    contributes one (stylistic mean, material mean) pair; years are visited in
    ascending order.

    Returns:
        A dict with ``n_years`` (number of pairs), ``test``, ``statistic``,
        ``p_value`` and a human-readable ``conclusion``. With fewer than three
        pairs ``test`` is ``"insufficient_years"``; if the test itself raises,
        ``test`` is ``"wilcoxon_error"``. In both cases statistic and p-value
        are NaN.
    """
    nan = float("nan")

    stylistic: list[Any] = []
    material: list[Any] = []
    for yr in sorted(yearly.keys()):
        bucket = yearly[yr]
        if len(bucket["stijl"]) > 0 and len(bucket["materieel"]) > 0:
            stylistic.append(np.mean(bucket["stijl"]))
            material.append(np.mean(bucket["materieel"]))

    outcome: dict[str, Any] = {"n_years": len(stylistic)}

    # Guard clause: too few yearly pairs for a paired test.
    if len(stylistic) < 3:
        outcome.update(
            test="insufficient_years",
            statistic=nan,
            p_value=nan,
            conclusion="Not enough yearly data points for a paired test",
        )
        return outcome

    try:
        stat, p = wilcoxon(material, stylistic)
        if p < 0.05:
            verdict = (
                "Significant divergence: material and stylistic yearly means differ "
                f"(W={stat:.1f}, p={p:.4f})"
            )
        else:
            verdict = f"No significant divergence detected (W={stat:.1f}, p={p:.4f})"
        outcome.update(
            test="wilcoxon_signed_rank",
            statistic=float(stat),
            p_value=float(p),
            conclusion=verdict,
        )
    except Exception as exc:
        # Best effort: a failing test is reported in the result, not raised.
        outcome.update(
            test="wilcoxon_error",
            statistic=nan,
            p_value=nan,
            conclusion=f"Test failed: {exc}",
        )

    return outcome
|
|
|
|
|
|
def compute_temporal_correlations(summary: dict[int, dict[str, Any]]) -> dict[str, Any]:
    """Compare the per-year stijl-material correlation before and after the break.

    Years below ``BREAK_YEAR`` form the "pre" group, the remaining years the
    "post" group. The per-year ``r_stijl_mat`` values (NaN when absent) are
    averaged per group and, when each group has at least two non-NaN values,
    compared with a two-sided Mann-Whitney U test.

    Returns:
        A dict with ``pre_years``, ``post_years``, ``pre_mean_r``,
        ``post_mean_r``, ``pre_correlations``, ``post_correlations`` (keyed by
        the year as a string), ``mannwhitney_u``, ``mannwhitney_p`` and a
        textual ``correlation_change``. U and p are NaN when the test could
        not be run.
    """
    nan = float("nan")
    ordered = sorted(summary.keys())
    before = [yr for yr in ordered if yr < BREAK_YEAR]
    after = [yr for yr in ordered if yr >= BREAK_YEAR]

    def _valid_rs(span):
        """Return the non-NaN per-year correlations of the given years."""
        values = [summary[yr].get("r_stijl_mat", nan) for yr in span]
        return [r for r in values if not np.isnan(r)]

    before_valid = _valid_rs(before)
    after_valid = _valid_rs(after)

    report: dict[str, Any] = {
        "pre_years": before,
        "post_years": after,
        "pre_mean_r": float(np.mean(before_valid)) if before_valid else nan,
        "post_mean_r": float(np.mean(after_valid)) if after_valid else nan,
        "pre_correlations": {str(yr): summary[yr].get("r_stijl_mat", nan) for yr in before},
        "post_correlations": {str(yr): summary[yr].get("r_stijl_mat", nan) for yr in after},
    }

    u_stat = p_val = nan
    if len(before_valid) < 2 or len(after_valid) < 2:
        verdict = "Insufficient valid data for pre/post comparison"
    else:
        from scipy.stats import mannwhitneyu

        try:
            u, p = mannwhitneyu(before_valid, after_valid, alternative="two-sided")
            u_stat, p_val = float(u), float(p)
            if p < 0.05:
                verdict = (
                    f"Significant change in stijl-material correlation pre vs post-2024 "
                    f"(U={u:.1f}, p={p:.4f})"
                )
            else:
                verdict = (
                    f"No significant change in stijl-material correlation (U={u:.1f}, p={p:.4f})"
                )
        except Exception:
            # Best effort: a failing test is reported as "no result".
            u_stat = p_val = nan
            verdict = "Insufficient valid data for comparison"

    report["mannwhitney_u"] = u_stat
    report["mannwhitney_p"] = p_val
    report["correlation_change"] = verdict
    return report
|
|
|
|
|
|
def create_figure(
    summary: dict[int, dict[str, Any]],
    all_summary: dict[int, dict[str, Any]],
) -> str:
    """Generate the 2D extremity temporal figure with 4 panels and save it as PNG.

    Panels (top to bottom, sharing the year axis):

    1. Yearly mean stylistic and material scores with bootstrap CI bands, the
       original single score, and the material means of the M>=3 / M>=4
       subsets. Years with fewer than ``CONFIDENCE_N_MIN`` scored motions get
       an ``n=`` label.
    2. Gap (material minus stylistic) for all domains, migration and
       non-migration.
    3. Per-year Pearson r between stylistic and material scores.
    4. Right-wing means compared with the all-motion means.

    Every panel marks ``BREAK_YEAR`` with a dotted vertical line. NaN values
    are left out of each series.

    Args:
        summary: Per-year right-wing statistics (as produced by
            ``compute_yearly_summary``).
        all_summary: Per-year all-motion statistics; years missing here are
            treated as NaN.

    Returns:
        The path of the written PNG inside ``REPORTS_DIR``, as a string.
    """
    nan = float("nan")
    years = sorted(summary.keys())
    years_arr = np.array(years)

    def _series(key):
        """Per-year values of *key* from the right-wing summary (NaN if absent)."""
        return np.array([summary[yr].get(key, nan) for yr in years])

    def _all_series(key):
        """Per-year values of *key* from the all-motion summary (NaN if absent)."""
        return np.array([
            all_summary[yr].get(key, nan) if yr in all_summary else nan
            for yr in years
        ])

    def _plot_valid(ax, values, **style):
        """Plot *values* against the years, leaving out NaN entries."""
        valid = ~np.isnan(values)
        ax.plot(years_arr[valid], values[valid], **style)

    def _fill_ci(ax, means, lower, upper, colour):
        """Shade the CI band for the years in which the mean is defined."""
        valid = ~np.isnan(means)
        ax.fill_between(years_arr[valid], lower[valid], upper[valid], alpha=0.12, color=colour)

    def _mark_break(ax):
        """Draw the break-year line and label it near the top of the axis.

        The label height is taken from the y-limits at call time, so call
        this only after the axis limits are final.
        """
        ax.axvline(x=BREAK_YEAR - 0.5, color="black", linestyle=":", alpha=0.5, linewidth=1)
        ax.annotate(str(BREAK_YEAR), xy=(BREAK_YEAR - 0.3, ax.get_ylim()[1] * 0.95),
                    fontsize=9, color="black", alpha=0.7)

    stijl_means = _series("mean_stijl")
    mat_means = _series("mean_materieel")
    text_means = _series("mean_text")

    stijl_ci_lo = _series("ci_lo_stijl")
    stijl_ci_hi = _series("ci_hi_stijl")
    mat_ci_lo = _series("ci_lo_materieel")
    mat_ci_hi = _series("ci_hi_materieel")

    gaps = _series("gap")
    gaps_mig = _series("gap_mig")
    gaps_non_mig = _series("gap_non_mig")

    rs = _series("r_stijl_mat")
    ns = _series("n_stijl")

    # Gravity data
    ge3_mat = _series("ge3_mean_materieel")
    ge4_mat = _series("ge4_mean_materieel")

    # All-motion data
    all_stijl = _all_series("mean_stijl")
    all_mat = _all_series("mean_materieel")

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(14, 18), sharex=True)

    colour_stijl = "#E53935"
    colour_mat = "#1E88E5"
    colour_text = "#9E9E9E"
    colour_ge3 = "#F9A825"
    colour_ge4 = "#E65100"

    # Panel 1: Yearly means with CIs + gravity-weighted trends
    _fill_ci(ax1, stijl_means, stijl_ci_lo, stijl_ci_hi, colour_stijl)
    _fill_ci(ax1, mat_means, mat_ci_lo, mat_ci_hi, colour_mat)

    _plot_valid(ax1, stijl_means, marker="o", color=colour_stijl, linewidth=2,
                label="Stylistic extremity (all RW)")
    _plot_valid(ax1, mat_means, marker="s", color=colour_mat, linewidth=2,
                label="Material impact (all RW)")
    _plot_valid(ax1, text_means, marker="^", color=colour_text, linewidth=1.5,
                linestyle="--", alpha=0.7, label="Original single-score")
    _plot_valid(ax1, ge3_mat, marker="s", color=colour_ge3, linewidth=1.5,
                linestyle="--", alpha=0.8, label="Material impact (M≥3)")
    _plot_valid(ax1, ge4_mat, marker="s", color=colour_ge4, linewidth=1.5,
                linestyle=":", alpha=0.8, label="Material impact (M≥4)")

    # Fix the y-range BEFORE placing the break label: the label height is
    # derived from the current y-limits, so setting them afterwards would
    # position it relative to the auto-scaled range instead of the final one.
    ax1.set_ylim(0.5, 5.5)
    _mark_break(ax1)

    # Flag low-confidence years just above the bottom of the panel.
    for xi, n in zip(years_arr, ns):
        if not np.isnan(n) and n < CONFIDENCE_N_MIN:
            ax1.annotate(f"n={int(n)}", xy=(xi, 1.05), fontsize=6,
                         color="grey", alpha=0.5, ha="center", va="bottom")

    ax1.set_ylabel("Mean score (1-5 scale)")
    ax1.set_title("2D Extremity Temporal Decomposition: Stylistic vs Material Impact Over Time",
                  fontweight="bold")
    ax1.legend(loc="upper left", fontsize=8)
    ax1.grid(True, alpha=0.3)

    # Panel 2: Gap trajectory (material - stylistic)
    _plot_valid(ax2, gaps, marker="D", color="#FF8F00", linewidth=2, label="All domains")
    _plot_valid(ax2, gaps_mig, marker="^", color=colour_stijl, linewidth=1.5,
                linestyle=":", label="Migration")
    _plot_valid(ax2, gaps_non_mig, marker="v", color=colour_mat, linewidth=1.5,
                linestyle="-.", label="Non-migration")

    ax2.axhline(y=0, color="black", linestyle="--", alpha=0.3, linewidth=1)
    _mark_break(ax2)

    ax2.set_ylabel("Gap (material - stylistic)")
    ax2.set_title("Divergence Gap: Material Impact Minus Stylistic Extremity Over Time",
                  fontweight="bold")
    ax2.legend(loc="upper left", fontsize=8)
    ax2.grid(True, alpha=0.3)

    # Panel 3: Stijl-materieel correlation over time
    mask_rs = ~np.isnan(rs)
    ax3.bar(years_arr[mask_rs], rs[mask_rs], color="#6A1B9A", alpha=0.85, edgecolor="white")
    ax3.axhline(y=0, color="black", linestyle="--", alpha=0.3, linewidth=1)
    _mark_break(ax3)

    for xi, r_val, n_val in zip(years_arr[mask_rs], rs[mask_rs], ns[mask_rs]):
        ax3.annotate(f"r={r_val:.2f}\nn={int(n_val)}", xy=(xi, r_val),
                     fontsize=7, ha="center", va="bottom", color="#4A148C")

    ax3.set_ylabel("Pearson r (stijl, materieel)")
    ax3.set_title("Per-Year Correlation: Stylistic vs Material Impact", fontweight="bold")
    ax3.grid(True, alpha=0.3, axis="y")

    # Panel 4: All-motion vs right-wing comparison
    _plot_valid(ax4, stijl_means, marker="o", color=colour_stijl, linewidth=2,
                label="RW Stylistic")
    _plot_valid(ax4, mat_means, marker="s", color=colour_mat, linewidth=2,
                label="RW Material")
    _plot_valid(ax4, all_stijl, marker="o", color=colour_stijl, linewidth=1.5,
                linestyle="--", alpha=0.6, label="All-motion Stylistic")
    _plot_valid(ax4, all_mat, marker="s", color=colour_mat, linewidth=1.5,
                linestyle="--", alpha=0.6, label="All-motion Material")

    _mark_break(ax4)

    ax4.set_xlabel("Year")
    ax4.set_ylabel("Mean score (1-5 scale)")
    ax4.set_title("All-Motion vs Right-Wing: Stylistic and Material Extremity", fontweight="bold")
    ax4.legend(loc="upper left", fontsize=8)
    ax4.grid(True, alpha=0.3)

    # One tick per year on every panel; only the bottom panel shows labels.
    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xticks(years_arr)
    ax4.set_xticklabels([str(y) for y in years], rotation=45)
    for ax in (ax1, ax2, ax3):
        ax.tick_params(labelbottom=False)

    plt.tight_layout()
    path = str(REPORTS_DIR / "extremity_2d_temporal_figure.png")
    fig.savefig(path, dpi=150, bbox_inches="tight")
    plt.close(fig)
    logger.info("Saved figure to %s", path)
    return path
|
|
|
|
|
|
def generate_report(
|
|
summary: dict[int, dict[str, Any]],
|
|
divergence: dict[str, Any],
|
|
temporal_corr: dict[str, Any],
|
|
yearly: dict[int, dict[str, list[float]]],
|
|
fig_path: str,
|
|
all_summary: dict[int, dict[str, Any]],
|
|
) -> str:
|
|
"""Write the markdown report."""
|
|
years = sorted(summary.keys())
|
|
|
|
def fmt(val, precision=3):
|
|
if val is None or (isinstance(val, float) and np.isnan(val)):
|
|
return "N/A"
|
|
return f"{val:.{precision}f}"
|
|
|
|
def flag_n(year, key_prefix):
|
|
n_key = f"{key_prefix}n_stijl"
|
|
n = summary[year].get(n_key, 0)
|
|
return " *" if n < CONFIDENCE_N_MIN else ""
|
|
|
|
# Yearly means table
|
|
table_header = (
|
|
"| Year | N | Stylistic | Material | Text (orig) | Gap (M-S) | "
|
|
"N Mig | Styl Mig | Mat Mig | N Non-Mig | Styl NM | Mat NM | r(stijl,mat) |"
|
|
)
|
|
table_sep = (
|
|
"|------|---|-----------|----------|-------------|-----------|"
|
|
"-------|----------|---------|-----------|----------|---------|---------------|"
|
|
)
|
|
table_rows = []
|
|
for y in years:
|
|
s = summary[y]
|
|
row = (
|
|
f"| {y}{flag_n(y, '')} "
|
|
f"| {int(s.get('n_stijl', 0))} "
|
|
f"| {fmt(s.get('mean_stijl'))} "
|
|
f"| {fmt(s.get('mean_materieel'))} "
|
|
f"| {fmt(s.get('mean_text'))} "
|
|
f"| {fmt(s.get('gap'))} "
|
|
f"| {int(s.get('mig_n_stijl', 0))} "
|
|
f"| {fmt(s.get('mig_mean_stijl'))} "
|
|
f"| {fmt(s.get('mig_mean_materieel'))} "
|
|
f"| {int(s.get('non_mig_n_stijl', 0))} "
|
|
f"| {fmt(s.get('non_mig_mean_stijl'))} "
|
|
f"| {fmt(s.get('non_mig_mean_materieel'))} "
|
|
f"| {fmt(s.get('r_stijl_mat'))} |"
|
|
)
|
|
table_rows.append(row)
|
|
|
|
# Pre/post means
|
|
pre_years = [y for y in years if y < BREAK_YEAR]
|
|
post_years = [y for y in years if y >= BREAK_YEAR]
|
|
|
|
def pre_post_means(key):
|
|
pre = [summary[y].get(key, float("nan")) for y in pre_years]
|
|
post = [summary[y].get(key, float("nan")) for y in post_years]
|
|
pre_valid = [v for v in pre if not np.isnan(v)]
|
|
post_valid = [v for v in post if not np.isnan(v)]
|
|
return (np.mean(pre_valid) if pre_valid else float("nan"),
|
|
np.mean(post_valid) if post_valid else float("nan"))
|
|
|
|
pre_stijl, post_stijl = pre_post_means("mean_stijl")
|
|
pre_mat, post_mat = pre_post_means("mean_materieel")
|
|
pre_text, post_text = pre_post_means("mean_text")
|
|
pre_gap, post_gap = pre_post_means("gap")
|
|
|
|
# Divergence test text
|
|
div_text = f"**Test:** {divergence.get('test', 'N/A')}\n\n"
|
|
div_text += f"**Statistic:** {divergence.get('statistic', 'N/A')}\n\n"
|
|
div_text += f"**p-value:** {divergence.get('p_value', 'N/A')}\n\n"
|
|
div_text += f"**N yearly pairs:** {divergence.get('n_years', 'N/A')}\n\n"
|
|
div_text += f"**Conclusion:** {divergence.get('conclusion', 'N/A')}"
|
|
|
|
# Correlation change text
|
|
corr_text = f"**Pre-2024 mean r(stijl,mat):** {fmt(temporal_corr.get('pre_mean_r', float('nan')))}\n\n"
|
|
corr_text += f"**Post-2024 mean r(stijl,mat):** {fmt(temporal_corr.get('post_mean_r', float('nan')))}\n\n"
|
|
corr_text += f"**Change test (Mann-Whitney):** U={fmt(temporal_corr.get('mannwhitney_u', float('nan')))}"
|
|
corr_text += f", p={fmt(temporal_corr.get('mannwhitney_p', float('nan')))}\n\n"
|
|
corr_text += f"**Interpretation:** {temporal_corr.get('correlation_change', 'N/A')}"
|
|
|
|
# Overall correlation (all data pooled)
|
|
all_stijl = []
|
|
all_mat = []
|
|
for y in years:
|
|
all_stijl.extend(yearly[y]["stijl"])
|
|
all_mat.extend(yearly[y]["materieel"])
|
|
overall_r, overall_p = pearsonr(all_stijl, all_mat) if len(all_stijl) >= 3 else (float("nan"), float("nan"))
|
|
|
|
# Migration domain correlations
|
|
all_mig_stijl, all_mig_mat = [], []
|
|
all_nm_stijl, all_nm_mat = [], []
|
|
for y in years:
|
|
all_mig_stijl.extend(yearly[y]["mig_stijl"])
|
|
all_mig_mat.extend(yearly[y]["mig_materieel"])
|
|
all_nm_stijl.extend(yearly[y]["non_mig_stijl"])
|
|
all_nm_mat.extend(yearly[y]["non_mig_materieel"])
|
|
mig_r, mig_p = pearsonr(all_mig_stijl, all_mig_mat) if len(all_mig_stijl) >= 3 else (float("nan"), float("nan"))
|
|
nm_r, nm_p = pearsonr(all_nm_stijl, all_nm_mat) if len(all_nm_stijl) >= 3 else (float("nan"), float("nan"))
|
|
|
|
# Gravity-weighted means (pre/post)
|
|
def pre_post_all_gap(key):
|
|
pre = [all_summary[y].get(key, float("nan")) for y in pre_years if y in all_summary]
|
|
post = [all_summary[y].get(key, float("nan")) for y in post_years if y in all_summary]
|
|
pre_valid = [v for v in pre if not np.isnan(v)]
|
|
post_valid = [v for v in post if not np.isnan(v)]
|
|
return (np.mean(pre_valid) if pre_valid else float("nan"),
|
|
np.mean(post_valid) if post_valid else float("nan"))
|
|
|
|
pre_all_stijl, post_all_stijl = pre_post_all_gap("mean_stijl")
|
|
pre_all_mat, post_all_mat = pre_post_all_gap("mean_materieel")
|
|
|
|
# Gravity-weighted means for right-wing
|
|
def pre_post_ge(key):
|
|
pre = [summary[y].get(key, float("nan")) for y in pre_years]
|
|
post = [summary[y].get(key, float("nan")) for y in post_years]
|
|
pre_valid = [v for v in pre if not np.isnan(v)]
|
|
post_valid = [v for v in post if not np.isnan(v)]
|
|
return (np.mean(pre_valid) if pre_valid else float("nan"),
|
|
np.mean(post_valid) if post_valid else float("nan"))
|
|
|
|
pre_ge3_stijl, post_ge3_stijl = pre_post_ge("ge3_mean_stijl")
|
|
pre_ge3_mat, post_ge3_mat = pre_post_ge("ge3_mean_materieel")
|
|
pre_ge4_stijl, post_ge4_stijl = pre_post_ge("ge4_mean_stijl")
|
|
pre_ge4_mat, post_ge4_mat = pre_post_ge("ge4_mean_materieel")
|
|
|
|
lines = [
|
|
"# 2D Extremity Temporal Decomposition",
|
|
"",
|
|
"**Goal:** Test whether the \"flat single-dimension trend\" masks diverging trajectories",
|
|
"when stylistic and material extremity scores are analyzed separately over time.",
|
|
"",
|
|
"**Analysis period:** 2016-2026",
|
|
"**Data source (right-wing):** `extremity_scores_2d` (2,869 motions scored) joined with `right_wing_motions`",
|
|
"**Data source (all motions):** `extremity_scores_all` (29,570 motions scored) joined with `motions`",
|
|
"**Domains:** Migration = `asiel/vreemdelingen`; Non-migration = all other categories",
|
|
"",
|
|
"> *Years with <50 scored motions are flagged for low confidence.",
|
|
"",
|
|
"---",
|
|
"",
|
|
"## 1. Key Findings",
|
|
"",
|
|
f"**Overall correlation r(stijl, materieel):** {fmt(overall_r)} (p={fmt(overall_p, 6)})",
|
|
f"**Migration domain r(stijl, materieel):** {fmt(mig_r)} (p={fmt(mig_p, 6)}, n={len(all_mig_stijl)})",
|
|
f"**Non-migration domain r(stijl, materieel):** {fmt(nm_r)} (p={fmt(nm_p, 6)}, n={len(all_nm_stijl)})",
|
|
"",
|
|
"---",
|
|
"",
|
|
"## 2. Pre/Post 2024 Comparison",
|
|
"",
|
|
f"| Dimension | Pre-2024 Mean | Post-2024 Mean | Δ |",
|
|
f"|-----------|--------------|---------------|-----|",
|
|
f"| Stylistic extremity | {fmt(pre_stijl)} | {fmt(post_stijl)} | {fmt(post_stijl - pre_stijl if not np.isnan(pre_stijl) and not np.isnan(post_stijl) else float('nan'))} |",
|
|
f"| Material impact | {fmt(pre_mat)} | {fmt(post_mat)} | {fmt(post_mat - pre_mat if not np.isnan(pre_mat) and not np.isnan(post_mat) else float('nan'))} |",
|
|
f"| Text score (original) | {fmt(pre_text)} | {fmt(post_text)} | {fmt(post_text - pre_text if not np.isnan(pre_text) and not np.isnan(post_text) else float('nan'))} |",
|
|
f"| Gap (M-S) | {fmt(pre_gap)} | {fmt(post_gap)} | {fmt(post_gap - pre_gap if not np.isnan(pre_gap) and not np.isnan(post_gap) else float('nan'))} |",
|
|
"",
|
|
"---",
|
|
"",
|
|
"## 3. Yearly Data Table",
|
|
"",
|
|
table_header,
|
|
table_sep,
|
|
*table_rows,
|
|
"",
|
|
"> * Years with <50 scored motions; confidence intervals are wider or N/A.",
|
|
"",
|
|
"---",
|
|
"",
|
|
"## 4. Divergence Test (Wilcoxon Signed-Rank)",
|
|
"",
|
|
div_text,
|
|
"",
|
|
"The Wilcoxon signed-rank test compares yearly mean stylistic vs yearly mean material scores.",
|
|
"A significant result (p < 0.05) indicates the two dimensions systematically differ,",
|
|
"meaning the flat single-dimension trend masks a genuine divergence between stylistic",
|
|
"and material extremity.",
|
|
"",
|
|
"---",
|
|
"",
|
|
"## 5. Per-Year Correlation Analysis",
|
|
"",
|
|
"| Year | r(stijl,mat) | p | N | Domain |",
|
|
"|------|--------------|---|---|--------|",
|
|
]
|
|
|
|
for y in years:
|
|
s = summary[y]
|
|
r_val = s.get("r_stijl_mat", float("nan"))
|
|
p_val = s.get("p_stijl_mat", float("nan"))
|
|
n_val = s.get("n_stijl", 0)
|
|
r_mig_val = s.get("r_mig_stijl_mat", float("nan"))
|
|
p_mig_val = s.get("p_mig_stijl_mat", float("nan"))
|
|
n_mig_val = s.get("mig_n_stijl", 0)
|
|
r_nm_val = s.get("r_non_mig_stijl_mat", float("nan"))
|
|
p_nm_val = s.get("p_non_mig_stijl_mat", float("nan"))
|
|
n_nm_val = s.get("non_mig_n_stijl", 0)
|
|
|
|
lines.append(
|
|
f"| {y} | {fmt(r_val)} | {fmt(p_val, 6)} | {int(n_val)} | All |"
|
|
)
|
|
if not np.isnan(r_mig_val):
|
|
lines.append(
|
|
f"| | {fmt(r_mig_val)} | {fmt(p_mig_val, 6)} | {int(n_mig_val)} | Migration |"
|
|
)
|
|
if not np.isnan(r_nm_val):
|
|
lines.append(
|
|
f"| | {fmt(r_nm_val)} | {fmt(p_nm_val, 6)} | {int(n_nm_val)} | Non-migration |"
|
|
)
|
|
|
|
lines += [
|
|
"",
|
|
"---",
|
|
"",
|
|
"## 6. Correlation Change Pre vs Post 2024",
|
|
"",
|
|
corr_text,
|
|
"",
|
|
"A significant change in the per-year stijl-material correlation would suggest",
|
|
"that the relationship between the two dimensions itself shifted across the break period —",
|
|
"e.g., if right-wing parties post-2024 began moderating style while maintaining material",
|
|
"impact, the correlation would decrease.",
|
|
"",
|
|
"---",
|
|
"",
|
|
"## 7. Gap Trajectory Interpretation",
|
|
"",
|
|
f"- **Pre-2024 mean gap:** {fmt(pre_gap)}",
|
|
f"- **Post-2024 mean gap:** {fmt(post_gap)}",
|
|
f"- **Gap change:** {fmt(post_gap - pre_gap if not np.isnan(pre_gap) and not np.isnan(post_gap) else float('nan'))}",
|
|
"",
|
|
"A widening gap (increasing material > stylistic) would indicate that right-wing motions",
|
|
"became less stylistically extreme but maintained or increased their material impact —",
|
|
"consistent with the 'strategic moderation of rhetoric' hypothesis.",
|
|
"",
|
|
"A narrowing gap would suggest that stylistic and material dimensions are converging,",
|
|
"meaning the distinctions between the two become less meaningful over time.",
|
|
"",
|
|
"A stable gap suggests the two dimensions move in parallel, and the flat single-dimension",
|
|
"trend is an accurate summary (no masked divergence).",
|
|
"",
|
|
"---",
|
|
"",
|
|
"## 8. Domain Stratification",
|
|
"",
|
|
"| Domain | Pre Mean Stijl | Pre Mean Mat | Post Mean Stijl | Post Mean Mat | Pre Gap | Post Gap | Pre r | Post r |",
|
|
"|--------|---------------|-------------|----------------|---------------|---------|----------|-------|--------|",
|
|
]
|
|
|
|
for domain_name, prefix in [("Migration", "mig_"), ("Non-migration", "non_mig_")]:
|
|
def _nanmean(vals):
    """Return the arithmetic mean of the non-NaN entries of *vals*.

    NaN is returned when *vals* is empty or holds only NaN values, so the
    caller can pass the result straight on without a separate emptiness check.
    """
    kept = []
    for value in vals:
        if np.isnan(value):
            # Placeholder for a missing yearly value; leave it out of the mean.
            continue
        kept.append(value)
    if not kept:
        return float("nan")
    return float(np.mean(kept))
|
|
pre_s = _nanmean([summary[y].get(f"{prefix}mean_stijl", float("nan")) for y in pre_years])
|
|
pre_m = _nanmean([summary[y].get(f"{prefix}mean_materieel", float("nan")) for y in pre_years])
|
|
post_s = _nanmean([summary[y].get(f"{prefix}mean_stijl", float("nan")) for y in post_years])
|
|
post_m = _nanmean([summary[y].get(f"{prefix}mean_materieel", float("nan")) for y in post_years])
|
|
pre_g = pre_m - pre_s if not np.isnan(pre_s) and not np.isnan(pre_m) else float("nan")
|
|
post_g = post_m - post_s if not np.isnan(post_s) and not np.isnan(post_m) else float("nan")
|
|
|
|
pre_r_list = [summary[y].get(f"r_{prefix}stijl_mat", float("nan")) for y in pre_years]
|
|
post_r_list = [summary[y].get(f"r_{prefix}stijl_mat", float("nan")) for y in post_years]
|
|
pre_r_mean = np.nanmean(pre_r_list) if any(not np.isnan(v) for v in pre_r_list) else float("nan")
|
|
post_r_mean = np.nanmean(post_r_list) if any(not np.isnan(v) for v in post_r_list) else float("nan")
|
|
|
|
lines.append(
|
|
f"| {domain_name} | {fmt(pre_s)} | {fmt(pre_m)} | {fmt(post_s)} | {fmt(post_m)} | "
|
|
f"{fmt(pre_g)} | {fmt(post_g)} | {fmt(pre_r_mean)} | {fmt(post_r_mean)} |"
|
|
)
|
|
|
|
lines += [
|
|
"",
|
|
"---",
|
|
"",
|
|
"## 9. Gravity-Weighted Trends (Right-Wing)",
|
|
"",
|
|
"Yearly means for right-wing motions filtered by material impact thresholds.",
|
|
"M≥3 = motions with substantive material impact (score ≥ 3).",
|
|
"M≥4 = motions with fundamental material impact (score ≥ 4).",
|
|
"",
|
|
"| Year | N (all RW) | M≥3 N | M≥4 N | Stijl (all) | Stijl M≥3 | Stijl M≥4 | Mat (all) | Mat M≥3 | Mat M≥4 |",
|
|
"|------|-----------|-------|-------|-------------|-----------|-----------|-----------|---------|---------|",
|
|
]
|
|
|
|
for y in years:
|
|
s = summary[y]
|
|
lines.append(
|
|
f"| {y} "
|
|
f"| {int(s.get('n_stijl', 0))} "
|
|
f"| {int(s.get('ge3_n_stijl', 0))} "
|
|
f"| {int(s.get('ge4_n_stijl', 0))} "
|
|
f"| {fmt(s.get('mean_stijl'))} "
|
|
f"| {fmt(s.get('ge3_mean_stijl'))} "
|
|
f"| {fmt(s.get('ge4_mean_stijl'))} "
|
|
f"| {fmt(s.get('mean_materieel'))} "
|
|
f"| {fmt(s.get('ge3_mean_materieel'))} "
|
|
f"| {fmt(s.get('ge4_mean_materieel'))} |"
|
|
)
|
|
|
|
lines += [
|
|
"",
|
|
"| Bucket | Pre-2024 Mean Stijl | Pre-2024 Mean Mat | Post-2024 Mean Stijl | Post-2024 Mean Mat |",
|
|
"|--------|-------------------|-------------------|---------------------|-------------------|",
|
|
f"| All RW | {fmt(pre_stijl)} | {fmt(pre_mat)} | {fmt(post_stijl)} | {fmt(post_mat)} |",
|
|
f"| M≥3 | {fmt(pre_ge3_stijl)} | {fmt(pre_ge3_mat)} | {fmt(post_ge3_stijl)} | {fmt(post_ge3_mat)} |",
|
|
f"| M≥4 | {fmt(pre_ge4_stijl)} | {fmt(pre_ge4_mat)} | {fmt(post_ge4_stijl)} | {fmt(post_ge4_mat)} |",
|
|
"",
|
|
"---",
|
|
"",
|
|
"## 10. Figure",
|
|
"",
|
|
f".name})",
|
|
"",
|
|
"**Figure panels:**",
|
|
"- **Top panel:** Yearly mean stylistic (red) and material (blue) extremity scores with",
|
|
" 95% bootstrap confidence intervals. Grey dashed line = original single-dimension",
|
|
" `text_score` for comparison. Gold/orange lines show material impact for M≥3 and M≥4 subsets.",
|
|
"- **Second panel:** Gap trajectory (material minus stylistic) for all domains, migration,",
|
|
" and non-migration. Positive gap = material impact exceeds stylistic extremity.",
|
|
" A widening gap indicates increasing divergence between dimensions.",
|
|
"- **Third panel:** Per-year Pearson correlation between stylistic and material scores.",
|
|
" Declining correlation over time suggests the two dimensions are decoupling.",
|
|
"- **Fourth panel:** All-motion (dashed) vs right-wing (solid) comparison for both stylistic",
|
|
" and material dimensions. Shows how right-wing trends compare to the full motion landscape.",
|
|
"",
|
|
"---",
|
|
"",
|
|
"## 11. Limitations",
|
|
"",
|
|
"- **Yearly resolution:** Year-level aggregation necessarily smooths within-year trends.",
|
|
" The quarterly framework from U1 provides finer resolution for other metrics.",
|
|
"- **Low-N years:** Some years (especially 2016-2018 and 2026) have fewer than 50 scored",
|
|
" motions, reducing confidence in those yearly means.",
|
|
"- **2D scores are LLM-generated:** The `stijl_extremiteit` and `materiele_impact` scores",
|
|
" come from LLM-based assessment and may contain systematic biases.",
|
|
"- **Correlation vs causation:** Per-year correlations describe association, not causation.",
|
|
" A declining correlation could reflect scoring drift rather than genuine decoupling.",
|
|
"- **Domain imbalance:** Migration-domain motions are a minority of all right-wing motions,",
|
|
" so domain-stratified analyses have lower statistical power.",
|
|
"",
|
|
"---",
|
|
"",
|
|
"## 12. All-Motion Comparison",
|
|
"",
|
|
"Yearly means for ALL motions (from `extremity_scores_all`) compared to right-wing-only means.",
|
|
"This provides context for whether right-wing trends reflect party-specific dynamics or broader",
|
|
"parliamentary trends.",
|
|
"",
|
|
"| Year | N (all) | All Stijl | All Mat | N (RW) | RW Stijl | RW Mat | Diff Stijl | Diff Mat |",
|
|
"|------|---------|-----------|---------|--------|----------|--------|------------|----------|",
|
|
]
|
|
|
|
for y in years:
|
|
s = summary[y]
|
|
a = all_summary.get(y, {})
|
|
all_n = int(a.get("n_stijl", 0))
|
|
all_s = fmt(a.get("mean_stijl"))
|
|
all_m = fmt(a.get("mean_materieel"))
|
|
rw_n = int(s.get("n_stijl", 0))
|
|
rw_s = fmt(s.get("mean_stijl"))
|
|
rw_m = fmt(s.get("mean_materieel"))
|
|
diff_s = fmt(s.get("mean_stijl", float("nan")) - a.get("mean_stijl", float("nan")) if not np.isnan(s.get("mean_stijl", float("nan"))) and not np.isnan(a.get("mean_stijl", float("nan"))) else float("nan"))
|
|
diff_m = fmt(s.get("mean_materieel", float("nan")) - a.get("mean_materieel", float("nan")) if not np.isnan(s.get("mean_materieel", float("nan"))) and not np.isnan(a.get("mean_materieel", float("nan"))) else float("nan"))
|
|
lines.append(
|
|
f"| {y} | {all_n} | {all_s} | {all_m} | {rw_n} | {rw_s} | {rw_m} | {diff_s} | {diff_m} |"
|
|
)
|
|
|
|
# Pre/post for all-motion
|
|
lines += [
|
|
"",
|
|
"| Period | All Stijl | All Mat | RW Stijl | RW Mat | Stijl Δ | Mat Δ |",
|
|
"|--------|-----------|---------|----------|--------|---------|-------|",
|
|
f"| Pre-2024 | {fmt(pre_all_stijl)} | {fmt(pre_all_mat)} | {fmt(pre_stijl)} | {fmt(pre_mat)} | {fmt(pre_stijl - pre_all_stijl if not np.isnan(pre_stijl) and not np.isnan(pre_all_stijl) else float('nan'))} | {fmt(pre_mat - pre_all_mat if not np.isnan(pre_mat) and not np.isnan(pre_all_mat) else float('nan'))} |",
|
|
f"| Post-2024 | {fmt(post_all_stijl)} | {fmt(post_all_mat)} | {fmt(post_stijl)} | {fmt(post_mat)} | {fmt(post_stijl - post_all_stijl if not np.isnan(post_stijl) and not np.isnan(post_all_stijl) else float('nan'))} | {fmt(post_mat - post_all_mat if not np.isnan(post_mat) and not np.isnan(post_all_mat) else float('nan'))} |",
|
|
"",
|
|
]
|
|
|
|
lines += [
|
|
"---",
|
|
"",
|
|
"## 13. Conclusion",
|
|
"",
|
|
f"The overall stijl-materieel correlation is r={fmt(overall_r)} (p={fmt(overall_p, 6)}),",
|
|
"consistent with the aggregate finding of r≈0.47.",
|
|
"",
|
|
f"The divergence test ({divergence.get('test', 'N/A')}) "
|
|
f"{'found' if divergence.get('p_value', 1) is not None and not np.isnan(divergence.get('p_value', float('nan'))) and divergence.get('p_value', 1) < 0.05 else 'did not find'} "
|
|
f"significant systematic divergence between stylistic and material yearly means "
|
|
f"(p={fmt(divergence.get('p_value', float('nan')))}).",
|
|
"",
|
|
f"The pre/post correlation change analysis {temporal_corr.get('correlation_change', 'could not be performed').lower()}.",
|
|
"",
|
|
f"The gap (material minus stylistic) {'widened' if not np.isnan(post_gap) and not np.isnan(pre_gap) and post_gap > pre_gap else 'narrowed'} "
|
|
f"from {fmt(pre_gap)} pre-2024 to {fmt(post_gap)} post-2024.",
|
|
]
|
|
|
|
report_path = REPORTS_DIR / "extremity_2d_temporal.md"
|
|
with open(report_path, "w") as f:
|
|
f.write("\n".join(lines))
|
|
logger.info("Report written to %s", report_path)
|
|
return str(report_path)
|
|
|
|
|
|
def main() -> int:
    """Run the 2D extremity temporal analysis end to end.

    Opens the DuckDB database at ``DB_PATH`` read-only, fetches the yearly
    right-wing 2D scores and the all-motion scores, then computes the yearly
    summaries, the divergence test and the temporal correlation analysis,
    renders the figure, writes the report, and prints the output paths plus
    the two headline conclusions.

    Returns:
        0 once every step has completed. Errors raised by any step propagate
        to the caller unchanged.
    """
    logger.info("Connecting to database: %s", DB_PATH)
    con = duckdb.connect(DB_PATH, read_only=True)
    try:
        logger.info("Fetching 2D extremity data by year...")
        yearly = fetch_2d_yearly_data(con)

        total_motions = sum(len(scores["stijl"]) for scores in yearly.values())
        logger.info("Fetched %d scored motions across %d years", total_motions, len(yearly))

        logger.info("Fetching all-motion extremity data...")
        all_yearly = fetch_all_motion_yearly(con)
    finally:
        # Release the database handle even when a fetch fails; everything
        # after this point works on in-memory data only.
        con.close()

    logger.info("Computing yearly summary statistics...")
    summary = compute_yearly_summary(yearly)

    logger.info("Computing all-motion yearly summary...")
    all_summary = compute_all_motion_summary(all_yearly)

    logger.info("Running divergence test (Wilcoxon)...")
    divergence = compute_divergence_test(yearly)

    logger.info("Computing temporal correlation changes...")
    temporal_corr = compute_temporal_correlations(summary)

    logger.info("Generating figure...")
    fig_path = create_figure(summary, all_summary)

    logger.info("Generating report...")
    report_path = generate_report(summary, divergence, temporal_corr, yearly, fig_path, all_summary)

    print(f"\nReport: {report_path}")
    print(f"Figure: {fig_path}")
    print(f"\nDivergence test: {divergence.get('conclusion', 'N/A')}")
    print(f"Temporal correlation: {temporal_corr.get('correlation_change', 'N/A')}")

    return 0
|
|
|
|
|
|
# Script entry point: run the pipeline and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
|
|
|