# motief/analysis/right_wing/mechanism_classification.py

#!/usr/bin/env python3
"""Systematic mechanism classification of right-wing motions.

Classifies a stratified sample of 200 motions across 10 mechanism types
to validate the consensus framing hypothesis. Performs chi-squared tests
and generates a markdown report.

Usage:
    uv run python analysis/right_wing/mechanism_classification.py
    uv run python analysis/right_wing/mechanism_classification.py --n-pre-high 25 --n-pre-low 25 --n-post-high 75 --n-post-low 75
"""

from __future__ import annotations

import argparse
import json
import sys
from collections import Counter
from pathlib import Path
from typing import Any

import duckdb
import numpy as np
from scipy.stats import chi2_contingency

ROOT = Path(__file__).parent.parent.parent.resolve()
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))


# ── mechanism taxonomy ───────────────────────────────────────────────────────

# Canonical mechanism codes. A classification only counts when its value is in
# this list (anything else is tallied as "unclassified"), and the list order
# fixes the row order of the contingency tables and of the report tables.
MECHANISMS = [
    "consensus_framing",
    "institutional_rule_of_law",
    "welfare_service_expansion",
    "procedural_technical",
    "local_constituency",
    "coalition_alignment",
    "symbolic_declaratory",
    "targeted_restriction",
    "system_dismantling",
    "crisis_response",
]

# Dutch display labels for the mechanism codes, used when rendering the
# markdown report; lookups there fall back to the raw code if a label is missing.
MECHANISM_LABELS_NL = {
    "consensus_framing": "Consensus framing (gedeeld belang)",
    "institutional_rule_of_law": "Institutioneel/rechtsstatelijk",
    "welfare_service_expansion": "Welzijn/dienstverlening uitbreiding",
    "procedural_technical": "Procedureel/technisch",
    "local_constituency": "Lokaal/regionaal",
    "coalition_alignment": "Coalitie-afstemming",
    "symbolic_declaratory": "Symbolisch/declaratoir",
    "targeted_restriction": "Gerichte restrictie",
    "system_dismantling": "Systeemontmanteling",
    "crisis_response": "Crisisrespons",
}


# ── inline classifications (subagent-classified) ─────────────────────────────
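# Mapping: motion_id -> mechanism code. Each value must be one of MECHANISMS;
# any other value is counted as "unclassified" by compute_distribution().
# Each motion was classified by reading its full title and body_text.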

CLASSIFICATIONS: dict[int, str] = {
    # === PRE_HIGH (25 motions, pre-2024, centrist_support_strict > 0.5) ===
    15458: "crisis_response",           # corona tax deferral/bureaucracy
    26477: "institutional_rule_of_law",  # Israel SOFA treaty ratification
    9149: "consensus_framing",           # arming MQ-9 Reaper (shared defense)
    17099: "procedural_technical",       # Brexit transition law amendment
    4933: "procedural_technical",        # soil amendment to Environment Act
    17751: "consensus_framing",          # zero baseline regulatory burden
    20068: "procedural_technical",       # baseline measurement manure policy
    16520: "consensus_framing",          # Dutch agriculture global leadership
    17036: "welfare_service_expansion",  # defense work guarantee scheme
    17681: "consensus_framing",          # simplify car taxation
    14554: "procedural_technical",       # tourism cooperation quartermaster
    21864: "procedural_technical",       # adapt manure processing definition
    26493: "targeted_restriction",       # crackdown on asylum seeker nuisance
    21982: "consensus_framing",          # MKB regulatory burden reduction
    14125: "crisis_response",            # minimize corona tax bureaucracy
    13683: "welfare_service_expansion",  # GLB influence on farmer income
    16691: "procedural_technical",       # wild boar population management
    15005: "procedural_technical",       # periodic franchise consultation body
    17536: "institutional_rule_of_law",  # tackle hate preachers across Schengen
    16999: "consensus_framing",          # prevent unfair steel competition
    8325: "procedural_technical",        # defense materiel budget amendment
    13370: "welfare_service_expansion",  # PGB equal position amendment
    18030: "procedural_technical",       # highway lighting at night
    11382: "procedural_technical",       # amendment removing generic exemption
    18616: "procedural_technical",       # VAT e-commerce implementation law

    # === PRE_LOW (25 motions, pre-2024, centrist_support_strict <= 0.5) ===
    12411: "crisis_response",            # temporary nitrogen threshold for housing
    22595: "crisis_response",            # shopping by appointment during lockdown
    15772: "system_dismantling",         # prevent pension cuts (challenge ECB rate)
    7111: "welfare_service_expansion",   # max support for fishing sector
    25784: "targeted_restriction",       # keep coal plants open until nuclear ready
    27731: "system_dismantling",         # BOR tax amendment (dismantle tax change)
    15626: "crisis_response",            # corona kickstart economy scenarios
    20215: "welfare_service_expansion",  # protect high-quality farmland
    16430: "symbolic_declaratory",       # don't send 45bn to southern EU states
    25982: "local_constituency",         # prevent cold sanition shrimp fishery
    17176: "targeted_restriction",       # criminalize illegal residence
    7054: "procedural_technical",        # stacking effect of housing market measures
    20323: "procedural_technical",       # optical recognition for catch registration
    18025: "system_dismantling",         # halt curriculum revision PO/VO
    14837: "system_dismantling",         # nature policy without nitrogen fixation
    19620: "targeted_restriction",       # natural gas-free housing never mandatory
    21801: "consensus_framing",          # embrace Defense Vision 2035
    19464: "crisis_response",            # keep terraces open during EK football
    26855: "targeted_restriction",       # limit immigration inflow
    22280: "local_constituency",         # farmer costs for societal tasks
    20115: "symbolic_declaratory",       # defend national veto rights in EU
    15082: "targeted_restriction",       # no residency permits for delayed procedures
    6637: "targeted_restriction",        # protect welfare state via asylum stop
    18691: "symbolic_declaratory",       # no extra troops to Afghanistan
    18062: "crisis_response",            # apologies for care home corona deaths

    # === POST_HIGH (75 motions, post-2024, centrist_support_strict > 0.5) ===
    3784: "procedural_technical",        # healthcare fraud info sharing
    10205: "procedural_technical",       # defense materiel fund budget 2025
    10278: "coalition_alignment",        # budget amendment covering OCW package
    25079: "consensus_framing",          # EU nitrogen standards for industry
    2980: "targeted_restriction",        # designate NL as under migration pressure
    10420: "crisis_response",            # citizen resilience / preparedness info
    25092: "targeted_restriction",       # Ukrainian displaced persons pay care costs
    25545: "institutional_rule_of_law",  # legal basis for housing corp data
    23065: "procedural_technical",       # Justice & Security budget 2024
    2878: "welfare_service_expansion",   # index Wbso tax scheme for R&D
    25573: "procedural_technical",       # efficient spending nature subsidies
    3298: "symbolic_declaratory",        # support Gaza peace plan
    25061: "consensus_framing",          # simplify RI&E obligations for SMEs
    4481: "consensus_framing",           # acquire control points (geo-)economic policy
    3961: "procedural_technical",        # nuclear fleet & synergy study
    473: "institutional_rule_of_law",    # recover UvA riot damages from demonstrators
    10413: "consensus_framing",          # max legal room for drone training
    974: "procedural_technical",         # WLC norm impact on housing ambition
    24009: "procedural_technical",       # scientific basis for spray zones
    9789: "institutional_rule_of_law",   # use temporary law on terrorism measures
    24651: "targeted_restriction",       # slow labor migration via top summit
    1890: "local_constituency",          # Groningen/Noord-Drenthe success stories
    1191: "consensus_framing",           # prioritize safety in Station Agenda
    3448: "targeted_restriction",        # reserve nitrogen space for PAS melders
    23910: "institutional_rule_of_law",  # legal options vs antisemitic organizations
    25566: "welfare_service_expansion",  # childminder childcare allowance fix
    2070: "targeted_restriction",        # return plan vs uncooperative countries
    23885: "consensus_framing",          # pension funds focus on purchasing power
    24906: "procedural_technical",       # repair technical omissions Succession Act
    2496: "procedural_technical",        # satellite launch capacity Netherlands
    25582: "targeted_restriction",       # stricter asylum permit withdrawal
    3053: "local_constituency",          # safety campus Assen development
    1495: "procedural_technical",        # risk-based foreign funding oversight
    10178: "procedural_technical",       # Economic Affairs budget 2025
    1614: "procedural_technical",        # nuclear sector training needs inventory
    23441: "consensus_framing",          # redirect equal opportunity budget to quality
    3569: "consensus_framing",           # infrastructure investment counted as NATO
    10285: "procedural_technical",       # States General budget 2025
    23058: "procedural_technical",       # OCW budget 2024
    3287: "procedural_technical",        # inform parliament on humanitarian spending
    10434: "consensus_framing",          # integral future-proof media system
    10089: "procedural_technical",       # Asylum & Migration budget 2025
    22706: "consensus_framing",          # entrepreneur accord process
    3877: "institutional_rule_of_law",   # safety of converted asylum seekers
    25062: "consensus_framing",          # workable hazardous substances for SMEs
    3687: "targeted_restriction",        # EVRM interpretation protocol for asylum
    25166: "procedural_technical",       # detection dogs in prisons
    4618: "procedural_technical",        # Housing budget amendment
    3468: "institutional_rule_of_law",   # expand riot police weapons/defense
    24632: "institutional_rule_of_law",  # police access fatbike menu for enforcement
    25451: "symbolic_declaratory",       # calculate Palestine Authority pay-to-slay
    2351: "targeted_restriction",        # max 1yr prison for undesired declaration
    4227: "consensus_framing",           # Nijkerk bridge as strategic infrastructure
    22853: "consensus_framing",          # accelerate North Sea gas extraction
    9884: "procedural_technical",        # innovation contribution to emission reduction
    1428: "consensus_framing",           # liberalize trade with Canada/Mexico
    3629: "symbolic_declaratory",        # modernize UN Refugee Convention
    1572: "local_constituency",          # wolf attack impact mapping
    25493: "procedural_technical",       # defense materiel fund budget amendment
    1359: "procedural_technical",        # firework ban damage compensation estimate
    2252: "procedural_technical",        # municipal fund budget amendment
    23605: "procedural_technical",       # PAS melders legal verification process
    3760: "consensus_framing",           # Defense Readiness Act submission
    1005: "consensus_framing",           # EU import tariffs to support entrepreneurs
    10110: "coalition_alignment",        # budget amendment covering OCW package
    23301: "consensus_framing",          # international tendering military projects
    24046: "symbolic_declaratory",       # abstain from WHA accord (pandemic treaty)
    651: "welfare_service_expansion",    # agri nature management for Natuurnetwerk
    1491: "targeted_restriction",        # max wolf population Netherlands
    25606: "targeted_restriction",       # prevent wolf habituation to humans
    313: "procedural_technical",         # temporarily drop pre-filled tax return
    24008: "consensus_framing",          # EU approval frameworks for green agents
    754: "targeted_restriction",         # expel third-country nationals from Ukraine
    25469: "targeted_restriction",       # EU return hubs for asylum seekers
    25091: "targeted_restriction",       # stop asylum if travel to home country

    # === POST_LOW (75 motions, post-2024, centrist_support_strict <= 0.5) ===
    2170: "institutional_rule_of_law",   # prison renovation budget amendment
    22792: "procedural_technical",       # investigate French espionage at Saab
    10597: "institutional_rule_of_law",  # remove third observer from preventive search
    23013: "institutional_rule_of_law",  # antisemitism combating work plan budget
    3472: "institutional_rule_of_law",   # minimum sentences for violence vs aid workers
    2014: "system_dismantling",          # limit asylum appeals to single instance
    920: "procedural_technical",         # transitional facility real estate box 3
    2143: "welfare_service_expansion",   # campaign working in healthcare
    688: "system_dismantling",           # reject Tromsø Convention accession
    2290: "system_dismantling",          # repeal municipal asylum task law
    4497: "targeted_restriction",        # stop funding terrorist organizations
    3823: "symbolic_declaratory",        # child attachment not against family return
    23141: "institutional_rule_of_law",  # deploy KMar for domestic security
    4436: "institutional_rule_of_law",   # standard aggravated sentence for aid worker violence
    25616: "targeted_restriction",       # scrap municipal status holder housing task
    2662: "institutional_rule_of_law",   # prevent NL germline modification tech export
    23287: "institutional_rule_of_law",  # community service ban for violence vs police
    4660: "consensus_framing",           # defense cooperation with Israel
    4761: "targeted_restriction",        # denaturalization and forced remigration
    2264: "institutional_rule_of_law",   # recover UvA demo damages from perpetrators
    4394: "institutional_rule_of_law",   # beanbag air-pressure weapon for police pilot
    1691: "targeted_restriction",        # no penal orders for criminal asylum seekers
    10601: "targeted_restriction",       # ban NGOs in human smuggling chain
    4089: "targeted_restriction",        # deny entry to Al-Hol camp persons
    23206: "procedural_technical",        # map NATO defense product leakage
    22676: "institutional_rule_of_law",  # offensive vs porn industry abuses
    115: "system_dismantling",           # oppose EU 90% emission reduction target
    3951: "consensus_framing",           # nuclear energy in CO2-low energy mix post-COP30
    1375: "targeted_restriction",        # enforce status holder housing priority ban
    3090: "targeted_restriction",        # ban Muslim Brotherhood in Netherlands
    24650: "procedural_technical",       # cash acceptance obligation for small payments
    1772: "consensus_framing",           # legislation for top-10 business climate
    3678: "system_dismantling",          # total asylum stop and family reunification stop
    1692: "institutional_rule_of_law",   # remove penal orders for serious crimes
    24077: "symbolic_declaratory",       # investigate Fatah role in Oct 7 attack
    349: "institutional_rule_of_law",    # increased penalty for organ removal/sexual exploitation
    9769: "targeted_restriction",        # return Syrians to rebuild their country
    4656: "symbolic_declaratory",        # no Ukraine NATO accession
    23984: "system_dismantling",         # don't raise eco-regulation requirements
    2168: "institutional_rule_of_law",   # prison budget for JeugdzorgPlus takeover
    4443: "institutional_rule_of_law",   # 200% sentence increase for violence vs public servants
    4489: "procedural_technical",        # fishing disturbance impact on scoter
    10290: "targeted_restriction",       # concrete migration project for JBZ Council
    4071: "targeted_restriction",        # investigate housing fraud by status holders
    4088: "targeted_restriction",        # agreements with third countries on asylum
    1507: "system_dismantling",          # empirical nature data as alternative to KDW
    2870: "procedural_technical",        # FGR transitional law amendment
    1912: "system_dismantling",          # repeal Spreidingswet
    22658: "symbolic_declaratory",       # no Dutch troops to Ukraine
    10288: "targeted_restriction",       # prepare Syrian return plan
    4080: "institutional_rule_of_law",   # research heavier forced re-education
    1847: "targeted_restriction",        # return hub for hopeless asylum seekers
    23127: "system_dismantling",         # restore 120/130 km/h speed limit
    4367: "targeted_restriction",        # no relaxation of EU accession for Ukraine
    9790: "targeted_restriction",        # no cooperation with IS returnees
    4150: "procedural_technical",        # fishing net selectivity/safety research
    741: "targeted_restriction",         # blue card minimum salary 1.3x average
    1705: "consensus_framing",           # reduce regulatory burden for industry
    1831: "consensus_framing",           # precautionary principle proportionality
    10600: "targeted_restriction",       # ban NGOs active in migrant smuggling
    9767: "targeted_restriction",        # no compulsory asylum reception in distribution decision
    3830: "system_dismantling",          # stop patronizing policy toward adults
    4221: "system_dismantling",          # overhead norm for public broadcasting
    3354: "institutional_rule_of_law",   # raise 3D-printed firearms max penalty
    9977: "symbolic_declaratory",        # oppose abolishing EU veto right
    898: "consensus_framing",            # simplify Omnibus and CSDDD
    24848: "system_dismantling",         # repeal Spreidingswet ASAP
    756: "targeted_restriction",         # temporary stop on family reunification
    24358: "institutional_rule_of_law",  # increase prison capacity via earlier lockup
    4309: "institutional_rule_of_law",   # targeted demographic policy for enforcement
    10167: "local_constituency",         # pilot projects for crayfish control
    23633: "procedural_technical",       # adjust parliament bell ringing
    23030: "targeted_restriction",       # no compulsory asylum places in distribution
    1959: "system_dismantling",          # no ban on plastic-containing wet wipes
    23454: "procedural_technical",        # legal analysis of pension transition risks
}


# ── sampling ─────────────────────────────────────────────────────────────────

# Deterministic sample: the 200 motion IDs, grouped by stratum, that were
# classified inline in CLASSIFICATIONS above. The IDs are fixed so that the
# classification results are reproducible from run to run.
DETERMINISTIC_SAMPLE_IDS = {
    "pre_high": [4933, 8325, 9149, 11382, 13370, 13683, 14125, 14554, 15005, 15458, 16520, 16691, 16999, 17036, 17099, 17536, 17681, 17751, 18030, 18616, 20068, 21864, 21982, 26477, 26493],
    "pre_low": [6637, 7054, 7111, 12411, 14837, 15082, 15626, 15772, 16430, 17176, 18025, 18062, 18691, 19464, 19620, 20115, 20215, 20323, 21801, 22280, 22595, 25784, 25982, 26855, 27731],
    "post_high": [313, 473, 651, 754, 974, 1005, 1191, 1359, 1428, 1491, 1495, 1572, 1614, 1890, 2070, 2252, 2351, 2496, 2878, 2980, 3053, 3287, 3298, 3448, 3468, 3569, 3629, 3687, 3760, 3784, 3877, 3961, 4227, 4481, 4618, 9789, 9884, 10089, 10110, 10178, 10205, 10278, 10285, 10413, 10420, 10434, 22706, 22853, 23058, 23065, 23301, 23441, 23605, 23885, 23910, 24008, 24009, 24046, 24632, 24651, 24906, 25061, 25062, 25079, 25091, 25092, 25166, 25451, 25469, 25493, 25545, 25566, 25573, 25582, 25606],
    "post_low": [115, 349, 688, 741, 756, 898, 920, 1375, 1507, 1691, 1692, 1705, 1772, 1831, 1847, 1912, 1959, 2014, 2143, 2168, 2170, 2264, 2290, 2662, 2870, 3090, 3354, 3472, 3678, 3823, 3830, 3951, 4071, 4080, 4088, 4089, 4150, 4221, 4309, 4367, 4394, 4436, 4443, 4489, 4497, 4656, 4660, 4761, 9767, 9769, 9790, 9977, 10167, 10288, 10290, 10597, 10600, 10601, 22658, 22676, 22792, 23013, 23030, 23127, 23141, 23206, 23287, 23454, 23633, 23984, 24077, 24358, 24650, 24848, 25616],
}


def sample_motions(
    db_path: str,
    n_pre_high: int = 25,
    n_pre_low: int = 25,
    n_post_high: int = 75,
    n_post_low: int = 75,
    seed: int = 42,
) -> list[dict[str, Any]]:
    """Load the fixed motion sample listed in ``DETERMINISTIC_SAMPLE_IDS``.

    The rows are read from ``right_wing_motions`` joined to ``motions`` and
    come back ordered by ``motion_id``. IDs that have no matching row in the
    join are simply absent from the result.

    Args:
        db_path: Path of the DuckDB database file to open.
        n_pre_high: Accepted for interface compatibility; not read here.
        n_pre_low: Accepted for interface compatibility; not read here.
        n_post_high: Accepted for interface compatibility; not read here.
        n_post_low: Accepted for interface compatibility; not read here.
        seed: Accepted for interface compatibility; not read here.

    Returns:
        One dict per fetched row with the keys ``motion_id``, ``title``,
        ``body_text``, ``year``, ``centrist_support_strict`` and ``stratum``.
        NULL titles/bodies become empty strings, and ``stratum`` falls back
        to ``"unknown"`` for an ID that is not in the stratum lookup.
    """
    # If an ID were ever listed under two strata, the later stratum wins.
    stratum_of = {
        motion_id: stratum
        for stratum, ids in DETERMINISTIC_SAMPLE_IDS.items()
        for motion_id in ids
    }
    wanted_ids = [
        motion_id
        for ids in DETERMINISTIC_SAMPLE_IDS.values()
        for motion_id in ids
    ]
    # One "?" per ID so the values are bound as parameters, not interpolated.
    markers = ",".join(["?"] * len(wanted_ids))

    connection = duckdb.connect(db_path)
    try:
        fetched = connection.execute(
            f"""
            SELECT r.motion_id, m.title, m.body_text, r.year, r.centrist_support_strict
            FROM right_wing_motions r
            JOIN motions m ON r.motion_id = m.id
            WHERE r.motion_id IN ({markers})
            ORDER BY r.motion_id
            """,
            wanted_ids,
        ).fetchall()
    finally:
        connection.close()

    motions = []
    for motion_id, title, body_text, year, support in fetched:
        motions.append(
            {
                "motion_id": motion_id,
                "title": title or "",
                "body_text": body_text or "",
                "year": year,
                "centrist_support_strict": support,
                "stratum": stratum_of.get(motion_id, "unknown"),
            }
        )
    return motions


# ── analysis ─────────────────────────────────────────────────────────────────

def _contingency_rows(left: Counter[str], right: Counter[str]) -> list[list[int]]:
    """Build ``[left, right]`` count rows, one per mechanism seen in either column."""
    candidate_rows = ([left.get(mech, 0), right.get(mech, 0)] for mech in MECHANISMS)
    # All-zero rows would produce zero expected frequencies, so they are dropped.
    return [row for row in candidate_rows if sum(row) > 0]


def _chi2_summary(table: list[list[int]], error_message: str) -> dict[str, Any]:
    """Run a chi-squared independence test on a 2-D count table.

    Returns a dict with ``chi2``, ``p_value``, ``dof`` and ``significant``
    (p < 0.05), or ``{"error": error_message}`` when the test cannot be run.
    """
    observed = np.array(table)
    # A zero row or column total makes the expected frequencies zero (or, for
    # an all-zero table, undefined), so report an error instead of letting a
    # NaN statistic leak into the results.
    if (observed.sum(axis=0) == 0).any() or (observed.sum(axis=1) == 0).any():
        return {"error": error_message}
    try:
        chi2, pval, dof, _ = chi2_contingency(observed)
    except ValueError:
        return {"error": error_message}
    return {
        "chi2": float(chi2),
        "p_value": float(pval),
        "dof": int(dof),
        "significant": bool(pval < 0.05),
    }


def compute_distribution(
    sample: list[dict[str, Any]],
    classifications: dict[int, str],
) -> dict[str, Any]:
    """Compute mechanism distribution by period and support level.

    Args:
        sample: Motions as dicts carrying at least ``motion_id`` and
            ``stratum``. The strata ``pre_high``, ``pre_low``, ``post_high``
            and ``post_low`` feed the tests; any other stratum label is kept
            in its own bucket in ``distribution`` and ignored by the tests.
        classifications: Mapping of motion_id to mechanism code. A missing
            entry, or a value that is not in ``MECHANISMS``, counts the
            motion as ``"unclassified"``.

    Returns:
        A dict with:

        * ``sample_size``, ``classified``, ``unclassified``: motion counts.
        * ``distribution``: per-stratum mechanism counts, most common first.
        * ``mechanism_totals_pre`` / ``mechanism_totals_post``: counts with
          the high and low strata of each period merged.
        * ``chi2_pre_vs_post``: mechanism x period test, or ``None`` when
          fewer than two mechanisms were observed.
        * ``chi2_post_high_vs_low``: mechanism x support-level test within
          the post period, or ``None`` under the same condition.
        * ``consensus_framing_test``: 2x2 test of consensus framing versus
          everything else, post-high against post-low, plus the underlying
          counts and ratios.
        * ``cf_pre_post``: consensus-framing counts and ratios per period.

        Each test entry is either a result dict or ``{"error": ...}``.

    Note:
        Group totals include ``"unclassified"`` motions, so the ratios and
        the 2x2 test treat them as "not consensus framing". The two
        mechanism-level tests only use rows for codes in ``MECHANISMS``.
        The chi-squared approximation is unreliable when expected cell
        counts are small, which is likely for rarely used mechanisms.
    """
    groups: dict[str, Counter[str]] = {
        "pre_high": Counter(),
        "pre_low": Counter(),
        "post_high": Counter(),
        "post_low": Counter(),
    }

    classified = 0
    unclassified = 0
    for motion in sample:
        # sample_motions() may label a row "unknown"; give such rows their own
        # bucket instead of failing with a KeyError.
        counts = groups.setdefault(motion["stratum"], Counter())
        mechanism = classifications.get(motion["motion_id"])
        if mechanism in MECHANISMS:
            counts[mechanism] += 1
            classified += 1
        else:
            counts["unclassified"] += 1
            unclassified += 1

    # Consolidate: pre = pre_high + pre_low, post = post_high + post_low
    pre_counts = groups["pre_high"] + groups["pre_low"]
    post_counts = groups["post_high"] + groups["post_low"]
    post_high_counts = groups["post_high"]
    post_low_counts = groups["post_low"]

    # Mechanism x period (rows=mechanisms, cols=[pre, post])
    pre_post_rows = _contingency_rows(pre_counts, post_counts)
    chi2_result = (
        _chi2_summary(pre_post_rows, "Invalid contingency table")
        if len(pre_post_rows) >= 2
        else None
    )

    # Mechanism x support level, post period only (cols=[high, low])
    high_low_rows = _contingency_rows(post_high_counts, post_low_counts)
    chi2_hl_result = (
        _chi2_summary(high_low_rows, "Invalid contingency table")
        if len(high_low_rows) >= 2
        else None
    )

    # Specific test: consensus_framing in post_high vs post_low
    cf_post_high = post_high_counts.get("consensus_framing", 0)
    cf_post_low = post_low_counts.get("consensus_framing", 0)
    total_post_high = sum(post_high_counts.values())
    total_post_low = sum(post_low_counts.values())
    cf_ratio_high = cf_post_high / total_post_high if total_post_high else 0
    cf_ratio_low = cf_post_low / total_post_low if total_post_low else 0

    # 2x2 chi-squared test (not Fisher's exact test). With one degree of
    # freedom, chi2_contingency applies Yates' continuity correction by default.
    cf_chi2_result = _chi2_summary(
        [
            [cf_post_high, total_post_high - cf_post_high],
            [cf_post_low, total_post_low - cf_post_low],
        ],
        "Invalid 2x2 table",
    )
    if "error" not in cf_chi2_result:
        cf_chi2_result.update(
            {
                "cf_ratio_high": round(cf_ratio_high, 4),
                "cf_ratio_low": round(cf_ratio_low, 4),
                "cf_count_high": cf_post_high,
                "cf_count_low": cf_post_low,
                "total_high": total_post_high,
                "total_low": total_post_low,
            }
        )

    # Pre vs post consensus framing
    cf_pre = pre_counts.get("consensus_framing", 0)
    cf_post = post_counts.get("consensus_framing", 0)
    total_pre = sum(pre_counts.values())
    total_post = sum(post_counts.values())

    return {
        "sample_size": len(sample),
        "classified": classified,
        "unclassified": unclassified,
        "distribution": {s: dict(g.most_common()) for s, g in groups.items()},
        "mechanism_totals_pre": dict(pre_counts.most_common()),
        "mechanism_totals_post": dict(post_counts.most_common()),
        "chi2_pre_vs_post": chi2_result,
        "chi2_post_high_vs_low": chi2_hl_result,
        "consensus_framing_test": cf_chi2_result,
        "cf_pre_post": {
            "cf_pre": cf_pre,
            "cf_post": cf_post,
            "total_pre": total_pre,
            "total_post": total_post,
            "ratio_pre": round(cf_pre / total_pre, 4) if total_pre else 0,
            "ratio_post": round(cf_post / total_post, 4) if total_post else 0,
        },
    }


# ── report generation ────────────────────────────────────────────────────────

def generate_report(results: dict[str, Any], output_path: str) -> None:
    """Generate mechanism classification markdown report."""
    dist = results["distribution"]
    cf_test = results["consensus_framing_test"]
    cf_pp = results["cf_pre_post"]

    lines = [
        "# Mechanism Classification Report",
        "",
        f"**Sample:** {results['sample_size']} motions (stratified: 50 pre-2024, 150 post-2024)",
        f"**Classified:** {results['classified']} motions | **Unclassified:** {results['unclassified']}",
        "",
        "## 1. Mechanism Distribution by Group",
        "",
        "### Pre-2024, High Centrist Support (CS > 0.5)",
        "",
        "| Mechanism | Count | Pct |",
        "|-----------|-------|-----|",
    ]

    pre_high = dist.get("pre_high", {})
    pre_high_total = sum(pre_high.values())
    for mech in MECHANISMS:
        cnt = pre_high.get(mech, 0)
        pct = f"{cnt / pre_high_total * 100:.1f}%" if pre_high_total else "0%"
        label = MECHANISM_LABELS_NL.get(mech, mech)
        lines.append(f"| {label} | {cnt} | {pct} |")
    lines.append(f"| **Total** | **{pre_high_total}** | **100%** |")

    lines.extend([
        "",
        "### Pre-2024, Low Centrist Support (CS <= 0.5)",
        "",
        "| Mechanism | Count | Pct |",
        "|-----------|-------|-----|",
    ])
    pre_low = dist.get("pre_low", {})
    pre_low_total = sum(pre_low.values())
    for mech in MECHANISMS:
        cnt = pre_low.get(mech, 0)
        pct = f"{cnt / pre_low_total * 100:.1f}%" if pre_low_total else "0%"
        label = MECHANISM_LABELS_NL.get(mech, mech)
        lines.append(f"| {label} | {cnt} | {pct} |")
    lines.append(f"| **Total** | **{pre_low_total}** | **100%** |")

    lines.extend([
        "",
        "### Post-2024, High Centrist Support (CS > 0.5)",
        "",
        "| Mechanism | Count | Pct |",
        "|-----------|-------|-----|",
    ])
    post_high = dist.get("post_high", {})
    post_high_total = sum(post_high.values())
    for mech in MECHANISMS:
        cnt = post_high.get(mech, 0)
        pct = f"{cnt / post_high_total * 100:.1f}%" if post_high_total else "0%"
        label = MECHANISM_LABELS_NL.get(mech, mech)
        lines.append(f"| {label} | {cnt} | {pct} |")
    lines.append(f"| **Total** | **{post_high_total}** | **100%** |")

    lines.extend([
        "",
        "### Post-2024, Low Centrist Support (CS <= 0.5)",
        "",
        "| Mechanism | Count | Pct |",
        "|-----------|-------|-----|",
    ])
    post_low = dist.get("post_low", {})
    post_low_total = sum(post_low.values())
    for mech in MECHANISMS:
        cnt = post_low.get(mech, 0)
        pct = f"{cnt / post_low_total * 100:.1f}%" if post_low_total else "0%"
        label = MECHANISM_LABELS_NL.get(mech, mech)
        lines.append(f"| {label} | {cnt} | {pct} |")
    lines.append(f"| **Total** | **{post_low_total}** | **100%** |")

    # Summary: Pre vs Post
    lines.extend([
        "",
        "## 2. Consolidated Pre vs Post-2024 Distribution",
        "",
        "| Mechanism | Pre-2024 | Pct Pre | Post-2024 | Pct Post |",
        "|-----------|----------|---------|-----------|----------|",
    ])
    pre_cons = results["mechanism_totals_pre"]
    post_cons = results["mechanism_totals_post"]
    pre_total = sum(pre_cons.values())
    post_total = sum(post_cons.values())
    for mech in MECHANISMS:
        pre_cnt = pre_cons.get(mech, 0)
        post_cnt = post_cons.get(mech, 0)
        pre_pct = f"{pre_cnt / pre_total * 100:.1f}%" if pre_total else "0%"
        post_pct = f"{post_cnt / post_total * 100:.1f}%" if post_total else "0%"
        label = MECHANISM_LABELS_NL.get(mech, mech)
        lines.append(f"| {label} | {pre_cnt} | {pre_pct} | {post_cnt} | {post_pct} |")
    lines.append(f"| **Total** | **{pre_total}** | **100%** | **{post_total}** | **100%** |")

    # Consensus framing focus
    lines.extend([
        "",
        "## 3. Consensus Framing Hypothesis Test",
        "",
        f"**H0:** Consensus framing is equally common in high-support and low-support post-2024 motions.",
        f"**H1:** Consensus framing is significantly more common in high-support post-2024 motions.",
        "",
    ])
    if cf_test and "error" not in cf_test:
        lines.append(f"- Consensus framing in post-2024 HIGH: {cf_test['cf_count_high']}/{cf_test['total_high']} ({cf_test['cf_ratio_high']:.1%})")
        lines.append(f"- Consensus framing in post-2024 LOW: {cf_test['cf_count_low']}/{cf_test['total_low']} ({cf_test['cf_ratio_low']:.1%})")
        lines.append(f"- χ²(1) = {cf_test['chi2']:.3f}, p = {cf_test['p_value']:.4f}")
        if cf_test["significant"]:
            lines.append(f"- **Result: Significant difference (p < 0.05). Consensus framing IS more common in high-support post-2024 motions.**")
        else:
            lines.append(f"- **Result: Not significant (p >= 0.05). Cannot reject the null.**")
    else:
        lines.append("- Consensus framing test could not be performed (insufficient data).")

    lines.extend([
        "",
        f"- Consensus framing pre-2024: {cf_pp['cf_pre']}/{cf_pp['total_pre']} ({cf_pp['ratio_pre']:.1%})",
        f"- Consensus framing post-2024: {cf_pp['cf_post']}/{cf_pp['total_post']} ({cf_pp['ratio_post']:.1%})",
    ])

    # Chi-squared tests
    chi2_all = results["chi2_pre_vs_post"]
    if chi2_all and "error" not in chi2_all:
        lines.extend([
            "",
            "## 4. Chi-Squared Test: Period × Mechanism",
            "",
            f"- χ²({chi2_all['dof']}) = {chi2_all['chi2']:.3f}, p = {chi2_all['p_value']:.4f}",
            f"- {'Significant' if chi2_all['significant'] else 'Not significant'} difference in mechanism distribution between pre and post-2024.",
        ])

    chi2_hl = results["chi2_post_high_vs_low"]
    if chi2_hl and "error" not in chi2_hl:
        lines.extend([
            "",
            "## 5. Chi-Squared Test: Support Level × Mechanism (Post-2024)",
            "",
            f"- χ²({chi2_hl['dof']}) = {chi2_hl['chi2']:.3f}, p = {chi2_hl['p_value']:.4f}",
            f"- {'Significant' if chi2_hl['significant'] else 'Not significant'} difference in mechanism distribution between high and low support post-2024 motions.",
        ])

    lines.extend([
        "",
        "## 6. Key Findings",
        "",
    ])

    # Compute and report key findings
    # Which mechanisms dominate in high-support post-2024?
    post_high_sorted = sorted(post_high.items(), key=lambda x: x[1], reverse=True)
    post_low_sorted = sorted(post_low.items(), key=lambda x: x[1], reverse=True)

    lines.append("### Top 3 mechanisms in post-2024 HIGH-support motions:")
    for mech, cnt in post_high_sorted[:3]:
        label = MECHANISM_LABELS_NL.get(mech, mech)
        pct = cnt / post_high_total * 100
        lines.append(f"- {label}: {cnt} ({pct:.1f}%)")

    lines.append("")
    lines.append("### Top 3 mechanisms in post-2024 LOW-support motions:")
    for mech, cnt in post_low_sorted[:3]:
        label = MECHANISM_LABELS_NL.get(mech, mech)
        pct = cnt / post_low_total * 100
        lines.append(f"- {label}: {cnt} ({pct:.1f}%)")

    # Shift analysis
    lines.extend([
        "",
        "### Mechanism shifts from pre to post-2024",
        "",
        "| Mechanism | Pre Pct | Post Pct | Δ |",
        "|-----------|---------|----------|---|",
    ])
    for mech in MECHANISMS:
        pre_cnt = pre_cons.get(mech, 0)
        post_cnt = post_cons.get(mech, 0)
        pre_pct = pre_cnt / pre_total * 100 if pre_total else 0
        post_pct = post_cnt / post_total * 100 if post_total else 0
        delta = post_pct - pre_pct
        label = MECHANISM_LABELS_NL.get(mech, mech)
        lines.append(f"| {label} | {pre_pct:.1f}% | {post_pct:.1f}% | {delta:+.1f}% |")

    lines.extend([
        "",
        "## 7. Conclusion",
        "",
    ])

    # Interpretation
    cf_consensus = ""
    if cf_test and "error" not in cf_test:
        if cf_test["significant"] and cf_test["cf_ratio_high"] > cf_test["cf_ratio_low"]:
            cf_consensus = (
                f"The consensus framing hypothesis **is supported**: consensus framing motions "
                f"are {cf_test['cf_ratio_high']:.1%} of high-support post-2024 motions vs "
                f"{cf_test['cf_ratio_low']:.1%} of low-support post-2024 motions "
                f"(χ² = {cf_test['chi2']:.3f}, p = {cf_test['p_value']:.4f})."
            )
        else:
            cf_consensus = (
                f"The consensus framing hypothesis **is not supported**: no significant difference "
                f"between high ({cf_test['cf_ratio_high']:.1%}) and low ({cf_test['cf_ratio_low']:.1%}) "
                f"support post-2024 motions (p = {cf_test['p_value']:.4f})."
            )

    lines.append(cf_consensus)
    lines.append("")
    lines.append("### Limitations")
    lines.append("- Sample: 200 motions (50 pre, 150 post) — may not capture rare mechanisms")
    lines.append("- Single-classifier: all motions classified by one subagent (inline), no inter-rater validation")
    lines.append("- Binary support threshold: CS > 0.5 vs <= 0.5 may oversimplify the support spectrum")
    lines.append("- Mechanism assignment: single primary mechanism per motion; some motions span multiple categories")

    # Write output
    out_path = Path(output_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    print(f"Report written to {out_path}")


# ── main ─────────────────────────────────────────────────────────────────────

def main(argv: list[str] | None = None) -> int:
    """Run the mechanism-classification pipeline from the command line.

    Samples motions from the database, optionally dumps the inline
    classification mapping as JSON, computes the mechanism distribution,
    writes the markdown report and prints a short summary of the
    consensus-framing test to stdout.

    Args:
        argv: Command-line arguments to parse (without the program name).
            ``None`` (the default) makes argparse fall back to
            ``sys.argv[1:]``, which is what the zero-argument call did
            before this parameter existed.

    Returns:
        Process exit code; ``0`` once the pipeline has run to completion.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments.
    """
    parser = argparse.ArgumentParser(description="Systematic mechanism classification")
    parser.add_argument("--db", default="data/motions.db", help="Path to DuckDB database")
    parser.add_argument("--n-pre-high", type=int, default=25)
    parser.add_argument("--n-pre-low", type=int, default=25)
    parser.add_argument("--n-post-high", type=int, default=75)
    parser.add_argument("--n-post-low", type=int, default=75)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--output", default="reports/overton_window/mechanism_classification.md")
    parser.add_argument("--save-classifications", help="Save classifications JSON to path")
    # Passing argv explicitly lets callers (and tests) drive the CLI without
    # touching the global sys.argv.
    args = parser.parse_args(argv)

    # Sample motions
    sample = sample_motions(
        db_path=args.db,
        n_pre_high=args.n_pre_high,
        n_pre_low=args.n_pre_low,
        n_post_high=args.n_post_high,
        n_post_low=args.n_post_low,
        seed=args.seed,
    )
    print(f"Sampled {len(sample)} motions")

    # Optional: save classifications mapping
    if args.save_classifications:
        class_path = Path(args.save_classifications)
        class_path.parent.mkdir(parents=True, exist_ok=True)
        class_path.write_text(
            json.dumps(CLASSIFICATIONS, indent=2, ensure_ascii=False),
            encoding="utf-8",
        )
        print(f"Classifications saved to {class_path}")

    # Compute distribution
    results = compute_distribution(sample, CLASSIFICATIONS)
    print(f"Classified: {results['classified']}, Unclassified: {results['unclassified']}")

    # Generate report
    generate_report(results, args.output)

    # Print summary to stdout; skipped when the test result is missing/empty
    # or carries an "error" entry.
    cf_test = results["consensus_framing_test"]
    if cf_test and "error" not in cf_test:
        verdict = "SIGNIFICANT" if cf_test["significant"] else "NOT significant"
        print("\nConsensus Framing Test:")
        print(f"  Post-2024 HIGH: {cf_test['cf_count_high']}/{cf_test['total_high']} = {cf_test['cf_ratio_high']:.1%}")
        print(f"  Post-2024 LOW:  {cf_test['cf_count_low']}/{cf_test['total_low']} = {cf_test['cf_ratio_low']:.1%}")
        print(f"  χ² = {cf_test['chi2']:.3f}, p = {cf_test['p_value']:.4f} ({verdict})")

    return 0


# Script entry point: hand main()'s return value to the interpreter as the
# process exit status when this file is executed directly.
if __name__ == "__main__":
    sys.exit(main())