You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
751 lines
38 KiB
751 lines
38 KiB
#!/usr/bin/env python3
|
|
"""Systematic mechanism classification of right-wing motions.
|
|
|
|
Classifies a stratified sample of 200 motions across 10 mechanism types
|
|
to validate the consensus framing hypothesis. Performs chi-squared tests
|
|
and generates a markdown report.
|
|
|
|
Usage:
|
|
uv run python analysis/right_wing/mechanism_classification.py
|
|
uv run python analysis/right_wing/mechanism_classification.py --n-pre-high 25 --n-pre-low 25 --n-post-high 75 --n-post-low 75
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from collections import Counter
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import duckdb
|
|
import numpy as np
|
|
from scipy.stats import chi2_contingency
|
|
|
|
# Absolute path of the directory three levels above this file. It is placed at
# the front of sys.path (only if not already listed) so that modules located
# under that directory can be imported.
ROOT = Path(__file__).parent.parent.parent.resolve()
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))
|
|
|
|
|
|
# ── mechanism taxonomy ───────────────────────────────────────────────────────
|
|
|
|
# Identifiers of the ten mechanism types a motion can be assigned to.
# The list order is the row order used wherever the script iterates over
# mechanisms (contingency tables and report tables).
MECHANISMS: list[str] = [
    "consensus_framing",
    "institutional_rule_of_law",
    "welfare_service_expansion",
    "procedural_technical",
    "local_constituency",
    "coalition_alignment",
    "symbolic_declaratory",
    "targeted_restriction",
    "system_dismantling",
    "crisis_response",
]
|
|
|
|
# Dutch display label for each mechanism identifier in MECHANISMS.
# The report looks labels up with .get(mech, mech), so an identifier without
# an entry here is rendered as its raw identifier.
MECHANISM_LABELS_NL: dict[str, str] = {
    "consensus_framing": "Consensus framing (gedeeld belang)",
    "institutional_rule_of_law": "Institutioneel/rechtsstatelijk",
    "welfare_service_expansion": "Welzijn/dienstverlening uitbreiding",
    "procedural_technical": "Procedureel/technisch",
    "local_constituency": "Lokaal/regionaal",
    "coalition_alignment": "Coalitie-afstemming",
    "symbolic_declaratory": "Symbolisch/declaratoir",
    "targeted_restriction": "Gerichte restrictie",
    "system_dismantling": "Systeemontmanteling",
    "crisis_response": "Crisisrespons",
}
|
|
|
|
|
|
# ── inline classifications (subagent-classified) ─────────────────────────────
|
|
# Classification key: motion_id -> mechanism
|
|
# Classified by reading full title + body_text of each motion.
|
|
|
|
# Hand-assigned mechanism per motion id, grouped below by sampling stratum.
# Each value is expected to be one of the identifiers in MECHANISMS; the
# trailing comment on every entry is a short English summary of the motion.
CLASSIFICATIONS: dict[int, str] = {
    # === PRE_HIGH (25 motions, pre-2024, centrist_support_strict > 0.5) ===
    15458: "crisis_response",  # corona tax deferral/bureaucracy
    26477: "institutional_rule_of_law",  # Israel SOFA treaty ratification
    9149: "consensus_framing",  # arming MQ-9 Reaper (shared defense)
    17099: "procedural_technical",  # Brexit transition law amendment
    4933: "procedural_technical",  # soil amendment to Environment Act
    17751: "consensus_framing",  # zero baseline regulatory burden
    20068: "procedural_technical",  # baseline measurement manure policy
    16520: "consensus_framing",  # Dutch agriculture global leadership
    17036: "welfare_service_expansion",  # defense work guarantee scheme
    17681: "consensus_framing",  # simplify car taxation
    14554: "procedural_technical",  # tourism cooperation quartermaster
    21864: "procedural_technical",  # adapt manure processing definition
    26493: "targeted_restriction",  # crackdown on asylum seeker nuisance
    21982: "consensus_framing",  # MKB regulatory burden reduction
    14125: "crisis_response",  # minimize corona tax bureaucracy
    13683: "welfare_service_expansion",  # GLB influence on farmer income
    16691: "procedural_technical",  # wild boar population management
    15005: "procedural_technical",  # periodic franchise consultation body
    17536: "institutional_rule_of_law",  # tackle hate preachers across Schengen
    16999: "consensus_framing",  # prevent unfair steel competition
    8325: "procedural_technical",  # defense materiel budget amendment
    13370: "welfare_service_expansion",  # PGB equal position amendment
    18030: "procedural_technical",  # highway lighting at night
    11382: "procedural_technical",  # amendment removing generic exemption
    18616: "procedural_technical",  # VAT e-commerce implementation law

    # === PRE_LOW (25 motions, pre-2024, centrist_support_strict <= 0.5) ===
    12411: "crisis_response",  # temporary nitrogen threshold for housing
    22595: "crisis_response",  # shopping by appointment during lockdown
    15772: "system_dismantling",  # prevent pension cuts (challenge ECB rate)
    7111: "welfare_service_expansion",  # max support for fishing sector
    25784: "targeted_restriction",  # keep coal plants open until nuclear ready
    27731: "system_dismantling",  # BOR tax amendment (dismantle tax change)
    15626: "crisis_response",  # corona kickstart economy scenarios
    20215: "welfare_service_expansion",  # protect high-quality farmland
    16430: "symbolic_declaratory",  # don't send 45bn to southern EU states
    25982: "local_constituency",  # prevent "cold sanitation" (presumably Dutch "koude sanering") of shrimp fishery
    17176: "targeted_restriction",  # criminalize illegal residence
    7054: "procedural_technical",  # stacking effect of housing market measures
    20323: "procedural_technical",  # optical recognition for catch registration
    18025: "system_dismantling",  # halt curriculum revision PO/VO
    14837: "system_dismantling",  # nature policy without nitrogen fixation
    19620: "targeted_restriction",  # natural gas-free housing never mandatory
    21801: "consensus_framing",  # embrace Defense Vision 2035
    19464: "crisis_response",  # keep terraces open during EK football
    26855: "targeted_restriction",  # limit immigration inflow
    22280: "local_constituency",  # farmer costs for societal tasks
    20115: "symbolic_declaratory",  # defend national veto rights in EU
    15082: "targeted_restriction",  # no residency permits for delayed procedures
    6637: "targeted_restriction",  # protect welfare state via asylum stop
    18691: "symbolic_declaratory",  # no extra troops to Afghanistan
    18062: "crisis_response",  # apologies for care home corona deaths

    # === POST_HIGH (75 motions, post-2024, centrist_support_strict > 0.5) ===
    3784: "procedural_technical",  # healthcare fraud info sharing
    10205: "procedural_technical",  # defense materiel fund budget 2025
    10278: "coalition_alignment",  # budget amendment covering OCW package
    25079: "consensus_framing",  # EU nitrogen standards for industry
    2980: "targeted_restriction",  # designate NL as under migration pressure
    10420: "crisis_response",  # citizen resilience / preparedness info
    25092: "targeted_restriction",  # Ukrainian displaced persons pay care costs
    25545: "institutional_rule_of_law",  # legal basis for housing corp data
    23065: "procedural_technical",  # Justice & Security budget 2024
    2878: "welfare_service_expansion",  # index Wbso tax scheme for R&D
    25573: "procedural_technical",  # efficient spending nature subsidies
    3298: "symbolic_declaratory",  # support Gaza peace plan
    25061: "consensus_framing",  # simplify RI&E obligations for SMEs
    4481: "consensus_framing",  # acquire control points (geo-)economic policy
    3961: "procedural_technical",  # nuclear fleet & synergy study
    473: "institutional_rule_of_law",  # recover UvA riot damages from demonstrators
    10413: "consensus_framing",  # max legal room for drone training
    974: "procedural_technical",  # WLC norm impact on housing ambition
    24009: "procedural_technical",  # scientific basis for spray zones
    9789: "institutional_rule_of_law",  # use temporary law on terrorism measures
    24651: "targeted_restriction",  # slow labor migration via top summit
    1890: "local_constituency",  # Groningen/Noord-Drenthe success stories
    1191: "consensus_framing",  # prioritize safety in Station Agenda
    3448: "targeted_restriction",  # reserve nitrogen space for PAS melders
    23910: "institutional_rule_of_law",  # legal options vs antisemitic organizations
    25566: "welfare_service_expansion",  # childminder childcare allowance fix
    2070: "targeted_restriction",  # return plan vs uncooperative countries
    23885: "consensus_framing",  # pension funds focus on purchasing power
    24906: "procedural_technical",  # repair technical omissions Succession Act
    2496: "procedural_technical",  # satellite launch capacity Netherlands
    25582: "targeted_restriction",  # stricter asylum permit withdrawal
    3053: "local_constituency",  # safety campus Assen development
    1495: "procedural_technical",  # risk-based foreign funding oversight
    10178: "procedural_technical",  # Economic Affairs budget 2025
    1614: "procedural_technical",  # nuclear sector training needs inventory
    23441: "consensus_framing",  # redirect equal opportunity budget to quality
    3569: "consensus_framing",  # infrastructure investment counted as NATO
    10285: "procedural_technical",  # States General budget 2025
    23058: "procedural_technical",  # OCW budget 2024
    3287: "procedural_technical",  # inform parliament on humanitarian spending
    10434: "consensus_framing",  # integral future-proof media system
    10089: "procedural_technical",  # Asylum & Migration budget 2025
    22706: "consensus_framing",  # entrepreneur accord process
    3877: "institutional_rule_of_law",  # safety of converted asylum seekers
    25062: "consensus_framing",  # workable hazardous substances for SMEs
    3687: "targeted_restriction",  # EVRM interpretation protocol for asylum
    25166: "procedural_technical",  # detection dogs in prisons
    4618: "procedural_technical",  # Housing budget amendment
    3468: "institutional_rule_of_law",  # expand riot police weapons/defense
    24632: "institutional_rule_of_law",  # police access fatbike menu for enforcement
    25451: "symbolic_declaratory",  # calculate Palestine Authority pay-to-slay
    2351: "targeted_restriction",  # max 1yr prison for undesired declaration
    4227: "consensus_framing",  # Nijkerk bridge as strategic infrastructure
    22853: "consensus_framing",  # accelerate North Sea gas extraction
    9884: "procedural_technical",  # innovation contribution to emission reduction
    1428: "consensus_framing",  # liberalize trade with Canada/Mexico
    3629: "symbolic_declaratory",  # modernize UN Refugee Convention
    1572: "local_constituency",  # wolf attack impact mapping
    25493: "procedural_technical",  # defense materiel fund budget amendment
    1359: "procedural_technical",  # firework ban damage compensation estimate
    2252: "procedural_technical",  # municipal fund budget amendment
    23605: "procedural_technical",  # PAS melders legal verification process
    3760: "consensus_framing",  # Defense Readiness Act submission
    1005: "consensus_framing",  # EU import tariffs to support entrepreneurs
    10110: "coalition_alignment",  # budget amendment covering OCW package
    23301: "consensus_framing",  # international tendering military projects
    24046: "symbolic_declaratory",  # abstain from WHA accord (pandemic treaty)
    651: "welfare_service_expansion",  # agri nature management for Natuurnetwerk
    1491: "targeted_restriction",  # max wolf population Netherlands
    25606: "targeted_restriction",  # prevent wolf habituation to humans
    313: "procedural_technical",  # temporarily drop pre-filled tax return
    24008: "consensus_framing",  # EU approval frameworks for green agents
    754: "targeted_restriction",  # expel third-country nationals from Ukraine
    25469: "targeted_restriction",  # EU return hubs for asylum seekers
    25091: "targeted_restriction",  # stop asylum if travel to home country

    # === POST_LOW (75 motions, post-2024, centrist_support_strict <= 0.5) ===
    2170: "institutional_rule_of_law",  # prison renovation budget amendment
    22792: "procedural_technical",  # investigate French espionage at Saab
    10597: "institutional_rule_of_law",  # remove third observer from preventive search
    23013: "institutional_rule_of_law",  # antisemitism combating work plan budget
    3472: "institutional_rule_of_law",  # minimum sentences for violence vs aid workers
    2014: "system_dismantling",  # limit asylum appeals to single instance
    920: "procedural_technical",  # transitional facility real estate box 3
    2143: "welfare_service_expansion",  # campaign working in healthcare
    688: "system_dismantling",  # reject Tromsø Convention accession
    2290: "system_dismantling",  # repeal municipal asylum task law
    4497: "targeted_restriction",  # stop funding terrorist organizations
    3823: "symbolic_declaratory",  # child attachment not against family return
    23141: "institutional_rule_of_law",  # deploy KMar for domestic security
    4436: "institutional_rule_of_law",  # standard aggravated sentence for aid worker violence
    25616: "targeted_restriction",  # scrap municipal status holder housing task
    2662: "institutional_rule_of_law",  # prevent NL germline modification tech export
    23287: "institutional_rule_of_law",  # community service ban for violence vs police
    4660: "consensus_framing",  # defense cooperation with Israel
    4761: "targeted_restriction",  # denaturalization and forced remigration
    2264: "institutional_rule_of_law",  # recover UvA demo damages from perpetrators
    4394: "institutional_rule_of_law",  # beanbag air-pressure weapon for police pilot
    1691: "targeted_restriction",  # no penal orders for criminal asylum seekers
    10601: "targeted_restriction",  # ban NGOs in human smuggling chain
    4089: "targeted_restriction",  # deny entry to Al-Hol camp persons
    23206: "procedural_technical",  # map NATO defense product leakage
    22676: "institutional_rule_of_law",  # offensive vs porn industry abuses
    115: "system_dismantling",  # oppose EU 90% emission reduction target
    3951: "consensus_framing",  # nuclear energy in CO2-low energy mix post-COP30
    1375: "targeted_restriction",  # enforce status holder housing priority ban
    3090: "targeted_restriction",  # ban Muslim Brotherhood in Netherlands
    24650: "procedural_technical",  # cash acceptance obligation for small payments
    1772: "consensus_framing",  # legislation for top-10 business climate
    3678: "system_dismantling",  # total asylum stop and family reunification stop
    1692: "institutional_rule_of_law",  # remove penal orders for serious crimes
    24077: "symbolic_declaratory",  # investigate Fatah role in Oct 7 attack
    349: "institutional_rule_of_law",  # increased penalty for organ removal/sexual exploitation
    9769: "targeted_restriction",  # return Syrians to rebuild their country
    4656: "symbolic_declaratory",  # no Ukraine NATO accession
    23984: "system_dismantling",  # don't raise eco-regulation requirements
    2168: "institutional_rule_of_law",  # prison budget for JeugdzorgPlus takeover
    4443: "institutional_rule_of_law",  # 200% sentence increase for violence vs public servants
    4489: "procedural_technical",  # fishing disturbance impact on scoter
    10290: "targeted_restriction",  # concrete migration project for JBZ Council
    4071: "targeted_restriction",  # investigate housing fraud by status holders
    4088: "targeted_restriction",  # agreements with third countries on asylum
    1507: "system_dismantling",  # empirical nature data as alternative to KDW
    2870: "procedural_technical",  # FGR transitional law amendment
    1912: "system_dismantling",  # repeal Spreidingswet
    22658: "symbolic_declaratory",  # no Dutch troops to Ukraine
    10288: "targeted_restriction",  # prepare Syrian return plan
    4080: "institutional_rule_of_law",  # research heavier forced re-education
    1847: "targeted_restriction",  # return hub for hopeless asylum seekers
    23127: "system_dismantling",  # restore 120/130 km/h speed limit
    4367: "targeted_restriction",  # no relaxation of EU accession for Ukraine
    9790: "targeted_restriction",  # no cooperation with IS returnees
    4150: "procedural_technical",  # fishing net selectivity/safety research
    741: "targeted_restriction",  # blue card minimum salary 1.3x average
    1705: "consensus_framing",  # reduce regulatory burden for industry
    1831: "consensus_framing",  # precautionary principle proportionality
    10600: "targeted_restriction",  # ban NGOs active in migrant smuggling
    9767: "targeted_restriction",  # no compulsory asylum reception in distribution decision
    3830: "system_dismantling",  # stop patronizing policy toward adults
    4221: "system_dismantling",  # overhead norm for public broadcasting
    3354: "institutional_rule_of_law",  # raise 3D-printed firearms max penalty
    9977: "symbolic_declaratory",  # oppose abolishing EU veto right
    898: "consensus_framing",  # simplify Omnibus and CSDDD
    24848: "system_dismantling",  # repeal Spreidingswet ASAP
    756: "targeted_restriction",  # temporary stop on family reunification
    24358: "institutional_rule_of_law",  # increase prison capacity via earlier lockup
    4309: "institutional_rule_of_law",  # targeted demographic policy for enforcement
    10167: "local_constituency",  # pilot projects for crayfish control
    23633: "procedural_technical",  # adjust parliament bell ringing
    23030: "targeted_restriction",  # no compulsory asylum places in distribution
    1959: "system_dismantling",  # no ban on plastic-containing wet wipes
    23454: "procedural_technical",  # legal analysis of pension transition risks
}
|
|
|
|
|
|
# ── sampling ─────────────────────────────────────────────────────────────────
|
|
|
|
# Deterministic sample: 200 motions used for inline classification.
|
|
# Motion IDs fixed to enable reproducible classification results.
|
|
# Fixed motion ids per stratum (25 / 25 / 75 / 75 ids, each list sorted
# ascending). The stratum keys match the group names used when counting
# mechanisms, and every id listed here has an entry in CLASSIFICATIONS.
DETERMINISTIC_SAMPLE_IDS: dict[str, list[int]] = {
    "pre_high": [4933, 8325, 9149, 11382, 13370, 13683, 14125, 14554, 15005, 15458, 16520, 16691, 16999, 17036, 17099, 17536, 17681, 17751, 18030, 18616, 20068, 21864, 21982, 26477, 26493],
    "pre_low": [6637, 7054, 7111, 12411, 14837, 15082, 15626, 15772, 16430, 17176, 18025, 18062, 18691, 19464, 19620, 20115, 20215, 20323, 21801, 22280, 22595, 25784, 25982, 26855, 27731],
    "post_high": [313, 473, 651, 754, 974, 1005, 1191, 1359, 1428, 1491, 1495, 1572, 1614, 1890, 2070, 2252, 2351, 2496, 2878, 2980, 3053, 3287, 3298, 3448, 3468, 3569, 3629, 3687, 3760, 3784, 3877, 3961, 4227, 4481, 4618, 9789, 9884, 10089, 10110, 10178, 10205, 10278, 10285, 10413, 10420, 10434, 22706, 22853, 23058, 23065, 23301, 23441, 23605, 23885, 23910, 24008, 24009, 24046, 24632, 24651, 24906, 25061, 25062, 25079, 25091, 25092, 25166, 25451, 25469, 25493, 25545, 25566, 25573, 25582, 25606],
    "post_low": [115, 349, 688, 741, 756, 898, 920, 1375, 1507, 1691, 1692, 1705, 1772, 1831, 1847, 1912, 1959, 2014, 2143, 2168, 2170, 2264, 2290, 2662, 2870, 3090, 3354, 3472, 3678, 3823, 3830, 3951, 4071, 4080, 4088, 4089, 4150, 4221, 4309, 4367, 4394, 4436, 4443, 4489, 4497, 4656, 4660, 4761, 9767, 9769, 9790, 9977, 10167, 10288, 10290, 10597, 10600, 10601, 22658, 22676, 22792, 23013, 23030, 23127, 23141, 23206, 23287, 23454, 23633, 23984, 24077, 24358, 24650, 24848, 25616],
}
|
|
|
|
|
|
def sample_motions(
    db_path: str,
    n_pre_high: int = 25,
    n_pre_low: int = 25,
    n_post_high: int = 75,
    n_post_low: int = 75,
    seed: int = 42,
) -> list[dict[str, Any]]:
    """Load the fixed motion sample listed in DETERMINISTIC_SAMPLE_IDS.

    Queries ``right_wing_motions`` joined to ``motions`` for exactly the ids
    in DETERMINISTIC_SAMPLE_IDS and tags each returned row with the stratum
    its id is listed under.

    Args:
        db_path: Path of the DuckDB database to open.
        n_pre_high, n_pre_low, n_post_high, n_post_low, seed: Accepted for
            interface compatibility; this function does not read them, the
            sample is fully determined by DETERMINISTIC_SAMPLE_IDS.

    Returns:
        One dict per row found, ordered by motion id, with keys
        ``motion_id``, ``title``, ``body_text``, ``year``,
        ``centrist_support_strict`` and ``stratum``. NULL titles/bodies are
        returned as empty strings. Ids missing from the database simply
        produce no row.
    """
    all_ids = [mid for ids in DETERMINISTIC_SAMPLE_IDS.values() for mid in ids]
    stratum_map = {
        mid: stratum
        for stratum, ids in DETERMINISTIC_SAMPLE_IDS.items()
        for mid in ids
    }

    # One "?" per id so the ids are bound as parameters, never interpolated.
    placeholders = ",".join(["?"] * len(all_ids))
    query = f"""
            SELECT r.motion_id, m.title, m.body_text, r.year, r.centrist_support_strict
            FROM right_wing_motions r
            JOIN motions m ON r.motion_id = m.id
            WHERE r.motion_id IN ({placeholders})
            ORDER BY r.motion_id
            """

    con = duckdb.connect(db_path)
    try:
        rows = con.execute(query, all_ids).fetchall()
    finally:
        con.close()

    records: list[dict[str, Any]] = []
    for motion_id, title, body_text, year, support in rows:
        records.append(
            {
                "motion_id": motion_id,
                "title": title or "",
                "body_text": body_text or "",
                "year": year,
                "centrist_support_strict": support,
                "stratum": stratum_map.get(motion_id, "unknown"),
            }
        )
    return records
|
|
|
|
|
|
# ── analysis ─────────────────────────────────────────────────────────────────
|
|
|
|
def compute_distribution(
|
|
sample: list[dict[str, Any]],
|
|
classifications: dict[int, str],
|
|
) -> dict[str, Any]:
|
|
"""Compute mechanism distribution by period and support level."""
|
|
# Build distribution table
|
|
groups: dict[str, Counter[str]] = {
|
|
"pre_high": Counter(),
|
|
"pre_low": Counter(),
|
|
"post_high": Counter(),
|
|
"post_low": Counter(),
|
|
}
|
|
|
|
classified = 0
|
|
unclassified = 0
|
|
for motion in sample:
|
|
mid = motion["motion_id"]
|
|
stratum = motion["stratum"]
|
|
mechanism = classifications.get(mid)
|
|
if mechanism and mechanism in MECHANISMS:
|
|
groups[stratum][mechanism] += 1
|
|
classified += 1
|
|
else:
|
|
unclassified += 1
|
|
groups[stratum]["unclassified"] = groups[stratum].get("unclassified", 0) + 1 # type: ignore[index]
|
|
|
|
# Build contingency table for chi-squared: period × mechanism
|
|
# Consolidate: pre = pre_high + pre_low, post = post_high + post_low
|
|
pre_counts = groups["pre_high"] + groups["pre_low"]
|
|
post_counts = groups["post_high"] + groups["post_low"]
|
|
|
|
# Contingency table: rows=mechanisms, cols=[pre, post]
|
|
contingency_pre_post = []
|
|
row_labels = []
|
|
for mech in MECHANISMS:
|
|
row = [pre_counts.get(mech, 0), post_counts.get(mech, 0)]
|
|
if sum(row) > 0:
|
|
contingency_pre_post.append(row)
|
|
row_labels.append(mech)
|
|
|
|
chi2_result = None
|
|
if len(contingency_pre_post) >= 2:
|
|
arr = np.array(contingency_pre_post)
|
|
# Only include rows/cols with sufficient data
|
|
if arr.sum() > 0 and arr.shape[0] >= 2 and arr.shape[1] >= 2:
|
|
try:
|
|
chi2, pval, dof, expected = chi2_contingency(arr)
|
|
chi2_result = {
|
|
"chi2": float(chi2),
|
|
"p_value": float(pval),
|
|
"dof": int(dof),
|
|
"significant": bool(pval < 0.05),
|
|
}
|
|
except ValueError:
|
|
chi2_result = {"error": "Invalid contingency table"}
|
|
|
|
# High vs low support within post-2024 only
|
|
post_high_counts = groups["post_high"]
|
|
post_low_counts = groups["post_low"]
|
|
contingency_hl = []
|
|
hl_labels = []
|
|
for mech in MECHANISMS:
|
|
row = [post_high_counts.get(mech, 0), post_low_counts.get(mech, 0)]
|
|
if sum(row) > 0:
|
|
contingency_hl.append(row)
|
|
hl_labels.append(mech)
|
|
|
|
chi2_hl_result = None
|
|
if len(contingency_hl) >= 2:
|
|
arr_hl = np.array(contingency_hl)
|
|
if arr_hl.sum() > 0 and arr_hl.shape[0] >= 2 and arr_hl.shape[1] >= 2:
|
|
try:
|
|
chi2, pval, dof, expected = chi2_contingency(arr_hl)
|
|
chi2_hl_result = {
|
|
"chi2": float(chi2),
|
|
"p_value": float(pval),
|
|
"dof": int(dof),
|
|
"significant": bool(pval < 0.05),
|
|
}
|
|
except ValueError:
|
|
chi2_hl_result = {"error": "Invalid contingency table"}
|
|
|
|
# Specific test: consensus_framing in post_high vs post_low
|
|
cf_post_high = post_high_counts.get("consensus_framing", 0)
|
|
cf_post_low = post_low_counts.get("consensus_framing", 0)
|
|
total_post_high = sum(post_high_counts.values())
|
|
total_post_low = sum(post_low_counts.values())
|
|
cf_ratio_high = cf_post_high / total_post_high if total_post_high else 0
|
|
cf_ratio_low = cf_post_low / total_post_low if total_post_low else 0
|
|
|
|
# Fisher-style 2x2 for consensus_framing in post: high vs low
|
|
non_cf_post_high = total_post_high - cf_post_high
|
|
non_cf_post_low = total_post_low - cf_post_low
|
|
cf_2x2 = np.array([[cf_post_high, non_cf_post_high], [cf_post_low, non_cf_post_low]])
|
|
cf_chi2_result = None
|
|
if cf_2x2.min() >= 0:
|
|
try:
|
|
chi2, pval, dof, _ = chi2_contingency(cf_2x2)
|
|
cf_chi2_result = {
|
|
"chi2": float(chi2),
|
|
"p_value": float(pval),
|
|
"dof": int(dof),
|
|
"significant": bool(pval < 0.05),
|
|
"cf_ratio_high": round(cf_ratio_high, 4),
|
|
"cf_ratio_low": round(cf_ratio_low, 4),
|
|
"cf_count_high": cf_post_high,
|
|
"cf_count_low": cf_post_low,
|
|
"total_high": total_post_high,
|
|
"total_low": total_post_low,
|
|
}
|
|
except ValueError:
|
|
cf_chi2_result = {"error": "Invalid 2x2 table"}
|
|
|
|
# Pre vs post consensus framing
|
|
cf_pre = pre_counts.get("consensus_framing", 0)
|
|
cf_post = post_counts.get("consensus_framing", 0)
|
|
total_pre = sum(pre_counts.values())
|
|
total_post = sum(post_counts.values())
|
|
|
|
return {
|
|
"sample_size": len(sample),
|
|
"classified": classified,
|
|
"unclassified": unclassified,
|
|
"distribution": {s: dict(g.most_common()) for s, g in groups.items()},
|
|
"mechanism_totals_pre": dict(pre_counts.most_common()),
|
|
"mechanism_totals_post": dict(post_counts.most_common()),
|
|
"chi2_pre_vs_post": chi2_result,
|
|
"chi2_post_high_vs_low": chi2_hl_result,
|
|
"consensus_framing_test": cf_chi2_result,
|
|
"cf_pre_post": {
|
|
"cf_pre": cf_pre,
|
|
"cf_post": cf_post,
|
|
"total_pre": total_pre,
|
|
"total_post": total_post,
|
|
"ratio_pre": round(cf_pre / total_pre, 4) if total_pre else 0,
|
|
"ratio_post": round(cf_post / total_post, 4) if total_post else 0,
|
|
},
|
|
}
|
|
|
|
|
|
# ── report generation ────────────────────────────────────────────────────────
|
|
|
|
def _distribution_table(
    heading: str,
    counts: dict[str, int],
    mechanisms: list[str],
    labels: dict[str, str],
) -> list[str]:
    """Render one "Mechanism | Count | Pct" markdown table, preceded by a blank line."""
    total = sum(counts.values())
    table = [
        "",
        heading,
        "",
        "| Mechanism | Count | Pct |",
        "|-----------|-------|-----|",
    ]
    for mech in mechanisms:
        cnt = counts.get(mech, 0)
        pct = f"{cnt / total * 100:.1f}%" if total else "0%"
        table.append(f"| {labels.get(mech, mech)} | {cnt} | {pct} |")
    table.append(f"| **Total** | **{total}** | **100%** |")
    return table


def generate_report(
    results: dict[str, Any],
    output_path: str,
    mechanisms: list[str] | None = None,
    labels: dict[str, str] | None = None,
) -> None:
    """Generate the mechanism classification markdown report.

    Args:
        results: Dict with the keys produced by ``compute_distribution``
            (``distribution``, ``mechanism_totals_pre``/``_post``, the three
            test results, ``cf_pre_post`` and the sample counters).
        output_path: File to write; missing parent directories are created.
        mechanisms: Mechanism identifiers to list, in row order. Defaults to
            the module-level ``MECHANISMS``.
        labels: Display label per mechanism identifier; identifiers without
            a label are printed as-is. Defaults to ``MECHANISM_LABELS_NL``.

    The consensus-framing result is only reported as supporting the
    hypothesis when the test is significant AND the share is higher in the
    high-support group, because the chi-squared test itself is two-sided.
    """
    if mechanisms is None:
        mechanisms = MECHANISMS
    if labels is None:
        labels = MECHANISM_LABELS_NL

    dist = results["distribution"]
    cf_test = results["consensus_framing_test"]
    cf_pp = results["cf_pre_post"]
    cf_ok = bool(cf_test) and "error" not in cf_test
    # Direction of the difference; only meaningful when the test ran.
    cf_higher_in_high = cf_ok and cf_test["cf_ratio_high"] > cf_test["cf_ratio_low"]

    lines = [
        "# Mechanism Classification Report",
        "",
        f"**Sample:** {results['sample_size']} motions (stratified: 50 pre-2024, 150 post-2024)",
        f"**Classified:** {results['classified']} motions | **Unclassified:** {results['unclassified']}",
        "",
        "## 1. Mechanism Distribution by Group",
    ]

    # Section 1: one table per stratum.
    group_headings = (
        ("pre_high", "### Pre-2024, High Centrist Support (CS > 0.5)"),
        ("pre_low", "### Pre-2024, Low Centrist Support (CS <= 0.5)"),
        ("post_high", "### Post-2024, High Centrist Support (CS > 0.5)"),
        ("post_low", "### Post-2024, Low Centrist Support (CS <= 0.5)"),
    )
    for key, heading in group_headings:
        lines.extend(_distribution_table(heading, dist.get(key, {}), mechanisms, labels))

    post_high = dist.get("post_high", {})
    post_low = dist.get("post_low", {})
    post_high_total = sum(post_high.values())
    post_low_total = sum(post_low.values())

    # Section 2: pre vs post summary.
    lines.extend([
        "",
        "## 2. Consolidated Pre vs Post-2024 Distribution",
        "",
        "| Mechanism | Pre-2024 | Pct Pre | Post-2024 | Pct Post |",
        "|-----------|----------|---------|-----------|----------|",
    ])
    pre_cons = results["mechanism_totals_pre"]
    post_cons = results["mechanism_totals_post"]
    pre_total = sum(pre_cons.values())
    post_total = sum(post_cons.values())
    for mech in mechanisms:
        pre_cnt = pre_cons.get(mech, 0)
        post_cnt = post_cons.get(mech, 0)
        pre_pct = f"{pre_cnt / pre_total * 100:.1f}%" if pre_total else "0%"
        post_pct = f"{post_cnt / post_total * 100:.1f}%" if post_total else "0%"
        label = labels.get(mech, mech)
        lines.append(f"| {label} | {pre_cnt} | {pre_pct} | {post_cnt} | {post_pct} |")
    lines.append(f"| **Total** | **{pre_total}** | **100%** | **{post_total}** | **100%** |")

    # Section 3: consensus framing focus.
    lines.extend([
        "",
        "## 3. Consensus Framing Hypothesis Test",
        "",
        "**H0:** Consensus framing is equally common in high-support and low-support post-2024 motions.",
        "**H1:** Consensus framing is significantly more common in high-support post-2024 motions.",
        "",
    ])
    if cf_ok:
        lines.append(f"- Consensus framing in post-2024 HIGH: {cf_test['cf_count_high']}/{cf_test['total_high']} ({cf_test['cf_ratio_high']:.1%})")
        lines.append(f"- Consensus framing in post-2024 LOW: {cf_test['cf_count_low']}/{cf_test['total_low']} ({cf_test['cf_ratio_low']:.1%})")
        lines.append(f"- χ²(1) = {cf_test['chi2']:.3f}, p = {cf_test['p_value']:.4f}")
        if not cf_test["significant"]:
            lines.append("- **Result: Not significant (p >= 0.05). Cannot reject the null.**")
        elif cf_higher_in_high:
            lines.append("- **Result: Significant difference (p < 0.05). Consensus framing IS more common in high-support post-2024 motions.**")
        else:
            # Significant, but the share is not higher in the high-support
            # group, so the one-sided H1 must not be claimed.
            lines.append("- **Result: Significant difference (p < 0.05), but consensus framing is NOT more common in high-support post-2024 motions.**")
    else:
        lines.append("- Consensus framing test could not be performed (insufficient data).")

    lines.extend([
        "",
        f"- Consensus framing pre-2024: {cf_pp['cf_pre']}/{cf_pp['total_pre']} ({cf_pp['ratio_pre']:.1%})",
        f"- Consensus framing post-2024: {cf_pp['cf_post']}/{cf_pp['total_post']} ({cf_pp['ratio_post']:.1%})",
    ])

    # Sections 4 and 5: full-table chi-squared tests, shown only when they ran.
    chi2_all = results["chi2_pre_vs_post"]
    if chi2_all and "error" not in chi2_all:
        lines.extend([
            "",
            "## 4. Chi-Squared Test: Period × Mechanism",
            "",
            f"- χ²({chi2_all['dof']}) = {chi2_all['chi2']:.3f}, p = {chi2_all['p_value']:.4f}",
            f"- {'Significant' if chi2_all['significant'] else 'Not significant'} difference in mechanism distribution between pre and post-2024.",
        ])

    chi2_hl = results["chi2_post_high_vs_low"]
    if chi2_hl and "error" not in chi2_hl:
        lines.extend([
            "",
            "## 5. Chi-Squared Test: Support Level × Mechanism (Post-2024)",
            "",
            f"- χ²({chi2_hl['dof']}) = {chi2_hl['chi2']:.3f}, p = {chi2_hl['p_value']:.4f}",
            f"- {'Significant' if chi2_hl['significant'] else 'Not significant'} difference in mechanism distribution between high and low support post-2024 motions.",
        ])

    # Section 6: key findings.
    lines.extend([
        "",
        "## 6. Key Findings",
        "",
    ])

    post_high_sorted = sorted(post_high.items(), key=lambda x: x[1], reverse=True)
    post_low_sorted = sorted(post_low.items(), key=lambda x: x[1], reverse=True)

    lines.append("### Top 3 mechanisms in post-2024 HIGH-support motions:")
    for mech, cnt in post_high_sorted[:3]:
        pct = cnt / post_high_total * 100
        lines.append(f"- {labels.get(mech, mech)}: {cnt} ({pct:.1f}%)")

    lines.append("")
    lines.append("### Top 3 mechanisms in post-2024 LOW-support motions:")
    for mech, cnt in post_low_sorted[:3]:
        pct = cnt / post_low_total * 100
        lines.append(f"- {labels.get(mech, mech)}: {cnt} ({pct:.1f}%)")

    # Shift analysis: percentage-point change per mechanism.
    lines.extend([
        "",
        "### Mechanism shifts from pre to post-2024",
        "",
        "| Mechanism | Pre Pct | Post Pct | Δ |",
        "|-----------|---------|----------|---|",
    ])
    for mech in mechanisms:
        pre_share = pre_cons.get(mech, 0) / pre_total * 100 if pre_total else 0
        post_share = post_cons.get(mech, 0) / post_total * 100 if post_total else 0
        delta = post_share - pre_share
        lines.append(f"| {labels.get(mech, mech)} | {pre_share:.1f}% | {post_share:.1f}% | {delta:+.1f}% |")

    # Section 7: conclusion.
    lines.extend([
        "",
        "## 7. Conclusion",
        "",
    ])

    cf_consensus = ""
    if cf_ok:
        if cf_test["significant"] and cf_higher_in_high:
            cf_consensus = (
                f"The consensus framing hypothesis **is supported**: consensus framing motions "
                f"are {cf_test['cf_ratio_high']:.1%} of high-support post-2024 motions vs "
                f"{cf_test['cf_ratio_low']:.1%} of low-support post-2024 motions "
                f"(χ² = {cf_test['chi2']:.3f}, p = {cf_test['p_value']:.4f})."
            )
        elif cf_test["significant"]:
            # Significant but in the wrong direction: do not describe this as
            # "no significant difference".
            cf_consensus = (
                f"The consensus framing hypothesis **is not supported**: the difference is "
                f"significant but in the opposite direction — consensus framing motions "
                f"are {cf_test['cf_ratio_high']:.1%} of high-support post-2024 motions vs "
                f"{cf_test['cf_ratio_low']:.1%} of low-support post-2024 motions "
                f"(χ² = {cf_test['chi2']:.3f}, p = {cf_test['p_value']:.4f})."
            )
        else:
            cf_consensus = (
                f"The consensus framing hypothesis **is not supported**: no significant difference "
                f"between high ({cf_test['cf_ratio_high']:.1%}) and low ({cf_test['cf_ratio_low']:.1%}) "
                f"support post-2024 motions (p = {cf_test['p_value']:.4f})."
            )

    lines.append(cf_consensus)
    lines.append("")
    lines.append("### Limitations")
    lines.append("- Sample: 200 motions (50 pre, 150 post) — may not capture rare mechanisms")
    lines.append("- Single-classifier: all motions classified by one subagent (inline), no inter-rater validation")
    lines.append("- Binary support threshold: CS > 0.5 vs <= 0.5 may oversimplify the support spectrum")
    lines.append("- Mechanism assignment: single primary mechanism per motion; some motions span multiple categories")

    # Write output
    out_path = Path(output_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    print(f"Report written to {out_path}")
|
|
|
|
|
|
# ── main ─────────────────────────────────────────────────────────────────────
|
|
|
|
def main() -> int:
    """Run the mechanism-classification pipeline from the command line.

    Parses the CLI options, samples motions, optionally dumps the
    classification mapping as JSON, computes the mechanism distribution,
    writes the markdown report and echoes the consensus-framing test result
    to stdout when one is available.

    Returns:
        Process exit code; 0 whenever the run completes.
    """
    cli = argparse.ArgumentParser(description="Systematic mechanism classification")
    cli.add_argument("--db", default="data/motions.db", help="Path to DuckDB database")
    # The per-stratum sample sizes and the sampling seed are all plain integer
    # options, so they are registered from a single table (order preserved).
    for flag, fallback in (
        ("--n-pre-high", 25),
        ("--n-pre-low", 25),
        ("--n-post-high", 75),
        ("--n-post-low", 75),
        ("--seed", 42),
    ):
        cli.add_argument(flag, type=int, default=fallback)
    cli.add_argument("--output", default="reports/overton_window/mechanism_classification.md")
    cli.add_argument("--save-classifications", help="Save classifications JSON to path")
    opts = cli.parse_args()

    # Step 1: draw the sample.
    motions = sample_motions(
        db_path=opts.db,
        n_pre_high=opts.n_pre_high,
        n_pre_low=opts.n_pre_low,
        n_post_high=opts.n_post_high,
        n_post_low=opts.n_post_low,
        seed=opts.seed,
    )
    print(f"Sampled {len(motions)} motions")

    # Step 2 (optional): persist the classification mapping.
    dump_target = opts.save_classifications
    if dump_target:
        dump_path = Path(dump_target)
        dump_path.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(CLASSIFICATIONS, indent=2, ensure_ascii=False)
        dump_path.write_text(serialized, encoding="utf-8")
        print(f"Classifications saved to {dump_path}")

    # Step 3: aggregate the classified sample.
    distribution = compute_distribution(motions, CLASSIFICATIONS)
    print(f"Classified: {distribution['classified']}, Unclassified: {distribution['unclassified']}")

    # Step 4: write the markdown report.
    generate_report(distribution, opts.output)

    # Step 5: console summary, skipped when the test is missing or errored.
    cf = distribution["consensus_framing_test"]
    if not cf or "error" in cf:
        return 0

    print("\nConsensus Framing Test:")
    print(f" Post-2024 HIGH: {cf['cf_count_high']}/{cf['total_high']} = {cf['cf_ratio_high']:.1%}")
    print(f" Post-2024 LOW: {cf['cf_count_low']}/{cf['total_low']} = {cf['cf_ratio_low']:.1%}")
    verdict = "SIGNIFICANT" if cf["significant"] else "NOT significant"
    print(f" χ² = {cf['chi2']:.3f}, p = {cf['p_value']:.4f} ({verdict})")
    return 0
|
|
|
|
|
|
# Script entry point: pass main()'s return code to the interpreter as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
|
|