#!/usr/bin/env python3
"""Direction 3: Migration ↔ Anti-Democratic Overlap Analysis.

Tests the hypothesis that migration is the primary vehicle for anti-democratic
rhetoric in right-wing parliamentary motions.

The script opens ``data/motions.db`` read-only, runs five groups of aggregate
queries over ``right_wing_motions``, ``extremity_scores`` and
``sentiment_scores`` and prints the results as plain-text tables.
"""

from __future__ import annotations

import logging
import sys
from contextlib import closing
from pathlib import Path

ROOT = Path(__file__).parent.parent.parent.resolve()
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)

DB_PATH = ROOT / "data" / "motions.db"

# Category label used for migration motions in right_wing_motions.category.
MIGRATION_CATEGORY = "asiel/vreemdelingen"


def _conn():
    """Open a read-only DuckDB connection to ``DB_PATH``."""
    # Imported here rather than at module level so that the pure helpers in
    # this module can be imported without the database driver installed.
    import duckdb

    return duckdb.connect(str(DB_PATH), read_only=True)


def print_section(title: str) -> None:
    """Print *title* framed by two 70-character ``=`` rules."""
    print(f"\n{'=' * 70}")
    print(f" {title}")
    print(f"{'=' * 70}")


def pct(part: float, whole: float) -> float:
    """Return *part* as a percentage of *whole*, or 0.0 when *whole* is zero."""
    return 100.0 * part / whole if whole else 0.0


def fmt_num(value: float | None, spec: str, missing: str = "N/A") -> str:
    """Format *value* with format spec *spec*; return *missing* for ``None``.

    SQL aggregates such as ``AVG`` yield NULL (``None``) over empty or all-NULL
    inputs, which a numeric format spec cannot render.
    """
    return missing if value is None else format(value, spec)


def like_any(column: str, keywords: list[str]) -> tuple[str, list[str]]:
    """Build an OR-chain of ``LIKE ?`` tests for *column* plus its parameters.

    *column* is a trusted SQL expression (e.g. ``"LOWER(title)"``); each
    keyword becomes a ``%keyword%`` bound parameter. An empty keyword list
    yields the always-false clause ``"FALSE"`` so the SQL stays valid.
    """
    if not keywords:
        return "FALSE", []
    clause = " OR ".join(f"{column} LIKE ?" for _ in keywords)
    return clause, [f"%{kw}%" for kw in keywords]


def analyze_overlap() -> None:
    """1. Quantify overlap: what % of high-extremity motions are migration-related?"""
    print_section("1. OVERLAP QUANTIFICATION")

    with closing(_conn()) as conn:
        # High-extremity buckets by category
        rows = conn.execute("""
            SELECT
                r.category,
                COUNT(*) as total,
                COUNT(*) FILTER (WHERE e.text_score >= 3.5) as high_ext,
                COUNT(*) FILTER (WHERE e.text_score >= 4.0) as very_high_ext,
                COUNT(*) FILTER (WHERE e.text_score >= 5.0) as max_ext,
                ROUND(AVG(e.text_score), 2) as avg_ext,
                ROUND(AVG(s.text_score), 3) as avg_sent
            FROM right_wing_motions r
            JOIN extremity_scores e ON r.motion_id = e.motion_id
            LEFT JOIN sentiment_scores s ON r.motion_id = s.motion_id
            WHERE r.category IS NOT NULL
            GROUP BY r.category
            ORDER BY high_ext DESC
        """).fetchall()

        print(f"\n{'Category':<25} {'Total':>6} {'≥3.5':>6} {'≥4.0':>6} {'=5.0':>6} {'AvgExt':>7} {'AvgSent':>8}")
        print("-" * 70)
        total_high = 0
        total_very_high = 0
        total_max = 0
        for cat, tot, h, vh, mx, avg_e, avg_s in rows:
            total_high += h
            total_very_high += vh
            total_max += mx
            # avg_s comes from a LEFT JOIN and is NULL when a category has no
            # sentiment rows at all.
            print(
                f"{cat:<25} {tot:>6} {h:>6} {vh:>6} {mx:>6} "
                f"{fmt_num(avg_e, '.2f'):>7} {fmt_num(avg_s, '+.3f'):>8}"
            )

        # Migration share of high-extremity (one pass instead of three queries)
        mig_high, mig_very_high, mig_max = conn.execute(
            """
            SELECT
                COUNT(*) FILTER (WHERE e.text_score >= 3.5),
                COUNT(*) FILTER (WHERE e.text_score >= 4.0),
                COUNT(*) FILTER (WHERE e.text_score >= 5.0)
            FROM right_wing_motions r
            JOIN extremity_scores e ON r.motion_id = e.motion_id
            WHERE r.category = ?
            """,
            [MIGRATION_CATEGORY],
        ).fetchone()

        # pct() guards against a threshold that no motion reaches.
        print("\n--- Migration share of high-extremity motions ---")
        print(f" Migration motions ≥3.5 extremity: {mig_high} / {total_high} ({pct(mig_high, total_high):.1f}%)")
        print(f" Migration motions ≥4.0 extremity: {mig_very_high} / {total_very_high} ({pct(mig_very_high, total_very_high):.1f}%)")
        print(f" Migration motions =5.0 extremity: {mig_max} / {total_max} ({pct(mig_max, total_max):.1f}%)")

        # Category breakdown of ≥4.0 motions
        print("\n--- Category breakdown of ≥4.0 extremity motions ---")
        rows = conn.execute("""
            SELECT r.category, COUNT(*) as cnt,
                   ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 1) as pct
            FROM right_wing_motions r
            JOIN extremity_scores e ON r.motion_id = e.motion_id
            WHERE e.text_score >= 4.0
            GROUP BY r.category
            ORDER BY cnt DESC
        """).fetchall()
        for cat, cnt, share in rows:
            # This query does not filter out NULL categories, so label them
            # instead of failing on the width format spec.
            label = cat if cat is not None else "(uncategorized)"
            print(f" {label:<25} {cnt:>3} ({share:>5.1f}%)")


def analyze_party_strategy() -> None:
    """2. Which parties file extreme migration motions?

    Party attribution is a heuristic: a motion is attributed to an MP when the
    MP's surname occurs anywhere in the motion title (case-sensitive substring
    match), so short names can over-match.
    """
    print_section("2. PARTY STRATEGY: EXTREME MIGRATION MOTIONS BY PARTY")

    with closing(_conn()) as conn:
        # The title prefix tells us who submitted: "Motie van het lid " or
        # "Motie van de leden en ", or "Gewijzigde motie van ...".
        print("\n--- Top 20 highest-extremity migration motions with lead MP ---")
        rows = conn.execute(
            """
            SELECT r.title, r.year, e.text_score, e.layman_score,
                   s.text_score, s.layman_score
            FROM right_wing_motions r
            JOIN extremity_scores e ON r.motion_id = e.motion_id
            LEFT JOIN sentiment_scores s ON r.motion_id = s.motion_id
            WHERE r.category = ?
            ORDER BY e.text_score DESC, r.year DESC
            LIMIT 20
            """,
            [MIGRATION_CATEGORY],
        ).fetchall()

        for title, year, ext_t, ext_l, sent_t, sent_l in rows:
            ext_str = f"{fmt_num(ext_t, '.1f')}/{fmt_num(ext_l, '.1f')}"
            sent_str = f"{fmt_num(sent_t, '+.2f'):>5}/{fmt_num(sent_l, '+.2f'):>5}"
            print(f" [{year}] ext={ext_str} sent={sent_str} {(title or '')[:65]}")

        print("\n--- Party attribution of migration motions (by keyword in title) ---")
        # Use a heuristic: known MPs from the extreme list
        mp_parties = {
            "Wilders": "PVV", "Baudet": "FVD", "Kops": "PVV", "Markuszower": "PVV",
            "Vondeling": "PVV", "Boon": "PVV", "Eerdmans": "JA21", "Léon de Jong": "PVV",
            "Van Haga": "BVNL", "Smolders": "PVV", "Van der Plas": "BBB",
            "Van Zanten": "SGP", "Ceder": "CU", "Faber": "PVV", "Ram": "PVV",
            "Rajkowski": "PVV", "Boomsma": "BBB",
        }

        for mp, party in mp_parties.items():
            # Count, mean extremity and >=4.0 count in a single query.
            cnt, avg_ext, high_cnt = conn.execute(
                """
                SELECT COUNT(*),
                       ROUND(AVG(e.text_score), 2),
                       COUNT(*) FILTER (WHERE e.text_score >= 4.0)
                FROM right_wing_motions r
                JOIN extremity_scores e ON r.motion_id = e.motion_id
                WHERE r.category = ?
                  AND r.title LIKE ?
                """,
                [MIGRATION_CATEGORY, f"%{mp}%"],
            ).fetchone()
            if cnt > 0:
                print(f" {mp:<15} ({party:<5}) | n={cnt:>3} | avg_ext={fmt_num(avg_ext, '.2f'):>4} | ≥4.0={high_cnt}")

        # Overall party shares among migration motions (all)
        print("\n--- Overall party share of migration motions (title keyword heuristic) ---")
        party_keywords = {
            "PVV": ["Wilders", "Kops", "Markuszower", "Vondeling", "Boon", "Smolders", "Ram", "Rajkowski", "Faber"],
            "FVD": ["Baudet"],
            "JA21": ["Eerdmans"],
            "BBB": ["Van der Plas", "Boomsma"],
            "SGP": ["Van Zanten"],
            "CU": ["Ceder"],
            "BVNL": ["Van Haga"],
        }

        total_migration = conn.execute(
            "SELECT COUNT(*) FROM right_wing_motions WHERE category = ?",
            [MIGRATION_CATEGORY],
        ).fetchone()[0]

        for party, mps in party_keywords.items():
            clause, patterns = like_any("title", mps)
            cnt = conn.execute(
                f"""
                SELECT COUNT(*) FROM right_wing_motions
                WHERE category = ? AND ({clause})
                """,
                [MIGRATION_CATEGORY, *patterns],
            ).fetchone()[0]
            print(f" {party:<5} | {cnt:>3} / {total_migration} ({pct(cnt, total_migration):>5.1f}%)")


def _print_sample_titles(conn, start: int, end: int, limit: int = 8) -> None:
    """Print the *limit* highest-extremity migration titles for start..end."""
    rows = conn.execute(
        """
        SELECT r.title, e.text_score, s.text_score
        FROM right_wing_motions r
        JOIN extremity_scores e ON r.motion_id = e.motion_id
        LEFT JOIN sentiment_scores s ON r.motion_id = s.motion_id
        WHERE r.category = ?
          AND r.year BETWEEN ? AND ?
        ORDER BY e.text_score DESC
        LIMIT ?
        """,
        [MIGRATION_CATEGORY, start, end, limit],
    ).fetchall()
    for title, ext, sent in rows:
        print(f" ext={fmt_num(ext, '.1f')} sent={fmt_num(sent, '+.2f'):>6} {(title or '')[:60]}")


def analyze_framing_shift() -> None:
    """3. Compare 2018-2020 vs 2023-2025 migration motions."""
    print_section("3. FRAMING SHIFT: 2018-2020 VS 2023-2025")

    periods = [
        ("2018-2020", 2018, 2020),
        ("2021-2022", 2021, 2022),
        ("2023-2025", 2023, 2025),
        ("2026", 2026, 2026),
    ]

    with closing(_conn()) as conn:
        print(f"\n{'Period':<12} {'Count':>6} {'AvgExt':>7} {'AvgSent':>8} {'≥4.0':>6} {'=5.0':>6}")
        print("-" * 55)
        for label, start, end in periods:
            # BETWEEN is inclusive, so a single-year period needs no special case.
            cnt, avg_e, avg_s, high, max_e = conn.execute(
                """
                SELECT
                    COUNT(*),
                    ROUND(AVG(e.text_score), 2),
                    ROUND(AVG(s.text_score), 3),
                    COUNT(*) FILTER (WHERE e.text_score >= 4.0),
                    COUNT(*) FILTER (WHERE e.text_score >= 5.0)
                FROM right_wing_motions r
                JOIN extremity_scores e ON r.motion_id = e.motion_id
                LEFT JOIN sentiment_scores s ON r.motion_id = s.motion_id
                WHERE r.category = ? AND r.year BETWEEN ? AND ?
                """,
                [MIGRATION_CATEGORY, start, end],
            ).fetchone()

            # Both averages are NULL for a period without any motions.
            print(
                f"{label:<12} {cnt:>6} {fmt_num(avg_e, '.2f'):>7} "
                f"{fmt_num(avg_s, '+.3f'):>8} {high:>6} {max_e:>6}"
            )

        # Sample titles from each period
        print("\n--- Sample titles: 2018-2020 (early period) ---")
        _print_sample_titles(conn, 2018, 2020)

        print("\n--- Sample titles: 2023-2025 (recent period) ---")
        _print_sample_titles(conn, 2023, 2025)

        # Keyword evolution
        print("\n--- Keyword themes in titles by period ---")
        themes = {
            "asiel": ["asiel", "asielzoeker", "asielaanvraag"],
            "immigrant": ["immigrant", "immigratie"],
            "vreemdeling": ["vreemdeling", "vreemdelingen"],
            "opvang": ["opvang", "opvangplaats", "opvangcrisis"],
            "terugkeer": ["terugkeer", "uitzetting", "uitschrijving", "afschiet"],
            "grenzen": ["grens", "grenzen", "schengen"],
            "denaturalisatie": ["denaturalisatie", "nationaliteit", "paspoort"],
            "moslim/islam": ["islam", "moslim", "imam"],
            "syrische": ["syrische", "syrie", "syrier"],
        }

        for label, start, end in [("2018-2020", 2018, 2020), ("2023-2025", 2023, 2025)]:
            print(f"\n Period: {label}")
            for theme, kws in themes.items():
                clause, patterns = like_any("LOWER(title)", kws)
                cnt = conn.execute(
                    f"""
                    SELECT COUNT(*) FROM right_wing_motions
                    WHERE category = ?
                      AND year BETWEEN ? AND ?
                      AND ({clause})
                    """,
                    [MIGRATION_CATEGORY, start, end, *patterns],
                ).fetchone()[0]
                print(f" {theme:<18} {cnt:>3}")


def analyze_cross_category() -> None:
    """4. Cross-category migration-adjacent analysis."""
    print_section("4. CROSS-CATEGORY MIGRATION-ADJACENT ANALYSIS")

    # Find migration-adjacent motions in other categories (by title keywords)
    mig_keywords = ["asiel", "asielzoeker", "vreemdeling", "immigrant", "immigratie",
                    "opvang", "terugkeer", "uitzetting", "schengen", "grens", "syrische"]
    clause, patterns = like_any("LOWER(r.title)", mig_keywords)

    with closing(_conn()) as conn:
        print("\n--- Migration-adjacent motions outside 'asiel/vreemdelingen' category ---")
        rows = conn.execute(
            f"""
            SELECT r.category, COUNT(*) as cnt,
                   ROUND(AVG(e.text_score), 2) as avg_ext,
                   ROUND(AVG(s.text_score), 3) as avg_sent
            FROM right_wing_motions r
            JOIN extremity_scores e ON r.motion_id = e.motion_id
            LEFT JOIN sentiment_scores s ON r.motion_id = s.motion_id
            WHERE r.category != ?
              AND ({clause})
            GROUP BY r.category
            ORDER BY cnt DESC
            """,
            [MIGRATION_CATEGORY, *patterns],
        ).fetchall()

        total_adjacent = sum(r[1] for r in rows)
        print(f" Total migration-adjacent in other categories: {total_adjacent}")
        print(f"\n {'Category':<25} {'Count':>6} {'AvgExt':>7} {'AvgSent':>8}")
        print(" " + "-" * 50)
        for cat, cnt, avg_e, avg_s in rows:
            print(f" {cat:<25} {cnt:>6} {fmt_num(avg_e, '.2f'):>7} {fmt_num(avg_s, '+.3f'):>8}")

        # Specific high-extremity migration-adjacent outside migration category
        print("\n--- High-extremity (≥4.0) migration-adjacent outside migration category ---")
        rows = conn.execute(
            f"""
            SELECT r.title, r.category, r.year, e.text_score, s.text_score
            FROM right_wing_motions r
            JOIN extremity_scores e ON r.motion_id = e.motion_id
            LEFT JOIN sentiment_scores s ON r.motion_id = s.motion_id
            WHERE r.category != ?
              AND e.text_score >= 4.0
              AND ({clause})
            ORDER BY e.text_score DESC, r.year DESC
            LIMIT 15
            """,
            [MIGRATION_CATEGORY, *patterns],
        ).fetchall()

        for title, cat, year, ext, sent in rows:
            print(f" [{year}] ext={ext:.1f} sent={fmt_num(sent, '+.2f'):>6} [{cat}] {title[:55]}")

        # Combined migration + migration-adjacent totals
        mig_total = conn.execute(
            "SELECT COUNT(*) FROM right_wing_motions WHERE category = ?",
            [MIGRATION_CATEGORY],
        ).fetchone()[0]
        # Denominator is read from the table rather than hard-coded, so the
        # share stays correct when the dataset changes.
        all_total = conn.execute("SELECT COUNT(*) FROM right_wing_motions").fetchone()[0]

        combined = mig_total + total_adjacent
        print("\n--- Combined migration scope ---")
        print(f" Pure migration category: {mig_total:>3} motions")
        print(f" Migration-adjacent (other): {total_adjacent:>3} motions")
        print(f" Total migration-relevant: {combined:>3} motions")
        print(f" Share of all right-wing: {pct(combined, all_total):.1f}%")


def analyze_sentiment_divergence() -> None:
    """5. Sentiment divergence: why is migration the only negative-sentiment category?"""
    print_section("5. SENTIMENT DIVERGENCE: MIGRATION VS ALL OTHER CATEGORIES")

    with closing(_conn()) as conn:
        print("\n--- Sentiment comparison (raw text score) ---")
        rows = conn.execute("""
            SELECT
                r.category,
                COUNT(*) as cnt,
                ROUND(AVG(s.text_score), 3) as avg_sent_text,
                ROUND(AVG(s.layman_score), 3) as avg_sent_layman,
                ROUND(AVG(s.layman_score - s.text_score), 3) as layman_minus_text
            FROM right_wing_motions r
            JOIN sentiment_scores s ON r.motion_id = s.motion_id
            WHERE r.category IS NOT NULL
            GROUP BY r.category
            ORDER BY avg_sent_text ASC
        """).fetchall()

        print(f" {'Category':<25} {'Count':>6} {'Text':>7} {'Layman':>7} {'L-T':>6}")
        print(" " + "-" * 55)
        for cat, cnt, st, sl, diff in rows:
            print(
                f" {cat:<25} {cnt:>6} {fmt_num(st, '+.3f'):>7} "
                f"{fmt_num(sl, '+.3f'):>7} {fmt_num(diff, '+.3f'):>6}"
            )

        # Migration-specific sentiment by extremity bucket
        print("\n--- Migration sentiment by extremity bucket ---")
        rows = conn.execute(
            """
            SELECT
                CASE
                    WHEN e.text_score < 2.0 THEN '1-2 (Low)'
                    WHEN e.text_score < 3.0 THEN '2-3 (Moderate)'
                    WHEN e.text_score < 4.0 THEN '3-4 (High)'
                    ELSE '4-5 (Very High)'
                END as bucket,
                COUNT(*) as cnt,
                ROUND(AVG(s.text_score), 3) as avg_sent_text,
                ROUND(AVG(s.layman_score), 3) as avg_sent_layman
            FROM right_wing_motions r
            JOIN extremity_scores e ON r.motion_id = e.motion_id
            JOIN sentiment_scores s ON r.motion_id = s.motion_id
            WHERE r.category = ?
            GROUP BY bucket
            ORDER BY bucket
            """,
            [MIGRATION_CATEGORY],
        ).fetchall()

        for bucket, cnt, st, sl in rows:
            print(f" {bucket:<18} n={cnt:>3} text={fmt_num(st, '+.3f')} layman={fmt_num(sl, '+.3f')}")


def main() -> None:
    """Run all five analysis sections in order and print a closing banner."""
    print("=" * 70)
    print(" DIRECTION 3: MIGRATION ↔ ANTI-DEMOCRATIC OVERLAP ANALYSIS")
    print("=" * 70)

    analyze_overlap()
    analyze_party_strategy()
    analyze_framing_shift()
    analyze_cross_category()
    analyze_sentiment_divergence()

    print("\n" + "=" * 70)
    print(" ANALYSIS COMPLETE")
    print("=" * 70)


if __name__ == "__main__":
    main()