feat(analysis): add migration anti-democratic overlap analysis

1 month ago · d170444bda
parent fbf92c82cf
commit d170444bda
1 changed file with 442 additions and 0 deletions
--- a/analysis/right_wing/direction3_migration_antidemocratic.py
+++ b/analysis/right_wing/direction3_migration_antidemocratic.py
@@ -0,0 +1,442 @@
+#!/usr/bin/env python3
+"""Direction 3: Migration ↔ Anti-Democratic Overlap Analysis.
+
+Tests the hypothesis that migration is the primary vehicle for anti-democratic
+rhetoric in right-wing parliamentary motions.
+"""
+
+from __future__ import annotations
+
+import logging
+import sys
+from pathlib import Path
+
+import duckdb
+
+# Repository root: two directory levels above the directory containing this file.
+ROOT = Path(__file__).parent.parent.parent.resolve()
+# Put the repository root at the front of sys.path (only once) — presumably so
+# project-local packages resolve when this file is run as a script; TODO confirm.
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))
+
+# Root logger at INFO level, formatted as "<timestamp> <level> <message>".
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+logger = logging.getLogger(__name__)
+
+# Database file that _conn() opens read-only.
+DB_PATH = ROOT / "data" / "motions.db"
+
+
+def _conn():
+    return duckdb.connect(str(DB_PATH), read_only=True)
+
+
+def print_section(title: str) -> None:
+    print(f"\n{'=' * 70}")
+    print(f"  {title}")
+    print(f"{'=' * 70}")
+
+
+def analyze_overlap() -> None:
+    """1. Quantify overlap: what % of high-extremity motions are migration-related?"""
+    print_section("1. OVERLAP QUANTIFICATION")
+
+    conn = _conn()
+
+    # High-extremity buckets by category
+    rows = conn.execute("""
+        SELECT
+            r.category,
+            COUNT(*) as total,
+            COUNT(*) FILTER (WHERE e.text_score >= 3.5) as high_ext,
+            COUNT(*) FILTER (WHERE e.text_score >= 4.0) as very_high_ext,
+            COUNT(*) FILTER (WHERE e.text_score >= 5.0) as max_ext,
+            ROUND(AVG(e.text_score), 2) as avg_ext,
+            ROUND(AVG(s.text_score), 3) as avg_sent
+        FROM right_wing_motions r
+        JOIN extremity_scores e ON r.motion_id = e.motion_id
+        LEFT JOIN sentiment_scores s ON r.motion_id = s.motion_id
+        WHERE r.category IS NOT NULL
+        GROUP BY r.category
+        ORDER BY high_ext DESC
+    """).fetchall()
+
+    print(f"\n{'Category':<25} {'Total':>6} {'≥3.5':>6} {'≥4.0':>6} {'=5.0':>6} {'AvgExt':>7} {'AvgSent':>8}")
+    print("-" * 70)
+    total_high = 0
+    total_very_high = 0
+    total_max = 0
+    for row in rows:
+        cat, tot, h, vh, mx, avg_e, avg_s = row
+        total_high += h
+        total_very_high += vh
+        total_max += mx
+        print(f"{cat:<25} {tot:>6} {h:>6} {vh:>6} {mx:>6} {avg_e:>7.2f} {avg_s:>+8.3f}")
+
+    # Migration share of high-extremity
+    mig_high = conn.execute("""
+        SELECT COUNT(*) FROM right_wing_motions r
+        JOIN extremity_scores e ON r.motion_id = e.motion_id
+        WHERE r.category = 'asiel/vreemdelingen' AND e.text_score >= 3.5
+    """).fetchone()[0]
+
+    mig_very_high = conn.execute("""
+        SELECT COUNT(*) FROM right_wing_motions r
+        JOIN extremity_scores e ON r.motion_id = e.motion_id
+        WHERE r.category = 'asiel/vreemdelingen' AND e.text_score >= 4.0
+    """).fetchone()[0]
+
+    mig_max = conn.execute("""
+        SELECT COUNT(*) FROM right_wing_motions r
+        JOIN extremity_scores e ON r.motion_id = e.motion_id
+        WHERE r.category = 'asiel/vreemdelingen' AND e.text_score >= 5.0
+    """).fetchone()[0]
+
+    print(f"\n--- Migration share of high-extremity motions ---")
+    print(f"  Migration motions ≥3.5 extremity: {mig_high} / {total_high} ({100*mig_high/total_high:.1f}%)")
+    print(f"  Migration motions ≥4.0 extremity: {mig_very_high} / {total_very_high} ({100*mig_very_high/total_very_high:.1f}%)")
+    print(f"  Migration motions =5.0 extremity: {mig_max} / {total_max} ({100*mig_max/total_max:.1f}%)")
+
+    # Category breakdown of ≥4.0 motions
+    print(f"\n--- Category breakdown of ≥4.0 extremity motions ---")
+    rows = conn.execute("""
+        SELECT r.category, COUNT(*) as cnt,
+               ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 1) as pct
+        FROM right_wing_motions r
+        JOIN extremity_scores e ON r.motion_id = e.motion_id
+        WHERE e.text_score >= 4.0
+        GROUP BY r.category
+        ORDER BY cnt DESC
+    """).fetchall()
+    for cat, cnt, pct in rows:
+        print(f"  {cat:<25} {cnt:>3} ({pct:>5.1f}%)")
+
+    conn.close()
+
+
+def analyze_party_strategy() -> None:
+    """2. Which parties file extreme migration motions?"""
+    print_section("2. PARTY STRATEGY: EXTREME MIGRATION MOTIONS BY PARTY")
+
+    conn = _conn()
+
+    # Need to join with motions and mp_votes to get the submitting MP's party
+    # The title prefix tells us who submitted: "Motie van het lid <name>" or "Motie van de leden <name> en <name>"
+    # We'll use mp_metadata to map MP names to parties
+
+    # First, extract the lead MP name from the title
+    print("\n--- Top 20 highest-extremity migration motions with lead MP ---")
+    rows = conn.execute("""
+        SELECT r.title, r.year, e.text_score, e.layman_score,
+               s.text_score, s.layman_score
+        FROM right_wing_motions r
+        JOIN extremity_scores e ON r.motion_id = e.motion_id
+        LEFT JOIN sentiment_scores s ON r.motion_id = s.motion_id
+        WHERE r.category = 'asiel/vreemdelingen'
+        ORDER BY e.text_score DESC, r.year DESC
+        LIMIT 20
+    """).fetchall()
+
+    for title, year, ext_t, ext_l, sent_t, sent_l in rows:
+        sent_t_str = f"{sent_t:+.2f}" if sent_t is not None else "  N/A"
+        sent_l_str = f"{sent_l:+.2f}" if sent_l is not None else "  N/A"
+        print(f"  [{year}] ext={ext_t:.1f}/{ext_l:.1f} sent={sent_t_str}/{sent_l_str} {title[:65]}")
+
+    # Party breakdown of migration motions by extremity bucket
+    # We need to parse the title to get the MP name, then map to party via mp_metadata
+    # The pattern is: "Motie van het lid <name>" or "Motie van de leden <name> en <name>"
+    # or "Gewijzigde motie van ..."
+
+    print("\n--- Party attribution of migration motions (by keyword in title) ---")
+    # Use a heuristic: known MPs from the extreme list
+    mp_parties = {
+        "Wilders": "PVV", "Baudet": "FVD", "Kops": "PVV", "Markuszower": "PVV",
+        "Vondeling": "PVV", "Boon": "PVV", "Eerdmans": "JA21", "Léon de Jong": "PVV",
+        "Van Haga": "BVNL", "Smolders": "PVV", "Van der Plas": "BBB",
+        "Van Zanten": "SGP", "Ceder": "CU", "Faber": "PVV", "Ram": "PVV",
+        "Rajkowski": "PVV", "Boomsma": "BBB",
+    }
+
+    for mp, party in mp_parties.items():
+        cnt = conn.execute(f"""
+            SELECT COUNT(*) FROM right_wing_motions r
+            JOIN extremity_scores e ON r.motion_id = e.motion_id
+            WHERE r.category = 'asiel/vreemdelingen'
+              AND r.title LIKE '%{mp}%'
+        """).fetchone()[0]
+        avg_ext = conn.execute(f"""
+            SELECT ROUND(AVG(e.text_score), 2) FROM right_wing_motions r
+            JOIN extremity_scores e ON r.motion_id = e.motion_id
+            WHERE r.category = 'asiel/vreemdelingen'
+              AND r.title LIKE '%{mp}%'
+        """).fetchone()[0]
+        high_cnt = conn.execute(f"""
+            SELECT COUNT(*) FROM right_wing_motions r
+            JOIN extremity_scores e ON r.motion_id = e.motion_id
+            WHERE r.category = 'asiel/vreemdelingen'
+              AND r.title LIKE '%{mp}%'
+              AND e.text_score >= 4.0
+        """).fetchone()[0]
+        if cnt > 0:
+            print(f"  {mp:<15} ({party:<5}) | n={cnt:>3} | avg_ext={avg_ext:>4.2f} | ≥4.0={high_cnt}")
+
+    # Overall party shares among migration motions (all)
+    print("\n--- Overall party share of migration motions (title keyword heuristic) ---")
+    party_keywords = {
+        "PVV": ["Wilders", "Kops", "Markuszower", "Vondeling", "Boon", "Smolders", "Ram", "Rajkowski", "Faber"],
+        "FVD": ["Baudet"],
+        "JA21": ["Eerdmans"],
+        "BBB": ["Van der Plas", "Boomsma"],
+        "SGP": ["Van Zanten"],
+        "CU": ["Ceder"],
+        "BVNL": ["Van Haga"],
+    }
+
+    total_migration = conn.execute("""
+        SELECT COUNT(*) FROM right_wing_motions
+        WHERE category = 'asiel/vreemdelingen'
+    """).fetchone()[0]
+
+    for party, mps in party_keywords.items():
+        conditions = " OR ".join([f"title LIKE '%{mp}%'" for mp in mps])
+        cnt = conn.execute(f"""
+            SELECT COUNT(*) FROM right_wing_motions
+            WHERE category = 'asiel/vreemdelingen' AND ({conditions})
+        """).fetchone()[0]
+        pct = 100 * cnt / total_migration if total_migration else 0
+        print(f"  {party:<5} | {cnt:>3} / {total_migration} ({pct:>5.1f}%)")
+
+    conn.close()
+
+
+def analyze_framing_shift() -> None:
+    """3. Compare 2018-2020 vs 2023-2025 migration motions."""
+    print_section("3. FRAMING SHIFT: 2018-2020 VS 2023-2025")
+
+    conn = _conn()
+
+    periods = [
+        ("2018-2020", "2018", "2020"),
+        ("2021-2022", "2021", "2022"),
+        ("2023-2025", "2023", "2025"),
+        ("2026", "2026", "2026"),
+    ]
+
+    print(f"\n{'Period':<12} {'Count':>6} {'AvgExt':>7} {'AvgSent':>8} {'≥4.0':>6} {'=5.0':>6}")
+    print("-" * 55)
+    for label, start, end in periods:
+        if start == end:
+            where = f"r.year = {start}"
+        else:
+            where = f"r.year BETWEEN {start} AND {end}"
+
+        row = conn.execute(f"""
+            SELECT
+                COUNT(*),
+                ROUND(AVG(e.text_score), 2),
+                ROUND(AVG(s.text_score), 3),
+                COUNT(*) FILTER (WHERE e.text_score >= 4.0),
+                COUNT(*) FILTER (WHERE e.text_score >= 5.0)
+            FROM right_wing_motions r
+            JOIN extremity_scores e ON r.motion_id = e.motion_id
+            LEFT JOIN sentiment_scores s ON r.motion_id = s.motion_id
+            WHERE r.category = 'asiel/vreemdelingen' AND {where}
+        """).fetchone()
+
+        cnt, avg_e, avg_s, high, max_e = row
+        avg_s_str = f"{avg_s:+.3f}" if avg_s is not None else "   N/A"
+        print(f"{label:<12} {cnt:>6} {avg_e:>7.2f} {avg_s_str:>8} {high:>6} {max_e:>6}")
+
+    # Sample titles from each period
+    print("\n--- Sample titles: 2018-2020 (early period) ---")
+    rows = conn.execute("""
+        SELECT r.title, e.text_score, s.text_score
+        FROM right_wing_motions r
+        JOIN extremity_scores e ON r.motion_id = e.motion_id
+        LEFT JOIN sentiment_scores s ON r.motion_id = s.motion_id
+        WHERE r.category = 'asiel/vreemdelingen'
+          AND r.year BETWEEN 2018 AND 2020
+        ORDER BY e.text_score DESC
+        LIMIT 8
+    """).fetchall()
+    for title, ext, sent in rows:
+        sent_str = f"{sent:+.2f}" if sent is not None else "N/A"
+        print(f"  ext={ext:.1f} sent={sent_str:>6} {title[:60]}")
+
+    print("\n--- Sample titles: 2023-2025 (recent period) ---")
+    rows = conn.execute("""
+        SELECT r.title, e.text_score, s.text_score
+        FROM right_wing_motions r
+        JOIN extremity_scores e ON r.motion_id = e.motion_id
+        LEFT JOIN sentiment_scores s ON r.motion_id = s.motion_id
+        WHERE r.category = 'asiel/vreemdelingen'
+          AND r.year BETWEEN 2023 AND 2025
+        ORDER BY e.text_score DESC
+        LIMIT 8
+    """).fetchall()
+    for title, ext, sent in rows:
+        sent_str = f"{sent:+.2f}" if sent is not None else "N/A"
+        print(f"  ext={ext:.1f} sent={sent_str:>6} {title[:60]}")
+
+    # Keyword evolution
+    print("\n--- Keyword themes in titles by period ---")
+    themes = {
+        "asiel": ["asiel", "asielzoeker", "asielaanvraag"],
+        "immigrant": ["immigrant", "immigratie"],
+        "vreemdeling": ["vreemdeling", "vreemdelingen"],
+        "opvang": ["opvang", "opvangplaats", "opvangcrisis"],
+        "terugkeer": ["terugkeer", "uitzetting", "uitschrijving", "afschiet"],
+        "grenzen": ["grens", "grenzen", "schengen"],
+        "denaturalisatie": ["denaturalisatie", "nationaliteit", "paspoort"],
+        "moslim/islam": ["islam", "moslim", "imam"],
+        "syrische": ["syrische", "syrie", "syrier"],
+    }
+
+    for label, start, end in [("2018-2020", "2018", "2020"), ("2023-2025", "2023", "2025")]:
+        print(f"\n  Period: {label}")
+        for theme, kws in themes.items():
+            conditions = " OR ".join([f"LOWER(title) LIKE '%{kw}%'" for kw in kws])
+            cnt = conn.execute(f"""
+                SELECT COUNT(*) FROM right_wing_motions
+                WHERE category = 'asiel/vreemdelingen'
+                  AND year BETWEEN {start} AND {end}
+                  AND ({conditions})
+            """).fetchone()[0]
+            print(f"    {theme:<18} {cnt:>3}")
+
+    conn.close()
+
+
+def analyze_cross_category() -> None:
+    """4. Cross-category migration-adjacent analysis."""
+    print_section("4. CROSS-CATEGORY MIGRATION-ADJACENT ANALYSIS")
+
+    conn = _conn()
+
+    # Find migration-adjacent motions in other categories (by title keywords)
+    mig_keywords = ["asiel", "asielzoeker", "vreemdeling", "immigrant", "immigratie",
+                    "opvang", "terugkeer", "uitzetting", "schengen", "grens", "syrische"]
+    conditions = " OR ".join([f"LOWER(title) LIKE '%{kw}%'" for kw in mig_keywords])
+
+    print(f"\n--- Migration-adjacent motions outside 'asiel/vreemdelingen' category ---")
+    rows = conn.execute(f"""
+        SELECT r.category, COUNT(*) as cnt,
+               ROUND(AVG(e.text_score), 2) as avg_ext,
+               ROUND(AVG(s.text_score), 3) as avg_sent
+        FROM right_wing_motions r
+        JOIN extremity_scores e ON r.motion_id = e.motion_id
+        LEFT JOIN sentiment_scores s ON r.motion_id = s.motion_id
+        WHERE r.category != 'asiel/vreemdelingen'
+          AND ({conditions})
+        GROUP BY r.category
+        ORDER BY cnt DESC
+    """).fetchall()
+
+    total_adjacent = sum(r[1] for r in rows)
+    print(f"  Total migration-adjacent in other categories: {total_adjacent}")
+    print(f"\n  {'Category':<25} {'Count':>6} {'AvgExt':>7} {'AvgSent':>8}")
+    print("  " + "-" * 50)
+    for cat, cnt, avg_e, avg_s in rows:
+        avg_s_str = f"{avg_s:+.3f}" if avg_s is not None else "   N/A"
+        print(f"  {cat:<25} {cnt:>6} {avg_e:>7.2f} {avg_s_str:>8}")
+
+    # Specific high-extremity migration-adjacent outside migration category
+    print(f"\n--- High-extremity (≥4.0) migration-adjacent outside migration category ---")
+    rows = conn.execute(f"""
+        SELECT r.title, r.category, r.year, e.text_score, s.text_score
+        FROM right_wing_motions r
+        JOIN extremity_scores e ON r.motion_id = e.motion_id
+        LEFT JOIN sentiment_scores s ON r.motion_id = s.motion_id
+        WHERE r.category != 'asiel/vreemdelingen'
+          AND e.text_score >= 4.0
+          AND ({conditions})
+        ORDER BY e.text_score DESC, r.year DESC
+        LIMIT 15
+    """).fetchall()
+
+    for title, cat, year, ext, sent in rows:
+        sent_str = f"{sent:+.2f}" if sent is not None else "N/A"
+        print(f"  [{year}] ext={ext:.1f} sent={sent_str:>6} [{cat}] {title[:55]}")
+
+    # Combined migration + migration-adjacent totals
+    mig_total = conn.execute("""
+        SELECT COUNT(*) FROM right_wing_motions
+        WHERE category = 'asiel/vreemdelingen'
+    """).fetchone()[0]
+
+    print(f"\n--- Combined migration scope ---")
+    print(f"  Pure migration category:     {mig_total:>3} motions")
+    print(f"  Migration-adjacent (other):  {total_adjacent:>3} motions")
+    print(f"  Total migration-relevant:    {mig_total + total_adjacent:>3} motions")
+    print(f"  Share of all right-wing:     {100*(mig_total + total_adjacent)/2986:.1f}%")
+
+    conn.close()
+
+
+def analyze_sentiment_divergence() -> None:
+    """5. Sentiment divergence: why is migration the only negative-sentiment category?"""
+    print_section("5. SENTIMENT DIVERGENCE: MIGRATION VS ALL OTHER CATEGORIES")
+
+    conn = _conn()
+
+    print("\n--- Sentiment comparison (raw text score) ---")
+    rows = conn.execute("""
+        SELECT
+            r.category,
+            COUNT(*) as cnt,
+            ROUND(AVG(s.text_score), 3) as avg_sent_text,
+            ROUND(AVG(s.layman_score), 3) as avg_sent_layman,
+            ROUND(AVG(s.layman_score - s.text_score), 3) as layman_minus_text
+        FROM right_wing_motions r
+        JOIN sentiment_scores s ON r.motion_id = s.motion_id
+        WHERE r.category IS NOT NULL
+        GROUP BY r.category
+        ORDER BY avg_sent_text ASC
+    """).fetchall()
+
+    print(f"  {'Category':<25} {'Count':>6} {'Text':>7} {'Layman':>7} {'L-T':>6}")
+    print("  " + "-" * 55)
+    for cat, cnt, st, sl, diff in rows:
+        print(f"  {cat:<25} {cnt:>6} {st:>+7.3f} {sl:>+7.3f} {diff:>+6.3f}")
+
+    # Migration-specific sentiment by extremity bucket
+    print("\n--- Migration sentiment by extremity bucket ---")
+    rows = conn.execute("""
+        SELECT
+            CASE
+                WHEN e.text_score < 2.0 THEN '1-2 (Low)'
+                WHEN e.text_score < 3.0 THEN '2-3 (Moderate)'
+                WHEN e.text_score < 4.0 THEN '3-4 (High)'
+                ELSE '4-5 (Very High)'
+            END as bucket,
+            COUNT(*) as cnt,
+            ROUND(AVG(s.text_score), 3) as avg_sent_text,
+            ROUND(AVG(s.layman_score), 3) as avg_sent_layman
+        FROM right_wing_motions r
+        JOIN extremity_scores e ON r.motion_id = e.motion_id
+        JOIN sentiment_scores s ON r.motion_id = s.motion_id
+        WHERE r.category = 'asiel/vreemdelingen'
+        GROUP BY bucket
+        ORDER BY bucket
+    """).fetchall()
+
+    for bucket, cnt, st, sl in rows:
+        print(f"  {bucket:<18} n={cnt:>3}  text={st:>+.3f}  layman={sl:>+.3f}")
+
+    conn.close()
+
+
+def main() -> None:
+    print("=" * 70)
+    print("  DIRECTION 3: MIGRATION ↔ ANTI-DEMOCRATIC OVERLAP ANALYSIS")
+    print("=" * 70)
+
+    analyze_overlap()
+    analyze_party_strategy()
+    analyze_framing_shift()
+    analyze_cross_category()
+    analyze_sentiment_divergence()
+
+    print("\n" + "=" * 70)
+    print("  ANALYSIS COMPLETE")
+    print("=" * 70)
+
+
+if __name__ == "__main__":
+    main()