#!/usr/bin/env python3 """Temporal aggregation: compute yearly trends in right-wing motion activity. Usage: uv run python analysis/right_wing/temporal_analysis.py """ from __future__ import annotations import argparse import json import logging import sys from pathlib import Path from typing import Any import duckdb import pandas as pd ROOT = Path(__file__).parent.parent.parent.resolve() if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) logger = logging.getLogger(__name__) def compute_yearly_summary( db_path: str = "data/motions.db", output_table: str = "yearly_right_wing_summary", ) -> dict[str, Any]: """Aggregate right-wing motion metrics by year. Creates or replaces `output_table` with yearly summary statistics. """ db = Path(db_path) if not db.exists(): raise FileNotFoundError(f"Database not found: {db}") con = duckdb.connect(str(db)) try: # Ensure right_wing_motions exists tables = {t[0] for t in con.execute("SHOW TABLES").fetchall()} if "right_wing_motions" not in tables: raise RuntimeError( "Table 'right_wing_motions' not found. Run classify_motions.py first." ) # Build summary using DuckDB SQL for efficiency con.execute(f"DROP TABLE IF EXISTS {output_table}") con.execute( f""" CREATE TABLE {output_table} AS WITH yearly_classified AS ( SELECT year, COUNT(*) AS total_right_wing, AVG(right_support) AS avg_right_support, AVG(left_opposition) AS avg_left_opposition, AVG(centrist_support) AS centrist_support, AVG(right_keyword_matches) AS avg_right_keyword_matches FROM right_wing_motions WHERE classified = TRUE GROUP BY year ), yearly_total AS ( SELECT EXTRACT(YEAR FROM date) AS year, COUNT(*) AS total_motions FROM motions WHERE date IS NOT NULL GROUP BY EXTRACT(YEAR FROM date) ) SELECT t.year, COALESCE(c.total_right_wing, 0) AS total_right_wing, COALESCE(c.total_right_wing, 0) * 100.0 / NULLIF(t.total_motions, 0) AS pct_of_total, t.total_motions, c.avg_right_support, c.avg_left_opposition, c.centrist_support, c.avg_right_keyword_matches, NULL::DOUBLE AS extremity_index -- placeholder for U4 FROM yearly_total t LEFT JOIN yearly_classified c ON t.year = c.year ORDER BY t.year """ ) # Compute YoY deltas in Python/pandas for simplicity df = con.execute(f"SELECT * FROM {output_table} ORDER BY year").fetchdf() df["yoy_right_wing_delta"] = df["total_right_wing"].diff() df["yoy_pct_delta"] = df["pct_of_total"].diff() # Replace table with enriched version con.execute(f"DROP TABLE {output_table}") con.execute( f""" CREATE TABLE {output_table} ( year INTEGER PRIMARY KEY, total_right_wing INTEGER, pct_of_total DOUBLE, total_motions INTEGER, avg_right_support DOUBLE, avg_left_opposition DOUBLE, centrist_support DOUBLE, avg_right_keyword_matches DOUBLE, extremity_index DOUBLE, yoy_right_wing_delta DOUBLE, yoy_pct_delta DOUBLE ) """ ) con.execute( f""" INSERT INTO {output_table} SELECT year, total_right_wing, pct_of_total, total_motions, avg_right_support, avg_left_opposition, centrist_support, avg_right_keyword_matches, extremity_index, yoy_right_wing_delta, yoy_pct_delta FROM df """ ) con.commit() logger.info("Wrote %d yearly rows to %s", len(df), output_table) return { "rows_written": len(df), "year_range": (int(df["year"].min()), int(df["year"].max())) if not df.empty else None, "total_right_wing": int(df["total_right_wing"].sum()) if not df.empty else 0, "table": output_table, } finally: con.close() def main() -> int: parser = argparse.ArgumentParser(description="Compute yearly right-wing motion trends") parser.add_argument("--db", default="data/motions.db") parser.add_argument("--output-table", default="yearly_right_wing_summary") args = parser.parse_args() result = compute_yearly_summary(db_path=args.db, output_table=args.output_table) print(json.dumps(result, indent=2)) return 0 if __name__ == "__main__": raise SystemExit(main())