diff --git a/analysis/right_wing/temporal_analysis.py b/analysis/right_wing/temporal_analysis.py new file mode 100644 index 0000000..ac99d42 --- /dev/null +++ b/analysis/right_wing/temporal_analysis.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +"""Temporal aggregation: compute yearly trends in right-wing motion activity. + +Usage: + uv run python analysis/right_wing/temporal_analysis.py +""" + +from __future__ import annotations + +import argparse +import json +import logging +import sys +from pathlib import Path +from typing import Any + +import duckdb +import pandas as pd + +ROOT = Path(__file__).parent.parent.parent.resolve() +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +logger = logging.getLogger(__name__) + + +def compute_yearly_summary( + db_path: str = "data/motions.db", + output_table: str = "yearly_right_wing_summary", +) -> dict[str, Any]: + """Aggregate right-wing motion metrics by year. + + Creates or replaces `output_table` with yearly summary statistics. + """ + db = Path(db_path) + if not db.exists(): + raise FileNotFoundError(f"Database not found: {db}") + + con = duckdb.connect(str(db)) + try: + # Ensure right_wing_motions exists + tables = {t[0] for t in con.execute("SHOW TABLES").fetchall()} + if "right_wing_motions" not in tables: + raise RuntimeError( + "Table 'right_wing_motions' not found. Run classify_motions.py first." + ) + + # Build summary using DuckDB SQL for efficiency + con.execute(f"DROP TABLE IF EXISTS {output_table}") + con.execute( + f""" + CREATE TABLE {output_table} AS + WITH yearly_classified AS ( + SELECT + year, + COUNT(*) AS total_right_wing, + AVG(right_support) AS avg_right_support, + AVG(left_opposition) AS avg_left_opposition, + AVG(centrist_support) AS centrist_support, + AVG(right_keyword_matches) AS avg_right_keyword_matches + FROM right_wing_motions + WHERE classified = TRUE + GROUP BY year + ), + yearly_total AS ( + SELECT + EXTRACT(YEAR FROM date) AS year, + COUNT(*) AS total_motions + FROM motions + WHERE date IS NOT NULL + GROUP BY EXTRACT(YEAR FROM date) + ) + SELECT + t.year, + COALESCE(c.total_right_wing, 0) AS total_right_wing, + COALESCE(c.total_right_wing, 0) * 100.0 / NULLIF(t.total_motions, 0) AS pct_of_total, + t.total_motions, + c.avg_right_support, + c.avg_left_opposition, + c.centrist_support, + c.avg_right_keyword_matches, + NULL::DOUBLE AS extremity_index -- placeholder for U4 + FROM yearly_total t + LEFT JOIN yearly_classified c ON t.year = c.year + ORDER BY t.year + """ + ) + + # Compute YoY deltas in Python/pandas for simplicity + df = con.execute(f"SELECT * FROM {output_table} ORDER BY year").fetchdf() + df["yoy_right_wing_delta"] = df["total_right_wing"].diff() + df["yoy_pct_delta"] = df["pct_of_total"].diff() + + # Replace table with enriched version + con.execute(f"DROP TABLE {output_table}") + con.execute( + f""" + CREATE TABLE {output_table} ( + year INTEGER PRIMARY KEY, + total_right_wing INTEGER, + pct_of_total DOUBLE, + total_motions INTEGER, + avg_right_support DOUBLE, + avg_left_opposition DOUBLE, + centrist_support DOUBLE, + avg_right_keyword_matches DOUBLE, + extremity_index DOUBLE, + yoy_right_wing_delta DOUBLE, + yoy_pct_delta DOUBLE + ) + """ + ) + con.execute( + f""" + INSERT INTO {output_table} + SELECT + year, total_right_wing, pct_of_total, total_motions, + avg_right_support, avg_left_opposition, centrist_support, + avg_right_keyword_matches, extremity_index, + yoy_right_wing_delta, yoy_pct_delta + FROM df + """ + ) + con.commit() + + logger.info("Wrote %d yearly rows to %s", len(df), output_table) + return { + "rows_written": len(df), + "year_range": (int(df["year"].min()), int(df["year"].max())) if not df.empty else None, + "total_right_wing": int(df["total_right_wing"].sum()) if not df.empty else 0, + "table": output_table, + } + finally: + con.close() + + +def main() -> int: + parser = argparse.ArgumentParser(description="Compute yearly right-wing motion trends") + parser.add_argument("--db", default="data/motions.db") + parser.add_argument("--output-table", default="yearly_right_wing_summary") + args = parser.parse_args() + + result = compute_yearly_summary(db_path=args.db, output_table=args.output_table) + print(json.dumps(result, indent=2)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main())