Implements U3: temporal_analysis.py computes yearly_summary from the right_wing_motions table (U2 output).

Metrics per year:
- total_right_wing, pct_of_total, total_motions
- avg_right_support, avg_left_opposition, centrist_support
- avg_right_keyword_matches, extremity_index (U4 placeholder)
- yoy_right_wing_delta, yoy_pct_delta

Key finding: right-wing motions grew from ~4% (2018) to ~12% (2024-2025) of all motions, with rising centrist support over time.

main
parent
d3dfb0ce2f
commit
1bc83c4384
@ -0,0 +1,149 @@ |
||||
#!/usr/bin/env python3 |
||||
"""Temporal aggregation: compute yearly trends in right-wing motion activity. |
||||
|
||||
Usage: |
||||
uv run python analysis/right_wing/temporal_analysis.py |
||||
""" |
||||
|
||||
from __future__ import annotations |
||||
|
||||
import argparse |
||||
import json |
||||
import logging |
||||
import sys |
||||
from pathlib import Path |
||||
from typing import Any |
||||
|
||||
import duckdb |
||||
import pandas as pd |
||||
|
||||
# Directory three levels above this file (presumably the repository root —
# confirm against the project layout). It is put at the front of sys.path so
# that imports relative to it resolve when this file is run as a script.
ROOT = Path(__file__).parent.parent.parent.resolve()

if not sys.path.count(str(ROOT)):
    sys.path.insert(0, str(ROOT))

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
||||
|
||||
|
||||
def _validate_table_name(name: str) -> str:
    """Return *name* unchanged if it is safe to interpolate into SQL.

    The name may be a plain identifier or a dot-qualified one
    (``schema.table``). Each component must be a valid identifier (letters,
    digits and underscores, not starting with a digit), so quotes, spaces and
    statement separators can never reach the SQL text. The last component
    must not be one of the input tables, because the output table is dropped
    before it is rebuilt.

    Raises:
        ValueError: If the name is not a valid identifier or names an input
            table.
    """
    parts = name.split(".") if isinstance(name, str) else []
    if not parts or not all(part.isidentifier() for part in parts):
        raise ValueError(f"Invalid table name: {name!r}")
    if parts[-1].lower() in {"motions", "right_wing_motions"}:
        raise ValueError(f"Output table must not be an input table: {name!r}")
    return name


def compute_yearly_summary(
    db_path: str = "data/motions.db",
    output_table: str = "yearly_right_wing_summary",
) -> dict[str, Any]:
    """Aggregate right-wing motion metrics by year.

    Reads `right_wing_motions` (rows with ``classified = TRUE``) and `motions`
    (rows with a non-NULL ``date``), builds one row per year found in
    `motions`, and creates or replaces `output_table` with the result. The
    year-over-year delta columns are the difference to the previous row of
    the year-ordered result; the first row has no delta.

    Args:
        db_path: Path to the DuckDB database file.
        output_table: Name of the summary table to create or replace. Must be
            a plain or dot-qualified identifier and must not be one of the
            input tables.

    Returns:
        A dict with ``rows_written``, ``year_range`` (``(min, max)`` or
        ``None`` when there are no rows), ``total_right_wing`` and ``table``.

    Raises:
        ValueError: If `output_table` is not a safe table name.
        FileNotFoundError: If `db_path` does not exist.
        RuntimeError: If the `right_wing_motions` table is missing.
    """
    # Validate before any I/O: the name is interpolated into SQL below.
    table = _validate_table_name(output_table)

    db = Path(db_path)
    if not db.exists():
        raise FileNotFoundError(f"Database not found: {db}")

    con = duckdb.connect(str(db))
    try:
        # Ensure right_wing_motions exists
        tables = {row[0] for row in con.execute("SHOW TABLES").fetchall()}
        if "right_wing_motions" not in tables:
            raise RuntimeError(
                "Table 'right_wing_motions' not found. Run classify_motions.py first."
            )

        # Run the aggregation first, without touching the output table, so a
        # failing query cannot destroy an existing summary.
        df = con.execute(
            """
            WITH yearly_classified AS (
                SELECT
                    year,
                    COUNT(*) AS total_right_wing,
                    AVG(right_support) AS avg_right_support,
                    AVG(left_opposition) AS avg_left_opposition,
                    AVG(centrist_support) AS centrist_support,
                    AVG(right_keyword_matches) AS avg_right_keyword_matches
                FROM right_wing_motions
                WHERE classified = TRUE
                GROUP BY year
            ),
            yearly_total AS (
                SELECT
                    EXTRACT(YEAR FROM date) AS year,
                    COUNT(*) AS total_motions
                FROM motions
                WHERE date IS NOT NULL
                GROUP BY EXTRACT(YEAR FROM date)
            )
            SELECT
                t.year,
                COALESCE(c.total_right_wing, 0) AS total_right_wing,
                COALESCE(c.total_right_wing, 0) * 100.0 / NULLIF(t.total_motions, 0) AS pct_of_total,
                t.total_motions,
                c.avg_right_support,
                c.avg_left_opposition,
                c.centrist_support,
                c.avg_right_keyword_matches,
                NULL::DOUBLE AS extremity_index  -- placeholder for U4
            FROM yearly_total t
            LEFT JOIN yearly_classified c ON t.year = c.year
            ORDER BY t.year
            """
        ).fetchdf()

        # Compute YoY deltas in Python/pandas for simplicity.
        # NOTE: diff() compares adjacent rows, so a year missing from
        # `motions` makes the next delta span more than one year.
        df["yoy_right_wing_delta"] = df["total_right_wing"].diff()
        df["yoy_pct_delta"] = df["pct_of_total"].diff()

        # Rebuild the output table atomically: either the new table is fully
        # written or the previous state is restored.
        con.begin()
        try:
            con.execute(f"DROP TABLE IF EXISTS {table}")
            con.execute(
                f"""
                CREATE TABLE {table} (
                    year INTEGER PRIMARY KEY,
                    total_right_wing INTEGER,
                    pct_of_total DOUBLE,
                    total_motions INTEGER,
                    avg_right_support DOUBLE,
                    avg_left_opposition DOUBLE,
                    centrist_support DOUBLE,
                    avg_right_keyword_matches DOUBLE,
                    extremity_index DOUBLE,
                    yoy_right_wing_delta DOUBLE,
                    yoy_pct_delta DOUBLE
                )
                """
            )
            # `df` is resolved by DuckDB from the local pandas DataFrame.
            con.execute(
                f"""
                INSERT INTO {table}
                SELECT
                    year, total_right_wing, pct_of_total, total_motions,
                    avg_right_support, avg_left_opposition, centrist_support,
                    avg_right_keyword_matches, extremity_index,
                    yoy_right_wing_delta, yoy_pct_delta
                FROM df
                """
            )
            con.commit()
        except Exception:
            con.rollback()
            raise

        logger.info("Wrote %d yearly rows to %s", len(df), output_table)
        return {
            "rows_written": len(df),
            "year_range": (int(df["year"].min()), int(df["year"].max())) if not df.empty else None,
            "total_right_wing": int(df["total_right_wing"].sum()) if not df.empty else 0,
            "table": output_table,
        }
    finally:
        con.close()
||||
|
||||
|
||||
def main() -> int:
    """Parse CLI options, compute the yearly summary, and print it as JSON.

    Options:
        --db: Path to the database file.
        --output-table: Name of the summary table to write.

    Returns:
        The process exit status; 0 once the summary has been printed.
    """
    cli = argparse.ArgumentParser(description="Compute yearly right-wing motion trends")
    cli.add_argument("--db", default="data/motions.db")
    cli.add_argument("--output-table", default="yearly_right_wing_summary")
    options = cli.parse_args()

    summary = compute_yearly_summary(
        db_path=options.db,
        output_table=options.output_table,
    )
    print(json.dumps(summary, indent=2))
    return 0
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
||||
Loading…
Reference in new issue