Implements U3: temporal_analysis.py computes yearly_summary from the right_wing_motions table (U2 output).

Metrics per year:
- total_right_wing, pct_of_total, total_motions
- avg_right_support, avg_left_opposition, centrist_support
- avg_right_keyword_matches, extremity_index (U4 placeholder)
- yoy_right_wing_delta, yoy_pct_delta

Key finding: right-wing motions grew from ~4% (2018) to ~12% (2024-2025) of all motions, with rising centrist support over time.

main
parent
d3dfb0ce2f
commit
1bc83c4384
@ -0,0 +1,149 @@ |
|||||||
|
#!/usr/bin/env python3 |
||||||
|
"""Temporal aggregation: compute yearly trends in right-wing motion activity. |
||||||
|
|
||||||
|
Usage: |
||||||
|
uv run python analysis/right_wing/temporal_analysis.py |
||||||
|
""" |
||||||
|
|
||||||
|
from __future__ import annotations |
||||||
|
|
||||||
|
import argparse |
||||||
|
import json |
||||||
|
import logging |
||||||
|
import sys |
||||||
|
from pathlib import Path |
||||||
|
from typing import Any |
||||||
|
|
||||||
|
import duckdb |
||||||
|
import pandas as pd |
||||||
|
|
||||||
|
# Directory three levels above this file, resolved to an absolute path.
# NOTE(review): presumably the repository root (the usage line runs this file
# as analysis/right_wing/temporal_analysis.py) — confirm.
ROOT = Path(__file__).parent.parent.parent.resolve()
# Put that directory at the front of sys.path, at most once.
# NOTE(review): presumably so project-local packages resolve when the file is
# run directly as a script; no such import is visible in this file — confirm
# it is still needed.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
||||||
|
|
||||||
|
|
||||||
|
def _validate_table_name(name: str) -> str:
    """Return `name` unchanged if it is a safe SQL table identifier, else raise.

    Table names cannot be bound as query parameters, so the name ends up
    interpolated into the SQL text. Restricting it to ASCII identifiers
    (optionally schema-qualified with dots) keeps arbitrary SQL out of the
    DROP / CREATE / INSERT statements.

    Raises:
        ValueError: If `name` is not a string of dot-separated identifiers.
    """
    parts = name.split(".") if isinstance(name, str) else []
    if not parts or not all(p.isascii() and p.isidentifier() for p in parts):
        raise ValueError(f"Invalid output table name: {name!r}")
    return name


def compute_yearly_summary(
    db_path: str = "data/motions.db",
    output_table: str = "yearly_right_wing_summary",
) -> dict[str, Any]:
    """Aggregate right-wing motion metrics by year.

    Creates or replaces `output_table` with one row per calendar year that
    has at least one dated row in `motions`. Years without classified
    right-wing motions get a count of 0 and NULL averages.

    The summary is computed first and the table is then swapped inside a
    single transaction, so a failure at any point leaves a previously
    written summary table untouched.

    Args:
        db_path: Path to the DuckDB database file.
        output_table: Name of the table to (re)create. Must be a plain or
            schema-qualified identifier.

    Returns:
        A dict with keys ``rows_written`` (int), ``year_range``
        (``(min_year, max_year)`` or ``None`` when no rows were produced),
        ``total_right_wing`` (int) and ``table`` (the output table name).

    Raises:
        ValueError: If `output_table` is not a valid identifier.
        FileNotFoundError: If `db_path` does not exist.
        RuntimeError: If the `right_wing_motions` table is missing.
    """
    output_table = _validate_table_name(output_table)

    db = Path(db_path)
    if not db.exists():
        raise FileNotFoundError(f"Database not found: {db}")

    con = duckdb.connect(str(db))
    try:
        # Ensure right_wing_motions exists
        tables = {t[0] for t in con.execute("SHOW TABLES").fetchall()}
        if "right_wing_motions" not in tables:
            raise RuntimeError(
                "Table 'right_wing_motions' not found. Run classify_motions.py first."
            )

        # Aggregate in DuckDB. Nothing is dropped or written yet, so if this
        # query fails the existing summary table (if any) is left as it was.
        df = con.execute(
            """
            WITH yearly_classified AS (
                SELECT
                    year,
                    COUNT(*) AS total_right_wing,
                    AVG(right_support) AS avg_right_support,
                    AVG(left_opposition) AS avg_left_opposition,
                    AVG(centrist_support) AS centrist_support,
                    AVG(right_keyword_matches) AS avg_right_keyword_matches
                FROM right_wing_motions
                WHERE classified = TRUE
                GROUP BY year
            ),
            yearly_total AS (
                SELECT
                    EXTRACT(YEAR FROM date) AS year,
                    COUNT(*) AS total_motions
                FROM motions
                WHERE date IS NOT NULL
                GROUP BY EXTRACT(YEAR FROM date)
            )
            SELECT
                t.year,
                COALESCE(c.total_right_wing, 0) AS total_right_wing,
                COALESCE(c.total_right_wing, 0) * 100.0 / NULLIF(t.total_motions, 0) AS pct_of_total,
                t.total_motions,
                c.avg_right_support,
                c.avg_left_opposition,
                c.centrist_support,
                c.avg_right_keyword_matches,
                NULL::DOUBLE AS extremity_index  -- placeholder for U4
            FROM yearly_total t
            LEFT JOIN yearly_classified c ON t.year = c.year
            ORDER BY t.year
            """
        ).fetchdf()

        # Year-over-year deltas are row-to-row differences in year order; the
        # first row has no predecessor and gets NaN. A calendar year with no
        # dated motions has no row, so the delta after it spans the gap.
        df["yoy_right_wing_delta"] = df["total_right_wing"].diff()
        df["yoy_pct_delta"] = df["pct_of_total"].diff()

        # Swap the table atomically: either the new summary is fully written
        # or the previous state is restored.
        con.begin()
        try:
            con.execute(f"DROP TABLE IF EXISTS {output_table}")
            con.execute(
                f"""
                CREATE TABLE {output_table} (
                    year INTEGER PRIMARY KEY,
                    total_right_wing INTEGER,
                    pct_of_total DOUBLE,
                    total_motions INTEGER,
                    avg_right_support DOUBLE,
                    avg_left_opposition DOUBLE,
                    centrist_support DOUBLE,
                    avg_right_keyword_matches DOUBLE,
                    extremity_index DOUBLE,
                    yoy_right_wing_delta DOUBLE,
                    yoy_pct_delta DOUBLE
                )
                """
            )
            # `df` is resolved by DuckDB from the local pandas DataFrame of
            # the same name, so the variable must keep this name.
            con.execute(
                f"""
                INSERT INTO {output_table}
                SELECT
                    year, total_right_wing, pct_of_total, total_motions,
                    avg_right_support, avg_left_opposition, centrist_support,
                    avg_right_keyword_matches, extremity_index,
                    yoy_right_wing_delta, yoy_pct_delta
                FROM df
                """
            )
            con.commit()
        except Exception:
            con.rollback()
            raise

        logger.info("Wrote %d yearly rows to %s", len(df), output_table)
        return {
            "rows_written": len(df),
            "year_range": (int(df["year"].min()), int(df["year"].max())) if not df.empty else None,
            "total_right_wing": int(df["total_right_wing"].sum()) if not df.empty else 0,
            "table": output_table,
        }
    finally:
        con.close()
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Command-line entry point: build the yearly summary and print a report.

    Parses ``--db`` and ``--output-table``, runs `compute_yearly_summary`,
    and prints its result dict to stdout as indented JSON.

    Returns:
        0 on success; 1 if the summary could not be built because of a
        missing database file, a missing prerequisite table, or an invalid
        argument value. The reason is logged as an error.
    """
    parser = argparse.ArgumentParser(description="Compute yearly right-wing motion trends")
    parser.add_argument("--db", default="data/motions.db", help="path to the DuckDB database file")
    parser.add_argument(
        "--output-table",
        default="yearly_right_wing_summary",
        help="name of the summary table to create or replace",
    )
    args = parser.parse_args()

    # Without a configured handler, this module's INFO-level progress message
    # is silently dropped. basicConfig logs to stderr, so the JSON on stdout
    # stays machine-readable.
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s")

    try:
        result = compute_yearly_summary(db_path=args.db, output_table=args.output_table)
    except (FileNotFoundError, RuntimeError, ValueError) as exc:
        # Expected, user-facing failures: report them without a traceback.
        logger.error("%s", exc)
        return 1

    print(json.dumps(result, indent=2))
    return 0
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run the CLI and hand main()'s return value to the interpreter as the
    # process exit status.
    sys.exit(main())
||||||
Loading…
Reference in new issue