You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
motief/analysis/right_wing/temporal_analysis.py

149 lines
5.0 KiB

#!/usr/bin/env python3
"""Temporal aggregation: compute yearly trends in right-wing motion activity.
Usage:
uv run python analysis/right_wing/temporal_analysis.py
"""
from __future__ import annotations
import argparse
import json
import logging
import sys
from pathlib import Path
from typing import Any
import duckdb
import pandas as pd
# Module-level logger; handlers and levels are left to the caller / entry point.
logger = logging.getLogger(__name__)

# ROOT is reached by three ``.parent`` hops from this file's path. It is put at
# the front of sys.path (only if not already present) so imports resolve
# against that directory when the file is executed directly as a script.
ROOT = Path(__file__).parent.parent.parent.resolve()
_root_str = str(ROOT)
if _root_str not in sys.path:
    sys.path.insert(0, _root_str)
def compute_yearly_summary(
    db_path: str = "data/motions.db",
    output_table: str = "yearly_right_wing_summary",
) -> dict[str, Any]:
    """Aggregate right-wing motion metrics by year.

    Creates or replaces ``output_table`` with one row per year present in
    ``motions.date``. Each row holds the number of classified rows from
    ``right_wing_motions`` for that year, their share of all motions, the
    per-year averages of the support/opposition/keyword columns, a NULL
    ``extremity_index`` placeholder, and year-over-year deltas.

    The summary is computed before the existing table is touched, and the
    drop/create/insert runs inside a single transaction, so a failure leaves
    any previous version of ``output_table`` in place.

    Args:
        db_path: Path to the DuckDB database file.
        output_table: Name of the table to (re)create. Must be a plain ASCII
            identifier, optionally schema-qualified with dots.

    Returns:
        A dict with ``rows_written``, ``year_range`` (``(min, max)`` or
        ``None`` when no rows were produced), ``total_right_wing`` and
        ``table``.

    Raises:
        ValueError: If ``output_table`` is not a valid identifier.
        FileNotFoundError: If ``db_path`` does not exist.
        RuntimeError: If a required source table is missing.
    """
    # Table names cannot be bound as SQL parameters, so the name is validated
    # before it is interpolated into the statements below.
    name_parts = output_table.split(".")
    if not all(part.isascii() and part.isidentifier() for part in name_parts):
        raise ValueError(f"Invalid output table name: {output_table!r}")

    db = Path(db_path)
    if not db.exists():
        raise FileNotFoundError(f"Database not found: {db}")

    con = duckdb.connect(str(db))
    try:
        # Fail early with a clear message if either source table is missing.
        tables = {row[0] for row in con.execute("SHOW TABLES").fetchall()}
        if "right_wing_motions" not in tables:
            raise RuntimeError(
                "Table 'right_wing_motions' not found. Run classify_motions.py first."
            )
        if "motions" not in tables:
            raise RuntimeError("Table 'motions' not found.")

        # Aggregate in DuckDB and fetch the result directly; nothing is written
        # until the summary has been computed successfully.
        df = con.execute(
            """
            WITH yearly_classified AS (
                SELECT
                    year,
                    COUNT(*) AS total_right_wing,
                    AVG(right_support) AS avg_right_support,
                    AVG(left_opposition) AS avg_left_opposition,
                    AVG(centrist_support) AS centrist_support,
                    AVG(right_keyword_matches) AS avg_right_keyword_matches
                FROM right_wing_motions
                WHERE classified = TRUE
                GROUP BY year
            ),
            yearly_total AS (
                SELECT
                    EXTRACT(YEAR FROM date) AS year,
                    COUNT(*) AS total_motions
                FROM motions
                WHERE date IS NOT NULL
                GROUP BY EXTRACT(YEAR FROM date)
            )
            SELECT
                t.year,
                COALESCE(c.total_right_wing, 0) AS total_right_wing,
                COALESCE(c.total_right_wing, 0) * 100.0 / NULLIF(t.total_motions, 0) AS pct_of_total,
                t.total_motions,
                c.avg_right_support,
                c.avg_left_opposition,
                c.centrist_support,
                c.avg_right_keyword_matches,
                NULL::DOUBLE AS extremity_index -- placeholder for U4
            FROM yearly_total t
            LEFT JOIN yearly_classified c ON t.year = c.year
            ORDER BY t.year
            """
        ).fetchdf()

        # Deltas are taken against the previous *row* (rows are ordered by
        # year), so a gap in years yields a delta spanning that gap.
        df["yoy_right_wing_delta"] = df["total_right_wing"].diff()
        df["yoy_pct_delta"] = df["pct_of_total"].diff()

        # Swap in the new table atomically.
        con.begin()
        try:
            con.execute(f"DROP TABLE IF EXISTS {output_table}")
            con.execute(
                f"""
                CREATE TABLE {output_table} (
                    year INTEGER PRIMARY KEY,
                    total_right_wing INTEGER,
                    pct_of_total DOUBLE,
                    total_motions INTEGER,
                    avg_right_support DOUBLE,
                    avg_left_opposition DOUBLE,
                    centrist_support DOUBLE,
                    avg_right_keyword_matches DOUBLE,
                    extremity_index DOUBLE,
                    yoy_right_wing_delta DOUBLE,
                    yoy_pct_delta DOUBLE
                )
                """
            )
            # ``df`` in the FROM clause refers to the local DataFrame above,
            # which DuckDB picks up through its pandas replacement scan.
            con.execute(
                f"""
                INSERT INTO {output_table}
                SELECT
                    year, total_right_wing, pct_of_total, total_motions,
                    avg_right_support, avg_left_opposition, centrist_support,
                    avg_right_keyword_matches, extremity_index,
                    yoy_right_wing_delta, yoy_pct_delta
                FROM df
                """
            )
            con.commit()
        except Exception:
            # Undo the partial replacement so the previous table survives.
            con.rollback()
            raise

        logger.info("Wrote %d yearly rows to %s", len(df), output_table)
        return {
            "rows_written": len(df),
            "year_range": (int(df["year"].min()), int(df["year"].max())) if not df.empty else None,
            "total_right_wing": int(df["total_right_wing"].sum()) if not df.empty else 0,
            "table": output_table,
        }
    finally:
        con.close()
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: build the yearly summary and print it as JSON.

    Args:
        argv: Arguments to parse instead of ``sys.argv[1:]``. ``None`` (the
            default) keeps the normal command-line behaviour.

    Returns:
        0 on success. Errors from ``compute_yearly_summary`` propagate as
        exceptions.
    """
    parser = argparse.ArgumentParser(description="Compute yearly right-wing motion trends")
    parser.add_argument(
        "--db",
        default="data/motions.db",
        help="Path to the DuckDB database file",
    )
    parser.add_argument(
        "--output-table",
        default="yearly_right_wing_summary",
        help="Name of the summary table to create or replace",
    )
    args = parser.parse_args(argv)

    # Without a handler the module's INFO messages are dropped when run as a
    # script. basicConfig does nothing if the root logger is already configured.
    logging.basicConfig(level=logging.INFO)

    result = compute_yearly_summary(db_path=args.db, output_table=args.output_table)
    print(json.dumps(result, indent=2))
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())