You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
149 lines
5.0 KiB
149 lines
5.0 KiB
#!/usr/bin/env python3
|
|
"""Temporal aggregation: compute yearly trends in right-wing motion activity.
|
|
|
|
Usage:
|
|
uv run python analysis/right_wing/temporal_analysis.py
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import duckdb
|
|
import pandas as pd
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Directory three levels above this file, resolved to an absolute path.
ROOT = Path(__file__).parent.parent.parent.resolve()

# Put ROOT at the front of the import search path unless it is already listed.
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))
|
|
|
|
|
|
def _quote_table_name(name: str) -> str:
    """Return `name` as a double-quoted, optionally schema-qualified SQL identifier.

    Each dot-separated component is quoted on its own so that names such as
    ``main.summary`` keep working while arbitrary characters can no longer
    leak into the surrounding SQL statement.

    Raises:
        ValueError: If the name (or one of its dotted components) is empty,
            or if it names one of the tables the aggregation reads from.
    """
    parts = name.split(".")
    if not all(part.strip() for part in parts):
        raise ValueError(f"Invalid output table name: {name!r}")
    # The aggregation reads from these tables; writing the summary over one
    # of them would drop the source data before it can be read.
    if parts[-1].lower() in {"motions", "right_wing_motions"}:
        raise ValueError(
            f"Refusing to overwrite source table {name!r}; choose another output table."
        )
    return ".".join('"' + part.replace('"', '""') + '"' for part in parts)


def compute_yearly_summary(
    db_path: str = "data/motions.db",
    output_table: str = "yearly_right_wing_summary",
) -> dict[str, Any]:
    """Aggregate right-wing motion metrics by year.

    Creates or replaces `output_table` with one row per year found in
    `motions`, joined with per-year aggregates of the classified rows in
    `right_wing_motions`, plus year-over-year deltas.

    The previous contents of `output_table` are only replaced once the new
    summary has been computed, and the replacement runs inside a single
    transaction, so a failure leaves the existing table untouched.

    Args:
        db_path: Path to the DuckDB database file.
        output_table: Name of the summary table to (re)create. May be
            schema-qualified (``schema.table``).

    Returns:
        A dict with the keys ``rows_written``, ``year_range`` (a
        ``(min, max)`` tuple, or ``None`` when no rows were produced),
        ``total_right_wing`` and ``table``.

    Raises:
        ValueError: If `output_table` is empty or names a source table.
        FileNotFoundError: If `db_path` does not exist.
        RuntimeError: If the `right_wing_motions` table is missing.
    """
    table_sql = _quote_table_name(output_table)

    db = Path(db_path)
    if not db.exists():
        raise FileNotFoundError(f"Database not found: {db}")

    con = duckdb.connect(str(db))
    try:
        # Ensure right_wing_motions exists
        tables = {t[0] for t in con.execute("SHOW TABLES").fetchall()}
        if "right_wing_motions" not in tables:
            raise RuntimeError(
                "Table 'right_wing_motions' not found. Run classify_motions.py first."
            )

        # Build the summary with DuckDB SQL. Nothing is written yet, so an
        # error here cannot damage an existing summary table.
        df = con.execute(
            """
            WITH yearly_classified AS (
                SELECT
                    year,
                    COUNT(*) AS total_right_wing,
                    AVG(right_support) AS avg_right_support,
                    AVG(left_opposition) AS avg_left_opposition,
                    AVG(centrist_support) AS centrist_support,
                    AVG(right_keyword_matches) AS avg_right_keyword_matches
                FROM right_wing_motions
                WHERE classified = TRUE
                GROUP BY year
            ),
            yearly_total AS (
                SELECT
                    EXTRACT(YEAR FROM date) AS year,
                    COUNT(*) AS total_motions
                FROM motions
                WHERE date IS NOT NULL
                GROUP BY EXTRACT(YEAR FROM date)
            )
            SELECT
                t.year,
                COALESCE(c.total_right_wing, 0) AS total_right_wing,
                COALESCE(c.total_right_wing, 0) * 100.0 / NULLIF(t.total_motions, 0) AS pct_of_total,
                t.total_motions,
                c.avg_right_support,
                c.avg_left_opposition,
                c.centrist_support,
                c.avg_right_keyword_matches,
                NULL::DOUBLE AS extremity_index  -- placeholder for U4
            FROM yearly_total t
            LEFT JOIN yearly_classified c ON t.year = c.year
            ORDER BY t.year
            """
        ).fetchdf()

        # Compute YoY deltas in Python/pandas for simplicity. These are
        # differences between consecutive rows ordered by year; the first row
        # has no predecessor and gets NaN. Missing years are not filled in.
        df["yoy_right_wing_delta"] = df["total_right_wing"].diff()
        df["yoy_pct_delta"] = df["pct_of_total"].diff()

        # Swap the table atomically: either the new table with all rows is
        # committed, or the previous state is kept.
        con.begin()
        try:
            con.execute(f"DROP TABLE IF EXISTS {table_sql}")
            con.execute(
                f"""
                CREATE TABLE {table_sql} (
                    year INTEGER PRIMARY KEY,
                    total_right_wing INTEGER,
                    pct_of_total DOUBLE,
                    total_motions INTEGER,
                    avg_right_support DOUBLE,
                    avg_left_opposition DOUBLE,
                    centrist_support DOUBLE,
                    avg_right_keyword_matches DOUBLE,
                    extremity_index DOUBLE,
                    yoy_right_wing_delta DOUBLE,
                    yoy_pct_delta DOUBLE
                )
                """
            )
            # `df` is not a database table: DuckDB resolves it to the local
            # pandas DataFrame of the same name (replacement scan), so the
            # variable must keep this name.
            con.execute(
                f"""
                INSERT INTO {table_sql}
                SELECT
                    year, total_right_wing, pct_of_total, total_motions,
                    avg_right_support, avg_left_opposition, centrist_support,
                    avg_right_keyword_matches, extremity_index,
                    yoy_right_wing_delta, yoy_pct_delta
                FROM df
                """
            )
            con.commit()
        except Exception:
            con.rollback()
            raise

        logger.info("Wrote %d yearly rows to %s", len(df), output_table)
        return {
            "rows_written": len(df),
            "year_range": (int(df["year"].min()), int(df["year"].max())) if not df.empty else None,
            "total_right_wing": int(df["total_right_wing"].sum()) if not df.empty else 0,
            "table": output_table,
        }
    finally:
        con.close()
|
|
|
|
|
|
def main() -> int:
    """Command-line entry point.

    Parses ``--db`` and ``--output-table``, runs `compute_yearly_summary`
    with them, prints the returned summary as indented JSON and returns 0.
    """
    cli = argparse.ArgumentParser(description="Compute yearly right-wing motion trends")
    option_defaults = (
        ("--db", "data/motions.db"),
        ("--output-table", "yearly_right_wing_summary"),
    )
    for flag, default in option_defaults:
        cli.add_argument(flag, default=default)
    options = cli.parse_args()

    summary = compute_yearly_summary(
        db_path=options.db,
        output_table=options.output_table,
    )
    print(json.dumps(summary, indent=2))
    return 0
|
|
|
|
|
|
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|
|
|