#!/usr/bin/env python3
"""Temporal aggregation: compute yearly trends in right-wing motion activity.

Usage:
    uv run python analysis/right_wing/temporal_analysis.py
"""

from __future__ import annotations

import argparse
import json
import logging
import sys
from pathlib import Path
from typing import Any

import duckdb
import pandas as pd

# ROOT is the directory two levels above this file's own directory, made
# absolute. It is put at the front of sys.path (once) so that imports
# rooted at that directory work when this file is executed as a script.
ROOT = Path(__file__).parent.parent.parent.resolve()
if not any(entry == str(ROOT) for entry in sys.path):
    sys.path.insert(0, str(ROOT))

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def _validate_output_table(name: str) -> str:
    """Return `name` if it is safe to interpolate into SQL as a table name.

    Accepts plain identifiers, optionally schema-qualified with dots
    (e.g. ``main.yearly_summary``). Rejects anything else, and rejects the
    source tables this script reads from so they can never be overwritten.
    """
    parts = name.split(".")
    # "".isidentifier() is False, so empty names and "a..b" are rejected too.
    if not all(part.isidentifier() for part in parts):
        raise ValueError(f"Invalid output table name: {name!r}")
    if parts[-1].lower() in ("motions", "right_wing_motions"):
        raise ValueError(f"Output table must not be a source table: {name!r}")
    return name


def compute_yearly_summary(
    db_path: str = "data/motions.db",
    output_table: str = "yearly_right_wing_summary",
) -> dict[str, Any]:
    """Aggregate right-wing motion metrics by year.

    Creates or replaces `output_table` with one row per calendar year found
    in `motions.date`, holding the count and share of classified right-wing
    motions, the yearly averages of the support/opposition/keyword columns of
    `right_wing_motions`, a NULL `extremity_index` placeholder, and the
    row-to-row deltas of the count and the share.

    The replacement is done inside a single transaction, so a failure leaves
    any previously existing `output_table` untouched.

    Args:
        db_path: Path to the DuckDB database file.
        output_table: Name of the table to (re)create. Must be a plain,
            optionally dot-qualified identifier and must not be one of the
            source tables (`motions`, `right_wing_motions`).

    Returns:
        A dict with `rows_written`, `year_range` (a `(min, max)` tuple, or
        None when no rows were produced), `total_right_wing` and `table`.

    Raises:
        ValueError: If `output_table` is not an acceptable table name.
        FileNotFoundError: If `db_path` does not exist.
        RuntimeError: If a required source table is missing.
    """
    # Validate before touching the database: the name is interpolated into SQL.
    table = _validate_output_table(output_table)

    db = Path(db_path)
    if not db.exists():
        raise FileNotFoundError(f"Database not found: {db}")

    con = duckdb.connect(str(db))
    try:
        # Ensure both source tables exist before running the aggregation.
        tables = {t[0] for t in con.execute("SHOW TABLES").fetchall()}
        if "right_wing_motions" not in tables:
            raise RuntimeError(
                "Table 'right_wing_motions' not found. Run classify_motions.py first."
            )
        if "motions" not in tables:
            raise RuntimeError("Table 'motions' not found.")

        # Build the summary with DuckDB SQL and read it straight into pandas;
        # no intermediate table is written.
        df = con.execute(
            """
            WITH yearly_classified AS (
                SELECT
                    year,
                    COUNT(*) AS total_right_wing,
                    AVG(right_support) AS avg_right_support,
                    AVG(left_opposition) AS avg_left_opposition,
                    AVG(centrist_support) AS centrist_support,
                    AVG(right_keyword_matches) AS avg_right_keyword_matches
                FROM right_wing_motions
                WHERE classified = TRUE
                GROUP BY year
            ),
            yearly_total AS (
                SELECT
                    EXTRACT(YEAR FROM date) AS year,
                    COUNT(*) AS total_motions
                FROM motions
                WHERE date IS NOT NULL
                GROUP BY EXTRACT(YEAR FROM date)
            )
            SELECT
                t.year,
                COALESCE(c.total_right_wing, 0) AS total_right_wing,
                COALESCE(c.total_right_wing, 0) * 100.0 / NULLIF(t.total_motions, 0) AS pct_of_total,
                t.total_motions,
                c.avg_right_support,
                c.avg_left_opposition,
                c.centrist_support,
                c.avg_right_keyword_matches,
                NULL::DOUBLE AS extremity_index  -- placeholder for U4
            FROM yearly_total t
            LEFT JOIN yearly_classified c ON t.year = c.year
            ORDER BY t.year
            """
        ).fetchdf()

        # Compute YoY deltas in Python/pandas for simplicity.
        # NOTE: diff() compares adjacent rows; if a calendar year is absent
        # from `motions`, the delta after the gap spans more than one year.
        df["yoy_right_wing_delta"] = df["total_right_wing"].diff()
        df["yoy_pct_delta"] = df["pct_of_total"].diff()

        # Swap the table in atomically: either the new table with all rows is
        # committed, or the previous state is kept.
        con.begin()
        try:
            con.execute(
                f"""
                CREATE OR REPLACE TABLE {table} (
                    year INTEGER PRIMARY KEY,
                    total_right_wing INTEGER,
                    pct_of_total DOUBLE,
                    total_motions INTEGER,
                    avg_right_support DOUBLE,
                    avg_left_opposition DOUBLE,
                    centrist_support DOUBLE,
                    avg_right_keyword_matches DOUBLE,
                    extremity_index DOUBLE,
                    yoy_right_wing_delta DOUBLE,
                    yoy_pct_delta DOUBLE
                )
                """
            )
            # `df` is resolved by DuckDB from this function's local variables.
            con.execute(
                f"""
                INSERT INTO {table}
                SELECT
                    year, total_right_wing, pct_of_total, total_motions,
                    avg_right_support, avg_left_opposition, centrist_support,
                    avg_right_keyword_matches, extremity_index,
                    yoy_right_wing_delta, yoy_pct_delta
                FROM df
                """
            )
            con.commit()
        except Exception:
            con.rollback()
            raise

        logger.info("Wrote %d yearly rows to %s", len(df), output_table)
        return {
            "rows_written": len(df),
            "year_range": (int(df["year"].min()), int(df["year"].max())) if not df.empty else None,
            "total_right_wing": int(df["total_right_wing"].sum()) if not df.empty else 0,
            "table": output_table,
        }
    finally:
        con.close()


def main() -> int:
    """Command-line entry point.

    Parses `--db` and `--output-table`, runs `compute_yearly_summary` with
    them, prints the returned summary as indented JSON and returns exit
    code 0.
    """
    cli = argparse.ArgumentParser(description="Compute yearly right-wing motion trends")
    option_defaults = (
        ("--db", "data/motions.db"),
        ("--output-table", "yearly_right_wing_summary"),
    )
    for flag, fallback in option_defaults:
        cli.add_argument(flag, default=fallback)
    opts = cli.parse_args()

    summary = compute_yearly_summary(
        db_path=opts.db,
        output_table=opts.output_table,
    )
    print(json.dumps(summary, indent=2))
    return 0


# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())