From be007165b145e630f99da2908580e9067308beff Mon Sep 17 00:00:00 2001 From: Sven Geboers Date: Sun, 24 May 2026 22:33:44 +0200 Subject: [PATCH] fix(right-wing): add resume support to extremity and sentiment scorers Use CREATE TABLE IF NOT EXISTS and skip already-scored motions to allow resuming interrupted batch runs. --- analysis/right_wing/extremity_scorer.py | 14 +++++++++----- analysis/right_wing/sentiment_analysis.py | 14 +++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/analysis/right_wing/extremity_scorer.py b/analysis/right_wing/extremity_scorer.py index 8459a6d..ca86905 100644 --- a/analysis/right_wing/extremity_scorer.py +++ b/analysis/right_wing/extremity_scorer.py @@ -177,12 +177,10 @@ def score_motions( logger.warning("No classified right-wing motions found.") return {"scored": 0, "failed": 0} - logger.info("Scoring %d motions in batches of %d...", len(rows), batch_size) - - con.execute("DROP TABLE IF EXISTS extremity_scores") + # Resume support: only create table if missing, skip already-scored motions con.execute( """ - CREATE TABLE extremity_scores ( + CREATE TABLE IF NOT EXISTS extremity_scores ( motion_id INTEGER PRIMARY KEY, text_score INTEGER, text_explanation VARCHAR, @@ -192,6 +190,12 @@ def score_motions( ) """ ) + already_scored = { + r[0] for r in con.execute("SELECT motion_id FROM extremity_scores WHERE error IS NULL").fetchall() + } + rows = [r for r in rows if r[0] not in already_scored] + + logger.info("Scoring %d motions in batches of %d...", len(rows), batch_size) scored = 0 failed = 0 @@ -209,7 +213,7 @@ def score_motions( for mid, res in zip(motion_ids, results): con.execute( """ - INSERT INTO extremity_scores + INSERT OR REPLACE INTO extremity_scores (motion_id, text_score, text_explanation, layman_score, layman_explanation, error) VALUES (?, ?, ?, ?, ?, ?) """, diff --git a/analysis/right_wing/sentiment_analysis.py b/analysis/right_wing/sentiment_analysis.py index 19d9a59..35248a1 100644 --- a/analysis/right_wing/sentiment_analysis.py +++ b/analysis/right_wing/sentiment_analysis.py @@ -178,12 +178,10 @@ def analyze_sentiment( logger.warning("No classified right-wing motions found.") return {"scored": 0, "failed": 0} - logger.info("Scoring sentiment for %d motions in batches of %d...", len(rows), batch_size) - - con.execute("DROP TABLE IF EXISTS sentiment_scores") + # Resume support: only create table if missing, skip already-scored motions con.execute( """ - CREATE TABLE sentiment_scores ( + CREATE TABLE IF NOT EXISTS sentiment_scores ( motion_id INTEGER PRIMARY KEY, year INTEGER, text_score DOUBLE, @@ -194,6 +192,12 @@ def analyze_sentiment( ) """ ) + already_scored = { + r[0] for r in con.execute("SELECT motion_id FROM sentiment_scores WHERE error IS NULL").fetchall() + } + rows = [r for r in rows if r[0] not in already_scored] + + logger.info("Scoring sentiment for %d motions in batches of %d...", len(rows), batch_size) scored = 0 failed = 0 @@ -212,7 +216,7 @@ def analyze_sentiment( for mid, year, res in zip(motion_ids, years, results): con.execute( """ - INSERT INTO sentiment_scores + INSERT OR REPLACE INTO sentiment_scores (motion_id, year, text_score, text_explanation, layman_score, layman_explanation, error) VALUES (?, ?, ?, ?, ?, ?, ?) """,