fix(right-wing): add resume support to extremity and sentiment scorers

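Use CREATE TABLE IF NOT EXISTS instead of dropping and recreating the
score tables, and skip motions that already have an error-free score, so
interrupted batch runs can be resumed. Motions whose earlier attempt
recorded an error are retried and overwritten via INSERT OR REPLACE.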
main
Sven Geboers 4 weeks ago
parent ec18fe0540
commit be007165b1
  1. 14
      analysis/right_wing/extremity_scorer.py
  2. 14
      analysis/right_wing/sentiment_analysis.py

@ -177,12 +177,10 @@ def score_motions(
logger.warning("No classified right-wing motions found.") logger.warning("No classified right-wing motions found.")
return {"scored": 0, "failed": 0} return {"scored": 0, "failed": 0}
logger.info("Scoring %d motions in batches of %d...", len(rows), batch_size) # Resume support: only create table if missing, skip already-scored motions
con.execute("DROP TABLE IF EXISTS extremity_scores")
con.execute( con.execute(
""" """
CREATE TABLE extremity_scores ( CREATE TABLE IF NOT EXISTS extremity_scores (
motion_id INTEGER PRIMARY KEY, motion_id INTEGER PRIMARY KEY,
text_score INTEGER, text_score INTEGER,
text_explanation VARCHAR, text_explanation VARCHAR,
@ -192,6 +190,12 @@ def score_motions(
) )
""" """
) )
already_scored = {
r[0] for r in con.execute("SELECT motion_id FROM extremity_scores WHERE error IS NULL").fetchall()
}
rows = [r for r in rows if r[0] not in already_scored]
logger.info("Scoring %d motions in batches of %d...", len(rows), batch_size)
scored = 0 scored = 0
failed = 0 failed = 0
@ -209,7 +213,7 @@ def score_motions(
for mid, res in zip(motion_ids, results): for mid, res in zip(motion_ids, results):
con.execute( con.execute(
""" """
INSERT INTO extremity_scores INSERT OR REPLACE INTO extremity_scores
(motion_id, text_score, text_explanation, layman_score, layman_explanation, error) (motion_id, text_score, text_explanation, layman_score, layman_explanation, error)
VALUES (?, ?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?, ?)
""", """,

@ -178,12 +178,10 @@ def analyze_sentiment(
logger.warning("No classified right-wing motions found.") logger.warning("No classified right-wing motions found.")
return {"scored": 0, "failed": 0} return {"scored": 0, "failed": 0}
logger.info("Scoring sentiment for %d motions in batches of %d...", len(rows), batch_size) # Resume support: only create table if missing, skip already-scored motions
con.execute("DROP TABLE IF EXISTS sentiment_scores")
con.execute( con.execute(
""" """
CREATE TABLE sentiment_scores ( CREATE TABLE IF NOT EXISTS sentiment_scores (
motion_id INTEGER PRIMARY KEY, motion_id INTEGER PRIMARY KEY,
year INTEGER, year INTEGER,
text_score DOUBLE, text_score DOUBLE,
@ -194,6 +192,12 @@ def analyze_sentiment(
) )
""" """
) )
already_scored = {
r[0] for r in con.execute("SELECT motion_id FROM sentiment_scores WHERE error IS NULL").fetchall()
}
rows = [r for r in rows if r[0] not in already_scored]
logger.info("Scoring sentiment for %d motions in batches of %d...", len(rows), batch_size)
scored = 0 scored = 0
failed = 0 failed = 0
@ -212,7 +216,7 @@ def analyze_sentiment(
for mid, year, res in zip(motion_ids, years, results): for mid, year, res in zip(motion_ids, years, results):
con.execute( con.execute(
""" """
INSERT INTO sentiment_scores INSERT OR REPLACE INTO sentiment_scores
(motion_id, year, text_score, text_explanation, layman_score, layman_explanation, error) (motion_id, year, text_score, text_explanation, layman_score, layman_explanation, error)
VALUES (?, ?, ?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?, ?, ?)
""", """,

Loading…
Cancel
Save