You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
101 lines
3.6 KiB
101 lines
3.6 KiB
# summarizer.py (refactored to use ai_provider)
|
|
from typing import Optional
|
|
import logging
|
|
|
|
import duckdb
|
|
|
|
from config import config
|
|
import ai_provider
|
|
from database import db
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class MotionSummarizer:
    """Generate plain-language Dutch explanations for parliamentary motions.

    The class keeps no state of its own: it builds prompts, delegates text
    generation and embedding computation to ``ai_provider``, and writes the
    results to the DuckDB database at ``config.DATABASE_PATH``.
    """

    def __init__(self):
        # Stateless; use ai_provider functions directly
        pass

    def _build_prompt_messages(self, title: str, body_text: str) -> list[dict]:
        """Build the chat messages that ask for a layman explanation.

        Args:
            title: Motion title. ``None`` is treated as an empty string.
            body_text: Motion text. ``None`` is treated as an empty string.

        Returns:
            A two-element list: the system message followed by the user
            message that carries the prompt.
        """
        # Coerce missing values so the literal text "None" never ends up in
        # the prompt sent to the model.
        title = "" if title is None else title
        body_text = "" if body_text is None else body_text

        # The prompt lines are deliberately flush-left: any indentation inside
        # the triple-quoted string would become part of the prompt.
        prompt = f"""
Leg deze Nederlandse parlementaire motie uit in eenvoudige, toegankelijke taal:

Titel: {title}
Tekst: {body_text}

Geef een uitleg van 2-3 zinnen die:
- Gebruik maakt van alledaagse taal
- De praktische impact op burgers uitlegt
- Politiek jargon vermijdt
- Neutraal en feitelijk blijft

Antwoord alleen met de uitleg, geen introductie of extra tekst.
"""
        return [
            {
                "role": "system",
                "content": "Je bent een expert in het uitleggen van politieke onderwerpen in eenvoudige taal voor Nederlandse burgers.",
            },
            {"role": "user", "content": prompt},
        ]

    def generate_layman_explanation(self, title: str, body_text: str) -> str:
        """Generate a layman-friendly explanation via ai_provider.

        Args:
            title: Motion title.
            body_text: Motion text; a falsy value is replaced by ``""``.

        Returns:
            The explanation text. An empty string is returned when the
            provider raises ``ProviderError`` (non-fatal, logged) or when it
            returns ``None``, so callers always receive a ``str``.
        """
        messages = self._build_prompt_messages(title, body_text or "")
        try:
            explanation = ai_provider.chat_completion(
                messages, model=config.QWEN_MODEL
            )
        except ai_provider.ProviderError:
            logger.exception("AI provider failed to generate summary")
            return ""
        # Honour the ``-> str`` contract even if the provider yields None.
        return "" if explanation is None else explanation

    def _embed_summary(self, motion_id, summary: str, embedding_model: str) -> None:
        """Compute an embedding for ``summary`` and store it when possible.

        The embedding is passed to ``db.store_embedding`` only if the database
        helper exposes such a callable; otherwise it is computed and dropped.
        """
        logger.info(
            "Computing embedding for motion %s using model %s",
            motion_id,
            embedding_model,
        )
        # compute embedding and try to store via database helper if available
        try:
            emb = ai_provider.get_embedding(summary, model=embedding_model)
            store_fn = getattr(db, "store_embedding", None)
            if callable(store_fn):
                store_fn(motion_id, embedding_model, emb)
        except ai_provider.ProviderError:
            # NOTE(review): only provider errors are swallowed here; an error
            # raised by db.store_embedding itself would propagate and stop the
            # batch. Confirm whether that is intended.
            logger.exception(
                "Failed to compute/store embedding for motion %s", motion_id
            )

    def update_motion_summaries(
        self,
        compute_embeddings: bool = True,
        embedding_model: str = "qwen/qwen3-embedding-4b",
    ):
        """Find motions missing layman_explanation and generate summaries.

        Processes at most 50 motions per call. Uses body_text when available,
        falls back to description, then title only. Motions for which no
        summary could be generated are left untouched so that a later run
        picks them up again.

        Args:
            compute_embeddings: When True, also compute an embedding for each
                new summary and store it if the database helper provides
                ``store_embedding``.
            embedding_model: Model identifier passed to
                ``ai_provider.get_embedding``.
        """
        conn = duckdb.connect(config.DATABASE_PATH)
        try:
            rows = conn.execute(
                "SELECT id, title, description, body_text FROM motions WHERE layman_explanation IS NULL OR layman_explanation = '' LIMIT 50"
            ).fetchall()

            for motion_id, title, description, body_text in rows:
                input_text = body_text or description or ""
                summary = self.generate_layman_explanation(title, input_text)
                if not summary:
                    # Nothing useful to persist: writing "" would be a no-op
                    # for the selection query and the success log line below
                    # would be misleading.
                    logger.warning(
                        "No summary generated for motion %s; skipping update",
                        motion_id,
                    )
                    continue

                conn.execute(
                    "UPDATE motions SET layman_explanation = ? WHERE id = ?",
                    (summary, motion_id),
                )
                logger.info("Updated summary for motion %s", motion_id)

                if compute_embeddings:
                    self._embed_summary(motion_id, summary, embedding_model)
        finally:
            # Always release the database handle, even if a query failed.
            conn.close()
|
|
|
|
|
|
# Module-level MotionSummarizer instance, created at import time.
summarizer = MotionSummarizer()
|
|
|