You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
motief/summarizer.py

101 lines
3.6 KiB

# summarizer.py (refactored to use ai_provider)
from typing import Optional
import logging
import duckdb
from config import config
import ai_provider
from database import db
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class MotionSummarizer:
    """Generate plain-language Dutch explanations for parliamentary motions.

    The class keeps no instance state: text generation and embeddings are
    delegated to ``ai_provider``, and motions are read from / written to the
    DuckDB database at ``config.DATABASE_PATH``.
    """

    def __init__(self):
        """Create a summarizer; there is nothing to initialise (stateless)."""

    def _build_prompt_messages(self, title: str, body_text: str) -> list[dict]:
        """Build the chat messages asking for a layman explanation.

        Args:
            title: Title of the motion, inserted after ``Titel:``.
            body_text: Text of the motion, inserted after ``Tekst:``.

        Returns:
            A two-element list of ``{"role", "content"}`` dicts: a system
            message followed by the user prompt.
        """
        # Assembled line by line so the prompt text does not depend on the
        # indentation of this method; it starts and ends with a newline.
        prompt = (
            "\n"
            "Leg deze Nederlandse parlementaire motie uit in eenvoudige, toegankelijke taal:\n"
            f"Titel: {title}\n"
            f"Tekst: {body_text}\n"
            "Geef een uitleg van 2-3 zinnen die:\n"
            "- Gebruik maakt van alledaagse taal\n"
            "- De praktische impact op burgers uitlegt\n"
            "- Politiek jargon vermijdt\n"
            "- Neutraal en feitelijk blijft\n"
            "Antwoord alleen met de uitleg, geen introductie of extra tekst.\n"
        )
        return [
            {
                "role": "system",
                "content": "Je bent een expert in het uitleggen van politieke onderwerpen in eenvoudige taal voor Nederlandse burgers.",
            },
            {"role": "user", "content": prompt},
        ]

    def generate_layman_explanation(self, title: str, body_text: str) -> str:
        """Generate a layman-friendly explanation via ai_provider.

        Args:
            title: Title of the motion.
            body_text: Text of the motion; a falsy value is treated as "".

        Returns:
            Whatever ``ai_provider.chat_completion`` returns, or an empty
            string when it raises ``ai_provider.ProviderError`` (non-fatal;
            the error is logged with its traceback).
        """
        messages = self._build_prompt_messages(title, body_text or "")
        try:
            return ai_provider.chat_completion(messages, model=config.QWEN_MODEL)
        except ai_provider.ProviderError:
            logger.exception("AI provider failed to generate summary")
            return ""

    def update_motion_summaries(
        self,
        compute_embeddings: bool = True,
        embedding_model: str = "qwen/qwen3-embedding-4b",
    ):
        """Find motions missing layman_explanation and generate summaries.

        Processes at most 50 motions per call. Uses body_text when available,
        falls back to description, then title only. The generated summary
        (possibly an empty string when generation failed) is written back to
        the motion row.

        If compute_embeddings is True and the database helper provides a
        callable ``store_embedding``, an embedding of each non-empty summary
        is computed and stored. When no such helper exists, no embedding is
        computed at all, because the result could not be persisted.

        Args:
            compute_embeddings: Whether to compute and store embeddings for
                newly generated summaries.
            embedding_model: Model name passed to ``ai_provider.get_embedding``
                and to ``db.store_embedding``.

        Raises:
            Any exception other than ``ai_provider.ProviderError`` raised
            while processing propagates to the caller; the database
            connection is closed first.
        """
        # Resolve the optional storage helper once: it does not change between
        # iterations, and without it an embedding call would be wasted work.
        store_fn = getattr(db, "store_embedding", None) if compute_embeddings else None
        can_store = callable(store_fn)
        if compute_embeddings and not can_store:
            logger.info(
                "database helper has no store_embedding; skipping embeddings"
            )

        conn = duckdb.connect(config.DATABASE_PATH)
        try:
            rows = conn.execute(
                "SELECT id, title, description, body_text FROM motions WHERE layman_explanation IS NULL OR layman_explanation = '' LIMIT 50"
            ).fetchall()
            for motion_id, title, description, body_text in rows:
                input_text = body_text or description or ""
                summary = self.generate_layman_explanation(title, input_text)
                if summary is None:
                    summary = ""
                conn.execute(
                    "UPDATE motions SET layman_explanation = ? WHERE id = ?",
                    (summary, motion_id),
                )
                logger.info("Updated summary for motion %s", motion_id)

                # Nothing to embed for a failed/empty summary, and nothing to
                # do when the embedding could not be stored anyway.
                if not (summary and can_store):
                    continue

                logger.info(
                    "Computing embedding for motion %s using model %s",
                    motion_id,
                    embedding_model,
                )
                try:
                    emb = ai_provider.get_embedding(summary, model=embedding_model)
                    store_fn(motion_id, embedding_model, emb)
                except ai_provider.ProviderError:
                    # Best effort: an embedding failure must not abort the batch.
                    logger.exception(
                        "Failed to compute/store embedding for motion %s", motion_id
                    )
        finally:
            conn.close()
# Module-level MotionSummarizer instance, created when this module is imported.
summarizer = MotionSummarizer()