"""Content validation primitives for agent operation. Tools for validating data quality, coverage, and content correctness. """ from __future__ import annotations import logging from datetime import datetime, timedelta from typing import Any, Dict from agent_tools.database import query_motions, query_svd_vectors logger = logging.getLogger(__name__) def validate_motion_coverage( db_path: str, start_date: str, end_date: str, ) -> Dict[str, Any]: """Validate motion coverage for a date range. Returns gaps where no motions exist in the database. """ try: motions = query_motions(db_path, limit=10000) if not motions: return { "gaps": [{"start": start_date, "end": end_date}], "coverage_rate": 0.0, "total_motions": 0, } # Convert dates start = datetime.fromisoformat(start_date) end = datetime.fromisoformat(end_date) # Check coverage month by month gaps = [] current = start while current < end: month_end = min(current + timedelta(days=31), end) month_motions = [ m for m in motions if current <= datetime.fromisoformat(str(m.get("date", "1970-01-01"))) < month_end ] if not month_motions: gaps.append({ "start": current.isoformat(), "end": month_end.isoformat(), }) current = month_end total_days = (end - start).days gap_days = sum( (datetime.fromisoformat(g["end"]) - datetime.fromisoformat(g["start"])).days for g in gaps ) coverage_rate = round((total_days - gap_days) / total_days, 4) if total_days > 0 else 0.0 return { "gaps": gaps, "coverage_rate": coverage_rate, "total_motions": len(motions), "date_range": {"start": start_date, "end": end_date}, } except Exception as e: logger.exception("validate_motion_coverage failed") return {"gaps": [], "coverage_rate": 0.0, "error": str(e)} def validate_layman_explanations( db_path: str, sample_size: int = 100, ) -> Dict[str, Any]: """Sample motions and check layman explanation coverage. Returns quality metrics for explanations. """ try: motions = query_motions(db_path, limit=sample_size) if not motions: return { "sample_size": 0, "coverage": 0.0, "empty_count": 0, } with_explanation = sum( 1 for m in motions if m.get("layman_explanation") and str(m.get("layman_explanation")).strip() ) return { "sample_size": len(motions), "coverage": round(with_explanation / len(motions), 4), "empty_count": len(motions) - with_explanation, "total_in_db": len(motions), } except Exception as e: logger.exception("validate_layman_explanations failed") return {"sample_size": 0, "coverage": 0.0, "error": str(e)} def check_embedding_quality( db_path: str, window_id: str, ) -> Dict[str, Any]: """Check embedding coverage for a window. Returns raw coverage stats. The agent decides whether coverage is acceptable. """ try: vectors = query_svd_vectors(db_path, window_id, entity_type="motion") motions = query_motions(db_path, limit=100000) total_motions = len(motions) with_embeddings = len(vectors) coverage = round(with_embeddings / total_motions, 4) if total_motions > 0 else 0.0 return { "window_id": window_id, "total_motions": total_motions, "with_embeddings": with_embeddings, "coverage": coverage, } except Exception as e: logger.exception("check_embedding_quality failed") return {"window_id": window_id, "coverage": 0.0, "error": str(e)}