You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
160 lines
5.2 KiB
160 lines
5.2 KiB
"""Parity tests: verify agent tools can achieve what humans can.

These tests ensure the agent-native architecture satisfies the parity principle:
"Whatever the user can do through the UI/scripts, the agent can achieve through tools."
"""
|
|
|
|
import os
|
|
import pytest
|
|
|
|
pytest.importorskip("duckdb")
|
|
|
|
|
|
class TestDatabaseParity:
    """Agent database queries vs human SQL queries.

    Each test runs the raw SQL a human would type and the corresponding
    agent tool against the same DuckDB file, then compares the outcomes.
    """

    def test_agent_query_motions_matches_raw_sql(self, tmp_duckdb_path):
        """Human: SELECT * FROM motions LIMIT 10
        Agent: query_motions(db_path, limit=10)
        """
        import duckdb
        from agent_tools.database import query_motions

        # Human approach — handle empty DB gracefully
        con = duckdb.connect(tmp_duckdb_path)
        try:
            rows = con.execute("SELECT * FROM motions LIMIT 10").fetchdf()
            human_result = rows.to_dict("records")
        except duckdb.Error:
            # Only database-level failures (e.g. the table does not exist)
            # are treated as "no rows"; any other error should surface
            # instead of being silently masked as an empty result.
            human_result = []
        finally:
            # Always release the connection before the agent tool opens the
            # same database file.
            con.close()

        # Agent approach
        agent_result = query_motions(tmp_duckdb_path, limit=10)

        # Both should return lists of the same length
        assert isinstance(human_result, list)
        assert isinstance(agent_result, list)
        assert len(agent_result) == len(human_result)

    def test_agent_pipeline_status_matches_raw_query(self, tmp_duckdb_path):
        """Human: SELECT COUNT(*) FROM motions
        Agent: query_pipeline_status(db_path)
        """
        import duckdb
        from agent_tools.database import query_pipeline_status

        con = duckdb.connect(tmp_duckdb_path)
        try:
            human_count = con.execute("SELECT COUNT(*) FROM motions").fetchone()[0]
        except duckdb.Error:
            # A database-level failure (e.g. missing table) counts as zero
            # motions; unrelated errors are allowed to propagate.
            human_count = 0
        finally:
            con.close()

        agent_status = query_pipeline_status(tmp_duckdb_path)

        assert agent_status["motion_count"] == human_count
|
|
|
|
|
|
class TestHealthCheckParity:
    """Agent health check vs human script execution."""

    def test_agent_health_check_matches_script(self, tmp_duckdb_path):
        """Human: python scripts/health_check.py
        Agent: pipeline_check_health(db_path)

        Only the agent side is executed here; the test checks that its
        result is a dict exposing the "healthy" and "checks" keys.
        """
        from agent_tools.pipeline import pipeline_check_health

        # Agent approach
        report = pipeline_check_health(tmp_duckdb_path)

        assert isinstance(report, dict)
        for required_key in ("healthy", "checks"):
            assert required_key in report
|
|
|
|
|
|
class TestReportGenerationParity:
    """Agent report generation vs human manual analysis."""

    def test_agent_generates_summary_report(self, tmp_duckdb_path, tmp_path):
        """Human: Write a summary of pipeline state
        Agent: generate_report(db_path, "summary", ...)

        The report is written into pytest's per-test ``tmp_path`` directory
        and then read back to check for the expected section headings.
        """
        from agent_tools.reports import generate_report

        output_path = str(tmp_path / "summary.md")
        result = generate_report(
            tmp_duckdb_path,
            report_type="summary",
            parameters={},
            output_path=output_path,
        )

        assert result["status"] == "written"
        assert os.path.exists(output_path)

        # Should contain key sections. Read through a context manager so the
        # file handle is closed deterministically instead of being leaked.
        with open(output_path) as report_file:
            content = report_file.read()

        assert "Pipeline Summary" in content
        assert "Motions in database" in content
|
|
|
|
|
|
class TestAnalysisParity:
    """Agent analysis vs human analytical queries."""

    def test_agent_party_shift_analysis(self, tmp_duckdb_path):
        """Human: Write SQL to compare party positions across windows
        Agent: analyze_party_shift(db_path, ...)
        """
        from agent_tools.analysis import analyze_party_shift

        outcome = analyze_party_shift(
            tmp_duckdb_path, party="VVD", window_start="2020", window_end="2024"
        )

        # Should return structured result (or error if no data)
        assert isinstance(outcome, dict)
        assert "party" in outcome

        # Either shift data or error (empty DB is fine)
        has_shift = "shift" in outcome
        has_error = "error" in outcome
        assert has_shift or has_error
|
|
|
|
|
|
class TestIntegrationAgentDiagnosticLoop:
    """Integration: Agent performs full diagnostic loop."""

    def test_agent_diagnoses_stale_data(self, tmp_duckdb_path):
        """Agent loop:
        1. Check health
        2. Query pipeline status
        3. Identify issue (empty DB = no data)
        4. Suggest remediation

        NOTE(review): the final assertion expects ``tmp_duckdb_path`` to point
        at a database with no motions or no SVD windows; the fixture is not
        visible here, so confirm it provides an empty database.
        """
        from agent_tools.pipeline import pipeline_check_health
        from agent_tools.database import query_pipeline_status

        # Step 1: Check health. Verify the result instead of discarding it so
        # this step of the loop is actually exercised.
        health = pipeline_check_health(tmp_duckdb_path)
        assert isinstance(health, dict)
        assert "healthy" in health
        assert "checks" in health

        # Step 2: Query status
        status = query_pipeline_status(tmp_duckdb_path)

        # Step 3: Agent reasoning (simulated)
        issues = []
        if status["motion_count"] == 0:
            issues.append("No motions in database")
        if status["svd_window_count"] == 0:
            issues.append("No SVD windows computed")

        # Step 4: Suggest remediation
        suggestions = []
        if "No motions in database" in issues:
            suggestions.append("Run pipeline ingestion stage")
        if "No SVD windows computed" in issues:
            suggestions.append("Run SVD computation after ingestion")

        assert isinstance(issues, list)
        assert isinstance(suggestions, list)

        # Empty DB should produce actionable suggestions
        assert suggestions
|
|
|