diff --git a/scripts/mindmodel/validator.py b/scripts/mindmodel/validator.py
new file mode 100644
index 0000000..245ceb7
--- /dev/null
+++ b/scripts/mindmodel/validator.py
@@ -0,0 +1,121 @@
+from typing import Dict, Tuple, List, Any, Optional
+import json
+from pathlib import Path
+
+from scripts.mindmodel import loader
+from scripts.mindmodel import checks
+
+
+def validate_manifest(
+    path: str, base_dir: Optional[str] = None
+) -> Tuple[int, Dict[str, Any]]:
+    """Validate a manifest file at `path`.
+
+    Args:
+        path: Location of the manifest file.
+        base_dir: Passed to `checks.file_exists` together with every
+            evidence `file` reference; "" is passed when it is omitted.
+
+    Returns a tuple (exit_code, report).
+
+    exit codes:
+      0 - ok (no issues)
+      1 - warnings (only truncated snippets found)
+      2 - critical (missing files, secrets, or parse error)
+    """
+    report: Dict[str, Any] = {
+        "path": path,
+        "secrets": [],
+        "missing_files": [],
+        "truncated": 0,
+        "constraints": [],
+    }
+
+    try:
+        raw_text = Path(path).read_text(encoding="utf-8")
+    except (OSError, ValueError) as exc:
+        # OSError: missing/unreadable file. ValueError: undecodable bytes
+        # (UnicodeDecodeError) or an invalid path such as an embedded NUL.
+        report["load_error"] = f"Manifest file not readable: {exc}"
+        return 2, report
+
+    # Scan the raw text before parsing so secrets are reported even when the
+    # manifest then fails to load.
+    report["secrets"] = checks.find_potential_secrets(raw_text)
+
+    try:
+        manifest = loader.load_manifest(path)
+    except loader.ManifestLoadError as exc:
+        # treat parse/load errors as critical
+        report["load_error"] = str(exc)
+        return 2, report
+
+    for constraint in manifest.get("constraints") or []:
+        # A constraint that is not a mapping gets the same tolerance as a
+        # malformed evidence entry: it is recorded, with nothing to check.
+        evidence = (
+            constraint.get("evidence", []) if isinstance(constraint, dict) else []
+        )
+        if not isinstance(evidence, list):
+            evidence = []
+
+        checked: List[Dict[str, Any]] = []
+        for ev in evidence:
+            is_entry = isinstance(ev, dict)
+            text = ev.get("text") if is_entry else None
+            file_ref = ev.get("file") if is_entry else None
+
+            exists = True
+            if file_ref and not checks.file_exists(base_dir or "", file_ref):
+                exists = False
+                report["missing_files"].append(file_ref)
+
+            truncated = False
+            if text:
+                truncated = checks.detect_truncated(text)
+                if truncated:
+                    report["truncated"] += 1
+
+            checked.append(
+                {
+                    "text": text,
+                    "file": file_ref,
+                    "exists": exists,
+                    "truncated": truncated,
+                }
+            )
+
+        report["constraints"].append({"constraint": constraint, "evidence": checked})
+
+    # decide exit code: anything critical outranks a truncation warning
+    if report["secrets"] or report["missing_files"]:
+        return 2, report
+    if report["truncated"] > 0:
+        return 1, report
+    return 0, report
+
+
+def main(argv: List[str]) -> int:
+    """Run the validator for a command line.
+
+    `argv[1]` is the manifest path and the optional `argv[2]` the base
+    directory. The report (or a usage error) is printed as JSON and the
+    exit code is returned.
+    """
+    if len(argv) < 2:
+        print(json.dumps({"error": "manifest path required"}))
+        return 2
+
+    path = argv[1]
+    base_dir = argv[2] if len(argv) > 2 else None
+
+    code, report = validate_manifest(path, base_dir=base_dir)
+    print(json.dumps(report))
+    return code
+
+
+# no execution at import time; only runs when invoked as a script
+if __name__ == "__main__":
+    import sys
+
+    raise SystemExit(main(sys.argv))
diff --git a/tests/scripts/mindmodel/test_validator.py b/tests/scripts/mindmodel/test_validator.py
new file mode 100644
index 0000000..803a582
--- /dev/null
+++ b/tests/scripts/mindmodel/test_validator.py
@@ -0,0 +1,72 @@
+import json
+import os
+
+from scripts.mindmodel import validator
+
+
+def write_manifest(path, data: str):
+    """Write `data` to `path` as UTF-8 and return the path as a string."""
+    path.write_text(data, encoding="utf-8")
+    return str(path)
+
+
+def test_validate_ok(tmp_path):
+    # manifest with one constraint and evidence pointing to an existing file
+    evidence_file = tmp_path / "file.txt"
+    evidence_file.write_text("hello", encoding="utf-8")
+
+    manifest = {
+        "constraints": [
+            {"id": "c1", "evidence": [{"file": "file.txt", "text": "complete content"}]}
+        ]
+    }
+    manifest_path = write_manifest(tmp_path / "manifest.json", json.dumps(manifest))
+
+    code, report = validator.validate_manifest(manifest_path, base_dir=str(tmp_path))
+    assert code == 0
+    assert report["missing_files"] == []
+    assert report["secrets"] == []
+
+
+def test_missing_file_flags_failure(tmp_path):
+    # manifest refers to missing file
+    manifest = {
+        "constraints": [{"id": "c2", "evidence": [{"file": "nope.txt", "text": "foo"}]}]
+    }
+    manifest_path = write_manifest(tmp_path / "manifest.json", json.dumps(manifest))
+
+    code, report = validator.validate_manifest(manifest_path, base_dir=str(tmp_path))
+    assert code == 2
+    assert "nope.txt" in report["missing_files"]
+
+
+def test_truncated_produces_warning(tmp_path):
+    # evidence text is truncated -> warning
+    manifest = {
+        "constraints": [{"id": "c3", "evidence": [{"text": "This is truncated..."}]}]
+    }
+    manifest_path = write_manifest(tmp_path / "manifest.json", json.dumps(manifest))
+
+    code, report = validator.validate_manifest(manifest_path, base_dir=str(tmp_path))
+    assert code == 1
+    assert report["truncated"] >= 1
+
+
+def test_manifest_scanned_for_secrets(tmp_path):
+    # manifest text contains an api_key pattern
+    manifest_path = write_manifest(
+        tmp_path / "manifest.json", 'api_key = "secretVALUE1234"'
+    )
+
+    code, report = validator.validate_manifest(manifest_path, base_dir=str(tmp_path))
+    assert code == 2
+    # Only non-emptiness is asserted: the shape of an entry is whatever
+    # `checks.find_potential_secrets` returns.
+    assert report["secrets"]
+
+
+def test_unreadable_manifest_is_critical(tmp_path):
+    # a manifest path that does not exist cannot be read at all
+    code, report = validator.validate_manifest(str(tmp_path / "absent.json"))
+    assert code == 2
+    assert "load_error" in report