From d1faf2b3e408592f7e0cccb440cf902215a9c8a1 Mon Sep 17 00:00:00 2001
From: Sven Geboers <geboers.sven@gmail.com>
Date: Tue, 24 Mar 2026 22:41:28 +0100
Subject: [PATCH] feat(mindmodel): add CLI wrapper, edge-case tests, and
 manifest schema tests

---
 scripts/validate_mindmodel.py                | 56 ++++++++++++++++++++
 tests/mindmodel/test_manifest_parse.py       | 29 ++++++++++
 tests/mindmodel/test_manifest_schema.py      | 32 +++++++++++
 tests/scripts/test_validate_cli.py           | 52 ++++++++++++++++++
 tests/validators/test_validator_edgecases.py | 56 ++++++++++++++++++++
 5 files changed, 225 insertions(+)
 create mode 100644 scripts/validate_mindmodel.py
 create mode 100644 tests/mindmodel/test_manifest_parse.py
 create mode 100644 tests/mindmodel/test_manifest_schema.py
 create mode 100644 tests/scripts/test_validate_cli.py
 create mode 100644 tests/validators/test_validator_edgecases.py

diff --git a/scripts/validate_mindmodel.py b/scripts/validate_mindmodel.py
new file mode 100644
index 0000000..d6deead
--- /dev/null
+++ b/scripts/validate_mindmodel.py
@@ -0,0 +1,101 @@
+"""Command-line wrapper around src.validators.mindmodel_validator.validate_manifest.
+
+This tiny CLI loads a manifest and writes a structured JSON report to stdout
+and optionally to a file path. It is report-only: findings, validator errors,
+import failures and report-file write failures never change the exit code;
+only argparse usage errors (such as a missing manifest path) exit non-zero.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Any
+
+
+def _write_report(report: dict[str, Any], path: Path | None) -> None:
+    """Print *report* as JSON on stdout and, when *path* is given, save it there.
+
+    Values the JSON encoder cannot handle natively are stringified, and a
+    failure to create or write the report file is noted on stderr instead of
+    propagating, so the CLI keeps its report-only (always exit 0) contract.
+    """
+    try:
+        text = json.dumps(report, indent=2, ensure_ascii=False, default=str)
+    except (TypeError, ValueError) as exc:  # e.g. unsupported keys, cycles
+        fallback = {"error": f"Report is not JSON-serializable: {exc}"}
+        text = json.dumps(fallback, indent=2)
+    print(text)
+    if path is None:
+        return
+    try:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text(text, encoding="utf-8")
+    except OSError as exc:
+        print(f"Failed to write report to {path}: {exc}", file=sys.stderr)
+
+
+def _load_validator():
+    """Import and return ``validate_manifest``.
+
+    When this file is run directly (``python scripts/validate_mindmodel.py``)
+    ``sys.path[0]`` is the script's own directory, so the top-level ``src``
+    package is not importable. In that case the import is retried once with the
+    script's parent directory (assumed to be the repository root that holds
+    ``src/``) prepended to ``sys.path``.
+    """
+    try:
+        from src.validators.mindmodel_validator import validate_manifest
+    except ImportError:
+        repo_root = str(Path(__file__).resolve().parent.parent)
+        if repo_root in sys.path:
+            raise
+        sys.path.insert(0, repo_root)
+        from src.validators.mindmodel_validator import validate_manifest
+    return validate_manifest
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Validate a manifest and emit the report; return the process exit code.
+
+    *argv* defaults to ``sys.argv[1:]`` (argparse behaviour). The manifest path
+    may be given positionally or via ``--manifest``; the option wins when both
+    are present. Always returns 0: import or validation failures are reported
+    as ``{"error": ...}`` through the normal report channels.
+    """
+    parser = argparse.ArgumentParser("validate_mindmodel")
+    parser.add_argument("manifest", nargs="?", help="path to manifest file")
+    parser.add_argument("--manifest", dest="manifest_opt", help="path to manifest file")
+    parser.add_argument("--report", help="optional output report path")
+    args = parser.parse_args(argv)
+
+    manifest = args.manifest_opt or args.manifest
+    if not manifest:
+        parser.error("manifest path is required (positional or --manifest)")
+
+    report_path = Path(args.report) if args.report else None
+
+    # imported lazily (after argument parsing) to keep the CLI tiny when unused
+    try:
+        validate_manifest = _load_validator()
+    except Exception as e:  # pragma: no cover - defensive
+        # Keep stdout machine-readable and still honour --report on failure.
+        _write_report({"error": f"Failed to import validator: {e}"}, report_path)
+        return 0
+
+    try:
+        report = validate_manifest(manifest, report_only=True)
+    except Exception as e:  # never fail the process
+        report = {"error": str(e)}
+
+    _write_report(report, report_path)
+
+    # always exit zero for report-only operation
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/mindmodel/test_manifest_parse.py b/tests/mindmodel/test_manifest_parse.py
new file mode 100644
index 0000000..0b9c0b8
--- /dev/null
+++ b/tests/mindmodel/test_manifest_parse.py
@@ -0,0 +1,35 @@
+import re
+from pathlib import Path
+
+try:
+    import yaml  # type: ignore
+except ImportError:
+    yaml = None
+
+
+def test_manifest_loads():
+    """Ensure the .mindmodel/manifest.yaml can be read and contains a 'files' list.
+
+    With PyYAML installed the parsed document is checked; without it, a pair
+    of line-anchored regexes approximates the same checks on the raw text.
+    """
+    p = Path(".mindmodel/manifest.yaml")
+    assert p.exists(), ".mindmodel/manifest.yaml must exist"
+    text = p.read_text(encoding="utf-8")
+
+    if yaml is not None:
+        data = yaml.safe_load(text)
+        assert isinstance(data, dict), "manifest should parse to a mapping"
+        assert "files" in data, "top-level 'files' key missing"
+        assert isinstance(data["files"], list), "'files' should be a list"
+        assert len(data["files"]) >= 1, "'files' must have at least one entry"
+    else:
+        # Fallback simple checks if PyYAML is not available in the environment.
+        # 'files:' must start in column 0 to count as top-level; [ \t] is used
+        # instead of \s so a match cannot spill over a line break.
+        assert re.search(r"^files:[ \t]*(?:#.*)?$", text, re.M), (
+            "manifest must contain top-level 'files:'"
+        )
+        assert re.search(r"^[ \t]*-[ \t]+path:[ \t]+\S", text, re.M), (
+            "manifest must contain at least one '- path:' entry"
+        )
diff --git a/tests/mindmodel/test_manifest_schema.py b/tests/mindmodel/test_manifest_schema.py
new file mode 100644
index 0000000..cb26538
--- /dev/null
+++ b/tests/mindmodel/test_manifest_schema.py
@@ -0,0 +1,30 @@
+from pathlib import Path
+
+from src.validators.types import parse_manifest
+
+
+def test_manifest_schema_parses_into_types():
+    """Check that parse_manifest accepts the repository manifest.
+
+    The parsed result may be a plain mapping or an object exposing a
+    ``files`` attribute; either way it must provide a non-empty list of
+    mappings that each carry a `path` key.
+    """
+    manifest_path = Path(".mindmodel/manifest.yaml")
+    assert manifest_path.exists(), ".mindmodel/manifest.yaml must exist"
+
+    parsed = parse_manifest(str(manifest_path))
+
+    # Normalize both supported result shapes to the bare list of entries.
+    files = (
+        parsed.get("files", [])
+        if isinstance(parsed, dict)
+        else getattr(parsed, "files", [])
+    )
+
+    assert isinstance(files, list), "manifest.files must be a list"
+    assert files, "manifest must contain at least one file entry"
+
+    for entry in files:
+        assert isinstance(entry, dict), "each file entry should be a mapping"
+        assert "path" in entry, f"file entry missing 'path': {entry}"
diff --git a/tests/scripts/test_validate_cli.py b/tests/scripts/test_validate_cli.py
new file mode 100644
index 0000000..ebd1de4
--- /dev/null
+++ b/tests/scripts/test_validate_cli.py
@@ -0,0 +1,48 @@
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+
+def test_cli_runs(tmp_path):
+    """Run the CLI on the repo manifest and check the JSON report it writes.
+
+    The manifest path and both invocations are relative to the current working
+    directory, so this test has to be run from the repository root.
+    """
+    manifest = Path(".mindmodel/manifest.yaml")
+    assert manifest.exists(), "expected .mindmodel/manifest.yaml to exist in repo"
+
+    report_path = tmp_path / "report.json"
+    cli_args = [str(manifest), "--report", str(report_path)]
+
+    # Try module mode first, fallback to direct script invocation
+    cmds = [
+        [sys.executable, "-m", "scripts.validate_mindmodel", *cli_args],
+        [sys.executable, "scripts/validate_mindmodel.py", *cli_args],
+    ]
+
+    result = None
+    for cmd in cmds:
+        try:
+            result = subprocess.run(cmd, check=False, capture_output=True, text=True)
+        except FileNotFoundError:
+            continue
+        # subprocess.run() does not raise when the interpreter starts but cannot
+        # run the module/script; that only shows up as a non-zero exit code or
+        # a missing report, so those conditions are what trigger the fallback.
+        if result.returncode == 0 and report_path.exists():
+            break
+
+    assert result is not None, "Failed to run script (no suitable invocation)"
+    # CLI should exit with 0 (report-only)
+    assert result.returncode == 0, (
+        f"CLI exited non-zero: {result.returncode}\nstderr: {result.stderr}"
+    )
+
+    assert report_path.exists(), f"Report file was not created at {report_path}"
+
+    data = json.loads(report_path.read_text(encoding="utf-8"))
+    # top-level keys expected from validator
+    for key in ("missing_files", "truncated_evidence", "potential_secrets"):
+        assert key in data, f"Report JSON missing key: {key}"
diff --git a/tests/validators/test_validator_edgecases.py b/tests/validators/test_validator_edgecases.py
new file mode 100644
index 0000000..e01e9dd
--- /dev/null
+++ b/tests/validators/test_validator_edgecases.py
@@ -0,0 +1,58 @@
+import os
+from pathlib import Path
+
+from src.validators.mindmodel_validator import validate_manifest
+
+
+def _write_manifest(directory, name, lines):
+    """Write *lines* as the manifest *name* inside *directory*; return its path."""
+    target = directory / name
+    # The empty first and last items give the text a leading and trailing newline.
+    target.write_text("\n".join(["", *lines, ""]), encoding="utf-8")
+    return str(target)
+
+
+def test_missing_files_reported(tmp_path):
+    """Entries whose paths do not exist must be listed under ``missing_files``."""
+    absent = [str(tmp_path / name) for name in ("missing_one.txt", "missing_two.txt")]
+    manifest_file = _write_manifest(
+        tmp_path,
+        "manifest_missing.yaml",
+        ["files:", *(f"  - path: {path}" for path in absent)],
+    )
+
+    report = validate_manifest(manifest_file)
+
+    assert "missing_files" in report
+    # both missing paths should be reported
+    for path in absent:
+        assert path in report["missing_files"]
+
+
+def test_truncated_evidence_and_secrets_reported(tmp_path):
+    """Truncated and secret-looking excerpts must be reported with their path."""
+    # its excerpt ends with "..." and is expected under truncated_evidence
+    trunc_path = str(tmp_path / "trunc.txt")
+    # its excerpt contains PASSWORD and is expected under potential_secrets
+    secret_path = str(tmp_path / "secret.txt")
+
+    manifest_file = _write_manifest(
+        tmp_path,
+        "manifest_edgecases.yaml",
+        [
+            "files:",
+            f"  - path: {trunc_path}",
+            '    evidence_excerpt: "This output was cut off..."',
+            f"  - path: {secret_path}",
+            '    evidence_excerpt: "Found PASSWORD=sekret123 in the logs"',
+        ],
+    )
+
+    report = validate_manifest(manifest_file)
+
+    for section, expected in (
+        ("truncated_evidence", trunc_path),
+        ("potential_secrets", secret_path),
+    ):
+        assert section in report
+        assert any(item.get("path") == expected for item in report[section])