"""Conservative, report-only mindmodel/manifest validator. This module provides a small validator that reads a manifest (YAML if PyYAML is available, otherwise a tiny fallback parser) and reports potential issues without making changes. The returned report contains the keys: - missing_files: list of file paths referenced in the manifest that don't exist - truncated_evidence: list of items (dicts) where evidence_excerpt appears truncated - potential_secrets: list of items (dicts) where evidence_excerpt looks like it may contain secrets The manifest is expected to contain a top-level `files` list with entries that are mappings and have at least a `path` (or `file_path`) and optionally `evidence_excerpt`. """ from __future__ import annotations import os from typing import List, Dict, Any def _load_yaml_native(path: str) -> Dict[str, Any]: try: import yaml # type: ignore with open(path, "r", encoding="utf-8") as f: return yaml.safe_load(f) or {} except Exception: raise def _load_yaml_fallback(path: str) -> Dict[str, Any]: """Tiny YAML-ish fallback parser that understands a minimal manifest. It only supports a top-level `files:` key and a sequence of simple mappings with `-` list items and `key: value` pairs indented. This is intentionally conservative and fragile; it's only used when PyYAML is not available. """ result: Dict[str, Any] = {} files: List[Dict[str, Any]] = [] current: Dict[str, Any] | None = None with open(path, "r", encoding="utf-8") as f: for raw in f: line = raw.rstrip("\n") stripped = line.lstrip() if not stripped or stripped.startswith("#"): continue if stripped.startswith("files:") and line.startswith(stripped): # top-level marker, skip continue if stripped.startswith("- "): # start new item if current is not None: files.append(current) current = {} # possible inline key: - path: something rest = stripped[2:].strip() if rest: if ":" in rest: k, v = rest.split(":", 1) current[k.strip()] = v.strip() continue # key: value lines (indented) if ":" in stripped and current is not None: k, v = stripped.split(":", 1) current[k.strip()] = v.strip() if current is not None: files.append(current) if files: result["files"] = files return result def _normalize_entry(entry: Any) -> Dict[str, Any]: if not isinstance(entry, dict): return {"path": str(entry)} # prefer path or file_path if "file_path" in entry and "path" not in entry: entry = dict(entry) entry["path"] = entry.pop("file_path") return entry def validate_manifest(manifest_path: str, report_only: bool = True) -> dict: """Validate a minimal mindmodel manifest and return a report. Parameters - manifest_path: path to the YAML manifest file - report_only: unused flag for now; kept to emphasise this is report-only Returns a dict with keys: missing_files, truncated_evidence, potential_secrets """ if not os.path.exists(manifest_path): raise FileNotFoundError(manifest_path) # attempt to use PyYAML if available, otherwise fallback try: manifest = _load_yaml_native(manifest_path) except Exception: manifest = _load_yaml_fallback(manifest_path) files = manifest.get("files") or [] report = {"missing_files": [], "truncated_evidence": [], "potential_secrets": []} def _strip_surrounding_quotes(s: str) -> str: s = s.strip() if len(s) >= 2 and s[0] == s[-1] and s[0] in ('"', "'"): return s[1:-1] return s for raw in files: entry = _normalize_entry(raw) path = entry.get("path") evidence = entry.get("evidence_excerpt") or entry.get("evidence") or "" # Remove surrounding quotes if the fallback YAML parser left them in place if isinstance(evidence, str): evidence = _strip_surrounding_quotes(evidence) # missing files if path: if not os.path.exists(path): report["missing_files"].append(path) # truncated evidence heuristics if isinstance(evidence, str): if len(evidence) > 1000 or evidence.strip().endswith("..."): report["truncated_evidence"].append( {"path": path, "evidence_excerpt": evidence} ) # potential secrets heuristics up = evidence.upper() if "PASSWORD" in up or "SECRET" in up or "BEGIN PRIVATE KEY" in evidence: report["potential_secrets"].append( {"path": path, "evidence_excerpt": evidence} ) return report