You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
142 lines
4.9 KiB
142 lines
4.9 KiB
"""Conservative, report-only mindmodel/manifest validator.

This module provides a small validator that reads a manifest (YAML if
PyYAML is available, otherwise a tiny fallback parser) and reports
potential issues without making changes.

The returned report contains the keys:
- missing_files: list of file paths referenced in the manifest that don't exist
- truncated_evidence: list of items (dicts) where evidence_excerpt appears truncated
- potential_secrets: list of items (dicts) where evidence_excerpt looks like it may contain secrets

The manifest is expected to contain a top-level `files` list with
entries that are mappings and have at least a `path` (or `file_path`)
and optionally `evidence_excerpt`.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from typing import List, Dict, Any
|
|
|
|
|
|
def _load_yaml_native(path: str) -> Dict[str, Any]:
    """Load the manifest at *path* with PyYAML and return its top-level mapping.

    PyYAML is imported inside the function so that importing this module
    does not require the package; when it is missing the ImportError
    propagates to the caller.

    Returns an empty dict when the document is empty (or otherwise falsy).

    Raises:
        ImportError: if PyYAML is not installed.
        ValueError: if the document's top level is not a mapping.
        I/O errors from ``open`` and YAML parsing errors propagate unchanged.
    """
    import yaml  # type: ignore

    with open(path, "r", encoding="utf-8") as f:
        # safe_load never constructs arbitrary Python objects.
        data = yaml.safe_load(f) or {}

    # A list or scalar at the top level would violate the declared return
    # type and break callers that use mapping methods on the result.
    if not isinstance(data, dict):
        raise ValueError(
            f"manifest top level must be a mapping, got {type(data).__name__}"
        )
    return data
|
|
|
|
|
|
def _load_yaml_fallback(path: str) -> Dict[str, Any]:
    """Tiny YAML-ish fallback parser that understands a minimal manifest.

    It only supports a top-level `files:` key and a sequence of simple
    mappings with `-` list items and `key: value` pairs indented.
    This is intentionally conservative and fragile; it's only used when
    PyYAML is not available.

    Handled forms:
    - ``- key: value`` starts a new item with an inline first pair.
    - ``-`` alone starts a new, initially empty item.
    - ``- some/file`` (no colon) becomes ``{"path": "some/file"}``.
    - indented ``key: value`` lines are added to the current item.
    - an unindented non-list line (``files:`` or any other top-level key)
      closes the current item so it cannot absorb unrelated keys.

    Matching surrounding single or double quotes are removed from values.
    Blank lines and lines starting with ``#`` are ignored.

    Returns ``{"files": [...]}`` when at least one item was found,
    otherwise an empty dict.
    """

    def _scalar(text: str) -> str:
        # Drop one pair of matching surrounding quotes so that
        # `path: "a b.txt"` yields the real file name.
        text = text.strip()
        if len(text) >= 2 and text[0] == text[-1] and text[0] in ('"', "'"):
            return text[1:-1]
        return text

    files: List[Dict[str, Any]] = []
    current: Dict[str, Any] | None = None

    with open(path, "r", encoding="utf-8") as f:
        for raw in f:
            line = raw.rstrip("\n")
            stripped = line.strip()
            if not stripped or stripped.startswith("#"):
                continue

            if stripped == "-" or stripped.startswith("- "):
                # Start a new item, flushing the previous one first.
                if current is not None:
                    files.append(current)
                current = {}
                rest = stripped[1:].strip()
                if ":" in rest:
                    key, value = rest.split(":", 1)
                    current[key.strip()] = _scalar(value)
                elif rest:
                    # Scalar list item: treat the bare value as the path.
                    current["path"] = _scalar(rest)
                continue

            if not line[0].isspace():
                # Unindented and not a list item: a top-level key. It can
                # never belong to a list item, so end the current one.
                if current is not None:
                    files.append(current)
                    current = None
                continue

            # key: value lines (indented)
            if ":" in stripped and current is not None:
                key, value = stripped.split(":", 1)
                current[key.strip()] = _scalar(value)

    if current is not None:
        files.append(current)

    return {"files": files} if files else {}
|
|
|
|
|
|
def _normalize_entry(entry: Any) -> Dict[str, Any]:
    """Return *entry* as a mapping that exposes its file path under ``path``.

    - ``None`` (an empty list item) becomes an empty mapping, so it is not
      mistaken for a file literally named "None".
    - Any other non-dict value is treated as a bare path and stringified.
    - A dict that has ``file_path`` but no ``path`` is shallow-copied with
      the key renamed; the caller's dict is left untouched.
    - Any other dict is returned as-is (same object).
    """
    if entry is None:
        return {}
    if not isinstance(entry, dict):
        return {"path": str(entry)}
    # prefer path or file_path
    if "file_path" in entry and "path" not in entry:
        renamed = dict(entry)
        renamed["path"] = renamed.pop("file_path")
        return renamed
    return entry
|
|
|
|
|
|
def validate_manifest(manifest_path: str, report_only: bool = True) -> dict:
    """Validate a minimal mindmodel manifest and return a report.

    The manifest is only read; every check is a heuristic whose findings
    are collected into the returned report.

    Parameters
    - manifest_path: path to the YAML manifest file
    - report_only: unused flag for now; kept to emphasise this is report-only

    Returns a dict with keys: missing_files, truncated_evidence, potential_secrets

    Raises FileNotFoundError if ``manifest_path`` does not exist.
    """
    if not os.path.exists(manifest_path):
        raise FileNotFoundError(manifest_path)

    # Attempt to use PyYAML if available, otherwise fall back. The handler
    # is deliberately broad: any failure of the native loader (missing
    # package, parse error, ...) routes to the fallback parser.
    used_fallback = False
    try:
        manifest = _load_yaml_native(manifest_path)
    except Exception:
        manifest = _load_yaml_fallback(manifest_path)
        used_fallback = True

    report = {"missing_files": [], "truncated_evidence": [], "potential_secrets": []}

    def _strip_surrounding_quotes(s: str) -> str:
        s = s.strip()
        if len(s) >= 2 and s[0] == s[-1] and s[0] in ('"', "'"):
            return s[1:-1]
        return s

    # A manifest whose top level is not a mapping has no `files` to inspect.
    files = manifest.get("files") if isinstance(manifest, dict) else None
    if not files:
        return report
    if isinstance(files, (str, bytes)) or not hasattr(files, "__iter__"):
        # A lone scalar is one entry; iterating a string would otherwise
        # yield one bogus "file" per character.
        files = [files]

    for raw in files:
        entry = _normalize_entry(raw)
        path = entry.get("path")
        evidence = entry.get("evidence_excerpt") or entry.get("evidence") or ""

        # missing files
        if path:
            if isinstance(path, str):
                if used_fallback:
                    # The fallback parser may leave quotes around values.
                    path = _strip_surrounding_quotes(path)
            elif not isinstance(path, (bytes, os.PathLike)):
                # os.path.exists() treats an int as a file descriptor and
                # rejects other scalars; compare against the textual name.
                path = str(path)
            if not os.path.exists(path):
                report["missing_files"].append(path)

        if not isinstance(evidence, str):
            continue

        # Remove surrounding quotes if the fallback YAML parser left them in place
        evidence = _strip_surrounding_quotes(evidence)

        # truncated evidence heuristics: very long, or ends in an ellipsis
        # (three dots or the single U+2026 character)
        if len(evidence) > 1000 or evidence.strip().endswith(("...", "\u2026")):
            report["truncated_evidence"].append(
                {"path": path, "evidence_excerpt": evidence}
            )

        # potential secrets heuristics
        up = evidence.upper()
        # PEM armour headers carry an optional type between BEGIN and
        # PRIVATE KEY (RSA, EC, OPENSSH, ENCRYPTED, ...), so match the two
        # parts separately instead of the exact "BEGIN PRIVATE KEY".
        has_private_key = "BEGIN " in evidence and "PRIVATE KEY" in evidence
        if "PASSWORD" in up or "SECRET" in up or has_private_key:
            report["potential_secrets"].append(
                {"path": path, "evidence_excerpt": evidence}
            )

    return report
|
|
|