# motief/scripts/mindmodel/checks.py

import os
import re
from typing import List


def file_exists(base_dir: str, path: str) -> bool:
    """Report whether ``path``, joined onto ``base_dir``, exists on disk.

    The check is a pure existence test (``os.path.exists``); the target is
    never opened, and both files and directories count as existing.

    Args:
        base_dir: Directory the path is joined onto. A falsy value (``None``
            or ``""``) is treated as the empty string, so ``path`` is then
            interpreted as given.
        path: Path to test. Note that ``os.path.join`` discards ``base_dir``
            when ``path`` is absolute.

    Returns:
        True if the joined path exists, False otherwise.
    """
    root = base_dir or ""
    return os.path.exists(os.path.join(root, path))


def detect_truncated(snippet: str) -> bool:
    """Guess whether a snippet of text has been cut short.

    A snippet is considered truncated when, after stripping surrounding
    whitespace, it ends with a three-dot ellipsis (``...``), or when it
    contains the substring ``truncat`` in any letter case (which covers
    "truncated", "truncation", and similar markers).

    Args:
        snippet: Text to inspect; ``None`` is accepted and is never
            considered truncated.

    Returns:
        True if either heuristic fires, False otherwise.
    """
    if snippet is None:
        return False
    trimmed = snippet.strip()
    return trimmed.endswith("...") or "truncat" in trimmed.lower()


def find_potential_secrets(text: str) -> List[str]:
    """Return secret-looking substrings found in ``text``.

    Three regex heuristics are applied to the given text only, in this order:

    1. AWS access key ids: ``AKIA`` followed by 16 uppercase letters/digits.
    2. Key/value assignments such as ``api_key = "..."`` or
       ``password: ...``; only the value is reported.
    3. Standalone hexadecimal runs of 32 to 128 characters.

    Args:
        text: Text to scan. A falsy value (``None`` or ``""``) yields an
            empty list.

    Returns:
        The matched strings in order of first discovery (all matches of
        heuristic 1, then 2, then 3), with duplicates removed.
    """
    if not text:
        return []

    access_key_re = re.compile(r"AKIA[0-9A-Z]{16}")
    # Values as short as 4 characters are accepted on purpose so that short
    # test/placeholder secrets are still reported.
    assignment_re = re.compile(
        r"(?i)(?:api[_-]?key|secret[_-]?key|access[_-]?token|access[_-]?key|token|password|passwd|pwd)\s*[=:]+\s*['\"]?([A-Za-z0-9\-_=+/\.]{4,128})['\"]?"
    )
    # Word-bounded hexadecimal runs (either letter case) of 32-128 chars.
    hex_run_re = re.compile(r"\b([a-f0-9]{32,128})\b", re.IGNORECASE)

    hits: List[str] = []
    hits += access_key_re.findall(text)
    hits += [match.group(1) for match in assignment_re.finditer(text)]
    hits += hex_run_re.findall(text)

    # dict.fromkeys keeps the first occurrence of each hit in insertion
    # order, which de-duplicates without reordering.
    return [hit for hit in dict.fromkeys(hits) if hit]