From 7bd7d0d18c1c0567abb7c0c6046724865005cb42 Mon Sep 17 00:00:00 2001
From: Sven Geboers
Date: Tue, 24 Mar 2026 21:26:38 +0100
Subject: [PATCH] feat(mindmodel): add checks utilities and tests

---
Review note: the hunk sizes and diffstat below were recomputed after the
review edits; the blob ids on the `index` lines are carried over from the
original patch and no longer match the post-image.

 scripts/mindmodel/checks.py            | 72 ++++++++++++++++++++++++++
 tests/scripts/mindmodel/test_checks.py | 49 ++++++++++++++++++
 2 files changed, 121 insertions(+)
 create mode 100644 scripts/mindmodel/checks.py
 create mode 100644 tests/scripts/mindmodel/test_checks.py

diff --git a/scripts/mindmodel/checks.py b/scripts/mindmodel/checks.py
new file mode 100644
index 0000000..b0bdd1a
--- /dev/null
+++ b/scripts/mindmodel/checks.py
@@ -0,0 +1,72 @@
+import os
+import re
+from typing import List, Optional
+
+# Patterns are compiled once at import time rather than on every call.
+
+# AWS access key id: "AKIA" followed by 16 upper-case alphanumerics.
+_AWS_KEY_RE = re.compile(r"AKIA[0-9A-Z]{16}")
+
+# Key/value assignments such as `api_key = "..."` or `api-key: ...`.
+# Values as short as 4 characters are accepted so short secrets are caught.
+_KEY_VALUE_RE = re.compile(
+    r"(?i)(?:api[_-]?key|secret[_-]?key|access[_-]?token|access[_-]?key|token|password|passwd|pwd)\s*[=:]+\s*['\"]?([A-Za-z0-9\-_=+/\.]{4,128})['\"]?"
+)
+
+# Long hexadecimal runs (32-128 digits), e.g. hashes or hex-encoded keys.
+_LONG_HEX_RE = re.compile(r"\b([a-f0-9]{32,128})\b", re.IGNORECASE)
+
+# ASCII and Unicode (U+2026) ellipses that mark a cut-off snippet.
+_ELLIPSES = ("...", "\u2026")
+
+
+def file_exists(base_dir: str, path: str) -> bool:
+    """Return True if ``path``, joined onto ``base_dir``, exists on disk.
+
+    Nothing is opened; files and directories both count. An empty or
+    ``None`` ``base_dir`` leaves ``path`` relative to the current working
+    directory.
+
+    Note: the join does not confine the result to ``base_dir``. An
+    absolute ``path`` replaces ``base_dir`` entirely and ``..`` segments
+    can climb out of it, so a True result is not proof of containment.
+    """
+    return os.path.exists(os.path.join(base_dir or "", path))
+
+
+def detect_truncated(snippet: Optional[str]) -> bool:
+    """Heuristically decide whether ``snippet`` looks truncated.
+
+    Returns True when the snippet, ignoring surrounding whitespace, ends
+    with an ellipsis ('...' or the single character U+2026), or when it
+    contains the substring 'truncat' in any letter case (for example
+    '[truncated]' or 'Truncation'). ``None`` and empty snippets are not
+    considered truncated.
+    """
+    if snippet is None:
+        return False
+    stripped = snippet.strip()
+    return stripped.endswith(_ELLIPSES) or "truncat" in stripped.lower()
+
+
+def find_potential_secrets(text: str) -> List[str]:
+    """Return secret-like strings found in ``text``.
+
+    Only the given text is scanned. Three heuristics run in order: AWS
+    access key ids, values of key/value assignments whose key looks
+    sensitive (api_key, token, password, ...), and long hexadecimal runs.
+
+    The result holds the captured value where a pattern has a capture
+    group and the whole match otherwise, de-duplicated with first-seen
+    order preserved. Empty or ``None`` input yields an empty list.
+    """
+    if not text:
+        return []
+
+    candidates: List[str] = []
+    candidates.extend(_AWS_KEY_RE.findall(text))
+    candidates.extend(m.group(1) for m in _KEY_VALUE_RE.finditer(text))
+    candidates.extend(_LONG_HEX_RE.findall(text))
+
+    # dict.fromkeys drops duplicates while keeping first-seen order.
+    return [c for c in dict.fromkeys(candidates) if c]
diff --git a/tests/scripts/mindmodel/test_checks.py b/tests/scripts/mindmodel/test_checks.py
new file mode 100644
index 0000000..e5ece9f
--- /dev/null
+++ b/tests/scripts/mindmodel/test_checks.py
@@ -0,0 +1,49 @@
+import os
+import tempfile
+
+from scripts.mindmodel import checks
+
+
+def test_file_exists(tmp_path):
+    # create a file under tmp_path
+    base = str(tmp_path)
+    p = tmp_path / "subdir"
+    p.mkdir()
+    f = p / "file.txt"
+    f.write_text("hello")
+
+    # path relative to base
+    assert checks.file_exists(base, "subdir/file.txt")
+    # non-existing
+    assert not checks.file_exists(base, "subdir/missing.txt")
+
+
+def test_detect_truncated():
+    assert checks.detect_truncated("This is a truncated snippet...")
+    assert checks.detect_truncated("Truncation marker: [truncated]")
+    assert checks.detect_truncated("contains truncatED word")
+    # single-character Unicode ellipsis
+    assert checks.detect_truncated("cut off mid-sentence\u2026")
+    assert not checks.detect_truncated("This is complete")
+    assert not checks.detect_truncated("")
+    assert not checks.detect_truncated(None)
+
+
+def test_find_potential_secrets():
+    text = """
+    api_key = "abcdEFGH1234ijklMNOP"
+    password: 'hunter2'
+    aws = AKIA1234567890ABCD12
+    random_hex = deadbeefdeadbeefdeadbeefdeadbeef
+    not_a_secret = short
+    """
+
+    found = checks.find_potential_secrets(text)
+    # should find api_key value, password, aws and long hex
+    assert "abcdEFGH1234ijklMNOP" in found
+    assert "hunter2" in found
+    assert any(item.startswith("AKIA") for item in found)
+    assert any("deadbeef" in item for item in found)
+    # each value is reported once and empty input yields nothing
+    assert len(found) == len(set(found))
+    assert checks.find_potential_secrets("") == []