feat(mindmodel): add checks utilities and tests

main
Sven Geboers 1 month ago
parent 2efd7ba3a0
commit 7bd7d0d18c
  1. 72
      scripts/mindmodel/checks.py
  2. 43
      tests/scripts/mindmodel/test_checks.py

@@ -0,0 +1,72 @@
import os
import re
from typing import List
def file_exists(base_dir: str, path: str) -> bool:
    """Report whether ``path``, joined onto ``base_dir``, exists on disk.

    The candidate location is built with ``os.path.join`` and probed with
    ``os.path.exists``, so files and directories both count and nothing is
    opened. A falsy ``base_dir`` (empty string or ``None``) is replaced by
    the empty string, which leaves ``path`` interpreted exactly as given.

    Note: ``os.path.join`` discards ``base_dir`` when ``path`` is absolute,
    and ``..`` segments are not rejected, so a True result does not prove
    that the target lies inside ``base_dir``.
    """
    root = base_dir or ""
    return os.path.exists(os.path.join(root, path))
def detect_truncated(snippet: str) -> bool:
    """Heuristically decide whether a snippet looks truncated.

    Returns True when, after stripping surrounding whitespace, the snippet
    ends with an ellipsis -- either three ASCII dots (``...``) or the single
    Unicode ellipsis character (U+2026) -- or when it contains the substring
    ``truncat`` in any letter case (which covers "truncated", "truncation",
    "[truncated]" and similar markers).

    ``None`` and empty or whitespace-only snippets return False.
    """
    if snippet is None:
        return False
    text = snippet.strip()
    # str.endswith accepts a tuple, so both ellipsis spellings are checked
    # in one call. Many tools emit the single-character form when cutting
    # text, which a plain "..." comparison would miss.
    if text.endswith(("...", "\u2026")):
        return True
    return "truncat" in text.lower()
def find_potential_secrets(text: str) -> List[str]:
    """Return secret-looking strings found in ``text``.

    Only the supplied text is inspected. Three regex heuristics run in
    this order, and their hits are concatenated in that order:

    1. AWS access key ids: ``AKIA`` followed by 16 uppercase letters/digits.
    2. Key/value assignments such as ``api_key = "..."`` or
       ``password: ...``; only the value (4 to 128 characters, optionally
       quoted) is reported, not the key.
    3. Stand-alone hexadecimal runs of 32 to 128 characters, in any case.

    Duplicates are dropped, keeping the first occurrence. Falsy input
    (empty string or ``None``) yields an empty list.
    """
    if not text:
        return []

    # Each pattern has at most one capture group, so ``findall`` yields
    # plain strings: the captured value when a group exists, otherwise the
    # whole match.
    patterns = (
        re.compile(r"AKIA[0-9A-Z]{16}"),
        re.compile(
            r"(?i)(?:api[_-]?key|secret[_-]?key|access[_-]?token|access[_-]?key|token|password|passwd|pwd)\s*[=:]+\s*['\"]?([A-Za-z0-9\-_=+/\.]{4,128})['\"]?"
        ),
        re.compile(r"\b([a-f0-9]{32,128})\b", re.IGNORECASE),
    )

    hits: List[str] = []
    for pattern in patterns:
        hits.extend(pattern.findall(text))

    # dict keys keep insertion order, giving an order-preserving dedupe;
    # empty strings are filtered out.
    return [hit for hit in dict.fromkeys(hits) if hit]

@@ -0,0 +1,43 @@
import os
import tempfile
from scripts.mindmodel import checks
def test_file_exists(tmp_path):
    """file_exists finds a file given relative to the base, and no other."""
    # Lay out <tmp_path>/subdir/file.txt on disk.
    nested_dir = tmp_path / "subdir"
    nested_dir.mkdir()
    (nested_dir / "file.txt").write_text("hello")
    root = str(tmp_path)

    # A relative path that was created under the base is reported present.
    assert checks.file_exists(root, "subdir/file.txt")
    # A name that was never created is reported missing.
    assert not checks.file_exists(root, "subdir/missing.txt")
def test_detect_truncated():
    """detect_truncated flags ellipsis/marker snippets and nothing else."""
    # Trailing "..." and any-case "truncat" substrings count as truncated.
    flagged = (
        "This is a truncated snippet...",
        "Truncation marker: [truncated]",
        "contains truncatED word",
    )
    for snippet in flagged:
        assert checks.detect_truncated(snippet)

    # Complete text and the empty string are not flagged.
    for snippet in ("This is complete", ""):
        assert not checks.detect_truncated(snippet)
def test_find_potential_secrets():
    """find_potential_secrets reports each kind of secret in the sample."""
    sample = """
api_key = "abcdEFGH1234ijklMNOP"
password: 'hunter2'
aws = AKIA1234567890ABCD12
random_hex = deadbeefdeadbeefdeadbeefdeadbeef
not_a_secret = short
"""
    hits = checks.find_potential_secrets(sample)

    # Values from key/value assignments are reported verbatim.
    for expected in ("abcdEFGH1234ijklMNOP", "hunter2"):
        assert expected in hits
    # The AWS key id and the long hex run are each picked up.
    assert any(hit.startswith("AKIA") for hit in hits)
    assert any("deadbeef" in hit for hit in hits)
Loading…
Cancel
Save