parent
2efd7ba3a0
commit
7bd7d0d18c
@ -0,0 +1,72 @@ |
||||
import os |
||||
import re |
||||
from typing import List |
||||
|
||||
|
||||
def file_exists(base_dir: str, path: str) -> bool:
    """Check whether a path exists under base_dir without opening the file.

    The path is joined onto ``base_dir`` and normalised lexically (symlinks
    are not resolved). The result is True only when the normalised path
    stays inside ``base_dir`` and exists on the filesystem (file or
    directory).

    Args:
        base_dir: Directory that ``path`` is interpreted relative to. When
            empty or None, ``path`` is checked as given (relative to the
            current working directory) and no containment check is applied.
        path: Path to look up, normally relative to ``base_dir``.

    Returns:
        True if the path exists and does not escape ``base_dir``; False
        otherwise, including for absolute paths or ``..`` segments that
        point outside ``base_dir``.
    """
    if not base_dir:
        # No base to anchor to: plain existence check on the given path.
        return os.path.exists(path)

    base = os.path.abspath(base_dir)
    # os.path.join drops ``base`` when ``path`` is absolute, and ``..`` can
    # climb out of it, so normalise first and verify containment explicitly.
    full = os.path.abspath(os.path.join(base, path))
    try:
        inside_base = os.path.commonpath([base, full]) == base
    except ValueError:
        # Raised e.g. for paths on different drives on Windows.
        return False
    return inside_base and os.path.exists(full)
||||
|
||||
|
||||
def detect_truncated(snippet: str) -> bool:
    """Heuristically detect whether a snippet is truncated.

    Args:
        snippet: Text to inspect. ``None`` is tolerated and treated as not
            truncated.

    Returns:
        True if, after trimming surrounding whitespace, the snippet ends
        with an ellipsis (three ASCII dots or the single Unicode ellipsis
        character U+2026), or if it contains the substring 'truncat'
        (case-insensitive) anywhere. False otherwise.
    """
    if snippet is None:
        return False
    trimmed = snippet.strip()
    # Accept both spellings of an ellipsis; many tools emit U+2026.
    if trimmed.endswith(("...", "\u2026")):
        return True
    # 'truncat' covers "truncated", "truncation", "[truncated]", etc.
    return "truncat" in trimmed.lower()
||||
|
||||
|
||||
# AWS access key id: the literal prefix AKIA followed by 16 upper-case
# alphanumeric characters.
_AWS_KEY_RE = re.compile(r"AKIA[0-9A-Z]{16}")

# key = value / key: value assignments for common secret-ish key names.
# Values as short as 4 characters are accepted to catch short test values.
# Whitespace around the separator is limited to spaces and tabs so that an
# empty assignment cannot swallow the next line's first token as its value.
_KV_SECRET_RE = re.compile(
    r"(?i)(?:api[_-]?key|secret[_-]?key|access[_-]?token|access[_-]?key"
    r"|token|password|passwd|pwd)"
    r"[ \t]*[=:]+[ \t]*"
    r"['\"]?([A-Za-z0-9\-_=+/\.]{4,128})['\"]?"
)

# Stand-alone runs of 32 to 128 hexadecimal digits (hashes, hex tokens).
_LONG_HEX_RE = re.compile(r"\b([a-f0-9]{32,128})\b", re.IGNORECASE)


def find_potential_secrets(text: str) -> List[str]:
    """Scan text and return potential secret-like strings.

    Only the provided text is scanned (no external resources). Three
    heuristics are applied, in this order: AWS access key ids, values of
    secret-looking key/value assignments, and long hexadecimal strings.

    Args:
        text: Text to scan. Empty or None input yields an empty list.

    Returns:
        The candidate strings in order of discovery (AWS keys, then
        key/value values, then hex strings), with duplicates removed. For
        key/value matches only the value is returned, without quotes.
    """
    if not text:
        return []

    candidates: List[str] = []
    candidates.extend(_AWS_KEY_RE.findall(text))
    candidates.extend(m.group(1) for m in _KV_SECRET_RE.finditer(text))
    candidates.extend(_LONG_HEX_RE.findall(text))

    # dict keys keep insertion order, giving an order-preserving dedupe.
    return list(dict.fromkeys(c for c in candidates if c))
||||
@ -0,0 +1,43 @@ |
||||
import os |
||||
import tempfile |
||||
|
||||
from scripts.mindmodel import checks |
||||
|
||||
|
||||
def test_file_exists(tmp_path):
    """file_exists finds a nested file relative to base and rejects a missing one."""
    nested_dir = tmp_path / "subdir"
    nested_dir.mkdir()
    (nested_dir / "file.txt").write_text("hello")

    root = str(tmp_path)

    # An existing file, addressed relative to the base directory.
    assert checks.file_exists(root, "subdir/file.txt")
    # A sibling name that was never created.
    assert not checks.file_exists(root, "subdir/missing.txt")
||||
|
||||
|
||||
def test_detect_truncated():
    """detect_truncated flags ellipsis endings and 'truncat' markers only."""
    truncated_samples = (
        "This is a truncated snippet...",
        "Truncation marker: [truncated]",
        "contains truncatED word",
    )
    for sample in truncated_samples:
        assert checks.detect_truncated(sample)

    complete_samples = ("This is complete", "")
    for sample in complete_samples:
        assert not checks.detect_truncated(sample)
||||
|
||||
|
||||
def test_find_potential_secrets():
    """find_potential_secrets reports each secret-like value and nothing else."""
    text = """
    api_key = "abcdEFGH1234ijklMNOP"
    password: 'hunter2'
    aws = AKIA1234567890ABCD12
    random_hex = deadbeefdeadbeefdeadbeefdeadbeef
    not_a_secret = short
    """

    found = checks.find_potential_secrets(text)

    # Each heuristic should report the exact value, not just a fragment.
    assert "abcdEFGH1234ijklMNOP" in found
    assert "hunter2" in found
    assert "AKIA1234567890ABCD12" in found
    assert "deadbeefdeadbeefdeadbeefdeadbeef" in found
    # A value under a key name that is not secret-like must not be reported.
    assert "short" not in found
    # Results are deduplicated.
    assert len(found) == len(set(found))
||||
Loading…
Reference in new issue