parent
2efd7ba3a0
commit
7bd7d0d18c
@ -0,0 +1,72 @@ |
|||||||
|
import os |
||||||
|
import re |
||||||
|
from typing import List |
||||||
|
|
||||||
|
|
||||||
|
def file_exists(base_dir: str, path: str) -> bool:
    """Report whether ``path``, joined onto ``base_dir``, exists on disk.

    The two arguments are combined with ``os.path.join`` and the result is
    handed to ``os.path.exists``; the target is never opened, and both
    files and directories count as existing.

    Args:
        base_dir: Directory prefix. A falsy value (empty string or ``None``)
            is replaced by an empty prefix, leaving ``path`` as given.
        path: Path to join onto ``base_dir``.

    Returns:
        True if the joined path exists, False otherwise.

    NOTE(review): nothing here verifies that the joined path stays inside
    ``base_dir``; confirm callers never pass untrusted absolute or ``..``
    paths.
    """
    prefix = base_dir or ""
    return os.path.exists(os.path.join(prefix, path))
||||||
|
|
||||||
|
|
||||||
|
def detect_truncated(snippet: str) -> bool:
    """Guess whether a text snippet has been cut short.

    A snippet is considered truncated when, after surrounding whitespace is
    stripped, it either ends with the three-dot ellipsis ``...`` or contains
    the substring ``truncat`` in any letter case (which covers "truncated",
    "truncation", and similar markers).

    Args:
        snippet: Text to inspect. ``None`` is accepted and treated as not
            truncated.

    Returns:
        True if either heuristic fires, False otherwise.
    """
    if snippet is None:
        return False

    trimmed = snippet.strip()
    return trimmed.endswith("...") or "truncat" in trimmed.lower()
||||||
|
|
||||||
|
|
||||||
|
def find_potential_secrets(text: str) -> List[str]:
    """Return secret-looking strings found in ``text``.

    Only the supplied text is scanned (no external resources). Three
    heuristics are applied, in this order:

    1. AWS access key ids: ``AKIA`` followed by 16 uppercase letters/digits.
    2. Key/value assignments whose key contains a well-known name
       (``api_key``, ``secret_key``, ``access_token``, ``access_key``,
       ``token``, ``password``, ``passwd``, ``pwd``), using ``=`` or ``:``
       as the separator. The key may itself be quoted, so JSON and dict
       literals such as ``{"password": "hunter2"}`` are recognised. Only the
       value (4-128 characters) is returned.
    3. Standalone hexadecimal runs of 32-128 characters.

    Args:
        text: Text to scan. A falsy value (``None`` or ``""``) yields ``[]``.

    Returns:
        The matched strings, de-duplicated, in order of first discovery
        (all AWS ids first, then key/value values, then hex runs).
    """
    if not text:
        return []

    candidates: List[str] = []

    # AWS access key id: "AKIA" + 16 uppercase alphanumerics.
    aws_pattern = re.compile(r"AKIA[0-9A-Z]{16}")
    candidates.extend(aws_pattern.findall(text))

    # key = value / key: value pairs. Values as short as 4 characters are
    # accepted so that short test credentials are still reported.
    kv_pattern = re.compile(
        r"(?i)(?:api[_-]?key|secret[_-]?key|access[_-]?token|access[_-]?key"
        r"|token|password|passwd|pwd)"
        # Optional closing quote of a quoted key ("password": ...); without
        # it the separator never matches for JSON / dict-literal input.
        r"['\"]?"
        r"\s*[=:]+\s*"
        r"['\"]?([A-Za-z0-9\-_=+/\.]{4,128})['\"]?"
    )
    candidates.extend(m.group(1) for m in kv_pattern.finditer(text))

    # Generic long hex strings (hashes, hex-encoded keys).
    long_hex = re.compile(r"\b([a-f0-9]{32,128})\b", re.IGNORECASE)
    candidates.extend(long_hex.findall(text))

    # dict preserves insertion order, so this de-duplicates while keeping
    # the first occurrence of each candidate.
    return list(dict.fromkeys(c for c in candidates if c))
||||||
@ -0,0 +1,43 @@ |
|||||||
|
import os |
||||||
|
import tempfile |
||||||
|
|
||||||
|
from scripts.mindmodel import checks |
||||||
|
|
||||||
|
|
||||||
|
def test_file_exists(tmp_path):
    """file_exists finds a real nested file and rejects a missing one."""
    # Build <tmp_path>/subdir/file.txt on disk.
    nested_dir = tmp_path / "subdir"
    nested_dir.mkdir()
    (nested_dir / "file.txt").write_text("hello")

    root = str(tmp_path)

    # A path given relative to the base directory is found.
    assert checks.file_exists(root, "subdir/file.txt")
    # A sibling that was never created is not.
    assert not checks.file_exists(root, "subdir/missing.txt")
||||||
|
|
||||||
|
|
||||||
|
def test_detect_truncated():
    """detect_truncated flags ellipsis/'truncat' snippets and nothing else."""
    truncated_samples = (
        "This is a truncated snippet...",
        "Truncation marker: [truncated]",
        "contains truncatED word",
    )
    for sample in truncated_samples:
        assert checks.detect_truncated(sample)

    complete_samples = ("This is complete", "")
    for sample in complete_samples:
        assert not checks.detect_truncated(sample)
||||||
|
|
||||||
|
|
||||||
|
def test_find_potential_secrets():
    """find_potential_secrets reports each kind of planted secret."""
    sample = """
    api_key = "abcdEFGH1234ijklMNOP"
    password: 'hunter2'
    aws = AKIA1234567890ABCD12
    random_hex = deadbeefdeadbeefdeadbeefdeadbeef
    not_a_secret = short
    """

    secrets_found = checks.find_potential_secrets(sample)

    # Key/value heuristics: the api_key value and the password value.
    assert "abcdEFGH1234ijklMNOP" in secrets_found
    assert "hunter2" in secrets_found
    # AWS access key id heuristic.
    assert any(entry.startswith("AKIA") for entry in secrets_found)
    # Long hex string heuristic.
    assert any("deadbeef" in entry for entry in secrets_found)
||||||
Loading…
Reference in new issue