You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
72 lines
2.3 KiB
72 lines
2.3 KiB
import os
|
|
import re
|
|
from typing import List
|
|
|
|
|
|
def file_exists(base_dir: str, path: str) -> bool:
    """Report whether ``path``, joined onto ``base_dir``, exists on disk.

    The check is purely a filesystem lookup: nothing is opened or read, and
    both regular files and directories count as existing.

    Args:
        base_dir: Directory the path is joined onto. Any falsy value
            (``None`` or ``""``) is treated as an empty base, so ``path``
            is looked up as given.
        path: Path to join onto ``base_dir``.

    Returns:
        True if the joined path exists, False otherwise.

    Note:
        No check is made that the joined path stays inside ``base_dir``.
    """
    # A falsy base collapses to "" so os.path.join leaves `path` untouched.
    root = base_dir if base_dir else ""
    return os.path.exists(os.path.join(root, path))
|
|
|
|
|
|
def detect_truncated(snippet: str) -> bool:
    """Guess whether a text snippet has been cut short.

    A snippet is considered truncated when, after surrounding whitespace is
    stripped, it either ends with a three-dot ellipsis (``...``) or contains
    the substring ``truncat`` in any letter case (covering "truncated",
    "truncation", and similar).

    Args:
        snippet: Text to inspect. ``None`` is accepted and is never
            considered truncated.

    Returns:
        True if either truncation signal is present, False otherwise.
    """
    if snippet is None:
        return False

    trimmed = snippet.strip()
    return trimmed.endswith("...") or "truncat" in trimmed.lower()
|
|
|
|
|
|
def find_potential_secrets(text: str) -> List[str]:
    """Collect substrings of ``text`` that look like secrets.

    Only the supplied text is inspected. Three heuristics run in order and
    their hits are concatenated in that order:

    1. AWS-style access key ids: ``AKIA`` followed by 16 uppercase letters
       or digits (the whole match is reported).
    2. Key/value assignments such as ``api_key = "..."`` or
       ``password: ...`` (only the value is reported; values of 4 to 128
       characters are accepted).
    3. Runs of 32 to 128 hexadecimal characters delimited by word
       boundaries, in either letter case.

    Args:
        text: Text to scan. A falsy value (``None`` or ``""``) yields an
            empty list.

    Returns:
        The distinct candidate strings, in order of first appearance.
    """
    if not text:
        return []

    # Heuristic 1: AWS access key id.
    hits: List[str] = re.findall(r"AKIA[0-9A-Z]{16}", text)

    # Heuristic 2: key/value pairs. The single capture group means findall
    # yields just the value. The lower bound of 4 characters is deliberate,
    # so that short test values are still caught.
    hits += re.findall(
        r"(?i)(?:api[_-]?key|secret[_-]?key|access[_-]?token|access[_-]?key|token|password|passwd|pwd)\s*[=:]+\s*['\"]?([A-Za-z0-9\-_=+/\.]{4,128})['\"]?",
        text,
    )

    # Heuristic 3: long hexadecimal runs.
    hits += [
        found.group(1)
        for found in re.finditer(r"\b([a-f0-9]{32,128})\b", text, re.IGNORECASE)
    ]

    # dict keys keep insertion order, so this drops repeats while keeping
    # the first occurrence of each candidate in place.
    return list(dict.fromkeys(hit for hit in hits if hit))
|
|
|