You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

72 lines
2.3 KiB

import os
import re
from typing import List
def file_exists(base_dir: str, path: str) -> bool:
    """Return True if ``path``, joined onto ``base_dir``, exists on disk.

    The file is never opened; only its existence is checked, so both files
    and directories count.

    Args:
        base_dir: Directory the path is joined onto. A falsy value (``None``
            or ``""``) is treated as an empty base, so ``path`` is then
            checked as given (relative to the current working directory
            unless it is absolute).
        path: Path to check. ``None`` is treated as "does not exist".

    Returns:
        True if the joined path exists, False otherwise.

    Note:
        No containment check is performed. ``os.path.join`` discards
        ``base_dir`` when ``path`` is absolute, and ``..`` segments are not
        rejected, so a True result does not prove the target lies inside
        ``base_dir``.
    """
    if path is None:
        # os.path.join would raise TypeError; a missing path simply does
        # not exist.
        return False
    base = base_dir or ""
    return os.path.exists(os.path.join(base, path))
def detect_truncated(snippet: str) -> bool:
    """Heuristically decide whether ``snippet`` looks truncated.

    A snippet is considered truncated when, after stripping surrounding
    whitespace, it either ends with an ellipsis (three ASCII dots ``...`` or
    the single Unicode character U+2026) or contains the substring
    ``truncat`` in any letter case (matching "truncated", "truncation", ...).

    Args:
        snippet: Text to inspect. ``None`` is accepted and reported as not
            truncated.

    Returns:
        True if a truncation marker was found, False otherwise.
    """
    if snippet is None:
        return False
    stripped = snippet.strip()
    # str.endswith accepts a tuple, so both ellipsis spellings are covered
    # by one call.
    return stripped.endswith(("...", "\u2026")) or "truncat" in stripped.lower()
# Patterns are compiled once at import time instead of on every call.

# AWS access key id (common form): "AKIA" followed by 16 uppercase
# alphanumerics.
_AWS_ACCESS_KEY_RE = re.compile(r"AKIA[0-9A-Z]{16}")

# Key/value assignments such as `api_key = "..."` or `api-key: ...`.
# Values as short as 4 characters are allowed so short test values are
# caught too; group 1 captures the value without surrounding quotes.
_KEY_VALUE_RE = re.compile(
    r"(?i)(?:api[_-]?key|secret[_-]?key|access[_-]?token|access[_-]?key"
    r"|token|password|passwd|pwd)"
    r"\s*[=:]+\s*['\"]?([A-Za-z0-9\-_=+/\.]{4,128})['\"]?"
)

# Stand-alone runs of 32-128 hex digits (hash / key-like material).
_LONG_HEX_RE = re.compile(r"\b([a-f0-9]{32,128})\b", re.IGNORECASE)


def find_potential_secrets(text: str) -> List[str]:
    """Scan ``text`` for strings that look like secrets.

    Only the supplied text is inspected (no external resources). Three
    heuristics are applied, in this order:

    1. AWS access key ids (``AKIA`` + 16 uppercase alphanumerics).
    2. Values assigned to secret-like keys (``api_key``, ``token``,
       ``password``, ...); only the value is reported.
    3. Stand-alone hexadecimal strings of 32 to 128 characters.

    Args:
        text: Text to scan. A falsy value (``None`` or ``""``) yields an
            empty list.

    Returns:
        The distinct candidate strings. Results are grouped by heuristic in
        the order listed above, and within each heuristic by position in
        ``text``; a string found more than once is reported only at its
        first occurrence.
    """
    if not text:
        return []

    candidates: List[str] = []
    candidates.extend(_AWS_ACCESS_KEY_RE.findall(text))
    candidates.extend(m.group(1) for m in _KEY_VALUE_RE.finditer(text))
    candidates.extend(_LONG_HEX_RE.findall(text))

    # dict preserves insertion order, so fromkeys de-duplicates while keeping
    # the first occurrence of each candidate; empty strings are dropped.
    return list(dict.fromkeys(c for c in candidates if c))