import os
import re
from typing import List, Optional

# Compiled once at import time instead of on every call.

# AWS access key id: literal "AKIA" followed by 16 uppercase letters/digits.
_AWS_KEY_ID_RE = re.compile(r"AKIA[0-9A-Z]{16}")

# key/value assignments such as `api_key = "..."` or `api-key: ...`.
# Values as short as 4 characters are accepted so short test secrets are
# caught too; group 1 is the value without its surrounding quotes.
_KV_SECRET_RE = re.compile(
    r"(?i)(?:api[_-]?key|secret[_-]?key|access[_-]?token|access[_-]?key"
    r"|token|password|passwd|pwd)"
    r"\s*[=:]+\s*['\"]?([A-Za-z0-9\-_=+/\.]{4,128})['\"]?"
)

# Stand-alone runs of 32 to 128 hex digits (either case), delimited by word
# boundaries. Note this matches hex only, not general base64.
_LONG_HEX_RE = re.compile(r"\b([a-f0-9]{32,128})\b", re.IGNORECASE)


def file_exists(base_dir: Optional[str], path: Optional[str]) -> bool:
    """Check whether a path exists relative to base_dir without opening it.

    The path is joined onto ``base_dir`` with ``os.path.join`` and tested with
    ``os.path.exists``, so both files and directories count as existing.

    Args:
        base_dir: Directory the path is joined onto. A falsy value (``None``
            or ``""``) means the path is used as given, i.e. relative to the
            current working directory.
        path: Path to test. ``None`` yields ``False`` rather than an error.

    Returns:
        True if the joined path exists on the filesystem, False otherwise.

    Note:
        No containment check is performed: an absolute ``path`` makes
        ``os.path.join`` discard ``base_dir``, and ``..`` components may
        resolve outside it.
    """
    if path is None:
        # os.path.join would raise TypeError; a missing path cannot exist.
        return False
    return os.path.exists(os.path.join(base_dir or "", path))


def detect_truncated(snippet: Optional[str]) -> bool:
    """Heuristically detect whether a snippet looks truncated.

    Args:
        snippet: Text to inspect; ``None`` is treated as not truncated.

    Returns:
        True if, after stripping surrounding whitespace, the snippet ends with
        ``'...'`` or contains the substring ``'truncat'`` in any letter case;
        False otherwise.
    """
    if snippet is None:
        return False
    stripped = snippet.strip()
    return stripped.endswith("...") or "truncat" in stripped.lower()


def find_potential_secrets(text: Optional[str]) -> List[str]:
    """Scan text and return potential secret-like strings.

    Only the provided text is scanned (no external resources). Three
    heuristics are applied, and their hits are reported in this order:

    1. AWS access key ids (``AKIA`` plus 16 uppercase letters/digits).
    2. Values of key/value assignments whose key looks like a credential
       name (``api_key``, ``secret-key``, ``token``, ``password``, ...);
       only the value is reported, without surrounding quotes.
    3. Stand-alone runs of 32 to 128 hexadecimal digits.

    Args:
        text: Text to scan. A falsy value (``None`` or ``""``) yields ``[]``.

    Returns:
        The matched strings, de-duplicated, in first-seen order.
    """
    if not text:
        return []

    candidates = [
        *_AWS_KEY_ID_RE.findall(text),
        *(match.group(1) for match in _KV_SECRET_RE.finditer(text)),
        *_LONG_HEX_RE.findall(text),
    ]
    # dict preserves insertion order, so this de-duplicates while keeping the
    # first occurrence of each candidate; empty strings are dropped.
    return list(dict.fromkeys(c for c in candidates if c))