# File listing metadata: Python, executable file, 250 lines, 6.3 KiB.
import re
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
# Absolute form of the current working directory; used as the cwd for
# subprocess calls and as the base for every scanned file path.
root = Path(".").resolve()
|
|
|
|
# Matches a whole-line assignment: optional leading whitespace and `export`,
# an identifier (group 1), a `:` or `=` separator, then the raw value (group 2).
SENSITIVE_KEY_RE = re.compile(r"^\s*(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*[:=]\s*(.+)$")
|
|
|
|
# Lower-case fragments that mark a key name as credential-related.
# has_sensitive_key() tests them as substrings of the lower-cased key.
SENSITIVE_KEYWORDS = (
    "api_key", "apikey", "token", "secret",
    "password", "passwd", "private_key", "webhook",
    "client_secret", "auth", "credential", "access_token",
)
|
|
|
|
# 6-12 digits, a colon, then at least 35 characters from [A-Za-z0-9_-],
# delimited by word boundaries.
TELEGRAM_BOT_TOKEN_RE = re.compile(r"\b\d{6,12}:[A-Za-z0-9_-]{35,}\b")
|
|
# Case-insensitive match for tokens recognisable by a fixed prefix followed by
# a minimum-length body (presumably GitHub, GitLab, Stripe, Slack and Google
# API key formats, among others -- confirm against the vendors' docs).
# The anchors must be the regex word boundary `\b`; inside a raw string `\\b`
# would instead demand a literal backslash followed by "b" and never match.
KNOWN_TOKEN_RE = re.compile(
    r"(?i)\b("
    r"github_pat_[A-Za-z0-9_]{20,}|"
    r"ghp_[A-Za-z0-9_]{30,}|"
    r"gho_[A-Za-z0-9_]{20,}|"
    r"glpat-[A-Za-z0-9_-]{20,}|"
    r"pk_live_[A-Za-z0-9]{20,}|"
    r"sk_live_[A-Za-z0-9]{20,}|"
    r"sk_test_[A-Za-z0-9]{20,}|"
    r"xox[baprs]-[A-Za-z0-9-]{10,}|"
    r"xapp-[A-Za-z0-9]{30,}|"
    r"vk_[A-Za-z0-9]{20,}|"
    r"AIza[0-9A-Za-z_-]{35,}"
    r")\b"
)
|
|
# Three dot-separated segments of [A-Za-z0-9_-] characters, the first of which
# starts with "eyJ", delimited by word boundaries.
JWT_RE = re.compile(r"\beyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+\b")
|
|
# http(s) URL whose authority section has the form `user:password@...`.
URL_CRED_RE = re.compile(r"https?://[^/\s:@]+:[^/@\s]+@[^\s]+")
|
|
|
|
# Substrings that mark a value as a template or dummy rather than a real
# secret. is_placeholder() looks for them in the lower-cased value.
PLACEHOLDER_MARKERS = (
    # variable / environment interpolation
    "${", "process.env.", "import.meta.env.",
    # "fill me in" style hints
    "your-", "your_", "<your",
    # explicit dummy wording
    "placeholder", "example", "sample", "changeme",
)
|
|
|
|
# File extensions (lower-case, with leading dot) that are never scanned.
SKIP_SUFFIXES = {
    # images
    ".png", ".jpg", ".jpeg", ".gif", ".webp", ".ico",
    # video
    ".mp4", ".mov",
    # archives and compiled / binary artefacts
    ".zip", ".jar", ".pyc", ".wasm", ".node", ".dylib", ".so",
    # documents and prose
    ".pdf", ".md", ".mdx", ".rst", ".txt",
}
|
|
|
|
# Directory fragments whose contents are not scanned; they are compared
# against the lower-cased POSIX-style path of each file.
SKIP_PATH_SUBSTRINGS = (
    "/prisma/generated/", "/runtime/",
    "/dist/", "/build/", "/coverage/",
)
|
|
|
|
|
|
def run(cmd: list[str]) -> str:
    """Execute *cmd* with ``root`` as working directory and return its stdout.

    Output is decoded as text and stderr is discarded.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status.
    """
    return subprocess.check_output(
        cmd,
        cwd=str(root),
        stderr=subprocess.DEVNULL,
        text=True,
    )
|
|
|
|
|
|
def clean_value(raw: str) -> str:
    """Normalise the right-hand side of an assignment for inspection.

    Surrounding whitespace, a trailing ``# comment`` and one pair of matching
    quotes (``'``, ``"`` or a backtick) are removed.

    Args:
        raw: Text that followed the ``=`` / ``:`` separator on the line.

    Returns:
        The bare value. Input that is not a simple (optionally quoted)
        literal, e.g. ``"a" + b``, is returned stripped but otherwise intact.
    """
    quotes = ("'", '"', "`")
    value = raw.strip()

    # A leading quoted literal followed by nothing (or only a comment) is
    # taken verbatim, so a "#" *inside* the quotes is kept as part of the
    # value instead of being mistaken for the start of a comment.
    if value[:1] in quotes:
        closing = value.find(value[0], 1)
        if closing != -1:
            trailer = value[closing + 1:].lstrip()
            if not trailer or trailer.startswith("#"):
                return value[1:closing].strip()

    # Otherwise drop an inline comment, unless the whole value is one.
    if "#" in value and not value.startswith("#"):
        value = value.split("#", 1)[0].rstrip()

    if len(value) >= 2 and value[0] == value[-1] and value[0] in quotes:
        return value[1:-1].strip()
    return value
|
|
|
|
|
|
def is_placeholder(value: str) -> bool:
    """Tell whether *value* is empty or an obvious stand-in for a real value.

    Treated as placeholders: blank text, an empty pair of quotes or
    backticks, anything wrapped in ``<...>``, anything starting with ``$``,
    and any text containing one of ``PLACEHOLDER_MARKERS`` (compared
    case-insensitively).
    """
    candidate = value.strip()
    if candidate in ("", '""', "''", "``"):
        return True

    angle_wrapped = candidate.startswith("<") and candidate.endswith(">")
    if angle_wrapped or candidate.startswith("$"):
        return True

    lowered = candidate.lower()
    for marker in PLACEHOLDER_MARKERS:
        if marker in lowered:
            return True
    return False
|
|
|
|
|
|
def is_expression(value: str) -> bool:
    """Report whether *value* contains a character that suggests code.

    Brackets of any kind, the operators ``+ - * /``, a space or a pipe all
    count; a value containing none of them is treated as a bare literal.
    """
    code_chars = frozenset("(){}[]+-*/ |")
    return any(ch in code_chars for ch in value)
|
|
|
|
|
|
def is_likely_secret_value(value: str) -> bool:
    """Heuristically decide whether *value* looks like a real credential.

    Recognised credential formats (Telegram-style bot token, known token
    prefixes, JWT, URL with embedded ``user:password``) are checked first.
    They legitimately contain ``-`` or ``/``, so they must be tested before
    the expression filter, which would otherwise reject them.

    Everything else has to be a long, high-variety literal that is not an
    expression.

    Args:
        value: Cleaned right-hand side of an assignment.

    Returns:
        True when the value matches a known credential format or one of the
        generic length/charset heuristics; False otherwise.
    """
    if not value:
        return False

    # Known, structured credential formats.
    if (
        TELEGRAM_BOT_TOKEN_RE.fullmatch(value)
        or KNOWN_TOKEN_RE.search(value)
        or JWT_RE.fullmatch(value)
        or URL_CRED_RE.search(value)
    ):
        return True

    # Anything that looks like code is not a literal secret.
    # NOTE(review): is_expression() also rejects "+", "-" and "/", so those
    # characters in the generic character classes below can never be reached.
    if is_expression(value):
        return False

    raw = value.strip().strip("'\"")
    if len(raw) < 24:
        return False

    # Long purely alphanumeric blob.
    if re.fullmatch(r"[A-Za-z0-9]{40,}", raw):
        return True

    # Shorter blobs must show enough distinct characters to count as random.
    if re.fullmatch(r"[A-Za-z0-9_./-]{30,}", raw) and len(set(raw)) >= 12:
        return True

    if re.fullmatch(r"[A-Za-z0-9+/._-]{40,}", raw) and len(set(raw)) >= 14:
        return True

    return False
|
|
|
|
|
|
def has_sensitive_key(key: str) -> bool:
    """Return True when the lower-cased *key* contains any SENSITIVE_KEYWORDS entry."""
    folded = key.lower()
    for fragment in SENSITIVE_KEYWORDS:
        if fragment in folded:
            return True
    return False
|
|
|
|
|
|
# --- Scan every git-tracked file and report credential-like content. ---

# `-z` makes git emit NUL-terminated, unquoted paths, so file names with
# spaces, quotes or non-ASCII characters are not mangled or skipped.
file_list_text = run(["git", "ls-files", "-z"])
files = [p for p in file_list_text.split("\0") if p.strip()]

# Each finding is (file path, 1-based line number, matched or masked text, reason).
findings_high: list[tuple[str, int, str, str]] = []
findings_warn: list[tuple[str, int, str, str]] = []

for rel in files:
    path = root / rel
    if not path.is_file():
        continue

    # Apply the skip rules to the repository-relative path only: a checkout
    # that itself lives below e.g. ".../build/" must not skip the whole repo.
    # The leading "/" lets top-level directories ("dist/...") match too.
    rel_path = "/" + rel.lower()
    if any(skip in rel_path for skip in SKIP_PATH_SUBSTRINGS):
        continue
    if path.suffix.lower() in SKIP_SUFFIXES:
        continue

    # Best effort: oversized or unreadable files are skipped, not fatal.
    try:
        if path.stat().st_size > 300_000:
            continue
        text = path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        continue

    is_env_file = path.name.startswith(".env")
    is_example_file = ".example" in path.name.lower()

    for idx, line in enumerate(text.splitlines(), start=1):
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue

        # NOTE(review): URL and token findings record the raw matched text,
        # which is later printed; assignment findings are masked as "key=***".
        url_match = URL_CRED_RE.search(line)
        if url_match:
            findings_high.append((str(path), idx, url_match.group(0), "embedded user:pass credentials in URL"))
            continue

        token_match = (
            KNOWN_TOKEN_RE.search(line)
            or TELEGRAM_BOT_TOKEN_RE.search(line)
            or JWT_RE.search(line)
        )
        if token_match:
            findings_high.append((str(path), idx, token_match.group(0), "known credential token format"))

        # .env files: plain KEY=VALUE parsing; example files only warn.
        if is_env_file:
            if "=" not in line:
                continue
            env_key, _, env_value = line.partition("=")
            key = env_key.strip()
            value = clean_value(env_value)
            if is_placeholder(value):
                continue
            if has_sensitive_key(key) and is_likely_secret_value(value):
                if is_example_file:
                    findings_warn.append((str(path), idx, f"{key}=***", "environment example secret-like literal value"))
                else:
                    findings_high.append((str(path), idx, f"{key}=***", "environment secret-like literal value"))
            continue

        # Any other file: look for `key = value` / `key: value` assignments.
        m = SENSITIVE_KEY_RE.match(line)
        if not m:
            continue

        key = m.group(1)
        value = clean_value(m.group(2))
        if is_placeholder(value) or not has_sensitive_key(key):
            continue
        if is_likely_secret_value(value):
            findings_high.append((str(path), idx, f"{key}=***", "hardcoded credential-like assignment"))

if findings_high:
    print("[security-scan] FOUND HIGH-RISK credential findings:")
    for path, line_no, value, reason in findings_high:
        print(f"- HIGH {path}:{line_no}: {reason}: {value}")

if findings_warn:
    print("[security-scan] WARNINGS (manual review suggested):")
    for path, line_no, value, reason in findings_warn:
        print(f"- WARN {path}:{line_no}: {reason}: {value}")

# Only high-risk findings fail the run; warnings are informational.
if findings_high:
    raise SystemExit(1)

print("[security-scan] passed")
|