chore: harden security scan and gate order

2026-06-09 18:12:03 +08:00
parent 997e1bf520
commit 47d9c60d48
3 changed files with 253 additions and 0 deletions
--- a/scripts/security_scan.py
+++ b/scripts/security_scan.py
@@ -0,0 +1,244 @@
+import re
+import subprocess
+from pathlib import Path
+
+# Current working directory resolved to an absolute path. Used as the cwd for
+# git invocations and as the base that tracked file paths are joined onto.
+root = Path(".").resolve()
+
+# Matches `KEY = value` / `KEY: value` lines, optionally prefixed with `export`.
+# Group 1 is the identifier, group 2 is the raw right-hand side.
+SENSITIVE_KEY_RE = re.compile(r"^\s*(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*[:=]\s*(.+)$")
+
+# Lowercase substrings; an identifier containing any of them (case-insensitive)
+# is treated as credential-related by has_sensitive_key().
+SENSITIVE_KEYWORDS = (
+    "api_key",
+    "apikey",
+    "token",
+    "secret",
+    "password",
+    "passwd",
+    "private_key",
+    "webhook",
+    "client_secret",
+    "auth",
+    "credential",
+    "access_token",
+)
+
+# Telegram bot token: numeric bot id, a colon, then a long URL-safe secret.
+TELEGRAM_BOT_TOKEN_RE = re.compile(r"\b\d{6,12}:[A-Za-z0-9_-]{35,}\b")
+
+# Vendor-prefixed tokens, matched case-insensitively.
+# NOTE: inside a raw string the word boundary is spelled `\b`; `\\b` would match
+# a literal backslash followed by "b" and the pattern would never fire on real
+# tokens.
+KNOWN_TOKEN_RE = re.compile(
+    r"(?i)\b("
+    r"github_pat_[A-Za-z0-9_]{20,}|"
+    r"ghp_[A-Za-z0-9_]{30,}|"
+    r"gho_[A-Za-z0-9_]{20,}|"
+    r"glpat-[A-Za-z0-9_-]{20,}|"
+    r"pk_live_[A-Za-z0-9]{20,}|"
+    r"sk_live_[A-Za-z0-9]{20,}|"
+    r"sk_test_[A-Za-z0-9]{20,}|"
+    r"xox[baprs]-[A-Za-z0-9-]{10,}|"
+    r"xapp-[A-Za-z0-9]{30,}|"
+    r"vk_[A-Za-z0-9]{20,}|"
+    r"AIza[0-9A-Za-z_-]{35,}"
+    r")\b"
+)
+
+# Three dot-separated base64url segments whose first segment starts with "eyJ".
+JWT_RE = re.compile(r"\beyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+\b")
+
+# http(s) URL carrying `user:password@` userinfo.
+URL_CRED_RE = re.compile(r"https?://[^/\s:@]+:[^/@\s]+@[^\s]+")
+
+# Lowercase substrings; a value containing any of them is treated as a
+# placeholder (template reference or dummy text) by is_placeholder().
+PLACEHOLDER_MARKERS = (
+    "${",
+    "process.env.",
+    "import.meta.env.",
+    "your-",
+    "your_",
+    "<your",
+    "placeholder",
+    "example",
+    "sample",
+    "changeme",
+)
+
+# File extensions (lowercase, with leading dot) that are never scanned:
+# media, archives/binaries and documentation formats.
+SKIP_SUFFIXES = {
+    ".png",
+    ".jpg",
+    ".jpeg",
+    ".gif",
+    ".webp",
+    ".mp4",
+    ".mov",
+    ".zip",
+    ".jar",
+    ".so",
+    ".pdf",
+    ".md",
+    ".mdx",
+    ".rst",
+    ".txt",
+}
+
+# Path fragments; a file whose lowercased POSIX path contains any of them is
+# skipped by the scan loop.
+SKIP_PATH_SUBSTRINGS = (
+    "/prisma/generated/",
+    "/runtime/",
+    "/dist/",
+    "/build/",
+    "/coverage/",
+)
+
+
+def run(cmd: list[str]) -> str:
+    """Execute *cmd* from the repository root and return its stdout as text.
+
+    stderr is discarded. A non-zero exit status raises
+    ``subprocess.CalledProcessError``.
+    """
+    return subprocess.check_output(
+        cmd,
+        cwd=str(root),
+        stderr=subprocess.DEVNULL,
+        text=True,
+    )
+
+
+def clean_value(raw: str) -> str:
+    """Normalise the right-hand side of an assignment for secret checks.
+
+    Strips surrounding whitespace, drops a trailing ``# comment`` and removes
+    one pair of matching surrounding quotes (single, double or backtick).
+
+    A ``#`` inside a leading quoted literal belongs to the value and is kept,
+    so ``"abc#def"`` yields ``abc#def`` instead of a truncated ``"abc``.
+    A value that itself starts with ``#`` is left untouched.
+    """
+    quotes = ("'", '"', "`")
+    value = raw.strip()
+
+    if "#" in value and not value.startswith("#"):
+        search_from = 0
+        if value[0] in quotes:
+            # Only look for a comment after the closing quote; if the quote is
+            # never closed, fall back to scanning the whole value.
+            closing = value.find(value[0], 1)
+            if closing != -1:
+                search_from = closing + 1
+        hash_pos = value.find("#", search_from)
+        if hash_pos != -1:
+            value = value[:hash_pos].rstrip()
+
+    if len(value) >= 2 and value[0] == value[-1] and value[0] in quotes:
+        return value[1:-1].strip()
+    return value
+
+
+def is_placeholder(value: str) -> bool:
+    """Return True when *value* is empty or looks like a template/dummy value.
+
+    Blank strings, bare empty quote pairs, ``<...>`` tokens, ``$``-prefixed
+    references and anything containing a PLACEHOLDER_MARKERS entry
+    (case-insensitive) all count as placeholders.
+    """
+    candidate = value.strip()
+    if candidate in ("", "\"\"", "''", "``"):
+        return True
+    if candidate.startswith("$"):
+        return True
+    if candidate.startswith("<") and candidate.endswith(">"):
+        return True
+
+    lowered = candidate.lower()
+    for marker in PLACEHOLDER_MARKERS:
+        if marker in lowered:
+            return True
+    return False
+
+
+def is_expression(value: str) -> bool:
+    """Return True when *value* contains a character typical of code, not a literal.
+
+    The characters are brackets, braces, parentheses, arithmetic operators,
+    a space and the pipe.
+    """
+    expression_chars = "(){}[]+-*/ |"
+    return not set(value).isdisjoint(expression_chars)
+
+
+def is_likely_secret_value(value: str) -> bool:
+    """Heuristically decide whether *value* is a hard-coded credential.
+
+    Well-known credential formats are checked first. Anything else must not
+    look like a code expression, must be at least 24 characters long, and must
+    match one of the generic long-random-string shapes.
+
+    Returns False for an empty value.
+    """
+    if not value:
+        return False
+
+    # Recognised formats are checked before the expression gate: real tokens
+    # and URLs contain "-" and "/", which is_expression() treats as operators,
+    # so gating first would make these branches unreachable.
+    if TELEGRAM_BOT_TOKEN_RE.fullmatch(value):
+        return True
+    if KNOWN_TOKEN_RE.search(value):
+        return True
+    if JWT_RE.fullmatch(value):
+        return True
+    if URL_CRED_RE.search(value):
+        return True
+
+    # The generic heuristics below only apply to plain literals.
+    if is_expression(value):
+        return False
+
+    raw = value.strip().strip("'\"")
+    if len(raw) < 24:
+        return False
+
+    # Long purely alphanumeric run (hex digests, opaque API keys).
+    if re.fullmatch(r"[A-Za-z0-9]{40,}", raw):
+        return True
+
+    # Shorter strings additionally need enough distinct characters so that
+    # repetitive identifiers are not flagged.
+    if re.fullmatch(r"[A-Za-z0-9_./-]{30,}", raw) and len(set(raw)) >= 12:
+        return True
+
+    if re.fullmatch(r"[A-Za-z0-9+/._-]{40,}", raw) and len(set(raw)) >= 14:
+        return True
+
+    return False
+
+
+def has_sensitive_key(key: str) -> bool:
+    """Return True when *key* contains any SENSITIVE_KEYWORDS entry, ignoring case."""
+    lowered = key.lower()
+    for keyword in SENSITIVE_KEYWORDS:
+        if keyword in lowered:
+            return True
+    return False
+
+
+def _redact(secret: str) -> str:
+    """Return a log-safe preview of a matched credential.
+
+    URLs keep scheme and host but lose their userinfo; any other token keeps
+    its first four characters. The scanner's own output must not leak the
+    secret it found.
+    """
+    scheme, sep, rest = secret.partition("://")
+    if sep and "@" in rest:
+        host_part = rest.partition("@")[2]
+        return f"{scheme}://***@{host_part}"
+    return f"{secret[:4]}***"
+
+
+# -z yields NUL-separated, unquoted paths. Without it git C-quotes names that
+# contain non-ASCII or special characters, and those files would fail the
+# is_file() check below and silently escape the scan.
+file_list_text = run(["git", "ls-files", "-z"])
+files = [p for p in file_list_text.split("\0") if p]
+
+# Each finding is (path, line number, redacted value, reason).
+findings_high: list[tuple[str, int, str, str]] = []
+findings_warn: list[tuple[str, int, str, str]] = []
+
+for rel in files:
+    path = root / rel
+    if not path.is_file():
+        continue
+
+    # Skip rules are matched against the repo-relative path. Using the
+    # absolute path would skip every file when the checkout itself lives under
+    # a directory such as /build/ or /runtime/. The leading "/" lets top-level
+    # directories match fragments like "/dist/".
+    rel_path = "/" + Path(rel).as_posix().lower()
+    if any(skip in rel_path for skip in SKIP_PATH_SUBSTRINGS):
+        continue
+    if path.suffix.lower() in SKIP_SUFFIXES:
+        continue
+
+    try:
+        # Large files are skipped to keep the scan fast.
+        if path.stat().st_size > 300_000:
+            continue
+        text = path.read_text(encoding="utf-8", errors="ignore")
+    except OSError:
+        # Unreadable files are skipped on a best-effort basis.
+        continue
+
+    for idx, line in enumerate(text.splitlines(), start=1):
+        stripped = line.strip()
+        if not stripped or stripped.startswith("#"):
+            continue
+
+        url_match = URL_CRED_RE.search(line)
+        if url_match:
+            findings_high.append(
+                (str(path), idx, _redact(url_match.group(0)), "embedded user:pass credentials in URL")
+            )
+            continue
+
+        token_match = (
+            KNOWN_TOKEN_RE.search(line)
+            or TELEGRAM_BOT_TOKEN_RE.search(line)
+            or JWT_RE.search(line)
+        )
+        if token_match:
+            findings_high.append(
+                (str(path), idx, _redact(token_match.group(0)), "known credential token format")
+            )
+
+        if path.name.startswith(".env"):
+            # Dotenv files: every KEY=value line is inspected; findings in
+            # ".example" files are downgraded to warnings.
+            if "=" not in line:
+                continue
+            env_key, _, env_value = line.partition("=")
+            key = env_key.strip()
+            value = clean_value(env_value)
+            if is_placeholder(value):
+                continue
+            if has_sensitive_key(key) and is_likely_secret_value(value):
+                if ".example" in path.name.lower():
+                    findings_warn.append((str(path), idx, f"{key}=***", "environment example secret-like literal value"))
+                else:
+                    findings_high.append((str(path), idx, f"{key}=***", "environment secret-like literal value"))
+            continue
+
+        m = SENSITIVE_KEY_RE.match(line)
+        if not m:
+            continue
+
+        key = m.group(1)
+        value = clean_value(m.group(2))
+        if is_placeholder(value) or not has_sensitive_key(key):
+            continue
+        if is_likely_secret_value(value):
+            findings_high.append((str(path), idx, f"{key}=***", "hardcoded credential-like assignment"))
+
+if findings_high:
+    print("[security-scan] FOUND HIGH-RISK credential findings:")
+    for path, line_no, value, reason in findings_high:
+        print(f"- HIGH {path}:{line_no}: {reason}: {value}")
+
+if findings_warn:
+    print("[security-scan] WARNINGS (manual review suggested):")
+    for path, line_no, value, reason in findings_warn:
+        print(f"- WARN {path}:{line_no}: {reason}: {value}")
+
+# Only high-risk findings fail the gate; warnings are informational.
+if findings_high:
+    raise SystemExit(1)
+
+print("[security-scan] passed")
--- a/scripts/security_scan.sh
+++ b/scripts/security_scan.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+# Wrapper that runs scripts/security_scan.py from the repository root.
+
+# Abort on any failing command, unset variable, or failed pipeline stage.
+set -euo pipefail
+
+# Resolve the parent of this script's directory to an absolute path and work
+# from there, so the relative scanner path below resolves regardless of the
+# directory the wrapper is invoked from.
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+python3 scripts/security_scan.py