chore: harden security scan and gate order
Some checks failed
Deploy to 110 WOOO Server / deploy (push) Failing after 6s

This commit is contained in:
OG T
2026-06-09 18:12:03 +08:00
parent 997e1bf520
commit 47d9c60d48
3 changed files with 253 additions and 0 deletions

244
scripts/security_scan.py Executable file
View File

@@ -0,0 +1,244 @@
import re
import subprocess
from pathlib import Path
# Directory the scan is anchored to: the process's current working directory,
# resolved to an absolute path. `run` uses it as cwd and listed files are
# joined onto it.
root = Path(".").resolve()
# Matches an assignment-like line: optional leading whitespace, optional
# `export `, an identifier (group 1), `:` or `=`, then the rest of the line
# (group 2).
SENSITIVE_KEY_RE = re.compile(r"^\s*(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*[:=]\s*(.+)$")
# Lower-case substrings; a key counts as sensitive when its lower-cased name
# contains any of them (see `has_sensitive_key`).
SENSITIVE_KEYWORDS = (
    "api_key",
    "apikey",
    "token",
    "secret",
    "password",
    "passwd",
    "private_key",
    "webhook",
    "client_secret",
    "auth",
    "credential",
    "access_token",
)
# Digits, a colon, then 35+ characters from [A-Za-z0-9_-], delimited by word
# boundaries.
TELEGRAM_BOT_TOKEN_RE = re.compile(r"\b\d{6,12}:[A-Za-z0-9_-]{35,}\b")
# Case-insensitive match for prefixed credential formats (github_pat_, ghp_,
# gho_, glpat-, pk_live_, sk_live_, sk_test_, xox?-, xapp-, vk_, AIza).
# The delimiters must be `\b` (word boundary). In a raw string `\\b` means
# "a literal backslash followed by the letter b", which made this pattern
# unable to match any real token.
KNOWN_TOKEN_RE = re.compile(
    r"(?i)\b("
    r"github_pat_[A-Za-z0-9_]{20,}|"
    r"ghp_[A-Za-z0-9_]{30,}|"
    r"gho_[A-Za-z0-9_]{20,}|"
    r"glpat-[A-Za-z0-9_-]{20,}|"
    r"pk_live_[A-Za-z0-9]{20,}|"
    r"sk_live_[A-Za-z0-9]{20,}|"
    r"sk_test_[A-Za-z0-9]{20,}|"
    r"xox[baprs]-[A-Za-z0-9-]{10,}|"
    r"xapp-[A-Za-z0-9]{30,}|"
    r"vk_[A-Za-z0-9]{20,}|"
    r"AIza[0-9A-Za-z_-]{35,}"
    r")\b"
)
# Three dot-separated runs of URL-safe base64 characters, the first starting
# with "eyJ".
JWT_RE = re.compile(r"\beyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+\b")
# http(s) URL with a `user:password@` part before the host.
URL_CRED_RE = re.compile(r"https?://[^/\s:@]+:[^/@\s]+@[^\s]+")
# Lower-case substrings; `is_placeholder` treats a value as a placeholder
# when its lower-cased text contains any of them.
PLACEHOLDER_MARKERS = (
    "${",
    "process.env.",
    "import.meta.env.",
    "your-",
    "your_",
    "<your",
    "placeholder",
    "example",
    "sample",
    "changeme",
)
# File extensions (lower-case, with the leading dot) that are never scanned;
# compared against the lower-cased `Path.suffix` of each listed file.
SKIP_SUFFIXES = {
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".webp",
    ".mp4",
    ".mov",
    ".zip",
    ".jar",
    ".so",
    ".pdf",
    ".md",
    ".mdx",
    ".rst",
    ".txt",
}
# Files whose lower-cased path contains any of these substrings are skipped.
SKIP_PATH_SUBSTRINGS = (
    "/prisma/generated/",
    "/runtime/",
    "/dist/",
    "/build/",
    "/coverage/",
)
def run(cmd: list[str]) -> str:
    """Execute *cmd* with ``root`` as working directory and return its stdout.

    Output is decoded as text and stderr is discarded. Because ``check=True``
    is passed, a non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    completed = subprocess.run(
        cmd,
        cwd=str(root),
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        text=True,
        check=True,
    )
    return completed.stdout
def clean_value(raw: str) -> str:
    """Normalize the right-hand side of an assignment for secret checks.

    If the value starts with a quote character (``'``, ``"`` or a backtick)
    and a matching unescaped closing quote exists, the text between the two
    quotes is returned (stripped). Anything after the closing quote, such as
    a trailing ``,``/``;`` or an inline comment, is dropped. A ``#`` inside
    the quotes is therefore kept as part of the value rather than being
    mistaken for a comment.

    Otherwise the value is treated as unquoted: everything from the first
    ``#`` on is removed, unless the value itself begins with ``#``.
    """
    value = raw.strip()
    if value and value[0] in ("'", '"', "`"):
        quote = value[0]
        pos = 1
        # Find the first closing quote that is not backslash-escaped.
        while True:
            pos = value.find(quote, pos)
            if pos == -1 or value[pos - 1] != "\\":
                break
            pos += 1
        if pos != -1:
            return value[1:pos].strip()
        # No closing quote: fall through and handle it like a bare value.
    if "#" in value and not value.startswith("#"):
        value = value.split("#", 1)[0].rstrip()
    return value
def is_placeholder(value: str) -> bool:
    """Return True when *value* is empty or looks like a stand-in, not a secret.

    A value is a placeholder when, after stripping whitespace, it is empty,
    is just a pair of quotes, is wrapped in ``<...>``, starts with ``$``, or
    contains (case-insensitively) any entry of ``PLACEHOLDER_MARKERS``.
    """
    text = value.strip()
    if not text or text in ("\"\"", "''", "``"):
        return True
    if text.startswith("$") or (text.startswith("<") and text.endswith(">")):
        return True
    lowered = text.lower()
    for marker in PLACEHOLDER_MARKERS:
        if marker in lowered:
            return True
    return False
def is_expression(value: str) -> bool:
    """Return True when *value* contains a character typical of code.

    The characters checked are brackets, braces, parentheses, the operators
    ``+ - * /``, the pipe, and the space.
    """
    expression_chars = frozenset("(){}[]+-*/ |")
    # Every marker is a single character, so a set-overlap test is enough.
    return not expression_chars.isdisjoint(value)
def is_likely_secret_value(value: str) -> bool:
    """Return True when *value* looks like a literal credential.

    Known credential formats (Telegram bot token, prefixed tokens, JWT, URL
    with embedded ``user:pass``) are checked first. They have to run before
    the ``is_expression`` gate because those formats legitimately contain
    ``-`` and ``/``, which ``is_expression`` treats as code. With the gate
    first, values such as ``glpat-...``, ``xoxb-...`` or credential URLs
    could never be recognized here.

    Values that are not a known format are then rejected if they look like
    an expression or are shorter than 24 characters, and accepted if they
    are a long run of token-like characters with enough distinct characters.
    """
    if not value:
        return False
    if (
        TELEGRAM_BOT_TOKEN_RE.fullmatch(value)
        or KNOWN_TOKEN_RE.search(value)
        or JWT_RE.fullmatch(value)
        or URL_CRED_RE.search(value)
    ):
        return True
    if is_expression(value):
        return False
    raw = value.strip().strip("'\"")
    if len(raw) < 24:
        return False
    if re.fullmatch(r"[A-Za-z0-9]{40,}", raw):
        return True
    # NOTE(review): `is_expression` has already rejected values containing
    # '/', '-' or '+', so those characters in the two classes below can never
    # occur here. Confirm whether base64-style values should be detected.
    if re.fullmatch(r"[A-Za-z0-9_./-]{30,}", raw) and len(set(raw)) >= 12:
        return True
    if re.fullmatch(r"[A-Za-z0-9+/._-]{40,}", raw) and len(set(raw)) >= 14:
        return True
    return False
def has_sensitive_key(key: str) -> bool:
    """Return True when the lower-cased *key* contains a sensitive keyword.

    Keywords come from ``SENSITIVE_KEYWORDS`` and are matched as substrings.
    """
    lowered = key.lower()
    for keyword in SENSITIVE_KEYWORDS:
        if keyword in lowered:
            return True
    return False
def _redact_token(token: str) -> str:
    """Keep a short identifying prefix of *token* and mask the rest."""
    return f"{token[:6]}***"


def _redact_url_credentials(url: str) -> str:
    """Replace the ``user:pass`` part of *url* with ``***:***``."""
    return re.sub(r"//[^/@\s]+@", "//***:***@", url, count=1)


# Scan every git-tracked file line by line and collect findings.
# `-z` makes git emit NUL-separated, unquoted paths. Without it, paths with
# special or non-ASCII characters are C-quoted, fail `is_file()` below, and
# are silently left unscanned.
file_list_text = run(["git", "ls-files", "-z"])
files = [p for p in file_list_text.split("\0") if p]
# Each finding is (file path, 1-based line number, redacted value, reason).
findings_high: list[tuple[str, int, str, str]] = []
findings_warn: list[tuple[str, int, str, str]] = []
for rel in files:
    path = root / rel
    if not path.is_file():
        continue
    # Match skip substrings against the repo-relative path only. Using the
    # absolute path would skip every file when the checkout itself lives
    # under e.g. ".../build/", making the scan pass without scanning anything.
    # The leading "/" keeps top-level directories such as "dist/" matching.
    rel_path = f"/{rel}".lower()
    if any(skip in rel_path for skip in SKIP_PATH_SUBSTRINGS):
        continue
    if path.suffix.lower() in SKIP_SUFFIXES:
        continue
    try:
        # Files larger than 300_000 bytes are not scanned.
        if path.stat().st_size > 300_000:
            continue
        text = path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        # Unreadable file: best-effort scan, move on.
        continue
    is_env_file = path.name.startswith(".env")
    is_example_file = ".example" in path.name.lower()
    for idx, line in enumerate(text.splitlines(), start=1):
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        url_match = URL_CRED_RE.search(line)
        if url_match:
            # Findings are printed to the job log, so never echo the secret.
            findings_high.append(
                (
                    str(path),
                    idx,
                    _redact_url_credentials(url_match.group(0)),
                    "embedded user:pass credentials in URL",
                )
            )
            continue
        token_match = (
            KNOWN_TOKEN_RE.search(line)
            or TELEGRAM_BOT_TOKEN_RE.search(line)
            or JWT_RE.search(line)
        )
        if token_match:
            findings_high.append(
                (
                    str(path),
                    idx,
                    _redact_token(token_match.group(0)),
                    "known credential token format",
                )
            )
            # The line is already reported as high risk; skipping the
            # key/value checks avoids a duplicate finding for the same line.
            continue
        if is_env_file:
            if "=" not in line:
                continue
            env_key, _, env_value = line.partition("=")
            key = env_key.strip()
            value = clean_value(env_value)
            if is_placeholder(value):
                continue
            if has_sensitive_key(key) and is_likely_secret_value(value):
                if is_example_file:
                    # Example env files only warn; they do not fail the scan.
                    findings_warn.append(
                        (str(path), idx, f"{key}=***", "environment example secret-like literal value")
                    )
                else:
                    findings_high.append(
                        (str(path), idx, f"{key}=***", "environment secret-like literal value")
                    )
            continue
        m = SENSITIVE_KEY_RE.match(line)
        if not m:
            continue
        key = m.group(1)
        value = clean_value(m.group(2))
        if is_placeholder(value) or not has_sensitive_key(key):
            continue
        if is_likely_secret_value(value):
            findings_high.append((str(path), idx, f"{key}=***", "hardcoded credential-like assignment"))
if findings_high:
    print("[security-scan] FOUND HIGH-RISK credential findings:")
    for found_path, line_no, value, reason in findings_high:
        print(f"- HIGH {found_path}:{line_no}: {reason}: {value}")
if findings_warn:
    print("[security-scan] WARNINGS (manual review suggested):")
    for found_path, line_no, value, reason in findings_warn:
        print(f"- WARN {found_path}:{line_no}: {reason}: {value}")
# Only high-risk findings fail the scan (exit status 1).
if findings_high:
    raise SystemExit(1)
print("[security-scan] passed")

8
scripts/security_scan.sh Executable file
View File

@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Run the Python credential scan from the repository root, regardless of the
# directory this wrapper is invoked from.
set -euo pipefail

# This script lives one level below the root, so the root is its parent dir.
script_dir="$(dirname "$0")"
ROOT_DIR="$(cd "${script_dir}/.." && pwd)"

# The Python scan resolves paths from the current directory, so cd first.
cd "${ROOT_DIR}"
python3 scripts/security_scan.py