From 47d9c60d48598e9bac79c88714eff370d2c89b0f Mon Sep 17 00:00:00 2001
From: OG T
Date: Tue, 9 Jun 2026 18:12:03 +0800
Subject: [PATCH] chore: harden security scan and gate order

---
NOTE(review): this patch text was recovered from a newline-collapsed copy.
The author e-mail, the YAML indentation of the deploy.yml hunk and part of
scripts/security_scan.py (marked below) were lost in extraction. The hunk
line counts, diffstat and blob hashes below are the original values and are
stale; regenerate the patch with `git format-patch` before applying.

 .gitea/workflows/deploy.yml |   1 +
 scripts/security_scan.py    | 244 ++++++++++++++++++++++++++++++++++++
 scripts/security_scan.sh    |   8 ++
 3 files changed, 253 insertions(+)
 create mode 100755 scripts/security_scan.py
 create mode 100755 scripts/security_scan.sh

diff --git a/.gitea/workflows/deploy.yml b/.gitea/workflows/deploy.yml
index 6d5ba6e..ba4d034 100644
--- a/.gitea/workflows/deploy.yml
+++ b/.gitea/workflows/deploy.yml
@@ -22,5 +22,6 @@ jobs:
           script: |
             cd /opt/agent-bounty-protocol
             git pull origin main
+            bash scripts/security_scan.sh
             docker compose down
             docker compose up -d --build
diff --git a/scripts/security_scan.py b/scripts/security_scan.py
new file mode 100755
index 0000000..2f26e42
--- /dev/null
+++ b/scripts/security_scan.py
@@ -0,0 +1,244 @@
+"""Scan git-tracked files for hardcoded credentials.
+
+Prints every finding and exits with status 1 when at least one high-risk
+finding was recorded; warnings alone do not fail the scan.
+"""
+import re
+import subprocess
+from pathlib import Path
+
+# Scan root: the current working directory, resolved to an absolute path.
+root = Path(".").resolve()
+
+# `KEY = value` / `KEY: value` at the start of a line, optionally after `export`.
+SENSITIVE_KEY_RE = re.compile(r"^\s*(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*[:=]\s*(.+)$")
+
+# Lowercase substrings; a key containing any of them is treated as sensitive.
+SENSITIVE_KEYWORDS = (
+    "api_key",
+    "apikey",
+    "token",
+    "secret",
+    "password",
+    "passwd",
+    "private_key",
+    "webhook",
+    "client_secret",
+    "auth",
+    "credential",
+    "access_token",
+)
+
+TELEGRAM_BOT_TOKEN_RE = re.compile(r"\b\d{6,12}:[A-Za-z0-9_-]{35,}\b")
+# The pattern pieces are raw strings, so the word boundary must be written
+# `\b`; `\\b` would match a literal backslash followed by "b" and the
+# pattern would never match a real token.
+KNOWN_TOKEN_RE = re.compile(
+    r"(?i)\b("
+    r"github_pat_[A-Za-z0-9_]{20,}|"
+    r"ghp_[A-Za-z0-9_]{30,}|"
+    r"gho_[A-Za-z0-9_]{20,}|"
+    r"glpat-[A-Za-z0-9_-]{20,}|"
+    r"pk_live_[A-Za-z0-9]{20,}|"
+    r"sk_live_[A-Za-z0-9]{20,}|"
+    r"sk_test_[A-Za-z0-9]{20,}|"
+    r"xox[baprs]-[A-Za-z0-9-]{10,}|"
+    r"xapp-[A-Za-z0-9]{30,}|"
+    r"vk_[A-Za-z0-9]{20,}|"
+    r"AIza[0-9A-Za-z_-]{35,}"
+    r")\b"
+)
+JWT_RE = re.compile(r"\beyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+\b")
+# http(s) URL carrying `user:password@` before the host.
+URL_CRED_RE = re.compile(r"https?://[^/\s:@]+:[^/@\s]+@[^\s]+")
+
+# Lowercase substrings that mark a value as a placeholder, not a real secret.
+PLACEHOLDER_MARKERS = (
+    "${",
+    "process.env.",
+    "import.meta.env.",
+    "your-",
+    "your_",
+    # NOTE(review): the text from this point up to the return annotation of
+    # run() was lost in extraction: the remaining PLACEHOLDER_MARKERS
+    # entries, the SKIP_PATH_SUBSTRINGS and SKIP_SUFFIXES definitions and
+    # the `def run(...)` header. The surviving fragment on the next line is
+    # kept verbatim; restore the missing code from the repository.
+    " str:
+    result = subprocess.run(
+        cmd,
+        check=True,
+        text=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.DEVNULL,
+        cwd=str(root),
+    )
+    return result.stdout
+
+
+def clean_value(raw: str) -> str:
+    """Return *raw* trimmed, without a trailing ``#`` comment or wrapping quotes.
+
+    Everything from the first ``#`` onwards is dropped unless the value
+    itself starts with ``#``. One pair of matching ``'``, ``"`` or
+    backtick quotes around the remainder is then removed.
+    """
+    value = raw.strip()
+    if "#" in value and not value.lstrip().startswith("#"):
+        value = value.split("#", 1)[0].rstrip()
+    if (len(value) >= 2) and (value[0] == value[-1]) and value[0] in ("'", '"', "`"):
+        return value[1:-1].strip()
+    return value
+
+
+def is_placeholder(value: str) -> bool:
+    """Return True when *value* is empty or looks like a template placeholder.
+
+    Placeholders are: empty or whitespace-only text, an empty quote pair,
+    ``<...>`` wrapped text, anything starting with ``$``, and anything
+    containing one of PLACEHOLDER_MARKERS (compared in lowercase).
+    """
+    v = value.strip()
+    if not v:
+        return True
+    if v in ("\"\"", "''", "``"):
+        return True
+    if v.startswith("<") and v.endswith(">"):
+        return True
+    if v.startswith("$"):
+        return True
+    low = v.lower()
+    return any(marker in low for marker in PLACEHOLDER_MARKERS)
+
+
+def is_expression(value: str) -> bool:
+    """Return True when *value* contains a bracket, operator, space or pipe."""
+    expr_markers = ("(", ")", "{", "}", "[", "]", "+", "-", "*", "/", " ", "|")
+    return any(m in value for m in expr_markers)
+
+
+def is_likely_secret_value(value: str) -> bool:
+    """Return True when *value* looks like a literal credential.
+
+    A value qualifies when it matches one of the exact credential formats
+    (Telegram bot token, known vendor token, JWT, URL with embedded
+    credentials), or when it is not an expression and is a long,
+    sufficiently varied run of token characters.
+    """
+    if not value:
+        return False
+
+    # Exact formats are tested before the expression filter. These formats
+    # legitimately contain "-" and "/" (for example "glpat-..." or any URL),
+    # which is_expression() rejects, so filtering first would make these
+    # checks unreachable for such values.
+    if TELEGRAM_BOT_TOKEN_RE.fullmatch(value) or JWT_RE.fullmatch(value):
+        return True
+    if KNOWN_TOKEN_RE.search(value) or URL_CRED_RE.search(value):
+        return True
+
+    if is_expression(value):
+        return False
+
+    raw = value.strip().strip("'\"")
+    if len(raw) < 24:
+        return False
+
+    # Generic heuristics: long alphanumeric runs, or long runs with enough
+    # distinct characters. is_expression() has already rejected values that
+    # contain "/", "-" or "+", so of the extra characters in the classes
+    # below only "_" and "." can still occur here.
+    if re.fullmatch(r"[A-Za-z0-9]{40,}", raw):
+        return True
+    if re.fullmatch(r"[A-Za-z0-9_./-]{30,}", raw) and len(set(raw)) >= 12:
+        return True
+    if re.fullmatch(r"[A-Za-z0-9+/._-]{40,}", raw) and len(set(raw)) >= 14:
+        return True
+
+    return False
+
+
+def has_sensitive_key(key: str) -> bool:
+    """Return True when *key* contains one of SENSITIVE_KEYWORDS, ignoring case."""
+    lower = key.lower()
+    return any(keyword in lower for keyword in SENSITIVE_KEYWORDS)
+
+
+def mask_token(token: str) -> str:
+    """Return the first six characters of *token* followed by ``***``."""
+    return f"{token[:6]}***"
+
+
+def mask_url_credentials(url: str) -> str:
+    """Return *url* with its ``user:password`` part replaced by ``***:***``."""
+    scheme, _, rest = url.partition("://")
+    _, _, host = rest.partition("@")
+    return f"{scheme}://***:***@{host}"
+
+
+# `-z` makes git print paths verbatim, separated by NUL. Without it git
+# C-quotes paths containing non-ASCII or special characters, the quoted name
+# is not an existing file, and such files would be skipped silently.
+file_list_text = run(["git", "ls-files", "-z"])
+files = [p for p in file_list_text.split("\0") if p.strip()]
+
+# Each finding is (path, line number, redacted value, reason).
+findings_high: list[tuple[str, int, str, str]] = []
+findings_warn: list[tuple[str, int, str, str]] = []
+
+for rel in files:
+    path = root / rel
+    if not path.is_file():
+        continue
+
+    # NOTE(review): this is the absolute path, so a skip substring that also
+    # occurs in the checkout location would exclude every file. The contents
+    # of SKIP_PATH_SUBSTRINGS are not visible here; confirm before switching
+    # this check to `rel`.
+    rel_path = path.as_posix().lower()
+    if any(skip in rel_path for skip in SKIP_PATH_SUBSTRINGS):
+        continue
+    if path.suffix.lower() in SKIP_SUFFIXES:
+        continue
+
+    try:
+        if path.stat().st_size > 300_000:
+            continue
+    except OSError:
+        continue
+
+    try:
+        # errors="ignore" drops undecodable bytes, so only I/O errors remain.
+        text = path.read_text(encoding="utf-8", errors="ignore")
+    except OSError:
+        continue
+
+    for idx, line in enumerate(text.splitlines(), start=1):
+        stripped = line.strip()
+        if not stripped or stripped.startswith("#"):
+            continue
+
+        # Matched credentials are redacted before being stored: the findings
+        # are printed to the deploy log, which must not contain the secrets.
+        if url_match := URL_CRED_RE.search(line):
+            findings_high.append(
+                (
+                    str(path),
+                    idx,
+                    mask_url_credentials(url_match.group(0)),
+                    "embedded user:pass credentials in URL",
+                )
+            )
+            continue
+
+        token_match = (
+            KNOWN_TOKEN_RE.search(line)
+            or TELEGRAM_BOT_TOKEN_RE.search(line)
+            or JWT_RE.search(line)
+        )
+        if token_match:
+            findings_high.append(
+                (
+                    str(path),
+                    idx,
+                    mask_token(token_match.group(0)),
+                    "known credential token format",
+                )
+            )
+            # Already reported as HIGH; skip the assignment checks so the
+            # same line is not listed twice.
+            continue
+
+        if path.name.startswith(".env"):
+            if "=" not in line:
+                continue
+            env_key, _, env_value = line.partition("=")
+            key = env_key.strip()
+            value = clean_value(env_value)
+            if is_placeholder(value):
+                continue
+            if has_sensitive_key(key) and is_likely_secret_value(value):
+                # Files with ".example" in their name only produce a warning.
+                if ".example" in path.name.lower():
+                    findings_warn.append((str(path), idx, f"{key}=***", "environment example secret-like literal value"))
+                else:
+                    findings_high.append((str(path), idx, f"{key}=***", "environment secret-like literal value"))
+            continue
+
+        m = SENSITIVE_KEY_RE.match(line)
+        if not m:
+            continue
+
+        key = m.group(1)
+        value = clean_value(m.group(2))
+        if is_placeholder(value) or not has_sensitive_key(key):
+            continue
+        if is_likely_secret_value(value):
+            findings_high.append((str(path), idx, f"{key}=***", "hardcoded credential-like assignment"))
+
+if findings_high:
+    print("[security-scan] FOUND HIGH-RISK credential findings:")
+    for path, line_no, value, reason in findings_high:
+        print(f"- HIGH {path}:{line_no}: {reason}: {value}")
+
+if findings_warn:
+    print("[security-scan] WARNINGS (manual review suggested):")
+    for path, line_no, value, reason in findings_warn:
+        print(f"- WARN {path}:{line_no}: {reason}: {value}")
+
+# Only high-risk findings fail the scan; warnings are informational.
+if findings_high:
+    raise SystemExit(1)
+
+print("[security-scan] passed")
diff --git a/scripts/security_scan.sh b/scripts/security_scan.sh
new file mode 100755
index 0000000..6bccdcc
--- /dev/null
+++ b/scripts/security_scan.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+python3 scripts/security_scan.py