128 lines
4.1 KiB
Python
128 lines
4.1 KiB
Python
#!/usr/bin/env python3
|
|
"""High-confidence secret pattern check for operational documents.
|
|
|
|
This check intentionally scans documentation and workflow metadata. It allows
|
|
documented placeholder formats such as nvapi-... or <from vault>, but blocks
|
|
likely real tokens, private keys, and long literal credentials.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import re
|
|
import sys
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
|
|
# Locations scanned when no paths are supplied on the command line.
DEFAULT_TARGETS = [
    Path("docs"),
    Path(".gitea"),
]

# File suffixes (including the leading dot) that are eligible for scanning.
DOC_SUFFIXES = {
    ".md",
    ".json",
    ".yml",
    ".yaml",
}
|
|
|
|
|
|
@dataclass(frozen=True)
class SecretPattern:
    """A named regular expression describing one kind of likely secret.

    Instances are frozen, so a pattern cannot be reassigned after creation.
    """

    # Short label for the credential type; main() prints it in each finding.
    name: str
    # Compiled pattern; main() runs it with finditer() on each scanned line.
    regex: re.Pattern[str]
|
|
|
|
|
|
# Table of credential formats to look for. main() applies every pattern to
# every line independently, so one token may be reported under several names.
PATTERNS = [
    # PEM header of any private key type (RSA, EC, OPENSSH, or none).
    SecretPattern("pem_private_key", re.compile(r"-----BEGIN [A-Z ]*PRIVATE KEY-----")),
    # Source-forge and chat tokens identified by their fixed prefixes.
    SecretPattern("github_token", re.compile(r"\bgh[pousr]_[A-Za-z0-9_]{20,}\b")),
    SecretPattern("gitlab_token", re.compile(r"\bglpat-[A-Za-z0-9_-]{20,}\b")),
    SecretPattern("slack_token", re.compile(r"\bxox[baprs]-[A-Za-z0-9-]{20,}\b")),
    # API keys identified by their fixed prefixes.
    SecretPattern("anthropic_key", re.compile(r"\bsk-ant-api03-[A-Za-z0-9_-]{20,}\b")),
    # NOTE(review): the character class after "sk-" also accepts "ant-api03-...",
    # so a sufficiently long Anthropic key is reported by this pattern as well.
    SecretPattern("openai_key", re.compile(r"\bsk-(?:proj-)?[A-Za-z0-9_-]{32,}\b")),
    SecretPattern("google_api_key", re.compile(r"\bAIza[0-9A-Za-z_-]{30,}\b")),
    SecretPattern("nvidia_key", re.compile(r"\bnvapi-[0-9A-Za-z_-]{30,}\b")),
    # 8-12 digits, a colon, then at least 30 token characters.
    SecretPattern("telegram_bot_token", re.compile(r"\b\d{8,12}:[A-Za-z0-9_-]{30,}\b")),
    # Three dot-separated base64url-looking segments, the first starting with "eyJ".
    SecretPattern("jwt", re.compile(r"\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b")),
    SecretPattern("aws_access_key", re.compile(r"\bAKIA[0-9A-Z]{16}\b")),
    # Context-based patterns: a long alphanumeric literal next to a telltale
    # variable name or HTTP header. The whole assignment/header is the match.
    SecretPattern("gitea_token_assignment", re.compile(r"\bGITEA_TOKEN\s*=\s*[\"'][A-Za-z0-9]{32,}[\"']")),
    # re.I: the header name and the "token" keyword match in any letter case.
    SecretPattern("authorization_token_literal", re.compile(r"\bAuthorization:\s*token\s+[A-Za-z0-9]{32,}\b", re.I)),
]
|
|
|
|
|
|
def iter_files(paths: list[Path]) -> list[Path]:
    """Collect the document files to scan from a mix of files and directories.

    Args:
        paths: Files and/or directories. Directories are walked recursively.
            Entries that do not exist, and files whose suffix is not listed
            in ``DOC_SUFFIXES``, are skipped silently.

    Returns:
        A sorted list of unique file paths with an eligible suffix.
    """
    found: set[Path] = set()
    for path in paths:
        # is_file()/is_dir() are both False for a missing path, so no separate
        # existence check is needed.
        if path.is_file():
            candidates = [path]
        elif path.is_dir():
            candidates = [child for child in path.rglob("*") if child.is_file()]
        else:
            continue
        # Compare suffixes case-insensitively: a secret scan must not silently
        # skip files such as README.MD or deploy.YML.
        found.update(p for p in candidates if p.suffix.lower() in DOC_SUFFIXES)
    # A single final sort gives deterministic output; the set removes duplicates
    # that arise when a file is reachable through several arguments.
    return sorted(found)
|
|
|
|
|
|
def is_placeholder(value: str) -> bool:
    """Return True when a matched string looks like a documented placeholder.

    Two independent heuristics are applied in order:

    1. The lower-cased value contains a known placeholder marker (ellipsis,
       angle brackets, ``change_me``, a shell-variable ``$``, ...).
    2. After removing at most one known vendor prefix, the non-empty rest
       consists solely of filler characters: ``x``/``X``, ``0``, ``_``,
       ``-`` and ``.``.

    Anything else, including an empty string or a bare prefix, is treated
    as a potentially real secret and yields False.
    """
    markers = (
        "...",
        "<",
        ">",
        "change_me",
        "redacted",
        "example",
        "placeholder",
        "vault-item-id",
        "your_",
        "${",  # already covered by "$" below; kept for explicitness
        "$",
        "新的",  # "new" (CJK documentation wording)
        "取得",  # "obtain" (CJK documentation wording)
    )
    folded = value.lower()
    for marker in markers:
        if marker in folded:
            return True

    # Prefix matching is case-sensitive and first-match-wins, so the longer
    # "sk-..." variants must stay ahead of the bare "sk-".
    vendor_prefixes = ("nvapi-", "sk-ant-api03-", "sk-proj-", "sk-", "AIza")
    remainder = next(
        (value[len(prefix):] for prefix in vendor_prefixes if value.startswith(prefix)),
        value,
    )
    filler = {"x", "0", "_", "-", "."}
    return bool(remainder) and set(remainder.lower()) <= filler
|
|
|
|
|
|
def masked(value: str) -> str:
    """Shorten a matched value for reporting.

    Values of 14 characters or fewer are returned unchanged. Longer values
    are reduced to their first six and last four characters joined by
    ``...``.
    """
    head, tail = value[:6], value[-4:]
    return value if len(value) <= 14 else f"{head}...{tail}"
|
|
|
|
|
|
def main() -> int:
    """Scan the requested paths and report likely real secrets.

    Positional command-line arguments name files or directories to scan;
    with none given, ``DEFAULT_TARGETS`` is used. Every line of every
    selected file is tested against every entry of ``PATTERNS``, and
    matches that ``is_placeholder`` accepts are ignored.

    Returns:
        1 if anything was found (or a file could not be read), after
        printing ``DOC_SECRET_SANITY_BLOCKED`` and one line per finding;
        0 otherwise, after printing the number of scanned files.
    """
    parser = argparse.ArgumentParser(description="Scan operational docs for likely real secrets.")
    parser.add_argument("paths", nargs="*", type=Path, default=DEFAULT_TARGETS)
    args = parser.parse_args()

    scanned_files = iter_files(args.paths)
    findings: list[str] = []
    for path in scanned_files:
        try:
            # Undecodable bytes are replaced rather than aborting the scan.
            text = path.read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            # An unreadable file is reported as a finding, so it blocks too.
            findings.append(f"{path}:0 read_error {exc}")
            continue
        for lineno, line in enumerate(text.splitlines(), start=1):
            for pattern in PATTERNS:
                for hit in pattern.regex.finditer(line):
                    token = hit.group(0)
                    if not is_placeholder(token):
                        findings.append(f"{path}:{lineno} {pattern.name} {masked(token)}")

    if not findings:
        print(f"DOC_SECRET_SANITY_OK scanned_files={len(scanned_files)}")
        return 0

    # Header first, then one finding per line.
    print("DOC_SECRET_SANITY_BLOCKED", *findings, sep="\n")
    return 1
|
|
|
|
|
|
# Run the scan only when executed as a script; the value returned by main()
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|