Files
awoooi/scripts/ops/doc-secrets-sanity-check.py
Your Name cfb866d055
Some checks failed
Ansible Lint / lint (push) Successful in 35s
CD Pipeline / tests (push) Failing after 13s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Code Review / ai-code-review (push) Failing after 11s
feat(governance): add agent market automation surfaces
2026-06-04 21:50:55 +08:00

128 lines
4.1 KiB
Python

#!/usr/bin/env python3
"""High-confidence secret pattern check for operational documents.
This check intentionally scans documentation and workflow metadata. It allows
documented placeholder formats such as nvapi-... or <from vault>, but blocks
likely real tokens, private keys, and long literal credentials.
"""
from __future__ import annotations
import argparse
import re
import sys
from dataclasses import dataclass
from pathlib import Path
# Locations scanned when no paths are supplied on the command line.
DEFAULT_TARGETS = [Path(name) for name in ("docs", ".gitea")]

# Only files whose suffix is in this set are considered for scanning.
DOC_SUFFIXES = {
    ".md",
    ".json",
    ".yml",
    ".yaml",
}
@dataclass(frozen=True)
class SecretPattern:
    """Immutable pairing of a finding label with the regex that detects it.

    Attributes:
        name: Label printed next to each finding produced by this pattern.
        regex: Compiled expression searched for in every scanned line.
    """

    name: str
    regex: re.Pattern[str]
# Detection table: every scanned line is searched with each of these patterns.
# Each spec row is (finding label, regex source, re flags); 0 means no flags.
PATTERNS = [
    SecretPattern(label, re.compile(expression, flags))
    for label, expression, flags in (
        ("pem_private_key", r"-----BEGIN [A-Z ]*PRIVATE KEY-----", 0),
        ("github_token", r"\bgh[pousr]_[A-Za-z0-9_]{20,}\b", 0),
        ("gitlab_token", r"\bglpat-[A-Za-z0-9_-]{20,}\b", 0),
        ("slack_token", r"\bxox[baprs]-[A-Za-z0-9-]{20,}\b", 0),
        ("anthropic_key", r"\bsk-ant-api03-[A-Za-z0-9_-]{20,}\b", 0),
        ("openai_key", r"\bsk-(?:proj-)?[A-Za-z0-9_-]{32,}\b", 0),
        ("google_api_key", r"\bAIza[0-9A-Za-z_-]{30,}\b", 0),
        ("nvidia_key", r"\bnvapi-[0-9A-Za-z_-]{30,}\b", 0),
        ("telegram_bot_token", r"\b\d{8,12}:[A-Za-z0-9_-]{30,}\b", 0),
        ("jwt", r"\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b", 0),
        ("aws_access_key", r"\bAKIA[0-9A-Z]{16}\b", 0),
        # The next two match a literal token in an assignment / HTTP header
        # rather than a vendor-specific token prefix.
        ("gitea_token_assignment", r"\bGITEA_TOKEN\s*=\s*[\"'][A-Za-z0-9]{32,}[\"']", 0),
        ("authorization_token_literal", r"\bAuthorization:\s*token\s+[A-Za-z0-9]{32,}\b", re.I),
    )
]
def iter_files(paths: list[Path]) -> list[Path]:
    """Collect the documentation files to scan from the given paths.

    A path that is a file is kept when its suffix is a documentation suffix;
    a directory is walked recursively and every matching file beneath it is
    kept. Paths that do not exist, and files with other suffixes, are skipped
    silently.

    Args:
        paths: Files and/or directories to inspect.

    Returns:
        The matching files, de-duplicated and sorted.
    """

    def _is_doc(candidate: Path) -> bool:
        # Compare the lower-cased suffix so README.MD or deploy.YAML are not
        # silently excluded from the scan. This relies on DOC_SUFFIXES holding
        # lower-case entries.
        return candidate.is_file() and candidate.suffix.lower() in DOC_SUFFIXES

    found: set[Path] = set()
    for path in paths:
        # is_dir()/is_file() are both False for a missing path, so a
        # non-existent target simply contributes nothing.
        if path.is_dir():
            found.update(p for p in path.rglob("*") if _is_doc(p))
        elif _is_doc(path):
            found.add(path)
    return sorted(found)
def is_placeholder(value: str) -> bool:
    """Tell whether a matched string looks like a documented placeholder.

    Two checks are applied in order:

    1. The lower-cased value contains one of the known placeholder markers
       (ellipsis, angle brackets, ``$`` substitutions, words such as
       ``example`` or ``redacted``, ...).
    2. After removing the first matching key prefix (if any), what remains is
       non-empty and consists solely of the filler characters ``x``, ``0``,
       ``_``, ``-`` and ``.`` (compared case-insensitively).

    Args:
        value: The text matched by one of the secret patterns.

    Returns:
        True if either check succeeds, otherwise False.
    """
    markers = (
        "...",
        "<",
        ">",
        "change_me",
        "redacted",
        "example",
        "placeholder",
        "vault-item-id",
        "your_",
        "${",
        "$",
        "新的",
        "取得",
    )
    folded = value.lower()
    for marker in markers:
        if marker in folded:
            return True

    # Strip only the first prefix that matches; order matters because
    # "sk-ant-api03-" and "sk-proj-" must be tried before the shorter "sk-".
    remainder = value
    for vendor_prefix in ("nvapi-", "sk-ant-api03-", "sk-proj-", "sk-", "AIza"):
        if value.startswith(vendor_prefix):
            remainder = value[len(vendor_prefix):]
            break

    filler_chars = frozenset("x0_-.")
    return bool(remainder) and filler_chars.issuperset(remainder.lower())
def masked(value: str) -> str:
    """Shorten a matched value for display in the findings report.

    Args:
        value: The matched text.

    Returns:
        ``value`` unchanged when it is 14 characters or shorter; otherwise its
        first six characters, a literal ``...``, and its last four characters.
    """
    if len(value) > 14:
        head, tail = value[:6], value[-4:]
        return head + "..." + tail
    return value
def main() -> int:
    """Scan the requested paths and report likely real secrets.

    Positional command-line arguments name the files or directories to scan;
    with none given, DEFAULT_TARGETS is used. Each selected file is read as
    UTF-8 (undecodable bytes are replaced) and every line is searched with
    every entry of PATTERNS. Matches recognised as placeholders are ignored;
    the rest are reported in masked form. A file that cannot be read is
    itself recorded as a finding.

    Returns:
        1 after printing ``DOC_SECRET_SANITY_BLOCKED`` and the findings when
        anything was found, otherwise 0 after printing
        ``DOC_SECRET_SANITY_OK`` with the number of scanned files.
    """
    cli = argparse.ArgumentParser(description="Scan operational docs for likely real secrets.")
    cli.add_argument("paths", nargs="*", type=Path, default=DEFAULT_TARGETS)
    options = cli.parse_args()

    scanned_files = iter_files(options.paths)
    findings: list[str] = []
    for doc in scanned_files:
        try:
            text = doc.read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            # An unreadable file blocks the check rather than passing silently.
            findings.append(f"{doc}:0 read_error {exc}")
            continue
        for lineno, line in enumerate(text.splitlines(), start=1):
            for pattern in PATTERNS:
                hits = (found.group(0) for found in pattern.regex.finditer(line))
                findings.extend(
                    f"{doc}:{lineno} {pattern.name} {masked(hit)}"
                    for hit in hits
                    if not is_placeholder(hit)
                )

    if not findings:
        print(f"DOC_SECRET_SANITY_OK scanned_files={len(scanned_files)}")
        return 0

    print("DOC_SECRET_SANITY_BLOCKED")
    for finding in findings:
        print(finding)
    return 1
if __name__ == "__main__":
    # Propagate main()'s result as the process exit status.
    exit_status = main()
    sys.exit(exit_status)