Files
awoooi/scripts/security/source-control-workflow-secret-name-local-inventory.py
Your Name 9e15fd08b3
All checks were successful
CD Pipeline / tests (push) Successful in 1m39s
Code Review / ai-code-review (push) Successful in 15s
CD Pipeline / build-and-deploy (push) Successful in 5m19s
CD Pipeline / post-deploy-checks (push) Successful in 2m11s
feat(web): land iwooos security posture surfaces
2026-05-25 20:35:52 +08:00

360 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""Build local read-only workflow / secret-name inventory evidence.
The collector reads only repository files that are already present in local
working trees:
- .github/workflows/*
- .gitea/workflows/*
- CODEOWNERS / .github/CODEOWNERS / .gitea/CODEOWNERS
It does not call GitHub or Gitea APIs, does not read .env files, does not read
repository secret stores, and does not collect secret values. The output keeps
only secret names referenced by workflow expressions such as
`${{ secrets.NAME }}`.
"""
from __future__ import annotations
import argparse
import json
import re
from pathlib import Path
# Matches one secret reference inside a workflow expression. Group 1 captures the
# name in dotted form (secrets.NAME); group 2 captures it in bracket form
# (secrets['NAME'] or secrets["NAME"]). Only one of the two groups is set per match.
SECRET_RE = re.compile(
    r"secrets(?:\.([A-Za-z_][A-Za-z0-9_]*)|\[['\"]([^'\"]+)['\"]\])"
)
# Captures the body of a ${{ ... }} expression (non-greedy). DOTALL is not set, so
# an expression that spans several lines is not matched.
EXPRESSION_RE = re.compile(r"\$\{\{(.+?)\}\}")
# A `name:` key that starts at column 0 of any line (MULTILINE); group 1 is its value.
NAME_RE = re.compile(r"^name:\s*(.+?)\s*$", re.MULTILINE)
# A mapping key that starts at column 0; group 1 is the key name.
TOP_LEVEL_KEY_RE = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*):")
# A mapping key preceded by at least two whitespace characters (any depth);
# group 1 is the key name.
INDENTED_KEY_RE = re.compile(r"^\s{2,}([A-Za-z_][A-Za-z0-9_-]*):")
# Capture whatever follows `runs-on:` / `environment:` up to the end of a line.
# NOTE: `\s*` also matches newlines, so when the key has no value on its own line
# and the pattern is applied to multi-line text, group 1 is taken from the next
# non-empty line.
RUNS_ON_RE = re.compile(r"runs-on:\s*(.+)")
ENVIRONMENT_RE = re.compile(r"environment:\s*(.+)")
# Action labels copied verbatim into the generated JSON (the per-repo
# "still_forbidden" field and the snapshot-level "forbidden_actions" field).
STILL_FORBIDDEN = [
    "collect secret value",
    "read .env or secret store",
    "modify workflow",
    "modify webhook",
    "rotate secret",
    "create GitHub repo",
    "sync refs",
    "switch GitHub primary",
    "disable Gitea",
]
def parse_repo_spec(value: str) -> dict[str, str]:
    """Turn one ``--repo`` argument into a repo spec dictionary.

    Args:
        value: Six ``|``-separated fields in the order
            ``key|path|github_repo|source_key|scope_status|risk``.

    Returns:
        A dict with the keys ``repo_key``, ``repo_path``, ``github_repo``,
        ``source_key``, ``scope_status`` and ``risk``. ``repo_path`` is the
        user-expanded, resolved form of the given path; every other field is
        passed through unchanged.

    Raises:
        ValueError: If ``value`` does not contain exactly six fields.
    """
    fields = value.split("|")
    if len(fields) != 6:
        raise ValueError(
            "--repo format must be key|path|github_repo|source_key|scope_status|risk"
        )
    field_names = (
        "repo_key",
        "repo_path",
        "github_repo",
        "source_key",
        "scope_status",
        "risk",
    )
    spec = dict(zip(field_names, fields))
    # Normalise the path last so the key order above stays the output order.
    spec["repo_path"] = str(Path(spec["repo_path"]).expanduser().resolve())
    return spec
def strip_quotes(value: str) -> str:
    """Trim a scalar and remove one pair of matching surrounding quotes.

    Surrounding whitespace is stripped first, then any leading/trailing commas.
    If what remains both starts and ends with the same quote character
    (double or single), the first and last characters are dropped.

    Args:
        value: Raw scalar text.

    Returns:
        The cleaned text.
    """
    text = value.strip().strip(",")
    for quote in ('"', "'"):
        if text.startswith(quote) and text.endswith(quote):
            return text[1:-1]
    return text
def parse_inline_list(value: str) -> list[str]:
    """Parse a scalar or a flow-style ``[a, b]`` list into a list of strings.

    Args:
        value: Raw text found after a key.

    Returns:
        For a bracketed value, the comma-separated items after quote stripping,
        with empty items dropped. Otherwise a one-element list holding the
        cleaned value, or an empty list if nothing is left.
    """
    cleaned = strip_quotes(value.strip())
    if not (cleaned.startswith("[") and cleaned.endswith("]")):
        return [cleaned] if cleaned else []
    items = (strip_quotes(part) for part in cleaned[1:-1].split(","))
    return [item for item in items if item]
def first_scalar(pattern: re.Pattern[str], text: str, default: str = "") -> str:
    """Return group 1 of the first ``pattern`` match in ``text``, unquoted.

    Args:
        pattern: Compiled regex with at least one capture group.
        text: Text to search.
        default: Value returned as-is when the pattern does not match.

    Returns:
        The first capture group passed through ``strip_quotes``, or ``default``.
    """
    found = pattern.search(text)
    if found is None:
        return default
    return strip_quotes(found.group(1))
def extract_on_block(lines: list[str]) -> list[str]:
    """Return the event names declared under a workflow's top-level ``on`` key.

    Supported forms:

    * inline scalar or flow list: ``on: push`` / ``on: [push, pull_request]``
    * block mapping: one event key per line directly below ``on:``
    * block sequence: one ``- event`` item per line directly below ``on:``

    Only entries at the indentation of the first child line count as events.
    Deeper keys (``branches``, ``paths``, ``types`` and similar) are filters of
    an event and are ignored. The key may also be written quoted (``"on":`` or
    ``'on':``).

    Args:
        lines: The workflow file split into lines.

    Returns:
        For the inline form, the events in declaration order. For the block
        forms, the sorted unique event names. An empty list when no ``on`` key
        starts a line.
    """
    for index, line in enumerate(lines):
        if not line.startswith(("on:", '"on":', "'on':")):
            continue
        # Drop a trailing "# comment" so it does not end up inside a trigger name.
        inline = line.split(":", 1)[1].split("#", 1)[0].strip()
        if inline:
            return parse_inline_list(inline)
        triggers: list[str] = []
        child_indent: int | None = None
        for next_line in lines[index + 1 :]:
            stripped = next_line.strip()
            if not stripped or stripped.startswith("#"):
                continue
            if TOP_LEVEL_KEY_RE.match(next_line):
                # The next top-level key ends the `on` block.
                break
            indent = len(next_line) - len(next_line.lstrip())
            if child_indent is None:
                # The first real child line fixes the depth at which events live.
                child_indent = indent
            if indent != child_indent:
                # Deeper lines configure an event; they are not events themselves.
                continue
            if stripped.startswith("- "):
                triggers.extend(parse_inline_list(stripped[2:].split("#", 1)[0]))
                continue
            key_match = INDENTED_KEY_RE.match(next_line)
            if key_match:
                triggers.append(key_match.group(1))
        return sorted(set(triggers))
    return []
def extract_runs_on(text: str) -> list[str]:
    """Collect the runner labels named by every ``runs-on`` key in a workflow.

    The text is scanned line by line so that a ``runs-on:`` key without a value
    on its own line cannot swallow the following line as its value. Two forms
    are read:

    * same-line scalar or flow list: ``runs-on: ubuntu-latest`` /
      ``runs-on: [self-hosted, linux]``
    * block sequence: ``runs-on:`` followed by ``- label`` lines

    Text after ``#`` is treated as a comment. Labels containing ``secrets.``
    are dropped.

    Args:
        text: Full workflow file content.

    Returns:
        Sorted unique runner labels.
    """
    labels: list[str] = []
    lines = text.splitlines()
    for index, line in enumerate(lines):
        match = RUNS_ON_RE.search(line)
        value = match.group(1).split("#", 1)[0].strip() if match else ""
        if value:
            labels.extend(parse_inline_list(value))
            continue
        code = line.split("#", 1)[0].rstrip()
        if not code.endswith("runs-on:"):
            continue
        # Key with no same-line value: read the `- label` items that follow.
        key_indent = len(code) - len(code.lstrip())
        for child in lines[index + 1 :]:
            item = child.split("#", 1)[0].strip()
            if not item:
                continue
            child_indent = len(child) - len(child.lstrip())
            # A sequence may sit at the key's own indent, but never shallower.
            if child_indent < key_indent or not item.startswith("- "):
                break
            labels.extend(parse_inline_list(item[2:]))
    return sorted({label for label in labels if label and "secrets." not in label})
def extract_environments(text: str) -> list[str]:
    """Collect the deployment environment names referenced by a workflow.

    The text is scanned line by line so that an ``environment:`` key without a
    value on its own line cannot swallow the following line as its value. Two
    forms are read:

    * same-line scalar: ``environment: production``
    * block mapping: ``environment:`` followed by an indented ``name: ...`` line

    Text after ``#`` is treated as a comment. Same-line values that start with
    ``{`` and any value containing ``secrets.`` are skipped.

    Args:
        text: Full workflow file content.

    Returns:
        Sorted unique environment names.
    """
    names: list[str] = []
    lines = text.splitlines()
    for index, line in enumerate(lines):
        match = ENVIRONMENT_RE.search(line)
        value = match.group(1).split("#", 1)[0].strip() if match else ""
        if value:
            if not value.startswith("{") and "secrets." not in value:
                names.extend(parse_inline_list(value))
            continue
        code = line.split("#", 1)[0].rstrip()
        if not code.endswith("environment:"):
            continue
        # Key with no same-line value: look for `name:` among the nested lines.
        key_indent = len(code) - len(code.lstrip())
        for child in lines[index + 1 :]:
            entry = child.split("#", 1)[0].strip()
            if not entry:
                continue
            if len(child) - len(child.lstrip()) <= key_indent:
                # Back at (or above) the key's depth: the mapping has ended.
                break
            if entry.startswith("name:"):
                name = entry[len("name:") :].strip()
                if name and "secrets." not in name:
                    names.extend(parse_inline_list(name))
                break
    return sorted(set(names))
def extract_secret_names(text: str) -> list[str]:
    """List the secret names referenced inside ``${{ ... }}`` expressions.

    Only names are collected; the function never sees or stores secret values.

    Args:
        text: Full workflow file content.

    Returns:
        Sorted unique secret names, taken from both the dotted
        (``secrets.NAME``) and the bracketed (``secrets['NAME']``) form.
    """
    found: set[str] = set()
    for expression in EXPRESSION_RE.findall(text):
        if "secrets" in expression:
            for hit in SECRET_RE.finditer(expression):
                # Each match fills only one of the two groups; keep the one set.
                found.update(group for group in hit.groups() if group)
    return sorted(found)
def provider_for(path: Path) -> str:
    """Classify a path by the forge directory it lives under.

    Args:
        path: Path to inspect.

    Returns:
        ``"gitea"`` if any path component is ``.gitea``, otherwise ``"github"``
        if any component is ``.github``, otherwise ``"unknown"``.
    """
    components = set(path.parts)
    # Order matters: `.gitea` wins when both markers are present.
    for marker, provider in ((".gitea", "gitea"), (".github", "github")):
        if marker in components:
            return provider
    return "unknown"
def workflow_files(repo: Path) -> list[Path]:
    """Find the workflow YAML files of a local repository.

    Args:
        repo: Root directory of the repository working tree.

    Returns:
        Every regular file ending in ``.yml`` or ``.yaml`` (case-insensitive)
        found recursively under ``.gitea/workflows`` and then
        ``.github/workflows``; each directory's files are in sorted order.
        Directories that do not exist contribute nothing.
    """
    yaml_suffixes = {".yml", ".yaml"}
    found: list[Path] = []
    for forge_dir in (".gitea", ".github"):
        base = repo / forge_dir / "workflows"
        if base.exists():
            found.extend(
                entry
                for entry in sorted(base.rglob("*"))
                if entry.is_file() and entry.suffix.lower() in yaml_suffixes
            )
    return found
def codeowners_files(repo: Path) -> list[Path]:
    """Return the CODEOWNERS files that exist in a local repository.

    Args:
        repo: Root directory of the repository working tree.

    Returns:
        The existing regular files among ``CODEOWNERS``, ``.github/CODEOWNERS``
        and ``.gitea/CODEOWNERS``, in that order.
    """
    found: list[Path] = []
    for relative in ("CODEOWNERS", ".github/CODEOWNERS", ".gitea/CODEOWNERS"):
        candidate = repo.joinpath(*relative.split("/"))
        # is_file() is already False for a missing path, so it covers both checks.
        if candidate.is_file():
            found.append(candidate)
    return found
def workflow_item(repo: Path, path: Path) -> dict[str, object]:
    """Summarise one workflow file as a redacted inventory record.

    The file is read as UTF-8 with undecodable bytes replaced.

    Args:
        repo: Root directory of the repository working tree.
        path: Workflow file located under ``repo``.

    Returns:
        A dict with the provider, the path relative to ``repo``, the display
        name (the file stem when no ``name:`` key is found), trigger names,
        runner labels, environment names and referenced secret names.
    """
    content = path.read_text(encoding="utf-8", errors="replace")
    relative_path = path.relative_to(repo)
    display_name = first_scalar(NAME_RE, content, path.stem)
    triggers = extract_on_block(content.splitlines())
    return {
        "provider": provider_for(path),
        "workflow_file_path": str(relative_path),
        "workflow_display_name": display_name,
        "trigger_names": triggers,
        "runner_label_names": extract_runs_on(content),
        "environment_names": extract_environments(content),
        "referenced_secret_names": extract_secret_names(content),
    }
def codeowners_item(repo: Path, path: Path) -> dict[str, object]:
    """Summarise the ``@``-prefixed owners named in a CODEOWNERS file.

    Blank lines and lines starting with ``#`` are skipped. On each remaining
    line the first whitespace-separated token (the path pattern) is ignored,
    and every later token that starts with ``@`` is collected.

    Args:
        repo: Root directory of the repository working tree.
        path: CODEOWNERS file located under ``repo``.

    Returns:
        A dict with the file path relative to ``repo``, the sorted unique owner
        tokens, and their count.
    """
    content = path.read_text(encoding="utf-8", errors="replace")
    rules = (raw.strip() for raw in content.splitlines())
    handles = {
        token
        for rule in rules
        if rule and not rule.startswith("#")
        for token in rule.split()[1:]
        if token.startswith("@")
    }
    return {
        "codeowners_path": str(path.relative_to(repo)),
        "owner_tokens": sorted(handles),
        "owner_token_count": len(handles),
    }
def repo_item(spec: dict[str, str]) -> dict[str, object]:
    """Build the local-evidence record for one candidate repository.

    Args:
        spec: Repo spec as produced by ``parse_repo_spec``; ``repo_path`` is
            the only key read here, the rest is copied into the result.

    Returns:
        The spec fields followed by ``local_status``, the per-file workflow and
        CODEOWNERS records, the sorted unique secret names, runner labels and
        environment names across all workflows, the lanes that still need API
        evidence, and the forbidden-action list. ``local_status`` is
        ``missing_local_repo`` when the path does not exist,
        ``partial_local_evidence`` when at least one workflow or CODEOWNERS
        file was found, and ``local_repo_visible_no_workflow_files`` otherwise.
    """
    repo = Path(spec["repo_path"])
    workflows: list[dict[str, object]] = []
    codeowners: list[dict[str, object]] = []
    if not repo.exists():
        local_status = "missing_local_repo"
    else:
        workflows = [workflow_item(repo, path) for path in workflow_files(repo)]
        codeowners = [codeowners_item(repo, path) for path in codeowners_files(repo)]
        local_status = (
            "partial_local_evidence"
            if workflows or codeowners
            else "local_repo_visible_no_workflow_files"
        )

    def merged(field: str) -> list[str]:
        # Union of one list-valued field over every workflow record.
        return sorted(
            {
                name
                for workflow in workflows
                for name in workflow[field]
                if isinstance(name, str)
            }
        )

    return {
        **spec,
        "local_status": local_status,
        "workflow_files": workflows,
        "codeowners_files": codeowners,
        "referenced_secret_names": merged("referenced_secret_names"),
        "runner_label_names": merged("runner_label_names"),
        "environment_names": merged("environment_names"),
        "api_required_lanes": [
            "webhook_inventory",
            "deploy_key_inventory",
            "branch_protection_inventory",
            "repository_secret_name_parity",
        ],
        "still_forbidden": STILL_FORBIDDEN,
    }
def build_snapshot(args: argparse.Namespace) -> dict[str, object]:
    """Assemble the complete evidence snapshot from the parsed CLI arguments.

    Args:
        args: Namespace providing ``repo`` (a list of ``--repo`` spec strings)
            and ``date`` (copied into the snapshot unchanged).

    Returns:
        A JSON-serialisable dict holding fixed schema/status markers, summary
        counters, the unique secret names and runner labels across all repos,
        the per-repo records, the redaction rules and the forbidden actions.
    """
    repos = [repo_item(parse_repo_spec(raw)) for raw in args.repo]
    all_workflows = [item for repo in repos for item in repo["workflow_files"]]

    def provider_total(provider: str) -> int:
        # Number of workflow files attributed to the given forge.
        return sum(1 for item in all_workflows if item["provider"] == provider)

    def unique(field: str) -> list[str]:
        # Sorted union of one list-valued field over every repo record.
        return sorted(
            {name for repo in repos for name in repo[field] if isinstance(name, str)}
        )

    secret_names = unique("referenced_secret_names")
    runner_labels = unique("runner_label_names")
    visible_repo_count = sum(
        1 for repo in repos if repo["local_status"] != "missing_local_repo"
    )
    evidence_repo_count = sum(
        1 for repo in repos if repo["local_status"] == "partial_local_evidence"
    )
    summary = {
        "candidate_repo_count": len(repos),
        "local_repo_visible_count": visible_repo_count,
        "local_evidence_repo_count": evidence_repo_count,
        "workflow_file_count": len(all_workflows),
        "gitea_workflow_file_count": provider_total("gitea"),
        "github_workflow_file_count": provider_total("github"),
        "codeowners_file_count": sum(len(repo["codeowners_files"]) for repo in repos),
        "unique_secret_name_count": len(secret_names),
        "runner_label_count": len(runner_labels),
        "secret_value_collection_allowed": False,
        "secret_value_detected": False,
        "runtime_actions_authorized": False,
        "action_buttons_allowed": False,
    }
    return {
        "schema_version": "source_control_workflow_secret_name_local_evidence_v1",
        "status": "draft_partial_local_evidence",
        "date": args.date,
        "mode": "local_read_only_redacted_inventory",
        "runtime_execution_authorized": False,
        "source_contract": "source_control_workflow_secret_name_inventory_v1",
        "summary": summary,
        "unique_secret_names": secret_names,
        "runner_label_names": runner_labels,
        "repos": repos,
        "redaction_rules": [
            "只保存 workflow 內引用的 secret 名稱,不保存 secret value。",
            "不讀取 .env、secrets、private key、runner registration token 或 webhook secret。",
            "不呼叫 GitHub / Gitea API因此 webhook、deploy key、branch protection 與 repository secret parity 仍需後續 redacted export 或 read-only API evidence。",
            "任何含 raw secret/token/private key 的 payload 都必須拒收並進 quarantine。",
        ],
        "forbidden_actions": STILL_FORBIDDEN,
    }
def write_json(path: Path, payload: dict[str, object]) -> None:
    """Write ``payload`` to ``path`` as indented UTF-8 JSON.

    Missing parent directories are created first. Non-ASCII characters are
    written as-is (not escaped) and the file ends with a newline.

    Args:
        path: Destination file.
        payload: JSON-serialisable data.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(payload, ensure_ascii=False, indent=2)
    path.write_text(f"{rendered}\n", encoding="utf-8")
def main() -> int:
    """Parse the command line, build the snapshot and write it as JSON.

    Options: ``--repo`` (required, may be repeated), ``--output`` (required
    destination path) and ``--date`` (defaults to ``2026-05-13``).

    Returns:
        ``0`` once the output file has been written.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--date", default="2026-05-13")
    cli.add_argument("--repo", action="append", required=True)
    cli.add_argument("--output", required=True)
    options = cli.parse_args()
    snapshot = build_snapshot(options)
    write_json(Path(options.output), snapshot)
    return 0
# Run the CLI only when this file is executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())