Files
awoooi/scripts/security/source-control-ref-detail-diff.py

321 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""產生 Gitea / GitHub branch/tag 明細 diff。
此工具只執行 read-only `git ls-remote --heads/--tags`,不 fetch、不 push、
不寫入任一 remote。輸出前會遮蔽 remote URL 內的帳密。
"""
from __future__ import annotations
import argparse
import json
import re
import subprocess
from pathlib import Path
from urllib.parse import urlsplit, urlunsplit
# Matches an abbreviated or full hexadecimal object id. The upper bound is 64
# so that ids from repositories using the SHA-256 object format (64 hex
# digits) are accepted alongside SHA-1 ids (40 hex digits); with a bound of 40
# every ref of such a repository would be silently discarded during parsing.
SHA_RE = re.compile(r"^[0-9a-fA-F]{7,64}$")

# Operations that stay off-limits; this list is copied into every repo entry
# of the snapshot and rendered in the Markdown report.
STILL_FORBIDDEN = [
    "fetch",
    "push refs",
    "force push",
    "delete refs",
    "create GitHub repo",
    "change repo visibility",
    "switch GitHub primary",
    "disable Gitea",
    "move secret values",
]
def run_git(repo: Path, args: list[str]) -> subprocess.CompletedProcess[str]:
    """Run ``git`` with *args* using *repo* as the working directory.

    The process result is returned as-is: a non-zero exit status does not
    raise here (``check=False``), and stdout/stderr are captured as text.
    """
    command = ["git"]
    command.extend(args)
    return subprocess.run(
        command,
        cwd=repo,
        check=False,
        capture_output=True,
        text=True,
    )
def require_git(repo: Path, args: list[str]) -> str:
    """Run a git command in *repo* and return its stdout.

    Raises:
        RuntimeError: if git exits with a non-zero status. The message carries
            the stripped stderr, or the stripped stdout when stderr is empty.
    """
    completed = run_git(repo, args)
    if completed.returncode == 0:
        return completed.stdout
    detail = completed.stderr.strip() or completed.stdout.strip()
    command_text = " ".join(args)
    raise RuntimeError(f"{repo}: git {command_text} failed: {detail}")
def redact_url(value: str) -> str:
    """Return *value* with any ``user[:password]@`` credential part removed.

    Two remote spellings are handled:

    * URLs containing ``://`` -- the userinfo is dropped from the network
      location; scheme, path, query and fragment are kept.
    * scp-like ``user@host:path`` -- everything up to the first ``@`` is
      dropped.

    Any other value is returned unchanged.
    """
    if "://" in value:
        parts = urlsplit(value)
        # Split on the LAST "@": a password that contains an unencoded "@"
        # would otherwise leave its tail in front of the host name.
        host = parts.netloc.rsplit("@", 1)[-1]
        return urlunsplit((parts.scheme, host, parts.path, parts.query, parts.fragment))
    _user, at_sign, remainder = value.partition("@")
    # Only treat the value as scp-like when a "host:path" part follows the "@".
    if at_sign and ":" in remainder:
        return remainder
    return value
def repo_slug_from_url(value: str) -> str:
    """Derive a repository slug (e.g. ``owner/name``) from a remote location.

    The value is first passed through ``redact_url`` and one trailing ``/`` is
    removed. The slug is then the URL path (for ``scheme://`` values), the
    part after the first ``:`` (for scp-like values) or the whole value, with
    surrounding slashes and a trailing ``.git`` stripped. When that leaves
    nothing, the redacted value itself is returned.
    """
    cleaned = redact_url(value).removesuffix("/")
    if "://" in cleaned:
        location = urlsplit(cleaned).path
    else:
        _head, colon, after_colon = cleaned.partition(":")
        location = after_colon if colon else cleaned
    slug = location.strip("/").removesuffix(".git")
    return slug if slug else cleaned
def remote_url(repo: Path, remote: str) -> str:
    """Return the URL configured for *remote* in *repo*, with credentials redacted.

    Raises whatever ``require_git`` raises when ``git remote get-url`` fails.
    """
    raw_url = require_git(repo, ["remote", "get-url", remote])
    return redact_url(raw_url.strip())
def parse_ls_remote(output: str, prefix: str) -> dict[str, str]:
    """Parse ``git ls-remote`` output into a ``{short ref name: sha}`` mapping.

    Each line is expected to be ``<sha><whitespace><ref>``. Lines are skipped
    when they do not split into exactly those two fields, when the first field
    does not match ``SHA_RE``, or when the ref does not start with *prefix*.
    The prefix is removed from the returned names; a name seen more than once
    keeps the last sha.
    """
    parsed: dict[str, str] = {}
    for raw_line in output.splitlines():
        fields = raw_line.split(None, 1)
        # Blank lines yield no fields and one-token lines yield one; neither is a ref.
        if len(fields) != 2:
            continue
        object_id, full_ref = fields
        if SHA_RE.match(object_id) and full_ref.startswith(prefix):
            parsed[full_ref.removeprefix(prefix)] = object_id
    return parsed
def refs_for_remote(
    repo: Path,
    remote: str,
    ignored_branches: set[str],
) -> tuple[dict[str, str], dict[str, str]]:
    """Return ``(heads, tags)`` for *remote*, each mapping short name to sha.

    Branches listed in *ignored_branches* are removed from ``heads``. For
    tags, an entry whose name ends in ``^{}`` (the peeled form ``git
    ls-remote`` prints for annotated tags) replaces the sha of the plain
    entry with the same name, so tags are compared by the object they
    ultimately point at.
    """
    peel_marker = "^{}"

    head_listing = require_git(repo, ["ls-remote", "--heads", remote])
    heads = parse_ls_remote(head_listing, "refs/heads/")
    for ignored in ignored_branches:
        heads.pop(ignored, None)

    tag_listing = require_git(repo, ["ls-remote", "--tags", remote])
    plain: dict[str, str] = {}
    peeled: dict[str, str] = {}
    for tag_name, object_id in parse_ls_remote(tag_listing, "refs/tags/").items():
        if tag_name.endswith(peel_marker):
            peeled[tag_name.removesuffix(peel_marker)] = object_id
        else:
            plain[tag_name] = object_id
    # Peeled shas win over the tag-object shas collected above.
    plain.update(peeled)
    return heads, plain
def ref_items(refs: dict[str, str], names: list[str]) -> list[dict[str, str]]:
    """Build ``{"name", "sha"}`` records for *names*, in the given order.

    Raises ``KeyError`` when a name is not present in *refs*.
    """
    records: list[dict[str, str]] = []
    for ref_name in names:
        records.append({"name": ref_name, "sha": refs[ref_name]})
    return records
def compare_refs(gitea_refs: dict[str, str], github_refs: dict[str, str]) -> dict[str, object]:
    """Compare two ``{name: sha}`` mappings and summarise the differences.

    The result holds the size of each side, the refs present on only one
    side, the refs present on both sides with different shas, and the refs
    that match, together with a ``*_count`` entry for each of those lists.
    All lists are ordered by ref name.
    """
    shared_names = sorted(gitea_refs.keys() & github_refs.keys())
    gitea_exclusive = sorted(gitea_refs.keys() - github_refs.keys())
    github_exclusive = sorted(github_refs.keys() - gitea_refs.keys())

    differing: list[dict[str, str]] = []
    identical: list[str] = []
    for ref_name in shared_names:
        gitea_sha = gitea_refs[ref_name]
        github_sha = github_refs[ref_name]
        if gitea_sha == github_sha:
            identical.append(ref_name)
        else:
            differing.append(
                {
                    "name": ref_name,
                    "gitea_sha": gitea_sha,
                    "github_sha": github_sha,
                }
            )

    return {
        "gitea_count": len(gitea_refs),
        "github_count": len(github_refs),
        "only_gitea_count": len(gitea_exclusive),
        "only_github_count": len(github_exclusive),
        "sha_mismatch_count": len(differing),
        "matching_count": len(identical),
        "only_gitea": ref_items(gitea_refs, gitea_exclusive),
        "only_github": ref_items(github_refs, github_exclusive),
        "sha_mismatch": differing,
        "matching": ref_items(gitea_refs, identical),
    }
def repo_status(branch_diff: dict[str, object], tag_diff: dict[str, object]) -> tuple[str, str]:
    """Classify a repository from its branch and tag comparison results.

    Args:
        branch_diff: comparison result for branches; must provide the
            ``only_gitea_count``, ``only_github_count`` and
            ``sha_mismatch_count`` entries.
        tag_diff: the same structure for tags.

    Returns:
        ``("verified", "")`` when every one of those counts is zero on both
        sides, otherwise ``("blocked", reason)``. When both branches and tags
        differ, the two reasons are separated by ``"; "`` so they do not run
        together in the report.
    """
    count_keys = ("only_gitea_count", "only_github_count", "sha_mismatch_count")
    reasons: list[str] = []
    if any(branch_diff[key] for key in count_keys):
        reasons.append("branches 尚未完全對齊")
    if any(tag_diff[key] for key in count_keys):
        reasons.append("tags 尚未完全對齊")
    if not reasons:
        return ("verified", "")
    return ("blocked", "; ".join(reasons))
def parse_repo_spec(value: str) -> tuple[str, Path, str, str]:
    """Split a ``--repo`` value of the form ``key=/path=gitea_remote=github_remote``.

    At most three ``=`` separators are consumed, so any further ``=`` stays in
    the last field. The path has ``~`` expanded and is resolved to an absolute
    path.

    Raises:
        ValueError: if the value has fewer than four ``=``-separated fields.
    """
    fields = value.split("=", 3)
    if len(fields) == 4:
        key, raw_path, gitea_remote, github_remote = fields
        repo_dir = Path(raw_path).expanduser().resolve()
        return key, repo_dir, gitea_remote, github_remote
    raise ValueError("--repo 格式必須是 key=/path=gitea_remote=github_remote")
def build_repo_item(spec: str, ignored_branches: set[str]) -> dict[str, object]:
    """Build the snapshot entry for one ``--repo`` specification.

    The redacted URLs of both remotes are read first, then the heads and tags
    of each remote are listed, compared, and classified. The returned mapping
    carries the identifying fields, the status with its blocking reason, both
    diffs, and the ``STILL_FORBIDDEN`` list.
    """
    key, repo, gitea_remote, github_remote = parse_repo_spec(spec)

    gitea_url = remote_url(repo, gitea_remote)
    github_url = remote_url(repo, github_remote)

    gitea_refs = refs_for_remote(repo, gitea_remote, ignored_branches)
    github_refs = refs_for_remote(repo, github_remote, ignored_branches)
    gitea_heads, gitea_tags = gitea_refs
    github_heads, github_tags = github_refs

    branches = compare_refs(gitea_heads, github_heads)
    tags = compare_refs(gitea_tags, github_tags)
    verdict = repo_status(branches, tags)

    return {
        "repo_key": key,
        "repo_path": str(repo),
        "gitea_remote": gitea_remote,
        "github_remote": github_remote,
        "gitea_url_redacted": gitea_url,
        "github_url_redacted": github_url,
        "gitea_repo": repo_slug_from_url(gitea_url),
        "github_repo": repo_slug_from_url(github_url),
        "status": verdict[0],
        "blocking_reason": verdict[1],
        "branch_diff": branches,
        "tag_diff": tags,
        "still_forbidden": STILL_FORBIDDEN,
    }
def build_snapshot(args: argparse.Namespace) -> dict[str, object]:
    """Assemble the full snapshot from parsed command-line arguments.

    Reads ``args.repo``, ``args.ignore_branch`` and ``args.date``. One repo
    entry is built per ``--repo`` value, in the order given. The top-level
    ``status`` is always ``"draft_blocked"``.
    """
    ignored = set(args.ignore_branch or [])
    repo_entries: list[dict[str, object]] = []
    for spec in args.repo:
        repo_entries.append(build_repo_item(spec, ignored))
    return {
        "schema_version": "source_control_ref_detail_diff_v1",
        "status": "draft_blocked",
        "date": args.date,
        "default_mode": "read_only_diff",
        "ignored_branches": sorted(ignored),
        "repo_count": len(repo_entries),
        "repos": repo_entries,
    }
def short_sha(value: str) -> str:
    """Return the first eight characters of *value*, or ``""`` for a falsy value."""
    if not value:
        return ""
    return value[:8]
def write_ref_list(lines: list[str], title: str, refs: list[dict[str, str]], *, limit: int) -> None:
    """Append a ``#### title`` Markdown section listing *refs* to *lines*.

    Each ref is rendered as a bullet with its name and shortened sha. When
    *limit* is positive only the first *limit* refs are shown, followed by a
    bullet stating how many were left out; a *limit* of zero or less shows
    everything. An empty *refs* produces a single placeholder line. The
    section always ends with a blank line. *lines* is modified in place.
    """
    lines.append(f"#### {title}")
    lines.append("")
    if refs:
        capped = limit > 0
        shown = refs[:limit] if capped else refs
        lines.extend(
            f"- `{entry['name']}` @ `{short_sha(entry['sha'])}`" for entry in shown
        )
        hidden = len(refs) - limit
        if capped and hidden > 0:
            lines.append(f"- 另有 `{hidden}` 筆,完整清單見 JSON snapshot。")
    else:
        lines.append("無。")
    lines.append("")
def _write_sha_mismatch(lines: list[str], title: str, mismatches: list[dict[str, str]]) -> None:
    """Append a ``#### title`` section listing refs whose sha differs between the two sides."""
    lines.extend([f"#### {title}", ""])
    if not mismatches:
        lines.extend(["無。", ""])
        return
    for item in mismatches:
        lines.append(
            f"- `{item['name']}`: Gitea `{short_sha(item['gitea_sha'])}`"
            f" / GitHub `{short_sha(item['github_sha'])}`"
        )
    lines.append("")


def write_markdown(snapshot: dict[str, object], path: Path, *, list_limit: int) -> None:
    """Render *snapshot* as a Markdown report and write it to *path* (UTF-8).

    The report contains a header table, a per-repo summary table, and one
    detail section per repo with the sha mismatches, the refs present on only
    one side, and the list of still-forbidden operations.

    Args:
        snapshot: mapping with the ``date``, ``status``, ``default_mode``,
            ``ignored_branches``, ``repo_count`` and ``repos`` entries.
        path: destination file; it is overwritten.
        list_limit: maximum number of refs shown per "only on one side" list;
            zero or less shows all of them.
    """
    ignored = list(snapshot["ignored_branches"])
    # An empty join would render as an empty code span, so use the same
    # placeholder the ref lists use for "nothing".
    ignored_cell = f"`{', '.join(ignored)}`" if ignored else "無"

    lines = [
        "# Source Control Branch / Tag Detail Diff",
        "",
        "| 項目 | 內容 |",
        "|------|------|",
        f"| 日期 | {snapshot['date']} |",
        f"| 狀態 | `{snapshot['status']}` |",
        f"| 預設模式 | `{snapshot['default_mode']}` |",
        f"| ignored branches | {ignored_cell} |",
        f"| repo count | {snapshot['repo_count']} |",
        "",
        "## 0. 核心結論",
        "",
        "本檔是 read-only refs 明細 diff,不是同步腳本。任何 refs sync、GitHub primary 切換、repo 建立或 visibility 修改都仍需單一 repo 人工批准。",
        "",
        "## 1. 摘要",
        "",
        "| Repo | Status | Branch only Gitea | Branch only GitHub | Branch SHA diff | Tag only Gitea | Tag only GitHub | Tag SHA diff |",
        "|------|--------|-------------------|--------------------|-----------------|----------------|-----------------|--------------|",
    ]

    # Summary table: one row per repo.
    for repo in snapshot["repos"]:
        branch = repo["branch_diff"]
        tag = repo["tag_diff"]
        cells = [
            f"`{repo['gitea_repo']} -> {repo['github_repo']}`",
            f"`{repo['status']}`",
            f"`{branch['only_gitea_count']}`",
            f"`{branch['only_github_count']}`",
            f"`{branch['sha_mismatch_count']}`",
            f"`{tag['only_gitea_count']}`",
            f"`{tag['only_github_count']}`",
            f"`{tag['sha_mismatch_count']}`",
        ]
        lines.append("| " + " | ".join(cells) + " |")

    lines.extend(["", "## 2. Repo 明細", ""])

    # Detail sections: one per repo.
    for repo in snapshot["repos"]:
        branch = repo["branch_diff"]
        tag = repo["tag_diff"]
        lines.extend(
            [
                f"### {repo['gitea_repo']} -> {repo['github_repo']}",
                "",
                # Labels are separated from their values so they do not run together.
                f"- Status: `{repo['status']}`",
                f"- Blocking reason: {repo['blocking_reason'] or '無'}",
                f"- Gitea URL: `{repo['gitea_url_redacted']}`",
                f"- GitHub URL: `{repo['github_url_redacted']}`",
                "",
            ]
        )
        _write_sha_mismatch(lines, "Branch SHA 不一致", branch["sha_mismatch"])
        # Tag mismatches block a repo just like branch mismatches, so list them too.
        _write_sha_mismatch(lines, "Tag SHA 不一致", tag["sha_mismatch"])
        write_ref_list(lines, "Branch 只在 Gitea", branch["only_gitea"], limit=list_limit)
        write_ref_list(lines, "Branch 只在 GitHub", branch["only_github"], limit=list_limit)
        write_ref_list(lines, "Tag 只在 Gitea", tag["only_gitea"], limit=list_limit)
        write_ref_list(lines, "Tag 只在 GitHub", tag["only_github"], limit=list_limit)
        lines.extend(["#### 仍然禁止", ""])
        lines.extend(f"- {value}" for value in repo["still_forbidden"])
        lines.append("")

    path.write_text("\n".join(lines), encoding="utf-8")
def main() -> int:
    """Command-line entry point: build the snapshot and write both reports.

    Parses the command line, writes the snapshot as indented JSON (with a
    trailing newline) to ``--output-json`` and the Markdown report to
    ``--output-md``, prints a one-line confirmation, and returns ``0``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--date", required=True)
    cli.add_argument("--repo", action="append", required=True)
    cli.add_argument("--ignore-branch", action="append", default=[])
    cli.add_argument("--output-json", required=True)
    cli.add_argument("--output-md", required=True)
    cli.add_argument("--md-list-limit", type=int, default=50)
    options = cli.parse_args()

    snapshot = build_snapshot(options)

    json_text = json.dumps(snapshot, ensure_ascii=False, indent=2) + "\n"
    Path(options.output_json).write_text(json_text, encoding="utf-8")

    markdown_path = Path(options.output_md)
    write_markdown(snapshot, markdown_path, list_limit=options.md_list_limit)

    repo_count = snapshot["repo_count"]
    print(f"OK source-control ref detail diff repos={repo_count}")
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)