docs(security): add source control ref detail diff [skip ci]

This commit is contained in:
Your Name
2026-05-13 09:16:31 +08:00
parent b63c6f9b79
commit 611093de5b
15 changed files with 1546 additions and 27 deletions

View File

@@ -0,0 +1,320 @@
#!/usr/bin/env python3
"""產生 Gitea / GitHub branch/tag 明細 diff。
此工具只執行 read-only `git ls-remote --heads/--tags`,不 fetch、不 push、
不寫入任一 remote。輸出前會遮蔽 remote URL 內的帳密。
"""
from __future__ import annotations
import argparse
import json
import re
import subprocess
from pathlib import Path
from urllib.parse import urlsplit, urlunsplit
# Matches a git object name in the first column of `git ls-remote` output:
# 7-40 hex digits (abbreviated or full SHA-1) or exactly 64 hex digits (a full
# SHA-256 object name). Accepting the 64-digit form keeps refs of SHA-256
# repositories from being silently skipped by parse_ls_remote().
SHA_RE = re.compile(r"^(?:[0-9a-fA-F]{7,40}|[0-9a-fA-F]{64})$")
# Operations that stay off-limits for this read-only tool. The list is copied
# verbatim into every repo item under the "still_forbidden" key and rendered
# as a bullet list in the Markdown report.
STILL_FORBIDDEN = [
    "fetch",
    "push refs",
    "force push",
    "delete refs",
    "create GitHub repo",
    "change repo visibility",
    "switch GitHub primary",
    "disable Gitea",
    "move secret values",
]
def run_git(repo: Path, args: list[str]) -> subprocess.CompletedProcess[str]:
    """Run ``git <args>`` with *repo* as the working directory.

    The call never raises on a non-zero exit status (``check=False``); the
    caller inspects ``returncode``. stdout and stderr are captured and decoded
    as text.

    Args:
        repo: Directory in which git is executed.
        args: Arguments placed after the ``git`` executable name.

    Returns:
        The completed process, including exit status and captured output.
    """
    command = ["git"]
    command.extend(args)
    return subprocess.run(
        command,
        cwd=repo,
        capture_output=True,
        text=True,
        check=False,
    )
def require_git(repo: Path, args: list[str]) -> str:
    """Run a git command that must succeed and return its stdout.

    Args:
        repo: Directory in which git is executed.
        args: Arguments placed after the ``git`` executable name.

    Returns:
        The command's captured standard output, unmodified.

    Raises:
        RuntimeError: If git exits with a non-zero status. The message carries
            the stripped stderr, or the stripped stdout when stderr is empty.
    """
    completed = run_git(repo, args)
    if completed.returncode == 0:
        return completed.stdout
    # Prefer stderr for the diagnostic; fall back to stdout when it is blank.
    detail = completed.stderr.strip() or completed.stdout.strip()
    command_text = " ".join(args)
    raise RuntimeError(f"{repo}: git {command_text} failed: {detail}")
def redact_url(value: str) -> str:
    """Remove embedded credentials from a git remote URL.

    Two shapes are handled:

    * ``scheme://user:secret@host/path`` - the userinfo part of the authority
      is dropped and the rest of the URL is reassembled unchanged.
    * scp-like ``user@host:path`` - everything up to and including the first
      ``@`` is dropped.

    Any other value (for example a plain filesystem path) is returned as is.

    Args:
        value: Remote URL as reported by git.

    Returns:
        The URL without its userinfo component.
    """
    if "://" in value:
        parts = urlsplit(value)
        # The userinfo ends at the LAST "@" of the authority. Cutting at the
        # first "@" would leak the tail of a password that itself contains an
        # unescaped "@" (e.g. "user:p@ss@host" -> "ss@host").
        netloc = parts.netloc.rpartition("@")[2]
        return urlunsplit((parts.scheme, netloc, parts.path, parts.query, parts.fragment))
    _user, at_sign, remainder = value.partition("@")
    # Only treat the value as scp-like when a "host:path" part follows the "@".
    if at_sign and ":" in remainder:
        return remainder
    return value
def repo_slug_from_url(value: str) -> str:
    """Derive a repository slug such as ``owner/name`` from a remote URL.

    The URL is first passed through ``redact_url`` and one trailing ``/`` is
    removed. The path portion is then taken from the URL (``scheme://`` form),
    from the text after the first ``:`` (scp-like form), or from the whole
    value. Surrounding slashes and one trailing ``.git`` are stripped.

    Args:
        value: Remote URL, redacted or not.

    Returns:
        The slug, or the redacted URL itself when the slug would be empty.
    """
    cleaned = redact_url(value).removesuffix("/")
    if "://" in cleaned:
        raw_path = urlsplit(cleaned).path
    elif ":" in cleaned:
        raw_path = cleaned.partition(":")[2]
    else:
        raw_path = cleaned
    slug = raw_path.strip("/").removesuffix(".git")
    return slug if slug else cleaned
def remote_url(repo: Path, remote: str) -> str:
    """Return the credential-free URL configured for *remote* in *repo*.

    Runs ``git remote get-url <remote>``, strips surrounding whitespace from
    the output and passes it through ``redact_url``.

    Raises:
        RuntimeError: Propagated from ``require_git`` when the git command
            fails.
    """
    raw_output = require_git(repo, ["remote", "get-url", remote])
    return redact_url(raw_output.strip())
def parse_ls_remote(output: str, prefix: str) -> dict[str, str]:
    """Turn ``git ls-remote`` output into a ``{short ref name: sha}`` mapping.

    Each line is expected to hold an object name and a ref name separated by
    whitespace. Lines are skipped when they are blank, have fewer than two
    fields, have a first field that does not match ``SHA_RE``, or name a ref
    that does not start with *prefix*.

    Args:
        output: Raw stdout of ``git ls-remote``.
        prefix: Ref namespace to keep, e.g. ``"refs/heads/"``; it is removed
            from the returned names.

    Returns:
        Mapping from the ref name without *prefix* to its object name. When a
        name occurs more than once the last occurrence wins.
    """
    found: dict[str, str] = {}
    for raw_line in output.splitlines():
        # Blank lines yield no fields, malformed lines yield only one.
        fields = raw_line.split(None, 1)
        if len(fields) != 2:
            continue
        object_id, ref_name = fields
        if SHA_RE.match(object_id) and ref_name.startswith(prefix):
            found[ref_name.removeprefix(prefix)] = object_id
    return found
def refs_for_remote(
    repo: Path,
    remote: str,
    ignored_branches: set[str],
) -> tuple[dict[str, str], dict[str, str]]:
    """List the branches and tags of *remote* using ``git ls-remote``.

    Branches named in *ignored_branches* are removed from the result. For
    tags, an entry carrying the ``^{}`` suffix replaces the sha recorded for
    the same tag name without the suffix.

    Args:
        repo: Local repository in which the remote is configured.
        remote: Name of the remote to query.
        ignored_branches: Branch names to leave out of the branch mapping.

    Returns:
        ``(heads, tags)``, each a mapping from short ref name to sha.

    Raises:
        RuntimeError: Propagated from ``require_git`` when git fails.
    """
    heads_output = require_git(repo, ["ls-remote", "--heads", remote])
    heads = parse_ls_remote(heads_output, "refs/heads/")
    for branch in ignored_branches:
        heads.pop(branch, None)

    tags_output = require_git(repo, ["ls-remote", "--tags", remote])
    raw_tags = parse_ls_remote(tags_output, "refs/tags/")

    peel_marker = "^{}"
    tags: dict[str, str] = {}
    peeled: dict[str, str] = {}
    for name, sha in raw_tags.items():
        if name.endswith(peel_marker):
            peeled[name.removesuffix(peel_marker)] = sha
        else:
            tags[name] = sha
    # Entries with the "^{}" suffix take precedence over the plain entry.
    tags.update(peeled)
    return heads, tags
def ref_items(refs: dict[str, str], names: list[str]) -> list[dict[str, str]]:
    """Build ``{"name": ..., "sha": ...}`` records for *names*, in order.

    Args:
        refs: Mapping from ref name to sha.
        names: Ref names to emit; each must be a key of *refs*.

    Returns:
        One record per entry of *names*.

    Raises:
        KeyError: If a name is missing from *refs*.
    """
    return [dict(name=ref_name, sha=refs[ref_name]) for ref_name in names]
def compare_refs(gitea_refs: dict[str, str], github_refs: dict[str, str]) -> dict[str, object]:
    """Compare two ``{ref name: sha}`` mappings and summarise the differences.

    Args:
        gitea_refs: Refs found on the Gitea side.
        github_refs: Refs found on the GitHub side.

    Returns:
        A dict holding the totals of both sides, the counts and name-sorted
        lists of refs present on only one side, refs present on both sides
        with different shas, and refs present on both sides with equal shas.
    """
    shared = sorted(gitea_refs.keys() & github_refs.keys())
    gitea_exclusive = sorted(gitea_refs.keys() - github_refs.keys())
    github_exclusive = sorted(github_refs.keys() - gitea_refs.keys())

    # Split the shared names into equal-sha and different-sha groups in one pass.
    differing: list[dict[str, str]] = []
    identical: list[str] = []
    for name in shared:
        left_sha = gitea_refs[name]
        right_sha = github_refs[name]
        if left_sha == right_sha:
            identical.append(name)
        else:
            differing.append(
                {
                    "name": name,
                    "gitea_sha": left_sha,
                    "github_sha": right_sha,
                }
            )

    return {
        "gitea_count": len(gitea_refs),
        "github_count": len(github_refs),
        "only_gitea_count": len(gitea_exclusive),
        "only_github_count": len(github_exclusive),
        "sha_mismatch_count": len(differing),
        "matching_count": len(identical),
        "only_gitea": ref_items(gitea_refs, gitea_exclusive),
        "only_github": ref_items(github_refs, github_exclusive),
        "sha_mismatch": differing,
        "matching": ref_items(gitea_refs, identical),
    }
def repo_status(branch_diff: dict[str, object], tag_diff: dict[str, object]) -> tuple[str, str]:
    """Classify a repo as ``verified`` or ``blocked`` from its ref diffs.

    A diff counts as unaligned when any of its ``only_gitea_count``,
    ``only_github_count`` or ``sha_mismatch_count`` values is non-zero.

    Args:
        branch_diff: Result of ``compare_refs`` for branches.
        tag_diff: Result of ``compare_refs`` for tags.

    Returns:
        ``("verified", "")`` when both diffs are aligned, otherwise
        ``("blocked", reason)`` where *reason* lists every unaligned ref kind.
    """
    drift_keys = ("only_gitea_count", "only_github_count", "sha_mismatch_count")
    reasons: list[str] = []
    if any(branch_diff[key] for key in drift_keys):
        reasons.append("branches 尚未完全對齊")
    if any(tag_diff[key] for key in drift_keys):
        reasons.append("tags 尚未完全對齊")
    if not reasons:
        return ("verified", "")
    # Separate the reasons; joining on "" ran the two messages together.
    return ("blocked", ";".join(reasons))
def parse_repo_spec(value: str) -> tuple[str, Path, str, str]:
    """Parse one ``--repo`` argument of the form ``key=/path=gitea_remote=github_remote``.

    The value is split on at most three ``=`` characters, so any further
    ``=`` ends up in the last field.

    Args:
        value: Raw ``--repo`` argument.

    Returns:
        ``(key, repo_path, gitea_remote, github_remote)`` where *repo_path*
        has ``~`` expanded and is resolved to an absolute path.

    Raises:
        ValueError: If the value does not contain four ``=``-separated fields.
    """
    fields = value.split("=", 3)
    if len(fields) == 4:
        key, raw_path, gitea_remote, github_remote = fields
        repo_dir = Path(raw_path).expanduser().resolve()
        return key, repo_dir, gitea_remote, github_remote
    raise ValueError("--repo 格式必須是 key=/path=gitea_remote=github_remote")
def build_repo_item(spec: str, ignored_branches: set[str]) -> dict[str, object]:
    """Collect the full branch/tag comparison record for one ``--repo`` spec.

    Resolves the spec, reads the redacted URLs of both remotes, lists their
    refs, compares branches and tags, and derives the repo status.

    Args:
        spec: ``key=/path=gitea_remote=github_remote`` string.
        ignored_branches: Branch names excluded from the branch comparison.

    Returns:
        A dict describing the repo: identifiers, redacted URLs, slugs, status,
        blocking reason, both diffs and the ``STILL_FORBIDDEN`` list.

    Raises:
        ValueError: Propagated from ``parse_repo_spec`` for a malformed spec.
        RuntimeError: Propagated from the git helpers when a command fails.
    """
    key, repo, gitea_remote, github_remote = parse_repo_spec(spec)

    gitea_url = remote_url(repo, gitea_remote)
    github_url = remote_url(repo, github_remote)

    gitea_heads, gitea_tags = refs_for_remote(repo, gitea_remote, ignored_branches)
    github_heads, github_tags = refs_for_remote(repo, github_remote, ignored_branches)

    branch_diff = compare_refs(gitea_heads, github_heads)
    tag_diff = compare_refs(gitea_tags, github_tags)
    status, reason = repo_status(branch_diff, tag_diff)

    # Assembled in sections; insertion order fixes the key order in the JSON.
    item: dict[str, object] = {}
    item["repo_key"] = key
    item["repo_path"] = str(repo)
    item["gitea_remote"] = gitea_remote
    item["github_remote"] = github_remote
    item["gitea_url_redacted"] = gitea_url
    item["github_url_redacted"] = github_url
    item["gitea_repo"] = repo_slug_from_url(gitea_url)
    item["github_repo"] = repo_slug_from_url(github_url)
    item["status"] = status
    item["blocking_reason"] = reason
    item["branch_diff"] = branch_diff
    item["tag_diff"] = tag_diff
    item["still_forbidden"] = STILL_FORBIDDEN
    return item
def build_snapshot(args: argparse.Namespace) -> dict[str, object]:
    """Build the top-level snapshot document from parsed CLI arguments.

    Reads ``args.ignore_branch`` (may be ``None``), ``args.repo`` and
    ``args.date``.

    Args:
        args: Parsed command-line arguments.

    Returns:
        A dict with the schema version, the fixed ``draft_blocked`` status,
        the date, the default mode, the sorted ignored branches, the repo
        count and one item per ``--repo`` spec.
    """
    ignored = set(args.ignore_branch or [])
    repo_items: list[dict[str, object]] = []
    for spec in args.repo:
        repo_items.append(build_repo_item(spec, ignored))
    return {
        "schema_version": "source_control_ref_detail_diff_v1",
        "status": "draft_blocked",
        "date": args.date,
        "default_mode": "read_only_diff",
        "ignored_branches": sorted(ignored),
        "repo_count": len(repo_items),
        "repos": repo_items,
    }
def short_sha(value: str) -> str:
    """Return the first eight characters of *value*, or ``""`` when it is falsy."""
    if not value:
        return ""
    return value[:8]
def write_ref_list(lines: list[str], title: str, refs: list[dict[str, str]], *, limit: int) -> None:
    """Append a Markdown section listing *refs* to *lines* (mutated in place).

    The section starts with a level-4 heading. An empty *refs* produces a
    single placeholder line. Otherwise each ref becomes a bullet with its
    name and shortened sha; when *limit* is positive only the first *limit*
    refs are shown, followed by a note giving the number left out.

    Args:
        lines: Output buffer of Markdown lines.
        title: Heading text.
        refs: Records with ``"name"`` and ``"sha"`` keys.
        limit: Maximum number of refs to list; zero or negative means no cap.
    """
    lines.append(f"#### {title}")
    lines.append("")
    if not refs:
        lines.append("無。")
        lines.append("")
        return

    capped = limit > 0
    shown = refs[:limit] if capped else refs
    lines.extend(f"- `{item['name']}` @ `{short_sha(item['sha'])}`" for item in shown)

    hidden = len(refs) - limit
    if capped and hidden > 0:
        lines.append(f"- 另有 `{hidden}` 筆,完整清單見 JSON snapshot。")
    lines.append("")
def write_markdown(snapshot: dict[str, object], path: Path, *, list_limit: int) -> None:
    """Render *snapshot* as a Markdown report and write it to *path* as UTF-8.

    The report contains a metadata table, a fixed conclusion paragraph, a
    per-repo summary table and, for every repo, a detail section listing
    branch sha mismatches, refs present on only one side and the
    still-forbidden operations.

    Args:
        snapshot: Document produced by ``build_snapshot``.
        path: Destination file; it is overwritten.
        list_limit: Maximum number of refs per "only on one side" list,
            forwarded to ``write_ref_list``.
    """
    # Empty values get an explicit placeholder instead of rendering as empty
    # backticks / a dangling label.
    ignored_text = ", ".join(snapshot["ignored_branches"]) or "無"
    lines = [
        "# Source Control Branch / Tag Detail Diff",
        "",
        "| 項目 | 內容 |",
        "|------|------|",
        f"| 日期 | {snapshot['date']} |",
        f"| 狀態 | `{snapshot['status']}` |",
        f"| 預設模式 | `{snapshot['default_mode']}` |",
        f"| ignored branches | `{ignored_text}` |",
        f"| repo count | {snapshot['repo_count']} |",
        "",
        "## 0. 核心結論",
        "",
        "本檔是 read-only refs 明細 diff,不是同步腳本。任何 refs sync、GitHub primary 切換、repo 建立或 visibility 修改都仍需單一 repo 人工批准。",
        "",
        "## 1. 摘要",
        "",
        "| Repo | Status | Branch only Gitea | Branch only GitHub | Branch SHA diff | Tag only Gitea | Tag only GitHub | Tag SHA diff |",
        "|------|--------|-------------------|--------------------|-----------------|----------------|-----------------|--------------|",
    ]

    # Summary table: one row per repo.
    for repo in snapshot["repos"]:
        branch = repo["branch_diff"]
        tag = repo["tag_diff"]
        cells = [
            f"`{repo['gitea_repo']} -> {repo['github_repo']}`",
            f"`{repo['status']}`",
            f"`{branch['only_gitea_count']}`",
            f"`{branch['only_github_count']}`",
            f"`{branch['sha_mismatch_count']}`",
            f"`{tag['only_gitea_count']}`",
            f"`{tag['only_github_count']}`",
            f"`{tag['sha_mismatch_count']}`",
        ]
        lines.append("| " + " | ".join(cells) + " |")

    lines.extend(["", "## 2. Repo 明細", ""])

    # Detail section per repo.
    for repo in snapshot["repos"]:
        branch = repo["branch_diff"]
        tag = repo["tag_diff"]
        lines.extend(
            [
                f"### {repo['gitea_repo']} -> {repo['github_repo']}",
                "",
                # Labels are separated from their values with a colon.
                f"- Status:`{repo['status']}`",
                f"- Blocking reason:{repo['blocking_reason'] or '無'}",
                f"- Gitea URL:`{repo['gitea_url_redacted']}`",
                f"- GitHub URL:`{repo['github_url_redacted']}`",
                "",
                "#### Branch SHA 不一致",
                "",
            ]
        )
        if branch["sha_mismatch"]:
            for item in branch["sha_mismatch"]:
                lines.append(
                    f"- `{item['name']}`:Gitea `{short_sha(item['gitea_sha'])}` / GitHub `{short_sha(item['github_sha'])}`"
                )
            lines.append("")
        else:
            lines.extend(["無。", ""])
        write_ref_list(lines, "Branch 只在 Gitea", branch["only_gitea"], limit=list_limit)
        write_ref_list(lines, "Branch 只在 GitHub", branch["only_github"], limit=list_limit)
        write_ref_list(lines, "Tag 只在 Gitea", tag["only_gitea"], limit=list_limit)
        write_ref_list(lines, "Tag 只在 GitHub", tag["only_github"], limit=list_limit)
        lines.extend(["#### 仍然禁止", ""])
        for value in repo["still_forbidden"]:
            lines.append(f"- {value}")
        lines.append("")

    path.write_text("\n".join(lines), encoding="utf-8")
def main() -> int:
    """Command-line entry point: build the snapshot and write both reports.

    Parses ``--date``, ``--repo`` (repeatable), ``--ignore-branch``
    (repeatable), ``--output-json``, ``--output-md`` and ``--md-list-limit``,
    then writes the JSON snapshot and the Markdown report and prints a
    one-line confirmation.

    Returns:
        ``0`` on success.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--date", required=True)
    cli.add_argument("--repo", action="append", required=True)
    cli.add_argument("--ignore-branch", action="append", default=[])
    cli.add_argument("--output-json", required=True)
    cli.add_argument("--output-md", required=True)
    cli.add_argument("--md-list-limit", type=int, default=50)
    options = cli.parse_args()

    snapshot = build_snapshot(options)

    # JSON keeps non-ASCII text readable and ends with a newline.
    json_text = json.dumps(snapshot, ensure_ascii=False, indent=2) + "\n"
    json_path = Path(options.output_json)
    json_path.write_text(json_text, encoding="utf-8")

    markdown_path = Path(options.output_md)
    write_markdown(snapshot, markdown_path, list_limit=options.md_list_limit)

    print(f"OK source-control ref detail diff repos={snapshot['repo_count']}")
    return 0
# Run main() only when executed as a script and use its return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())