#!/usr/bin/env python3
"""Generate a detailed Gitea / GitHub branch and tag diff.

This tool only runs read-only git commands (``git remote get-url`` and
``git ls-remote --heads/--tags``); it never fetches, never pushes and never
writes to either remote. Credentials embedded in remote URLs are masked
before anything is written out.
"""

from __future__ import annotations

import argparse
import json
import re
import subprocess
from pathlib import Path
from urllib.parse import urlsplit, urlunsplit

# Object names printed by ``git ls-remote``. The upper bound is 64 so that
# SHA-256 repositories (64 hex digits) are accepted as well as SHA-1 ones
# (40 hex digits); with a 40-digit cap every ref of a SHA-256 repository was
# silently dropped and the comparison reported two empty, "aligned" ref sets.
SHA_RE = re.compile(r"^[0-9a-fA-F]{7,64}$")

# Operations this report never performs; echoed verbatim into every repo item.
STILL_FORBIDDEN = [
    "fetch",
    "push refs",
    "force push",
    "delete refs",
    "create GitHub repo",
    "change repo visibility",
    "switch GitHub primary",
    "disable Gitea",
    "move secret values",
]


def run_git(repo: Path, args: list[str]) -> subprocess.CompletedProcess[str]:
    """Run ``git <args>`` inside *repo* and return the completed process.

    The exit status is not checked here (``check=False``); stdout and stderr
    are captured as text so the caller can decide how to report a failure.
    """
    return subprocess.run(
        ["git", *args],
        cwd=repo,
        check=False,
        capture_output=True,
        text=True,
    )


def require_git(repo: Path, args: list[str]) -> str:
    """Run ``git <args>`` inside *repo* and return its stdout.

    Raises:
        RuntimeError: if git exits with a non-zero status. The message carries
            stderr, or stdout when stderr is empty.
    """
    result = run_git(repo, args)
    if result.returncode != 0:
        stderr = result.stderr.strip() or result.stdout.strip()
        raise RuntimeError(f"{repo}: git {' '.join(args)} failed: {stderr}")
    return result.stdout


def redact_url(value: str) -> str:
    """Return *value* with any ``user[:password]@`` prefix removed.

    Three shapes are handled:

    * ``scheme://userinfo@host/path`` - the userinfo part is dropped and the
      URL is rebuilt from its remaining components;
    * scp-like ``user@host:path`` - everything up to the first ``@`` is
      dropped;
    * anything else (for example a local path) is returned unchanged.
    """
    if "://" in value:
        parts = urlsplit(value)
        # The host starts after the LAST "@" of the netloc. Cutting at the
        # first one would leave the tail of a password that itself contains
        # "@" (``user:p@ss@host`` -> ``ss@host``) in the "redacted" output.
        netloc = parts.netloc.rpartition("@")[2]
        return urlunsplit((parts.scheme, netloc, parts.path, parts.query, parts.fragment))
    if "@" in value:
        after_user = value.split("@", 1)[1]
        if ":" in after_user:
            return after_user
    return value


def repo_slug_from_url(value: str) -> str:
    """Derive a repository slug (e.g. ``owner/name``) from a remote URL.

    The URL is redacted first, then the host part, surrounding slashes and a
    trailing ``.git`` are stripped. When nothing is left, the redacted URL
    itself is returned.
    """
    redacted = redact_url(value).removesuffix("/")
    if "://" in redacted:
        path = urlsplit(redacted).path.strip("/")
    elif ":" in redacted:
        # scp-like form: ``host:owner/name.git``
        path = redacted.split(":", 1)[1].strip("/")
    else:
        path = redacted.strip("/")
    return path.removesuffix(".git") or redacted


def remote_url(repo: Path, remote: str) -> str:
    """Return the redacted URL that git has configured for *remote* in *repo*."""
    return redact_url(require_git(repo, ["remote", "get-url", remote]).strip())


def parse_ls_remote(output: str, prefix: str) -> dict[str, str]:
    """Parse ``git ls-remote`` output into a ``{short ref name: sha}`` mapping.

    Only lines of the form ``<sha> <ref>`` whose ref starts with *prefix* are
    kept, and *prefix* is removed from the key. Blank lines, lines without
    two fields and lines whose first field is not a hex object name are
    skipped.
    """
    refs: dict[str, str] = {}
    for line in output.splitlines():
        fields = line.split(None, 1)
        if len(fields) != 2:
            # Blank line or a line with a single token.
            continue
        sha, ref = fields
        if not SHA_RE.match(sha) or not ref.startswith(prefix):
            continue
        refs[ref.removeprefix(prefix)] = sha
    return refs


def refs_for_remote(
    repo: Path,
    remote: str,
    ignored_branches: set[str],
) -> tuple[dict[str, str], dict[str, str]]:
    """Collect the branch heads and tags advertised by *remote*.

    Returns:
        ``(heads, tags)``. Branches named in *ignored_branches* are removed
        from ``heads``. For tags that ``ls-remote`` also lists in peeled form
        (``<tag>^{}``), the peeled sha replaces the sha of the tag entry.

    Raises:
        RuntimeError: if either ``git ls-remote`` call fails.
    """
    heads = parse_ls_remote(require_git(repo, ["ls-remote", "--heads", remote]), "refs/heads/")
    for branch in ignored_branches:
        heads.pop(branch, None)

    raw_tags = parse_ls_remote(require_git(repo, ["ls-remote", "--tags", remote]), "refs/tags/")
    peel_suffix = "^{}"
    tags = {name: sha for name, sha in raw_tags.items() if not name.endswith(peel_suffix)}
    # Second pass so a peeled entry always wins over the plain tag entry,
    # regardless of the order in which git printed them.
    for name, sha in raw_tags.items():
        if name.endswith(peel_suffix):
            tags[name.removesuffix(peel_suffix)] = sha
    return heads, tags


def ref_items(refs: dict[str, str], names: list[str]) -> list[dict[str, str]]:
    """Return ``[{"name": ..., "sha": ...}]`` for *names*, looked up in *refs*."""
    return [{"name": name, "sha": refs[name]} for name in names]


def compare_refs(gitea_refs: dict[str, str], github_refs: dict[str, str]) -> dict[str, object]:
    """Compare two ``{name: sha}`` mappings.

    The result holds counts plus name-sorted detail lists for refs that exist
    on one side only, refs present on both sides with different shas, and
    refs present on both sides with the same sha.
    """
    gitea_names = set(gitea_refs)
    github_names = set(github_refs)
    common = sorted(gitea_names & github_names)
    only_gitea = sorted(gitea_names - github_names)
    only_github = sorted(github_names - gitea_names)

    mismatches = [
        {
            "name": name,
            "gitea_sha": gitea_refs[name],
            "github_sha": github_refs[name],
        }
        for name in common
        if gitea_refs[name] != github_refs[name]
    ]
    matching = [name for name in common if gitea_refs[name] == github_refs[name]]

    return {
        "gitea_count": len(gitea_refs),
        "github_count": len(github_refs),
        "only_gitea_count": len(only_gitea),
        "only_github_count": len(only_github),
        "sha_mismatch_count": len(mismatches),
        "matching_count": len(matching),
        "only_gitea": ref_items(gitea_refs, only_gitea),
        "only_github": ref_items(github_refs, only_github),
        "sha_mismatch": mismatches,
        "matching": ref_items(gitea_refs, matching),
    }


def repo_status(branch_diff: dict[str, object], tag_diff: dict[str, object]) -> tuple[str, str]:
    """Summarise two ``compare_refs`` results as ``(status, blocking_reason)``.

    ``status`` is ``"verified"`` with an empty reason when neither diff has
    one-sided refs or sha mismatches; otherwise it is ``"blocked"`` and the
    reason names the ref kinds that are not aligned.
    """
    reasons: list[str] = []
    if branch_diff["only_gitea_count"] or branch_diff["only_github_count"] or branch_diff["sha_mismatch_count"]:
        reasons.append("branches 尚未完全對齊")
    if tag_diff["only_gitea_count"] or tag_diff["only_github_count"] or tag_diff["sha_mismatch_count"]:
        reasons.append("tags 尚未完全對齊")
    if reasons:
        return "blocked", ";".join(reasons)
    return "verified", ""


def parse_repo_spec(value: str) -> tuple[str, Path, str, str]:
    """Split a ``key=/path=gitea_remote=github_remote`` spec into its parts.

    The path is expanded (``~``) and resolved to an absolute path.

    Raises:
        ValueError: if *value* does not contain four ``=``-separated fields.
    """
    parts = value.split("=", 3)
    if len(parts) != 4:
        raise ValueError("--repo 格式必須是 key=/path=gitea_remote=github_remote")
    key, path, gitea_remote, github_remote = parts
    return key, Path(path).expanduser().resolve(), gitea_remote, github_remote


def build_repo_item(spec: str, ignored_branches: set[str]) -> dict[str, object]:
    """Build the snapshot entry for the repository described by *spec*.

    Both remotes are queried through git and their branches and tags are
    compared; the entry carries the redacted URLs, the derived slugs, the
    status and both detailed diffs.

    Raises:
        ValueError: if *spec* is malformed.
        RuntimeError: if any git command fails.
    """
    key, repo, gitea_remote, github_remote = parse_repo_spec(spec)
    gitea_url = remote_url(repo, gitea_remote)
    github_url = remote_url(repo, github_remote)
    gitea_heads, gitea_tags = refs_for_remote(repo, gitea_remote, ignored_branches)
    github_heads, github_tags = refs_for_remote(repo, github_remote, ignored_branches)
    branch_diff = compare_refs(gitea_heads, github_heads)
    tag_diff = compare_refs(gitea_tags, github_tags)
    status, reason = repo_status(branch_diff, tag_diff)
    return {
        "repo_key": key,
        "repo_path": str(repo),
        "gitea_remote": gitea_remote,
        "github_remote": github_remote,
        "gitea_url_redacted": gitea_url,
        "github_url_redacted": github_url,
        "gitea_repo": repo_slug_from_url(gitea_url),
        "github_repo": repo_slug_from_url(github_url),
        "status": status,
        "blocking_reason": reason,
        "branch_diff": branch_diff,
        "tag_diff": tag_diff,
        "still_forbidden": STILL_FORBIDDEN,
    }


def build_snapshot(args: argparse.Namespace) -> dict[str, object]:
    """Build the full snapshot dict from the parsed command-line arguments.

    Reads ``args.repo``, ``args.ignore_branch`` and ``args.date``.
    """
    ignored_branches = set(args.ignore_branch or [])
    repos = [build_repo_item(spec, ignored_branches) for spec in args.repo]
    return {
        "schema_version": "source_control_ref_detail_diff_v1",
        "status": "draft_blocked",
        "date": args.date,
        "default_mode": "read_only_diff",
        "ignored_branches": sorted(ignored_branches),
        "repo_count": len(repos),
        "repos": repos,
    }


def short_sha(value: str) -> str:
    """Return the first eight characters of *value* (``""`` for an empty value)."""
    return value[:8] if value else ""


def write_ref_list(lines: list[str], title: str, refs: list[dict[str, str]], *, limit: int) -> None:
    """Append a Markdown sub-section listing *refs* to *lines* (in place).

    At most *limit* entries are listed when *limit* is positive, followed by
    a line giving the number of omitted entries; a *limit* of zero or less
    lists everything.
    """
    lines.extend([f"#### {title}", ""])
    if not refs:
        lines.extend(["無。", ""])
        return
    visible = refs if limit <= 0 else refs[:limit]
    for item in visible:
        lines.append(f"- `{item['name']}` @ `{short_sha(item['sha'])}`")
    if limit > 0 and len(refs) > limit:
        lines.append(f"- 另有 `{len(refs) - limit}` 筆,完整清單見 JSON snapshot。")
    lines.append("")


def write_markdown(snapshot: dict[str, object], path: Path, *, list_limit: int) -> None:
    """Render *snapshot* as a Markdown report and write it to *path* (UTF-8).

    The report has a header table, a per-repo summary table and one detail
    section per repo. *list_limit* caps each "only on one side" list (see
    ``write_ref_list``); the sha-mismatch list is never truncated.
    """
    lines = [
        "# Source Control Branch / Tag Detail Diff",
        "",
        "| 項目 | 內容 |",
        "|------|------|",
        f"| 日期 | {snapshot['date']} |",
        f"| 狀態 | `{snapshot['status']}` |",
        f"| 預設模式 | `{snapshot['default_mode']}` |",
        f"| ignored branches | `{', '.join(snapshot['ignored_branches']) or '無'}` |",
        f"| repo count | {snapshot['repo_count']} |",
        "",
        "## 0. 核心結論",
        "",
        "本檔是 read-only refs 明細 diff,不是同步腳本。任何 refs sync、GitHub primary 切換、repo 建立或 visibility 修改都仍需單一 repo 人工批准。",
        "",
        "## 1. 摘要",
        "",
        "| Repo | Status | Branch only Gitea | Branch only GitHub | Branch SHA diff | Tag only Gitea | Tag only GitHub | Tag SHA diff |",
        "|------|--------|-------------------|--------------------|-----------------|----------------|-----------------|--------------|",
    ]

    # Summary table: one row per repo.
    for repo in snapshot["repos"]:
        branch = repo["branch_diff"]
        tag = repo["tag_diff"]
        lines.append(
            "| "
            + " | ".join(
                [
                    f"`{repo['gitea_repo']} -> {repo['github_repo']}`",
                    f"`{repo['status']}`",
                    f"`{branch['only_gitea_count']}`",
                    f"`{branch['only_github_count']}`",
                    f"`{branch['sha_mismatch_count']}`",
                    f"`{tag['only_gitea_count']}`",
                    f"`{tag['only_github_count']}`",
                    f"`{tag['sha_mismatch_count']}`",
                ]
            )
            + " |"
        )

    # Detail sections: one per repo.
    lines.extend(["", "## 2. Repo 明細", ""])
    for repo in snapshot["repos"]:
        branch = repo["branch_diff"]
        tag = repo["tag_diff"]
        lines.extend(
            [
                f"### {repo['gitea_repo']} -> {repo['github_repo']}",
                "",
                f"- Status:`{repo['status']}`",
                f"- Blocking reason:{repo['blocking_reason'] or '無'}",
                f"- Gitea URL:`{repo['gitea_url_redacted']}`",
                f"- GitHub URL:`{repo['github_url_redacted']}`",
                "",
                "#### Branch SHA 不一致",
                "",
            ]
        )
        if branch["sha_mismatch"]:
            for item in branch["sha_mismatch"]:
                lines.append(
                    f"- `{item['name']}`:Gitea `{short_sha(item['gitea_sha'])}` / GitHub `{short_sha(item['github_sha'])}`"
                )
            lines.append("")
        else:
            lines.extend(["無。", ""])
        write_ref_list(lines, "Branch 只在 Gitea", branch["only_gitea"], limit=list_limit)
        write_ref_list(lines, "Branch 只在 GitHub", branch["only_github"], limit=list_limit)
        write_ref_list(lines, "Tag 只在 Gitea", tag["only_gitea"], limit=list_limit)
        write_ref_list(lines, "Tag 只在 GitHub", tag["only_github"], limit=list_limit)
        lines.extend(["#### 仍然禁止", ""])
        for value in repo["still_forbidden"]:
            lines.append(f"- {value}")
        lines.append("")

    path.write_text("\n".join(lines), encoding="utf-8")


def main() -> int:
    """Parse the command line, write the JSON and Markdown reports, return 0."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--date", required=True)
    parser.add_argument("--repo", action="append", required=True)
    parser.add_argument("--ignore-branch", action="append", default=[])
    parser.add_argument("--output-json", required=True)
    parser.add_argument("--output-md", required=True)
    parser.add_argument("--md-list-limit", type=int, default=50)
    args = parser.parse_args()

    # Build everything first so a git failure leaves no partial output files.
    snapshot = build_snapshot(args)
    Path(args.output_json).write_text(
        json.dumps(snapshot, ensure_ascii=False, indent=2) + "\n",
        encoding="utf-8",
    )
    write_markdown(snapshot, Path(args.output_md), list_limit=args.md_list_limit)
    print(f"OK source-control ref detail diff repos={snapshot['repo_count']}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())