#!/usr/bin/env python3
"""Read-only lineage probe for local canonical repositories.

This tool compares the HEAD, branch, remotes and recent commit ancestry of
several local Git working trees to help judge whether they may belong to the
same canonical repo.
It does not fetch, does not push, does not modify remotes, and does not read
commit messages.
"""

from __future__ import annotations

import argparse
import configparser
import json
import subprocess
from pathlib import Path
from urllib.parse import urlsplit, urlunsplit


def redact_url(value: str) -> str:
    """Return *value* with any user/credential part removed.

    Two remote syntaxes are handled:

    * scp-like (``user@host:path``): everything up to the first ``@`` is
      dropped when the remainder contains a ``:``.
    * URL form (``scheme://user:secret@host/path``): the userinfo part of the
      network location is dropped.

    Any other value (for example a plain local path) is returned unchanged.
    """
    if "://" not in value:
        if "@" in value and ":" in value.split("@", 1)[1]:
            return value.split("@", 1)[1]
        return value
    parts = urlsplit(value)
    # Split at the LAST "@": a password may itself contain "@", and cutting at
    # the first one would leave the tail of the secret in the output.
    netloc = parts.netloc.rsplit("@", 1)[-1]
    return urlunsplit((parts.scheme, netloc, parts.path, parts.query, parts.fragment))


def run_git(repo: Path, args: list[str], timeout: int) -> subprocess.CompletedProcess[str]:
    """Run ``git <args>`` inside *repo* and return the completed process.

    The call never raises for the failure modes handled here; they are mapped
    onto a synthetic ``CompletedProcess`` instead so callers only need to
    inspect ``returncode``:

    * timeout after *timeout* seconds -> return code 124
    * the command cannot be started (``OSError``, e.g. ``git`` is not
      installed or *repo* is not a usable directory) -> return code 127
    """
    command = ["git", *args]
    try:
        return subprocess.run(
            command,
            cwd=repo,
            check=False,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return subprocess.CompletedProcess(command, 124, "", "git command timeout")
    except OSError as error:
        # Keep the probe going for the remaining repos instead of crashing.
        return subprocess.CompletedProcess(command, 127, "", f"git command unavailable: {error}")


def git_value(repo: Path, args: list[str], timeout: int) -> str:
    """Return the stripped stdout of ``git <args>``, or ``""`` on failure."""
    result = run_git(repo, args, timeout)
    if result.returncode != 0:
        return ""
    return result.stdout.strip()


def _config_in_gitdir(gitdir: Path) -> Path | None:
    """Locate the ``config`` file that belongs to the Git directory *gitdir*.

    ``gitdir/config`` is preferred. When it is absent and ``gitdir/commondir``
    exists (the layout of a linked worktree's administrative directory), the
    ``config`` inside the referenced common directory is used instead.
    """
    config_path = gitdir / "config"
    if config_path.exists():
        return config_path

    commondir_file = gitdir / "commondir"
    if not commondir_file.is_file():
        return None
    try:
        raw_common = commondir_file.read_text(encoding="utf-8", errors="replace").strip()
    except OSError:
        return None
    if not raw_common:
        return None

    common_dir = Path(raw_common)
    if not common_dir.is_absolute():
        # A relative commondir is interpreted relative to the Git directory.
        common_dir = (gitdir / common_dir).resolve()
    shared_config = common_dir / "config"
    return shared_config if shared_config.exists() else None


def git_config_path(repo_path: Path) -> Path | None:
    """Return the path of the Git config file for the working tree *repo_path*.

    ``.git`` may be a directory (regular clone) or a file containing a
    ``gitdir: <path>`` pointer (linked worktree, submodule). A relative
    pointer is resolved against *repo_path*. Returns ``None`` when no config
    file can be located.
    """
    git_path = repo_path / ".git"
    if git_path.is_dir():
        return _config_in_gitdir(git_path)
    if not git_path.is_file():
        return None

    text = git_path.read_text(encoding="utf-8", errors="replace")
    for line in text.splitlines():
        if line.startswith("gitdir:"):
            gitdir = Path(line.split(":", 1)[1].strip())
            if not gitdir.is_absolute():
                gitdir = (repo_path / gitdir).resolve()
            return _config_in_gitdir(gitdir)
    return None


def remote_name(section: str) -> str | None:
    """Extract ``NAME`` from a ``remote "NAME"`` section header, else ``None``."""
    prefix = 'remote "'
    if section.startswith(prefix) and section.endswith('"'):
        return section[len(prefix) : -1]
    return None


def read_remotes(repo_path: Path) -> list[dict[str, str]]:
    """Read the configured remotes of *repo_path* straight from its config file.

    Returns a list of ``{"name": ..., "url_redacted": ...}`` dicts, one per
    ``remote`` section that has a ``url`` option. URLs are passed through
    ``redact_url`` so credentials never reach the output. An empty list is
    returned when the config file cannot be located.
    """
    config_path = git_config_path(repo_path)
    if config_path is None:
        return []

    parser = configparser.RawConfigParser(strict=False)
    try:
        parser.read(config_path, encoding="utf-8")
    except (configparser.Error, UnicodeDecodeError):
        # Best effort: Git accepts syntax configparser rejects (for example a
        # valueless boolean key). A malformed file must not abort the whole
        # probe; continue with whatever sections the parser managed to load.
        pass

    remotes: list[dict[str, str]] = []
    for section in parser.sections():
        name = remote_name(section)
        if not name or not parser.has_option(section, "url"):
            continue
        url = parser.get(section, "url")
        if not isinstance(url, str):
            # Defensive: after a parse error the stored value may not have
            # been normalised to a string.
            continue
        remotes.append({"name": name, "url_redacted": redact_url(url.strip())})
    return remotes


def parse_repo_arg(value: str) -> tuple[str, Path]:
    """Parse one ``--repo label=path`` argument into ``(label, resolved_path)``.

    Raises:
        argparse.ArgumentTypeError: if ``=`` is missing, or the label or the
            path is blank.
    """
    if "=" not in value:
        raise argparse.ArgumentTypeError("--repo 必須是 label=/absolute/path")
    label, raw_path = value.split("=", 1)
    if not label.strip() or not raw_path.strip():
        raise argparse.ArgumentTypeError("--repo label 與 path 不可為空")
    return label.strip(), Path(raw_path).expanduser().resolve()


def repo_summary(label: str, repo_path: Path, sample_limit: int, git_timeout: int) -> dict[str, object]:
    """Collect HEAD, branch, a commit sample and the remotes of one repo.

    Args:
        label: caller-chosen name for the repo, copied into the result.
        repo_path: working tree to inspect; it must contain a ``.git`` entry.
        sample_limit: value passed to ``git rev-list --max-count``.
        git_timeout: per-command timeout in seconds.

    Returns:
        A dict with the keys ``label``, ``repo_path``, ``exists``,
        ``head_sha``, ``head_short``, ``branch``, ``commit_sample_count``,
        ``commits``, ``remotes`` and ``probe_error``. ``probe_error`` is empty
        when every Git command succeeded.
    """
    if not (repo_path / ".git").exists():
        return {
            "label": label,
            "repo_path": str(repo_path),
            "exists": False,
            "head_sha": "",
            "head_short": "",
            "branch": "",
            "commit_sample_count": 0,
            "commits": [],
            "remotes": [],
            "probe_error": "repo missing",
        }

    probe_errors: list[str] = []

    head_result = run_git(repo_path, ["rev-parse", "HEAD"], git_timeout)
    if head_result.returncode == 0:
        head_sha = head_result.stdout.strip()
    else:
        head_sha = ""
        probe_errors.append("HEAD 讀取失敗或逾時")

    branch = git_value(repo_path, ["rev-parse", "--abbrev-ref", "HEAD"], git_timeout)

    rev_list_result = run_git(
        repo_path,
        ["rev-list", f"--max-count={sample_limit}", "HEAD"],
        git_timeout,
    )
    if rev_list_result.returncode == 0:
        commits = rev_list_result.stdout.splitlines()
    else:
        commits = []
        probe_errors.append("rev-list 讀取失敗或逾時")

    return {
        "label": label,
        "repo_path": str(repo_path),
        "exists": True,
        "head_sha": head_sha,
        "head_short": head_sha[:7],
        "branch": branch,
        "commit_sample_count": len(commits),
        "commits": commits,
        "remotes": read_remotes(repo_path),
        "probe_error": ";".join(probe_errors),
    }


def compare_repos(left: dict[str, object], right: dict[str, object]) -> dict[str, object]:
    """Classify the lineage relation between two ``repo_summary`` results.

    The first matching rule wins:

    1. ``missing_repo`` - either side does not exist.
    2. ``same_head`` - both HEADs are equal and non-empty.
    3. ``partial_probe`` - either side recorded a probe error.
    4. ``left_descends_from_right`` - right HEAD is in the left commit sample.
    5. ``right_descends_from_left`` - left HEAD is in the right commit sample.
    6. ``shared_history`` - the commit samples intersect.
    7. ``no_shared_history`` - none of the above.

    Only the sampled commits are compared, so the verdict is bounded by the
    sample size used when the summaries were built.
    """
    left_commits = set(left.get("commits", []))
    right_commits = set(right.get("commits", []))
    left_head = str(left.get("head_sha") or "")
    right_head = str(right.get("head_sha") or "")
    common = sorted(left_commits & right_commits)

    if not left.get("exists") or not right.get("exists"):
        relation = "missing_repo"
    elif left_head and left_head == right_head:
        relation = "same_head"
    elif left.get("probe_error") or right.get("probe_error"):
        relation = "partial_probe"
    elif right_head and right_head in left_commits:
        relation = "left_descends_from_right"
    elif left_head and left_head in right_commits:
        relation = "right_descends_from_left"
    elif common:
        relation = "shared_history"
    else:
        relation = "no_shared_history"

    return {
        "left_label": left["label"],
        "right_label": right["label"],
        "relation": relation,
        "left_head": left_head,
        "right_head": right_head,
        "common_commit_count": len(common),
        "common_commit_samples": common[:5],
    }


def build_payload(
    group_name: str,
    repo_args: list[tuple[str, Path]],
    sample_limit: int,
    git_timeout: int,
) -> dict[str, object]:
    """Probe every repo, compare all pairs and derive an overall status.

    Status values:

    * ``partial`` - some comparison was incomplete (probe error or missing
      repo), or there was nothing to compare.
    * ``mixed`` - some pairs are related while others share no history.
    * ``related`` - at least one related pair and no unrelated pair.
    * ``unrelated`` - comparisons exist and none of them is related.
    """
    repos = [repo_summary(label, path, sample_limit, git_timeout) for label, path in repo_args]

    comparisons = []
    for left_index, left in enumerate(repos):
        for right in repos[left_index + 1 :]:
            comparisons.append(compare_repos(left, right))

    # A missing repo makes the comparison just as inconclusive as a failed
    # probe, so it must not fall through to "unrelated".
    partial = any(item["relation"] in ("partial_probe", "missing_repo") for item in comparisons)
    related = any(
        item["relation"] in ("same_head", "left_descends_from_right", "right_descends_from_left", "shared_history")
        for item in comparisons
    )
    no_shared = any(item["relation"] == "no_shared_history" for item in comparisons)

    if partial:
        status = "partial"
    elif related and no_shared:
        status = "mixed"
    elif related:
        status = "related"
    elif comparisons:
        status = "unrelated"
    else:
        status = "partial"

    return {
        "schema_version": "local_repo_canonical_probe_v1",
        "group_name": group_name,
        "status": status,
        "sample_limit": sample_limit,
        "git_timeout_seconds": git_timeout,
        "repo_count": len(repos),
        "comparison_count": len(comparisons),
        "repos": repos,
        "comparisons": comparisons,
    }


def write_markdown(payload: dict[str, object], path: Path) -> None:
    """Render *payload* (as built by ``build_payload``) to a Markdown file.

    The report holds a summary table, one row per repo and one row per
    pairwise comparison. Entries of ``repos`` / ``comparisons`` / ``remotes``
    that are not dicts are skipped.
    """
    lines = [
        "# 本機 Repo Canonical Lineage Probe 快照",
        "",
        "| 項目 | 值 |",
        "|------|----|",
        f"| 群組 | `{payload['group_name']}` |",
        f"| 狀態 | `{payload['status']}` |",
        f"| repo 數 | `{payload['repo_count']}` |",
        f"| 比對數 | `{payload['comparison_count']}` |",
        f"| sample limit | `{payload['sample_limit']}` |",
        f"| git timeout seconds | `{payload['git_timeout_seconds']}` |",
        "",
        "## Repo HEAD",
        "",
        "| Label | Path | Branch | HEAD | Remotes |",
        "|-------|------|--------|------|---------|",
    ]

    for repo in payload.get("repos", []):
        if not isinstance(repo, dict):
            continue
        remotes = repo.get("remotes", [])
        remote_text = ", ".join(
            f"`{remote.get('name', '')}:{remote.get('url_redacted', '')}`"
            for remote in remotes
            if isinstance(remote, dict)
        )
        cells = [
            f"`{repo.get('label', '')}`",
            f"`{repo.get('repo_path', '')}`",
            f"`{repo.get('branch', '')}`",
            f"`{repo.get('head_short', '')}`",
            remote_text or "-",
        ]
        lines.append("| " + " | ".join(cells) + " |")

    lines.extend(
        [
            "",
            "## Lineage 比對",
            "",
            "| Left | Right | Relation | Common commits |",
            "|------|-------|----------|----------------|",
        ]
    )

    for comparison in payload.get("comparisons", []):
        if not isinstance(comparison, dict):
            continue
        cells = [
            f"`{comparison.get('left_label', '')}`",
            f"`{comparison.get('right_label', '')}`",
            f"`{comparison.get('relation', '')}`",
            f"`{comparison.get('common_commit_count', 0)}`",
        ]
        lines.append("| " + " | ".join(cells) + " |")

    lines.extend(
        [
            "",
            "> 注意:本檔只比較本機 Git 物件,未 fetch 遠端;common commit sample 只用 SHA,不含 commit message。",
            "",
        ]
    )
    path.write_text("\n".join(lines), encoding="utf-8")


def main() -> int:
    """Command-line entry point: probe the repos and write JSON + Markdown.

    Returns the process exit code (always ``0`` when the reports are written).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--group-name", required=True)
    parser.add_argument("--repo", action="append", type=parse_repo_arg, required=True)
    parser.add_argument("--sample-limit", type=int, default=5000)
    parser.add_argument("--git-timeout", type=int, default=10)
    parser.add_argument("--output-json", required=True)
    parser.add_argument("--output-md", required=True)
    args = parser.parse_args()

    payload = build_payload(args.group_name, args.repo, args.sample_limit, args.git_timeout)
    Path(args.output_json).write_text(
        json.dumps(payload, ensure_ascii=False, indent=2) + "\n",
        encoding="utf-8",
    )
    write_markdown(payload, Path(args.output_md))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())