308 lines
10 KiB
Python
308 lines
10 KiB
Python
#!/usr/bin/env python3
|
||
"""本機 repo canonical lineage 只讀探測。
|
||
|
||
此工具比較多個本機 Git working tree 的 HEAD、branch、remote 與近期
|
||
commit ancestry,協助判斷它們是否可能屬於同一個 canonical repo。
|
||
它不 fetch、不 push、不修改 remote,也不讀取 commit message。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import configparser
|
||
import json
|
||
import subprocess
|
||
from pathlib import Path
|
||
from urllib.parse import urlsplit, urlunsplit
|
||
|
||
|
||
def redact_url(value: str) -> str:
    """Return ``value`` with any embedded user/password part removed.

    Two remote spellings are handled:

    * scp-style (no ``://``, e.g. ``user@host:path``): everything up to and
      including the first ``@`` is dropped, but only when the remainder
      still contains ``:``; otherwise the value is returned untouched.
    * URL-style (``scheme://...``): the userinfo in front of the host is
      removed and the URL is reassembled from its split parts.

    Args:
        value: Remote URL or path as stored in the Git config.

    Returns:
        The same remote without the credential part.
    """
    if "://" not in value:
        _, at_sign, remainder = value.partition("@")
        if at_sign and ":" in remainder:
            return remainder
        return value
    parts = urlsplit(value)
    # A host can never contain "@", so the userinfo ends at the LAST "@".
    # Cutting at the first one would leak the tail of a password that
    # itself contains an unescaped "@" (e.g. "user:p@ss@host").
    host = parts.netloc.rpartition("@")[2]
    return urlunsplit((parts.scheme, host, parts.path, parts.query, parts.fragment))
|
||
|
||
|
||
def run_git(repo: Path, args: list[str], timeout: int) -> subprocess.CompletedProcess[str]:
    """Run ``git <args>`` with ``repo`` as working directory and capture text output.

    Failures handled here are reported through the return code instead of an
    exception, so callers can treat every problem uniformly as a failed probe:

    * timeout -> return code 124, stderr ``"git command timeout"``
    * process could not be started (``git`` executable missing, ``repo`` not
      usable as a working directory, ...) -> return code 127, stderr carries
      the OS error text

    Args:
        repo: Directory used as ``cwd`` for the git process.
        args: Arguments placed after ``git`` (no shell is involved).
        timeout: Seconds to wait before giving up on the command.

    Returns:
        The completed process; a non-zero ``returncode`` means failure.
    """
    command = ["git", *args]
    try:
        return subprocess.run(
            command,
            cwd=repo,
            check=False,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return subprocess.CompletedProcess(command, 124, "", "git command timeout")
    except OSError as error:
        # Without this, one unusable repo or a missing git binary would abort
        # the whole probe instead of showing up as a per-repo probe error.
        return subprocess.CompletedProcess(command, 127, "", f"git command failed to start: {error}")
|
||
|
||
|
||
def git_value(repo: Path, args: list[str], timeout: int) -> str:
    """Return the stripped stdout of a git command, or ``""`` when it fails.

    Args:
        repo: Directory the command is run in.
        args: Arguments placed after ``git``.
        timeout: Seconds allowed for the command.

    Returns:
        Whitespace-stripped stdout on return code 0, otherwise an empty string.
    """
    completed = run_git(repo, args, timeout)
    succeeded = completed.returncode == 0
    return completed.stdout.strip() if succeeded else ""
|
||
|
||
|
||
def _common_dir_config(gitdir: Path) -> Path | None:
    """Resolve ``config`` through the ``commondir`` file of a per-worktree git dir."""
    pointer = gitdir / "commondir"
    if not pointer.is_file():
        return None
    raw_common = pointer.read_text(encoding="utf-8", errors="replace").strip()
    if not raw_common:
        return None
    common_dir = Path(raw_common)
    if not common_dir.is_absolute():
        # A relative commondir is interpreted relative to the git dir itself.
        common_dir = (gitdir / common_dir).resolve()
    config_path = common_dir / "config"
    return config_path if config_path.exists() else None


def git_config_path(repo_path: Path) -> Path | None:
    """Locate the Git ``config`` file that belongs to a working tree.

    Supported layouts:

    * ``.git`` is a directory: ``.git/config`` is used.
    * ``.git`` is a file containing a ``gitdir: <path>`` line: ``<path>/config``
      is used; a relative ``<path>`` is resolved against ``repo_path``.
    * The git dir named by that line has no ``config`` of its own but has a
      ``commondir`` file (the layout of linked worktrees): the ``config`` in
      the referenced common directory is used, so the remotes of a linked
      worktree are not silently lost.

    Args:
        repo_path: Root of the working tree.

    Returns:
        Path to an existing config file, or ``None`` when none can be found.
    """
    git_path = repo_path / ".git"
    if git_path.is_dir():
        config_path = git_path / "config"
        return config_path if config_path.exists() else None
    if not git_path.is_file():
        return None
    text = git_path.read_text(encoding="utf-8", errors="replace")
    for line in text.splitlines():
        if not line.startswith("gitdir:"):
            continue
        # Only the first "gitdir:" line is considered.
        gitdir = Path(line.split(":", 1)[1].strip())
        if not gitdir.is_absolute():
            gitdir = (repo_path / gitdir).resolve()
        config_path = gitdir / "config"
        if config_path.exists():
            return config_path
        return _common_dir_config(gitdir)
    return None
|
||
|
||
|
||
def remote_name(section: str) -> str | None:
    """Extract the remote name from a config section title.

    Args:
        section: Section title, e.g. ``remote "origin"``.

    Returns:
        The text between ``remote "`` and the closing ``"``, or ``None`` when
        the title does not have that shape.
    """
    opener, closer = 'remote "', '"'
    if not (section.startswith(opener) and section.endswith(closer)):
        return None
    return section[len(opener) : -len(closer)]
|
||
|
||
|
||
def read_remotes(repo_path: Path) -> list[dict[str, str]]:
    """Read the configured remotes of a working tree straight from its config file.

    The config file is parsed with ``configparser``; no git process is run.
    Reading is best effort: when the file cannot be located, decoded or
    parsed, an empty list is returned instead of aborting the probe.

    Args:
        repo_path: Root of the working tree.

    Returns:
        One ``{"name": ..., "url_redacted": ...}`` dict per ``remote "<name>"``
        section that has a ``url`` value, in file order. URLs are passed
        through ``redact_url`` before being returned.
    """
    config_path = git_config_path(repo_path)
    if config_path is None:
        return []
    # allow_no_value: Git accepts valueless keys (e.g. a bare "bare" line) as
    # boolean true; without it configparser rejects the whole file.
    parser = configparser.RawConfigParser(strict=False, allow_no_value=True)
    try:
        parser.read(config_path, encoding="utf-8")
    except (configparser.Error, UnicodeDecodeError, OSError):
        # Git's config syntax is only approximately INI; an unparsable file
        # should cost us the remote list, not the entire probe run.
        return []
    remotes: list[dict[str, str]] = []
    for section in parser.sections():
        name = remote_name(section)
        if not name or not parser.has_option(section, "url"):
            continue
        url = parser.get(section, "url")
        if url is None:
            # Valueless "url" key: nothing to report for this remote.
            continue
        remotes.append({"name": name, "url_redacted": redact_url(url.strip())})
    return remotes
|
||
|
||
|
||
def parse_repo_arg(value: str) -> tuple[str, Path]:
    """Parse one ``--repo`` argument of the form ``label=path``.

    The value is split at the first ``=``. The label is stripped of
    surrounding whitespace; the path has ``~`` expanded and is resolved.

    Args:
        value: Raw command-line value.

    Returns:
        ``(label, resolved_path)``.

    Raises:
        argparse.ArgumentTypeError: If there is no ``=`` or if either side is
            empty after stripping whitespace.
    """
    label, separator, raw_path = value.partition("=")
    if not separator:
        raise argparse.ArgumentTypeError("--repo 必須是 label=/absolute/path")
    label = label.strip()
    if not (label and raw_path.strip()):
        raise argparse.ArgumentTypeError("--repo label 與 path 不可為空")
    return label, Path(raw_path).expanduser().resolve()
|
||
|
||
|
||
def repo_summary(label: str, repo_path: Path, sample_limit: int, git_timeout: int) -> dict[str, object]:
    """Collect HEAD, branch, sampled commit SHAs and remotes for one working tree.

    Args:
        label: Caller-chosen name for the repo, copied into the result.
        repo_path: Root of the working tree.
        sample_limit: Value passed to ``git rev-list --max-count``.
        git_timeout: Per-command timeout in seconds.

    Returns:
        A dict with the keys ``label``, ``repo_path``, ``exists``, ``head_sha``,
        ``head_short``, ``branch``, ``commit_sample_count``, ``commits``,
        ``remotes`` and ``probe_error``. When ``repo_path/.git`` does not
        exist, ``exists`` is ``False``, ``probe_error`` is ``"repo missing"``
        and no git command is run. Otherwise ``probe_error`` joins the
        messages of the HEAD / rev-list commands that failed (empty when both
        succeeded).
    """
    summary: dict[str, object] = {
        "label": label,
        "repo_path": str(repo_path),
        "exists": False,
        "head_sha": "",
        "head_short": "",
        "branch": "",
        "commit_sample_count": 0,
        "commits": [],
        "remotes": [],
        "probe_error": "repo missing",
    }
    if not (repo_path / ".git").exists():
        return summary

    failures: list[str] = []

    head = run_git(repo_path, ["rev-parse", "HEAD"], git_timeout)
    if head.returncode == 0:
        head_sha = head.stdout.strip()
    else:
        head_sha = ""
        failures.append("HEAD 讀取失敗或逾時")

    # A failed branch lookup is not recorded as a probe error; it just yields "".
    branch = git_value(repo_path, ["rev-parse", "--abbrev-ref", "HEAD"], git_timeout)

    sampled = run_git(repo_path, ["rev-list", f"--max-count={sample_limit}", "HEAD"], git_timeout)
    if sampled.returncode == 0:
        commits = sampled.stdout.splitlines()
    else:
        commits = []
        failures.append("rev-list 讀取失敗或逾時")

    # update() keeps the key order established by the template above.
    summary.update(
        exists=True,
        head_sha=head_sha,
        head_short=head_sha[:7],
        branch=branch,
        commit_sample_count=len(commits),
        commits=commits,
        remotes=read_remotes(repo_path),
        probe_error=";".join(failures),
    )
    return summary
|
||
|
||
|
||
def compare_repos(left: dict[str, object], right: dict[str, object]) -> dict[str, object]:
    """Classify how two repo summaries relate, using only their sampled SHAs.

    The first matching rule wins, in this order:

    1. ``missing_repo`` - either side has a falsy ``exists``.
    2. ``same_head`` - both heads are equal and non-empty.
    3. ``partial_probe`` - either side carries a ``probe_error``.
    4. ``left_descends_from_right`` - right's head is among left's commits.
    5. ``right_descends_from_left`` - left's head is among right's commits.
    6. ``shared_history`` - the commit samples overlap.
    7. ``no_shared_history`` - none of the above.

    Args:
        left: Summary dict of the first repo.
        right: Summary dict of the second repo.

    Returns:
        A dict with both labels, the relation, both heads, the number of
        common sampled commits and up to five of them (sorted).
    """
    left_commits = set(left.get("commits", []))
    right_commits = set(right.get("commits", []))
    left_head = str(left.get("head_sha") or "")
    right_head = str(right.get("head_sha") or "")
    shared = sorted(left_commits.intersection(right_commits))

    both_present = bool(left.get("exists")) and bool(right.get("exists"))
    any_probe_error = bool(left.get("probe_error")) or bool(right.get("probe_error"))
    # Ordered (condition, relation) table; the conditions are cheap and free
    # of side effects, so evaluating all of them up front is harmless.
    rules = (
        (not both_present, "missing_repo"),
        (bool(left_head) and left_head == right_head, "same_head"),
        (any_probe_error, "partial_probe"),
        (bool(right_head) and right_head in left_commits, "left_descends_from_right"),
        (bool(left_head) and left_head in right_commits, "right_descends_from_left"),
        (bool(shared), "shared_history"),
    )
    relation = next((name for matched, name in rules if matched), "no_shared_history")

    return {
        "left_label": left["label"],
        "right_label": right["label"],
        "relation": relation,
        "left_head": left_head,
        "right_head": right_head,
        "common_commit_count": len(shared),
        "common_commit_samples": shared[:5],
    }
|
||
|
||
|
||
def build_payload(
    group_name: str,
    repo_args: list[tuple[str, Path]],
    sample_limit: int,
    git_timeout: int,
) -> dict[str, object]:
    """Probe every repo, compare each unordered pair and derive a group status.

    Status rules, first match wins:

    * ``partial`` - at least one comparison is ``partial_probe``.
    * ``mixed`` - some pairs are related and some have no shared history.
    * ``related`` - at least one pair is related (same head, ancestry or
      shared history) and none has ``no_shared_history``.
    * ``unrelated`` - at least one pair has ``no_shared_history`` and none is
      related.
    * ``partial`` - nothing conclusive was compared (no pairs at all, or
      every pair involved a missing repo).

    Args:
        group_name: Free-form name copied into the payload.
        repo_args: ``(label, path)`` pairs to probe, in output order.
        sample_limit: Maximum number of commits sampled per repo.
        git_timeout: Per-command git timeout in seconds.

    Returns:
        The JSON-serialisable payload (schema ``local_repo_canonical_probe_v1``).
    """
    repos = [repo_summary(label, path, sample_limit, git_timeout) for label, path in repo_args]
    comparisons = [
        compare_repos(left, right)
        for position, left in enumerate(repos)
        for right in repos[position + 1 :]
    ]

    relations = {item["relation"] for item in comparisons}
    related_kinds = {"same_head", "left_descends_from_right", "right_descends_from_left", "shared_history"}
    partial = "partial_probe" in relations
    related = bool(relations & related_kinds)
    no_shared = "no_shared_history" in relations

    if partial:
        status = "partial"
    elif related and no_shared:
        status = "mixed"
    elif related:
        status = "related"
    elif no_shared:
        # Only claim "unrelated" when a comparison actually found no shared
        # history; pairs that merely involve a missing repo prove nothing.
        status = "unrelated"
    else:
        status = "partial"

    return {
        "schema_version": "local_repo_canonical_probe_v1",
        "group_name": group_name,
        "status": status,
        "sample_limit": sample_limit,
        "git_timeout_seconds": git_timeout,
        "repo_count": len(repos),
        "comparison_count": len(comparisons),
        "repos": repos,
        "comparisons": comparisons,
    }
|
||
|
||
|
||
def write_markdown(payload: dict[str, object], path: Path) -> None:
    """Render the probe payload as a Markdown snapshot and write it to ``path``.

    The document has three parts: a key/value table of the payload's scalar
    fields, one table row per repo (label, path, branch, short HEAD, remotes)
    and one row per pairwise comparison, followed by a closing note. Entries
    of ``repos``, ``comparisons`` and ``remotes`` that are not dicts are
    skipped.

    Args:
        payload: Payload as produced by ``build_payload``.
        path: Destination file, written as UTF-8.
    """

    def table_row(cells: list[str]) -> str:
        return "| " + " | ".join(cells) + " |"

    def code(value: object) -> str:
        return f"`{value}`"

    overview = (
        ("群組", "group_name"),
        ("狀態", "status"),
        ("repo 數", "repo_count"),
        ("比對數", "comparison_count"),
        ("sample limit", "sample_limit"),
        ("git timeout seconds", "git_timeout_seconds"),
    )

    out = ["# 本機 Repo Canonical Lineage Probe 快照", "", "| 項目 | 值 |", "|------|----|"]
    out += [table_row([title, code(payload[key])]) for title, key in overview]
    out += [
        "",
        "## Repo HEAD",
        "",
        "| Label | Path | Branch | HEAD | Remotes |",
        "|-------|------|--------|------|---------|",
    ]

    for entry in payload.get("repos", []):
        if not isinstance(entry, dict):
            continue
        remote_cells = [
            f"`{item.get('name', '')}:{item.get('url_redacted', '')}`"
            for item in entry.get("remotes", [])
            if isinstance(item, dict)
        ]
        cells = [code(entry.get(key, "")) for key in ("label", "repo_path", "branch", "head_short")]
        # A repo without remotes gets a plain dash instead of an empty cell.
        cells.append(", ".join(remote_cells) or "-")
        out.append(table_row(cells))

    out += [
        "",
        "## Lineage 比對",
        "",
        "| Left | Right | Relation | Common commits |",
        "|------|-------|----------|----------------|",
    ]

    for entry in payload.get("comparisons", []):
        if not isinstance(entry, dict):
            continue
        out.append(
            table_row(
                [
                    code(entry.get("left_label", "")),
                    code(entry.get("right_label", "")),
                    code(entry.get("relation", "")),
                    code(entry.get("common_commit_count", 0)),
                ]
            )
        )

    out += [
        "",
        "> 注意:本檔只比較本機 Git 物件,未 fetch 遠端;common commit sample 只用 SHA,不含 commit message。",
        "",
    ]
    path.write_text("\n".join(out), encoding="utf-8")
|
||
|
||
|
||
def main() -> int:
    """Command-line entry point: probe the given repos and write both reports.

    Required options are ``--group-name``, ``--repo`` (repeatable,
    ``label=path``), ``--output-json`` and ``--output-md``. ``--sample-limit``
    defaults to 5000 and ``--git-timeout`` to 10.

    Returns:
        ``0`` after both output files have been written.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--group-name", required=True)
    cli.add_argument("--repo", action="append", type=parse_repo_arg, required=True)
    cli.add_argument("--sample-limit", type=int, default=5000)
    cli.add_argument("--git-timeout", type=int, default=10)
    cli.add_argument("--output-json", required=True)
    cli.add_argument("--output-md", required=True)
    options = cli.parse_args()

    payload = build_payload(
        options.group_name,
        options.repo,
        options.sample_limit,
        options.git_timeout,
    )

    # ensure_ascii=False keeps non-ASCII text readable in the JSON file.
    rendered = json.dumps(payload, ensure_ascii=False, indent=2) + "\n"
    json_target = Path(options.output_json)
    json_target.write_text(rendered, encoding="utf-8")

    write_markdown(payload, Path(options.output_md))
    return 0
|
||
|
||
|
||
if __name__ == "__main__":
    # Script entry: main()'s return value becomes the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|