Files
awoooi/scripts/security/local-repo-canonical-probe.py
Your Name 9e15fd08b3
All checks were successful
CD Pipeline / tests (push) Successful in 1m39s
Code Review / ai-code-review (push) Successful in 15s
CD Pipeline / build-and-deploy (push) Successful in 5m19s
CD Pipeline / post-deploy-checks (push) Successful in 2m11s
feat(web): land iwooos security posture surfaces
2026-05-25 20:35:52 +08:00

308 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""本機 repo canonical lineage 只讀探測。
此工具比較多個本機 Git working tree 的 HEAD、branch、remote 與近期
commit ancestry，協助判斷它們是否可能屬於同一個 canonical repo。
它不 fetch、不 push、不修改 remote，也不讀取 commit message。
"""
from __future__ import annotations
import argparse
import configparser
import json
import subprocess
from pathlib import Path
from urllib.parse import urlsplit, urlunsplit
def redact_url(value: str) -> str:
    """Return *value* with any embedded user info (credentials) removed.

    Two remote formats are handled:

    * scp-like ``user@host:path`` (no ``://``): everything up to and including
      the first ``@`` is dropped when the remainder contains a ``:``.
    * URL form ``scheme://user:secret@host/path``: the user-info part of the
      network location is dropped; scheme, path, query and fragment are kept.

    Values matching neither form are returned unchanged.
    """
    if "://" not in value:
        _, at_sign, remainder = value.partition("@")
        if at_sign and ":" in remainder:
            return remainder
        return value
    parts = urlsplit(value)
    # The host is whatever follows the LAST "@": a password may itself contain
    # a literal "@", and splitting on the first one would leak its tail.
    host = parts.netloc.rpartition("@")[2]
    return urlunsplit((parts.scheme, host, parts.path, parts.query, parts.fragment))
def run_git(repo: Path, args: list[str], timeout: int) -> subprocess.CompletedProcess[str]:
    """Run ``git <args>`` with *repo* as working directory and return the result.

    A non-zero exit never raises (``check=False``); stdout and stderr are
    captured as text. Failures to run the command at all are mapped to
    synthetic results so callers only need to inspect ``returncode``:

    * no result within *timeout* seconds -> return code 124
    * the process cannot be started (for example ``git`` is not installed or
      *repo* is not an accessible directory) -> return code 127
    """
    command = ["git", *args]
    try:
        return subprocess.run(
            command,
            cwd=repo,
            check=False,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return subprocess.CompletedProcess(command, 124, "", "git command timeout")
    except OSError as exc:
        # Starting the process raises OSError (e.g. FileNotFoundError) when the
        # executable or the working directory is missing; report it as a
        # failed probe instead of aborting the whole run.
        return subprocess.CompletedProcess(command, 127, "", f"git command failed to start: {exc}")
def git_value(repo: Path, args: list[str], timeout: int) -> str:
    """Return the stripped stdout of ``git <args>`` run in *repo*.

    An empty string is returned when the command exits with a non-zero code.
    """
    completed = run_git(repo, args, timeout)
    succeeded = completed.returncode == 0
    return completed.stdout.strip() if succeeded else ""
def _common_dir_config(gitdir: Path) -> Path | None:
    """Return the ``config`` of the directory named in ``<gitdir>/commondir``, or ``None``."""
    pointer = gitdir / "commondir"
    if not pointer.is_file():
        return None
    raw_common = pointer.read_text(encoding="utf-8", errors="replace").strip()
    if not raw_common:
        return None
    common_dir = Path(raw_common)
    if not common_dir.is_absolute():
        # A relative commondir entry is relative to the gitdir that holds it.
        common_dir = (gitdir / common_dir).resolve()
    config_path = common_dir / "config"
    return config_path if config_path.exists() else None


def git_config_path(repo_path: Path) -> Path | None:
    """Locate the Git ``config`` file that applies to the working tree *repo_path*.

    Layouts handled:

    * ``.git`` is a directory: ``.git/config`` is used.
    * ``.git`` is a file with a ``gitdir: <path>`` line: ``<path>/config`` is
      used; a relative ``<path>`` is resolved against *repo_path*.
    * that gitdir has no ``config`` of its own but has a ``commondir`` file
      (the layout of a linked worktree): the ``config`` inside the directory
      named by that file is used.

    Returns ``None`` when no existing config file can be found.
    """
    git_path = repo_path / ".git"
    if git_path.is_dir():
        config_path = git_path / "config"
        return config_path if config_path.exists() else None
    if not git_path.is_file():
        return None
    text = git_path.read_text(encoding="utf-8", errors="replace")
    for line in text.splitlines():
        if not line.startswith("gitdir:"):
            continue
        gitdir = Path(line.split(":", 1)[1].strip())
        if not gitdir.is_absolute():
            gitdir = (repo_path / gitdir).resolve()
        config_path = gitdir / "config"
        if config_path.exists():
            return config_path
        # Only the first gitdir line is considered; fall back to the shared
        # (common) directory when this gitdir carries no config itself.
        return _common_dir_config(gitdir)
    return None
def remote_name(section: str) -> str | None:
    """Extract ``NAME`` from a config section titled ``remote "NAME"``.

    Returns ``None`` for any section title that does not have that shape.
    """
    opener, closer = 'remote "', '"'
    looks_like_remote = section.startswith(opener) and section.endswith(closer)
    if not looks_like_remote:
        return None
    return section[len(opener) : len(section) - len(closer)]
def read_remotes(repo_path: Path) -> list[dict[str, str]]:
    """Return the remotes configured for the working tree at *repo_path*.

    Each entry is ``{"name": <remote name>, "url_redacted": <url>}`` where the
    URL has been passed through ``redact_url``. Remote sections without a
    ``url`` value are skipped. An empty list is returned when no config file
    is found or the file cannot be parsed.
    """
    config_path = git_config_path(repo_path)
    if config_path is None:
        return []
    # Git accepts a bare ``key`` line (shorthand for ``key = true``); without
    # allow_no_value configparser rejects such a file with ParsingError.
    parser = configparser.RawConfigParser(strict=False, allow_no_value=True)
    try:
        parser.read(config_path, encoding="utf-8")
    except (configparser.Error, UnicodeDecodeError):
        # Git's config grammar is not exactly INI. This probe is best-effort,
        # so an unparseable file means "no remotes known", not a crash.
        return []
    remotes: list[dict[str, str]] = []
    for section in parser.sections():
        name = remote_name(section)
        if not name:
            continue
        # None covers both "no url option" and "url present without a value".
        url = parser.get(section, "url", fallback=None)
        if url is None:
            continue
        remotes.append({"name": name, "url_redacted": redact_url(url.strip())})
    return remotes
def parse_repo_arg(value: str) -> tuple[str, Path]:
    """Convert a ``--repo`` value of the form ``label=path`` into ``(label, Path)``.

    The value is split at the first ``=``. The label is returned stripped of
    surrounding whitespace; the path has ``~`` expanded and is resolved.

    Raises:
        argparse.ArgumentTypeError: if there is no ``=``, or if the label or
            the path is blank.
    """
    label, separator, raw_path = value.partition("=")
    if not separator:
        raise argparse.ArgumentTypeError("--repo 必須是 label=/absolute/path")
    label = label.strip()
    if label == "" or raw_path.strip() == "":
        raise argparse.ArgumentTypeError("--repo label 與 path 不可為空")
    resolved = Path(raw_path).expanduser().resolve()
    return label, resolved
def repo_summary(label: str, repo_path: Path, sample_limit: int, git_timeout: int) -> dict[str, object]:
    """Collect a read-only summary of the Git working tree at *repo_path*.

    Args:
        label: Caller-chosen name for the repo, copied into the result.
        repo_path: Working tree directory; it must contain a ``.git`` entry.
        sample_limit: Passed to ``git rev-list --max-count`` for the commit sample.
        git_timeout: Timeout in seconds applied to each git invocation.

    Returns:
        A dict with the keys ``label``, ``repo_path``, ``exists``, ``head_sha``,
        ``head_short`` (first 7 characters of ``head_sha``), ``branch``,
        ``commit_sample_count``, ``commits``, ``remotes`` and ``probe_error``.
        When ``.git`` is absent, ``exists`` is ``False``, the data fields are
        empty and ``probe_error`` is ``"repo missing"``. Otherwise
        ``probe_error`` lists every failed probe step, or is empty.
    """
    if not (repo_path / ".git").exists():
        return {
            "label": label,
            "repo_path": str(repo_path),
            "exists": False,
            "head_sha": "",
            "head_short": "",
            "branch": "",
            "commit_sample_count": 0,
            "commits": [],
            "remotes": [],
            "probe_error": "repo missing",
        }
    probe_errors: list[str] = []

    head_result = run_git(repo_path, ["rev-parse", "HEAD"], git_timeout)
    if head_result.returncode == 0:
        head_sha = head_result.stdout.strip()
    else:
        head_sha = ""
        probe_errors.append("HEAD 讀取失敗或逾時")

    branch = git_value(repo_path, ["rev-parse", "--abbrev-ref", "HEAD"], git_timeout)

    rev_list_result = run_git(repo_path, ["rev-list", f"--max-count={sample_limit}", "HEAD"], git_timeout)
    if rev_list_result.returncode == 0:
        commits = rev_list_result.stdout.splitlines()
    else:
        commits = []
        probe_errors.append("rev-list 讀取失敗或逾時")

    return {
        "label": label,
        "repo_path": str(repo_path),
        "exists": True,
        "head_sha": head_sha,
        "head_short": head_sha[:7],
        "branch": branch,
        "commit_sample_count": len(commits),
        "commits": commits,
        "remotes": read_remotes(repo_path),
        # Separate the messages so two failures do not run together into one
        # unreadable string.
        "probe_error": "; ".join(probe_errors),
    }
def compare_repos(left: dict[str, object], right: dict[str, object]) -> dict[str, object]:
    """Classify how two repo summaries relate, based on HEADs and sampled commits.

    The relation is the first matching entry of:

    * ``missing_repo`` - either summary has a falsy ``exists``
    * ``same_head`` - both HEAD SHAs are equal and non-empty
    * ``partial_probe`` - either summary carries a ``probe_error``
    * ``left_descends_from_right`` - right HEAD is in left's commit sample
    * ``right_descends_from_left`` - left HEAD is in right's commit sample
    * ``shared_history`` - the commit samples overlap
    * ``no_shared_history`` - none of the above

    Returns a dict with both labels, the relation, both HEAD SHAs, the number
    of common sampled commits and up to five of them (sorted).
    """
    head_l = str(left.get("head_sha") or "")
    head_r = str(right.get("head_sha") or "")
    shas_l = set(left.get("commits", []))
    shas_r = set(right.get("commits", []))
    shared = sorted(shas_l.intersection(shas_r))

    # Ordered rule table: the first rule whose condition holds wins.
    rules = (
        ("missing_repo", not (left.get("exists") and right.get("exists"))),
        ("same_head", bool(head_l) and head_l == head_r),
        ("partial_probe", bool(left.get("probe_error") or right.get("probe_error"))),
        ("left_descends_from_right", bool(head_r) and head_r in shas_l),
        ("right_descends_from_left", bool(head_l) and head_l in shas_r),
        ("shared_history", bool(shared)),
        ("no_shared_history", True),
    )
    relation = next(name for name, holds in rules if holds)

    result: dict[str, object] = {}
    result["left_label"] = left["label"]
    result["right_label"] = right["label"]
    result["relation"] = relation
    result["left_head"] = head_l
    result["right_head"] = head_r
    result["common_commit_count"] = len(shared)
    result["common_commit_samples"] = shared[:5]
    return result
def build_payload(
    group_name: str,
    repo_args: list[tuple[str, Path]],
    sample_limit: int,
    git_timeout: int,
) -> dict[str, object]:
    """Probe every repo, compare each unordered pair, and derive a group status.

    Args:
        group_name: Name copied into the payload.
        repo_args: ``(label, path)`` pairs of the repos to probe.
        sample_limit: Commit sample size forwarded to each repo probe.
        git_timeout: Per-command git timeout in seconds.

    Returns:
        The payload dict (schema ``local_repo_canonical_probe_v1``). Its
        ``status`` is:

        * ``partial`` - there is nothing to compare, or at least one pair
          could not be fully evaluated (``partial_probe`` or ``missing_repo``)
        * ``mixed`` - some pairs are related and some share no history
        * ``related`` - every pair is related
        * ``unrelated`` - no pair shares history
    """
    repos = [repo_summary(label, path, sample_limit, git_timeout) for label, path in repo_args]
    comparisons = [
        compare_repos(left, right)
        for left_index, left in enumerate(repos)
        for right in repos[left_index + 1 :]
    ]
    relations = {item["relation"] for item in comparisons}
    # A pair involving a missing repo says nothing about lineage, so it must
    # not let the group fall through to "unrelated" (or pass as "related").
    incomplete = bool(relations & {"partial_probe", "missing_repo"})
    related = bool(
        relations
        & {"same_head", "left_descends_from_right", "right_descends_from_left", "shared_history"}
    )
    no_shared = "no_shared_history" in relations
    if incomplete or not comparisons:
        status = "partial"
    elif related and no_shared:
        status = "mixed"
    elif related:
        status = "related"
    else:
        status = "unrelated"
    return {
        "schema_version": "local_repo_canonical_probe_v1",
        "group_name": group_name,
        "status": status,
        "sample_limit": sample_limit,
        "git_timeout_seconds": git_timeout,
        "repo_count": len(repos),
        "comparison_count": len(comparisons),
        "repos": repos,
        "comparisons": comparisons,
    }
def _md_cell(value: object) -> str:
    """Render *value* as an inline-code table cell, escaping ``|`` so it cannot split the row."""
    return "`" + str(value).replace("|", "\\|") + "`"


def write_markdown(payload: dict[str, object], path: Path) -> None:
    """Write a Markdown snapshot of *payload* to *path* (UTF-8).

    The document has three parts: a key/value table of the group fields, a
    "Repo HEAD" table with one row per repo (label, path, branch, short HEAD,
    remotes) and a "Lineage" table with one row per comparison. Entries of
    ``repos`` / ``comparisons`` / ``remotes`` that are not dicts are skipped.
    A repo without remotes shows ``-`` in the remotes column.

    Raises:
        KeyError: if one of the top-level summary keys is missing from *payload*.
    """
    lines = [
        "# 本機 Repo Canonical Lineage Probe 快照",
        "",
        "| 項目 | 值 |",
        "|------|----|",
        f"| 群組 | {_md_cell(payload['group_name'])} |",
        f"| 狀態 | {_md_cell(payload['status'])} |",
        f"| repo 數 | {_md_cell(payload['repo_count'])} |",
        f"| 比對數 | {_md_cell(payload['comparison_count'])} |",
        f"| sample limit | {_md_cell(payload['sample_limit'])} |",
        f"| git timeout seconds | {_md_cell(payload['git_timeout_seconds'])} |",
        "",
        "## Repo HEAD",
        "",
        "| Label | Path | Branch | HEAD | Remotes |",
        "|-------|------|--------|------|---------|",
    ]
    for repo in payload.get("repos", []):
        if not isinstance(repo, dict):
            continue
        remote_text = ", ".join(
            _md_cell(f"{remote.get('name', '')}:{remote.get('url_redacted', '')}")
            for remote in repo.get("remotes", [])
            if isinstance(remote, dict)
        )
        cells = [
            _md_cell(repo.get("label", "")),
            _md_cell(repo.get("repo_path", "")),
            _md_cell(repo.get("branch", "")),
            _md_cell(repo.get("head_short", "")),
            remote_text or "-",
        ]
        lines.append("| " + " | ".join(cells) + " |")
    lines.extend(
        [
            "",
            "## Lineage 比對",
            "",
            "| Left | Right | Relation | Common commits |",
            "|------|-------|----------|----------------|",
        ]
    )
    for comparison in payload.get("comparisons", []):
        if not isinstance(comparison, dict):
            continue
        cells = [
            _md_cell(comparison.get("left_label", "")),
            _md_cell(comparison.get("right_label", "")),
            _md_cell(comparison.get("relation", "")),
            _md_cell(comparison.get("common_commit_count", 0)),
        ]
        lines.append("| " + " | ".join(cells) + " |")
    lines.extend(
        [
            "",
            # Clause separators restored; the note previously ran its clauses together.
            "> 注意:本檔只比較本機 Git 物件,未 fetch 遠端;common commit sample 只用 SHA,不含 commit message。",
            "",
        ]
    )
    # The trailing empty entry makes the file end with a newline.
    path.write_text("\n".join(lines), encoding="utf-8")
def main() -> int:
    """Parse the command line, run the probe, and write the JSON and Markdown reports.

    Required options are ``--group-name``, ``--repo`` (repeatable,
    ``label=path``), ``--output-json`` and ``--output-md``; ``--sample-limit``
    defaults to 5000 and ``--git-timeout`` to 10.

    Returns:
        ``0`` after both output files have been written.
    """
    cli = argparse.ArgumentParser()
    # Declared in a table, registered in this exact order.
    option_table = (
        ("--group-name", {"required": True}),
        ("--repo", {"action": "append", "type": parse_repo_arg, "required": True}),
        ("--sample-limit", {"type": int, "default": 5000}),
        ("--git-timeout", {"type": int, "default": 10}),
        ("--output-json", {"required": True}),
        ("--output-md", {"required": True}),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    options = cli.parse_args()

    payload = build_payload(
        options.group_name,
        options.repo,
        options.sample_limit,
        options.git_timeout,
    )
    serialized = json.dumps(payload, ensure_ascii=False, indent=2) + "\n"
    Path(options.output_json).write_text(serialized, encoding="utf-8")
    write_markdown(payload, Path(options.output_md))
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())