Files
awoooi/scripts/security/local-git-remote-inventory.py
Your Name 9e15fd08b3
All checks were successful
CD Pipeline / tests (push) Successful in 1m39s
Code Review / ai-code-review (push) Successful in 15s
CD Pipeline / build-and-deploy (push) Successful in 5m19s
CD Pipeline / post-deploy-checks (push) Successful in 2m11s
feat(web): land iwooos security posture surfaces
2026-05-25 20:35:52 +08:00

353 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""本機 Git remote 只讀盤點工具。
此工具掃描指定 root 底下可見的 Git working tree，讀取 `.git/config`
中的 remote URL，並在輸出前移除 URL 內的帳密。它不會 fetch、push、
修改 remote，也不會連線到 GitHub 或 Gitea。
"""
from __future__ import annotations
import argparse
import configparser
import json
import os
import sys
from pathlib import Path
from urllib.parse import urlsplit, urlunsplit
# Directory basenames that the scan skips by default. main() seeds its
# exclusion set from this constant and adds any --exclude-name values to it.
DEFAULT_EXCLUDE_NAMES = {
    # Dot-prefixed directories.
    ".cache", ".cargo", ".claude", ".codex", ".gemini", ".git", ".gradle",
    ".npm", ".nvm", ".openclaw", ".pyenv", ".rustup", ".Trash", ".venv",
    # Other directory names.
    "__pycache__", "node_modules", "venv",
    "Applications", "Applications (Parallels)", "Caches", "DerivedData",
    "Library", "Movies", "Music", "Parallels", "Pictures",
}
def redact_url(value: str) -> str:
    """Return *value* with embedded credentials removed.

    Two remote spellings are handled:

    * URL form (``scheme://user:secret@host/path``): the userinfo part of the
      network location is dropped and the URL is reassembled from the
      remaining components.
    * scp-like form (``user@host:path``): everything up to and including the
      first ``@`` is dropped, provided a ``:`` appears after it.

    Any other value is returned unchanged.
    """
    if "://" not in value:
        _userinfo, at_sign, remainder = value.partition("@")
        if at_sign and ":" in remainder:
            return remainder
        return value
    parts = urlsplit(value)
    # The host begins after the LAST "@" of the netloc. Cutting at the first
    # "@" would leave the tail of a password that itself contains "@"
    # (e.g. "user:p@ss@host") in the supposedly redacted output.
    host = parts.netloc.rpartition("@")[2]
    return urlunsplit((parts.scheme, host, parts.path, parts.query, parts.fragment))
def repo_slug_from_url(value: str) -> str:
    """Extract the repository path (without a ``.git`` suffix) from a remote.

    The value is passed through ``redact_url`` first. For ``scheme://`` URLs
    the URL path is used; for values containing ``:`` the text after the
    first ``:`` is used; otherwise the whole value is used. Surrounding
    slashes are stripped in every case.
    """
    cleaned = redact_url(value).removesuffix("/")
    if "://" in cleaned:
        repo_path = urlsplit(cleaned).path
    elif ":" in cleaned:
        _host, _colon, repo_path = cleaned.partition(":")
    else:
        repo_path = cleaned
    return repo_path.strip("/").removesuffix(".git")
def classify_remote(url: str, gitea_fragment: str) -> str:
    """Classify a remote URL by case-insensitive substring matching.

    Args:
        url: Remote URL to classify.
        gitea_fragment: Substring identifying the Gitea host. An empty
            fragment never matches.

    Returns:
        ``"gitea"``, ``"github"``, ``"gitlab_110"``, ``"internal_git_110"``
        or ``"other"``; the first matching rule wins, in that order.
    """
    lowered = url.lower()
    fragment = gitea_fragment.lower()
    # "" is a substring of every string, so an empty fragment would otherwise
    # label every remote as Gitea.
    if fragment and fragment in lowered:
        return "gitea"
    # Order matters: the port-qualified rule must be tried before the bare
    # address, which is a substring of it.
    fixed_rules = (
        ("github.com", "github"),
        ("192.168.0.110:8929", "gitlab_110"),
        ("192.168.0.110", "internal_git_110"),
    )
    for needle, kind in fixed_rules:
        if needle in lowered:
            return kind
    return "other"
def git_config_path(repo_path: Path) -> Path | None:
git_path = repo_path / ".git"
if git_path.is_dir():
config_path = git_path / "config"
return config_path if config_path.exists() else None
if not git_path.is_file():
return None
text = git_path.read_text(encoding="utf-8", errors="replace")
for line in text.splitlines():
if line.startswith("gitdir:"):
raw_gitdir = line.split(":", 1)[1].strip()
gitdir = Path(raw_gitdir)
if not gitdir.is_absolute():
gitdir = (repo_path / gitdir).resolve()
config_path = gitdir / "config"
return config_path if config_path.exists() else None
return None
def remote_name(section: str) -> str | None:
prefix = 'remote "'
if section.startswith(prefix) and section.endswith('"'):
return section[len(prefix) : -1]
return None
def read_remotes(repo_path: Path, gitea_fragment: str) -> list[dict[str, str]]:
    """Read the remotes configured for the working tree at *repo_path*.

    Args:
        repo_path: Working tree whose Git config should be inspected.
        gitea_fragment: Host fragment forwarded to ``classify_remote``.

    Returns:
        One dict per ``remote "<name>"`` section that has a ``url`` option,
        with keys ``name``, ``kind``, ``url_redacted`` and ``repo_slug``.
        An empty list is returned when no config file is found or when it
        cannot be parsed.
    """
    config_path = git_config_path(repo_path)
    if config_path is None:
        return []
    # allow_no_value: Git accepts a bare key (e.g. "bare") as shorthand for
    # boolean true, which configparser rejects by default.
    parser = configparser.RawConfigParser(strict=False, allow_no_value=True)
    try:
        parser.read(config_path, encoding="utf-8")
    except (configparser.Error, UnicodeDecodeError, OSError) as exc:
        # One malformed config must not abort the whole inventory. Only the
        # exception type is reported because parser messages quote the
        # offending lines, which could contain credentials.
        print(
            f"warning: skipping unparseable git config {config_path} ({type(exc).__name__})",
            file=sys.stderr,
        )
        return []
    remotes: list[dict[str, str]] = []
    for section in parser.sections():
        name = remote_name(section)
        if not name or not parser.has_option(section, "url"):
            continue
        # With allow_no_value a bare "url" key yields None rather than "".
        raw_url = (parser.get(section, "url") or "").strip()
        redacted_url = redact_url(raw_url)
        remotes.append(
            {
                "name": name,
                "kind": classify_remote(redacted_url, gitea_fragment),
                "url_redacted": redacted_url,
                "repo_slug": repo_slug_from_url(redacted_url),
            }
        )
    return remotes
def should_skip_dir(path: Path, root: Path, max_depth: int, exclude_names: set[str]) -> bool:
    """Decide whether the directory walk should ignore *path*.

    A directory is skipped when its basename is in *exclude_names*, when it
    is not located under *root*, or when it lies more than *max_depth* path
    components below *root*.
    """
    excluded_by_name = path.name in exclude_names
    if not excluded_by_name:
        try:
            relative = path.relative_to(root)
        except ValueError:
            # Not under root at all.
            return True
        return len(relative.parts) > max_depth
    return True
def find_repos(roots: list[Path], max_depth: int, exclude_names: set[str]) -> list[Path]:
    """Walk each existing root and collect directories that contain ``.git``.

    Directories rejected by ``should_skip_dir`` are pruned from the walk, and
    the walk does not descend into a directory once it has been recorded as
    a repository. Results are resolved paths, de-duplicated and sorted by
    their string form.
    """
    found: dict[str, Path] = {}
    for root in roots:
        if not root.exists():
            continue
        for dirpath, subdirs, _ in os.walk(root):
            here = Path(dirpath)
            if should_skip_dir(here, root, max_depth, exclude_names):
                # Emptying the list in place stops os.walk from descending.
                subdirs.clear()
            elif (here / ".git").exists():
                resolved = here.resolve()
                found[str(resolved)] = resolved
                subdirs.clear()
            else:
                subdirs[:] = [
                    child
                    for child in subdirs
                    if not should_skip_dir(here / child, root, max_depth, exclude_names)
                ]
    return sorted(found.values(), key=str)
def summarize_repo(repo_path: Path, remotes: list[dict[str, str]]) -> dict[str, object]:
    """Build the per-repository summary record.

    Remote slugs are grouped by kind (``gitea``, ``github``, and the two
    ``*_110`` kinds together) and a status is derived from which groups are
    non-empty: ``mapped``, ``gitea_only_local``, ``github_only_local``,
    ``internal_110_only`` or ``other_remote``.
    """
    gitea_slugs: list[str] = []
    github_slugs: list[str] = []
    internal_slugs: list[str] = []
    bucket_for_kind = {
        "gitea": gitea_slugs,
        "github": github_slugs,
        "internal_git_110": internal_slugs,
        "gitlab_110": internal_slugs,
    }
    for remote in remotes:
        bucket = bucket_for_kind.get(remote["kind"])
        if bucket is not None:
            bucket.append(remote["repo_slug"])

    if gitea_slugs:
        status = "mapped" if github_slugs else "gitea_only_local"
    elif github_slugs:
        status = "github_only_local"
    else:
        status = "internal_110_only" if internal_slugs else "other_remote"

    summary: dict[str, object] = {}
    summary["repo_path"] = str(repo_path)
    summary["repo_name"] = repo_path.name
    summary["status"] = status
    summary["gitea_repos"] = sorted(set(gitea_slugs))
    summary["github_repos"] = sorted(set(github_slugs))
    summary["internal_110_repos"] = sorted(set(internal_slugs))
    summary["remotes"] = remotes
    return summary
def build_inventory(
    roots: list[Path],
    max_depth: int,
    exclude_names: set[str],
    gitea_fragment: str,
) -> dict[str, object]:
    """Scan *roots* and assemble the full inventory document.

    Every repository found by ``find_repos`` is summarised with
    ``summarize_repo``. The result carries the scan parameters, per-status
    counts, the de-duplicated slug lists, and the per-repository records.
    The top-level ``status`` is ``"empty"`` when no repository was found and
    ``"partial"`` otherwise.
    """
    repos = [
        summarize_repo(location, read_remotes(location, gitea_fragment))
        for location in find_repos(roots, max_depth, exclude_names)
    ]

    def count_with_status(wanted: str) -> int:
        return sum(1 for repo in repos if repo["status"] == wanted)

    def count_linked(key: str) -> int:
        return sum(1 for repo in repos if repo[key])

    def distinct_slugs(key: str) -> list[str]:
        seen = set()
        for repo in repos:
            for item in repo.get(key, []):
                if isinstance(item, str):
                    seen.add(item)
        return sorted(seen)

    unique_gitea = distinct_slugs("gitea_repos")
    unique_github = distinct_slugs("github_repos")
    unique_internal_110 = distinct_slugs("internal_110_repos")

    return {
        "schema_version": "local_git_remote_inventory_v1",
        "status": "partial" if repos else "empty",
        "roots": [str(root) for root in roots],
        "max_depth": max_depth,
        "gitea_host_fragment": gitea_fragment,
        "repo_count": len(repos),
        "gitea_linked_count": count_linked("gitea_repos"),
        "github_linked_count": count_linked("github_repos"),
        "mapped_count": count_with_status("mapped"),
        "gitea_only_count": count_with_status("gitea_only_local"),
        "github_only_count": count_with_status("github_only_local"),
        "internal_110_only_count": count_with_status("internal_110_only"),
        "unique_gitea_repo_count": len(unique_gitea),
        "unique_github_repo_count": len(unique_github),
        "unique_internal_110_repo_count": len(unique_internal_110),
        "unique_gitea_repos": unique_gitea,
        "unique_github_repos": unique_github,
        "unique_internal_110_repos": unique_internal_110,
        "repos": repos,
    }
def write_markdown(inventory: dict[str, object], path: Path) -> None:
    """Render *inventory* as a Markdown report and write it to *path*.

    The report has a summary table, a per-repository table (entries of
    ``inventory["repos"]`` that are not dicts are ignored) and a closing
    note. The file is written as UTF-8.
    """

    def slug_cell(items) -> str:
        # Back-quote each slug; an empty list renders as "-".
        return ", ".join(f"`{item}`" for item in items) or "-"

    summary_rows = [
        ("狀態", inventory["status"]),
        ("掃描 root", ", ".join(inventory["roots"])),
        ("max depth", inventory["max_depth"]),
        ("Gitea host fragment", inventory["gitea_host_fragment"]),
        ("repo 數量", inventory["repo_count"]),
        ("Gitea linked", inventory["gitea_linked_count"]),
        ("GitHub linked", inventory["github_linked_count"]),
        ("mapped", inventory["mapped_count"]),
        ("Gitea-only local", inventory["gitea_only_count"]),
        ("GitHub-only local", inventory["github_only_count"]),
        ("Internal 110-only local", inventory["internal_110_only_count"]),
        ("去重後 Gitea repo", inventory["unique_gitea_repo_count"]),
        ("去重後 GitHub repo", inventory["unique_github_repo_count"]),
        ("去重後 110 內部 repo", inventory["unique_internal_110_repo_count"]),
    ]

    out = ["# 本機 Git Remote 盤點快照", "", "| 項目 | 值 |", "|------|----|"]
    out += [f"| {label} | `{value}` |" for label, value in summary_rows]
    out += [
        "",
        "## Repo 對照",
        "",
        "| 狀態 | 本機路徑 | Gitea repo | GitHub repo | 110 內部 remote |",
        "|------|----------|------------|-------------|----------------|",
    ]

    entries = inventory.get("repos")
    if isinstance(entries, list):
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            cells = (
                f"`{entry.get('status', '')}`",
                f"`{entry.get('repo_path', '')}`",
                slug_cell(entry.get("gitea_repos", [])),
                slug_cell(entry.get("github_repos", [])),
                slug_cell(entry.get("internal_110_repos", [])),
            )
            out.append("| " + " | ".join(cells) + " |")

    out += [
        "",
        "> 注意:本檔只代表本機指定 roots 可見的 Git working tree不等同 Gitea server 全量 repo 清單。",
        "> 輸出前已移除 remote URL 中的 username、password、token。",
        "",
    ]
    path.write_text("\n".join(out), encoding="utf-8")
def main() -> int:
    """Command-line entry point.

    Parses the arguments, builds the inventory for the requested roots and
    writes it to the JSON and Markdown output paths.

    Returns:
        ``2`` when no repository was found (a message is printed to stderr),
        otherwise ``0``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--root", action="append", required=True)
    cli.add_argument("--max-depth", type=int, default=4)
    cli.add_argument("--exclude-name", action="append", default=[])
    cli.add_argument("--gitea-host-fragment", default="192.168.0.110:3001")
    cli.add_argument("--output-json", required=True)
    cli.add_argument("--output-md", required=True)
    options = cli.parse_args()

    scan_roots = [Path(raw).expanduser().resolve() for raw in options.root]
    # Defaults plus any names supplied on the command line.
    excluded = {*DEFAULT_EXCLUDE_NAMES, *options.exclude_name}

    inventory = build_inventory(
        roots=scan_roots,
        max_depth=options.max_depth,
        exclude_names=excluded,
        gitea_fragment=options.gitea_host_fragment,
    )

    json_text = json.dumps(inventory, ensure_ascii=False, indent=2) + "\n"
    Path(options.output_json).write_text(json_text, encoding="utf-8")
    write_markdown(inventory, Path(options.output_md))

    if inventory["status"] != "empty":
        return 0
    print("沒有找到本機 Git working tree", file=sys.stderr)
    return 2


if __name__ == "__main__":
    sys.exit(main())